{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 14460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006915629322268327, "grad_norm": 3.9518067836761475, "learning_rate": 3.4578146611341636e-08, "log_odds_chosen": 0.4893013536930084, "log_odds_ratio": -0.5827709436416626, "logits/chosen": -1.040260672569275, "logits/rejected": -1.050236701965332, "logps/chosen": -2.8453192710876465, "logps/rejected": -3.280632495880127, "loss": 9.734, "nll_loss": 2.3752317428588867, "rewards/accuracies": 0.625, "rewards/chosen": -0.28453195095062256, "rewards/margins": 0.043531302362680435, "rewards/rejected": -0.3280632495880127, "step": 1 }, { "epoch": 0.0013831258644536654, "grad_norm": 2.5233092308044434, "learning_rate": 6.915629322268327e-08, "log_odds_chosen": 0.0836641862988472, "log_odds_ratio": -0.6804438233375549, "logits/chosen": -1.2517393827438354, "logits/rejected": -1.226942539215088, "logps/chosen": -3.62654972076416, "logps/rejected": -3.6911375522613525, "loss": 7.2619, "nll_loss": 1.747441291809082, "rewards/accuracies": 0.75, "rewards/chosen": -0.36265498399734497, "rewards/margins": 0.0064587704837322235, "rewards/rejected": -0.3691137433052063, "step": 2 }, { "epoch": 0.002074688796680498, "grad_norm": 2.5664076805114746, "learning_rate": 1.037344398340249e-07, "log_odds_chosen": -0.4574589431285858, "log_odds_ratio": -0.9838818311691284, "logits/chosen": -1.0710841417312622, "logits/rejected": -1.0666425228118896, "logps/chosen": -2.612703800201416, "logps/rejected": -2.2216196060180664, "loss": 7.7538, "nll_loss": 1.840064525604248, "rewards/accuracies": 0.25, "rewards/chosen": -0.2612703740596771, "rewards/margins": -0.039108406752347946, "rewards/rejected": -0.22216194868087769, "step": 3 }, { "epoch": 0.0027662517289073307, "grad_norm": 3.470735788345337, "learning_rate": 1.3831258644536654e-07, "log_odds_chosen": -0.5373206734657288, "log_odds_ratio": -1.1326806545257568, "logits/chosen": -1.2449288368225098, "logits/rejected": -1.2681998014450073, "logps/chosen": -3.79819393157959, "logps/rejected": -3.27523136138916, "loss": 9.8075, "nll_loss": 2.3386147022247314, "rewards/accuracies": 0.25, "rewards/chosen": -0.3798193633556366, "rewards/margins": -0.052296221256256104, "rewards/rejected": -0.3275231420993805, "step": 4 }, { "epoch": 0.003457814661134163, "grad_norm": 2.069977283477783, "learning_rate": 1.7289073305670816e-07, "log_odds_chosen": 0.06638160347938538, "log_odds_ratio": -0.7242591977119446, "logits/chosen": -0.8228747844696045, "logits/rejected": -0.8221093416213989, "logps/chosen": -2.0583748817443848, "logps/rejected": -2.0935425758361816, "loss": 7.1762, "nll_loss": 1.7216312885284424, "rewards/accuracies": 0.625, "rewards/chosen": -0.20583748817443848, "rewards/margins": 0.003516765311360359, "rewards/rejected": -0.2093542516231537, "step": 5 }, { "epoch": 0.004149377593360996, "grad_norm": 3.3388400077819824, "learning_rate": 2.074688796680498e-07, "log_odds_chosen": -0.002388477325439453, "log_odds_ratio": -0.961333692073822, "logits/chosen": -1.0542824268341064, "logits/rejected": -1.0340723991394043, "logps/chosen": -3.8489990234375, "logps/rejected": -3.853835344314575, "loss": 10.4565, "nll_loss": 2.5179810523986816, "rewards/accuracies": 0.625, "rewards/chosen": -0.38489991426467896, "rewards/margins": 0.0004836246371269226, "rewards/rejected": -0.3853835463523865, "step": 6 }, { "epoch": 0.004840940525587829, "grad_norm": 2.113387107849121, "learning_rate": 2.420470262793914e-07, "log_odds_chosen": -0.07964649051427841, "log_odds_ratio": -0.7827624678611755, "logits/chosen": -1.0113275051116943, "logits/rejected": -1.021159052848816, "logps/chosen": -3.271230459213257, "logps/rejected": -3.1815080642700195, "loss": 7.4722, "nll_loss": 1.7897660732269287, "rewards/accuracies": 0.5, "rewards/chosen": -0.32712307572364807, "rewards/margins": -0.008972249925136566, "rewards/rejected": -0.3181508183479309, "step": 7 }, { "epoch": 0.005532503457814661, "grad_norm": 2.046140670776367, "learning_rate": 2.766251728907331e-07, "log_odds_chosen": 0.09409669041633606, "log_odds_ratio": -0.7343361377716064, "logits/chosen": -1.4247064590454102, "logits/rejected": -1.4529236555099487, "logps/chosen": -2.1046290397644043, "logps/rejected": -2.0878255367279053, "loss": 8.2393, "nll_loss": 1.9863979816436768, "rewards/accuracies": 0.625, "rewards/chosen": -0.21046291291713715, "rewards/margins": -0.0016803601756691933, "rewards/rejected": -0.20878255367279053, "step": 8 }, { "epoch": 0.006224066390041493, "grad_norm": 4.004306793212891, "learning_rate": 3.112033195020747e-07, "log_odds_chosen": -0.39638814330101013, "log_odds_ratio": -1.0122816562652588, "logits/chosen": -1.4044495820999146, "logits/rejected": -1.4218255281448364, "logps/chosen": -4.350630760192871, "logps/rejected": -3.954375982284546, "loss": 10.5476, "nll_loss": 2.5356690883636475, "rewards/accuracies": 0.5, "rewards/chosen": -0.43506306409835815, "rewards/margins": -0.03962545841932297, "rewards/rejected": -0.395437628030777, "step": 9 }, { "epoch": 0.006915629322268326, "grad_norm": 4.281415939331055, "learning_rate": 3.457814661134163e-07, "log_odds_chosen": 0.08228078484535217, "log_odds_ratio": -0.9024434089660645, "logits/chosen": -0.951551079750061, "logits/rejected": -0.9782785177230835, "logps/chosen": -3.5066018104553223, "logps/rejected": -3.498887777328491, "loss": 9.1385, "nll_loss": 2.1943864822387695, "rewards/accuracies": 0.5, "rewards/chosen": -0.3506602346897125, "rewards/margins": -0.0007714331150054932, "rewards/rejected": -0.34988880157470703, "step": 10 }, { "epoch": 0.007607192254495159, "grad_norm": 3.4988086223602295, "learning_rate": 3.8035961272475794e-07, "log_odds_chosen": -0.7721527218818665, "log_odds_ratio": -1.4881058931350708, "logits/chosen": -1.3219575881958008, "logits/rejected": -1.3483083248138428, "logps/chosen": -3.6868557929992676, "logps/rejected": -2.9282896518707275, "loss": 9.4678, "nll_loss": 2.218129873275757, "rewards/accuracies": 0.25, "rewards/chosen": -0.3686855435371399, "rewards/margins": -0.07585658878087997, "rewards/rejected": -0.2928289771080017, "step": 11 }, { "epoch": 0.008298755186721992, "grad_norm": 3.7678544521331787, "learning_rate": 4.149377593360996e-07, "log_odds_chosen": -0.027068674564361572, "log_odds_ratio": -0.837154746055603, "logits/chosen": -1.2728548049926758, "logits/rejected": -1.2553644180297852, "logps/chosen": -3.4807701110839844, "logps/rejected": -3.46420955657959, "loss": 9.862, "nll_loss": 2.381793260574341, "rewards/accuracies": 0.5, "rewards/chosen": -0.34807702898979187, "rewards/margins": -0.00165606290102005, "rewards/rejected": -0.34642094373703003, "step": 12 }, { "epoch": 0.008990318118948824, "grad_norm": 4.905233860015869, "learning_rate": 4.495159059474412e-07, "log_odds_chosen": -0.32843559980392456, "log_odds_ratio": -1.060568928718567, "logits/chosen": -1.2958381175994873, "logits/rejected": -1.3102128505706787, "logps/chosen": -3.4880740642547607, "logps/rejected": -3.1222989559173584, "loss": 9.009, "nll_loss": 2.146188259124756, "rewards/accuracies": 0.5, "rewards/chosen": -0.3488073945045471, "rewards/margins": -0.0365775004029274, "rewards/rejected": -0.3122299015522003, "step": 13 }, { "epoch": 0.009681881051175657, "grad_norm": 2.7104506492614746, "learning_rate": 4.840940525587828e-07, "log_odds_chosen": -0.008607611060142517, "log_odds_ratio": -0.7274478673934937, "logits/chosen": -1.0369820594787598, "logits/rejected": -1.02875816822052, "logps/chosen": -2.9381771087646484, "logps/rejected": -2.95135498046875, "loss": 8.9553, "nll_loss": 2.166069507598877, "rewards/accuracies": 0.625, "rewards/chosen": -0.2938177287578583, "rewards/margins": 0.001317792572081089, "rewards/rejected": -0.2951355278491974, "step": 14 }, { "epoch": 0.01037344398340249, "grad_norm": 2.7053277492523193, "learning_rate": 5.186721991701245e-07, "log_odds_chosen": -0.06366106122732162, "log_odds_ratio": -0.7992855310440063, "logits/chosen": -1.388371229171753, "logits/rejected": -1.3940556049346924, "logps/chosen": -2.585225820541382, "logps/rejected": -2.4857850074768066, "loss": 7.9704, "nll_loss": 1.9126801490783691, "rewards/accuracies": 0.625, "rewards/chosen": -0.2585225999355316, "rewards/margins": -0.009944092482328415, "rewards/rejected": -0.2485785037279129, "step": 15 }, { "epoch": 0.011065006915629323, "grad_norm": 1.4869391918182373, "learning_rate": 5.532503457814662e-07, "log_odds_chosen": 0.05625841021537781, "log_odds_ratio": -0.6945948600769043, "logits/chosen": -1.3012064695358276, "logits/rejected": -1.3060497045516968, "logps/chosen": -2.5380234718322754, "logps/rejected": -2.5695972442626953, "loss": 6.1568, "nll_loss": 1.46973717212677, "rewards/accuracies": 0.625, "rewards/chosen": -0.2538023591041565, "rewards/margins": 0.003157366067171097, "rewards/rejected": -0.2569597363471985, "step": 16 }, { "epoch": 0.011756569847856155, "grad_norm": 4.137122631072998, "learning_rate": 5.878284923928077e-07, "log_odds_chosen": -0.10792502760887146, "log_odds_ratio": -0.7970627546310425, "logits/chosen": -1.3739025592803955, "logits/rejected": -1.3377840518951416, "logps/chosen": -3.974924325942993, "logps/rejected": -3.8697359561920166, "loss": 10.8296, "nll_loss": 2.627690315246582, "rewards/accuracies": 0.375, "rewards/chosen": -0.3974924385547638, "rewards/margins": -0.010518837720155716, "rewards/rejected": -0.38697361946105957, "step": 17 }, { "epoch": 0.012448132780082987, "grad_norm": 2.9468846321105957, "learning_rate": 6.224066390041494e-07, "log_odds_chosen": 0.20884117484092712, "log_odds_ratio": -0.723279595375061, "logits/chosen": -1.262000560760498, "logits/rejected": -1.3064842224121094, "logps/chosen": -3.247279644012451, "logps/rejected": -3.4324727058410645, "loss": 8.8942, "nll_loss": 2.151226758956909, "rewards/accuracies": 0.625, "rewards/chosen": -0.32472798228263855, "rewards/margins": 0.018519282341003418, "rewards/rejected": -0.3432472348213196, "step": 18 }, { "epoch": 0.01313969571230982, "grad_norm": 3.104160785675049, "learning_rate": 6.569847856154911e-07, "log_odds_chosen": -0.3776034414768219, "log_odds_ratio": -1.2113221883773804, "logits/chosen": -1.3295966386795044, "logits/rejected": -1.3142446279525757, "logps/chosen": -3.445866584777832, "logps/rejected": -3.0813324451446533, "loss": 8.868, "nll_loss": 2.0958707332611084, "rewards/accuracies": 0.75, "rewards/chosen": -0.34458667039871216, "rewards/margins": -0.03645342215895653, "rewards/rejected": -0.30813324451446533, "step": 19 }, { "epoch": 0.013831258644536652, "grad_norm": 2.2052125930786133, "learning_rate": 6.915629322268326e-07, "log_odds_chosen": 0.24842804670333862, "log_odds_ratio": -0.7053789496421814, "logits/chosen": -1.1727490425109863, "logits/rejected": -1.2424461841583252, "logps/chosen": -2.45582914352417, "logps/rejected": -2.6469976902008057, "loss": 8.6392, "nll_loss": 2.0892579555511475, "rewards/accuracies": 0.5, "rewards/chosen": -0.2455829232931137, "rewards/margins": 0.019116871058940887, "rewards/rejected": -0.264699786901474, "step": 20 }, { "epoch": 0.014522821576763486, "grad_norm": 2.8067734241485596, "learning_rate": 7.261410788381743e-07, "log_odds_chosen": 0.139600932598114, "log_odds_ratio": -0.6512514352798462, "logits/chosen": -1.1420315504074097, "logits/rejected": -1.1386845111846924, "logps/chosen": -3.4955501556396484, "logps/rejected": -3.6302804946899414, "loss": 9.1101, "nll_loss": 2.2123937606811523, "rewards/accuracies": 0.625, "rewards/chosen": -0.3495550751686096, "rewards/margins": 0.013473005034029484, "rewards/rejected": -0.36302804946899414, "step": 21 }, { "epoch": 0.015214384508990318, "grad_norm": 3.0798892974853516, "learning_rate": 7.607192254495159e-07, "log_odds_chosen": 0.3492021858692169, "log_odds_ratio": -0.6532081961631775, "logits/chosen": -1.3779373168945312, "logits/rejected": -1.3605176210403442, "logps/chosen": -3.1275863647460938, "logps/rejected": -3.465847969055176, "loss": 8.2735, "nll_loss": 2.0030555725097656, "rewards/accuracies": 0.75, "rewards/chosen": -0.3127586543560028, "rewards/margins": 0.03382612764835358, "rewards/rejected": -0.3465847671031952, "step": 22 }, { "epoch": 0.01590594744121715, "grad_norm": 3.224397659301758, "learning_rate": 7.952973720608575e-07, "log_odds_chosen": 0.9248179197311401, "log_odds_ratio": -0.5056746006011963, "logits/chosen": -0.9516175389289856, "logits/rejected": -1.020263671875, "logps/chosen": -2.944352149963379, "logps/rejected": -3.7298731803894043, "loss": 9.1044, "nll_loss": 2.2255210876464844, "rewards/accuracies": 0.75, "rewards/chosen": -0.2944352328777313, "rewards/margins": 0.07855206727981567, "rewards/rejected": -0.3729872703552246, "step": 23 }, { "epoch": 0.016597510373443983, "grad_norm": 2.069481611251831, "learning_rate": 8.298755186721992e-07, "log_odds_chosen": -0.09091458469629288, "log_odds_ratio": -0.7582843899726868, "logits/chosen": -0.9845443964004517, "logits/rejected": -1.0020672082901, "logps/chosen": -2.7381744384765625, "logps/rejected": -2.6412293910980225, "loss": 7.4084, "nll_loss": 1.7762739658355713, "rewards/accuracies": 0.5, "rewards/chosen": -0.27381742000579834, "rewards/margins": -0.009694469161331654, "rewards/rejected": -0.26412296295166016, "step": 24 }, { "epoch": 0.017289073305670817, "grad_norm": 2.335552930831909, "learning_rate": 8.644536652835409e-07, "log_odds_chosen": -0.03683727979660034, "log_odds_ratio": -0.7664669752120972, "logits/chosen": -0.9482793807983398, "logits/rejected": -0.9446390867233276, "logps/chosen": -2.417475461959839, "logps/rejected": -2.370396614074707, "loss": 7.387, "nll_loss": 1.7701003551483154, "rewards/accuracies": 0.625, "rewards/chosen": -0.24174755811691284, "rewards/margins": -0.00470789335668087, "rewards/rejected": -0.23703965544700623, "step": 25 }, { "epoch": 0.017980636237897647, "grad_norm": 4.149723529815674, "learning_rate": 8.990318118948824e-07, "log_odds_chosen": 0.2577996253967285, "log_odds_ratio": -0.634295642375946, "logits/chosen": -1.2715983390808105, "logits/rejected": -1.2856321334838867, "logps/chosen": -3.455782413482666, "logps/rejected": -3.7151052951812744, "loss": 9.5517, "nll_loss": 2.3244986534118652, "rewards/accuracies": 0.5, "rewards/chosen": -0.34557828307151794, "rewards/margins": 0.025932257995009422, "rewards/rejected": -0.3715105354785919, "step": 26 }, { "epoch": 0.01867219917012448, "grad_norm": 3.2110884189605713, "learning_rate": 9.336099585062241e-07, "log_odds_chosen": -0.0950632095336914, "log_odds_ratio": -0.8977338075637817, "logits/chosen": -0.823533296585083, "logits/rejected": -0.8555487990379333, "logps/chosen": -3.454474449157715, "logps/rejected": -3.363515853881836, "loss": 8.3972, "nll_loss": 2.0095200538635254, "rewards/accuracies": 0.375, "rewards/chosen": -0.34544745087623596, "rewards/margins": -0.009095855057239532, "rewards/rejected": -0.33635157346725464, "step": 27 }, { "epoch": 0.019363762102351315, "grad_norm": 2.4229812622070312, "learning_rate": 9.681881051175657e-07, "log_odds_chosen": 0.26231566071510315, "log_odds_ratio": -0.5937422513961792, "logits/chosen": -1.199454426765442, "logits/rejected": -1.2336453199386597, "logps/chosen": -2.7520751953125, "logps/rejected": -3.018535614013672, "loss": 6.768, "nll_loss": 1.6326138973236084, "rewards/accuracies": 0.625, "rewards/chosen": -0.27520751953125, "rewards/margins": 0.026646055281162262, "rewards/rejected": -0.30185356736183167, "step": 28 }, { "epoch": 0.020055325034578148, "grad_norm": 2.8843135833740234, "learning_rate": 1.0027662517289075e-06, "log_odds_chosen": 0.5056191682815552, "log_odds_ratio": -0.5950756669044495, "logits/chosen": -1.338259220123291, "logits/rejected": -1.3438336849212646, "logps/chosen": -2.64237642288208, "logps/rejected": -3.1244683265686035, "loss": 8.0822, "nll_loss": 1.9610333442687988, "rewards/accuracies": 0.625, "rewards/chosen": -0.264237642288208, "rewards/margins": 0.048209186643362045, "rewards/rejected": -0.31244683265686035, "step": 29 }, { "epoch": 0.02074688796680498, "grad_norm": 4.502927303314209, "learning_rate": 1.037344398340249e-06, "log_odds_chosen": -0.34477755427360535, "log_odds_ratio": -1.1222412586212158, "logits/chosen": -0.8900103569030762, "logits/rejected": -0.8709148168563843, "logps/chosen": -3.582998275756836, "logps/rejected": -3.256059169769287, "loss": 7.2353, "nll_loss": 1.696608543395996, "rewards/accuracies": 0.625, "rewards/chosen": -0.3582998216152191, "rewards/margins": -0.032693929970264435, "rewards/rejected": -0.3256058990955353, "step": 30 }, { "epoch": 0.021438450899031812, "grad_norm": 2.193732976913452, "learning_rate": 1.0719225449515906e-06, "log_odds_chosen": -0.05980326980352402, "log_odds_ratio": -0.740838348865509, "logits/chosen": -1.2712316513061523, "logits/rejected": -1.2823163270950317, "logps/chosen": -2.9276375770568848, "logps/rejected": -2.8619937896728516, "loss": 7.2937, "nll_loss": 1.749345302581787, "rewards/accuracies": 0.375, "rewards/chosen": -0.29276376962661743, "rewards/margins": -0.006564359180629253, "rewards/rejected": -0.2861993908882141, "step": 31 }, { "epoch": 0.022130013831258646, "grad_norm": 3.5711543560028076, "learning_rate": 1.1065006915629324e-06, "log_odds_chosen": 0.08109360188245773, "log_odds_ratio": -0.9470595121383667, "logits/chosen": -1.3242299556732178, "logits/rejected": -1.3326642513275146, "logps/chosen": -3.59979510307312, "logps/rejected": -3.677622079849243, "loss": 9.7337, "nll_loss": 2.338721752166748, "rewards/accuracies": 0.375, "rewards/chosen": -0.359979510307312, "rewards/margins": 0.007782714441418648, "rewards/rejected": -0.3677622079849243, "step": 32 }, { "epoch": 0.022821576763485476, "grad_norm": 2.2873423099517822, "learning_rate": 1.141078838174274e-06, "log_odds_chosen": 0.6533421874046326, "log_odds_ratio": -0.4733458459377289, "logits/chosen": -1.2940324544906616, "logits/rejected": -1.2814232110977173, "logps/chosen": -3.2225732803344727, "logps/rejected": -3.8556478023529053, "loss": 7.6812, "nll_loss": 1.8729662895202637, "rewards/accuracies": 0.875, "rewards/chosen": -0.3222573399543762, "rewards/margins": 0.06330744922161102, "rewards/rejected": -0.38556480407714844, "step": 33 }, { "epoch": 0.02351313969571231, "grad_norm": 3.838637590408325, "learning_rate": 1.1756569847856155e-06, "log_odds_chosen": -0.2496761828660965, "log_odds_ratio": -1.0469396114349365, "logits/chosen": -1.2598438262939453, "logits/rejected": -1.233662486076355, "logps/chosen": -3.9419360160827637, "logps/rejected": -3.6916069984436035, "loss": 8.944, "nll_loss": 2.1313016414642334, "rewards/accuracies": 0.5, "rewards/chosen": -0.3941935896873474, "rewards/margins": -0.02503291144967079, "rewards/rejected": -0.3691607117652893, "step": 34 }, { "epoch": 0.024204702627939143, "grad_norm": 2.1356067657470703, "learning_rate": 1.2102351313969573e-06, "log_odds_chosen": 0.15863251686096191, "log_odds_ratio": -0.6830345392227173, "logits/chosen": -0.9747909307479858, "logits/rejected": -0.9530578851699829, "logps/chosen": -3.0132992267608643, "logps/rejected": -3.1510977745056152, "loss": 6.7208, "nll_loss": 1.6118903160095215, "rewards/accuracies": 0.75, "rewards/chosen": -0.30132991075515747, "rewards/margins": 0.013779876753687859, "rewards/rejected": -0.3151097893714905, "step": 35 }, { "epoch": 0.024896265560165973, "grad_norm": 3.4909298419952393, "learning_rate": 1.2448132780082988e-06, "log_odds_chosen": -0.2739093005657196, "log_odds_ratio": -1.0388438701629639, "logits/chosen": -1.1628073453903198, "logits/rejected": -1.1294801235198975, "logps/chosen": -3.61557936668396, "logps/rejected": -3.371711492538452, "loss": 9.8512, "nll_loss": 2.3589138984680176, "rewards/accuracies": 0.125, "rewards/chosen": -0.3615579605102539, "rewards/margins": -0.024386780336499214, "rewards/rejected": -0.33717119693756104, "step": 36 }, { "epoch": 0.025587828492392807, "grad_norm": 3.2615578174591064, "learning_rate": 1.2793914246196404e-06, "log_odds_chosen": 0.03353884071111679, "log_odds_ratio": -0.7446186542510986, "logits/chosen": -1.1005709171295166, "logits/rejected": -1.0909405946731567, "logps/chosen": -2.8883144855499268, "logps/rejected": -2.917815923690796, "loss": 9.5219, "nll_loss": 2.3060121536254883, "rewards/accuracies": 0.625, "rewards/chosen": -0.2888314723968506, "rewards/margins": 0.0029501384124159813, "rewards/rejected": -0.29178160429000854, "step": 37 }, { "epoch": 0.02627939142461964, "grad_norm": 2.5613675117492676, "learning_rate": 1.3139695712309822e-06, "log_odds_chosen": -0.23992179334163666, "log_odds_ratio": -0.9381309747695923, "logits/chosen": -1.258825421333313, "logits/rejected": -1.2358593940734863, "logps/chosen": -3.4032936096191406, "logps/rejected": -3.213279962539673, "loss": 7.4326, "nll_loss": 1.7643382549285889, "rewards/accuracies": 0.5, "rewards/chosen": -0.3403293788433075, "rewards/margins": -0.019001349806785583, "rewards/rejected": -0.3213280439376831, "step": 38 }, { "epoch": 0.026970954356846474, "grad_norm": 2.939889669418335, "learning_rate": 1.3485477178423237e-06, "log_odds_chosen": -1.1127384901046753, "log_odds_ratio": -1.6729886531829834, "logits/chosen": -1.371311068534851, "logits/rejected": -1.34170401096344, "logps/chosen": -3.899885416030884, "logps/rejected": -2.8833463191986084, "loss": 8.3659, "nll_loss": 1.9241644144058228, "rewards/accuracies": 0.5, "rewards/chosen": -0.38998857140541077, "rewards/margins": -0.10165394842624664, "rewards/rejected": -0.2883346378803253, "step": 39 }, { "epoch": 0.027662517289073305, "grad_norm": 2.1610751152038574, "learning_rate": 1.3831258644536653e-06, "log_odds_chosen": 0.07632875442504883, "log_odds_ratio": -0.7144181728363037, "logits/chosen": -1.1104373931884766, "logits/rejected": -1.1167352199554443, "logps/chosen": -2.4445266723632812, "logps/rejected": -2.515613555908203, "loss": 7.8256, "nll_loss": 1.884954571723938, "rewards/accuracies": 0.5, "rewards/chosen": -0.24445268511772156, "rewards/margins": 0.0071086823008954525, "rewards/rejected": -0.25156137347221375, "step": 40 }, { "epoch": 0.028354080221300138, "grad_norm": 3.2550132274627686, "learning_rate": 1.417704011065007e-06, "log_odds_chosen": -0.283014178276062, "log_odds_ratio": -0.8731192350387573, "logits/chosen": -1.4890809059143066, "logits/rejected": -1.45075523853302, "logps/chosen": -3.078587770462036, "logps/rejected": -2.8364899158477783, "loss": 8.9882, "nll_loss": 2.159730911254883, "rewards/accuracies": 0.375, "rewards/chosen": -0.30785876512527466, "rewards/margins": -0.02420978806912899, "rewards/rejected": -0.2836489975452423, "step": 41 }, { "epoch": 0.029045643153526972, "grad_norm": 2.4653944969177246, "learning_rate": 1.4522821576763486e-06, "log_odds_chosen": 0.2099519520998001, "log_odds_ratio": -0.6373003125190735, "logits/chosen": -1.3268687725067139, "logits/rejected": -1.3488104343414307, "logps/chosen": -2.963639974594116, "logps/rejected": -3.1906325817108154, "loss": 8.3625, "nll_loss": 2.0268898010253906, "rewards/accuracies": 0.5, "rewards/chosen": -0.2963640093803406, "rewards/margins": 0.022699259221553802, "rewards/rejected": -0.319063276052475, "step": 42 }, { "epoch": 0.029737206085753802, "grad_norm": 3.8058722019195557, "learning_rate": 1.4868603042876902e-06, "log_odds_chosen": -0.4854162335395813, "log_odds_ratio": -1.0482676029205322, "logits/chosen": -1.4056271314620972, "logits/rejected": -1.3884409666061401, "logps/chosen": -3.5880017280578613, "logps/rejected": -3.1194963455200195, "loss": 7.3837, "nll_loss": 1.7411028146743774, "rewards/accuracies": 0.25, "rewards/chosen": -0.35880017280578613, "rewards/margins": -0.04685051366686821, "rewards/rejected": -0.3119496703147888, "step": 43 }, { "epoch": 0.030428769017980636, "grad_norm": 3.0560855865478516, "learning_rate": 1.5214384508990318e-06, "log_odds_chosen": -0.056353405117988586, "log_odds_ratio": -0.8186872601509094, "logits/chosen": -1.2890833616256714, "logits/rejected": -1.2734991312026978, "logps/chosen": -4.082864761352539, "logps/rejected": -4.05389928817749, "loss": 9.6216, "nll_loss": 2.323543071746826, "rewards/accuracies": 0.5, "rewards/chosen": -0.408286452293396, "rewards/margins": -0.0028965286910533905, "rewards/rejected": -0.4053899347782135, "step": 44 }, { "epoch": 0.03112033195020747, "grad_norm": 3.232548475265503, "learning_rate": 1.5560165975103735e-06, "log_odds_chosen": 0.009376328438520432, "log_odds_ratio": -0.7260125875473022, "logits/chosen": -1.0361629724502563, "logits/rejected": -1.0547370910644531, "logps/chosen": -2.6392018795013428, "logps/rejected": -2.6416492462158203, "loss": 9.7193, "nll_loss": 2.3572349548339844, "rewards/accuracies": 0.625, "rewards/chosen": -0.2639201879501343, "rewards/margins": 0.00024472130462527275, "rewards/rejected": -0.26416492462158203, "step": 45 }, { "epoch": 0.0318118948824343, "grad_norm": 1.8216197490692139, "learning_rate": 1.590594744121715e-06, "log_odds_chosen": 0.4674333333969116, "log_odds_ratio": -0.5335142612457275, "logits/chosen": -1.0167092084884644, "logits/rejected": -1.0238672494888306, "logps/chosen": -2.955474376678467, "logps/rejected": -3.4067180156707764, "loss": 6.3723, "nll_loss": 1.5397170782089233, "rewards/accuracies": 0.75, "rewards/chosen": -0.2955474257469177, "rewards/margins": 0.0451243557035923, "rewards/rejected": -0.3406718075275421, "step": 46 }, { "epoch": 0.03250345781466114, "grad_norm": 3.082559823989868, "learning_rate": 1.6251728907330569e-06, "log_odds_chosen": -0.2621142566204071, "log_odds_ratio": -0.874950110912323, "logits/chosen": -1.1282384395599365, "logits/rejected": -1.116199254989624, "logps/chosen": -3.469964027404785, "logps/rejected": -3.223210334777832, "loss": 8.8783, "nll_loss": 2.132092237472534, "rewards/accuracies": 0.5, "rewards/chosen": -0.3469964265823364, "rewards/margins": -0.02467537298798561, "rewards/rejected": -0.3223210573196411, "step": 47 }, { "epoch": 0.03319502074688797, "grad_norm": 2.0938286781311035, "learning_rate": 1.6597510373443984e-06, "log_odds_chosen": 0.11893421411514282, "log_odds_ratio": -0.8492321968078613, "logits/chosen": -0.9938136339187622, "logits/rejected": -0.9602646827697754, "logps/chosen": -2.0542774200439453, "logps/rejected": -2.28214955329895, "loss": 6.8048, "nll_loss": 1.6162786483764648, "rewards/accuracies": 0.375, "rewards/chosen": -0.20542772114276886, "rewards/margins": 0.022787224501371384, "rewards/rejected": -0.22821494936943054, "step": 48 }, { "epoch": 0.0338865836791148, "grad_norm": 2.0529825687408447, "learning_rate": 1.69432918395574e-06, "log_odds_chosen": 0.47180014848709106, "log_odds_ratio": -0.5816795825958252, "logits/chosen": -1.263287901878357, "logits/rejected": -1.2728713750839233, "logps/chosen": -3.0481815338134766, "logps/rejected": -3.511841058731079, "loss": 6.6536, "nll_loss": 1.6052374839782715, "rewards/accuracies": 0.5, "rewards/chosen": -0.30481815338134766, "rewards/margins": 0.04636598750948906, "rewards/rejected": -0.3511841297149658, "step": 49 }, { "epoch": 0.034578146611341634, "grad_norm": 2.3218564987182617, "learning_rate": 1.7289073305670818e-06, "log_odds_chosen": -0.06447839736938477, "log_odds_ratio": -0.772865355014801, "logits/chosen": -1.3173282146453857, "logits/rejected": -1.3115019798278809, "logps/chosen": -2.42673659324646, "logps/rejected": -2.3573877811431885, "loss": 6.855, "nll_loss": 1.6364696025848389, "rewards/accuracies": 0.625, "rewards/chosen": -0.24267366528511047, "rewards/margins": -0.006934887729585171, "rewards/rejected": -0.23573878407478333, "step": 50 }, { "epoch": 0.035269709543568464, "grad_norm": 4.621151924133301, "learning_rate": 1.7634854771784233e-06, "log_odds_chosen": -0.05129563808441162, "log_odds_ratio": -0.8750765323638916, "logits/chosen": -1.0982708930969238, "logits/rejected": -1.1156631708145142, "logps/chosen": -2.895352363586426, "logps/rejected": -2.7839388847351074, "loss": 10.8387, "nll_loss": 2.6221795082092285, "rewards/accuracies": 0.625, "rewards/chosen": -0.2895352244377136, "rewards/margins": -0.011141344904899597, "rewards/rejected": -0.2783938944339752, "step": 51 }, { "epoch": 0.035961272475795295, "grad_norm": 1.4168874025344849, "learning_rate": 1.798063623789765e-06, "log_odds_chosen": -0.29925307631492615, "log_odds_ratio": -0.8848166465759277, "logits/chosen": -1.0895323753356934, "logits/rejected": -1.0248337984085083, "logps/chosen": -2.1519713401794434, "logps/rejected": -1.9345920085906982, "loss": 5.7303, "nll_loss": 1.3440864086151123, "rewards/accuracies": 0.25, "rewards/chosen": -0.2151971459388733, "rewards/margins": -0.021737944334745407, "rewards/rejected": -0.19345919787883759, "step": 52 }, { "epoch": 0.03665283540802213, "grad_norm": 2.1725051403045654, "learning_rate": 1.8326417704011067e-06, "log_odds_chosen": 0.03270392119884491, "log_odds_ratio": -0.7351056337356567, "logits/chosen": -1.3653390407562256, "logits/rejected": -1.3562443256378174, "logps/chosen": -3.246081829071045, "logps/rejected": -3.289919137954712, "loss": 6.9984, "nll_loss": 1.676098346710205, "rewards/accuracies": 0.5, "rewards/chosen": -0.32460817694664, "rewards/margins": 0.004383735358715057, "rewards/rejected": -0.32899191975593567, "step": 53 }, { "epoch": 0.03734439834024896, "grad_norm": 2.737290859222412, "learning_rate": 1.8672199170124482e-06, "log_odds_chosen": 0.3347844183444977, "log_odds_ratio": -0.6068406105041504, "logits/chosen": -1.3338276147842407, "logits/rejected": -1.3446011543273926, "logps/chosen": -3.3161258697509766, "logps/rejected": -3.630776882171631, "loss": 8.3089, "nll_loss": 2.016543388366699, "rewards/accuracies": 0.75, "rewards/chosen": -0.33161258697509766, "rewards/margins": 0.03146512061357498, "rewards/rejected": -0.36307770013809204, "step": 54 }, { "epoch": 0.03803596127247579, "grad_norm": 2.663578510284424, "learning_rate": 1.9017980636237896e-06, "log_odds_chosen": 0.4287371337413788, "log_odds_ratio": -0.5677598714828491, "logits/chosen": -1.2210997343063354, "logits/rejected": -1.272507667541504, "logps/chosen": -2.382035732269287, "logps/rejected": -2.683682918548584, "loss": 7.7922, "nll_loss": 1.8912622928619385, "rewards/accuracies": 0.625, "rewards/chosen": -0.23820357024669647, "rewards/margins": 0.03016471117734909, "rewards/rejected": -0.26836830377578735, "step": 55 }, { "epoch": 0.03872752420470263, "grad_norm": 1.5175490379333496, "learning_rate": 1.9363762102351314e-06, "log_odds_chosen": -0.08139551430940628, "log_odds_ratio": -0.7952121496200562, "logits/chosen": -0.8203100562095642, "logits/rejected": -0.8402999043464661, "logps/chosen": -2.7972769737243652, "logps/rejected": -2.7121500968933105, "loss": 6.0176, "nll_loss": 1.4248862266540527, "rewards/accuracies": 0.5, "rewards/chosen": -0.2797277271747589, "rewards/margins": -0.008512692525982857, "rewards/rejected": -0.27121502161026, "step": 56 }, { "epoch": 0.03941908713692946, "grad_norm": 2.924398899078369, "learning_rate": 1.970954356846473e-06, "log_odds_chosen": -0.7466222643852234, "log_odds_ratio": -1.2244271039962769, "logits/chosen": -1.4082560539245605, "logits/rejected": -1.334429383277893, "logps/chosen": -3.302445888519287, "logps/rejected": -2.6759796142578125, "loss": 8.1842, "nll_loss": 1.9236195087432861, "rewards/accuracies": 0.125, "rewards/chosen": -0.3302445709705353, "rewards/margins": -0.06264658272266388, "rewards/rejected": -0.2675980031490326, "step": 57 }, { "epoch": 0.040110650069156296, "grad_norm": 2.6948745250701904, "learning_rate": 2.005532503457815e-06, "log_odds_chosen": -0.20200856029987335, "log_odds_ratio": -0.8478542566299438, "logits/chosen": -0.728827714920044, "logits/rejected": -0.7348635196685791, "logps/chosen": -2.619049072265625, "logps/rejected": -2.4454312324523926, "loss": 7.6473, "nll_loss": 1.8270339965820312, "rewards/accuracies": 0.5, "rewards/chosen": -0.26190489530563354, "rewards/margins": -0.01736176386475563, "rewards/rejected": -0.2445431351661682, "step": 58 }, { "epoch": 0.04080221300138313, "grad_norm": 2.7902441024780273, "learning_rate": 2.0401106500691565e-06, "log_odds_chosen": -0.33980101346969604, "log_odds_ratio": -1.0024666786193848, "logits/chosen": -0.9672735333442688, "logits/rejected": -0.9773224592208862, "logps/chosen": -2.3472862243652344, "logps/rejected": -2.0535407066345215, "loss": 8.2159, "nll_loss": 1.9537204504013062, "rewards/accuracies": 0.5, "rewards/chosen": -0.2347286343574524, "rewards/margins": -0.02937454544007778, "rewards/rejected": -0.20535407960414886, "step": 59 }, { "epoch": 0.04149377593360996, "grad_norm": 2.9321954250335693, "learning_rate": 2.074688796680498e-06, "log_odds_chosen": 0.14047515392303467, "log_odds_ratio": -0.7460314035415649, "logits/chosen": -1.103363037109375, "logits/rejected": -1.0946812629699707, "logps/chosen": -3.310117244720459, "logps/rejected": -3.5168280601501465, "loss": 6.8914, "nll_loss": 1.6482346057891846, "rewards/accuracies": 0.5, "rewards/chosen": -0.33101174235343933, "rewards/margins": 0.020671118050813675, "rewards/rejected": -0.3516828417778015, "step": 60 }, { "epoch": 0.042185338865836794, "grad_norm": 2.5498883724212646, "learning_rate": 2.1092669432918396e-06, "log_odds_chosen": -0.5384204983711243, "log_odds_ratio": -1.1760982275009155, "logits/chosen": -0.9892525672912598, "logits/rejected": -0.9684339761734009, "logps/chosen": -2.563851833343506, "logps/rejected": -2.093398332595825, "loss": 7.0613, "nll_loss": 1.6477240324020386, "rewards/accuracies": 0.125, "rewards/chosen": -0.2563851773738861, "rewards/margins": -0.04704533517360687, "rewards/rejected": -0.20933984220027924, "step": 61 }, { "epoch": 0.042876901798063624, "grad_norm": 4.019651889801025, "learning_rate": 2.143845089903181e-06, "log_odds_chosen": 0.28454190492630005, "log_odds_ratio": -0.6172089576721191, "logits/chosen": -0.7720848917961121, "logits/rejected": -0.7707473039627075, "logps/chosen": -2.709263801574707, "logps/rejected": -3.0105907917022705, "loss": 8.2458, "nll_loss": 1.9997224807739258, "rewards/accuracies": 0.625, "rewards/chosen": -0.2709263563156128, "rewards/margins": 0.030132703483104706, "rewards/rejected": -0.3010590672492981, "step": 62 }, { "epoch": 0.043568464730290454, "grad_norm": 3.3269991874694824, "learning_rate": 2.1784232365145227e-06, "log_odds_chosen": 0.11333783715963364, "log_odds_ratio": -0.676737368106842, "logits/chosen": -1.1614861488342285, "logits/rejected": -1.1610134840011597, "logps/chosen": -2.9012930393218994, "logps/rejected": -3.0032124519348145, "loss": 8.5807, "nll_loss": 2.0775046348571777, "rewards/accuracies": 0.625, "rewards/chosen": -0.29012930393218994, "rewards/margins": 0.010191947221755981, "rewards/rejected": -0.3003212511539459, "step": 63 }, { "epoch": 0.04426002766251729, "grad_norm": 2.0815582275390625, "learning_rate": 2.2130013831258647e-06, "log_odds_chosen": 0.12025430798530579, "log_odds_ratio": -0.7256815433502197, "logits/chosen": -1.159436821937561, "logits/rejected": -1.1309454441070557, "logps/chosen": -3.0659329891204834, "logps/rejected": -3.178705930709839, "loss": 7.1109, "nll_loss": 1.7051641941070557, "rewards/accuracies": 0.625, "rewards/chosen": -0.30659329891204834, "rewards/margins": 0.01127728819847107, "rewards/rejected": -0.3178706169128418, "step": 64 }, { "epoch": 0.04495159059474412, "grad_norm": 6.716753959655762, "learning_rate": 2.2475795297372063e-06, "log_odds_chosen": -0.30332645773887634, "log_odds_ratio": -1.194240927696228, "logits/chosen": -1.0181807279586792, "logits/rejected": -1.0282114744186401, "logps/chosen": -4.083698272705078, "logps/rejected": -3.759035110473633, "loss": 10.9353, "nll_loss": 2.61440372467041, "rewards/accuracies": 0.5, "rewards/chosen": -0.40836983919143677, "rewards/margins": -0.032466333359479904, "rewards/rejected": -0.37590354681015015, "step": 65 }, { "epoch": 0.04564315352697095, "grad_norm": 4.329280853271484, "learning_rate": 2.282157676348548e-06, "log_odds_chosen": -0.043851837515830994, "log_odds_ratio": -0.781743049621582, "logits/chosen": -1.4313268661499023, "logits/rejected": -1.453513741493225, "logps/chosen": -3.223816156387329, "logps/rejected": -3.164360761642456, "loss": 9.5621, "nll_loss": 2.312361717224121, "rewards/accuracies": 0.5, "rewards/chosen": -0.3223816156387329, "rewards/margins": -0.0059455279260873795, "rewards/rejected": -0.3164360821247101, "step": 66 }, { "epoch": 0.04633471645919779, "grad_norm": 1.5088504552841187, "learning_rate": 2.3167358229598894e-06, "log_odds_chosen": 0.08667584508657455, "log_odds_ratio": -0.6951309442520142, "logits/chosen": -1.1596637964248657, "logits/rejected": -1.134047031402588, "logps/chosen": -1.920853614807129, "logps/rejected": -1.9628230333328247, "loss": 5.1904, "nll_loss": 1.2280784845352173, "rewards/accuracies": 0.625, "rewards/chosen": -0.1920853555202484, "rewards/margins": 0.004196947440505028, "rewards/rejected": -0.196282297372818, "step": 67 }, { "epoch": 0.04702627939142462, "grad_norm": 3.1953086853027344, "learning_rate": 2.351313969571231e-06, "log_odds_chosen": -0.16054373979568481, "log_odds_ratio": -0.8236711025238037, "logits/chosen": -0.9192508459091187, "logits/rejected": -0.9316079616546631, "logps/chosen": -2.5424296855926514, "logps/rejected": -2.3846731185913086, "loss": 8.2374, "nll_loss": 1.976994514465332, "rewards/accuracies": 0.75, "rewards/chosen": -0.2542429566383362, "rewards/margins": -0.015775656327605247, "rewards/rejected": -0.23846730589866638, "step": 68 }, { "epoch": 0.04771784232365145, "grad_norm": 1.7714017629623413, "learning_rate": 2.3858921161825725e-06, "log_odds_chosen": 0.5153631567955017, "log_odds_ratio": -0.5464873313903809, "logits/chosen": -1.1873700618743896, "logits/rejected": -1.2427599430084229, "logps/chosen": -1.9302818775177002, "logps/rejected": -2.423107147216797, "loss": 5.5537, "nll_loss": 1.3337669372558594, "rewards/accuracies": 0.75, "rewards/chosen": -0.19302821159362793, "rewards/margins": 0.049282513558864594, "rewards/rejected": -0.24231071770191193, "step": 69 }, { "epoch": 0.048409405255878286, "grad_norm": 2.594485282897949, "learning_rate": 2.4204702627939145e-06, "log_odds_chosen": 0.2057522088289261, "log_odds_ratio": -0.6587037444114685, "logits/chosen": -1.0115017890930176, "logits/rejected": -0.994685709476471, "logps/chosen": -2.51837420463562, "logps/rejected": -2.7061476707458496, "loss": 7.4934, "nll_loss": 1.8074700832366943, "rewards/accuracies": 0.5, "rewards/chosen": -0.25183743238449097, "rewards/margins": 0.01877736859023571, "rewards/rejected": -0.2706148028373718, "step": 70 }, { "epoch": 0.04910096818810512, "grad_norm": 5.582400798797607, "learning_rate": 2.455048409405256e-06, "log_odds_chosen": -0.19556845724582672, "log_odds_ratio": -0.8797652721405029, "logits/chosen": -1.2655856609344482, "logits/rejected": -1.286374807357788, "logps/chosen": -3.659990072250366, "logps/rejected": -3.4588847160339355, "loss": 9.3168, "nll_loss": 2.2412219047546387, "rewards/accuracies": 0.25, "rewards/chosen": -0.3659989833831787, "rewards/margins": -0.020110521465539932, "rewards/rejected": -0.3458884656429291, "step": 71 }, { "epoch": 0.04979253112033195, "grad_norm": 5.756114959716797, "learning_rate": 2.4896265560165977e-06, "log_odds_chosen": 0.04178621619939804, "log_odds_ratio": -0.6979033946990967, "logits/chosen": -1.0562987327575684, "logits/rejected": -1.1279411315917969, "logps/chosen": -3.996288299560547, "logps/rejected": -4.0451226234436035, "loss": 11.881, "nll_loss": 2.9004478454589844, "rewards/accuracies": 0.625, "rewards/chosen": -0.3996288776397705, "rewards/margins": 0.0048833731561899185, "rewards/rejected": -0.4045122265815735, "step": 72 }, { "epoch": 0.050484094052558784, "grad_norm": 3.1050422191619873, "learning_rate": 2.5242047026279392e-06, "log_odds_chosen": 0.377094030380249, "log_odds_ratio": -0.6772429943084717, "logits/chosen": -1.2963155508041382, "logits/rejected": -1.33097505569458, "logps/chosen": -2.747310161590576, "logps/rejected": -3.087376356124878, "loss": 9.158, "nll_loss": 2.221768379211426, "rewards/accuracies": 0.625, "rewards/chosen": -0.27473101019859314, "rewards/margins": 0.03400661423802376, "rewards/rejected": -0.3087376356124878, "step": 73 }, { "epoch": 0.051175656984785614, "grad_norm": 2.4878885746002197, "learning_rate": 2.5587828492392808e-06, "log_odds_chosen": 0.13045404851436615, "log_odds_ratio": -0.6519996523857117, "logits/chosen": -1.1966079473495483, "logits/rejected": -1.1773067712783813, "logps/chosen": -2.2391517162323, "logps/rejected": -2.336344003677368, "loss": 7.4135, "nll_loss": 1.7881792783737183, "rewards/accuracies": 0.5, "rewards/chosen": -0.22391517460346222, "rewards/margins": 0.009719207882881165, "rewards/rejected": -0.23363438248634338, "step": 74 }, { "epoch": 0.05186721991701245, "grad_norm": 4.009108066558838, "learning_rate": 2.5933609958506228e-06, "log_odds_chosen": -0.608934760093689, "log_odds_ratio": -1.387421727180481, "logits/chosen": -1.170054316520691, "logits/rejected": -1.1777504682540894, "logps/chosen": -3.128124237060547, "logps/rejected": -2.479940891265869, "loss": 10.3093, "nll_loss": 2.438586711883545, "rewards/accuracies": 0.625, "rewards/chosen": -0.312812477350235, "rewards/margins": -0.0648183524608612, "rewards/rejected": -0.2479940950870514, "step": 75 }, { "epoch": 0.05255878284923928, "grad_norm": 3.367987871170044, "learning_rate": 2.6279391424619643e-06, "log_odds_chosen": -0.12288656830787659, "log_odds_ratio": -0.7879306674003601, "logits/chosen": -1.2347142696380615, "logits/rejected": -1.2641103267669678, "logps/chosen": -3.0773115158081055, "logps/rejected": -2.97043776512146, "loss": 7.4922, "nll_loss": 1.794250726699829, "rewards/accuracies": 0.375, "rewards/chosen": -0.30773118138313293, "rewards/margins": -0.010687386617064476, "rewards/rejected": -0.2970438003540039, "step": 76 }, { "epoch": 0.05325034578146611, "grad_norm": 2.115560293197632, "learning_rate": 2.662517289073306e-06, "log_odds_chosen": 0.1649816632270813, "log_odds_ratio": -0.658004879951477, "logits/chosen": -1.1515793800354004, "logits/rejected": -1.1574221849441528, "logps/chosen": -2.2130730152130127, "logps/rejected": -2.367478370666504, "loss": 6.2837, "nll_loss": 1.5051299333572388, "rewards/accuracies": 0.625, "rewards/chosen": -0.22130730748176575, "rewards/margins": 0.015440553426742554, "rewards/rejected": -0.2367478609085083, "step": 77 }, { "epoch": 0.05394190871369295, "grad_norm": 3.061014175415039, "learning_rate": 2.6970954356846475e-06, "log_odds_chosen": -0.8962035775184631, "log_odds_ratio": -1.6122883558273315, "logits/chosen": -1.5006728172302246, "logits/rejected": -1.491066575050354, "logps/chosen": -3.76662015914917, "logps/rejected": -2.943807363510132, "loss": 8.4838, "nll_loss": 1.9597277641296387, "rewards/accuracies": 0.625, "rewards/chosen": -0.376662015914917, "rewards/margins": -0.08228126168251038, "rewards/rejected": -0.29438072443008423, "step": 78 }, { "epoch": 0.05463347164591978, "grad_norm": 1.8718522787094116, "learning_rate": 2.731673582295989e-06, "log_odds_chosen": -0.046965256333351135, "log_odds_ratio": -0.7528050541877747, "logits/chosen": -1.0281174182891846, "logits/rejected": -1.100132942199707, "logps/chosen": -2.0911521911621094, "logps/rejected": -2.0604071617126465, "loss": 6.9623, "nll_loss": 1.6652878522872925, "rewards/accuracies": 0.25, "rewards/chosen": -0.20911523699760437, "rewards/margins": -0.0030745062977075577, "rewards/rejected": -0.20604071021080017, "step": 79 }, { "epoch": 0.05532503457814661, "grad_norm": 3.6880903244018555, "learning_rate": 2.7662517289073306e-06, "log_odds_chosen": 0.20674392580986023, "log_odds_ratio": -0.6412222981452942, "logits/chosen": -0.9123594164848328, "logits/rejected": -0.9457440972328186, "logps/chosen": -2.468709945678711, "logps/rejected": -2.680603265762329, "loss": 7.7926, "nll_loss": 1.8840311765670776, "rewards/accuracies": 0.625, "rewards/chosen": -0.2468709796667099, "rewards/margins": 0.021189335733652115, "rewards/rejected": -0.2680603265762329, "step": 80 }, { "epoch": 0.056016597510373446, "grad_norm": 2.5244028568267822, "learning_rate": 2.8008298755186726e-06, "log_odds_chosen": 0.2665032744407654, "log_odds_ratio": -0.5810218453407288, "logits/chosen": -1.2331328392028809, "logits/rejected": -1.3183355331420898, "logps/chosen": -2.5980257987976074, "logps/rejected": -2.8580169677734375, "loss": 7.1341, "nll_loss": 1.7254328727722168, "rewards/accuracies": 0.625, "rewards/chosen": -0.25980257987976074, "rewards/margins": 0.025999119505286217, "rewards/rejected": -0.2858017086982727, "step": 81 }, { "epoch": 0.056708160442600276, "grad_norm": 3.8314919471740723, "learning_rate": 2.835408022130014e-06, "log_odds_chosen": 0.8098757863044739, "log_odds_ratio": -0.4692530632019043, "logits/chosen": -1.3716188669204712, "logits/rejected": -1.416751742362976, "logps/chosen": -3.450601577758789, "logps/rejected": -4.224173545837402, "loss": 9.16, "nll_loss": 2.2430672645568848, "rewards/accuracies": 0.75, "rewards/chosen": -0.34506019949913025, "rewards/margins": 0.07735716551542282, "rewards/rejected": -0.42241737246513367, "step": 82 }, { "epoch": 0.05739972337482711, "grad_norm": 3.0442752838134766, "learning_rate": 2.8699861687413553e-06, "log_odds_chosen": 0.2403930127620697, "log_odds_ratio": -0.6385015249252319, "logits/chosen": -1.2987475395202637, "logits/rejected": -1.288628101348877, "logps/chosen": -2.989830493927002, "logps/rejected": -3.2324090003967285, "loss": 8.18, "nll_loss": 1.9811550378799438, "rewards/accuracies": 0.5, "rewards/chosen": -0.29898306727409363, "rewards/margins": 0.024257831275463104, "rewards/rejected": -0.32324090600013733, "step": 83 }, { "epoch": 0.058091286307053944, "grad_norm": 3.6023521423339844, "learning_rate": 2.9045643153526973e-06, "log_odds_chosen": 0.25399988889694214, "log_odds_ratio": -0.6751829385757446, "logits/chosen": -1.1019551753997803, "logits/rejected": -1.1007038354873657, "logps/chosen": -3.27341628074646, "logps/rejected": -3.516066074371338, "loss": 8.322, "nll_loss": 2.0129876136779785, "rewards/accuracies": 0.75, "rewards/chosen": -0.32734164595603943, "rewards/margins": 0.024264976382255554, "rewards/rejected": -0.3516066074371338, "step": 84 }, { "epoch": 0.058782849239280774, "grad_norm": 1.5585123300552368, "learning_rate": 2.939142461964039e-06, "log_odds_chosen": -0.03407038748264313, "log_odds_ratio": -0.7980966567993164, "logits/chosen": -0.9366306066513062, "logits/rejected": -0.9029936790466309, "logps/chosen": -2.6618621349334717, "logps/rejected": -2.6580753326416016, "loss": 6.4503, "nll_loss": 1.5327749252319336, "rewards/accuracies": 0.375, "rewards/chosen": -0.2661862075328827, "rewards/margins": -0.0003786766901612282, "rewards/rejected": -0.26580753922462463, "step": 85 }, { "epoch": 0.059474412171507604, "grad_norm": 2.9539215564727783, "learning_rate": 2.9737206085753804e-06, "log_odds_chosen": -0.021699100732803345, "log_odds_ratio": -0.7429624795913696, "logits/chosen": -1.4628320932388306, "logits/rejected": -1.4432883262634277, "logps/chosen": -2.684389114379883, "logps/rejected": -2.669247627258301, "loss": 9.0412, "nll_loss": 2.186001777648926, "rewards/accuracies": 0.375, "rewards/chosen": -0.2684389352798462, "rewards/margins": -0.0015141433104872704, "rewards/rejected": -0.26692479848861694, "step": 86 }, { "epoch": 0.06016597510373444, "grad_norm": 3.238790512084961, "learning_rate": 3.0082987551867224e-06, "log_odds_chosen": -0.34586936235427856, "log_odds_ratio": -1.0915687084197998, "logits/chosen": -0.8685006499290466, "logits/rejected": -0.86763596534729, "logps/chosen": -1.9744906425476074, "logps/rejected": -1.6153260469436646, "loss": 6.9039, "nll_loss": 1.6168253421783447, "rewards/accuracies": 0.625, "rewards/chosen": -0.19744905829429626, "rewards/margins": -0.03591645881533623, "rewards/rejected": -0.16153259575366974, "step": 87 }, { "epoch": 0.06085753803596127, "grad_norm": 2.4273881912231445, "learning_rate": 3.0428769017980635e-06, "log_odds_chosen": 0.20019817352294922, "log_odds_ratio": -0.8292399644851685, "logits/chosen": -0.5661689043045044, "logits/rejected": -0.5779998302459717, "logps/chosen": -2.4900712966918945, "logps/rejected": -2.707916259765625, "loss": 6.937, "nll_loss": 1.6513311862945557, "rewards/accuracies": 0.5, "rewards/chosen": -0.24900715053081512, "rewards/margins": 0.0217844620347023, "rewards/rejected": -0.270791620016098, "step": 88 }, { "epoch": 0.06154910096818811, "grad_norm": 2.613290309906006, "learning_rate": 3.0774550484094055e-06, "log_odds_chosen": -0.08278333395719528, "log_odds_ratio": -0.9090401530265808, "logits/chosen": -1.3825005292892456, "logits/rejected": -1.3741035461425781, "logps/chosen": -2.779364824295044, "logps/rejected": -2.6913795471191406, "loss": 6.6745, "nll_loss": 1.57771635055542, "rewards/accuracies": 0.375, "rewards/chosen": -0.27793648838996887, "rewards/margins": -0.0087985098361969, "rewards/rejected": -0.269137978553772, "step": 89 }, { "epoch": 0.06224066390041494, "grad_norm": 4.615622043609619, "learning_rate": 3.112033195020747e-06, "log_odds_chosen": 0.27620837092399597, "log_odds_ratio": -0.6446676850318909, "logits/chosen": -1.2485114336013794, "logits/rejected": -1.2515029907226562, "logps/chosen": -3.3156979084014893, "logps/rejected": -3.592181444168091, "loss": 10.2327, "nll_loss": 2.4937028884887695, "rewards/accuracies": 0.625, "rewards/chosen": -0.3315698206424713, "rewards/margins": 0.027648335322737694, "rewards/rejected": -0.35921815037727356, "step": 90 }, { "epoch": 0.06293222683264177, "grad_norm": 3.800359010696411, "learning_rate": 3.1466113416320886e-06, "log_odds_chosen": -0.9488328099250793, "log_odds_ratio": -1.5567295551300049, "logits/chosen": -1.0518579483032227, "logits/rejected": -1.051997423171997, "logps/chosen": -3.5880908966064453, "logps/rejected": -2.6912951469421387, "loss": 7.9834, "nll_loss": 1.8401869535446167, "rewards/accuracies": 0.5, "rewards/chosen": -0.35880908370018005, "rewards/margins": -0.08967956155538559, "rewards/rejected": -0.26912954449653625, "step": 91 }, { "epoch": 0.0636237897648686, "grad_norm": 3.3406009674072266, "learning_rate": 3.18118948824343e-06, "log_odds_chosen": -0.2876474857330322, "log_odds_ratio": -0.9973258376121521, "logits/chosen": -1.3440685272216797, "logits/rejected": -1.3308429718017578, "logps/chosen": -3.4420344829559326, "logps/rejected": -3.1430163383483887, "loss": 9.1634, "nll_loss": 2.191114902496338, "rewards/accuracies": 0.25, "rewards/chosen": -0.3442034423351288, "rewards/margins": -0.029901809990406036, "rewards/rejected": -0.31430163979530334, "step": 92 }, { "epoch": 0.06431535269709543, "grad_norm": 4.966706275939941, "learning_rate": 3.2157676348547718e-06, "log_odds_chosen": 0.09749498963356018, "log_odds_ratio": -0.6567263603210449, "logits/chosen": -1.2255221605300903, "logits/rejected": -1.2361114025115967, "logps/chosen": -2.9487504959106445, "logps/rejected": -3.0364670753479004, "loss": 10.2881, "nll_loss": 2.506345510482788, "rewards/accuracies": 0.5, "rewards/chosen": -0.29487505555152893, "rewards/margins": 0.008771630004048347, "rewards/rejected": -0.30364668369293213, "step": 93 }, { "epoch": 0.06500691562932227, "grad_norm": 2.9755330085754395, "learning_rate": 3.2503457814661137e-06, "log_odds_chosen": -0.014040261507034302, "log_odds_ratio": -0.8920272588729858, "logits/chosen": -1.2634459733963013, "logits/rejected": -1.2027406692504883, "logps/chosen": -3.3989415168762207, "logps/rejected": -3.4355568885803223, "loss": 7.9149, "nll_loss": 1.8895316123962402, "rewards/accuracies": 0.5, "rewards/chosen": -0.33989417552948, "rewards/margins": 0.0036615319550037384, "rewards/rejected": -0.3435557186603546, "step": 94 }, { "epoch": 0.0656984785615491, "grad_norm": 4.007265567779541, "learning_rate": 3.2849239280774553e-06, "log_odds_chosen": -0.07402561604976654, "log_odds_ratio": -0.7418888807296753, "logits/chosen": -1.3279504776000977, "logits/rejected": -1.3760790824890137, "logps/chosen": -2.9946768283843994, "logps/rejected": -2.9116311073303223, "loss": 7.8239, "nll_loss": 1.8817808628082275, "rewards/accuracies": 0.375, "rewards/chosen": -0.29946768283843994, "rewards/margins": -0.008304571732878685, "rewards/rejected": -0.2911631166934967, "step": 95 }, { "epoch": 0.06639004149377593, "grad_norm": 3.38915753364563, "learning_rate": 3.319502074688797e-06, "log_odds_chosen": 0.24476172029972076, "log_odds_ratio": -0.7002239227294922, "logits/chosen": -1.25504469871521, "logits/rejected": -1.2935394048690796, "logps/chosen": -2.665768623352051, "logps/rejected": -2.892899513244629, "loss": 8.7056, "nll_loss": 2.1063828468322754, "rewards/accuracies": 0.625, "rewards/chosen": -0.266576886177063, "rewards/margins": 0.022713065147399902, "rewards/rejected": -0.2892899513244629, "step": 96 }, { "epoch": 0.06708160442600276, "grad_norm": 2.5748276710510254, "learning_rate": 3.3540802213001384e-06, "log_odds_chosen": -0.019003883004188538, "log_odds_ratio": -0.7211747169494629, "logits/chosen": -1.149442195892334, "logits/rejected": -1.1671631336212158, "logps/chosen": -3.067318916320801, "logps/rejected": -3.0267174243927, "loss": 6.695, "nll_loss": 1.601643681526184, "rewards/accuracies": 0.375, "rewards/chosen": -0.3067318797111511, "rewards/margins": -0.004060110077261925, "rewards/rejected": -0.30267176032066345, "step": 97 }, { "epoch": 0.0677731673582296, "grad_norm": 4.646411895751953, "learning_rate": 3.38865836791148e-06, "log_odds_chosen": 0.02650478109717369, "log_odds_ratio": -0.7107594013214111, "logits/chosen": -1.3651975393295288, "logits/rejected": -1.3814575672149658, "logps/chosen": -2.978908061981201, "logps/rejected": -2.998114824295044, "loss": 10.4827, "nll_loss": 2.54960560798645, "rewards/accuracies": 0.625, "rewards/chosen": -0.2978908121585846, "rewards/margins": 0.0019206637516617775, "rewards/rejected": -0.2998114824295044, "step": 98 }, { "epoch": 0.06846473029045644, "grad_norm": 3.0044188499450684, "learning_rate": 3.423236514522822e-06, "log_odds_chosen": 0.23153075575828552, "log_odds_ratio": -0.6025316715240479, "logits/chosen": -0.852349579334259, "logits/rejected": -0.8624266386032104, "logps/chosen": -1.607686996459961, "logps/rejected": -1.7681227922439575, "loss": 7.2859, "nll_loss": 1.7612199783325195, "rewards/accuracies": 0.75, "rewards/chosen": -0.16076868772506714, "rewards/margins": 0.016043581068515778, "rewards/rejected": -0.1768122762441635, "step": 99 }, { "epoch": 0.06915629322268327, "grad_norm": 1.6636029481887817, "learning_rate": 3.4578146611341635e-06, "log_odds_chosen": -0.05533628910779953, "log_odds_ratio": -0.7417080998420715, "logits/chosen": -0.9845290184020996, "logits/rejected": -0.9741227626800537, "logps/chosen": -1.9345066547393799, "logps/rejected": -1.8781304359436035, "loss": 5.6163, "nll_loss": 1.329894781112671, "rewards/accuracies": 0.625, "rewards/chosen": -0.1934506595134735, "rewards/margins": -0.005637619644403458, "rewards/rejected": -0.18781304359436035, "step": 100 }, { "epoch": 0.0698478561549101, "grad_norm": 1.76613187789917, "learning_rate": 3.492392807745505e-06, "log_odds_chosen": -0.31366345286369324, "log_odds_ratio": -0.9095494747161865, "logits/chosen": -1.0726799964904785, "logits/rejected": -1.057313084602356, "logps/chosen": -2.6118955612182617, "logps/rejected": -2.3254950046539307, "loss": 5.3665, "nll_loss": 1.2506669759750366, "rewards/accuracies": 0.375, "rewards/chosen": -0.2611895799636841, "rewards/margins": -0.028640054166316986, "rewards/rejected": -0.2325495034456253, "step": 101 }, { "epoch": 0.07053941908713693, "grad_norm": 2.1601548194885254, "learning_rate": 3.5269709543568467e-06, "log_odds_chosen": 0.225947305560112, "log_odds_ratio": -0.7366925477981567, "logits/chosen": -0.9250924587249756, "logits/rejected": -0.872596025466919, "logps/chosen": -2.657607316970825, "logps/rejected": -2.8689475059509277, "loss": 6.3597, "nll_loss": 1.516251564025879, "rewards/accuracies": 0.375, "rewards/chosen": -0.26576074957847595, "rewards/margins": 0.021134020760655403, "rewards/rejected": -0.2868947684764862, "step": 102 }, { "epoch": 0.07123098201936376, "grad_norm": 3.250379800796509, "learning_rate": 3.5615491009681882e-06, "log_odds_chosen": 0.41885554790496826, "log_odds_ratio": -0.5761613845825195, "logits/chosen": -0.9390544295310974, "logits/rejected": -0.9515710473060608, "logps/chosen": -2.112879753112793, "logps/rejected": -2.4834203720092773, "loss": 8.3838, "nll_loss": 2.038335084915161, "rewards/accuracies": 0.75, "rewards/chosen": -0.2112879753112793, "rewards/margins": 0.03705406188964844, "rewards/rejected": -0.24834203720092773, "step": 103 }, { "epoch": 0.07192254495159059, "grad_norm": 4.528255939483643, "learning_rate": 3.59612724757953e-06, "log_odds_chosen": 0.31654834747314453, "log_odds_ratio": -0.6934396028518677, "logits/chosen": -0.9241195917129517, "logits/rejected": -0.8918487429618835, "logps/chosen": -2.244730234146118, "logps/rejected": -2.5783205032348633, "loss": 8.1125, "nll_loss": 1.958787441253662, "rewards/accuracies": 0.625, "rewards/chosen": -0.2244730293750763, "rewards/margins": 0.03335902467370033, "rewards/rejected": -0.25783205032348633, "step": 104 }, { "epoch": 0.07261410788381743, "grad_norm": 3.3515546321868896, "learning_rate": 3.6307053941908718e-06, "log_odds_chosen": 0.019478052854537964, "log_odds_ratio": -0.7779797315597534, "logits/chosen": -1.184047818183899, "logits/rejected": -1.2227816581726074, "logps/chosen": -2.3940818309783936, "logps/rejected": -2.4155819416046143, "loss": 6.6174, "nll_loss": 1.576558232307434, "rewards/accuracies": 0.5, "rewards/chosen": -0.2394081950187683, "rewards/margins": 0.0021500196307897568, "rewards/rejected": -0.24155820906162262, "step": 105 }, { "epoch": 0.07330567081604426, "grad_norm": 4.590144634246826, "learning_rate": 3.6652835408022133e-06, "log_odds_chosen": -0.278724730014801, "log_odds_ratio": -0.8659406304359436, "logits/chosen": -1.0056959390640259, "logits/rejected": -0.9648569822311401, "logps/chosen": -3.568976879119873, "logps/rejected": -3.291322708129883, "loss": 8.3612, "nll_loss": 2.0036964416503906, "rewards/accuracies": 0.5, "rewards/chosen": -0.3568977117538452, "rewards/margins": -0.02776542864739895, "rewards/rejected": -0.3291322588920593, "step": 106 }, { "epoch": 0.0739972337482711, "grad_norm": 3.8044564723968506, "learning_rate": 3.699861687413555e-06, "log_odds_chosen": -0.07593946903944016, "log_odds_ratio": -0.7997829914093018, "logits/chosen": -1.1828924417495728, "logits/rejected": -1.219299077987671, "logps/chosen": -2.4561989307403564, "logps/rejected": -2.3595938682556152, "loss": 7.8965, "nll_loss": 1.8941495418548584, "rewards/accuracies": 0.375, "rewards/chosen": -0.24561990797519684, "rewards/margins": -0.009660521522164345, "rewards/rejected": -0.23595938086509705, "step": 107 }, { "epoch": 0.07468879668049792, "grad_norm": 3.134294271469116, "learning_rate": 3.7344398340248965e-06, "log_odds_chosen": 0.5621838569641113, "log_odds_ratio": -0.4798460006713867, "logits/chosen": -1.1768224239349365, "logits/rejected": -1.2381618022918701, "logps/chosen": -1.9236397743225098, "logps/rejected": -2.380974769592285, "loss": 7.811, "nll_loss": 1.904759168624878, "rewards/accuracies": 1.0, "rewards/chosen": -0.1923639476299286, "rewards/margins": 0.04573351517319679, "rewards/rejected": -0.23809745907783508, "step": 108 }, { "epoch": 0.07538035961272475, "grad_norm": 4.6720709800720215, "learning_rate": 3.769017980636238e-06, "log_odds_chosen": 0.4407891035079956, "log_odds_ratio": -0.5491933822631836, "logits/chosen": -1.250891923904419, "logits/rejected": -1.251495599746704, "logps/chosen": -2.444951057434082, "logps/rejected": -2.8636553287506104, "loss": 9.0964, "nll_loss": 2.2191882133483887, "rewards/accuracies": 0.75, "rewards/chosen": -0.24449510872364044, "rewards/margins": 0.04187043011188507, "rewards/rejected": -0.2863655388355255, "step": 109 }, { "epoch": 0.07607192254495158, "grad_norm": 2.9060957431793213, "learning_rate": 3.803596127247579e-06, "log_odds_chosen": 0.03529013693332672, "log_odds_ratio": -0.7689645886421204, "logits/chosen": -1.411515235900879, "logits/rejected": -1.3903522491455078, "logps/chosen": -2.516648054122925, "logps/rejected": -2.6515069007873535, "loss": 7.2097, "nll_loss": 1.7255399227142334, "rewards/accuracies": 0.625, "rewards/chosen": -0.25166481733322144, "rewards/margins": 0.013485876843333244, "rewards/rejected": -0.26515069603919983, "step": 110 }, { "epoch": 0.07676348547717843, "grad_norm": 2.940915107727051, "learning_rate": 3.838174273858921e-06, "log_odds_chosen": 0.23484264314174652, "log_odds_ratio": -0.5932224988937378, "logits/chosen": -0.9165379405021667, "logits/rejected": -0.9511304497718811, "logps/chosen": -2.0779600143432617, "logps/rejected": -2.3309521675109863, "loss": 6.6854, "nll_loss": 1.6120387315750122, "rewards/accuracies": 0.875, "rewards/chosen": -0.20779599249362946, "rewards/margins": 0.025299228727817535, "rewards/rejected": -0.2330952286720276, "step": 111 }, { "epoch": 0.07745504840940526, "grad_norm": 4.432619094848633, "learning_rate": 3.872752420470263e-06, "log_odds_chosen": 0.3886290490627289, "log_odds_ratio": -0.5865916013717651, "logits/chosen": -1.2094829082489014, "logits/rejected": -1.2666633129119873, "logps/chosen": -2.9593513011932373, "logps/rejected": -3.270914077758789, "loss": 8.8685, "nll_loss": 2.158459186553955, "rewards/accuracies": 0.75, "rewards/chosen": -0.29593515396118164, "rewards/margins": 0.03115629032254219, "rewards/rejected": -0.32709142565727234, "step": 112 }, { "epoch": 0.07814661134163209, "grad_norm": 4.220597743988037, "learning_rate": 3.907330567081604e-06, "log_odds_chosen": -0.2154398411512375, "log_odds_ratio": -1.1117606163024902, "logits/chosen": -1.3798332214355469, "logits/rejected": -1.3210017681121826, "logps/chosen": -2.220973014831543, "logps/rejected": -2.12764835357666, "loss": 8.4091, "nll_loss": 1.9911069869995117, "rewards/accuracies": 0.375, "rewards/chosen": -0.22209730744361877, "rewards/margins": -0.009332460351288319, "rewards/rejected": -0.21276485919952393, "step": 113 }, { "epoch": 0.07883817427385892, "grad_norm": 4.478047847747803, "learning_rate": 3.941908713692946e-06, "log_odds_chosen": -0.19631054997444153, "log_odds_ratio": -1.0323046445846558, "logits/chosen": -1.1578409671783447, "logits/rejected": -1.154388189315796, "logps/chosen": -1.7710871696472168, "logps/rejected": -1.6202861070632935, "loss": 7.2636, "nll_loss": 1.712680459022522, "rewards/accuracies": 0.5, "rewards/chosen": -0.17710871994495392, "rewards/margins": -0.015080108307301998, "rewards/rejected": -0.16202861070632935, "step": 114 }, { "epoch": 0.07952973720608575, "grad_norm": 3.203106641769409, "learning_rate": 3.976486860304287e-06, "log_odds_chosen": -0.10156537592411041, "log_odds_ratio": -0.7922493815422058, "logits/chosen": -1.2877484560012817, "logits/rejected": -1.2778607606887817, "logps/chosen": -1.9843857288360596, "logps/rejected": -1.888746738433838, "loss": 7.4244, "nll_loss": 1.7768635749816895, "rewards/accuracies": 0.5, "rewards/chosen": -0.1984385848045349, "rewards/margins": -0.009563901461660862, "rewards/rejected": -0.18887467682361603, "step": 115 }, { "epoch": 0.08022130013831259, "grad_norm": 2.964470863342285, "learning_rate": 4.01106500691563e-06, "log_odds_chosen": 0.14609771966934204, "log_odds_ratio": -0.6924587488174438, "logits/chosen": -1.3852272033691406, "logits/rejected": -1.3699078559875488, "logps/chosen": -2.1615686416625977, "logps/rejected": -2.2567572593688965, "loss": 8.114, "nll_loss": 1.9592458009719849, "rewards/accuracies": 0.625, "rewards/chosen": -0.21615687012672424, "rewards/margins": 0.009518839418888092, "rewards/rejected": -0.22567571699619293, "step": 116 }, { "epoch": 0.08091286307053942, "grad_norm": 3.5704469680786133, "learning_rate": 4.045643153526971e-06, "log_odds_chosen": -0.2644822299480438, "log_odds_ratio": -0.8610115647315979, "logits/chosen": -0.7173007726669312, "logits/rejected": -0.6642380952835083, "logps/chosen": -2.625227451324463, "logps/rejected": -2.385984420776367, "loss": 7.9542, "nll_loss": 1.9024397134780884, "rewards/accuracies": 0.25, "rewards/chosen": -0.26252272725105286, "rewards/margins": -0.02392430230975151, "rewards/rejected": -0.23859843611717224, "step": 117 }, { "epoch": 0.08160442600276625, "grad_norm": 4.570618629455566, "learning_rate": 4.080221300138313e-06, "log_odds_chosen": 0.9268831014633179, "log_odds_ratio": -0.4015723764896393, "logits/chosen": -1.1652560234069824, "logits/rejected": -1.2067581415176392, "logps/chosen": -1.5085891485214233, "logps/rejected": -2.3109521865844727, "loss": 8.9819, "nll_loss": 2.205305814743042, "rewards/accuracies": 0.875, "rewards/chosen": -0.15085892379283905, "rewards/margins": 0.08023631572723389, "rewards/rejected": -0.23109523952007294, "step": 118 }, { "epoch": 0.08229598893499308, "grad_norm": 2.8909966945648193, "learning_rate": 4.1147994467496545e-06, "log_odds_chosen": -0.18048861622810364, "log_odds_ratio": -0.879106342792511, "logits/chosen": -1.1618527173995972, "logits/rejected": -1.1411261558532715, "logps/chosen": -2.1716060638427734, "logps/rejected": -2.0435569286346436, "loss": 6.6815, "nll_loss": 1.5824534893035889, "rewards/accuracies": 0.5, "rewards/chosen": -0.21716058254241943, "rewards/margins": -0.012804889120161533, "rewards/rejected": -0.20435568690299988, "step": 119 }, { "epoch": 0.08298755186721991, "grad_norm": 2.6412265300750732, "learning_rate": 4.149377593360996e-06, "log_odds_chosen": -0.01931702345609665, "log_odds_ratio": -0.7372455596923828, "logits/chosen": -1.2916526794433594, "logits/rejected": -1.2799456119537354, "logps/chosen": -2.639468193054199, "logps/rejected": -2.6471922397613525, "loss": 5.9978, "nll_loss": 1.425737738609314, "rewards/accuracies": 0.375, "rewards/chosen": -0.2639468014240265, "rewards/margins": 0.0007724054157733917, "rewards/rejected": -0.2647192180156708, "step": 120 }, { "epoch": 0.08367911479944674, "grad_norm": 4.046600341796875, "learning_rate": 4.183955739972338e-06, "log_odds_chosen": -0.1946304440498352, "log_odds_ratio": -0.8274456262588501, "logits/chosen": -1.2523605823516846, "logits/rejected": -1.2300465106964111, "logps/chosen": -2.1097776889801025, "logps/rejected": -1.9532036781311035, "loss": 7.2348, "nll_loss": 1.7259578704833984, "rewards/accuracies": 0.5, "rewards/chosen": -0.21097779273986816, "rewards/margins": -0.01565741002559662, "rewards/rejected": -0.19532036781311035, "step": 121 }, { "epoch": 0.08437067773167359, "grad_norm": 2.7409684658050537, "learning_rate": 4.218533886583679e-06, "log_odds_chosen": 0.13821080327033997, "log_odds_ratio": -0.6783009767532349, "logits/chosen": -1.0376652479171753, "logits/rejected": -1.0456650257110596, "logps/chosen": -2.253610372543335, "logps/rejected": -2.3703272342681885, "loss": 6.2914, "nll_loss": 1.505028486251831, "rewards/accuracies": 0.625, "rewards/chosen": -0.22536104917526245, "rewards/margins": 0.011671675369143486, "rewards/rejected": -0.2370327264070511, "step": 122 }, { "epoch": 0.08506224066390042, "grad_norm": 4.59444522857666, "learning_rate": 4.253112033195021e-06, "log_odds_chosen": 0.14218562841415405, "log_odds_ratio": -0.6852483153343201, "logits/chosen": -1.2118958234786987, "logits/rejected": -1.2340459823608398, "logps/chosen": -2.2804534435272217, "logps/rejected": -2.4017200469970703, "loss": 8.8156, "nll_loss": 2.135375738143921, "rewards/accuracies": 0.375, "rewards/chosen": -0.22804535925388336, "rewards/margins": 0.012126652523875237, "rewards/rejected": -0.24017199873924255, "step": 123 }, { "epoch": 0.08575380359612725, "grad_norm": 2.6190905570983887, "learning_rate": 4.287690179806362e-06, "log_odds_chosen": 0.07235060632228851, "log_odds_ratio": -0.66350257396698, "logits/chosen": -1.0323631763458252, "logits/rejected": -1.0517666339874268, "logps/chosen": -1.3956609964370728, "logps/rejected": -1.4603394269943237, "loss": 7.0443, "nll_loss": 1.6947202682495117, "rewards/accuracies": 0.5, "rewards/chosen": -0.1395660936832428, "rewards/margins": 0.0064678434282541275, "rewards/rejected": -0.14603394269943237, "step": 124 }, { "epoch": 0.08644536652835408, "grad_norm": 4.491865634918213, "learning_rate": 4.322268326417704e-06, "log_odds_chosen": -0.17503008246421814, "log_odds_ratio": -0.8976634740829468, "logits/chosen": -1.0372291803359985, "logits/rejected": -1.035827398300171, "logps/chosen": -2.6728475093841553, "logps/rejected": -2.502103090286255, "loss": 8.9689, "nll_loss": 2.152448892593384, "rewards/accuracies": 0.5, "rewards/chosen": -0.2672847509384155, "rewards/margins": -0.017074448987841606, "rewards/rejected": -0.25021031498908997, "step": 125 }, { "epoch": 0.08713692946058091, "grad_norm": 5.2629780769348145, "learning_rate": 4.3568464730290455e-06, "log_odds_chosen": 0.5604333877563477, "log_odds_ratio": -0.5766538381576538, "logits/chosen": -1.2006797790527344, "logits/rejected": -1.2117624282836914, "logps/chosen": -2.1328866481781006, "logps/rejected": -2.68172550201416, "loss": 7.9791, "nll_loss": 1.9371154308319092, "rewards/accuracies": 0.75, "rewards/chosen": -0.21328869462013245, "rewards/margins": 0.054883863776922226, "rewards/rejected": -0.26817256212234497, "step": 126 }, { "epoch": 0.08782849239280774, "grad_norm": 2.4697422981262207, "learning_rate": 4.391424619640387e-06, "log_odds_chosen": 0.7080565690994263, "log_odds_ratio": -0.46416789293289185, "logits/chosen": -1.017878532409668, "logits/rejected": -0.994848370552063, "logps/chosen": -1.7571378946304321, "logps/rejected": -2.2879459857940674, "loss": 6.4419, "nll_loss": 1.5640522241592407, "rewards/accuracies": 0.75, "rewards/chosen": -0.17571380734443665, "rewards/margins": 0.05308079719543457, "rewards/rejected": -0.22879458963871002, "step": 127 }, { "epoch": 0.08852005532503458, "grad_norm": 3.796071767807007, "learning_rate": 4.4260027662517294e-06, "log_odds_chosen": -0.17505794763565063, "log_odds_ratio": -0.8826912045478821, "logits/chosen": -1.288775086402893, "logits/rejected": -1.231229305267334, "logps/chosen": -1.9284441471099854, "logps/rejected": -1.7314453125, "loss": 6.9746, "nll_loss": 1.6553906202316284, "rewards/accuracies": 0.5, "rewards/chosen": -0.192844420671463, "rewards/margins": -0.019699882715940475, "rewards/rejected": -0.17314454913139343, "step": 128 }, { "epoch": 0.08921161825726141, "grad_norm": 2.491612672805786, "learning_rate": 4.460580912863071e-06, "log_odds_chosen": 0.39554885029792786, "log_odds_ratio": -0.5392511487007141, "logits/chosen": -1.2205698490142822, "logits/rejected": -1.2139629125595093, "logps/chosen": -1.0191541910171509, "logps/rejected": -1.3029963970184326, "loss": 6.6261, "nll_loss": 1.6025913953781128, "rewards/accuracies": 0.75, "rewards/chosen": -0.10191541165113449, "rewards/margins": 0.02838423103094101, "rewards/rejected": -0.1302996575832367, "step": 129 }, { "epoch": 0.08990318118948824, "grad_norm": 3.537321090698242, "learning_rate": 4.4951590594744126e-06, "log_odds_chosen": 0.21087250113487244, "log_odds_ratio": -0.6249895095825195, "logits/chosen": -0.9488554000854492, "logits/rejected": -0.9826868772506714, "logps/chosen": -1.5986173152923584, "logps/rejected": -1.7431650161743164, "loss": 7.216, "nll_loss": 1.741507887840271, "rewards/accuracies": 0.5, "rewards/chosen": -0.1598617434501648, "rewards/margins": 0.014454763382673264, "rewards/rejected": -0.17431651055812836, "step": 130 }, { "epoch": 0.09059474412171507, "grad_norm": 5.5984883308410645, "learning_rate": 4.529737206085754e-06, "log_odds_chosen": 0.40902745723724365, "log_odds_ratio": -0.5497879981994629, "logits/chosen": -1.3714349269866943, "logits/rejected": -1.3984029293060303, "logps/chosen": -1.4219186305999756, "logps/rejected": -1.6851788759231567, "loss": 10.107, "nll_loss": 2.4717788696289062, "rewards/accuracies": 0.75, "rewards/chosen": -0.14219185709953308, "rewards/margins": 0.026326032355427742, "rewards/rejected": -0.16851788759231567, "step": 131 }, { "epoch": 0.0912863070539419, "grad_norm": 5.334275722503662, "learning_rate": 4.564315352697096e-06, "log_odds_chosen": -0.11085011065006256, "log_odds_ratio": -0.8024502396583557, "logits/chosen": -1.4571048021316528, "logits/rejected": -1.4007424116134644, "logps/chosen": -1.4272351264953613, "logps/rejected": -1.4018197059631348, "loss": 8.9514, "nll_loss": 2.157599449157715, "rewards/accuracies": 0.5, "rewards/chosen": -0.14272351562976837, "rewards/margins": -0.0025415411219000816, "rewards/rejected": -0.14018197357654572, "step": 132 }, { "epoch": 0.09197786998616875, "grad_norm": 4.106137275695801, "learning_rate": 4.598893499308437e-06, "log_odds_chosen": 0.33311688899993896, "log_odds_ratio": -0.6020650863647461, "logits/chosen": -1.1125842332839966, "logits/rejected": -1.1575082540512085, "logps/chosen": -1.534233570098877, "logps/rejected": -1.7893953323364258, "loss": 7.5926, "nll_loss": 1.8379466533660889, "rewards/accuracies": 0.625, "rewards/chosen": -0.15342335402965546, "rewards/margins": 0.025516191497445107, "rewards/rejected": -0.178939551115036, "step": 133 }, { "epoch": 0.09266943291839558, "grad_norm": 2.852998733520508, "learning_rate": 4.633471645919779e-06, "log_odds_chosen": 0.2178751528263092, "log_odds_ratio": -0.6271660327911377, "logits/chosen": -1.0826423168182373, "logits/rejected": -1.0785956382751465, "logps/chosen": -1.284407138824463, "logps/rejected": -1.4057343006134033, "loss": 6.5956, "nll_loss": 1.5861728191375732, "rewards/accuracies": 0.75, "rewards/chosen": -0.1284407079219818, "rewards/margins": 0.01213272288441658, "rewards/rejected": -0.1405734419822693, "step": 134 }, { "epoch": 0.09336099585062241, "grad_norm": 3.6493396759033203, "learning_rate": 4.66804979253112e-06, "log_odds_chosen": 0.3888116478919983, "log_odds_ratio": -0.6063941121101379, "logits/chosen": -0.8740944862365723, "logits/rejected": -0.8482604622840881, "logps/chosen": -1.223939299583435, "logps/rejected": -1.533890962600708, "loss": 6.8399, "nll_loss": 1.6493427753448486, "rewards/accuracies": 0.5, "rewards/chosen": -0.12239392846822739, "rewards/margins": 0.030995164066553116, "rewards/rejected": -0.1533890962600708, "step": 135 }, { "epoch": 0.09405255878284924, "grad_norm": 2.6961162090301514, "learning_rate": 4.702627939142462e-06, "log_odds_chosen": -0.04538653790950775, "log_odds_ratio": -0.7914632558822632, "logits/chosen": -1.2952909469604492, "logits/rejected": -1.305565595626831, "logps/chosen": -1.4900434017181396, "logps/rejected": -1.449129581451416, "loss": 7.09, "nll_loss": 1.6933479309082031, "rewards/accuracies": 0.375, "rewards/chosen": -0.14900435507297516, "rewards/margins": -0.00409140158444643, "rewards/rejected": -0.1449129581451416, "step": 136 }, { "epoch": 0.09474412171507607, "grad_norm": 5.1169657707214355, "learning_rate": 4.7372060857538035e-06, "log_odds_chosen": 0.2797987759113312, "log_odds_ratio": -0.6090816259384155, "logits/chosen": -1.2741329669952393, "logits/rejected": -1.330481767654419, "logps/chosen": -1.1922301054000854, "logps/rejected": -1.3819254636764526, "loss": 9.6619, "nll_loss": 2.3545656204223633, "rewards/accuracies": 0.875, "rewards/chosen": -0.11922300606966019, "rewards/margins": 0.018969547003507614, "rewards/rejected": -0.1381925493478775, "step": 137 }, { "epoch": 0.0954356846473029, "grad_norm": 4.451641082763672, "learning_rate": 4.771784232365145e-06, "log_odds_chosen": 0.2165432572364807, "log_odds_ratio": -0.6945505738258362, "logits/chosen": -1.1947180032730103, "logits/rejected": -1.1982449293136597, "logps/chosen": -1.1427407264709473, "logps/rejected": -1.402220606803894, "loss": 8.8009, "nll_loss": 2.130781888961792, "rewards/accuracies": 0.625, "rewards/chosen": -0.11427406966686249, "rewards/margins": 0.025947997346520424, "rewards/rejected": -0.14022207260131836, "step": 138 }, { "epoch": 0.09612724757952974, "grad_norm": 3.2618391513824463, "learning_rate": 4.8063623789764875e-06, "log_odds_chosen": -0.11838006228208542, "log_odds_ratio": -0.7790952920913696, "logits/chosen": -1.2024139165878296, "logits/rejected": -1.1736711263656616, "logps/chosen": -1.2197315692901611, "logps/rejected": -1.1040195226669312, "loss": 7.4106, "nll_loss": 1.774742603302002, "rewards/accuracies": 0.5, "rewards/chosen": -0.12197314947843552, "rewards/margins": -0.011571199633181095, "rewards/rejected": -0.11040195822715759, "step": 139 }, { "epoch": 0.09681881051175657, "grad_norm": 3.434873580932617, "learning_rate": 4.840940525587829e-06, "log_odds_chosen": 0.010386921465396881, "log_odds_ratio": -0.7327617406845093, "logits/chosen": -1.3008506298065186, "logits/rejected": -1.2888820171356201, "logps/chosen": -1.301018476486206, "logps/rejected": -1.288162112236023, "loss": 7.1551, "nll_loss": 1.7154977321624756, "rewards/accuracies": 0.375, "rewards/chosen": -0.13010185956954956, "rewards/margins": -0.0012856395915150642, "rewards/rejected": -0.12881621718406677, "step": 140 }, { "epoch": 0.0975103734439834, "grad_norm": 2.7035653591156006, "learning_rate": 4.875518672199171e-06, "log_odds_chosen": -0.29188334941864014, "log_odds_ratio": -0.8879889249801636, "logits/chosen": -0.9949595332145691, "logits/rejected": -0.992701530456543, "logps/chosen": -1.429956316947937, "logps/rejected": -1.2038675546646118, "loss": 6.5933, "nll_loss": 1.5595312118530273, "rewards/accuracies": 0.375, "rewards/chosen": -0.14299562573432922, "rewards/margins": -0.02260887622833252, "rewards/rejected": -0.1203867644071579, "step": 141 }, { "epoch": 0.09820193637621023, "grad_norm": 3.0859532356262207, "learning_rate": 4.910096818810512e-06, "log_odds_chosen": 0.660336971282959, "log_odds_ratio": -0.4694107472896576, "logits/chosen": -0.798190712928772, "logits/rejected": -0.847572922706604, "logps/chosen": -0.8726248741149902, "logps/rejected": -1.2767419815063477, "loss": 6.6484, "nll_loss": 1.6151642799377441, "rewards/accuracies": 0.75, "rewards/chosen": -0.08726249635219574, "rewards/margins": 0.04041171073913574, "rewards/rejected": -0.12767420709133148, "step": 142 }, { "epoch": 0.09889349930843706, "grad_norm": 3.3809046745300293, "learning_rate": 4.944674965421854e-06, "log_odds_chosen": 0.15960107743740082, "log_odds_ratio": -0.6858676671981812, "logits/chosen": -1.0418691635131836, "logits/rejected": -1.0430397987365723, "logps/chosen": -1.2576993703842163, "logps/rejected": -1.391213059425354, "loss": 7.7321, "nll_loss": 1.8644449710845947, "rewards/accuracies": 0.5, "rewards/chosen": -0.1257699429988861, "rewards/margins": 0.013351368717849255, "rewards/rejected": -0.13912130892276764, "step": 143 }, { "epoch": 0.0995850622406639, "grad_norm": 3.0167365074157715, "learning_rate": 4.979253112033195e-06, "log_odds_chosen": 0.49617624282836914, "log_odds_ratio": -0.5587316751480103, "logits/chosen": -0.8929398059844971, "logits/rejected": -0.9107470512390137, "logps/chosen": -1.127401351928711, "logps/rejected": -1.4853781461715698, "loss": 7.3534, "nll_loss": 1.7824782133102417, "rewards/accuracies": 0.75, "rewards/chosen": -0.11274014413356781, "rewards/margins": 0.03579767793416977, "rewards/rejected": -0.14853781461715698, "step": 144 }, { "epoch": 0.10027662517289074, "grad_norm": 4.485063552856445, "learning_rate": 5.013831258644537e-06, "log_odds_chosen": 0.5366233587265015, "log_odds_ratio": -0.46875566244125366, "logits/chosen": -1.0820214748382568, "logits/rejected": -1.1352897882461548, "logps/chosen": -0.8813449144363403, "logps/rejected": -1.1867763996124268, "loss": 8.9173, "nll_loss": 2.182441473007202, "rewards/accuracies": 1.0, "rewards/chosen": -0.08813448995351791, "rewards/margins": 0.030543144792318344, "rewards/rejected": -0.11867764592170715, "step": 145 }, { "epoch": 0.10096818810511757, "grad_norm": 3.3979671001434326, "learning_rate": 5.0484094052558784e-06, "log_odds_chosen": 0.3244428336620331, "log_odds_ratio": -0.5853853225708008, "logits/chosen": -0.9036159515380859, "logits/rejected": -0.9359475374221802, "logps/chosen": -0.820245087146759, "logps/rejected": -1.015293002128601, "loss": 6.9945, "nll_loss": 1.690085768699646, "rewards/accuracies": 0.75, "rewards/chosen": -0.08202450722455978, "rewards/margins": 0.019504791125655174, "rewards/rejected": -0.10152930021286011, "step": 146 }, { "epoch": 0.1016597510373444, "grad_norm": 3.1067051887512207, "learning_rate": 5.08298755186722e-06, "log_odds_chosen": 0.47029638290405273, "log_odds_ratio": -0.5233011245727539, "logits/chosen": -0.9140459299087524, "logits/rejected": -0.9160831570625305, "logps/chosen": -0.7125457525253296, "logps/rejected": -0.9512070417404175, "loss": 5.5303, "nll_loss": 1.3302336931228638, "rewards/accuracies": 0.625, "rewards/chosen": -0.07125458121299744, "rewards/margins": 0.02386613003909588, "rewards/rejected": -0.09512070566415787, "step": 147 }, { "epoch": 0.10235131396957123, "grad_norm": 2.4614651203155518, "learning_rate": 5.1175656984785616e-06, "log_odds_chosen": 0.17932581901550293, "log_odds_ratio": -0.6718851923942566, "logits/chosen": -0.9580909013748169, "logits/rejected": -0.9631924033164978, "logps/chosen": -0.7362526059150696, "logps/rejected": -0.8400034308433533, "loss": 6.4654, "nll_loss": 1.5491598844528198, "rewards/accuracies": 0.375, "rewards/chosen": -0.07362527400255203, "rewards/margins": 0.010375075973570347, "rewards/rejected": -0.08400034159421921, "step": 148 }, { "epoch": 0.10304287690179806, "grad_norm": 3.6997735500335693, "learning_rate": 5.152143845089903e-06, "log_odds_chosen": 0.16198191046714783, "log_odds_ratio": -0.6887600421905518, "logits/chosen": -1.2218208312988281, "logits/rejected": -1.1994661092758179, "logps/chosen": -0.9160218238830566, "logps/rejected": -0.9438376426696777, "loss": 6.9703, "nll_loss": 1.6736863851547241, "rewards/accuracies": 0.5, "rewards/chosen": -0.09160219132900238, "rewards/margins": 0.002781575545668602, "rewards/rejected": -0.09438376128673553, "step": 149 }, { "epoch": 0.1037344398340249, "grad_norm": 4.104522228240967, "learning_rate": 5.1867219917012455e-06, "log_odds_chosen": 0.4990871548652649, "log_odds_ratio": -0.5001032948493958, "logits/chosen": -1.1245300769805908, "logits/rejected": -1.146864414215088, "logps/chosen": -0.7892501950263977, "logps/rejected": -1.066573143005371, "loss": 7.7041, "nll_loss": 1.876022219657898, "rewards/accuracies": 1.0, "rewards/chosen": -0.07892502099275589, "rewards/margins": 0.027732297778129578, "rewards/rejected": -0.10665731877088547, "step": 150 }, { "epoch": 0.10442600276625173, "grad_norm": 4.101137638092041, "learning_rate": 5.221300138312587e-06, "log_odds_chosen": 0.6196571588516235, "log_odds_ratio": -0.4808695316314697, "logits/chosen": -1.1832971572875977, "logits/rejected": -1.2204134464263916, "logps/chosen": -0.8114309906959534, "logps/rejected": -1.205329179763794, "loss": 7.5169, "nll_loss": 1.8311498165130615, "rewards/accuracies": 0.625, "rewards/chosen": -0.0811430960893631, "rewards/margins": 0.03938981145620346, "rewards/rejected": -0.12053291499614716, "step": 151 }, { "epoch": 0.10511756569847856, "grad_norm": 3.7619707584381104, "learning_rate": 5.255878284923929e-06, "log_odds_chosen": 0.16248542070388794, "log_odds_ratio": -0.6626776456832886, "logits/chosen": -1.3932623863220215, "logits/rejected": -1.4151134490966797, "logps/chosen": -0.7183889746665955, "logps/rejected": -0.81103515625, "loss": 8.604, "nll_loss": 2.084726095199585, "rewards/accuracies": 0.75, "rewards/chosen": -0.07183889299631119, "rewards/margins": 0.009264619089663029, "rewards/rejected": -0.08110351115465164, "step": 152 }, { "epoch": 0.10580912863070539, "grad_norm": 3.2583189010620117, "learning_rate": 5.29045643153527e-06, "log_odds_chosen": 0.04566054046154022, "log_odds_ratio": -0.7379659414291382, "logits/chosen": -0.9627122282981873, "logits/rejected": -0.9689441323280334, "logps/chosen": -0.9377343058586121, "logps/rejected": -0.9663422107696533, "loss": 7.0429, "nll_loss": 1.686922311782837, "rewards/accuracies": 0.375, "rewards/chosen": -0.09377343207597733, "rewards/margins": 0.002860790118575096, "rewards/rejected": -0.09663422405719757, "step": 153 }, { "epoch": 0.10650069156293222, "grad_norm": 3.12202787399292, "learning_rate": 5.325034578146612e-06, "log_odds_chosen": -0.14066341519355774, "log_odds_ratio": -0.8131635785102844, "logits/chosen": -1.1361145973205566, "logits/rejected": -1.161047339439392, "logps/chosen": -0.8530935049057007, "logps/rejected": -0.7701809406280518, "loss": 7.1593, "nll_loss": 1.7085163593292236, "rewards/accuracies": 0.25, "rewards/chosen": -0.08530934900045395, "rewards/margins": -0.008291250094771385, "rewards/rejected": -0.07701809704303741, "step": 154 }, { "epoch": 0.10719225449515905, "grad_norm": 2.6539435386657715, "learning_rate": 5.359612724757953e-06, "log_odds_chosen": 0.05288369208574295, "log_odds_ratio": -0.6911187171936035, "logits/chosen": -1.1930322647094727, "logits/rejected": -1.2084239721298218, "logps/chosen": -0.7693685293197632, "logps/rejected": -0.7825278043746948, "loss": 5.6941, "nll_loss": 1.3544032573699951, "rewards/accuracies": 0.75, "rewards/chosen": -0.07693684101104736, "rewards/margins": 0.0013159322552382946, "rewards/rejected": -0.07825277745723724, "step": 155 }, { "epoch": 0.1078838174273859, "grad_norm": 2.016812801361084, "learning_rate": 5.394190871369295e-06, "log_odds_chosen": -0.2215977907180786, "log_odds_ratio": -0.8341439962387085, "logits/chosen": -0.985969066619873, "logits/rejected": -0.9674792289733887, "logps/chosen": -0.8357418775558472, "logps/rejected": -0.7325291633605957, "loss": 5.6336, "nll_loss": 1.324975848197937, "rewards/accuracies": 0.25, "rewards/chosen": -0.08357419073581696, "rewards/margins": -0.010321276262402534, "rewards/rejected": -0.07325291633605957, "step": 156 }, { "epoch": 0.10857538035961273, "grad_norm": 4.376221179962158, "learning_rate": 5.4287690179806365e-06, "log_odds_chosen": 0.7335000038146973, "log_odds_ratio": -0.4274066686630249, "logits/chosen": -1.0363941192626953, "logits/rejected": -1.07184636592865, "logps/chosen": -0.5886829495429993, "logps/rejected": -0.9144682884216309, "loss": 7.8089, "nll_loss": 1.9094841480255127, "rewards/accuracies": 1.0, "rewards/chosen": -0.05886829271912575, "rewards/margins": 0.03257853537797928, "rewards/rejected": -0.09144683182239532, "step": 157 }, { "epoch": 0.10926694329183956, "grad_norm": 2.5418620109558105, "learning_rate": 5.463347164591978e-06, "log_odds_chosen": 0.11510226130485535, "log_odds_ratio": -0.6988317370414734, "logits/chosen": -0.9949272274971008, "logits/rejected": -1.0054848194122314, "logps/chosen": -0.6619945168495178, "logps/rejected": -0.7326085567474365, "loss": 6.3483, "nll_loss": 1.5171840190887451, "rewards/accuracies": 0.625, "rewards/chosen": -0.06619945168495178, "rewards/margins": 0.007061406970024109, "rewards/rejected": -0.07326085865497589, "step": 158 }, { "epoch": 0.10995850622406639, "grad_norm": 3.0811150074005127, "learning_rate": 5.49792531120332e-06, "log_odds_chosen": 0.48675233125686646, "log_odds_ratio": -0.5503637194633484, "logits/chosen": -1.2820873260498047, "logits/rejected": -1.2850234508514404, "logps/chosen": -0.6293501853942871, "logps/rejected": -0.831933319568634, "loss": 6.8161, "nll_loss": 1.6489897966384888, "rewards/accuracies": 0.75, "rewards/chosen": -0.06293501704931259, "rewards/margins": 0.020258314907550812, "rewards/rejected": -0.0831933319568634, "step": 159 }, { "epoch": 0.11065006915629322, "grad_norm": 3.9759669303894043, "learning_rate": 5.532503457814661e-06, "log_odds_chosen": 0.592308759689331, "log_odds_ratio": -0.4961738884449005, "logits/chosen": -0.9351903200149536, "logits/rejected": -0.9440701007843018, "logps/chosen": -0.5135587453842163, "logps/rejected": -0.8421783447265625, "loss": 6.8458, "nll_loss": 1.6618294715881348, "rewards/accuracies": 0.875, "rewards/chosen": -0.05135587230324745, "rewards/margins": 0.032861970365047455, "rewards/rejected": -0.08421783894300461, "step": 160 }, { "epoch": 0.11134163208852006, "grad_norm": 2.940592050552368, "learning_rate": 5.5670816044260036e-06, "log_odds_chosen": 0.3694240152835846, "log_odds_ratio": -0.5937953591346741, "logits/chosen": -1.092789888381958, "logits/rejected": -1.086504578590393, "logps/chosen": -0.37033939361572266, "logps/rejected": -0.5343331098556519, "loss": 6.7006, "nll_loss": 1.61576247215271, "rewards/accuracies": 0.625, "rewards/chosen": -0.037033941596746445, "rewards/margins": 0.01639937050640583, "rewards/rejected": -0.053433313965797424, "step": 161 }, { "epoch": 0.11203319502074689, "grad_norm": 3.694589614868164, "learning_rate": 5.601659751037345e-06, "log_odds_chosen": 0.6592048406600952, "log_odds_ratio": -0.4732610583305359, "logits/chosen": -1.0874364376068115, "logits/rejected": -1.0700212717056274, "logps/chosen": -0.27317383885383606, "logps/rejected": -0.4280146062374115, "loss": 6.7253, "nll_loss": 1.6339954137802124, "rewards/accuracies": 0.875, "rewards/chosen": -0.027317382395267487, "rewards/margins": 0.015484076924622059, "rewards/rejected": -0.04280146211385727, "step": 162 }, { "epoch": 0.11272475795297372, "grad_norm": 2.992234706878662, "learning_rate": 5.636237897648687e-06, "log_odds_chosen": 0.10452957451343536, "log_odds_ratio": -0.6631104946136475, "logits/chosen": -1.0844966173171997, "logits/rejected": -1.0631647109985352, "logps/chosen": -0.4913597106933594, "logps/rejected": -0.5436203479766846, "loss": 5.5863, "nll_loss": 1.3302576541900635, "rewards/accuracies": 0.625, "rewards/chosen": -0.049135975539684296, "rewards/margins": 0.0052260663360357285, "rewards/rejected": -0.054362036287784576, "step": 163 }, { "epoch": 0.11341632088520055, "grad_norm": 2.8842968940734863, "learning_rate": 5.670816044260028e-06, "log_odds_chosen": 0.5779211521148682, "log_odds_ratio": -0.5084534883499146, "logits/chosen": -0.9483163356781006, "logits/rejected": -0.9567909240722656, "logps/chosen": -0.4137590825557709, "logps/rejected": -0.6266723275184631, "loss": 5.9067, "nll_loss": 1.425826072692871, "rewards/accuracies": 0.75, "rewards/chosen": -0.04137590900063515, "rewards/margins": 0.021291323006153107, "rewards/rejected": -0.06266723573207855, "step": 164 }, { "epoch": 0.11410788381742738, "grad_norm": 3.21958589553833, "learning_rate": 5.70539419087137e-06, "log_odds_chosen": 0.6306442022323608, "log_odds_ratio": -0.4906991720199585, "logits/chosen": -1.0865769386291504, "logits/rejected": -1.1414381265640259, "logps/chosen": -0.38455918431282043, "logps/rejected": -0.7448874711990356, "loss": 5.6921, "nll_loss": 1.3739588260650635, "rewards/accuracies": 0.5, "rewards/chosen": -0.03845591843128204, "rewards/margins": 0.03603282943367958, "rewards/rejected": -0.07448874413967133, "step": 165 }, { "epoch": 0.11479944674965421, "grad_norm": 3.038536548614502, "learning_rate": 5.7399723374827105e-06, "log_odds_chosen": 0.3064265549182892, "log_odds_ratio": -0.6025457382202148, "logits/chosen": -1.0255924463272095, "logits/rejected": -1.0379023551940918, "logps/chosen": -0.43842604756355286, "logps/rejected": -0.5147268772125244, "loss": 4.9638, "nll_loss": 1.1806881427764893, "rewards/accuracies": 0.625, "rewards/chosen": -0.043842606246471405, "rewards/margins": 0.007630080450326204, "rewards/rejected": -0.05147268623113632, "step": 166 }, { "epoch": 0.11549100968188106, "grad_norm": 3.0498814582824707, "learning_rate": 5.774550484094053e-06, "log_odds_chosen": 0.2696775794029236, "log_odds_ratio": -0.6612348556518555, "logits/chosen": -1.0600786209106445, "logits/rejected": -1.0842914581298828, "logps/chosen": -0.30538222193717957, "logps/rejected": -0.4055224061012268, "loss": 7.08, "nll_loss": 1.7038698196411133, "rewards/accuracies": 0.5, "rewards/chosen": -0.030538223683834076, "rewards/margins": 0.01001401711255312, "rewards/rejected": -0.04055224359035492, "step": 167 }, { "epoch": 0.11618257261410789, "grad_norm": 2.9541399478912354, "learning_rate": 5.8091286307053945e-06, "log_odds_chosen": 0.7906651496887207, "log_odds_ratio": -0.4121550917625427, "logits/chosen": -0.8397430181503296, "logits/rejected": -0.8527986407279968, "logps/chosen": -0.41424304246902466, "logps/rejected": -0.7723820805549622, "loss": 6.7591, "nll_loss": 1.648551344871521, "rewards/accuracies": 0.875, "rewards/chosen": -0.041424304246902466, "rewards/margins": 0.03581390529870987, "rewards/rejected": -0.07723820954561234, "step": 168 }, { "epoch": 0.11687413554633472, "grad_norm": 3.013000249862671, "learning_rate": 5.843706777316736e-06, "log_odds_chosen": 0.6137700080871582, "log_odds_ratio": -0.4964900612831116, "logits/chosen": -1.203710913658142, "logits/rejected": -1.221206545829773, "logps/chosen": -0.2094321846961975, "logps/rejected": -0.32387834787368774, "loss": 6.0946, "nll_loss": 1.4740005731582642, "rewards/accuracies": 0.875, "rewards/chosen": -0.02094321884214878, "rewards/margins": 0.011444617062807083, "rewards/rejected": -0.032387834042310715, "step": 169 }, { "epoch": 0.11756569847856155, "grad_norm": 3.396745204925537, "learning_rate": 5.878284923928078e-06, "log_odds_chosen": 0.7227403521537781, "log_odds_ratio": -0.5156093239784241, "logits/chosen": -1.0030102729797363, "logits/rejected": -1.0096819400787354, "logps/chosen": -0.2418069839477539, "logps/rejected": -0.49622681736946106, "loss": 6.7511, "nll_loss": 1.6362210512161255, "rewards/accuracies": 0.75, "rewards/chosen": -0.02418069913983345, "rewards/margins": 0.025441987439990044, "rewards/rejected": -0.049622680991888046, "step": 170 }, { "epoch": 0.11825726141078838, "grad_norm": 2.8235480785369873, "learning_rate": 5.912863070539419e-06, "log_odds_chosen": 1.0059127807617188, "log_odds_ratio": -0.39074984192848206, "logits/chosen": -0.79595547914505, "logits/rejected": -0.8163602352142334, "logps/chosen": -0.3028831481933594, "logps/rejected": -0.5650444626808167, "loss": 6.15, "nll_loss": 1.4984172582626343, "rewards/accuracies": 0.875, "rewards/chosen": -0.030288314446806908, "rewards/margins": 0.026216134428977966, "rewards/rejected": -0.05650445073843002, "step": 171 }, { "epoch": 0.11894882434301521, "grad_norm": 3.315999984741211, "learning_rate": 5.947441217150761e-06, "log_odds_chosen": -0.772477388381958, "log_odds_ratio": -1.4490861892700195, "logits/chosen": -1.0406111478805542, "logits/rejected": -1.0026965141296387, "logps/chosen": -1.0363490581512451, "logps/rejected": -0.5162218809127808, "loss": 7.9681, "nll_loss": 1.8471150398254395, "rewards/accuracies": 0.25, "rewards/chosen": -0.10363490134477615, "rewards/margins": -0.05201271176338196, "rewards/rejected": -0.0516221821308136, "step": 172 }, { "epoch": 0.11964038727524205, "grad_norm": 2.5711920261383057, "learning_rate": 5.982019363762103e-06, "log_odds_chosen": 1.1415014266967773, "log_odds_ratio": -0.39422786235809326, "logits/chosen": -0.7181402444839478, "logits/rejected": -0.7053050994873047, "logps/chosen": -0.2831200957298279, "logps/rejected": -0.5622020959854126, "loss": 5.6992, "nll_loss": 1.3853861093521118, "rewards/accuracies": 0.75, "rewards/chosen": -0.02831200882792473, "rewards/margins": 0.02790820226073265, "rewards/rejected": -0.05622021108865738, "step": 173 }, { "epoch": 0.12033195020746888, "grad_norm": 2.762683868408203, "learning_rate": 6.016597510373445e-06, "log_odds_chosen": 0.8880830407142639, "log_odds_ratio": -0.45892333984375, "logits/chosen": -0.9726389646530151, "logits/rejected": -0.9073729515075684, "logps/chosen": -0.3582124710083008, "logps/rejected": -0.6857412457466125, "loss": 6.6004, "nll_loss": 1.6042182445526123, "rewards/accuracies": 0.75, "rewards/chosen": -0.03582124784588814, "rewards/margins": 0.0327528715133667, "rewards/rejected": -0.06857412308454514, "step": 174 }, { "epoch": 0.12102351313969571, "grad_norm": 3.1388444900512695, "learning_rate": 6.051175656984786e-06, "log_odds_chosen": 0.3940921723842621, "log_odds_ratio": -0.595094621181488, "logits/chosen": -1.1326537132263184, "logits/rejected": -1.1883403062820435, "logps/chosen": -0.3753691613674164, "logps/rejected": -0.4759666323661804, "loss": 6.4509, "nll_loss": 1.5532217025756836, "rewards/accuracies": 0.625, "rewards/chosen": -0.03753691911697388, "rewards/margins": 0.010059747844934464, "rewards/rejected": -0.04759666323661804, "step": 175 }, { "epoch": 0.12171507607192254, "grad_norm": 2.726529598236084, "learning_rate": 6.085753803596127e-06, "log_odds_chosen": 0.3556770086288452, "log_odds_ratio": -0.6936931014060974, "logits/chosen": -1.009606957435608, "logits/rejected": -1.0062367916107178, "logps/chosen": -0.31316474080085754, "logps/rejected": -0.47506463527679443, "loss": 5.9535, "nll_loss": 1.419015884399414, "rewards/accuracies": 0.75, "rewards/chosen": -0.031316474080085754, "rewards/margins": 0.01618999056518078, "rewards/rejected": -0.04750646650791168, "step": 176 }, { "epoch": 0.12240663900414937, "grad_norm": 4.527401924133301, "learning_rate": 6.120331950207469e-06, "log_odds_chosen": 0.00036665797233581543, "log_odds_ratio": -0.826472282409668, "logits/chosen": -1.197486400604248, "logits/rejected": -1.1863048076629639, "logps/chosen": -0.3690447211265564, "logps/rejected": -0.4162423312664032, "loss": 8.1314, "nll_loss": 1.9502149820327759, "rewards/accuracies": 0.625, "rewards/chosen": -0.036904476583004, "rewards/margins": 0.00471975514665246, "rewards/rejected": -0.04162422940135002, "step": 177 }, { "epoch": 0.12309820193637622, "grad_norm": 3.836942195892334, "learning_rate": 6.154910096818811e-06, "log_odds_chosen": 0.45801472663879395, "log_odds_ratio": -0.5604403614997864, "logits/chosen": -0.7728670835494995, "logits/rejected": -0.790350079536438, "logps/chosen": -0.3449108898639679, "logps/rejected": -0.458249568939209, "loss": 6.2138, "nll_loss": 1.497396469116211, "rewards/accuracies": 0.625, "rewards/chosen": -0.03449108451604843, "rewards/margins": 0.011333871632814407, "rewards/rejected": -0.04582495987415314, "step": 178 }, { "epoch": 0.12378976486860305, "grad_norm": 4.046421051025391, "learning_rate": 6.1894882434301526e-06, "log_odds_chosen": 0.6479674577713013, "log_odds_ratio": -0.5464097857475281, "logits/chosen": -0.843544602394104, "logits/rejected": -0.87617027759552, "logps/chosen": -0.19211876392364502, "logps/rejected": -0.3340243101119995, "loss": 6.3723, "nll_loss": 1.5384306907653809, "rewards/accuracies": 0.625, "rewards/chosen": -0.01921188086271286, "rewards/margins": 0.01419055089354515, "rewards/rejected": -0.03340243175625801, "step": 179 }, { "epoch": 0.12448132780082988, "grad_norm": 2.370387554168701, "learning_rate": 6.224066390041494e-06, "log_odds_chosen": 1.467092514038086, "log_odds_ratio": -0.3379737436771393, "logits/chosen": -1.0904818773269653, "logits/rejected": -1.1200087070465088, "logps/chosen": -0.28249266743659973, "logps/rejected": -0.6824603080749512, "loss": 5.638, "nll_loss": 1.375713586807251, "rewards/accuracies": 0.875, "rewards/chosen": -0.028249267488718033, "rewards/margins": 0.03999676555395126, "rewards/rejected": -0.068246029317379, "step": 180 }, { "epoch": 0.1251728907330567, "grad_norm": 4.152231693267822, "learning_rate": 6.258644536652836e-06, "log_odds_chosen": -0.037120670080184937, "log_odds_ratio": -0.8663021922111511, "logits/chosen": -1.0969313383102417, "logits/rejected": -1.0759817361831665, "logps/chosen": -0.4796963930130005, "logps/rejected": -0.46426907181739807, "loss": 6.8526, "nll_loss": 1.626530647277832, "rewards/accuracies": 0.5, "rewards/chosen": -0.04796964302659035, "rewards/margins": -0.0015427323523908854, "rewards/rejected": -0.04642690718173981, "step": 181 }, { "epoch": 0.12586445366528354, "grad_norm": 3.547257423400879, "learning_rate": 6.293222683264177e-06, "log_odds_chosen": -0.031091928482055664, "log_odds_ratio": -1.1040213108062744, "logits/chosen": -1.212889313697815, "logits/rejected": -1.206860065460205, "logps/chosen": -0.6664987802505493, "logps/rejected": -0.3842780292034149, "loss": 6.6819, "nll_loss": 1.5600645542144775, "rewards/accuracies": 0.75, "rewards/chosen": -0.06664987653493881, "rewards/margins": -0.028222069144248962, "rewards/rejected": -0.03842780366539955, "step": 182 }, { "epoch": 0.12655601659751037, "grad_norm": 3.0481033325195312, "learning_rate": 6.327800829875519e-06, "log_odds_chosen": 0.3628125488758087, "log_odds_ratio": -0.628267765045166, "logits/chosen": -1.2803454399108887, "logits/rejected": -1.286865472793579, "logps/chosen": -0.250302791595459, "logps/rejected": -0.368587464094162, "loss": 5.7693, "nll_loss": 1.379507064819336, "rewards/accuracies": 0.5, "rewards/chosen": -0.025030281394720078, "rewards/margins": 0.0118284672498703, "rewards/rejected": -0.03685874491930008, "step": 183 }, { "epoch": 0.1272475795297372, "grad_norm": 2.8776659965515137, "learning_rate": 6.36237897648686e-06, "log_odds_chosen": 0.21505118906497955, "log_odds_ratio": -0.6434811949729919, "logits/chosen": -0.8477299809455872, "logits/rejected": -0.8031063079833984, "logps/chosen": -0.27206921577453613, "logps/rejected": -0.312256395816803, "loss": 6.5827, "nll_loss": 1.5813323259353638, "rewards/accuracies": 0.5, "rewards/chosen": -0.027206923812627792, "rewards/margins": 0.0040187169797718525, "rewards/rejected": -0.03122563846409321, "step": 184 }, { "epoch": 0.12793914246196403, "grad_norm": 2.9824650287628174, "learning_rate": 6.396957123098202e-06, "log_odds_chosen": 0.3025757074356079, "log_odds_ratio": -0.6380969882011414, "logits/chosen": -0.8896502256393433, "logits/rejected": -0.9428619146347046, "logps/chosen": -0.25282663106918335, "logps/rejected": -0.3300777077674866, "loss": 5.2551, "nll_loss": 1.249961256980896, "rewards/accuracies": 0.75, "rewards/chosen": -0.025282664224505424, "rewards/margins": 0.0077251046895980835, "rewards/rejected": -0.03300777077674866, "step": 185 }, { "epoch": 0.12863070539419086, "grad_norm": 3.4677364826202393, "learning_rate": 6.4315352697095435e-06, "log_odds_chosen": 0.5208099484443665, "log_odds_ratio": -0.70698082447052, "logits/chosen": -0.7441154718399048, "logits/rejected": -0.7471722960472107, "logps/chosen": -0.2849894165992737, "logps/rejected": -0.48318296670913696, "loss": 6.2467, "nll_loss": 1.4909782409667969, "rewards/accuracies": 0.5, "rewards/chosen": -0.028498942032456398, "rewards/margins": 0.01981935277581215, "rewards/rejected": -0.0483182929456234, "step": 186 }, { "epoch": 0.12932226832641772, "grad_norm": 3.294553518295288, "learning_rate": 6.466113416320886e-06, "log_odds_chosen": -0.1137196272611618, "log_odds_ratio": -0.7959656715393066, "logits/chosen": -0.5830647945404053, "logits/rejected": -0.5787371397018433, "logps/chosen": -0.2610514163970947, "logps/rejected": -0.2726013958454132, "loss": 6.2343, "nll_loss": 1.47898530960083, "rewards/accuracies": 0.5, "rewards/chosen": -0.026105143129825592, "rewards/margins": 0.001154996920377016, "rewards/rejected": -0.02726013958454132, "step": 187 }, { "epoch": 0.13001383125864455, "grad_norm": 2.292361259460449, "learning_rate": 6.5006915629322275e-06, "log_odds_chosen": 0.7241369485855103, "log_odds_ratio": -0.4350699782371521, "logits/chosen": -0.7973431348800659, "logits/rejected": -0.8062911033630371, "logps/chosen": -0.3231046199798584, "logps/rejected": -0.5692073702812195, "loss": 4.8058, "nll_loss": 1.1579421758651733, "rewards/accuracies": 0.875, "rewards/chosen": -0.03231046348810196, "rewards/margins": 0.024610277265310287, "rewards/rejected": -0.05692073702812195, "step": 188 }, { "epoch": 0.13070539419087138, "grad_norm": 3.161369800567627, "learning_rate": 6.535269709543569e-06, "log_odds_chosen": 0.8151931762695312, "log_odds_ratio": -0.5472856760025024, "logits/chosen": -1.0429188013076782, "logits/rejected": -1.051293134689331, "logps/chosen": -0.27129557728767395, "logps/rejected": -0.5605320930480957, "loss": 5.7897, "nll_loss": 1.392688274383545, "rewards/accuracies": 0.75, "rewards/chosen": -0.027129555121064186, "rewards/margins": 0.028923654928803444, "rewards/rejected": -0.05605321004986763, "step": 189 }, { "epoch": 0.1313969571230982, "grad_norm": 3.7443580627441406, "learning_rate": 6.569847856154911e-06, "log_odds_chosen": 0.6477518081665039, "log_odds_ratio": -0.5179813504219055, "logits/chosen": -1.2564938068389893, "logits/rejected": -1.273054838180542, "logps/chosen": -0.1866637021303177, "logps/rejected": -0.3534211814403534, "loss": 7.8754, "nll_loss": 1.9170430898666382, "rewards/accuracies": 0.625, "rewards/chosen": -0.01866636984050274, "rewards/margins": 0.01667574793100357, "rewards/rejected": -0.03534211963415146, "step": 190 }, { "epoch": 0.13208852005532504, "grad_norm": 3.5630128383636475, "learning_rate": 6.604426002766252e-06, "log_odds_chosen": 0.0035073384642601013, "log_odds_ratio": -0.7637513875961304, "logits/chosen": -0.7529496550559998, "logits/rejected": -0.7476365566253662, "logps/chosen": -0.3738434910774231, "logps/rejected": -0.3310817778110504, "loss": 5.1497, "nll_loss": 1.2110416889190674, "rewards/accuracies": 0.625, "rewards/chosen": -0.03738434985280037, "rewards/margins": -0.004276171792298555, "rewards/rejected": -0.0331081785261631, "step": 191 }, { "epoch": 0.13278008298755187, "grad_norm": 3.5751423835754395, "learning_rate": 6.639004149377594e-06, "log_odds_chosen": 0.4771386981010437, "log_odds_ratio": -0.5333381295204163, "logits/chosen": -1.2637674808502197, "logits/rejected": -1.2599058151245117, "logps/chosen": -0.24162539839744568, "logps/rejected": -0.33872294425964355, "loss": 6.5613, "nll_loss": 1.5869882106781006, "rewards/accuracies": 0.625, "rewards/chosen": -0.02416253834962845, "rewards/margins": 0.009709754958748817, "rewards/rejected": -0.033872295171022415, "step": 192 }, { "epoch": 0.1334716459197787, "grad_norm": 4.024544715881348, "learning_rate": 6.673582295988935e-06, "log_odds_chosen": 0.45041486620903015, "log_odds_ratio": -0.6190093755722046, "logits/chosen": -0.9153847098350525, "logits/rejected": -0.9401689767837524, "logps/chosen": -0.20653948187828064, "logps/rejected": -0.3575013279914856, "loss": 6.2296, "nll_loss": 1.4955058097839355, "rewards/accuracies": 0.625, "rewards/chosen": -0.020653948187828064, "rewards/margins": 0.015096185728907585, "rewards/rejected": -0.0357501320540905, "step": 193 }, { "epoch": 0.13416320885200553, "grad_norm": 2.5976641178131104, "learning_rate": 6.708160442600277e-06, "log_odds_chosen": 0.7277088761329651, "log_odds_ratio": -0.4611130654811859, "logits/chosen": -1.1048755645751953, "logits/rejected": -1.107547402381897, "logps/chosen": -0.2420302927494049, "logps/rejected": -0.4254276156425476, "loss": 6.6071, "nll_loss": 1.6056554317474365, "rewards/accuracies": 0.875, "rewards/chosen": -0.02420303039252758, "rewards/margins": 0.01833973452448845, "rewards/rejected": -0.04254276305437088, "step": 194 }, { "epoch": 0.13485477178423236, "grad_norm": 2.962162733078003, "learning_rate": 6.7427385892116184e-06, "log_odds_chosen": 0.519116997718811, "log_odds_ratio": -0.5505663752555847, "logits/chosen": -1.0663193464279175, "logits/rejected": -1.0633552074432373, "logps/chosen": -0.1306827962398529, "logps/rejected": -0.23576460778713226, "loss": 3.9006, "nll_loss": 0.9200987219810486, "rewards/accuracies": 0.75, "rewards/chosen": -0.01306828111410141, "rewards/margins": 0.010508181527256966, "rewards/rejected": -0.023576460778713226, "step": 195 }, { "epoch": 0.1355463347164592, "grad_norm": 4.697093486785889, "learning_rate": 6.77731673582296e-06, "log_odds_chosen": 0.3077636957168579, "log_odds_ratio": -0.6371062397956848, "logits/chosen": -1.100118637084961, "logits/rejected": -1.0694329738616943, "logps/chosen": -0.21723446249961853, "logps/rejected": -0.3122878670692444, "loss": 6.3028, "nll_loss": 1.5120017528533936, "rewards/accuracies": 0.5, "rewards/chosen": -0.021723445504903793, "rewards/margins": 0.00950533989816904, "rewards/rejected": -0.03122878633439541, "step": 196 }, { "epoch": 0.13623789764868602, "grad_norm": 2.884382486343384, "learning_rate": 6.8118948824343016e-06, "log_odds_chosen": -0.0943203717470169, "log_odds_ratio": -0.7874737977981567, "logits/chosen": -0.5834493637084961, "logits/rejected": -0.5732121467590332, "logps/chosen": -0.2202082872390747, "logps/rejected": -0.23534724116325378, "loss": 4.5543, "nll_loss": 1.0598318576812744, "rewards/accuracies": 0.375, "rewards/chosen": -0.02202082984149456, "rewards/margins": 0.001513894647359848, "rewards/rejected": -0.023534726351499557, "step": 197 }, { "epoch": 0.13692946058091288, "grad_norm": 3.0852651596069336, "learning_rate": 6.846473029045644e-06, "log_odds_chosen": 1.2487186193466187, "log_odds_ratio": -0.5184494256973267, "logits/chosen": -0.833308756351471, "logits/rejected": -0.8734216690063477, "logps/chosen": -0.18357661366462708, "logps/rejected": -0.36280807852745056, "loss": 6.8616, "nll_loss": 1.6635560989379883, "rewards/accuracies": 0.625, "rewards/chosen": -0.018357660621404648, "rewards/margins": 0.017923148348927498, "rewards/rejected": -0.036280810832977295, "step": 198 }, { "epoch": 0.1376210235131397, "grad_norm": 3.3805103302001953, "learning_rate": 6.8810511756569855e-06, "log_odds_chosen": 0.44542139768600464, "log_odds_ratio": -0.5214876532554626, "logits/chosen": -0.8738541603088379, "logits/rejected": -0.8638968467712402, "logps/chosen": -0.19244788587093353, "logps/rejected": -0.28946179151535034, "loss": 6.6065, "nll_loss": 1.599472165107727, "rewards/accuracies": 0.875, "rewards/chosen": -0.019244790077209473, "rewards/margins": 0.009701389819383621, "rewards/rejected": -0.028946179896593094, "step": 199 }, { "epoch": 0.13831258644536654, "grad_norm": 4.198734760284424, "learning_rate": 6.915629322268327e-06, "log_odds_chosen": 0.34398406744003296, "log_odds_ratio": -0.6945401430130005, "logits/chosen": -0.725531280040741, "logits/rejected": -0.7579768896102905, "logps/chosen": -0.26485031843185425, "logps/rejected": -0.25032472610473633, "loss": 6.4356, "nll_loss": 1.5394471883773804, "rewards/accuracies": 0.625, "rewards/chosen": -0.026485033333301544, "rewards/margins": -0.0014525633305311203, "rewards/rejected": -0.025032471865415573, "step": 200 }, { "epoch": 0.13900414937759337, "grad_norm": 4.543229103088379, "learning_rate": 6.950207468879669e-06, "log_odds_chosen": 1.7519770860671997, "log_odds_ratio": -0.33452051877975464, "logits/chosen": -1.173638939857483, "logits/rejected": -1.2212574481964111, "logps/chosen": -0.14706477522850037, "logps/rejected": -0.7356031537055969, "loss": 7.496, "nll_loss": 1.8405449390411377, "rewards/accuracies": 0.75, "rewards/chosen": -0.014706477522850037, "rewards/margins": 0.058853842318058014, "rewards/rejected": -0.07356031984090805, "step": 201 }, { "epoch": 0.1396957123098202, "grad_norm": 2.9318344593048096, "learning_rate": 6.98478561549101e-06, "log_odds_chosen": 0.624595582485199, "log_odds_ratio": -0.4736407995223999, "logits/chosen": -0.9576144218444824, "logits/rejected": -0.9811232686042786, "logps/chosen": -0.22369879484176636, "logps/rejected": -0.393297016620636, "loss": 6.1301, "nll_loss": 1.485149621963501, "rewards/accuracies": 0.875, "rewards/chosen": -0.022369876503944397, "rewards/margins": 0.016959823668003082, "rewards/rejected": -0.03932970389723778, "step": 202 }, { "epoch": 0.14038727524204703, "grad_norm": 3.0044398307800293, "learning_rate": 7.019363762102352e-06, "log_odds_chosen": 0.9112159013748169, "log_odds_ratio": -0.5825238227844238, "logits/chosen": -1.040523886680603, "logits/rejected": -1.0338996648788452, "logps/chosen": -0.15042006969451904, "logps/rejected": -0.40081560611724854, "loss": 5.4207, "nll_loss": 1.2969247102737427, "rewards/accuracies": 0.625, "rewards/chosen": -0.015042006969451904, "rewards/margins": 0.02503955364227295, "rewards/rejected": -0.04008156433701515, "step": 203 }, { "epoch": 0.14107883817427386, "grad_norm": 4.994499206542969, "learning_rate": 7.053941908713693e-06, "log_odds_chosen": 0.053051501512527466, "log_odds_ratio": -0.77434903383255, "logits/chosen": -1.011791467666626, "logits/rejected": -1.0063687562942505, "logps/chosen": -0.29917120933532715, "logps/rejected": -0.3704149127006531, "loss": 6.1848, "nll_loss": 1.4687684774398804, "rewards/accuracies": 0.625, "rewards/chosen": -0.029917120933532715, "rewards/margins": 0.007124368101358414, "rewards/rejected": -0.03704149276018143, "step": 204 }, { "epoch": 0.1417704011065007, "grad_norm": 5.865853786468506, "learning_rate": 7.088520055325035e-06, "log_odds_chosen": 0.41838449239730835, "log_odds_ratio": -0.8382209539413452, "logits/chosen": -0.9785934686660767, "logits/rejected": -0.9925634264945984, "logps/chosen": -0.21129783987998962, "logps/rejected": -0.4044276475906372, "loss": 6.6055, "nll_loss": 1.5675466060638428, "rewards/accuracies": 0.5, "rewards/chosen": -0.021129783242940903, "rewards/margins": 0.019312981516122818, "rewards/rejected": -0.04044276475906372, "step": 205 }, { "epoch": 0.14246196403872752, "grad_norm": 3.181340217590332, "learning_rate": 7.1230982019363765e-06, "log_odds_chosen": 1.314388632774353, "log_odds_ratio": -0.3325830101966858, "logits/chosen": -0.8861981630325317, "logits/rejected": -0.8500977754592896, "logps/chosen": -0.12594662606716156, "logps/rejected": -0.34991195797920227, "loss": 6.2821, "nll_loss": 1.5372616052627563, "rewards/accuracies": 0.875, "rewards/chosen": -0.01259466353803873, "rewards/margins": 0.022396530956029892, "rewards/rejected": -0.034991197288036346, "step": 206 }, { "epoch": 0.14315352697095435, "grad_norm": 3.157027006149292, "learning_rate": 7.157676348547718e-06, "log_odds_chosen": 0.8879727125167847, "log_odds_ratio": -0.5650953054428101, "logits/chosen": -0.612299919128418, "logits/rejected": -0.6093465089797974, "logps/chosen": -0.2196851372718811, "logps/rejected": -0.3596701920032501, "loss": 6.3127, "nll_loss": 1.5216560363769531, "rewards/accuracies": 0.625, "rewards/chosen": -0.02196851372718811, "rewards/margins": 0.013998505659401417, "rewards/rejected": -0.03596701845526695, "step": 207 }, { "epoch": 0.14384508990318118, "grad_norm": 4.110046863555908, "learning_rate": 7.19225449515906e-06, "log_odds_chosen": -0.11123251914978027, "log_odds_ratio": -0.9420138001441956, "logits/chosen": -0.8208776116371155, "logits/rejected": -0.814195990562439, "logps/chosen": -0.2516728937625885, "logps/rejected": -0.26293325424194336, "loss": 5.744, "nll_loss": 1.3417918682098389, "rewards/accuracies": 0.5, "rewards/chosen": -0.02516729012131691, "rewards/margins": 0.0011260367464274168, "rewards/rejected": -0.026293326169252396, "step": 208 }, { "epoch": 0.14453665283540804, "grad_norm": 3.6285037994384766, "learning_rate": 7.226832641770402e-06, "log_odds_chosen": 1.427751064300537, "log_odds_ratio": -0.37405925989151, "logits/chosen": -1.077873706817627, "logits/rejected": -1.1133126020431519, "logps/chosen": -0.12508371472358704, "logps/rejected": -0.466911256313324, "loss": 7.4872, "nll_loss": 1.8343921899795532, "rewards/accuracies": 0.875, "rewards/chosen": -0.012508371844887733, "rewards/margins": 0.03418275713920593, "rewards/rejected": -0.04669112712144852, "step": 209 }, { "epoch": 0.14522821576763487, "grad_norm": 2.4061126708984375, "learning_rate": 7.2614107883817436e-06, "log_odds_chosen": 1.315793752670288, "log_odds_ratio": -0.36149081587791443, "logits/chosen": -0.8516741991043091, "logits/rejected": -0.8345063924789429, "logps/chosen": -0.12234769016504288, "logps/rejected": -0.31906723976135254, "loss": 4.86, "nll_loss": 1.1788439750671387, "rewards/accuracies": 0.75, "rewards/chosen": -0.012234769761562347, "rewards/margins": 0.019671954214572906, "rewards/rejected": -0.031906723976135254, "step": 210 }, { "epoch": 0.1459197786998617, "grad_norm": 5.4849395751953125, "learning_rate": 7.295988934993085e-06, "log_odds_chosen": 0.9772852659225464, "log_odds_ratio": -0.6391040682792664, "logits/chosen": -0.8843315243721008, "logits/rejected": -0.9143941402435303, "logps/chosen": -0.17293697595596313, "logps/rejected": -0.3915655314922333, "loss": 5.7228, "nll_loss": 1.3667978048324585, "rewards/accuracies": 0.625, "rewards/chosen": -0.017293699085712433, "rewards/margins": 0.021862853318452835, "rewards/rejected": -0.039156556129455566, "step": 211 }, { "epoch": 0.14661134163208853, "grad_norm": 3.6493422985076904, "learning_rate": 7.330567081604427e-06, "log_odds_chosen": -0.14418122172355652, "log_odds_ratio": -1.1010364294052124, "logits/chosen": -0.7637627720832825, "logits/rejected": -0.8069720268249512, "logps/chosen": -0.5388314723968506, "logps/rejected": -0.20467592775821686, "loss": 5.8703, "nll_loss": 1.3574600219726562, "rewards/accuracies": 0.375, "rewards/chosen": -0.053883146494627, "rewards/margins": -0.033415548503398895, "rewards/rejected": -0.020467594265937805, "step": 212 }, { "epoch": 0.14730290456431536, "grad_norm": 6.45885705947876, "learning_rate": 7.365145228215768e-06, "log_odds_chosen": 0.9364676475524902, "log_odds_ratio": -0.7672489285469055, "logits/chosen": -1.0151984691619873, "logits/rejected": -1.0229125022888184, "logps/chosen": -0.21881504356861115, "logps/rejected": -0.4902130365371704, "loss": 6.6348, "nll_loss": 1.581984043121338, "rewards/accuracies": 0.625, "rewards/chosen": -0.021881505846977234, "rewards/margins": 0.027139799669384956, "rewards/rejected": -0.04902130365371704, "step": 213 }, { "epoch": 0.1479944674965422, "grad_norm": 2.9415462017059326, "learning_rate": 7.39972337482711e-06, "log_odds_chosen": 0.662085771560669, "log_odds_ratio": -0.5639595985412598, "logits/chosen": -0.6300270557403564, "logits/rejected": -0.6536536812782288, "logps/chosen": -0.17133358120918274, "logps/rejected": -0.30744150280952454, "loss": 4.2607, "nll_loss": 1.0087754726409912, "rewards/accuracies": 0.625, "rewards/chosen": -0.017133358865976334, "rewards/margins": 0.013610792346298695, "rewards/rejected": -0.030744148418307304, "step": 214 }, { "epoch": 0.14868603042876902, "grad_norm": 3.18498158454895, "learning_rate": 7.434301521438451e-06, "log_odds_chosen": 0.3586312234401703, "log_odds_ratio": -0.8149222731590271, "logits/chosen": -0.7855114936828613, "logits/rejected": -0.7873052358627319, "logps/chosen": -0.26449501514434814, "logps/rejected": -0.2783412039279938, "loss": 5.692, "nll_loss": 1.3415006399154663, "rewards/accuracies": 0.375, "rewards/chosen": -0.026449501514434814, "rewards/margins": 0.0013846198562532663, "rewards/rejected": -0.027834121137857437, "step": 215 }, { "epoch": 0.14937759336099585, "grad_norm": 4.490957260131836, "learning_rate": 7.468879668049793e-06, "log_odds_chosen": 0.6908822059631348, "log_odds_ratio": -0.6412980556488037, "logits/chosen": -0.873936653137207, "logits/rejected": -0.905531644821167, "logps/chosen": -0.25872665643692017, "logps/rejected": -0.318185031414032, "loss": 5.5292, "nll_loss": 1.318181037902832, "rewards/accuracies": 0.625, "rewards/chosen": -0.025872664526104927, "rewards/margins": 0.005945838056504726, "rewards/rejected": -0.03181850537657738, "step": 216 }, { "epoch": 0.15006915629322268, "grad_norm": 4.169113636016846, "learning_rate": 7.5034578146611345e-06, "log_odds_chosen": 0.30677735805511475, "log_odds_ratio": -0.7200303077697754, "logits/chosen": -1.0342464447021484, "logits/rejected": -1.0058650970458984, "logps/chosen": -0.2100173383951187, "logps/rejected": -0.2714996933937073, "loss": 5.6223, "nll_loss": 1.3335614204406738, "rewards/accuracies": 0.625, "rewards/chosen": -0.02100173383951187, "rewards/margins": 0.006148234941065311, "rewards/rejected": -0.027149969711899757, "step": 217 }, { "epoch": 0.1507607192254495, "grad_norm": 2.975595474243164, "learning_rate": 7.538035961272476e-06, "log_odds_chosen": 1.100553035736084, "log_odds_ratio": -0.4032314419746399, "logits/chosen": -0.943057119846344, "logits/rejected": -0.9438403844833374, "logps/chosen": -0.19745934009552002, "logps/rejected": -0.48316293954849243, "loss": 4.7155, "nll_loss": 1.1385633945465088, "rewards/accuracies": 0.875, "rewards/chosen": -0.01974593475461006, "rewards/margins": 0.02857036143541336, "rewards/rejected": -0.04831629619002342, "step": 218 }, { "epoch": 0.15145228215767634, "grad_norm": 4.872454643249512, "learning_rate": 7.572614107883818e-06, "log_odds_chosen": -0.028303883969783783, "log_odds_ratio": -1.1446633338928223, "logits/chosen": -1.0451685190200806, "logits/rejected": -1.0431214570999146, "logps/chosen": -0.34600648283958435, "logps/rejected": -0.4508405923843384, "loss": 6.3832, "nll_loss": 1.4813305139541626, "rewards/accuracies": 0.375, "rewards/chosen": -0.034600649029016495, "rewards/margins": 0.010483408346772194, "rewards/rejected": -0.04508405551314354, "step": 219 }, { "epoch": 0.15214384508990317, "grad_norm": 4.090143203735352, "learning_rate": 7.607192254495158e-06, "log_odds_chosen": 0.5904234647750854, "log_odds_ratio": -0.6713616847991943, "logits/chosen": -0.8287036418914795, "logits/rejected": -0.8195431232452393, "logps/chosen": -0.4618722200393677, "logps/rejected": -0.5354244112968445, "loss": 6.1785, "nll_loss": 1.4774805307388306, "rewards/accuracies": 0.625, "rewards/chosen": -0.04618722200393677, "rewards/margins": 0.007355216890573502, "rewards/rejected": -0.05354243889451027, "step": 220 }, { "epoch": 0.15283540802213003, "grad_norm": 4.119716644287109, "learning_rate": 7.641770401106502e-06, "log_odds_chosen": 0.6317969560623169, "log_odds_ratio": -0.6077767610549927, "logits/chosen": -1.0859671831130981, "logits/rejected": -1.0858830213546753, "logps/chosen": -0.20056869089603424, "logps/rejected": -0.3793398141860962, "loss": 7.1166, "nll_loss": 1.7183798551559448, "rewards/accuracies": 0.75, "rewards/chosen": -0.020056869834661484, "rewards/margins": 0.017877109348773956, "rewards/rejected": -0.03793397918343544, "step": 221 }, { "epoch": 0.15352697095435686, "grad_norm": 3.8006575107574463, "learning_rate": 7.676348547717842e-06, "log_odds_chosen": 1.653045892715454, "log_odds_ratio": -0.4079207479953766, "logits/chosen": -0.7932633757591248, "logits/rejected": -0.7943170666694641, "logps/chosen": -0.15025781095027924, "logps/rejected": -0.329763799905777, "loss": 6.9906, "nll_loss": 1.706859827041626, "rewards/accuracies": 0.875, "rewards/chosen": -0.015025781467556953, "rewards/margins": 0.017950598150491714, "rewards/rejected": -0.03297637775540352, "step": 222 }, { "epoch": 0.1542185338865837, "grad_norm": 5.726372718811035, "learning_rate": 7.710926694329184e-06, "log_odds_chosen": 1.1458200216293335, "log_odds_ratio": -0.6311984062194824, "logits/chosen": -0.8566406965255737, "logits/rejected": -0.8902969360351562, "logps/chosen": -0.2128678560256958, "logps/rejected": -0.3930707275867462, "loss": 6.5217, "nll_loss": 1.5672950744628906, "rewards/accuracies": 0.5, "rewards/chosen": -0.02128678746521473, "rewards/margins": 0.018020285293459892, "rewards/rejected": -0.03930707648396492, "step": 223 }, { "epoch": 0.15491009681881052, "grad_norm": 3.6631853580474854, "learning_rate": 7.745504840940525e-06, "log_odds_chosen": 0.3010627329349518, "log_odds_ratio": -0.6827666759490967, "logits/chosen": -0.6696209907531738, "logits/rejected": -0.6726163029670715, "logps/chosen": -0.11562220007181168, "logps/rejected": -0.16979828476905823, "loss": 4.5242, "nll_loss": 1.0627738237380981, "rewards/accuracies": 0.5, "rewards/chosen": -0.011562219820916653, "rewards/margins": 0.00541760865598917, "rewards/rejected": -0.016979828476905823, "step": 224 }, { "epoch": 0.15560165975103735, "grad_norm": 2.3640875816345215, "learning_rate": 7.780082987551867e-06, "log_odds_chosen": 0.9647090435028076, "log_odds_ratio": -0.3855966627597809, "logits/chosen": -0.4787185788154602, "logits/rejected": -0.4876733422279358, "logps/chosen": -0.10393448173999786, "logps/rejected": -0.26539182662963867, "loss": 4.5303, "nll_loss": 1.0940086841583252, "rewards/accuracies": 0.875, "rewards/chosen": -0.010393448173999786, "rewards/margins": 0.0161457359790802, "rewards/rejected": -0.026539184153079987, "step": 225 }, { "epoch": 0.15629322268326418, "grad_norm": 3.5224838256835938, "learning_rate": 7.814661134163209e-06, "log_odds_chosen": 1.3051297664642334, "log_odds_ratio": -0.44781693816185, "logits/chosen": -0.813154399394989, "logits/rejected": -0.8059035539627075, "logps/chosen": -0.1292162537574768, "logps/rejected": -0.32095658779144287, "loss": 6.3816, "nll_loss": 1.5506187677383423, "rewards/accuracies": 0.875, "rewards/chosen": -0.012921624816954136, "rewards/margins": 0.019174031913280487, "rewards/rejected": -0.03209565579891205, "step": 226 }, { "epoch": 0.156984785615491, "grad_norm": 3.95707106590271, "learning_rate": 7.84923928077455e-06, "log_odds_chosen": 0.27357932925224304, "log_odds_ratio": -0.9965370297431946, "logits/chosen": -0.703487753868103, "logits/rejected": -0.6508299112319946, "logps/chosen": -0.3244343400001526, "logps/rejected": -0.35959532856941223, "loss": 5.0346, "nll_loss": 1.1590083837509155, "rewards/accuracies": 0.5, "rewards/chosen": -0.03244343400001526, "rewards/margins": 0.0035160984843969345, "rewards/rejected": -0.03595953434705734, "step": 227 }, { "epoch": 0.15767634854771784, "grad_norm": 3.188214063644409, "learning_rate": 7.883817427385892e-06, "log_odds_chosen": 0.1925515979528427, "log_odds_ratio": -0.6929178237915039, "logits/chosen": -0.9470303058624268, "logits/rejected": -0.9407349228858948, "logps/chosen": -0.226731076836586, "logps/rejected": -0.31980404257774353, "loss": 4.4676, "nll_loss": 1.0476175546646118, "rewards/accuracies": 0.375, "rewards/chosen": -0.0226731076836586, "rewards/margins": 0.009307296946644783, "rewards/rejected": -0.03198040649294853, "step": 228 }, { "epoch": 0.15836791147994467, "grad_norm": 5.166046142578125, "learning_rate": 7.918395573997233e-06, "log_odds_chosen": -0.8341346979141235, "log_odds_ratio": -1.5097215175628662, "logits/chosen": -0.9731870889663696, "logits/rejected": -0.960036039352417, "logps/chosen": -0.4464319944381714, "logps/rejected": -0.2061399519443512, "loss": 6.0878, "nll_loss": 1.370969533920288, "rewards/accuracies": 0.5, "rewards/chosen": -0.04464320093393326, "rewards/margins": -0.024029206484556198, "rewards/rejected": -0.02061399631202221, "step": 229 }, { "epoch": 0.1590594744121715, "grad_norm": 3.6036903858184814, "learning_rate": 7.952973720608575e-06, "log_odds_chosen": 0.7378802299499512, "log_odds_ratio": -0.5369628667831421, "logits/chosen": -0.6612091064453125, "logits/rejected": -0.6341161727905273, "logps/chosen": -0.1552380621433258, "logps/rejected": -0.2605130076408386, "loss": 5.5298, "nll_loss": 1.328743815422058, "rewards/accuracies": 0.75, "rewards/chosen": -0.015523807145655155, "rewards/margins": 0.010527495294809341, "rewards/rejected": -0.02605130337178707, "step": 230 }, { "epoch": 0.15975103734439833, "grad_norm": 2.9829447269439697, "learning_rate": 7.987551867219916e-06, "log_odds_chosen": 0.7720410227775574, "log_odds_ratio": -0.6032599210739136, "logits/chosen": -0.9018489122390747, "logits/rejected": -0.8468755483627319, "logps/chosen": -0.24757295846939087, "logps/rejected": -0.333136647939682, "loss": 5.1035, "nll_loss": 1.215556263923645, "rewards/accuracies": 0.5, "rewards/chosen": -0.024757295846939087, "rewards/margins": 0.008556367829442024, "rewards/rejected": -0.03331366181373596, "step": 231 }, { "epoch": 0.16044260027662519, "grad_norm": 4.110328197479248, "learning_rate": 8.02213001383126e-06, "log_odds_chosen": -0.3504614531993866, "log_odds_ratio": -1.1943254470825195, "logits/chosen": -0.8923564553260803, "logits/rejected": -0.8823562860488892, "logps/chosen": -0.5723345875740051, "logps/rejected": -0.25823917984962463, "loss": 6.3108, "nll_loss": 1.4582581520080566, "rewards/accuracies": 0.5, "rewards/chosen": -0.05723346397280693, "rewards/margins": -0.03140954673290253, "rewards/rejected": -0.025823919102549553, "step": 232 }, { "epoch": 0.16113416320885202, "grad_norm": 3.6569745540618896, "learning_rate": 8.056708160442601e-06, "log_odds_chosen": 0.28908300399780273, "log_odds_ratio": -0.6737136840820312, "logits/chosen": -0.9151521921157837, "logits/rejected": -0.9005734920501709, "logps/chosen": -0.37384504079818726, "logps/rejected": -0.3533002734184265, "loss": 6.6159, "nll_loss": 1.5866096019744873, "rewards/accuracies": 0.5, "rewards/chosen": -0.037384502589702606, "rewards/margins": -0.0020544761791825294, "rewards/rejected": -0.03533002734184265, "step": 233 }, { "epoch": 0.16182572614107885, "grad_norm": 3.0560779571533203, "learning_rate": 8.091286307053943e-06, "log_odds_chosen": 0.14291715621948242, "log_odds_ratio": -1.0155534744262695, "logits/chosen": -0.9692038297653198, "logits/rejected": -0.9579259157180786, "logps/chosen": -0.44459283351898193, "logps/rejected": -0.2261468470096588, "loss": 6.0231, "nll_loss": 1.4042236804962158, "rewards/accuracies": 0.625, "rewards/chosen": -0.04445928335189819, "rewards/margins": -0.02184459939599037, "rewards/rejected": -0.02261468581855297, "step": 234 }, { "epoch": 0.16251728907330568, "grad_norm": 3.007359266281128, "learning_rate": 8.125864453665284e-06, "log_odds_chosen": 0.24351224303245544, "log_odds_ratio": -0.6321843266487122, "logits/chosen": -0.9844968318939209, "logits/rejected": -0.9822818040847778, "logps/chosen": -0.18725910782814026, "logps/rejected": -0.19656959176063538, "loss": 5.6756, "nll_loss": 1.3556902408599854, "rewards/accuracies": 0.625, "rewards/chosen": -0.018725909292697906, "rewards/margins": 0.0009310483001172543, "rewards/rejected": -0.019656959921121597, "step": 235 }, { "epoch": 0.1632088520055325, "grad_norm": 3.4686291217803955, "learning_rate": 8.160442600276626e-06, "log_odds_chosen": 0.16042087972164154, "log_odds_ratio": -0.8572741150856018, "logits/chosen": -0.7890150547027588, "logits/rejected": -0.7376941442489624, "logps/chosen": -0.23788484930992126, "logps/rejected": -0.29508593678474426, "loss": 5.0317, "nll_loss": 1.1721961498260498, "rewards/accuracies": 0.5, "rewards/chosen": -0.023788485676050186, "rewards/margins": 0.005720109213143587, "rewards/rejected": -0.029508594423532486, "step": 236 }, { "epoch": 0.16390041493775934, "grad_norm": 2.9869203567504883, "learning_rate": 8.195020746887967e-06, "log_odds_chosen": 0.40226978063583374, "log_odds_ratio": -0.7669799327850342, "logits/chosen": -0.6808412671089172, "logits/rejected": -0.635565459728241, "logps/chosen": -0.21583965420722961, "logps/rejected": -0.37887197732925415, "loss": 6.1774, "nll_loss": 1.4676564931869507, "rewards/accuracies": 0.5, "rewards/chosen": -0.021583963185548782, "rewards/margins": 0.016303233802318573, "rewards/rejected": -0.037887200713157654, "step": 237 }, { "epoch": 0.16459197786998617, "grad_norm": 3.5267534255981445, "learning_rate": 8.229598893499309e-06, "log_odds_chosen": 0.41741740703582764, "log_odds_ratio": -0.6975635290145874, "logits/chosen": -0.8619732856750488, "logits/rejected": -0.8443213701248169, "logps/chosen": -0.24961000680923462, "logps/rejected": -0.3617687225341797, "loss": 6.1947, "nll_loss": 1.4789154529571533, "rewards/accuracies": 0.625, "rewards/chosen": -0.02496100217103958, "rewards/margins": 0.011215871199965477, "rewards/rejected": -0.03617687523365021, "step": 238 }, { "epoch": 0.165283540802213, "grad_norm": 3.9766554832458496, "learning_rate": 8.26417704011065e-06, "log_odds_chosen": 1.0255920886993408, "log_odds_ratio": -0.6245083808898926, "logits/chosen": -0.8572830557823181, "logits/rejected": -0.8940591216087341, "logps/chosen": -0.15826298296451569, "logps/rejected": -0.5650017261505127, "loss": 5.4706, "nll_loss": 1.3052107095718384, "rewards/accuracies": 0.625, "rewards/chosen": -0.015826299786567688, "rewards/margins": 0.0406738743185997, "rewards/rejected": -0.05650017410516739, "step": 239 }, { "epoch": 0.16597510373443983, "grad_norm": 3.8060593605041504, "learning_rate": 8.298755186721992e-06, "log_odds_chosen": 1.074886441230774, "log_odds_ratio": -0.3917398750782013, "logits/chosen": -0.9265488982200623, "logits/rejected": -0.9145898222923279, "logps/chosen": -0.16986651718616486, "logps/rejected": -0.3676261305809021, "loss": 6.0459, "nll_loss": 1.4722979068756104, "rewards/accuracies": 0.75, "rewards/chosen": -0.016986653208732605, "rewards/margins": 0.019775960594415665, "rewards/rejected": -0.03676261380314827, "step": 240 }, { "epoch": 0.16666666666666666, "grad_norm": 3.665224552154541, "learning_rate": 8.333333333333334e-06, "log_odds_chosen": 1.9552595615386963, "log_odds_ratio": -0.2515340745449066, "logits/chosen": -0.7696788907051086, "logits/rejected": -0.7876995801925659, "logps/chosen": -0.11934017390012741, "logps/rejected": -0.5310408473014832, "loss": 6.5986, "nll_loss": 1.6245028972625732, "rewards/accuracies": 1.0, "rewards/chosen": -0.011934017762541771, "rewards/margins": 0.041170068085193634, "rewards/rejected": -0.053104087710380554, "step": 241 }, { "epoch": 0.1673582295988935, "grad_norm": 4.817811012268066, "learning_rate": 8.367911479944675e-06, "log_odds_chosen": -0.005749780684709549, "log_odds_ratio": -0.7882553339004517, "logits/chosen": -0.8050568103790283, "logits/rejected": -0.8051145076751709, "logps/chosen": -0.21885880827903748, "logps/rejected": -0.1906094253063202, "loss": 5.7043, "nll_loss": 1.3472473621368408, "rewards/accuracies": 0.375, "rewards/chosen": -0.021885879337787628, "rewards/margins": -0.002824939088895917, "rewards/rejected": -0.01906094141304493, "step": 242 }, { "epoch": 0.16804979253112035, "grad_norm": 4.204524040222168, "learning_rate": 8.402489626556017e-06, "log_odds_chosen": 1.3039932250976562, "log_odds_ratio": -0.36999383568763733, "logits/chosen": -1.1626389026641846, "logits/rejected": -1.1959856748580933, "logps/chosen": -0.14168334007263184, "logps/rejected": -0.3843676745891571, "loss": 6.8428, "nll_loss": 1.673712134361267, "rewards/accuracies": 0.75, "rewards/chosen": -0.014168335124850273, "rewards/margins": 0.024268433451652527, "rewards/rejected": -0.03843677043914795, "step": 243 }, { "epoch": 0.16874135546334718, "grad_norm": 3.659269094467163, "learning_rate": 8.437067773167358e-06, "log_odds_chosen": 1.0554488897323608, "log_odds_ratio": -0.5076719522476196, "logits/chosen": -0.7935200929641724, "logits/rejected": -0.8115878105163574, "logps/chosen": -0.1356905847787857, "logps/rejected": -0.25169292092323303, "loss": 5.9024, "nll_loss": 1.4248311519622803, "rewards/accuracies": 0.75, "rewards/chosen": -0.013569057919085026, "rewards/margins": 0.011600232683122158, "rewards/rejected": -0.025169292464852333, "step": 244 }, { "epoch": 0.169432918395574, "grad_norm": 3.3517634868621826, "learning_rate": 8.4716459197787e-06, "log_odds_chosen": 0.6800482273101807, "log_odds_ratio": -0.6337831020355225, "logits/chosen": -0.9669154286384583, "logits/rejected": -1.0145224332809448, "logps/chosen": -0.19900593161582947, "logps/rejected": -0.28002551198005676, "loss": 6.2381, "nll_loss": 1.4961349964141846, "rewards/accuracies": 0.75, "rewards/chosen": -0.019900593906641006, "rewards/margins": 0.00810195691883564, "rewards/rejected": -0.028002548962831497, "step": 245 }, { "epoch": 0.17012448132780084, "grad_norm": 3.6895792484283447, "learning_rate": 8.506224066390042e-06, "log_odds_chosen": 0.8594756126403809, "log_odds_ratio": -0.5369733572006226, "logits/chosen": -1.1025549173355103, "logits/rejected": -1.1240384578704834, "logps/chosen": -0.14387311041355133, "logps/rejected": -0.3072226941585541, "loss": 5.7825, "nll_loss": 1.391920804977417, "rewards/accuracies": 0.75, "rewards/chosen": -0.014387311413884163, "rewards/margins": 0.016334956511855125, "rewards/rejected": -0.03072226792573929, "step": 246 }, { "epoch": 0.17081604426002767, "grad_norm": 3.3241751194000244, "learning_rate": 8.540802213001383e-06, "log_odds_chosen": 0.6280308961868286, "log_odds_ratio": -0.4941891133785248, "logits/chosen": -0.7816058397293091, "logits/rejected": -0.7533829212188721, "logps/chosen": -0.2505534887313843, "logps/rejected": -0.37605032324790955, "loss": 5.6624, "nll_loss": 1.366180419921875, "rewards/accuracies": 0.75, "rewards/chosen": -0.025055352598428726, "rewards/margins": 0.012549685314297676, "rewards/rejected": -0.03760503977537155, "step": 247 }, { "epoch": 0.1715076071922545, "grad_norm": 4.798497676849365, "learning_rate": 8.575380359612725e-06, "log_odds_chosen": -0.01965467631816864, "log_odds_ratio": -0.8393452167510986, "logits/chosen": -0.8515428304672241, "logits/rejected": -0.8434562683105469, "logps/chosen": -0.23914137482643127, "logps/rejected": -0.2302568256855011, "loss": 5.8272, "nll_loss": 1.3728694915771484, "rewards/accuracies": 0.5, "rewards/chosen": -0.023914135992527008, "rewards/margins": -0.0008884554263204336, "rewards/rejected": -0.02302568219602108, "step": 248 }, { "epoch": 0.17219917012448133, "grad_norm": 3.0810976028442383, "learning_rate": 8.609958506224066e-06, "log_odds_chosen": 0.8071850538253784, "log_odds_ratio": -0.43749377131462097, "logits/chosen": -0.8725613951683044, "logits/rejected": -0.8765714168548584, "logps/chosen": -0.15248632431030273, "logps/rejected": -0.2783476710319519, "loss": 5.7413, "nll_loss": 1.391564130783081, "rewards/accuracies": 0.75, "rewards/chosen": -0.015248632058501244, "rewards/margins": 0.012586135417222977, "rewards/rejected": -0.02783476747572422, "step": 249 }, { "epoch": 0.17289073305670816, "grad_norm": 5.421039581298828, "learning_rate": 8.644536652835408e-06, "log_odds_chosen": -0.3103007674217224, "log_odds_ratio": -0.9351741671562195, "logits/chosen": -0.8125675916671753, "logits/rejected": -0.7800061702728271, "logps/chosen": -0.19023606181144714, "logps/rejected": -0.17538899183273315, "loss": 5.8487, "nll_loss": 1.3686484098434448, "rewards/accuracies": 0.5, "rewards/chosen": -0.019023608416318893, "rewards/margins": -0.0014847079291939735, "rewards/rejected": -0.017538899555802345, "step": 250 }, { "epoch": 0.173582295988935, "grad_norm": 4.459559440612793, "learning_rate": 8.67911479944675e-06, "log_odds_chosen": 1.124354600906372, "log_odds_ratio": -0.5342854261398315, "logits/chosen": -1.0505995750427246, "logits/rejected": -1.0487140417099, "logps/chosen": -0.13768863677978516, "logps/rejected": -0.33856356143951416, "loss": 6.1951, "nll_loss": 1.4953362941741943, "rewards/accuracies": 0.625, "rewards/chosen": -0.013768864795565605, "rewards/margins": 0.02008748985826969, "rewards/rejected": -0.0338563546538353, "step": 251 }, { "epoch": 0.17427385892116182, "grad_norm": 4.052998065948486, "learning_rate": 8.713692946058091e-06, "log_odds_chosen": 0.19669348001480103, "log_odds_ratio": -0.8254600763320923, "logits/chosen": -0.9436452388763428, "logits/rejected": -0.9478350877761841, "logps/chosen": -0.24237194657325745, "logps/rejected": -0.316573828458786, "loss": 5.9555, "nll_loss": 1.4063365459442139, "rewards/accuracies": 0.5, "rewards/chosen": -0.024237193167209625, "rewards/margins": 0.0074201906099915504, "rewards/rejected": -0.0316573828458786, "step": 252 }, { "epoch": 0.17496542185338865, "grad_norm": 3.448655605316162, "learning_rate": 8.748271092669432e-06, "log_odds_chosen": 0.10002302378416061, "log_odds_ratio": -0.7270524501800537, "logits/chosen": -0.8413020968437195, "logits/rejected": -0.8493680953979492, "logps/chosen": -0.30035310983657837, "logps/rejected": -0.2797276973724365, "loss": 6.0363, "nll_loss": 1.4363651275634766, "rewards/accuracies": 0.5, "rewards/chosen": -0.030035313218832016, "rewards/margins": -0.0020625414326786995, "rewards/rejected": -0.027972770854830742, "step": 253 }, { "epoch": 0.17565698478561548, "grad_norm": 3.6428167819976807, "learning_rate": 8.782849239280774e-06, "log_odds_chosen": 0.5170423984527588, "log_odds_ratio": -0.5325826406478882, "logits/chosen": -0.8044297099113464, "logits/rejected": -0.8021126389503479, "logps/chosen": -0.20451143383979797, "logps/rejected": -0.34033912420272827, "loss": 6.0516, "nll_loss": 1.4596498012542725, "rewards/accuracies": 0.75, "rewards/chosen": -0.020451147109270096, "rewards/margins": 0.01358276791870594, "rewards/rejected": -0.03403391316533089, "step": 254 }, { "epoch": 0.17634854771784234, "grad_norm": 3.2239508628845215, "learning_rate": 8.817427385892117e-06, "log_odds_chosen": 1.6908828020095825, "log_odds_ratio": -0.3723670542240143, "logits/chosen": -0.9017342329025269, "logits/rejected": -0.94149249792099, "logps/chosen": -0.17674483358860016, "logps/rejected": -0.42865657806396484, "loss": 5.8309, "nll_loss": 1.4204809665679932, "rewards/accuracies": 0.75, "rewards/chosen": -0.017674485221505165, "rewards/margins": 0.0251911748200655, "rewards/rejected": -0.04286566376686096, "step": 255 }, { "epoch": 0.17704011065006917, "grad_norm": 3.5115952491760254, "learning_rate": 8.852005532503459e-06, "log_odds_chosen": 1.3778259754180908, "log_odds_ratio": -0.43739527463912964, "logits/chosen": -0.9482162594795227, "logits/rejected": -0.9856005907058716, "logps/chosen": -0.2021976113319397, "logps/rejected": -0.8205811381340027, "loss": 5.2779, "nll_loss": 1.2757349014282227, "rewards/accuracies": 0.875, "rewards/chosen": -0.02021976187825203, "rewards/margins": 0.06183835491538048, "rewards/rejected": -0.08205811679363251, "step": 256 }, { "epoch": 0.177731673582296, "grad_norm": 3.428536891937256, "learning_rate": 8.8865836791148e-06, "log_odds_chosen": 1.5049645900726318, "log_odds_ratio": -0.4445021450519562, "logits/chosen": -0.8670735359191895, "logits/rejected": -0.9449098110198975, "logps/chosen": -0.2028433382511139, "logps/rejected": -0.5087571740150452, "loss": 4.5936, "nll_loss": 1.1039620637893677, "rewards/accuracies": 0.75, "rewards/chosen": -0.02028433419764042, "rewards/margins": 0.03059137985110283, "rewards/rejected": -0.0508757159113884, "step": 257 }, { "epoch": 0.17842323651452283, "grad_norm": 3.801056385040283, "learning_rate": 8.921161825726142e-06, "log_odds_chosen": 1.4026226997375488, "log_odds_ratio": -0.5079362988471985, "logits/chosen": -0.9943914413452148, "logits/rejected": -0.9608233571052551, "logps/chosen": -0.18597161769866943, "logps/rejected": -0.4813200831413269, "loss": 4.6482, "nll_loss": 1.1112600564956665, "rewards/accuracies": 0.5, "rewards/chosen": -0.018597161397337914, "rewards/margins": 0.029534848406910896, "rewards/rejected": -0.04813200980424881, "step": 258 }, { "epoch": 0.17911479944674966, "grad_norm": 3.898007869720459, "learning_rate": 8.955739972337484e-06, "log_odds_chosen": 0.8249939680099487, "log_odds_ratio": -0.6307661533355713, "logits/chosen": -1.0948408842086792, "logits/rejected": -1.1247516870498657, "logps/chosen": -0.21543753147125244, "logps/rejected": -0.3504295349121094, "loss": 6.9519, "nll_loss": 1.6748905181884766, "rewards/accuracies": 0.625, "rewards/chosen": -0.021543754264712334, "rewards/margins": 0.013499200344085693, "rewards/rejected": -0.03504295274615288, "step": 259 }, { "epoch": 0.1798063623789765, "grad_norm": 4.461514949798584, "learning_rate": 8.990318118948825e-06, "log_odds_chosen": -0.24836111068725586, "log_odds_ratio": -1.4388465881347656, "logits/chosen": -0.8681745529174805, "logits/rejected": -0.8156797885894775, "logps/chosen": -0.6750339865684509, "logps/rejected": -0.2794366180896759, "loss": 5.7843, "nll_loss": 1.30219566822052, "rewards/accuracies": 0.625, "rewards/chosen": -0.06750340014696121, "rewards/margins": -0.0395597368478775, "rewards/rejected": -0.02794366329908371, "step": 260 }, { "epoch": 0.18049792531120332, "grad_norm": 3.908860683441162, "learning_rate": 9.024896265560167e-06, "log_odds_chosen": 0.9576330184936523, "log_odds_ratio": -0.5267143845558167, "logits/chosen": -1.064504861831665, "logits/rejected": -1.1053556203842163, "logps/chosen": -0.15372568368911743, "logps/rejected": -0.30671191215515137, "loss": 6.5403, "nll_loss": 1.5824048519134521, "rewards/accuracies": 0.625, "rewards/chosen": -0.015372568741440773, "rewards/margins": 0.015298626385629177, "rewards/rejected": -0.030671194195747375, "step": 261 }, { "epoch": 0.18118948824343015, "grad_norm": 3.937495231628418, "learning_rate": 9.059474412171508e-06, "log_odds_chosen": 0.8034919500350952, "log_odds_ratio": -0.5796293616294861, "logits/chosen": -0.7137233018875122, "logits/rejected": -0.7320935130119324, "logps/chosen": -0.17258089780807495, "logps/rejected": -0.30214375257492065, "loss": 5.7763, "nll_loss": 1.3861002922058105, "rewards/accuracies": 0.75, "rewards/chosen": -0.017258090898394585, "rewards/margins": 0.012956284917891026, "rewards/rejected": -0.030214374884963036, "step": 262 }, { "epoch": 0.18188105117565698, "grad_norm": 4.973979949951172, "learning_rate": 9.09405255878285e-06, "log_odds_chosen": -0.3208833336830139, "log_odds_ratio": -1.0229796171188354, "logits/chosen": -0.9595593214035034, "logits/rejected": -0.9800918102264404, "logps/chosen": -0.46374863386154175, "logps/rejected": -0.2736175060272217, "loss": 7.3563, "nll_loss": 1.7367701530456543, "rewards/accuracies": 0.625, "rewards/chosen": -0.046374864876270294, "rewards/margins": -0.019013112410902977, "rewards/rejected": -0.02736174874007702, "step": 263 }, { "epoch": 0.1825726141078838, "grad_norm": 4.1658830642700195, "learning_rate": 9.128630705394191e-06, "log_odds_chosen": 0.6217221021652222, "log_odds_ratio": -0.6416304111480713, "logits/chosen": -0.7790597677230835, "logits/rejected": -0.7822107672691345, "logps/chosen": -0.17077568173408508, "logps/rejected": -0.2453240156173706, "loss": 6.015, "nll_loss": 1.4395976066589355, "rewards/accuracies": 0.5, "rewards/chosen": -0.017077568918466568, "rewards/margins": 0.007454832550138235, "rewards/rejected": -0.02453240193426609, "step": 264 }, { "epoch": 0.18326417704011064, "grad_norm": 3.1925108432769775, "learning_rate": 9.163208852005533e-06, "log_odds_chosen": 2.2674779891967773, "log_odds_ratio": -0.3325195908546448, "logits/chosen": -0.9382967948913574, "logits/rejected": -0.9911866188049316, "logps/chosen": -0.05271158739924431, "logps/rejected": -0.5133439898490906, "loss": 5.6839, "nll_loss": 1.3877242803573608, "rewards/accuracies": 0.875, "rewards/chosen": -0.005271159112453461, "rewards/margins": 0.04606323689222336, "rewards/rejected": -0.05133439600467682, "step": 265 }, { "epoch": 0.1839557399723375, "grad_norm": 4.016493320465088, "learning_rate": 9.197786998616875e-06, "log_odds_chosen": 0.9899934530258179, "log_odds_ratio": -0.4765719175338745, "logits/chosen": -1.0239133834838867, "logits/rejected": -0.9974214434623718, "logps/chosen": -0.14653241634368896, "logps/rejected": -0.35717684030532837, "loss": 6.1179, "nll_loss": 1.4818254709243774, "rewards/accuracies": 0.75, "rewards/chosen": -0.014653241261839867, "rewards/margins": 0.02106444165110588, "rewards/rejected": -0.0357176810503006, "step": 266 }, { "epoch": 0.18464730290456433, "grad_norm": 3.0796597003936768, "learning_rate": 9.232365145228216e-06, "log_odds_chosen": 0.2046814262866974, "log_odds_ratio": -0.6776267290115356, "logits/chosen": -0.9972403049468994, "logits/rejected": -0.9939769506454468, "logps/chosen": -0.2929823100566864, "logps/rejected": -0.3252931833267212, "loss": 6.4836, "nll_loss": 1.5531431436538696, "rewards/accuracies": 0.625, "rewards/chosen": -0.02929823100566864, "rewards/margins": 0.0032310900278389454, "rewards/rejected": -0.0325293205678463, "step": 267 }, { "epoch": 0.18533886583679116, "grad_norm": 4.9689459800720215, "learning_rate": 9.266943291839558e-06, "log_odds_chosen": 0.17572666704654694, "log_odds_ratio": -0.7039728164672852, "logits/chosen": -0.999485969543457, "logits/rejected": -0.9911883473396301, "logps/chosen": -0.22734355926513672, "logps/rejected": -0.28024086356163025, "loss": 6.8594, "nll_loss": 1.6444549560546875, "rewards/accuracies": 0.5, "rewards/chosen": -0.02273435704410076, "rewards/margins": 0.005289729684591293, "rewards/rejected": -0.028024086728692055, "step": 268 }, { "epoch": 0.18603042876901799, "grad_norm": 3.828569173812866, "learning_rate": 9.3015214384509e-06, "log_odds_chosen": 1.1488004922866821, "log_odds_ratio": -0.41310569643974304, "logits/chosen": -0.7810618877410889, "logits/rejected": -0.7955660820007324, "logps/chosen": -0.1333785206079483, "logps/rejected": -0.2772391140460968, "loss": 5.4026, "nll_loss": 1.3093348741531372, "rewards/accuracies": 0.75, "rewards/chosen": -0.01333785243332386, "rewards/margins": 0.01438605785369873, "rewards/rejected": -0.02772390842437744, "step": 269 }, { "epoch": 0.18672199170124482, "grad_norm": 3.72995662689209, "learning_rate": 9.33609958506224e-06, "log_odds_chosen": 0.4392034411430359, "log_odds_ratio": -0.5941404104232788, "logits/chosen": -0.9812883138656616, "logits/rejected": -0.988927960395813, "logps/chosen": -0.16869939863681793, "logps/rejected": -0.21304473280906677, "loss": 5.6749, "nll_loss": 1.359307885169983, "rewards/accuracies": 0.625, "rewards/chosen": -0.016869939863681793, "rewards/margins": 0.004434533417224884, "rewards/rejected": -0.021304473280906677, "step": 270 }, { "epoch": 0.18741355463347165, "grad_norm": 2.949136734008789, "learning_rate": 9.370677731673582e-06, "log_odds_chosen": 2.5781455039978027, "log_odds_ratio": -0.2511579096317291, "logits/chosen": -0.6993355751037598, "logits/rejected": -0.7362976670265198, "logps/chosen": -0.08703863620758057, "logps/rejected": -0.5672510862350464, "loss": 5.5008, "nll_loss": 1.350090503692627, "rewards/accuracies": 0.875, "rewards/chosen": -0.008703864179551601, "rewards/margins": 0.04802124947309494, "rewards/rejected": -0.056725114583969116, "step": 271 }, { "epoch": 0.18810511756569848, "grad_norm": 4.6766133308410645, "learning_rate": 9.405255878284924e-06, "log_odds_chosen": 0.4586489200592041, "log_odds_ratio": -0.5706301927566528, "logits/chosen": -0.8487648963928223, "logits/rejected": -0.8466265201568604, "logps/chosen": -0.20309209823608398, "logps/rejected": -0.3035036325454712, "loss": 5.498, "nll_loss": 1.317443609237671, "rewards/accuracies": 0.875, "rewards/chosen": -0.0203092098236084, "rewards/margins": 0.010041153989732265, "rewards/rejected": -0.03035036474466324, "step": 272 }, { "epoch": 0.1887966804979253, "grad_norm": 4.04107666015625, "learning_rate": 9.439834024896265e-06, "log_odds_chosen": 0.7465725541114807, "log_odds_ratio": -0.6487242579460144, "logits/chosen": -1.0556849241256714, "logits/rejected": -1.0306531190872192, "logps/chosen": -0.18017810583114624, "logps/rejected": -0.3081243336200714, "loss": 5.5121, "nll_loss": 1.313159465789795, "rewards/accuracies": 0.625, "rewards/chosen": -0.018017811700701714, "rewards/margins": 0.012794620357453823, "rewards/rejected": -0.030812431126832962, "step": 273 }, { "epoch": 0.18948824343015214, "grad_norm": 2.471160888671875, "learning_rate": 9.474412171507607e-06, "log_odds_chosen": 0.851750373840332, "log_odds_ratio": -0.39355215430259705, "logits/chosen": -0.7050144076347351, "logits/rejected": -0.754703164100647, "logps/chosen": -0.11717454344034195, "logps/rejected": -0.22633624076843262, "loss": 5.4761, "nll_loss": 1.3296756744384766, "rewards/accuracies": 0.875, "rewards/chosen": -0.01171745453029871, "rewards/margins": 0.010916169732809067, "rewards/rejected": -0.02263362519443035, "step": 274 }, { "epoch": 0.19017980636237897, "grad_norm": 3.6157355308532715, "learning_rate": 9.508990318118949e-06, "log_odds_chosen": 0.6131385564804077, "log_odds_ratio": -0.5318353176116943, "logits/chosen": -0.8138557076454163, "logits/rejected": -0.817249059677124, "logps/chosen": -0.14839550852775574, "logps/rejected": -0.18212762475013733, "loss": 6.5553, "nll_loss": 1.5856428146362305, "rewards/accuracies": 0.875, "rewards/chosen": -0.014839550480246544, "rewards/margins": 0.003373212879523635, "rewards/rejected": -0.018212763592600822, "step": 275 }, { "epoch": 0.1908713692946058, "grad_norm": 2.5300960540771484, "learning_rate": 9.54356846473029e-06, "log_odds_chosen": 0.7017526626586914, "log_odds_ratio": -0.44837111234664917, "logits/chosen": -0.9915153980255127, "logits/rejected": -1.0183674097061157, "logps/chosen": -0.17959809303283691, "logps/rejected": -0.3399566113948822, "loss": 4.4278, "nll_loss": 1.0621116161346436, "rewards/accuracies": 0.875, "rewards/chosen": -0.01795980893075466, "rewards/margins": 0.01603585109114647, "rewards/rejected": -0.03399566188454628, "step": 276 }, { "epoch": 0.19156293222683266, "grad_norm": 3.8363876342773438, "learning_rate": 9.578146611341633e-06, "log_odds_chosen": 1.7807199954986572, "log_odds_ratio": -0.407881498336792, "logits/chosen": -0.7593604326248169, "logits/rejected": -0.776255190372467, "logps/chosen": -0.101393923163414, "logps/rejected": -0.45290789008140564, "loss": 5.9338, "nll_loss": 1.442673683166504, "rewards/accuracies": 0.75, "rewards/chosen": -0.01013939268887043, "rewards/margins": 0.0351513996720314, "rewards/rejected": -0.04529079049825668, "step": 277 }, { "epoch": 0.19225449515905949, "grad_norm": 3.2401390075683594, "learning_rate": 9.612724757952975e-06, "log_odds_chosen": 1.1467278003692627, "log_odds_ratio": -0.39693349599838257, "logits/chosen": -0.9011764526367188, "logits/rejected": -0.9105724096298218, "logps/chosen": -0.18877184391021729, "logps/rejected": -0.4855310916900635, "loss": 6.1813, "nll_loss": 1.5056235790252686, "rewards/accuracies": 0.875, "rewards/chosen": -0.0188771840184927, "rewards/margins": 0.029675928875803947, "rewards/rejected": -0.048553116619586945, "step": 278 }, { "epoch": 0.19294605809128632, "grad_norm": 2.911855459213257, "learning_rate": 9.647302904564317e-06, "log_odds_chosen": 1.363403081893921, "log_odds_ratio": -0.46704989671707153, "logits/chosen": -0.6899612545967102, "logits/rejected": -0.7372763752937317, "logps/chosen": -0.13555309176445007, "logps/rejected": -0.27910101413726807, "loss": 4.6959, "nll_loss": 1.1272612810134888, "rewards/accuracies": 0.625, "rewards/chosen": -0.013555308803915977, "rewards/margins": 0.014354792423546314, "rewards/rejected": -0.027910098433494568, "step": 279 }, { "epoch": 0.19363762102351315, "grad_norm": 2.9546871185302734, "learning_rate": 9.681881051175658e-06, "log_odds_chosen": 1.2636113166809082, "log_odds_ratio": -0.5243982672691345, "logits/chosen": -1.0961809158325195, "logits/rejected": -1.1375336647033691, "logps/chosen": -0.23278352618217468, "logps/rejected": -0.3730795383453369, "loss": 5.5835, "nll_loss": 1.3434245586395264, "rewards/accuracies": 0.75, "rewards/chosen": -0.02327835187315941, "rewards/margins": 0.014029599726200104, "rewards/rejected": -0.03730795159935951, "step": 280 }, { "epoch": 0.19432918395573998, "grad_norm": 3.927760601043701, "learning_rate": 9.716459197787e-06, "log_odds_chosen": -0.1890764832496643, "log_odds_ratio": -1.1451356410980225, "logits/chosen": -0.680740475654602, "logits/rejected": -0.6450449824333191, "logps/chosen": -0.40619486570358276, "logps/rejected": -0.12343654036521912, "loss": 5.5365, "nll_loss": 1.269623041152954, "rewards/accuracies": 0.5, "rewards/chosen": -0.040619488805532455, "rewards/margins": -0.028275832533836365, "rewards/rejected": -0.012343653477728367, "step": 281 }, { "epoch": 0.1950207468879668, "grad_norm": 2.726196050643921, "learning_rate": 9.751037344398341e-06, "log_odds_chosen": 0.6973831057548523, "log_odds_ratio": -0.5315908193588257, "logits/chosen": -0.8917272090911865, "logits/rejected": -0.8878331184387207, "logps/chosen": -0.21855196356773376, "logps/rejected": -0.3041384816169739, "loss": 4.9619, "nll_loss": 1.1873137950897217, "rewards/accuracies": 0.625, "rewards/chosen": -0.021855197846889496, "rewards/margins": 0.008558651432394981, "rewards/rejected": -0.030413847416639328, "step": 282 }, { "epoch": 0.19571230982019364, "grad_norm": 2.687487840652466, "learning_rate": 9.785615491009683e-06, "log_odds_chosen": 0.8881605863571167, "log_odds_ratio": -0.49772194027900696, "logits/chosen": -0.9113330841064453, "logits/rejected": -0.9342383146286011, "logps/chosen": -0.16241443157196045, "logps/rejected": -0.31999391317367554, "loss": 5.1659, "nll_loss": 1.2416975498199463, "rewards/accuracies": 0.75, "rewards/chosen": -0.016241442412137985, "rewards/margins": 0.015757950022816658, "rewards/rejected": -0.03199939429759979, "step": 283 }, { "epoch": 0.19640387275242047, "grad_norm": 4.079690933227539, "learning_rate": 9.820193637621024e-06, "log_odds_chosen": 0.7447575330734253, "log_odds_ratio": -0.48378610610961914, "logits/chosen": -1.0826882123947144, "logits/rejected": -1.093569040298462, "logps/chosen": -0.15584491193294525, "logps/rejected": -0.24808132648468018, "loss": 5.3322, "nll_loss": 1.2846788167953491, "rewards/accuracies": 0.625, "rewards/chosen": -0.015584491193294525, "rewards/margins": 0.009223640896379948, "rewards/rejected": -0.024808134883642197, "step": 284 }, { "epoch": 0.1970954356846473, "grad_norm": 3.588355302810669, "learning_rate": 9.854771784232366e-06, "log_odds_chosen": 0.44871243834495544, "log_odds_ratio": -0.9932539463043213, "logits/chosen": -0.5877626538276672, "logits/rejected": -0.5795567035675049, "logps/chosen": -0.435401052236557, "logps/rejected": -0.32708725333213806, "loss": 5.1331, "nll_loss": 1.1839531660079956, "rewards/accuracies": 0.75, "rewards/chosen": -0.0435401052236557, "rewards/margins": -0.01083137933164835, "rewards/rejected": -0.032708726823329926, "step": 285 }, { "epoch": 0.19778699861687413, "grad_norm": 3.3288917541503906, "learning_rate": 9.889349930843707e-06, "log_odds_chosen": 1.1832314729690552, "log_odds_ratio": -0.5124855041503906, "logits/chosen": -0.9348160624504089, "logits/rejected": -0.898857593536377, "logps/chosen": -0.15667709708213806, "logps/rejected": -0.33472001552581787, "loss": 5.1491, "nll_loss": 1.2360328435897827, "rewards/accuracies": 0.625, "rewards/chosen": -0.015667710453271866, "rewards/margins": 0.01780429482460022, "rewards/rejected": -0.03347200155258179, "step": 286 }, { "epoch": 0.19847856154910096, "grad_norm": 3.9891884326934814, "learning_rate": 9.923928077455049e-06, "log_odds_chosen": 1.312206506729126, "log_odds_ratio": -0.41736629605293274, "logits/chosen": -0.820662260055542, "logits/rejected": -0.8876691460609436, "logps/chosen": -0.15436021983623505, "logps/rejected": -0.4415496587753296, "loss": 6.3401, "nll_loss": 1.543293833732605, "rewards/accuracies": 0.625, "rewards/chosen": -0.015436021611094475, "rewards/margins": 0.028718942776322365, "rewards/rejected": -0.04415496438741684, "step": 287 }, { "epoch": 0.1991701244813278, "grad_norm": 3.754223346710205, "learning_rate": 9.95850622406639e-06, "log_odds_chosen": 2.2290995121002197, "log_odds_ratio": -0.20051251351833344, "logits/chosen": -0.7229880690574646, "logits/rejected": -0.7098461985588074, "logps/chosen": -0.08578348904848099, "logps/rejected": -0.4376526176929474, "loss": 6.0212, "nll_loss": 1.485244870185852, "rewards/accuracies": 0.875, "rewards/chosen": -0.008578348904848099, "rewards/margins": 0.03518691286444664, "rewards/rejected": -0.04376526176929474, "step": 288 }, { "epoch": 0.19986168741355465, "grad_norm": 3.978665351867676, "learning_rate": 9.993084370677732e-06, "log_odds_chosen": 0.7419009208679199, "log_odds_ratio": -0.4755893647670746, "logits/chosen": -0.6797462701797485, "logits/rejected": -0.6705136299133301, "logps/chosen": -0.14312395453453064, "logps/rejected": -0.3239539563655853, "loss": 5.3251, "nll_loss": 1.2837040424346924, "rewards/accuracies": 0.875, "rewards/chosen": -0.014312395825982094, "rewards/margins": 0.01808299869298935, "rewards/rejected": -0.03239539638161659, "step": 289 }, { "epoch": 0.20055325034578148, "grad_norm": 4.12111759185791, "learning_rate": 1.0027662517289074e-05, "log_odds_chosen": 0.7455897331237793, "log_odds_ratio": -0.8224204778671265, "logits/chosen": -0.5983878970146179, "logits/rejected": -0.623099148273468, "logps/chosen": -0.2823043465614319, "logps/rejected": -0.44747430086135864, "loss": 5.7119, "nll_loss": 1.3457216024398804, "rewards/accuracies": 0.5, "rewards/chosen": -0.028230436146259308, "rewards/margins": 0.016516994684934616, "rewards/rejected": -0.044747427105903625, "step": 290 }, { "epoch": 0.2012448132780083, "grad_norm": 2.2981762886047363, "learning_rate": 1.0062240663900415e-05, "log_odds_chosen": 1.614776849746704, "log_odds_ratio": -0.5352373123168945, "logits/chosen": -0.5616164207458496, "logits/rejected": -0.5507243871688843, "logps/chosen": -0.16399338841438293, "logps/rejected": -0.41412293910980225, "loss": 4.2938, "nll_loss": 1.0199339389801025, "rewards/accuracies": 0.75, "rewards/chosen": -0.016399338841438293, "rewards/margins": 0.02501295693218708, "rewards/rejected": -0.04141229763627052, "step": 291 }, { "epoch": 0.20193637621023514, "grad_norm": 3.037929058074951, "learning_rate": 1.0096818810511757e-05, "log_odds_chosen": 0.8889395594596863, "log_odds_ratio": -0.5420886278152466, "logits/chosen": -0.6951072812080383, "logits/rejected": -0.7086105942726135, "logps/chosen": -0.16874369978904724, "logps/rejected": -0.29494836926460266, "loss": 5.1379, "nll_loss": 1.2302649021148682, "rewards/accuracies": 0.625, "rewards/chosen": -0.016874369233846664, "rewards/margins": 0.012620468623936176, "rewards/rejected": -0.029494838789105415, "step": 292 }, { "epoch": 0.20262793914246197, "grad_norm": 2.978886842727661, "learning_rate": 1.0131396957123098e-05, "log_odds_chosen": 2.0868780612945557, "log_odds_ratio": -0.25021910667419434, "logits/chosen": -1.0118746757507324, "logits/rejected": -1.016335129737854, "logps/chosen": -0.1282978355884552, "logps/rejected": -0.49944478273391724, "loss": 5.2727, "nll_loss": 1.2931417226791382, "rewards/accuracies": 0.875, "rewards/chosen": -0.01282978430390358, "rewards/margins": 0.0371146984398365, "rewards/rejected": -0.04994447901844978, "step": 293 }, { "epoch": 0.2033195020746888, "grad_norm": 3.695957660675049, "learning_rate": 1.016597510373444e-05, "log_odds_chosen": 1.4583425521850586, "log_odds_ratio": -0.4338352382183075, "logits/chosen": -0.8413094878196716, "logits/rejected": -0.8705133199691772, "logps/chosen": -0.1513550579547882, "logps/rejected": -0.33852750062942505, "loss": 4.9433, "nll_loss": 1.1924374103546143, "rewards/accuracies": 0.75, "rewards/chosen": -0.01513550616800785, "rewards/margins": 0.018717240542173386, "rewards/rejected": -0.033852748572826385, "step": 294 }, { "epoch": 0.20401106500691563, "grad_norm": 4.3729681968688965, "learning_rate": 1.0200553250345782e-05, "log_odds_chosen": 0.8586447238922119, "log_odds_ratio": -1.0533874034881592, "logits/chosen": -0.963996410369873, "logits/rejected": -0.934209406375885, "logps/chosen": -0.44819894433021545, "logps/rejected": -0.3105297088623047, "loss": 5.5288, "nll_loss": 1.2768583297729492, "rewards/accuracies": 0.625, "rewards/chosen": -0.044819898903369904, "rewards/margins": -0.013766927644610405, "rewards/rejected": -0.031052973121404648, "step": 295 }, { "epoch": 0.20470262793914246, "grad_norm": 3.3607981204986572, "learning_rate": 1.0235131396957123e-05, "log_odds_chosen": 2.0125203132629395, "log_odds_ratio": -0.48399215936660767, "logits/chosen": -0.8093291521072388, "logits/rejected": -0.856208324432373, "logps/chosen": -0.2095358669757843, "logps/rejected": -0.6241459250450134, "loss": 5.2864, "nll_loss": 1.2732126712799072, "rewards/accuracies": 0.875, "rewards/chosen": -0.02095358818769455, "rewards/margins": 0.04146100580692291, "rewards/rejected": -0.062414590269327164, "step": 296 }, { "epoch": 0.2053941908713693, "grad_norm": 3.651738405227661, "learning_rate": 1.0269709543568465e-05, "log_odds_chosen": 1.384002685546875, "log_odds_ratio": -0.523183286190033, "logits/chosen": -0.725758969783783, "logits/rejected": -0.7471228241920471, "logps/chosen": -0.16776351630687714, "logps/rejected": -0.44493597745895386, "loss": 6.5535, "nll_loss": 1.5860533714294434, "rewards/accuracies": 0.75, "rewards/chosen": -0.016776353120803833, "rewards/margins": 0.027717245742678642, "rewards/rejected": -0.044493597000837326, "step": 297 }, { "epoch": 0.20608575380359612, "grad_norm": 2.7523751258850098, "learning_rate": 1.0304287690179806e-05, "log_odds_chosen": 1.5752019882202148, "log_odds_ratio": -0.4772804379463196, "logits/chosen": -0.8213521838188171, "logits/rejected": -0.8141341805458069, "logps/chosen": -0.12757200002670288, "logps/rejected": -0.4987594485282898, "loss": 5.6836, "nll_loss": 1.373165249824524, "rewards/accuracies": 0.75, "rewards/chosen": -0.012757200747728348, "rewards/margins": 0.03711874410510063, "rewards/rejected": -0.04987594485282898, "step": 298 }, { "epoch": 0.20677731673582295, "grad_norm": 3.8582205772399902, "learning_rate": 1.0338865836791148e-05, "log_odds_chosen": 1.3856735229492188, "log_odds_ratio": -0.3291966915130615, "logits/chosen": -0.5608149170875549, "logits/rejected": -0.5706402063369751, "logps/chosen": -0.08333322405815125, "logps/rejected": -0.25464698672294617, "loss": 5.537, "nll_loss": 1.3513189554214478, "rewards/accuracies": 0.875, "rewards/chosen": -0.00833332259207964, "rewards/margins": 0.017131377011537552, "rewards/rejected": -0.025464700534939766, "step": 299 }, { "epoch": 0.2074688796680498, "grad_norm": 5.307126522064209, "learning_rate": 1.0373443983402491e-05, "log_odds_chosen": 0.05142582952976227, "log_odds_ratio": -0.7213780283927917, "logits/chosen": -0.9054761528968811, "logits/rejected": -0.8753982186317444, "logps/chosen": -0.18677140772342682, "logps/rejected": -0.16824619472026825, "loss": 6.0093, "nll_loss": 1.4301897287368774, "rewards/accuracies": 0.625, "rewards/chosen": -0.018677137792110443, "rewards/margins": -0.0018525202758610249, "rewards/rejected": -0.016824619844555855, "step": 300 }, { "epoch": 0.20816044260027664, "grad_norm": 3.757661819458008, "learning_rate": 1.0408022130013833e-05, "log_odds_chosen": 0.9107809066772461, "log_odds_ratio": -0.5286827087402344, "logits/chosen": -0.7676786184310913, "logits/rejected": -0.7682151198387146, "logps/chosen": -0.18552105128765106, "logps/rejected": -0.2996525168418884, "loss": 5.3472, "nll_loss": 1.2839436531066895, "rewards/accuracies": 0.75, "rewards/chosen": -0.018552104011178017, "rewards/margins": 0.011413146741688251, "rewards/rejected": -0.029965251684188843, "step": 301 }, { "epoch": 0.20885200553250347, "grad_norm": 3.3357064723968506, "learning_rate": 1.0442600276625174e-05, "log_odds_chosen": 2.001539707183838, "log_odds_ratio": -0.22806967794895172, "logits/chosen": -1.101258397102356, "logits/rejected": -1.1736907958984375, "logps/chosen": -0.08202943950891495, "logps/rejected": -0.5091502666473389, "loss": 5.017, "nll_loss": 1.2314308881759644, "rewards/accuracies": 1.0, "rewards/chosen": -0.00820294488221407, "rewards/margins": 0.04271208494901657, "rewards/rejected": -0.050915028899908066, "step": 302 }, { "epoch": 0.2095435684647303, "grad_norm": 5.6853108406066895, "learning_rate": 1.0477178423236516e-05, "log_odds_chosen": 0.8550553321838379, "log_odds_ratio": -0.5609914660453796, "logits/chosen": -0.7329128980636597, "logits/rejected": -0.7598764896392822, "logps/chosen": -0.35300227999687195, "logps/rejected": -0.3239184021949768, "loss": 6.053, "nll_loss": 1.4571560621261597, "rewards/accuracies": 0.875, "rewards/chosen": -0.035300228744745255, "rewards/margins": -0.0029083851259201765, "rewards/rejected": -0.03239184617996216, "step": 303 }, { "epoch": 0.21023513139695713, "grad_norm": 4.545161724090576, "learning_rate": 1.0511756569847857e-05, "log_odds_chosen": 3.2867326736450195, "log_odds_ratio": -0.21116581559181213, "logits/chosen": -0.8189299702644348, "logits/rejected": -0.8567197322845459, "logps/chosen": -0.07318108528852463, "logps/rejected": -0.832836925983429, "loss": 7.3767, "nll_loss": 1.8230527639389038, "rewards/accuracies": 0.75, "rewards/chosen": -0.007318108808249235, "rewards/margins": 0.07596558332443237, "rewards/rejected": -0.0832836925983429, "step": 304 }, { "epoch": 0.21092669432918396, "grad_norm": 5.584658622741699, "learning_rate": 1.0546334716459199e-05, "log_odds_chosen": 2.03281569480896, "log_odds_ratio": -0.4121776223182678, "logits/chosen": -0.8407848477363586, "logits/rejected": -0.8673202395439148, "logps/chosen": -0.15794141590595245, "logps/rejected": -0.48691418766975403, "loss": 5.3831, "nll_loss": 1.3045486211776733, "rewards/accuracies": 0.75, "rewards/chosen": -0.015794143080711365, "rewards/margins": 0.03289727866649628, "rewards/rejected": -0.048691414296627045, "step": 305 }, { "epoch": 0.21161825726141079, "grad_norm": 3.8299202919006348, "learning_rate": 1.058091286307054e-05, "log_odds_chosen": 1.1094874143600464, "log_odds_ratio": -0.4206291139125824, "logits/chosen": -0.7327075004577637, "logits/rejected": -0.6932054162025452, "logps/chosen": -0.1686730682849884, "logps/rejected": -0.2844252288341522, "loss": 5.7894, "nll_loss": 1.405289649963379, "rewards/accuracies": 0.875, "rewards/chosen": -0.01686730608344078, "rewards/margins": 0.011575218290090561, "rewards/rejected": -0.028442522510886192, "step": 306 }, { "epoch": 0.21230982019363762, "grad_norm": 5.454954624176025, "learning_rate": 1.0615491009681882e-05, "log_odds_chosen": 0.04435592144727707, "log_odds_ratio": -0.7941852807998657, "logits/chosen": -0.6925509572029114, "logits/rejected": -0.7195000052452087, "logps/chosen": -0.23101723194122314, "logps/rejected": -0.24052780866622925, "loss": 6.023, "nll_loss": 1.4263360500335693, "rewards/accuracies": 0.5, "rewards/chosen": -0.023101722821593285, "rewards/margins": 0.0009510572999715805, "rewards/rejected": -0.024052780121564865, "step": 307 }, { "epoch": 0.21300138312586445, "grad_norm": 5.1639533042907715, "learning_rate": 1.0650069156293224e-05, "log_odds_chosen": 0.2002013623714447, "log_odds_ratio": -0.8262504935264587, "logits/chosen": -0.5839241147041321, "logits/rejected": -0.5827579498291016, "logps/chosen": -0.18781417608261108, "logps/rejected": -0.16054357588291168, "loss": 5.1734, "nll_loss": 1.2107203006744385, "rewards/accuracies": 0.5, "rewards/chosen": -0.01878141611814499, "rewards/margins": -0.0027270594146102667, "rewards/rejected": -0.016054358333349228, "step": 308 }, { "epoch": 0.21369294605809128, "grad_norm": 17.813648223876953, "learning_rate": 1.0684647302904565e-05, "log_odds_chosen": 0.9967387914657593, "log_odds_ratio": -0.7046850919723511, "logits/chosen": -0.9163264632225037, "logits/rejected": -0.9688165187835693, "logps/chosen": -0.3284730315208435, "logps/rejected": -0.489883154630661, "loss": 5.1107, "nll_loss": 1.207213282585144, "rewards/accuracies": 0.5, "rewards/chosen": -0.03284730017185211, "rewards/margins": 0.0161410141736269, "rewards/rejected": -0.04898831248283386, "step": 309 }, { "epoch": 0.2143845089903181, "grad_norm": 4.739478588104248, "learning_rate": 1.0719225449515907e-05, "log_odds_chosen": 1.4200026988983154, "log_odds_ratio": -0.7030278444290161, "logits/chosen": -0.8466711640357971, "logits/rejected": -0.8449913263320923, "logps/chosen": -0.18683403730392456, "logps/rejected": -0.3169473707675934, "loss": 5.5492, "nll_loss": 1.3169974088668823, "rewards/accuracies": 0.875, "rewards/chosen": -0.018683403730392456, "rewards/margins": 0.013011332601308823, "rewards/rejected": -0.03169473633170128, "step": 310 }, { "epoch": 0.21507607192254496, "grad_norm": 3.767282485961914, "learning_rate": 1.0753803596127248e-05, "log_odds_chosen": 1.637789011001587, "log_odds_ratio": -0.30916017293930054, "logits/chosen": -0.5770536661148071, "logits/rejected": -0.6048831343650818, "logps/chosen": -0.08278335630893707, "logps/rejected": -0.23345719277858734, "loss": 4.6282, "nll_loss": 1.1261216402053833, "rewards/accuracies": 1.0, "rewards/chosen": -0.008278336375951767, "rewards/margins": 0.015067384578287601, "rewards/rejected": -0.023345721885561943, "step": 311 }, { "epoch": 0.2157676348547718, "grad_norm": 3.3942456245422363, "learning_rate": 1.078838174273859e-05, "log_odds_chosen": 2.3648693561553955, "log_odds_ratio": -0.2751004099845886, "logits/chosen": -0.6818414926528931, "logits/rejected": -0.6695123910903931, "logps/chosen": -0.09515105187892914, "logps/rejected": -0.41627973318099976, "loss": 5.145, "nll_loss": 1.2587300539016724, "rewards/accuracies": 0.875, "rewards/chosen": -0.009515105746686459, "rewards/margins": 0.03211286664009094, "rewards/rejected": -0.041627973318099976, "step": 312 }, { "epoch": 0.21645919778699863, "grad_norm": 4.549081802368164, "learning_rate": 1.0822959889349931e-05, "log_odds_chosen": 1.1085354089736938, "log_odds_ratio": -0.7170664072036743, "logits/chosen": -0.559461772441864, "logits/rejected": -0.5786145925521851, "logps/chosen": -0.22476282715797424, "logps/rejected": -0.42009395360946655, "loss": 5.1471, "nll_loss": 1.2150615453720093, "rewards/accuracies": 0.75, "rewards/chosen": -0.022476285696029663, "rewards/margins": 0.01953311078250408, "rewards/rejected": -0.0420093908905983, "step": 313 }, { "epoch": 0.21715076071922546, "grad_norm": 3.267533302307129, "learning_rate": 1.0857538035961273e-05, "log_odds_chosen": 1.3190233707427979, "log_odds_ratio": -0.4539645314216614, "logits/chosen": -0.19708727300167084, "logits/rejected": -0.2204999327659607, "logps/chosen": -0.17690463364124298, "logps/rejected": -0.36837536096572876, "loss": 3.9881, "nll_loss": 0.9516271352767944, "rewards/accuracies": 0.75, "rewards/chosen": -0.017690464854240417, "rewards/margins": 0.019147075712680817, "rewards/rejected": -0.036837536841630936, "step": 314 }, { "epoch": 0.21784232365145229, "grad_norm": 5.523169040679932, "learning_rate": 1.0892116182572615e-05, "log_odds_chosen": 0.7681819796562195, "log_odds_ratio": -0.7182871103286743, "logits/chosen": -0.8303021192550659, "logits/rejected": -0.8364777565002441, "logps/chosen": -0.22490473091602325, "logps/rejected": -0.41915619373321533, "loss": 6.7917, "nll_loss": 1.626107931137085, "rewards/accuracies": 0.5, "rewards/chosen": -0.022490475326776505, "rewards/margins": 0.01942514255642891, "rewards/rejected": -0.041915617883205414, "step": 315 }, { "epoch": 0.21853388658367912, "grad_norm": 3.44968581199646, "learning_rate": 1.0926694329183956e-05, "log_odds_chosen": 0.487798810005188, "log_odds_ratio": -0.575210452079773, "logits/chosen": -0.806228518486023, "logits/rejected": -0.788817822933197, "logps/chosen": -0.17850428819656372, "logps/rejected": -0.22373415529727936, "loss": 4.2689, "nll_loss": 1.0097146034240723, "rewards/accuracies": 0.75, "rewards/chosen": -0.017850428819656372, "rewards/margins": 0.004522987641394138, "rewards/rejected": -0.022373415529727936, "step": 316 }, { "epoch": 0.21922544951590595, "grad_norm": 2.8417859077453613, "learning_rate": 1.0961272475795298e-05, "log_odds_chosen": 2.186145305633545, "log_odds_ratio": -0.26623407006263733, "logits/chosen": -0.7959799766540527, "logits/rejected": -0.826103687286377, "logps/chosen": -0.09745047241449356, "logps/rejected": -0.4826028048992157, "loss": 5.4007, "nll_loss": 1.3235399723052979, "rewards/accuracies": 1.0, "rewards/chosen": -0.009745046496391296, "rewards/margins": 0.038515232503414154, "rewards/rejected": -0.04826027899980545, "step": 317 }, { "epoch": 0.21991701244813278, "grad_norm": 5.369155406951904, "learning_rate": 1.099585062240664e-05, "log_odds_chosen": -0.07499188184738159, "log_odds_ratio": -0.9024602174758911, "logits/chosen": -0.7481403350830078, "logits/rejected": -0.7806286215782166, "logps/chosen": -0.2302655577659607, "logps/rejected": -0.2344481348991394, "loss": 5.633, "nll_loss": 1.3180158138275146, "rewards/accuracies": 0.5, "rewards/chosen": -0.02302655577659607, "rewards/margins": 0.0004182555712759495, "rewards/rejected": -0.02344481088221073, "step": 318 }, { "epoch": 0.2206085753803596, "grad_norm": 5.221406936645508, "learning_rate": 1.103042876901798e-05, "log_odds_chosen": 1.0126641988754272, "log_odds_ratio": -0.6377867460250854, "logits/chosen": -0.9906193017959595, "logits/rejected": -0.9781680703163147, "logps/chosen": -0.24248827993869781, "logps/rejected": -0.48205310106277466, "loss": 6.2934, "nll_loss": 1.5095751285552979, "rewards/accuracies": 0.625, "rewards/chosen": -0.02424883097410202, "rewards/margins": 0.023956481367349625, "rewards/rejected": -0.048205308616161346, "step": 319 }, { "epoch": 0.22130013831258644, "grad_norm": 3.497264862060547, "learning_rate": 1.1065006915629322e-05, "log_odds_chosen": 2.176384925842285, "log_odds_ratio": -0.29211416840553284, "logits/chosen": -0.7758827805519104, "logits/rejected": -0.8231465220451355, "logps/chosen": -0.09156069159507751, "logps/rejected": -0.3845745325088501, "loss": 5.5199, "nll_loss": 1.3507754802703857, "rewards/accuracies": 1.0, "rewards/chosen": -0.009156068786978722, "rewards/margins": 0.029301384463906288, "rewards/rejected": -0.03845745325088501, "step": 320 }, { "epoch": 0.22199170124481327, "grad_norm": 5.271878242492676, "learning_rate": 1.1099585062240664e-05, "log_odds_chosen": 2.6156089305877686, "log_odds_ratio": -0.5581331253051758, "logits/chosen": -0.7301532030105591, "logits/rejected": -0.8199042677879333, "logps/chosen": -0.206491619348526, "logps/rejected": -0.6840195059776306, "loss": 5.643, "nll_loss": 1.3549339771270752, "rewards/accuracies": 0.625, "rewards/chosen": -0.02064916118979454, "rewards/margins": 0.047752782702445984, "rewards/rejected": -0.06840194761753082, "step": 321 }, { "epoch": 0.22268326417704012, "grad_norm": 4.354135513305664, "learning_rate": 1.1134163208852007e-05, "log_odds_chosen": 1.7553741931915283, "log_odds_ratio": -0.5252009034156799, "logits/chosen": -0.9048160910606384, "logits/rejected": -0.9343039393424988, "logps/chosen": -0.18464252352714539, "logps/rejected": -0.7623762488365173, "loss": 6.0298, "nll_loss": 1.4549338817596436, "rewards/accuracies": 0.625, "rewards/chosen": -0.01846425235271454, "rewards/margins": 0.05777337774634361, "rewards/rejected": -0.07623762637376785, "step": 322 }, { "epoch": 0.22337482710926695, "grad_norm": 4.396636009216309, "learning_rate": 1.1168741355463349e-05, "log_odds_chosen": 1.224029302597046, "log_odds_ratio": -0.7107768058776855, "logits/chosen": -1.1329301595687866, "logits/rejected": -1.1671746969223022, "logps/chosen": -0.20167645812034607, "logps/rejected": -0.4650338292121887, "loss": 5.0124, "nll_loss": 1.1820275783538818, "rewards/accuracies": 0.375, "rewards/chosen": -0.020167646929621696, "rewards/margins": 0.026335733011364937, "rewards/rejected": -0.04650337994098663, "step": 323 }, { "epoch": 0.22406639004149378, "grad_norm": 5.422794342041016, "learning_rate": 1.120331950207469e-05, "log_odds_chosen": 0.9470016360282898, "log_odds_ratio": -0.6606626510620117, "logits/chosen": -0.8801020979881287, "logits/rejected": -0.8777154088020325, "logps/chosen": -0.13853338360786438, "logps/rejected": -0.35402578115463257, "loss": 4.4383, "nll_loss": 1.04351806640625, "rewards/accuracies": 0.625, "rewards/chosen": -0.013853337615728378, "rewards/margins": 0.021549241617321968, "rewards/rejected": -0.0354025773704052, "step": 324 }, { "epoch": 0.22475795297372061, "grad_norm": 3.7938013076782227, "learning_rate": 1.1237897648686032e-05, "log_odds_chosen": 2.538409948348999, "log_odds_ratio": -0.17612087726593018, "logits/chosen": -0.9144092798233032, "logits/rejected": -0.9531471729278564, "logps/chosen": -0.09165746718645096, "logps/rejected": -0.6212788224220276, "loss": 5.4653, "nll_loss": 1.3487169742584229, "rewards/accuracies": 0.875, "rewards/chosen": -0.009165747091174126, "rewards/margins": 0.05296213552355766, "rewards/rejected": -0.06212788075208664, "step": 325 }, { "epoch": 0.22544951590594745, "grad_norm": 3.0676229000091553, "learning_rate": 1.1272475795297373e-05, "log_odds_chosen": 1.3782696723937988, "log_odds_ratio": -0.3701854646205902, "logits/chosen": -0.8568637371063232, "logits/rejected": -0.8561961650848389, "logps/chosen": -0.10448767989873886, "logps/rejected": -0.4444238543510437, "loss": 4.5713, "nll_loss": 1.105804681777954, "rewards/accuracies": 0.875, "rewards/chosen": -0.010448767803609371, "rewards/margins": 0.033993616700172424, "rewards/rejected": -0.04444238543510437, "step": 326 }, { "epoch": 0.22614107883817428, "grad_norm": 5.147009372711182, "learning_rate": 1.1307053941908715e-05, "log_odds_chosen": 0.35377904772758484, "log_odds_ratio": -0.753959059715271, "logits/chosen": -0.8954100608825684, "logits/rejected": -0.9011775851249695, "logps/chosen": -0.2625162601470947, "logps/rejected": -0.3287270665168762, "loss": 4.7927, "nll_loss": 1.122768521308899, "rewards/accuracies": 0.75, "rewards/chosen": -0.026251627132296562, "rewards/margins": 0.006621081382036209, "rewards/rejected": -0.03287270665168762, "step": 327 }, { "epoch": 0.2268326417704011, "grad_norm": 2.6937825679779053, "learning_rate": 1.1341632088520057e-05, "log_odds_chosen": 1.5019047260284424, "log_odds_ratio": -0.3758338391780853, "logits/chosen": -1.226015567779541, "logits/rejected": -1.298396110534668, "logps/chosen": -0.22487083077430725, "logps/rejected": -0.5187363624572754, "loss": 6.2312, "nll_loss": 1.5202105045318604, "rewards/accuracies": 1.0, "rewards/chosen": -0.022487085312604904, "rewards/margins": 0.029386552050709724, "rewards/rejected": -0.05187363922595978, "step": 328 }, { "epoch": 0.22752420470262794, "grad_norm": 4.040054798126221, "learning_rate": 1.1376210235131398e-05, "log_odds_chosen": 1.9598990678787231, "log_odds_ratio": -0.292378693819046, "logits/chosen": -0.742152988910675, "logits/rejected": -0.7864880561828613, "logps/chosen": -0.1894950270652771, "logps/rejected": -0.5066390037536621, "loss": 6.1423, "nll_loss": 1.5063270330429077, "rewards/accuracies": 1.0, "rewards/chosen": -0.01894950121641159, "rewards/margins": 0.03171439841389656, "rewards/rejected": -0.05066390335559845, "step": 329 }, { "epoch": 0.22821576763485477, "grad_norm": 4.373732089996338, "learning_rate": 1.141078838174274e-05, "log_odds_chosen": 0.03542667627334595, "log_odds_ratio": -1.1298328638076782, "logits/chosen": -0.5621449947357178, "logits/rejected": -0.5511815547943115, "logps/chosen": -0.34871435165405273, "logps/rejected": -0.2797914147377014, "loss": 5.2115, "nll_loss": 1.189887523651123, "rewards/accuracies": 0.5, "rewards/chosen": -0.03487143665552139, "rewards/margins": -0.006892295554280281, "rewards/rejected": -0.02797914296388626, "step": 330 }, { "epoch": 0.2289073305670816, "grad_norm": 3.9931676387786865, "learning_rate": 1.144536652835408e-05, "log_odds_chosen": 1.8632118701934814, "log_odds_ratio": -0.3370683789253235, "logits/chosen": -0.7310777902603149, "logits/rejected": -0.7843050360679626, "logps/chosen": -0.08296102285385132, "logps/rejected": -0.3721938729286194, "loss": 5.4324, "nll_loss": 1.3244011402130127, "rewards/accuracies": 0.75, "rewards/chosen": -0.008296102285385132, "rewards/margins": 0.028923287987709045, "rewards/rejected": -0.03721938654780388, "step": 331 }, { "epoch": 0.22959889349930843, "grad_norm": 3.0267841815948486, "learning_rate": 1.1479944674965421e-05, "log_odds_chosen": 1.2454099655151367, "log_odds_ratio": -0.33922553062438965, "logits/chosen": -0.8828202486038208, "logits/rejected": -0.8880357146263123, "logps/chosen": -0.1263776272535324, "logps/rejected": -0.3546895980834961, "loss": 5.3861, "nll_loss": 1.312612771987915, "rewards/accuracies": 0.875, "rewards/chosen": -0.01263776421546936, "rewards/margins": 0.02283119410276413, "rewards/rejected": -0.03546895831823349, "step": 332 }, { "epoch": 0.23029045643153526, "grad_norm": 4.601929664611816, "learning_rate": 1.1514522821576763e-05, "log_odds_chosen": 0.9013376832008362, "log_odds_ratio": -0.5258499979972839, "logits/chosen": -0.814623236656189, "logits/rejected": -0.8508963584899902, "logps/chosen": -0.13411368429660797, "logps/rejected": -0.23433759808540344, "loss": 6.2399, "nll_loss": 1.5073976516723633, "rewards/accuracies": 0.625, "rewards/chosen": -0.013411369174718857, "rewards/margins": 0.010022390633821487, "rewards/rejected": -0.023433759808540344, "step": 333 }, { "epoch": 0.23098201936376211, "grad_norm": 7.3901753425598145, "learning_rate": 1.1549100968188106e-05, "log_odds_chosen": 1.8473443984985352, "log_odds_ratio": -0.6285804510116577, "logits/chosen": -0.7013222575187683, "logits/rejected": -0.734618067741394, "logps/chosen": -0.17842142283916473, "logps/rejected": -0.4848504364490509, "loss": 5.5798, "nll_loss": 1.3320882320404053, "rewards/accuracies": 0.75, "rewards/chosen": -0.017842141911387444, "rewards/margins": 0.030642900615930557, "rewards/rejected": -0.04848504066467285, "step": 334 }, { "epoch": 0.23167358229598894, "grad_norm": 3.9874866008758545, "learning_rate": 1.1583679114799447e-05, "log_odds_chosen": 2.6798810958862305, "log_odds_ratio": -0.25364208221435547, "logits/chosen": -0.820704996585846, "logits/rejected": -0.8510127067565918, "logps/chosen": -0.1315419226884842, "logps/rejected": -0.7463638782501221, "loss": 6.0871, "nll_loss": 1.496415615081787, "rewards/accuracies": 0.75, "rewards/chosen": -0.013154192827641964, "rewards/margins": 0.06148219481110573, "rewards/rejected": -0.07463638484477997, "step": 335 }, { "epoch": 0.23236514522821577, "grad_norm": 3.520430088043213, "learning_rate": 1.1618257261410789e-05, "log_odds_chosen": 1.1615909337997437, "log_odds_ratio": -0.43198850750923157, "logits/chosen": -0.7266113758087158, "logits/rejected": -0.7631188631057739, "logps/chosen": -0.15298506617546082, "logps/rejected": -0.34811288118362427, "loss": 4.4336, "nll_loss": 1.0652047395706177, "rewards/accuracies": 0.75, "rewards/chosen": -0.015298506245017052, "rewards/margins": 0.019512783735990524, "rewards/rejected": -0.03481128811836243, "step": 336 }, { "epoch": 0.2330567081604426, "grad_norm": 4.572351932525635, "learning_rate": 1.165283540802213e-05, "log_odds_chosen": 2.346097946166992, "log_odds_ratio": -0.3432542383670807, "logits/chosen": -1.0011042356491089, "logits/rejected": -1.037811517715454, "logps/chosen": -0.08525515347719193, "logps/rejected": -0.3206099271774292, "loss": 5.3928, "nll_loss": 1.3138808012008667, "rewards/accuracies": 0.75, "rewards/chosen": -0.008525514975190163, "rewards/margins": 0.023535478860139847, "rewards/rejected": -0.03206099569797516, "step": 337 }, { "epoch": 0.23374827109266944, "grad_norm": 6.0374908447265625, "learning_rate": 1.1687413554633472e-05, "log_odds_chosen": 0.3840335011482239, "log_odds_ratio": -0.9351744055747986, "logits/chosen": -0.5911035537719727, "logits/rejected": -0.6028836965560913, "logps/chosen": -0.28335726261138916, "logps/rejected": -0.2893810570240021, "loss": 5.1259, "nll_loss": 1.1879698038101196, "rewards/accuracies": 0.625, "rewards/chosen": -0.028335727751255035, "rewards/margins": 0.0006023813039064407, "rewards/rejected": -0.028938109055161476, "step": 338 }, { "epoch": 0.23443983402489627, "grad_norm": 9.87275218963623, "learning_rate": 1.1721991701244814e-05, "log_odds_chosen": -1.1624599695205688, "log_odds_ratio": -1.561909556388855, "logits/chosen": -0.7426409125328064, "logits/rejected": -0.7292524576187134, "logps/chosen": -0.33654242753982544, "logps/rejected": -0.15829023718833923, "loss": 6.1163, "nll_loss": 1.3728806972503662, "rewards/accuracies": 0.125, "rewards/chosen": -0.03365424647927284, "rewards/margins": -0.01782522350549698, "rewards/rejected": -0.015829022973775864, "step": 339 }, { "epoch": 0.2351313969571231, "grad_norm": 3.4978086948394775, "learning_rate": 1.1756569847856155e-05, "log_odds_chosen": 1.4125635623931885, "log_odds_ratio": -0.3743363618850708, "logits/chosen": -0.8770706653594971, "logits/rejected": -0.8505789041519165, "logps/chosen": -0.08787395060062408, "logps/rejected": -0.3069489598274231, "loss": 4.5954, "nll_loss": 1.1114096641540527, "rewards/accuracies": 0.875, "rewards/chosen": -0.008787395432591438, "rewards/margins": 0.02190750278532505, "rewards/rejected": -0.03069489635527134, "step": 340 }, { "epoch": 0.23582295988934993, "grad_norm": 5.122701168060303, "learning_rate": 1.1791147994467497e-05, "log_odds_chosen": 1.5185163021087646, "log_odds_ratio": -0.5956940054893494, "logits/chosen": -0.7384462952613831, "logits/rejected": -0.7524312734603882, "logps/chosen": -0.18539643287658691, "logps/rejected": -0.4253931939601898, "loss": 6.206, "nll_loss": 1.4919191598892212, "rewards/accuracies": 0.875, "rewards/chosen": -0.01853964291512966, "rewards/margins": 0.02399967610836029, "rewards/rejected": -0.0425393208861351, "step": 341 }, { "epoch": 0.23651452282157676, "grad_norm": 5.095941543579102, "learning_rate": 1.1825726141078838e-05, "log_odds_chosen": 1.302222728729248, "log_odds_ratio": -0.48803675174713135, "logits/chosen": -0.6658348441123962, "logits/rejected": -0.6755991578102112, "logps/chosen": -0.14942830801010132, "logps/rejected": -0.4372924864292145, "loss": 5.3003, "nll_loss": 1.2762644290924072, "rewards/accuracies": 0.625, "rewards/chosen": -0.014942830428481102, "rewards/margins": 0.028786418959498405, "rewards/rejected": -0.04372924938797951, "step": 342 }, { "epoch": 0.23720608575380359, "grad_norm": 4.108216762542725, "learning_rate": 1.186030428769018e-05, "log_odds_chosen": 1.0591542720794678, "log_odds_ratio": -0.35927653312683105, "logits/chosen": -0.7216286659240723, "logits/rejected": -0.7346411347389221, "logps/chosen": -0.17469891905784607, "logps/rejected": -0.36906424164772034, "loss": 5.0595, "nll_loss": 1.228936791419983, "rewards/accuracies": 1.0, "rewards/chosen": -0.017469894140958786, "rewards/margins": 0.019436530768871307, "rewards/rejected": -0.036906421184539795, "step": 343 }, { "epoch": 0.23789764868603042, "grad_norm": 3.7954001426696777, "learning_rate": 1.1894882434301522e-05, "log_odds_chosen": 1.0637387037277222, "log_odds_ratio": -0.4897671043872833, "logits/chosen": -0.7606596350669861, "logits/rejected": -0.7664926648139954, "logps/chosen": -0.1042921245098114, "logps/rejected": -0.3099287152290344, "loss": 4.8263, "nll_loss": 1.1575967073440552, "rewards/accuracies": 0.75, "rewards/chosen": -0.01042921282351017, "rewards/margins": 0.020563658326864243, "rewards/rejected": -0.030992871150374413, "step": 344 }, { "epoch": 0.23858921161825727, "grad_norm": 2.977518081665039, "learning_rate": 1.1929460580912865e-05, "log_odds_chosen": 1.6749167442321777, "log_odds_ratio": -0.4589526951313019, "logits/chosen": -0.9313449859619141, "logits/rejected": -0.9575750231742859, "logps/chosen": -0.22524240612983704, "logps/rejected": -0.42715808749198914, "loss": 4.3979, "nll_loss": 1.0535855293273926, "rewards/accuracies": 0.625, "rewards/chosen": -0.022524241358041763, "rewards/margins": 0.02019157074391842, "rewards/rejected": -0.04271581396460533, "step": 345 }, { "epoch": 0.2392807745504841, "grad_norm": 4.326882839202881, "learning_rate": 1.1964038727524206e-05, "log_odds_chosen": 0.5998474359512329, "log_odds_ratio": -0.6148593425750732, "logits/chosen": -0.8653595447540283, "logits/rejected": -0.9045172929763794, "logps/chosen": -0.2513960301876068, "logps/rejected": -0.4176265597343445, "loss": 5.8248, "nll_loss": 1.394715666770935, "rewards/accuracies": 0.625, "rewards/chosen": -0.02513960376381874, "rewards/margins": 0.016623055562376976, "rewards/rejected": -0.04176265746355057, "step": 346 }, { "epoch": 0.23997233748271093, "grad_norm": 4.429263591766357, "learning_rate": 1.1998616874135548e-05, "log_odds_chosen": 1.5210059881210327, "log_odds_ratio": -0.5017091035842896, "logits/chosen": -0.9123212099075317, "logits/rejected": -0.9576847553253174, "logps/chosen": -0.14953172206878662, "logps/rejected": -0.5459752082824707, "loss": 5.5626, "nll_loss": 1.340486764907837, "rewards/accuracies": 0.75, "rewards/chosen": -0.014953171834349632, "rewards/margins": 0.039644353091716766, "rewards/rejected": -0.05459752306342125, "step": 347 }, { "epoch": 0.24066390041493776, "grad_norm": 3.6143527030944824, "learning_rate": 1.203319502074689e-05, "log_odds_chosen": 0.5170705914497375, "log_odds_ratio": -0.5560952425003052, "logits/chosen": -0.6124237775802612, "logits/rejected": -0.5826144814491272, "logps/chosen": -0.16328248381614685, "logps/rejected": -0.24581731855869293, "loss": 3.5029, "nll_loss": 0.8201128840446472, "rewards/accuracies": 0.75, "rewards/chosen": -0.016328249126672745, "rewards/margins": 0.008253482170403004, "rewards/rejected": -0.024581734091043472, "step": 348 }, { "epoch": 0.2413554633471646, "grad_norm": 3.123534917831421, "learning_rate": 1.2067773167358231e-05, "log_odds_chosen": 0.731002688407898, "log_odds_ratio": -0.49530866742134094, "logits/chosen": -0.8178795576095581, "logits/rejected": -0.8388358950614929, "logps/chosen": -0.1596156358718872, "logps/rejected": -0.25716426968574524, "loss": 5.6156, "nll_loss": 1.354375958442688, "rewards/accuracies": 0.75, "rewards/chosen": -0.01596156321465969, "rewards/margins": 0.009754862636327744, "rewards/rejected": -0.025716423988342285, "step": 349 }, { "epoch": 0.24204702627939143, "grad_norm": 3.329681396484375, "learning_rate": 1.2102351313969573e-05, "log_odds_chosen": 0.6945995092391968, "log_odds_ratio": -0.5422481298446655, "logits/chosen": -0.7990038990974426, "logits/rejected": -0.8053203821182251, "logps/chosen": -0.11132040619850159, "logps/rejected": -0.16899347305297852, "loss": 4.5106, "nll_loss": 1.0734366178512573, "rewards/accuracies": 0.75, "rewards/chosen": -0.011132041923701763, "rewards/margins": 0.00576730677857995, "rewards/rejected": -0.016899349167943, "step": 350 }, { "epoch": 0.24273858921161826, "grad_norm": 3.027214765548706, "learning_rate": 1.2136929460580914e-05, "log_odds_chosen": 1.642749547958374, "log_odds_ratio": -0.41968217492103577, "logits/chosen": -0.9147093296051025, "logits/rejected": -0.9439373016357422, "logps/chosen": -0.10925433784723282, "logps/rejected": -0.3146194517612457, "loss": 5.1456, "nll_loss": 1.244423270225525, "rewards/accuracies": 0.75, "rewards/chosen": -0.010925433598458767, "rewards/margins": 0.0205365139991045, "rewards/rejected": -0.03146194666624069, "step": 351 }, { "epoch": 0.24343015214384509, "grad_norm": 3.000566005706787, "learning_rate": 1.2171507607192254e-05, "log_odds_chosen": 0.5409971475601196, "log_odds_ratio": -0.5828170776367188, "logits/chosen": -0.6988095045089722, "logits/rejected": -0.6698228716850281, "logps/chosen": -0.22581708431243896, "logps/rejected": -0.3958743214607239, "loss": 4.9495, "nll_loss": 1.1790937185287476, "rewards/accuracies": 0.625, "rewards/chosen": -0.022581709548830986, "rewards/margins": 0.01700572483241558, "rewards/rejected": -0.03958743438124657, "step": 352 }, { "epoch": 0.24412171507607192, "grad_norm": 3.9589076042175293, "learning_rate": 1.2206085753803596e-05, "log_odds_chosen": 0.8446111679077148, "log_odds_ratio": -0.5762701630592346, "logits/chosen": -0.7837926149368286, "logits/rejected": -0.7676455974578857, "logps/chosen": -0.17044731974601746, "logps/rejected": -0.288826584815979, "loss": 5.6256, "nll_loss": 1.3487628698349, "rewards/accuracies": 0.75, "rewards/chosen": -0.017044732347130775, "rewards/margins": 0.011837925761938095, "rewards/rejected": -0.02888265810906887, "step": 353 }, { "epoch": 0.24481327800829875, "grad_norm": 2.9041407108306885, "learning_rate": 1.2240663900414937e-05, "log_odds_chosen": 1.5135369300842285, "log_odds_ratio": -0.5100223422050476, "logits/chosen": -0.7986371517181396, "logits/rejected": -0.8036400675773621, "logps/chosen": -0.1771228313446045, "logps/rejected": -0.48527291417121887, "loss": 4.3205, "nll_loss": 1.0291134119033813, "rewards/accuracies": 0.625, "rewards/chosen": -0.017712285742163658, "rewards/margins": 0.0308150053024292, "rewards/rejected": -0.04852728918194771, "step": 354 }, { "epoch": 0.24550484094052558, "grad_norm": 4.075167655944824, "learning_rate": 1.2275242047026279e-05, "log_odds_chosen": 1.6503653526306152, "log_odds_ratio": -0.3700273334980011, "logits/chosen": -0.8282778263092041, "logits/rejected": -0.8640443086624146, "logps/chosen": -0.11108995229005814, "logps/rejected": -0.3973962962627411, "loss": 5.0046, "nll_loss": 1.2141525745391846, "rewards/accuracies": 0.875, "rewards/chosen": -0.011108995415270329, "rewards/margins": 0.028630636632442474, "rewards/rejected": -0.03973962739109993, "step": 355 }, { "epoch": 0.24619640387275243, "grad_norm": 4.823635101318359, "learning_rate": 1.2309820193637622e-05, "log_odds_chosen": 0.9693795442581177, "log_odds_ratio": -0.41709911823272705, "logits/chosen": -0.6768261194229126, "logits/rejected": -0.7117671966552734, "logps/chosen": -0.14925503730773926, "logps/rejected": -0.3569108247756958, "loss": 5.3088, "nll_loss": 1.2854878902435303, "rewards/accuracies": 0.875, "rewards/chosen": -0.01492550503462553, "rewards/margins": 0.020765576511621475, "rewards/rejected": -0.03569108247756958, "step": 356 }, { "epoch": 0.24688796680497926, "grad_norm": 4.039694786071777, "learning_rate": 1.2344398340248964e-05, "log_odds_chosen": 1.6349093914031982, "log_odds_ratio": -0.30501067638397217, "logits/chosen": -0.6676627397537231, "logits/rejected": -0.6829565763473511, "logps/chosen": -0.12125631421804428, "logps/rejected": -0.4480995833873749, "loss": 6.7252, "nll_loss": 1.6507999897003174, "rewards/accuracies": 0.875, "rewards/chosen": -0.012125632725656033, "rewards/margins": 0.032684326171875, "rewards/rejected": -0.04480995610356331, "step": 357 }, { "epoch": 0.2475795297372061, "grad_norm": 4.4998884201049805, "learning_rate": 1.2378976486860305e-05, "log_odds_chosen": 2.042746067047119, "log_odds_ratio": -0.30104365944862366, "logits/chosen": -0.7379899621009827, "logits/rejected": -0.7694678902626038, "logps/chosen": -0.14233702421188354, "logps/rejected": -0.5452017188072205, "loss": 6.9534, "nll_loss": 1.7082533836364746, "rewards/accuracies": 0.875, "rewards/chosen": -0.01423370186239481, "rewards/margins": 0.04028647020459175, "rewards/rejected": -0.054520174860954285, "step": 358 }, { "epoch": 0.24827109266943292, "grad_norm": 4.526036262512207, "learning_rate": 1.2413554633471647e-05, "log_odds_chosen": 1.137697458267212, "log_odds_ratio": -0.47182533144950867, "logits/chosen": -0.5749889612197876, "logits/rejected": -0.6108061075210571, "logps/chosen": -0.12986072897911072, "logps/rejected": -0.2841159403324127, "loss": 5.0898, "nll_loss": 1.225273847579956, "rewards/accuracies": 0.75, "rewards/chosen": -0.012986073270440102, "rewards/margins": 0.015425523743033409, "rewards/rejected": -0.02841159515082836, "step": 359 }, { "epoch": 0.24896265560165975, "grad_norm": 4.702746391296387, "learning_rate": 1.2448132780082988e-05, "log_odds_chosen": 1.1218202114105225, "log_odds_ratio": -0.3816485106945038, "logits/chosen": -0.5540226697921753, "logits/rejected": -0.6201716065406799, "logps/chosen": -0.12513335049152374, "logps/rejected": -0.3358425796031952, "loss": 5.328, "nll_loss": 1.2938361167907715, "rewards/accuracies": 0.75, "rewards/chosen": -0.012513335794210434, "rewards/margins": 0.021070925518870354, "rewards/rejected": -0.03358425945043564, "step": 360 }, { "epoch": 0.24965421853388658, "grad_norm": 4.272010803222656, "learning_rate": 1.248271092669433e-05, "log_odds_chosen": 0.28873878717422485, "log_odds_ratio": -0.9356129169464111, "logits/chosen": -0.9438433647155762, "logits/rejected": -0.9586924910545349, "logps/chosen": -0.2660728096961975, "logps/rejected": -0.1731991171836853, "loss": 4.8786, "nll_loss": 1.1260899305343628, "rewards/accuracies": 0.625, "rewards/chosen": -0.02660728059709072, "rewards/margins": -0.009287366643548012, "rewards/rejected": -0.01731991209089756, "step": 361 }, { "epoch": 0.2503457814661134, "grad_norm": 4.26386833190918, "learning_rate": 1.2517289073305671e-05, "log_odds_chosen": 1.449477195739746, "log_odds_ratio": -0.42578303813934326, "logits/chosen": -0.6013892889022827, "logits/rejected": -0.6026226282119751, "logps/chosen": -0.1423933207988739, "logps/rejected": -0.2896794378757477, "loss": 5.13, "nll_loss": 1.2399232387542725, "rewards/accuracies": 0.75, "rewards/chosen": -0.014239332638680935, "rewards/margins": 0.014728610403835773, "rewards/rejected": -0.02896794304251671, "step": 362 }, { "epoch": 0.25103734439834025, "grad_norm": 4.793960094451904, "learning_rate": 1.2551867219917013e-05, "log_odds_chosen": 2.400606393814087, "log_odds_ratio": -0.26553744077682495, "logits/chosen": -0.5489617586135864, "logits/rejected": -0.6443371176719666, "logps/chosen": -0.07715193927288055, "logps/rejected": -0.51331627368927, "loss": 5.9403, "nll_loss": 1.4585212469100952, "rewards/accuracies": 0.875, "rewards/chosen": -0.007715193554759026, "rewards/margins": 0.04361642897129059, "rewards/rejected": -0.05133162438869476, "step": 363 }, { "epoch": 0.2517289073305671, "grad_norm": 5.656650543212891, "learning_rate": 1.2586445366528355e-05, "log_odds_chosen": 1.3067384958267212, "log_odds_ratio": -0.5937015414237976, "logits/chosen": -0.7901173830032349, "logits/rejected": -0.8001323342323303, "logps/chosen": -0.13530637323856354, "logps/rejected": -0.39692115783691406, "loss": 6.1923, "nll_loss": 1.4887058734893799, "rewards/accuracies": 0.625, "rewards/chosen": -0.013530636206269264, "rewards/margins": 0.026161476969718933, "rewards/rejected": -0.03969211503863335, "step": 364 }, { "epoch": 0.2524204702627939, "grad_norm": 3.2860074043273926, "learning_rate": 1.2621023513139696e-05, "log_odds_chosen": 0.46500369906425476, "log_odds_ratio": -0.6647549867630005, "logits/chosen": -0.6945218443870544, "logits/rejected": -0.7753416299819946, "logps/chosen": -0.21082139015197754, "logps/rejected": -0.2310963124036789, "loss": 4.4554, "nll_loss": 1.0473787784576416, "rewards/accuracies": 0.625, "rewards/chosen": -0.021082140505313873, "rewards/margins": 0.002027492504566908, "rewards/rejected": -0.02310963161289692, "step": 365 }, { "epoch": 0.25311203319502074, "grad_norm": 2.8510890007019043, "learning_rate": 1.2655601659751038e-05, "log_odds_chosen": 0.5070021748542786, "log_odds_ratio": -0.5470277070999146, "logits/chosen": -0.40646564960479736, "logits/rejected": -0.4076330363750458, "logps/chosen": -0.13049736618995667, "logps/rejected": -0.24043381214141846, "loss": 4.2234, "nll_loss": 1.001145601272583, "rewards/accuracies": 0.75, "rewards/chosen": -0.013049736618995667, "rewards/margins": 0.01099364273250103, "rewards/rejected": -0.024043381214141846, "step": 366 }, { "epoch": 0.25380359612724757, "grad_norm": 5.104526519775391, "learning_rate": 1.269017980636238e-05, "log_odds_chosen": 0.47252020239830017, "log_odds_ratio": -0.8805572986602783, "logits/chosen": -0.8176181316375732, "logits/rejected": -0.8377338647842407, "logps/chosen": -0.1574239730834961, "logps/rejected": -0.26355281472206116, "loss": 5.7228, "nll_loss": 1.3426513671875, "rewards/accuracies": 0.625, "rewards/chosen": -0.01574239507317543, "rewards/margins": 0.010612884536385536, "rewards/rejected": -0.026355283334851265, "step": 367 }, { "epoch": 0.2544951590594744, "grad_norm": 4.936967372894287, "learning_rate": 1.272475795297372e-05, "log_odds_chosen": 2.6330790519714355, "log_odds_ratio": -0.2361859381198883, "logits/chosen": -0.6837531924247742, "logits/rejected": -0.7225916385650635, "logps/chosen": -0.12778867781162262, "logps/rejected": -0.5990055203437805, "loss": 6.8949, "nll_loss": 1.7001042366027832, "rewards/accuracies": 1.0, "rewards/chosen": -0.012778868898749352, "rewards/margins": 0.04712168127298355, "rewards/rejected": -0.05990055203437805, "step": 368 }, { "epoch": 0.2551867219917012, "grad_norm": 4.205926418304443, "learning_rate": 1.2759336099585062e-05, "log_odds_chosen": 0.6637147068977356, "log_odds_ratio": -0.665414035320282, "logits/chosen": -0.7646222114562988, "logits/rejected": -0.7779873013496399, "logps/chosen": -0.16148130595684052, "logps/rejected": -0.23958323895931244, "loss": 4.5449, "nll_loss": 1.0696804523468018, "rewards/accuracies": 0.625, "rewards/chosen": -0.01614813134074211, "rewards/margins": 0.007810194510966539, "rewards/rejected": -0.023958325386047363, "step": 369 }, { "epoch": 0.25587828492392806, "grad_norm": 3.8479011058807373, "learning_rate": 1.2793914246196404e-05, "log_odds_chosen": 0.786837100982666, "log_odds_ratio": -0.5847955942153931, "logits/chosen": -0.795537531375885, "logits/rejected": -0.8256769180297852, "logps/chosen": -0.15839308500289917, "logps/rejected": -0.3057611882686615, "loss": 4.7425, "nll_loss": 1.127145767211914, "rewards/accuracies": 0.625, "rewards/chosen": -0.015839308500289917, "rewards/margins": 0.014736810699105263, "rewards/rejected": -0.03057611919939518, "step": 370 }, { "epoch": 0.2565698478561549, "grad_norm": 4.731762886047363, "learning_rate": 1.2828492392807745e-05, "log_odds_chosen": 1.866947054862976, "log_odds_ratio": -0.45763030648231506, "logits/chosen": -0.8834202289581299, "logits/rejected": -0.8629869222640991, "logps/chosen": -0.18499600887298584, "logps/rejected": -0.4734047055244446, "loss": 5.7119, "nll_loss": 1.3822195529937744, "rewards/accuracies": 0.75, "rewards/chosen": -0.018499599769711494, "rewards/margins": 0.028840873390436172, "rewards/rejected": -0.04734047129750252, "step": 371 }, { "epoch": 0.2572614107883817, "grad_norm": 3.759676218032837, "learning_rate": 1.2863070539419087e-05, "log_odds_chosen": 2.1450793743133545, "log_odds_ratio": -0.3913061320781708, "logits/chosen": -0.7757332921028137, "logits/rejected": -0.7739498615264893, "logps/chosen": -0.07313913851976395, "logps/rejected": -0.2787635028362274, "loss": 4.0562, "nll_loss": 0.9749183654785156, "rewards/accuracies": 0.75, "rewards/chosen": -0.007313914131373167, "rewards/margins": 0.020562436431646347, "rewards/rejected": -0.0278763510286808, "step": 372 }, { "epoch": 0.2579529737206086, "grad_norm": 8.916706085205078, "learning_rate": 1.289764868603043e-05, "log_odds_chosen": 0.3090290129184723, "log_odds_ratio": -0.7983847856521606, "logits/chosen": -1.282267451286316, "logits/rejected": -1.275241494178772, "logps/chosen": -0.16115368902683258, "logps/rejected": -0.2360834777355194, "loss": 6.7564, "nll_loss": 1.6092727184295654, "rewards/accuracies": 0.5, "rewards/chosen": -0.016115369275212288, "rewards/margins": 0.007492979057133198, "rewards/rejected": -0.02360834740102291, "step": 373 }, { "epoch": 0.25864453665283543, "grad_norm": 4.516635894775391, "learning_rate": 1.2932226832641772e-05, "log_odds_chosen": 1.2972509860992432, "log_odds_ratio": -0.342338889837265, "logits/chosen": -0.7201129198074341, "logits/rejected": -0.7343860268592834, "logps/chosen": -0.11302807927131653, "logps/rejected": -0.33227887749671936, "loss": 5.0235, "nll_loss": 1.2216522693634033, "rewards/accuracies": 0.875, "rewards/chosen": -0.011302808299660683, "rewards/margins": 0.021925078704953194, "rewards/rejected": -0.033227887004613876, "step": 374 }, { "epoch": 0.25933609958506226, "grad_norm": 3.1193737983703613, "learning_rate": 1.2966804979253113e-05, "log_odds_chosen": 1.8649317026138306, "log_odds_ratio": -0.28167441487312317, "logits/chosen": -0.6341934204101562, "logits/rejected": -0.6485381126403809, "logps/chosen": -0.0737493708729744, "logps/rejected": -0.3434864580631256, "loss": 4.0531, "nll_loss": 0.98511803150177, "rewards/accuracies": 1.0, "rewards/chosen": -0.007374937646090984, "rewards/margins": 0.02697370946407318, "rewards/rejected": -0.03434864804148674, "step": 375 }, { "epoch": 0.2600276625172891, "grad_norm": 4.7184247970581055, "learning_rate": 1.3001383125864455e-05, "log_odds_chosen": 1.4624907970428467, "log_odds_ratio": -0.5053262114524841, "logits/chosen": -0.577054500579834, "logits/rejected": -0.6243833303451538, "logps/chosen": -0.22585873305797577, "logps/rejected": -0.47850340604782104, "loss": 6.4885, "nll_loss": 1.5715917348861694, "rewards/accuracies": 0.75, "rewards/chosen": -0.022585874423384666, "rewards/margins": 0.025264466181397438, "rewards/rejected": -0.047850340604782104, "step": 376 }, { "epoch": 0.2607192254495159, "grad_norm": 5.825414180755615, "learning_rate": 1.3035961272475797e-05, "log_odds_chosen": 0.8218759894371033, "log_odds_ratio": -0.774000883102417, "logits/chosen": -0.6426214575767517, "logits/rejected": -0.6425043344497681, "logps/chosen": -0.18728148937225342, "logps/rejected": -0.2688373327255249, "loss": 5.8574, "nll_loss": 1.3869421482086182, "rewards/accuracies": 0.375, "rewards/chosen": -0.01872815005481243, "rewards/margins": 0.00815558061003685, "rewards/rejected": -0.02688373252749443, "step": 377 }, { "epoch": 0.26141078838174275, "grad_norm": 4.1526780128479, "learning_rate": 1.3070539419087138e-05, "log_odds_chosen": 1.282230019569397, "log_odds_ratio": -0.4600529670715332, "logits/chosen": -0.9115185737609863, "logits/rejected": -0.9095169305801392, "logps/chosen": -0.18320423364639282, "logps/rejected": -0.3419126272201538, "loss": 5.7571, "nll_loss": 1.3932691812515259, "rewards/accuracies": 0.75, "rewards/chosen": -0.018320422619581223, "rewards/margins": 0.0158708393573761, "rewards/rejected": -0.03419126197695732, "step": 378 }, { "epoch": 0.2621023513139696, "grad_norm": 2.8454952239990234, "learning_rate": 1.310511756569848e-05, "log_odds_chosen": 1.661702275276184, "log_odds_ratio": -0.20927149057388306, "logits/chosen": -0.5011343955993652, "logits/rejected": -0.49014002084732056, "logps/chosen": -0.0761743038892746, "logps/rejected": -0.3358593285083771, "loss": 4.2203, "nll_loss": 1.0341438055038452, "rewards/accuracies": 1.0, "rewards/chosen": -0.0076174307614564896, "rewards/margins": 0.025968503206968307, "rewards/rejected": -0.03358593210577965, "step": 379 }, { "epoch": 0.2627939142461964, "grad_norm": 6.023113250732422, "learning_rate": 1.3139695712309821e-05, "log_odds_chosen": 0.7916541695594788, "log_odds_ratio": -0.5888239145278931, "logits/chosen": -0.7584908604621887, "logits/rejected": -0.7206529378890991, "logps/chosen": -0.1440635770559311, "logps/rejected": -0.3327956199645996, "loss": 5.0561, "nll_loss": 1.205130934715271, "rewards/accuracies": 0.625, "rewards/chosen": -0.014406357891857624, "rewards/margins": 0.018873201683163643, "rewards/rejected": -0.03327956050634384, "step": 380 }, { "epoch": 0.26348547717842324, "grad_norm": 3.5837507247924805, "learning_rate": 1.3174273858921163e-05, "log_odds_chosen": 0.8221812844276428, "log_odds_ratio": -0.4075019359588623, "logits/chosen": -0.8715775012969971, "logits/rejected": -0.9140419363975525, "logps/chosen": -0.14378435909748077, "logps/rejected": -0.31232964992523193, "loss": 3.6665, "nll_loss": 0.8758648037910461, "rewards/accuracies": 1.0, "rewards/chosen": -0.014378435909748077, "rewards/margins": 0.016854530200362206, "rewards/rejected": -0.031232964247465134, "step": 381 }, { "epoch": 0.2641770401106501, "grad_norm": 4.768825531005859, "learning_rate": 1.3208852005532504e-05, "log_odds_chosen": 2.0849530696868896, "log_odds_ratio": -0.3738650679588318, "logits/chosen": -0.9447451233863831, "logits/rejected": -0.9916914701461792, "logps/chosen": -0.11316876113414764, "logps/rejected": -0.4777146577835083, "loss": 6.7512, "nll_loss": 1.6504106521606445, "rewards/accuracies": 0.875, "rewards/chosen": -0.011316876858472824, "rewards/margins": 0.036454588174819946, "rewards/rejected": -0.04777146503329277, "step": 382 }, { "epoch": 0.2648686030428769, "grad_norm": 4.187633037567139, "learning_rate": 1.3243430152143846e-05, "log_odds_chosen": 0.632165789604187, "log_odds_ratio": -0.5006855130195618, "logits/chosen": -0.6153163909912109, "logits/rejected": -0.5733051300048828, "logps/chosen": -0.17688298225402832, "logps/rejected": -0.3255842924118042, "loss": 4.8609, "nll_loss": 1.1651612520217896, "rewards/accuracies": 0.75, "rewards/chosen": -0.017688296735286713, "rewards/margins": 0.014870131388306618, "rewards/rejected": -0.03255842626094818, "step": 383 }, { "epoch": 0.26556016597510373, "grad_norm": 5.697615623474121, "learning_rate": 1.3278008298755187e-05, "log_odds_chosen": 1.6235935688018799, "log_odds_ratio": -0.8028425574302673, "logits/chosen": -0.9089243412017822, "logits/rejected": -0.9217929840087891, "logps/chosen": -0.2022382915019989, "logps/rejected": -0.4012358784675598, "loss": 4.5655, "nll_loss": 1.0610859394073486, "rewards/accuracies": 0.5, "rewards/chosen": -0.02022382989525795, "rewards/margins": 0.019899757578969002, "rewards/rejected": -0.0401235893368721, "step": 384 }, { "epoch": 0.26625172890733056, "grad_norm": 2.4368789196014404, "learning_rate": 1.3312586445366529e-05, "log_odds_chosen": 2.746462821960449, "log_odds_ratio": -0.1782023161649704, "logits/chosen": -1.0373375415802002, "logits/rejected": -1.0150465965270996, "logps/chosen": -0.10444726049900055, "logps/rejected": -0.7134073376655579, "loss": 4.0557, "nll_loss": 0.9960981607437134, "rewards/accuracies": 1.0, "rewards/chosen": -0.01044472586363554, "rewards/margins": 0.06089600920677185, "rewards/rejected": -0.07134073227643967, "step": 385 }, { "epoch": 0.2669432918395574, "grad_norm": 4.904059886932373, "learning_rate": 1.334716459197787e-05, "log_odds_chosen": 0.5463998317718506, "log_odds_ratio": -0.7089335918426514, "logits/chosen": -0.7584419250488281, "logits/rejected": -0.6511543989181519, "logps/chosen": -0.13503843545913696, "logps/rejected": -0.19024111330509186, "loss": 4.8678, "nll_loss": 1.1460521221160889, "rewards/accuracies": 0.75, "rewards/chosen": -0.013503843918442726, "rewards/margins": 0.005520268343389034, "rewards/rejected": -0.019024113193154335, "step": 386 }, { "epoch": 0.2676348547717842, "grad_norm": 4.778861045837402, "learning_rate": 1.3381742738589212e-05, "log_odds_chosen": 0.6914415955543518, "log_odds_ratio": -0.568891704082489, "logits/chosen": -1.069885015487671, "logits/rejected": -1.0567914247512817, "logps/chosen": -0.20774585008621216, "logps/rejected": -0.3966343104839325, "loss": 5.8371, "nll_loss": 1.4023927450180054, "rewards/accuracies": 0.625, "rewards/chosen": -0.020774584263563156, "rewards/margins": 0.018888846039772034, "rewards/rejected": -0.03966343030333519, "step": 387 }, { "epoch": 0.26832641770401106, "grad_norm": 5.75725793838501, "learning_rate": 1.3416320885200554e-05, "log_odds_chosen": 0.8915666341781616, "log_odds_ratio": -0.7970221042633057, "logits/chosen": -0.6445469856262207, "logits/rejected": -0.6691566109657288, "logps/chosen": -0.18700812757015228, "logps/rejected": -0.45797044038772583, "loss": 4.3015, "nll_loss": 0.9956653118133545, "rewards/accuracies": 0.625, "rewards/chosen": -0.01870081201195717, "rewards/margins": 0.027096234261989594, "rewards/rejected": -0.04579704999923706, "step": 388 }, { "epoch": 0.2690179806362379, "grad_norm": 2.98270583152771, "learning_rate": 1.3450899031811895e-05, "log_odds_chosen": 1.9932804107666016, "log_odds_ratio": -0.20717641711235046, "logits/chosen": -0.4077128767967224, "logits/rejected": -0.45207881927490234, "logps/chosen": -0.0988541767001152, "logps/rejected": -0.30075618624687195, "loss": 4.0487, "nll_loss": 0.991469144821167, "rewards/accuracies": 1.0, "rewards/chosen": -0.00988541729748249, "rewards/margins": 0.020190201699733734, "rewards/rejected": -0.030075618997216225, "step": 389 }, { "epoch": 0.2697095435684647, "grad_norm": 5.803657531738281, "learning_rate": 1.3485477178423237e-05, "log_odds_chosen": 1.3662338256835938, "log_odds_ratio": -0.6121001839637756, "logits/chosen": -0.6869480609893799, "logits/rejected": -0.7207568287849426, "logps/chosen": -0.1792462170124054, "logps/rejected": -0.5913968086242676, "loss": 6.2556, "nll_loss": 1.5027010440826416, "rewards/accuracies": 0.875, "rewards/chosen": -0.01792462170124054, "rewards/margins": 0.04121505841612816, "rewards/rejected": -0.059139683842659, "step": 390 }, { "epoch": 0.27040110650069155, "grad_norm": 4.610645771026611, "learning_rate": 1.3520055325034578e-05, "log_odds_chosen": 1.56792151927948, "log_odds_ratio": -0.5591750741004944, "logits/chosen": -0.744486391544342, "logits/rejected": -0.7582099437713623, "logps/chosen": -0.14866414666175842, "logps/rejected": -0.6275578737258911, "loss": 3.3825, "nll_loss": 0.7897180318832397, "rewards/accuracies": 0.75, "rewards/chosen": -0.014866415411233902, "rewards/margins": 0.04788937419652939, "rewards/rejected": -0.06275579333305359, "step": 391 }, { "epoch": 0.2710926694329184, "grad_norm": 5.005346775054932, "learning_rate": 1.355463347164592e-05, "log_odds_chosen": 1.5481021404266357, "log_odds_ratio": -0.34036847949028015, "logits/chosen": -0.9398497939109802, "logits/rejected": -0.9297147393226624, "logps/chosen": -0.14529621601104736, "logps/rejected": -0.3949902057647705, "loss": 5.6398, "nll_loss": 1.3759061098098755, "rewards/accuracies": 0.875, "rewards/chosen": -0.014529621228575706, "rewards/margins": 0.024969400838017464, "rewards/rejected": -0.03949902206659317, "step": 392 }, { "epoch": 0.2717842323651452, "grad_norm": 4.769950866699219, "learning_rate": 1.3589211618257262e-05, "log_odds_chosen": 0.877053439617157, "log_odds_ratio": -0.5863583087921143, "logits/chosen": -0.6077442765235901, "logits/rejected": -0.5932273864746094, "logps/chosen": -0.16600897908210754, "logps/rejected": -0.3204789459705353, "loss": 5.4064, "nll_loss": 1.2929534912109375, "rewards/accuracies": 0.625, "rewards/chosen": -0.016600897535681725, "rewards/margins": 0.015446994453668594, "rewards/rejected": -0.03204789012670517, "step": 393 }, { "epoch": 0.27247579529737204, "grad_norm": 4.381612300872803, "learning_rate": 1.3623789764868603e-05, "log_odds_chosen": 1.4782297611236572, "log_odds_ratio": -0.4741993546485901, "logits/chosen": -1.0078778266906738, "logits/rejected": -0.9697948694229126, "logps/chosen": -0.15128880739212036, "logps/rejected": -0.5114619731903076, "loss": 5.2417, "nll_loss": 1.2629996538162231, "rewards/accuracies": 0.625, "rewards/chosen": -0.015128879807889462, "rewards/margins": 0.036017317324876785, "rewards/rejected": -0.05114620178937912, "step": 394 }, { "epoch": 0.27316735822959887, "grad_norm": 5.883626937866211, "learning_rate": 1.3658367911479945e-05, "log_odds_chosen": 1.8567073345184326, "log_odds_ratio": -0.4506278932094574, "logits/chosen": -0.8221858739852905, "logits/rejected": -0.8678174018859863, "logps/chosen": -0.1326407641172409, "logps/rejected": -0.481861412525177, "loss": 5.6703, "nll_loss": 1.3725148439407349, "rewards/accuracies": 0.875, "rewards/chosen": -0.013264075852930546, "rewards/margins": 0.03492206707596779, "rewards/rejected": -0.04818614572286606, "step": 395 }, { "epoch": 0.27385892116182575, "grad_norm": 2.958195924758911, "learning_rate": 1.3692946058091288e-05, "log_odds_chosen": 1.946399450302124, "log_odds_ratio": -0.2867945432662964, "logits/chosen": -0.6446447372436523, "logits/rejected": -0.6454988718032837, "logps/chosen": -0.1034713089466095, "logps/rejected": -0.30294114351272583, "loss": 4.4459, "nll_loss": 1.0828043222427368, "rewards/accuracies": 0.875, "rewards/chosen": -0.010347130708396435, "rewards/margins": 0.019946984946727753, "rewards/rejected": -0.030294114723801613, "step": 396 }, { "epoch": 0.2745504840940526, "grad_norm": 7.297904968261719, "learning_rate": 1.372752420470263e-05, "log_odds_chosen": 0.1172141432762146, "log_odds_ratio": -0.780999481678009, "logits/chosen": -0.7667773365974426, "logits/rejected": -0.7440715432167053, "logps/chosen": -0.17577888071537018, "logps/rejected": -0.20231610536575317, "loss": 5.8445, "nll_loss": 1.38302481174469, "rewards/accuracies": 0.5, "rewards/chosen": -0.017577888444066048, "rewards/margins": 0.002653722185641527, "rewards/rejected": -0.02023160830140114, "step": 397 }, { "epoch": 0.2752420470262794, "grad_norm": 5.8652849197387695, "learning_rate": 1.3762102351313971e-05, "log_odds_chosen": 1.4110496044158936, "log_odds_ratio": -0.7352426052093506, "logits/chosen": -0.7150549292564392, "logits/rejected": -0.704857349395752, "logps/chosen": -0.18836373090744019, "logps/rejected": -0.3111065924167633, "loss": 4.5665, "nll_loss": 1.0680984258651733, "rewards/accuracies": 0.75, "rewards/chosen": -0.01883637346327305, "rewards/margins": 0.01227428950369358, "rewards/rejected": -0.03111066296696663, "step": 398 }, { "epoch": 0.27593360995850624, "grad_norm": 6.426219463348389, "learning_rate": 1.3796680497925313e-05, "log_odds_chosen": 0.8483314514160156, "log_odds_ratio": -1.1913480758666992, "logits/chosen": -1.009595513343811, "logits/rejected": -1.0208425521850586, "logps/chosen": -0.41472068428993225, "logps/rejected": -0.3666849434375763, "loss": 6.2569, "nll_loss": 1.4450807571411133, "rewards/accuracies": 0.5, "rewards/chosen": -0.041472069919109344, "rewards/margins": -0.00480357650667429, "rewards/rejected": -0.03666849434375763, "step": 399 }, { "epoch": 0.2766251728907331, "grad_norm": 4.07353401184082, "learning_rate": 1.3831258644536654e-05, "log_odds_chosen": 0.9386905431747437, "log_odds_ratio": -0.463863343000412, "logits/chosen": -0.7032485008239746, "logits/rejected": -0.7028173208236694, "logps/chosen": -0.107542023062706, "logps/rejected": -0.3094400465488434, "loss": 4.3246, "nll_loss": 1.0347694158554077, "rewards/accuracies": 0.75, "rewards/chosen": -0.010754203423857689, "rewards/margins": 0.0201898030936718, "rewards/rejected": -0.03094400465488434, "step": 400 }, { "epoch": 0.2773167358229599, "grad_norm": 3.0598976612091064, "learning_rate": 1.3865836791147996e-05, "log_odds_chosen": 1.7289857864379883, "log_odds_ratio": -0.2864301800727844, "logits/chosen": -0.7190135717391968, "logits/rejected": -0.7345994710922241, "logps/chosen": -0.11391860246658325, "logps/rejected": -0.3247866630554199, "loss": 3.956, "nll_loss": 0.9603667259216309, "rewards/accuracies": 0.875, "rewards/chosen": -0.011391859501600266, "rewards/margins": 0.021086808294057846, "rewards/rejected": -0.03247866779565811, "step": 401 }, { "epoch": 0.27800829875518673, "grad_norm": 4.122477054595947, "learning_rate": 1.3900414937759337e-05, "log_odds_chosen": 1.7716256380081177, "log_odds_ratio": -0.3900958299636841, "logits/chosen": -0.848552942276001, "logits/rejected": -0.9127116203308105, "logps/chosen": -0.14013731479644775, "logps/rejected": -0.5122645497322083, "loss": 4.9266, "nll_loss": 1.1926302909851074, "rewards/accuracies": 0.75, "rewards/chosen": -0.014013731852173805, "rewards/margins": 0.03721272572875023, "rewards/rejected": -0.05122645944356918, "step": 402 }, { "epoch": 0.27869986168741356, "grad_norm": 4.57697868347168, "learning_rate": 1.3934993084370679e-05, "log_odds_chosen": 1.1865309476852417, "log_odds_ratio": -0.5246220827102661, "logits/chosen": -0.8770260810852051, "logits/rejected": -0.8688606023788452, "logps/chosen": -0.15459983050823212, "logps/rejected": -0.391287624835968, "loss": 5.626, "nll_loss": 1.3540284633636475, "rewards/accuracies": 0.625, "rewards/chosen": -0.015459983609616756, "rewards/margins": 0.02366877906024456, "rewards/rejected": -0.03912876546382904, "step": 403 }, { "epoch": 0.2793914246196404, "grad_norm": 5.066405296325684, "learning_rate": 1.396957123098202e-05, "log_odds_chosen": 2.0249691009521484, "log_odds_ratio": -0.4475286602973938, "logits/chosen": -0.9386860132217407, "logits/rejected": -0.9849787354469299, "logps/chosen": -0.1488751769065857, "logps/rejected": -0.47698670625686646, "loss": 6.2464, "nll_loss": 1.5168423652648926, "rewards/accuracies": 0.625, "rewards/chosen": -0.014887519180774689, "rewards/margins": 0.03281114995479584, "rewards/rejected": -0.047698669135570526, "step": 404 }, { "epoch": 0.2800829875518672, "grad_norm": 4.11107063293457, "learning_rate": 1.4004149377593362e-05, "log_odds_chosen": 1.035890817642212, "log_odds_ratio": -0.4360055923461914, "logits/chosen": -0.8740240335464478, "logits/rejected": -0.88525390625, "logps/chosen": -0.10174266248941422, "logps/rejected": -0.2971685528755188, "loss": 5.0939, "nll_loss": 1.2298824787139893, "rewards/accuracies": 0.75, "rewards/chosen": -0.010174266062676907, "rewards/margins": 0.019542589783668518, "rewards/rejected": -0.029716856777668, "step": 405 }, { "epoch": 0.28077455048409405, "grad_norm": 4.380761623382568, "learning_rate": 1.4038727524204704e-05, "log_odds_chosen": 1.4169279336929321, "log_odds_ratio": -0.34448444843292236, "logits/chosen": -0.9655833840370178, "logits/rejected": -0.9509184956550598, "logps/chosen": -0.11824437975883484, "logps/rejected": -0.33905068039894104, "loss": 4.5858, "nll_loss": 1.111992597579956, "rewards/accuracies": 0.75, "rewards/chosen": -0.011824438348412514, "rewards/margins": 0.02208063006401062, "rewards/rejected": -0.03390507027506828, "step": 406 }, { "epoch": 0.2814661134163209, "grad_norm": 4.629016399383545, "learning_rate": 1.4073305670816045e-05, "log_odds_chosen": 3.032594680786133, "log_odds_ratio": -0.40330177545547485, "logits/chosen": -0.8205009698867798, "logits/rejected": -0.8420271873474121, "logps/chosen": -0.0831131562590599, "logps/rejected": -0.4899987578392029, "loss": 4.7165, "nll_loss": 1.1388006210327148, "rewards/accuracies": 0.875, "rewards/chosen": -0.00831131637096405, "rewards/margins": 0.04068855941295624, "rewards/rejected": -0.04899987950921059, "step": 407 }, { "epoch": 0.2821576763485477, "grad_norm": 5.947005748748779, "learning_rate": 1.4107883817427387e-05, "log_odds_chosen": 1.4556632041931152, "log_odds_ratio": -0.5705589056015015, "logits/chosen": -0.6047676205635071, "logits/rejected": -0.6060796976089478, "logps/chosen": -0.17796441912651062, "logps/rejected": -0.3482607305049896, "loss": 4.8253, "nll_loss": 1.1492795944213867, "rewards/accuracies": 0.75, "rewards/chosen": -0.01779644377529621, "rewards/margins": 0.01702963188290596, "rewards/rejected": -0.03482607752084732, "step": 408 }, { "epoch": 0.28284923928077454, "grad_norm": 3.4069530963897705, "learning_rate": 1.4142461964038728e-05, "log_odds_chosen": 2.048111915588379, "log_odds_ratio": -0.26843875646591187, "logits/chosen": -0.6469959616661072, "logits/rejected": -0.6459199786186218, "logps/chosen": -0.08298209309577942, "logps/rejected": -0.2996857762336731, "loss": 4.1631, "nll_loss": 1.0139367580413818, "rewards/accuracies": 0.875, "rewards/chosen": -0.008298208937048912, "rewards/margins": 0.021670371294021606, "rewards/rejected": -0.02996858023107052, "step": 409 }, { "epoch": 0.2835408022130014, "grad_norm": 8.421577453613281, "learning_rate": 1.417704011065007e-05, "log_odds_chosen": 1.7312108278274536, "log_odds_ratio": -0.6507084369659424, "logits/chosen": -0.7071995735168457, "logits/rejected": -0.7199549674987793, "logps/chosen": -0.10671839118003845, "logps/rejected": -0.22450372576713562, "loss": 5.4683, "nll_loss": 1.3020111322402954, "rewards/accuracies": 0.5, "rewards/chosen": -0.010671839118003845, "rewards/margins": 0.011778535321354866, "rewards/rejected": -0.02245037443935871, "step": 410 }, { "epoch": 0.2842323651452282, "grad_norm": 4.508456230163574, "learning_rate": 1.4211618257261411e-05, "log_odds_chosen": 2.592770576477051, "log_odds_ratio": -0.2583865523338318, "logits/chosen": -0.7199490070343018, "logits/rejected": -0.7735538482666016, "logps/chosen": -0.04898786544799805, "logps/rejected": -0.36356252431869507, "loss": 5.7314, "nll_loss": 1.4070085287094116, "rewards/accuracies": 0.875, "rewards/chosen": -0.004898787010461092, "rewards/margins": 0.03145746514201164, "rewards/rejected": -0.03635625168681145, "step": 411 }, { "epoch": 0.28492392807745504, "grad_norm": 4.336709499359131, "learning_rate": 1.4246196403872753e-05, "log_odds_chosen": 1.3862974643707275, "log_odds_ratio": -0.333371102809906, "logits/chosen": -0.7664021253585815, "logits/rejected": -0.7992305755615234, "logps/chosen": -0.1225011795759201, "logps/rejected": -0.3386220932006836, "loss": 5.4525, "nll_loss": 1.3297803401947021, "rewards/accuracies": 0.875, "rewards/chosen": -0.01225011795759201, "rewards/margins": 0.021612092852592468, "rewards/rejected": -0.03386221081018448, "step": 412 }, { "epoch": 0.28561549100968187, "grad_norm": 5.788278102874756, "learning_rate": 1.4280774550484095e-05, "log_odds_chosen": -0.11120755970478058, "log_odds_ratio": -0.7838518023490906, "logits/chosen": -1.0839319229125977, "logits/rejected": -1.0791661739349365, "logps/chosen": -0.2397696077823639, "logps/rejected": -0.22414036095142365, "loss": 5.2104, "nll_loss": 1.2242244482040405, "rewards/accuracies": 0.5, "rewards/chosen": -0.023976963013410568, "rewards/margins": -0.00156292540486902, "rewards/rejected": -0.022414034232497215, "step": 413 }, { "epoch": 0.2863070539419087, "grad_norm": 3.2388267517089844, "learning_rate": 1.4315352697095436e-05, "log_odds_chosen": 2.4697952270507812, "log_odds_ratio": -0.2609533369541168, "logits/chosen": -0.9798930883407593, "logits/rejected": -1.0141501426696777, "logps/chosen": -0.10586879402399063, "logps/rejected": -0.5839859247207642, "loss": 5.1401, "nll_loss": 1.2589378356933594, "rewards/accuracies": 0.875, "rewards/chosen": -0.010586880147457123, "rewards/margins": 0.047811709344387054, "rewards/rejected": -0.05839858949184418, "step": 414 }, { "epoch": 0.2869986168741355, "grad_norm": 6.667247772216797, "learning_rate": 1.4349930843706778e-05, "log_odds_chosen": 1.369337558746338, "log_odds_ratio": -0.5773957371711731, "logits/chosen": -0.7696110010147095, "logits/rejected": -0.8015477657318115, "logps/chosen": -0.14734028279781342, "logps/rejected": -0.4369919002056122, "loss": 4.6701, "nll_loss": 1.1097811460494995, "rewards/accuracies": 0.875, "rewards/chosen": -0.014734027907252312, "rewards/margins": 0.028965162113308907, "rewards/rejected": -0.04369918629527092, "step": 415 }, { "epoch": 0.28769017980636236, "grad_norm": 4.8606133460998535, "learning_rate": 1.438450899031812e-05, "log_odds_chosen": 1.2960329055786133, "log_odds_ratio": -0.3151377737522125, "logits/chosen": -0.7202938199043274, "logits/rejected": -0.7407795190811157, "logps/chosen": -0.1336207389831543, "logps/rejected": -0.32754695415496826, "loss": 6.095, "nll_loss": 1.4922322034835815, "rewards/accuracies": 1.0, "rewards/chosen": -0.01336207427084446, "rewards/margins": 0.019392620772123337, "rewards/rejected": -0.032754696905612946, "step": 416 }, { "epoch": 0.2883817427385892, "grad_norm": 3.6973016262054443, "learning_rate": 1.441908713692946e-05, "log_odds_chosen": 1.6160345077514648, "log_odds_ratio": -0.4387581944465637, "logits/chosen": -0.5735676884651184, "logits/rejected": -0.5724626779556274, "logps/chosen": -0.16073375940322876, "logps/rejected": -0.3444352149963379, "loss": 3.3725, "nll_loss": 0.7992392778396606, "rewards/accuracies": 0.625, "rewards/chosen": -0.016073375940322876, "rewards/margins": 0.018370144069194794, "rewards/rejected": -0.03444352000951767, "step": 417 }, { "epoch": 0.28907330567081607, "grad_norm": 4.923066139221191, "learning_rate": 1.4453665283540804e-05, "log_odds_chosen": 2.589185953140259, "log_odds_ratio": -0.3006196916103363, "logits/chosen": -0.8475648164749146, "logits/rejected": -0.8302005529403687, "logps/chosen": -0.06312157213687897, "logps/rejected": -0.41984111070632935, "loss": 5.6696, "nll_loss": 1.3873448371887207, "rewards/accuracies": 0.75, "rewards/chosen": -0.006312157027423382, "rewards/margins": 0.03567195683717728, "rewards/rejected": -0.041984111070632935, "step": 418 }, { "epoch": 0.2897648686030429, "grad_norm": 5.649261951446533, "learning_rate": 1.4488243430152146e-05, "log_odds_chosen": 0.3662152886390686, "log_odds_ratio": -0.6572021245956421, "logits/chosen": -0.748294472694397, "logits/rejected": -0.7791862487792969, "logps/chosen": -0.22678637504577637, "logps/rejected": -0.34637874364852905, "loss": 6.1642, "nll_loss": 1.4753245115280151, "rewards/accuracies": 0.625, "rewards/chosen": -0.022678637877106667, "rewards/margins": 0.011959237977862358, "rewards/rejected": -0.034637875854969025, "step": 419 }, { "epoch": 0.29045643153526973, "grad_norm": 10.321755409240723, "learning_rate": 1.4522821576763487e-05, "log_odds_chosen": 1.4131522178649902, "log_odds_ratio": -0.981365442276001, "logits/chosen": -0.7823167443275452, "logits/rejected": -0.7747278213500977, "logps/chosen": -0.23187300562858582, "logps/rejected": -0.38720259070396423, "loss": 5.0025, "nll_loss": 1.1524810791015625, "rewards/accuracies": 0.5, "rewards/chosen": -0.02318730019032955, "rewards/margins": 0.0155329629778862, "rewards/rejected": -0.0387202613055706, "step": 420 }, { "epoch": 0.29114799446749656, "grad_norm": 5.31191349029541, "learning_rate": 1.4557399723374829e-05, "log_odds_chosen": 2.2740554809570312, "log_odds_ratio": -0.4833701252937317, "logits/chosen": -0.9618872404098511, "logits/rejected": -0.974646270275116, "logps/chosen": -0.1343914270401001, "logps/rejected": -0.6901106834411621, "loss": 6.4381, "nll_loss": 1.56119966506958, "rewards/accuracies": 0.625, "rewards/chosen": -0.01343914121389389, "rewards/margins": 0.05557192862033844, "rewards/rejected": -0.06901106983423233, "step": 421 }, { "epoch": 0.2918395573997234, "grad_norm": 4.671203136444092, "learning_rate": 1.459197786998617e-05, "log_odds_chosen": 0.7123202085494995, "log_odds_ratio": -0.5865266919136047, "logits/chosen": -1.0744010210037231, "logits/rejected": -1.1003762483596802, "logps/chosen": -0.1542780101299286, "logps/rejected": -0.2637343108654022, "loss": 5.6411, "nll_loss": 1.3516212701797485, "rewards/accuracies": 0.75, "rewards/chosen": -0.01542779989540577, "rewards/margins": 0.010945630259811878, "rewards/rejected": -0.026373429223895073, "step": 422 }, { "epoch": 0.2925311203319502, "grad_norm": 7.3605265617370605, "learning_rate": 1.4626556016597512e-05, "log_odds_chosen": 1.9674979448318481, "log_odds_ratio": -0.4501616656780243, "logits/chosen": -0.8395421504974365, "logits/rejected": -0.8268611431121826, "logps/chosen": -0.14120453596115112, "logps/rejected": -0.4928848147392273, "loss": 5.3382, "nll_loss": 1.2895439863204956, "rewards/accuracies": 0.75, "rewards/chosen": -0.014120453037321568, "rewards/margins": 0.03516802936792374, "rewards/rejected": -0.04928848147392273, "step": 423 }, { "epoch": 0.29322268326417705, "grad_norm": 4.548860549926758, "learning_rate": 1.4661134163208853e-05, "log_odds_chosen": 1.915052890777588, "log_odds_ratio": -0.5355948805809021, "logits/chosen": -0.9863214492797852, "logits/rejected": -1.032691240310669, "logps/chosen": -0.1866421401500702, "logps/rejected": -0.4246392846107483, "loss": 6.0418, "nll_loss": 1.4568803310394287, "rewards/accuracies": 0.75, "rewards/chosen": -0.01866421476006508, "rewards/margins": 0.023799719288945198, "rewards/rejected": -0.042463935911655426, "step": 424 }, { "epoch": 0.2939142461964039, "grad_norm": 3.851501226425171, "learning_rate": 1.4695712309820195e-05, "log_odds_chosen": 1.2596676349639893, "log_odds_ratio": -0.3075161278247833, "logits/chosen": -0.7728994488716125, "logits/rejected": -0.7785760760307312, "logps/chosen": -0.0999259352684021, "logps/rejected": -0.3053882122039795, "loss": 4.9755, "nll_loss": 1.213114619255066, "rewards/accuracies": 1.0, "rewards/chosen": -0.009992592968046665, "rewards/margins": 0.02054622769355774, "rewards/rejected": -0.03053881973028183, "step": 425 }, { "epoch": 0.2946058091286307, "grad_norm": 5.264786243438721, "learning_rate": 1.4730290456431537e-05, "log_odds_chosen": 0.816191554069519, "log_odds_ratio": -0.7403416633605957, "logits/chosen": -1.103607177734375, "logits/rejected": -1.0989108085632324, "logps/chosen": -0.1591918021440506, "logps/rejected": -0.33083173632621765, "loss": 5.3931, "nll_loss": 1.2742496728897095, "rewards/accuracies": 0.625, "rewards/chosen": -0.01591918058693409, "rewards/margins": 0.017163995653390884, "rewards/rejected": -0.033083174377679825, "step": 426 }, { "epoch": 0.29529737206085754, "grad_norm": 4.415341854095459, "learning_rate": 1.4764868603042878e-05, "log_odds_chosen": 1.500899076461792, "log_odds_ratio": -0.3782936930656433, "logits/chosen": -0.8219119310379028, "logits/rejected": -0.8497810363769531, "logps/chosen": -0.062062859535217285, "logps/rejected": -0.2865581512451172, "loss": 4.4984, "nll_loss": 1.0867613554000854, "rewards/accuracies": 0.875, "rewards/chosen": -0.0062062861397862434, "rewards/margins": 0.02244953066110611, "rewards/rejected": -0.02865581586956978, "step": 427 }, { "epoch": 0.2959889349930844, "grad_norm": 4.308657646179199, "learning_rate": 1.479944674965422e-05, "log_odds_chosen": 2.037611484527588, "log_odds_ratio": -0.3803531527519226, "logits/chosen": -0.5021131634712219, "logits/rejected": -0.5083498358726501, "logps/chosen": -0.07269049435853958, "logps/rejected": -0.38383227586746216, "loss": 5.7543, "nll_loss": 1.4005486965179443, "rewards/accuracies": 0.875, "rewards/chosen": -0.007269049528986216, "rewards/margins": 0.031114177778363228, "rewards/rejected": -0.03838322311639786, "step": 428 }, { "epoch": 0.2966804979253112, "grad_norm": 4.234870433807373, "learning_rate": 1.4834024896265561e-05, "log_odds_chosen": 2.4187357425689697, "log_odds_ratio": -0.3537042737007141, "logits/chosen": -0.7538959383964539, "logits/rejected": -0.7366330623626709, "logps/chosen": -0.08905172348022461, "logps/rejected": -0.35023993253707886, "loss": 3.5571, "nll_loss": 0.853912353515625, "rewards/accuracies": 0.75, "rewards/chosen": -0.008905172348022461, "rewards/margins": 0.026118820533156395, "rewards/rejected": -0.035023994743824005, "step": 429 }, { "epoch": 0.29737206085753803, "grad_norm": 4.188929557800293, "learning_rate": 1.4868603042876903e-05, "log_odds_chosen": 2.099421739578247, "log_odds_ratio": -0.36733826994895935, "logits/chosen": -0.8708817362785339, "logits/rejected": -0.8622686266899109, "logps/chosen": -0.09997798502445221, "logps/rejected": -0.2955341339111328, "loss": 5.2648, "nll_loss": 1.2794567346572876, "rewards/accuracies": 0.875, "rewards/chosen": -0.009997798129916191, "rewards/margins": 0.01955561712384224, "rewards/rejected": -0.02955341339111328, "step": 430 }, { "epoch": 0.29806362378976486, "grad_norm": 6.499512672424316, "learning_rate": 1.4903181189488244e-05, "log_odds_chosen": 1.8117034435272217, "log_odds_ratio": -0.44585707783699036, "logits/chosen": -0.6863371133804321, "logits/rejected": -0.7225992679595947, "logps/chosen": -0.13132785260677338, "logps/rejected": -0.5516153573989868, "loss": 7.6748, "nll_loss": 1.8741196393966675, "rewards/accuracies": 0.75, "rewards/chosen": -0.013132785446941853, "rewards/margins": 0.0420287549495697, "rewards/rejected": -0.05516153573989868, "step": 431 }, { "epoch": 0.2987551867219917, "grad_norm": 5.252923965454102, "learning_rate": 1.4937759336099586e-05, "log_odds_chosen": 0.8260930776596069, "log_odds_ratio": -0.5757949352264404, "logits/chosen": -0.7838727831840515, "logits/rejected": -0.7577035427093506, "logps/chosen": -0.2402959018945694, "logps/rejected": -0.39940059185028076, "loss": 6.0088, "nll_loss": 1.444616675376892, "rewards/accuracies": 0.625, "rewards/chosen": -0.02402959018945694, "rewards/margins": 0.015910470858216286, "rewards/rejected": -0.039940062910318375, "step": 432 }, { "epoch": 0.2994467496542185, "grad_norm": 6.205774784088135, "learning_rate": 1.4972337482710927e-05, "log_odds_chosen": 0.7388364672660828, "log_odds_ratio": -0.7631775140762329, "logits/chosen": -1.18083655834198, "logits/rejected": -1.2100516557693481, "logps/chosen": -0.1917128562927246, "logps/rejected": -0.25602954626083374, "loss": 5.6047, "nll_loss": 1.324852705001831, "rewards/accuracies": 0.5, "rewards/chosen": -0.01917128637433052, "rewards/margins": 0.006431670393794775, "rewards/rejected": -0.025602955371141434, "step": 433 }, { "epoch": 0.30013831258644535, "grad_norm": 4.936527252197266, "learning_rate": 1.5006915629322269e-05, "log_odds_chosen": 1.6309081315994263, "log_odds_ratio": -0.3397252559661865, "logits/chosen": -0.7085134983062744, "logits/rejected": -0.6994410157203674, "logps/chosen": -0.09206433594226837, "logps/rejected": -0.2645779252052307, "loss": 4.6834, "nll_loss": 1.1368815898895264, "rewards/accuracies": 0.75, "rewards/chosen": -0.009206433780491352, "rewards/margins": 0.017251359298825264, "rewards/rejected": -0.02645779401063919, "step": 434 }, { "epoch": 0.3008298755186722, "grad_norm": 4.56192684173584, "learning_rate": 1.504149377593361e-05, "log_odds_chosen": 1.898086667060852, "log_odds_ratio": -0.3460543155670166, "logits/chosen": -1.0442228317260742, "logits/rejected": -1.0632047653198242, "logps/chosen": -0.11367730796337128, "logps/rejected": -0.4655604064464569, "loss": 4.9245, "nll_loss": 1.196526288986206, "rewards/accuracies": 0.75, "rewards/chosen": -0.011367732658982277, "rewards/margins": 0.03518830984830856, "rewards/rejected": -0.04655604064464569, "step": 435 }, { "epoch": 0.301521438450899, "grad_norm": 4.4461469650268555, "learning_rate": 1.5076071922544952e-05, "log_odds_chosen": 3.3471388816833496, "log_odds_ratio": -0.146846741437912, "logits/chosen": -0.8048645257949829, "logits/rejected": -0.7955081462860107, "logps/chosen": -0.05524428188800812, "logps/rejected": -0.6378879547119141, "loss": 5.6174, "nll_loss": 1.3896702527999878, "rewards/accuracies": 1.0, "rewards/chosen": -0.005524428561329842, "rewards/margins": 0.058264363557100296, "rewards/rejected": -0.06378879398107529, "step": 436 }, { "epoch": 0.30221300138312585, "grad_norm": 3.9305005073547363, "learning_rate": 1.5110650069156294e-05, "log_odds_chosen": 0.9514443874359131, "log_odds_ratio": -0.4392205476760864, "logits/chosen": -0.6176141500473022, "logits/rejected": -0.6424593925476074, "logps/chosen": -0.11313779652118683, "logps/rejected": -0.22065243124961853, "loss": 4.0434, "nll_loss": 0.9669332504272461, "rewards/accuracies": 0.75, "rewards/chosen": -0.011313780210912228, "rewards/margins": 0.010751464404165745, "rewards/rejected": -0.022065244615077972, "step": 437 }, { "epoch": 0.3029045643153527, "grad_norm": 5.2028045654296875, "learning_rate": 1.5145228215767635e-05, "log_odds_chosen": 1.9534803628921509, "log_odds_ratio": -0.5827986598014832, "logits/chosen": -0.9196781516075134, "logits/rejected": -0.8753281235694885, "logps/chosen": -0.11949843168258667, "logps/rejected": -0.46784883737564087, "loss": 4.4481, "nll_loss": 1.053735375404358, "rewards/accuracies": 0.75, "rewards/chosen": -0.011949843727052212, "rewards/margins": 0.03483504056930542, "rewards/rejected": -0.046784885227680206, "step": 438 }, { "epoch": 0.3035961272475795, "grad_norm": 4.5554518699646, "learning_rate": 1.5179806362378977e-05, "log_odds_chosen": 2.1872141361236572, "log_odds_ratio": -0.5206718444824219, "logits/chosen": -0.4848793148994446, "logits/rejected": -0.508102297782898, "logps/chosen": -0.18532368540763855, "logps/rejected": -0.5864068865776062, "loss": 4.0094, "nll_loss": 0.9502801895141602, "rewards/accuracies": 0.75, "rewards/chosen": -0.018532367423176765, "rewards/margins": 0.040108323097229004, "rewards/rejected": -0.05864068865776062, "step": 439 }, { "epoch": 0.30428769017980634, "grad_norm": 5.9851202964782715, "learning_rate": 1.5214384508990317e-05, "log_odds_chosen": 2.0922446250915527, "log_odds_ratio": -0.5738884210586548, "logits/chosen": -0.6798893213272095, "logits/rejected": -0.7033019661903381, "logps/chosen": -0.1237122043967247, "logps/rejected": -0.3784821629524231, "loss": 4.3193, "nll_loss": 1.0224275588989258, "rewards/accuracies": 0.625, "rewards/chosen": -0.012371220625936985, "rewards/margins": 0.02547699585556984, "rewards/rejected": -0.03784821555018425, "step": 440 }, { "epoch": 0.3049792531120332, "grad_norm": 6.054779529571533, "learning_rate": 1.5248962655601662e-05, "log_odds_chosen": 0.9233060479164124, "log_odds_ratio": -0.6769165396690369, "logits/chosen": -0.8891846537590027, "logits/rejected": -0.9096454381942749, "logps/chosen": -0.16924817860126495, "logps/rejected": -0.2498570829629898, "loss": 5.3895, "nll_loss": 1.2796918153762817, "rewards/accuracies": 0.625, "rewards/chosen": -0.016924817115068436, "rewards/margins": 0.00806089024990797, "rewards/rejected": -0.02498571015894413, "step": 441 }, { "epoch": 0.30567081604426005, "grad_norm": 5.956704139709473, "learning_rate": 1.5283540802213005e-05, "log_odds_chosen": 1.323585033416748, "log_odds_ratio": -0.505527913570404, "logits/chosen": -0.6470650434494019, "logits/rejected": -0.6922782063484192, "logps/chosen": -0.14604339003562927, "logps/rejected": -0.30373379588127136, "loss": 5.2503, "nll_loss": 1.2620216608047485, "rewards/accuracies": 0.625, "rewards/chosen": -0.014604338444769382, "rewards/margins": 0.01576904021203518, "rewards/rejected": -0.030373381450772285, "step": 442 }, { "epoch": 0.3063623789764869, "grad_norm": 7.952620506286621, "learning_rate": 1.5318118948824346e-05, "log_odds_chosen": 2.377955913543701, "log_odds_ratio": -1.1432132720947266, "logits/chosen": -0.7932155728340149, "logits/rejected": -0.7838965058326721, "logps/chosen": -0.27961477637290955, "logps/rejected": -0.3812497854232788, "loss": 4.0085, "nll_loss": 0.887802243232727, "rewards/accuracies": 0.625, "rewards/chosen": -0.027961477637290955, "rewards/margins": 0.010163499042391777, "rewards/rejected": -0.03812497854232788, "step": 443 }, { "epoch": 0.3070539419087137, "grad_norm": 4.7465362548828125, "learning_rate": 1.5352697095435685e-05, "log_odds_chosen": 2.2108235359191895, "log_odds_ratio": -0.5854645371437073, "logits/chosen": -0.40235084295272827, "logits/rejected": -0.43530935049057007, "logps/chosen": -0.2257867455482483, "logps/rejected": -0.4482637047767639, "loss": 5.0511, "nll_loss": 1.204227328300476, "rewards/accuracies": 0.625, "rewards/chosen": -0.02257867529988289, "rewards/margins": 0.02224769815802574, "rewards/rejected": -0.04482637345790863, "step": 444 }, { "epoch": 0.30774550484094054, "grad_norm": 5.645203113555908, "learning_rate": 1.5387275242047026e-05, "log_odds_chosen": 2.5590360164642334, "log_odds_ratio": -0.5274600982666016, "logits/chosen": -0.9219954013824463, "logits/rejected": -0.8904546499252319, "logps/chosen": -0.13450196385383606, "logps/rejected": -0.6550885438919067, "loss": 7.0305, "nll_loss": 1.7048795223236084, "rewards/accuracies": 0.75, "rewards/chosen": -0.013450197875499725, "rewards/margins": 0.052058663219213486, "rewards/rejected": -0.06550885736942291, "step": 445 }, { "epoch": 0.3084370677731674, "grad_norm": 16.25078010559082, "learning_rate": 1.5421853388658368e-05, "log_odds_chosen": 3.1059811115264893, "log_odds_ratio": -0.7972801923751831, "logits/chosen": -0.3910766839981079, "logits/rejected": -0.4266047775745392, "logps/chosen": -0.13340765237808228, "logps/rejected": -0.6035365462303162, "loss": 3.8148, "nll_loss": 0.873967707157135, "rewards/accuracies": 0.75, "rewards/chosen": -0.013340767472982407, "rewards/margins": 0.04701288416981697, "rewards/rejected": -0.06035365164279938, "step": 446 }, { "epoch": 0.3091286307053942, "grad_norm": 5.219182014465332, "learning_rate": 1.545643153526971e-05, "log_odds_chosen": 1.8788645267486572, "log_odds_ratio": -0.4780917465686798, "logits/chosen": -0.788673996925354, "logits/rejected": -0.8205811381340027, "logps/chosen": -0.11047720164060593, "logps/rejected": -0.3218545913696289, "loss": 4.8663, "nll_loss": 1.1687740087509155, "rewards/accuracies": 0.75, "rewards/chosen": -0.011047719977796078, "rewards/margins": 0.021137740463018417, "rewards/rejected": -0.03218546137213707, "step": 447 }, { "epoch": 0.30982019363762103, "grad_norm": 4.042296886444092, "learning_rate": 1.549100968188105e-05, "log_odds_chosen": 2.074093818664551, "log_odds_ratio": -0.3665195107460022, "logits/chosen": -0.6632601022720337, "logits/rejected": -0.7092807292938232, "logps/chosen": -0.10485205054283142, "logps/rejected": -0.4138421416282654, "loss": 4.8116, "nll_loss": 1.1662427186965942, "rewards/accuracies": 0.75, "rewards/chosen": -0.010485205799341202, "rewards/margins": 0.030899008736014366, "rewards/rejected": -0.04138421267271042, "step": 448 }, { "epoch": 0.31051175656984786, "grad_norm": 3.6253623962402344, "learning_rate": 1.5525587828492392e-05, "log_odds_chosen": 1.5242533683776855, "log_odds_ratio": -0.42981696128845215, "logits/chosen": -0.6863400340080261, "logits/rejected": -0.679867684841156, "logps/chosen": -0.151654452085495, "logps/rejected": -0.3692028224468231, "loss": 5.0138, "nll_loss": 1.2104747295379639, "rewards/accuracies": 0.75, "rewards/chosen": -0.015165446326136589, "rewards/margins": 0.021754834800958633, "rewards/rejected": -0.03692027926445007, "step": 449 }, { "epoch": 0.3112033195020747, "grad_norm": 5.387983322143555, "learning_rate": 1.5560165975103734e-05, "log_odds_chosen": 1.390764594078064, "log_odds_ratio": -0.6059899926185608, "logits/chosen": -1.0066139698028564, "logits/rejected": -0.9922845363616943, "logps/chosen": -0.167924165725708, "logps/rejected": -0.27415433526039124, "loss": 3.7726, "nll_loss": 0.8825591802597046, "rewards/accuracies": 0.625, "rewards/chosen": -0.0167924165725708, "rewards/margins": 0.010623017325997353, "rewards/rejected": -0.027415433898568153, "step": 450 }, { "epoch": 0.3118948824343015, "grad_norm": 6.010692596435547, "learning_rate": 1.5594744121715076e-05, "log_odds_chosen": 2.2689998149871826, "log_odds_ratio": -0.353574275970459, "logits/chosen": -1.0005192756652832, "logits/rejected": -0.9740056991577148, "logps/chosen": -0.11832346022129059, "logps/rejected": -0.49686843156814575, "loss": 6.1829, "nll_loss": 1.5103559494018555, "rewards/accuracies": 0.875, "rewards/chosen": -0.011832346208393574, "rewards/margins": 0.03785449266433716, "rewards/rejected": -0.049686841666698456, "step": 451 }, { "epoch": 0.31258644536652835, "grad_norm": 5.422815799713135, "learning_rate": 1.5629322268326417e-05, "log_odds_chosen": 1.0189049243927002, "log_odds_ratio": -0.6779848337173462, "logits/chosen": -0.7761744260787964, "logits/rejected": -0.7803124189376831, "logps/chosen": -0.23633910715579987, "logps/rejected": -0.2328052967786789, "loss": 4.2235, "nll_loss": 0.988079845905304, "rewards/accuracies": 0.625, "rewards/chosen": -0.023633908480405807, "rewards/margins": -0.0003533794078975916, "rewards/rejected": -0.023280533030629158, "step": 452 }, { "epoch": 0.3132780082987552, "grad_norm": 4.928466796875, "learning_rate": 1.566390041493776e-05, "log_odds_chosen": 0.30510926246643066, "log_odds_ratio": -0.697333812713623, "logits/chosen": -0.7703343629837036, "logits/rejected": -0.7511740922927856, "logps/chosen": -0.19591832160949707, "logps/rejected": -0.2541714906692505, "loss": 6.5869, "nll_loss": 1.5769838094711304, "rewards/accuracies": 0.5, "rewards/chosen": -0.019591832533478737, "rewards/margins": 0.005825321190059185, "rewards/rejected": -0.025417150929570198, "step": 453 }, { "epoch": 0.313969571230982, "grad_norm": 3.4888973236083984, "learning_rate": 1.56984785615491e-05, "log_odds_chosen": 0.9905411601066589, "log_odds_ratio": -0.6393153667449951, "logits/chosen": -0.6037004590034485, "logits/rejected": -0.6011282801628113, "logps/chosen": -0.18709006905555725, "logps/rejected": -0.24275116622447968, "loss": 3.9967, "nll_loss": 0.9352476596832275, "rewards/accuracies": 0.5, "rewards/chosen": -0.018709007650613785, "rewards/margins": 0.005566108971834183, "rewards/rejected": -0.024275116622447968, "step": 454 }, { "epoch": 0.31466113416320884, "grad_norm": 3.14521861076355, "learning_rate": 1.5733056708160442e-05, "log_odds_chosen": 1.300163984298706, "log_odds_ratio": -0.6069784164428711, "logits/chosen": -0.6802898645401001, "logits/rejected": -0.6661124229431152, "logps/chosen": -0.09800460934638977, "logps/rejected": -0.19043129682540894, "loss": 3.8805, "nll_loss": 0.909426212310791, "rewards/accuracies": 0.75, "rewards/chosen": -0.009800462052226067, "rewards/margins": 0.009242668747901917, "rewards/rejected": -0.019043128937482834, "step": 455 }, { "epoch": 0.3153526970954357, "grad_norm": 5.763227939605713, "learning_rate": 1.5767634854771783e-05, "log_odds_chosen": 0.5036981701850891, "log_odds_ratio": -0.6302422285079956, "logits/chosen": -0.885701060295105, "logits/rejected": -0.8422713875770569, "logps/chosen": -0.08884412050247192, "logps/rejected": -0.15536990761756897, "loss": 4.7726, "nll_loss": 1.1301307678222656, "rewards/accuracies": 0.75, "rewards/chosen": -0.008884412236511707, "rewards/margins": 0.006652578711509705, "rewards/rejected": -0.015536990016698837, "step": 456 }, { "epoch": 0.3160442600276625, "grad_norm": 3.6244726181030273, "learning_rate": 1.5802213001383125e-05, "log_odds_chosen": -0.06441202759742737, "log_odds_ratio": -0.7889564037322998, "logits/chosen": -0.7942500114440918, "logits/rejected": -0.7812097072601318, "logps/chosen": -0.15385322272777557, "logps/rejected": -0.15002594888210297, "loss": 4.216, "nll_loss": 0.9750944972038269, "rewards/accuracies": 0.375, "rewards/chosen": -0.015385321341454983, "rewards/margins": -0.0003827265463769436, "rewards/rejected": -0.015002595260739326, "step": 457 }, { "epoch": 0.31673582295988933, "grad_norm": 8.510712623596191, "learning_rate": 1.5836791147994467e-05, "log_odds_chosen": 2.3712868690490723, "log_odds_ratio": -0.5050680041313171, "logits/chosen": -0.6574930548667908, "logits/rejected": -0.6933423280715942, "logps/chosen": -0.1535794585943222, "logps/rejected": -0.4186011552810669, "loss": 7.2306, "nll_loss": 1.757137656211853, "rewards/accuracies": 0.5, "rewards/chosen": -0.01535794697701931, "rewards/margins": 0.02650216966867447, "rewards/rejected": -0.04186011478304863, "step": 458 }, { "epoch": 0.31742738589211617, "grad_norm": 6.765665531158447, "learning_rate": 1.5871369294605808e-05, "log_odds_chosen": 0.1631660908460617, "log_odds_ratio": -0.7178246378898621, "logits/chosen": -1.0220084190368652, "logits/rejected": -1.027510643005371, "logps/chosen": -0.1926945149898529, "logps/rejected": -0.2280651032924652, "loss": 5.968, "nll_loss": 1.4202252626419067, "rewards/accuracies": 0.5, "rewards/chosen": -0.01926945522427559, "rewards/margins": 0.003537057200446725, "rewards/rejected": -0.02280651032924652, "step": 459 }, { "epoch": 0.318118948824343, "grad_norm": 6.357319355010986, "learning_rate": 1.590594744121715e-05, "log_odds_chosen": 1.1054508686065674, "log_odds_ratio": -0.6838944554328918, "logits/chosen": -1.1939641237258911, "logits/rejected": -1.2263739109039307, "logps/chosen": -0.10918106883764267, "logps/rejected": -0.3688642978668213, "loss": 6.5195, "nll_loss": 1.5614843368530273, "rewards/accuracies": 0.625, "rewards/chosen": -0.010918107815086842, "rewards/margins": 0.02596832811832428, "rewards/rejected": -0.03688643127679825, "step": 460 }, { "epoch": 0.3188105117565698, "grad_norm": 3.7231040000915527, "learning_rate": 1.594052558782849e-05, "log_odds_chosen": 2.0059876441955566, "log_odds_ratio": -0.4559151232242584, "logits/chosen": -0.224339097738266, "logits/rejected": -0.22532802820205688, "logps/chosen": -0.10951988399028778, "logps/rejected": -0.24661627411842346, "loss": 3.3677, "nll_loss": 0.7963347434997559, "rewards/accuracies": 0.875, "rewards/chosen": -0.010951988399028778, "rewards/margins": 0.013709638267755508, "rewards/rejected": -0.024661626666784286, "step": 461 }, { "epoch": 0.31950207468879666, "grad_norm": 6.247403621673584, "learning_rate": 1.5975103734439833e-05, "log_odds_chosen": 1.5408384799957275, "log_odds_ratio": -0.5715996623039246, "logits/chosen": -0.9309489130973816, "logits/rejected": -0.9221597909927368, "logps/chosen": -0.19324921071529388, "logps/rejected": -0.38850072026252747, "loss": 5.1677, "nll_loss": 1.23475980758667, "rewards/accuracies": 0.75, "rewards/chosen": -0.01932491920888424, "rewards/margins": 0.019525151699781418, "rewards/rejected": -0.038850072771310806, "step": 462 }, { "epoch": 0.3201936376210235, "grad_norm": 4.389791965484619, "learning_rate": 1.6009681881051174e-05, "log_odds_chosen": 1.7772701978683472, "log_odds_ratio": -0.446536660194397, "logits/chosen": -1.0960631370544434, "logits/rejected": -1.1086949110031128, "logps/chosen": -0.11626395583152771, "logps/rejected": -0.38875579833984375, "loss": 5.5352, "nll_loss": 1.3391516208648682, "rewards/accuracies": 0.875, "rewards/chosen": -0.01162639632821083, "rewards/margins": 0.027249179780483246, "rewards/rejected": -0.03887557610869408, "step": 463 }, { "epoch": 0.32088520055325037, "grad_norm": 4.312671661376953, "learning_rate": 1.604426002766252e-05, "log_odds_chosen": 0.7777257561683655, "log_odds_ratio": -0.5274251103401184, "logits/chosen": -1.0016077756881714, "logits/rejected": -1.0476646423339844, "logps/chosen": -0.42495298385620117, "logps/rejected": -0.5335454940795898, "loss": 4.4051, "nll_loss": 1.0485405921936035, "rewards/accuracies": 0.625, "rewards/chosen": -0.04249529913067818, "rewards/margins": 0.01085924543440342, "rewards/rejected": -0.053354546427726746, "step": 464 }, { "epoch": 0.3215767634854772, "grad_norm": 3.821538209915161, "learning_rate": 1.607883817427386e-05, "log_odds_chosen": 1.589055061340332, "log_odds_ratio": -0.5532294511795044, "logits/chosen": -0.7510684728622437, "logits/rejected": -0.753553569316864, "logps/chosen": -0.0831575095653534, "logps/rejected": -0.31230461597442627, "loss": 3.5947, "nll_loss": 0.8433531522750854, "rewards/accuracies": 0.625, "rewards/chosen": -0.00831575132906437, "rewards/margins": 0.022914709523320198, "rewards/rejected": -0.031230460852384567, "step": 465 }, { "epoch": 0.32226832641770403, "grad_norm": 4.866796493530273, "learning_rate": 1.6113416320885202e-05, "log_odds_chosen": 2.049978017807007, "log_odds_ratio": -0.5020076036453247, "logits/chosen": -1.1018893718719482, "logits/rejected": -1.086807131767273, "logps/chosen": -0.17358484864234924, "logps/rejected": -0.4650211036205292, "loss": 4.7312, "nll_loss": 1.1325989961624146, "rewards/accuracies": 0.75, "rewards/chosen": -0.017358483746647835, "rewards/margins": 0.029143624007701874, "rewards/rejected": -0.04650210589170456, "step": 466 }, { "epoch": 0.32295988934993086, "grad_norm": 4.3561906814575195, "learning_rate": 1.6147994467496544e-05, "log_odds_chosen": 1.2381858825683594, "log_odds_ratio": -0.5811147689819336, "logits/chosen": -1.0292110443115234, "logits/rejected": -1.0253915786743164, "logps/chosen": -0.21870014071464539, "logps/rejected": -0.46153783798217773, "loss": 4.2517, "nll_loss": 1.004812240600586, "rewards/accuracies": 0.625, "rewards/chosen": -0.021870015189051628, "rewards/margins": 0.024283768609166145, "rewards/rejected": -0.04615378752350807, "step": 467 }, { "epoch": 0.3236514522821577, "grad_norm": 6.4525017738342285, "learning_rate": 1.6182572614107886e-05, "log_odds_chosen": 1.469836950302124, "log_odds_ratio": -0.6502451300621033, "logits/chosen": -1.014503002166748, "logits/rejected": -0.9917067289352417, "logps/chosen": -0.2316511869430542, "logps/rejected": -0.4777286648750305, "loss": 6.1528, "nll_loss": 1.473185420036316, "rewards/accuracies": 0.625, "rewards/chosen": -0.02316511794924736, "rewards/margins": 0.024607747793197632, "rewards/rejected": -0.04777286574244499, "step": 468 }, { "epoch": 0.3243430152143845, "grad_norm": 5.878066539764404, "learning_rate": 1.6217150760719227e-05, "log_odds_chosen": 1.0376574993133545, "log_odds_ratio": -0.5789130926132202, "logits/chosen": -1.0515923500061035, "logits/rejected": -1.0690171718597412, "logps/chosen": -0.24930287897586823, "logps/rejected": -0.3463584780693054, "loss": 4.356, "nll_loss": 1.0311205387115479, "rewards/accuracies": 0.75, "rewards/chosen": -0.024930287152528763, "rewards/margins": 0.009705559350550175, "rewards/rejected": -0.03463584929704666, "step": 469 }, { "epoch": 0.32503457814661135, "grad_norm": 3.6630163192749023, "learning_rate": 1.625172890733057e-05, "log_odds_chosen": 1.750572919845581, "log_odds_ratio": -0.36723726987838745, "logits/chosen": -0.8682456016540527, "logits/rejected": -0.8956509828567505, "logps/chosen": -0.17868489027023315, "logps/rejected": -0.36978819966316223, "loss": 4.0217, "nll_loss": 0.968694806098938, "rewards/accuracies": 0.875, "rewards/chosen": -0.017868489027023315, "rewards/margins": 0.01911032944917679, "rewards/rejected": -0.036978818476200104, "step": 470 }, { "epoch": 0.3257261410788382, "grad_norm": 4.763913631439209, "learning_rate": 1.628630705394191e-05, "log_odds_chosen": 0.9818230271339417, "log_odds_ratio": -0.5156347751617432, "logits/chosen": -0.7395144701004028, "logits/rejected": -0.7224610447883606, "logps/chosen": -0.1149940937757492, "logps/rejected": -0.23291422426700592, "loss": 4.7129, "nll_loss": 1.126656413078308, "rewards/accuracies": 0.625, "rewards/chosen": -0.011499409563839436, "rewards/margins": 0.011792012490332127, "rewards/rejected": -0.023291420191526413, "step": 471 }, { "epoch": 0.326417704011065, "grad_norm": 5.32498836517334, "learning_rate": 1.6320885200553252e-05, "log_odds_chosen": 0.628685712814331, "log_odds_ratio": -0.44037503004074097, "logits/chosen": -1.0388284921646118, "logits/rejected": -1.0592763423919678, "logps/chosen": -0.18162651360034943, "logps/rejected": -0.32462698221206665, "loss": 6.1673, "nll_loss": 1.4977924823760986, "rewards/accuracies": 1.0, "rewards/chosen": -0.018162650987505913, "rewards/margins": 0.014300045557320118, "rewards/rejected": -0.032462697476148605, "step": 472 }, { "epoch": 0.32710926694329184, "grad_norm": 4.730746269226074, "learning_rate": 1.6355463347164593e-05, "log_odds_chosen": 2.820221424102783, "log_odds_ratio": -0.34589338302612305, "logits/chosen": -0.8554526567459106, "logits/rejected": -0.8768529891967773, "logps/chosen": -0.10701534152030945, "logps/rejected": -0.7232824563980103, "loss": 4.4983, "nll_loss": 1.0899972915649414, "rewards/accuracies": 0.75, "rewards/chosen": -0.01070153433829546, "rewards/margins": 0.06162671372294426, "rewards/rejected": -0.07232824712991714, "step": 473 }, { "epoch": 0.3278008298755187, "grad_norm": 5.415292263031006, "learning_rate": 1.6390041493775935e-05, "log_odds_chosen": 1.2005585432052612, "log_odds_ratio": -0.6597185730934143, "logits/chosen": -0.5219282507896423, "logits/rejected": -0.523371160030365, "logps/chosen": -0.1712622493505478, "logps/rejected": -0.28718945384025574, "loss": 5.7351, "nll_loss": 1.3677911758422852, "rewards/accuracies": 0.625, "rewards/chosen": -0.01712622493505478, "rewards/margins": 0.011592721566557884, "rewards/rejected": -0.028718946501612663, "step": 474 }, { "epoch": 0.3284923928077455, "grad_norm": 6.83495569229126, "learning_rate": 1.6424619640387277e-05, "log_odds_chosen": 1.5683974027633667, "log_odds_ratio": -0.325946569442749, "logits/chosen": -0.7668761610984802, "logits/rejected": -0.8554536700248718, "logps/chosen": -0.07481614500284195, "logps/rejected": -0.23265601694583893, "loss": 3.3869, "nll_loss": 0.8141358494758606, "rewards/accuracies": 1.0, "rewards/chosen": -0.007481614127755165, "rewards/margins": 0.015783987939357758, "rewards/rejected": -0.023265602067112923, "step": 475 }, { "epoch": 0.32918395573997233, "grad_norm": 5.765621662139893, "learning_rate": 1.6459197786998618e-05, "log_odds_chosen": 0.0016551315784454346, "log_odds_ratio": -0.9704076647758484, "logits/chosen": -0.8410443067550659, "logits/rejected": -0.8325227499008179, "logps/chosen": -0.1430882066488266, "logps/rejected": -0.13438117504119873, "loss": 4.226, "nll_loss": 0.959465503692627, "rewards/accuracies": 0.5, "rewards/chosen": -0.014308820478618145, "rewards/margins": -0.000870703486725688, "rewards/rejected": -0.013438117690384388, "step": 476 }, { "epoch": 0.32987551867219916, "grad_norm": 4.890822887420654, "learning_rate": 1.649377593360996e-05, "log_odds_chosen": 1.6676287651062012, "log_odds_ratio": -0.2915194034576416, "logits/chosen": -0.7431758642196655, "logits/rejected": -0.7679858803749084, "logps/chosen": -0.1129474937915802, "logps/rejected": -0.3145168125629425, "loss": 4.9774, "nll_loss": 1.2151939868927002, "rewards/accuracies": 0.875, "rewards/chosen": -0.011294749565422535, "rewards/margins": 0.02015693299472332, "rewards/rejected": -0.03145168349146843, "step": 477 }, { "epoch": 0.330567081604426, "grad_norm": 4.508111953735352, "learning_rate": 1.65283540802213e-05, "log_odds_chosen": 1.216369867324829, "log_odds_ratio": -0.42188069224357605, "logits/chosen": -1.0423190593719482, "logits/rejected": -1.047698974609375, "logps/chosen": -0.10603365302085876, "logps/rejected": -0.3019612729549408, "loss": 4.6539, "nll_loss": 1.121298909187317, "rewards/accuracies": 0.75, "rewards/chosen": -0.01060336735099554, "rewards/margins": 0.019592760130763054, "rewards/rejected": -0.03019612841308117, "step": 478 }, { "epoch": 0.3312586445366528, "grad_norm": 3.8689982891082764, "learning_rate": 1.6562932226832643e-05, "log_odds_chosen": 1.1521224975585938, "log_odds_ratio": -0.3827163279056549, "logits/chosen": -0.7599689364433289, "logits/rejected": -0.7738473415374756, "logps/chosen": -0.11503064632415771, "logps/rejected": -0.32514065504074097, "loss": 4.676, "nll_loss": 1.1307350397109985, "rewards/accuracies": 0.875, "rewards/chosen": -0.011503065004944801, "rewards/margins": 0.021010998636484146, "rewards/rejected": -0.0325140655040741, "step": 479 }, { "epoch": 0.33195020746887965, "grad_norm": 6.303074359893799, "learning_rate": 1.6597510373443984e-05, "log_odds_chosen": 1.5216987133026123, "log_odds_ratio": -0.578852117061615, "logits/chosen": -0.43270474672317505, "logits/rejected": -0.4453016519546509, "logps/chosen": -0.15122368931770325, "logps/rejected": -0.26229071617126465, "loss": 4.962, "nll_loss": 1.1826035976409912, "rewards/accuracies": 0.625, "rewards/chosen": -0.015122368931770325, "rewards/margins": 0.01110670156776905, "rewards/rejected": -0.026229072362184525, "step": 480 }, { "epoch": 0.3326417704011065, "grad_norm": 4.696047306060791, "learning_rate": 1.6632088520055326e-05, "log_odds_chosen": 1.1306276321411133, "log_odds_ratio": -0.5183489322662354, "logits/chosen": -1.0037342309951782, "logits/rejected": -1.044380784034729, "logps/chosen": -0.1369670182466507, "logps/rejected": -0.2618962526321411, "loss": 4.4882, "nll_loss": 1.0702087879180908, "rewards/accuracies": 0.75, "rewards/chosen": -0.013696704059839249, "rewards/margins": 0.012492923997342587, "rewards/rejected": -0.02618962712585926, "step": 481 }, { "epoch": 0.3333333333333333, "grad_norm": 4.115095615386963, "learning_rate": 1.6666666666666667e-05, "log_odds_chosen": 1.7002249956130981, "log_odds_ratio": -0.3261849284172058, "logits/chosen": -0.8328145146369934, "logits/rejected": -0.7903501391410828, "logps/chosen": -0.10042671114206314, "logps/rejected": -0.33854255080223083, "loss": 4.6884, "nll_loss": 1.1394823789596558, "rewards/accuracies": 0.75, "rewards/chosen": -0.010042671114206314, "rewards/margins": 0.02381158620119095, "rewards/rejected": -0.03385425731539726, "step": 482 }, { "epoch": 0.33402489626556015, "grad_norm": 5.309091567993164, "learning_rate": 1.670124481327801e-05, "log_odds_chosen": -0.06614989042282104, "log_odds_ratio": -0.7974898219108582, "logits/chosen": -0.9340482354164124, "logits/rejected": -0.9295551180839539, "logps/chosen": -0.1734018474817276, "logps/rejected": -0.1410864144563675, "loss": 4.9642, "nll_loss": 1.161311149597168, "rewards/accuracies": 0.5, "rewards/chosen": -0.01734018512070179, "rewards/margins": -0.0032315438147634268, "rewards/rejected": -0.014108642935752869, "step": 483 }, { "epoch": 0.334716459197787, "grad_norm": 5.502237796783447, "learning_rate": 1.673582295988935e-05, "log_odds_chosen": 1.4345967769622803, "log_odds_ratio": -0.4077805280685425, "logits/chosen": -0.9030207395553589, "logits/rejected": -0.9316298961639404, "logps/chosen": -0.15604329109191895, "logps/rejected": -0.4233133792877197, "loss": 6.3053, "nll_loss": 1.5355592966079712, "rewards/accuracies": 0.625, "rewards/chosen": -0.01560432743281126, "rewards/margins": 0.026727013289928436, "rewards/rejected": -0.04233134165406227, "step": 484 }, { "epoch": 0.3354080221300138, "grad_norm": 4.085949420928955, "learning_rate": 1.6770401106500692e-05, "log_odds_chosen": 0.6022038459777832, "log_odds_ratio": -0.5693349242210388, "logits/chosen": -0.7707476019859314, "logits/rejected": -0.7946466207504272, "logps/chosen": -0.12312422692775726, "logps/rejected": -0.3257462978363037, "loss": 5.4492, "nll_loss": 1.3053703308105469, "rewards/accuracies": 0.625, "rewards/chosen": -0.012312421575188637, "rewards/margins": 0.020262207835912704, "rewards/rejected": -0.03257462754845619, "step": 485 }, { "epoch": 0.3360995850622407, "grad_norm": 5.308136463165283, "learning_rate": 1.6804979253112034e-05, "log_odds_chosen": 0.49736395478248596, "log_odds_ratio": -0.5352747440338135, "logits/chosen": -0.538204550743103, "logits/rejected": -0.5477010607719421, "logps/chosen": -0.18097350001335144, "logps/rejected": -0.27122586965560913, "loss": 4.8144, "nll_loss": 1.1500778198242188, "rewards/accuracies": 0.75, "rewards/chosen": -0.018097348511219025, "rewards/margins": 0.00902523659169674, "rewards/rejected": -0.027122585102915764, "step": 486 }, { "epoch": 0.3367911479944675, "grad_norm": 6.017159938812256, "learning_rate": 1.6839557399723375e-05, "log_odds_chosen": 2.9066498279571533, "log_odds_ratio": -0.3275456428527832, "logits/chosen": -0.9392266273498535, "logits/rejected": -0.958389401435852, "logps/chosen": -0.19047018885612488, "logps/rejected": -0.7778450846672058, "loss": 5.6669, "nll_loss": 1.3839747905731201, "rewards/accuracies": 0.875, "rewards/chosen": -0.019047021865844727, "rewards/margins": 0.058737486600875854, "rewards/rejected": -0.07778450846672058, "step": 487 }, { "epoch": 0.33748271092669435, "grad_norm": 8.302292823791504, "learning_rate": 1.6874135546334717e-05, "log_odds_chosen": 0.5107730031013489, "log_odds_ratio": -1.322838544845581, "logits/chosen": -0.8151763081550598, "logits/rejected": -0.7991777062416077, "logps/chosen": -0.42283517122268677, "logps/rejected": -0.34093451499938965, "loss": 4.967, "nll_loss": 1.1094615459442139, "rewards/accuracies": 0.625, "rewards/chosen": -0.04228351637721062, "rewards/margins": -0.008190065622329712, "rewards/rejected": -0.034093454480171204, "step": 488 }, { "epoch": 0.3381742738589212, "grad_norm": 3.8961689472198486, "learning_rate": 1.690871369294606e-05, "log_odds_chosen": 2.57572603225708, "log_odds_ratio": -0.43755820393562317, "logits/chosen": -0.8054549694061279, "logits/rejected": -0.8180067539215088, "logps/chosen": -0.06617649644613266, "logps/rejected": -0.4114043116569519, "loss": 4.2428, "nll_loss": 1.016952395439148, "rewards/accuracies": 0.625, "rewards/chosen": -0.006617650389671326, "rewards/margins": 0.034522779285907745, "rewards/rejected": -0.04114042967557907, "step": 489 }, { "epoch": 0.338865836791148, "grad_norm": 7.237804889678955, "learning_rate": 1.69432918395574e-05, "log_odds_chosen": 1.2616724967956543, "log_odds_ratio": -0.6169224381446838, "logits/chosen": -1.0781421661376953, "logits/rejected": -1.076696515083313, "logps/chosen": -0.13765141367912292, "logps/rejected": -0.36732688546180725, "loss": 6.1128, "nll_loss": 1.4665086269378662, "rewards/accuracies": 0.75, "rewards/chosen": -0.013765140436589718, "rewards/margins": 0.022967549040913582, "rewards/rejected": -0.036732688546180725, "step": 490 }, { "epoch": 0.33955739972337484, "grad_norm": 3.6425857543945312, "learning_rate": 1.697786998616874e-05, "log_odds_chosen": 1.7106664180755615, "log_odds_ratio": -0.46601614356040955, "logits/chosen": -1.2055742740631104, "logits/rejected": -1.2279021739959717, "logps/chosen": -0.1318834125995636, "logps/rejected": -0.2605992257595062, "loss": 5.4442, "nll_loss": 1.3144505023956299, "rewards/accuracies": 0.75, "rewards/chosen": -0.01318834163248539, "rewards/margins": 0.012871582061052322, "rewards/rejected": -0.02605992555618286, "step": 491 }, { "epoch": 0.34024896265560167, "grad_norm": 5.997433185577393, "learning_rate": 1.7012448132780083e-05, "log_odds_chosen": 1.4230337142944336, "log_odds_ratio": -0.8804874420166016, "logits/chosen": -0.6543467044830322, "logits/rejected": -0.6305510401725769, "logps/chosen": -0.1759967803955078, "logps/rejected": -0.5112686157226562, "loss": 4.3876, "nll_loss": 1.008862018585205, "rewards/accuracies": 0.5, "rewards/chosen": -0.0175996795296669, "rewards/margins": 0.033527180552482605, "rewards/rejected": -0.051126863807439804, "step": 492 }, { "epoch": 0.3409405255878285, "grad_norm": 5.290637969970703, "learning_rate": 1.7047026279391425e-05, "log_odds_chosen": 1.3665871620178223, "log_odds_ratio": -0.6540405750274658, "logits/chosen": -0.7667418122291565, "logits/rejected": -0.7568073272705078, "logps/chosen": -0.22884142398834229, "logps/rejected": -0.5813363194465637, "loss": 4.7473, "nll_loss": 1.1214134693145752, "rewards/accuracies": 0.5, "rewards/chosen": -0.02288414165377617, "rewards/margins": 0.0352494940161705, "rewards/rejected": -0.05813363194465637, "step": 493 }, { "epoch": 0.34163208852005533, "grad_norm": 4.3733720779418945, "learning_rate": 1.7081604426002766e-05, "log_odds_chosen": 2.179429054260254, "log_odds_ratio": -0.53110271692276, "logits/chosen": -1.1724821329116821, "logits/rejected": -1.217232346534729, "logps/chosen": -0.1547495275735855, "logps/rejected": -0.5294268727302551, "loss": 5.0789, "nll_loss": 1.216625452041626, "rewards/accuracies": 0.625, "rewards/chosen": -0.015474953688681126, "rewards/margins": 0.03746773302555084, "rewards/rejected": -0.05294268578290939, "step": 494 }, { "epoch": 0.34232365145228216, "grad_norm": 4.039161682128906, "learning_rate": 1.7116182572614108e-05, "log_odds_chosen": 1.4472264051437378, "log_odds_ratio": -0.47876089811325073, "logits/chosen": -1.058305263519287, "logits/rejected": -1.0702276229858398, "logps/chosen": -0.1727418750524521, "logps/rejected": -0.3430398106575012, "loss": 4.4938, "nll_loss": 1.0755620002746582, "rewards/accuracies": 0.75, "rewards/chosen": -0.01727418787777424, "rewards/margins": 0.017029793933033943, "rewards/rejected": -0.03430397808551788, "step": 495 }, { "epoch": 0.343015214384509, "grad_norm": 4.610265731811523, "learning_rate": 1.715076071922545e-05, "log_odds_chosen": 2.3182146549224854, "log_odds_ratio": -0.28784000873565674, "logits/chosen": -0.9007983803749084, "logits/rejected": -0.8986612558364868, "logps/chosen": -0.1349295973777771, "logps/rejected": -0.44597327709198, "loss": 4.6976, "nll_loss": 1.1456117630004883, "rewards/accuracies": 0.875, "rewards/chosen": -0.013492961414158344, "rewards/margins": 0.031104369089007378, "rewards/rejected": -0.044597327709198, "step": 496 }, { "epoch": 0.3437067773167358, "grad_norm": 7.051185131072998, "learning_rate": 1.718533886583679e-05, "log_odds_chosen": 1.7162365913391113, "log_odds_ratio": -1.1342723369598389, "logits/chosen": -1.022646188735962, "logits/rejected": -1.019441843032837, "logps/chosen": -0.3917577266693115, "logps/rejected": -0.5872882008552551, "loss": 4.6114, "nll_loss": 1.039417028427124, "rewards/accuracies": 0.5, "rewards/chosen": -0.03917577490210533, "rewards/margins": 0.0195530503988266, "rewards/rejected": -0.05872882157564163, "step": 497 }, { "epoch": 0.34439834024896265, "grad_norm": 7.3869757652282715, "learning_rate": 1.7219917012448132e-05, "log_odds_chosen": 0.07555952668190002, "log_odds_ratio": -0.8538734912872314, "logits/chosen": -0.8446725606918335, "logits/rejected": -0.8641011714935303, "logps/chosen": -0.23065711557865143, "logps/rejected": -0.21285629272460938, "loss": 6.8209, "nll_loss": 1.6198467016220093, "rewards/accuracies": 0.625, "rewards/chosen": -0.023065710440278053, "rewards/margins": -0.0017800810746848583, "rewards/rejected": -0.021285628899931908, "step": 498 }, { "epoch": 0.3450899031811895, "grad_norm": 4.562398433685303, "learning_rate": 1.7254495159059474e-05, "log_odds_chosen": 1.819996953010559, "log_odds_ratio": -0.4225596487522125, "logits/chosen": -0.8365169763565063, "logits/rejected": -0.8637485504150391, "logps/chosen": -0.13593707978725433, "logps/rejected": -0.42044973373413086, "loss": 4.9074, "nll_loss": 1.1845835447311401, "rewards/accuracies": 0.75, "rewards/chosen": -0.013593706302344799, "rewards/margins": 0.028451265767216682, "rewards/rejected": -0.04204497113823891, "step": 499 }, { "epoch": 0.3457814661134163, "grad_norm": 4.474560260772705, "learning_rate": 1.7289073305670816e-05, "log_odds_chosen": 0.9567031860351562, "log_odds_ratio": -0.42330771684646606, "logits/chosen": -1.1170122623443604, "logits/rejected": -1.1414347887039185, "logps/chosen": -0.13176178932189941, "logps/rejected": -0.3465428054332733, "loss": 5.3409, "nll_loss": 1.292906641960144, "rewards/accuracies": 0.875, "rewards/chosen": -0.01317618042230606, "rewards/margins": 0.02147809974849224, "rewards/rejected": -0.03465428203344345, "step": 500 }, { "epoch": 0.34647302904564314, "grad_norm": 5.560184955596924, "learning_rate": 1.7323651452282157e-05, "log_odds_chosen": 2.8556575775146484, "log_odds_ratio": -0.28292903304100037, "logits/chosen": -0.7554357051849365, "logits/rejected": -0.7841203212738037, "logps/chosen": -0.14785116910934448, "logps/rejected": -0.6094825267791748, "loss": 6.0501, "nll_loss": 1.484229564666748, "rewards/accuracies": 0.875, "rewards/chosen": -0.014785117469727993, "rewards/margins": 0.04616313427686691, "rewards/rejected": -0.060948245227336884, "step": 501 }, { "epoch": 0.34716459197787, "grad_norm": 4.023012161254883, "learning_rate": 1.73582295988935e-05, "log_odds_chosen": 1.4860143661499023, "log_odds_ratio": -0.3626514673233032, "logits/chosen": -0.9559276103973389, "logits/rejected": -0.9815624952316284, "logps/chosen": -0.11361101269721985, "logps/rejected": -0.34690365195274353, "loss": 5.1345, "nll_loss": 1.2473585605621338, "rewards/accuracies": 0.75, "rewards/chosen": -0.011361101642251015, "rewards/margins": 0.02332925982773304, "rewards/rejected": -0.034690361469984055, "step": 502 }, { "epoch": 0.3478561549100968, "grad_norm": 3.716696262359619, "learning_rate": 1.739280774550484e-05, "log_odds_chosen": 1.6061586141586304, "log_odds_ratio": -0.38605138659477234, "logits/chosen": -0.9349660873413086, "logits/rejected": -0.9320093393325806, "logps/chosen": -0.14063169062137604, "logps/rejected": -0.38299134373664856, "loss": 5.7973, "nll_loss": 1.410730004310608, "rewards/accuracies": 0.75, "rewards/chosen": -0.014063170179724693, "rewards/margins": 0.02423596754670143, "rewards/rejected": -0.038299135863780975, "step": 503 }, { "epoch": 0.34854771784232363, "grad_norm": 4.08083438873291, "learning_rate": 1.7427385892116182e-05, "log_odds_chosen": 1.4298001527786255, "log_odds_ratio": -0.4044135808944702, "logits/chosen": -1.0546984672546387, "logits/rejected": -1.0735547542572021, "logps/chosen": -0.2744176983833313, "logps/rejected": -0.5156873464584351, "loss": 5.2015, "nll_loss": 1.259932279586792, "rewards/accuracies": 0.75, "rewards/chosen": -0.02744176983833313, "rewards/margins": 0.024126969277858734, "rewards/rejected": -0.051568739116191864, "step": 504 }, { "epoch": 0.34923928077455046, "grad_norm": 5.01504373550415, "learning_rate": 1.7461964038727523e-05, "log_odds_chosen": 3.2316083908081055, "log_odds_ratio": -0.1560392826795578, "logits/chosen": -0.6426920294761658, "logits/rejected": -0.6856198906898499, "logps/chosen": -0.07277484238147736, "logps/rejected": -0.4405496120452881, "loss": 5.1007, "nll_loss": 1.2595752477645874, "rewards/accuracies": 1.0, "rewards/chosen": -0.007277484517544508, "rewards/margins": 0.03677747771143913, "rewards/rejected": -0.04405496269464493, "step": 505 }, { "epoch": 0.3499308437067773, "grad_norm": 3.568671941757202, "learning_rate": 1.7496542185338865e-05, "log_odds_chosen": 2.227147102355957, "log_odds_ratio": -0.5110369324684143, "logits/chosen": -0.7542319297790527, "logits/rejected": -0.7737609148025513, "logps/chosen": -0.17752686142921448, "logps/rejected": -0.3617754578590393, "loss": 3.9289, "nll_loss": 0.9311113357543945, "rewards/accuracies": 0.625, "rewards/chosen": -0.017752686515450478, "rewards/margins": 0.018424857407808304, "rewards/rejected": -0.03617754578590393, "step": 506 }, { "epoch": 0.3506224066390041, "grad_norm": 4.465025901794434, "learning_rate": 1.7531120331950207e-05, "log_odds_chosen": 1.4633152484893799, "log_odds_ratio": -0.6008234620094299, "logits/chosen": -0.7912360429763794, "logits/rejected": -0.79715895652771, "logps/chosen": -0.24662283062934875, "logps/rejected": -0.4220726788043976, "loss": 4.4915, "nll_loss": 1.062793254852295, "rewards/accuracies": 0.625, "rewards/chosen": -0.024662284180521965, "rewards/margins": 0.017544984817504883, "rewards/rejected": -0.0422072634100914, "step": 507 }, { "epoch": 0.35131396957123096, "grad_norm": 3.592378616333008, "learning_rate": 1.7565698478561548e-05, "log_odds_chosen": 2.0615651607513428, "log_odds_ratio": -0.3657073378562927, "logits/chosen": -0.7249419093132019, "logits/rejected": -0.7586057186126709, "logps/chosen": -0.11939063668251038, "logps/rejected": -0.4244507849216461, "loss": 3.253, "nll_loss": 0.7766897082328796, "rewards/accuracies": 0.875, "rewards/chosen": -0.011939063668251038, "rewards/margins": 0.030506014823913574, "rewards/rejected": -0.04244507849216461, "step": 508 }, { "epoch": 0.35200553250345784, "grad_norm": 5.696559429168701, "learning_rate": 1.7600276625172893e-05, "log_odds_chosen": 1.1087409257888794, "log_odds_ratio": -0.6127991676330566, "logits/chosen": -0.8956174850463867, "logits/rejected": -0.9211435914039612, "logps/chosen": -0.21373379230499268, "logps/rejected": -0.3666113615036011, "loss": 5.2028, "nll_loss": 1.2394108772277832, "rewards/accuracies": 0.625, "rewards/chosen": -0.021373379975557327, "rewards/margins": 0.015287760645151138, "rewards/rejected": -0.036661140620708466, "step": 509 }, { "epoch": 0.35269709543568467, "grad_norm": 6.187593460083008, "learning_rate": 1.7634854771784235e-05, "log_odds_chosen": 0.7654345035552979, "log_odds_ratio": -0.703902006149292, "logits/chosen": -0.7929449081420898, "logits/rejected": -0.7793622016906738, "logps/chosen": -0.3261670768260956, "logps/rejected": -0.6445503234863281, "loss": 5.5787, "nll_loss": 1.3242785930633545, "rewards/accuracies": 0.5, "rewards/chosen": -0.03261670842766762, "rewards/margins": 0.031838320195674896, "rewards/rejected": -0.06445503234863281, "step": 510 }, { "epoch": 0.3533886583679115, "grad_norm": 7.367726802825928, "learning_rate": 1.7669432918395576e-05, "log_odds_chosen": 0.41668662428855896, "log_odds_ratio": -0.8817450404167175, "logits/chosen": -0.6256126165390015, "logits/rejected": -0.6103242039680481, "logps/chosen": -0.2497943639755249, "logps/rejected": -0.33595484495162964, "loss": 4.6828, "nll_loss": 1.0825315713882446, "rewards/accuracies": 0.375, "rewards/chosen": -0.02497943490743637, "rewards/margins": 0.008616046980023384, "rewards/rejected": -0.033595483750104904, "step": 511 }, { "epoch": 0.35408022130013833, "grad_norm": 3.762199640274048, "learning_rate": 1.7704011065006918e-05, "log_odds_chosen": 3.9033827781677246, "log_odds_ratio": -0.27078744769096375, "logits/chosen": -0.9025722742080688, "logits/rejected": -0.945976972579956, "logps/chosen": -0.13871155679225922, "logps/rejected": -0.6132323741912842, "loss": 3.0666, "nll_loss": 0.7395758032798767, "rewards/accuracies": 0.875, "rewards/chosen": -0.013871154747903347, "rewards/margins": 0.047452084720134735, "rewards/rejected": -0.06132324039936066, "step": 512 }, { "epoch": 0.35477178423236516, "grad_norm": 5.514883041381836, "learning_rate": 1.773858921161826e-05, "log_odds_chosen": 1.5800803899765015, "log_odds_ratio": -0.5254863500595093, "logits/chosen": -0.8947157859802246, "logits/rejected": -0.9294208288192749, "logps/chosen": -0.19791993498802185, "logps/rejected": -0.44762054085731506, "loss": 6.4006, "nll_loss": 1.5476136207580566, "rewards/accuracies": 0.625, "rewards/chosen": -0.019791992381215096, "rewards/margins": 0.02497006021440029, "rewards/rejected": -0.04476205259561539, "step": 513 }, { "epoch": 0.355463347164592, "grad_norm": 5.778316497802734, "learning_rate": 1.77731673582296e-05, "log_odds_chosen": 3.0969834327697754, "log_odds_ratio": -0.2676597535610199, "logits/chosen": -0.7269794940948486, "logits/rejected": -0.7525280714035034, "logps/chosen": -0.09116362780332565, "logps/rejected": -0.5763051509857178, "loss": 4.9457, "nll_loss": 1.2096540927886963, "rewards/accuracies": 1.0, "rewards/chosen": -0.00911636371165514, "rewards/margins": 0.04851415008306503, "rewards/rejected": -0.0576305128633976, "step": 514 }, { "epoch": 0.3561549100968188, "grad_norm": 5.426172256469727, "learning_rate": 1.7807745504840942e-05, "log_odds_chosen": 2.969529151916504, "log_odds_ratio": -0.3110653758049011, "logits/chosen": -0.7612947225570679, "logits/rejected": -0.731289267539978, "logps/chosen": -0.06461675465106964, "logps/rejected": -0.4509395360946655, "loss": 5.0502, "nll_loss": 1.2314317226409912, "rewards/accuracies": 0.875, "rewards/chosen": -0.0064616757445037365, "rewards/margins": 0.03863228112459183, "rewards/rejected": -0.04509395360946655, "step": 515 }, { "epoch": 0.35684647302904565, "grad_norm": 7.923834323883057, "learning_rate": 1.7842323651452284e-05, "log_odds_chosen": 0.44794902205467224, "log_odds_ratio": -0.8729050159454346, "logits/chosen": -0.8051487803459167, "logits/rejected": -0.8108866214752197, "logps/chosen": -0.1843547224998474, "logps/rejected": -0.2477795034646988, "loss": 5.1599, "nll_loss": 1.2026830911636353, "rewards/accuracies": 0.625, "rewards/chosen": -0.018435470759868622, "rewards/margins": 0.006342479493469, "rewards/rejected": -0.024777952581644058, "step": 516 }, { "epoch": 0.3575380359612725, "grad_norm": 4.760327339172363, "learning_rate": 1.7876901798063626e-05, "log_odds_chosen": 2.5328943729400635, "log_odds_ratio": -0.30655527114868164, "logits/chosen": -0.7165100574493408, "logits/rejected": -0.7217994332313538, "logps/chosen": -0.11191149055957794, "logps/rejected": -0.6096492409706116, "loss": 5.0692, "nll_loss": 1.2366557121276855, "rewards/accuracies": 0.875, "rewards/chosen": -0.011191150173544884, "rewards/margins": 0.04977377504110336, "rewards/rejected": -0.060964927077293396, "step": 517 }, { "epoch": 0.3582295988934993, "grad_norm": 6.100254535675049, "learning_rate": 1.7911479944674967e-05, "log_odds_chosen": 1.5004901885986328, "log_odds_ratio": -0.6379998922348022, "logits/chosen": -0.43648284673690796, "logits/rejected": -0.4436464011669159, "logps/chosen": -0.11927513778209686, "logps/rejected": -0.2644681930541992, "loss": 4.0639, "nll_loss": 0.9521628022193909, "rewards/accuracies": 0.625, "rewards/chosen": -0.011927514337003231, "rewards/margins": 0.014519304037094116, "rewards/rejected": -0.026446819305419922, "step": 518 }, { "epoch": 0.35892116182572614, "grad_norm": 8.609460830688477, "learning_rate": 1.794605809128631e-05, "log_odds_chosen": 1.5812023878097534, "log_odds_ratio": -0.7274914979934692, "logits/chosen": -0.9389444589614868, "logits/rejected": -0.9337302446365356, "logps/chosen": -0.2057965099811554, "logps/rejected": -0.5808533430099487, "loss": 5.5316, "nll_loss": 1.310141682624817, "rewards/accuracies": 0.75, "rewards/chosen": -0.02057965099811554, "rewards/margins": 0.03750568628311157, "rewards/rejected": -0.058085329830646515, "step": 519 }, { "epoch": 0.359612724757953, "grad_norm": 6.230218410491943, "learning_rate": 1.798063623789765e-05, "log_odds_chosen": 1.7745656967163086, "log_odds_ratio": -0.3179192543029785, "logits/chosen": -0.7594548463821411, "logits/rejected": -0.7903056144714355, "logps/chosen": -0.11681567132472992, "logps/rejected": -0.470163494348526, "loss": 4.6051, "nll_loss": 1.1194841861724854, "rewards/accuracies": 1.0, "rewards/chosen": -0.011681567877531052, "rewards/margins": 0.03533478081226349, "rewards/rejected": -0.04701634868979454, "step": 520 }, { "epoch": 0.3603042876901798, "grad_norm": 5.00435733795166, "learning_rate": 1.8015214384508992e-05, "log_odds_chosen": 3.240253448486328, "log_odds_ratio": -0.3954428434371948, "logits/chosen": -0.695622980594635, "logits/rejected": -0.7130659818649292, "logps/chosen": -0.11247075349092484, "logps/rejected": -0.6345089077949524, "loss": 3.7368, "nll_loss": 0.8946676850318909, "rewards/accuracies": 0.875, "rewards/chosen": -0.011247076094150543, "rewards/margins": 0.052203819155693054, "rewards/rejected": -0.0634508952498436, "step": 521 }, { "epoch": 0.36099585062240663, "grad_norm": 3.5418221950531006, "learning_rate": 1.8049792531120333e-05, "log_odds_chosen": 2.4172143936157227, "log_odds_ratio": -0.3795178234577179, "logits/chosen": -0.7669139504432678, "logits/rejected": -0.7880896925926208, "logps/chosen": -0.13434094190597534, "logps/rejected": -0.4309152066707611, "loss": 5.098, "nll_loss": 1.2365572452545166, "rewards/accuracies": 0.75, "rewards/chosen": -0.013434093445539474, "rewards/margins": 0.029657430946826935, "rewards/rejected": -0.04309152439236641, "step": 522 }, { "epoch": 0.36168741355463346, "grad_norm": 3.2109227180480957, "learning_rate": 1.8084370677731675e-05, "log_odds_chosen": 3.076549530029297, "log_odds_ratio": -0.1862131655216217, "logits/chosen": -0.9441479444503784, "logits/rejected": -0.9136902689933777, "logps/chosen": -0.10313470661640167, "logps/rejected": -0.6239364743232727, "loss": 4.1863, "nll_loss": 1.0279431343078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.010313471779227257, "rewards/margins": 0.0520801767706871, "rewards/rejected": -0.06239365041255951, "step": 523 }, { "epoch": 0.3623789764868603, "grad_norm": 4.191302299499512, "learning_rate": 1.8118948824343017e-05, "log_odds_chosen": 3.1018226146698, "log_odds_ratio": -0.3014015257358551, "logits/chosen": -0.8339476585388184, "logits/rejected": -0.7866331338882446, "logps/chosen": -0.09632863104343414, "logps/rejected": -0.4858103096485138, "loss": 4.1661, "nll_loss": 1.0113775730133057, "rewards/accuracies": 0.875, "rewards/chosen": -0.009632863104343414, "rewards/margins": 0.038948170840740204, "rewards/rejected": -0.04858103394508362, "step": 524 }, { "epoch": 0.3630705394190871, "grad_norm": 4.952174663543701, "learning_rate": 1.8153526970954358e-05, "log_odds_chosen": 2.119576930999756, "log_odds_ratio": -0.2787606120109558, "logits/chosen": -0.666994571685791, "logits/rejected": -0.6753822565078735, "logps/chosen": -0.07383690774440765, "logps/rejected": -0.3994881510734558, "loss": 5.4373, "nll_loss": 1.3314471244812012, "rewards/accuracies": 0.875, "rewards/chosen": -0.007383690681308508, "rewards/margins": 0.032565124332904816, "rewards/rejected": -0.03994881734251976, "step": 525 }, { "epoch": 0.36376210235131395, "grad_norm": 9.110729217529297, "learning_rate": 1.81881051175657e-05, "log_odds_chosen": 1.177870750427246, "log_odds_ratio": -1.0585416555404663, "logits/chosen": -0.7838261127471924, "logits/rejected": -0.7817600965499878, "logps/chosen": -0.21810297667980194, "logps/rejected": -0.24028678238391876, "loss": 4.1757, "nll_loss": 0.9380632638931274, "rewards/accuracies": 0.375, "rewards/chosen": -0.021810296922922134, "rewards/margins": 0.002218380104750395, "rewards/rejected": -0.024028677493333817, "step": 526 }, { "epoch": 0.3644536652835408, "grad_norm": 6.334846019744873, "learning_rate": 1.822268326417704e-05, "log_odds_chosen": 1.2779005765914917, "log_odds_ratio": -0.7157418727874756, "logits/chosen": -0.7719458341598511, "logits/rejected": -0.8071249723434448, "logps/chosen": -0.23595497012138367, "logps/rejected": -0.523496150970459, "loss": 4.9821, "nll_loss": 1.1739587783813477, "rewards/accuracies": 0.625, "rewards/chosen": -0.023595497012138367, "rewards/margins": 0.02875412628054619, "rewards/rejected": -0.05234961956739426, "step": 527 }, { "epoch": 0.3651452282157676, "grad_norm": 4.921584129333496, "learning_rate": 1.8257261410788383e-05, "log_odds_chosen": 1.9070172309875488, "log_odds_ratio": -0.30627143383026123, "logits/chosen": -0.8669978380203247, "logits/rejected": -0.8720003366470337, "logps/chosen": -0.12546661496162415, "logps/rejected": -0.5315241813659668, "loss": 5.7189, "nll_loss": 1.3990854024887085, "rewards/accuracies": 1.0, "rewards/chosen": -0.012546662241220474, "rewards/margins": 0.040605757385492325, "rewards/rejected": -0.0531524196267128, "step": 528 }, { "epoch": 0.36583679114799444, "grad_norm": 4.885773181915283, "learning_rate": 1.8291839557399724e-05, "log_odds_chosen": 2.917936325073242, "log_odds_ratio": -0.4400659203529358, "logits/chosen": -0.7196451425552368, "logits/rejected": -0.720146894454956, "logps/chosen": -0.16993539035320282, "logps/rejected": -0.5515943765640259, "loss": 4.9134, "nll_loss": 1.1843429803848267, "rewards/accuracies": 0.75, "rewards/chosen": -0.016993539407849312, "rewards/margins": 0.038165897130966187, "rewards/rejected": -0.05515943467617035, "step": 529 }, { "epoch": 0.3665283540802213, "grad_norm": 3.7564799785614014, "learning_rate": 1.8326417704011066e-05, "log_odds_chosen": 2.123663902282715, "log_odds_ratio": -0.2414604127407074, "logits/chosen": -0.7857799530029297, "logits/rejected": -0.8454867005348206, "logps/chosen": -0.1178518757224083, "logps/rejected": -0.5733444690704346, "loss": 4.1923, "nll_loss": 1.0239187479019165, "rewards/accuracies": 0.875, "rewards/chosen": -0.011785187758505344, "rewards/margins": 0.04554925486445427, "rewards/rejected": -0.05733444541692734, "step": 530 }, { "epoch": 0.36721991701244816, "grad_norm": 4.089568614959717, "learning_rate": 1.8360995850622407e-05, "log_odds_chosen": 3.5009407997131348, "log_odds_ratio": -0.24155713617801666, "logits/chosen": -0.5445857048034668, "logits/rejected": -0.5896902084350586, "logps/chosen": -0.09425552189350128, "logps/rejected": -0.7995942234992981, "loss": 3.381, "nll_loss": 0.8211044073104858, "rewards/accuracies": 0.875, "rewards/chosen": -0.009425551630556583, "rewards/margins": 0.0705338716506958, "rewards/rejected": -0.07995942234992981, "step": 531 }, { "epoch": 0.367911479944675, "grad_norm": 3.8665342330932617, "learning_rate": 1.839557399723375e-05, "log_odds_chosen": 2.036942958831787, "log_odds_ratio": -0.3373142182826996, "logits/chosen": -0.8292122483253479, "logits/rejected": -0.877589762210846, "logps/chosen": -0.15983963012695312, "logps/rejected": -0.5212113857269287, "loss": 4.6423, "nll_loss": 1.1268372535705566, "rewards/accuracies": 0.75, "rewards/chosen": -0.015983961522579193, "rewards/margins": 0.036137182265520096, "rewards/rejected": -0.05212114751338959, "step": 532 }, { "epoch": 0.3686030428769018, "grad_norm": 6.855545997619629, "learning_rate": 1.843015214384509e-05, "log_odds_chosen": 1.7628200054168701, "log_odds_ratio": -0.6212955713272095, "logits/chosen": -0.6676622033119202, "logits/rejected": -0.6422132849693298, "logps/chosen": -0.1455618143081665, "logps/rejected": -0.312483012676239, "loss": 5.3469, "nll_loss": 1.2745840549468994, "rewards/accuracies": 0.875, "rewards/chosen": -0.01455618068575859, "rewards/margins": 0.01669212058186531, "rewards/rejected": -0.03124830313026905, "step": 533 }, { "epoch": 0.36929460580912865, "grad_norm": 3.9764904975891113, "learning_rate": 1.8464730290456432e-05, "log_odds_chosen": 1.620558500289917, "log_odds_ratio": -0.552440881729126, "logits/chosen": -1.1512162685394287, "logits/rejected": -1.1916279792785645, "logps/chosen": -0.2041110098361969, "logps/rejected": -0.6735808253288269, "loss": 4.6524, "nll_loss": 1.1078450679779053, "rewards/accuracies": 0.625, "rewards/chosen": -0.02041110023856163, "rewards/margins": 0.04694698005914688, "rewards/rejected": -0.06735808402299881, "step": 534 }, { "epoch": 0.3699861687413555, "grad_norm": 3.739170551300049, "learning_rate": 1.8499308437067774e-05, "log_odds_chosen": 4.318858623504639, "log_odds_ratio": -0.27778083086013794, "logits/chosen": -0.8107898235321045, "logits/rejected": -0.853278636932373, "logps/chosen": -0.0777362585067749, "logps/rejected": -0.883296012878418, "loss": 4.6828, "nll_loss": 1.1429297924041748, "rewards/accuracies": 0.75, "rewards/chosen": -0.007773626130074263, "rewards/margins": 0.0805559754371643, "rewards/rejected": -0.08832959830760956, "step": 535 }, { "epoch": 0.3706777316735823, "grad_norm": 4.370072841644287, "learning_rate": 1.8533886583679115e-05, "log_odds_chosen": 3.689222574234009, "log_odds_ratio": -0.17596468329429626, "logits/chosen": -0.7374777793884277, "logits/rejected": -0.8291355967521667, "logps/chosen": -0.06306464970111847, "logps/rejected": -0.7058030366897583, "loss": 4.2286, "nll_loss": 1.0395509004592896, "rewards/accuracies": 0.875, "rewards/chosen": -0.006306465249508619, "rewards/margins": 0.06427383422851562, "rewards/rejected": -0.07058030366897583, "step": 536 }, { "epoch": 0.37136929460580914, "grad_norm": 5.0150017738342285, "learning_rate": 1.8568464730290457e-05, "log_odds_chosen": 2.186718463897705, "log_odds_ratio": -0.4063662886619568, "logits/chosen": -0.8127549886703491, "logits/rejected": -0.8689401149749756, "logps/chosen": -0.11313273012638092, "logps/rejected": -0.39278122782707214, "loss": 4.6602, "nll_loss": 1.1244102716445923, "rewards/accuracies": 0.75, "rewards/chosen": -0.011313272640109062, "rewards/margins": 0.027964850887656212, "rewards/rejected": -0.039278123527765274, "step": 537 }, { "epoch": 0.37206085753803597, "grad_norm": 5.019992828369141, "learning_rate": 1.86030428769018e-05, "log_odds_chosen": 1.2885156869888306, "log_odds_ratio": -0.6232368350028992, "logits/chosen": -0.7190303206443787, "logits/rejected": -0.7032252550125122, "logps/chosen": -0.13489317893981934, "logps/rejected": -0.19020210206508636, "loss": 4.7953, "nll_loss": 1.136505126953125, "rewards/accuracies": 0.5, "rewards/chosen": -0.013489319011569023, "rewards/margins": 0.005530891008675098, "rewards/rejected": -0.019020210951566696, "step": 538 }, { "epoch": 0.3727524204702628, "grad_norm": 6.01793098449707, "learning_rate": 1.863762102351314e-05, "log_odds_chosen": 0.862777829170227, "log_odds_ratio": -0.7624509334564209, "logits/chosen": -0.5267829298973083, "logits/rejected": -0.5604238510131836, "logps/chosen": -0.264478474855423, "logps/rejected": -0.33356383442878723, "loss": 4.7699, "nll_loss": 1.1162419319152832, "rewards/accuracies": 0.75, "rewards/chosen": -0.026447847485542297, "rewards/margins": 0.006908536888659, "rewards/rejected": -0.03335638344287872, "step": 539 }, { "epoch": 0.37344398340248963, "grad_norm": 5.9148030281066895, "learning_rate": 1.867219917012448e-05, "log_odds_chosen": 0.3767799139022827, "log_odds_ratio": -0.5631717443466187, "logits/chosen": -0.9739837646484375, "logits/rejected": -1.014035940170288, "logps/chosen": -0.13336896896362305, "logps/rejected": -0.19003111124038696, "loss": 6.9421, "nll_loss": 1.679201602935791, "rewards/accuracies": 0.625, "rewards/chosen": -0.01333689596503973, "rewards/margins": 0.005666215904057026, "rewards/rejected": -0.019003111869096756, "step": 540 }, { "epoch": 0.37413554633471646, "grad_norm": 7.738327503204346, "learning_rate": 1.8706777316735823e-05, "log_odds_chosen": 0.9757472276687622, "log_odds_ratio": -1.0805482864379883, "logits/chosen": -0.8048467040061951, "logits/rejected": -0.8325028419494629, "logps/chosen": -0.14648893475532532, "logps/rejected": -0.49185705184936523, "loss": 5.6322, "nll_loss": 1.2999882698059082, "rewards/accuracies": 0.625, "rewards/chosen": -0.014648893848061562, "rewards/margins": 0.03453681245446205, "rewards/rejected": -0.04918570816516876, "step": 541 }, { "epoch": 0.3748271092669433, "grad_norm": 4.354764938354492, "learning_rate": 1.8741355463347165e-05, "log_odds_chosen": 0.5504599809646606, "log_odds_ratio": -0.5843324661254883, "logits/chosen": -0.6657752990722656, "logits/rejected": -0.6720460653305054, "logps/chosen": -0.17936018109321594, "logps/rejected": -0.31276610493659973, "loss": 4.3939, "nll_loss": 1.040050983428955, "rewards/accuracies": 0.75, "rewards/chosen": -0.017936021089553833, "rewards/margins": 0.013340589590370655, "rewards/rejected": -0.03127660974860191, "step": 542 }, { "epoch": 0.3755186721991701, "grad_norm": 6.16148567199707, "learning_rate": 1.8775933609958506e-05, "log_odds_chosen": 2.187774658203125, "log_odds_ratio": -0.4758215844631195, "logits/chosen": -0.6338940858840942, "logits/rejected": -0.6519688367843628, "logps/chosen": -0.13961473107337952, "logps/rejected": -0.42610496282577515, "loss": 4.8044, "nll_loss": 1.153525471687317, "rewards/accuracies": 0.875, "rewards/chosen": -0.013961473479866982, "rewards/margins": 0.028649020940065384, "rewards/rejected": -0.042610496282577515, "step": 543 }, { "epoch": 0.37621023513139695, "grad_norm": 4.357821941375732, "learning_rate": 1.8810511756569848e-05, "log_odds_chosen": 1.0460649728775024, "log_odds_ratio": -0.49968603253364563, "logits/chosen": -1.1150552034378052, "logits/rejected": -1.1179910898208618, "logps/chosen": -0.10702711343765259, "logps/rejected": -0.2099991887807846, "loss": 4.6849, "nll_loss": 1.1212637424468994, "rewards/accuracies": 0.75, "rewards/chosen": -0.010702710598707199, "rewards/margins": 0.010297207161784172, "rewards/rejected": -0.02099991962313652, "step": 544 }, { "epoch": 0.3769017980636238, "grad_norm": 3.573343276977539, "learning_rate": 1.884508990318119e-05, "log_odds_chosen": 0.6429002285003662, "log_odds_ratio": -0.5074270963668823, "logits/chosen": -0.8373913168907166, "logits/rejected": -0.8756779432296753, "logps/chosen": -0.18509633839130402, "logps/rejected": -0.2932808995246887, "loss": 4.0565, "nll_loss": 0.9633736610412598, "rewards/accuracies": 0.875, "rewards/chosen": -0.0185096338391304, "rewards/margins": 0.01081845723092556, "rewards/rejected": -0.02932809107005596, "step": 545 }, { "epoch": 0.3775933609958506, "grad_norm": 5.617794990539551, "learning_rate": 1.887966804979253e-05, "log_odds_chosen": 1.6232762336730957, "log_odds_ratio": -0.6035907864570618, "logits/chosen": -0.8689144849777222, "logits/rejected": -0.8932918310165405, "logps/chosen": -0.17928524315357208, "logps/rejected": -0.34406036138534546, "loss": 3.2081, "nll_loss": 0.7416632175445557, "rewards/accuracies": 0.75, "rewards/chosen": -0.01792852394282818, "rewards/margins": 0.016477510333061218, "rewards/rejected": -0.034406036138534546, "step": 546 }, { "epoch": 0.37828492392807744, "grad_norm": 4.233097076416016, "learning_rate": 1.8914246196403872e-05, "log_odds_chosen": 2.6939659118652344, "log_odds_ratio": -0.30268973112106323, "logits/chosen": -0.7411539554595947, "logits/rejected": -0.764373242855072, "logps/chosen": -0.1007307767868042, "logps/rejected": -0.35213255882263184, "loss": 4.1591, "nll_loss": 1.0095144510269165, "rewards/accuracies": 0.75, "rewards/chosen": -0.01007307693362236, "rewards/margins": 0.025140177458524704, "rewards/rejected": -0.035213254392147064, "step": 547 }, { "epoch": 0.3789764868603043, "grad_norm": 3.8841028213500977, "learning_rate": 1.8948824343015214e-05, "log_odds_chosen": 2.9004340171813965, "log_odds_ratio": -0.23978981375694275, "logits/chosen": -0.5224136114120483, "logits/rejected": -0.525780439376831, "logps/chosen": -0.09610556066036224, "logps/rejected": -0.38277459144592285, "loss": 4.026, "nll_loss": 0.9825116395950317, "rewards/accuracies": 1.0, "rewards/chosen": -0.009610556997358799, "rewards/margins": 0.028666902333498, "rewards/rejected": -0.038277462124824524, "step": 548 }, { "epoch": 0.3796680497925311, "grad_norm": 4.983913898468018, "learning_rate": 1.8983402489626556e-05, "log_odds_chosen": 2.435208559036255, "log_odds_ratio": -0.41046613454818726, "logits/chosen": -0.8081510066986084, "logits/rejected": -0.792250394821167, "logps/chosen": -0.0936388149857521, "logps/rejected": -0.2591051161289215, "loss": 4.3009, "nll_loss": 1.0341734886169434, "rewards/accuracies": 0.75, "rewards/chosen": -0.00936388224363327, "rewards/margins": 0.01654663123190403, "rewards/rejected": -0.02591051161289215, "step": 549 }, { "epoch": 0.38035961272475793, "grad_norm": 3.430642604827881, "learning_rate": 1.9017980636237897e-05, "log_odds_chosen": 3.36209774017334, "log_odds_ratio": -0.246946781873703, "logits/chosen": -0.8941298723220825, "logits/rejected": -0.9081979990005493, "logps/chosen": -0.13310852646827698, "logps/rejected": -0.429534375667572, "loss": 3.6823, "nll_loss": 0.8958902359008789, "rewards/accuracies": 0.875, "rewards/chosen": -0.013310853391885757, "rewards/margins": 0.029642587527632713, "rewards/rejected": -0.04295343905687332, "step": 550 }, { "epoch": 0.38105117565698476, "grad_norm": 3.9265270233154297, "learning_rate": 1.905255878284924e-05, "log_odds_chosen": 1.881701946258545, "log_odds_ratio": -0.515370786190033, "logits/chosen": -0.9915303587913513, "logits/rejected": -0.9318628907203674, "logps/chosen": -0.10293899476528168, "logps/rejected": -0.3324930667877197, "loss": 3.9277, "nll_loss": 0.930385172367096, "rewards/accuracies": 0.75, "rewards/chosen": -0.010293899103999138, "rewards/margins": 0.022955408319830894, "rewards/rejected": -0.03324930742383003, "step": 551 }, { "epoch": 0.3817427385892116, "grad_norm": 4.866530418395996, "learning_rate": 1.908713692946058e-05, "log_odds_chosen": 0.5543054342269897, "log_odds_ratio": -0.7098829746246338, "logits/chosen": -1.2122690677642822, "logits/rejected": -1.1861469745635986, "logps/chosen": -0.19430440664291382, "logps/rejected": -0.2644846737384796, "loss": 4.9296, "nll_loss": 1.1614184379577637, "rewards/accuracies": 0.5, "rewards/chosen": -0.01943044178187847, "rewards/margins": 0.00701802596449852, "rewards/rejected": -0.02644846774637699, "step": 552 }, { "epoch": 0.3824343015214384, "grad_norm": 4.363667964935303, "learning_rate": 1.9121715076071922e-05, "log_odds_chosen": 1.6609487533569336, "log_odds_ratio": -0.22528058290481567, "logits/chosen": -1.2467082738876343, "logits/rejected": -1.256028413772583, "logps/chosen": -0.07459308207035065, "logps/rejected": -0.304107129573822, "loss": 4.7053, "nll_loss": 1.153801679611206, "rewards/accuracies": 1.0, "rewards/chosen": -0.007459308486431837, "rewards/margins": 0.022951405495405197, "rewards/rejected": -0.03041071444749832, "step": 553 }, { "epoch": 0.3831258644536653, "grad_norm": 4.976971626281738, "learning_rate": 1.9156293222683267e-05, "log_odds_chosen": 1.7708755731582642, "log_odds_ratio": -0.37831786274909973, "logits/chosen": -0.7834327220916748, "logits/rejected": -0.7940959930419922, "logps/chosen": -0.14502611756324768, "logps/rejected": -0.36740654706954956, "loss": 4.2815, "nll_loss": 1.0325376987457275, "rewards/accuracies": 0.875, "rewards/chosen": -0.014502611942589283, "rewards/margins": 0.022238047793507576, "rewards/rejected": -0.036740656942129135, "step": 554 }, { "epoch": 0.38381742738589214, "grad_norm": 3.4731082916259766, "learning_rate": 1.919087136929461e-05, "log_odds_chosen": 2.0094597339630127, "log_odds_ratio": -0.44111326336860657, "logits/chosen": -0.7475765943527222, "logits/rejected": -0.764420747756958, "logps/chosen": -0.116146519780159, "logps/rejected": -0.338163286447525, "loss": 3.6618, "nll_loss": 0.8713344931602478, "rewards/accuracies": 0.75, "rewards/chosen": -0.01161465235054493, "rewards/margins": 0.022201674059033394, "rewards/rejected": -0.03381632640957832, "step": 555 }, { "epoch": 0.38450899031811897, "grad_norm": 5.620976448059082, "learning_rate": 1.922544951590595e-05, "log_odds_chosen": 0.9785915017127991, "log_odds_ratio": -0.5120671391487122, "logits/chosen": -1.029029130935669, "logits/rejected": -1.0345444679260254, "logps/chosen": -0.14693522453308105, "logps/rejected": -0.3012365996837616, "loss": 4.3674, "nll_loss": 1.040635108947754, "rewards/accuracies": 0.75, "rewards/chosen": -0.014693522825837135, "rewards/margins": 0.015430137515068054, "rewards/rejected": -0.03012366034090519, "step": 556 }, { "epoch": 0.3852005532503458, "grad_norm": 6.698596477508545, "learning_rate": 1.926002766251729e-05, "log_odds_chosen": 1.5996103286743164, "log_odds_ratio": -0.8160156607627869, "logits/chosen": -0.6407162547111511, "logits/rejected": -0.6249457001686096, "logps/chosen": -0.18899376690387726, "logps/rejected": -0.3072316646575928, "loss": 3.864, "nll_loss": 0.8844013214111328, "rewards/accuracies": 0.5, "rewards/chosen": -0.018899379298090935, "rewards/margins": 0.011823788285255432, "rewards/rejected": -0.030723167583346367, "step": 557 }, { "epoch": 0.38589211618257263, "grad_norm": 6.878204345703125, "learning_rate": 1.9294605809128633e-05, "log_odds_chosen": 2.2048490047454834, "log_odds_ratio": -0.3432249128818512, "logits/chosen": -0.7492395639419556, "logits/rejected": -0.8243151307106018, "logps/chosen": -0.0980885699391365, "logps/rejected": -0.33490556478500366, "loss": 5.5948, "nll_loss": 1.364375114440918, "rewards/accuracies": 0.875, "rewards/chosen": -0.00980885699391365, "rewards/margins": 0.023681702092289925, "rewards/rejected": -0.033490557223558426, "step": 558 }, { "epoch": 0.38658367911479946, "grad_norm": 4.561729907989502, "learning_rate": 1.9329183955739975e-05, "log_odds_chosen": 2.502473831176758, "log_odds_ratio": -0.32938480377197266, "logits/chosen": -0.917141318321228, "logits/rejected": -0.9629212021827698, "logps/chosen": -0.13379818201065063, "logps/rejected": -0.3232942819595337, "loss": 4.1279, "nll_loss": 0.9990299940109253, "rewards/accuracies": 0.875, "rewards/chosen": -0.013379817828536034, "rewards/margins": 0.018949609249830246, "rewards/rejected": -0.03232942894101143, "step": 559 }, { "epoch": 0.3872752420470263, "grad_norm": 8.337691307067871, "learning_rate": 1.9363762102351316e-05, "log_odds_chosen": 2.1198501586914062, "log_odds_ratio": -0.4438222646713257, "logits/chosen": -0.7641869187355042, "logits/rejected": -0.7751225233078003, "logps/chosen": -0.14018933475017548, "logps/rejected": -0.5707254409790039, "loss": 5.6817, "nll_loss": 1.3760305643081665, "rewards/accuracies": 0.75, "rewards/chosen": -0.014018935151398182, "rewards/margins": 0.043053604662418365, "rewards/rejected": -0.05707254260778427, "step": 560 }, { "epoch": 0.3879668049792531, "grad_norm": 4.920252323150635, "learning_rate": 1.9398340248962658e-05, "log_odds_chosen": 0.6740873456001282, "log_odds_ratio": -0.5108063220977783, "logits/chosen": -0.8343955278396606, "logits/rejected": -0.8382387161254883, "logps/chosen": -0.13594703376293182, "logps/rejected": -0.21453788876533508, "loss": 4.8277, "nll_loss": 1.1558473110198975, "rewards/accuracies": 0.75, "rewards/chosen": -0.013594703748822212, "rewards/margins": 0.007859084755182266, "rewards/rejected": -0.02145378850400448, "step": 561 }, { "epoch": 0.38865836791147995, "grad_norm": 6.755950927734375, "learning_rate": 1.9432918395574e-05, "log_odds_chosen": 2.570904016494751, "log_odds_ratio": -0.3146067261695862, "logits/chosen": -0.9997011423110962, "logits/rejected": -1.0078551769256592, "logps/chosen": -0.12847094237804413, "logps/rejected": -0.4903247654438019, "loss": 5.5276, "nll_loss": 1.350435495376587, "rewards/accuracies": 1.0, "rewards/chosen": -0.012847093865275383, "rewards/margins": 0.036185383796691895, "rewards/rejected": -0.04903247952461243, "step": 562 }, { "epoch": 0.3893499308437068, "grad_norm": 5.547338962554932, "learning_rate": 1.946749654218534e-05, "log_odds_chosen": 0.8543901443481445, "log_odds_ratio": -0.570191502571106, "logits/chosen": -0.7356718182563782, "logits/rejected": -0.7660771012306213, "logps/chosen": -0.19542016088962555, "logps/rejected": -0.4686131477355957, "loss": 5.2079, "nll_loss": 1.2449650764465332, "rewards/accuracies": 0.625, "rewards/chosen": -0.019542016088962555, "rewards/margins": 0.027319299057126045, "rewards/rejected": -0.04686131328344345, "step": 563 }, { "epoch": 0.3900414937759336, "grad_norm": 5.287246227264404, "learning_rate": 1.9502074688796682e-05, "log_odds_chosen": 2.0964884757995605, "log_odds_ratio": -0.5565468668937683, "logits/chosen": -0.9580271244049072, "logits/rejected": -0.9527782797813416, "logps/chosen": -0.08842720836400986, "logps/rejected": -0.23043900728225708, "loss": 3.9914, "nll_loss": 0.9421975612640381, "rewards/accuracies": 0.75, "rewards/chosen": -0.008842721581459045, "rewards/margins": 0.014201181009411812, "rewards/rejected": -0.023043902590870857, "step": 564 }, { "epoch": 0.39073305670816044, "grad_norm": 8.411381721496582, "learning_rate": 1.9536652835408024e-05, "log_odds_chosen": 1.5264617204666138, "log_odds_ratio": -0.4682188034057617, "logits/chosen": -0.8114876747131348, "logits/rejected": -0.830116868019104, "logps/chosen": -0.31981879472732544, "logps/rejected": -0.5894278883934021, "loss": 7.672, "nll_loss": 1.8711817264556885, "rewards/accuracies": 0.75, "rewards/chosen": -0.031981877982616425, "rewards/margins": 0.026960909366607666, "rewards/rejected": -0.05894278734922409, "step": 565 }, { "epoch": 0.3914246196403873, "grad_norm": 9.792768478393555, "learning_rate": 1.9571230982019366e-05, "log_odds_chosen": 1.3716531991958618, "log_odds_ratio": -0.8037305474281311, "logits/chosen": -0.8083940744400024, "logits/rejected": -0.8525917530059814, "logps/chosen": -0.10131937265396118, "logps/rejected": -0.37159010767936707, "loss": 4.41, "nll_loss": 1.0221236944198608, "rewards/accuracies": 0.625, "rewards/chosen": -0.010131937451660633, "rewards/margins": 0.0270270723849535, "rewards/rejected": -0.03715901076793671, "step": 566 }, { "epoch": 0.3921161825726141, "grad_norm": 4.492656707763672, "learning_rate": 1.9605809128630707e-05, "log_odds_chosen": 3.694186210632324, "log_odds_ratio": -0.20484068989753723, "logits/chosen": -0.467551052570343, "logits/rejected": -0.4827098250389099, "logps/chosen": -0.07522110641002655, "logps/rejected": -0.4975152909755707, "loss": 3.9541, "nll_loss": 0.9680354595184326, "rewards/accuracies": 0.875, "rewards/chosen": -0.00752211082726717, "rewards/margins": 0.04222942143678665, "rewards/rejected": -0.049751535058021545, "step": 567 }, { "epoch": 0.39280774550484093, "grad_norm": 3.082396984100342, "learning_rate": 1.964038727524205e-05, "log_odds_chosen": 1.8576481342315674, "log_odds_ratio": -0.3983916640281677, "logits/chosen": -0.9992802739143372, "logits/rejected": -1.008885383605957, "logps/chosen": -0.10009613633155823, "logps/rejected": -0.3305109143257141, "loss": 3.2665, "nll_loss": 0.7767845392227173, "rewards/accuracies": 0.75, "rewards/chosen": -0.010009613819420338, "rewards/margins": 0.023041479289531708, "rewards/rejected": -0.03305109590291977, "step": 568 }, { "epoch": 0.39349930843706776, "grad_norm": 5.877315998077393, "learning_rate": 1.967496542185339e-05, "log_odds_chosen": 2.1820318698883057, "log_odds_ratio": -0.2573472261428833, "logits/chosen": -1.050216794013977, "logits/rejected": -1.063097596168518, "logps/chosen": -0.08402914553880692, "logps/rejected": -0.45616158843040466, "loss": 5.5968, "nll_loss": 1.3734532594680786, "rewards/accuracies": 1.0, "rewards/chosen": -0.008402915671467781, "rewards/margins": 0.037213243544101715, "rewards/rejected": -0.04561615735292435, "step": 569 }, { "epoch": 0.3941908713692946, "grad_norm": 3.782557487487793, "learning_rate": 1.9709543568464732e-05, "log_odds_chosen": 1.2247357368469238, "log_odds_ratio": -0.5550665259361267, "logits/chosen": -0.9844390153884888, "logits/rejected": -0.9861627221107483, "logps/chosen": -0.13897764682769775, "logps/rejected": -0.3307046592235565, "loss": 4.0865, "nll_loss": 0.9661211967468262, "rewards/accuracies": 0.625, "rewards/chosen": -0.01389776449650526, "rewards/margins": 0.019172698259353638, "rewards/rejected": -0.03307046368718147, "step": 570 }, { "epoch": 0.3948824343015214, "grad_norm": 4.290762901306152, "learning_rate": 1.9744121715076073e-05, "log_odds_chosen": 1.3584752082824707, "log_odds_ratio": -0.4768679738044739, "logits/chosen": -0.7764488458633423, "logits/rejected": -0.7982219457626343, "logps/chosen": -0.1681971698999405, "logps/rejected": -0.33647996187210083, "loss": 4.5738, "nll_loss": 1.0957714319229126, "rewards/accuracies": 0.875, "rewards/chosen": -0.01681971736252308, "rewards/margins": 0.016828283667564392, "rewards/rejected": -0.03364799916744232, "step": 571 }, { "epoch": 0.39557399723374825, "grad_norm": 4.797642230987549, "learning_rate": 1.9778699861687415e-05, "log_odds_chosen": 1.5044949054718018, "log_odds_ratio": -0.3889736235141754, "logits/chosen": -0.4714691638946533, "logits/rejected": -0.5175361633300781, "logps/chosen": -0.20814576745033264, "logps/rejected": -0.6175678968429565, "loss": 3.5804, "nll_loss": 0.8561970591545105, "rewards/accuracies": 0.75, "rewards/chosen": -0.020814577117562294, "rewards/margins": 0.04094220697879791, "rewards/rejected": -0.061756789684295654, "step": 572 }, { "epoch": 0.3962655601659751, "grad_norm": 6.371168613433838, "learning_rate": 1.9813278008298757e-05, "log_odds_chosen": 1.5652942657470703, "log_odds_ratio": -0.6328588724136353, "logits/chosen": -0.832341730594635, "logits/rejected": -0.7894372940063477, "logps/chosen": -0.07531416416168213, "logps/rejected": -0.16684159636497498, "loss": 3.9637, "nll_loss": 0.9276465177536011, "rewards/accuracies": 0.75, "rewards/chosen": -0.007531417068094015, "rewards/margins": 0.009152742102742195, "rewards/rejected": -0.016684159636497498, "step": 573 }, { "epoch": 0.3969571230982019, "grad_norm": 2.120250940322876, "learning_rate": 1.9847856154910098e-05, "log_odds_chosen": 3.8697755336761475, "log_odds_ratio": -0.16896286606788635, "logits/chosen": -1.1625442504882812, "logits/rejected": -1.2002800703048706, "logps/chosen": -0.06508542597293854, "logps/rejected": -0.5326133966445923, "loss": 2.7737, "nll_loss": 0.6765269637107849, "rewards/accuracies": 1.0, "rewards/chosen": -0.006508542690426111, "rewards/margins": 0.046752795577049255, "rewards/rejected": -0.05326133966445923, "step": 574 }, { "epoch": 0.39764868603042874, "grad_norm": 5.065700531005859, "learning_rate": 1.988243430152144e-05, "log_odds_chosen": 1.8119397163391113, "log_odds_ratio": -0.33278965950012207, "logits/chosen": -0.9758837223052979, "logits/rejected": -0.9816950559616089, "logps/chosen": -0.1355278044939041, "logps/rejected": -0.3939824402332306, "loss": 5.7527, "nll_loss": 1.404889464378357, "rewards/accuracies": 0.875, "rewards/chosen": -0.013552782125771046, "rewards/margins": 0.025845464318990707, "rewards/rejected": -0.03939824551343918, "step": 575 }, { "epoch": 0.3983402489626556, "grad_norm": 5.197612762451172, "learning_rate": 1.991701244813278e-05, "log_odds_chosen": 2.7448010444641113, "log_odds_ratio": -0.3634113669395447, "logits/chosen": -1.033627986907959, "logits/rejected": -1.049392580986023, "logps/chosen": -0.10442063957452774, "logps/rejected": -0.6225466728210449, "loss": 4.248, "nll_loss": 1.025671362876892, "rewards/accuracies": 0.875, "rewards/chosen": -0.010442064143717289, "rewards/margins": 0.05181260406970978, "rewards/rejected": -0.06225466728210449, "step": 576 }, { "epoch": 0.39903181189488246, "grad_norm": 4.7536115646362305, "learning_rate": 1.9951590594744123e-05, "log_odds_chosen": 2.725403070449829, "log_odds_ratio": -0.37399032711982727, "logits/chosen": -1.0591670274734497, "logits/rejected": -1.117004632949829, "logps/chosen": -0.07127417623996735, "logps/rejected": -0.3425430655479431, "loss": 3.4978, "nll_loss": 0.8370423316955566, "rewards/accuracies": 0.875, "rewards/chosen": -0.007127417717128992, "rewards/margins": 0.027126889675855637, "rewards/rejected": -0.03425430506467819, "step": 577 }, { "epoch": 0.3997233748271093, "grad_norm": 4.59316873550415, "learning_rate": 1.9986168741355464e-05, "log_odds_chosen": 1.6278140544891357, "log_odds_ratio": -0.3864939510822296, "logits/chosen": -1.1396081447601318, "logits/rejected": -1.1573009490966797, "logps/chosen": -0.12526187300682068, "logps/rejected": -0.4201142191886902, "loss": 4.8807, "nll_loss": 1.1815369129180908, "rewards/accuracies": 0.875, "rewards/chosen": -0.012526188045740128, "rewards/margins": 0.02948523312807083, "rewards/rejected": -0.04201142117381096, "step": 578 }, { "epoch": 0.4004149377593361, "grad_norm": 7.344080924987793, "learning_rate": 2.0020746887966806e-05, "log_odds_chosen": 3.3043441772460938, "log_odds_ratio": -0.28534916043281555, "logits/chosen": -0.6253362894058228, "logits/rejected": -0.6439019441604614, "logps/chosen": -0.10288303345441818, "logps/rejected": -0.6352183818817139, "loss": 4.3995, "nll_loss": 1.0713284015655518, "rewards/accuracies": 0.875, "rewards/chosen": -0.010288302786648273, "rewards/margins": 0.05323353409767151, "rewards/rejected": -0.0635218396782875, "step": 579 }, { "epoch": 0.40110650069156295, "grad_norm": 5.617142200469971, "learning_rate": 2.0055325034578147e-05, "log_odds_chosen": 1.7103029489517212, "log_odds_ratio": -0.7923794984817505, "logits/chosen": -0.8352532386779785, "logits/rejected": -0.8463157415390015, "logps/chosen": -0.16378286480903625, "logps/rejected": -0.38926514983177185, "loss": 4.3002, "nll_loss": 0.9958136677742004, "rewards/accuracies": 0.5, "rewards/chosen": -0.016378287225961685, "rewards/margins": 0.02254822477698326, "rewards/rejected": -0.038926515728235245, "step": 580 }, { "epoch": 0.4017980636237898, "grad_norm": 3.304642677307129, "learning_rate": 2.008990318118949e-05, "log_odds_chosen": 3.5990259647369385, "log_odds_ratio": -0.14686137437820435, "logits/chosen": -0.756050705909729, "logits/rejected": -0.8004224300384521, "logps/chosen": -0.06934195756912231, "logps/rejected": -0.5959687829017639, "loss": 3.3677, "nll_loss": 0.8272408843040466, "rewards/accuracies": 1.0, "rewards/chosen": -0.0069341957569122314, "rewards/margins": 0.0526626855134964, "rewards/rejected": -0.05959688127040863, "step": 581 }, { "epoch": 0.4024896265560166, "grad_norm": 4.192474842071533, "learning_rate": 2.012448132780083e-05, "log_odds_chosen": 1.6783723831176758, "log_odds_ratio": -0.3463253378868103, "logits/chosen": -0.640341579914093, "logits/rejected": -0.6563262939453125, "logps/chosen": -0.18390527367591858, "logps/rejected": -0.6369175314903259, "loss": 4.1321, "nll_loss": 0.9983953833580017, "rewards/accuracies": 0.875, "rewards/chosen": -0.018390528857707977, "rewards/margins": 0.045301225036382675, "rewards/rejected": -0.06369175761938095, "step": 582 }, { "epoch": 0.40318118948824344, "grad_norm": 3.8522253036499023, "learning_rate": 2.0159059474412172e-05, "log_odds_chosen": 0.8378918170928955, "log_odds_ratio": -0.5052666664123535, "logits/chosen": -0.7618661522865295, "logits/rejected": -0.7571154236793518, "logps/chosen": -0.1491318643093109, "logps/rejected": -0.29800164699554443, "loss": 4.1378, "nll_loss": 0.9839212894439697, "rewards/accuracies": 0.625, "rewards/chosen": -0.01491318829357624, "rewards/margins": 0.014886979945003986, "rewards/rejected": -0.029800167307257652, "step": 583 }, { "epoch": 0.40387275242047027, "grad_norm": 4.542193412780762, "learning_rate": 2.0193637621023514e-05, "log_odds_chosen": 2.0368504524230957, "log_odds_ratio": -0.3491406738758087, "logits/chosen": -0.9571303129196167, "logits/rejected": -1.0110360383987427, "logps/chosen": -0.1318981647491455, "logps/rejected": -0.48875343799591064, "loss": 4.4299, "nll_loss": 1.0725702047348022, "rewards/accuracies": 0.75, "rewards/chosen": -0.01318981684744358, "rewards/margins": 0.03568553179502487, "rewards/rejected": -0.048875343054533005, "step": 584 }, { "epoch": 0.4045643153526971, "grad_norm": 4.351795673370361, "learning_rate": 2.0228215767634855e-05, "log_odds_chosen": 1.6439603567123413, "log_odds_ratio": -0.4035801887512207, "logits/chosen": -0.8380446434020996, "logits/rejected": -0.9095944762229919, "logps/chosen": -0.1933341771364212, "logps/rejected": -0.36412349343299866, "loss": 4.176, "nll_loss": 1.003645896911621, "rewards/accuracies": 0.875, "rewards/chosen": -0.01933341845870018, "rewards/margins": 0.017078930512070656, "rewards/rejected": -0.036412350833415985, "step": 585 }, { "epoch": 0.40525587828492393, "grad_norm": 6.966506004333496, "learning_rate": 2.0262793914246197e-05, "log_odds_chosen": 4.041241645812988, "log_odds_ratio": -0.7675859928131104, "logits/chosen": -0.9329093098640442, "logits/rejected": -0.9795863628387451, "logps/chosen": -0.1338355988264084, "logps/rejected": -0.6674110889434814, "loss": 4.2325, "nll_loss": 0.9813593626022339, "rewards/accuracies": 0.75, "rewards/chosen": -0.013383558951318264, "rewards/margins": 0.053357549011707306, "rewards/rejected": -0.06674110889434814, "step": 586 }, { "epoch": 0.40594744121715076, "grad_norm": 4.690940856933594, "learning_rate": 2.029737206085754e-05, "log_odds_chosen": 0.8209674954414368, "log_odds_ratio": -0.5149362683296204, "logits/chosen": -1.1141588687896729, "logits/rejected": -1.1204617023468018, "logps/chosen": -0.2986541986465454, "logps/rejected": -0.5440924167633057, "loss": 5.1435, "nll_loss": 1.2343727350234985, "rewards/accuracies": 0.625, "rewards/chosen": -0.02986541949212551, "rewards/margins": 0.024543821811676025, "rewards/rejected": -0.054409243166446686, "step": 587 }, { "epoch": 0.4066390041493776, "grad_norm": 4.465209007263184, "learning_rate": 2.033195020746888e-05, "log_odds_chosen": 2.6052603721618652, "log_odds_ratio": -0.5808659791946411, "logits/chosen": -0.8501052856445312, "logits/rejected": -0.8835092782974243, "logps/chosen": -0.2466723620891571, "logps/rejected": -0.7465604543685913, "loss": 4.3399, "nll_loss": 1.026882290840149, "rewards/accuracies": 0.75, "rewards/chosen": -0.02466723695397377, "rewards/margins": 0.04998881369829178, "rewards/rejected": -0.07465604692697525, "step": 588 }, { "epoch": 0.4073305670816044, "grad_norm": 4.006109714508057, "learning_rate": 2.036652835408022e-05, "log_odds_chosen": 1.892028570175171, "log_odds_ratio": -0.562629759311676, "logits/chosen": -0.8217758536338806, "logits/rejected": -0.8682578802108765, "logps/chosen": -0.20783497393131256, "logps/rejected": -0.3692111372947693, "loss": 3.471, "nll_loss": 0.8114974498748779, "rewards/accuracies": 0.75, "rewards/chosen": -0.020783497020602226, "rewards/margins": 0.016137616708874702, "rewards/rejected": -0.03692111372947693, "step": 589 }, { "epoch": 0.40802213001383125, "grad_norm": 3.260128974914551, "learning_rate": 2.0401106500691563e-05, "log_odds_chosen": 3.6313135623931885, "log_odds_ratio": -0.28211459517478943, "logits/chosen": -0.6204289793968201, "logits/rejected": -0.6435818672180176, "logps/chosen": -0.13274721801280975, "logps/rejected": -0.507023811340332, "loss": 4.3703, "nll_loss": 1.0643757581710815, "rewards/accuracies": 0.875, "rewards/chosen": -0.0132747208699584, "rewards/margins": 0.037427663803100586, "rewards/rejected": -0.05070238560438156, "step": 590 }, { "epoch": 0.4087136929460581, "grad_norm": 3.4640462398529053, "learning_rate": 2.0435684647302905e-05, "log_odds_chosen": 2.581511974334717, "log_odds_ratio": -0.2007879614830017, "logits/chosen": -1.0353610515594482, "logits/rejected": -0.992326021194458, "logps/chosen": -0.08307419717311859, "logps/rejected": -0.6447066068649292, "loss": 3.3652, "nll_loss": 0.8212136030197144, "rewards/accuracies": 0.875, "rewards/chosen": -0.008307419717311859, "rewards/margins": 0.0561632364988327, "rewards/rejected": -0.06447066366672516, "step": 591 }, { "epoch": 0.4094052558782849, "grad_norm": 3.2936601638793945, "learning_rate": 2.0470262793914246e-05, "log_odds_chosen": 5.063530921936035, "log_odds_ratio": -0.11747785657644272, "logits/chosen": -0.9911887645721436, "logits/rejected": -0.9868468046188354, "logps/chosen": -0.03351970762014389, "logps/rejected": -0.7749127149581909, "loss": 4.5265, "nll_loss": 1.1198837757110596, "rewards/accuracies": 0.875, "rewards/chosen": -0.003351970575749874, "rewards/margins": 0.0741392970085144, "rewards/rejected": -0.07749126851558685, "step": 592 }, { "epoch": 0.41009681881051174, "grad_norm": 4.080872058868408, "learning_rate": 2.0504840940525588e-05, "log_odds_chosen": 1.4647037982940674, "log_odds_ratio": -0.47586244344711304, "logits/chosen": -0.7562670707702637, "logits/rejected": -0.7988969087600708, "logps/chosen": -0.07946252077817917, "logps/rejected": -0.2722882926464081, "loss": 3.7475, "nll_loss": 0.8892887234687805, "rewards/accuracies": 0.75, "rewards/chosen": -0.007946252822875977, "rewards/margins": 0.01928257942199707, "rewards/rejected": -0.027228832244873047, "step": 593 }, { "epoch": 0.4107883817427386, "grad_norm": 2.9525630474090576, "learning_rate": 2.053941908713693e-05, "log_odds_chosen": 3.360999584197998, "log_odds_ratio": -0.30566781759262085, "logits/chosen": -0.8931617140769958, "logits/rejected": -0.946820855140686, "logps/chosen": -0.07567352056503296, "logps/rejected": -0.4830871820449829, "loss": 3.7883, "nll_loss": 0.9165017604827881, "rewards/accuracies": 0.875, "rewards/chosen": -0.007567352615296841, "rewards/margins": 0.040741365402936935, "rewards/rejected": -0.04830871894955635, "step": 594 }, { "epoch": 0.4114799446749654, "grad_norm": 3.956282377243042, "learning_rate": 2.057399723374827e-05, "log_odds_chosen": 1.7785038948059082, "log_odds_ratio": -0.4785193204879761, "logits/chosen": -1.0089384317398071, "logits/rejected": -0.9966273307800293, "logps/chosen": -0.1856726109981537, "logps/rejected": -0.36326441168785095, "loss": 4.9856, "nll_loss": 1.1985530853271484, "rewards/accuracies": 0.75, "rewards/chosen": -0.018567262217402458, "rewards/margins": 0.017759177833795547, "rewards/rejected": -0.036326438188552856, "step": 595 }, { "epoch": 0.41217150760719223, "grad_norm": 2.674511194229126, "learning_rate": 2.0608575380359612e-05, "log_odds_chosen": 3.8712658882141113, "log_odds_ratio": -0.21342608332633972, "logits/chosen": -0.9700406789779663, "logits/rejected": -0.952599287033081, "logps/chosen": -0.08244549483060837, "logps/rejected": -0.7506368160247803, "loss": 3.3317, "nll_loss": 0.811578094959259, "rewards/accuracies": 0.875, "rewards/chosen": -0.008244549855589867, "rewards/margins": 0.06681913137435913, "rewards/rejected": -0.07506367564201355, "step": 596 }, { "epoch": 0.41286307053941906, "grad_norm": 3.9791877269744873, "learning_rate": 2.0643153526970954e-05, "log_odds_chosen": 3.953857421875, "log_odds_ratio": -0.22624865174293518, "logits/chosen": -1.003037452697754, "logits/rejected": -1.02415931224823, "logps/chosen": -0.0745580866932869, "logps/rejected": -0.60592120885849, "loss": 4.6389, "nll_loss": 1.137102484703064, "rewards/accuracies": 0.875, "rewards/chosen": -0.007455809041857719, "rewards/margins": 0.05313631147146225, "rewards/rejected": -0.06059212237596512, "step": 597 }, { "epoch": 0.4135546334716459, "grad_norm": 4.131697177886963, "learning_rate": 2.0677731673582296e-05, "log_odds_chosen": 1.4341011047363281, "log_odds_ratio": -0.36636972427368164, "logits/chosen": -0.6704150438308716, "logits/rejected": -0.6725776195526123, "logps/chosen": -0.15876276791095734, "logps/rejected": -0.4130246639251709, "loss": 4.1232, "nll_loss": 0.9941583871841431, "rewards/accuracies": 0.875, "rewards/chosen": -0.015876278281211853, "rewards/margins": 0.025426192209124565, "rewards/rejected": -0.04130247235298157, "step": 598 }, { "epoch": 0.4142461964038728, "grad_norm": 3.8321592807769775, "learning_rate": 2.071230982019364e-05, "log_odds_chosen": 2.999331474304199, "log_odds_ratio": -0.2566238045692444, "logits/chosen": -0.7986801862716675, "logits/rejected": -0.8267126679420471, "logps/chosen": -0.15586483478546143, "logps/rejected": -0.5835660696029663, "loss": 3.7385, "nll_loss": 0.9089583158493042, "rewards/accuracies": 0.875, "rewards/chosen": -0.015586483292281628, "rewards/margins": 0.04277012497186661, "rewards/rejected": -0.05835660919547081, "step": 599 }, { "epoch": 0.4149377593360996, "grad_norm": 9.953721046447754, "learning_rate": 2.0746887966804982e-05, "log_odds_chosen": 2.450714588165283, "log_odds_ratio": -0.8266931176185608, "logits/chosen": -1.1641135215759277, "logits/rejected": -1.2072113752365112, "logps/chosen": -0.1697208136320114, "logps/rejected": -0.699099600315094, "loss": 4.7114, "nll_loss": 1.095177412033081, "rewards/accuracies": 0.75, "rewards/chosen": -0.01697208173573017, "rewards/margins": 0.05293788015842438, "rewards/rejected": -0.0699099600315094, "step": 600 }, { "epoch": 0.41562932226832644, "grad_norm": 4.421914100646973, "learning_rate": 2.0781466113416324e-05, "log_odds_chosen": 1.5719330310821533, "log_odds_ratio": -0.40029793977737427, "logits/chosen": -0.844862699508667, "logits/rejected": -0.8587499856948853, "logps/chosen": -0.20900601148605347, "logps/rejected": -0.47964876890182495, "loss": 4.1449, "nll_loss": 0.9961846470832825, "rewards/accuracies": 0.625, "rewards/chosen": -0.020900603383779526, "rewards/margins": 0.02706427499651909, "rewards/rejected": -0.047964878380298615, "step": 601 }, { "epoch": 0.41632088520055327, "grad_norm": 7.170130252838135, "learning_rate": 2.0816044260027665e-05, "log_odds_chosen": 2.6125829219818115, "log_odds_ratio": -0.6497005224227905, "logits/chosen": -0.9419523477554321, "logits/rejected": -0.9898269176483154, "logps/chosen": -0.13422423601150513, "logps/rejected": -0.6038023829460144, "loss": 3.9398, "nll_loss": 0.9199838638305664, "rewards/accuracies": 0.75, "rewards/chosen": -0.013422423973679543, "rewards/margins": 0.04695781320333481, "rewards/rejected": -0.0603802390396595, "step": 602 }, { "epoch": 0.4170124481327801, "grad_norm": 7.772970199584961, "learning_rate": 2.0850622406639007e-05, "log_odds_chosen": 3.680398464202881, "log_odds_ratio": -0.5544787049293518, "logits/chosen": -0.5327585339546204, "logits/rejected": -0.5497394800186157, "logps/chosen": -0.07278777658939362, "logps/rejected": -0.6879750490188599, "loss": 4.1216, "nll_loss": 0.9749466180801392, "rewards/accuracies": 0.875, "rewards/chosen": -0.007278777658939362, "rewards/margins": 0.06151873245835304, "rewards/rejected": -0.0687975138425827, "step": 603 }, { "epoch": 0.41770401106500693, "grad_norm": 4.757108688354492, "learning_rate": 2.088520055325035e-05, "log_odds_chosen": 2.0446386337280273, "log_odds_ratio": -0.5148335695266724, "logits/chosen": -0.6910528540611267, "logits/rejected": -0.6742294430732727, "logps/chosen": -0.14390459656715393, "logps/rejected": -0.4189976751804352, "loss": 5.4156, "nll_loss": 1.3024276494979858, "rewards/accuracies": 0.75, "rewards/chosen": -0.014390461146831512, "rewards/margins": 0.027509307488799095, "rewards/rejected": -0.04189977049827576, "step": 604 }, { "epoch": 0.41839557399723376, "grad_norm": 8.23976993560791, "learning_rate": 2.091977869986169e-05, "log_odds_chosen": 1.552129864692688, "log_odds_ratio": -0.6863776445388794, "logits/chosen": -0.557546854019165, "logits/rejected": -0.6108566522598267, "logps/chosen": -0.21034224331378937, "logps/rejected": -0.514655590057373, "loss": 6.1186, "nll_loss": 1.4610066413879395, "rewards/accuracies": 0.75, "rewards/chosen": -0.021034223958849907, "rewards/margins": 0.030431339517235756, "rewards/rejected": -0.05146556347608566, "step": 605 }, { "epoch": 0.4190871369294606, "grad_norm": 3.723231077194214, "learning_rate": 2.095435684647303e-05, "log_odds_chosen": 1.9156570434570312, "log_odds_ratio": -0.49034303426742554, "logits/chosen": -1.0356037616729736, "logits/rejected": -1.0161478519439697, "logps/chosen": -0.10302520543336868, "logps/rejected": -0.24236340820789337, "loss": 4.8403, "nll_loss": 1.1610405445098877, "rewards/accuracies": 0.625, "rewards/chosen": -0.010302520357072353, "rewards/margins": 0.013933821581304073, "rewards/rejected": -0.024236343801021576, "step": 606 }, { "epoch": 0.4197786998616874, "grad_norm": 7.568348407745361, "learning_rate": 2.0988934993084373e-05, "log_odds_chosen": 2.4463589191436768, "log_odds_ratio": -0.4542555809020996, "logits/chosen": -0.4883726239204407, "logits/rejected": -0.5257099270820618, "logps/chosen": -0.10223409533500671, "logps/rejected": -0.5903890132904053, "loss": 4.2713, "nll_loss": 1.0224037170410156, "rewards/accuracies": 0.625, "rewards/chosen": -0.010223409160971642, "rewards/margins": 0.04881549999117851, "rewards/rejected": -0.059038907289505005, "step": 607 }, { "epoch": 0.42047026279391425, "grad_norm": 7.222330570220947, "learning_rate": 2.1023513139695715e-05, "log_odds_chosen": 1.1048989295959473, "log_odds_ratio": -0.7384801506996155, "logits/chosen": -0.7793760299682617, "logits/rejected": -0.7979192733764648, "logps/chosen": -0.16994166374206543, "logps/rejected": -0.4164144992828369, "loss": 5.7197, "nll_loss": 1.3560773134231567, "rewards/accuracies": 0.75, "rewards/chosen": -0.016994165256619453, "rewards/margins": 0.024647288024425507, "rewards/rejected": -0.04164145141839981, "step": 608 }, { "epoch": 0.4211618257261411, "grad_norm": 5.54396915435791, "learning_rate": 2.1058091286307056e-05, "log_odds_chosen": 2.350543975830078, "log_odds_ratio": -0.32651910185813904, "logits/chosen": -0.6226840615272522, "logits/rejected": -0.6696463227272034, "logps/chosen": -0.14187708497047424, "logps/rejected": -0.6169252395629883, "loss": 5.5046, "nll_loss": 1.343508005142212, "rewards/accuracies": 0.625, "rewards/chosen": -0.01418770756572485, "rewards/margins": 0.047504812479019165, "rewards/rejected": -0.06169252097606659, "step": 609 }, { "epoch": 0.4218533886583679, "grad_norm": 7.533912181854248, "learning_rate": 2.1092669432918398e-05, "log_odds_chosen": 3.0908732414245605, "log_odds_ratio": -0.41519248485565186, "logits/chosen": -0.8241167068481445, "logits/rejected": -0.9262205958366394, "logps/chosen": -0.10551971942186356, "logps/rejected": -0.5151045322418213, "loss": 3.5644, "nll_loss": 0.8495787382125854, "rewards/accuracies": 0.875, "rewards/chosen": -0.010551970452070236, "rewards/margins": 0.04095848649740219, "rewards/rejected": -0.05151045694947243, "step": 610 }, { "epoch": 0.42254495159059474, "grad_norm": 3.861281633377075, "learning_rate": 2.112724757952974e-05, "log_odds_chosen": 2.33772349357605, "log_odds_ratio": -0.4349439740180969, "logits/chosen": -0.7974127531051636, "logits/rejected": -0.836585283279419, "logps/chosen": -0.11797378957271576, "logps/rejected": -0.3172440528869629, "loss": 3.3181, "nll_loss": 0.7860289812088013, "rewards/accuracies": 0.625, "rewards/chosen": -0.011797377839684486, "rewards/margins": 0.019927026703953743, "rewards/rejected": -0.03172440454363823, "step": 611 }, { "epoch": 0.42323651452282157, "grad_norm": 5.57545280456543, "learning_rate": 2.116182572614108e-05, "log_odds_chosen": 1.7101471424102783, "log_odds_ratio": -0.4371544122695923, "logits/chosen": -0.9524636268615723, "logits/rejected": -0.9920735359191895, "logps/chosen": -0.15005937218666077, "logps/rejected": -0.39813879132270813, "loss": 5.6735, "nll_loss": 1.374664306640625, "rewards/accuracies": 0.625, "rewards/chosen": -0.015005936846137047, "rewards/margins": 0.024807943031191826, "rewards/rejected": -0.03981388360261917, "step": 612 }, { "epoch": 0.4239280774550484, "grad_norm": 5.678771495819092, "learning_rate": 2.1196403872752422e-05, "log_odds_chosen": 0.9636862277984619, "log_odds_ratio": -0.9654866456985474, "logits/chosen": -0.7794173955917358, "logits/rejected": -0.7428369522094727, "logps/chosen": -0.10875187069177628, "logps/rejected": -0.3289680778980255, "loss": 5.6094, "nll_loss": 1.3057971000671387, "rewards/accuracies": 0.625, "rewards/chosen": -0.010875186882913113, "rewards/margins": 0.022021621465682983, "rewards/rejected": -0.03289680555462837, "step": 613 }, { "epoch": 0.42461964038727523, "grad_norm": 4.860713958740234, "learning_rate": 2.1230982019363764e-05, "log_odds_chosen": 1.2260526418685913, "log_odds_ratio": -0.5476371645927429, "logits/chosen": -0.8112804889678955, "logits/rejected": -0.8208972215652466, "logps/chosen": -0.12507055699825287, "logps/rejected": -0.20496192574501038, "loss": 3.966, "nll_loss": 0.9367334246635437, "rewards/accuracies": 0.625, "rewards/chosen": -0.012507054954767227, "rewards/margins": 0.007989136502146721, "rewards/rejected": -0.020496191456913948, "step": 614 }, { "epoch": 0.42531120331950206, "grad_norm": 4.082617282867432, "learning_rate": 2.1265560165975106e-05, "log_odds_chosen": 1.8591127395629883, "log_odds_ratio": -0.3332652747631073, "logits/chosen": -0.9899469614028931, "logits/rejected": -0.9953739047050476, "logps/chosen": -0.13035787642002106, "logps/rejected": -0.4335346519947052, "loss": 4.61, "nll_loss": 1.1191822290420532, "rewards/accuracies": 0.875, "rewards/chosen": -0.013035789132118225, "rewards/margins": 0.030317679047584534, "rewards/rejected": -0.04335346817970276, "step": 615 }, { "epoch": 0.4260027662517289, "grad_norm": 3.619682550430298, "learning_rate": 2.1300138312586447e-05, "log_odds_chosen": 2.190673828125, "log_odds_ratio": -0.4910670518875122, "logits/chosen": -0.5571558475494385, "logits/rejected": -0.5376981496810913, "logps/chosen": -0.11940689384937286, "logps/rejected": -0.520751953125, "loss": 3.6402, "nll_loss": 0.8609509468078613, "rewards/accuracies": 0.75, "rewards/chosen": -0.011940689757466316, "rewards/margins": 0.04013450816273689, "rewards/rejected": -0.05207519978284836, "step": 616 }, { "epoch": 0.4266943291839557, "grad_norm": 2.6865475177764893, "learning_rate": 2.133471645919779e-05, "log_odds_chosen": 3.608241319656372, "log_odds_ratio": -0.2808303236961365, "logits/chosen": -0.7004544734954834, "logits/rejected": -0.7063158750534058, "logps/chosen": -0.10437381267547607, "logps/rejected": -0.4118492007255554, "loss": 2.8528, "nll_loss": 0.6851093173027039, "rewards/accuracies": 0.875, "rewards/chosen": -0.010437380522489548, "rewards/margins": 0.030747540295124054, "rewards/rejected": -0.0411849245429039, "step": 617 }, { "epoch": 0.42738589211618255, "grad_norm": 2.807631731033325, "learning_rate": 2.136929460580913e-05, "log_odds_chosen": 3.1713521480560303, "log_odds_ratio": -0.2818281054496765, "logits/chosen": -0.19942578673362732, "logits/rejected": -0.23111185431480408, "logps/chosen": -0.1086578443646431, "logps/rejected": -0.46914684772491455, "loss": 3.9685, "nll_loss": 0.9639319777488708, "rewards/accuracies": 1.0, "rewards/chosen": -0.01086578518152237, "rewards/margins": 0.036048900336027145, "rewards/rejected": -0.046914685517549515, "step": 618 }, { "epoch": 0.4280774550484094, "grad_norm": 3.6067707538604736, "learning_rate": 2.1403872752420472e-05, "log_odds_chosen": 2.436103582382202, "log_odds_ratio": -0.1551538109779358, "logits/chosen": -0.8461130261421204, "logits/rejected": -0.9088087677955627, "logps/chosen": -0.1261477768421173, "logps/rejected": -0.8308358192443848, "loss": 3.525, "nll_loss": 0.865744948387146, "rewards/accuracies": 1.0, "rewards/chosen": -0.012614777311682701, "rewards/margins": 0.07046880573034286, "rewards/rejected": -0.08308358490467072, "step": 619 }, { "epoch": 0.4287690179806362, "grad_norm": 3.759319305419922, "learning_rate": 2.1438450899031813e-05, "log_odds_chosen": 3.011648178100586, "log_odds_ratio": -0.40124091506004333, "logits/chosen": -0.7172807455062866, "logits/rejected": -0.7169740796089172, "logps/chosen": -0.17018908262252808, "logps/rejected": -0.4564322233200073, "loss": 4.2084, "nll_loss": 1.0119715929031372, "rewards/accuracies": 0.75, "rewards/chosen": -0.017018906772136688, "rewards/margins": 0.028624314814805984, "rewards/rejected": -0.04564322531223297, "step": 620 }, { "epoch": 0.42946058091286304, "grad_norm": 3.819620370864868, "learning_rate": 2.147302904564315e-05, "log_odds_chosen": 2.1871187686920166, "log_odds_ratio": -0.30014118552207947, "logits/chosen": -0.5669596791267395, "logits/rejected": -0.5761108994483948, "logps/chosen": -0.14405927062034607, "logps/rejected": -0.31124773621559143, "loss": 4.5619, "nll_loss": 1.1104607582092285, "rewards/accuracies": 0.75, "rewards/chosen": -0.014405926689505577, "rewards/margins": 0.016718847677111626, "rewards/rejected": -0.031124770641326904, "step": 621 }, { "epoch": 0.43015214384508993, "grad_norm": 3.3658599853515625, "learning_rate": 2.1507607192254497e-05, "log_odds_chosen": 1.3140965700149536, "log_odds_ratio": -0.3687862455844879, "logits/chosen": -0.48315146565437317, "logits/rejected": -0.4790656566619873, "logps/chosen": -0.14348377287387848, "logps/rejected": -0.553962767124176, "loss": 3.5421, "nll_loss": 0.8486409187316895, "rewards/accuracies": 0.75, "rewards/chosen": -0.014348377473652363, "rewards/margins": 0.041047900915145874, "rewards/rejected": -0.055396273732185364, "step": 622 }, { "epoch": 0.43084370677731676, "grad_norm": 4.934183120727539, "learning_rate": 2.1542185338865838e-05, "log_odds_chosen": 0.5347107648849487, "log_odds_ratio": -0.8448858857154846, "logits/chosen": -0.739535927772522, "logits/rejected": -0.7089710235595703, "logps/chosen": -0.21264883875846863, "logps/rejected": -0.19111944735050201, "loss": 4.5833, "nll_loss": 1.0613362789154053, "rewards/accuracies": 0.375, "rewards/chosen": -0.021264884620904922, "rewards/margins": -0.0021529390942305326, "rewards/rejected": -0.01911194622516632, "step": 623 }, { "epoch": 0.4315352697095436, "grad_norm": 3.4687881469726562, "learning_rate": 2.157676348547718e-05, "log_odds_chosen": 2.592643976211548, "log_odds_ratio": -0.42990919947624207, "logits/chosen": -0.5274770259857178, "logits/rejected": -0.564875066280365, "logps/chosen": -0.14812719821929932, "logps/rejected": -0.48280104994773865, "loss": 4.1303, "nll_loss": 0.9895771741867065, "rewards/accuracies": 0.625, "rewards/chosen": -0.014812720008194447, "rewards/margins": 0.033467382192611694, "rewards/rejected": -0.048280104994773865, "step": 624 }, { "epoch": 0.4322268326417704, "grad_norm": 6.197664260864258, "learning_rate": 2.161134163208852e-05, "log_odds_chosen": 1.2445203065872192, "log_odds_ratio": -0.5221874713897705, "logits/chosen": -0.16059088706970215, "logits/rejected": -0.23864303529262543, "logps/chosen": -0.16100680828094482, "logps/rejected": -0.32815873622894287, "loss": 4.8393, "nll_loss": 1.1576130390167236, "rewards/accuracies": 0.5, "rewards/chosen": -0.016100682318210602, "rewards/margins": 0.016715193167328835, "rewards/rejected": -0.03281587362289429, "step": 625 }, { "epoch": 0.43291839557399725, "grad_norm": 5.179163455963135, "learning_rate": 2.1645919778699863e-05, "log_odds_chosen": 1.6051888465881348, "log_odds_ratio": -0.40840965509414673, "logits/chosen": -0.9315775036811829, "logits/rejected": -0.9024972915649414, "logps/chosen": -0.17119812965393066, "logps/rejected": -0.3284410238265991, "loss": 4.5274, "nll_loss": 1.0910115242004395, "rewards/accuracies": 0.875, "rewards/chosen": -0.017119813710451126, "rewards/margins": 0.015724290162324905, "rewards/rejected": -0.03284410387277603, "step": 626 }, { "epoch": 0.4336099585062241, "grad_norm": 6.263176918029785, "learning_rate": 2.1680497925311204e-05, "log_odds_chosen": 2.8963382244110107, "log_odds_ratio": -0.7403358817100525, "logits/chosen": -0.8731557130813599, "logits/rejected": -0.8686691522598267, "logps/chosen": -0.11738395690917969, "logps/rejected": -0.5990802049636841, "loss": 3.758, "nll_loss": 0.8654546141624451, "rewards/accuracies": 0.75, "rewards/chosen": -0.011738396249711514, "rewards/margins": 0.04816962778568268, "rewards/rejected": -0.05990801751613617, "step": 627 }, { "epoch": 0.4343015214384509, "grad_norm": 4.25787878036499, "learning_rate": 2.1715076071922546e-05, "log_odds_chosen": 3.804713487625122, "log_odds_ratio": -0.11228744685649872, "logits/chosen": -0.5167519450187683, "logits/rejected": -0.5582841634750366, "logps/chosen": -0.05870117247104645, "logps/rejected": -0.5987078547477722, "loss": 4.6895, "nll_loss": 1.1611560583114624, "rewards/accuracies": 1.0, "rewards/chosen": -0.005870117340236902, "rewards/margins": 0.05400066822767258, "rewards/rejected": -0.05987078696489334, "step": 628 }, { "epoch": 0.43499308437067774, "grad_norm": 5.743461608886719, "learning_rate": 2.1749654218533887e-05, "log_odds_chosen": 0.03474217653274536, "log_odds_ratio": -0.8090510964393616, "logits/chosen": -0.890548586845398, "logits/rejected": -0.907269299030304, "logps/chosen": -0.3029828667640686, "logps/rejected": -0.2693331241607666, "loss": 4.6926, "nll_loss": 1.0922552347183228, "rewards/accuracies": 0.625, "rewards/chosen": -0.03029828704893589, "rewards/margins": -0.003364972770214081, "rewards/rejected": -0.02693331427872181, "step": 629 }, { "epoch": 0.43568464730290457, "grad_norm": 4.5716352462768555, "learning_rate": 2.178423236514523e-05, "log_odds_chosen": 0.03434586524963379, "log_odds_ratio": -0.7138271331787109, "logits/chosen": -0.7514895796775818, "logits/rejected": -0.7773147821426392, "logps/chosen": -0.21131566166877747, "logps/rejected": -0.22896406054496765, "loss": 5.0569, "nll_loss": 1.192832589149475, "rewards/accuracies": 0.5, "rewards/chosen": -0.021131567656993866, "rewards/margins": 0.0017648395150899887, "rewards/rejected": -0.022896405309438705, "step": 630 }, { "epoch": 0.4363762102351314, "grad_norm": 4.4561686515808105, "learning_rate": 2.181881051175657e-05, "log_odds_chosen": 1.4079740047454834, "log_odds_ratio": -0.5032125115394592, "logits/chosen": -0.927483320236206, "logits/rejected": -0.8899856805801392, "logps/chosen": -0.1549498587846756, "logps/rejected": -0.4571008086204529, "loss": 4.6199, "nll_loss": 1.1046602725982666, "rewards/accuracies": 0.75, "rewards/chosen": -0.01549498364329338, "rewards/margins": 0.030215097591280937, "rewards/rejected": -0.04571007937192917, "step": 631 }, { "epoch": 0.43706777316735823, "grad_norm": 4.596878528594971, "learning_rate": 2.1853388658367912e-05, "log_odds_chosen": -0.02922854572534561, "log_odds_ratio": -0.8003402352333069, "logits/chosen": -0.7364927530288696, "logits/rejected": -0.7289812564849854, "logps/chosen": -0.17805808782577515, "logps/rejected": -0.1403406709432602, "loss": 3.4764, "nll_loss": 0.7890704870223999, "rewards/accuracies": 0.375, "rewards/chosen": -0.017805809155106544, "rewards/margins": -0.0037717418745160103, "rewards/rejected": -0.014034068211913109, "step": 632 }, { "epoch": 0.43775933609958506, "grad_norm": 3.0470612049102783, "learning_rate": 2.1887966804979254e-05, "log_odds_chosen": 1.3135905265808105, "log_odds_ratio": -0.3760731816291809, "logits/chosen": -0.9930360317230225, "logits/rejected": -0.990397572517395, "logps/chosen": -0.16257613897323608, "logps/rejected": -0.36039984226226807, "loss": 4.3786, "nll_loss": 1.0570416450500488, "rewards/accuracies": 0.875, "rewards/chosen": -0.01625761389732361, "rewards/margins": 0.019782373681664467, "rewards/rejected": -0.036039985716342926, "step": 633 }, { "epoch": 0.4384508990318119, "grad_norm": 4.420305252075195, "learning_rate": 2.1922544951590595e-05, "log_odds_chosen": 1.2010859251022339, "log_odds_ratio": -0.34093958139419556, "logits/chosen": -0.5854348540306091, "logits/rejected": -0.6194449663162231, "logps/chosen": -0.1107054203748703, "logps/rejected": -0.3933059573173523, "loss": 4.8822, "nll_loss": 1.1864490509033203, "rewards/accuracies": 0.875, "rewards/chosen": -0.01107054203748703, "rewards/margins": 0.028260057792067528, "rewards/rejected": -0.03933060169219971, "step": 634 }, { "epoch": 0.4391424619640387, "grad_norm": 4.327555179595947, "learning_rate": 2.1957123098201937e-05, "log_odds_chosen": 1.896942377090454, "log_odds_ratio": -0.4892352819442749, "logits/chosen": -0.8643075227737427, "logits/rejected": -0.8878147006034851, "logps/chosen": -0.08768102526664734, "logps/rejected": -0.3476106524467468, "loss": 4.4977, "nll_loss": 1.0755122900009155, "rewards/accuracies": 0.625, "rewards/chosen": -0.008768102154135704, "rewards/margins": 0.02599296346306801, "rewards/rejected": -0.03476106375455856, "step": 635 }, { "epoch": 0.43983402489626555, "grad_norm": 4.274322986602783, "learning_rate": 2.199170124481328e-05, "log_odds_chosen": 0.9763669371604919, "log_odds_ratio": -0.4171779751777649, "logits/chosen": -0.8166045546531677, "logits/rejected": -0.8884382843971252, "logps/chosen": -0.128249853849411, "logps/rejected": -0.3486913740634918, "loss": 5.1655, "nll_loss": 1.2496671676635742, "rewards/accuracies": 0.75, "rewards/chosen": -0.012824985198676586, "rewards/margins": 0.02204415202140808, "rewards/rejected": -0.03486913815140724, "step": 636 }, { "epoch": 0.4405255878284924, "grad_norm": 7.383416652679443, "learning_rate": 2.202627939142462e-05, "log_odds_chosen": 0.6402369737625122, "log_odds_ratio": -0.9285612106323242, "logits/chosen": -0.7861868143081665, "logits/rejected": -0.7962783575057983, "logps/chosen": -0.29655033349990845, "logps/rejected": -0.42859429121017456, "loss": 5.2194, "nll_loss": 1.2119812965393066, "rewards/accuracies": 0.625, "rewards/chosen": -0.029655033722519875, "rewards/margins": 0.013204396702349186, "rewards/rejected": -0.04285942763090134, "step": 637 }, { "epoch": 0.4412171507607192, "grad_norm": 3.3463234901428223, "learning_rate": 2.206085753803596e-05, "log_odds_chosen": 1.1340889930725098, "log_odds_ratio": -0.4663980007171631, "logits/chosen": -1.0553393363952637, "logits/rejected": -1.0700688362121582, "logps/chosen": -0.16461165249347687, "logps/rejected": -0.3293513357639313, "loss": 4.5737, "nll_loss": 1.0967905521392822, "rewards/accuracies": 0.75, "rewards/chosen": -0.016461165621876717, "rewards/margins": 0.01647396758198738, "rewards/rejected": -0.03293513134121895, "step": 638 }, { "epoch": 0.44190871369294604, "grad_norm": 3.887077808380127, "learning_rate": 2.2095435684647303e-05, "log_odds_chosen": 1.06056809425354, "log_odds_ratio": -0.3603925406932831, "logits/chosen": -0.6518492102622986, "logits/rejected": -0.6650881171226501, "logps/chosen": -0.20834115147590637, "logps/rejected": -0.42792510986328125, "loss": 5.1296, "nll_loss": 1.2463585138320923, "rewards/accuracies": 0.875, "rewards/chosen": -0.020834118127822876, "rewards/margins": 0.021958395838737488, "rewards/rejected": -0.042792513966560364, "step": 639 }, { "epoch": 0.4426002766251729, "grad_norm": 3.2058866024017334, "learning_rate": 2.2130013831258645e-05, "log_odds_chosen": 1.502551794052124, "log_odds_ratio": -0.31615087389945984, "logits/chosen": -0.5435585975646973, "logits/rejected": -0.5261815786361694, "logps/chosen": -0.0911509096622467, "logps/rejected": -0.2400660216808319, "loss": 3.3395, "nll_loss": 0.8032507300376892, "rewards/accuracies": 1.0, "rewards/chosen": -0.009115091525018215, "rewards/margins": 0.014891511760652065, "rewards/rejected": -0.02400660328567028, "step": 640 }, { "epoch": 0.4432918395573997, "grad_norm": 3.7415406703948975, "learning_rate": 2.2164591977869986e-05, "log_odds_chosen": 2.1081349849700928, "log_odds_ratio": -0.25261473655700684, "logits/chosen": -0.7255756855010986, "logits/rejected": -0.7845942974090576, "logps/chosen": -0.12538139522075653, "logps/rejected": -0.41523393988609314, "loss": 4.2104, "nll_loss": 1.0273290872573853, "rewards/accuracies": 0.875, "rewards/chosen": -0.012538139708340168, "rewards/margins": 0.02898525446653366, "rewards/rejected": -0.041523393243551254, "step": 641 }, { "epoch": 0.44398340248962653, "grad_norm": 4.018771171569824, "learning_rate": 2.2199170124481328e-05, "log_odds_chosen": 1.4001842737197876, "log_odds_ratio": -0.4438078999519348, "logits/chosen": -0.7838208675384521, "logits/rejected": -0.8029565215110779, "logps/chosen": -0.41601836681365967, "logps/rejected": -0.5780717134475708, "loss": 4.6326, "nll_loss": 1.113771915435791, "rewards/accuracies": 0.75, "rewards/chosen": -0.04160183668136597, "rewards/margins": 0.016205335035920143, "rewards/rejected": -0.05780716985464096, "step": 642 }, { "epoch": 0.44467496542185336, "grad_norm": 4.972719192504883, "learning_rate": 2.223374827109267e-05, "log_odds_chosen": 0.3314962387084961, "log_odds_ratio": -0.6846669316291809, "logits/chosen": -0.8766312599182129, "logits/rejected": -0.8915233612060547, "logps/chosen": -0.2336045205593109, "logps/rejected": -0.32554876804351807, "loss": 4.8175, "nll_loss": 1.1359007358551025, "rewards/accuracies": 0.625, "rewards/chosen": -0.02336045168340206, "rewards/margins": 0.009194424375891685, "rewards/rejected": -0.03255487605929375, "step": 643 }, { "epoch": 0.44536652835408025, "grad_norm": 5.33884859085083, "learning_rate": 2.2268326417704014e-05, "log_odds_chosen": 1.5173817873001099, "log_odds_ratio": -0.5441713929176331, "logits/chosen": -0.808599054813385, "logits/rejected": -0.8076343536376953, "logps/chosen": -0.14385217428207397, "logps/rejected": -0.31490224599838257, "loss": 4.8053, "nll_loss": 1.146899938583374, "rewards/accuracies": 0.625, "rewards/chosen": -0.014385217800736427, "rewards/margins": 0.01710500568151474, "rewards/rejected": -0.03149022161960602, "step": 644 }, { "epoch": 0.4460580912863071, "grad_norm": 2.935650110244751, "learning_rate": 2.2302904564315356e-05, "log_odds_chosen": 2.2890067100524902, "log_odds_ratio": -0.3063238263130188, "logits/chosen": -0.9418952465057373, "logits/rejected": -0.9721782207489014, "logps/chosen": -0.14255841076374054, "logps/rejected": -0.43673819303512573, "loss": 3.4497, "nll_loss": 0.8317903876304626, "rewards/accuracies": 0.875, "rewards/chosen": -0.014255841262638569, "rewards/margins": 0.02941797859966755, "rewards/rejected": -0.04367382079362869, "step": 645 }, { "epoch": 0.4467496542185339, "grad_norm": 4.236213684082031, "learning_rate": 2.2337482710926697e-05, "log_odds_chosen": 1.5571013689041138, "log_odds_ratio": -0.42031991481781006, "logits/chosen": -1.0419297218322754, "logits/rejected": -1.061880350112915, "logps/chosen": -0.1531745195388794, "logps/rejected": -0.385803759098053, "loss": 5.3258, "nll_loss": 1.289408564567566, "rewards/accuracies": 0.875, "rewards/chosen": -0.015317452140152454, "rewards/margins": 0.02326292172074318, "rewards/rejected": -0.03858037665486336, "step": 646 }, { "epoch": 0.44744121715076074, "grad_norm": 2.988067150115967, "learning_rate": 2.237206085753804e-05, "log_odds_chosen": 2.180973768234253, "log_odds_ratio": -0.29629412293434143, "logits/chosen": -0.5734241604804993, "logits/rejected": -0.5932707190513611, "logps/chosen": -0.08009282499551773, "logps/rejected": -0.2977696359157562, "loss": 3.2365, "nll_loss": 0.7794865369796753, "rewards/accuracies": 0.875, "rewards/chosen": -0.008009282872080803, "rewards/margins": 0.02176768146455288, "rewards/rejected": -0.029776964336633682, "step": 647 }, { "epoch": 0.44813278008298757, "grad_norm": 4.176928520202637, "learning_rate": 2.240663900414938e-05, "log_odds_chosen": 0.4870775043964386, "log_odds_ratio": -0.6710426807403564, "logits/chosen": -0.9077112078666687, "logits/rejected": -0.885935366153717, "logps/chosen": -0.20027214288711548, "logps/rejected": -0.21831238269805908, "loss": 5.4343, "nll_loss": 1.2914657592773438, "rewards/accuracies": 0.625, "rewards/chosen": -0.020027216523885727, "rewards/margins": 0.0018040239810943604, "rewards/rejected": -0.021831240504980087, "step": 648 }, { "epoch": 0.4488243430152144, "grad_norm": 2.1371140480041504, "learning_rate": 2.2441217150760722e-05, "log_odds_chosen": 4.809844493865967, "log_odds_ratio": -0.09976686537265778, "logits/chosen": -0.4416744112968445, "logits/rejected": -0.42568644881248474, "logps/chosen": -0.04137096181511879, "logps/rejected": -0.4853114187717438, "loss": 2.7075, "nll_loss": 0.6669005155563354, "rewards/accuracies": 1.0, "rewards/chosen": -0.004137096460908651, "rewards/margins": 0.04439404606819153, "rewards/rejected": -0.048531144857406616, "step": 649 }, { "epoch": 0.44951590594744123, "grad_norm": 4.71837854385376, "learning_rate": 2.2475795297372064e-05, "log_odds_chosen": 2.040015935897827, "log_odds_ratio": -0.5045047998428345, "logits/chosen": -0.932171106338501, "logits/rejected": -0.9571213126182556, "logps/chosen": -0.1530236005783081, "logps/rejected": -0.4374780058860779, "loss": 5.1655, "nll_loss": 1.2409231662750244, "rewards/accuracies": 0.75, "rewards/chosen": -0.015302360989153385, "rewards/margins": 0.02844543755054474, "rewards/rejected": -0.04374779760837555, "step": 650 }, { "epoch": 0.45020746887966806, "grad_norm": 3.8925278186798096, "learning_rate": 2.2510373443983405e-05, "log_odds_chosen": 0.5939846634864807, "log_odds_ratio": -0.5840673446655273, "logits/chosen": -0.775164008140564, "logits/rejected": -0.7598456740379333, "logps/chosen": -0.1721755415201187, "logps/rejected": -0.2940378189086914, "loss": 3.7779, "nll_loss": 0.8860760927200317, "rewards/accuracies": 0.625, "rewards/chosen": -0.01721755415201187, "rewards/margins": 0.012186229228973389, "rewards/rejected": -0.02940378151834011, "step": 651 }, { "epoch": 0.4508990318118949, "grad_norm": 5.848450660705566, "learning_rate": 2.2544951590594747e-05, "log_odds_chosen": 0.2048446238040924, "log_odds_ratio": -0.6970038414001465, "logits/chosen": -1.0518460273742676, "logits/rejected": -1.0371158123016357, "logps/chosen": -0.202000230550766, "logps/rejected": -0.21628674864768982, "loss": 4.8156, "nll_loss": 1.1341984272003174, "rewards/accuracies": 0.375, "rewards/chosen": -0.020200025290250778, "rewards/margins": 0.0014286513905972242, "rewards/rejected": -0.021628674119710922, "step": 652 }, { "epoch": 0.4515905947441217, "grad_norm": 12.044407844543457, "learning_rate": 2.257952973720609e-05, "log_odds_chosen": 0.9392263889312744, "log_odds_ratio": -1.0350747108459473, "logits/chosen": -1.006348729133606, "logits/rejected": -0.9927129149436951, "logps/chosen": -0.37167733907699585, "logps/rejected": -0.5737072229385376, "loss": 5.473, "nll_loss": 1.2647355794906616, "rewards/accuracies": 0.625, "rewards/chosen": -0.037167735397815704, "rewards/margins": 0.020202992483973503, "rewards/rejected": -0.05737072601914406, "step": 653 }, { "epoch": 0.45228215767634855, "grad_norm": 6.465015888214111, "learning_rate": 2.261410788381743e-05, "log_odds_chosen": 1.9693341255187988, "log_odds_ratio": -0.6730993390083313, "logits/chosen": -0.28224295377731323, "logits/rejected": -0.2860146760940552, "logps/chosen": -0.13423292338848114, "logps/rejected": -0.28257089853286743, "loss": 3.8267, "nll_loss": 0.8893666863441467, "rewards/accuracies": 0.5, "rewards/chosen": -0.013423292897641659, "rewards/margins": 0.01483379676938057, "rewards/rejected": -0.028257090598344803, "step": 654 }, { "epoch": 0.4529737206085754, "grad_norm": 3.4309816360473633, "learning_rate": 2.264868603042877e-05, "log_odds_chosen": 3.407935619354248, "log_odds_ratio": -0.24094924330711365, "logits/chosen": -0.5608742237091064, "logits/rejected": -0.5527865290641785, "logps/chosen": -0.08340831845998764, "logps/rejected": -0.4136614501476288, "loss": 2.9297, "nll_loss": 0.7083350419998169, "rewards/accuracies": 0.875, "rewards/chosen": -0.008340831845998764, "rewards/margins": 0.03302531689405441, "rewards/rejected": -0.04136614501476288, "step": 655 }, { "epoch": 0.4536652835408022, "grad_norm": 4.206521034240723, "learning_rate": 2.2683264177040113e-05, "log_odds_chosen": 3.273314952850342, "log_odds_ratio": -0.3209017515182495, "logits/chosen": -0.6433435678482056, "logits/rejected": -0.6615561842918396, "logps/chosen": -0.07162127643823624, "logps/rejected": -0.4444481134414673, "loss": 2.7651, "nll_loss": 0.6591819524765015, "rewards/accuracies": 0.875, "rewards/chosen": -0.007162127643823624, "rewards/margins": 0.037282682955265045, "rewards/rejected": -0.04444481059908867, "step": 656 }, { "epoch": 0.45435684647302904, "grad_norm": 4.922212600708008, "learning_rate": 2.2717842323651455e-05, "log_odds_chosen": 0.9490612745285034, "log_odds_ratio": -0.6327435970306396, "logits/chosen": -0.9662469029426575, "logits/rejected": -0.9231171607971191, "logps/chosen": -0.16946572065353394, "logps/rejected": -0.3263300061225891, "loss": 2.9991, "nll_loss": 0.686488151550293, "rewards/accuracies": 0.625, "rewards/chosen": -0.016946572810411453, "rewards/margins": 0.015686428174376488, "rewards/rejected": -0.03263299912214279, "step": 657 }, { "epoch": 0.45504840940525587, "grad_norm": 4.609391212463379, "learning_rate": 2.2752420470262796e-05, "log_odds_chosen": 1.5648826360702515, "log_odds_ratio": -0.5906928777694702, "logits/chosen": -0.6863613128662109, "logits/rejected": -0.682285726070404, "logps/chosen": -0.18679755926132202, "logps/rejected": -0.4306481182575226, "loss": 4.8784, "nll_loss": 1.1605193614959717, "rewards/accuracies": 0.75, "rewards/chosen": -0.018679756671190262, "rewards/margins": 0.024385055527091026, "rewards/rejected": -0.04306481406092644, "step": 658 }, { "epoch": 0.4557399723374827, "grad_norm": 4.7040181159973145, "learning_rate": 2.2786998616874138e-05, "log_odds_chosen": 3.1283419132232666, "log_odds_ratio": -0.21017280220985413, "logits/chosen": -0.9078444242477417, "logits/rejected": -1.0034115314483643, "logps/chosen": -0.06228271499276161, "logps/rejected": -0.530816376209259, "loss": 5.0095, "nll_loss": 1.2313501834869385, "rewards/accuracies": 1.0, "rewards/chosen": -0.006228271871805191, "rewards/margins": 0.04685336351394653, "rewards/rejected": -0.05308163911104202, "step": 659 }, { "epoch": 0.45643153526970953, "grad_norm": 5.001986026763916, "learning_rate": 2.282157676348548e-05, "log_odds_chosen": 1.5463465452194214, "log_odds_ratio": -0.49791088700294495, "logits/chosen": -0.7297683954238892, "logits/rejected": -0.729040265083313, "logps/chosen": -0.14330022037029266, "logps/rejected": -0.42162853479385376, "loss": 5.4593, "nll_loss": 1.3150358200073242, "rewards/accuracies": 0.75, "rewards/chosen": -0.014330022968351841, "rewards/margins": 0.02783283218741417, "rewards/rejected": -0.042162857949733734, "step": 660 }, { "epoch": 0.45712309820193636, "grad_norm": 4.276454448699951, "learning_rate": 2.285615491009682e-05, "log_odds_chosen": 4.342514514923096, "log_odds_ratio": -0.18762508034706116, "logits/chosen": -0.3685300946235657, "logits/rejected": -0.3769664466381073, "logps/chosen": -0.03132067620754242, "logps/rejected": -0.8504193425178528, "loss": 3.3376, "nll_loss": 0.8156321048736572, "rewards/accuracies": 0.875, "rewards/chosen": -0.003132067620754242, "rewards/margins": 0.08190987259149551, "rewards/rejected": -0.08504194021224976, "step": 661 }, { "epoch": 0.4578146611341632, "grad_norm": 7.172152519226074, "learning_rate": 2.289073305670816e-05, "log_odds_chosen": 1.3948019742965698, "log_odds_ratio": -0.7182621955871582, "logits/chosen": -0.9717065095901489, "logits/rejected": -0.9662845134735107, "logps/chosen": -0.14619432389736176, "logps/rejected": -0.45164555311203003, "loss": 4.7844, "nll_loss": 1.1242810487747192, "rewards/accuracies": 0.5, "rewards/chosen": -0.014619432389736176, "rewards/margins": 0.030545122921466827, "rewards/rejected": -0.045164551585912704, "step": 662 }, { "epoch": 0.45850622406639, "grad_norm": 3.811978578567505, "learning_rate": 2.29253112033195e-05, "log_odds_chosen": 3.057511806488037, "log_odds_ratio": -0.3753212094306946, "logits/chosen": -0.5274239778518677, "logits/rejected": -0.5684012770652771, "logps/chosen": -0.07948172837495804, "logps/rejected": -0.3966066241264343, "loss": 2.7919, "nll_loss": 0.6604464054107666, "rewards/accuracies": 0.625, "rewards/chosen": -0.007948173210024834, "rewards/margins": 0.03171249106526375, "rewards/rejected": -0.03966066613793373, "step": 663 }, { "epoch": 0.45919778699861685, "grad_norm": 3.786224365234375, "learning_rate": 2.2959889349930842e-05, "log_odds_chosen": 3.219557285308838, "log_odds_ratio": -0.2720406949520111, "logits/chosen": -0.828033983707428, "logits/rejected": -0.866054356098175, "logps/chosen": -0.09778165817260742, "logps/rejected": -0.568712592124939, "loss": 3.7211, "nll_loss": 0.903061032295227, "rewards/accuracies": 0.875, "rewards/chosen": -0.009778165258467197, "rewards/margins": 0.047093093395233154, "rewards/rejected": -0.05687125772237778, "step": 664 }, { "epoch": 0.4598893499308437, "grad_norm": 5.818863391876221, "learning_rate": 2.2994467496542184e-05, "log_odds_chosen": 4.029585361480713, "log_odds_ratio": -0.19765400886535645, "logits/chosen": -0.8430564403533936, "logits/rejected": -0.8614147901535034, "logps/chosen": -0.05954580008983612, "logps/rejected": -0.5826296806335449, "loss": 3.7868, "nll_loss": 0.9269360303878784, "rewards/accuracies": 0.875, "rewards/chosen": -0.005954580381512642, "rewards/margins": 0.05230838805437088, "rewards/rejected": -0.058262962847948074, "step": 665 }, { "epoch": 0.4605809128630705, "grad_norm": 4.759044170379639, "learning_rate": 2.3029045643153525e-05, "log_odds_chosen": 2.9879543781280518, "log_odds_ratio": -0.40324172377586365, "logits/chosen": -0.7793172597885132, "logits/rejected": -0.7813615798950195, "logps/chosen": -0.08944907784461975, "logps/rejected": -0.5450009107589722, "loss": 4.8858, "nll_loss": 1.1811367273330688, "rewards/accuracies": 0.625, "rewards/chosen": -0.008944908156991005, "rewards/margins": 0.045555178076028824, "rewards/rejected": -0.05450008437037468, "step": 666 }, { "epoch": 0.4612724757952974, "grad_norm": 5.577088832855225, "learning_rate": 2.306362378976487e-05, "log_odds_chosen": 3.68088436126709, "log_odds_ratio": -0.4639522433280945, "logits/chosen": -0.8919408321380615, "logits/rejected": -0.9049323797225952, "logps/chosen": -0.10777649283409119, "logps/rejected": -0.7049047946929932, "loss": 6.1401, "nll_loss": 1.4886412620544434, "rewards/accuracies": 0.625, "rewards/chosen": -0.010777648538351059, "rewards/margins": 0.05971283093094826, "rewards/rejected": -0.07049047946929932, "step": 667 }, { "epoch": 0.46196403872752423, "grad_norm": 8.479419708251953, "learning_rate": 2.3098201936376212e-05, "log_odds_chosen": -0.17054805159568787, "log_odds_ratio": -1.0562233924865723, "logits/chosen": -0.4395080804824829, "logits/rejected": -0.4240494668483734, "logps/chosen": -0.286432683467865, "logps/rejected": -0.14448946714401245, "loss": 4.8861, "nll_loss": 1.1159002780914307, "rewards/accuracies": 0.375, "rewards/chosen": -0.02864326722919941, "rewards/margins": -0.014194320887327194, "rewards/rejected": -0.01444894727319479, "step": 668 }, { "epoch": 0.46265560165975106, "grad_norm": 5.375706672668457, "learning_rate": 2.3132780082987553e-05, "log_odds_chosen": 2.186450242996216, "log_odds_ratio": -0.39149805903434753, "logits/chosen": -0.852817714214325, "logits/rejected": -0.8768041729927063, "logps/chosen": -0.11213727295398712, "logps/rejected": -0.42280256748199463, "loss": 4.9077, "nll_loss": 1.187782645225525, "rewards/accuracies": 0.75, "rewards/chosen": -0.011213728226721287, "rewards/margins": 0.03106653317809105, "rewards/rejected": -0.04228026047348976, "step": 669 }, { "epoch": 0.4633471645919779, "grad_norm": 6.580266952514648, "learning_rate": 2.3167358229598895e-05, "log_odds_chosen": 1.5665547847747803, "log_odds_ratio": -0.7286292910575867, "logits/chosen": -0.7778172492980957, "logits/rejected": -0.7949341535568237, "logps/chosen": -0.14386004209518433, "logps/rejected": -0.4769167900085449, "loss": 4.1815, "nll_loss": 0.9725210666656494, "rewards/accuracies": 0.625, "rewards/chosen": -0.014386004768311977, "rewards/margins": 0.03330567479133606, "rewards/rejected": -0.04769168049097061, "step": 670 }, { "epoch": 0.4640387275242047, "grad_norm": 5.457213878631592, "learning_rate": 2.3201936376210237e-05, "log_odds_chosen": 4.0841383934021, "log_odds_ratio": -0.19488346576690674, "logits/chosen": -0.9210847616195679, "logits/rejected": -0.9380112886428833, "logps/chosen": -0.06799730658531189, "logps/rejected": -0.5339650511741638, "loss": 5.0348, "nll_loss": 1.2392032146453857, "rewards/accuracies": 1.0, "rewards/chosen": -0.006799730472266674, "rewards/margins": 0.04659678041934967, "rewards/rejected": -0.05339650809764862, "step": 671 }, { "epoch": 0.46473029045643155, "grad_norm": 3.573294162750244, "learning_rate": 2.3236514522821578e-05, "log_odds_chosen": 2.17598295211792, "log_odds_ratio": -0.29349270462989807, "logits/chosen": -0.749427855014801, "logits/rejected": -0.7455752491950989, "logps/chosen": -0.14027109742164612, "logps/rejected": -0.6240657567977905, "loss": 3.9887, "nll_loss": 0.9678138494491577, "rewards/accuracies": 0.875, "rewards/chosen": -0.014027111232280731, "rewards/margins": 0.048379458487033844, "rewards/rejected": -0.062406569719314575, "step": 672 }, { "epoch": 0.4654218533886584, "grad_norm": 6.060122013092041, "learning_rate": 2.327109266943292e-05, "log_odds_chosen": 4.223729610443115, "log_odds_ratio": -0.28007790446281433, "logits/chosen": -0.4201928377151489, "logits/rejected": -0.44983333349227905, "logps/chosen": -0.08497817814350128, "logps/rejected": -0.754709005355835, "loss": 3.8559, "nll_loss": 0.9359645247459412, "rewards/accuracies": 0.75, "rewards/chosen": -0.008497818373143673, "rewards/margins": 0.06697308272123337, "rewards/rejected": -0.07547089457511902, "step": 673 }, { "epoch": 0.4661134163208852, "grad_norm": 3.797612428665161, "learning_rate": 2.330567081604426e-05, "log_odds_chosen": 1.7717167139053345, "log_odds_ratio": -0.39768701791763306, "logits/chosen": -0.7274033427238464, "logits/rejected": -0.7588223218917847, "logps/chosen": -0.08553272485733032, "logps/rejected": -0.3211662173271179, "loss": 3.6084, "nll_loss": 0.8623219728469849, "rewards/accuracies": 0.625, "rewards/chosen": -0.008553272113204002, "rewards/margins": 0.02356335148215294, "rewards/rejected": -0.03211662545800209, "step": 674 }, { "epoch": 0.46680497925311204, "grad_norm": 6.884234428405762, "learning_rate": 2.3340248962655603e-05, "log_odds_chosen": 0.7624253630638123, "log_odds_ratio": -0.7565903067588806, "logits/chosen": -0.5982872843742371, "logits/rejected": -0.6013323068618774, "logps/chosen": -0.23801694810390472, "logps/rejected": -0.3481021523475647, "loss": 4.1929, "nll_loss": 0.9725688099861145, "rewards/accuracies": 0.5, "rewards/chosen": -0.023801693692803383, "rewards/margins": 0.011008523404598236, "rewards/rejected": -0.03481021523475647, "step": 675 }, { "epoch": 0.46749654218533887, "grad_norm": 4.248331069946289, "learning_rate": 2.3374827109266944e-05, "log_odds_chosen": 2.659592628479004, "log_odds_ratio": -0.4246455729007721, "logits/chosen": -0.5385680198669434, "logits/rejected": -0.5579327940940857, "logps/chosen": -0.14692561328411102, "logps/rejected": -0.48084428906440735, "loss": 4.525, "nll_loss": 1.0887757539749146, "rewards/accuracies": 0.75, "rewards/chosen": -0.014692561700940132, "rewards/margins": 0.03339186683297157, "rewards/rejected": -0.048084430396556854, "step": 676 }, { "epoch": 0.4681881051175657, "grad_norm": 6.2764387130737305, "learning_rate": 2.3409405255878286e-05, "log_odds_chosen": 2.887273073196411, "log_odds_ratio": -0.6312262415885925, "logits/chosen": -0.7242209911346436, "logits/rejected": -0.7688803672790527, "logps/chosen": -0.21251043677330017, "logps/rejected": -0.6274941563606262, "loss": 4.3248, "nll_loss": 1.0180730819702148, "rewards/accuracies": 0.625, "rewards/chosen": -0.021251043304800987, "rewards/margins": 0.041498374193906784, "rewards/rejected": -0.06274942308664322, "step": 677 }, { "epoch": 0.46887966804979253, "grad_norm": 3.9606966972351074, "learning_rate": 2.3443983402489627e-05, "log_odds_chosen": 1.724837303161621, "log_odds_ratio": -0.35207831859588623, "logits/chosen": -0.8217482566833496, "logits/rejected": -0.8121352195739746, "logps/chosen": -0.10145239531993866, "logps/rejected": -0.26650118827819824, "loss": 6.4763, "nll_loss": 1.5838665962219238, "rewards/accuracies": 0.875, "rewards/chosen": -0.010145239531993866, "rewards/margins": 0.016504880040884018, "rewards/rejected": -0.026650119572877884, "step": 678 }, { "epoch": 0.46957123098201936, "grad_norm": 4.277392387390137, "learning_rate": 2.347856154910097e-05, "log_odds_chosen": 3.5856690406799316, "log_odds_ratio": -0.3550170361995697, "logits/chosen": -0.8307151794433594, "logits/rejected": -0.8088093996047974, "logps/chosen": -0.07082566618919373, "logps/rejected": -0.47188445925712585, "loss": 3.7765, "nll_loss": 0.9086235165596008, "rewards/accuracies": 0.875, "rewards/chosen": -0.007082565687596798, "rewards/margins": 0.04010588303208351, "rewards/rejected": -0.047188449651002884, "step": 679 }, { "epoch": 0.4702627939142462, "grad_norm": 2.5866429805755615, "learning_rate": 2.351313969571231e-05, "log_odds_chosen": 6.325022220611572, "log_odds_ratio": -0.11263729631900787, "logits/chosen": -0.6943249702453613, "logits/rejected": -0.7007040977478027, "logps/chosen": -0.028736630454659462, "logps/rejected": -0.8324602246284485, "loss": 3.3625, "nll_loss": 0.8293724060058594, "rewards/accuracies": 0.875, "rewards/chosen": -0.002873663092032075, "rewards/margins": 0.08037236332893372, "rewards/rejected": -0.08324602246284485, "step": 680 }, { "epoch": 0.470954356846473, "grad_norm": 3.6870453357696533, "learning_rate": 2.3547717842323652e-05, "log_odds_chosen": 3.5631234645843506, "log_odds_ratio": -0.2938140630722046, "logits/chosen": -0.8848822116851807, "logits/rejected": -0.9235984086990356, "logps/chosen": -0.07511557638645172, "logps/rejected": -0.5606021285057068, "loss": 4.6362, "nll_loss": 1.1296803951263428, "rewards/accuracies": 0.75, "rewards/chosen": -0.007511558011174202, "rewards/margins": 0.04854864999651909, "rewards/rejected": -0.05606020987033844, "step": 681 }, { "epoch": 0.47164591977869985, "grad_norm": 4.530055046081543, "learning_rate": 2.3582295988934994e-05, "log_odds_chosen": 2.7019920349121094, "log_odds_ratio": -0.21998381614685059, "logits/chosen": -0.38282451033592224, "logits/rejected": -0.4395584464073181, "logps/chosen": -0.04314936324954033, "logps/rejected": -0.5612409710884094, "loss": 4.021, "nll_loss": 0.9832491278648376, "rewards/accuracies": 0.875, "rewards/chosen": -0.0043149362318217754, "rewards/margins": 0.051809161901474, "rewards/rejected": -0.05612409487366676, "step": 682 }, { "epoch": 0.4723374827109267, "grad_norm": 5.116053581237793, "learning_rate": 2.3616874135546335e-05, "log_odds_chosen": 3.1295435428619385, "log_odds_ratio": -0.4350450038909912, "logits/chosen": -0.7074323892593384, "logits/rejected": -0.696124255657196, "logps/chosen": -0.13088738918304443, "logps/rejected": -0.45735907554626465, "loss": 3.6855, "nll_loss": 0.877873957157135, "rewards/accuracies": 0.75, "rewards/chosen": -0.013088738545775414, "rewards/margins": 0.03264717012643814, "rewards/rejected": -0.045735906809568405, "step": 683 }, { "epoch": 0.4730290456431535, "grad_norm": 3.400541067123413, "learning_rate": 2.3651452282157677e-05, "log_odds_chosen": 3.7050838470458984, "log_odds_ratio": -0.19541694223880768, "logits/chosen": -0.7514389753341675, "logits/rejected": -0.7832955121994019, "logps/chosen": -0.04506572708487511, "logps/rejected": -0.648411750793457, "loss": 2.8218, "nll_loss": 0.6859157681465149, "rewards/accuracies": 1.0, "rewards/chosen": -0.004506572615355253, "rewards/margins": 0.06033460050821304, "rewards/rejected": -0.06484117358922958, "step": 684 }, { "epoch": 0.47372060857538034, "grad_norm": 3.607529640197754, "learning_rate": 2.368603042876902e-05, "log_odds_chosen": 5.2442169189453125, "log_odds_ratio": -0.09644800424575806, "logits/chosen": -0.5605584383010864, "logits/rejected": -0.5699232220649719, "logps/chosen": -0.0434856116771698, "logps/rejected": -0.7973841428756714, "loss": 3.877, "nll_loss": 0.9596099853515625, "rewards/accuracies": 1.0, "rewards/chosen": -0.00434856116771698, "rewards/margins": 0.07538985460996628, "rewards/rejected": -0.07973841577768326, "step": 685 }, { "epoch": 0.47441217150760717, "grad_norm": 6.932708263397217, "learning_rate": 2.372060857538036e-05, "log_odds_chosen": 3.1771626472473145, "log_odds_ratio": -0.4026772379875183, "logits/chosen": -0.4725809693336487, "logits/rejected": -0.4851277768611908, "logps/chosen": -0.10029126703739166, "logps/rejected": -0.3951825499534607, "loss": 4.7662, "nll_loss": 1.151286005973816, "rewards/accuracies": 0.875, "rewards/chosen": -0.010029126890003681, "rewards/margins": 0.029489127919077873, "rewards/rejected": -0.03951825574040413, "step": 686 }, { "epoch": 0.475103734439834, "grad_norm": 3.666964054107666, "learning_rate": 2.37551867219917e-05, "log_odds_chosen": 3.696667432785034, "log_odds_ratio": -0.41769081354141235, "logits/chosen": -0.430128276348114, "logits/rejected": -0.4578275680541992, "logps/chosen": -0.13075147569179535, "logps/rejected": -0.5719894766807556, "loss": 3.1046, "nll_loss": 0.7343854308128357, "rewards/accuracies": 0.875, "rewards/chosen": -0.013075148686766624, "rewards/margins": 0.044123802334070206, "rewards/rejected": -0.05719895288348198, "step": 687 }, { "epoch": 0.47579529737206083, "grad_norm": 4.897703647613525, "learning_rate": 2.3789764868603043e-05, "log_odds_chosen": 2.9305665493011475, "log_odds_ratio": -0.5501704812049866, "logits/chosen": -0.8313249349594116, "logits/rejected": -0.8332083225250244, "logps/chosen": -0.1709379106760025, "logps/rejected": -0.5386735200881958, "loss": 4.0558, "nll_loss": 0.9589261412620544, "rewards/accuracies": 0.75, "rewards/chosen": -0.01709379069507122, "rewards/margins": 0.03677356243133545, "rewards/rejected": -0.05386735126376152, "step": 688 }, { "epoch": 0.47648686030428766, "grad_norm": 6.1797261238098145, "learning_rate": 2.3824343015214385e-05, "log_odds_chosen": 3.6837799549102783, "log_odds_ratio": -0.4411526322364807, "logits/chosen": -0.5774151086807251, "logits/rejected": -0.5877612829208374, "logps/chosen": -0.09230178594589233, "logps/rejected": -0.6897770166397095, "loss": 4.0497, "nll_loss": 0.9682997465133667, "rewards/accuracies": 0.875, "rewards/chosen": -0.009230178780853748, "rewards/margins": 0.059747517108917236, "rewards/rejected": -0.06897769123315811, "step": 689 }, { "epoch": 0.47717842323651455, "grad_norm": 8.8616361618042, "learning_rate": 2.385892116182573e-05, "log_odds_chosen": 1.3486244678497314, "log_odds_ratio": -0.5516138076782227, "logits/chosen": -0.569491982460022, "logits/rejected": -0.5894758105278015, "logps/chosen": -0.16861675679683685, "logps/rejected": -0.512142539024353, "loss": 5.723, "nll_loss": 1.3755992650985718, "rewards/accuracies": 0.75, "rewards/chosen": -0.016861675307154655, "rewards/margins": 0.03435257449746132, "rewards/rejected": -0.051214251667261124, "step": 690 }, { "epoch": 0.4778699861687414, "grad_norm": 3.650421380996704, "learning_rate": 2.389349930843707e-05, "log_odds_chosen": 4.223165988922119, "log_odds_ratio": -0.2957286834716797, "logits/chosen": -0.7020717263221741, "logits/rejected": -0.6890726685523987, "logps/chosen": -0.08652821183204651, "logps/rejected": -0.5754883289337158, "loss": 4.0655, "nll_loss": 0.9868116974830627, "rewards/accuracies": 0.875, "rewards/chosen": -0.008652821183204651, "rewards/margins": 0.04889601469039917, "rewards/rejected": -0.057548828423023224, "step": 691 }, { "epoch": 0.4785615491009682, "grad_norm": 4.098056793212891, "learning_rate": 2.3928077455048413e-05, "log_odds_chosen": 5.177599906921387, "log_odds_ratio": -0.1555010825395584, "logits/chosen": -0.6356836557388306, "logits/rejected": -0.6456239819526672, "logps/chosen": -0.053202398121356964, "logps/rejected": -0.9623237252235413, "loss": 5.5101, "nll_loss": 1.3619807958602905, "rewards/accuracies": 0.875, "rewards/chosen": -0.005320240277796984, "rewards/margins": 0.09091213345527649, "rewards/rejected": -0.09623237699270248, "step": 692 }, { "epoch": 0.47925311203319504, "grad_norm": 4.213980674743652, "learning_rate": 2.3962655601659754e-05, "log_odds_chosen": 4.00429105758667, "log_odds_ratio": -0.21531108021736145, "logits/chosen": -0.5026724338531494, "logits/rejected": -0.542472779750824, "logps/chosen": -0.10158185660839081, "logps/rejected": -0.9162179231643677, "loss": 3.8501, "nll_loss": 0.9409924745559692, "rewards/accuracies": 0.875, "rewards/chosen": -0.010158185847103596, "rewards/margins": 0.08146360516548157, "rewards/rejected": -0.09162179380655289, "step": 693 }, { "epoch": 0.47994467496542187, "grad_norm": 5.7204909324646, "learning_rate": 2.3997233748271096e-05, "log_odds_chosen": 3.674548625946045, "log_odds_ratio": -0.46117550134658813, "logits/chosen": -0.7658097147941589, "logits/rejected": -0.7818572521209717, "logps/chosen": -0.1041916161775589, "logps/rejected": -0.4919533431529999, "loss": 2.8983, "nll_loss": 0.6784451007843018, "rewards/accuracies": 0.75, "rewards/chosen": -0.01041916199028492, "rewards/margins": 0.03877617418766022, "rewards/rejected": -0.04919533431529999, "step": 694 }, { "epoch": 0.4806362378976487, "grad_norm": 4.009372234344482, "learning_rate": 2.4031811894882437e-05, "log_odds_chosen": 2.4284112453460693, "log_odds_ratio": -0.33100491762161255, "logits/chosen": -0.6104992032051086, "logits/rejected": -0.601692795753479, "logps/chosen": -0.10431472957134247, "logps/rejected": -0.3768067955970764, "loss": 4.076, "nll_loss": 0.9859074950218201, "rewards/accuracies": 0.875, "rewards/chosen": -0.010431474074721336, "rewards/margins": 0.027249209582805634, "rewards/rejected": -0.03768068179488182, "step": 695 }, { "epoch": 0.48132780082987553, "grad_norm": 3.8886003494262695, "learning_rate": 2.406639004149378e-05, "log_odds_chosen": 3.0958616733551025, "log_odds_ratio": -0.39199692010879517, "logits/chosen": -0.9665194749832153, "logits/rejected": -0.9609275460243225, "logps/chosen": -0.0949786901473999, "logps/rejected": -0.7616181969642639, "loss": 3.6704, "nll_loss": 0.8784018754959106, "rewards/accuracies": 0.75, "rewards/chosen": -0.009497868828475475, "rewards/margins": 0.0666639506816864, "rewards/rejected": -0.07616182416677475, "step": 696 }, { "epoch": 0.48201936376210236, "grad_norm": 4.627381801605225, "learning_rate": 2.410096818810512e-05, "log_odds_chosen": 4.46881103515625, "log_odds_ratio": -0.2650693953037262, "logits/chosen": -0.435590922832489, "logits/rejected": -0.48427075147628784, "logps/chosen": -0.04191301390528679, "logps/rejected": -1.0247564315795898, "loss": 4.7758, "nll_loss": 1.1674453020095825, "rewards/accuracies": 0.875, "rewards/chosen": -0.004191301297396421, "rewards/margins": 0.09828434139490128, "rewards/rejected": -0.10247564315795898, "step": 697 }, { "epoch": 0.4827109266943292, "grad_norm": 4.1338982582092285, "learning_rate": 2.4135546334716462e-05, "log_odds_chosen": 3.408036947250366, "log_odds_ratio": -0.18219085037708282, "logits/chosen": -0.7029824256896973, "logits/rejected": -0.7051295638084412, "logps/chosen": -0.06247822940349579, "logps/rejected": -0.5670195817947388, "loss": 4.3019, "nll_loss": 1.0572634935379028, "rewards/accuracies": 1.0, "rewards/chosen": -0.006247823126614094, "rewards/margins": 0.050454139709472656, "rewards/rejected": -0.05670195817947388, "step": 698 }, { "epoch": 0.483402489626556, "grad_norm": 3.902381420135498, "learning_rate": 2.4170124481327804e-05, "log_odds_chosen": 3.730391502380371, "log_odds_ratio": -0.25784698128700256, "logits/chosen": -0.6142232418060303, "logits/rejected": -0.6449130773544312, "logps/chosen": -0.054460309445858, "logps/rejected": -0.5013003945350647, "loss": 3.2071, "nll_loss": 0.7759826183319092, "rewards/accuracies": 0.875, "rewards/chosen": -0.005446030758321285, "rewards/margins": 0.04468400776386261, "rewards/rejected": -0.05013003945350647, "step": 699 }, { "epoch": 0.48409405255878285, "grad_norm": 3.9679691791534424, "learning_rate": 2.4204702627939145e-05, "log_odds_chosen": 2.903071880340576, "log_odds_ratio": -0.3373691737651825, "logits/chosen": -0.628058671951294, "logits/rejected": -0.6574671268463135, "logps/chosen": -0.05507644638419151, "logps/rejected": -0.4640412926673889, "loss": 3.3383, "nll_loss": 0.8008279800415039, "rewards/accuracies": 0.625, "rewards/chosen": -0.0055076442658901215, "rewards/margins": 0.04089648649096489, "rewards/rejected": -0.04640413075685501, "step": 700 }, { "epoch": 0.4847856154910097, "grad_norm": 4.491204738616943, "learning_rate": 2.4239280774550487e-05, "log_odds_chosen": 3.4920854568481445, "log_odds_ratio": -0.4841329753398895, "logits/chosen": -0.8375265002250671, "logits/rejected": -0.8460544347763062, "logps/chosen": -0.20640972256660461, "logps/rejected": -0.8564853668212891, "loss": 4.7305, "nll_loss": 1.13421630859375, "rewards/accuracies": 0.75, "rewards/chosen": -0.02064097486436367, "rewards/margins": 0.06500756740570068, "rewards/rejected": -0.0856485366821289, "step": 701 }, { "epoch": 0.4854771784232365, "grad_norm": 3.5440473556518555, "learning_rate": 2.427385892116183e-05, "log_odds_chosen": 5.956630706787109, "log_odds_ratio": -0.14299477636814117, "logits/chosen": -0.589779257774353, "logits/rejected": -0.6273432970046997, "logps/chosen": -0.05705910921096802, "logps/rejected": -1.2479016780853271, "loss": 3.6198, "nll_loss": 0.8906500339508057, "rewards/accuracies": 0.875, "rewards/chosen": -0.005705910734832287, "rewards/margins": 0.11908426880836487, "rewards/rejected": -0.12479016929864883, "step": 702 }, { "epoch": 0.48616874135546334, "grad_norm": 5.295886516571045, "learning_rate": 2.4308437067773167e-05, "log_odds_chosen": 4.244320869445801, "log_odds_ratio": -0.27056583762168884, "logits/chosen": -0.7680187225341797, "logits/rejected": -0.7666739821434021, "logps/chosen": -0.0726771354675293, "logps/rejected": -0.9253808856010437, "loss": 5.6222, "nll_loss": 1.3784937858581543, "rewards/accuracies": 0.75, "rewards/chosen": -0.007267713081091642, "rewards/margins": 0.08527037501335144, "rewards/rejected": -0.09253808110952377, "step": 703 }, { "epoch": 0.48686030428769017, "grad_norm": 5.875594139099121, "learning_rate": 2.4343015214384508e-05, "log_odds_chosen": 3.2456061840057373, "log_odds_ratio": -0.8249081373214722, "logits/chosen": -0.8685834407806396, "logits/rejected": -0.7868944406509399, "logps/chosen": -0.166039377450943, "logps/rejected": -0.7548103928565979, "loss": 4.1563, "nll_loss": 0.9565944075584412, "rewards/accuracies": 0.75, "rewards/chosen": -0.01660393737256527, "rewards/margins": 0.05887710675597191, "rewards/rejected": -0.07548104226589203, "step": 704 }, { "epoch": 0.487551867219917, "grad_norm": 4.143042087554932, "learning_rate": 2.437759336099585e-05, "log_odds_chosen": 1.445640206336975, "log_odds_ratio": -0.44271689653396606, "logits/chosen": -0.541271984577179, "logits/rejected": -0.5584791302680969, "logps/chosen": -0.19485358893871307, "logps/rejected": -0.5292760133743286, "loss": 3.6438, "nll_loss": 0.8666675686836243, "rewards/accuracies": 0.75, "rewards/chosen": -0.019485358148813248, "rewards/margins": 0.03344224393367767, "rewards/rejected": -0.05292759835720062, "step": 705 }, { "epoch": 0.48824343015214383, "grad_norm": 7.924228668212891, "learning_rate": 2.441217150760719e-05, "log_odds_chosen": 2.327423334121704, "log_odds_ratio": -0.5170378088951111, "logits/chosen": -0.6705014705657959, "logits/rejected": -0.6787251830101013, "logps/chosen": -0.26046106219291687, "logps/rejected": -0.5459519028663635, "loss": 3.8784, "nll_loss": 0.9178914427757263, "rewards/accuracies": 0.625, "rewards/chosen": -0.026046108454465866, "rewards/margins": 0.028549088165163994, "rewards/rejected": -0.05459519475698471, "step": 706 }, { "epoch": 0.48893499308437066, "grad_norm": 3.639187812805176, "learning_rate": 2.4446749654218533e-05, "log_odds_chosen": 7.317781448364258, "log_odds_ratio": -0.0036757837515324354, "logits/chosen": -0.6027560234069824, "logits/rejected": -0.6349475383758545, "logps/chosen": -0.008355571888387203, "logps/rejected": -1.3751740455627441, "loss": 3.8351, "nll_loss": 0.9584081172943115, "rewards/accuracies": 1.0, "rewards/chosen": -0.000835557235404849, "rewards/margins": 0.13668186962604523, "rewards/rejected": -0.13751742243766785, "step": 707 }, { "epoch": 0.4896265560165975, "grad_norm": 4.836334228515625, "learning_rate": 2.4481327800829874e-05, "log_odds_chosen": 2.9217796325683594, "log_odds_ratio": -0.2655583620071411, "logits/chosen": -0.7226359248161316, "logits/rejected": -0.7514448165893555, "logps/chosen": -0.21701642870903015, "logps/rejected": -0.7266488075256348, "loss": 4.8098, "nll_loss": 1.1758911609649658, "rewards/accuracies": 0.875, "rewards/chosen": -0.021701643243432045, "rewards/margins": 0.05096323788166046, "rewards/rejected": -0.07266488671302795, "step": 708 }, { "epoch": 0.4903181189488243, "grad_norm": 9.13145637512207, "learning_rate": 2.4515905947441216e-05, "log_odds_chosen": 1.2536616325378418, "log_odds_ratio": -0.7132289409637451, "logits/chosen": -0.544143795967102, "logits/rejected": -0.5465924739837646, "logps/chosen": -0.2198922038078308, "logps/rejected": -0.5221154689788818, "loss": 5.0716, "nll_loss": 1.196586012840271, "rewards/accuracies": 0.625, "rewards/chosen": -0.02198921889066696, "rewards/margins": 0.030222328379750252, "rewards/rejected": -0.052211545407772064, "step": 709 }, { "epoch": 0.49100968188105115, "grad_norm": 5.586406707763672, "learning_rate": 2.4550484094052557e-05, "log_odds_chosen": 3.5675199031829834, "log_odds_ratio": -0.3161616921424866, "logits/chosen": -0.4636297821998596, "logits/rejected": -0.5131024122238159, "logps/chosen": -0.07581526041030884, "logps/rejected": -0.7153963446617126, "loss": 4.9111, "nll_loss": 1.1961618661880493, "rewards/accuracies": 0.875, "rewards/chosen": -0.007581526413559914, "rewards/margins": 0.06395810842514038, "rewards/rejected": -0.07153964042663574, "step": 710 }, { "epoch": 0.491701244813278, "grad_norm": 3.736074924468994, "learning_rate": 2.45850622406639e-05, "log_odds_chosen": 3.691223382949829, "log_odds_ratio": -0.321685254573822, "logits/chosen": -0.819664716720581, "logits/rejected": -0.7938543558120728, "logps/chosen": -0.08634298294782639, "logps/rejected": -0.7033805847167969, "loss": 4.2003, "nll_loss": 1.0179029703140259, "rewards/accuracies": 0.75, "rewards/chosen": -0.008634298108518124, "rewards/margins": 0.06170375645160675, "rewards/rejected": -0.07033805549144745, "step": 711 }, { "epoch": 0.49239280774550487, "grad_norm": 7.242453575134277, "learning_rate": 2.4619640387275244e-05, "log_odds_chosen": 2.9408159255981445, "log_odds_ratio": -0.3289014399051666, "logits/chosen": -0.7875644564628601, "logits/rejected": -0.7483938336372375, "logps/chosen": -0.10421629995107651, "logps/rejected": -0.5258691310882568, "loss": 3.8019, "nll_loss": 0.9175827503204346, "rewards/accuracies": 0.75, "rewards/chosen": -0.01042162999510765, "rewards/margins": 0.04216528683900833, "rewards/rejected": -0.052586913108825684, "step": 712 }, { "epoch": 0.4930843706777317, "grad_norm": 4.52717399597168, "learning_rate": 2.4654218533886586e-05, "log_odds_chosen": 2.288606882095337, "log_odds_ratio": -0.5677134394645691, "logits/chosen": -0.9292905330657959, "logits/rejected": -0.9273300170898438, "logps/chosen": -0.22288769483566284, "logps/rejected": -0.6668552160263062, "loss": 6.0572, "nll_loss": 1.4575316905975342, "rewards/accuracies": 0.625, "rewards/chosen": -0.022288773208856583, "rewards/margins": 0.04439675435423851, "rewards/rejected": -0.06668552756309509, "step": 713 }, { "epoch": 0.49377593360995853, "grad_norm": 4.925570011138916, "learning_rate": 2.4688796680497927e-05, "log_odds_chosen": 2.577275037765503, "log_odds_ratio": -0.3427557349205017, "logits/chosen": -0.7094758152961731, "logits/rejected": -0.7411022782325745, "logps/chosen": -0.14292514324188232, "logps/rejected": -0.5327209830284119, "loss": 4.5115, "nll_loss": 1.0935925245285034, "rewards/accuracies": 0.875, "rewards/chosen": -0.014292514882981777, "rewards/margins": 0.038979582488536835, "rewards/rejected": -0.053272098302841187, "step": 714 }, { "epoch": 0.49446749654218536, "grad_norm": 4.740431308746338, "learning_rate": 2.472337482710927e-05, "log_odds_chosen": 4.266748428344727, "log_odds_ratio": -0.1784973442554474, "logits/chosen": -0.4813273549079895, "logits/rejected": -0.5323498845100403, "logps/chosen": -0.1453215330839157, "logps/rejected": -0.9189692139625549, "loss": 4.5187, "nll_loss": 1.1118159294128418, "rewards/accuracies": 0.875, "rewards/chosen": -0.014532153494656086, "rewards/margins": 0.07736477255821228, "rewards/rejected": -0.0918969213962555, "step": 715 }, { "epoch": 0.4951590594744122, "grad_norm": 3.258787155151367, "learning_rate": 2.475795297372061e-05, "log_odds_chosen": 4.292454242706299, "log_odds_ratio": -0.16503621637821198, "logits/chosen": -0.5896936058998108, "logits/rejected": -0.6260637640953064, "logps/chosen": -0.05939153581857681, "logps/rejected": -0.5779461860656738, "loss": 3.8843, "nll_loss": 0.9545656442642212, "rewards/accuracies": 1.0, "rewards/chosen": -0.005939153954386711, "rewards/margins": 0.051855459809303284, "rewards/rejected": -0.057794615626335144, "step": 716 }, { "epoch": 0.495850622406639, "grad_norm": 4.017149448394775, "learning_rate": 2.4792531120331952e-05, "log_odds_chosen": 1.6627793312072754, "log_odds_ratio": -0.4310210049152374, "logits/chosen": -0.9920613765716553, "logits/rejected": -1.0068923234939575, "logps/chosen": -0.17367520928382874, "logps/rejected": -0.3792310655117035, "loss": 4.0788, "nll_loss": 0.9766014814376831, "rewards/accuracies": 0.75, "rewards/chosen": -0.017367523163557053, "rewards/margins": 0.020555583760142326, "rewards/rejected": -0.03792310506105423, "step": 717 }, { "epoch": 0.49654218533886585, "grad_norm": 3.9915847778320312, "learning_rate": 2.4827109266943293e-05, "log_odds_chosen": 1.5953905582427979, "log_odds_ratio": -0.3041524887084961, "logits/chosen": -0.8089584112167358, "logits/rejected": -0.8049341440200806, "logps/chosen": -0.1256750226020813, "logps/rejected": -0.4668002724647522, "loss": 4.7225, "nll_loss": 1.1501986980438232, "rewards/accuracies": 1.0, "rewards/chosen": -0.01256750337779522, "rewards/margins": 0.03411252424120903, "rewards/rejected": -0.0466800294816494, "step": 718 }, { "epoch": 0.4972337482710927, "grad_norm": 4.275820255279541, "learning_rate": 2.4861687413554635e-05, "log_odds_chosen": 3.3468992710113525, "log_odds_ratio": -0.33127206563949585, "logits/chosen": -0.7646849155426025, "logits/rejected": -0.8305087685585022, "logps/chosen": -0.19278240203857422, "logps/rejected": -0.6699162721633911, "loss": 4.3256, "nll_loss": 1.0482611656188965, "rewards/accuracies": 0.875, "rewards/chosen": -0.01927824132144451, "rewards/margins": 0.04771338775753975, "rewards/rejected": -0.06699162721633911, "step": 719 }, { "epoch": 0.4979253112033195, "grad_norm": 7.294179916381836, "learning_rate": 2.4896265560165977e-05, "log_odds_chosen": 1.5366660356521606, "log_odds_ratio": -0.4828697443008423, "logits/chosen": -0.7735751867294312, "logits/rejected": -0.7869131565093994, "logps/chosen": -0.10392677038908005, "logps/rejected": -0.3954734206199646, "loss": 4.8667, "nll_loss": 1.1683766841888428, "rewards/accuracies": 0.625, "rewards/chosen": -0.01039267797023058, "rewards/margins": 0.029154665768146515, "rewards/rejected": -0.03954734280705452, "step": 720 }, { "epoch": 0.49861687413554634, "grad_norm": 4.376179218292236, "learning_rate": 2.4930843706777318e-05, "log_odds_chosen": 2.7048628330230713, "log_odds_ratio": -0.4194316565990448, "logits/chosen": -0.9780904650688171, "logits/rejected": -0.9741083383560181, "logps/chosen": -0.09044212102890015, "logps/rejected": -0.6422989368438721, "loss": 4.7012, "nll_loss": 1.1333608627319336, "rewards/accuracies": 0.75, "rewards/chosen": -0.009044213220477104, "rewards/margins": 0.05518568307161331, "rewards/rejected": -0.06422989815473557, "step": 721 }, { "epoch": 0.49930843706777317, "grad_norm": 4.935683727264404, "learning_rate": 2.496542185338866e-05, "log_odds_chosen": 2.501511812210083, "log_odds_ratio": -0.2555024325847626, "logits/chosen": -0.6554673314094543, "logits/rejected": -0.7062564492225647, "logps/chosen": -0.10878726094961166, "logps/rejected": -0.4665549397468567, "loss": 4.8412, "nll_loss": 1.1847527027130127, "rewards/accuracies": 0.875, "rewards/chosen": -0.010878726840019226, "rewards/margins": 0.035776764154434204, "rewards/rejected": -0.04665549099445343, "step": 722 }, { "epoch": 0.5, "grad_norm": 4.417289733886719, "learning_rate": 2.5e-05, "log_odds_chosen": 5.634490489959717, "log_odds_ratio": -0.17039619386196136, "logits/chosen": -1.154792070388794, "logits/rejected": -1.2090914249420166, "logps/chosen": -0.06233625113964081, "logps/rejected": -0.9839351177215576, "loss": 5.0622, "nll_loss": 1.248513102531433, "rewards/accuracies": 0.875, "rewards/chosen": -0.006233625113964081, "rewards/margins": 0.09215987473726273, "rewards/rejected": -0.0983935073018074, "step": 723 }, { "epoch": 0.5006915629322268, "grad_norm": 7.064770698547363, "learning_rate": 2.5034578146611343e-05, "log_odds_chosen": 3.256854772567749, "log_odds_ratio": -1.0175788402557373, "logits/chosen": -0.9441323280334473, "logits/rejected": -0.9441784620285034, "logps/chosen": -0.1523509919643402, "logps/rejected": -0.7931645512580872, "loss": 4.4789, "nll_loss": 1.0179661512374878, "rewards/accuracies": 0.625, "rewards/chosen": -0.015235099010169506, "rewards/margins": 0.0640813559293747, "rewards/rejected": -0.07931645214557648, "step": 724 }, { "epoch": 0.5013831258644537, "grad_norm": 3.7516984939575195, "learning_rate": 2.5069156293222684e-05, "log_odds_chosen": 5.143672943115234, "log_odds_ratio": -0.17888480424880981, "logits/chosen": -0.6211791038513184, "logits/rejected": -0.6152884364128113, "logps/chosen": -0.03391317278146744, "logps/rejected": -0.6649593710899353, "loss": 4.3696, "nll_loss": 1.0745129585266113, "rewards/accuracies": 0.875, "rewards/chosen": -0.003391317091882229, "rewards/margins": 0.06310462206602097, "rewards/rejected": -0.06649593263864517, "step": 725 }, { "epoch": 0.5020746887966805, "grad_norm": 4.188735485076904, "learning_rate": 2.5103734439834026e-05, "log_odds_chosen": 5.950955390930176, "log_odds_ratio": -0.1566586196422577, "logits/chosen": -0.553804874420166, "logits/rejected": -0.6219318509101868, "logps/chosen": -0.058077793568372726, "logps/rejected": -1.1520419120788574, "loss": 3.492, "nll_loss": 0.8573253154754639, "rewards/accuracies": 0.875, "rewards/chosen": -0.005807779263705015, "rewards/margins": 0.10939642041921616, "rewards/rejected": -0.11520420014858246, "step": 726 }, { "epoch": 0.5027662517289073, "grad_norm": 5.206643104553223, "learning_rate": 2.5138312586445367e-05, "log_odds_chosen": 2.201714038848877, "log_odds_ratio": -0.407717227935791, "logits/chosen": -0.8526656627655029, "logits/rejected": -0.8820828199386597, "logps/chosen": -0.14064417779445648, "logps/rejected": -0.46919354796409607, "loss": 4.2531, "nll_loss": 1.02250337600708, "rewards/accuracies": 0.75, "rewards/chosen": -0.014064418151974678, "rewards/margins": 0.03285493701696396, "rewards/rejected": -0.04691935330629349, "step": 727 }, { "epoch": 0.5034578146611342, "grad_norm": 4.013926982879639, "learning_rate": 2.517289073305671e-05, "log_odds_chosen": 5.200564384460449, "log_odds_ratio": -0.14862266182899475, "logits/chosen": -0.7379950284957886, "logits/rejected": -0.7829983234405518, "logps/chosen": -0.06311299651861191, "logps/rejected": -1.130431056022644, "loss": 3.7501, "nll_loss": 0.9226634502410889, "rewards/accuracies": 0.875, "rewards/chosen": -0.006311299279332161, "rewards/margins": 0.10673180222511292, "rewards/rejected": -0.11304309964179993, "step": 728 }, { "epoch": 0.504149377593361, "grad_norm": 6.703171253204346, "learning_rate": 2.520746887966805e-05, "log_odds_chosen": 3.670536994934082, "log_odds_ratio": -0.6451328992843628, "logits/chosen": -0.9287649393081665, "logits/rejected": -0.9407943487167358, "logps/chosen": -0.14862267673015594, "logps/rejected": -0.744902491569519, "loss": 2.8785, "nll_loss": 0.6551051139831543, "rewards/accuracies": 0.75, "rewards/chosen": -0.014862269163131714, "rewards/margins": 0.05962797999382019, "rewards/rejected": -0.0744902491569519, "step": 729 }, { "epoch": 0.5048409405255878, "grad_norm": 5.829585075378418, "learning_rate": 2.5242047026279392e-05, "log_odds_chosen": 0.3746829032897949, "log_odds_ratio": -0.7685449719429016, "logits/chosen": -0.9504293203353882, "logits/rejected": -0.9758710861206055, "logps/chosen": -0.26119324564933777, "logps/rejected": -0.31625595688819885, "loss": 5.5506, "nll_loss": 1.3107986450195312, "rewards/accuracies": 0.625, "rewards/chosen": -0.026119325309991837, "rewards/margins": 0.005506269633769989, "rewards/rejected": -0.031625594943761826, "step": 730 }, { "epoch": 0.5055325034578146, "grad_norm": 4.024864673614502, "learning_rate": 2.5276625172890734e-05, "log_odds_chosen": 1.6292681694030762, "log_odds_ratio": -0.34728798270225525, "logits/chosen": -0.9132955074310303, "logits/rejected": -0.9273039102554321, "logps/chosen": -0.1225036233663559, "logps/rejected": -0.3228994607925415, "loss": 5.56, "nll_loss": 1.3552802801132202, "rewards/accuracies": 0.875, "rewards/chosen": -0.012250362895429134, "rewards/margins": 0.02003958262503147, "rewards/rejected": -0.03228994458913803, "step": 731 }, { "epoch": 0.5062240663900415, "grad_norm": 4.193017482757568, "learning_rate": 2.5311203319502075e-05, "log_odds_chosen": 3.345766544342041, "log_odds_ratio": -0.4248673617839813, "logits/chosen": -0.6792013645172119, "logits/rejected": -0.7000718116760254, "logps/chosen": -0.10061212629079819, "logps/rejected": -0.5930604338645935, "loss": 3.9277, "nll_loss": 0.9394264817237854, "rewards/accuracies": 0.75, "rewards/chosen": -0.010061212815344334, "rewards/margins": 0.04924483224749565, "rewards/rejected": -0.05930604785680771, "step": 732 }, { "epoch": 0.5069156293222683, "grad_norm": 4.2471513748168945, "learning_rate": 2.5345781466113417e-05, "log_odds_chosen": 2.095512628555298, "log_odds_ratio": -0.363492876291275, "logits/chosen": -1.0475705862045288, "logits/rejected": -1.0593514442443848, "logps/chosen": -0.1103830561041832, "logps/rejected": -0.4578365683555603, "loss": 4.8793, "nll_loss": 1.1834713220596313, "rewards/accuracies": 0.75, "rewards/chosen": -0.01103830523788929, "rewards/margins": 0.03474535420536995, "rewards/rejected": -0.04578366130590439, "step": 733 }, { "epoch": 0.5076071922544951, "grad_norm": 4.612521171569824, "learning_rate": 2.538035961272476e-05, "log_odds_chosen": 3.1200380325317383, "log_odds_ratio": -0.23247987031936646, "logits/chosen": -0.8296999335289001, "logits/rejected": -0.8448148965835571, "logps/chosen": -0.06477497518062592, "logps/rejected": -0.6362839937210083, "loss": 4.8238, "nll_loss": 1.1827137470245361, "rewards/accuracies": 0.875, "rewards/chosen": -0.0064774975180625916, "rewards/margins": 0.05715090408921242, "rewards/rejected": -0.06362840533256531, "step": 734 }, { "epoch": 0.508298755186722, "grad_norm": 6.666332244873047, "learning_rate": 2.54149377593361e-05, "log_odds_chosen": 4.635190963745117, "log_odds_ratio": -0.3607940673828125, "logits/chosen": -0.6753288507461548, "logits/rejected": -0.7206825017929077, "logps/chosen": -0.06729499995708466, "logps/rejected": -0.7631007432937622, "loss": 3.8885, "nll_loss": 0.9360435009002686, "rewards/accuracies": 0.875, "rewards/chosen": -0.006729499902576208, "rewards/margins": 0.0695805773139, "rewards/rejected": -0.07631008327007294, "step": 735 }, { "epoch": 0.5089903181189488, "grad_norm": 2.7993688583374023, "learning_rate": 2.544951590594744e-05, "log_odds_chosen": 4.7974700927734375, "log_odds_ratio": -0.31564173102378845, "logits/chosen": -0.3785450756549835, "logits/rejected": -0.4010365903377533, "logps/chosen": -0.09814214706420898, "logps/rejected": -0.5331931114196777, "loss": 3.3022, "nll_loss": 0.7939915657043457, "rewards/accuracies": 0.875, "rewards/chosen": -0.009814215824007988, "rewards/margins": 0.043505098670721054, "rewards/rejected": -0.05331931263208389, "step": 736 }, { "epoch": 0.5096818810511756, "grad_norm": 4.751145362854004, "learning_rate": 2.5484094052558783e-05, "log_odds_chosen": 1.547588586807251, "log_odds_ratio": -0.43941688537597656, "logits/chosen": -0.5329767465591431, "logits/rejected": -0.540837824344635, "logps/chosen": -0.12904410064220428, "logps/rejected": -0.3887450397014618, "loss": 4.7745, "nll_loss": 1.14969003200531, "rewards/accuracies": 0.75, "rewards/chosen": -0.012904411181807518, "rewards/margins": 0.02597009390592575, "rewards/rejected": -0.03887450322508812, "step": 737 }, { "epoch": 0.5103734439834025, "grad_norm": 3.7940049171447754, "learning_rate": 2.5518672199170125e-05, "log_odds_chosen": 2.1066598892211914, "log_odds_ratio": -0.34529027342796326, "logits/chosen": -0.7043582201004028, "logits/rejected": -0.7103002667427063, "logps/chosen": -0.12666653096675873, "logps/rejected": -0.4064810574054718, "loss": 3.412, "nll_loss": 0.8184600472450256, "rewards/accuracies": 0.75, "rewards/chosen": -0.012666651979088783, "rewards/margins": 0.027981456369161606, "rewards/rejected": -0.04064810648560524, "step": 738 }, { "epoch": 0.5110650069156293, "grad_norm": 2.5742151737213135, "learning_rate": 2.5553250345781466e-05, "log_odds_chosen": 5.782053470611572, "log_odds_ratio": -0.07086659222841263, "logits/chosen": -0.6252003908157349, "logits/rejected": -0.6271520853042603, "logps/chosen": -0.016071034595370293, "logps/rejected": -0.5072439908981323, "loss": 2.7822, "nll_loss": 0.6884604692459106, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016071033896878362, "rewards/margins": 0.04911729693412781, "rewards/rejected": -0.05072439834475517, "step": 739 }, { "epoch": 0.5117565698478561, "grad_norm": 3.867454767227173, "learning_rate": 2.5587828492392808e-05, "log_odds_chosen": 1.2175309658050537, "log_odds_ratio": -0.43134674429893494, "logits/chosen": -0.7617948651313782, "logits/rejected": -0.7681587934494019, "logps/chosen": -0.15212436020374298, "logps/rejected": -0.3493318557739258, "loss": 4.1321, "nll_loss": 0.9898828864097595, "rewards/accuracies": 0.75, "rewards/chosen": -0.015212435275316238, "rewards/margins": 0.01972074992954731, "rewards/rejected": -0.0349331870675087, "step": 740 }, { "epoch": 0.5124481327800829, "grad_norm": 4.407393932342529, "learning_rate": 2.562240663900415e-05, "log_odds_chosen": 3.897775888442993, "log_odds_ratio": -0.16987872123718262, "logits/chosen": -0.44695958495140076, "logits/rejected": -0.5096914172172546, "logps/chosen": -0.10248123109340668, "logps/rejected": -0.7007458806037903, "loss": 4.0115, "nll_loss": 0.9858871102333069, "rewards/accuracies": 1.0, "rewards/chosen": -0.010248124599456787, "rewards/margins": 0.05982645973563194, "rewards/rejected": -0.07007458806037903, "step": 741 }, { "epoch": 0.5131396957123098, "grad_norm": 6.455469131469727, "learning_rate": 2.565698478561549e-05, "log_odds_chosen": 0.6308086514472961, "log_odds_ratio": -0.6032850742340088, "logits/chosen": -0.594819962978363, "logits/rejected": -0.5979675650596619, "logps/chosen": -0.19797390699386597, "logps/rejected": -0.40863046050071716, "loss": 6.5324, "nll_loss": 1.572765827178955, "rewards/accuracies": 0.5, "rewards/chosen": -0.019797392189502716, "rewards/margins": 0.02106565237045288, "rewards/rejected": -0.0408630445599556, "step": 742 }, { "epoch": 0.5138312586445366, "grad_norm": 6.121413707733154, "learning_rate": 2.5691562932226832e-05, "log_odds_chosen": 1.7726167440414429, "log_odds_ratio": -0.4711247384548187, "logits/chosen": -0.6016209125518799, "logits/rejected": -0.6485509872436523, "logps/chosen": -0.08414126187562943, "logps/rejected": -0.5053392052650452, "loss": 4.8448, "nll_loss": 1.1640899181365967, "rewards/accuracies": 0.875, "rewards/chosen": -0.008414126932621002, "rewards/margins": 0.042119793593883514, "rewards/rejected": -0.05053391680121422, "step": 743 }, { "epoch": 0.5145228215767634, "grad_norm": 6.091677188873291, "learning_rate": 2.5726141078838174e-05, "log_odds_chosen": 2.197624444961548, "log_odds_ratio": -0.5597094893455505, "logits/chosen": -0.678301990032196, "logits/rejected": -0.7075386643409729, "logps/chosen": -0.11059334874153137, "logps/rejected": -0.3694513738155365, "loss": 4.874, "nll_loss": 1.1625367403030396, "rewards/accuracies": 0.75, "rewards/chosen": -0.011059334501624107, "rewards/margins": 0.02588580548763275, "rewards/rejected": -0.03694513812661171, "step": 744 }, { "epoch": 0.5152143845089903, "grad_norm": 4.404496192932129, "learning_rate": 2.5760719225449516e-05, "log_odds_chosen": 1.9973732233047485, "log_odds_ratio": -0.35110753774642944, "logits/chosen": -0.5603127479553223, "logits/rejected": -0.5757501721382141, "logps/chosen": -0.12605589628219604, "logps/rejected": -0.4605715274810791, "loss": 4.0381, "nll_loss": 0.9744019508361816, "rewards/accuracies": 0.875, "rewards/chosen": -0.012605588883161545, "rewards/margins": 0.033451564610004425, "rewards/rejected": -0.04605715349316597, "step": 745 }, { "epoch": 0.5159059474412172, "grad_norm": 3.317373514175415, "learning_rate": 2.579529737206086e-05, "log_odds_chosen": 3.7140681743621826, "log_odds_ratio": -0.1961238533258438, "logits/chosen": -0.9345548152923584, "logits/rejected": -0.9548307061195374, "logps/chosen": -0.08017171174287796, "logps/rejected": -0.5122724771499634, "loss": 4.3311, "nll_loss": 1.0631747245788574, "rewards/accuracies": 0.875, "rewards/chosen": -0.008017171174287796, "rewards/margins": 0.04321007430553436, "rewards/rejected": -0.05122724547982216, "step": 746 }, { "epoch": 0.516597510373444, "grad_norm": 3.674877405166626, "learning_rate": 2.5829875518672202e-05, "log_odds_chosen": 2.7474920749664307, "log_odds_ratio": -0.2940402626991272, "logits/chosen": -0.5442591309547424, "logits/rejected": -0.563199520111084, "logps/chosen": -0.10129649192094803, "logps/rejected": -0.4285820722579956, "loss": 4.7906, "nll_loss": 1.168250322341919, "rewards/accuracies": 0.875, "rewards/chosen": -0.010129649192094803, "rewards/margins": 0.03272855654358864, "rewards/rejected": -0.04285820946097374, "step": 747 }, { "epoch": 0.5172890733056709, "grad_norm": 7.059281349182129, "learning_rate": 2.5864453665283544e-05, "log_odds_chosen": -0.07221721112728119, "log_odds_ratio": -0.8372195959091187, "logits/chosen": -0.49470487236976624, "logits/rejected": -0.5321720242500305, "logps/chosen": -0.19111818075180054, "logps/rejected": -0.20531783998012543, "loss": 4.2752, "nll_loss": 0.9850720167160034, "rewards/accuracies": 0.75, "rewards/chosen": -0.019111819565296173, "rewards/margins": 0.0014199642464518547, "rewards/rejected": -0.020531784743070602, "step": 748 }, { "epoch": 0.5179806362378977, "grad_norm": 3.5188753604888916, "learning_rate": 2.5899031811894885e-05, "log_odds_chosen": 2.548219680786133, "log_odds_ratio": -0.32754892110824585, "logits/chosen": -0.8879270553588867, "logits/rejected": -0.8918251395225525, "logps/chosen": -0.14871710538864136, "logps/rejected": -0.5499089956283569, "loss": 4.0379, "nll_loss": 0.9767143726348877, "rewards/accuracies": 0.875, "rewards/chosen": -0.014871710911393166, "rewards/margins": 0.040119193494319916, "rewards/rejected": -0.05499090254306793, "step": 749 }, { "epoch": 0.5186721991701245, "grad_norm": 3.5754287242889404, "learning_rate": 2.5933609958506227e-05, "log_odds_chosen": 3.1863934993743896, "log_odds_ratio": -0.3843536376953125, "logits/chosen": -0.5169445872306824, "logits/rejected": -0.49316757917404175, "logps/chosen": -0.11643863469362259, "logps/rejected": -0.5823589563369751, "loss": 3.4881, "nll_loss": 0.8335927724838257, "rewards/accuracies": 0.75, "rewards/chosen": -0.011643863283097744, "rewards/margins": 0.04659203439950943, "rewards/rejected": -0.05823589116334915, "step": 750 }, { "epoch": 0.5193637621023514, "grad_norm": 4.024445056915283, "learning_rate": 2.596818810511757e-05, "log_odds_chosen": 1.073103666305542, "log_odds_ratio": -0.6491037607192993, "logits/chosen": -1.0050249099731445, "logits/rejected": -1.0319346189498901, "logps/chosen": -0.16097313165664673, "logps/rejected": -0.26376786828041077, "loss": 4.6072, "nll_loss": 1.0868946313858032, "rewards/accuracies": 0.625, "rewards/chosen": -0.016097312793135643, "rewards/margins": 0.010279476642608643, "rewards/rejected": -0.026376787573099136, "step": 751 }, { "epoch": 0.5200553250345782, "grad_norm": 5.002845764160156, "learning_rate": 2.600276625172891e-05, "log_odds_chosen": 1.8316545486450195, "log_odds_ratio": -0.5380937457084656, "logits/chosen": -0.5348612666130066, "logits/rejected": -0.5365390181541443, "logps/chosen": -0.19655044376850128, "logps/rejected": -0.3816303610801697, "loss": 2.9643, "nll_loss": 0.6872615814208984, "rewards/accuracies": 0.75, "rewards/chosen": -0.019655045121908188, "rewards/margins": 0.01850799284875393, "rewards/rejected": -0.03816303238272667, "step": 752 }, { "epoch": 0.520746887966805, "grad_norm": 6.330991268157959, "learning_rate": 2.603734439834025e-05, "log_odds_chosen": 0.8655611872673035, "log_odds_ratio": -0.6965842247009277, "logits/chosen": -0.8606619834899902, "logits/rejected": -0.8774101734161377, "logps/chosen": -0.2293684184551239, "logps/rejected": -0.4654915928840637, "loss": 4.2237, "nll_loss": 0.9862702488899231, "rewards/accuracies": 0.625, "rewards/chosen": -0.02293684333562851, "rewards/margins": 0.023612316697835922, "rewards/rejected": -0.04654916375875473, "step": 753 }, { "epoch": 0.5214384508990318, "grad_norm": 4.153950214385986, "learning_rate": 2.6071922544951593e-05, "log_odds_chosen": 1.9040453433990479, "log_odds_ratio": -0.5017755627632141, "logits/chosen": -0.9090903997421265, "logits/rejected": -0.9265241622924805, "logps/chosen": -0.1252647340297699, "logps/rejected": -0.46394142508506775, "loss": 3.6527, "nll_loss": 0.862996518611908, "rewards/accuracies": 0.75, "rewards/chosen": -0.012526473961770535, "rewards/margins": 0.033867672085762024, "rewards/rejected": -0.046394143253564835, "step": 754 }, { "epoch": 0.5221300138312587, "grad_norm": 4.278303623199463, "learning_rate": 2.6106500691562935e-05, "log_odds_chosen": 1.5212517976760864, "log_odds_ratio": -0.2745608985424042, "logits/chosen": -0.5291388034820557, "logits/rejected": -0.5695719718933105, "logps/chosen": -0.11442254483699799, "logps/rejected": -0.4772147238254547, "loss": 4.0293, "nll_loss": 0.9798673391342163, "rewards/accuracies": 0.875, "rewards/chosen": -0.011442254297435284, "rewards/margins": 0.03627922013401985, "rewards/rejected": -0.04772147536277771, "step": 755 }, { "epoch": 0.5228215767634855, "grad_norm": 3.9194319248199463, "learning_rate": 2.6141078838174276e-05, "log_odds_chosen": 1.879309892654419, "log_odds_ratio": -0.3652842044830322, "logits/chosen": -0.893629789352417, "logits/rejected": -0.8920744061470032, "logps/chosen": -0.11437473446130753, "logps/rejected": -0.43803346157073975, "loss": 3.9646, "nll_loss": 0.9546254873275757, "rewards/accuracies": 0.875, "rewards/chosen": -0.011437472887337208, "rewards/margins": 0.03236587345600128, "rewards/rejected": -0.04380334913730621, "step": 756 }, { "epoch": 0.5235131396957123, "grad_norm": 4.98897123336792, "learning_rate": 2.6175656984785618e-05, "log_odds_chosen": 1.3624794483184814, "log_odds_ratio": -0.44211530685424805, "logits/chosen": -0.565403163433075, "logits/rejected": -0.5806471705436707, "logps/chosen": -0.1290087103843689, "logps/rejected": -0.2725811004638672, "loss": 5.366, "nll_loss": 1.2972995042800903, "rewards/accuracies": 0.625, "rewards/chosen": -0.012900871224701405, "rewards/margins": 0.014357241801917553, "rewards/rejected": -0.027258113026618958, "step": 757 }, { "epoch": 0.5242047026279392, "grad_norm": 4.583791732788086, "learning_rate": 2.621023513139696e-05, "log_odds_chosen": 2.5393147468566895, "log_odds_ratio": -0.3373655378818512, "logits/chosen": -0.4986213147640228, "logits/rejected": -0.487330824136734, "logps/chosen": -0.1359880566596985, "logps/rejected": -0.4418767988681793, "loss": 4.0072, "nll_loss": 0.9680536389350891, "rewards/accuracies": 0.875, "rewards/chosen": -0.013598806224763393, "rewards/margins": 0.030588874593377113, "rewards/rejected": -0.04418767988681793, "step": 758 }, { "epoch": 0.524896265560166, "grad_norm": 3.971343517303467, "learning_rate": 2.62448132780083e-05, "log_odds_chosen": 1.4316984415054321, "log_odds_ratio": -0.4938808083534241, "logits/chosen": -0.5328791737556458, "logits/rejected": -0.4855450391769409, "logps/chosen": -0.1369304656982422, "logps/rejected": -0.30632275342941284, "loss": 3.7887, "nll_loss": 0.8977863788604736, "rewards/accuracies": 0.625, "rewards/chosen": -0.013693045824766159, "rewards/margins": 0.016939228400588036, "rewards/rejected": -0.030632276087999344, "step": 759 }, { "epoch": 0.5255878284923928, "grad_norm": 4.400160312652588, "learning_rate": 2.6279391424619642e-05, "log_odds_chosen": 0.7450700402259827, "log_odds_ratio": -0.8866539001464844, "logits/chosen": -0.5152993202209473, "logits/rejected": -0.4968855381011963, "logps/chosen": -0.1909049153327942, "logps/rejected": -0.22761806845664978, "loss": 3.5208, "nll_loss": 0.7915401458740234, "rewards/accuracies": 0.5, "rewards/chosen": -0.01909049227833748, "rewards/margins": 0.0036713131703436375, "rewards/rejected": -0.022761806845664978, "step": 760 }, { "epoch": 0.5262793914246197, "grad_norm": 4.3578338623046875, "learning_rate": 2.6313969571230984e-05, "log_odds_chosen": 1.7597806453704834, "log_odds_ratio": -0.3029617369174957, "logits/chosen": -0.6150257587432861, "logits/rejected": -0.6218180656433105, "logps/chosen": -0.1087150052189827, "logps/rejected": -0.39284566044807434, "loss": 4.5935, "nll_loss": 1.11807119846344, "rewards/accuracies": 0.875, "rewards/chosen": -0.01087150163948536, "rewards/margins": 0.028413068503141403, "rewards/rejected": -0.03928457200527191, "step": 761 }, { "epoch": 0.5269709543568465, "grad_norm": 3.5353329181671143, "learning_rate": 2.6348547717842326e-05, "log_odds_chosen": 1.9123175144195557, "log_odds_ratio": -0.3525024354457855, "logits/chosen": -0.8938580751419067, "logits/rejected": -0.8758429288864136, "logps/chosen": -0.13999508321285248, "logps/rejected": -0.3457835912704468, "loss": 4.4079, "nll_loss": 1.0667307376861572, "rewards/accuracies": 0.875, "rewards/chosen": -0.013999508693814278, "rewards/margins": 0.02057885192334652, "rewards/rejected": -0.0345783606171608, "step": 762 }, { "epoch": 0.5276625172890733, "grad_norm": 5.203707695007324, "learning_rate": 2.6383125864453667e-05, "log_odds_chosen": 1.0678726434707642, "log_odds_ratio": -0.6421834230422974, "logits/chosen": -0.621665358543396, "logits/rejected": -0.613940417766571, "logps/chosen": -0.16835099458694458, "logps/rejected": -0.440276563167572, "loss": 4.4273, "nll_loss": 1.042595386505127, "rewards/accuracies": 0.5, "rewards/chosen": -0.016835100948810577, "rewards/margins": 0.027192555367946625, "rewards/rejected": -0.0440276563167572, "step": 763 }, { "epoch": 0.5283540802213001, "grad_norm": 3.169253349304199, "learning_rate": 2.641770401106501e-05, "log_odds_chosen": 2.8914332389831543, "log_odds_ratio": -0.2715517282485962, "logits/chosen": -0.822980523109436, "logits/rejected": -0.853297233581543, "logps/chosen": -0.06129853054881096, "logps/rejected": -0.37394028902053833, "loss": 4.1005, "nll_loss": 0.9979735612869263, "rewards/accuracies": 0.875, "rewards/chosen": -0.006129853427410126, "rewards/margins": 0.031264178454875946, "rewards/rejected": -0.03739403188228607, "step": 764 }, { "epoch": 0.529045643153527, "grad_norm": 4.1213531494140625, "learning_rate": 2.645228215767635e-05, "log_odds_chosen": 1.256605625152588, "log_odds_ratio": -0.44500529766082764, "logits/chosen": -0.7482407093048096, "logits/rejected": -0.7464094161987305, "logps/chosen": -0.1757386028766632, "logps/rejected": -0.2952781319618225, "loss": 3.8401, "nll_loss": 0.9155247211456299, "rewards/accuracies": 0.75, "rewards/chosen": -0.01757385954260826, "rewards/margins": 0.01195395179092884, "rewards/rejected": -0.02952781319618225, "step": 765 }, { "epoch": 0.5297372060857538, "grad_norm": 4.943138122558594, "learning_rate": 2.6486860304287692e-05, "log_odds_chosen": 1.1825425624847412, "log_odds_ratio": -0.6412897109985352, "logits/chosen": -0.9547351598739624, "logits/rejected": -0.940011739730835, "logps/chosen": -0.16075746715068817, "logps/rejected": -0.27479812502861023, "loss": 5.8234, "nll_loss": 1.391717791557312, "rewards/accuracies": 0.75, "rewards/chosen": -0.016075747087597847, "rewards/margins": 0.011404067277908325, "rewards/rejected": -0.027479812502861023, "step": 766 }, { "epoch": 0.5304287690179806, "grad_norm": 3.294407367706299, "learning_rate": 2.6521438450899033e-05, "log_odds_chosen": 2.2841694355010986, "log_odds_ratio": -0.47490108013153076, "logits/chosen": -0.9181256294250488, "logits/rejected": -0.9499702453613281, "logps/chosen": -0.08856260776519775, "logps/rejected": -0.3723558783531189, "loss": 4.6973, "nll_loss": 1.1268422603607178, "rewards/accuracies": 0.75, "rewards/chosen": -0.008856261149048805, "rewards/margins": 0.028379324823617935, "rewards/rejected": -0.03723558783531189, "step": 767 }, { "epoch": 0.5311203319502075, "grad_norm": 3.231011390686035, "learning_rate": 2.6556016597510375e-05, "log_odds_chosen": 2.726728916168213, "log_odds_ratio": -0.4835449457168579, "logits/chosen": -0.8287713527679443, "logits/rejected": -0.869062066078186, "logps/chosen": -0.15231871604919434, "logps/rejected": -0.44504475593566895, "loss": 3.9759, "nll_loss": 0.9456151723861694, "rewards/accuracies": 0.625, "rewards/chosen": -0.015231871977448463, "rewards/margins": 0.02927260287106037, "rewards/rejected": -0.044504471123218536, "step": 768 }, { "epoch": 0.5318118948824343, "grad_norm": 3.9974963665008545, "learning_rate": 2.6590594744121717e-05, "log_odds_chosen": 2.2256228923797607, "log_odds_ratio": -0.3583628535270691, "logits/chosen": -0.6432703733444214, "logits/rejected": -0.6699737310409546, "logps/chosen": -0.131565660238266, "logps/rejected": -0.5114902853965759, "loss": 5.9335, "nll_loss": 1.4475429058074951, "rewards/accuracies": 0.75, "rewards/chosen": -0.013156566768884659, "rewards/margins": 0.03799246624112129, "rewards/rejected": -0.05114902928471565, "step": 769 }, { "epoch": 0.5325034578146611, "grad_norm": 2.977198839187622, "learning_rate": 2.6625172890733058e-05, "log_odds_chosen": 2.8446381092071533, "log_odds_ratio": -0.2956915497779846, "logits/chosen": -0.6301093101501465, "logits/rejected": -0.6782950162887573, "logps/chosen": -0.1516442894935608, "logps/rejected": -0.568672776222229, "loss": 4.0353, "nll_loss": 0.9792449474334717, "rewards/accuracies": 0.75, "rewards/chosen": -0.015164428390562534, "rewards/margins": 0.04170284792780876, "rewards/rejected": -0.05686727166175842, "step": 770 }, { "epoch": 0.533195020746888, "grad_norm": 4.515598773956299, "learning_rate": 2.66597510373444e-05, "log_odds_chosen": 1.9685964584350586, "log_odds_ratio": -0.4243180751800537, "logits/chosen": -0.6195254921913147, "logits/rejected": -0.6742951273918152, "logps/chosen": -0.14887145161628723, "logps/rejected": -0.7238699197769165, "loss": 4.7484, "nll_loss": 1.144672155380249, "rewards/accuracies": 0.75, "rewards/chosen": -0.014887145720422268, "rewards/margins": 0.057499852031469345, "rewards/rejected": -0.07238698750734329, "step": 771 }, { "epoch": 0.5338865836791148, "grad_norm": 4.412965297698975, "learning_rate": 2.669432918395574e-05, "log_odds_chosen": 1.7204562425613403, "log_odds_ratio": -0.39194899797439575, "logits/chosen": -0.5892022848129272, "logits/rejected": -0.6004505157470703, "logps/chosen": -0.1321462094783783, "logps/rejected": -0.3937058448791504, "loss": 4.8, "nll_loss": 1.1607953310012817, "rewards/accuracies": 0.875, "rewards/chosen": -0.013214620761573315, "rewards/margins": 0.02615596167743206, "rewards/rejected": -0.0393705815076828, "step": 772 }, { "epoch": 0.5345781466113416, "grad_norm": 4.206930637359619, "learning_rate": 2.6728907330567083e-05, "log_odds_chosen": 1.9610211849212646, "log_odds_ratio": -0.22896619141101837, "logits/chosen": -0.5846484899520874, "logits/rejected": -0.6280151605606079, "logps/chosen": -0.09105009585618973, "logps/rejected": -0.40164899826049805, "loss": 4.0853, "nll_loss": 0.9984228610992432, "rewards/accuracies": 0.875, "rewards/chosen": -0.009105009958148003, "rewards/margins": 0.03105989098548889, "rewards/rejected": -0.04016490280628204, "step": 773 }, { "epoch": 0.5352697095435685, "grad_norm": 4.103181838989258, "learning_rate": 2.6763485477178424e-05, "log_odds_chosen": 1.097496747970581, "log_odds_ratio": -0.5405707955360413, "logits/chosen": -0.37573862075805664, "logits/rejected": -0.43676427006721497, "logps/chosen": -0.1625049114227295, "logps/rejected": -0.3144679665565491, "loss": 4.3009, "nll_loss": 1.0211718082427979, "rewards/accuracies": 0.625, "rewards/chosen": -0.0162504930049181, "rewards/margins": 0.015196304768323898, "rewards/rejected": -0.031446799635887146, "step": 774 }, { "epoch": 0.5359612724757953, "grad_norm": 4.65155029296875, "learning_rate": 2.6798063623789766e-05, "log_odds_chosen": 1.3845301866531372, "log_odds_ratio": -0.39862924814224243, "logits/chosen": -0.3924928307533264, "logits/rejected": -0.40106141567230225, "logps/chosen": -0.16831707954406738, "logps/rejected": -0.39287295937538147, "loss": 4.8806, "nll_loss": 1.180276870727539, "rewards/accuracies": 0.875, "rewards/chosen": -0.016831709071993828, "rewards/margins": 0.02245558425784111, "rewards/rejected": -0.039287298917770386, "step": 775 }, { "epoch": 0.5366528354080221, "grad_norm": 6.8850932121276855, "learning_rate": 2.6832641770401107e-05, "log_odds_chosen": 2.191964864730835, "log_odds_ratio": -0.687903106212616, "logits/chosen": -0.3750625252723694, "logits/rejected": -0.4132746160030365, "logps/chosen": -0.14285290241241455, "logps/rejected": -0.40058577060699463, "loss": 4.5572, "nll_loss": 1.0705032348632812, "rewards/accuracies": 0.625, "rewards/chosen": -0.01428529154509306, "rewards/margins": 0.025773286819458008, "rewards/rejected": -0.040058575570583344, "step": 776 }, { "epoch": 0.5373443983402489, "grad_norm": 2.687833786010742, "learning_rate": 2.686721991701245e-05, "log_odds_chosen": 3.121793270111084, "log_odds_ratio": -0.29946577548980713, "logits/chosen": -0.5395219922065735, "logits/rejected": -0.5736823678016663, "logps/chosen": -0.07659432291984558, "logps/rejected": -0.3418726921081543, "loss": 3.6865, "nll_loss": 0.8916776776313782, "rewards/accuracies": 1.0, "rewards/chosen": -0.007659432012587786, "rewards/margins": 0.02652783691883087, "rewards/rejected": -0.03418726846575737, "step": 777 }, { "epoch": 0.5380359612724758, "grad_norm": 4.768846035003662, "learning_rate": 2.690179806362379e-05, "log_odds_chosen": 1.7344532012939453, "log_odds_ratio": -0.6882243156433105, "logits/chosen": -0.7784990668296814, "logits/rejected": -0.8007603883743286, "logps/chosen": -0.14528436958789825, "logps/rejected": -0.3341407775878906, "loss": 5.0192, "nll_loss": 1.1859819889068604, "rewards/accuracies": 0.625, "rewards/chosen": -0.01452843751758337, "rewards/margins": 0.018885640427470207, "rewards/rejected": -0.033414077013731, "step": 778 }, { "epoch": 0.5387275242047026, "grad_norm": 3.818828582763672, "learning_rate": 2.6936376210235132e-05, "log_odds_chosen": 2.687124252319336, "log_odds_ratio": -0.5535922646522522, "logits/chosen": -0.6772273182868958, "logits/rejected": -0.7121256589889526, "logps/chosen": -0.14388306438922882, "logps/rejected": -0.5445559620857239, "loss": 3.6195, "nll_loss": 0.8495252132415771, "rewards/accuracies": 0.625, "rewards/chosen": -0.014388306066393852, "rewards/margins": 0.040067292749881744, "rewards/rejected": -0.054455600678920746, "step": 779 }, { "epoch": 0.5394190871369294, "grad_norm": 4.158625602722168, "learning_rate": 2.6970954356846474e-05, "log_odds_chosen": 2.361208915710449, "log_odds_ratio": -0.29354727268218994, "logits/chosen": -0.3465491235256195, "logits/rejected": -0.34471794962882996, "logps/chosen": -0.1022605150938034, "logps/rejected": -0.38724422454833984, "loss": 4.1023, "nll_loss": 0.9962158203125, "rewards/accuracies": 1.0, "rewards/chosen": -0.010226051323115826, "rewards/margins": 0.028498370200395584, "rewards/rejected": -0.038724422454833984, "step": 780 }, { "epoch": 0.5401106500691563, "grad_norm": 4.943760871887207, "learning_rate": 2.7005532503457815e-05, "log_odds_chosen": 1.5981311798095703, "log_odds_ratio": -0.4409557580947876, "logits/chosen": -0.7355484366416931, "logits/rejected": -0.763656735420227, "logps/chosen": -0.13593299686908722, "logps/rejected": -0.4234578311443329, "loss": 4.972, "nll_loss": 1.1989084482192993, "rewards/accuracies": 0.625, "rewards/chosen": -0.013593301177024841, "rewards/margins": 0.028752481564879417, "rewards/rejected": -0.04234578460454941, "step": 781 }, { "epoch": 0.5408022130013831, "grad_norm": 6.2170090675354, "learning_rate": 2.7040110650069157e-05, "log_odds_chosen": 0.6322706937789917, "log_odds_ratio": -0.8293993473052979, "logits/chosen": -0.7701730132102966, "logits/rejected": -0.7138707637786865, "logps/chosen": -0.26484400033950806, "logps/rejected": -0.4796360731124878, "loss": 5.606, "nll_loss": 1.3185690641403198, "rewards/accuracies": 0.5, "rewards/chosen": -0.026484401896595955, "rewards/margins": 0.021479208022356033, "rewards/rejected": -0.04796360805630684, "step": 782 }, { "epoch": 0.5414937759336099, "grad_norm": 4.743947505950928, "learning_rate": 2.70746887966805e-05, "log_odds_chosen": 0.5518181324005127, "log_odds_ratio": -0.5969145894050598, "logits/chosen": -0.9111509919166565, "logits/rejected": -0.8899242281913757, "logps/chosen": -0.18800178170204163, "logps/rejected": -0.3061884641647339, "loss": 5.4442, "nll_loss": 1.3013533353805542, "rewards/accuracies": 0.5, "rewards/chosen": -0.01880018040537834, "rewards/margins": 0.011818666011095047, "rewards/rejected": -0.03061884641647339, "step": 783 }, { "epoch": 0.5421853388658368, "grad_norm": 3.736454725265503, "learning_rate": 2.710926694329184e-05, "log_odds_chosen": 2.629013776779175, "log_odds_ratio": -0.38394689559936523, "logits/chosen": -0.702704906463623, "logits/rejected": -0.7145882248878479, "logps/chosen": -0.08854182809591293, "logps/rejected": -0.3831250071525574, "loss": 3.3801, "nll_loss": 0.8066269159317017, "rewards/accuracies": 0.625, "rewards/chosen": -0.008854183368384838, "rewards/margins": 0.029458321630954742, "rewards/rejected": -0.038312505930662155, "step": 784 }, { "epoch": 0.5428769017980636, "grad_norm": 3.409085512161255, "learning_rate": 2.714384508990318e-05, "log_odds_chosen": 2.7632088661193848, "log_odds_ratio": -0.2678602933883667, "logits/chosen": -0.4413297176361084, "logits/rejected": -0.48410317301750183, "logps/chosen": -0.0839589387178421, "logps/rejected": -0.5882033705711365, "loss": 3.9799, "nll_loss": 0.9681931734085083, "rewards/accuracies": 1.0, "rewards/chosen": -0.008395894430577755, "rewards/margins": 0.050424449145793915, "rewards/rejected": -0.058820344507694244, "step": 785 }, { "epoch": 0.5435684647302904, "grad_norm": 3.7073123455047607, "learning_rate": 2.7178423236514523e-05, "log_odds_chosen": 2.0540857315063477, "log_odds_ratio": -0.2842232584953308, "logits/chosen": -0.8638523817062378, "logits/rejected": -0.8727890253067017, "logps/chosen": -0.06774594634771347, "logps/rejected": -0.3289458751678467, "loss": 3.3233, "nll_loss": 0.8023905158042908, "rewards/accuracies": 0.875, "rewards/chosen": -0.00677459454163909, "rewards/margins": 0.02611999399960041, "rewards/rejected": -0.03289458900690079, "step": 786 }, { "epoch": 0.5442600276625172, "grad_norm": 4.251959323883057, "learning_rate": 2.7213001383125865e-05, "log_odds_chosen": 1.566872000694275, "log_odds_ratio": -0.5363442897796631, "logits/chosen": -0.6940356492996216, "logits/rejected": -0.6940560340881348, "logps/chosen": -0.14424169063568115, "logps/rejected": -0.5402974486351013, "loss": 4.3171, "nll_loss": 1.0256503820419312, "rewards/accuracies": 0.75, "rewards/chosen": -0.014424169436097145, "rewards/margins": 0.03960557281970978, "rewards/rejected": -0.05402974411845207, "step": 787 }, { "epoch": 0.5449515905947441, "grad_norm": 5.320130348205566, "learning_rate": 2.7247579529737206e-05, "log_odds_chosen": 1.577492356300354, "log_odds_ratio": -0.40941229462623596, "logits/chosen": -0.7909371256828308, "logits/rejected": -0.7870829105377197, "logps/chosen": -0.15442880988121033, "logps/rejected": -0.3312009274959564, "loss": 4.9657, "nll_loss": 1.2004934549331665, "rewards/accuracies": 0.625, "rewards/chosen": -0.015442880801856518, "rewards/margins": 0.01767721213400364, "rewards/rejected": -0.03312009200453758, "step": 788 }, { "epoch": 0.5456431535269709, "grad_norm": 2.6318728923797607, "learning_rate": 2.7282157676348548e-05, "log_odds_chosen": 2.872804880142212, "log_odds_ratio": -0.3544527590274811, "logits/chosen": -0.46935009956359863, "logits/rejected": -0.47872787714004517, "logps/chosen": -0.0999964103102684, "logps/rejected": -0.28759926557540894, "loss": 3.5248, "nll_loss": 0.8457651138305664, "rewards/accuracies": 0.75, "rewards/chosen": -0.009999641217291355, "rewards/margins": 0.018760286271572113, "rewards/rejected": -0.028759926557540894, "step": 789 }, { "epoch": 0.5463347164591977, "grad_norm": 4.349747180938721, "learning_rate": 2.731673582295989e-05, "log_odds_chosen": 2.838932991027832, "log_odds_ratio": -0.14528866112232208, "logits/chosen": -0.500427782535553, "logits/rejected": -0.5386743545532227, "logps/chosen": -0.059282850474119186, "logps/rejected": -0.46896499395370483, "loss": 5.0646, "nll_loss": 1.2516143321990967, "rewards/accuracies": 1.0, "rewards/chosen": -0.0059282854199409485, "rewards/margins": 0.040968216955661774, "rewards/rejected": -0.04689650237560272, "step": 790 }, { "epoch": 0.5470262793914247, "grad_norm": 6.711754322052002, "learning_rate": 2.7351313969571234e-05, "log_odds_chosen": 0.7262469530105591, "log_odds_ratio": -0.6351161599159241, "logits/chosen": -0.8433955907821655, "logits/rejected": -0.8634489178657532, "logps/chosen": -0.1538301706314087, "logps/rejected": -0.20225130021572113, "loss": 6.0995, "nll_loss": 1.4613513946533203, "rewards/accuracies": 0.625, "rewards/chosen": -0.015383017249405384, "rewards/margins": 0.004842113703489304, "rewards/rejected": -0.020225130021572113, "step": 791 }, { "epoch": 0.5477178423236515, "grad_norm": 7.227035045623779, "learning_rate": 2.7385892116182576e-05, "log_odds_chosen": 1.4565250873565674, "log_odds_ratio": -0.5795712471008301, "logits/chosen": -0.5510681867599487, "logits/rejected": -0.5623601675033569, "logps/chosen": -0.19853737950325012, "logps/rejected": -0.4113742411136627, "loss": 3.8831, "nll_loss": 0.9128076434135437, "rewards/accuracies": 0.625, "rewards/chosen": -0.019853739067912102, "rewards/margins": 0.02128368616104126, "rewards/rejected": -0.04113742709159851, "step": 792 }, { "epoch": 0.5484094052558783, "grad_norm": 4.128443717956543, "learning_rate": 2.7420470262793917e-05, "log_odds_chosen": 4.165518283843994, "log_odds_ratio": -0.11458099633455276, "logits/chosen": -0.634047269821167, "logits/rejected": -0.6352896690368652, "logps/chosen": -0.06181440129876137, "logps/rejected": -0.6406897306442261, "loss": 4.2237, "nll_loss": 1.0444719791412354, "rewards/accuracies": 1.0, "rewards/chosen": -0.006181440781801939, "rewards/margins": 0.05788753554224968, "rewards/rejected": -0.06406897306442261, "step": 793 }, { "epoch": 0.5491009681881052, "grad_norm": 3.842010259628296, "learning_rate": 2.745504840940526e-05, "log_odds_chosen": 2.4175264835357666, "log_odds_ratio": -0.5656532049179077, "logits/chosen": -0.4226923882961273, "logits/rejected": -0.3965950012207031, "logps/chosen": -0.1884640008211136, "logps/rejected": -0.30576592683792114, "loss": 4.1704, "nll_loss": 0.9860259890556335, "rewards/accuracies": 0.5, "rewards/chosen": -0.01884640008211136, "rewards/margins": 0.011730191297829151, "rewards/rejected": -0.030576592311263084, "step": 794 }, { "epoch": 0.549792531120332, "grad_norm": 4.974619388580322, "learning_rate": 2.74896265560166e-05, "log_odds_chosen": 1.3438681364059448, "log_odds_ratio": -0.6321280002593994, "logits/chosen": -0.566962718963623, "logits/rejected": -0.5704185366630554, "logps/chosen": -0.1416710764169693, "logps/rejected": -0.5122509598731995, "loss": 4.0576, "nll_loss": 0.951187789440155, "rewards/accuracies": 0.625, "rewards/chosen": -0.01416710764169693, "rewards/margins": 0.037057988345623016, "rewards/rejected": -0.05122509226202965, "step": 795 }, { "epoch": 0.5504840940525588, "grad_norm": 3.7341768741607666, "learning_rate": 2.7524204702627942e-05, "log_odds_chosen": 2.616122007369995, "log_odds_ratio": -0.29485952854156494, "logits/chosen": -0.5127213001251221, "logits/rejected": -0.49903106689453125, "logps/chosen": -0.07982846349477768, "logps/rejected": -0.44080740213394165, "loss": 5.2754, "nll_loss": 1.2893630266189575, "rewards/accuracies": 0.875, "rewards/chosen": -0.007982847280800343, "rewards/margins": 0.03609789162874222, "rewards/rejected": -0.044080741703510284, "step": 796 }, { "epoch": 0.5511756569847857, "grad_norm": 4.109652042388916, "learning_rate": 2.7558782849239284e-05, "log_odds_chosen": 3.560600757598877, "log_odds_ratio": -0.29116514325141907, "logits/chosen": -0.7510650157928467, "logits/rejected": -0.779525637626648, "logps/chosen": -0.10667699575424194, "logps/rejected": -0.9304628968238831, "loss": 4.4969, "nll_loss": 1.0951179265975952, "rewards/accuracies": 0.75, "rewards/chosen": -0.01066769938915968, "rewards/margins": 0.08237859606742859, "rewards/rejected": -0.09304629266262054, "step": 797 }, { "epoch": 0.5518672199170125, "grad_norm": 4.035399436950684, "learning_rate": 2.7593360995850625e-05, "log_odds_chosen": 1.198608160018921, "log_odds_ratio": -0.3582497835159302, "logits/chosen": -0.7811927795410156, "logits/rejected": -0.786125123500824, "logps/chosen": -0.10331468284130096, "logps/rejected": -0.3704620599746704, "loss": 3.7105, "nll_loss": 0.8917912840843201, "rewards/accuracies": 1.0, "rewards/chosen": -0.010331467725336552, "rewards/margins": 0.026714740321040154, "rewards/rejected": -0.03704620897769928, "step": 798 }, { "epoch": 0.5525587828492393, "grad_norm": 3.7303075790405273, "learning_rate": 2.7627939142461967e-05, "log_odds_chosen": 3.3303298950195312, "log_odds_ratio": -0.1779707968235016, "logits/chosen": -0.4753631055355072, "logits/rejected": -0.5264405012130737, "logps/chosen": -0.06457747519016266, "logps/rejected": -0.7297971248626709, "loss": 3.5538, "nll_loss": 0.8706504702568054, "rewards/accuracies": 0.875, "rewards/chosen": -0.006457747891545296, "rewards/margins": 0.06652196496725082, "rewards/rejected": -0.07297971099615097, "step": 799 }, { "epoch": 0.5532503457814661, "grad_norm": 4.2899322509765625, "learning_rate": 2.766251728907331e-05, "log_odds_chosen": 3.094510078430176, "log_odds_ratio": -0.293547660112381, "logits/chosen": -0.9138388633728027, "logits/rejected": -0.9197445511817932, "logps/chosen": -0.0937461331486702, "logps/rejected": -0.564795970916748, "loss": 5.1177, "nll_loss": 1.250058650970459, "rewards/accuracies": 0.875, "rewards/chosen": -0.009374613873660564, "rewards/margins": 0.047104984521865845, "rewards/rejected": -0.05647960305213928, "step": 800 }, { "epoch": 0.553941908713693, "grad_norm": 5.070873737335205, "learning_rate": 2.769709543568465e-05, "log_odds_chosen": 4.334895610809326, "log_odds_ratio": -0.5068272948265076, "logits/chosen": -0.6096988320350647, "logits/rejected": -0.6185740232467651, "logps/chosen": -0.1481603980064392, "logps/rejected": -0.9830435514450073, "loss": 4.8537, "nll_loss": 1.1627322435379028, "rewards/accuracies": 0.875, "rewards/chosen": -0.014816039241850376, "rewards/margins": 0.083488330245018, "rewards/rejected": -0.09830436110496521, "step": 801 }, { "epoch": 0.5546334716459198, "grad_norm": 4.983336925506592, "learning_rate": 2.773167358229599e-05, "log_odds_chosen": 1.7390892505645752, "log_odds_ratio": -0.3052809536457062, "logits/chosen": -0.759835422039032, "logits/rejected": -0.7830816507339478, "logps/chosen": -0.14674659073352814, "logps/rejected": -0.4617050290107727, "loss": 4.7161, "nll_loss": 1.148496150970459, "rewards/accuracies": 0.875, "rewards/chosen": -0.014674659818410873, "rewards/margins": 0.031495846807956696, "rewards/rejected": -0.04617050662636757, "step": 802 }, { "epoch": 0.5553250345781466, "grad_norm": 4.5399370193481445, "learning_rate": 2.7766251728907333e-05, "log_odds_chosen": 4.40491247177124, "log_odds_ratio": -0.43303653597831726, "logits/chosen": -0.6713570952415466, "logits/rejected": -0.7115859985351562, "logps/chosen": -0.152422696352005, "logps/rejected": -0.7829738259315491, "loss": 3.7393, "nll_loss": 0.8915234804153442, "rewards/accuracies": 0.875, "rewards/chosen": -0.01524226926267147, "rewards/margins": 0.0630551129579544, "rewards/rejected": -0.07829738408327103, "step": 803 }, { "epoch": 0.5560165975103735, "grad_norm": 3.6568808555603027, "learning_rate": 2.7800829875518675e-05, "log_odds_chosen": 2.0957438945770264, "log_odds_ratio": -0.35637903213500977, "logits/chosen": -0.6042817831039429, "logits/rejected": -0.5805646777153015, "logps/chosen": -0.1572953462600708, "logps/rejected": -0.43727999925613403, "loss": 4.2036, "nll_loss": 1.0152617692947388, "rewards/accuracies": 0.875, "rewards/chosen": -0.01572953723371029, "rewards/margins": 0.027998462319374084, "rewards/rejected": -0.043727997690439224, "step": 804 }, { "epoch": 0.5567081604426003, "grad_norm": 5.165635108947754, "learning_rate": 2.7835408022130016e-05, "log_odds_chosen": 1.8557673692703247, "log_odds_ratio": -0.7694844007492065, "logits/chosen": -0.5034834742546082, "logits/rejected": -0.5410099029541016, "logps/chosen": -0.22562721371650696, "logps/rejected": -0.5017483830451965, "loss": 3.6102, "nll_loss": 0.8255925178527832, "rewards/accuracies": 0.5, "rewards/chosen": -0.022562721744179726, "rewards/margins": 0.027612116187810898, "rewards/rejected": -0.050174832344055176, "step": 805 }, { "epoch": 0.5573997233748271, "grad_norm": 8.340802192687988, "learning_rate": 2.7869986168741358e-05, "log_odds_chosen": 1.3203935623168945, "log_odds_ratio": -1.065887212753296, "logits/chosen": -0.4293314814567566, "logits/rejected": -0.45345383882522583, "logps/chosen": -0.22275424003601074, "logps/rejected": -0.3493325114250183, "loss": 4.5494, "nll_loss": 1.0307643413543701, "rewards/accuracies": 0.625, "rewards/chosen": -0.022275425493717194, "rewards/margins": 0.012657827697694302, "rewards/rejected": -0.03493325412273407, "step": 806 }, { "epoch": 0.558091286307054, "grad_norm": 3.9928297996520996, "learning_rate": 2.79045643153527e-05, "log_odds_chosen": 2.364372491836548, "log_odds_ratio": -0.4788789749145508, "logits/chosen": -0.8757091164588928, "logits/rejected": -0.8417526483535767, "logps/chosen": -0.17361877858638763, "logps/rejected": -0.674676239490509, "loss": 4.6235, "nll_loss": 1.1079771518707275, "rewards/accuracies": 0.625, "rewards/chosen": -0.017361879348754883, "rewards/margins": 0.0501057505607605, "rewards/rejected": -0.06746762990951538, "step": 807 }, { "epoch": 0.5587828492392808, "grad_norm": 4.5377044677734375, "learning_rate": 2.793914246196404e-05, "log_odds_chosen": 2.1746432781219482, "log_odds_ratio": -0.482133686542511, "logits/chosen": -0.872567355632782, "logits/rejected": -0.8851162195205688, "logps/chosen": -0.13028322160243988, "logps/rejected": -0.3184550404548645, "loss": 4.8049, "nll_loss": 1.153007984161377, "rewards/accuracies": 0.625, "rewards/chosen": -0.013028322719037533, "rewards/margins": 0.018817182630300522, "rewards/rejected": -0.03184550628066063, "step": 808 }, { "epoch": 0.5594744121715076, "grad_norm": 4.3767242431640625, "learning_rate": 2.7973720608575382e-05, "log_odds_chosen": 2.2641472816467285, "log_odds_ratio": -0.2708294689655304, "logits/chosen": -0.7464234232902527, "logits/rejected": -0.7855645418167114, "logps/chosen": -0.09002307802438736, "logps/rejected": -0.38848984241485596, "loss": 4.545, "nll_loss": 1.1091712713241577, "rewards/accuracies": 1.0, "rewards/chosen": -0.00900230836123228, "rewards/margins": 0.02984667383134365, "rewards/rejected": -0.03884898126125336, "step": 809 }, { "epoch": 0.5601659751037344, "grad_norm": 3.8829071521759033, "learning_rate": 2.8008298755186724e-05, "log_odds_chosen": 3.297788143157959, "log_odds_ratio": -0.1501597911119461, "logits/chosen": -0.41120457649230957, "logits/rejected": -0.4324203133583069, "logps/chosen": -0.07470942288637161, "logps/rejected": -0.6631616353988647, "loss": 3.5935, "nll_loss": 0.8833543062210083, "rewards/accuracies": 1.0, "rewards/chosen": -0.007470941636711359, "rewards/margins": 0.058845218271017075, "rewards/rejected": -0.066316157579422, "step": 810 }, { "epoch": 0.5608575380359613, "grad_norm": 5.252667427062988, "learning_rate": 2.8042876901798066e-05, "log_odds_chosen": 3.6831297874450684, "log_odds_ratio": -0.18154287338256836, "logits/chosen": -0.6489682197570801, "logits/rejected": -0.6909961104393005, "logps/chosen": -0.07913654297590256, "logps/rejected": -0.6723751425743103, "loss": 4.9607, "nll_loss": 1.222014307975769, "rewards/accuracies": 0.875, "rewards/chosen": -0.007913654670119286, "rewards/margins": 0.059323858469724655, "rewards/rejected": -0.06723751127719879, "step": 811 }, { "epoch": 0.5615491009681881, "grad_norm": 3.088599681854248, "learning_rate": 2.8077455048409407e-05, "log_odds_chosen": 4.496147632598877, "log_odds_ratio": -0.31289637088775635, "logits/chosen": -0.7372713685035706, "logits/rejected": -0.821730375289917, "logps/chosen": -0.07059605419635773, "logps/rejected": -0.8317077159881592, "loss": 3.4873, "nll_loss": 0.8405301570892334, "rewards/accuracies": 0.875, "rewards/chosen": -0.007059605326503515, "rewards/margins": 0.07611117511987686, "rewards/rejected": -0.08317077159881592, "step": 812 }, { "epoch": 0.5622406639004149, "grad_norm": 4.271559715270996, "learning_rate": 2.811203319502075e-05, "log_odds_chosen": 3.0948903560638428, "log_odds_ratio": -0.3525601029396057, "logits/chosen": -0.4547148644924164, "logits/rejected": -0.4978640675544739, "logps/chosen": -0.12485533952713013, "logps/rejected": -0.4798852205276489, "loss": 3.5777, "nll_loss": 0.8591761589050293, "rewards/accuracies": 0.75, "rewards/chosen": -0.012485533021390438, "rewards/margins": 0.03550298511981964, "rewards/rejected": -0.04798852279782295, "step": 813 }, { "epoch": 0.5629322268326418, "grad_norm": 3.4795942306518555, "learning_rate": 2.814661134163209e-05, "log_odds_chosen": 5.109646797180176, "log_odds_ratio": -0.19024419784545898, "logits/chosen": -0.553877055644989, "logits/rejected": -0.5855638980865479, "logps/chosen": -0.058753401041030884, "logps/rejected": -0.6228174567222595, "loss": 3.5912, "nll_loss": 0.8787802457809448, "rewards/accuracies": 0.875, "rewards/chosen": -0.005875340197235346, "rewards/margins": 0.0564064085483551, "rewards/rejected": -0.06228174269199371, "step": 814 }, { "epoch": 0.5636237897648686, "grad_norm": 3.0252182483673096, "learning_rate": 2.8181189488243432e-05, "log_odds_chosen": 3.8306195735931396, "log_odds_ratio": -0.2786122262477875, "logits/chosen": -0.5956602096557617, "logits/rejected": -0.6217089891433716, "logps/chosen": -0.07585865259170532, "logps/rejected": -0.5628564953804016, "loss": 2.9095, "nll_loss": 0.6995032429695129, "rewards/accuracies": 0.875, "rewards/chosen": -0.0075858645141124725, "rewards/margins": 0.04869978502392769, "rewards/rejected": -0.05628565326333046, "step": 815 }, { "epoch": 0.5643153526970954, "grad_norm": 3.9304304122924805, "learning_rate": 2.8215767634854773e-05, "log_odds_chosen": 4.477867603302002, "log_odds_ratio": -0.25606584548950195, "logits/chosen": -0.47317975759506226, "logits/rejected": -0.5208321809768677, "logps/chosen": -0.07767429947853088, "logps/rejected": -0.5357122421264648, "loss": 4.6473, "nll_loss": 1.1362117528915405, "rewards/accuracies": 0.75, "rewards/chosen": -0.0077674295753240585, "rewards/margins": 0.045803800225257874, "rewards/rejected": -0.05357122793793678, "step": 816 }, { "epoch": 0.5650069156293223, "grad_norm": 3.6263046264648438, "learning_rate": 2.8250345781466115e-05, "log_odds_chosen": 4.530634880065918, "log_odds_ratio": -0.16015692055225372, "logits/chosen": -0.4547904431819916, "logits/rejected": -0.44847387075424194, "logps/chosen": -0.06347014009952545, "logps/rejected": -0.7121883630752563, "loss": 3.1737, "nll_loss": 0.7773990631103516, "rewards/accuracies": 1.0, "rewards/chosen": -0.00634701456874609, "rewards/margins": 0.06487182527780533, "rewards/rejected": -0.0712188333272934, "step": 817 }, { "epoch": 0.5656984785615491, "grad_norm": 6.153628826141357, "learning_rate": 2.8284923928077457e-05, "log_odds_chosen": 3.2044761180877686, "log_odds_ratio": -0.5079742670059204, "logits/chosen": -0.5137223601341248, "logits/rejected": -0.5485202074050903, "logps/chosen": -0.15794017910957336, "logps/rejected": -0.6775748133659363, "loss": 4.6777, "nll_loss": 1.118630051612854, "rewards/accuracies": 0.625, "rewards/chosen": -0.015794018283486366, "rewards/margins": 0.05196346342563629, "rewards/rejected": -0.0677574872970581, "step": 818 }, { "epoch": 0.5663900414937759, "grad_norm": 4.21480655670166, "learning_rate": 2.8319502074688798e-05, "log_odds_chosen": 2.6262288093566895, "log_odds_ratio": -0.303774356842041, "logits/chosen": -0.7704868912696838, "logits/rejected": -0.8650259375572205, "logps/chosen": -0.12395796179771423, "logps/rejected": -0.4790630042552948, "loss": 3.8007, "nll_loss": 0.9198006391525269, "rewards/accuracies": 0.875, "rewards/chosen": -0.012395797297358513, "rewards/margins": 0.03551050275564194, "rewards/rejected": -0.0479063019156456, "step": 819 }, { "epoch": 0.5670816044260027, "grad_norm": 3.816220283508301, "learning_rate": 2.835408022130014e-05, "log_odds_chosen": 2.4669811725616455, "log_odds_ratio": -0.3575797379016876, "logits/chosen": -0.5892040133476257, "logits/rejected": -0.6085165739059448, "logps/chosen": -0.13426099717617035, "logps/rejected": -0.5866325497627258, "loss": 3.0939, "nll_loss": 0.7377179265022278, "rewards/accuracies": 0.875, "rewards/chosen": -0.01342609990388155, "rewards/margins": 0.04523715749382973, "rewards/rejected": -0.0586632564663887, "step": 820 }, { "epoch": 0.5677731673582296, "grad_norm": 7.7992730140686035, "learning_rate": 2.838865836791148e-05, "log_odds_chosen": 4.493231773376465, "log_odds_ratio": -0.5356466770172119, "logits/chosen": -0.5167162418365479, "logits/rejected": -0.5656682252883911, "logps/chosen": -0.06463811546564102, "logps/rejected": -0.8757993578910828, "loss": 4.4222, "nll_loss": 1.051975965499878, "rewards/accuracies": 0.875, "rewards/chosen": -0.006463811732828617, "rewards/margins": 0.08111612498760223, "rewards/rejected": -0.08757993578910828, "step": 821 }, { "epoch": 0.5684647302904564, "grad_norm": 5.891249179840088, "learning_rate": 2.8423236514522823e-05, "log_odds_chosen": 2.585822343826294, "log_odds_ratio": -0.6505037546157837, "logits/chosen": -0.7809338569641113, "logits/rejected": -0.7595021724700928, "logps/chosen": -0.1175026148557663, "logps/rejected": -0.3504663407802582, "loss": 5.8074, "nll_loss": 1.3868025541305542, "rewards/accuracies": 0.625, "rewards/chosen": -0.011750261299312115, "rewards/margins": 0.023296372964978218, "rewards/rejected": -0.03504663333296776, "step": 822 }, { "epoch": 0.5691562932226832, "grad_norm": 7.554938316345215, "learning_rate": 2.8457814661134164e-05, "log_odds_chosen": 1.2075903415679932, "log_odds_ratio": -0.7292794585227966, "logits/chosen": -0.5420154333114624, "logits/rejected": -0.5659103393554688, "logps/chosen": -0.1520342379808426, "logps/rejected": -0.40784332156181335, "loss": 3.883, "nll_loss": 0.8978164792060852, "rewards/accuracies": 0.625, "rewards/chosen": -0.015203425660729408, "rewards/margins": 0.025580905377864838, "rewards/rejected": -0.0407843291759491, "step": 823 }, { "epoch": 0.5698478561549101, "grad_norm": 3.3298332691192627, "learning_rate": 2.8492392807745506e-05, "log_odds_chosen": 3.9135968685150146, "log_odds_ratio": -0.3564820885658264, "logits/chosen": -0.5905018448829651, "logits/rejected": -0.6010125875473022, "logps/chosen": -0.06891832500696182, "logps/rejected": -0.408086359500885, "loss": 4.5232, "nll_loss": 1.0951402187347412, "rewards/accuracies": 0.75, "rewards/chosen": -0.006891832686960697, "rewards/margins": 0.03391680121421814, "rewards/rejected": -0.04080863296985626, "step": 824 }, { "epoch": 0.5705394190871369, "grad_norm": 5.071108341217041, "learning_rate": 2.8526970954356847e-05, "log_odds_chosen": 3.3061747550964355, "log_odds_ratio": -0.4244978129863739, "logits/chosen": -0.6132084131240845, "logits/rejected": -0.5790513753890991, "logps/chosen": -0.17267945408821106, "logps/rejected": -0.5895110368728638, "loss": 4.9681, "nll_loss": 1.1995717287063599, "rewards/accuracies": 0.75, "rewards/chosen": -0.017267946153879166, "rewards/margins": 0.04168315604329109, "rewards/rejected": -0.058951105922460556, "step": 825 }, { "epoch": 0.5712309820193637, "grad_norm": 8.846132278442383, "learning_rate": 2.856154910096819e-05, "log_odds_chosen": 2.3505032062530518, "log_odds_ratio": -0.9325801730155945, "logits/chosen": -0.6876745820045471, "logits/rejected": -0.7471765279769897, "logps/chosen": -0.11827315390110016, "logps/rejected": -0.461060494184494, "loss": 5.6731, "nll_loss": 1.3250163793563843, "rewards/accuracies": 0.75, "rewards/chosen": -0.011827315203845501, "rewards/margins": 0.034278735518455505, "rewards/rejected": -0.04610605165362358, "step": 826 }, { "epoch": 0.5719225449515906, "grad_norm": 4.555541515350342, "learning_rate": 2.859612724757953e-05, "log_odds_chosen": 0.9776923060417175, "log_odds_ratio": -0.4993519186973572, "logits/chosen": -0.72479647397995, "logits/rejected": -0.7874212265014648, "logps/chosen": -0.15950685739517212, "logps/rejected": -0.23940470814704895, "loss": 5.5452, "nll_loss": 1.336353063583374, "rewards/accuracies": 0.625, "rewards/chosen": -0.015950685366988182, "rewards/margins": 0.007989783771336079, "rewards/rejected": -0.023940470069646835, "step": 827 }, { "epoch": 0.5726141078838174, "grad_norm": 3.23429536819458, "learning_rate": 2.8630705394190872e-05, "log_odds_chosen": 4.071626663208008, "log_odds_ratio": -0.24565596878528595, "logits/chosen": -0.5955303907394409, "logits/rejected": -0.624784529209137, "logps/chosen": -0.05326705053448677, "logps/rejected": -0.3535764217376709, "loss": 3.4374, "nll_loss": 0.8347886800765991, "rewards/accuracies": 0.875, "rewards/chosen": -0.005326705053448677, "rewards/margins": 0.030030936002731323, "rewards/rejected": -0.03535763919353485, "step": 828 }, { "epoch": 0.5733056708160442, "grad_norm": 2.840412139892578, "learning_rate": 2.8665283540802214e-05, "log_odds_chosen": 4.180113315582275, "log_odds_ratio": -0.19920429587364197, "logits/chosen": -0.9072915315628052, "logits/rejected": -0.9142999649047852, "logps/chosen": -0.04711640253663063, "logps/rejected": -0.48545461893081665, "loss": 3.5186, "nll_loss": 0.8597191572189331, "rewards/accuracies": 0.875, "rewards/chosen": -0.0047116405330598354, "rewards/margins": 0.04383382201194763, "rewards/rejected": -0.048545461148023605, "step": 829 }, { "epoch": 0.573997233748271, "grad_norm": 3.699284553527832, "learning_rate": 2.8699861687413555e-05, "log_odds_chosen": 2.9122934341430664, "log_odds_ratio": -0.379080593585968, "logits/chosen": -0.5995203256607056, "logits/rejected": -0.6436635255813599, "logps/chosen": -0.0784071683883667, "logps/rejected": -0.46496570110321045, "loss": 3.2445, "nll_loss": 0.7732207179069519, "rewards/accuracies": 0.75, "rewards/chosen": -0.0078407172113657, "rewards/margins": 0.038655854761600494, "rewards/rejected": -0.046496570110321045, "step": 830 }, { "epoch": 0.5746887966804979, "grad_norm": 3.74281644821167, "learning_rate": 2.8734439834024897e-05, "log_odds_chosen": 2.593203544616699, "log_odds_ratio": -0.5199276208877563, "logits/chosen": -0.15824517607688904, "logits/rejected": -0.17762351036071777, "logps/chosen": -0.06203662231564522, "logps/rejected": -0.3429993987083435, "loss": 3.3275, "nll_loss": 0.7798757553100586, "rewards/accuracies": 0.625, "rewards/chosen": -0.006203662138432264, "rewards/margins": 0.028096279129385948, "rewards/rejected": -0.03429993987083435, "step": 831 }, { "epoch": 0.5753803596127247, "grad_norm": 4.471902847290039, "learning_rate": 2.876901798063624e-05, "log_odds_chosen": 2.951345920562744, "log_odds_ratio": -0.42411351203918457, "logits/chosen": -0.3936513662338257, "logits/rejected": -0.41638216376304626, "logps/chosen": -0.08869173377752304, "logps/rejected": -0.4445950984954834, "loss": 4.7829, "nll_loss": 1.153320550918579, "rewards/accuracies": 0.875, "rewards/chosen": -0.008869173005223274, "rewards/margins": 0.035590335726737976, "rewards/rejected": -0.0444595068693161, "step": 832 }, { "epoch": 0.5760719225449515, "grad_norm": 4.303694725036621, "learning_rate": 2.880359612724758e-05, "log_odds_chosen": 1.960557222366333, "log_odds_ratio": -0.6316541433334351, "logits/chosen": -0.927675187587738, "logits/rejected": -0.9279346466064453, "logps/chosen": -0.2110701948404312, "logps/rejected": -0.47497111558914185, "loss": 4.5291, "nll_loss": 1.0691012144088745, "rewards/accuracies": 0.625, "rewards/chosen": -0.02110701985657215, "rewards/margins": 0.026390092447400093, "rewards/rejected": -0.047497112303972244, "step": 833 }, { "epoch": 0.5767634854771784, "grad_norm": 4.320366859436035, "learning_rate": 2.883817427385892e-05, "log_odds_chosen": 3.7120985984802246, "log_odds_ratio": -0.21159572899341583, "logits/chosen": -0.8098641633987427, "logits/rejected": -0.7814208269119263, "logps/chosen": -0.06647158414125443, "logps/rejected": -0.5353458523750305, "loss": 4.282, "nll_loss": 1.0493314266204834, "rewards/accuracies": 1.0, "rewards/chosen": -0.006647157948464155, "rewards/margins": 0.04688742756843567, "rewards/rejected": -0.05353458225727081, "step": 834 }, { "epoch": 0.5774550484094052, "grad_norm": 3.6654677391052246, "learning_rate": 2.8872752420470263e-05, "log_odds_chosen": 2.1516904830932617, "log_odds_ratio": -0.4506683051586151, "logits/chosen": -0.8107026815414429, "logits/rejected": -0.8079274892807007, "logps/chosen": -0.11864329874515533, "logps/rejected": -0.40280842781066895, "loss": 4.4068, "nll_loss": 1.0566353797912598, "rewards/accuracies": 0.625, "rewards/chosen": -0.011864329688251019, "rewards/margins": 0.028416510671377182, "rewards/rejected": -0.040280841290950775, "step": 835 }, { "epoch": 0.5781466113416321, "grad_norm": 3.123896360397339, "learning_rate": 2.8907330567081608e-05, "log_odds_chosen": 3.5088181495666504, "log_odds_ratio": -0.23648536205291748, "logits/chosen": -0.33534562587738037, "logits/rejected": -0.31095805764198303, "logps/chosen": -0.09103836119174957, "logps/rejected": -0.34189480543136597, "loss": 3.1665, "nll_loss": 0.767978847026825, "rewards/accuracies": 0.75, "rewards/chosen": -0.009103836491703987, "rewards/margins": 0.02508564665913582, "rewards/rejected": -0.034189481288194656, "step": 836 }, { "epoch": 0.578838174273859, "grad_norm": 4.09024715423584, "learning_rate": 2.894190871369295e-05, "log_odds_chosen": 2.4240686893463135, "log_odds_ratio": -0.21396838128566742, "logits/chosen": -0.7405085563659668, "logits/rejected": -0.7897012233734131, "logps/chosen": -0.092780202627182, "logps/rejected": -0.3410795331001282, "loss": 3.9187, "nll_loss": 0.9582738876342773, "rewards/accuracies": 1.0, "rewards/chosen": -0.00927801989018917, "rewards/margins": 0.024829933419823647, "rewards/rejected": -0.03410795331001282, "step": 837 }, { "epoch": 0.5795297372060858, "grad_norm": 4.34940242767334, "learning_rate": 2.897648686030429e-05, "log_odds_chosen": 3.650613784790039, "log_odds_ratio": -0.13455849885940552, "logits/chosen": -0.6593092679977417, "logits/rejected": -0.7048452496528625, "logps/chosen": -0.06898073852062225, "logps/rejected": -0.9512230753898621, "loss": 4.6662, "nll_loss": 1.1531026363372803, "rewards/accuracies": 1.0, "rewards/chosen": -0.006898073945194483, "rewards/margins": 0.08822423219680786, "rewards/rejected": -0.0951223075389862, "step": 838 }, { "epoch": 0.5802213001383126, "grad_norm": 4.64874792098999, "learning_rate": 2.9011065006915633e-05, "log_odds_chosen": 2.257146120071411, "log_odds_ratio": -0.5997257232666016, "logits/chosen": -0.4204868674278259, "logits/rejected": -0.42794424295425415, "logps/chosen": -0.21649272739887238, "logps/rejected": -0.3857274055480957, "loss": 3.7636, "nll_loss": 0.8809358477592468, "rewards/accuracies": 0.625, "rewards/chosen": -0.021649271249771118, "rewards/margins": 0.016923464834690094, "rewards/rejected": -0.03857273608446121, "step": 839 }, { "epoch": 0.5809128630705395, "grad_norm": 3.4235551357269287, "learning_rate": 2.9045643153526974e-05, "log_odds_chosen": 2.356065511703491, "log_odds_ratio": -0.4604770839214325, "logits/chosen": -0.7503741979598999, "logits/rejected": -0.7579087018966675, "logps/chosen": -0.20275188982486725, "logps/rejected": -0.5521271228790283, "loss": 4.1885, "nll_loss": 1.0010693073272705, "rewards/accuracies": 0.625, "rewards/chosen": -0.020275190472602844, "rewards/margins": 0.03493752330541611, "rewards/rejected": -0.05521271377801895, "step": 840 }, { "epoch": 0.5816044260027663, "grad_norm": 3.7340457439422607, "learning_rate": 2.9080221300138316e-05, "log_odds_chosen": 5.318211555480957, "log_odds_ratio": -0.39543986320495605, "logits/chosen": -0.5348352193832397, "logits/rejected": -0.541537880897522, "logps/chosen": -0.05745195224881172, "logps/rejected": -0.6514012813568115, "loss": 3.1775, "nll_loss": 0.7548248171806335, "rewards/accuracies": 0.75, "rewards/chosen": -0.005745194852352142, "rewards/margins": 0.05939492955803871, "rewards/rejected": -0.06514012813568115, "step": 841 }, { "epoch": 0.5822959889349931, "grad_norm": 4.628314971923828, "learning_rate": 2.9114799446749657e-05, "log_odds_chosen": 2.1264233589172363, "log_odds_ratio": -0.5673332810401917, "logits/chosen": -0.4301891028881073, "logits/rejected": -0.45492786169052124, "logps/chosen": -0.18683162331581116, "logps/rejected": -0.4376612901687622, "loss": 4.6018, "nll_loss": 1.0937283039093018, "rewards/accuracies": 0.625, "rewards/chosen": -0.018683163449168205, "rewards/margins": 0.025082964450120926, "rewards/rejected": -0.04376612976193428, "step": 842 }, { "epoch": 0.58298755186722, "grad_norm": 4.024649620056152, "learning_rate": 2.9149377593361e-05, "log_odds_chosen": 4.945640563964844, "log_odds_ratio": -0.18473605811595917, "logits/chosen": -0.7456772923469543, "logits/rejected": -0.8036054372787476, "logps/chosen": -0.06031443178653717, "logps/rejected": -0.6921895146369934, "loss": 4.4804, "nll_loss": 1.1016205549240112, "rewards/accuracies": 0.875, "rewards/chosen": -0.006031442899256945, "rewards/margins": 0.06318750977516174, "rewards/rejected": -0.0692189484834671, "step": 843 }, { "epoch": 0.5836791147994468, "grad_norm": 3.0777909755706787, "learning_rate": 2.918395573997234e-05, "log_odds_chosen": 3.7343509197235107, "log_odds_ratio": -0.29216429591178894, "logits/chosen": -0.6594283580780029, "logits/rejected": -0.6560637354850769, "logps/chosen": -0.068955197930336, "logps/rejected": -0.4723179042339325, "loss": 3.2491, "nll_loss": 0.7830635905265808, "rewards/accuracies": 0.875, "rewards/chosen": -0.0068955197930336, "rewards/margins": 0.04033627361059189, "rewards/rejected": -0.04723179340362549, "step": 844 }, { "epoch": 0.5843706777316736, "grad_norm": 4.926107883453369, "learning_rate": 2.9218533886583682e-05, "log_odds_chosen": 2.450533628463745, "log_odds_ratio": -0.4356068968772888, "logits/chosen": -0.6373806595802307, "logits/rejected": -0.626931369304657, "logps/chosen": -0.195645272731781, "logps/rejected": -0.5948528051376343, "loss": 3.5552, "nll_loss": 0.8452330231666565, "rewards/accuracies": 0.75, "rewards/chosen": -0.01956452801823616, "rewards/margins": 0.03992075473070145, "rewards/rejected": -0.05948528274893761, "step": 845 }, { "epoch": 0.5850622406639004, "grad_norm": 7.288669109344482, "learning_rate": 2.9253112033195024e-05, "log_odds_chosen": 1.0796414613723755, "log_odds_ratio": -0.7791949510574341, "logits/chosen": -0.8181466460227966, "logits/rejected": -0.7781654596328735, "logps/chosen": -0.14210152626037598, "logps/rejected": -0.24084502458572388, "loss": 4.4744, "nll_loss": 1.0406807661056519, "rewards/accuracies": 0.75, "rewards/chosen": -0.014210152439773083, "rewards/margins": 0.009874352253973484, "rewards/rejected": -0.024084504693746567, "step": 846 }, { "epoch": 0.5857538035961273, "grad_norm": 5.573554515838623, "learning_rate": 2.9287690179806365e-05, "log_odds_chosen": 1.4154304265975952, "log_odds_ratio": -0.5396012663841248, "logits/chosen": -0.6344490051269531, "logits/rejected": -0.6770589351654053, "logps/chosen": -0.12416580319404602, "logps/rejected": -0.306435227394104, "loss": 5.0875, "nll_loss": 1.217907428741455, "rewards/accuracies": 0.625, "rewards/chosen": -0.012416580691933632, "rewards/margins": 0.01822694018483162, "rewards/rejected": -0.0306435227394104, "step": 847 }, { "epoch": 0.5864453665283541, "grad_norm": 3.9988412857055664, "learning_rate": 2.9322268326417707e-05, "log_odds_chosen": 1.2546517848968506, "log_odds_ratio": -0.46386343240737915, "logits/chosen": -0.44906535744667053, "logits/rejected": -0.473858118057251, "logps/chosen": -0.13993997871875763, "logps/rejected": -0.3896486759185791, "loss": 3.897, "nll_loss": 0.9278663396835327, "rewards/accuracies": 0.625, "rewards/chosen": -0.013993998058140278, "rewards/margins": 0.024970872327685356, "rewards/rejected": -0.03896487131714821, "step": 848 }, { "epoch": 0.5871369294605809, "grad_norm": 2.8597145080566406, "learning_rate": 2.935684647302905e-05, "log_odds_chosen": 2.0133144855499268, "log_odds_ratio": -0.5181325078010559, "logits/chosen": -0.5793277621269226, "logits/rejected": -0.5990381240844727, "logps/chosen": -0.12990695238113403, "logps/rejected": -0.4745805859565735, "loss": 2.9299, "nll_loss": 0.6806671619415283, "rewards/accuracies": 0.75, "rewards/chosen": -0.012990695424377918, "rewards/margins": 0.034467361867427826, "rewards/rejected": -0.04745806008577347, "step": 849 }, { "epoch": 0.5878284923928078, "grad_norm": 3.8037021160125732, "learning_rate": 2.939142461964039e-05, "log_odds_chosen": 1.8560802936553955, "log_odds_ratio": -0.4328434467315674, "logits/chosen": -0.9212394952774048, "logits/rejected": -0.9172543287277222, "logps/chosen": -0.08324627578258514, "logps/rejected": -0.2946692705154419, "loss": 3.972, "nll_loss": 0.9497216939926147, "rewards/accuracies": 0.75, "rewards/chosen": -0.008324628695845604, "rewards/margins": 0.021142300218343735, "rewards/rejected": -0.02946692705154419, "step": 850 }, { "epoch": 0.5885200553250346, "grad_norm": 3.4924614429473877, "learning_rate": 2.942600276625173e-05, "log_odds_chosen": 2.0488245487213135, "log_odds_ratio": -0.2997685372829437, "logits/chosen": -0.3865346610546112, "logits/rejected": -0.42198455333709717, "logps/chosen": -0.09593084454536438, "logps/rejected": -0.37145107984542847, "loss": 3.6509, "nll_loss": 0.8827521204948425, "rewards/accuracies": 0.875, "rewards/chosen": -0.009593085385859013, "rewards/margins": 0.02755202353000641, "rewards/rejected": -0.03714510798454285, "step": 851 }, { "epoch": 0.5892116182572614, "grad_norm": 3.511770009994507, "learning_rate": 2.9460580912863073e-05, "log_odds_chosen": 3.7782740592956543, "log_odds_ratio": -0.24993540346622467, "logits/chosen": -0.710189938545227, "logits/rejected": -0.7517250776290894, "logps/chosen": -0.09729503840208054, "logps/rejected": -0.5149400234222412, "loss": 3.9751, "nll_loss": 0.9687862992286682, "rewards/accuracies": 1.0, "rewards/chosen": -0.009729502722620964, "rewards/margins": 0.04176449775695801, "rewards/rejected": -0.05149400234222412, "step": 852 }, { "epoch": 0.5899031811894883, "grad_norm": 4.0979390144348145, "learning_rate": 2.9495159059474415e-05, "log_odds_chosen": 2.0559842586517334, "log_odds_ratio": -0.3502409756183624, "logits/chosen": -0.7304658889770508, "logits/rejected": -0.7085290551185608, "logps/chosen": -0.11763148754835129, "logps/rejected": -0.38747963309288025, "loss": 4.2136, "nll_loss": 1.0183758735656738, "rewards/accuracies": 0.875, "rewards/chosen": -0.011763148941099644, "rewards/margins": 0.026984816417098045, "rewards/rejected": -0.038747966289520264, "step": 853 }, { "epoch": 0.5905947441217151, "grad_norm": 4.737348556518555, "learning_rate": 2.9529737206085756e-05, "log_odds_chosen": 0.7259271144866943, "log_odds_ratio": -0.5310872793197632, "logits/chosen": -0.7076024413108826, "logits/rejected": -0.6707653403282166, "logps/chosen": -0.12364031374454498, "logps/rejected": -0.22116045653820038, "loss": 4.4756, "nll_loss": 1.0657799243927002, "rewards/accuracies": 0.75, "rewards/chosen": -0.012364029884338379, "rewards/margins": 0.009752015583217144, "rewards/rejected": -0.022116046398878098, "step": 854 }, { "epoch": 0.5912863070539419, "grad_norm": 4.541485786437988, "learning_rate": 2.9564315352697098e-05, "log_odds_chosen": 1.7745822668075562, "log_odds_ratio": -0.3305359184741974, "logits/chosen": -0.8056719899177551, "logits/rejected": -0.8085699081420898, "logps/chosen": -0.11804518103599548, "logps/rejected": -0.3992602229118347, "loss": 4.4872, "nll_loss": 1.0887389183044434, "rewards/accuracies": 0.875, "rewards/chosen": -0.011804519221186638, "rewards/margins": 0.028121504932641983, "rewards/rejected": -0.03992602229118347, "step": 855 }, { "epoch": 0.5919778699861687, "grad_norm": 4.4644951820373535, "learning_rate": 2.959889349930844e-05, "log_odds_chosen": 3.1081631183624268, "log_odds_ratio": -0.11160407960414886, "logits/chosen": -0.6001406908035278, "logits/rejected": -0.6196090579032898, "logps/chosen": -0.12042544782161713, "logps/rejected": -1.225574016571045, "loss": 3.2166, "nll_loss": 0.7929803133010864, "rewards/accuracies": 1.0, "rewards/chosen": -0.012042545713484287, "rewards/margins": 0.11051484942436218, "rewards/rejected": -0.1225573942065239, "step": 856 }, { "epoch": 0.5926694329183956, "grad_norm": 4.001978397369385, "learning_rate": 2.963347164591978e-05, "log_odds_chosen": 4.053859233856201, "log_odds_ratio": -0.2311854213476181, "logits/chosen": -0.8147008419036865, "logits/rejected": -0.7994644045829773, "logps/chosen": -0.05535433441400528, "logps/rejected": -0.6948223114013672, "loss": 4.2184, "nll_loss": 1.0314934253692627, "rewards/accuracies": 0.875, "rewards/chosen": -0.005535434000194073, "rewards/margins": 0.06394679844379425, "rewards/rejected": -0.0694822371006012, "step": 857 }, { "epoch": 0.5933609958506224, "grad_norm": 5.344448089599609, "learning_rate": 2.9668049792531122e-05, "log_odds_chosen": 1.0760059356689453, "log_odds_ratio": -0.7446597218513489, "logits/chosen": -0.8145299553871155, "logits/rejected": -0.8130720853805542, "logps/chosen": -0.1919110268354416, "logps/rejected": -0.483597993850708, "loss": 5.2387, "nll_loss": 1.235212802886963, "rewards/accuracies": 0.5, "rewards/chosen": -0.01919110305607319, "rewards/margins": 0.02916869707405567, "rewards/rejected": -0.04835980013012886, "step": 858 }, { "epoch": 0.5940525587828492, "grad_norm": 4.593982696533203, "learning_rate": 2.9702627939142464e-05, "log_odds_chosen": 3.4344637393951416, "log_odds_ratio": -0.24414914846420288, "logits/chosen": -0.5376627445220947, "logits/rejected": -0.5609232187271118, "logps/chosen": -0.07658465951681137, "logps/rejected": -0.7597995400428772, "loss": 4.6779, "nll_loss": 1.1450719833374023, "rewards/accuracies": 0.875, "rewards/chosen": -0.007658466696739197, "rewards/margins": 0.06832148879766464, "rewards/rejected": -0.07597995549440384, "step": 859 }, { "epoch": 0.5947441217150761, "grad_norm": 4.40407657623291, "learning_rate": 2.9737206085753806e-05, "log_odds_chosen": 2.1988892555236816, "log_odds_ratio": -0.4513634443283081, "logits/chosen": -0.8038565516471863, "logits/rejected": -0.8830662369728088, "logps/chosen": -0.1267254501581192, "logps/rejected": -0.44491326808929443, "loss": 5.182, "nll_loss": 1.250370979309082, "rewards/accuracies": 0.75, "rewards/chosen": -0.012672546319663525, "rewards/margins": 0.031818781048059464, "rewards/rejected": -0.04449132829904556, "step": 860 }, { "epoch": 0.5954356846473029, "grad_norm": 4.350531578063965, "learning_rate": 2.9771784232365147e-05, "log_odds_chosen": 5.146965503692627, "log_odds_ratio": -0.09328575432300568, "logits/chosen": -0.8370950818061829, "logits/rejected": -0.8731504678726196, "logps/chosen": -0.029911965131759644, "logps/rejected": -1.0596965551376343, "loss": 4.2535, "nll_loss": 1.0540424585342407, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029911966994404793, "rewards/margins": 0.10297845304012299, "rewards/rejected": -0.10596965253353119, "step": 861 }, { "epoch": 0.5961272475795297, "grad_norm": 4.999265670776367, "learning_rate": 2.980636237897649e-05, "log_odds_chosen": 4.388033866882324, "log_odds_ratio": -0.17923061549663544, "logits/chosen": -0.7717255353927612, "logits/rejected": -0.8007091879844666, "logps/chosen": -0.07025519758462906, "logps/rejected": -0.7537840604782104, "loss": 4.2105, "nll_loss": 1.034711480140686, "rewards/accuracies": 0.875, "rewards/chosen": -0.007025519385933876, "rewards/margins": 0.06835289299488068, "rewards/rejected": -0.0753784030675888, "step": 862 }, { "epoch": 0.5968188105117566, "grad_norm": 4.830355644226074, "learning_rate": 2.984094052558783e-05, "log_odds_chosen": 4.727936744689941, "log_odds_ratio": -0.1790466606616974, "logits/chosen": -0.5581704378128052, "logits/rejected": -0.5863669514656067, "logps/chosen": -0.3133222460746765, "logps/rejected": -1.0449085235595703, "loss": 4.303, "nll_loss": 1.0578471422195435, "rewards/accuracies": 0.875, "rewards/chosen": -0.03133222460746765, "rewards/margins": 0.0731586217880249, "rewards/rejected": -0.10449084639549255, "step": 863 }, { "epoch": 0.5975103734439834, "grad_norm": 5.1829447746276855, "learning_rate": 2.9875518672199172e-05, "log_odds_chosen": 1.112974762916565, "log_odds_ratio": -0.6885846853256226, "logits/chosen": -0.6578227281570435, "logits/rejected": -0.626107931137085, "logps/chosen": -0.17409192025661469, "logps/rejected": -0.4953766167163849, "loss": 3.5169, "nll_loss": 0.8103781342506409, "rewards/accuracies": 0.5, "rewards/chosen": -0.01740919053554535, "rewards/margins": 0.0321284681558609, "rewards/rejected": -0.04953765869140625, "step": 864 }, { "epoch": 0.5982019363762102, "grad_norm": 4.5902581214904785, "learning_rate": 2.9910096818810513e-05, "log_odds_chosen": 0.7052420377731323, "log_odds_ratio": -0.5897258520126343, "logits/chosen": -0.6574615836143494, "logits/rejected": -0.6554303765296936, "logps/chosen": -0.1836041957139969, "logps/rejected": -0.3788834810256958, "loss": 3.7562, "nll_loss": 0.8800770044326782, "rewards/accuracies": 0.75, "rewards/chosen": -0.01836041919887066, "rewards/margins": 0.01952793076634407, "rewards/rejected": -0.03788834810256958, "step": 865 }, { "epoch": 0.598893499308437, "grad_norm": 4.048480987548828, "learning_rate": 2.9944674965421855e-05, "log_odds_chosen": 1.4362030029296875, "log_odds_ratio": -0.38652119040489197, "logits/chosen": -0.8279319405555725, "logits/rejected": -0.8762853145599365, "logps/chosen": -0.14335329830646515, "logps/rejected": -0.7207509875297546, "loss": 3.5014, "nll_loss": 0.8366998434066772, "rewards/accuracies": 0.875, "rewards/chosen": -0.014335330575704575, "rewards/margins": 0.05773976817727089, "rewards/rejected": -0.07207509875297546, "step": 866 }, { "epoch": 0.5995850622406639, "grad_norm": 5.155201435089111, "learning_rate": 2.9979253112033196e-05, "log_odds_chosen": 2.813506603240967, "log_odds_ratio": -0.5240182876586914, "logits/chosen": -0.6967120170593262, "logits/rejected": -0.6847906112670898, "logps/chosen": -0.18069618940353394, "logps/rejected": -0.5114045143127441, "loss": 4.5955, "nll_loss": 1.0964840650558472, "rewards/accuracies": 0.75, "rewards/chosen": -0.018069619312882423, "rewards/margins": 0.03307083621621132, "rewards/rejected": -0.05114045366644859, "step": 867 }, { "epoch": 0.6002766251728907, "grad_norm": 3.6505987644195557, "learning_rate": 3.0013831258644538e-05, "log_odds_chosen": 0.4830799996852875, "log_odds_ratio": -0.5487514138221741, "logits/chosen": -0.7270222902297974, "logits/rejected": -0.7637308835983276, "logps/chosen": -0.22091691195964813, "logps/rejected": -0.36204272508621216, "loss": 4.494, "nll_loss": 1.0686300992965698, "rewards/accuracies": 0.625, "rewards/chosen": -0.022091692313551903, "rewards/margins": 0.014112580567598343, "rewards/rejected": -0.036204271018505096, "step": 868 }, { "epoch": 0.6009681881051175, "grad_norm": 6.205440044403076, "learning_rate": 3.004840940525588e-05, "log_odds_chosen": 3.524580955505371, "log_odds_ratio": -0.4662216305732727, "logits/chosen": -0.754102349281311, "logits/rejected": -0.7670979499816895, "logps/chosen": -0.13366849720478058, "logps/rejected": -0.6823487281799316, "loss": 4.7184, "nll_loss": 1.1329686641693115, "rewards/accuracies": 0.75, "rewards/chosen": -0.013366851024329662, "rewards/margins": 0.054868023842573166, "rewards/rejected": -0.0682348757982254, "step": 869 }, { "epoch": 0.6016597510373444, "grad_norm": 2.9457459449768066, "learning_rate": 3.008298755186722e-05, "log_odds_chosen": 2.1648120880126953, "log_odds_ratio": -0.3084190785884857, "logits/chosen": -0.7266798615455627, "logits/rejected": -0.7544651031494141, "logps/chosen": -0.09362323582172394, "logps/rejected": -0.4190313518047333, "loss": 3.7257, "nll_loss": 0.9005783796310425, "rewards/accuracies": 0.875, "rewards/chosen": -0.009362323209643364, "rewards/margins": 0.03254081681370735, "rewards/rejected": -0.041903138160705566, "step": 870 }, { "epoch": 0.6023513139695712, "grad_norm": 3.978710412979126, "learning_rate": 3.0117565698478563e-05, "log_odds_chosen": 3.242192029953003, "log_odds_ratio": -0.1863611340522766, "logits/chosen": -0.552879273891449, "logits/rejected": -0.5906392335891724, "logps/chosen": -0.11116364598274231, "logps/rejected": -0.8105045557022095, "loss": 3.6538, "nll_loss": 0.8948162794113159, "rewards/accuracies": 0.875, "rewards/chosen": -0.01111636497080326, "rewards/margins": 0.06993409246206284, "rewards/rejected": -0.08105045557022095, "step": 871 }, { "epoch": 0.603042876901798, "grad_norm": 4.808966636657715, "learning_rate": 3.0152143845089904e-05, "log_odds_chosen": 3.1818840503692627, "log_odds_ratio": -0.5515273213386536, "logits/chosen": -0.7102684378623962, "logits/rejected": -0.7035820484161377, "logps/chosen": -0.2942872941493988, "logps/rejected": -0.8675200343132019, "loss": 4.3676, "nll_loss": 1.0367563962936401, "rewards/accuracies": 0.625, "rewards/chosen": -0.02942873351275921, "rewards/margins": 0.05732327699661255, "rewards/rejected": -0.0867520123720169, "step": 872 }, { "epoch": 0.6037344398340249, "grad_norm": 5.278499126434326, "learning_rate": 3.0186721991701246e-05, "log_odds_chosen": 3.4745728969573975, "log_odds_ratio": -0.4629146158695221, "logits/chosen": -0.8261131048202515, "logits/rejected": -0.8101270198822021, "logps/chosen": -0.14608454704284668, "logps/rejected": -0.45583587884902954, "loss": 3.7457, "nll_loss": 0.890127420425415, "rewards/accuracies": 0.875, "rewards/chosen": -0.014608454890549183, "rewards/margins": 0.030975131317973137, "rewards/rejected": -0.04558359086513519, "step": 873 }, { "epoch": 0.6044260027662517, "grad_norm": 6.244517803192139, "learning_rate": 3.0221300138312587e-05, "log_odds_chosen": 1.8965070247650146, "log_odds_ratio": -0.45901423692703247, "logits/chosen": -0.3932605981826782, "logits/rejected": -0.46693626046180725, "logps/chosen": -0.086180180311203, "logps/rejected": -0.41221320629119873, "loss": 3.7194, "nll_loss": 0.8839367628097534, "rewards/accuracies": 0.875, "rewards/chosen": -0.00861801765859127, "rewards/margins": 0.03260330110788345, "rewards/rejected": -0.04122132062911987, "step": 874 }, { "epoch": 0.6051175656984785, "grad_norm": 3.957749843597412, "learning_rate": 3.025587828492393e-05, "log_odds_chosen": 3.5800201892852783, "log_odds_ratio": -0.3158847391605377, "logits/chosen": -0.8257095813751221, "logits/rejected": -0.8298307657241821, "logps/chosen": -0.12911739945411682, "logps/rejected": -0.637715220451355, "loss": 3.9396, "nll_loss": 0.9532997608184814, "rewards/accuracies": 0.875, "rewards/chosen": -0.012911740690469742, "rewards/margins": 0.050859782844781876, "rewards/rejected": -0.06377153098583221, "step": 875 }, { "epoch": 0.6058091286307054, "grad_norm": 4.251432418823242, "learning_rate": 3.029045643153527e-05, "log_odds_chosen": 4.258300304412842, "log_odds_ratio": -0.1648968756198883, "logits/chosen": -0.8471444845199585, "logits/rejected": -0.891996443271637, "logps/chosen": -0.04554177075624466, "logps/rejected": -0.8233499526977539, "loss": 4.0591, "nll_loss": 0.9982973337173462, "rewards/accuracies": 1.0, "rewards/chosen": -0.004554177634418011, "rewards/margins": 0.0777808129787445, "rewards/rejected": -0.08233499526977539, "step": 876 }, { "epoch": 0.6065006915629322, "grad_norm": 3.713589668273926, "learning_rate": 3.0325034578146612e-05, "log_odds_chosen": 3.3973257541656494, "log_odds_ratio": -0.20859216153621674, "logits/chosen": -0.3923302888870239, "logits/rejected": -0.4933090806007385, "logps/chosen": -0.054292913526296616, "logps/rejected": -0.6101047992706299, "loss": 3.6514, "nll_loss": 0.8919917941093445, "rewards/accuracies": 0.875, "rewards/chosen": -0.005429290700703859, "rewards/margins": 0.055581189692020416, "rewards/rejected": -0.06101047992706299, "step": 877 }, { "epoch": 0.607192254495159, "grad_norm": 6.882563591003418, "learning_rate": 3.0359612724757954e-05, "log_odds_chosen": 0.854179859161377, "log_odds_ratio": -0.8967254161834717, "logits/chosen": -0.9078612327575684, "logits/rejected": -0.9028449058532715, "logps/chosen": -0.11673790216445923, "logps/rejected": -0.4252132177352905, "loss": 5.0935, "nll_loss": 1.1836953163146973, "rewards/accuracies": 0.5, "rewards/chosen": -0.011673791334033012, "rewards/margins": 0.03084753267467022, "rewards/rejected": -0.04252132400870323, "step": 878 }, { "epoch": 0.6078838174273858, "grad_norm": 7.392794132232666, "learning_rate": 3.0394190871369292e-05, "log_odds_chosen": 0.07599025964736938, "log_odds_ratio": -1.283529281616211, "logits/chosen": -0.7483278512954712, "logits/rejected": -0.7108883857727051, "logps/chosen": -0.25827908515930176, "logps/rejected": -0.20767498016357422, "loss": 6.6526, "nll_loss": 1.5347901582717896, "rewards/accuracies": 0.375, "rewards/chosen": -0.025827907025814056, "rewards/margins": -0.005060410127043724, "rewards/rejected": -0.02076749876141548, "step": 879 }, { "epoch": 0.6085753803596127, "grad_norm": 6.576107978820801, "learning_rate": 3.0428769017980633e-05, "log_odds_chosen": 2.8613975048065186, "log_odds_ratio": -0.34593522548675537, "logits/chosen": -0.30519038438796997, "logits/rejected": -0.3554477393627167, "logps/chosen": -0.07818441838026047, "logps/rejected": -0.6126123666763306, "loss": 6.0538, "nll_loss": 1.4788503646850586, "rewards/accuracies": 0.875, "rewards/chosen": -0.007818441838026047, "rewards/margins": 0.05344279855489731, "rewards/rejected": -0.06126123666763306, "step": 880 }, { "epoch": 0.6092669432918395, "grad_norm": 4.552968502044678, "learning_rate": 3.0463347164591975e-05, "log_odds_chosen": 2.4071388244628906, "log_odds_ratio": -0.40522754192352295, "logits/chosen": -0.6670210361480713, "logits/rejected": -0.7009649872779846, "logps/chosen": -0.12310780584812164, "logps/rejected": -0.5360164642333984, "loss": 3.7009, "nll_loss": 0.8846949338912964, "rewards/accuracies": 0.75, "rewards/chosen": -0.012310780584812164, "rewards/margins": 0.04129086434841156, "rewards/rejected": -0.05360164865851402, "step": 881 }, { "epoch": 0.6099585062240664, "grad_norm": 21.216053009033203, "learning_rate": 3.0497925311203323e-05, "log_odds_chosen": 0.16731399297714233, "log_odds_ratio": -1.1148260831832886, "logits/chosen": -0.40998703241348267, "logits/rejected": -0.4283626079559326, "logps/chosen": -0.38131338357925415, "logps/rejected": -0.44890064001083374, "loss": 4.4729, "nll_loss": 1.0067414045333862, "rewards/accuracies": 0.5, "rewards/chosen": -0.038131337612867355, "rewards/margins": 0.006758726201951504, "rewards/rejected": -0.044890061020851135, "step": 882 }, { "epoch": 0.6106500691562933, "grad_norm": 5.321377754211426, "learning_rate": 3.053250345781467e-05, "log_odds_chosen": 2.30208420753479, "log_odds_ratio": -0.3444046974182129, "logits/chosen": -0.6480960845947266, "logits/rejected": -0.6440442204475403, "logps/chosen": -0.17282502353191376, "logps/rejected": -0.43156111240386963, "loss": 4.774, "nll_loss": 1.159049391746521, "rewards/accuracies": 0.875, "rewards/chosen": -0.017282500863075256, "rewards/margins": 0.025873607024550438, "rewards/rejected": -0.043156106024980545, "step": 883 }, { "epoch": 0.6113416320885201, "grad_norm": 4.070542812347412, "learning_rate": 3.056708160442601e-05, "log_odds_chosen": 2.503066062927246, "log_odds_ratio": -0.24399249255657196, "logits/chosen": -0.6377800703048706, "logits/rejected": -0.647244930267334, "logps/chosen": -0.0947108268737793, "logps/rejected": -0.521747350692749, "loss": 4.3188, "nll_loss": 1.0552964210510254, "rewards/accuracies": 0.875, "rewards/chosen": -0.00947108305990696, "rewards/margins": 0.04270365089178085, "rewards/rejected": -0.052174732089042664, "step": 884 }, { "epoch": 0.6120331950207469, "grad_norm": 4.310357570648193, "learning_rate": 3.060165975103735e-05, "log_odds_chosen": 2.998178005218506, "log_odds_ratio": -0.17078091204166412, "logits/chosen": -0.6814748048782349, "logits/rejected": -0.8221471905708313, "logps/chosen": -0.03645121678709984, "logps/rejected": -0.4962855875492096, "loss": 3.8549, "nll_loss": 0.9466458559036255, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036451215855777264, "rewards/margins": 0.045983437448740005, "rewards/rejected": -0.04962855949997902, "step": 885 }, { "epoch": 0.6127247579529738, "grad_norm": 3.07930850982666, "learning_rate": 3.063623789764869e-05, "log_odds_chosen": 3.6279428005218506, "log_odds_ratio": -0.2710344195365906, "logits/chosen": -0.4153643846511841, "logits/rejected": -0.44188249111175537, "logps/chosen": -0.044143207371234894, "logps/rejected": -0.785947322845459, "loss": 2.7167, "nll_loss": 0.6520835161209106, "rewards/accuracies": 0.875, "rewards/chosen": -0.004414320457726717, "rewards/margins": 0.07418042421340942, "rewards/rejected": -0.07859473675489426, "step": 886 }, { "epoch": 0.6134163208852006, "grad_norm": 6.346424102783203, "learning_rate": 3.0670816044260035e-05, "log_odds_chosen": 2.9766647815704346, "log_odds_ratio": -0.5250827670097351, "logits/chosen": -0.6835624575614929, "logits/rejected": -0.7732921838760376, "logps/chosen": -0.13457365334033966, "logps/rejected": -0.6309829354286194, "loss": 5.0592, "nll_loss": 1.212296485900879, "rewards/accuracies": 0.875, "rewards/chosen": -0.013457365334033966, "rewards/margins": 0.04964093118906021, "rewards/rejected": -0.06309829652309418, "step": 887 }, { "epoch": 0.6141078838174274, "grad_norm": 5.160597801208496, "learning_rate": 3.070539419087137e-05, "log_odds_chosen": 3.923093795776367, "log_odds_ratio": -0.3125317096710205, "logits/chosen": -0.7844764590263367, "logits/rejected": -0.7793978452682495, "logps/chosen": -0.10180672258138657, "logps/rejected": -0.688077449798584, "loss": 4.0323, "nll_loss": 0.9768339395523071, "rewards/accuracies": 0.75, "rewards/chosen": -0.010180672630667686, "rewards/margins": 0.05862707644701004, "rewards/rejected": -0.06880774348974228, "step": 888 }, { "epoch": 0.6147994467496543, "grad_norm": 26.145076751708984, "learning_rate": 3.073997233748271e-05, "log_odds_chosen": 1.9590518474578857, "log_odds_ratio": -0.7331660389900208, "logits/chosen": -0.5167617797851562, "logits/rejected": -0.4817545413970947, "logps/chosen": -0.07678233087062836, "logps/rejected": -0.43516165018081665, "loss": 3.6756, "nll_loss": 0.8455724120140076, "rewards/accuracies": 0.75, "rewards/chosen": -0.007678233552724123, "rewards/margins": 0.03583793342113495, "rewards/rejected": -0.043516166508197784, "step": 889 }, { "epoch": 0.6154910096818811, "grad_norm": 4.597845554351807, "learning_rate": 3.077455048409405e-05, "log_odds_chosen": 2.656071662902832, "log_odds_ratio": -0.27559003233909607, "logits/chosen": -0.49670350551605225, "logits/rejected": -0.5645101070404053, "logps/chosen": -0.0880126804113388, "logps/rejected": -0.6273921132087708, "loss": 4.4735, "nll_loss": 1.0908229351043701, "rewards/accuracies": 0.875, "rewards/chosen": -0.00880126841366291, "rewards/margins": 0.053937941789627075, "rewards/rejected": -0.06273921579122543, "step": 890 }, { "epoch": 0.6161825726141079, "grad_norm": 4.356814384460449, "learning_rate": 3.0809128630705394e-05, "log_odds_chosen": 1.726629614830017, "log_odds_ratio": -0.3198900818824768, "logits/chosen": -0.9252246618270874, "logits/rejected": -0.9489607810974121, "logps/chosen": -0.16357335448265076, "logps/rejected": -0.5082889795303345, "loss": 5.4556, "nll_loss": 1.3318991661071777, "rewards/accuracies": 0.875, "rewards/chosen": -0.016357336193323135, "rewards/margins": 0.03447156772017479, "rewards/rejected": -0.050828903913497925, "step": 891 }, { "epoch": 0.6168741355463347, "grad_norm": 5.273382663726807, "learning_rate": 3.0843706777316736e-05, "log_odds_chosen": 0.9887380599975586, "log_odds_ratio": -0.9254065155982971, "logits/chosen": -0.7355146408081055, "logits/rejected": -0.6899512410163879, "logps/chosen": -0.19425490498542786, "logps/rejected": -0.30127257108688354, "loss": 3.9442, "nll_loss": 0.8935017585754395, "rewards/accuracies": 0.5, "rewards/chosen": -0.019425490871071815, "rewards/margins": 0.01070176437497139, "rewards/rejected": -0.030127257108688354, "step": 892 }, { "epoch": 0.6175656984785616, "grad_norm": 3.6554532051086426, "learning_rate": 3.087828492392808e-05, "log_odds_chosen": 3.534128189086914, "log_odds_ratio": -0.26558980345726013, "logits/chosen": -0.4329376816749573, "logits/rejected": -0.4630773067474365, "logps/chosen": -0.08066828548908234, "logps/rejected": -0.6910880208015442, "loss": 3.9841, "nll_loss": 0.9694664478302002, "rewards/accuracies": 0.875, "rewards/chosen": -0.008066828362643719, "rewards/margins": 0.061041973531246185, "rewards/rejected": -0.06910879909992218, "step": 893 }, { "epoch": 0.6182572614107884, "grad_norm": 5.20051383972168, "learning_rate": 3.091286307053942e-05, "log_odds_chosen": 2.1171774864196777, "log_odds_ratio": -0.4897395968437195, "logits/chosen": -0.3613443374633789, "logits/rejected": -0.3467557430267334, "logps/chosen": -0.13655899465084076, "logps/rejected": -0.35757774114608765, "loss": 4.3682, "nll_loss": 1.0430686473846436, "rewards/accuracies": 0.625, "rewards/chosen": -0.013655899092555046, "rewards/margins": 0.022101877257227898, "rewards/rejected": -0.035757772624492645, "step": 894 }, { "epoch": 0.6189488243430152, "grad_norm": 6.48548698425293, "learning_rate": 3.094744121715076e-05, "log_odds_chosen": 1.8737776279449463, "log_odds_ratio": -0.4800935387611389, "logits/chosen": -0.7387460470199585, "logits/rejected": -0.7719529867172241, "logps/chosen": -0.1483597457408905, "logps/rejected": -0.5151386857032776, "loss": 4.0982, "nll_loss": 0.9765384793281555, "rewards/accuracies": 0.75, "rewards/chosen": -0.014835975132882595, "rewards/margins": 0.03667789697647095, "rewards/rejected": -0.05151387304067612, "step": 895 }, { "epoch": 0.6196403872752421, "grad_norm": 4.083328723907471, "learning_rate": 3.09820193637621e-05, "log_odds_chosen": 0.5870912671089172, "log_odds_ratio": -0.6784539222717285, "logits/chosen": -1.0754952430725098, "logits/rejected": -1.0223438739776611, "logps/chosen": -0.202505961060524, "logps/rejected": -0.3597847819328308, "loss": 5.1295, "nll_loss": 1.2145276069641113, "rewards/accuracies": 0.5, "rewards/chosen": -0.0202505961060524, "rewards/margins": 0.015727879479527473, "rewards/rejected": -0.03597847744822502, "step": 896 }, { "epoch": 0.6203319502074689, "grad_norm": 3.179802417755127, "learning_rate": 3.1016597510373443e-05, "log_odds_chosen": 3.1242868900299072, "log_odds_ratio": -0.38213053345680237, "logits/chosen": -0.7337120771408081, "logits/rejected": -0.7521069049835205, "logps/chosen": -0.09369072318077087, "logps/rejected": -0.39017921686172485, "loss": 4.2453, "nll_loss": 1.023102879524231, "rewards/accuracies": 0.75, "rewards/chosen": -0.009369072504341602, "rewards/margins": 0.02964884601533413, "rewards/rejected": -0.039017919450998306, "step": 897 }, { "epoch": 0.6210235131396957, "grad_norm": 4.312764644622803, "learning_rate": 3.1051175656984785e-05, "log_odds_chosen": 1.904249906539917, "log_odds_ratio": -0.40307527780532837, "logits/chosen": -1.055102825164795, "logits/rejected": -1.0678390264511108, "logps/chosen": -0.06856757402420044, "logps/rejected": -0.3968680202960968, "loss": 4.8316, "nll_loss": 1.1675834655761719, "rewards/accuracies": 0.75, "rewards/chosen": -0.006856757681816816, "rewards/margins": 0.032830044627189636, "rewards/rejected": -0.03968679904937744, "step": 898 }, { "epoch": 0.6217150760719226, "grad_norm": 3.331369638442993, "learning_rate": 3.1085753803596127e-05, "log_odds_chosen": 3.4465959072113037, "log_odds_ratio": -0.17227791249752045, "logits/chosen": -0.8975204825401306, "logits/rejected": -0.9244405627250671, "logps/chosen": -0.07354501634836197, "logps/rejected": -0.8571603298187256, "loss": 3.7298, "nll_loss": 0.9152202010154724, "rewards/accuracies": 1.0, "rewards/chosen": -0.007354501634836197, "rewards/margins": 0.07836152613162994, "rewards/rejected": -0.08571602404117584, "step": 899 }, { "epoch": 0.6224066390041494, "grad_norm": 4.473353862762451, "learning_rate": 3.112033195020747e-05, "log_odds_chosen": 4.239065647125244, "log_odds_ratio": -0.21452511847019196, "logits/chosen": -0.5707802772521973, "logits/rejected": -0.6279401183128357, "logps/chosen": -0.061546772718429565, "logps/rejected": -0.9604505896568298, "loss": 4.919, "nll_loss": 1.2083086967468262, "rewards/accuracies": 1.0, "rewards/chosen": -0.006154676899313927, "rewards/margins": 0.08989039063453674, "rewards/rejected": -0.09604506194591522, "step": 900 }, { "epoch": 0.6230982019363762, "grad_norm": 6.345496654510498, "learning_rate": 3.115491009681881e-05, "log_odds_chosen": 1.1958024501800537, "log_odds_ratio": -0.41798198223114014, "logits/chosen": -0.5380837917327881, "logits/rejected": -0.5586702823638916, "logps/chosen": -0.07194848358631134, "logps/rejected": -0.2240828424692154, "loss": 5.5867, "nll_loss": 1.354879379272461, "rewards/accuracies": 0.75, "rewards/chosen": -0.007194849196821451, "rewards/margins": 0.015213435515761375, "rewards/rejected": -0.02240828424692154, "step": 901 }, { "epoch": 0.623789764868603, "grad_norm": 3.472062349319458, "learning_rate": 3.118948824343015e-05, "log_odds_chosen": 2.3726320266723633, "log_odds_ratio": -0.4545746147632599, "logits/chosen": -0.6491698026657104, "logits/rejected": -0.6731710433959961, "logps/chosen": -0.13186895847320557, "logps/rejected": -0.3443443775177002, "loss": 3.0496, "nll_loss": 0.7169334888458252, "rewards/accuracies": 0.5, "rewards/chosen": -0.013186894357204437, "rewards/margins": 0.021247539669275284, "rewards/rejected": -0.03443443775177002, "step": 902 }, { "epoch": 0.6244813278008299, "grad_norm": 5.7245001792907715, "learning_rate": 3.122406639004149e-05, "log_odds_chosen": 2.1482057571411133, "log_odds_ratio": -0.8374338150024414, "logits/chosen": -0.5861244201660156, "logits/rejected": -0.6452823877334595, "logps/chosen": -0.17203915119171143, "logps/rejected": -0.347610741853714, "loss": 2.8378, "nll_loss": 0.6257038116455078, "rewards/accuracies": 0.625, "rewards/chosen": -0.017203915864229202, "rewards/margins": 0.017557159066200256, "rewards/rejected": -0.03476107493042946, "step": 903 }, { "epoch": 0.6251728907330567, "grad_norm": 4.456448554992676, "learning_rate": 3.1258644536652834e-05, "log_odds_chosen": 1.7303143739700317, "log_odds_ratio": -0.4254305958747864, "logits/chosen": -0.8845744132995605, "logits/rejected": -0.9023687839508057, "logps/chosen": -0.11853601038455963, "logps/rejected": -0.25728434324264526, "loss": 4.7616, "nll_loss": 1.147857666015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.011853600852191448, "rewards/margins": 0.013874834403395653, "rewards/rejected": -0.025728434324264526, "step": 904 }, { "epoch": 0.6258644536652835, "grad_norm": 4.575382709503174, "learning_rate": 3.1293222683264176e-05, "log_odds_chosen": 2.423657178878784, "log_odds_ratio": -0.20692911744117737, "logits/chosen": -0.6058048605918884, "logits/rejected": -0.5127230882644653, "logps/chosen": -0.15506532788276672, "logps/rejected": -0.5397747159004211, "loss": 4.3004, "nll_loss": 1.0544087886810303, "rewards/accuracies": 0.875, "rewards/chosen": -0.015506532974541187, "rewards/margins": 0.03847094252705574, "rewards/rejected": -0.05397747457027435, "step": 905 }, { "epoch": 0.6265560165975104, "grad_norm": 3.4776272773742676, "learning_rate": 3.132780082987552e-05, "log_odds_chosen": 3.638993263244629, "log_odds_ratio": -0.19260406494140625, "logits/chosen": -0.9499366879463196, "logits/rejected": -0.9942508339881897, "logps/chosen": -0.0676005631685257, "logps/rejected": -0.580873429775238, "loss": 3.8193, "nll_loss": 0.935560941696167, "rewards/accuracies": 1.0, "rewards/chosen": -0.006760057061910629, "rewards/margins": 0.05132729187607765, "rewards/rejected": -0.058087341487407684, "step": 906 }, { "epoch": 0.6272475795297372, "grad_norm": 5.688107013702393, "learning_rate": 3.136237897648686e-05, "log_odds_chosen": 1.3792686462402344, "log_odds_ratio": -0.5582557916641235, "logits/chosen": -0.8317203521728516, "logits/rejected": -0.7857211828231812, "logps/chosen": -0.17600604891777039, "logps/rejected": -0.444831520318985, "loss": 5.0394, "nll_loss": 1.2040131092071533, "rewards/accuracies": 0.75, "rewards/chosen": -0.01760060526430607, "rewards/margins": 0.02688254788517952, "rewards/rejected": -0.04448315501213074, "step": 907 }, { "epoch": 0.627939142461964, "grad_norm": 7.277334213256836, "learning_rate": 3.13969571230982e-05, "log_odds_chosen": 2.3631396293640137, "log_odds_ratio": -0.40137773752212524, "logits/chosen": -0.7335744500160217, "logits/rejected": -0.7411458492279053, "logps/chosen": -0.06967581808567047, "logps/rejected": -0.4501439332962036, "loss": 2.5292, "nll_loss": 0.5921643972396851, "rewards/accuracies": 0.75, "rewards/chosen": -0.006967581808567047, "rewards/margins": 0.03804681450128555, "rewards/rejected": -0.0450143963098526, "step": 908 }, { "epoch": 0.6286307053941909, "grad_norm": 4.77232551574707, "learning_rate": 3.143153526970954e-05, "log_odds_chosen": 1.4729492664337158, "log_odds_ratio": -0.35362690687179565, "logits/chosen": -0.7603928446769714, "logits/rejected": -0.696243405342102, "logps/chosen": -0.07079069316387177, "logps/rejected": -0.3067443072795868, "loss": 6.0722, "nll_loss": 1.4826915264129639, "rewards/accuracies": 0.75, "rewards/chosen": -0.007079069968312979, "rewards/margins": 0.023595361039042473, "rewards/rejected": -0.03067443147301674, "step": 909 }, { "epoch": 0.6293222683264177, "grad_norm": 6.8746256828308105, "learning_rate": 3.1466113416320884e-05, "log_odds_chosen": 1.3585569858551025, "log_odds_ratio": -0.8060304522514343, "logits/chosen": -0.5674390196800232, "logits/rejected": -0.5963701009750366, "logps/chosen": -0.15420934557914734, "logps/rejected": -0.35397300124168396, "loss": 4.8346, "nll_loss": 1.1280417442321777, "rewards/accuracies": 0.625, "rewards/chosen": -0.015420932322740555, "rewards/margins": 0.019976366311311722, "rewards/rejected": -0.03539729863405228, "step": 910 }, { "epoch": 0.6300138312586445, "grad_norm": 3.980470895767212, "learning_rate": 3.1500691562932225e-05, "log_odds_chosen": 0.9345357418060303, "log_odds_ratio": -0.5109595060348511, "logits/chosen": -0.7937835454940796, "logits/rejected": -0.8195587992668152, "logps/chosen": -0.1285438984632492, "logps/rejected": -0.294846773147583, "loss": 4.4796, "nll_loss": 1.0688003301620483, "rewards/accuracies": 0.75, "rewards/chosen": -0.01285438984632492, "rewards/margins": 0.01663028635084629, "rewards/rejected": -0.02948467805981636, "step": 911 }, { "epoch": 0.6307053941908713, "grad_norm": 3.9688222408294678, "learning_rate": 3.153526970954357e-05, "log_odds_chosen": 2.540585994720459, "log_odds_ratio": -0.36621564626693726, "logits/chosen": -0.7523171305656433, "logits/rejected": -0.6979089379310608, "logps/chosen": -0.13078458607196808, "logps/rejected": -0.5843268632888794, "loss": 3.3149, "nll_loss": 0.7921104431152344, "rewards/accuracies": 0.75, "rewards/chosen": -0.013078458607196808, "rewards/margins": 0.04535423591732979, "rewards/rejected": -0.0584326907992363, "step": 912 }, { "epoch": 0.6313969571230982, "grad_norm": 4.612651348114014, "learning_rate": 3.156984785615491e-05, "log_odds_chosen": 0.6476094722747803, "log_odds_ratio": -0.7385530471801758, "logits/chosen": -0.8418199419975281, "logits/rejected": -0.8664075136184692, "logps/chosen": -0.1603190004825592, "logps/rejected": -0.26439252495765686, "loss": 4.0473, "nll_loss": 0.9379769563674927, "rewards/accuracies": 0.625, "rewards/chosen": -0.01603190042078495, "rewards/margins": 0.01040735188871622, "rewards/rejected": -0.026439251378178596, "step": 913 }, { "epoch": 0.632088520055325, "grad_norm": 4.260547161102295, "learning_rate": 3.160442600276625e-05, "log_odds_chosen": 3.119439125061035, "log_odds_ratio": -0.19069647789001465, "logits/chosen": -0.586380660533905, "logits/rejected": -0.5719614624977112, "logps/chosen": -0.06460949778556824, "logps/rejected": -0.5709012746810913, "loss": 4.0549, "nll_loss": 0.9946677684783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.006460950244218111, "rewards/margins": 0.050629179924726486, "rewards/rejected": -0.05709013342857361, "step": 914 }, { "epoch": 0.6327800829875518, "grad_norm": 5.091923713684082, "learning_rate": 3.163900414937759e-05, "log_odds_chosen": 3.6806349754333496, "log_odds_ratio": -0.27678605914115906, "logits/chosen": -0.7521594762802124, "logits/rejected": -0.761591911315918, "logps/chosen": -0.0646386444568634, "logps/rejected": -0.819200873374939, "loss": 4.513, "nll_loss": 1.1005698442459106, "rewards/accuracies": 0.875, "rewards/chosen": -0.00646386481821537, "rewards/margins": 0.07545622438192368, "rewards/rejected": -0.0819200873374939, "step": 915 }, { "epoch": 0.6334716459197787, "grad_norm": 6.1348443031311035, "learning_rate": 3.167358229598893e-05, "log_odds_chosen": 2.4885177612304688, "log_odds_ratio": -0.6434838175773621, "logits/chosen": -0.7858555316925049, "logits/rejected": -0.8438479900360107, "logps/chosen": -0.1499505490064621, "logps/rejected": -0.693801760673523, "loss": 3.9574, "nll_loss": 0.925003170967102, "rewards/accuracies": 0.625, "rewards/chosen": -0.014995056204497814, "rewards/margins": 0.05438512563705444, "rewards/rejected": -0.06938017904758453, "step": 916 }, { "epoch": 0.6341632088520055, "grad_norm": 4.475279808044434, "learning_rate": 3.1708160442600275e-05, "log_odds_chosen": 3.5078020095825195, "log_odds_ratio": -0.4594971537590027, "logits/chosen": -0.6189094185829163, "logits/rejected": -0.638657808303833, "logps/chosen": -0.11998427659273148, "logps/rejected": -0.5195710062980652, "loss": 3.0219, "nll_loss": 0.7095255851745605, "rewards/accuracies": 0.625, "rewards/chosen": -0.011998428963124752, "rewards/margins": 0.03995867446064949, "rewards/rejected": -0.05195710062980652, "step": 917 }, { "epoch": 0.6348547717842323, "grad_norm": 4.12563419342041, "learning_rate": 3.1742738589211616e-05, "log_odds_chosen": 2.7910380363464355, "log_odds_ratio": -0.3635891079902649, "logits/chosen": -0.7694523334503174, "logits/rejected": -0.8305118680000305, "logps/chosen": -0.1132592186331749, "logps/rejected": -0.5803558230400085, "loss": 4.9796, "nll_loss": 1.2085505723953247, "rewards/accuracies": 0.875, "rewards/chosen": -0.01132592186331749, "rewards/margins": 0.04670966416597366, "rewards/rejected": -0.058035582304000854, "step": 918 }, { "epoch": 0.6355463347164592, "grad_norm": 5.34503173828125, "learning_rate": 3.177731673582296e-05, "log_odds_chosen": 2.969501495361328, "log_odds_ratio": -0.17896559834480286, "logits/chosen": -0.6707146763801575, "logits/rejected": -0.672049880027771, "logps/chosen": -0.14243923127651215, "logps/rejected": -0.5696920156478882, "loss": 5.5494, "nll_loss": 1.369441032409668, "rewards/accuracies": 1.0, "rewards/chosen": -0.01424392405897379, "rewards/margins": 0.04272527992725372, "rewards/rejected": -0.05696920305490494, "step": 919 }, { "epoch": 0.636237897648686, "grad_norm": 4.605804920196533, "learning_rate": 3.18118948824343e-05, "log_odds_chosen": 1.0992157459259033, "log_odds_ratio": -0.33924224972724915, "logits/chosen": -0.8458801507949829, "logits/rejected": -0.8768494129180908, "logps/chosen": -0.10855139791965485, "logps/rejected": -0.28552332520484924, "loss": 5.0427, "nll_loss": 1.2267569303512573, "rewards/accuracies": 1.0, "rewards/chosen": -0.010855140164494514, "rewards/margins": 0.01769719272851944, "rewards/rejected": -0.028552331030368805, "step": 920 }, { "epoch": 0.6369294605809128, "grad_norm": 4.734560966491699, "learning_rate": 3.184647302904564e-05, "log_odds_chosen": 3.045750856399536, "log_odds_ratio": -0.5812844634056091, "logits/chosen": -0.5721637010574341, "logits/rejected": -0.5491136312484741, "logps/chosen": -0.10722295939922333, "logps/rejected": -0.4818299412727356, "loss": 4.2084, "nll_loss": 0.9939660429954529, "rewards/accuracies": 0.625, "rewards/chosen": -0.010722294449806213, "rewards/margins": 0.03746069595217705, "rewards/rejected": -0.04818298667669296, "step": 921 }, { "epoch": 0.6376210235131397, "grad_norm": 5.207472801208496, "learning_rate": 3.188105117565698e-05, "log_odds_chosen": 1.3426095247268677, "log_odds_ratio": -0.6640761494636536, "logits/chosen": -0.6079537868499756, "logits/rejected": -0.6411263346672058, "logps/chosen": -0.15399664640426636, "logps/rejected": -0.3464754819869995, "loss": 5.4171, "nll_loss": 1.2878714799880981, "rewards/accuracies": 0.75, "rewards/chosen": -0.015399663709104061, "rewards/margins": 0.019247887656092644, "rewards/rejected": -0.03464755043387413, "step": 922 }, { "epoch": 0.6383125864453665, "grad_norm": 4.346343994140625, "learning_rate": 3.1915629322268324e-05, "log_odds_chosen": 3.3377137184143066, "log_odds_ratio": -0.1723647117614746, "logits/chosen": -0.7857403755187988, "logits/rejected": -0.7861360907554626, "logps/chosen": -0.05585169792175293, "logps/rejected": -0.5312471389770508, "loss": 4.2025, "nll_loss": 1.0333845615386963, "rewards/accuracies": 1.0, "rewards/chosen": -0.005585169419646263, "rewards/margins": 0.047539543360471725, "rewards/rejected": -0.05312471464276314, "step": 923 }, { "epoch": 0.6390041493775933, "grad_norm": 4.295398235321045, "learning_rate": 3.1950207468879666e-05, "log_odds_chosen": 2.227273941040039, "log_odds_ratio": -0.32186567783355713, "logits/chosen": -0.8246999382972717, "logits/rejected": -0.8642113208770752, "logps/chosen": -0.17864662408828735, "logps/rejected": -0.7396684885025024, "loss": 3.9963, "nll_loss": 0.9668830633163452, "rewards/accuracies": 0.875, "rewards/chosen": -0.017864665016531944, "rewards/margins": 0.05610219016671181, "rewards/rejected": -0.0739668533205986, "step": 924 }, { "epoch": 0.6396957123098201, "grad_norm": 4.727065086364746, "learning_rate": 3.198478561549101e-05, "log_odds_chosen": 0.795332133769989, "log_odds_ratio": -0.49990129470825195, "logits/chosen": -0.6169123649597168, "logits/rejected": -0.6310234665870667, "logps/chosen": -0.15110734105110168, "logps/rejected": -0.28500136733055115, "loss": 3.7686, "nll_loss": 0.8921705484390259, "rewards/accuracies": 0.75, "rewards/chosen": -0.015110732987523079, "rewards/margins": 0.01338940393179655, "rewards/rejected": -0.028500137850642204, "step": 925 }, { "epoch": 0.640387275242047, "grad_norm": 4.3016180992126465, "learning_rate": 3.201936376210235e-05, "log_odds_chosen": 2.2576334476470947, "log_odds_ratio": -0.2811516225337982, "logits/chosen": -0.672473669052124, "logits/rejected": -0.7026387453079224, "logps/chosen": -0.11066614091396332, "logps/rejected": -0.376792311668396, "loss": 4.3925, "nll_loss": 1.0700031518936157, "rewards/accuracies": 1.0, "rewards/chosen": -0.011066613718867302, "rewards/margins": 0.026612618938088417, "rewards/rejected": -0.03767923265695572, "step": 926 }, { "epoch": 0.6410788381742739, "grad_norm": 4.539062023162842, "learning_rate": 3.20539419087137e-05, "log_odds_chosen": 1.6754204034805298, "log_odds_ratio": -0.29951563477516174, "logits/chosen": -1.038869857788086, "logits/rejected": -1.0374137163162231, "logps/chosen": -0.13555291295051575, "logps/rejected": -0.4391994774341583, "loss": 4.6734, "nll_loss": 1.1384084224700928, "rewards/accuracies": 0.875, "rewards/chosen": -0.013555290177464485, "rewards/margins": 0.030364658683538437, "rewards/rejected": -0.04391995072364807, "step": 927 }, { "epoch": 0.6417704011065007, "grad_norm": 3.144559621810913, "learning_rate": 3.208852005532504e-05, "log_odds_chosen": 3.9733846187591553, "log_odds_ratio": -0.28225821256637573, "logits/chosen": -0.5854853391647339, "logits/rejected": -0.5669970512390137, "logps/chosen": -0.08290040493011475, "logps/rejected": -0.5470890998840332, "loss": 3.6142, "nll_loss": 0.8753182888031006, "rewards/accuracies": 0.875, "rewards/chosen": -0.008290041238069534, "rewards/margins": 0.046418868005275726, "rewards/rejected": -0.05470890551805496, "step": 928 }, { "epoch": 0.6424619640387276, "grad_norm": 3.229943037033081, "learning_rate": 3.212309820193638e-05, "log_odds_chosen": 5.208747863769531, "log_odds_ratio": -0.2483353614807129, "logits/chosen": -0.9289498329162598, "logits/rejected": -0.9372609257698059, "logps/chosen": -0.04052822291851044, "logps/rejected": -0.8483723402023315, "loss": 4.7465, "nll_loss": 1.1617987155914307, "rewards/accuracies": 0.75, "rewards/chosen": -0.004052822478115559, "rewards/margins": 0.08078441023826599, "rewards/rejected": -0.08483723551034927, "step": 929 }, { "epoch": 0.6431535269709544, "grad_norm": 10.269536972045898, "learning_rate": 3.215767634854772e-05, "log_odds_chosen": 0.1895618438720703, "log_odds_ratio": -1.680237889289856, "logits/chosen": -0.7783693671226501, "logits/rejected": -0.7193617820739746, "logps/chosen": -0.32872065901756287, "logps/rejected": -0.28012144565582275, "loss": 4.6799, "nll_loss": 1.001959204673767, "rewards/accuracies": 0.5, "rewards/chosen": -0.03287206590175629, "rewards/margins": -0.004859922453761101, "rewards/rejected": -0.028012147173285484, "step": 930 }, { "epoch": 0.6438450899031812, "grad_norm": 4.42756462097168, "learning_rate": 3.219225449515906e-05, "log_odds_chosen": 2.1525275707244873, "log_odds_ratio": -0.4749143719673157, "logits/chosen": -0.4051012396812439, "logits/rejected": -0.48455822467803955, "logps/chosen": -0.15239334106445312, "logps/rejected": -0.3337353467941284, "loss": 3.7849, "nll_loss": 0.8987392783164978, "rewards/accuracies": 0.75, "rewards/chosen": -0.015239333733916283, "rewards/margins": 0.01813420280814171, "rewards/rejected": -0.03337353840470314, "step": 931 }, { "epoch": 0.6445366528354081, "grad_norm": 5.184959888458252, "learning_rate": 3.2226832641770405e-05, "log_odds_chosen": 2.667898654937744, "log_odds_ratio": -0.5918496251106262, "logits/chosen": -0.2000243365764618, "logits/rejected": -0.23744788765907288, "logps/chosen": -0.11377274245023727, "logps/rejected": -0.44666174054145813, "loss": 3.711, "nll_loss": 0.8685758113861084, "rewards/accuracies": 0.625, "rewards/chosen": -0.011377274990081787, "rewards/margins": 0.033288899809122086, "rewards/rejected": -0.04466617479920387, "step": 932 }, { "epoch": 0.6452282157676349, "grad_norm": 3.8456571102142334, "learning_rate": 3.2261410788381746e-05, "log_odds_chosen": 1.2180832624435425, "log_odds_ratio": -0.4285384714603424, "logits/chosen": -0.6114488244056702, "logits/rejected": -0.6289840936660767, "logps/chosen": -0.16820138692855835, "logps/rejected": -0.4128754138946533, "loss": 4.6335, "nll_loss": 1.1155246496200562, "rewards/accuracies": 0.75, "rewards/chosen": -0.016820138320326805, "rewards/margins": 0.024467404931783676, "rewards/rejected": -0.04128754511475563, "step": 933 }, { "epoch": 0.6459197786998617, "grad_norm": 7.9818806648254395, "learning_rate": 3.229598893499309e-05, "log_odds_chosen": 2.1372387409210205, "log_odds_ratio": -0.6383996605873108, "logits/chosen": -0.5187885761260986, "logits/rejected": -0.5804282426834106, "logps/chosen": -0.16250097751617432, "logps/rejected": -0.6418223977088928, "loss": 4.8151, "nll_loss": 1.1399288177490234, "rewards/accuracies": 0.875, "rewards/chosen": -0.016250096261501312, "rewards/margins": 0.04793214797973633, "rewards/rejected": -0.06418224424123764, "step": 934 }, { "epoch": 0.6466113416320886, "grad_norm": 6.263998985290527, "learning_rate": 3.233056708160443e-05, "log_odds_chosen": 2.2691001892089844, "log_odds_ratio": -0.6636475324630737, "logits/chosen": -0.638105571269989, "logits/rejected": -0.6674371957778931, "logps/chosen": -0.1204868033528328, "logps/rejected": -0.5415738821029663, "loss": 4.1406, "nll_loss": 0.9687949419021606, "rewards/accuracies": 0.75, "rewards/chosen": -0.012048681266605854, "rewards/margins": 0.04210871085524559, "rewards/rejected": -0.05415739119052887, "step": 935 }, { "epoch": 0.6473029045643154, "grad_norm": 5.25544548034668, "learning_rate": 3.236514522821577e-05, "log_odds_chosen": 3.077738046646118, "log_odds_ratio": -0.4996723234653473, "logits/chosen": -0.34365952014923096, "logits/rejected": -0.3545984923839569, "logps/chosen": -0.11812933534383774, "logps/rejected": -0.3927255868911743, "loss": 3.6792, "nll_loss": 0.869831919670105, "rewards/accuracies": 0.625, "rewards/chosen": -0.011812932789325714, "rewards/margins": 0.027459625154733658, "rewards/rejected": -0.03927256166934967, "step": 936 }, { "epoch": 0.6479944674965422, "grad_norm": 3.740233898162842, "learning_rate": 3.239972337482711e-05, "log_odds_chosen": 3.347403049468994, "log_odds_ratio": -0.30564332008361816, "logits/chosen": -0.6202876567840576, "logits/rejected": -0.6746382117271423, "logps/chosen": -0.09465580433607101, "logps/rejected": -0.46282535791397095, "loss": 4.0773, "nll_loss": 0.9887527227401733, "rewards/accuracies": 0.75, "rewards/chosen": -0.009465579874813557, "rewards/margins": 0.036816954612731934, "rewards/rejected": -0.04628252983093262, "step": 937 }, { "epoch": 0.648686030428769, "grad_norm": 4.68813419342041, "learning_rate": 3.2434301521438454e-05, "log_odds_chosen": 2.7533187866210938, "log_odds_ratio": -0.23865221440792084, "logits/chosen": -0.651775598526001, "logits/rejected": -0.6854273080825806, "logps/chosen": -0.10296142846345901, "logps/rejected": -0.5382181406021118, "loss": 5.4118, "nll_loss": 1.3290754556655884, "rewards/accuracies": 1.0, "rewards/chosen": -0.010296143591403961, "rewards/margins": 0.04352566972374916, "rewards/rejected": -0.05382181331515312, "step": 938 }, { "epoch": 0.6493775933609959, "grad_norm": 5.684381484985352, "learning_rate": 3.2468879668049796e-05, "log_odds_chosen": 0.0025558993220329285, "log_odds_ratio": -0.8680828809738159, "logits/chosen": -0.7046631574630737, "logits/rejected": -0.7345783114433289, "logps/chosen": -0.14862655103206635, "logps/rejected": -0.18110089004039764, "loss": 6.4886, "nll_loss": 1.5353366136550903, "rewards/accuracies": 0.5, "rewards/chosen": -0.014862654730677605, "rewards/margins": 0.003247436136007309, "rewards/rejected": -0.018110090866684914, "step": 939 }, { "epoch": 0.6500691562932227, "grad_norm": 5.221529483795166, "learning_rate": 3.250345781466114e-05, "log_odds_chosen": 0.3629959523677826, "log_odds_ratio": -0.6458583474159241, "logits/chosen": -0.5722988247871399, "logits/rejected": -0.5610537528991699, "logps/chosen": -0.156322181224823, "logps/rejected": -0.19696059823036194, "loss": 4.7117, "nll_loss": 1.1133334636688232, "rewards/accuracies": 0.625, "rewards/chosen": -0.01563221588730812, "rewards/margins": 0.004063841886818409, "rewards/rejected": -0.019696058705449104, "step": 940 }, { "epoch": 0.6507607192254495, "grad_norm": 3.8440189361572266, "learning_rate": 3.253803596127248e-05, "log_odds_chosen": 1.7516545057296753, "log_odds_ratio": -0.41569817066192627, "logits/chosen": -0.7494470477104187, "logits/rejected": -0.7926744222640991, "logps/chosen": -0.1611669659614563, "logps/rejected": -0.449934184551239, "loss": 4.4423, "nll_loss": 1.06900155544281, "rewards/accuracies": 0.625, "rewards/chosen": -0.01611669547855854, "rewards/margins": 0.02887672558426857, "rewards/rejected": -0.04499341920018196, "step": 941 }, { "epoch": 0.6514522821576764, "grad_norm": 3.899761199951172, "learning_rate": 3.257261410788382e-05, "log_odds_chosen": 1.4607198238372803, "log_odds_ratio": -0.3697126507759094, "logits/chosen": -0.4832645654678345, "logits/rejected": -0.5157491564750671, "logps/chosen": -0.16678622364997864, "logps/rejected": -0.49319201707839966, "loss": 4.1484, "nll_loss": 1.0001217126846313, "rewards/accuracies": 0.875, "rewards/chosen": -0.016678621992468834, "rewards/margins": 0.03264058008790016, "rewards/rejected": -0.049319203943014145, "step": 942 }, { "epoch": 0.6521438450899032, "grad_norm": 3.763258218765259, "learning_rate": 3.260719225449516e-05, "log_odds_chosen": 3.7982499599456787, "log_odds_ratio": -0.47766488790512085, "logits/chosen": -0.4156723916530609, "logits/rejected": -0.4465107023715973, "logps/chosen": -0.10961335897445679, "logps/rejected": -0.44413048028945923, "loss": 4.2851, "nll_loss": 1.0235079526901245, "rewards/accuracies": 0.625, "rewards/chosen": -0.010961336083710194, "rewards/margins": 0.033451713621616364, "rewards/rejected": -0.04441304877400398, "step": 943 }, { "epoch": 0.65283540802213, "grad_norm": 3.1957039833068848, "learning_rate": 3.2641770401106504e-05, "log_odds_chosen": 3.5332484245300293, "log_odds_ratio": -0.33818644285202026, "logits/chosen": -0.6256710886955261, "logits/rejected": -0.6820761561393738, "logps/chosen": -0.06437689810991287, "logps/rejected": -0.5691992044448853, "loss": 3.97, "nll_loss": 0.9586867690086365, "rewards/accuracies": 0.75, "rewards/chosen": -0.006437689997255802, "rewards/margins": 0.050482235848903656, "rewards/rejected": -0.056919924914836884, "step": 944 }, { "epoch": 0.6535269709543569, "grad_norm": 3.3025779724121094, "learning_rate": 3.2676348547717845e-05, "log_odds_chosen": 2.6594409942626953, "log_odds_ratio": -0.34340739250183105, "logits/chosen": -0.3092968165874481, "logits/rejected": -0.3505038321018219, "logps/chosen": -0.1379614770412445, "logps/rejected": -0.5781947374343872, "loss": 3.5048, "nll_loss": 0.8418477773666382, "rewards/accuracies": 0.75, "rewards/chosen": -0.013796146959066391, "rewards/margins": 0.04402332752943039, "rewards/rejected": -0.05781947076320648, "step": 945 }, { "epoch": 0.6542185338865837, "grad_norm": 7.668539524078369, "learning_rate": 3.271092669432919e-05, "log_odds_chosen": 1.77287757396698, "log_odds_ratio": -0.6184110641479492, "logits/chosen": -0.5389738082885742, "logits/rejected": -0.5569124817848206, "logps/chosen": -0.2962481677532196, "logps/rejected": -0.37920287251472473, "loss": 3.9189, "nll_loss": 0.9178899526596069, "rewards/accuracies": 0.875, "rewards/chosen": -0.02962481416761875, "rewards/margins": 0.008295468986034393, "rewards/rejected": -0.037920285016298294, "step": 946 }, { "epoch": 0.6549100968188105, "grad_norm": 3.6975250244140625, "learning_rate": 3.274550484094053e-05, "log_odds_chosen": 3.4721813201904297, "log_odds_ratio": -0.22020241618156433, "logits/chosen": -0.5824014544487, "logits/rejected": -0.636989176273346, "logps/chosen": -0.09637489169836044, "logps/rejected": -0.657294511795044, "loss": 3.4929, "nll_loss": 0.8512168526649475, "rewards/accuracies": 0.875, "rewards/chosen": -0.00963748898357153, "rewards/margins": 0.05609196051955223, "rewards/rejected": -0.06572945415973663, "step": 947 }, { "epoch": 0.6556016597510373, "grad_norm": 4.219089508056641, "learning_rate": 3.278008298755187e-05, "log_odds_chosen": 1.8522963523864746, "log_odds_ratio": -0.2913023829460144, "logits/chosen": -0.4794574975967407, "logits/rejected": -0.4770664572715759, "logps/chosen": -0.09961166977882385, "logps/rejected": -0.43963801860809326, "loss": 5.0723, "nll_loss": 1.2389488220214844, "rewards/accuracies": 1.0, "rewards/chosen": -0.009961167350411415, "rewards/margins": 0.0340026319026947, "rewards/rejected": -0.04396379739046097, "step": 948 }, { "epoch": 0.6562932226832642, "grad_norm": 6.227601051330566, "learning_rate": 3.281466113416321e-05, "log_odds_chosen": 1.6492278575897217, "log_odds_ratio": -0.4346155524253845, "logits/chosen": -0.3798789978027344, "logits/rejected": -0.3958047926425934, "logps/chosen": -0.10809013247489929, "logps/rejected": -0.4654456377029419, "loss": 5.3598, "nll_loss": 1.296478033065796, "rewards/accuracies": 0.625, "rewards/chosen": -0.010809013620018959, "rewards/margins": 0.03573554754257202, "rewards/rejected": -0.04654456302523613, "step": 949 }, { "epoch": 0.656984785615491, "grad_norm": 4.8800482749938965, "learning_rate": 3.284923928077455e-05, "log_odds_chosen": 1.9015074968338013, "log_odds_ratio": -0.40058067440986633, "logits/chosen": -0.6047090888023376, "logits/rejected": -0.6340633630752563, "logps/chosen": -0.13578994572162628, "logps/rejected": -0.4089638888835907, "loss": 5.4725, "nll_loss": 1.3280658721923828, "rewards/accuracies": 0.875, "rewards/chosen": -0.013578995130956173, "rewards/margins": 0.02731739543378353, "rewards/rejected": -0.04089638963341713, "step": 950 }, { "epoch": 0.6576763485477178, "grad_norm": 4.019526481628418, "learning_rate": 3.2883817427385895e-05, "log_odds_chosen": 2.497021198272705, "log_odds_ratio": -0.26466888189315796, "logits/chosen": -0.5571470260620117, "logits/rejected": -0.5579490661621094, "logps/chosen": -0.09435532242059708, "logps/rejected": -0.4960017800331116, "loss": 4.1844, "nll_loss": 1.0196443796157837, "rewards/accuracies": 1.0, "rewards/chosen": -0.009435532614588737, "rewards/margins": 0.04016464576125145, "rewards/rejected": -0.04960017651319504, "step": 951 }, { "epoch": 0.6583679114799447, "grad_norm": 3.2139434814453125, "learning_rate": 3.2918395573997236e-05, "log_odds_chosen": 1.3813042640686035, "log_odds_ratio": -0.3880887031555176, "logits/chosen": -0.6013690233230591, "logits/rejected": -0.5642503499984741, "logps/chosen": -0.07664715498685837, "logps/rejected": -0.2166944146156311, "loss": 4.2634, "nll_loss": 1.027036190032959, "rewards/accuracies": 0.75, "rewards/chosen": -0.007664714939892292, "rewards/margins": 0.014004725962877274, "rewards/rejected": -0.02166944183409214, "step": 952 }, { "epoch": 0.6590594744121715, "grad_norm": 3.341346502304077, "learning_rate": 3.295297372060858e-05, "log_odds_chosen": 3.3636043071746826, "log_odds_ratio": -0.30477118492126465, "logits/chosen": -0.7266377806663513, "logits/rejected": -0.7696323990821838, "logps/chosen": -0.08241377025842667, "logps/rejected": -0.49398496747016907, "loss": 3.9865, "nll_loss": 0.966143012046814, "rewards/accuracies": 0.875, "rewards/chosen": -0.008241376839578152, "rewards/margins": 0.04115711897611618, "rewards/rejected": -0.04939849674701691, "step": 953 }, { "epoch": 0.6597510373443983, "grad_norm": 3.837860107421875, "learning_rate": 3.298755186721992e-05, "log_odds_chosen": 3.0661492347717285, "log_odds_ratio": -0.33581408858299255, "logits/chosen": -0.4582061171531677, "logits/rejected": -0.47436198592185974, "logps/chosen": -0.09099718928337097, "logps/rejected": -0.45954978466033936, "loss": 3.3755, "nll_loss": 0.8102924823760986, "rewards/accuracies": 0.875, "rewards/chosen": -0.009099719114601612, "rewards/margins": 0.03685525804758072, "rewards/rejected": -0.045954976230859756, "step": 954 }, { "epoch": 0.6604426002766252, "grad_norm": 5.162682056427002, "learning_rate": 3.302213001383126e-05, "log_odds_chosen": 3.169241428375244, "log_odds_ratio": -0.2741418182849884, "logits/chosen": -0.7861517667770386, "logits/rejected": -0.7951920628547668, "logps/chosen": -0.09555049985647202, "logps/rejected": -0.6090150475502014, "loss": 5.6519, "nll_loss": 1.38556969165802, "rewards/accuracies": 0.875, "rewards/chosen": -0.009555051103234291, "rewards/margins": 0.05134645104408264, "rewards/rejected": -0.06090150400996208, "step": 955 }, { "epoch": 0.661134163208852, "grad_norm": 2.37520170211792, "learning_rate": 3.30567081604426e-05, "log_odds_chosen": 4.856169700622559, "log_odds_ratio": -0.23580773174762726, "logits/chosen": -0.3492242991924286, "logits/rejected": -0.34554314613342285, "logps/chosen": -0.06726567447185516, "logps/rejected": -0.5879815816879272, "loss": 3.3724, "nll_loss": 0.8195255398750305, "rewards/accuracies": 0.875, "rewards/chosen": -0.006726567167788744, "rewards/margins": 0.05207158997654915, "rewards/rejected": -0.058798156678676605, "step": 956 }, { "epoch": 0.6618257261410788, "grad_norm": 4.020497798919678, "learning_rate": 3.3091286307053944e-05, "log_odds_chosen": 2.5861258506774902, "log_odds_ratio": -0.47657981514930725, "logits/chosen": -0.330152302980423, "logits/rejected": -0.3692672550678253, "logps/chosen": -0.13104590773582458, "logps/rejected": -0.30247101187705994, "loss": 4.1376, "nll_loss": 0.9867503643035889, "rewards/accuracies": 0.75, "rewards/chosen": -0.013104591518640518, "rewards/margins": 0.017142511904239655, "rewards/rejected": -0.030247103422880173, "step": 957 }, { "epoch": 0.6625172890733056, "grad_norm": 4.300625801086426, "learning_rate": 3.3125864453665286e-05, "log_odds_chosen": 3.64050030708313, "log_odds_ratio": -0.2507421374320984, "logits/chosen": -0.21320414543151855, "logits/rejected": -0.2004971206188202, "logps/chosen": -0.04882103577256203, "logps/rejected": -0.4051501452922821, "loss": 3.7638, "nll_loss": 0.9158720374107361, "rewards/accuracies": 0.875, "rewards/chosen": -0.004882104694843292, "rewards/margins": 0.0356329083442688, "rewards/rejected": -0.04051501303911209, "step": 958 }, { "epoch": 0.6632088520055325, "grad_norm": 3.524125814437866, "learning_rate": 3.316044260027663e-05, "log_odds_chosen": 2.5185976028442383, "log_odds_ratio": -0.3382371664047241, "logits/chosen": -0.41052520275115967, "logits/rejected": -0.41902029514312744, "logps/chosen": -0.08878672868013382, "logps/rejected": -0.34096887707710266, "loss": 3.3122, "nll_loss": 0.7942249178886414, "rewards/accuracies": 0.875, "rewards/chosen": -0.008878673426806927, "rewards/margins": 0.025218214839696884, "rewards/rejected": -0.034096889197826385, "step": 959 }, { "epoch": 0.6639004149377593, "grad_norm": 3.816377878189087, "learning_rate": 3.319502074688797e-05, "log_odds_chosen": 3.3016445636749268, "log_odds_ratio": -0.4189302325248718, "logits/chosen": -0.4831903874874115, "logits/rejected": -0.49403488636016846, "logps/chosen": -0.14972633123397827, "logps/rejected": -0.456408828496933, "loss": 3.5794, "nll_loss": 0.8529676198959351, "rewards/accuracies": 0.75, "rewards/chosen": -0.014972632750868797, "rewards/margins": 0.03066825307905674, "rewards/rejected": -0.04564088582992554, "step": 960 }, { "epoch": 0.6645919778699861, "grad_norm": 6.777546405792236, "learning_rate": 3.322959889349931e-05, "log_odds_chosen": 0.5140173435211182, "log_odds_ratio": -1.0868242979049683, "logits/chosen": -0.7206689119338989, "logits/rejected": -0.7224574089050293, "logps/chosen": -0.19908304512500763, "logps/rejected": -0.3203428387641907, "loss": 3.4081, "nll_loss": 0.7433361411094666, "rewards/accuracies": 0.625, "rewards/chosen": -0.019908303394913673, "rewards/margins": 0.012125976383686066, "rewards/rejected": -0.03203428164124489, "step": 961 }, { "epoch": 0.665283540802213, "grad_norm": 5.210657119750977, "learning_rate": 3.326417704011065e-05, "log_odds_chosen": 3.3200583457946777, "log_odds_ratio": -0.4891462028026581, "logits/chosen": -0.5053502917289734, "logits/rejected": -0.5233083367347717, "logps/chosen": -0.1546093374490738, "logps/rejected": -0.7421329021453857, "loss": 3.9954, "nll_loss": 0.9499325752258301, "rewards/accuracies": 0.75, "rewards/chosen": -0.015460933558642864, "rewards/margins": 0.058752357959747314, "rewards/rejected": -0.07421329617500305, "step": 962 }, { "epoch": 0.6659751037344398, "grad_norm": 4.276523113250732, "learning_rate": 3.329875518672199e-05, "log_odds_chosen": 2.442716121673584, "log_odds_ratio": -0.5946823954582214, "logits/chosen": -0.3844939172267914, "logits/rejected": -0.3767719864845276, "logps/chosen": -0.11018381267786026, "logps/rejected": -0.28107550740242004, "loss": 4.1964, "nll_loss": 0.9896374940872192, "rewards/accuracies": 0.75, "rewards/chosen": -0.011018382385373116, "rewards/margins": 0.01708916947245598, "rewards/rejected": -0.028107551857829094, "step": 963 }, { "epoch": 0.6666666666666666, "grad_norm": 4.2431864738464355, "learning_rate": 3.3333333333333335e-05, "log_odds_chosen": 4.00262975692749, "log_odds_ratio": -0.17702579498291016, "logits/chosen": -0.20652252435684204, "logits/rejected": -0.1833350956439972, "logps/chosen": -0.08886405825614929, "logps/rejected": -0.554391086101532, "loss": 3.8919, "nll_loss": 0.9552844762802124, "rewards/accuracies": 1.0, "rewards/chosen": -0.008886406198143959, "rewards/margins": 0.04655269905924797, "rewards/rejected": -0.05543910712003708, "step": 964 }, { "epoch": 0.6673582295988935, "grad_norm": 4.64008092880249, "learning_rate": 3.3367911479944676e-05, "log_odds_chosen": 4.577019691467285, "log_odds_ratio": -0.2833351194858551, "logits/chosen": -0.4747796952724457, "logits/rejected": -0.4827622175216675, "logps/chosen": -0.07018810510635376, "logps/rejected": -0.5580646395683289, "loss": 3.5238, "nll_loss": 0.8526178002357483, "rewards/accuracies": 0.75, "rewards/chosen": -0.007018811535090208, "rewards/margins": 0.04878764972090721, "rewards/rejected": -0.055806465446949005, "step": 965 }, { "epoch": 0.6680497925311203, "grad_norm": 4.885814666748047, "learning_rate": 3.340248962655602e-05, "log_odds_chosen": 1.4906724691390991, "log_odds_ratio": -0.3242151439189911, "logits/chosen": -0.8521069288253784, "logits/rejected": -0.8305846452713013, "logps/chosen": -0.10241183638572693, "logps/rejected": -0.30071839690208435, "loss": 4.6939, "nll_loss": 1.1410484313964844, "rewards/accuracies": 0.875, "rewards/chosen": -0.010241183452308178, "rewards/margins": 0.019830655306577682, "rewards/rejected": -0.030071841552853584, "step": 966 }, { "epoch": 0.6687413554633471, "grad_norm": 4.377496242523193, "learning_rate": 3.343706777316736e-05, "log_odds_chosen": 1.4498246908187866, "log_odds_ratio": -0.43261057138442993, "logits/chosen": -0.8319251537322998, "logits/rejected": -0.8602160215377808, "logps/chosen": -0.1076955646276474, "logps/rejected": -0.38744795322418213, "loss": 4.8181, "nll_loss": 1.1612600088119507, "rewards/accuracies": 0.75, "rewards/chosen": -0.0107695572078228, "rewards/margins": 0.027975236997008324, "rewards/rejected": -0.03874479606747627, "step": 967 }, { "epoch": 0.669432918395574, "grad_norm": 5.273506164550781, "learning_rate": 3.34716459197787e-05, "log_odds_chosen": 2.5594263076782227, "log_odds_ratio": -0.44265487790107727, "logits/chosen": -0.827867865562439, "logits/rejected": -0.8279743194580078, "logps/chosen": -0.15520818531513214, "logps/rejected": -0.5966611504554749, "loss": 4.861, "nll_loss": 1.170979380607605, "rewards/accuracies": 0.875, "rewards/chosen": -0.015520821325480938, "rewards/margins": 0.04414529725909233, "rewards/rejected": -0.059666119515895844, "step": 968 }, { "epoch": 0.6701244813278008, "grad_norm": 2.88142991065979, "learning_rate": 3.350622406639004e-05, "log_odds_chosen": 3.7562472820281982, "log_odds_ratio": -0.11612822115421295, "logits/chosen": -0.774664044380188, "logits/rejected": -0.7495312690734863, "logps/chosen": -0.04824502021074295, "logps/rejected": -0.5066708326339722, "loss": 3.2582, "nll_loss": 0.8029303550720215, "rewards/accuracies": 1.0, "rewards/chosen": -0.004824501927942038, "rewards/margins": 0.04584258794784546, "rewards/rejected": -0.050667084753513336, "step": 969 }, { "epoch": 0.6708160442600276, "grad_norm": 4.272217273712158, "learning_rate": 3.3540802213001384e-05, "log_odds_chosen": 1.9701576232910156, "log_odds_ratio": -0.5143850445747375, "logits/chosen": -0.7145799398422241, "logits/rejected": -0.7090466022491455, "logps/chosen": -0.1732991337776184, "logps/rejected": -0.506736159324646, "loss": 4.3177, "nll_loss": 1.0279784202575684, "rewards/accuracies": 0.625, "rewards/chosen": -0.01732991263270378, "rewards/margins": 0.03334370255470276, "rewards/rejected": -0.05067361518740654, "step": 970 }, { "epoch": 0.6715076071922544, "grad_norm": 6.0354390144348145, "learning_rate": 3.3575380359612726e-05, "log_odds_chosen": 0.18285652995109558, "log_odds_ratio": -0.6881224513053894, "logits/chosen": -0.9916139841079712, "logits/rejected": -0.9856917262077332, "logps/chosen": -0.21898028254508972, "logps/rejected": -0.2937367558479309, "loss": 5.0391, "nll_loss": 1.190969467163086, "rewards/accuracies": 0.5, "rewards/chosen": -0.021898027509450912, "rewards/margins": 0.00747564947232604, "rewards/rejected": -0.02937367744743824, "step": 971 }, { "epoch": 0.6721991701244814, "grad_norm": 3.3654420375823975, "learning_rate": 3.360995850622407e-05, "log_odds_chosen": 2.4569759368896484, "log_odds_ratio": -0.2657202482223511, "logits/chosen": -0.3326878547668457, "logits/rejected": -0.3718082010746002, "logps/chosen": -0.0956912636756897, "logps/rejected": -0.3199200928211212, "loss": 3.6996, "nll_loss": 0.8983267545700073, "rewards/accuracies": 1.0, "rewards/chosen": -0.00956912711262703, "rewards/margins": 0.02242288552224636, "rewards/rejected": -0.03199201449751854, "step": 972 }, { "epoch": 0.6728907330567082, "grad_norm": 3.7501866817474365, "learning_rate": 3.364453665283541e-05, "log_odds_chosen": 1.6184512376785278, "log_odds_ratio": -0.3203818202018738, "logits/chosen": -0.746425986289978, "logits/rejected": -0.7444495558738708, "logps/chosen": -0.10139751434326172, "logps/rejected": -0.3613331913948059, "loss": 4.0865, "nll_loss": 0.9895828366279602, "rewards/accuracies": 0.875, "rewards/chosen": -0.010139752179384232, "rewards/margins": 0.02599356882274151, "rewards/rejected": -0.03613331913948059, "step": 973 }, { "epoch": 0.673582295988935, "grad_norm": 3.471388816833496, "learning_rate": 3.367911479944675e-05, "log_odds_chosen": 3.8406763076782227, "log_odds_ratio": -0.4341563284397125, "logits/chosen": -0.47200706601142883, "logits/rejected": -0.4881913661956787, "logps/chosen": -0.07415127754211426, "logps/rejected": -0.40192297101020813, "loss": 3.2404, "nll_loss": 0.7666944265365601, "rewards/accuracies": 0.625, "rewards/chosen": -0.007415127940475941, "rewards/margins": 0.032777171581983566, "rewards/rejected": -0.040192294865846634, "step": 974 }, { "epoch": 0.6742738589211619, "grad_norm": 3.176514148712158, "learning_rate": 3.371369294605809e-05, "log_odds_chosen": 1.1546623706817627, "log_odds_ratio": -0.39299866557121277, "logits/chosen": -0.36612606048583984, "logits/rejected": -0.35044723749160767, "logps/chosen": -0.16547030210494995, "logps/rejected": -0.4321480691432953, "loss": 3.0503, "nll_loss": 0.7232798337936401, "rewards/accuracies": 0.75, "rewards/chosen": -0.016547029837965965, "rewards/margins": 0.026667779311537743, "rewards/rejected": -0.04321480542421341, "step": 975 }, { "epoch": 0.6749654218533887, "grad_norm": 4.0449538230896, "learning_rate": 3.3748271092669434e-05, "log_odds_chosen": 3.5713706016540527, "log_odds_ratio": -0.3042868673801422, "logits/chosen": -0.46112552285194397, "logits/rejected": -0.5051918625831604, "logps/chosen": -0.0884426087141037, "logps/rejected": -0.48638951778411865, "loss": 4.5366, "nll_loss": 1.1037156581878662, "rewards/accuracies": 0.75, "rewards/chosen": -0.00884426198899746, "rewards/margins": 0.039794694632291794, "rewards/rejected": -0.048638954758644104, "step": 976 }, { "epoch": 0.6756569847856155, "grad_norm": 3.9816436767578125, "learning_rate": 3.3782849239280775e-05, "log_odds_chosen": 3.7466630935668945, "log_odds_ratio": -0.3648548126220703, "logits/chosen": -0.6325836777687073, "logits/rejected": -0.6226431727409363, "logps/chosen": -0.07730133831501007, "logps/rejected": -0.7135946154594421, "loss": 3.2022, "nll_loss": 0.7640625238418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.007730133831501007, "rewards/margins": 0.06362932920455933, "rewards/rejected": -0.07135946303606033, "step": 977 }, { "epoch": 0.6763485477178424, "grad_norm": 3.3771657943725586, "learning_rate": 3.381742738589212e-05, "log_odds_chosen": 3.9462733268737793, "log_odds_ratio": -0.3088147044181824, "logits/chosen": -0.8675793409347534, "logits/rejected": -0.8908983469009399, "logps/chosen": -0.10656030476093292, "logps/rejected": -0.7457081079483032, "loss": 3.7328, "nll_loss": 0.9023140668869019, "rewards/accuracies": 0.875, "rewards/chosen": -0.010656031779944897, "rewards/margins": 0.06391478329896927, "rewards/rejected": -0.07457081228494644, "step": 978 }, { "epoch": 0.6770401106500692, "grad_norm": 4.066298007965088, "learning_rate": 3.385200553250346e-05, "log_odds_chosen": 2.962782859802246, "log_odds_ratio": -0.3244636654853821, "logits/chosen": -0.6073873043060303, "logits/rejected": -0.6426203846931458, "logps/chosen": -0.08333338052034378, "logps/rejected": -0.5626384615898132, "loss": 3.7632, "nll_loss": 0.9083600044250488, "rewards/accuracies": 0.75, "rewards/chosen": -0.008333337493240833, "rewards/margins": 0.047930508852005005, "rewards/rejected": -0.05626384913921356, "step": 979 }, { "epoch": 0.677731673582296, "grad_norm": 5.754980564117432, "learning_rate": 3.38865836791148e-05, "log_odds_chosen": 3.0913007259368896, "log_odds_ratio": -0.393584668636322, "logits/chosen": -0.5925887823104858, "logits/rejected": -0.6561344265937805, "logps/chosen": -0.1347353160381317, "logps/rejected": -0.5746136903762817, "loss": 4.7798, "nll_loss": 1.1555871963500977, "rewards/accuracies": 0.75, "rewards/chosen": -0.01347353309392929, "rewards/margins": 0.043987832963466644, "rewards/rejected": -0.057461366057395935, "step": 980 }, { "epoch": 0.6784232365145229, "grad_norm": 4.209239959716797, "learning_rate": 3.392116182572614e-05, "log_odds_chosen": 2.974820613861084, "log_odds_ratio": -0.3493000268936157, "logits/chosen": -0.980753481388092, "logits/rejected": -1.0084547996520996, "logps/chosen": -0.05953915789723396, "logps/rejected": -0.46828117966651917, "loss": 4.2893, "nll_loss": 1.037407398223877, "rewards/accuracies": 0.875, "rewards/chosen": -0.005953915882855654, "rewards/margins": 0.04087420180439949, "rewards/rejected": -0.04682811722159386, "step": 981 }, { "epoch": 0.6791147994467497, "grad_norm": 5.047092914581299, "learning_rate": 3.395573997233748e-05, "log_odds_chosen": -0.3905660808086395, "log_odds_ratio": -0.9512245655059814, "logits/chosen": -0.4087103009223938, "logits/rejected": -0.38396310806274414, "logps/chosen": -0.19242680072784424, "logps/rejected": -0.1307140290737152, "loss": 4.7201, "nll_loss": 1.0849030017852783, "rewards/accuracies": 0.25, "rewards/chosen": -0.019242681562900543, "rewards/margins": -0.006171277724206448, "rewards/rejected": -0.01307140477001667, "step": 982 }, { "epoch": 0.6798063623789765, "grad_norm": 5.881921291351318, "learning_rate": 3.3990318118948825e-05, "log_odds_chosen": 1.5944758653640747, "log_odds_ratio": -0.7979428172111511, "logits/chosen": -0.5588828921318054, "logits/rejected": -0.5556572675704956, "logps/chosen": -0.13492585718631744, "logps/rejected": -0.4783182144165039, "loss": 4.2007, "nll_loss": 0.9703859090805054, "rewards/accuracies": 0.625, "rewards/chosen": -0.013492586091160774, "rewards/margins": 0.03433923423290253, "rewards/rejected": -0.04783182218670845, "step": 983 }, { "epoch": 0.6804979253112033, "grad_norm": 5.97307014465332, "learning_rate": 3.4024896265560166e-05, "log_odds_chosen": 4.040063381195068, "log_odds_ratio": -0.5749695301055908, "logits/chosen": -0.43717265129089355, "logits/rejected": -0.4032226800918579, "logps/chosen": -0.08708241581916809, "logps/rejected": -0.4701418876647949, "loss": 3.4376, "nll_loss": 0.8019071221351624, "rewards/accuracies": 0.625, "rewards/chosen": -0.008708241395652294, "rewards/margins": 0.03830594941973686, "rewards/rejected": -0.04701419174671173, "step": 984 }, { "epoch": 0.6811894882434302, "grad_norm": 4.5437116622924805, "learning_rate": 3.405947441217151e-05, "log_odds_chosen": 1.420559287071228, "log_odds_ratio": -0.32596197724342346, "logits/chosen": -0.41524261236190796, "logits/rejected": -0.41757941246032715, "logps/chosen": -0.1719023883342743, "logps/rejected": -0.5850471258163452, "loss": 3.5266, "nll_loss": 0.8490634560585022, "rewards/accuracies": 0.875, "rewards/chosen": -0.01719024032354355, "rewards/margins": 0.04131447523832321, "rewards/rejected": -0.05850471183657646, "step": 985 }, { "epoch": 0.681881051175657, "grad_norm": 4.771402835845947, "learning_rate": 3.409405255878285e-05, "log_odds_chosen": 2.0101003646850586, "log_odds_ratio": -0.5677647590637207, "logits/chosen": -0.7287096977233887, "logits/rejected": -0.753067135810852, "logps/chosen": -0.16982880234718323, "logps/rejected": -0.48460328578948975, "loss": 5.6082, "nll_loss": 1.3452624082565308, "rewards/accuracies": 0.625, "rewards/chosen": -0.016982881352305412, "rewards/margins": 0.031477443873882294, "rewards/rejected": -0.048460330814123154, "step": 986 }, { "epoch": 0.6825726141078838, "grad_norm": 4.040125846862793, "learning_rate": 3.412863070539419e-05, "log_odds_chosen": 3.0267107486724854, "log_odds_ratio": -0.4347437024116516, "logits/chosen": -0.8169975280761719, "logits/rejected": -0.8093977570533752, "logps/chosen": -0.11449096351861954, "logps/rejected": -0.5360022187232971, "loss": 4.6192, "nll_loss": 1.1113377809524536, "rewards/accuracies": 0.75, "rewards/chosen": -0.011449096724390984, "rewards/margins": 0.04215112701058388, "rewards/rejected": -0.05360021814703941, "step": 987 }, { "epoch": 0.6832641770401107, "grad_norm": 7.9208807945251465, "learning_rate": 3.416320885200553e-05, "log_odds_chosen": -0.30854225158691406, "log_odds_ratio": -1.0147993564605713, "logits/chosen": -0.2861216962337494, "logits/rejected": -0.29731225967407227, "logps/chosen": -0.4028143882751465, "logps/rejected": -0.21703889966011047, "loss": 5.3238, "nll_loss": 1.2294610738754272, "rewards/accuracies": 0.5, "rewards/chosen": -0.04028144106268883, "rewards/margins": -0.01857755146920681, "rewards/rejected": -0.02170388773083687, "step": 988 }, { "epoch": 0.6839557399723375, "grad_norm": 5.581965446472168, "learning_rate": 3.4197786998616874e-05, "log_odds_chosen": 2.1221306324005127, "log_odds_ratio": -0.3847159743309021, "logits/chosen": -0.6367366909980774, "logits/rejected": -0.6817864775657654, "logps/chosen": -0.11510385572910309, "logps/rejected": -0.44107532501220703, "loss": 4.8327, "nll_loss": 1.169701099395752, "rewards/accuracies": 0.625, "rewards/chosen": -0.011510386131703854, "rewards/margins": 0.032597146928310394, "rewards/rejected": -0.04410753399133682, "step": 989 }, { "epoch": 0.6846473029045643, "grad_norm": 5.5574140548706055, "learning_rate": 3.4232365145228216e-05, "log_odds_chosen": 1.8708701133728027, "log_odds_ratio": -0.7869781851768494, "logits/chosen": -0.6626830697059631, "logits/rejected": -0.6924288272857666, "logps/chosen": -0.21340054273605347, "logps/rejected": -0.3987637758255005, "loss": 4.356, "nll_loss": 1.0102986097335815, "rewards/accuracies": 0.625, "rewards/chosen": -0.021340053528547287, "rewards/margins": 0.01853632554411888, "rewards/rejected": -0.03987637907266617, "step": 990 }, { "epoch": 0.6853388658367912, "grad_norm": 3.7821218967437744, "learning_rate": 3.426694329183956e-05, "log_odds_chosen": 3.599870204925537, "log_odds_ratio": -0.39365655183792114, "logits/chosen": -0.8973872661590576, "logits/rejected": -0.9051483273506165, "logps/chosen": -0.14106644690036774, "logps/rejected": -0.3728795647621155, "loss": 3.5174, "nll_loss": 0.83997642993927, "rewards/accuracies": 0.75, "rewards/chosen": -0.014106645248830318, "rewards/margins": 0.023181311786174774, "rewards/rejected": -0.03728795796632767, "step": 991 }, { "epoch": 0.686030428769018, "grad_norm": 4.836569786071777, "learning_rate": 3.43015214384509e-05, "log_odds_chosen": 1.069951057434082, "log_odds_ratio": -0.5975240468978882, "logits/chosen": -0.19024032354354858, "logits/rejected": -0.17968958616256714, "logps/chosen": -0.11591685563325882, "logps/rejected": -0.27811747789382935, "loss": 4.2014, "nll_loss": 0.9905920028686523, "rewards/accuracies": 0.5, "rewards/chosen": -0.011591685004532337, "rewards/margins": 0.01622006483376026, "rewards/rejected": -0.027811748906970024, "step": 992 }, { "epoch": 0.6867219917012448, "grad_norm": 2.7467453479766846, "learning_rate": 3.433609958506224e-05, "log_odds_chosen": 3.485941171646118, "log_odds_ratio": -0.3185138404369354, "logits/chosen": -0.394996702671051, "logits/rejected": -0.4043181836605072, "logps/chosen": -0.10663348436355591, "logps/rejected": -0.38438695669174194, "loss": 3.4745, "nll_loss": 0.8367683291435242, "rewards/accuracies": 0.875, "rewards/chosen": -0.010663348250091076, "rewards/margins": 0.027775347232818604, "rewards/rejected": -0.038438692688941956, "step": 993 }, { "epoch": 0.6874135546334716, "grad_norm": 11.837431907653809, "learning_rate": 3.437067773167358e-05, "log_odds_chosen": 2.9741313457489014, "log_odds_ratio": -0.7015028595924377, "logits/chosen": -0.34724104404449463, "logits/rejected": -0.3198005259037018, "logps/chosen": -0.10866589844226837, "logps/rejected": -0.4531497359275818, "loss": 3.1967, "nll_loss": 0.7290204763412476, "rewards/accuracies": 0.75, "rewards/chosen": -0.010866588912904263, "rewards/margins": 0.03444838523864746, "rewards/rejected": -0.0453149750828743, "step": 994 }, { "epoch": 0.6881051175656985, "grad_norm": 3.6138124465942383, "learning_rate": 3.4405255878284923e-05, "log_odds_chosen": 2.874924659729004, "log_odds_ratio": -0.4109228849411011, "logits/chosen": -0.34337174892425537, "logits/rejected": -0.34686705470085144, "logps/chosen": -0.1123964712023735, "logps/rejected": -0.38113945722579956, "loss": 4.2389, "nll_loss": 1.0186246633529663, "rewards/accuracies": 0.75, "rewards/chosen": -0.01123964786529541, "rewards/margins": 0.026874300092458725, "rewards/rejected": -0.038113947957754135, "step": 995 }, { "epoch": 0.6887966804979253, "grad_norm": 3.863123893737793, "learning_rate": 3.4439834024896265e-05, "log_odds_chosen": 1.4307935237884521, "log_odds_ratio": -0.7987613081932068, "logits/chosen": -0.45763593912124634, "logits/rejected": -0.4397510886192322, "logps/chosen": -0.17323186993598938, "logps/rejected": -0.29393959045410156, "loss": 4.0577, "nll_loss": 0.9345569610595703, "rewards/accuracies": 0.625, "rewards/chosen": -0.017323188483715057, "rewards/margins": 0.012070773169398308, "rewards/rejected": -0.029393963515758514, "step": 996 }, { "epoch": 0.6894882434301521, "grad_norm": 2.736734628677368, "learning_rate": 3.4474412171507607e-05, "log_odds_chosen": 1.240825891494751, "log_odds_ratio": -0.33845093846321106, "logits/chosen": -0.5953700542449951, "logits/rejected": -0.6042296886444092, "logps/chosen": -0.13993823528289795, "logps/rejected": -0.43376731872558594, "loss": 4.281, "nll_loss": 1.0364121198654175, "rewards/accuracies": 0.875, "rewards/chosen": -0.013993822038173676, "rewards/margins": 0.029382910579442978, "rewards/rejected": -0.04337673261761665, "step": 997 }, { "epoch": 0.690179806362379, "grad_norm": 2.9601619243621826, "learning_rate": 3.450899031811895e-05, "log_odds_chosen": 2.210240125656128, "log_odds_ratio": -0.4595649838447571, "logits/chosen": -0.5523463487625122, "logits/rejected": -0.6149584054946899, "logps/chosen": -0.07959660142660141, "logps/rejected": -0.25229543447494507, "loss": 3.0791, "nll_loss": 0.7238231301307678, "rewards/accuracies": 0.75, "rewards/chosen": -0.007959661073982716, "rewards/margins": 0.017269885167479515, "rewards/rejected": -0.025229543447494507, "step": 998 }, { "epoch": 0.6908713692946058, "grad_norm": 3.568354368209839, "learning_rate": 3.454356846473029e-05, "log_odds_chosen": 1.4892652034759521, "log_odds_ratio": -0.4482731223106384, "logits/chosen": -0.5335237383842468, "logits/rejected": -0.5101516246795654, "logps/chosen": -0.13420335948467255, "logps/rejected": -0.4021463394165039, "loss": 3.3719, "nll_loss": 0.7981423139572144, "rewards/accuracies": 0.75, "rewards/chosen": -0.01342033687978983, "rewards/margins": 0.026794295758008957, "rewards/rejected": -0.04021463543176651, "step": 999 }, { "epoch": 0.6915629322268326, "grad_norm": 3.095843553543091, "learning_rate": 3.457814661134163e-05, "log_odds_chosen": 4.331266403198242, "log_odds_ratio": -0.22867438197135925, "logits/chosen": -0.10455627739429474, "logits/rejected": -0.1381661295890808, "logps/chosen": -0.04887394607067108, "logps/rejected": -0.48447513580322266, "loss": 2.9581, "nll_loss": 0.7166623473167419, "rewards/accuracies": 0.875, "rewards/chosen": -0.004887394607067108, "rewards/margins": 0.04356011748313904, "rewards/rejected": -0.048447512090206146, "step": 1000 }, { "epoch": 0.6922544951590595, "grad_norm": 3.872911214828491, "learning_rate": 3.461272475795297e-05, "log_odds_chosen": 1.9170019626617432, "log_odds_ratio": -0.5383463501930237, "logits/chosen": -0.24512067437171936, "logits/rejected": -0.24772781133651733, "logps/chosen": -0.10138186067342758, "logps/rejected": -0.3690054416656494, "loss": 2.9505, "nll_loss": 0.6837884187698364, "rewards/accuracies": 0.5, "rewards/chosen": -0.010138185694813728, "rewards/margins": 0.026762360706925392, "rewards/rejected": -0.03690054640173912, "step": 1001 }, { "epoch": 0.6929460580912863, "grad_norm": 2.6410980224609375, "learning_rate": 3.4647302904564314e-05, "log_odds_chosen": 0.46685516834259033, "log_odds_ratio": -0.5900712013244629, "logits/chosen": -0.41452556848526, "logits/rejected": -0.3784329295158386, "logps/chosen": -0.13433943688869476, "logps/rejected": -0.21028949320316315, "loss": 3.0342, "nll_loss": 0.6995489597320557, "rewards/accuracies": 0.75, "rewards/chosen": -0.013433944433927536, "rewards/margins": 0.007595007307827473, "rewards/rejected": -0.021028950810432434, "step": 1002 }, { "epoch": 0.6936376210235131, "grad_norm": 3.552522897720337, "learning_rate": 3.4681881051175656e-05, "log_odds_chosen": 4.3343186378479, "log_odds_ratio": -0.1376817226409912, "logits/chosen": -0.17142628133296967, "logits/rejected": -0.1732548475265503, "logps/chosen": -0.05082971975207329, "logps/rejected": -0.39188894629478455, "loss": 3.8044, "nll_loss": 0.9373430013656616, "rewards/accuracies": 1.0, "rewards/chosen": -0.005082972347736359, "rewards/margins": 0.034105923026800156, "rewards/rejected": -0.039188895374536514, "step": 1003 }, { "epoch": 0.69432918395574, "grad_norm": 3.4876062870025635, "learning_rate": 3.4716459197787e-05, "log_odds_chosen": 0.7625752091407776, "log_odds_ratio": -0.45943683385849, "logits/chosen": -0.6727699637413025, "logits/rejected": -0.6510109901428223, "logps/chosen": -0.16815423965454102, "logps/rejected": -0.35847675800323486, "loss": 2.9769, "nll_loss": 0.6982860565185547, "rewards/accuracies": 0.75, "rewards/chosen": -0.0168154239654541, "rewards/margins": 0.019032252952456474, "rewards/rejected": -0.035847678780555725, "step": 1004 }, { "epoch": 0.6950207468879668, "grad_norm": 4.408685207366943, "learning_rate": 3.475103734439834e-05, "log_odds_chosen": 2.3867549896240234, "log_odds_ratio": -0.314208984375, "logits/chosen": -0.6479254364967346, "logits/rejected": -0.6886224150657654, "logps/chosen": -0.10548969358205795, "logps/rejected": -0.4884876310825348, "loss": 4.383, "nll_loss": 1.0643311738967896, "rewards/accuracies": 1.0, "rewards/chosen": -0.010548969730734825, "rewards/margins": 0.038299791514873505, "rewards/rejected": -0.04884876310825348, "step": 1005 }, { "epoch": 0.6957123098201936, "grad_norm": 3.915799617767334, "learning_rate": 3.478561549100968e-05, "log_odds_chosen": 3.446237087249756, "log_odds_ratio": -0.3558885157108307, "logits/chosen": -0.6331568956375122, "logits/rejected": -0.6514267921447754, "logps/chosen": -0.14412257075309753, "logps/rejected": -0.6114010810852051, "loss": 3.2002, "nll_loss": 0.7644554376602173, "rewards/accuracies": 0.625, "rewards/chosen": -0.014412256889045238, "rewards/margins": 0.046727851033210754, "rewards/rejected": -0.061140112578868866, "step": 1006 }, { "epoch": 0.6964038727524204, "grad_norm": 4.497232437133789, "learning_rate": 3.482019363762102e-05, "log_odds_chosen": 1.839732050895691, "log_odds_ratio": -0.3929927349090576, "logits/chosen": -0.3407534062862396, "logits/rejected": -0.36043086647987366, "logps/chosen": -0.07634704560041428, "logps/rejected": -0.40131884813308716, "loss": 4.2111, "nll_loss": 1.0134646892547607, "rewards/accuracies": 0.875, "rewards/chosen": -0.007634705863893032, "rewards/margins": 0.03249718248844147, "rewards/rejected": -0.040131889283657074, "step": 1007 }, { "epoch": 0.6970954356846473, "grad_norm": 3.4116599559783936, "learning_rate": 3.4854771784232364e-05, "log_odds_chosen": 2.439486026763916, "log_odds_ratio": -0.20432066917419434, "logits/chosen": -0.6585232615470886, "logits/rejected": -0.7056906223297119, "logps/chosen": -0.15550759434700012, "logps/rejected": -0.7698625922203064, "loss": 3.8301, "nll_loss": 0.9370852708816528, "rewards/accuracies": 1.0, "rewards/chosen": -0.015550761483609676, "rewards/margins": 0.06143549457192421, "rewards/rejected": -0.07698626071214676, "step": 1008 }, { "epoch": 0.6977869986168741, "grad_norm": 4.420785903930664, "learning_rate": 3.4889349930843705e-05, "log_odds_chosen": 0.5230880379676819, "log_odds_ratio": -0.551123321056366, "logits/chosen": -0.7153730988502502, "logits/rejected": -0.759360134601593, "logps/chosen": -0.14089688658714294, "logps/rejected": -0.31493067741394043, "loss": 5.1094, "nll_loss": 1.222233772277832, "rewards/accuracies": 0.75, "rewards/chosen": -0.014089690521359444, "rewards/margins": 0.0174033772200346, "rewards/rejected": -0.03149306774139404, "step": 1009 }, { "epoch": 0.6984785615491009, "grad_norm": 4.9378557205200195, "learning_rate": 3.492392807745505e-05, "log_odds_chosen": 3.2825350761413574, "log_odds_ratio": -0.2590652406215668, "logits/chosen": -0.8463261127471924, "logits/rejected": -0.9359209537506104, "logps/chosen": -0.08437243103981018, "logps/rejected": -0.7198293805122375, "loss": 5.7714, "nll_loss": 1.416931390762329, "rewards/accuracies": 0.875, "rewards/chosen": -0.008437243290245533, "rewards/margins": 0.06354568898677826, "rewards/rejected": -0.07198293507099152, "step": 1010 }, { "epoch": 0.6991701244813278, "grad_norm": 3.547576427459717, "learning_rate": 3.495850622406639e-05, "log_odds_chosen": 2.6140286922454834, "log_odds_ratio": -0.29810887575149536, "logits/chosen": -0.7257479429244995, "logits/rejected": -0.7237606048583984, "logps/chosen": -0.06791023164987564, "logps/rejected": -0.41881442070007324, "loss": 4.1977, "nll_loss": 1.0196094512939453, "rewards/accuracies": 0.875, "rewards/chosen": -0.006791023537516594, "rewards/margins": 0.03509042412042618, "rewards/rejected": -0.04188144952058792, "step": 1011 }, { "epoch": 0.6998616874135546, "grad_norm": 4.760746002197266, "learning_rate": 3.499308437067773e-05, "log_odds_chosen": 3.2602224349975586, "log_odds_ratio": -0.4387058615684509, "logits/chosen": -0.5019223690032959, "logits/rejected": -0.5217973589897156, "logps/chosen": -0.08388447761535645, "logps/rejected": -0.6735737919807434, "loss": 4.8476, "nll_loss": 1.1680355072021484, "rewards/accuracies": 0.75, "rewards/chosen": -0.008388448506593704, "rewards/margins": 0.058968931436538696, "rewards/rejected": -0.0673573836684227, "step": 1012 }, { "epoch": 0.7005532503457814, "grad_norm": 4.929751873016357, "learning_rate": 3.502766251728907e-05, "log_odds_chosen": 1.7852381467819214, "log_odds_ratio": -0.545844554901123, "logits/chosen": -0.4042653739452362, "logits/rejected": -0.44106078147888184, "logps/chosen": -0.1827096939086914, "logps/rejected": -0.5603499412536621, "loss": 4.9799, "nll_loss": 1.1903879642486572, "rewards/accuracies": 0.875, "rewards/chosen": -0.01827096939086914, "rewards/margins": 0.03776402026414871, "rewards/rejected": -0.05603498965501785, "step": 1013 }, { "epoch": 0.7012448132780082, "grad_norm": 4.870401382446289, "learning_rate": 3.506224066390041e-05, "log_odds_chosen": 3.7482364177703857, "log_odds_ratio": -0.3262961506843567, "logits/chosen": -0.794061541557312, "logits/rejected": -0.7662990093231201, "logps/chosen": -0.06687057763338089, "logps/rejected": -0.5857488512992859, "loss": 4.0956, "nll_loss": 0.9912663698196411, "rewards/accuracies": 0.875, "rewards/chosen": -0.006687058135867119, "rewards/margins": 0.05188782885670662, "rewards/rejected": -0.05857488512992859, "step": 1014 }, { "epoch": 0.7019363762102351, "grad_norm": 5.270098686218262, "learning_rate": 3.5096818810511755e-05, "log_odds_chosen": 1.8332812786102295, "log_odds_ratio": -0.5245056748390198, "logits/chosen": -0.7074244022369385, "logits/rejected": -0.7468313574790955, "logps/chosen": -0.11609043180942535, "logps/rejected": -0.3391810357570648, "loss": 4.5547, "nll_loss": 1.0862311124801636, "rewards/accuracies": 0.5, "rewards/chosen": -0.011609042063355446, "rewards/margins": 0.022309059277176857, "rewards/rejected": -0.0339181050658226, "step": 1015 }, { "epoch": 0.7026279391424619, "grad_norm": 4.283554553985596, "learning_rate": 3.5131396957123096e-05, "log_odds_chosen": 1.1753572225570679, "log_odds_ratio": -0.36678510904312134, "logits/chosen": -0.4675461947917938, "logits/rejected": -0.479078471660614, "logps/chosen": -0.12072758376598358, "logps/rejected": -0.33430609107017517, "loss": 4.0677, "nll_loss": 0.980250895023346, "rewards/accuracies": 0.875, "rewards/chosen": -0.012072758749127388, "rewards/margins": 0.02135784924030304, "rewards/rejected": -0.03343060612678528, "step": 1016 }, { "epoch": 0.7033195020746889, "grad_norm": 4.776187896728516, "learning_rate": 3.5165975103734445e-05, "log_odds_chosen": 3.0475525856018066, "log_odds_ratio": -0.21066389977931976, "logits/chosen": -0.6627320051193237, "logits/rejected": -0.6491464376449585, "logps/chosen": -0.09881362318992615, "logps/rejected": -0.5803578495979309, "loss": 4.6229, "nll_loss": 1.1346709728240967, "rewards/accuracies": 0.875, "rewards/chosen": -0.00988136138767004, "rewards/margins": 0.048154428601264954, "rewards/rejected": -0.05803578719496727, "step": 1017 }, { "epoch": 0.7040110650069157, "grad_norm": 3.657235860824585, "learning_rate": 3.5200553250345786e-05, "log_odds_chosen": 1.733577847480774, "log_odds_ratio": -0.5284569263458252, "logits/chosen": -0.42558619379997253, "logits/rejected": -0.48167991638183594, "logps/chosen": -0.1381000578403473, "logps/rejected": -0.2790611982345581, "loss": 3.4075, "nll_loss": 0.7990308403968811, "rewards/accuracies": 0.625, "rewards/chosen": -0.01381000503897667, "rewards/margins": 0.014096113853156567, "rewards/rejected": -0.02790611982345581, "step": 1018 }, { "epoch": 0.7047026279391425, "grad_norm": 3.4598958492279053, "learning_rate": 3.523513139695713e-05, "log_odds_chosen": 3.601151466369629, "log_odds_ratio": -0.22872741520404816, "logits/chosen": -0.6099940538406372, "logits/rejected": -0.6158989071846008, "logps/chosen": -0.04661906510591507, "logps/rejected": -0.6002925634384155, "loss": 3.4816, "nll_loss": 0.8475351333618164, "rewards/accuracies": 0.875, "rewards/chosen": -0.004661906510591507, "rewards/margins": 0.055367350578308105, "rewards/rejected": -0.06002925708889961, "step": 1019 }, { "epoch": 0.7053941908713693, "grad_norm": 4.966454029083252, "learning_rate": 3.526970954356847e-05, "log_odds_chosen": 1.2062649726867676, "log_odds_ratio": -0.505730390548706, "logits/chosen": -0.8687747716903687, "logits/rejected": -0.9171357750892639, "logps/chosen": -0.15147417783737183, "logps/rejected": -0.4396016597747803, "loss": 5.3178, "nll_loss": 1.2788842916488647, "rewards/accuracies": 0.875, "rewards/chosen": -0.015147417783737183, "rewards/margins": 0.028812747448682785, "rewards/rejected": -0.043960168957710266, "step": 1020 }, { "epoch": 0.7060857538035962, "grad_norm": 2.3341596126556396, "learning_rate": 3.530428769017981e-05, "log_odds_chosen": 5.564569473266602, "log_odds_ratio": -0.04462364688515663, "logits/chosen": -0.21850377321243286, "logits/rejected": -0.2372198849916458, "logps/chosen": -0.03681230917572975, "logps/rejected": -0.7510018348693848, "loss": 3.1713, "nll_loss": 0.7883737683296204, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036812310572713614, "rewards/margins": 0.07141894847154617, "rewards/rejected": -0.07510018348693848, "step": 1021 }, { "epoch": 0.706777316735823, "grad_norm": 4.633392333984375, "learning_rate": 3.533886583679115e-05, "log_odds_chosen": 4.466066360473633, "log_odds_ratio": -0.2729628086090088, "logits/chosen": -0.52900230884552, "logits/rejected": -0.5783476829528809, "logps/chosen": -0.06350569427013397, "logps/rejected": -0.6221787333488464, "loss": 3.6471, "nll_loss": 0.8844875693321228, "rewards/accuracies": 0.875, "rewards/chosen": -0.006350569427013397, "rewards/margins": 0.055867306888103485, "rewards/rejected": -0.062217868864536285, "step": 1022 }, { "epoch": 0.7074688796680498, "grad_norm": 4.214491367340088, "learning_rate": 3.5373443983402494e-05, "log_odds_chosen": 3.1329104900360107, "log_odds_ratio": -0.3419583737850189, "logits/chosen": -0.6461799144744873, "logits/rejected": -0.6264448165893555, "logps/chosen": -0.0748824030160904, "logps/rejected": -0.5373539924621582, "loss": 3.7843, "nll_loss": 0.9118846654891968, "rewards/accuracies": 0.625, "rewards/chosen": -0.007488240487873554, "rewards/margins": 0.04624716192483902, "rewards/rejected": -0.05373540148139, "step": 1023 }, { "epoch": 0.7081604426002767, "grad_norm": 4.893476486206055, "learning_rate": 3.5408022130013836e-05, "log_odds_chosen": 0.9490301609039307, "log_odds_ratio": -0.8348803520202637, "logits/chosen": -0.8363821506500244, "logits/rejected": -0.8834267854690552, "logps/chosen": -0.17236392199993134, "logps/rejected": -0.4684886634349823, "loss": 4.0871, "nll_loss": 0.9382818937301636, "rewards/accuracies": 0.5, "rewards/chosen": -0.017236391082406044, "rewards/margins": 0.029612472280859947, "rewards/rejected": -0.04684887081384659, "step": 1024 }, { "epoch": 0.7088520055325035, "grad_norm": 4.346531867980957, "learning_rate": 3.544260027662518e-05, "log_odds_chosen": 5.200535297393799, "log_odds_ratio": -0.0615101084113121, "logits/chosen": -0.8259227275848389, "logits/rejected": -0.8945444822311401, "logps/chosen": -0.018674220889806747, "logps/rejected": -0.633916437625885, "loss": 4.3103, "nll_loss": 1.0714225769042969, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018674221355468035, "rewards/margins": 0.061524223536252975, "rewards/rejected": -0.06339164078235626, "step": 1025 }, { "epoch": 0.7095435684647303, "grad_norm": 3.327759027481079, "learning_rate": 3.547717842323652e-05, "log_odds_chosen": 5.512311935424805, "log_odds_ratio": -0.16451141238212585, "logits/chosen": -0.8345467448234558, "logits/rejected": -0.8430370092391968, "logps/chosen": -0.07801201939582825, "logps/rejected": -0.8770555257797241, "loss": 3.289, "nll_loss": 0.8057981729507446, "rewards/accuracies": 0.875, "rewards/chosen": -0.007801203057169914, "rewards/margins": 0.07990435510873795, "rewards/rejected": -0.08770555257797241, "step": 1026 }, { "epoch": 0.7102351313969572, "grad_norm": 3.0856127738952637, "learning_rate": 3.551175656984786e-05, "log_odds_chosen": 2.5221033096313477, "log_odds_ratio": -0.3784581124782562, "logits/chosen": -0.287615031003952, "logits/rejected": -0.27814438939094543, "logps/chosen": -0.15040525794029236, "logps/rejected": -0.49353331327438354, "loss": 2.9133, "nll_loss": 0.6904860734939575, "rewards/accuracies": 0.625, "rewards/chosen": -0.01504052709788084, "rewards/margins": 0.03431280702352524, "rewards/rejected": -0.04935333877801895, "step": 1027 }, { "epoch": 0.710926694329184, "grad_norm": 3.6601574420928955, "learning_rate": 3.55463347164592e-05, "log_odds_chosen": 1.9526305198669434, "log_odds_ratio": -0.49178487062454224, "logits/chosen": -0.574364423751831, "logits/rejected": -0.5638930797576904, "logps/chosen": -0.15838921070098877, "logps/rejected": -0.48088887333869934, "loss": 3.962, "nll_loss": 0.9413228034973145, "rewards/accuracies": 0.75, "rewards/chosen": -0.015838919207453728, "rewards/margins": 0.03224996477365494, "rewards/rejected": -0.048088885843753815, "step": 1028 }, { "epoch": 0.7116182572614108, "grad_norm": 6.44592809677124, "learning_rate": 3.558091286307054e-05, "log_odds_chosen": 3.593794345855713, "log_odds_ratio": -0.5041420459747314, "logits/chosen": -0.7599477767944336, "logits/rejected": -0.7372944355010986, "logps/chosen": -0.18992546200752258, "logps/rejected": -0.566834568977356, "loss": 4.0358, "nll_loss": 0.9585399627685547, "rewards/accuracies": 0.75, "rewards/chosen": -0.018992546945810318, "rewards/margins": 0.0376909077167511, "rewards/rejected": -0.056683458387851715, "step": 1029 }, { "epoch": 0.7123098201936376, "grad_norm": 7.031370639801025, "learning_rate": 3.5615491009681885e-05, "log_odds_chosen": 3.2854695320129395, "log_odds_ratio": -0.435203492641449, "logits/chosen": -0.45540231466293335, "logits/rejected": -0.513881504535675, "logps/chosen": -0.0969720408320427, "logps/rejected": -0.5342581272125244, "loss": 3.0684, "nll_loss": 0.7235726714134216, "rewards/accuracies": 0.875, "rewards/chosen": -0.0096972044557333, "rewards/margins": 0.04372861236333847, "rewards/rejected": -0.05342581868171692, "step": 1030 }, { "epoch": 0.7130013831258645, "grad_norm": 6.708185195922852, "learning_rate": 3.5650069156293226e-05, "log_odds_chosen": 3.589202642440796, "log_odds_ratio": -0.20989595353603363, "logits/chosen": -0.4838123321533203, "logits/rejected": -0.5778621435165405, "logps/chosen": -0.10837168246507645, "logps/rejected": -0.9984228610992432, "loss": 3.532, "nll_loss": 0.8619996905326843, "rewards/accuracies": 0.875, "rewards/chosen": -0.01083716843277216, "rewards/margins": 0.08900512754917145, "rewards/rejected": -0.09984229505062103, "step": 1031 }, { "epoch": 0.7136929460580913, "grad_norm": 3.7739248275756836, "learning_rate": 3.568464730290457e-05, "log_odds_chosen": 2.3249645233154297, "log_odds_ratio": -0.4324222803115845, "logits/chosen": -0.8024967908859253, "logits/rejected": -0.7371588945388794, "logps/chosen": -0.12025891244411469, "logps/rejected": -0.7280334234237671, "loss": 3.3625, "nll_loss": 0.7973849177360535, "rewards/accuracies": 0.625, "rewards/chosen": -0.012025891803205013, "rewards/margins": 0.0607774518430233, "rewards/rejected": -0.07280334085226059, "step": 1032 }, { "epoch": 0.7143845089903181, "grad_norm": 4.37109899520874, "learning_rate": 3.571922544951591e-05, "log_odds_chosen": 1.8722158670425415, "log_odds_ratio": -0.4846808910369873, "logits/chosen": -0.4866708517074585, "logits/rejected": -0.5177431106567383, "logps/chosen": -0.03973466157913208, "logps/rejected": -0.30102258920669556, "loss": 4.1712, "nll_loss": 0.9943400025367737, "rewards/accuracies": 0.875, "rewards/chosen": -0.003973466344177723, "rewards/margins": 0.026128794997930527, "rewards/rejected": -0.030102260410785675, "step": 1033 }, { "epoch": 0.715076071922545, "grad_norm": 4.285762786865234, "learning_rate": 3.575380359612725e-05, "log_odds_chosen": 3.012889862060547, "log_odds_ratio": -0.2758004665374756, "logits/chosen": -0.22630202770233154, "logits/rejected": -0.21843719482421875, "logps/chosen": -0.18059399724006653, "logps/rejected": -0.6662799119949341, "loss": 3.9736, "nll_loss": 0.9658151865005493, "rewards/accuracies": 0.875, "rewards/chosen": -0.018059398978948593, "rewards/margins": 0.048568591475486755, "rewards/rejected": -0.06662799417972565, "step": 1034 }, { "epoch": 0.7157676348547718, "grad_norm": 5.383205413818359, "learning_rate": 3.578838174273859e-05, "log_odds_chosen": 1.5302259922027588, "log_odds_ratio": -0.45822760462760925, "logits/chosen": -0.4184732437133789, "logits/rejected": -0.3977556824684143, "logps/chosen": -0.15078751742839813, "logps/rejected": -0.4710010290145874, "loss": 4.7171, "nll_loss": 1.133442759513855, "rewards/accuracies": 0.625, "rewards/chosen": -0.015078751370310783, "rewards/margins": 0.03202135115861893, "rewards/rejected": -0.04710010439157486, "step": 1035 }, { "epoch": 0.7164591977869986, "grad_norm": 3.5321078300476074, "learning_rate": 3.5822959889349934e-05, "log_odds_chosen": 1.6457781791687012, "log_odds_ratio": -0.3123471438884735, "logits/chosen": -0.505487859249115, "logits/rejected": -0.47095245122909546, "logps/chosen": -0.17606432735919952, "logps/rejected": -0.6406019926071167, "loss": 3.0116, "nll_loss": 0.7216607928276062, "rewards/accuracies": 0.875, "rewards/chosen": -0.017606433480978012, "rewards/margins": 0.04645375907421112, "rewards/rejected": -0.06406019628047943, "step": 1036 }, { "epoch": 0.7171507607192255, "grad_norm": 4.82590389251709, "learning_rate": 3.5857538035961276e-05, "log_odds_chosen": 3.8188605308532715, "log_odds_ratio": -0.1875026524066925, "logits/chosen": -0.3388659954071045, "logits/rejected": -0.4199290871620178, "logps/chosen": -0.08805263042449951, "logps/rejected": -0.7061742544174194, "loss": 3.9242, "nll_loss": 0.9623033404350281, "rewards/accuracies": 0.875, "rewards/chosen": -0.00880526378750801, "rewards/margins": 0.061812154948711395, "rewards/rejected": -0.0706174224615097, "step": 1037 }, { "epoch": 0.7178423236514523, "grad_norm": 4.746621131896973, "learning_rate": 3.589211618257262e-05, "log_odds_chosen": 2.7862987518310547, "log_odds_ratio": -0.8785898685455322, "logits/chosen": -0.6080033779144287, "logits/rejected": -0.6072147488594055, "logps/chosen": -0.12437206506729126, "logps/rejected": -0.4387931823730469, "loss": 3.3452, "nll_loss": 0.7484517693519592, "rewards/accuracies": 0.5, "rewards/chosen": -0.012437205761671066, "rewards/margins": 0.03144211322069168, "rewards/rejected": -0.04387931898236275, "step": 1038 }, { "epoch": 0.7185338865836791, "grad_norm": 5.579493522644043, "learning_rate": 3.592669432918396e-05, "log_odds_chosen": 3.3001158237457275, "log_odds_ratio": -0.24173426628112793, "logits/chosen": -0.777419924736023, "logits/rejected": -0.7728596925735474, "logps/chosen": -0.06899379193782806, "logps/rejected": -0.6735274195671082, "loss": 4.8171, "nll_loss": 1.1800899505615234, "rewards/accuracies": 1.0, "rewards/chosen": -0.006899379659444094, "rewards/margins": 0.06045336276292801, "rewards/rejected": -0.06735274195671082, "step": 1039 }, { "epoch": 0.719225449515906, "grad_norm": 4.938050746917725, "learning_rate": 3.59612724757953e-05, "log_odds_chosen": 1.087746262550354, "log_odds_ratio": -0.5268850326538086, "logits/chosen": -0.5052182078361511, "logits/rejected": -0.5489234328269958, "logps/chosen": -0.17366079986095428, "logps/rejected": -0.4371531307697296, "loss": 4.0872, "nll_loss": 0.969113826751709, "rewards/accuracies": 0.625, "rewards/chosen": -0.0173660796135664, "rewards/margins": 0.026349231600761414, "rewards/rejected": -0.04371531680226326, "step": 1040 }, { "epoch": 0.7199170124481328, "grad_norm": 5.435187339782715, "learning_rate": 3.599585062240664e-05, "log_odds_chosen": 2.283799171447754, "log_odds_ratio": -0.5793908834457397, "logits/chosen": -0.6914126873016357, "logits/rejected": -0.7008126974105835, "logps/chosen": -0.1254206746816635, "logps/rejected": -0.3240630030632019, "loss": 4.2092, "nll_loss": 0.9943568110466003, "rewards/accuracies": 0.625, "rewards/chosen": -0.012542067095637321, "rewards/margins": 0.01986423321068287, "rewards/rejected": -0.03240630030632019, "step": 1041 }, { "epoch": 0.7206085753803596, "grad_norm": 4.84965705871582, "learning_rate": 3.6030428769017984e-05, "log_odds_chosen": 4.489874839782715, "log_odds_ratio": -0.29299384355545044, "logits/chosen": -0.6419664025306702, "logits/rejected": -0.6491258144378662, "logps/chosen": -0.13277804851531982, "logps/rejected": -0.714569628238678, "loss": 4.3872, "nll_loss": 1.067491888999939, "rewards/accuracies": 0.875, "rewards/chosen": -0.013277805410325527, "rewards/margins": 0.058179158717393875, "rewards/rejected": -0.07145696133375168, "step": 1042 }, { "epoch": 0.7213001383125864, "grad_norm": 4.387578010559082, "learning_rate": 3.6065006915629325e-05, "log_odds_chosen": 2.601278781890869, "log_odds_ratio": -0.3603097200393677, "logits/chosen": -0.5618245601654053, "logits/rejected": -0.5624284744262695, "logps/chosen": -0.08398522436618805, "logps/rejected": -0.556341290473938, "loss": 4.4761, "nll_loss": 1.082999587059021, "rewards/accuracies": 0.875, "rewards/chosen": -0.00839852262288332, "rewards/margins": 0.04723560810089111, "rewards/rejected": -0.05563412979245186, "step": 1043 }, { "epoch": 0.7219917012448133, "grad_norm": 4.411139965057373, "learning_rate": 3.609958506224067e-05, "log_odds_chosen": 2.73284649848938, "log_odds_ratio": -0.34536677598953247, "logits/chosen": -0.6625887155532837, "logits/rejected": -0.6966568231582642, "logps/chosen": -0.1273365020751953, "logps/rejected": -0.7267633676528931, "loss": 3.3589, "nll_loss": 0.8051877617835999, "rewards/accuracies": 0.875, "rewards/chosen": -0.012733649462461472, "rewards/margins": 0.059942688792943954, "rewards/rejected": -0.07267633825540543, "step": 1044 }, { "epoch": 0.7226832641770401, "grad_norm": 5.204629421234131, "learning_rate": 3.613416320885201e-05, "log_odds_chosen": 2.503190040588379, "log_odds_ratio": -0.4112352132797241, "logits/chosen": -0.7123258113861084, "logits/rejected": -0.7635508179664612, "logps/chosen": -0.14418675005435944, "logps/rejected": -0.5068868398666382, "loss": 4.7637, "nll_loss": 1.1498039960861206, "rewards/accuracies": 0.75, "rewards/chosen": -0.014418675564229488, "rewards/margins": 0.03627001494169235, "rewards/rejected": -0.05068868398666382, "step": 1045 }, { "epoch": 0.7233748271092669, "grad_norm": 4.64717960357666, "learning_rate": 3.616874135546335e-05, "log_odds_chosen": 2.7680552005767822, "log_odds_ratio": -0.22051768004894257, "logits/chosen": -0.5189613103866577, "logits/rejected": -0.5583252906799316, "logps/chosen": -0.08537647873163223, "logps/rejected": -0.5342350006103516, "loss": 3.1143, "nll_loss": 0.7565240859985352, "rewards/accuracies": 0.875, "rewards/chosen": -0.008537648245692253, "rewards/margins": 0.04488585889339447, "rewards/rejected": -0.053423501551151276, "step": 1046 }, { "epoch": 0.7240663900414938, "grad_norm": 3.2733585834503174, "learning_rate": 3.620331950207469e-05, "log_odds_chosen": 6.08332633972168, "log_odds_ratio": -0.16748517751693726, "logits/chosen": -0.35235828161239624, "logits/rejected": -0.37172678112983704, "logps/chosen": -0.06972315907478333, "logps/rejected": -0.7122898101806641, "loss": 3.2545, "nll_loss": 0.796885073184967, "rewards/accuracies": 0.875, "rewards/chosen": -0.006972315721213818, "rewards/margins": 0.06425667554140091, "rewards/rejected": -0.071228988468647, "step": 1047 }, { "epoch": 0.7247579529737206, "grad_norm": 4.058255672454834, "learning_rate": 3.623789764868603e-05, "log_odds_chosen": 1.6902698278427124, "log_odds_ratio": -0.3691096305847168, "logits/chosen": -0.6874192953109741, "logits/rejected": -0.6792905330657959, "logps/chosen": -0.14544987678527832, "logps/rejected": -0.43885865807533264, "loss": 4.3441, "nll_loss": 1.0491060018539429, "rewards/accuracies": 0.875, "rewards/chosen": -0.014544988051056862, "rewards/margins": 0.02934087999165058, "rewards/rejected": -0.04388586804270744, "step": 1048 }, { "epoch": 0.7254495159059474, "grad_norm": 5.58115816116333, "learning_rate": 3.6272475795297375e-05, "log_odds_chosen": 3.4147226810455322, "log_odds_ratio": -0.45109570026397705, "logits/chosen": -0.7348883748054504, "logits/rejected": -0.7098078727722168, "logps/chosen": -0.10721461474895477, "logps/rejected": -0.6155630350112915, "loss": 4.19, "nll_loss": 1.0024001598358154, "rewards/accuracies": 0.75, "rewards/chosen": -0.010721461847424507, "rewards/margins": 0.05083484202623367, "rewards/rejected": -0.06155630201101303, "step": 1049 }, { "epoch": 0.7261410788381742, "grad_norm": 4.20112419128418, "learning_rate": 3.6307053941908716e-05, "log_odds_chosen": 4.499558925628662, "log_odds_ratio": -0.2270698845386505, "logits/chosen": -0.23923638463020325, "logits/rejected": -0.28563469648361206, "logps/chosen": -0.07210483402013779, "logps/rejected": -0.4738559126853943, "loss": 3.026, "nll_loss": 0.7338007688522339, "rewards/accuracies": 0.75, "rewards/chosen": -0.007210483308881521, "rewards/margins": 0.04017511010169983, "rewards/rejected": -0.04738559573888779, "step": 1050 }, { "epoch": 0.7268326417704011, "grad_norm": 5.781348705291748, "learning_rate": 3.634163208852006e-05, "log_odds_chosen": 1.436905860900879, "log_odds_ratio": -0.41141462326049805, "logits/chosen": -0.6144793033599854, "logits/rejected": -0.6271205544471741, "logps/chosen": -0.13432197272777557, "logps/rejected": -0.6162081360816956, "loss": 5.1217, "nll_loss": 1.2392876148223877, "rewards/accuracies": 0.625, "rewards/chosen": -0.013432197272777557, "rewards/margins": 0.04818861931562424, "rewards/rejected": -0.061620816588401794, "step": 1051 }, { "epoch": 0.7275242047026279, "grad_norm": 6.011894226074219, "learning_rate": 3.63762102351314e-05, "log_odds_chosen": 0.6937634944915771, "log_odds_ratio": -0.5657732486724854, "logits/chosen": -0.5936172008514404, "logits/rejected": -0.6089737415313721, "logps/chosen": -0.17231324315071106, "logps/rejected": -0.3177986145019531, "loss": 6.265, "nll_loss": 1.5096734762191772, "rewards/accuracies": 0.75, "rewards/chosen": -0.017231326550245285, "rewards/margins": 0.01454853918403387, "rewards/rejected": -0.03177986294031143, "step": 1052 }, { "epoch": 0.7282157676348547, "grad_norm": 5.030375003814697, "learning_rate": 3.641078838174274e-05, "log_odds_chosen": 2.0339081287384033, "log_odds_ratio": -0.3970106840133667, "logits/chosen": -0.33543309569358826, "logits/rejected": -0.4022204279899597, "logps/chosen": -0.09676516801118851, "logps/rejected": -0.38274016976356506, "loss": 3.5718, "nll_loss": 0.8532538414001465, "rewards/accuracies": 0.75, "rewards/chosen": -0.009676516987383366, "rewards/margins": 0.028597503900527954, "rewards/rejected": -0.038274019956588745, "step": 1053 }, { "epoch": 0.7289073305670816, "grad_norm": 5.222143173217773, "learning_rate": 3.644536652835408e-05, "log_odds_chosen": 3.022456169128418, "log_odds_ratio": -0.36239343881607056, "logits/chosen": -0.23395097255706787, "logits/rejected": -0.18252798914909363, "logps/chosen": -0.0783625915646553, "logps/rejected": -0.2937096357345581, "loss": 4.2201, "nll_loss": 1.018797755241394, "rewards/accuracies": 0.75, "rewards/chosen": -0.00783625990152359, "rewards/margins": 0.02153470367193222, "rewards/rejected": -0.02937096357345581, "step": 1054 }, { "epoch": 0.7295988934993084, "grad_norm": 4.244716644287109, "learning_rate": 3.6479944674965424e-05, "log_odds_chosen": 1.170404314994812, "log_odds_ratio": -0.6230608820915222, "logits/chosen": -0.7283819913864136, "logits/rejected": -0.7557381391525269, "logps/chosen": -0.17400789260864258, "logps/rejected": -0.3624713122844696, "loss": 4.0401, "nll_loss": 0.9477148056030273, "rewards/accuracies": 0.5, "rewards/chosen": -0.01740078628063202, "rewards/margins": 0.01884634606540203, "rewards/rejected": -0.0362471342086792, "step": 1055 }, { "epoch": 0.7302904564315352, "grad_norm": 5.602895259857178, "learning_rate": 3.6514522821576766e-05, "log_odds_chosen": 4.399355888366699, "log_odds_ratio": -0.20333260297775269, "logits/chosen": -0.5755096077919006, "logits/rejected": -0.598081111907959, "logps/chosen": -0.09461069107055664, "logps/rejected": -0.7363580465316772, "loss": 4.7825, "nll_loss": 1.1752852201461792, "rewards/accuracies": 0.875, "rewards/chosen": -0.009461069479584694, "rewards/margins": 0.06417473405599594, "rewards/rejected": -0.07363580167293549, "step": 1056 }, { "epoch": 0.7309820193637621, "grad_norm": 4.749327659606934, "learning_rate": 3.654910096818811e-05, "log_odds_chosen": 0.8630741834640503, "log_odds_ratio": -0.504383385181427, "logits/chosen": -0.5899327993392944, "logits/rejected": -0.6016180515289307, "logps/chosen": -0.15858584642410278, "logps/rejected": -0.31086573004722595, "loss": 5.0885, "nll_loss": 1.2216897010803223, "rewards/accuracies": 0.875, "rewards/chosen": -0.015858585014939308, "rewards/margins": 0.015227987430989742, "rewards/rejected": -0.031086573377251625, "step": 1057 }, { "epoch": 0.7316735822959889, "grad_norm": 4.7271223068237305, "learning_rate": 3.658367911479945e-05, "log_odds_chosen": 3.1375505924224854, "log_odds_ratio": -0.23146921396255493, "logits/chosen": -0.532660961151123, "logits/rejected": -0.580690860748291, "logps/chosen": -0.10345923900604248, "logps/rejected": -0.5149369835853577, "loss": 4.4799, "nll_loss": 1.0968393087387085, "rewards/accuracies": 1.0, "rewards/chosen": -0.010345923714339733, "rewards/margins": 0.04114777594804764, "rewards/rejected": -0.05149369686841965, "step": 1058 }, { "epoch": 0.7323651452282157, "grad_norm": 3.9870223999023438, "learning_rate": 3.661825726141079e-05, "log_odds_chosen": 4.208795070648193, "log_odds_ratio": -0.29262298345565796, "logits/chosen": -0.4202018082141876, "logits/rejected": -0.4788719117641449, "logps/chosen": -0.06881583482027054, "logps/rejected": -0.4822113513946533, "loss": 4.1821, "nll_loss": 1.0162742137908936, "rewards/accuracies": 0.875, "rewards/chosen": -0.006881583947688341, "rewards/margins": 0.04133955016732216, "rewards/rejected": -0.04822114109992981, "step": 1059 }, { "epoch": 0.7330567081604425, "grad_norm": 4.029439449310303, "learning_rate": 3.665283540802213e-05, "log_odds_chosen": 1.8435285091400146, "log_odds_ratio": -0.5828537344932556, "logits/chosen": -0.3349389135837555, "logits/rejected": -0.35388249158859253, "logps/chosen": -0.13364273309707642, "logps/rejected": -0.29924899339675903, "loss": 3.2881, "nll_loss": 0.7637304663658142, "rewards/accuracies": 0.625, "rewards/chosen": -0.013364273123443127, "rewards/margins": 0.016560625284910202, "rewards/rejected": -0.029924899339675903, "step": 1060 }, { "epoch": 0.7337482710926694, "grad_norm": 5.114047527313232, "learning_rate": 3.668741355463347e-05, "log_odds_chosen": 2.7002954483032227, "log_odds_ratio": -0.3071057200431824, "logits/chosen": -0.5675312876701355, "logits/rejected": -0.5511829257011414, "logps/chosen": -0.10216303169727325, "logps/rejected": -0.4013533294200897, "loss": 4.0426, "nll_loss": 0.9799474477767944, "rewards/accuracies": 0.75, "rewards/chosen": -0.010216303169727325, "rewards/margins": 0.029919028282165527, "rewards/rejected": -0.04013533145189285, "step": 1061 }, { "epoch": 0.7344398340248963, "grad_norm": 4.512507438659668, "learning_rate": 3.6721991701244815e-05, "log_odds_chosen": 2.5141499042510986, "log_odds_ratio": -0.3996974229812622, "logits/chosen": -0.46067526936531067, "logits/rejected": -0.4886988699436188, "logps/chosen": -0.14718782901763916, "logps/rejected": -0.6637516021728516, "loss": 3.6508, "nll_loss": 0.8727205991744995, "rewards/accuracies": 0.75, "rewards/chosen": -0.014718784019351006, "rewards/margins": 0.05165638029575348, "rewards/rejected": -0.06637516617774963, "step": 1062 }, { "epoch": 0.7351313969571232, "grad_norm": 4.6516900062561035, "learning_rate": 3.6756569847856156e-05, "log_odds_chosen": 3.213118553161621, "log_odds_ratio": -0.41739317774772644, "logits/chosen": -0.7525394558906555, "logits/rejected": -0.7624413967132568, "logps/chosen": -0.13979551196098328, "logps/rejected": -0.6717532277107239, "loss": 4.4842, "nll_loss": 1.0793054103851318, "rewards/accuracies": 0.625, "rewards/chosen": -0.013979552313685417, "rewards/margins": 0.053195770829916, "rewards/rejected": -0.06717532873153687, "step": 1063 }, { "epoch": 0.73582295988935, "grad_norm": 4.892207145690918, "learning_rate": 3.67911479944675e-05, "log_odds_chosen": 1.7997876405715942, "log_odds_ratio": -0.44318562746047974, "logits/chosen": -0.18329492211341858, "logits/rejected": -0.20506246387958527, "logps/chosen": -0.15136878192424774, "logps/rejected": -0.5139152407646179, "loss": 3.9923, "nll_loss": 0.9537680149078369, "rewards/accuracies": 0.75, "rewards/chosen": -0.015136879868805408, "rewards/margins": 0.0362546443939209, "rewards/rejected": -0.05139152333140373, "step": 1064 }, { "epoch": 0.7365145228215768, "grad_norm": 3.9588499069213867, "learning_rate": 3.682572614107884e-05, "log_odds_chosen": 1.91678786277771, "log_odds_ratio": -0.4147067368030548, "logits/chosen": -0.45388561487197876, "logits/rejected": -0.4534473419189453, "logps/chosen": -0.09650453925132751, "logps/rejected": -0.3927268981933594, "loss": 3.6191, "nll_loss": 0.8632949590682983, "rewards/accuracies": 0.875, "rewards/chosen": -0.009650452993810177, "rewards/margins": 0.029622236266732216, "rewards/rejected": -0.03927268832921982, "step": 1065 }, { "epoch": 0.7372060857538036, "grad_norm": 2.8396570682525635, "learning_rate": 3.686030428769018e-05, "log_odds_chosen": 3.650707483291626, "log_odds_ratio": -0.38111308217048645, "logits/chosen": -0.5955278873443604, "logits/rejected": -0.5792263746261597, "logps/chosen": -0.09282395243644714, "logps/rejected": -0.5960294008255005, "loss": 2.7555, "nll_loss": 0.650758683681488, "rewards/accuracies": 0.75, "rewards/chosen": -0.009282395243644714, "rewards/margins": 0.050320543348789215, "rewards/rejected": -0.05960294231772423, "step": 1066 }, { "epoch": 0.7378976486860305, "grad_norm": 3.476088285446167, "learning_rate": 3.689488243430152e-05, "log_odds_chosen": 2.3841819763183594, "log_odds_ratio": -0.3378468155860901, "logits/chosen": -0.7314380407333374, "logits/rejected": -0.7951211333274841, "logps/chosen": -0.11184002459049225, "logps/rejected": -0.3191815912723541, "loss": 4.3461, "nll_loss": 1.0527474880218506, "rewards/accuracies": 1.0, "rewards/chosen": -0.01118400227278471, "rewards/margins": 0.020734157413244247, "rewards/rejected": -0.03191816061735153, "step": 1067 }, { "epoch": 0.7385892116182573, "grad_norm": 4.4976935386657715, "learning_rate": 3.6929460580912864e-05, "log_odds_chosen": 4.20717191696167, "log_odds_ratio": -0.15988406538963318, "logits/chosen": -0.45512500405311584, "logits/rejected": -0.4465624988079071, "logps/chosen": -0.03990020975470543, "logps/rejected": -0.5817291736602783, "loss": 4.7337, "nll_loss": 1.1674458980560303, "rewards/accuracies": 1.0, "rewards/chosen": -0.0039900210686028, "rewards/margins": 0.05418289452791214, "rewards/rejected": -0.05817291885614395, "step": 1068 }, { "epoch": 0.7392807745504841, "grad_norm": 3.743703603744507, "learning_rate": 3.6964038727524206e-05, "log_odds_chosen": 2.3907783031463623, "log_odds_ratio": -0.3446536362171173, "logits/chosen": -0.4428321421146393, "logits/rejected": -0.43401190638542175, "logps/chosen": -0.08195048570632935, "logps/rejected": -0.3324810266494751, "loss": 3.4512, "nll_loss": 0.8283307552337646, "rewards/accuracies": 0.75, "rewards/chosen": -0.00819504912942648, "rewards/margins": 0.025053054094314575, "rewards/rejected": -0.03324810042977333, "step": 1069 }, { "epoch": 0.739972337482711, "grad_norm": 4.063248157501221, "learning_rate": 3.699861687413555e-05, "log_odds_chosen": 4.055078983306885, "log_odds_ratio": -0.1567329466342926, "logits/chosen": -0.1771300733089447, "logits/rejected": -0.2207050770521164, "logps/chosen": -0.05447046458721161, "logps/rejected": -0.7913841009140015, "loss": 3.1308, "nll_loss": 0.767034649848938, "rewards/accuracies": 1.0, "rewards/chosen": -0.005447045899927616, "rewards/margins": 0.07369136810302734, "rewards/rejected": -0.07913841307163239, "step": 1070 }, { "epoch": 0.7406639004149378, "grad_norm": 2.8043837547302246, "learning_rate": 3.703319502074689e-05, "log_odds_chosen": 2.3212316036224365, "log_odds_ratio": -0.3333531320095062, "logits/chosen": -0.09415624290704727, "logits/rejected": -0.09188832342624664, "logps/chosen": -0.13778002560138702, "logps/rejected": -0.5300951600074768, "loss": 3.8473, "nll_loss": 0.9284874200820923, "rewards/accuracies": 0.75, "rewards/chosen": -0.013778002932667732, "rewards/margins": 0.03923151642084122, "rewards/rejected": -0.0530095174908638, "step": 1071 }, { "epoch": 0.7413554633471646, "grad_norm": 4.631345748901367, "learning_rate": 3.706777316735823e-05, "log_odds_chosen": 3.794403553009033, "log_odds_ratio": -0.16763067245483398, "logits/chosen": -0.21735386550426483, "logits/rejected": -0.21720963716506958, "logps/chosen": -0.04987889155745506, "logps/rejected": -0.5241183042526245, "loss": 3.927, "nll_loss": 0.9649972915649414, "rewards/accuracies": 1.0, "rewards/chosen": -0.004987888969480991, "rewards/margins": 0.047423943877220154, "rewards/rejected": -0.05241183191537857, "step": 1072 }, { "epoch": 0.7420470262793915, "grad_norm": 5.381296634674072, "learning_rate": 3.710235131396957e-05, "log_odds_chosen": 2.529040813446045, "log_odds_ratio": -0.21126320958137512, "logits/chosen": -0.2269861251115799, "logits/rejected": -0.23414316773414612, "logps/chosen": -0.07538623362779617, "logps/rejected": -0.47442883253097534, "loss": 4.7342, "nll_loss": 1.1624130010604858, "rewards/accuracies": 1.0, "rewards/chosen": -0.00753862364217639, "rewards/margins": 0.03990425914525986, "rewards/rejected": -0.047442883253097534, "step": 1073 }, { "epoch": 0.7427385892116183, "grad_norm": 3.97910213470459, "learning_rate": 3.7136929460580914e-05, "log_odds_chosen": 4.230005264282227, "log_odds_ratio": -0.14120924472808838, "logits/chosen": -0.521894097328186, "logits/rejected": -0.5772715210914612, "logps/chosen": -0.03921622037887573, "logps/rejected": -0.6114540100097656, "loss": 3.8008, "nll_loss": 0.9360888004302979, "rewards/accuracies": 1.0, "rewards/chosen": -0.003921622410416603, "rewards/margins": 0.057223785668611526, "rewards/rejected": -0.06114540249109268, "step": 1074 }, { "epoch": 0.7434301521438451, "grad_norm": 2.6997873783111572, "learning_rate": 3.7171507607192255e-05, "log_odds_chosen": 5.320131778717041, "log_odds_ratio": -0.1742401272058487, "logits/chosen": -0.2050633281469345, "logits/rejected": -0.2097817212343216, "logps/chosen": -0.05946308746933937, "logps/rejected": -0.6085449457168579, "loss": 2.5591, "nll_loss": 0.622360348701477, "rewards/accuracies": 1.0, "rewards/chosen": -0.0059463088400661945, "rewards/margins": 0.05490818992257118, "rewards/rejected": -0.06085449829697609, "step": 1075 }, { "epoch": 0.7441217150760719, "grad_norm": 5.3858642578125, "learning_rate": 3.72060857538036e-05, "log_odds_chosen": 2.5684211254119873, "log_odds_ratio": -0.2726511061191559, "logits/chosen": -0.8184974193572998, "logits/rejected": -0.8305962085723877, "logps/chosen": -0.08273748308420181, "logps/rejected": -0.5238097906112671, "loss": 5.5739, "nll_loss": 1.3662147521972656, "rewards/accuracies": 1.0, "rewards/chosen": -0.008273748680949211, "rewards/margins": 0.044107235968112946, "rewards/rejected": -0.05238097906112671, "step": 1076 }, { "epoch": 0.7448132780082988, "grad_norm": 4.981190204620361, "learning_rate": 3.724066390041494e-05, "log_odds_chosen": 2.198141574859619, "log_odds_ratio": -0.4150627851486206, "logits/chosen": -0.5096691250801086, "logits/rejected": -0.5472338199615479, "logps/chosen": -0.11538825929164886, "logps/rejected": -0.4828852415084839, "loss": 4.129, "nll_loss": 0.9907474517822266, "rewards/accuracies": 0.75, "rewards/chosen": -0.011538825929164886, "rewards/margins": 0.0367497019469738, "rewards/rejected": -0.04828852415084839, "step": 1077 }, { "epoch": 0.7455048409405256, "grad_norm": 3.9157955646514893, "learning_rate": 3.727524204702628e-05, "log_odds_chosen": 2.297726631164551, "log_odds_ratio": -0.371995210647583, "logits/chosen": -0.45065683126449585, "logits/rejected": -0.4426935315132141, "logps/chosen": -0.08662683516740799, "logps/rejected": -0.5210797786712646, "loss": 3.581, "nll_loss": 0.8580514192581177, "rewards/accuracies": 0.875, "rewards/chosen": -0.008662683889269829, "rewards/margins": 0.043445296585559845, "rewards/rejected": -0.05210798233747482, "step": 1078 }, { "epoch": 0.7461964038727524, "grad_norm": 4.103110313415527, "learning_rate": 3.730982019363762e-05, "log_odds_chosen": 2.5692861080169678, "log_odds_ratio": -0.34731459617614746, "logits/chosen": -0.07757198810577393, "logits/rejected": -0.10431862622499466, "logps/chosen": -0.13760052621364594, "logps/rejected": -0.558710515499115, "loss": 3.3979, "nll_loss": 0.8147333264350891, "rewards/accuracies": 0.875, "rewards/chosen": -0.013760052621364594, "rewards/margins": 0.042111001908779144, "rewards/rejected": -0.05587105453014374, "step": 1079 }, { "epoch": 0.7468879668049793, "grad_norm": 4.3268609046936035, "learning_rate": 3.734439834024896e-05, "log_odds_chosen": 3.42661190032959, "log_odds_ratio": -0.2674722969532013, "logits/chosen": -0.5790812373161316, "logits/rejected": -0.6419976949691772, "logps/chosen": -0.1267891228199005, "logps/rejected": -1.1352012157440186, "loss": 3.6002, "nll_loss": 0.8733097314834595, "rewards/accuracies": 0.875, "rewards/chosen": -0.012678911909461021, "rewards/margins": 0.10084120184183121, "rewards/rejected": -0.11352010816335678, "step": 1080 }, { "epoch": 0.7475795297372061, "grad_norm": 8.72693157196045, "learning_rate": 3.7378976486860305e-05, "log_odds_chosen": 2.839428424835205, "log_odds_ratio": -0.6887301206588745, "logits/chosen": -0.11714953929185867, "logits/rejected": -0.09862416982650757, "logps/chosen": -0.16352665424346924, "logps/rejected": -0.43877363204956055, "loss": 4.117, "nll_loss": 0.9603717923164368, "rewards/accuracies": 0.625, "rewards/chosen": -0.016352666541934013, "rewards/margins": 0.027524694800376892, "rewards/rejected": -0.043877359479665756, "step": 1081 }, { "epoch": 0.7482710926694329, "grad_norm": 5.499275207519531, "learning_rate": 3.7413554633471646e-05, "log_odds_chosen": 2.351445198059082, "log_odds_ratio": -0.3783484697341919, "logits/chosen": -0.5679388642311096, "logits/rejected": -0.6132568120956421, "logps/chosen": -0.1187373548746109, "logps/rejected": -0.541204035282135, "loss": 4.3354, "nll_loss": 1.046006679534912, "rewards/accuracies": 0.75, "rewards/chosen": -0.011873736046254635, "rewards/margins": 0.04224666580557823, "rewards/rejected": -0.05412040278315544, "step": 1082 }, { "epoch": 0.7489626556016598, "grad_norm": 5.204410076141357, "learning_rate": 3.744813278008299e-05, "log_odds_chosen": 3.1782922744750977, "log_odds_ratio": -0.5147818922996521, "logits/chosen": -0.44969701766967773, "logits/rejected": -0.5115452408790588, "logps/chosen": -0.21525892615318298, "logps/rejected": -0.6728153228759766, "loss": 4.2166, "nll_loss": 1.0026686191558838, "rewards/accuracies": 0.625, "rewards/chosen": -0.021525893360376358, "rewards/margins": 0.04575563967227936, "rewards/rejected": -0.06728152930736542, "step": 1083 }, { "epoch": 0.7496542185338866, "grad_norm": 4.421257019042969, "learning_rate": 3.748271092669433e-05, "log_odds_chosen": 3.770069122314453, "log_odds_ratio": -0.2764526605606079, "logits/chosen": -0.5885463953018188, "logits/rejected": -0.6559880971908569, "logps/chosen": -0.1313014030456543, "logps/rejected": -0.7071733474731445, "loss": 3.618, "nll_loss": 0.8768469095230103, "rewards/accuracies": 0.875, "rewards/chosen": -0.01313013769686222, "rewards/margins": 0.057587191462516785, "rewards/rejected": -0.07071733474731445, "step": 1084 }, { "epoch": 0.7503457814661134, "grad_norm": 5.926955223083496, "learning_rate": 3.751728907330567e-05, "log_odds_chosen": 2.0917410850524902, "log_odds_ratio": -0.3241581320762634, "logits/chosen": -0.5161243677139282, "logits/rejected": -0.5618958473205566, "logps/chosen": -0.18530499935150146, "logps/rejected": -0.7070559859275818, "loss": 5.7767, "nll_loss": 1.4117603302001953, "rewards/accuracies": 0.875, "rewards/chosen": -0.018530499190092087, "rewards/margins": 0.05217510089278221, "rewards/rejected": -0.0707056000828743, "step": 1085 }, { "epoch": 0.7510373443983402, "grad_norm": 4.207293510437012, "learning_rate": 3.755186721991701e-05, "log_odds_chosen": 2.306514263153076, "log_odds_ratio": -0.42577362060546875, "logits/chosen": -0.7863785624504089, "logits/rejected": -0.8544211983680725, "logps/chosen": -0.18335390090942383, "logps/rejected": -0.6212817430496216, "loss": 3.0202, "nll_loss": 0.71247798204422, "rewards/accuracies": 0.75, "rewards/chosen": -0.018335388973355293, "rewards/margins": 0.043792787939310074, "rewards/rejected": -0.062128178775310516, "step": 1086 }, { "epoch": 0.7517289073305671, "grad_norm": 5.270834445953369, "learning_rate": 3.7586445366528354e-05, "log_odds_chosen": 2.643711566925049, "log_odds_ratio": -0.4310038983821869, "logits/chosen": -0.4614013135433197, "logits/rejected": -0.4456390142440796, "logps/chosen": -0.15072834491729736, "logps/rejected": -0.5333762168884277, "loss": 2.8166, "nll_loss": 0.6610429883003235, "rewards/accuracies": 0.625, "rewards/chosen": -0.015072835609316826, "rewards/margins": 0.038264788687229156, "rewards/rejected": -0.05333762615919113, "step": 1087 }, { "epoch": 0.7524204702627939, "grad_norm": 5.288963317871094, "learning_rate": 3.7621023513139696e-05, "log_odds_chosen": 3.5286176204681396, "log_odds_ratio": -0.40869560837745667, "logits/chosen": -0.7788718938827515, "logits/rejected": -0.8168633580207825, "logps/chosen": -0.1163693368434906, "logps/rejected": -0.8496063947677612, "loss": 5.0915, "nll_loss": 1.2319990396499634, "rewards/accuracies": 0.75, "rewards/chosen": -0.01163693517446518, "rewards/margins": 0.07332369685173035, "rewards/rejected": -0.08496063202619553, "step": 1088 }, { "epoch": 0.7531120331950207, "grad_norm": 4.082790374755859, "learning_rate": 3.765560165975104e-05, "log_odds_chosen": 6.779488563537598, "log_odds_ratio": -0.03812899813055992, "logits/chosen": -0.6678798198699951, "logits/rejected": -0.7273062467575073, "logps/chosen": -0.011440301313996315, "logps/rejected": -1.4002537727355957, "loss": 4.1497, "nll_loss": 1.0336132049560547, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011440301313996315, "rewards/margins": 0.1388813555240631, "rewards/rejected": -0.14002537727355957, "step": 1089 }, { "epoch": 0.7538035961272476, "grad_norm": 7.497416973114014, "learning_rate": 3.769017980636238e-05, "log_odds_chosen": 2.679985523223877, "log_odds_ratio": -0.48144277930259705, "logits/chosen": -0.7045676708221436, "logits/rejected": -0.7712850570678711, "logps/chosen": -0.18166804313659668, "logps/rejected": -0.6683666706085205, "loss": 5.1019, "nll_loss": 1.2273311614990234, "rewards/accuracies": 0.75, "rewards/chosen": -0.018166804686188698, "rewards/margins": 0.048669859766960144, "rewards/rejected": -0.06683666259050369, "step": 1090 }, { "epoch": 0.7544951590594744, "grad_norm": 11.02239990234375, "learning_rate": 3.772475795297372e-05, "log_odds_chosen": 5.144924640655518, "log_odds_ratio": -0.21654638648033142, "logits/chosen": -1.0382695198059082, "logits/rejected": -1.0143285989761353, "logps/chosen": -0.05242515355348587, "logps/rejected": -0.6436967849731445, "loss": 3.9557, "nll_loss": 0.9672713875770569, "rewards/accuracies": 0.875, "rewards/chosen": -0.005242515355348587, "rewards/margins": 0.05912715941667557, "rewards/rejected": -0.06436967849731445, "step": 1091 }, { "epoch": 0.7551867219917012, "grad_norm": 7.538413047790527, "learning_rate": 3.775933609958506e-05, "log_odds_chosen": 2.380988597869873, "log_odds_ratio": -0.8324184417724609, "logits/chosen": -0.6824043989181519, "logits/rejected": -0.6923583745956421, "logps/chosen": -0.19706298410892487, "logps/rejected": -0.5978980660438538, "loss": 4.4174, "nll_loss": 1.0211037397384644, "rewards/accuracies": 0.625, "rewards/chosen": -0.019706297665834427, "rewards/margins": 0.04008351266384125, "rewards/rejected": -0.059789810329675674, "step": 1092 }, { "epoch": 0.7558782849239281, "grad_norm": 4.259000778198242, "learning_rate": 3.7793914246196403e-05, "log_odds_chosen": 2.0974249839782715, "log_odds_ratio": -0.2188202440738678, "logits/chosen": -0.6757645010948181, "logits/rejected": -0.6860700845718384, "logps/chosen": -0.10299341380596161, "logps/rejected": -0.5411241054534912, "loss": 3.923, "nll_loss": 0.9588569402694702, "rewards/accuracies": 0.875, "rewards/chosen": -0.01029934175312519, "rewards/margins": 0.0438130684196949, "rewards/rejected": -0.054112404584884644, "step": 1093 }, { "epoch": 0.7565698478561549, "grad_norm": 3.8153839111328125, "learning_rate": 3.7828492392807745e-05, "log_odds_chosen": 4.307488441467285, "log_odds_ratio": -0.2924882769584656, "logits/chosen": -0.5414397716522217, "logits/rejected": -0.5879523158073425, "logps/chosen": -0.08990863710641861, "logps/rejected": -0.6482211947441101, "loss": 3.6132, "nll_loss": 0.8740568161010742, "rewards/accuracies": 0.75, "rewards/chosen": -0.008990864269435406, "rewards/margins": 0.05583126097917557, "rewards/rejected": -0.06482212245464325, "step": 1094 }, { "epoch": 0.7572614107883817, "grad_norm": 4.142021656036377, "learning_rate": 3.7863070539419087e-05, "log_odds_chosen": 4.343199253082275, "log_odds_ratio": -0.22691279649734497, "logits/chosen": -0.778051495552063, "logits/rejected": -0.840363621711731, "logps/chosen": -0.048902321606874466, "logps/rejected": -0.7048326730728149, "loss": 3.5437, "nll_loss": 0.8632230758666992, "rewards/accuracies": 0.75, "rewards/chosen": -0.0048902323469519615, "rewards/margins": 0.06559304147958755, "rewards/rejected": -0.0704832673072815, "step": 1095 }, { "epoch": 0.7579529737206085, "grad_norm": 3.198138952255249, "learning_rate": 3.789764868603043e-05, "log_odds_chosen": 4.851378440856934, "log_odds_ratio": -0.3832613527774811, "logits/chosen": -0.6265957355499268, "logits/rejected": -0.6354972124099731, "logps/chosen": -0.13120141625404358, "logps/rejected": -0.8701565265655518, "loss": 3.1297, "nll_loss": 0.7441052794456482, "rewards/accuracies": 0.625, "rewards/chosen": -0.013120142742991447, "rewards/margins": 0.07389551401138306, "rewards/rejected": -0.08701566606760025, "step": 1096 }, { "epoch": 0.7586445366528354, "grad_norm": 5.956087112426758, "learning_rate": 3.793222683264177e-05, "log_odds_chosen": 3.1593127250671387, "log_odds_ratio": -0.5399028062820435, "logits/chosen": -0.7491406202316284, "logits/rejected": -0.7200503945350647, "logps/chosen": -0.10669641196727753, "logps/rejected": -0.507939875125885, "loss": 3.3775, "nll_loss": 0.7903934717178345, "rewards/accuracies": 0.75, "rewards/chosen": -0.010669643059372902, "rewards/margins": 0.04012434929609299, "rewards/rejected": -0.05079399049282074, "step": 1097 }, { "epoch": 0.7593360995850622, "grad_norm": 3.8274338245391846, "learning_rate": 3.796680497925311e-05, "log_odds_chosen": 5.283022403717041, "log_odds_ratio": -0.122711181640625, "logits/chosen": -0.7252033948898315, "logits/rejected": -0.7651737332344055, "logps/chosen": -0.031005796045064926, "logps/rejected": -0.9215719699859619, "loss": 4.2154, "nll_loss": 1.0415713787078857, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031005796045064926, "rewards/margins": 0.08905662596225739, "rewards/rejected": -0.09215720742940903, "step": 1098 }, { "epoch": 0.760027662517289, "grad_norm": 6.531271457672119, "learning_rate": 3.800138312586445e-05, "log_odds_chosen": 2.260535717010498, "log_odds_ratio": -0.5166959166526794, "logits/chosen": -0.5828487873077393, "logits/rejected": -0.6525802612304688, "logps/chosen": -0.12451886385679245, "logps/rejected": -0.7832128405570984, "loss": 4.359, "nll_loss": 1.0380773544311523, "rewards/accuracies": 0.75, "rewards/chosen": -0.01245188619941473, "rewards/margins": 0.06586939096450806, "rewards/rejected": -0.07832127809524536, "step": 1099 }, { "epoch": 0.7607192254495159, "grad_norm": 6.007966041564941, "learning_rate": 3.8035961272475794e-05, "log_odds_chosen": 0.9574238061904907, "log_odds_ratio": -0.7949805855751038, "logits/chosen": -0.5290077924728394, "logits/rejected": -0.5131245255470276, "logps/chosen": -0.3000096380710602, "logps/rejected": -0.44282999634742737, "loss": 4.4522, "nll_loss": 1.0335532426834106, "rewards/accuracies": 0.5, "rewards/chosen": -0.030000966042280197, "rewards/margins": 0.01428203471004963, "rewards/rejected": -0.044283002614974976, "step": 1100 }, { "epoch": 0.7614107883817427, "grad_norm": 4.777920722961426, "learning_rate": 3.8070539419087136e-05, "log_odds_chosen": 2.418083667755127, "log_odds_ratio": -0.36667677760124207, "logits/chosen": -0.6750804781913757, "logits/rejected": -0.737511396408081, "logps/chosen": -0.0945938378572464, "logps/rejected": -0.6871483325958252, "loss": 4.6545, "nll_loss": 1.1269659996032715, "rewards/accuracies": 0.75, "rewards/chosen": -0.009459384717047215, "rewards/margins": 0.0592554472386837, "rewards/rejected": -0.06871482729911804, "step": 1101 }, { "epoch": 0.7621023513139695, "grad_norm": 4.31030797958374, "learning_rate": 3.810511756569848e-05, "log_odds_chosen": 1.0872337818145752, "log_odds_ratio": -0.3637908697128296, "logits/chosen": -0.8375824689865112, "logits/rejected": -0.8417525291442871, "logps/chosen": -0.15087522566318512, "logps/rejected": -0.3603815734386444, "loss": 4.4087, "nll_loss": 1.065795660018921, "rewards/accuracies": 1.0, "rewards/chosen": -0.015087523497641087, "rewards/margins": 0.02095063589513302, "rewards/rejected": -0.03603815659880638, "step": 1102 }, { "epoch": 0.7627939142461964, "grad_norm": 4.248500823974609, "learning_rate": 3.813969571230982e-05, "log_odds_chosen": 2.1251821517944336, "log_odds_ratio": -0.5379229784011841, "logits/chosen": -0.8146258592605591, "logits/rejected": -0.8227401971817017, "logps/chosen": -0.25022798776626587, "logps/rejected": -0.43799692392349243, "loss": 3.9286, "nll_loss": 0.9283566474914551, "rewards/accuracies": 0.75, "rewards/chosen": -0.025022799149155617, "rewards/margins": 0.018776895478367805, "rewards/rejected": -0.043799690902233124, "step": 1103 }, { "epoch": 0.7634854771784232, "grad_norm": 6.478932857513428, "learning_rate": 3.817427385892116e-05, "log_odds_chosen": 3.2808871269226074, "log_odds_ratio": -0.6370083689689636, "logits/chosen": -0.5142711400985718, "logits/rejected": -0.5442649722099304, "logps/chosen": -0.17024749517440796, "logps/rejected": -0.7139173150062561, "loss": 4.3995, "nll_loss": 1.0361793041229248, "rewards/accuracies": 0.75, "rewards/chosen": -0.017024749889969826, "rewards/margins": 0.05436699092388153, "rewards/rejected": -0.07139173895120621, "step": 1104 }, { "epoch": 0.76417704011065, "grad_norm": 6.064324378967285, "learning_rate": 3.82088520055325e-05, "log_odds_chosen": 2.0439133644104004, "log_odds_ratio": -0.6495869755744934, "logits/chosen": -0.5766505002975464, "logits/rejected": -0.6375952363014221, "logps/chosen": -0.11183735728263855, "logps/rejected": -0.4476245641708374, "loss": 4.1578, "nll_loss": 0.9744910001754761, "rewards/accuracies": 0.625, "rewards/chosen": -0.01118373591452837, "rewards/margins": 0.033578719943761826, "rewards/rejected": -0.04476245492696762, "step": 1105 }, { "epoch": 0.7648686030428768, "grad_norm": 5.818049430847168, "learning_rate": 3.8243430152143844e-05, "log_odds_chosen": 3.604917526245117, "log_odds_ratio": -0.20862287282943726, "logits/chosen": -0.40988677740097046, "logits/rejected": -0.541254460811615, "logps/chosen": -0.1304902732372284, "logps/rejected": -0.8513497114181519, "loss": 4.8317, "nll_loss": 1.1870533227920532, "rewards/accuracies": 1.0, "rewards/chosen": -0.01304902695119381, "rewards/margins": 0.07208594679832458, "rewards/rejected": -0.08513498306274414, "step": 1106 }, { "epoch": 0.7655601659751037, "grad_norm": 4.074938774108887, "learning_rate": 3.8278008298755185e-05, "log_odds_chosen": 3.30131459236145, "log_odds_ratio": -0.4436090588569641, "logits/chosen": -0.2585781514644623, "logits/rejected": -0.2200646549463272, "logps/chosen": -0.17219777405261993, "logps/rejected": -0.4860021770000458, "loss": 3.6635, "nll_loss": 0.8715248703956604, "rewards/accuracies": 0.75, "rewards/chosen": -0.017219776287674904, "rewards/margins": 0.031380441039800644, "rewards/rejected": -0.0486002191901207, "step": 1107 }, { "epoch": 0.7662517289073306, "grad_norm": 4.7798752784729, "learning_rate": 3.8312586445366534e-05, "log_odds_chosen": 3.8130784034729004, "log_odds_ratio": -0.46914446353912354, "logits/chosen": -0.3986022472381592, "logits/rejected": -0.44066357612609863, "logps/chosen": -0.11577075719833374, "logps/rejected": -0.608155369758606, "loss": 2.5063, "nll_loss": 0.5796663761138916, "rewards/accuracies": 0.875, "rewards/chosen": -0.011577075347304344, "rewards/margins": 0.04923846200108528, "rewards/rejected": -0.060815539211034775, "step": 1108 }, { "epoch": 0.7669432918395575, "grad_norm": 4.062282085418701, "learning_rate": 3.8347164591977875e-05, "log_odds_chosen": 2.94562029838562, "log_odds_ratio": -0.3354604244232178, "logits/chosen": -0.9019069671630859, "logits/rejected": -0.8851369619369507, "logps/chosen": -0.13271500170230865, "logps/rejected": -0.5042838454246521, "loss": 4.0718, "nll_loss": 0.9844123125076294, "rewards/accuracies": 0.875, "rewards/chosen": -0.013271501287817955, "rewards/margins": 0.03715688735246658, "rewards/rejected": -0.05042839050292969, "step": 1109 }, { "epoch": 0.7676348547717843, "grad_norm": 3.5688350200653076, "learning_rate": 3.838174273858922e-05, "log_odds_chosen": 3.1504390239715576, "log_odds_ratio": -0.1645454466342926, "logits/chosen": -0.5958057641983032, "logits/rejected": -0.5955528020858765, "logps/chosen": -0.07106180489063263, "logps/rejected": -0.6669843792915344, "loss": 3.9271, "nll_loss": 0.9653175473213196, "rewards/accuracies": 1.0, "rewards/chosen": -0.007106180768460035, "rewards/margins": 0.05959225445985794, "rewards/rejected": -0.06669843941926956, "step": 1110 }, { "epoch": 0.7683264177040111, "grad_norm": 3.4419846534729004, "learning_rate": 3.841632088520056e-05, "log_odds_chosen": 2.8391995429992676, "log_odds_ratio": -0.2667177617549896, "logits/chosen": -0.47049030661582947, "logits/rejected": -0.4641495943069458, "logps/chosen": -0.15538278222084045, "logps/rejected": -0.5239760279655457, "loss": 3.7415, "nll_loss": 0.9087094068527222, "rewards/accuracies": 0.875, "rewards/chosen": -0.015538278967142105, "rewards/margins": 0.03685932606458664, "rewards/rejected": -0.052397601306438446, "step": 1111 }, { "epoch": 0.7690179806362379, "grad_norm": 4.275797367095947, "learning_rate": 3.84508990318119e-05, "log_odds_chosen": 1.1996444463729858, "log_odds_ratio": -0.38345867395401, "logits/chosen": -0.4088421165943146, "logits/rejected": -0.4383625388145447, "logps/chosen": -0.14941827952861786, "logps/rejected": -0.30568715929985046, "loss": 4.1739, "nll_loss": 1.005133032798767, "rewards/accuracies": 0.875, "rewards/chosen": -0.014941826462745667, "rewards/margins": 0.01562688872218132, "rewards/rejected": -0.030568715184926987, "step": 1112 }, { "epoch": 0.7697095435684648, "grad_norm": 4.479307651519775, "learning_rate": 3.848547717842324e-05, "log_odds_chosen": 2.365272283554077, "log_odds_ratio": -0.3554462790489197, "logits/chosen": -0.8425488471984863, "logits/rejected": -0.8144983053207397, "logps/chosen": -0.1148102805018425, "logps/rejected": -0.4265750050544739, "loss": 4.7071, "nll_loss": 1.1412395238876343, "rewards/accuracies": 0.75, "rewards/chosen": -0.01148102805018425, "rewards/margins": 0.031176473945379257, "rewards/rejected": -0.042657505720853806, "step": 1113 }, { "epoch": 0.7704011065006916, "grad_norm": 4.078261852264404, "learning_rate": 3.852005532503458e-05, "log_odds_chosen": 2.991490602493286, "log_odds_ratio": -0.22407673299312592, "logits/chosen": -0.5358573198318481, "logits/rejected": -0.5682927370071411, "logps/chosen": -0.10035638511180878, "logps/rejected": -0.6249147057533264, "loss": 3.4856, "nll_loss": 0.848996639251709, "rewards/accuracies": 0.875, "rewards/chosen": -0.010035638697445393, "rewards/margins": 0.052455835044384, "rewards/rejected": -0.06249146908521652, "step": 1114 }, { "epoch": 0.7710926694329184, "grad_norm": 4.2970662117004395, "learning_rate": 3.8554633471645925e-05, "log_odds_chosen": 1.0146849155426025, "log_odds_ratio": -0.46182528138160706, "logits/chosen": -0.7279335856437683, "logits/rejected": -0.7823548316955566, "logps/chosen": -0.16933849453926086, "logps/rejected": -0.41459232568740845, "loss": 4.9735, "nll_loss": 1.1972006559371948, "rewards/accuracies": 0.75, "rewards/chosen": -0.016933850944042206, "rewards/margins": 0.02452538162469864, "rewards/rejected": -0.041459232568740845, "step": 1115 }, { "epoch": 0.7717842323651453, "grad_norm": 4.411706447601318, "learning_rate": 3.8589211618257266e-05, "log_odds_chosen": 4.425901412963867, "log_odds_ratio": -0.20330864191055298, "logits/chosen": -0.5573688745498657, "logits/rejected": -0.6300353407859802, "logps/chosen": -0.08963973075151443, "logps/rejected": -0.8802515864372253, "loss": 3.438, "nll_loss": 0.8391615152359009, "rewards/accuracies": 0.875, "rewards/chosen": -0.008963974192738533, "rewards/margins": 0.07906118780374527, "rewards/rejected": -0.08802516013383865, "step": 1116 }, { "epoch": 0.7724757952973721, "grad_norm": 4.105024814605713, "learning_rate": 3.862378976486861e-05, "log_odds_chosen": 3.935359001159668, "log_odds_ratio": -0.12453159689903259, "logits/chosen": -0.7120730876922607, "logits/rejected": -0.7461434602737427, "logps/chosen": -0.08041106164455414, "logps/rejected": -0.8631589412689209, "loss": 3.263, "nll_loss": 0.8032896518707275, "rewards/accuracies": 1.0, "rewards/chosen": -0.008041106164455414, "rewards/margins": 0.07827478647232056, "rewards/rejected": -0.08631589263677597, "step": 1117 }, { "epoch": 0.7731673582295989, "grad_norm": 3.715104579925537, "learning_rate": 3.865836791147995e-05, "log_odds_chosen": 3.0843594074249268, "log_odds_ratio": -0.3770461082458496, "logits/chosen": -0.47825729846954346, "logits/rejected": -0.5002142190933228, "logps/chosen": -0.12397563457489014, "logps/rejected": -0.5420525074005127, "loss": 3.286, "nll_loss": 0.7838032245635986, "rewards/accuracies": 1.0, "rewards/chosen": -0.012397563084959984, "rewards/margins": 0.041807692497968674, "rewards/rejected": -0.05420524999499321, "step": 1118 }, { "epoch": 0.7738589211618258, "grad_norm": 3.9022834300994873, "learning_rate": 3.869294605809129e-05, "log_odds_chosen": 4.696831703186035, "log_odds_ratio": -0.1862536519765854, "logits/chosen": -0.680198609828949, "logits/rejected": -0.6572217345237732, "logps/chosen": -0.023391971364617348, "logps/rejected": -0.6749863624572754, "loss": 3.9703, "nll_loss": 0.9739567041397095, "rewards/accuracies": 0.875, "rewards/chosen": -0.002339197089895606, "rewards/margins": 0.06515943259000778, "rewards/rejected": -0.06749863177537918, "step": 1119 }, { "epoch": 0.7745504840940526, "grad_norm": 3.49310040473938, "learning_rate": 3.872752420470263e-05, "log_odds_chosen": 3.1153817176818848, "log_odds_ratio": -0.3262585699558258, "logits/chosen": -0.4032193422317505, "logits/rejected": -0.4326457679271698, "logps/chosen": -0.2284383922815323, "logps/rejected": -0.5586980581283569, "loss": 4.6027, "nll_loss": 1.1180399656295776, "rewards/accuracies": 0.875, "rewards/chosen": -0.02284383960068226, "rewards/margins": 0.033025968819856644, "rewards/rejected": -0.05586981028318405, "step": 1120 }, { "epoch": 0.7752420470262794, "grad_norm": 4.8784637451171875, "learning_rate": 3.8762102351313974e-05, "log_odds_chosen": 1.9118473529815674, "log_odds_ratio": -0.49750471115112305, "logits/chosen": -0.6266558170318604, "logits/rejected": -0.6750451326370239, "logps/chosen": -0.19696509838104248, "logps/rejected": -0.567264199256897, "loss": 4.9556, "nll_loss": 1.1891417503356934, "rewards/accuracies": 0.875, "rewards/chosen": -0.019696509465575218, "rewards/margins": 0.03702991455793381, "rewards/rejected": -0.056726425886154175, "step": 1121 }, { "epoch": 0.7759336099585062, "grad_norm": 4.29984712600708, "learning_rate": 3.8796680497925316e-05, "log_odds_chosen": 3.8260724544525146, "log_odds_ratio": -0.5127156972885132, "logits/chosen": -0.7006309032440186, "logits/rejected": -0.6704519391059875, "logps/chosen": -0.26238536834716797, "logps/rejected": -0.6787563562393188, "loss": 3.134, "nll_loss": 0.7322263717651367, "rewards/accuracies": 0.75, "rewards/chosen": -0.026238534599542618, "rewards/margins": 0.04163710027933121, "rewards/rejected": -0.06787563860416412, "step": 1122 }, { "epoch": 0.7766251728907331, "grad_norm": 3.7785251140594482, "learning_rate": 3.883125864453666e-05, "log_odds_chosen": 3.391031265258789, "log_odds_ratio": -0.25807151198387146, "logits/chosen": -0.5419265031814575, "logits/rejected": -0.5877819061279297, "logps/chosen": -0.06500020623207092, "logps/rejected": -0.5630817413330078, "loss": 4.2755, "nll_loss": 1.043055772781372, "rewards/accuracies": 0.875, "rewards/chosen": -0.006500020623207092, "rewards/margins": 0.049808159470558167, "rewards/rejected": -0.05630818009376526, "step": 1123 }, { "epoch": 0.7773167358229599, "grad_norm": 4.714626312255859, "learning_rate": 3.8865836791148e-05, "log_odds_chosen": 1.7221829891204834, "log_odds_ratio": -0.3858616054058075, "logits/chosen": -0.7497819662094116, "logits/rejected": -0.7688895463943481, "logps/chosen": -0.12314295023679733, "logps/rejected": -0.5708560347557068, "loss": 4.1738, "nll_loss": 1.0048553943634033, "rewards/accuracies": 0.875, "rewards/chosen": -0.012314295396208763, "rewards/margins": 0.04477131739258766, "rewards/rejected": -0.05708560720086098, "step": 1124 }, { "epoch": 0.7780082987551867, "grad_norm": 4.4347004890441895, "learning_rate": 3.890041493775934e-05, "log_odds_chosen": 3.8225934505462646, "log_odds_ratio": -0.278799831867218, "logits/chosen": -0.5871791839599609, "logits/rejected": -0.6095415353775024, "logps/chosen": -0.05089031159877777, "logps/rejected": -0.5935240387916565, "loss": 3.8368, "nll_loss": 0.9313230514526367, "rewards/accuracies": 0.875, "rewards/chosen": -0.005089031532406807, "rewards/margins": 0.05426337197422981, "rewards/rejected": -0.05935240536928177, "step": 1125 }, { "epoch": 0.7786998616874136, "grad_norm": 5.964032173156738, "learning_rate": 3.893499308437068e-05, "log_odds_chosen": 1.3911092281341553, "log_odds_ratio": -0.6403376460075378, "logits/chosen": -0.45489662885665894, "logits/rejected": -0.45509597659111023, "logps/chosen": -0.20690639317035675, "logps/rejected": -0.44346410036087036, "loss": 4.6253, "nll_loss": 1.0922995805740356, "rewards/accuracies": 0.625, "rewards/chosen": -0.020690638571977615, "rewards/margins": 0.02365577220916748, "rewards/rejected": -0.0443464070558548, "step": 1126 }, { "epoch": 0.7793914246196404, "grad_norm": 4.736138820648193, "learning_rate": 3.896957123098202e-05, "log_odds_chosen": 1.951234221458435, "log_odds_ratio": -0.2717779278755188, "logits/chosen": -0.4384583830833435, "logits/rejected": -0.45516031980514526, "logps/chosen": -0.180232971906662, "logps/rejected": -0.49474045634269714, "loss": 4.4218, "nll_loss": 1.0782605409622192, "rewards/accuracies": 0.875, "rewards/chosen": -0.0180232971906662, "rewards/margins": 0.031450752168893814, "rewards/rejected": -0.04947404935956001, "step": 1127 }, { "epoch": 0.7800829875518672, "grad_norm": 4.4454121589660645, "learning_rate": 3.9004149377593365e-05, "log_odds_chosen": 0.8737515211105347, "log_odds_ratio": -0.48570409417152405, "logits/chosen": -0.5742661952972412, "logits/rejected": -0.6075069308280945, "logps/chosen": -0.19278815388679504, "logps/rejected": -0.38309305906295776, "loss": 3.8146, "nll_loss": 0.9050906896591187, "rewards/accuracies": 0.75, "rewards/chosen": -0.019278815016150475, "rewards/margins": 0.019030490890145302, "rewards/rejected": -0.038309305906295776, "step": 1128 }, { "epoch": 0.780774550484094, "grad_norm": 3.730633497238159, "learning_rate": 3.9038727524204706e-05, "log_odds_chosen": 4.613859176635742, "log_odds_ratio": -0.2876426875591278, "logits/chosen": -0.618686318397522, "logits/rejected": -0.630027711391449, "logps/chosen": -0.0700341984629631, "logps/rejected": -0.9240524172782898, "loss": 3.7821, "nll_loss": 0.9167693853378296, "rewards/accuracies": 0.875, "rewards/chosen": -0.007003419566899538, "rewards/margins": 0.08540181815624237, "rewards/rejected": -0.09240524470806122, "step": 1129 }, { "epoch": 0.7814661134163209, "grad_norm": 3.9620399475097656, "learning_rate": 3.907330567081605e-05, "log_odds_chosen": 5.74013614654541, "log_odds_ratio": -0.04481692984700203, "logits/chosen": -0.5398914813995361, "logits/rejected": -0.6123236417770386, "logps/chosen": -0.020203545689582825, "logps/rejected": -0.8377431631088257, "loss": 3.3303, "nll_loss": 0.8280977010726929, "rewards/accuracies": 1.0, "rewards/chosen": -0.002020354615524411, "rewards/margins": 0.08175395429134369, "rewards/rejected": -0.08377431333065033, "step": 1130 }, { "epoch": 0.7821576763485477, "grad_norm": 4.754410743713379, "learning_rate": 3.910788381742739e-05, "log_odds_chosen": 3.7495689392089844, "log_odds_ratio": -0.6508976221084595, "logits/chosen": -0.24432966113090515, "logits/rejected": -0.3001052737236023, "logps/chosen": -0.12211127579212189, "logps/rejected": -0.8259966969490051, "loss": 3.4209, "nll_loss": 0.7901304960250854, "rewards/accuracies": 0.75, "rewards/chosen": -0.012211127206683159, "rewards/margins": 0.0703885406255722, "rewards/rejected": -0.08259966969490051, "step": 1131 }, { "epoch": 0.7828492392807745, "grad_norm": 5.5947771072387695, "learning_rate": 3.914246196403873e-05, "log_odds_chosen": 2.722187042236328, "log_odds_ratio": -0.1351226270198822, "logits/chosen": -0.27186745405197144, "logits/rejected": -0.29344063997268677, "logps/chosen": -0.045510705560445786, "logps/rejected": -0.46628302335739136, "loss": 4.7851, "nll_loss": 1.1827595233917236, "rewards/accuracies": 1.0, "rewards/chosen": -0.0045510707423090935, "rewards/margins": 0.042077235877513885, "rewards/rejected": -0.04662831127643585, "step": 1132 }, { "epoch": 0.7835408022130014, "grad_norm": 4.867863178253174, "learning_rate": 3.917704011065007e-05, "log_odds_chosen": 5.198690891265869, "log_odds_ratio": -0.270771861076355, "logits/chosen": -0.4652605950832367, "logits/rejected": -0.4993474781513214, "logps/chosen": -0.16884656250476837, "logps/rejected": -0.8110989332199097, "loss": 3.881, "nll_loss": 0.9431832432746887, "rewards/accuracies": 0.75, "rewards/chosen": -0.016884656623005867, "rewards/margins": 0.06422524154186249, "rewards/rejected": -0.0811098963022232, "step": 1133 }, { "epoch": 0.7842323651452282, "grad_norm": 4.890600681304932, "learning_rate": 3.9211618257261414e-05, "log_odds_chosen": 0.5457233786582947, "log_odds_ratio": -0.7434303760528564, "logits/chosen": -0.41072139143943787, "logits/rejected": -0.45201027393341064, "logps/chosen": -0.30787140130996704, "logps/rejected": -0.31583571434020996, "loss": 4.3617, "nll_loss": 1.016086459159851, "rewards/accuracies": 0.75, "rewards/chosen": -0.030787140130996704, "rewards/margins": 0.0007964321412146091, "rewards/rejected": -0.03158356994390488, "step": 1134 }, { "epoch": 0.784923928077455, "grad_norm": 3.413855791091919, "learning_rate": 3.9246196403872756e-05, "log_odds_chosen": 3.549720287322998, "log_odds_ratio": -0.19492925703525543, "logits/chosen": -0.4394097328186035, "logits/rejected": -0.4626482427120209, "logps/chosen": -0.14576005935668945, "logps/rejected": -0.8169733881950378, "loss": 2.9895, "nll_loss": 0.7278827428817749, "rewards/accuracies": 1.0, "rewards/chosen": -0.014576006680727005, "rewards/margins": 0.06712133437395096, "rewards/rejected": -0.08169733732938766, "step": 1135 }, { "epoch": 0.7856154910096819, "grad_norm": 5.547590255737305, "learning_rate": 3.92807745504841e-05, "log_odds_chosen": 2.836033344268799, "log_odds_ratio": -0.4607436954975128, "logits/chosen": -0.47262442111968994, "logits/rejected": -0.46149805188179016, "logps/chosen": -0.2260802686214447, "logps/rejected": -0.8693596124649048, "loss": 4.2696, "nll_loss": 1.0213253498077393, "rewards/accuracies": 0.625, "rewards/chosen": -0.02260802686214447, "rewards/margins": 0.06432792544364929, "rewards/rejected": -0.08693595230579376, "step": 1136 }, { "epoch": 0.7863070539419087, "grad_norm": 6.455709934234619, "learning_rate": 3.931535269709544e-05, "log_odds_chosen": 4.0131916999816895, "log_odds_ratio": -0.9443694353103638, "logits/chosen": -0.1703692525625229, "logits/rejected": -0.21043118834495544, "logps/chosen": -0.19897066056728363, "logps/rejected": -0.6492530703544617, "loss": 3.9973, "nll_loss": 0.9048901200294495, "rewards/accuracies": 0.875, "rewards/chosen": -0.019897066056728363, "rewards/margins": 0.04502824321389198, "rewards/rejected": -0.06492530554533005, "step": 1137 }, { "epoch": 0.7869986168741355, "grad_norm": 4.839828014373779, "learning_rate": 3.934993084370678e-05, "log_odds_chosen": 2.5431089401245117, "log_odds_ratio": -0.36189424991607666, "logits/chosen": -0.45434796810150146, "logits/rejected": -0.48614394664764404, "logps/chosen": -0.12932661175727844, "logps/rejected": -0.487295538187027, "loss": 4.8545, "nll_loss": 1.177430510520935, "rewards/accuracies": 0.75, "rewards/chosen": -0.012932661920785904, "rewards/margins": 0.035796891897916794, "rewards/rejected": -0.0487295538187027, "step": 1138 }, { "epoch": 0.7876901798063624, "grad_norm": 6.385467052459717, "learning_rate": 3.938450899031812e-05, "log_odds_chosen": 0.4432275593280792, "log_odds_ratio": -0.8290820717811584, "logits/chosen": -0.5175051093101501, "logits/rejected": -0.5431155562400818, "logps/chosen": -0.18120020627975464, "logps/rejected": -0.2309972047805786, "loss": 5.9582, "nll_loss": 1.4066460132598877, "rewards/accuracies": 0.625, "rewards/chosen": -0.018120020627975464, "rewards/margins": 0.004979700315743685, "rewards/rejected": -0.02309972234070301, "step": 1139 }, { "epoch": 0.7883817427385892, "grad_norm": 3.1535542011260986, "learning_rate": 3.9419087136929464e-05, "log_odds_chosen": 2.2779955863952637, "log_odds_ratio": -0.40133097767829895, "logits/chosen": -0.2673976421356201, "logits/rejected": -0.325452595949173, "logps/chosen": -0.09485425055027008, "logps/rejected": -0.4404667019844055, "loss": 3.3564, "nll_loss": 0.7989755868911743, "rewards/accuracies": 0.75, "rewards/chosen": -0.009485425427556038, "rewards/margins": 0.03456124663352966, "rewards/rejected": -0.04404667019844055, "step": 1140 }, { "epoch": 0.789073305670816, "grad_norm": 6.922438621520996, "learning_rate": 3.9453665283540805e-05, "log_odds_chosen": 1.6342426538467407, "log_odds_ratio": -0.5387409925460815, "logits/chosen": -0.34957778453826904, "logits/rejected": -0.3884001076221466, "logps/chosen": -0.12367647886276245, "logps/rejected": -0.31333163380622864, "loss": 4.4978, "nll_loss": 1.0705667734146118, "rewards/accuracies": 0.625, "rewards/chosen": -0.012367649003863335, "rewards/margins": 0.01896551437675953, "rewards/rejected": -0.031333163380622864, "step": 1141 }, { "epoch": 0.7897648686030428, "grad_norm": 4.580284118652344, "learning_rate": 3.948824343015215e-05, "log_odds_chosen": 1.7318023443222046, "log_odds_ratio": -0.6523119807243347, "logits/chosen": -0.6493903994560242, "logits/rejected": -0.6705228686332703, "logps/chosen": -0.27369049191474915, "logps/rejected": -0.6649531722068787, "loss": 4.2986, "nll_loss": 1.0094248056411743, "rewards/accuracies": 0.75, "rewards/chosen": -0.027369048446416855, "rewards/margins": 0.03912627696990967, "rewards/rejected": -0.06649532169103622, "step": 1142 }, { "epoch": 0.7904564315352697, "grad_norm": 5.137827396392822, "learning_rate": 3.952282157676349e-05, "log_odds_chosen": 2.366032600402832, "log_odds_ratio": -0.4727362394332886, "logits/chosen": -0.20692262053489685, "logits/rejected": -0.2002687156200409, "logps/chosen": -0.1281823217868805, "logps/rejected": -0.4092061221599579, "loss": 3.2961, "nll_loss": 0.7767484188079834, "rewards/accuracies": 0.75, "rewards/chosen": -0.012818234041333199, "rewards/margins": 0.02810238115489483, "rewards/rejected": -0.04092061519622803, "step": 1143 }, { "epoch": 0.7911479944674965, "grad_norm": 4.259830474853516, "learning_rate": 3.955739972337483e-05, "log_odds_chosen": 3.2707371711730957, "log_odds_ratio": -0.2883046269416809, "logits/chosen": -0.1852089762687683, "logits/rejected": -0.2368299812078476, "logps/chosen": -0.1016981303691864, "logps/rejected": -0.8447484970092773, "loss": 2.9771, "nll_loss": 0.7154471278190613, "rewards/accuracies": 0.875, "rewards/chosen": -0.010169814340770245, "rewards/margins": 0.07430503517389297, "rewards/rejected": -0.0844748467206955, "step": 1144 }, { "epoch": 0.7918395573997233, "grad_norm": 3.689413070678711, "learning_rate": 3.959197786998617e-05, "log_odds_chosen": 3.4060113430023193, "log_odds_ratio": -0.25230181217193604, "logits/chosen": -0.25829124450683594, "logits/rejected": -0.25649240612983704, "logps/chosen": -0.08521488308906555, "logps/rejected": -0.5981854796409607, "loss": 3.8064, "nll_loss": 0.9263700246810913, "rewards/accuracies": 0.875, "rewards/chosen": -0.0085214888677001, "rewards/margins": 0.05129706487059593, "rewards/rejected": -0.05981855094432831, "step": 1145 }, { "epoch": 0.7925311203319502, "grad_norm": 5.098305702209473, "learning_rate": 3.962655601659751e-05, "log_odds_chosen": 2.6655032634735107, "log_odds_ratio": -0.24669036269187927, "logits/chosen": -0.4905535578727722, "logits/rejected": -0.5256434082984924, "logps/chosen": -0.10529651492834091, "logps/rejected": -0.5505136847496033, "loss": 4.5956, "nll_loss": 1.1242369413375854, "rewards/accuracies": 1.0, "rewards/chosen": -0.010529652237892151, "rewards/margins": 0.04452171549201012, "rewards/rejected": -0.05505136772990227, "step": 1146 }, { "epoch": 0.793222683264177, "grad_norm": 3.806218385696411, "learning_rate": 3.9661134163208855e-05, "log_odds_chosen": 3.110368251800537, "log_odds_ratio": -0.30972862243652344, "logits/chosen": -0.15990647673606873, "logits/rejected": -0.1701805144548416, "logps/chosen": -0.10173983126878738, "logps/rejected": -0.4115472435951233, "loss": 3.4048, "nll_loss": 0.8202356696128845, "rewards/accuracies": 0.875, "rewards/chosen": -0.010173983871936798, "rewards/margins": 0.03098073974251747, "rewards/rejected": -0.04115472361445427, "step": 1147 }, { "epoch": 0.7939142461964038, "grad_norm": 3.248661994934082, "learning_rate": 3.9695712309820196e-05, "log_odds_chosen": 4.2546844482421875, "log_odds_ratio": -0.14933958649635315, "logits/chosen": -0.2126917541027069, "logits/rejected": -0.26696938276290894, "logps/chosen": -0.02835908532142639, "logps/rejected": -0.4520314931869507, "loss": 3.2796, "nll_loss": 0.8049613237380981, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028359086718410254, "rewards/margins": 0.04236724227666855, "rewards/rejected": -0.04520314931869507, "step": 1148 }, { "epoch": 0.7946058091286307, "grad_norm": 4.2895588874816895, "learning_rate": 3.973029045643154e-05, "log_odds_chosen": 1.050495982170105, "log_odds_ratio": -0.6158413290977478, "logits/chosen": -0.6027485728263855, "logits/rejected": -0.5986880660057068, "logps/chosen": -0.17323151230812073, "logps/rejected": -0.3859601616859436, "loss": 3.2588, "nll_loss": 0.7531127333641052, "rewards/accuracies": 0.625, "rewards/chosen": -0.017323151230812073, "rewards/margins": 0.021272864192724228, "rewards/rejected": -0.0385960191488266, "step": 1149 }, { "epoch": 0.7952973720608575, "grad_norm": 3.7696895599365234, "learning_rate": 3.976486860304288e-05, "log_odds_chosen": 3.539609909057617, "log_odds_ratio": -0.25380614399909973, "logits/chosen": -0.3072446584701538, "logits/rejected": -0.3367466628551483, "logps/chosen": -0.07568614184856415, "logps/rejected": -0.502180814743042, "loss": 3.3981, "nll_loss": 0.824151337146759, "rewards/accuracies": 1.0, "rewards/chosen": -0.007568614557385445, "rewards/margins": 0.042649466544389725, "rewards/rejected": -0.05021808296442032, "step": 1150 }, { "epoch": 0.7959889349930843, "grad_norm": 3.330585241317749, "learning_rate": 3.979944674965422e-05, "log_odds_chosen": 2.6073431968688965, "log_odds_ratio": -0.2579598128795624, "logits/chosen": -0.23022376000881195, "logits/rejected": -0.26469650864601135, "logps/chosen": -0.0766703188419342, "logps/rejected": -0.3181542754173279, "loss": 4.0204, "nll_loss": 0.9793111085891724, "rewards/accuracies": 0.875, "rewards/chosen": -0.007667032536119223, "rewards/margins": 0.024148397147655487, "rewards/rejected": -0.03181542828679085, "step": 1151 }, { "epoch": 0.7966804979253111, "grad_norm": 4.687983512878418, "learning_rate": 3.983402489626556e-05, "log_odds_chosen": 1.84706449508667, "log_odds_ratio": -0.33897465467453003, "logits/chosen": -0.7606194019317627, "logits/rejected": -0.7207037210464478, "logps/chosen": -0.12923657894134521, "logps/rejected": -0.43369144201278687, "loss": 5.1788, "nll_loss": 1.2607989311218262, "rewards/accuracies": 0.875, "rewards/chosen": -0.012923657894134521, "rewards/margins": 0.030445488169789314, "rewards/rejected": -0.043369147926568985, "step": 1152 }, { "epoch": 0.7973720608575381, "grad_norm": 6.274543762207031, "learning_rate": 3.9868603042876904e-05, "log_odds_chosen": 0.8526477217674255, "log_odds_ratio": -0.5951778888702393, "logits/chosen": -0.20541231334209442, "logits/rejected": -0.23336751759052277, "logps/chosen": -0.19507427513599396, "logps/rejected": -0.3879839777946472, "loss": 3.8535, "nll_loss": 0.9038695096969604, "rewards/accuracies": 0.625, "rewards/chosen": -0.019507426768541336, "rewards/margins": 0.019290970638394356, "rewards/rejected": -0.03879839554429054, "step": 1153 }, { "epoch": 0.7980636237897649, "grad_norm": 4.864806652069092, "learning_rate": 3.9903181189488246e-05, "log_odds_chosen": 2.4882702827453613, "log_odds_ratio": -0.3174005150794983, "logits/chosen": -0.8158073425292969, "logits/rejected": -0.8330868482589722, "logps/chosen": -0.09785200655460358, "logps/rejected": -0.4712101221084595, "loss": 4.5925, "nll_loss": 1.1163914203643799, "rewards/accuracies": 0.875, "rewards/chosen": -0.009785201400518417, "rewards/margins": 0.03733580932021141, "rewards/rejected": -0.04712101072072983, "step": 1154 }, { "epoch": 0.7987551867219918, "grad_norm": 4.701379776000977, "learning_rate": 3.993775933609959e-05, "log_odds_chosen": 2.112551689147949, "log_odds_ratio": -0.37846639752388, "logits/chosen": -0.275676965713501, "logits/rejected": -0.2844288647174835, "logps/chosen": -0.12924633920192719, "logps/rejected": -0.33982053399086, "loss": 4.3999, "nll_loss": 1.0621216297149658, "rewards/accuracies": 0.75, "rewards/chosen": -0.012924633920192719, "rewards/margins": 0.02105741947889328, "rewards/rejected": -0.033982053399086, "step": 1155 }, { "epoch": 0.7994467496542186, "grad_norm": 4.104145526885986, "learning_rate": 3.997233748271093e-05, "log_odds_chosen": 0.3516320288181305, "log_odds_ratio": -0.6141179800033569, "logits/chosen": -0.4469658136367798, "logits/rejected": -0.468797504901886, "logps/chosen": -0.20192989706993103, "logps/rejected": -0.29918792843818665, "loss": 4.2525, "nll_loss": 1.0017073154449463, "rewards/accuracies": 0.5, "rewards/chosen": -0.020192990079522133, "rewards/margins": 0.009725801646709442, "rewards/rejected": -0.029918791726231575, "step": 1156 }, { "epoch": 0.8001383125864454, "grad_norm": 3.982905864715576, "learning_rate": 4.000691562932227e-05, "log_odds_chosen": 2.0321569442749023, "log_odds_ratio": -0.38790351152420044, "logits/chosen": -0.2542264461517334, "logits/rejected": -0.23322290182113647, "logps/chosen": -0.2226470708847046, "logps/rejected": -0.647704541683197, "loss": 3.7577, "nll_loss": 0.9006452560424805, "rewards/accuracies": 0.75, "rewards/chosen": -0.02226470783352852, "rewards/margins": 0.04250574856996536, "rewards/rejected": -0.06477045267820358, "step": 1157 }, { "epoch": 0.8008298755186722, "grad_norm": 4.697607040405273, "learning_rate": 4.004149377593361e-05, "log_odds_chosen": 1.7568845748901367, "log_odds_ratio": -0.5033901333808899, "logits/chosen": -0.6665538549423218, "logits/rejected": -0.6717506647109985, "logps/chosen": -0.11687202751636505, "logps/rejected": -0.410500168800354, "loss": 4.2899, "nll_loss": 1.0221455097198486, "rewards/accuracies": 0.625, "rewards/chosen": -0.011687202379107475, "rewards/margins": 0.029362818226218224, "rewards/rejected": -0.0410500168800354, "step": 1158 }, { "epoch": 0.8015214384508991, "grad_norm": 4.458200454711914, "learning_rate": 4.007607192254495e-05, "log_odds_chosen": 2.649742603302002, "log_odds_ratio": -0.2814682722091675, "logits/chosen": -0.3529450297355652, "logits/rejected": -0.32280710339546204, "logps/chosen": -0.13715879619121552, "logps/rejected": -0.5732930302619934, "loss": 3.8533, "nll_loss": 0.9351730346679688, "rewards/accuracies": 0.875, "rewards/chosen": -0.01371588185429573, "rewards/margins": 0.04361342638731003, "rewards/rejected": -0.05732930451631546, "step": 1159 }, { "epoch": 0.8022130013831259, "grad_norm": 5.326199054718018, "learning_rate": 4.0110650069156295e-05, "log_odds_chosen": 1.607958436012268, "log_odds_ratio": -1.345959186553955, "logits/chosen": -0.11324809491634369, "logits/rejected": -0.07060239464044571, "logps/chosen": -0.27737849950790405, "logps/rejected": -0.5359267592430115, "loss": 3.6059, "nll_loss": 0.7668741941452026, "rewards/accuracies": 0.875, "rewards/chosen": -0.027737848460674286, "rewards/margins": 0.02585482969880104, "rewards/rejected": -0.05359267443418503, "step": 1160 }, { "epoch": 0.8029045643153527, "grad_norm": 4.502684116363525, "learning_rate": 4.0145228215767636e-05, "log_odds_chosen": 2.724449872970581, "log_odds_ratio": -0.3041455149650574, "logits/chosen": -0.10399705171585083, "logits/rejected": -0.0837821215391159, "logps/chosen": -0.07750361412763596, "logps/rejected": -0.46520230174064636, "loss": 3.4773, "nll_loss": 0.8389115333557129, "rewards/accuracies": 0.875, "rewards/chosen": -0.007750361226499081, "rewards/margins": 0.03876987099647522, "rewards/rejected": -0.046520233154296875, "step": 1161 }, { "epoch": 0.8035961272475796, "grad_norm": 4.772683620452881, "learning_rate": 4.017980636237898e-05, "log_odds_chosen": 2.803502321243286, "log_odds_ratio": -0.5766577124595642, "logits/chosen": -0.05678505823016167, "logits/rejected": -0.0759805291891098, "logps/chosen": -0.17404711246490479, "logps/rejected": -0.5554315447807312, "loss": 3.6957, "nll_loss": 0.8662543296813965, "rewards/accuracies": 0.625, "rewards/chosen": -0.017404712736606598, "rewards/margins": 0.038138438016176224, "rewards/rejected": -0.05554314702749252, "step": 1162 }, { "epoch": 0.8042876901798064, "grad_norm": 3.36564564704895, "learning_rate": 4.021438450899032e-05, "log_odds_chosen": 2.6950929164886475, "log_odds_ratio": -0.3946307301521301, "logits/chosen": -0.3952489197254181, "logits/rejected": -0.41110169887542725, "logps/chosen": -0.13186201453208923, "logps/rejected": -0.4083203077316284, "loss": 3.2633, "nll_loss": 0.7763731479644775, "rewards/accuracies": 0.75, "rewards/chosen": -0.013186202384531498, "rewards/margins": 0.027645830065011978, "rewards/rejected": -0.0408320352435112, "step": 1163 }, { "epoch": 0.8049792531120332, "grad_norm": 5.37779426574707, "learning_rate": 4.024896265560166e-05, "log_odds_chosen": 1.5617663860321045, "log_odds_ratio": -0.6627476215362549, "logits/chosen": -0.3280797600746155, "logits/rejected": -0.38121557235717773, "logps/chosen": -0.26118674874305725, "logps/rejected": -0.4342179000377655, "loss": 4.6767, "nll_loss": 1.1028947830200195, "rewards/accuracies": 0.625, "rewards/chosen": -0.026118673384189606, "rewards/margins": 0.017303116619586945, "rewards/rejected": -0.04342179000377655, "step": 1164 }, { "epoch": 0.80567081604426, "grad_norm": 5.427413463592529, "learning_rate": 4.0283540802213e-05, "log_odds_chosen": 2.0146398544311523, "log_odds_ratio": -0.37571975588798523, "logits/chosen": -0.5844836235046387, "logits/rejected": -0.6303712725639343, "logps/chosen": -0.13081598281860352, "logps/rejected": -0.4544587731361389, "loss": 3.4184, "nll_loss": 0.8170157670974731, "rewards/accuracies": 0.75, "rewards/chosen": -0.013081599026918411, "rewards/margins": 0.03236427530646324, "rewards/rejected": -0.04544587433338165, "step": 1165 }, { "epoch": 0.8063623789764869, "grad_norm": 5.316728591918945, "learning_rate": 4.0318118948824344e-05, "log_odds_chosen": 1.9966132640838623, "log_odds_ratio": -0.5896454453468323, "logits/chosen": -0.23118741810321808, "logits/rejected": -0.19517435133457184, "logps/chosen": -0.19767743349075317, "logps/rejected": -0.36030542850494385, "loss": 3.8185, "nll_loss": 0.8956526517868042, "rewards/accuracies": 0.625, "rewards/chosen": -0.019767742604017258, "rewards/margins": 0.016262799501419067, "rewards/rejected": -0.036030545830726624, "step": 1166 }, { "epoch": 0.8070539419087137, "grad_norm": 5.237853050231934, "learning_rate": 4.0352697095435686e-05, "log_odds_chosen": 2.7189266681671143, "log_odds_ratio": -0.45107319951057434, "logits/chosen": -0.3652513921260834, "logits/rejected": -0.3933391571044922, "logps/chosen": -0.16551122069358826, "logps/rejected": -0.4190428555011749, "loss": 3.4939, "nll_loss": 0.8283703327178955, "rewards/accuracies": 0.875, "rewards/chosen": -0.016551122069358826, "rewards/margins": 0.025353163480758667, "rewards/rejected": -0.04190428555011749, "step": 1167 }, { "epoch": 0.8077455048409405, "grad_norm": 3.3878939151763916, "learning_rate": 4.038727524204703e-05, "log_odds_chosen": 4.432202339172363, "log_odds_ratio": -0.1157660111784935, "logits/chosen": -0.4019083082675934, "logits/rejected": -0.39598947763442993, "logps/chosen": -0.0686732605099678, "logps/rejected": -0.6455079913139343, "loss": 3.2299, "nll_loss": 0.7959014773368835, "rewards/accuracies": 1.0, "rewards/chosen": -0.0068673258647322655, "rewards/margins": 0.05768347159028053, "rewards/rejected": -0.06455080211162567, "step": 1168 }, { "epoch": 0.8084370677731674, "grad_norm": 5.72116756439209, "learning_rate": 4.042185338865837e-05, "log_odds_chosen": 2.8803868293762207, "log_odds_ratio": -0.2445899397134781, "logits/chosen": -0.35858699679374695, "logits/rejected": -0.4009200632572174, "logps/chosen": -0.1210114136338234, "logps/rejected": -0.600544810295105, "loss": 4.0163, "nll_loss": 0.9796262979507446, "rewards/accuracies": 0.875, "rewards/chosen": -0.01210114173591137, "rewards/margins": 0.04795333743095398, "rewards/rejected": -0.0600544810295105, "step": 1169 }, { "epoch": 0.8091286307053942, "grad_norm": 4.961204528808594, "learning_rate": 4.045643153526971e-05, "log_odds_chosen": 1.2866220474243164, "log_odds_ratio": -0.6377352476119995, "logits/chosen": -0.7465688586235046, "logits/rejected": -0.7543196678161621, "logps/chosen": -0.20632582902908325, "logps/rejected": -0.4361448287963867, "loss": 4.3729, "nll_loss": 1.029453992843628, "rewards/accuracies": 0.5, "rewards/chosen": -0.020632583647966385, "rewards/margins": 0.022981898859143257, "rewards/rejected": -0.04361448436975479, "step": 1170 }, { "epoch": 0.809820193637621, "grad_norm": 3.8165488243103027, "learning_rate": 4.049100968188105e-05, "log_odds_chosen": 1.9424272775650024, "log_odds_ratio": -0.5050072073936462, "logits/chosen": -0.6732468605041504, "logits/rejected": -0.657630205154419, "logps/chosen": -0.14212819933891296, "logps/rejected": -0.31241375207901, "loss": 3.3255, "nll_loss": 0.7808791399002075, "rewards/accuracies": 0.875, "rewards/chosen": -0.01421282161027193, "rewards/margins": 0.017028555274009705, "rewards/rejected": -0.03124137595295906, "step": 1171 }, { "epoch": 0.8105117565698479, "grad_norm": 4.526830673217773, "learning_rate": 4.0525587828492394e-05, "log_odds_chosen": 3.6338460445404053, "log_odds_ratio": -0.3146744966506958, "logits/chosen": -0.6114726662635803, "logits/rejected": -0.6514254808425903, "logps/chosen": -0.10209785401821136, "logps/rejected": -0.5759584903717041, "loss": 4.2295, "nll_loss": 1.0259071588516235, "rewards/accuracies": 0.875, "rewards/chosen": -0.010209785774350166, "rewards/margins": 0.04738606512546539, "rewards/rejected": -0.05759584903717041, "step": 1172 }, { "epoch": 0.8112033195020747, "grad_norm": 4.934037685394287, "learning_rate": 4.0560165975103735e-05, "log_odds_chosen": 1.4841629266738892, "log_odds_ratio": -0.3995290696620941, "logits/chosen": -0.682904839515686, "logits/rejected": -0.7407668828964233, "logps/chosen": -0.14081400632858276, "logps/rejected": -0.4084875285625458, "loss": 5.494, "nll_loss": 1.3335497379302979, "rewards/accuracies": 0.875, "rewards/chosen": -0.014081399887800217, "rewards/margins": 0.02676735632121563, "rewards/rejected": -0.0408487543463707, "step": 1173 }, { "epoch": 0.8118948824343015, "grad_norm": 4.349318981170654, "learning_rate": 4.059474412171508e-05, "log_odds_chosen": 1.8717788457870483, "log_odds_ratio": -0.38216298818588257, "logits/chosen": -0.5308178067207336, "logits/rejected": -0.5366460680961609, "logps/chosen": -0.10794338583946228, "logps/rejected": -0.34007906913757324, "loss": 4.0714, "nll_loss": 0.9796421527862549, "rewards/accuracies": 0.625, "rewards/chosen": -0.010794337838888168, "rewards/margins": 0.023213567212224007, "rewards/rejected": -0.034007906913757324, "step": 1174 }, { "epoch": 0.8125864453665284, "grad_norm": 6.510798454284668, "learning_rate": 4.062932226832642e-05, "log_odds_chosen": 1.6442862749099731, "log_odds_ratio": -0.40282881259918213, "logits/chosen": -0.7126697301864624, "logits/rejected": -0.7322872281074524, "logps/chosen": -0.13879220187664032, "logps/rejected": -0.5311157703399658, "loss": 6.3191, "nll_loss": 1.5394930839538574, "rewards/accuracies": 0.5, "rewards/chosen": -0.013879221864044666, "rewards/margins": 0.03923235461115837, "rewards/rejected": -0.05311157554388046, "step": 1175 }, { "epoch": 0.8132780082987552, "grad_norm": 3.506040096282959, "learning_rate": 4.066390041493776e-05, "log_odds_chosen": 5.572505950927734, "log_odds_ratio": -0.1323142647743225, "logits/chosen": 0.40928786993026733, "logits/rejected": 0.3669503927230835, "logps/chosen": -0.07105830311775208, "logps/rejected": -0.9678250551223755, "loss": 3.5214, "nll_loss": 0.8671307563781738, "rewards/accuracies": 0.875, "rewards/chosen": -0.007105831056833267, "rewards/margins": 0.08967668563127518, "rewards/rejected": -0.09678251296281815, "step": 1176 }, { "epoch": 0.813969571230982, "grad_norm": 4.736689567565918, "learning_rate": 4.06984785615491e-05, "log_odds_chosen": 2.2772843837738037, "log_odds_ratio": -0.5770815014839172, "logits/chosen": -0.677619218826294, "logits/rejected": -0.6756543517112732, "logps/chosen": -0.14869184792041779, "logps/rejected": -0.6958863735198975, "loss": 4.5143, "nll_loss": 1.0708606243133545, "rewards/accuracies": 0.625, "rewards/chosen": -0.014869185164570808, "rewards/margins": 0.054719455540180206, "rewards/rejected": -0.06958863884210587, "step": 1177 }, { "epoch": 0.8146611341632088, "grad_norm": 5.372425556182861, "learning_rate": 4.073305670816044e-05, "log_odds_chosen": 1.6330846548080444, "log_odds_ratio": -0.6535148620605469, "logits/chosen": -0.4990033805370331, "logits/rejected": -0.548502504825592, "logps/chosen": -0.1866636574268341, "logps/rejected": -0.5533938407897949, "loss": 3.2122, "nll_loss": 0.7376972436904907, "rewards/accuracies": 0.75, "rewards/chosen": -0.01866636611521244, "rewards/margins": 0.03667302057147026, "rewards/rejected": -0.05533938482403755, "step": 1178 }, { "epoch": 0.8153526970954357, "grad_norm": 5.9537529945373535, "learning_rate": 4.0767634854771785e-05, "log_odds_chosen": 4.260707378387451, "log_odds_ratio": -0.28442054986953735, "logits/chosen": -0.19398483633995056, "logits/rejected": -0.26974254846572876, "logps/chosen": -0.1344795972108841, "logps/rejected": -0.9466410875320435, "loss": 4.0131, "nll_loss": 0.9748204946517944, "rewards/accuracies": 0.875, "rewards/chosen": -0.013447960838675499, "rewards/margins": 0.08121615648269653, "rewards/rejected": -0.09466411918401718, "step": 1179 }, { "epoch": 0.8160442600276625, "grad_norm": 5.193739891052246, "learning_rate": 4.0802213001383126e-05, "log_odds_chosen": 1.9125900268554688, "log_odds_ratio": -0.5270196795463562, "logits/chosen": -0.29638856649398804, "logits/rejected": -0.3578253984451294, "logps/chosen": -0.17957088351249695, "logps/rejected": -0.4569128155708313, "loss": 4.2902, "nll_loss": 1.0198581218719482, "rewards/accuracies": 0.625, "rewards/chosen": -0.017957089468836784, "rewards/margins": 0.027734192088246346, "rewards/rejected": -0.04569128155708313, "step": 1180 }, { "epoch": 0.8167358229598893, "grad_norm": 5.224289894104004, "learning_rate": 4.083679114799447e-05, "log_odds_chosen": 2.933523654937744, "log_odds_ratio": -0.5448406934738159, "logits/chosen": -0.3297680616378784, "logits/rejected": -0.355973482131958, "logps/chosen": -0.0969846174120903, "logps/rejected": -0.5666028261184692, "loss": 3.8264, "nll_loss": 0.9021034836769104, "rewards/accuracies": 0.75, "rewards/chosen": -0.00969846174120903, "rewards/margins": 0.046961817890405655, "rewards/rejected": -0.056660279631614685, "step": 1181 }, { "epoch": 0.8174273858921162, "grad_norm": 3.4837048053741455, "learning_rate": 4.087136929460581e-05, "log_odds_chosen": 3.5890541076660156, "log_odds_ratio": -0.33530038595199585, "logits/chosen": 0.014912977814674377, "logits/rejected": 0.02515430748462677, "logps/chosen": -0.10411328077316284, "logps/rejected": -0.4772682189941406, "loss": 3.3812, "nll_loss": 0.811762809753418, "rewards/accuracies": 0.875, "rewards/chosen": -0.010411329567432404, "rewards/margins": 0.0373154915869236, "rewards/rejected": -0.0477268248796463, "step": 1182 }, { "epoch": 0.818118948824343, "grad_norm": 5.149781227111816, "learning_rate": 4.090594744121715e-05, "log_odds_chosen": 2.0537867546081543, "log_odds_ratio": -0.48002955317497253, "logits/chosen": -0.6776705980300903, "logits/rejected": -0.7077109813690186, "logps/chosen": -0.14051635563373566, "logps/rejected": -0.6583629846572876, "loss": 5.4693, "nll_loss": 1.3193252086639404, "rewards/accuracies": 0.75, "rewards/chosen": -0.01405163574963808, "rewards/margins": 0.051784664392471313, "rewards/rejected": -0.06583630293607712, "step": 1183 }, { "epoch": 0.8188105117565698, "grad_norm": 11.356409072875977, "learning_rate": 4.094052558782849e-05, "log_odds_chosen": 1.8903353214263916, "log_odds_ratio": -0.8911072611808777, "logits/chosen": -0.31842726469039917, "logits/rejected": -0.4309629201889038, "logps/chosen": -0.2517080307006836, "logps/rejected": -0.5393136739730835, "loss": 4.4805, "nll_loss": 1.0310028791427612, "rewards/accuracies": 0.625, "rewards/chosen": -0.02517080307006836, "rewards/margins": 0.02876056358218193, "rewards/rejected": -0.05393137037754059, "step": 1184 }, { "epoch": 0.8195020746887967, "grad_norm": 4.158552646636963, "learning_rate": 4.0975103734439834e-05, "log_odds_chosen": 1.3254547119140625, "log_odds_ratio": -0.41943395137786865, "logits/chosen": -0.2344731092453003, "logits/rejected": -0.25372275710105896, "logps/chosen": -0.095099076628685, "logps/rejected": -0.2825375199317932, "loss": 3.0469, "nll_loss": 0.719789981842041, "rewards/accuracies": 0.75, "rewards/chosen": -0.00950990803539753, "rewards/margins": 0.018743846565485, "rewards/rejected": -0.02825375273823738, "step": 1185 }, { "epoch": 0.8201936376210235, "grad_norm": 4.228912353515625, "learning_rate": 4.1009681881051176e-05, "log_odds_chosen": 1.841248631477356, "log_odds_ratio": -0.42090776562690735, "logits/chosen": -0.5038369297981262, "logits/rejected": -0.577272891998291, "logps/chosen": -0.3570842444896698, "logps/rejected": -0.5847877264022827, "loss": 4.0669, "nll_loss": 0.9746286869049072, "rewards/accuracies": 0.875, "rewards/chosen": -0.03570842370390892, "rewards/margins": 0.02277034893631935, "rewards/rejected": -0.05847877264022827, "step": 1186 }, { "epoch": 0.8208852005532503, "grad_norm": 3.153090238571167, "learning_rate": 4.104426002766252e-05, "log_odds_chosen": 2.0451748371124268, "log_odds_ratio": -0.3823314309120178, "logits/chosen": -0.6597636342048645, "logits/rejected": -0.6733765006065369, "logps/chosen": -0.09847903251647949, "logps/rejected": -0.280222624540329, "loss": 3.309, "nll_loss": 0.7890222072601318, "rewards/accuracies": 0.75, "rewards/chosen": -0.009847903624176979, "rewards/margins": 0.01817435771226883, "rewards/rejected": -0.02802225947380066, "step": 1187 }, { "epoch": 0.8215767634854771, "grad_norm": 4.841866970062256, "learning_rate": 4.107883817427386e-05, "log_odds_chosen": 1.6958292722702026, "log_odds_ratio": -0.3945018947124481, "logits/chosen": -0.5676984190940857, "logits/rejected": -0.5837230682373047, "logps/chosen": -0.08179913461208344, "logps/rejected": -0.3788478672504425, "loss": 3.481, "nll_loss": 0.8307971954345703, "rewards/accuracies": 0.75, "rewards/chosen": -0.008179914206266403, "rewards/margins": 0.029704870656132698, "rewards/rejected": -0.03788478672504425, "step": 1188 }, { "epoch": 0.822268326417704, "grad_norm": 4.054405689239502, "learning_rate": 4.11134163208852e-05, "log_odds_chosen": 2.147653818130493, "log_odds_ratio": -0.4769085645675659, "logits/chosen": -0.5489739179611206, "logits/rejected": -0.5161327123641968, "logps/chosen": -0.17612148821353912, "logps/rejected": -0.3499046266078949, "loss": 4.3547, "nll_loss": 1.0409873723983765, "rewards/accuracies": 0.75, "rewards/chosen": -0.017612148076295853, "rewards/margins": 0.017378315329551697, "rewards/rejected": -0.03499046340584755, "step": 1189 }, { "epoch": 0.8229598893499308, "grad_norm": 4.69357442855835, "learning_rate": 4.114799446749654e-05, "log_odds_chosen": 1.723274827003479, "log_odds_ratio": -0.33592188358306885, "logits/chosen": -0.20577648282051086, "logits/rejected": -0.2108028531074524, "logps/chosen": -0.11633970588445663, "logps/rejected": -0.30491918325424194, "loss": 2.9418, "nll_loss": 0.7018574476242065, "rewards/accuracies": 0.875, "rewards/chosen": -0.011633969843387604, "rewards/margins": 0.01885795034468174, "rewards/rejected": -0.030491922050714493, "step": 1190 }, { "epoch": 0.8236514522821576, "grad_norm": 6.797853946685791, "learning_rate": 4.118257261410788e-05, "log_odds_chosen": 2.888835906982422, "log_odds_ratio": -0.3923826813697815, "logits/chosen": -0.25407689809799194, "logits/rejected": -0.2665678858757019, "logps/chosen": -0.0858723446726799, "logps/rejected": -0.7471990585327148, "loss": 3.7509, "nll_loss": 0.8984828591346741, "rewards/accuracies": 0.75, "rewards/chosen": -0.008587234653532505, "rewards/margins": 0.06613267213106155, "rewards/rejected": -0.07471990585327148, "step": 1191 }, { "epoch": 0.8243430152143845, "grad_norm": 6.593566417694092, "learning_rate": 4.1217150760719225e-05, "log_odds_chosen": 2.051375150680542, "log_odds_ratio": -0.8040463924407959, "logits/chosen": -0.5109673738479614, "logits/rejected": -0.4858384430408478, "logps/chosen": -0.14845474064350128, "logps/rejected": -0.3342825770378113, "loss": 4.0657, "nll_loss": 0.9360308051109314, "rewards/accuracies": 0.625, "rewards/chosen": -0.014845474623143673, "rewards/margins": 0.018582783639431, "rewards/rejected": -0.03342825919389725, "step": 1192 }, { "epoch": 0.8250345781466113, "grad_norm": 6.4309892654418945, "learning_rate": 4.1251728907330567e-05, "log_odds_chosen": 1.1407532691955566, "log_odds_ratio": -0.4700060486793518, "logits/chosen": -0.5878971815109253, "logits/rejected": -0.6174403429031372, "logps/chosen": -0.09566640853881836, "logps/rejected": -0.34187084436416626, "loss": 4.4896, "nll_loss": 1.0753902196884155, "rewards/accuracies": 0.75, "rewards/chosen": -0.00956664141267538, "rewards/margins": 0.02462044358253479, "rewards/rejected": -0.03418708220124245, "step": 1193 }, { "epoch": 0.8257261410788381, "grad_norm": 3.923752784729004, "learning_rate": 4.128630705394191e-05, "log_odds_chosen": 4.222731113433838, "log_odds_ratio": -0.24456751346588135, "logits/chosen": -0.7110618352890015, "logits/rejected": -0.7427866458892822, "logps/chosen": -0.09104707837104797, "logps/rejected": -0.7605820894241333, "loss": 3.5788, "nll_loss": 0.8702382445335388, "rewards/accuracies": 0.875, "rewards/chosen": -0.009104708209633827, "rewards/margins": 0.06695350259542465, "rewards/rejected": -0.07605820894241333, "step": 1194 }, { "epoch": 0.826417704011065, "grad_norm": 4.21205472946167, "learning_rate": 4.132088520055325e-05, "log_odds_chosen": 4.177135944366455, "log_odds_ratio": -0.1285741925239563, "logits/chosen": -0.6350801587104797, "logits/rejected": -0.7226979732513428, "logps/chosen": -0.0693359375, "logps/rejected": -0.6927146315574646, "loss": 4.1323, "nll_loss": 1.0202298164367676, "rewards/accuracies": 1.0, "rewards/chosen": -0.0069335936568677425, "rewards/margins": 0.06233787164092064, "rewards/rejected": -0.06927146017551422, "step": 1195 }, { "epoch": 0.8271092669432918, "grad_norm": 4.5966105461120605, "learning_rate": 4.135546334716459e-05, "log_odds_chosen": 4.998106002807617, "log_odds_ratio": -0.09226921945810318, "logits/chosen": -0.345980167388916, "logits/rejected": -0.3663310110569, "logps/chosen": -0.04563899710774422, "logps/rejected": -0.8056957721710205, "loss": 3.638, "nll_loss": 0.9002783298492432, "rewards/accuracies": 1.0, "rewards/chosen": -0.004563899710774422, "rewards/margins": 0.07600568234920502, "rewards/rejected": -0.08056958019733429, "step": 1196 }, { "epoch": 0.8278008298755186, "grad_norm": 5.137960433959961, "learning_rate": 4.139004149377593e-05, "log_odds_chosen": 3.515726089477539, "log_odds_ratio": -0.2497054785490036, "logits/chosen": -0.6006046533584595, "logits/rejected": -0.5914702415466309, "logps/chosen": -0.1347048431634903, "logps/rejected": -0.7286680340766907, "loss": 3.5016, "nll_loss": 0.8504340648651123, "rewards/accuracies": 1.0, "rewards/chosen": -0.013470484875142574, "rewards/margins": 0.05939631909132004, "rewards/rejected": -0.07286680489778519, "step": 1197 }, { "epoch": 0.8284923928077456, "grad_norm": 7.066193103790283, "learning_rate": 4.142461964038728e-05, "log_odds_chosen": 2.3033316135406494, "log_odds_ratio": -0.28070491552352905, "logits/chosen": -0.29856306314468384, "logits/rejected": -0.32309019565582275, "logps/chosen": -0.09830452501773834, "logps/rejected": -0.35360878705978394, "loss": 3.6397, "nll_loss": 0.8818494081497192, "rewards/accuracies": 1.0, "rewards/chosen": -0.009830452501773834, "rewards/margins": 0.02553042583167553, "rewards/rejected": -0.035360876470804214, "step": 1198 }, { "epoch": 0.8291839557399724, "grad_norm": 6.882563591003418, "learning_rate": 4.145919778699862e-05, "log_odds_chosen": 3.472243309020996, "log_odds_ratio": -0.602714478969574, "logits/chosen": -0.27655768394470215, "logits/rejected": -0.3242935836315155, "logps/chosen": -0.3016278147697449, "logps/rejected": -0.7091034650802612, "loss": 3.4584, "nll_loss": 0.8043214678764343, "rewards/accuracies": 0.625, "rewards/chosen": -0.030162781476974487, "rewards/margins": 0.040747564285993576, "rewards/rejected": -0.07091034948825836, "step": 1199 }, { "epoch": 0.8298755186721992, "grad_norm": 5.0919013023376465, "learning_rate": 4.1493775933609964e-05, "log_odds_chosen": 2.8356375694274902, "log_odds_ratio": -0.49583184719085693, "logits/chosen": -0.5040990114212036, "logits/rejected": -0.5087206363677979, "logps/chosen": -0.18786540627479553, "logps/rejected": -0.3985045552253723, "loss": 4.3429, "nll_loss": 1.0361515283584595, "rewards/accuracies": 0.625, "rewards/chosen": -0.018786542117595673, "rewards/margins": 0.02106391452252865, "rewards/rejected": -0.03985045477747917, "step": 1200 }, { "epoch": 0.830567081604426, "grad_norm": 4.154881954193115, "learning_rate": 4.1528354080221306e-05, "log_odds_chosen": 2.0494563579559326, "log_odds_ratio": -0.46288198232650757, "logits/chosen": -0.6752278208732605, "logits/rejected": -0.7129898071289062, "logps/chosen": -0.1519179791212082, "logps/rejected": -0.5215798020362854, "loss": 4.4414, "nll_loss": 1.0640610456466675, "rewards/accuracies": 0.75, "rewards/chosen": -0.015191798098385334, "rewards/margins": 0.03696617856621742, "rewards/rejected": -0.05215797945857048, "step": 1201 }, { "epoch": 0.8312586445366529, "grad_norm": 6.282675743103027, "learning_rate": 4.156293222683265e-05, "log_odds_chosen": 1.862894058227539, "log_odds_ratio": -0.7544838786125183, "logits/chosen": -0.5247619152069092, "logits/rejected": -0.5509651899337769, "logps/chosen": -0.31346026062965393, "logps/rejected": -0.48862025141716003, "loss": 3.144, "nll_loss": 0.7105435132980347, "rewards/accuracies": 0.625, "rewards/chosen": -0.03134603053331375, "rewards/margins": 0.017515994608402252, "rewards/rejected": -0.048862025141716, "step": 1202 }, { "epoch": 0.8319502074688797, "grad_norm": 4.217153072357178, "learning_rate": 4.159751037344399e-05, "log_odds_chosen": 3.424147367477417, "log_odds_ratio": -0.4018994867801666, "logits/chosen": -0.4903850555419922, "logits/rejected": -0.4391288459300995, "logps/chosen": -0.13487458229064941, "logps/rejected": -0.5529087781906128, "loss": 3.0789, "nll_loss": 0.7295363545417786, "rewards/accuracies": 0.875, "rewards/chosen": -0.013487459160387516, "rewards/margins": 0.04180341958999634, "rewards/rejected": -0.05529087781906128, "step": 1203 }, { "epoch": 0.8326417704011065, "grad_norm": 4.596008777618408, "learning_rate": 4.163208852005533e-05, "log_odds_chosen": 5.339834213256836, "log_odds_ratio": -0.16341853141784668, "logits/chosen": -0.0553714781999588, "logits/rejected": -0.09893985092639923, "logps/chosen": -0.06320115923881531, "logps/rejected": -0.8495229482650757, "loss": 3.7488, "nll_loss": 0.9208630919456482, "rewards/accuracies": 0.875, "rewards/chosen": -0.006320116110146046, "rewards/margins": 0.0786321833729744, "rewards/rejected": -0.08495229482650757, "step": 1204 }, { "epoch": 0.8333333333333334, "grad_norm": 5.455927848815918, "learning_rate": 4.166666666666667e-05, "log_odds_chosen": 4.1446943283081055, "log_odds_ratio": -0.21529428660869598, "logits/chosen": -0.5013213753700256, "logits/rejected": -0.5345667004585266, "logps/chosen": -0.06707189232110977, "logps/rejected": -0.7442373037338257, "loss": 4.5042, "nll_loss": 1.1045323610305786, "rewards/accuracies": 0.875, "rewards/chosen": -0.006707189604640007, "rewards/margins": 0.06771654635667801, "rewards/rejected": -0.07442373782396317, "step": 1205 }, { "epoch": 0.8340248962655602, "grad_norm": 3.082413673400879, "learning_rate": 4.1701244813278014e-05, "log_odds_chosen": 4.069175720214844, "log_odds_ratio": -0.1851607710123062, "logits/chosen": -0.35634279251098633, "logits/rejected": -0.36941099166870117, "logps/chosen": -0.12784144282341003, "logps/rejected": -0.7375493049621582, "loss": 2.9003, "nll_loss": 0.7065519094467163, "rewards/accuracies": 1.0, "rewards/chosen": -0.012784144841134548, "rewards/margins": 0.060970790684223175, "rewards/rejected": -0.0737549364566803, "step": 1206 }, { "epoch": 0.834716459197787, "grad_norm": 7.601669788360596, "learning_rate": 4.1735822959889355e-05, "log_odds_chosen": 0.334051251411438, "log_odds_ratio": -1.0398133993148804, "logits/chosen": -0.4660201072692871, "logits/rejected": -0.48117709159851074, "logps/chosen": -0.1395268589258194, "logps/rejected": -0.13496336340904236, "loss": 5.0357, "nll_loss": 1.1549324989318848, "rewards/accuracies": 0.375, "rewards/chosen": -0.01395268552005291, "rewards/margins": -0.0004563478287309408, "rewards/rejected": -0.013496337458491325, "step": 1207 }, { "epoch": 0.8354080221300139, "grad_norm": 4.260904788970947, "learning_rate": 4.17704011065007e-05, "log_odds_chosen": 4.091191291809082, "log_odds_ratio": -0.24443864822387695, "logits/chosen": -0.08303473889827728, "logits/rejected": -0.11515428870916367, "logps/chosen": -0.04653660207986832, "logps/rejected": -0.5629119277000427, "loss": 3.546, "nll_loss": 0.8620575070381165, "rewards/accuracies": 0.75, "rewards/chosen": -0.004653660114854574, "rewards/margins": 0.05163753405213356, "rewards/rejected": -0.05629119649529457, "step": 1208 }, { "epoch": 0.8360995850622407, "grad_norm": 4.505746364593506, "learning_rate": 4.180497925311204e-05, "log_odds_chosen": 1.8157931566238403, "log_odds_ratio": -0.4644305109977722, "logits/chosen": -0.4604804813861847, "logits/rejected": -0.4551182687282562, "logps/chosen": -0.115799680352211, "logps/rejected": -0.3373379707336426, "loss": 3.9787, "nll_loss": 0.9482215642929077, "rewards/accuracies": 0.75, "rewards/chosen": -0.0115799680352211, "rewards/margins": 0.0221538282930851, "rewards/rejected": -0.0337337963283062, "step": 1209 }, { "epoch": 0.8367911479944675, "grad_norm": 4.050663948059082, "learning_rate": 4.183955739972338e-05, "log_odds_chosen": 1.7767119407653809, "log_odds_ratio": -0.5598441362380981, "logits/chosen": 0.06878723204135895, "logits/rejected": 0.024741366505622864, "logps/chosen": -0.1255115121603012, "logps/rejected": -0.39500701427459717, "loss": 3.7783, "nll_loss": 0.8885964155197144, "rewards/accuracies": 0.75, "rewards/chosen": -0.012551150284707546, "rewards/margins": 0.026949552819132805, "rewards/rejected": -0.039500705897808075, "step": 1210 }, { "epoch": 0.8374827109266944, "grad_norm": 5.5279221534729, "learning_rate": 4.187413554633472e-05, "log_odds_chosen": 2.310128927230835, "log_odds_ratio": -0.36447837948799133, "logits/chosen": -0.4867563545703888, "logits/rejected": -0.48124808073043823, "logps/chosen": -0.1323777288198471, "logps/rejected": -0.5982121229171753, "loss": 3.9506, "nll_loss": 0.9512027502059937, "rewards/accuracies": 0.875, "rewards/chosen": -0.013237773440778255, "rewards/margins": 0.04658343642950058, "rewards/rejected": -0.05982121080160141, "step": 1211 }, { "epoch": 0.8381742738589212, "grad_norm": 3.9652137756347656, "learning_rate": 4.190871369294606e-05, "log_odds_chosen": 4.73415994644165, "log_odds_ratio": -0.16974598169326782, "logits/chosen": -0.6232622861862183, "logits/rejected": -0.7259473204612732, "logps/chosen": -0.07471007108688354, "logps/rejected": -0.8358129858970642, "loss": 3.8503, "nll_loss": 0.9455909729003906, "rewards/accuracies": 1.0, "rewards/chosen": -0.007471007294952869, "rewards/margins": 0.07611028850078583, "rewards/rejected": -0.08358129858970642, "step": 1212 }, { "epoch": 0.838865836791148, "grad_norm": 4.576741695404053, "learning_rate": 4.1943291839557405e-05, "log_odds_chosen": 1.4289312362670898, "log_odds_ratio": -0.35713696479797363, "logits/chosen": -0.029139623045921326, "logits/rejected": -0.03655124455690384, "logps/chosen": -0.1455044150352478, "logps/rejected": -0.6169479489326477, "loss": 3.7704, "nll_loss": 0.906876802444458, "rewards/accuracies": 0.75, "rewards/chosen": -0.01455044187605381, "rewards/margins": 0.04714436084032059, "rewards/rejected": -0.06169480085372925, "step": 1213 }, { "epoch": 0.8395573997233748, "grad_norm": 5.6564483642578125, "learning_rate": 4.1977869986168746e-05, "log_odds_chosen": 1.9149457216262817, "log_odds_ratio": -0.5468197464942932, "logits/chosen": -0.6435422897338867, "logits/rejected": -0.6532891392707825, "logps/chosen": -0.13478265702724457, "logps/rejected": -0.4897097945213318, "loss": 4.3445, "nll_loss": 1.0314515829086304, "rewards/accuracies": 0.625, "rewards/chosen": -0.013478267006576061, "rewards/margins": 0.03549271076917648, "rewards/rejected": -0.04897098243236542, "step": 1214 }, { "epoch": 0.8402489626556017, "grad_norm": 3.8609166145324707, "learning_rate": 4.201244813278009e-05, "log_odds_chosen": 3.7545604705810547, "log_odds_ratio": -0.17774531245231628, "logits/chosen": -0.030912477523088455, "logits/rejected": -0.054544124752283096, "logps/chosen": -0.20503610372543335, "logps/rejected": -0.9788771867752075, "loss": 2.4621, "nll_loss": 0.5977532863616943, "rewards/accuracies": 1.0, "rewards/chosen": -0.020503612235188484, "rewards/margins": 0.0773840993642807, "rewards/rejected": -0.09788771718740463, "step": 1215 }, { "epoch": 0.8409405255878285, "grad_norm": 5.178175449371338, "learning_rate": 4.204702627939143e-05, "log_odds_chosen": 3.330840587615967, "log_odds_ratio": -0.45695760846138, "logits/chosen": -0.4083555340766907, "logits/rejected": -0.3598038852214813, "logps/chosen": -0.1588767021894455, "logps/rejected": -0.6059430241584778, "loss": 3.7545, "nll_loss": 0.8929381966590881, "rewards/accuracies": 0.75, "rewards/chosen": -0.01588767021894455, "rewards/margins": 0.044706642627716064, "rewards/rejected": -0.060594312846660614, "step": 1216 }, { "epoch": 0.8416320885200553, "grad_norm": 4.9921793937683105, "learning_rate": 4.208160442600277e-05, "log_odds_chosen": 3.022162437438965, "log_odds_ratio": -0.4056708812713623, "logits/chosen": -0.6954939365386963, "logits/rejected": -0.6824483275413513, "logps/chosen": -0.07887732237577438, "logps/rejected": -0.39871978759765625, "loss": 4.3949, "nll_loss": 1.0581598281860352, "rewards/accuracies": 0.625, "rewards/chosen": -0.007887732237577438, "rewards/margins": 0.031984247267246246, "rewards/rejected": -0.039871979504823685, "step": 1217 }, { "epoch": 0.8423236514522822, "grad_norm": 7.773573398590088, "learning_rate": 4.211618257261411e-05, "log_odds_chosen": 1.5267056226730347, "log_odds_ratio": -0.7812448740005493, "logits/chosen": -0.2832191288471222, "logits/rejected": -0.28594040870666504, "logps/chosen": -0.181090846657753, "logps/rejected": -0.46923351287841797, "loss": 4.4109, "nll_loss": 1.0245888233184814, "rewards/accuracies": 0.5, "rewards/chosen": -0.01810908503830433, "rewards/margins": 0.02881426364183426, "rewards/rejected": -0.046923354268074036, "step": 1218 }, { "epoch": 0.843015214384509, "grad_norm": 5.341215133666992, "learning_rate": 4.2150760719225454e-05, "log_odds_chosen": 2.800717353820801, "log_odds_ratio": -0.35339123010635376, "logits/chosen": -0.278065949678421, "logits/rejected": -0.24943991005420685, "logps/chosen": -0.10523054003715515, "logps/rejected": -0.4062395989894867, "loss": 3.6332, "nll_loss": 0.8729555606842041, "rewards/accuracies": 0.75, "rewards/chosen": -0.010523054748773575, "rewards/margins": 0.030100908130407333, "rewards/rejected": -0.04062396287918091, "step": 1219 }, { "epoch": 0.8437067773167358, "grad_norm": 6.229884624481201, "learning_rate": 4.2185338865836796e-05, "log_odds_chosen": 3.69413685798645, "log_odds_ratio": -0.267327219247818, "logits/chosen": -0.2839363217353821, "logits/rejected": -0.33933788537979126, "logps/chosen": -0.0874333381652832, "logps/rejected": -0.7815597653388977, "loss": 4.2217, "nll_loss": 1.0287011861801147, "rewards/accuracies": 0.75, "rewards/chosen": -0.00874333456158638, "rewards/margins": 0.06941264122724533, "rewards/rejected": -0.07815597206354141, "step": 1220 }, { "epoch": 0.8443983402489627, "grad_norm": 5.169261455535889, "learning_rate": 4.221991701244814e-05, "log_odds_chosen": 1.908298134803772, "log_odds_ratio": -0.32451504468917847, "logits/chosen": -0.8838093280792236, "logits/rejected": -0.8724334836006165, "logps/chosen": -0.18538136780261993, "logps/rejected": -0.5294378995895386, "loss": 5.8003, "nll_loss": 1.417634129524231, "rewards/accuracies": 0.875, "rewards/chosen": -0.018538137897849083, "rewards/margins": 0.034405652433633804, "rewards/rejected": -0.05294378846883774, "step": 1221 }, { "epoch": 0.8450899031811895, "grad_norm": 7.863767623901367, "learning_rate": 4.225449515905948e-05, "log_odds_chosen": 2.2556004524230957, "log_odds_ratio": -0.3413243889808655, "logits/chosen": -0.6394181251525879, "logits/rejected": -0.670673668384552, "logps/chosen": -0.14786472916603088, "logps/rejected": -0.6458082795143127, "loss": 4.1597, "nll_loss": 1.0057915449142456, "rewards/accuracies": 0.875, "rewards/chosen": -0.014786472544074059, "rewards/margins": 0.049794360995292664, "rewards/rejected": -0.06458082795143127, "step": 1222 }, { "epoch": 0.8457814661134163, "grad_norm": 5.6627936363220215, "learning_rate": 4.228907330567082e-05, "log_odds_chosen": 4.577823162078857, "log_odds_ratio": -0.4264640808105469, "logits/chosen": -0.3070671260356903, "logits/rejected": -0.27830278873443604, "logps/chosen": -0.05938584357500076, "logps/rejected": -0.8323963284492493, "loss": 2.6459, "nll_loss": 0.6188327670097351, "rewards/accuracies": 0.875, "rewards/chosen": -0.005938584450632334, "rewards/margins": 0.07730104774236679, "rewards/rejected": -0.08323963731527328, "step": 1223 }, { "epoch": 0.8464730290456431, "grad_norm": 3.9208574295043945, "learning_rate": 4.232365145228216e-05, "log_odds_chosen": 4.880911827087402, "log_odds_ratio": -0.2090207040309906, "logits/chosen": -0.39382532238960266, "logits/rejected": -0.4027000665664673, "logps/chosen": -0.08941195905208588, "logps/rejected": -0.6881061792373657, "loss": 3.1003, "nll_loss": 0.7541638612747192, "rewards/accuracies": 0.875, "rewards/chosen": -0.008941195905208588, "rewards/margins": 0.059869423508644104, "rewards/rejected": -0.06881061941385269, "step": 1224 }, { "epoch": 0.84716459197787, "grad_norm": 4.687501430511475, "learning_rate": 4.23582295988935e-05, "log_odds_chosen": 3.7448058128356934, "log_odds_ratio": -0.3105209469795227, "logits/chosen": -0.4358472228050232, "logits/rejected": -0.42677658796310425, "logps/chosen": -0.08816111832857132, "logps/rejected": -0.5220236778259277, "loss": 4.3466, "nll_loss": 1.0556085109710693, "rewards/accuracies": 0.75, "rewards/chosen": -0.008816111832857132, "rewards/margins": 0.04338625445961952, "rewards/rejected": -0.052202366292476654, "step": 1225 }, { "epoch": 0.8478561549100968, "grad_norm": 4.883336067199707, "learning_rate": 4.2392807745504845e-05, "log_odds_chosen": 2.8365492820739746, "log_odds_ratio": -0.33779749274253845, "logits/chosen": -0.3609054386615753, "logits/rejected": -0.39662984013557434, "logps/chosen": -0.20170946419239044, "logps/rejected": -0.7530872821807861, "loss": 4.0256, "nll_loss": 0.9726265072822571, "rewards/accuracies": 0.75, "rewards/chosen": -0.020170947536826134, "rewards/margins": 0.05513777956366539, "rewards/rejected": -0.07530872523784637, "step": 1226 }, { "epoch": 0.8485477178423236, "grad_norm": 3.9957022666931152, "learning_rate": 4.2427385892116186e-05, "log_odds_chosen": 1.8730344772338867, "log_odds_ratio": -0.26740092039108276, "logits/chosen": -0.2765858769416809, "logits/rejected": -0.306190550327301, "logps/chosen": -0.09251527488231659, "logps/rejected": -0.4008828401565552, "loss": 3.7071, "nll_loss": 0.9000409245491028, "rewards/accuracies": 0.875, "rewards/chosen": -0.009251527488231659, "rewards/margins": 0.03083675727248192, "rewards/rejected": -0.04008828476071358, "step": 1227 }, { "epoch": 0.8492392807745505, "grad_norm": 5.592484951019287, "learning_rate": 4.246196403872753e-05, "log_odds_chosen": 2.1070966720581055, "log_odds_ratio": -0.45303577184677124, "logits/chosen": -0.2678883373737335, "logits/rejected": -0.3001551628112793, "logps/chosen": -0.0664995014667511, "logps/rejected": -0.4867702126502991, "loss": 4.9924, "nll_loss": 1.2028013467788696, "rewards/accuracies": 0.75, "rewards/chosen": -0.006649950053542852, "rewards/margins": 0.042027074843645096, "rewards/rejected": -0.04867701977491379, "step": 1228 }, { "epoch": 0.8499308437067773, "grad_norm": 3.897392988204956, "learning_rate": 4.249654218533887e-05, "log_odds_chosen": 3.5367512702941895, "log_odds_ratio": -0.3471705913543701, "logits/chosen": -0.1914403736591339, "logits/rejected": -0.2182171642780304, "logps/chosen": -0.0768127590417862, "logps/rejected": -0.5251092314720154, "loss": 2.98, "nll_loss": 0.7102901935577393, "rewards/accuracies": 0.875, "rewards/chosen": -0.0076812757179141045, "rewards/margins": 0.04482964053750038, "rewards/rejected": -0.05251092091202736, "step": 1229 }, { "epoch": 0.8506224066390041, "grad_norm": 6.20721435546875, "learning_rate": 4.253112033195021e-05, "log_odds_chosen": 1.3702822923660278, "log_odds_ratio": -0.45010262727737427, "logits/chosen": -0.7466237545013428, "logits/rejected": -0.7181574702262878, "logps/chosen": -0.14990904927253723, "logps/rejected": -0.45504340529441833, "loss": 5.2804, "nll_loss": 1.2751015424728394, "rewards/accuracies": 0.875, "rewards/chosen": -0.014990905299782753, "rewards/margins": 0.03051343560218811, "rewards/rejected": -0.04550434276461601, "step": 1230 }, { "epoch": 0.851313969571231, "grad_norm": 5.15730619430542, "learning_rate": 4.256569847856155e-05, "log_odds_chosen": 3.9428796768188477, "log_odds_ratio": -0.23902566730976105, "logits/chosen": -0.10978386551141739, "logits/rejected": -0.1629532277584076, "logps/chosen": -0.10805842280387878, "logps/rejected": -0.7425420880317688, "loss": 2.8282, "nll_loss": 0.6831561923027039, "rewards/accuracies": 1.0, "rewards/chosen": -0.010805842466652393, "rewards/margins": 0.06344836950302124, "rewards/rejected": -0.07425420731306076, "step": 1231 }, { "epoch": 0.8520055325034578, "grad_norm": 5.834319591522217, "learning_rate": 4.2600276625172894e-05, "log_odds_chosen": 3.8613672256469727, "log_odds_ratio": -0.2609718143939972, "logits/chosen": -0.2552832365036011, "logits/rejected": -0.30748167634010315, "logps/chosen": -0.10061614215373993, "logps/rejected": -0.9150368571281433, "loss": 3.4123, "nll_loss": 0.8269892930984497, "rewards/accuracies": 0.875, "rewards/chosen": -0.010061614215373993, "rewards/margins": 0.08144207298755646, "rewards/rejected": -0.09150368720293045, "step": 1232 }, { "epoch": 0.8526970954356846, "grad_norm": 5.265030860900879, "learning_rate": 4.2634854771784236e-05, "log_odds_chosen": 3.142730236053467, "log_odds_ratio": -0.35933810472488403, "logits/chosen": -0.662431001663208, "logits/rejected": -0.7470867037773132, "logps/chosen": -0.06795337051153183, "logps/rejected": -0.4906230568885803, "loss": 3.967, "nll_loss": 0.9558151364326477, "rewards/accuracies": 0.75, "rewards/chosen": -0.006795337423682213, "rewards/margins": 0.04226697236299515, "rewards/rejected": -0.04906231164932251, "step": 1233 }, { "epoch": 0.8533886583679114, "grad_norm": 3.716066360473633, "learning_rate": 4.266943291839558e-05, "log_odds_chosen": 2.2964534759521484, "log_odds_ratio": -0.5118023157119751, "logits/chosen": -0.397636353969574, "logits/rejected": -0.4446547031402588, "logps/chosen": -0.19349884986877441, "logps/rejected": -0.4441201090812683, "loss": 3.864, "nll_loss": 0.9148226976394653, "rewards/accuracies": 0.625, "rewards/chosen": -0.01934988610446453, "rewards/margins": 0.02506212517619133, "rewards/rejected": -0.04441201686859131, "step": 1234 }, { "epoch": 0.8540802213001383, "grad_norm": 5.476595401763916, "learning_rate": 4.270401106500692e-05, "log_odds_chosen": 3.079970598220825, "log_odds_ratio": -0.22505627572536469, "logits/chosen": -0.44705498218536377, "logits/rejected": -0.4541969299316406, "logps/chosen": -0.08623314648866653, "logps/rejected": -0.7812601327896118, "loss": 4.4526, "nll_loss": 1.090645670890808, "rewards/accuracies": 0.875, "rewards/chosen": -0.008623314090073109, "rewards/margins": 0.06950270384550095, "rewards/rejected": -0.07812602072954178, "step": 1235 }, { "epoch": 0.8547717842323651, "grad_norm": 4.144224166870117, "learning_rate": 4.273858921161826e-05, "log_odds_chosen": 3.158641815185547, "log_odds_ratio": -0.24196459352970123, "logits/chosen": -0.33807045221328735, "logits/rejected": -0.30896690487861633, "logps/chosen": -0.12600083649158478, "logps/rejected": -0.7073169946670532, "loss": 3.5475, "nll_loss": 0.8626745343208313, "rewards/accuracies": 1.0, "rewards/chosen": -0.012600085698068142, "rewards/margins": 0.058131616562604904, "rewards/rejected": -0.07073169946670532, "step": 1236 }, { "epoch": 0.8554633471645919, "grad_norm": 4.101622581481934, "learning_rate": 4.27731673582296e-05, "log_odds_chosen": 4.937752723693848, "log_odds_ratio": -0.21346403658390045, "logits/chosen": -0.29906758666038513, "logits/rejected": -0.35858187079429626, "logps/chosen": -0.09986451268196106, "logps/rejected": -0.7788125872612, "loss": 3.6658, "nll_loss": 0.895104706287384, "rewards/accuracies": 0.875, "rewards/chosen": -0.00998645182698965, "rewards/margins": 0.06789480149745941, "rewards/rejected": -0.07788125425577164, "step": 1237 }, { "epoch": 0.8561549100968188, "grad_norm": 10.665369987487793, "learning_rate": 4.2807745504840944e-05, "log_odds_chosen": 0.5083409547805786, "log_odds_ratio": -0.9532041549682617, "logits/chosen": -0.5494678020477295, "logits/rejected": -0.549345850944519, "logps/chosen": -0.2034616321325302, "logps/rejected": -0.4447656571865082, "loss": 5.7725, "nll_loss": 1.3478106260299683, "rewards/accuracies": 0.75, "rewards/chosen": -0.02034616470336914, "rewards/margins": 0.024130402132868767, "rewards/rejected": -0.04447656869888306, "step": 1238 }, { "epoch": 0.8568464730290456, "grad_norm": 4.152436256408691, "learning_rate": 4.2842323651452285e-05, "log_odds_chosen": 3.5537776947021484, "log_odds_ratio": -0.4564933776855469, "logits/chosen": -0.6331797242164612, "logits/rejected": -0.64528888463974, "logps/chosen": -0.11556783318519592, "logps/rejected": -0.4677344262599945, "loss": 3.8412, "nll_loss": 0.9146407842636108, "rewards/accuracies": 0.875, "rewards/chosen": -0.011556783691048622, "rewards/margins": 0.03521666303277016, "rewards/rejected": -0.04677344113588333, "step": 1239 }, { "epoch": 0.8575380359612724, "grad_norm": 4.356481552124023, "learning_rate": 4.287690179806363e-05, "log_odds_chosen": 2.6086325645446777, "log_odds_ratio": -0.24501118063926697, "logits/chosen": -0.3237851858139038, "logits/rejected": -0.3254019618034363, "logps/chosen": -0.10340925306081772, "logps/rejected": -0.3051115870475769, "loss": 3.9994, "nll_loss": 0.9753445386886597, "rewards/accuracies": 1.0, "rewards/chosen": -0.010340925306081772, "rewards/margins": 0.02017023414373398, "rewards/rejected": -0.03051115944981575, "step": 1240 }, { "epoch": 0.8582295988934993, "grad_norm": 3.881535530090332, "learning_rate": 4.291147994467496e-05, "log_odds_chosen": 3.739959716796875, "log_odds_ratio": -0.20326995849609375, "logits/chosen": -0.5520289540290833, "logits/rejected": -0.5822651386260986, "logps/chosen": -0.13151343166828156, "logps/rejected": -0.7232505679130554, "loss": 3.8902, "nll_loss": 0.9522209167480469, "rewards/accuracies": 1.0, "rewards/chosen": -0.013151343911886215, "rewards/margins": 0.059173714369535446, "rewards/rejected": -0.07232505828142166, "step": 1241 }, { "epoch": 0.8589211618257261, "grad_norm": 5.007400989532471, "learning_rate": 4.29460580912863e-05, "log_odds_chosen": 0.6331380009651184, "log_odds_ratio": -0.6973768472671509, "logits/chosen": -0.3870214521884918, "logits/rejected": -0.386497437953949, "logps/chosen": -0.23055224120616913, "logps/rejected": -0.3669086992740631, "loss": 4.5481, "nll_loss": 1.0672754049301147, "rewards/accuracies": 0.625, "rewards/chosen": -0.023055225610733032, "rewards/margins": 0.013635647483170033, "rewards/rejected": -0.03669087216258049, "step": 1242 }, { "epoch": 0.859612724757953, "grad_norm": 4.997777462005615, "learning_rate": 4.298063623789765e-05, "log_odds_chosen": 2.6052985191345215, "log_odds_ratio": -0.16318482160568237, "logits/chosen": -0.5671769380569458, "logits/rejected": -0.607761800289154, "logps/chosen": -0.06360312551259995, "logps/rejected": -0.5905706286430359, "loss": 4.4011, "nll_loss": 1.0839494466781616, "rewards/accuracies": 1.0, "rewards/chosen": -0.006360312458127737, "rewards/margins": 0.052696749567985535, "rewards/rejected": -0.05905706062912941, "step": 1243 }, { "epoch": 0.8603042876901799, "grad_norm": 3.7530622482299805, "learning_rate": 4.301521438450899e-05, "log_odds_chosen": 3.010711669921875, "log_odds_ratio": -0.33168599009513855, "logits/chosen": -0.6687760353088379, "logits/rejected": -0.7112399339675903, "logps/chosen": -0.12361060082912445, "logps/rejected": -0.4488832950592041, "loss": 3.901, "nll_loss": 0.9420774579048157, "rewards/accuracies": 0.875, "rewards/chosen": -0.01236105989664793, "rewards/margins": 0.032527267932891846, "rewards/rejected": -0.04488833248615265, "step": 1244 }, { "epoch": 0.8609958506224067, "grad_norm": 3.3816781044006348, "learning_rate": 4.3049792531120335e-05, "log_odds_chosen": 5.055641174316406, "log_odds_ratio": -0.3109224736690521, "logits/chosen": -0.47822022438049316, "logits/rejected": -0.48512572050094604, "logps/chosen": -0.06640556454658508, "logps/rejected": -0.7292112708091736, "loss": 2.9014, "nll_loss": 0.6942633986473083, "rewards/accuracies": 0.875, "rewards/chosen": -0.006640556268393993, "rewards/margins": 0.06628057360649109, "rewards/rejected": -0.07292113453149796, "step": 1245 }, { "epoch": 0.8616874135546335, "grad_norm": 4.799623489379883, "learning_rate": 4.3084370677731676e-05, "log_odds_chosen": 3.1673483848571777, "log_odds_ratio": -0.23026300966739655, "logits/chosen": -0.5659746527671814, "logits/rejected": -0.6104631423950195, "logps/chosen": -0.1196167916059494, "logps/rejected": -0.6679284572601318, "loss": 3.9385, "nll_loss": 0.9616028666496277, "rewards/accuracies": 0.875, "rewards/chosen": -0.011961679905653, "rewards/margins": 0.05483117699623108, "rewards/rejected": -0.06679285317659378, "step": 1246 }, { "epoch": 0.8623789764868603, "grad_norm": 4.420276165008545, "learning_rate": 4.311894882434302e-05, "log_odds_chosen": 2.7055413722991943, "log_odds_ratio": -0.4239078760147095, "logits/chosen": -0.6248461604118347, "logits/rejected": -0.5993098616600037, "logps/chosen": -0.15310505032539368, "logps/rejected": -0.5843885540962219, "loss": 3.6267, "nll_loss": 0.8642741441726685, "rewards/accuracies": 0.75, "rewards/chosen": -0.015310506336390972, "rewards/margins": 0.043128348886966705, "rewards/rejected": -0.05843885987997055, "step": 1247 }, { "epoch": 0.8630705394190872, "grad_norm": 5.892736911773682, "learning_rate": 4.315352697095436e-05, "log_odds_chosen": 0.3708970546722412, "log_odds_ratio": -0.6783088445663452, "logits/chosen": -0.5890284180641174, "logits/rejected": -0.6571257710456848, "logps/chosen": -0.2017858624458313, "logps/rejected": -0.2839735746383667, "loss": 4.9916, "nll_loss": 1.1800813674926758, "rewards/accuracies": 0.625, "rewards/chosen": -0.02017858810722828, "rewards/margins": 0.008218769915401936, "rewards/rejected": -0.02839735709130764, "step": 1248 }, { "epoch": 0.863762102351314, "grad_norm": 4.3697614669799805, "learning_rate": 4.31881051175657e-05, "log_odds_chosen": 3.982241153717041, "log_odds_ratio": -0.3116607069969177, "logits/chosen": -0.20677393674850464, "logits/rejected": -0.2635921537876129, "logps/chosen": -0.052336186170578, "logps/rejected": -0.4851013123989105, "loss": 3.9766, "nll_loss": 0.9629923105239868, "rewards/accuracies": 0.75, "rewards/chosen": -0.005233618896454573, "rewards/margins": 0.04327651113271713, "rewards/rejected": -0.04851013422012329, "step": 1249 }, { "epoch": 0.8644536652835408, "grad_norm": 5.986281394958496, "learning_rate": 4.322268326417704e-05, "log_odds_chosen": 3.4355010986328125, "log_odds_ratio": -0.6999623775482178, "logits/chosen": -0.4995029866695404, "logits/rejected": -0.517444372177124, "logps/chosen": -0.18629102408885956, "logps/rejected": -0.836786687374115, "loss": 4.1068, "nll_loss": 0.9566999673843384, "rewards/accuracies": 0.875, "rewards/chosen": -0.018629100173711777, "rewards/margins": 0.06504955887794495, "rewards/rejected": -0.08367866277694702, "step": 1250 }, { "epoch": 0.8651452282157677, "grad_norm": 5.7515363693237305, "learning_rate": 4.3257261410788384e-05, "log_odds_chosen": 1.052149772644043, "log_odds_ratio": -0.4477373957633972, "logits/chosen": -0.34615832567214966, "logits/rejected": -0.34621891379356384, "logps/chosen": -0.11608318239450455, "logps/rejected": -0.31631356477737427, "loss": 4.7191, "nll_loss": 1.1350071430206299, "rewards/accuracies": 0.75, "rewards/chosen": -0.011608317494392395, "rewards/margins": 0.020023038610816002, "rewards/rejected": -0.031631357967853546, "step": 1251 }, { "epoch": 0.8658367911479945, "grad_norm": 5.770718574523926, "learning_rate": 4.3291839557399726e-05, "log_odds_chosen": 1.1486060619354248, "log_odds_ratio": -0.5084373950958252, "logits/chosen": -0.21859028935432434, "logits/rejected": -0.2105821967124939, "logps/chosen": -0.16546833515167236, "logps/rejected": -0.3545966148376465, "loss": 4.3302, "nll_loss": 1.0317028760910034, "rewards/accuracies": 0.75, "rewards/chosen": -0.016546836122870445, "rewards/margins": 0.018912825733423233, "rewards/rejected": -0.03545965999364853, "step": 1252 }, { "epoch": 0.8665283540802213, "grad_norm": 4.072329998016357, "learning_rate": 4.332641770401107e-05, "log_odds_chosen": 4.862940788269043, "log_odds_ratio": -0.339602530002594, "logits/chosen": 0.11724947392940521, "logits/rejected": 0.12245957553386688, "logps/chosen": -0.05847052484750748, "logps/rejected": -0.5670610070228577, "loss": 2.1563, "nll_loss": 0.5051190853118896, "rewards/accuracies": 0.875, "rewards/chosen": -0.005847052205353975, "rewards/margins": 0.05085904896259308, "rewards/rejected": -0.05670610070228577, "step": 1253 }, { "epoch": 0.8672199170124482, "grad_norm": 3.4656217098236084, "learning_rate": 4.336099585062241e-05, "log_odds_chosen": 3.4801251888275146, "log_odds_ratio": -0.31768786907196045, "logits/chosen": -0.5136781930923462, "logits/rejected": -0.5579401850700378, "logps/chosen": -0.18964815139770508, "logps/rejected": -0.8065962791442871, "loss": 3.0265, "nll_loss": 0.7248650193214417, "rewards/accuracies": 0.875, "rewards/chosen": -0.01896481402218342, "rewards/margins": 0.06169482320547104, "rewards/rejected": -0.08065963536500931, "step": 1254 }, { "epoch": 0.867911479944675, "grad_norm": 7.574423313140869, "learning_rate": 4.339557399723375e-05, "log_odds_chosen": 2.4200801849365234, "log_odds_ratio": -0.5579121112823486, "logits/chosen": -0.8971307873725891, "logits/rejected": -0.8899210691452026, "logps/chosen": -0.07807845622301102, "logps/rejected": -0.6198257207870483, "loss": 3.4674, "nll_loss": 0.8110649585723877, "rewards/accuracies": 0.625, "rewards/chosen": -0.0078078461810946465, "rewards/margins": 0.054174721240997314, "rewards/rejected": -0.061982572078704834, "step": 1255 }, { "epoch": 0.8686030428769018, "grad_norm": 2.9485676288604736, "learning_rate": 4.343015214384509e-05, "log_odds_chosen": 5.759621620178223, "log_odds_ratio": -0.08616334944963455, "logits/chosen": -0.1814260631799698, "logits/rejected": -0.18436534702777863, "logps/chosen": -0.032411232590675354, "logps/rejected": -0.5909621119499207, "loss": 2.5601, "nll_loss": 0.6313992142677307, "rewards/accuracies": 1.0, "rewards/chosen": -0.003241123864427209, "rewards/margins": 0.05585508793592453, "rewards/rejected": -0.059096213430166245, "step": 1256 }, { "epoch": 0.8692946058091287, "grad_norm": 5.415265083312988, "learning_rate": 4.346473029045643e-05, "log_odds_chosen": 2.480560779571533, "log_odds_ratio": -0.4247099459171295, "logits/chosen": -0.4166119694709778, "logits/rejected": -0.4697090685367584, "logps/chosen": -0.14666330814361572, "logps/rejected": -0.5061662793159485, "loss": 4.6707, "nll_loss": 1.1251946687698364, "rewards/accuracies": 0.75, "rewards/chosen": -0.014666330069303513, "rewards/margins": 0.03595029562711716, "rewards/rejected": -0.05061662942171097, "step": 1257 }, { "epoch": 0.8699861687413555, "grad_norm": 3.902346611022949, "learning_rate": 4.3499308437067775e-05, "log_odds_chosen": 2.743802070617676, "log_odds_ratio": -0.3696817457675934, "logits/chosen": -0.5543731451034546, "logits/rejected": -0.5735875368118286, "logps/chosen": -0.08180180191993713, "logps/rejected": -0.4036181569099426, "loss": 3.124, "nll_loss": 0.7440320253372192, "rewards/accuracies": 0.75, "rewards/chosen": -0.008180180564522743, "rewards/margins": 0.03218163549900055, "rewards/rejected": -0.04036181420087814, "step": 1258 }, { "epoch": 0.8706777316735823, "grad_norm": 5.200778961181641, "learning_rate": 4.3533886583679116e-05, "log_odds_chosen": 0.7463172078132629, "log_odds_ratio": -0.7367535829544067, "logits/chosen": -0.5876007676124573, "logits/rejected": -0.5731717944145203, "logps/chosen": -0.2275894433259964, "logps/rejected": -0.3888647258281708, "loss": 3.7748, "nll_loss": 0.8700259923934937, "rewards/accuracies": 0.5, "rewards/chosen": -0.02275894396007061, "rewards/margins": 0.016127530485391617, "rewards/rejected": -0.03888647258281708, "step": 1259 }, { "epoch": 0.8713692946058091, "grad_norm": 6.475817680358887, "learning_rate": 4.356846473029046e-05, "log_odds_chosen": 0.4220742881298065, "log_odds_ratio": -0.5629880428314209, "logits/chosen": -0.4277142286300659, "logits/rejected": -0.4545121192932129, "logps/chosen": -0.16203731298446655, "logps/rejected": -0.28819459676742554, "loss": 5.9425, "nll_loss": 1.4293360710144043, "rewards/accuracies": 0.75, "rewards/chosen": -0.016203733161091805, "rewards/margins": 0.012615729123353958, "rewards/rejected": -0.028819462284445763, "step": 1260 }, { "epoch": 0.872060857538036, "grad_norm": 3.48825740814209, "learning_rate": 4.36030428769018e-05, "log_odds_chosen": 4.7352705001831055, "log_odds_ratio": -0.20084872841835022, "logits/chosen": -0.5206946134567261, "logits/rejected": -0.561223030090332, "logps/chosen": -0.06274496763944626, "logps/rejected": -0.6954832077026367, "loss": 3.657, "nll_loss": 0.8941764235496521, "rewards/accuracies": 0.875, "rewards/chosen": -0.006274497136473656, "rewards/margins": 0.0632738322019577, "rewards/rejected": -0.06954832375049591, "step": 1261 }, { "epoch": 0.8727524204702628, "grad_norm": 4.392824649810791, "learning_rate": 4.363762102351314e-05, "log_odds_chosen": 1.6250078678131104, "log_odds_ratio": -0.30818629264831543, "logits/chosen": -0.6393093466758728, "logits/rejected": -0.6915218830108643, "logps/chosen": -0.08740460127592087, "logps/rejected": -0.2715250849723816, "loss": 4.279, "nll_loss": 1.038927435874939, "rewards/accuracies": 0.875, "rewards/chosen": -0.008740460500121117, "rewards/margins": 0.018412049859762192, "rewards/rejected": -0.02715251035988331, "step": 1262 }, { "epoch": 0.8734439834024896, "grad_norm": 7.467405796051025, "learning_rate": 4.367219917012448e-05, "log_odds_chosen": 2.354684829711914, "log_odds_ratio": -0.36135029792785645, "logits/chosen": -0.4317726492881775, "logits/rejected": -0.49516892433166504, "logps/chosen": -0.13952602446079254, "logps/rejected": -0.6113516092300415, "loss": 4.828, "nll_loss": 1.170866847038269, "rewards/accuracies": 0.75, "rewards/chosen": -0.013952603563666344, "rewards/margins": 0.04718255624175072, "rewards/rejected": -0.06113515794277191, "step": 1263 }, { "epoch": 0.8741355463347165, "grad_norm": 4.659549236297607, "learning_rate": 4.3706777316735824e-05, "log_odds_chosen": 3.3404979705810547, "log_odds_ratio": -0.2198869287967682, "logits/chosen": -0.44002625346183777, "logits/rejected": -0.5099364519119263, "logps/chosen": -0.07723425328731537, "logps/rejected": -0.7068568468093872, "loss": 3.4741, "nll_loss": 0.8465284109115601, "rewards/accuracies": 0.875, "rewards/chosen": -0.0077234250493347645, "rewards/margins": 0.06296226382255554, "rewards/rejected": -0.07068568468093872, "step": 1264 }, { "epoch": 0.8748271092669433, "grad_norm": 3.586311101913452, "learning_rate": 4.3741355463347166e-05, "log_odds_chosen": 2.737506151199341, "log_odds_ratio": -0.3485250771045685, "logits/chosen": -0.48119568824768066, "logits/rejected": -0.4383758306503296, "logps/chosen": -0.12813690304756165, "logps/rejected": -0.4432229995727539, "loss": 3.5006, "nll_loss": 0.8403025269508362, "rewards/accuracies": 0.875, "rewards/chosen": -0.012813691049814224, "rewards/margins": 0.031508613377809525, "rewards/rejected": -0.04432230070233345, "step": 1265 }, { "epoch": 0.8755186721991701, "grad_norm": 5.795801639556885, "learning_rate": 4.377593360995851e-05, "log_odds_chosen": 1.0400015115737915, "log_odds_ratio": -0.7847639322280884, "logits/chosen": -0.7246870994567871, "logits/rejected": -0.7208892107009888, "logps/chosen": -0.1682136058807373, "logps/rejected": -0.30135980248451233, "loss": 5.2164, "nll_loss": 1.2256345748901367, "rewards/accuracies": 0.75, "rewards/chosen": -0.01682136207818985, "rewards/margins": 0.013314621523022652, "rewards/rejected": -0.030135981738567352, "step": 1266 }, { "epoch": 0.876210235131397, "grad_norm": 4.90360164642334, "learning_rate": 4.381051175656985e-05, "log_odds_chosen": 1.7198656797409058, "log_odds_ratio": -0.39128538966178894, "logits/chosen": -0.7646574974060059, "logits/rejected": -0.8175445795059204, "logps/chosen": -0.1509498655796051, "logps/rejected": -0.6070072650909424, "loss": 4.1454, "nll_loss": 0.9972254037857056, "rewards/accuracies": 0.75, "rewards/chosen": -0.01509498618543148, "rewards/margins": 0.04560573399066925, "rewards/rejected": -0.06070072203874588, "step": 1267 }, { "epoch": 0.8769017980636238, "grad_norm": 4.177268028259277, "learning_rate": 4.384508990318119e-05, "log_odds_chosen": 0.8822661638259888, "log_odds_ratio": -0.5075497627258301, "logits/chosen": -0.3998650908470154, "logits/rejected": -0.40977737307548523, "logps/chosen": -0.173186793923378, "logps/rejected": -0.38849419355392456, "loss": 3.9601, "nll_loss": 0.9392576217651367, "rewards/accuracies": 0.625, "rewards/chosen": -0.01731867901980877, "rewards/margins": 0.021530739963054657, "rewards/rejected": -0.03884941712021828, "step": 1268 }, { "epoch": 0.8775933609958506, "grad_norm": 2.707632303237915, "learning_rate": 4.387966804979253e-05, "log_odds_chosen": 1.4121384620666504, "log_odds_ratio": -0.3443738520145416, "logits/chosen": -0.6001982092857361, "logits/rejected": -0.606619119644165, "logps/chosen": -0.09227811545133591, "logps/rejected": -0.4715130925178528, "loss": 3.0712, "nll_loss": 0.7333630919456482, "rewards/accuracies": 0.75, "rewards/chosen": -0.009227811358869076, "rewards/margins": 0.03792349994182587, "rewards/rejected": -0.04715131223201752, "step": 1269 }, { "epoch": 0.8782849239280774, "grad_norm": 5.621554374694824, "learning_rate": 4.3914246196403874e-05, "log_odds_chosen": 2.2095069885253906, "log_odds_ratio": -0.3407396078109741, "logits/chosen": -0.4042474031448364, "logits/rejected": -0.461988240480423, "logps/chosen": -0.0528687983751297, "logps/rejected": -0.4888181686401367, "loss": 4.8491, "nll_loss": 1.1781988143920898, "rewards/accuracies": 0.75, "rewards/chosen": -0.005286880303174257, "rewards/margins": 0.04359494149684906, "rewards/rejected": -0.04888181760907173, "step": 1270 }, { "epoch": 0.8789764868603043, "grad_norm": 4.035280704498291, "learning_rate": 4.3948824343015215e-05, "log_odds_chosen": 1.842458963394165, "log_odds_ratio": -0.353007972240448, "logits/chosen": -0.1685333549976349, "logits/rejected": -0.2191891223192215, "logps/chosen": -0.09284445643424988, "logps/rejected": -0.5506631135940552, "loss": 3.2703, "nll_loss": 0.782278835773468, "rewards/accuracies": 0.875, "rewards/chosen": -0.009284445084631443, "rewards/margins": 0.04578186571598053, "rewards/rejected": -0.0550663098692894, "step": 1271 }, { "epoch": 0.8796680497925311, "grad_norm": 6.146232604980469, "learning_rate": 4.398340248962656e-05, "log_odds_chosen": 2.9054505825042725, "log_odds_ratio": -0.2413417100906372, "logits/chosen": -0.11257967352867126, "logits/rejected": -0.18791714310646057, "logps/chosen": -0.0745718702673912, "logps/rejected": -0.5622603893280029, "loss": 5.5758, "nll_loss": 1.3698159456253052, "rewards/accuracies": 0.875, "rewards/chosen": -0.007457186467945576, "rewards/margins": 0.048768848180770874, "rewards/rejected": -0.056226037442684174, "step": 1272 }, { "epoch": 0.8803596127247579, "grad_norm": 4.25452184677124, "learning_rate": 4.40179806362379e-05, "log_odds_chosen": 3.095830202102661, "log_odds_ratio": -0.48948919773101807, "logits/chosen": -0.1420711725950241, "logits/rejected": -0.12481048703193665, "logps/chosen": -0.09185618162155151, "logps/rejected": -0.44011950492858887, "loss": 3.1873, "nll_loss": 0.7478775382041931, "rewards/accuracies": 0.625, "rewards/chosen": -0.009185617789626122, "rewards/margins": 0.034826330840587616, "rewards/rejected": -0.04401195049285889, "step": 1273 }, { "epoch": 0.8810511756569848, "grad_norm": 2.990342855453491, "learning_rate": 4.405255878284924e-05, "log_odds_chosen": 4.47576904296875, "log_odds_ratio": -0.22933019697666168, "logits/chosen": -0.43872299790382385, "logits/rejected": -0.47745269536972046, "logps/chosen": -0.11944451928138733, "logps/rejected": -0.690141499042511, "loss": 2.723, "nll_loss": 0.6578062772750854, "rewards/accuracies": 0.875, "rewards/chosen": -0.011944452300667763, "rewards/margins": 0.057069696485996246, "rewards/rejected": -0.06901414692401886, "step": 1274 }, { "epoch": 0.8817427385892116, "grad_norm": 4.838727951049805, "learning_rate": 4.408713692946058e-05, "log_odds_chosen": 0.7123834490776062, "log_odds_ratio": -0.63567054271698, "logits/chosen": -0.774634599685669, "logits/rejected": -0.7992992401123047, "logps/chosen": -0.16042813658714294, "logps/rejected": -0.3580513894557953, "loss": 4.8929, "nll_loss": 1.1596554517745972, "rewards/accuracies": 0.375, "rewards/chosen": -0.016042813658714294, "rewards/margins": 0.019762322306632996, "rewards/rejected": -0.03580513596534729, "step": 1275 }, { "epoch": 0.8824343015214384, "grad_norm": 3.9668753147125244, "learning_rate": 4.412171507607192e-05, "log_odds_chosen": 4.125424385070801, "log_odds_ratio": -0.0830477774143219, "logits/chosen": -0.3952116370201111, "logits/rejected": -0.45113319158554077, "logps/chosen": -0.04570968449115753, "logps/rejected": -0.5450409054756165, "loss": 3.504, "nll_loss": 0.8676958084106445, "rewards/accuracies": 1.0, "rewards/chosen": -0.004570968449115753, "rewards/margins": 0.04993312433362007, "rewards/rejected": -0.054504092782735825, "step": 1276 }, { "epoch": 0.8831258644536653, "grad_norm": 3.731466770172119, "learning_rate": 4.4156293222683265e-05, "log_odds_chosen": 1.79964280128479, "log_odds_ratio": -0.48185083270072937, "logits/chosen": -0.6289352774620056, "logits/rejected": -0.6494641304016113, "logps/chosen": -0.13553780317306519, "logps/rejected": -0.45093098282814026, "loss": 3.3266, "nll_loss": 0.7834726572036743, "rewards/accuracies": 0.625, "rewards/chosen": -0.013553779572248459, "rewards/margins": 0.031539320945739746, "rewards/rejected": -0.045093100517988205, "step": 1277 }, { "epoch": 0.8838174273858921, "grad_norm": 4.510237216949463, "learning_rate": 4.4190871369294606e-05, "log_odds_chosen": 1.7436089515686035, "log_odds_ratio": -0.4460057020187378, "logits/chosen": 0.017364047467708588, "logits/rejected": 0.0052915215492248535, "logps/chosen": -0.13879433274269104, "logps/rejected": -0.4545135200023651, "loss": 3.6902, "nll_loss": 0.877946138381958, "rewards/accuracies": 0.75, "rewards/chosen": -0.013879433274269104, "rewards/margins": 0.031571924686431885, "rewards/rejected": -0.04545135423541069, "step": 1278 }, { "epoch": 0.8845089903181189, "grad_norm": 3.648057222366333, "learning_rate": 4.422544951590595e-05, "log_odds_chosen": 3.3090474605560303, "log_odds_ratio": -0.20653875172138214, "logits/chosen": -0.4803401529788971, "logits/rejected": -0.516859769821167, "logps/chosen": -0.08723768591880798, "logps/rejected": -0.630642294883728, "loss": 3.0028, "nll_loss": 0.7300387620925903, "rewards/accuracies": 1.0, "rewards/chosen": -0.008723769336938858, "rewards/margins": 0.054340463131666183, "rewards/rejected": -0.06306423246860504, "step": 1279 }, { "epoch": 0.8852005532503457, "grad_norm": 4.398556232452393, "learning_rate": 4.426002766251729e-05, "log_odds_chosen": 1.5525989532470703, "log_odds_ratio": -0.5788559317588806, "logits/chosen": -0.4554845690727234, "logits/rejected": -0.44896024465560913, "logps/chosen": -0.17942111194133759, "logps/rejected": -0.49931180477142334, "loss": 3.7082, "nll_loss": 0.8691673874855042, "rewards/accuracies": 0.75, "rewards/chosen": -0.017942111939191818, "rewards/margins": 0.031989071518182755, "rewards/rejected": -0.04993118345737457, "step": 1280 }, { "epoch": 0.8858921161825726, "grad_norm": 5.154435634613037, "learning_rate": 4.429460580912863e-05, "log_odds_chosen": 1.6350841522216797, "log_odds_ratio": -0.35474780201911926, "logits/chosen": -0.5933663249015808, "logits/rejected": -0.6430088877677917, "logps/chosen": -0.11515937745571136, "logps/rejected": -0.4523059129714966, "loss": 4.1953, "nll_loss": 1.0133419036865234, "rewards/accuracies": 0.75, "rewards/chosen": -0.011515937745571136, "rewards/margins": 0.0337146520614624, "rewards/rejected": -0.04523058980703354, "step": 1281 }, { "epoch": 0.8865836791147994, "grad_norm": 4.977684020996094, "learning_rate": 4.432918395573997e-05, "log_odds_chosen": 5.273676872253418, "log_odds_ratio": -0.21022650599479675, "logits/chosen": -0.4650052487850189, "logits/rejected": -0.5259711742401123, "logps/chosen": -0.09879347681999207, "logps/rejected": -0.7697173357009888, "loss": 3.1604, "nll_loss": 0.7690898180007935, "rewards/accuracies": 0.875, "rewards/chosen": -0.009879347868263721, "rewards/margins": 0.06709238886833191, "rewards/rejected": -0.07697173207998276, "step": 1282 }, { "epoch": 0.8872752420470262, "grad_norm": 3.8348000049591064, "learning_rate": 4.4363762102351314e-05, "log_odds_chosen": 4.379862308502197, "log_odds_ratio": -0.35318854451179504, "logits/chosen": -0.4663954973220825, "logits/rejected": -0.5029880404472351, "logps/chosen": -0.07573398947715759, "logps/rejected": -0.7639623880386353, "loss": 2.9837, "nll_loss": 0.7106069922447205, "rewards/accuracies": 0.75, "rewards/chosen": -0.007573399692773819, "rewards/margins": 0.06882283836603165, "rewards/rejected": -0.07639623433351517, "step": 1283 }, { "epoch": 0.8879668049792531, "grad_norm": 5.035210609436035, "learning_rate": 4.4398340248962656e-05, "log_odds_chosen": 1.4314640760421753, "log_odds_ratio": -0.4312036335468292, "logits/chosen": -0.406145840883255, "logits/rejected": -0.36703863739967346, "logps/chosen": -0.12340890616178513, "logps/rejected": -0.3170912265777588, "loss": 4.0451, "nll_loss": 0.9681638479232788, "rewards/accuracies": 0.75, "rewards/chosen": -0.012340890243649483, "rewards/margins": 0.019368231296539307, "rewards/rejected": -0.03170912340283394, "step": 1284 }, { "epoch": 0.8886583679114799, "grad_norm": 5.095395088195801, "learning_rate": 4.4432918395574e-05, "log_odds_chosen": 0.9760552048683167, "log_odds_ratio": -0.7666757702827454, "logits/chosen": -0.4045482277870178, "logits/rejected": -0.4359784722328186, "logps/chosen": -0.3768349587917328, "logps/rejected": -0.4175845980644226, "loss": 4.5482, "nll_loss": 1.060389518737793, "rewards/accuracies": 0.625, "rewards/chosen": -0.03768349438905716, "rewards/margins": 0.00407496839761734, "rewards/rejected": -0.0417584627866745, "step": 1285 }, { "epoch": 0.8893499308437067, "grad_norm": 2.2948174476623535, "learning_rate": 4.446749654218534e-05, "log_odds_chosen": 3.8000855445861816, "log_odds_ratio": -0.1091739609837532, "logits/chosen": -0.6121377944946289, "logits/rejected": -0.568754255771637, "logps/chosen": -0.044313665479421616, "logps/rejected": -0.5707491636276245, "loss": 2.6511, "nll_loss": 0.6518504619598389, "rewards/accuracies": 1.0, "rewards/chosen": -0.004431366454809904, "rewards/margins": 0.0526435561478138, "rewards/rejected": -0.05707491934299469, "step": 1286 }, { "epoch": 0.8900414937759336, "grad_norm": 4.04536247253418, "learning_rate": 4.450207468879668e-05, "log_odds_chosen": 3.1700429916381836, "log_odds_ratio": -0.4066445827484131, "logits/chosen": -0.6255698204040527, "logits/rejected": -0.6513446569442749, "logps/chosen": -0.11673114448785782, "logps/rejected": -0.700387716293335, "loss": 3.6983, "nll_loss": 0.8839007616043091, "rewards/accuracies": 0.75, "rewards/chosen": -0.011673114262521267, "rewards/margins": 0.058365657925605774, "rewards/rejected": -0.07003877311944962, "step": 1287 }, { "epoch": 0.8907330567081605, "grad_norm": 7.219163417816162, "learning_rate": 4.453665283540803e-05, "log_odds_chosen": 3.674023151397705, "log_odds_ratio": -0.47398021817207336, "logits/chosen": -0.39232513308525085, "logits/rejected": -0.41735202074050903, "logps/chosen": -0.10538452863693237, "logps/rejected": -0.6500795483589172, "loss": 4.1308, "nll_loss": 0.9852948188781738, "rewards/accuracies": 0.75, "rewards/chosen": -0.010538453236222267, "rewards/margins": 0.054469503462314606, "rewards/rejected": -0.06500795483589172, "step": 1288 }, { "epoch": 0.8914246196403873, "grad_norm": 4.936799049377441, "learning_rate": 4.457123098201937e-05, "log_odds_chosen": 1.5761115550994873, "log_odds_ratio": -0.6042423248291016, "logits/chosen": -0.5079135894775391, "logits/rejected": -0.5012636780738831, "logps/chosen": -0.20412951707839966, "logps/rejected": -0.3674103915691376, "loss": 3.8832, "nll_loss": 0.9103636741638184, "rewards/accuracies": 0.75, "rewards/chosen": -0.020412951707839966, "rewards/margins": 0.016328085213899612, "rewards/rejected": -0.03674103692173958, "step": 1289 }, { "epoch": 0.8921161825726142, "grad_norm": 3.823768377304077, "learning_rate": 4.460580912863071e-05, "log_odds_chosen": 6.826704502105713, "log_odds_ratio": -0.05582105368375778, "logits/chosen": -0.3447270691394806, "logits/rejected": -0.4047142565250397, "logps/chosen": -0.023953121155500412, "logps/rejected": -0.9755678772926331, "loss": 3.1965, "nll_loss": 0.793531060218811, "rewards/accuracies": 1.0, "rewards/chosen": -0.002395312301814556, "rewards/margins": 0.09516146779060364, "rewards/rejected": -0.09755679219961166, "step": 1290 }, { "epoch": 0.892807745504841, "grad_norm": 5.092657566070557, "learning_rate": 4.464038727524205e-05, "log_odds_chosen": 2.33146071434021, "log_odds_ratio": -0.4566318988800049, "logits/chosen": -0.6432561278343201, "logits/rejected": -0.6718426942825317, "logps/chosen": -0.12250132858753204, "logps/rejected": -0.5979148149490356, "loss": 4.1411, "nll_loss": 0.9896198511123657, "rewards/accuracies": 0.75, "rewards/chosen": -0.012250132858753204, "rewards/margins": 0.04754134267568588, "rewards/rejected": -0.059791479259729385, "step": 1291 }, { "epoch": 0.8934993084370678, "grad_norm": 4.353096008300781, "learning_rate": 4.4674965421853395e-05, "log_odds_chosen": 4.498590469360352, "log_odds_ratio": -0.15228214859962463, "logits/chosen": -0.6290316581726074, "logits/rejected": -0.6855980157852173, "logps/chosen": -0.06508232653141022, "logps/rejected": -0.8101258277893066, "loss": 3.72, "nll_loss": 0.914771318435669, "rewards/accuracies": 1.0, "rewards/chosen": -0.006508233025670052, "rewards/margins": 0.07450436055660248, "rewards/rejected": -0.08101259171962738, "step": 1292 }, { "epoch": 0.8941908713692946, "grad_norm": 3.421708583831787, "learning_rate": 4.4709543568464736e-05, "log_odds_chosen": 4.2662034034729, "log_odds_ratio": -0.1641775369644165, "logits/chosen": -0.7522150278091431, "logits/rejected": -0.8374617695808411, "logps/chosen": -0.08146195113658905, "logps/rejected": -0.6217236518859863, "loss": 3.0554, "nll_loss": 0.7474253177642822, "rewards/accuracies": 1.0, "rewards/chosen": -0.008146194741129875, "rewards/margins": 0.05402617156505585, "rewards/rejected": -0.06217236444354057, "step": 1293 }, { "epoch": 0.8948824343015215, "grad_norm": 5.032227039337158, "learning_rate": 4.474412171507608e-05, "log_odds_chosen": 2.9761674404144287, "log_odds_ratio": -0.3079957664012909, "logits/chosen": -0.37861043214797974, "logits/rejected": -0.4537746012210846, "logps/chosen": -0.12657591700553894, "logps/rejected": -0.6328597068786621, "loss": 3.3381, "nll_loss": 0.8037294149398804, "rewards/accuracies": 0.875, "rewards/chosen": -0.01265759114176035, "rewards/margins": 0.050628382712602615, "rewards/rejected": -0.06328596919775009, "step": 1294 }, { "epoch": 0.8955739972337483, "grad_norm": 3.9334354400634766, "learning_rate": 4.477869986168742e-05, "log_odds_chosen": 2.1492156982421875, "log_odds_ratio": -0.38227516412734985, "logits/chosen": -0.7759724259376526, "logits/rejected": -0.8038296699523926, "logps/chosen": -0.10162755846977234, "logps/rejected": -0.4932019114494324, "loss": 4.4286, "nll_loss": 1.0689126253128052, "rewards/accuracies": 0.875, "rewards/chosen": -0.010162755846977234, "rewards/margins": 0.039157435297966, "rewards/rejected": -0.04932019114494324, "step": 1295 }, { "epoch": 0.8962655601659751, "grad_norm": 4.790040016174316, "learning_rate": 4.481327800829876e-05, "log_odds_chosen": 3.400041341781616, "log_odds_ratio": -0.3399104177951813, "logits/chosen": -0.45178931951522827, "logits/rejected": -0.46649065613746643, "logps/chosen": -0.08820350468158722, "logps/rejected": -0.8288697600364685, "loss": 3.4904, "nll_loss": 0.83861243724823, "rewards/accuracies": 0.875, "rewards/chosen": -0.008820349350571632, "rewards/margins": 0.07406662404537201, "rewards/rejected": -0.08288698643445969, "step": 1296 }, { "epoch": 0.896957123098202, "grad_norm": 4.286818981170654, "learning_rate": 4.48478561549101e-05, "log_odds_chosen": 3.6408259868621826, "log_odds_ratio": -0.3869631886482239, "logits/chosen": -0.10497380793094635, "logits/rejected": -0.15320220589637756, "logps/chosen": -0.08386063575744629, "logps/rejected": -0.34298986196517944, "loss": 3.1143, "nll_loss": 0.7398838996887207, "rewards/accuracies": 0.75, "rewards/chosen": -0.00838606245815754, "rewards/margins": 0.025912927463650703, "rewards/rejected": -0.034298986196517944, "step": 1297 }, { "epoch": 0.8976486860304288, "grad_norm": 3.562063694000244, "learning_rate": 4.4882434301521444e-05, "log_odds_chosen": 5.407161712646484, "log_odds_ratio": -0.23755621910095215, "logits/chosen": -0.35799068212509155, "logits/rejected": -0.3617667555809021, "logps/chosen": -0.10267575085163116, "logps/rejected": -0.901630163192749, "loss": 2.9682, "nll_loss": 0.718295693397522, "rewards/accuracies": 0.875, "rewards/chosen": -0.010267574340105057, "rewards/margins": 0.07989544421434402, "rewards/rejected": -0.09016300737857819, "step": 1298 }, { "epoch": 0.8983402489626556, "grad_norm": 5.247976779937744, "learning_rate": 4.4917012448132786e-05, "log_odds_chosen": 2.938415288925171, "log_odds_ratio": -0.2645170986652374, "logits/chosen": -0.678473711013794, "logits/rejected": -0.7426837682723999, "logps/chosen": -0.15398818254470825, "logps/rejected": -0.6524811387062073, "loss": 4.3283, "nll_loss": 1.0556238889694214, "rewards/accuracies": 0.875, "rewards/chosen": -0.01539881806820631, "rewards/margins": 0.04984929785132408, "rewards/rejected": -0.06524811685085297, "step": 1299 }, { "epoch": 0.8990318118948825, "grad_norm": 4.963733196258545, "learning_rate": 4.495159059474413e-05, "log_odds_chosen": 4.816115856170654, "log_odds_ratio": -0.1597924530506134, "logits/chosen": -0.5552168488502502, "logits/rejected": -0.6095180511474609, "logps/chosen": -0.03244926035404205, "logps/rejected": -0.570087194442749, "loss": 2.9926, "nll_loss": 0.7321832180023193, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032449259888380766, "rewards/margins": 0.053763799369335175, "rewards/rejected": -0.05700872093439102, "step": 1300 }, { "epoch": 0.8997233748271093, "grad_norm": 3.502925157546997, "learning_rate": 4.498616874135547e-05, "log_odds_chosen": 5.5950822830200195, "log_odds_ratio": -0.08565986901521683, "logits/chosen": -0.6689096689224243, "logits/rejected": -0.7246332764625549, "logps/chosen": -0.06416517496109009, "logps/rejected": -0.8265728950500488, "loss": 2.5299, "nll_loss": 0.6238970756530762, "rewards/accuracies": 1.0, "rewards/chosen": -0.0064165182411670685, "rewards/margins": 0.07624077051877975, "rewards/rejected": -0.08265729248523712, "step": 1301 }, { "epoch": 0.9004149377593361, "grad_norm": 6.148613929748535, "learning_rate": 4.502074688796681e-05, "log_odds_chosen": 1.8418501615524292, "log_odds_ratio": -0.475241482257843, "logits/chosen": -0.6767906546592712, "logits/rejected": -0.7121725678443909, "logps/chosen": -0.11665619909763336, "logps/rejected": -0.456548810005188, "loss": 4.6573, "nll_loss": 1.11681067943573, "rewards/accuracies": 0.75, "rewards/chosen": -0.01166562084108591, "rewards/margins": 0.03398926556110382, "rewards/rejected": -0.04565488174557686, "step": 1302 }, { "epoch": 0.901106500691563, "grad_norm": 4.376565456390381, "learning_rate": 4.505532503457815e-05, "log_odds_chosen": 2.30222225189209, "log_odds_ratio": -0.2232993245124817, "logits/chosen": -0.7871577739715576, "logits/rejected": -0.7928929328918457, "logps/chosen": -0.08736582845449448, "logps/rejected": -0.4544295072555542, "loss": 3.7713, "nll_loss": 0.9204996824264526, "rewards/accuracies": 1.0, "rewards/chosen": -0.008736583404242992, "rewards/margins": 0.03670636564493179, "rewards/rejected": -0.04544294998049736, "step": 1303 }, { "epoch": 0.9017980636237898, "grad_norm": 4.588575839996338, "learning_rate": 4.5089903181189494e-05, "log_odds_chosen": 2.5985970497131348, "log_odds_ratio": -0.37684494256973267, "logits/chosen": -0.44517451524734497, "logits/rejected": -0.4426910877227783, "logps/chosen": -0.13602550327777863, "logps/rejected": -0.566986083984375, "loss": 4.3455, "nll_loss": 1.0486829280853271, "rewards/accuracies": 0.75, "rewards/chosen": -0.013602551072835922, "rewards/margins": 0.04309605434536934, "rewards/rejected": -0.05669860541820526, "step": 1304 }, { "epoch": 0.9024896265560166, "grad_norm": 6.157674312591553, "learning_rate": 4.5124481327800835e-05, "log_odds_chosen": 2.6842451095581055, "log_odds_ratio": -0.5304502248764038, "logits/chosen": -0.574425995349884, "logits/rejected": -0.6412628889083862, "logps/chosen": -0.10813743621110916, "logps/rejected": -0.5658566951751709, "loss": 4.5154, "nll_loss": 1.0758013725280762, "rewards/accuracies": 0.75, "rewards/chosen": -0.010813743807375431, "rewards/margins": 0.04577192664146423, "rewards/rejected": -0.05658566579222679, "step": 1305 }, { "epoch": 0.9031811894882434, "grad_norm": 4.149651050567627, "learning_rate": 4.515905947441218e-05, "log_odds_chosen": 2.3158023357391357, "log_odds_ratio": -0.18550823628902435, "logits/chosen": -0.5790281295776367, "logits/rejected": -0.6453101634979248, "logps/chosen": -0.11300928145647049, "logps/rejected": -0.7054611444473267, "loss": 3.7652, "nll_loss": 0.9227438569068909, "rewards/accuracies": 1.0, "rewards/chosen": -0.011300928890705109, "rewards/margins": 0.059245187789201736, "rewards/rejected": -0.07054612040519714, "step": 1306 }, { "epoch": 0.9038727524204703, "grad_norm": 4.822201728820801, "learning_rate": 4.519363762102352e-05, "log_odds_chosen": 4.911643028259277, "log_odds_ratio": -0.1145109161734581, "logits/chosen": -0.47414761781692505, "logits/rejected": -0.4934576451778412, "logps/chosen": -0.05742540955543518, "logps/rejected": -0.8317654132843018, "loss": 4.7059, "nll_loss": 1.165034294128418, "rewards/accuracies": 0.875, "rewards/chosen": -0.005742541514337063, "rewards/margins": 0.0774339959025383, "rewards/rejected": -0.08317653834819794, "step": 1307 }, { "epoch": 0.9045643153526971, "grad_norm": 35.87089538574219, "learning_rate": 4.522821576763486e-05, "log_odds_chosen": 1.5534108877182007, "log_odds_ratio": -0.5431495904922485, "logits/chosen": -0.615991473197937, "logits/rejected": -0.6219574809074402, "logps/chosen": -0.1171765848994255, "logps/rejected": -0.3493393659591675, "loss": 3.5324, "nll_loss": 0.8287971615791321, "rewards/accuracies": 0.875, "rewards/chosen": -0.01171765848994255, "rewards/margins": 0.023216277360916138, "rewards/rejected": -0.03493393585085869, "step": 1308 }, { "epoch": 0.9052558782849239, "grad_norm": 5.447350978851318, "learning_rate": 4.52627939142462e-05, "log_odds_chosen": 1.6186761856079102, "log_odds_ratio": -0.40729713439941406, "logits/chosen": -0.7507280111312866, "logits/rejected": -0.7362810373306274, "logps/chosen": -0.09505996853113174, "logps/rejected": -0.25165241956710815, "loss": 4.7086, "nll_loss": 1.1364130973815918, "rewards/accuracies": 0.875, "rewards/chosen": -0.00950599741190672, "rewards/margins": 0.015659242868423462, "rewards/rejected": -0.025165241211652756, "step": 1309 }, { "epoch": 0.9059474412171508, "grad_norm": 7.766451835632324, "learning_rate": 4.529737206085754e-05, "log_odds_chosen": 2.389831304550171, "log_odds_ratio": -0.5831612944602966, "logits/chosen": -0.44988909363746643, "logits/rejected": -0.447091281414032, "logps/chosen": -0.15839967131614685, "logps/rejected": -0.7640863060951233, "loss": 3.9613, "nll_loss": 0.9320147037506104, "rewards/accuracies": 0.75, "rewards/chosen": -0.015839967876672745, "rewards/margins": 0.060568664222955704, "rewards/rejected": -0.07640863209962845, "step": 1310 }, { "epoch": 0.9066390041493776, "grad_norm": 2.5082616806030273, "learning_rate": 4.5331950207468885e-05, "log_odds_chosen": 4.344583988189697, "log_odds_ratio": -0.14952819049358368, "logits/chosen": -0.5728102326393127, "logits/rejected": -0.5895368456840515, "logps/chosen": -0.056445784866809845, "logps/rejected": -0.4942803978919983, "loss": 2.454, "nll_loss": 0.5985533595085144, "rewards/accuracies": 1.0, "rewards/chosen": -0.0056445784866809845, "rewards/margins": 0.043783463537693024, "rewards/rejected": -0.04942803829908371, "step": 1311 }, { "epoch": 0.9073305670816044, "grad_norm": 4.3802170753479, "learning_rate": 4.5366528354080226e-05, "log_odds_chosen": 3.1736373901367188, "log_odds_ratio": -0.21422179043293, "logits/chosen": -0.7820005416870117, "logits/rejected": -0.8080264925956726, "logps/chosen": -0.09875577688217163, "logps/rejected": -0.523554265499115, "loss": 4.3245, "nll_loss": 1.0596930980682373, "rewards/accuracies": 1.0, "rewards/chosen": -0.009875577874481678, "rewards/margins": 0.042479850351810455, "rewards/rejected": -0.05235542729496956, "step": 1312 }, { "epoch": 0.9080221300138313, "grad_norm": 3.8520901203155518, "learning_rate": 4.540110650069157e-05, "log_odds_chosen": 4.601001739501953, "log_odds_ratio": -0.07977695763111115, "logits/chosen": -0.6756404638290405, "logits/rejected": -0.7021632790565491, "logps/chosen": -0.05860934406518936, "logps/rejected": -0.9899218082427979, "loss": 2.8374, "nll_loss": 0.7013697624206543, "rewards/accuracies": 1.0, "rewards/chosen": -0.005860934499651194, "rewards/margins": 0.09313125163316727, "rewards/rejected": -0.09899218380451202, "step": 1313 }, { "epoch": 0.9087136929460581, "grad_norm": 5.126208305358887, "learning_rate": 4.543568464730291e-05, "log_odds_chosen": 3.0823912620544434, "log_odds_ratio": -0.6001900434494019, "logits/chosen": -0.793992280960083, "logits/rejected": -0.8149877786636353, "logps/chosen": -0.15848688781261444, "logps/rejected": -0.7364607453346252, "loss": 3.3215, "nll_loss": 0.7703518867492676, "rewards/accuracies": 0.75, "rewards/chosen": -0.015848688781261444, "rewards/margins": 0.0577973872423172, "rewards/rejected": -0.07364607602357864, "step": 1314 }, { "epoch": 0.9094052558782849, "grad_norm": 5.542551517486572, "learning_rate": 4.547026279391425e-05, "log_odds_chosen": 2.469801902770996, "log_odds_ratio": -0.21200266480445862, "logits/chosen": -0.4572201669216156, "logits/rejected": -0.5229408144950867, "logps/chosen": -0.08554461598396301, "logps/rejected": -0.7156955599784851, "loss": 5.0535, "nll_loss": 1.2421646118164062, "rewards/accuracies": 0.875, "rewards/chosen": -0.008554462343454361, "rewards/margins": 0.06301509588956833, "rewards/rejected": -0.07156955450773239, "step": 1315 }, { "epoch": 0.9100968188105117, "grad_norm": 5.043685436248779, "learning_rate": 4.550484094052559e-05, "log_odds_chosen": 1.3790621757507324, "log_odds_ratio": -0.7095733880996704, "logits/chosen": -0.579319417476654, "logits/rejected": -0.5887928605079651, "logps/chosen": -0.18025648593902588, "logps/rejected": -0.34477776288986206, "loss": 4.1403, "nll_loss": 0.96412593126297, "rewards/accuracies": 0.625, "rewards/chosen": -0.018025647848844528, "rewards/margins": 0.016452128067612648, "rewards/rejected": -0.034477777779102325, "step": 1316 }, { "epoch": 0.9107883817427386, "grad_norm": 4.892166614532471, "learning_rate": 4.5539419087136934e-05, "log_odds_chosen": 3.1222496032714844, "log_odds_ratio": -0.26753729581832886, "logits/chosen": -0.7339975237846375, "logits/rejected": -0.7486129999160767, "logps/chosen": -0.09540008753538132, "logps/rejected": -0.5913593769073486, "loss": 4.3887, "nll_loss": 1.070409893989563, "rewards/accuracies": 0.875, "rewards/chosen": -0.009540008381009102, "rewards/margins": 0.04959592968225479, "rewards/rejected": -0.059135936200618744, "step": 1317 }, { "epoch": 0.9114799446749654, "grad_norm": 3.516747236251831, "learning_rate": 4.5573997233748275e-05, "log_odds_chosen": 6.2766432762146, "log_odds_ratio": -0.05572151765227318, "logits/chosen": -0.5407491326332092, "logits/rejected": -0.5907287001609802, "logps/chosen": -0.05054568871855736, "logps/rejected": -1.1098434925079346, "loss": 3.5414, "nll_loss": 0.8797795176506042, "rewards/accuracies": 1.0, "rewards/chosen": -0.0050545684061944485, "rewards/margins": 0.10592978447675705, "rewards/rejected": -0.11098435521125793, "step": 1318 }, { "epoch": 0.9121715076071922, "grad_norm": 5.598543643951416, "learning_rate": 4.560857538035962e-05, "log_odds_chosen": 3.204986572265625, "log_odds_ratio": -0.42890167236328125, "logits/chosen": -0.6990206837654114, "logits/rejected": -0.7492403984069824, "logps/chosen": -0.10166987776756287, "logps/rejected": -0.813813328742981, "loss": 4.8704, "nll_loss": 1.1747119426727295, "rewards/accuracies": 0.75, "rewards/chosen": -0.010166987776756287, "rewards/margins": 0.07121434807777405, "rewards/rejected": -0.08138133585453033, "step": 1319 }, { "epoch": 0.9128630705394191, "grad_norm": 3.9839718341827393, "learning_rate": 4.564315352697096e-05, "log_odds_chosen": 1.6930179595947266, "log_odds_ratio": -0.44334709644317627, "logits/chosen": -0.43449825048446655, "logits/rejected": -0.41399112343788147, "logps/chosen": -0.1631818413734436, "logps/rejected": -0.24806104600429535, "loss": 2.9659, "nll_loss": 0.6971408724784851, "rewards/accuracies": 0.75, "rewards/chosen": -0.01631818525493145, "rewards/margins": 0.00848792027682066, "rewards/rejected": -0.024806104600429535, "step": 1320 }, { "epoch": 0.9135546334716459, "grad_norm": 4.427506446838379, "learning_rate": 4.56777316735823e-05, "log_odds_chosen": 3.5876333713531494, "log_odds_ratio": -0.33876973390579224, "logits/chosen": -0.6090391874313354, "logits/rejected": -0.6520763635635376, "logps/chosen": -0.1511664092540741, "logps/rejected": -0.7406303286552429, "loss": 3.6463, "nll_loss": 0.8776899576187134, "rewards/accuracies": 0.75, "rewards/chosen": -0.01511664129793644, "rewards/margins": 0.0589463897049427, "rewards/rejected": -0.07406303286552429, "step": 1321 }, { "epoch": 0.9142461964038727, "grad_norm": 7.1556291580200195, "learning_rate": 4.571230982019364e-05, "log_odds_chosen": 0.20019125938415527, "log_odds_ratio": -0.9610413908958435, "logits/chosen": -0.6211057305335999, "logits/rejected": -0.6151083111763, "logps/chosen": -0.19979843497276306, "logps/rejected": -0.21615344285964966, "loss": 3.8671, "nll_loss": 0.870674192905426, "rewards/accuracies": 0.625, "rewards/chosen": -0.019979843869805336, "rewards/margins": 0.0016355020925402641, "rewards/rejected": -0.021615345031023026, "step": 1322 }, { "epoch": 0.9149377593360996, "grad_norm": 5.3502197265625, "learning_rate": 4.5746887966804977e-05, "log_odds_chosen": 3.4048633575439453, "log_odds_ratio": -0.4762808084487915, "logits/chosen": -0.82079017162323, "logits/rejected": -0.8388082981109619, "logps/chosen": -0.13025324046611786, "logps/rejected": -0.6816924214363098, "loss": 4.202, "nll_loss": 1.0028724670410156, "rewards/accuracies": 0.625, "rewards/chosen": -0.013025323860347271, "rewards/margins": 0.055143918842077255, "rewards/rejected": -0.0681692436337471, "step": 1323 }, { "epoch": 0.9156293222683264, "grad_norm": 4.137228488922119, "learning_rate": 4.578146611341632e-05, "log_odds_chosen": 4.749207496643066, "log_odds_ratio": -0.20173031091690063, "logits/chosen": -0.842710018157959, "logits/rejected": -0.8378841280937195, "logps/chosen": -0.06035904958844185, "logps/rejected": -0.8911387920379639, "loss": 4.0766, "nll_loss": 0.9989691972732544, "rewards/accuracies": 0.875, "rewards/chosen": -0.006035904865711927, "rewards/margins": 0.08307798206806183, "rewards/rejected": -0.08911389112472534, "step": 1324 }, { "epoch": 0.9163208852005532, "grad_norm": 4.485111713409424, "learning_rate": 4.581604426002766e-05, "log_odds_chosen": 2.276975154876709, "log_odds_ratio": -0.1457901895046234, "logits/chosen": -0.38137882947921753, "logits/rejected": -0.37996378540992737, "logps/chosen": -0.11912259459495544, "logps/rejected": -0.6932668089866638, "loss": 3.2816, "nll_loss": 0.8058204650878906, "rewards/accuracies": 1.0, "rewards/chosen": -0.011912260204553604, "rewards/margins": 0.057414423674345016, "rewards/rejected": -0.06932668387889862, "step": 1325 }, { "epoch": 0.91701244813278, "grad_norm": 6.287432670593262, "learning_rate": 4.5850622406639e-05, "log_odds_chosen": 2.9762678146362305, "log_odds_ratio": -0.40890878438949585, "logits/chosen": -0.8684566617012024, "logits/rejected": -0.8920563459396362, "logps/chosen": -0.12412737309932709, "logps/rejected": -0.7340701818466187, "loss": 4.3594, "nll_loss": 1.0489689111709595, "rewards/accuracies": 0.75, "rewards/chosen": -0.012412738054990768, "rewards/margins": 0.06099428981542587, "rewards/rejected": -0.07340703159570694, "step": 1326 }, { "epoch": 0.9177040110650069, "grad_norm": 3.8076276779174805, "learning_rate": 4.588520055325034e-05, "log_odds_chosen": 1.818742275238037, "log_odds_ratio": -0.36743277311325073, "logits/chosen": -0.750927209854126, "logits/rejected": -0.7433460354804993, "logps/chosen": -0.1195850744843483, "logps/rejected": -0.4962850511074066, "loss": 2.9064, "nll_loss": 0.6898566484451294, "rewards/accuracies": 0.75, "rewards/chosen": -0.01195850782096386, "rewards/margins": 0.03766999766230583, "rewards/rejected": -0.04962850734591484, "step": 1327 }, { "epoch": 0.9183955739972337, "grad_norm": 6.428443431854248, "learning_rate": 4.5919778699861684e-05, "log_odds_chosen": 0.8354544639587402, "log_odds_ratio": -0.48252594470977783, "logits/chosen": -0.9051334261894226, "logits/rejected": -0.906627893447876, "logps/chosen": -0.10690341144800186, "logps/rejected": -0.27170899510383606, "loss": 4.5899, "nll_loss": 1.0992116928100586, "rewards/accuracies": 0.75, "rewards/chosen": -0.010690340772271156, "rewards/margins": 0.01648055762052536, "rewards/rejected": -0.027170900255441666, "step": 1328 }, { "epoch": 0.9190871369294605, "grad_norm": 6.3958353996276855, "learning_rate": 4.5954356846473026e-05, "log_odds_chosen": 1.6040034294128418, "log_odds_ratio": -0.26059266924858093, "logits/chosen": -0.4876313805580139, "logits/rejected": -0.5052316188812256, "logps/chosen": -0.09867852926254272, "logps/rejected": -0.5049740076065063, "loss": 5.1261, "nll_loss": 1.2554727792739868, "rewards/accuracies": 1.0, "rewards/chosen": -0.009867853485047817, "rewards/margins": 0.0406295470893383, "rewards/rejected": -0.050497397780418396, "step": 1329 }, { "epoch": 0.9197786998616874, "grad_norm": 3.9454269409179688, "learning_rate": 4.598893499308437e-05, "log_odds_chosen": 3.4766697883605957, "log_odds_ratio": -0.3413624167442322, "logits/chosen": -0.6844198107719421, "logits/rejected": -0.6933927536010742, "logps/chosen": -0.10315560549497604, "logps/rejected": -0.6214383840560913, "loss": 3.0776, "nll_loss": 0.7352601885795593, "rewards/accuracies": 0.75, "rewards/chosen": -0.010315561667084694, "rewards/margins": 0.05182827636599541, "rewards/rejected": -0.06214383617043495, "step": 1330 }, { "epoch": 0.9204702627939142, "grad_norm": 6.752826690673828, "learning_rate": 4.602351313969571e-05, "log_odds_chosen": 2.6723358631134033, "log_odds_ratio": -0.40230464935302734, "logits/chosen": -0.5666855573654175, "logits/rejected": -0.6329537630081177, "logps/chosen": -0.11216460913419724, "logps/rejected": -0.4415185749530792, "loss": 4.4679, "nll_loss": 1.076737642288208, "rewards/accuracies": 0.75, "rewards/chosen": -0.011216461658477783, "rewards/margins": 0.03293539956212044, "rewards/rejected": -0.04415185749530792, "step": 1331 }, { "epoch": 0.921161825726141, "grad_norm": 3.5908589363098145, "learning_rate": 4.605809128630705e-05, "log_odds_chosen": 3.980630874633789, "log_odds_ratio": -0.30837661027908325, "logits/chosen": -0.786279559135437, "logits/rejected": -0.8102754354476929, "logps/chosen": -0.09069174528121948, "logps/rejected": -0.7721251249313354, "loss": 3.2405, "nll_loss": 0.7792943120002747, "rewards/accuracies": 0.75, "rewards/chosen": -0.009069174528121948, "rewards/margins": 0.06814335286617279, "rewards/rejected": -0.07721251249313354, "step": 1332 }, { "epoch": 0.9218533886583679, "grad_norm": 5.429046630859375, "learning_rate": 4.609266943291839e-05, "log_odds_chosen": 4.113162517547607, "log_odds_ratio": -0.20421762764453888, "logits/chosen": -0.6347546577453613, "logits/rejected": -0.6887112259864807, "logps/chosen": -0.09734141826629639, "logps/rejected": -0.7304251790046692, "loss": 4.5229, "nll_loss": 1.1103060245513916, "rewards/accuracies": 0.875, "rewards/chosen": -0.009734141640365124, "rewards/margins": 0.06330837309360504, "rewards/rejected": -0.07304251939058304, "step": 1333 }, { "epoch": 0.9225449515905948, "grad_norm": 3.887255907058716, "learning_rate": 4.612724757952974e-05, "log_odds_chosen": 4.6803507804870605, "log_odds_ratio": -0.11616200953722, "logits/chosen": -0.1559465080499649, "logits/rejected": -0.15856684744358063, "logps/chosen": -0.04484165087342262, "logps/rejected": -0.7281444072723389, "loss": 2.6469, "nll_loss": 0.6501127481460571, "rewards/accuracies": 1.0, "rewards/chosen": -0.004484164994210005, "rewards/margins": 0.06833028048276901, "rewards/rejected": -0.07281444221735, "step": 1334 }, { "epoch": 0.9232365145228216, "grad_norm": 3.4572935104370117, "learning_rate": 4.616182572614108e-05, "log_odds_chosen": 3.0293996334075928, "log_odds_ratio": -0.17892718315124512, "logits/chosen": -0.6547147035598755, "logits/rejected": -0.7216066718101501, "logps/chosen": -0.075624480843544, "logps/rejected": -0.41122573614120483, "loss": 2.958, "nll_loss": 0.7216038703918457, "rewards/accuracies": 0.875, "rewards/chosen": -0.0075624482706189156, "rewards/margins": 0.0335601270198822, "rewards/rejected": -0.04112257435917854, "step": 1335 }, { "epoch": 0.9239280774550485, "grad_norm": 3.7187869548797607, "learning_rate": 4.6196403872752424e-05, "log_odds_chosen": 3.6043601036071777, "log_odds_ratio": -0.4560887813568115, "logits/chosen": -0.4577183723449707, "logits/rejected": -0.41571831703186035, "logps/chosen": -0.0708552822470665, "logps/rejected": -0.3021210730075836, "loss": 2.6082, "nll_loss": 0.6064510345458984, "rewards/accuracies": 0.625, "rewards/chosen": -0.00708552822470665, "rewards/margins": 0.02312657982110977, "rewards/rejected": -0.03021210804581642, "step": 1336 }, { "epoch": 0.9246196403872753, "grad_norm": 6.289156913757324, "learning_rate": 4.6230982019363765e-05, "log_odds_chosen": 1.629589319229126, "log_odds_ratio": -0.3251434564590454, "logits/chosen": -0.7396148443222046, "logits/rejected": -0.799910843372345, "logps/chosen": -0.094536691904068, "logps/rejected": -0.34461310505867004, "loss": 4.7423, "nll_loss": 1.153063178062439, "rewards/accuracies": 0.875, "rewards/chosen": -0.0094536691904068, "rewards/margins": 0.025007642805576324, "rewards/rejected": -0.034461311995983124, "step": 1337 }, { "epoch": 0.9253112033195021, "grad_norm": 5.073808193206787, "learning_rate": 4.626556016597511e-05, "log_odds_chosen": 2.9201889038085938, "log_odds_ratio": -0.35377949476242065, "logits/chosen": -0.5850831270217896, "logits/rejected": -0.5737963914871216, "logps/chosen": -0.1539689302444458, "logps/rejected": -0.5299727916717529, "loss": 4.3157, "nll_loss": 1.043558120727539, "rewards/accuracies": 0.75, "rewards/chosen": -0.01539689302444458, "rewards/margins": 0.03760039061307907, "rewards/rejected": -0.05299727991223335, "step": 1338 }, { "epoch": 0.926002766251729, "grad_norm": 5.053715705871582, "learning_rate": 4.630013831258645e-05, "log_odds_chosen": 3.1605682373046875, "log_odds_ratio": -0.39162132143974304, "logits/chosen": -0.7348707318305969, "logits/rejected": -0.7419517636299133, "logps/chosen": -0.18457387387752533, "logps/rejected": -0.8330814242362976, "loss": 4.2734, "nll_loss": 1.0291779041290283, "rewards/accuracies": 0.75, "rewards/chosen": -0.018457388505339622, "rewards/margins": 0.06485076248645782, "rewards/rejected": -0.083308145403862, "step": 1339 }, { "epoch": 0.9266943291839558, "grad_norm": 4.149688243865967, "learning_rate": 4.633471645919779e-05, "log_odds_chosen": 2.2877182960510254, "log_odds_ratio": -0.20909997820854187, "logits/chosen": -0.8047329187393188, "logits/rejected": -0.8140844702720642, "logps/chosen": -0.12095511704683304, "logps/rejected": -0.6085599660873413, "loss": 3.9157, "nll_loss": 0.9580215215682983, "rewards/accuracies": 1.0, "rewards/chosen": -0.012095511890947819, "rewards/margins": 0.048760488629341125, "rewards/rejected": -0.06085599958896637, "step": 1340 }, { "epoch": 0.9273858921161826, "grad_norm": 5.761998653411865, "learning_rate": 4.636929460580913e-05, "log_odds_chosen": 3.480170249938965, "log_odds_ratio": -0.35071712732315063, "logits/chosen": -0.6200214624404907, "logits/rejected": -0.6776783466339111, "logps/chosen": -0.07922127842903137, "logps/rejected": -0.7400749921798706, "loss": 4.1748, "nll_loss": 1.0086222887039185, "rewards/accuracies": 0.75, "rewards/chosen": -0.007922128774225712, "rewards/margins": 0.06608536839485168, "rewards/rejected": -0.07400749623775482, "step": 1341 }, { "epoch": 0.9280774550484094, "grad_norm": 6.504542827606201, "learning_rate": 4.640387275242047e-05, "log_odds_chosen": 2.966991662979126, "log_odds_ratio": -0.9646126627922058, "logits/chosen": -0.900704562664032, "logits/rejected": -0.9117063283920288, "logps/chosen": -0.09276466071605682, "logps/rejected": -0.6016995906829834, "loss": 5.0409, "nll_loss": 1.1637513637542725, "rewards/accuracies": 0.625, "rewards/chosen": -0.009276467375457287, "rewards/margins": 0.05089349299669266, "rewards/rejected": -0.06016996130347252, "step": 1342 }, { "epoch": 0.9287690179806363, "grad_norm": 5.371675491333008, "learning_rate": 4.6438450899031815e-05, "log_odds_chosen": 3.9872875213623047, "log_odds_ratio": -0.5040350556373596, "logits/chosen": -0.6492640376091003, "logits/rejected": -0.7425565719604492, "logps/chosen": -0.10791552066802979, "logps/rejected": -0.619098961353302, "loss": 2.7258, "nll_loss": 0.6310492753982544, "rewards/accuracies": 0.75, "rewards/chosen": -0.010791551321744919, "rewards/margins": 0.05111834406852722, "rewards/rejected": -0.06190989911556244, "step": 1343 }, { "epoch": 0.9294605809128631, "grad_norm": 3.99408221244812, "learning_rate": 4.6473029045643156e-05, "log_odds_chosen": 3.311504602432251, "log_odds_ratio": -0.15633322298526764, "logits/chosen": -0.9153397083282471, "logits/rejected": -0.9816678762435913, "logps/chosen": -0.038775935769081116, "logps/rejected": -0.48394450545310974, "loss": 2.6258, "nll_loss": 0.6408182382583618, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038775932043790817, "rewards/margins": 0.04451685771346092, "rewards/rejected": -0.04839445278048515, "step": 1344 }, { "epoch": 0.9301521438450899, "grad_norm": 3.6404597759246826, "learning_rate": 4.65076071922545e-05, "log_odds_chosen": 2.5649046897888184, "log_odds_ratio": -0.29447752237319946, "logits/chosen": -0.5716572403907776, "logits/rejected": -0.6139044761657715, "logps/chosen": -0.07814640551805496, "logps/rejected": -0.4611126184463501, "loss": 2.5634, "nll_loss": 0.6114119291305542, "rewards/accuracies": 0.75, "rewards/chosen": -0.007814640179276466, "rewards/margins": 0.038296617567539215, "rewards/rejected": -0.04611125960946083, "step": 1345 }, { "epoch": 0.9308437067773168, "grad_norm": 6.4514384269714355, "learning_rate": 4.654218533886584e-05, "log_odds_chosen": 1.3883261680603027, "log_odds_ratio": -0.6939107179641724, "logits/chosen": -0.6356791257858276, "logits/rejected": -0.6466587781906128, "logps/chosen": -0.1649370789527893, "logps/rejected": -0.4422614574432373, "loss": 5.3489, "nll_loss": 1.2678287029266357, "rewards/accuracies": 0.75, "rewards/chosen": -0.01649370789527893, "rewards/margins": 0.027732431888580322, "rewards/rejected": -0.04422614350914955, "step": 1346 }, { "epoch": 0.9315352697095436, "grad_norm": 5.485098361968994, "learning_rate": 4.657676348547718e-05, "log_odds_chosen": 3.7240209579467773, "log_odds_ratio": -0.5168907642364502, "logits/chosen": -0.23317617177963257, "logits/rejected": -0.2783960700035095, "logps/chosen": -0.11563809216022491, "logps/rejected": -0.6522781848907471, "loss": 3.8546, "nll_loss": 0.9119526147842407, "rewards/accuracies": 0.625, "rewards/chosen": -0.011563808657228947, "rewards/margins": 0.05366401746869087, "rewards/rejected": -0.06522782146930695, "step": 1347 }, { "epoch": 0.9322268326417704, "grad_norm": 3.336510181427002, "learning_rate": 4.661134163208852e-05, "log_odds_chosen": 1.6070709228515625, "log_odds_ratio": -0.3758274018764496, "logits/chosen": -0.7622817158699036, "logits/rejected": -0.7541989684104919, "logps/chosen": -0.13546738028526306, "logps/rejected": -0.6061623096466064, "loss": 2.603, "nll_loss": 0.6131584644317627, "rewards/accuracies": 0.625, "rewards/chosen": -0.013546737842261791, "rewards/margins": 0.0470694974064827, "rewards/rejected": -0.060616232454776764, "step": 1348 }, { "epoch": 0.9329183955739973, "grad_norm": 5.757965564727783, "learning_rate": 4.6645919778699864e-05, "log_odds_chosen": 3.743828058242798, "log_odds_ratio": -0.37967419624328613, "logits/chosen": -0.5838915705680847, "logits/rejected": -0.5985926985740662, "logps/chosen": -0.14416922628879547, "logps/rejected": -0.8762768507003784, "loss": 3.4319, "nll_loss": 0.820005476474762, "rewards/accuracies": 0.75, "rewards/chosen": -0.014416922815144062, "rewards/margins": 0.07321076840162277, "rewards/rejected": -0.08762768656015396, "step": 1349 }, { "epoch": 0.9336099585062241, "grad_norm": 5.270911693572998, "learning_rate": 4.6680497925311206e-05, "log_odds_chosen": 2.170856237411499, "log_odds_ratio": -0.519264280796051, "logits/chosen": -0.4287693500518799, "logits/rejected": -0.49412840604782104, "logps/chosen": -0.13331323862075806, "logps/rejected": -0.5560269951820374, "loss": 3.0761, "nll_loss": 0.7170995473861694, "rewards/accuracies": 0.75, "rewards/chosen": -0.013331323862075806, "rewards/margins": 0.04227137565612793, "rewards/rejected": -0.055602699518203735, "step": 1350 }, { "epoch": 0.9343015214384509, "grad_norm": 5.0149126052856445, "learning_rate": 4.671507607192255e-05, "log_odds_chosen": 4.14609956741333, "log_odds_ratio": -0.19730296730995178, "logits/chosen": -0.40930646657943726, "logits/rejected": -0.42448240518569946, "logps/chosen": -0.08510898798704147, "logps/rejected": -0.7383308410644531, "loss": 3.784, "nll_loss": 0.926262617111206, "rewards/accuracies": 0.875, "rewards/chosen": -0.008510898798704147, "rewards/margins": 0.06532219052314758, "rewards/rejected": -0.07383308559656143, "step": 1351 }, { "epoch": 0.9349930843706777, "grad_norm": 6.72976016998291, "learning_rate": 4.674965421853389e-05, "log_odds_chosen": 1.5673390626907349, "log_odds_ratio": -0.6566497683525085, "logits/chosen": -0.3811492919921875, "logits/rejected": -0.3969017565250397, "logps/chosen": -0.14099657535552979, "logps/rejected": -0.5028352737426758, "loss": 4.0345, "nll_loss": 0.9429715871810913, "rewards/accuracies": 0.625, "rewards/chosen": -0.014099658466875553, "rewards/margins": 0.03618386387825012, "rewards/rejected": -0.0502835214138031, "step": 1352 }, { "epoch": 0.9356846473029046, "grad_norm": 4.536650657653809, "learning_rate": 4.678423236514523e-05, "log_odds_chosen": 1.6238508224487305, "log_odds_ratio": -0.408079594373703, "logits/chosen": -0.493243932723999, "logits/rejected": -0.49688708782196045, "logps/chosen": -0.11562632769346237, "logps/rejected": -0.42109858989715576, "loss": 2.8957, "nll_loss": 0.6831123232841492, "rewards/accuracies": 0.875, "rewards/chosen": -0.011562633328139782, "rewards/margins": 0.03054722398519516, "rewards/rejected": -0.04210985451936722, "step": 1353 }, { "epoch": 0.9363762102351314, "grad_norm": 4.0943474769592285, "learning_rate": 4.681881051175657e-05, "log_odds_chosen": 1.6092420816421509, "log_odds_ratio": -0.41765856742858887, "logits/chosen": -0.2855399250984192, "logits/rejected": -0.2806923985481262, "logps/chosen": -0.1413896083831787, "logps/rejected": -0.35888469219207764, "loss": 3.0766, "nll_loss": 0.7273715734481812, "rewards/accuracies": 0.75, "rewards/chosen": -0.014138958416879177, "rewards/margins": 0.02174951136112213, "rewards/rejected": -0.03588847070932388, "step": 1354 }, { "epoch": 0.9370677731673582, "grad_norm": 6.500419616699219, "learning_rate": 4.685338865836791e-05, "log_odds_chosen": 2.8715126514434814, "log_odds_ratio": -0.16753099858760834, "logits/chosen": -0.4005013704299927, "logits/rejected": -0.3878091871738434, "logps/chosen": -0.0721023827791214, "logps/rejected": -0.5393887162208557, "loss": 5.1474, "nll_loss": 1.2700891494750977, "rewards/accuracies": 1.0, "rewards/chosen": -0.007210238371044397, "rewards/margins": 0.04672863706946373, "rewards/rejected": -0.05393887311220169, "step": 1355 }, { "epoch": 0.9377593360995851, "grad_norm": 4.416325092315674, "learning_rate": 4.6887966804979255e-05, "log_odds_chosen": 2.915926218032837, "log_odds_ratio": -0.13118229806423187, "logits/chosen": -0.904473066329956, "logits/rejected": -0.9156371355056763, "logps/chosen": -0.060720786452293396, "logps/rejected": -0.7894104719161987, "loss": 3.4559, "nll_loss": 0.8508535623550415, "rewards/accuracies": 1.0, "rewards/chosen": -0.0060720788314938545, "rewards/margins": 0.07286897301673889, "rewards/rejected": -0.07894104719161987, "step": 1356 }, { "epoch": 0.9384508990318119, "grad_norm": 4.253722667694092, "learning_rate": 4.6922544951590596e-05, "log_odds_chosen": 2.4106616973876953, "log_odds_ratio": -0.37549322843551636, "logits/chosen": -0.8365063071250916, "logits/rejected": -0.8938258290290833, "logps/chosen": -0.09556761384010315, "logps/rejected": -0.48990726470947266, "loss": 4.3275, "nll_loss": 1.044328212738037, "rewards/accuracies": 0.875, "rewards/chosen": -0.009556761011481285, "rewards/margins": 0.03943396359682083, "rewards/rejected": -0.048990726470947266, "step": 1357 }, { "epoch": 0.9391424619640387, "grad_norm": 4.91149377822876, "learning_rate": 4.695712309820194e-05, "log_odds_chosen": 4.468101501464844, "log_odds_ratio": -0.18852251768112183, "logits/chosen": -0.057901933789253235, "logits/rejected": -0.11767878383398056, "logps/chosen": -0.031196830794215202, "logps/rejected": -0.6462098360061646, "loss": 3.5735, "nll_loss": 0.8745160102844238, "rewards/accuracies": 1.0, "rewards/chosen": -0.003119683125987649, "rewards/margins": 0.06150130555033684, "rewards/rejected": -0.0646209865808487, "step": 1358 }, { "epoch": 0.9398340248962656, "grad_norm": 4.495655059814453, "learning_rate": 4.699170124481328e-05, "log_odds_chosen": 2.976381301879883, "log_odds_ratio": -0.24520474672317505, "logits/chosen": -0.6155415773391724, "logits/rejected": -0.5602388381958008, "logps/chosen": -0.08388067781925201, "logps/rejected": -0.5136565566062927, "loss": 4.3834, "nll_loss": 1.0713173151016235, "rewards/accuracies": 0.875, "rewards/chosen": -0.008388067595660686, "rewards/margins": 0.04297758638858795, "rewards/rejected": -0.05136565491557121, "step": 1359 }, { "epoch": 0.9405255878284924, "grad_norm": 7.075529098510742, "learning_rate": 4.702627939142462e-05, "log_odds_chosen": 1.5126800537109375, "log_odds_ratio": -0.40790507197380066, "logits/chosen": -0.5208859443664551, "logits/rejected": -0.5781892538070679, "logps/chosen": -0.1379278004169464, "logps/rejected": -0.4327913820743561, "loss": 5.0908, "nll_loss": 1.2318994998931885, "rewards/accuracies": 0.875, "rewards/chosen": -0.01379278115928173, "rewards/margins": 0.029486358165740967, "rewards/rejected": -0.04327914118766785, "step": 1360 }, { "epoch": 0.9412171507607192, "grad_norm": 3.1823527812957764, "learning_rate": 4.706085753803596e-05, "log_odds_chosen": 2.7076611518859863, "log_odds_ratio": -0.2057853639125824, "logits/chosen": -0.2056216150522232, "logits/rejected": -0.2584799528121948, "logps/chosen": -0.06254203617572784, "logps/rejected": -0.3413214087486267, "loss": 2.9938, "nll_loss": 0.7278686761856079, "rewards/accuracies": 1.0, "rewards/chosen": -0.006254204083234072, "rewards/margins": 0.027877936139702797, "rewards/rejected": -0.03413214161992073, "step": 1361 }, { "epoch": 0.941908713692946, "grad_norm": 5.588365077972412, "learning_rate": 4.7095435684647304e-05, "log_odds_chosen": 2.5802206993103027, "log_odds_ratio": -0.31722715497016907, "logits/chosen": -0.3173674941062927, "logits/rejected": -0.4348156154155731, "logps/chosen": -0.09981952607631683, "logps/rejected": -0.6218388080596924, "loss": 4.6298, "nll_loss": 1.1257364749908447, "rewards/accuracies": 0.75, "rewards/chosen": -0.009981952607631683, "rewards/margins": 0.052201926708221436, "rewards/rejected": -0.06218387186527252, "step": 1362 }, { "epoch": 0.9426002766251729, "grad_norm": 6.2187299728393555, "learning_rate": 4.7130013831258646e-05, "log_odds_chosen": 3.495363473892212, "log_odds_ratio": -0.3896557092666626, "logits/chosen": -0.9022430181503296, "logits/rejected": -0.902992308139801, "logps/chosen": -0.08296022564172745, "logps/rejected": -0.7100075483322144, "loss": 5.2592, "nll_loss": 1.275832176208496, "rewards/accuracies": 0.875, "rewards/chosen": -0.008296022191643715, "rewards/margins": 0.06270474195480347, "rewards/rejected": -0.07100075483322144, "step": 1363 }, { "epoch": 0.9432918395573997, "grad_norm": 4.568310260772705, "learning_rate": 4.716459197786999e-05, "log_odds_chosen": 3.555234432220459, "log_odds_ratio": -0.4674950838088989, "logits/chosen": -0.5476137399673462, "logits/rejected": -0.5729899406433105, "logps/chosen": -0.11827027052640915, "logps/rejected": -0.7366992235183716, "loss": 2.5127, "nll_loss": 0.5814200043678284, "rewards/accuracies": 0.75, "rewards/chosen": -0.011827027425169945, "rewards/margins": 0.061842888593673706, "rewards/rejected": -0.0736699178814888, "step": 1364 }, { "epoch": 0.9439834024896265, "grad_norm": 3.968846321105957, "learning_rate": 4.719917012448133e-05, "log_odds_chosen": 4.848783493041992, "log_odds_ratio": -0.16408772766590118, "logits/chosen": -0.6661707162857056, "logits/rejected": -0.7089472413063049, "logps/chosen": -0.07180530577898026, "logps/rejected": -0.8554694652557373, "loss": 3.0071, "nll_loss": 0.7353719472885132, "rewards/accuracies": 1.0, "rewards/chosen": -0.007180530112236738, "rewards/margins": 0.07836642116308212, "rewards/rejected": -0.08554694801568985, "step": 1365 }, { "epoch": 0.9446749654218534, "grad_norm": 5.436463832855225, "learning_rate": 4.723374827109267e-05, "log_odds_chosen": 1.8622217178344727, "log_odds_ratio": -0.3799074590206146, "logits/chosen": -0.7138546109199524, "logits/rejected": -0.7268157005310059, "logps/chosen": -0.11516305804252625, "logps/rejected": -0.5172901153564453, "loss": 4.521, "nll_loss": 1.0922627449035645, "rewards/accuracies": 0.75, "rewards/chosen": -0.01151630561798811, "rewards/margins": 0.04021270573139191, "rewards/rejected": -0.05172900855541229, "step": 1366 }, { "epoch": 0.9453665283540802, "grad_norm": 4.4956955909729, "learning_rate": 4.726832641770401e-05, "log_odds_chosen": 1.9010629653930664, "log_odds_ratio": -0.56339430809021, "logits/chosen": -0.1682775616645813, "logits/rejected": -0.1694241464138031, "logps/chosen": -0.1430377960205078, "logps/rejected": -0.5368797183036804, "loss": 3.296, "nll_loss": 0.7676615715026855, "rewards/accuracies": 0.625, "rewards/chosen": -0.014303779229521751, "rewards/margins": 0.03938418999314308, "rewards/rejected": -0.05368797108530998, "step": 1367 }, { "epoch": 0.946058091286307, "grad_norm": 5.558333873748779, "learning_rate": 4.7302904564315354e-05, "log_odds_chosen": 3.761000871658325, "log_odds_ratio": -0.17737360298633575, "logits/chosen": -0.46184465289115906, "logits/rejected": -0.4802972674369812, "logps/chosen": -0.08399471640586853, "logps/rejected": -1.0271053314208984, "loss": 4.8689, "nll_loss": 1.1994938850402832, "rewards/accuracies": 0.875, "rewards/chosen": -0.008399471640586853, "rewards/margins": 0.09431107342243195, "rewards/rejected": -0.1027105450630188, "step": 1368 }, { "epoch": 0.9467496542185339, "grad_norm": 4.409332275390625, "learning_rate": 4.7337482710926695e-05, "log_odds_chosen": 2.9365220069885254, "log_odds_ratio": -0.38205763697624207, "logits/chosen": -0.6523790955543518, "logits/rejected": -0.6939494609832764, "logps/chosen": -0.15215745568275452, "logps/rejected": -0.7071554660797119, "loss": 3.9908, "nll_loss": 0.9594962000846863, "rewards/accuracies": 0.75, "rewards/chosen": -0.015215746127068996, "rewards/margins": 0.05549979954957962, "rewards/rejected": -0.07071554660797119, "step": 1369 }, { "epoch": 0.9474412171507607, "grad_norm": 2.9507594108581543, "learning_rate": 4.737206085753804e-05, "log_odds_chosen": 4.197795867919922, "log_odds_ratio": -0.3220274746417999, "logits/chosen": -0.5537489652633667, "logits/rejected": -0.5800063610076904, "logps/chosen": -0.09319345653057098, "logps/rejected": -0.5641083121299744, "loss": 2.8204, "nll_loss": 0.6729087233543396, "rewards/accuracies": 0.875, "rewards/chosen": -0.009319345466792583, "rewards/margins": 0.04709148779511452, "rewards/rejected": -0.056410834193229675, "step": 1370 }, { "epoch": 0.9481327800829875, "grad_norm": 4.980175971984863, "learning_rate": 4.740663900414938e-05, "log_odds_chosen": 5.336559295654297, "log_odds_ratio": -0.10088712722063065, "logits/chosen": -0.3527514636516571, "logits/rejected": -0.37984007596969604, "logps/chosen": -0.05419841408729553, "logps/rejected": -0.985955536365509, "loss": 3.0996, "nll_loss": 0.7648058533668518, "rewards/accuracies": 1.0, "rewards/chosen": -0.005419841967523098, "rewards/margins": 0.09317570924758911, "rewards/rejected": -0.09859554469585419, "step": 1371 }, { "epoch": 0.9488243430152143, "grad_norm": 6.840479373931885, "learning_rate": 4.744121715076072e-05, "log_odds_chosen": 1.5749034881591797, "log_odds_ratio": -0.4523867964744568, "logits/chosen": -0.6053675413131714, "logits/rejected": -0.621617317199707, "logps/chosen": -0.12378253042697906, "logps/rejected": -0.3877543807029724, "loss": 4.6367, "nll_loss": 1.1139403581619263, "rewards/accuracies": 0.75, "rewards/chosen": -0.012378253042697906, "rewards/margins": 0.026397183537483215, "rewards/rejected": -0.03877543285489082, "step": 1372 }, { "epoch": 0.9495159059474412, "grad_norm": 6.94858455657959, "learning_rate": 4.747579529737206e-05, "log_odds_chosen": 3.1703052520751953, "log_odds_ratio": -0.5808858871459961, "logits/chosen": -0.8809736967086792, "logits/rejected": -0.8974775075912476, "logps/chosen": -0.10179749131202698, "logps/rejected": -0.544264554977417, "loss": 4.1248, "nll_loss": 0.9731166362762451, "rewards/accuracies": 0.625, "rewards/chosen": -0.010179748758673668, "rewards/margins": 0.04424671083688736, "rewards/rejected": -0.05442645773291588, "step": 1373 }, { "epoch": 0.950207468879668, "grad_norm": 6.224476337432861, "learning_rate": 4.75103734439834e-05, "log_odds_chosen": 2.24906587600708, "log_odds_ratio": -0.36297982931137085, "logits/chosen": -0.5064922571182251, "logits/rejected": -0.5028213262557983, "logps/chosen": -0.14730899035930634, "logps/rejected": -0.5468143820762634, "loss": 4.5969, "nll_loss": 1.112921953201294, "rewards/accuracies": 0.875, "rewards/chosen": -0.014730898663401604, "rewards/margins": 0.03995054215192795, "rewards/rejected": -0.0546814389526844, "step": 1374 }, { "epoch": 0.9508990318118948, "grad_norm": 5.267735958099365, "learning_rate": 4.7544951590594745e-05, "log_odds_chosen": 0.8886209726333618, "log_odds_ratio": -0.6777921319007874, "logits/chosen": -0.5937820076942444, "logits/rejected": -0.5795533061027527, "logps/chosen": -0.16547569632530212, "logps/rejected": -0.3152911067008972, "loss": 4.4504, "nll_loss": 1.0448328256607056, "rewards/accuracies": 0.375, "rewards/chosen": -0.016547568142414093, "rewards/margins": 0.014981540851294994, "rewards/rejected": -0.03152911365032196, "step": 1375 }, { "epoch": 0.9515905947441217, "grad_norm": 4.287798881530762, "learning_rate": 4.7579529737206086e-05, "log_odds_chosen": 1.8869426250457764, "log_odds_ratio": -0.5077506303787231, "logits/chosen": -0.6354760527610779, "logits/rejected": -0.6710132360458374, "logps/chosen": -0.16248366236686707, "logps/rejected": -0.6342530250549316, "loss": 3.3212, "nll_loss": 0.7795161008834839, "rewards/accuracies": 0.625, "rewards/chosen": -0.016248365864157677, "rewards/margins": 0.04717693477869034, "rewards/rejected": -0.06342530250549316, "step": 1376 }, { "epoch": 0.9522821576763485, "grad_norm": 4.387780666351318, "learning_rate": 4.761410788381743e-05, "log_odds_chosen": 3.9893524646759033, "log_odds_ratio": -0.2768661379814148, "logits/chosen": -0.9158992767333984, "logits/rejected": -0.9356086850166321, "logps/chosen": -0.08836707472801208, "logps/rejected": -0.5554932951927185, "loss": 4.7146, "nll_loss": 1.1509664058685303, "rewards/accuracies": 0.875, "rewards/chosen": -0.008836708031594753, "rewards/margins": 0.04671262204647064, "rewards/rejected": -0.05554932728409767, "step": 1377 }, { "epoch": 0.9529737206085753, "grad_norm": 4.432811737060547, "learning_rate": 4.764868603042877e-05, "log_odds_chosen": 2.32497501373291, "log_odds_ratio": -0.3427910804748535, "logits/chosen": -0.5060633420944214, "logits/rejected": -0.5108827352523804, "logps/chosen": -0.11491291224956512, "logps/rejected": -0.4623175859451294, "loss": 3.8105, "nll_loss": 0.9183489084243774, "rewards/accuracies": 0.75, "rewards/chosen": -0.011491292156279087, "rewards/margins": 0.03474046662449837, "rewards/rejected": -0.04623176157474518, "step": 1378 }, { "epoch": 0.9536652835408023, "grad_norm": 5.681545257568359, "learning_rate": 4.768326417704012e-05, "log_odds_chosen": 2.833235740661621, "log_odds_ratio": -0.5704039335250854, "logits/chosen": -0.6774800419807434, "logits/rejected": -0.749790370464325, "logps/chosen": -0.10722468048334122, "logps/rejected": -0.47469180822372437, "loss": 4.0847, "nll_loss": 0.9641448259353638, "rewards/accuracies": 0.625, "rewards/chosen": -0.010722469538450241, "rewards/margins": 0.036746710538864136, "rewards/rejected": -0.047469183802604675, "step": 1379 }, { "epoch": 0.9543568464730291, "grad_norm": 5.541007995605469, "learning_rate": 4.771784232365146e-05, "log_odds_chosen": 3.154003858566284, "log_odds_ratio": -0.16309258341789246, "logits/chosen": -0.40195342898368835, "logits/rejected": -0.4969612956047058, "logps/chosen": -0.18587008118629456, "logps/rejected": -0.8497620224952698, "loss": 4.5835, "nll_loss": 1.1295685768127441, "rewards/accuracies": 1.0, "rewards/chosen": -0.018587008118629456, "rewards/margins": 0.06638918817043304, "rewards/rejected": -0.0849761962890625, "step": 1380 }, { "epoch": 0.9550484094052559, "grad_norm": 4.687609672546387, "learning_rate": 4.77524204702628e-05, "log_odds_chosen": 1.5811307430267334, "log_odds_ratio": -0.7333714365959167, "logits/chosen": -0.6358388662338257, "logits/rejected": -0.6384164690971375, "logps/chosen": -0.21824342012405396, "logps/rejected": -0.5271948575973511, "loss": 3.788, "nll_loss": 0.8736591339111328, "rewards/accuracies": 0.5, "rewards/chosen": -0.021824343129992485, "rewards/margins": 0.030895143747329712, "rewards/rejected": -0.052719488739967346, "step": 1381 }, { "epoch": 0.9557399723374828, "grad_norm": 5.607902526855469, "learning_rate": 4.778699861687414e-05, "log_odds_chosen": 1.194372534751892, "log_odds_ratio": -0.5874398946762085, "logits/chosen": -0.7090533971786499, "logits/rejected": -0.732360303401947, "logps/chosen": -0.26333528757095337, "logps/rejected": -0.5452054142951965, "loss": 4.5204, "nll_loss": 1.0713554620742798, "rewards/accuracies": 0.625, "rewards/chosen": -0.026333527639508247, "rewards/margins": 0.028187017887830734, "rewards/rejected": -0.05452054366469383, "step": 1382 }, { "epoch": 0.9564315352697096, "grad_norm": 4.289752960205078, "learning_rate": 4.7821576763485484e-05, "log_odds_chosen": 3.3134241104125977, "log_odds_ratio": -0.19969701766967773, "logits/chosen": -0.6987218260765076, "logits/rejected": -0.7583480477333069, "logps/chosen": -0.06717553734779358, "logps/rejected": -0.5206062197685242, "loss": 3.9022, "nll_loss": 0.9555697441101074, "rewards/accuracies": 1.0, "rewards/chosen": -0.006717554293572903, "rewards/margins": 0.045343067497015, "rewards/rejected": -0.052060626447200775, "step": 1383 }, { "epoch": 0.9571230982019364, "grad_norm": 3.9954605102539062, "learning_rate": 4.7856154910096825e-05, "log_odds_chosen": 2.87797474861145, "log_odds_ratio": -0.31735920906066895, "logits/chosen": -0.41059356927871704, "logits/rejected": -0.42806077003479004, "logps/chosen": -0.1289018988609314, "logps/rejected": -0.5481069684028625, "loss": 3.6808, "nll_loss": 0.8884710669517517, "rewards/accuracies": 0.875, "rewards/chosen": -0.012890191748738289, "rewards/margins": 0.041920505464076996, "rewards/rejected": -0.054810699075460434, "step": 1384 }, { "epoch": 0.9578146611341632, "grad_norm": 2.9284474849700928, "learning_rate": 4.789073305670817e-05, "log_odds_chosen": 1.96919584274292, "log_odds_ratio": -0.22860193252563477, "logits/chosen": -0.6298066973686218, "logits/rejected": -0.6608700752258301, "logps/chosen": -0.10201866924762726, "logps/rejected": -0.5613384246826172, "loss": 2.7856, "nll_loss": 0.6735316514968872, "rewards/accuracies": 1.0, "rewards/chosen": -0.010201867669820786, "rewards/margins": 0.04593197628855705, "rewards/rejected": -0.05613384395837784, "step": 1385 }, { "epoch": 0.9585062240663901, "grad_norm": 4.66770076751709, "learning_rate": 4.792531120331951e-05, "log_odds_chosen": 2.5473694801330566, "log_odds_ratio": -0.3094061017036438, "logits/chosen": -0.564201831817627, "logits/rejected": -0.5761914253234863, "logps/chosen": -0.11359382420778275, "logps/rejected": -0.4843006730079651, "loss": 4.2746, "nll_loss": 1.037712812423706, "rewards/accuracies": 1.0, "rewards/chosen": -0.011359382420778275, "rewards/margins": 0.03707068786025047, "rewards/rejected": -0.04843007028102875, "step": 1386 }, { "epoch": 0.9591977869986169, "grad_norm": 3.5193567276000977, "learning_rate": 4.795988934993085e-05, "log_odds_chosen": 1.3455833196640015, "log_odds_ratio": -0.5004943609237671, "logits/chosen": -0.5322783589363098, "logits/rejected": -0.5484592914581299, "logps/chosen": -0.12864184379577637, "logps/rejected": -0.31443101167678833, "loss": 3.4441, "nll_loss": 0.8109761476516724, "rewards/accuracies": 0.75, "rewards/chosen": -0.012864183634519577, "rewards/margins": 0.018578914925456047, "rewards/rejected": -0.03144310042262077, "step": 1387 }, { "epoch": 0.9598893499308437, "grad_norm": 4.228249549865723, "learning_rate": 4.799446749654219e-05, "log_odds_chosen": 2.267373561859131, "log_odds_ratio": -0.4458346962928772, "logits/chosen": -0.5130159258842468, "logits/rejected": -0.5397688150405884, "logps/chosen": -0.1408432275056839, "logps/rejected": -0.542976975440979, "loss": 2.7079, "nll_loss": 0.632402777671814, "rewards/accuracies": 0.75, "rewards/chosen": -0.01408432237803936, "rewards/margins": 0.04021337628364563, "rewards/rejected": -0.05429770052433014, "step": 1388 }, { "epoch": 0.9605809128630706, "grad_norm": 5.8462677001953125, "learning_rate": 4.802904564315353e-05, "log_odds_chosen": 2.109428882598877, "log_odds_ratio": -0.608425498008728, "logits/chosen": -0.33053314685821533, "logits/rejected": -0.3880746364593506, "logps/chosen": -0.1225154846906662, "logps/rejected": -0.3235834836959839, "loss": 3.6282, "nll_loss": 0.8462128639221191, "rewards/accuracies": 0.625, "rewards/chosen": -0.01225154846906662, "rewards/margins": 0.02010679990053177, "rewards/rejected": -0.03235834836959839, "step": 1389 }, { "epoch": 0.9612724757952974, "grad_norm": 6.464356422424316, "learning_rate": 4.8063623789764875e-05, "log_odds_chosen": 1.8575029373168945, "log_odds_ratio": -0.40052133798599243, "logits/chosen": -0.4968864321708679, "logits/rejected": -0.5065348744392395, "logps/chosen": -0.08285154402256012, "logps/rejected": -0.35983797907829285, "loss": 4.0866, "nll_loss": 0.9815993905067444, "rewards/accuracies": 0.875, "rewards/chosen": -0.008285155519843102, "rewards/margins": 0.027698643505573273, "rewards/rejected": -0.03598380088806152, "step": 1390 }, { "epoch": 0.9619640387275242, "grad_norm": 4.375123500823975, "learning_rate": 4.8098201936376216e-05, "log_odds_chosen": 2.18118953704834, "log_odds_ratio": -0.489065557718277, "logits/chosen": -0.7576559782028198, "logits/rejected": -0.7377809882164001, "logps/chosen": -0.12921804189682007, "logps/rejected": -0.48704081773757935, "loss": 3.0138, "nll_loss": 0.7045431137084961, "rewards/accuracies": 0.75, "rewards/chosen": -0.012921803630888462, "rewards/margins": 0.03578227758407593, "rewards/rejected": -0.048704084008932114, "step": 1391 }, { "epoch": 0.9626556016597511, "grad_norm": 6.688558578491211, "learning_rate": 4.813278008298756e-05, "log_odds_chosen": 3.997199535369873, "log_odds_ratio": -0.18422558903694153, "logits/chosen": -0.009374737739562988, "logits/rejected": -0.01648634299635887, "logps/chosen": -0.06665275990962982, "logps/rejected": -0.8346791863441467, "loss": 3.2844, "nll_loss": 0.8026823997497559, "rewards/accuracies": 0.875, "rewards/chosen": -0.006665276363492012, "rewards/margins": 0.07680264115333557, "rewards/rejected": -0.08346791565418243, "step": 1392 }, { "epoch": 0.9633471645919779, "grad_norm": 6.806331157684326, "learning_rate": 4.81673582295989e-05, "log_odds_chosen": 2.9461169242858887, "log_odds_ratio": -0.3872566819190979, "logits/chosen": -0.506131649017334, "logits/rejected": -0.595481276512146, "logps/chosen": -0.10255613178014755, "logps/rejected": -0.7516794204711914, "loss": 4.0962, "nll_loss": 0.985315203666687, "rewards/accuracies": 0.875, "rewards/chosen": -0.010255612432956696, "rewards/margins": 0.0649123340845108, "rewards/rejected": -0.0751679390668869, "step": 1393 }, { "epoch": 0.9640387275242047, "grad_norm": 5.865633487701416, "learning_rate": 4.820193637621024e-05, "log_odds_chosen": 2.411072254180908, "log_odds_ratio": -0.27777040004730225, "logits/chosen": -0.9139069318771362, "logits/rejected": -0.9897363781929016, "logps/chosen": -0.09145885705947876, "logps/rejected": -0.3880517780780792, "loss": 5.0715, "nll_loss": 1.2401103973388672, "rewards/accuracies": 1.0, "rewards/chosen": -0.009145885705947876, "rewards/margins": 0.029659289866685867, "rewards/rejected": -0.03880517557263374, "step": 1394 }, { "epoch": 0.9647302904564315, "grad_norm": 6.374701499938965, "learning_rate": 4.823651452282158e-05, "log_odds_chosen": 2.4968247413635254, "log_odds_ratio": -0.248407244682312, "logits/chosen": -0.1413920819759369, "logits/rejected": -0.19391274452209473, "logps/chosen": -0.06600844860076904, "logps/rejected": -0.3738167881965637, "loss": 4.7476, "nll_loss": 1.1620688438415527, "rewards/accuracies": 1.0, "rewards/chosen": -0.006600845605134964, "rewards/margins": 0.030780835077166557, "rewards/rejected": -0.03738168254494667, "step": 1395 }, { "epoch": 0.9654218533886584, "grad_norm": 4.07810640335083, "learning_rate": 4.8271092669432924e-05, "log_odds_chosen": 3.7929351329803467, "log_odds_ratio": -0.1519448459148407, "logits/chosen": -0.19091452658176422, "logits/rejected": -0.24754397571086884, "logps/chosen": -0.044879138469696045, "logps/rejected": -0.5073044896125793, "loss": 2.6384, "nll_loss": 0.644395649433136, "rewards/accuracies": 1.0, "rewards/chosen": -0.004487914033234119, "rewards/margins": 0.04624253511428833, "rewards/rejected": -0.05073045194149017, "step": 1396 }, { "epoch": 0.9661134163208852, "grad_norm": 3.521620512008667, "learning_rate": 4.8305670816044266e-05, "log_odds_chosen": 3.7254812717437744, "log_odds_ratio": -0.23378178477287292, "logits/chosen": -0.027941592037677765, "logits/rejected": -0.03508976846933365, "logps/chosen": -0.07573119550943375, "logps/rejected": -0.5350953340530396, "loss": 3.7968, "nll_loss": 0.9258334636688232, "rewards/accuracies": 0.875, "rewards/chosen": -0.00757311936467886, "rewards/margins": 0.04593641683459282, "rewards/rejected": -0.053509537130594254, "step": 1397 }, { "epoch": 0.966804979253112, "grad_norm": 5.105077743530273, "learning_rate": 4.834024896265561e-05, "log_odds_chosen": 3.2875313758850098, "log_odds_ratio": -0.2844654321670532, "logits/chosen": -0.19148701429367065, "logits/rejected": -0.2091885507106781, "logps/chosen": -0.09642978012561798, "logps/rejected": -0.5320587754249573, "loss": 3.3054, "nll_loss": 0.7979139089584351, "rewards/accuracies": 0.875, "rewards/chosen": -0.009642978198826313, "rewards/margins": 0.04356290400028229, "rewards/rejected": -0.053205881267786026, "step": 1398 }, { "epoch": 0.9674965421853389, "grad_norm": 4.500959396362305, "learning_rate": 4.837482710926695e-05, "log_odds_chosen": 2.458662748336792, "log_odds_ratio": -0.47104576230049133, "logits/chosen": -0.5823588967323303, "logits/rejected": -0.585030734539032, "logps/chosen": -0.18328362703323364, "logps/rejected": -0.5744332671165466, "loss": 3.0462, "nll_loss": 0.7144502997398376, "rewards/accuracies": 0.625, "rewards/chosen": -0.018328363075852394, "rewards/margins": 0.039114970713853836, "rewards/rejected": -0.05744332820177078, "step": 1399 }, { "epoch": 0.9681881051175657, "grad_norm": 4.3684401512146, "learning_rate": 4.840940525587829e-05, "log_odds_chosen": 1.7222740650177002, "log_odds_ratio": -0.4732230305671692, "logits/chosen": -0.4307154417037964, "logits/rejected": -0.4691559970378876, "logps/chosen": -0.18791911005973816, "logps/rejected": -0.42685210704803467, "loss": 3.9824, "nll_loss": 0.9482815265655518, "rewards/accuracies": 0.75, "rewards/chosen": -0.018791913986206055, "rewards/margins": 0.02389329858124256, "rewards/rejected": -0.04268521070480347, "step": 1400 }, { "epoch": 0.9688796680497925, "grad_norm": 6.100607395172119, "learning_rate": 4.844398340248963e-05, "log_odds_chosen": 2.7508108615875244, "log_odds_ratio": -0.2012925148010254, "logits/chosen": -0.5389729142189026, "logits/rejected": -0.5223175883293152, "logps/chosen": -0.10300426185131073, "logps/rejected": -0.6810008883476257, "loss": 4.3147, "nll_loss": 1.058542251586914, "rewards/accuracies": 1.0, "rewards/chosen": -0.010300425812602043, "rewards/margins": 0.05779966339468956, "rewards/rejected": -0.06810008734464645, "step": 1401 }, { "epoch": 0.9695712309820194, "grad_norm": 5.2680768966674805, "learning_rate": 4.8478561549100974e-05, "log_odds_chosen": 3.3759124279022217, "log_odds_ratio": -0.28325340151786804, "logits/chosen": -0.36735105514526367, "logits/rejected": -0.39844614267349243, "logps/chosen": -0.07357937842607498, "logps/rejected": -0.695828914642334, "loss": 3.9927, "nll_loss": 0.9698481559753418, "rewards/accuracies": 1.0, "rewards/chosen": -0.007357938215136528, "rewards/margins": 0.06222495436668396, "rewards/rejected": -0.06958289444446564, "step": 1402 }, { "epoch": 0.9702627939142462, "grad_norm": 6.146441459655762, "learning_rate": 4.8513139695712315e-05, "log_odds_chosen": 2.8977818489074707, "log_odds_ratio": -0.34373605251312256, "logits/chosen": -0.08919993788003922, "logits/rejected": -0.1025874987244606, "logps/chosen": -0.12175912410020828, "logps/rejected": -0.4791489541530609, "loss": 4.2676, "nll_loss": 1.0325186252593994, "rewards/accuracies": 1.0, "rewards/chosen": -0.012175912037491798, "rewards/margins": 0.0357389822602272, "rewards/rejected": -0.04791489243507385, "step": 1403 }, { "epoch": 0.970954356846473, "grad_norm": 5.012028217315674, "learning_rate": 4.854771784232366e-05, "log_odds_chosen": 4.039868354797363, "log_odds_ratio": -0.1392008513212204, "logits/chosen": -0.4420851469039917, "logits/rejected": -0.4822046160697937, "logps/chosen": -0.062176115810871124, "logps/rejected": -0.6904194951057434, "loss": 3.8251, "nll_loss": 0.942352831363678, "rewards/accuracies": 1.0, "rewards/chosen": -0.006217611953616142, "rewards/margins": 0.06282433867454529, "rewards/rejected": -0.06904194504022598, "step": 1404 }, { "epoch": 0.9716459197786999, "grad_norm": 6.326803684234619, "learning_rate": 4.858229598893499e-05, "log_odds_chosen": 5.957394599914551, "log_odds_ratio": -0.08176662027835846, "logits/chosen": -0.33497846126556396, "logits/rejected": -0.35597074031829834, "logps/chosen": -0.040950141847133636, "logps/rejected": -1.1453843116760254, "loss": 4.6783, "nll_loss": 1.1614075899124146, "rewards/accuracies": 1.0, "rewards/chosen": -0.004095014184713364, "rewards/margins": 0.11044342070817947, "rewards/rejected": -0.11453843861818314, "step": 1405 }, { "epoch": 0.9723374827109267, "grad_norm": 4.005640029907227, "learning_rate": 4.861687413554633e-05, "log_odds_chosen": 1.7429275512695312, "log_odds_ratio": -0.5316041111946106, "logits/chosen": -0.5472002029418945, "logits/rejected": -0.5267659425735474, "logps/chosen": -0.10989716649055481, "logps/rejected": -0.5483049154281616, "loss": 3.148, "nll_loss": 0.7338396310806274, "rewards/accuracies": 0.625, "rewards/chosen": -0.010989716276526451, "rewards/margins": 0.04384077712893486, "rewards/rejected": -0.05483049526810646, "step": 1406 }, { "epoch": 0.9730290456431535, "grad_norm": 4.7602219581604, "learning_rate": 4.8651452282157675e-05, "log_odds_chosen": 3.336592674255371, "log_odds_ratio": -0.26619279384613037, "logits/chosen": -0.4598739743232727, "logits/rejected": -0.4864497780799866, "logps/chosen": -0.08146242797374725, "logps/rejected": -0.5847065448760986, "loss": 4.358, "nll_loss": 1.0628809928894043, "rewards/accuracies": 0.75, "rewards/chosen": -0.008146243169903755, "rewards/margins": 0.05032441020011902, "rewards/rejected": -0.058470651507377625, "step": 1407 }, { "epoch": 0.9737206085753803, "grad_norm": 4.0709357261657715, "learning_rate": 4.8686030428769016e-05, "log_odds_chosen": 3.5210342407226562, "log_odds_ratio": -0.22207771241664886, "logits/chosen": -0.4946010708808899, "logits/rejected": -0.5341157913208008, "logps/chosen": -0.08640432357788086, "logps/rejected": -0.6819725036621094, "loss": 2.8891, "nll_loss": 0.7000560760498047, "rewards/accuracies": 0.875, "rewards/chosen": -0.008640431798994541, "rewards/margins": 0.05955682322382927, "rewards/rejected": -0.06819725036621094, "step": 1408 }, { "epoch": 0.9744121715076072, "grad_norm": 4.645059108734131, "learning_rate": 4.872060857538036e-05, "log_odds_chosen": 3.6667566299438477, "log_odds_ratio": -0.19257347285747528, "logits/chosen": -0.4007118344306946, "logits/rejected": -0.4578131139278412, "logps/chosen": -0.09479233622550964, "logps/rejected": -0.5741965770721436, "loss": 3.3919, "nll_loss": 0.8287203907966614, "rewards/accuracies": 0.875, "rewards/chosen": -0.009479233995079994, "rewards/margins": 0.04794042184948921, "rewards/rejected": -0.057419657707214355, "step": 1409 }, { "epoch": 0.975103734439834, "grad_norm": 5.395598888397217, "learning_rate": 4.87551867219917e-05, "log_odds_chosen": 2.5936923027038574, "log_odds_ratio": -0.3879097104072571, "logits/chosen": -0.3431122601032257, "logits/rejected": -0.39480048418045044, "logps/chosen": -0.12003860622644424, "logps/rejected": -0.5975310206413269, "loss": 4.2286, "nll_loss": 1.0183528661727905, "rewards/accuracies": 0.875, "rewards/chosen": -0.012003861367702484, "rewards/margins": 0.047749243676662445, "rewards/rejected": -0.05975310876965523, "step": 1410 }, { "epoch": 0.9757952973720608, "grad_norm": 5.991229057312012, "learning_rate": 4.878976486860304e-05, "log_odds_chosen": 4.690047740936279, "log_odds_ratio": -0.18323415517807007, "logits/chosen": -0.48473188281059265, "logits/rejected": -0.5005260109901428, "logps/chosen": -0.06240301951766014, "logps/rejected": -1.179287075996399, "loss": 3.4513, "nll_loss": 0.844507098197937, "rewards/accuracies": 0.875, "rewards/chosen": -0.0062403022311627865, "rewards/margins": 0.11168840527534485, "rewards/rejected": -0.11792870610952377, "step": 1411 }, { "epoch": 0.9764868603042877, "grad_norm": 4.65676736831665, "learning_rate": 4.882434301521438e-05, "log_odds_chosen": 0.7128917574882507, "log_odds_ratio": -0.48616403341293335, "logits/chosen": -0.7598358392715454, "logits/rejected": -0.7617342472076416, "logps/chosen": -0.09561615437269211, "logps/rejected": -0.18550316989421844, "loss": 3.743, "nll_loss": 0.8871452212333679, "rewards/accuracies": 0.75, "rewards/chosen": -0.009561615064740181, "rewards/margins": 0.008988700807094574, "rewards/rejected": -0.018550317734479904, "step": 1412 }, { "epoch": 0.9771784232365145, "grad_norm": 5.093491077423096, "learning_rate": 4.8858921161825724e-05, "log_odds_chosen": 1.3392752408981323, "log_odds_ratio": -0.48251235485076904, "logits/chosen": -0.477012574672699, "logits/rejected": -0.4597123861312866, "logps/chosen": -0.22213858366012573, "logps/rejected": -0.3907717764377594, "loss": 3.4779, "nll_loss": 0.8212242126464844, "rewards/accuracies": 0.75, "rewards/chosen": -0.022213861346244812, "rewards/margins": 0.016863318160176277, "rewards/rejected": -0.03907717764377594, "step": 1413 }, { "epoch": 0.9778699861687413, "grad_norm": 10.657920837402344, "learning_rate": 4.8893499308437066e-05, "log_odds_chosen": 0.4027661979198456, "log_odds_ratio": -1.558465600013733, "logits/chosen": -0.6197187900543213, "logits/rejected": -0.6360077857971191, "logps/chosen": -0.3210720717906952, "logps/rejected": -0.2586687505245209, "loss": 5.5928, "nll_loss": 1.2423492670059204, "rewards/accuracies": 0.625, "rewards/chosen": -0.03210721164941788, "rewards/margins": -0.0062403371557593346, "rewards/rejected": -0.025866875424981117, "step": 1414 }, { "epoch": 0.9785615491009682, "grad_norm": 6.654989719390869, "learning_rate": 4.892807745504841e-05, "log_odds_chosen": 2.0434505939483643, "log_odds_ratio": -0.43897897005081177, "logits/chosen": -0.6745843887329102, "logits/rejected": -0.6595047116279602, "logps/chosen": -0.11813121289014816, "logps/rejected": -0.3862118721008301, "loss": 4.2663, "nll_loss": 1.0226771831512451, "rewards/accuracies": 0.75, "rewards/chosen": -0.011813120916485786, "rewards/margins": 0.02680806629359722, "rewards/rejected": -0.038621190935373306, "step": 1415 }, { "epoch": 0.979253112033195, "grad_norm": 5.987014293670654, "learning_rate": 4.896265560165975e-05, "log_odds_chosen": 3.5931477546691895, "log_odds_ratio": -0.3616059124469757, "logits/chosen": -0.3304971158504486, "logits/rejected": -0.4050114154815674, "logps/chosen": -0.07955209910869598, "logps/rejected": -0.7297384738922119, "loss": 4.1158, "nll_loss": 0.9927953481674194, "rewards/accuracies": 0.625, "rewards/chosen": -0.007955210283398628, "rewards/margins": 0.06501863896846771, "rewards/rejected": -0.0729738399386406, "step": 1416 }, { "epoch": 0.9799446749654218, "grad_norm": 7.944802761077881, "learning_rate": 4.899723374827109e-05, "log_odds_chosen": 2.194571018218994, "log_odds_ratio": -0.7392339110374451, "logits/chosen": -0.18101269006729126, "logits/rejected": -0.1946060061454773, "logps/chosen": -0.08568310737609863, "logps/rejected": -0.389140784740448, "loss": 3.6976, "nll_loss": 0.8504800796508789, "rewards/accuracies": 0.75, "rewards/chosen": -0.008568311110138893, "rewards/margins": 0.030345769599080086, "rewards/rejected": -0.03891407698392868, "step": 1417 }, { "epoch": 0.9806362378976486, "grad_norm": 5.2776312828063965, "learning_rate": 4.903181189488243e-05, "log_odds_chosen": 3.4539592266082764, "log_odds_ratio": -0.17639483511447906, "logits/chosen": -0.6359360814094543, "logits/rejected": -0.681241512298584, "logps/chosen": -0.07430019974708557, "logps/rejected": -0.6378059387207031, "loss": 3.5222, "nll_loss": 0.8629008531570435, "rewards/accuracies": 1.0, "rewards/chosen": -0.007430019788444042, "rewards/margins": 0.056350577622652054, "rewards/rejected": -0.06378059834241867, "step": 1418 }, { "epoch": 0.9813278008298755, "grad_norm": 7.109945774078369, "learning_rate": 4.9066390041493773e-05, "log_odds_chosen": 3.779050350189209, "log_odds_ratio": -0.6532815098762512, "logits/chosen": -0.6393461227416992, "logits/rejected": -0.6872209310531616, "logps/chosen": -0.11103115975856781, "logps/rejected": -0.819476842880249, "loss": 5.2681, "nll_loss": 1.2516968250274658, "rewards/accuracies": 0.5, "rewards/chosen": -0.011103115975856781, "rewards/margins": 0.07084456831216812, "rewards/rejected": -0.0819476842880249, "step": 1419 }, { "epoch": 0.9820193637621023, "grad_norm": 5.775816440582275, "learning_rate": 4.9100968188105115e-05, "log_odds_chosen": 1.6225054264068604, "log_odds_ratio": -0.5428125858306885, "logits/chosen": -0.7603781223297119, "logits/rejected": -0.7697376608848572, "logps/chosen": -0.14464308321475983, "logps/rejected": -0.5490061640739441, "loss": 4.3808, "nll_loss": 1.0409178733825684, "rewards/accuracies": 0.75, "rewards/chosen": -0.014464308507740498, "rewards/margins": 0.04043630510568619, "rewards/rejected": -0.05490061640739441, "step": 1420 }, { "epoch": 0.9827109266943291, "grad_norm": 5.920970439910889, "learning_rate": 4.9135546334716457e-05, "log_odds_chosen": 4.283205986022949, "log_odds_ratio": -0.17093268036842346, "logits/chosen": -0.7627154588699341, "logits/rejected": -0.8561334013938904, "logps/chosen": -0.04168498143553734, "logps/rejected": -0.7488090991973877, "loss": 4.9513, "nll_loss": 1.2207205295562744, "rewards/accuracies": 0.875, "rewards/chosen": -0.004168498329818249, "rewards/margins": 0.07071240991353989, "rewards/rejected": -0.07488091289997101, "step": 1421 }, { "epoch": 0.983402489626556, "grad_norm": 6.730568885803223, "learning_rate": 4.91701244813278e-05, "log_odds_chosen": 1.5627930164337158, "log_odds_ratio": -0.3418138027191162, "logits/chosen": -0.39804694056510925, "logits/rejected": -0.4181201756000519, "logps/chosen": -0.14453352987766266, "logps/rejected": -0.5822803974151611, "loss": 4.5374, "nll_loss": 1.1001561880111694, "rewards/accuracies": 0.875, "rewards/chosen": -0.014453353360295296, "rewards/margins": 0.04377468675374985, "rewards/rejected": -0.05822804570198059, "step": 1422 }, { "epoch": 0.9840940525587828, "grad_norm": 4.172337532043457, "learning_rate": 4.920470262793914e-05, "log_odds_chosen": 3.565321922302246, "log_odds_ratio": -0.16967037320137024, "logits/chosen": -0.4934771955013275, "logits/rejected": -0.5961635112762451, "logps/chosen": -0.08974127471446991, "logps/rejected": -0.806494951248169, "loss": 2.8905, "nll_loss": 0.7056623697280884, "rewards/accuracies": 0.875, "rewards/chosen": -0.008974128402769566, "rewards/margins": 0.0716753676533699, "rewards/rejected": -0.0806494951248169, "step": 1423 }, { "epoch": 0.9847856154910097, "grad_norm": 3.726513385772705, "learning_rate": 4.923928077455049e-05, "log_odds_chosen": 3.001035213470459, "log_odds_ratio": -0.4421585202217102, "logits/chosen": -0.6121970415115356, "logits/rejected": -0.6204871535301208, "logps/chosen": -0.0803869366645813, "logps/rejected": -0.4731726050376892, "loss": 2.6134, "nll_loss": 0.609136164188385, "rewards/accuracies": 0.625, "rewards/chosen": -0.008038693107664585, "rewards/margins": 0.03927857056260109, "rewards/rejected": -0.0473172664642334, "step": 1424 }, { "epoch": 0.9854771784232366, "grad_norm": 5.099123954772949, "learning_rate": 4.927385892116183e-05, "log_odds_chosen": 2.7881312370300293, "log_odds_ratio": -0.2055206596851349, "logits/chosen": -0.5875111818313599, "logits/rejected": -0.7261995077133179, "logps/chosen": -0.06383083760738373, "logps/rejected": -0.47276073694229126, "loss": 3.7443, "nll_loss": 0.9155149459838867, "rewards/accuracies": 0.875, "rewards/chosen": -0.006383083760738373, "rewards/margins": 0.040892988443374634, "rewards/rejected": -0.04727607220411301, "step": 1425 }, { "epoch": 0.9861687413554634, "grad_norm": 5.290557384490967, "learning_rate": 4.930843706777317e-05, "log_odds_chosen": 3.4888155460357666, "log_odds_ratio": -0.18133285641670227, "logits/chosen": -0.3766958713531494, "logits/rejected": -0.439983606338501, "logps/chosen": -0.08132694661617279, "logps/rejected": -0.933536171913147, "loss": 3.4533, "nll_loss": 0.8451938629150391, "rewards/accuracies": 1.0, "rewards/chosen": -0.008132695220410824, "rewards/margins": 0.08522091805934906, "rewards/rejected": -0.09335361421108246, "step": 1426 }, { "epoch": 0.9868603042876902, "grad_norm": 6.354071140289307, "learning_rate": 4.934301521438451e-05, "log_odds_chosen": 3.1632020473480225, "log_odds_ratio": -0.21364012360572815, "logits/chosen": -0.4825863242149353, "logits/rejected": -0.5225251317024231, "logps/chosen": -0.05160689353942871, "logps/rejected": -0.6081209182739258, "loss": 5.2659, "nll_loss": 1.2951115369796753, "rewards/accuracies": 1.0, "rewards/chosen": -0.005160689353942871, "rewards/margins": 0.055651403963565826, "rewards/rejected": -0.0608120895922184, "step": 1427 }, { "epoch": 0.9875518672199171, "grad_norm": 5.97199821472168, "learning_rate": 4.9377593360995854e-05, "log_odds_chosen": 3.9173221588134766, "log_odds_ratio": -0.1347779929637909, "logits/chosen": -0.5775250196456909, "logits/rejected": -0.624565839767456, "logps/chosen": -0.0501507967710495, "logps/rejected": -0.64739990234375, "loss": 3.8972, "nll_loss": 0.9608168601989746, "rewards/accuracies": 1.0, "rewards/chosen": -0.005015079397708178, "rewards/margins": 0.05972491204738617, "rewards/rejected": -0.06473998725414276, "step": 1428 }, { "epoch": 0.9882434301521439, "grad_norm": 4.286798477172852, "learning_rate": 4.9412171507607196e-05, "log_odds_chosen": 2.3756041526794434, "log_odds_ratio": -0.3553531765937805, "logits/chosen": -0.7835421562194824, "logits/rejected": -0.8416650891304016, "logps/chosen": -0.07701346278190613, "logps/rejected": -0.359640896320343, "loss": 3.0372, "nll_loss": 0.7237566709518433, "rewards/accuracies": 0.75, "rewards/chosen": -0.007701346650719643, "rewards/margins": 0.02826274186372757, "rewards/rejected": -0.03596408665180206, "step": 1429 }, { "epoch": 0.9889349930843707, "grad_norm": 4.213981628417969, "learning_rate": 4.944674965421854e-05, "log_odds_chosen": 3.0098376274108887, "log_odds_ratio": -0.2841997742652893, "logits/chosen": -0.809171199798584, "logits/rejected": -0.837843656539917, "logps/chosen": -0.1136767566204071, "logps/rejected": -0.6050674319267273, "loss": 3.5371, "nll_loss": 0.8558429479598999, "rewards/accuracies": 0.875, "rewards/chosen": -0.01136767491698265, "rewards/margins": 0.04913906753063202, "rewards/rejected": -0.06050674617290497, "step": 1430 }, { "epoch": 0.9896265560165975, "grad_norm": 3.492421865463257, "learning_rate": 4.948132780082988e-05, "log_odds_chosen": 3.6703217029571533, "log_odds_ratio": -0.12424807995557785, "logits/chosen": -0.3869936466217041, "logits/rejected": -0.4417737126350403, "logps/chosen": -0.08608690649271011, "logps/rejected": -0.9713066816329956, "loss": 2.9985, "nll_loss": 0.7371969819068909, "rewards/accuracies": 1.0, "rewards/chosen": -0.008608691394329071, "rewards/margins": 0.08852197974920273, "rewards/rejected": -0.0971306711435318, "step": 1431 }, { "epoch": 0.9903181189488244, "grad_norm": 3.1567420959472656, "learning_rate": 4.951590594744122e-05, "log_odds_chosen": 5.578029632568359, "log_odds_ratio": -0.166071355342865, "logits/chosen": -0.6140339970588684, "logits/rejected": -0.6234652996063232, "logps/chosen": -0.05416777357459068, "logps/rejected": -0.7071319818496704, "loss": 3.3025, "nll_loss": 0.8090150356292725, "rewards/accuracies": 1.0, "rewards/chosen": -0.005416777450591326, "rewards/margins": 0.06529641896486282, "rewards/rejected": -0.07071319967508316, "step": 1432 }, { "epoch": 0.9910096818810512, "grad_norm": 5.914407730102539, "learning_rate": 4.955048409405256e-05, "log_odds_chosen": 2.671858310699463, "log_odds_ratio": -0.27089083194732666, "logits/chosen": -0.4663957953453064, "logits/rejected": -0.5192126035690308, "logps/chosen": -0.08659430593252182, "logps/rejected": -0.5890503525733948, "loss": 4.767, "nll_loss": 1.164661169052124, "rewards/accuracies": 1.0, "rewards/chosen": -0.008659430779516697, "rewards/margins": 0.050245605409145355, "rewards/rejected": -0.05890503525733948, "step": 1433 }, { "epoch": 0.991701244813278, "grad_norm": 3.4547741413116455, "learning_rate": 4.9585062240663904e-05, "log_odds_chosen": 3.578338861465454, "log_odds_ratio": -0.12430144101381302, "logits/chosen": -0.6321961879730225, "logits/rejected": -0.6485381126403809, "logps/chosen": -0.07194468379020691, "logps/rejected": -0.7826632261276245, "loss": 3.1203, "nll_loss": 0.7676528692245483, "rewards/accuracies": 1.0, "rewards/chosen": -0.007194467820227146, "rewards/margins": 0.07107185572385788, "rewards/rejected": -0.07826632261276245, "step": 1434 }, { "epoch": 0.9923928077455049, "grad_norm": 4.012014389038086, "learning_rate": 4.9619640387275245e-05, "log_odds_chosen": 2.3743512630462646, "log_odds_ratio": -0.5197234153747559, "logits/chosen": -0.1886919140815735, "logits/rejected": -0.19670218229293823, "logps/chosen": -0.14259615540504456, "logps/rejected": -0.418677419424057, "loss": 3.1601, "nll_loss": 0.7380492687225342, "rewards/accuracies": 0.625, "rewards/chosen": -0.014259614050388336, "rewards/margins": 0.027608126401901245, "rewards/rejected": -0.04186774045228958, "step": 1435 }, { "epoch": 0.9930843706777317, "grad_norm": 4.641087532043457, "learning_rate": 4.965421853388659e-05, "log_odds_chosen": 3.459301471710205, "log_odds_ratio": -0.30342578887939453, "logits/chosen": -0.8673899173736572, "logits/rejected": -0.9011232256889343, "logps/chosen": -0.043145108968019485, "logps/rejected": -0.3700050115585327, "loss": 4.9722, "nll_loss": 1.2126994132995605, "rewards/accuracies": 0.75, "rewards/chosen": -0.004314510617405176, "rewards/margins": 0.03268599137663841, "rewards/rejected": -0.03700050339102745, "step": 1436 }, { "epoch": 0.9937759336099585, "grad_norm": 5.460515022277832, "learning_rate": 4.968879668049793e-05, "log_odds_chosen": 3.272472381591797, "log_odds_ratio": -0.13704834878444672, "logits/chosen": -0.2897428870201111, "logits/rejected": -0.35923218727111816, "logps/chosen": -0.05391785502433777, "logps/rejected": -0.46236205101013184, "loss": 3.6047, "nll_loss": 0.8874602317810059, "rewards/accuracies": 1.0, "rewards/chosen": -0.005391785409301519, "rewards/margins": 0.040844421833753586, "rewards/rejected": -0.04623620584607124, "step": 1437 }, { "epoch": 0.9944674965421854, "grad_norm": 4.706887245178223, "learning_rate": 4.972337482710927e-05, "log_odds_chosen": 3.8396406173706055, "log_odds_ratio": -0.22891446948051453, "logits/chosen": -0.4871414601802826, "logits/rejected": -0.565714418888092, "logps/chosen": -0.057451147586107254, "logps/rejected": -0.5715197324752808, "loss": 4.5186, "nll_loss": 1.1067641973495483, "rewards/accuracies": 0.75, "rewards/chosen": -0.005745114758610725, "rewards/margins": 0.0514068529009819, "rewards/rejected": -0.05715196952223778, "step": 1438 }, { "epoch": 0.9951590594744122, "grad_norm": 3.4306483268737793, "learning_rate": 4.975795297372061e-05, "log_odds_chosen": 5.28213357925415, "log_odds_ratio": -0.038520678877830505, "logits/chosen": -0.7845959663391113, "logits/rejected": -0.7963880300521851, "logps/chosen": -0.012803150340914726, "logps/rejected": -0.6970179080963135, "loss": 3.6504, "nll_loss": 0.9087599515914917, "rewards/accuracies": 1.0, "rewards/chosen": -0.001280314987525344, "rewards/margins": 0.06842147558927536, "rewards/rejected": -0.06970179080963135, "step": 1439 }, { "epoch": 0.995850622406639, "grad_norm": 7.943667411804199, "learning_rate": 4.979253112033195e-05, "log_odds_chosen": 1.939913272857666, "log_odds_ratio": -0.914853572845459, "logits/chosen": -0.6151790022850037, "logits/rejected": -0.6059738397598267, "logps/chosen": -0.24091029167175293, "logps/rejected": -0.6626577377319336, "loss": 4.3262, "nll_loss": 0.9900734424591064, "rewards/accuracies": 0.625, "rewards/chosen": -0.024091029539704323, "rewards/margins": 0.04217474162578583, "rewards/rejected": -0.0662657767534256, "step": 1440 }, { "epoch": 0.9965421853388658, "grad_norm": 5.455565929412842, "learning_rate": 4.9827109266943295e-05, "log_odds_chosen": 4.995009899139404, "log_odds_ratio": -0.1817580908536911, "logits/chosen": -0.30914661288261414, "logits/rejected": -0.31398558616638184, "logps/chosen": -0.084615059196949, "logps/rejected": -0.8779784440994263, "loss": 3.2643, "nll_loss": 0.797911524772644, "rewards/accuracies": 0.875, "rewards/chosen": -0.008461506105959415, "rewards/margins": 0.07933633774518967, "rewards/rejected": -0.08779783546924591, "step": 1441 }, { "epoch": 0.9972337482710927, "grad_norm": 5.774052143096924, "learning_rate": 4.9861687413554636e-05, "log_odds_chosen": 3.2858119010925293, "log_odds_ratio": -0.4178237318992615, "logits/chosen": -0.9737169742584229, "logits/rejected": -1.011305570602417, "logps/chosen": -0.04260983318090439, "logps/rejected": -0.5319869518280029, "loss": 3.9489, "nll_loss": 0.9454514980316162, "rewards/accuracies": 0.875, "rewards/chosen": -0.004260983318090439, "rewards/margins": 0.048937711864709854, "rewards/rejected": -0.05319869518280029, "step": 1442 }, { "epoch": 0.9979253112033195, "grad_norm": 5.342999458312988, "learning_rate": 4.989626556016598e-05, "log_odds_chosen": 2.6681790351867676, "log_odds_ratio": -0.4021229147911072, "logits/chosen": -0.6921200752258301, "logits/rejected": -0.7400314807891846, "logps/chosen": -0.14808860421180725, "logps/rejected": -0.8047324419021606, "loss": 4.6528, "nll_loss": 1.122977614402771, "rewards/accuracies": 0.75, "rewards/chosen": -0.01480886060744524, "rewards/margins": 0.0656643807888031, "rewards/rejected": -0.08047324419021606, "step": 1443 }, { "epoch": 0.9986168741355463, "grad_norm": 4.999363899230957, "learning_rate": 4.993084370677732e-05, "log_odds_chosen": 2.2260918617248535, "log_odds_ratio": -0.40468379855155945, "logits/chosen": -0.6280920505523682, "logits/rejected": -0.6217027902603149, "logps/chosen": -0.11846964806318283, "logps/rejected": -0.38483330607414246, "loss": 3.5864, "nll_loss": 0.856123685836792, "rewards/accuracies": 0.875, "rewards/chosen": -0.011846965178847313, "rewards/margins": 0.026636367663741112, "rewards/rejected": -0.038483329117298126, "step": 1444 }, { "epoch": 0.9993084370677732, "grad_norm": 25.162742614746094, "learning_rate": 4.996542185338866e-05, "log_odds_chosen": 0.30654793977737427, "log_odds_ratio": -1.2838389873504639, "logits/chosen": -0.40273237228393555, "logits/rejected": -0.3698718249797821, "logps/chosen": -0.3533599376678467, "logps/rejected": -0.33225274085998535, "loss": 3.3748, "nll_loss": 0.7153055667877197, "rewards/accuracies": 0.75, "rewards/chosen": -0.03533599525690079, "rewards/margins": -0.0021107199136167765, "rewards/rejected": -0.033225271850824356, "step": 1445 }, { "epoch": 1.0, "grad_norm": 3.8861796855926514, "learning_rate": 5e-05, "log_odds_chosen": 3.859959602355957, "log_odds_ratio": -0.33482107520103455, "logits/chosen": -0.59022057056427, "logits/rejected": -0.5907670259475708, "logps/chosen": -0.17323844134807587, "logps/rejected": -0.8100842237472534, "loss": 2.379, "nll_loss": 0.561260461807251, "rewards/accuracies": 0.875, "rewards/chosen": -0.017323845997452736, "rewards/margins": 0.06368458271026611, "rewards/rejected": -0.0810084268450737, "step": 1446 }, { "epoch": 1.0006915629322268, "grad_norm": 4.5464582443237305, "learning_rate": 4.9996157983709855e-05, "log_odds_chosen": 3.976539134979248, "log_odds_ratio": -0.40323778986930847, "logits/chosen": -0.837387204170227, "logits/rejected": -0.9365677833557129, "logps/chosen": -0.054401617497205734, "logps/rejected": -0.6226985454559326, "loss": 4.0711, "nll_loss": 0.9774460792541504, "rewards/accuracies": 0.625, "rewards/chosen": -0.0054401615634560585, "rewards/margins": 0.05682969093322754, "rewards/rejected": -0.06226985156536102, "step": 1447 }, { "epoch": 1.0013831258644537, "grad_norm": 4.172989845275879, "learning_rate": 4.999231596741971e-05, "log_odds_chosen": 3.1294639110565186, "log_odds_ratio": -0.13460323214530945, "logits/chosen": -0.662013828754425, "logits/rejected": -0.6416223049163818, "logps/chosen": -0.11852079629898071, "logps/rejected": -0.8950018882751465, "loss": 2.598, "nll_loss": 0.6360316276550293, "rewards/accuracies": 1.0, "rewards/chosen": -0.011852080002427101, "rewards/margins": 0.0776481032371521, "rewards/rejected": -0.08950018882751465, "step": 1448 }, { "epoch": 1.0020746887966805, "grad_norm": 4.302820205688477, "learning_rate": 4.998847395112955e-05, "log_odds_chosen": 4.37247371673584, "log_odds_ratio": -0.2136344015598297, "logits/chosen": -0.5137135982513428, "logits/rejected": -0.5863500237464905, "logps/chosen": -0.0761384591460228, "logps/rejected": -1.1133251190185547, "loss": 3.1741, "nll_loss": 0.7721676826477051, "rewards/accuracies": 1.0, "rewards/chosen": -0.007613845635205507, "rewards/margins": 0.10371865332126617, "rewards/rejected": -0.11133251339197159, "step": 1449 }, { "epoch": 1.0027662517289073, "grad_norm": 6.997933864593506, "learning_rate": 4.9984631934839405e-05, "log_odds_chosen": 2.459064483642578, "log_odds_ratio": -0.46886980533599854, "logits/chosen": -0.599526047706604, "logits/rejected": -0.610713005065918, "logps/chosen": -0.09736969321966171, "logps/rejected": -0.683413565158844, "loss": 4.5402, "nll_loss": 1.0881648063659668, "rewards/accuracies": 0.625, "rewards/chosen": -0.009736969135701656, "rewards/margins": 0.05860438942909241, "rewards/rejected": -0.06834135949611664, "step": 1450 }, { "epoch": 1.0034578146611342, "grad_norm": 3.562448024749756, "learning_rate": 4.998078991854926e-05, "log_odds_chosen": 5.7006330490112305, "log_odds_ratio": -0.06817111372947693, "logits/chosen": -0.598158061504364, "logits/rejected": -0.6181397438049316, "logps/chosen": -0.03915969282388687, "logps/rejected": -0.7007030248641968, "loss": 2.7675, "nll_loss": 0.685059666633606, "rewards/accuracies": 1.0, "rewards/chosen": -0.003915969282388687, "rewards/margins": 0.06615433096885681, "rewards/rejected": -0.0700703039765358, "step": 1451 }, { "epoch": 1.004149377593361, "grad_norm": 4.6683549880981445, "learning_rate": 4.997694790225911e-05, "log_odds_chosen": 1.3357759714126587, "log_odds_ratio": -0.4112657308578491, "logits/chosen": -1.0096291303634644, "logits/rejected": -1.0104484558105469, "logps/chosen": -0.13406968116760254, "logps/rejected": -0.3350994288921356, "loss": 4.2212, "nll_loss": 1.0141762495040894, "rewards/accuracies": 0.75, "rewards/chosen": -0.013406967744231224, "rewards/margins": 0.020102977752685547, "rewards/rejected": -0.03350994735956192, "step": 1452 }, { "epoch": 1.0048409405255878, "grad_norm": 5.696212291717529, "learning_rate": 4.9973105885968956e-05, "log_odds_chosen": 4.117009162902832, "log_odds_ratio": -0.2986528277397156, "logits/chosen": -0.7022649049758911, "logits/rejected": -0.7363672256469727, "logps/chosen": -0.05535222589969635, "logps/rejected": -0.8268040418624878, "loss": 3.4259, "nll_loss": 0.8266156911849976, "rewards/accuracies": 0.875, "rewards/chosen": -0.005535222589969635, "rewards/margins": 0.07714518904685974, "rewards/rejected": -0.08268041163682938, "step": 1453 }, { "epoch": 1.0055325034578146, "grad_norm": 5.526662349700928, "learning_rate": 4.9969263869678815e-05, "log_odds_chosen": 0.7705850601196289, "log_odds_ratio": -0.43338674306869507, "logits/chosen": -0.6624119281768799, "logits/rejected": -0.6808663606643677, "logps/chosen": -0.18742117285728455, "logps/rejected": -0.2641259431838989, "loss": 3.6687, "nll_loss": 0.8738378882408142, "rewards/accuracies": 0.75, "rewards/chosen": -0.018742118030786514, "rewards/margins": 0.007670475170016289, "rewards/rejected": -0.026412591338157654, "step": 1454 }, { "epoch": 1.0062240663900415, "grad_norm": 7.230208873748779, "learning_rate": 4.996542185338866e-05, "log_odds_chosen": 4.472329139709473, "log_odds_ratio": -0.23803406953811646, "logits/chosen": -0.6536309719085693, "logits/rejected": -0.6856323480606079, "logps/chosen": -0.09958557039499283, "logps/rejected": -0.8282464742660522, "loss": 3.4442, "nll_loss": 0.8372559547424316, "rewards/accuracies": 0.875, "rewards/chosen": -0.009958556853234768, "rewards/margins": 0.07286608964204788, "rewards/rejected": -0.08282465487718582, "step": 1455 }, { "epoch": 1.0069156293222683, "grad_norm": 5.879416465759277, "learning_rate": 4.996157983709851e-05, "log_odds_chosen": 3.733511447906494, "log_odds_ratio": -0.40536871552467346, "logits/chosen": -0.424949586391449, "logits/rejected": -0.47743141651153564, "logps/chosen": -0.04454517364501953, "logps/rejected": -0.724551796913147, "loss": 3.1424, "nll_loss": 0.7450686097145081, "rewards/accuracies": 0.875, "rewards/chosen": -0.004454517271369696, "rewards/margins": 0.0680006593465805, "rewards/rejected": -0.07245517522096634, "step": 1456 }, { "epoch": 1.0076071922544951, "grad_norm": 5.5041728019714355, "learning_rate": 4.9957737820808366e-05, "log_odds_chosen": 0.9899818301200867, "log_odds_ratio": -0.47608351707458496, "logits/chosen": -0.32593727111816406, "logits/rejected": -0.3411751985549927, "logps/chosen": -0.13348256051540375, "logps/rejected": -0.40093713998794556, "loss": 3.9442, "nll_loss": 0.9384334087371826, "rewards/accuracies": 0.625, "rewards/chosen": -0.01334825623780489, "rewards/margins": 0.02674545720219612, "rewards/rejected": -0.040093712508678436, "step": 1457 }, { "epoch": 1.008298755186722, "grad_norm": 4.41768741607666, "learning_rate": 4.995389580451821e-05, "log_odds_chosen": 5.170443534851074, "log_odds_ratio": -0.08081326633691788, "logits/chosen": -0.5705434679985046, "logits/rejected": -0.7547807097434998, "logps/chosen": -0.059746697545051575, "logps/rejected": -0.8604313135147095, "loss": 3.2622, "nll_loss": 0.807471752166748, "rewards/accuracies": 1.0, "rewards/chosen": -0.005974669940769672, "rewards/margins": 0.08006846159696579, "rewards/rejected": -0.08604312688112259, "step": 1458 }, { "epoch": 1.0089903181189488, "grad_norm": 5.407264232635498, "learning_rate": 4.9950053788228064e-05, "log_odds_chosen": 7.331927299499512, "log_odds_ratio": -0.031412504613399506, "logits/chosen": -0.2545586824417114, "logits/rejected": -0.35101965069770813, "logps/chosen": -0.01821615919470787, "logps/rejected": -1.1783541440963745, "loss": 4.2907, "nll_loss": 1.0695462226867676, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018216159660369158, "rewards/margins": 0.11601380258798599, "rewards/rejected": -0.11783542484045029, "step": 1459 }, { "epoch": 1.0096818810511756, "grad_norm": 3.847421884536743, "learning_rate": 4.9946211771937916e-05, "log_odds_chosen": 2.8356752395629883, "log_odds_ratio": -0.29734063148498535, "logits/chosen": -0.7546320557594299, "logits/rejected": -0.7500847578048706, "logps/chosen": -0.1304139792919159, "logps/rejected": -0.783536434173584, "loss": 3.2944, "nll_loss": 0.7938747406005859, "rewards/accuracies": 0.875, "rewards/chosen": -0.01304139755666256, "rewards/margins": 0.06531224399805069, "rewards/rejected": -0.0783536434173584, "step": 1460 }, { "epoch": 1.0103734439834025, "grad_norm": 5.793185234069824, "learning_rate": 4.994236975564777e-05, "log_odds_chosen": 3.1138916015625, "log_odds_ratio": -0.4364016652107239, "logits/chosen": -0.3820783495903015, "logits/rejected": -0.4282950758934021, "logps/chosen": -0.15490713715553284, "logps/rejected": -0.8658761978149414, "loss": 4.3823, "nll_loss": 1.0519235134124756, "rewards/accuracies": 0.625, "rewards/chosen": -0.015490712597966194, "rewards/margins": 0.07109691202640533, "rewards/rejected": -0.08658762276172638, "step": 1461 }, { "epoch": 1.0110650069156293, "grad_norm": 7.400942802429199, "learning_rate": 4.9938527739357614e-05, "log_odds_chosen": 2.564420461654663, "log_odds_ratio": -0.31582629680633545, "logits/chosen": -0.6081953048706055, "logits/rejected": -0.6425383687019348, "logps/chosen": -0.12189154326915741, "logps/rejected": -0.6753732562065125, "loss": 4.3823, "nll_loss": 1.063995599746704, "rewards/accuracies": 0.75, "rewards/chosen": -0.01218915544450283, "rewards/margins": 0.055348172783851624, "rewards/rejected": -0.067537322640419, "step": 1462 }, { "epoch": 1.0117565698478561, "grad_norm": 3.8683624267578125, "learning_rate": 4.9934685723067474e-05, "log_odds_chosen": 3.5754759311676025, "log_odds_ratio": -0.2153715193271637, "logits/chosen": -0.5791640877723694, "logits/rejected": -0.5993859767913818, "logps/chosen": -0.09025327861309052, "logps/rejected": -0.6351896524429321, "loss": 3.3673, "nll_loss": 0.8202858567237854, "rewards/accuracies": 0.875, "rewards/chosen": -0.009025328792631626, "rewards/margins": 0.05449363589286804, "rewards/rejected": -0.0635189637541771, "step": 1463 }, { "epoch": 1.012448132780083, "grad_norm": 12.99779987335205, "learning_rate": 4.993084370677732e-05, "log_odds_chosen": 2.696488618850708, "log_odds_ratio": -0.5498002767562866, "logits/chosen": -0.7150594592094421, "logits/rejected": -0.7320340871810913, "logps/chosen": -0.15543246269226074, "logps/rejected": -0.6988484859466553, "loss": 5.2035, "nll_loss": 1.2458841800689697, "rewards/accuracies": 0.75, "rewards/chosen": -0.015543246641755104, "rewards/margins": 0.05434160679578781, "rewards/rejected": -0.06988485157489777, "step": 1464 }, { "epoch": 1.0131396957123098, "grad_norm": 3.1184329986572266, "learning_rate": 4.992700169048717e-05, "log_odds_chosen": 4.265319347381592, "log_odds_ratio": -0.10288287699222565, "logits/chosen": -0.7221165299415588, "logits/rejected": -0.7270498275756836, "logps/chosen": -0.09049959480762482, "logps/rejected": -0.8920851349830627, "loss": 2.8165, "nll_loss": 0.6938475370407104, "rewards/accuracies": 1.0, "rewards/chosen": -0.009049959480762482, "rewards/margins": 0.08015856146812439, "rewards/rejected": -0.08920852094888687, "step": 1465 }, { "epoch": 1.0138312586445366, "grad_norm": 5.818999290466309, "learning_rate": 4.9923159674197024e-05, "log_odds_chosen": 5.4633588790893555, "log_odds_ratio": -0.30595359206199646, "logits/chosen": -0.7791047096252441, "logits/rejected": -0.7961665391921997, "logps/chosen": -0.08309157937765121, "logps/rejected": -1.0019042491912842, "loss": 2.8203, "nll_loss": 0.6744802594184875, "rewards/accuracies": 0.875, "rewards/chosen": -0.008309157565236092, "rewards/margins": 0.09188126027584076, "rewards/rejected": -0.1001904308795929, "step": 1466 }, { "epoch": 1.0145228215767634, "grad_norm": 5.464219570159912, "learning_rate": 4.991931765790687e-05, "log_odds_chosen": 3.736652135848999, "log_odds_ratio": -0.217147096991539, "logits/chosen": -0.9352800846099854, "logits/rejected": -0.9811190366744995, "logps/chosen": -0.0618901327252388, "logps/rejected": -0.6639367341995239, "loss": 3.7076, "nll_loss": 0.9051859974861145, "rewards/accuracies": 0.875, "rewards/chosen": -0.0061890133656561375, "rewards/margins": 0.06020466610789299, "rewards/rejected": -0.06639367341995239, "step": 1467 }, { "epoch": 1.0152143845089903, "grad_norm": 13.700241088867188, "learning_rate": 4.991547564161672e-05, "log_odds_chosen": 2.2971251010894775, "log_odds_ratio": -0.5517905354499817, "logits/chosen": -0.8629281520843506, "logits/rejected": -0.8985836505889893, "logps/chosen": -0.12846173346042633, "logps/rejected": -0.543826699256897, "loss": 3.8535, "nll_loss": 0.908200204372406, "rewards/accuracies": 0.75, "rewards/chosen": -0.012846173718571663, "rewards/margins": 0.04153650254011154, "rewards/rejected": -0.054382674396038055, "step": 1468 }, { "epoch": 1.015905947441217, "grad_norm": 5.728583812713623, "learning_rate": 4.9911633625326575e-05, "log_odds_chosen": 4.474362850189209, "log_odds_ratio": -0.3257821500301361, "logits/chosen": -0.736266016960144, "logits/rejected": -0.7569507360458374, "logps/chosen": -0.05592148005962372, "logps/rejected": -0.8763000965118408, "loss": 3.1086, "nll_loss": 0.7445786595344543, "rewards/accuracies": 0.875, "rewards/chosen": -0.005592147819697857, "rewards/margins": 0.08203786611557007, "rewards/rejected": -0.0876300036907196, "step": 1469 }, { "epoch": 1.016597510373444, "grad_norm": 6.4466657638549805, "learning_rate": 4.990779160903643e-05, "log_odds_chosen": 3.7242801189422607, "log_odds_ratio": -0.2941632866859436, "logits/chosen": -0.4376232624053955, "logits/rejected": -0.5244469046592712, "logps/chosen": -0.07507316023111343, "logps/rejected": -0.6646156311035156, "loss": 4.0845, "nll_loss": 0.9917135238647461, "rewards/accuracies": 0.75, "rewards/chosen": -0.0075073158368468285, "rewards/margins": 0.05895423889160156, "rewards/rejected": -0.06646155565977097, "step": 1470 }, { "epoch": 1.0172890733056708, "grad_norm": 6.7467474937438965, "learning_rate": 4.990394959274627e-05, "log_odds_chosen": 2.9118337631225586, "log_odds_ratio": -0.2949202060699463, "logits/chosen": -0.7435550689697266, "logits/rejected": -0.7765440940856934, "logps/chosen": -0.060861919075250626, "logps/rejected": -0.4571167826652527, "loss": 4.0488, "nll_loss": 0.9826978445053101, "rewards/accuracies": 0.875, "rewards/chosen": -0.0060861920937895775, "rewards/margins": 0.039625488221645355, "rewards/rejected": -0.04571168124675751, "step": 1471 }, { "epoch": 1.0179806362378976, "grad_norm": 3.7251908779144287, "learning_rate": 4.990010757645613e-05, "log_odds_chosen": 5.3352484703063965, "log_odds_ratio": -0.22203417122364044, "logits/chosen": -0.6861305832862854, "logits/rejected": -0.6410164833068848, "logps/chosen": -0.05618961900472641, "logps/rejected": -0.7430349588394165, "loss": 3.0389, "nll_loss": 0.7375138401985168, "rewards/accuracies": 0.875, "rewards/chosen": -0.005618962924927473, "rewards/margins": 0.06868454813957214, "rewards/rejected": -0.0743035078048706, "step": 1472 }, { "epoch": 1.0186721991701244, "grad_norm": 4.516066074371338, "learning_rate": 4.989626556016598e-05, "log_odds_chosen": 4.567673683166504, "log_odds_ratio": -0.12776193022727966, "logits/chosen": -0.34971410036087036, "logits/rejected": -0.3900047540664673, "logps/chosen": -0.07080184668302536, "logps/rejected": -0.6615493893623352, "loss": 3.6239, "nll_loss": 0.8931936025619507, "rewards/accuracies": 1.0, "rewards/chosen": -0.007080184295773506, "rewards/margins": 0.0590747594833374, "rewards/rejected": -0.06615494191646576, "step": 1473 }, { "epoch": 1.0193637621023512, "grad_norm": 6.061563014984131, "learning_rate": 4.989242354387583e-05, "log_odds_chosen": 3.5704660415649414, "log_odds_ratio": -0.33379295468330383, "logits/chosen": -1.1629607677459717, "logits/rejected": -1.2340478897094727, "logps/chosen": -0.14167319238185883, "logps/rejected": -1.1132009029388428, "loss": 4.6314, "nll_loss": 1.124472737312317, "rewards/accuracies": 0.75, "rewards/chosen": -0.014167319051921368, "rewards/margins": 0.09715276211500168, "rewards/rejected": -0.11132007837295532, "step": 1474 }, { "epoch": 1.020055325034578, "grad_norm": 6.204284191131592, "learning_rate": 4.988858152758568e-05, "log_odds_chosen": 4.754334926605225, "log_odds_ratio": -0.14875973761081696, "logits/chosen": -0.5835494995117188, "logits/rejected": -0.6686667203903198, "logps/chosen": -0.04585752636194229, "logps/rejected": -0.9479017853736877, "loss": 3.5374, "nll_loss": 0.8694802522659302, "rewards/accuracies": 0.875, "rewards/chosen": -0.004585752729326487, "rewards/margins": 0.09020442515611649, "rewards/rejected": -0.09479017555713654, "step": 1475 }, { "epoch": 1.020746887966805, "grad_norm": 4.354371070861816, "learning_rate": 4.988473951129553e-05, "log_odds_chosen": 3.4431748390197754, "log_odds_ratio": -0.3371661901473999, "logits/chosen": -0.6801312565803528, "logits/rejected": -0.715042769908905, "logps/chosen": -0.09811490774154663, "logps/rejected": -0.7584939002990723, "loss": 3.0323, "nll_loss": 0.7243598103523254, "rewards/accuracies": 0.75, "rewards/chosen": -0.009811490774154663, "rewards/margins": 0.06603790074586868, "rewards/rejected": -0.07584939152002335, "step": 1476 }, { "epoch": 1.0214384508990317, "grad_norm": 4.167712688446045, "learning_rate": 4.988089749500538e-05, "log_odds_chosen": 1.9616563320159912, "log_odds_ratio": -0.30320820212364197, "logits/chosen": -0.867897629737854, "logits/rejected": -0.9229037761688232, "logps/chosen": -0.13874275982379913, "logps/rejected": -0.36793744564056396, "loss": 2.7929, "nll_loss": 0.6678975820541382, "rewards/accuracies": 0.75, "rewards/chosen": -0.013874277472496033, "rewards/margins": 0.022919466719031334, "rewards/rejected": -0.036793746054172516, "step": 1477 }, { "epoch": 1.0221300138312586, "grad_norm": 5.813611030578613, "learning_rate": 4.987705547871523e-05, "log_odds_chosen": 4.935079574584961, "log_odds_ratio": -0.2710667550563812, "logits/chosen": -0.6112565398216248, "logits/rejected": -0.6051702499389648, "logps/chosen": -0.08972442150115967, "logps/rejected": -0.6998757123947144, "loss": 2.919, "nll_loss": 0.702637791633606, "rewards/accuracies": 0.75, "rewards/chosen": -0.008972441777586937, "rewards/margins": 0.061015136539936066, "rewards/rejected": -0.06998757272958755, "step": 1478 }, { "epoch": 1.0228215767634854, "grad_norm": 5.390235424041748, "learning_rate": 4.9873213462425086e-05, "log_odds_chosen": 6.563016414642334, "log_odds_ratio": -0.127326101064682, "logits/chosen": -0.4359290897846222, "logits/rejected": -0.52577805519104, "logps/chosen": -0.06370159238576889, "logps/rejected": -1.0844309329986572, "loss": 3.073, "nll_loss": 0.7555067539215088, "rewards/accuracies": 0.875, "rewards/chosen": -0.006370158866047859, "rewards/margins": 0.10207293927669525, "rewards/rejected": -0.10844309628009796, "step": 1479 }, { "epoch": 1.0235131396957122, "grad_norm": 5.924533367156982, "learning_rate": 4.986937144613493e-05, "log_odds_chosen": 4.72926139831543, "log_odds_ratio": -0.29802221059799194, "logits/chosen": -0.8602408170700073, "logits/rejected": -0.9133716225624084, "logps/chosen": -0.06283199042081833, "logps/rejected": -1.035302996635437, "loss": 3.3873, "nll_loss": 0.8170216083526611, "rewards/accuracies": 0.875, "rewards/chosen": -0.006283198483288288, "rewards/margins": 0.09724709391593933, "rewards/rejected": -0.10353029519319534, "step": 1480 }, { "epoch": 1.024204702627939, "grad_norm": 5.796539783477783, "learning_rate": 4.986552942984479e-05, "log_odds_chosen": 5.10892391204834, "log_odds_ratio": -0.0661209300160408, "logits/chosen": -0.41841191053390503, "logits/rejected": -0.49035269021987915, "logps/chosen": -0.05405154824256897, "logps/rejected": -0.9556215405464172, "loss": 3.9195, "nll_loss": 0.9732711315155029, "rewards/accuracies": 1.0, "rewards/chosen": -0.005405155010521412, "rewards/margins": 0.09015700221061707, "rewards/rejected": -0.0955621600151062, "step": 1481 }, { "epoch": 1.0248962655601659, "grad_norm": 7.5419440269470215, "learning_rate": 4.9861687413554636e-05, "log_odds_chosen": 3.7392358779907227, "log_odds_ratio": -0.3376656770706177, "logits/chosen": -0.5626893043518066, "logits/rejected": -0.6578677892684937, "logps/chosen": -0.05953347682952881, "logps/rejected": -1.008660078048706, "loss": 3.9953, "nll_loss": 0.965064525604248, "rewards/accuracies": 0.875, "rewards/chosen": -0.005953347310423851, "rewards/margins": 0.09491265565156937, "rewards/rejected": -0.10086600482463837, "step": 1482 }, { "epoch": 1.0255878284923927, "grad_norm": 4.64001989364624, "learning_rate": 4.985784539726449e-05, "log_odds_chosen": 2.772674322128296, "log_odds_ratio": -0.33215615153312683, "logits/chosen": -0.6052103042602539, "logits/rejected": -0.7159792184829712, "logps/chosen": -0.09696868807077408, "logps/rejected": -0.4555617570877075, "loss": 3.1045, "nll_loss": 0.7428995370864868, "rewards/accuracies": 0.875, "rewards/chosen": -0.009696869179606438, "rewards/margins": 0.035859305411577225, "rewards/rejected": -0.04555617272853851, "step": 1483 }, { "epoch": 1.0262793914246195, "grad_norm": 4.410762310028076, "learning_rate": 4.985400338097434e-05, "log_odds_chosen": 3.116834878921509, "log_odds_ratio": -0.173253133893013, "logits/chosen": -0.5153848528862, "logits/rejected": -0.5361400246620178, "logps/chosen": -0.16222445666790009, "logps/rejected": -1.062785029411316, "loss": 2.8669, "nll_loss": 0.6994096636772156, "rewards/accuracies": 0.875, "rewards/chosen": -0.016222447156906128, "rewards/margins": 0.09005605429410934, "rewards/rejected": -0.10627850890159607, "step": 1484 }, { "epoch": 1.0269709543568464, "grad_norm": 4.671135425567627, "learning_rate": 4.985016136468419e-05, "log_odds_chosen": 3.684541702270508, "log_odds_ratio": -0.36840176582336426, "logits/chosen": -0.8689281940460205, "logits/rejected": -0.9373396039009094, "logps/chosen": -0.13366033136844635, "logps/rejected": -1.042647361755371, "loss": 2.7474, "nll_loss": 0.6500199437141418, "rewards/accuracies": 0.75, "rewards/chosen": -0.013366032391786575, "rewards/margins": 0.09089870750904083, "rewards/rejected": -0.10426473617553711, "step": 1485 }, { "epoch": 1.0276625172890732, "grad_norm": 5.958430767059326, "learning_rate": 4.984631934839404e-05, "log_odds_chosen": 4.020816802978516, "log_odds_ratio": -0.1794414073228836, "logits/chosen": -0.8697519302368164, "logits/rejected": -0.9032071828842163, "logps/chosen": -0.0766788125038147, "logps/rejected": -0.9268592596054077, "loss": 4.5648, "nll_loss": 1.123245120048523, "rewards/accuracies": 0.875, "rewards/chosen": -0.007667881436645985, "rewards/margins": 0.08501805365085602, "rewards/rejected": -0.09268593043088913, "step": 1486 }, { "epoch": 1.0283540802213, "grad_norm": 5.243428707122803, "learning_rate": 4.984247733210389e-05, "log_odds_chosen": 4.724527359008789, "log_odds_ratio": -0.1316288858652115, "logits/chosen": -0.6422492265701294, "logits/rejected": -0.6679896116256714, "logps/chosen": -0.09851549565792084, "logps/rejected": -0.9931231141090393, "loss": 3.0589, "nll_loss": 0.7515564560890198, "rewards/accuracies": 1.0, "rewards/chosen": -0.009851549752056599, "rewards/margins": 0.08946076780557632, "rewards/rejected": -0.09931232035160065, "step": 1487 }, { "epoch": 1.0290456431535269, "grad_norm": 4.835902214050293, "learning_rate": 4.9838635315813744e-05, "log_odds_chosen": 5.668395519256592, "log_odds_ratio": -0.05924345925450325, "logits/chosen": -0.5757212042808533, "logits/rejected": -0.6555768251419067, "logps/chosen": -0.0362117774784565, "logps/rejected": -1.1513760089874268, "loss": 2.7235, "nll_loss": 0.6749587059020996, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036211777478456497, "rewards/margins": 0.11151641607284546, "rewards/rejected": -0.11513759940862656, "step": 1488 }, { "epoch": 1.0297372060857537, "grad_norm": 7.987643718719482, "learning_rate": 4.983479329952359e-05, "log_odds_chosen": 1.2246575355529785, "log_odds_ratio": -0.5338953137397766, "logits/chosen": -0.8425899744033813, "logits/rejected": -0.8281242847442627, "logps/chosen": -0.10856227576732635, "logps/rejected": -0.3495185673236847, "loss": 4.8577, "nll_loss": 1.1610369682312012, "rewards/accuracies": 0.625, "rewards/chosen": -0.010856227949261665, "rewards/margins": 0.024095630273222923, "rewards/rejected": -0.03495185822248459, "step": 1489 }, { "epoch": 1.0304287690179805, "grad_norm": 5.155649185180664, "learning_rate": 4.983095128323345e-05, "log_odds_chosen": 4.663093566894531, "log_odds_ratio": -0.16769303381443024, "logits/chosen": -0.8803797364234924, "logits/rejected": -0.9577143788337708, "logps/chosen": -0.11833354085683823, "logps/rejected": -1.161036491394043, "loss": 3.4158, "nll_loss": 0.8371741771697998, "rewards/accuracies": 0.875, "rewards/chosen": -0.011833353899419308, "rewards/margins": 0.10427028685808182, "rewards/rejected": -0.1161036491394043, "step": 1490 }, { "epoch": 1.0311203319502074, "grad_norm": 6.939423561096191, "learning_rate": 4.9827109266943295e-05, "log_odds_chosen": 2.6805033683776855, "log_odds_ratio": -0.2592310607433319, "logits/chosen": -0.8418576121330261, "logits/rejected": -0.8511228561401367, "logps/chosen": -0.08779346942901611, "logps/rejected": -0.6378369331359863, "loss": 4.4381, "nll_loss": 1.0835912227630615, "rewards/accuracies": 0.875, "rewards/chosen": -0.008779346942901611, "rewards/margins": 0.05500435084104538, "rewards/rejected": -0.06378369778394699, "step": 1491 }, { "epoch": 1.0318118948824342, "grad_norm": 3.8663382530212402, "learning_rate": 4.982326725065315e-05, "log_odds_chosen": 4.59311580657959, "log_odds_ratio": -0.34453898668289185, "logits/chosen": -0.5843978524208069, "logits/rejected": -0.6239404082298279, "logps/chosen": -0.08339428901672363, "logps/rejected": -0.8839590549468994, "loss": 2.7118, "nll_loss": 0.6434944272041321, "rewards/accuracies": 0.75, "rewards/chosen": -0.008339428342878819, "rewards/margins": 0.08005647361278534, "rewards/rejected": -0.08839590102434158, "step": 1492 }, { "epoch": 1.0325034578146612, "grad_norm": 3.709350109100342, "learning_rate": 4.9819425234363e-05, "log_odds_chosen": 3.1544461250305176, "log_odds_ratio": -0.24927958846092224, "logits/chosen": -0.38115400075912476, "logits/rejected": -0.33510446548461914, "logps/chosen": -0.08069927990436554, "logps/rejected": -0.5364916324615479, "loss": 2.9121, "nll_loss": 0.7030921578407288, "rewards/accuracies": 0.875, "rewards/chosen": -0.008069928735494614, "rewards/margins": 0.04557923972606659, "rewards/rejected": -0.053649164736270905, "step": 1493 }, { "epoch": 1.033195020746888, "grad_norm": 6.313911437988281, "learning_rate": 4.9815583218072845e-05, "log_odds_chosen": 1.3357501029968262, "log_odds_ratio": -0.5104138255119324, "logits/chosen": -0.9382165670394897, "logits/rejected": -0.9525442123413086, "logps/chosen": -0.1589067131280899, "logps/rejected": -0.6409029960632324, "loss": 4.2722, "nll_loss": 1.0169986486434937, "rewards/accuracies": 0.625, "rewards/chosen": -0.01589067280292511, "rewards/margins": 0.048199623823165894, "rewards/rejected": -0.064090296626091, "step": 1494 }, { "epoch": 1.033886583679115, "grad_norm": 5.793435096740723, "learning_rate": 4.98117412017827e-05, "log_odds_chosen": 3.6047868728637695, "log_odds_ratio": -0.3205690383911133, "logits/chosen": -0.4956324100494385, "logits/rejected": -0.5108366012573242, "logps/chosen": -0.07623106241226196, "logps/rejected": -0.5219119787216187, "loss": 3.1138, "nll_loss": 0.7464025020599365, "rewards/accuracies": 0.875, "rewards/chosen": -0.007623106241226196, "rewards/margins": 0.04456809163093567, "rewards/rejected": -0.052191197872161865, "step": 1495 }, { "epoch": 1.0345781466113417, "grad_norm": 6.194908142089844, "learning_rate": 4.980789918549255e-05, "log_odds_chosen": 3.547358989715576, "log_odds_ratio": -0.19596754014492035, "logits/chosen": -0.6217373013496399, "logits/rejected": -0.6675953269004822, "logps/chosen": -0.05426674336194992, "logps/rejected": -0.7067912817001343, "loss": 3.5106, "nll_loss": 0.8580626249313354, "rewards/accuracies": 0.875, "rewards/chosen": -0.005426674149930477, "rewards/margins": 0.06525244563817978, "rewards/rejected": -0.07067912071943283, "step": 1496 }, { "epoch": 1.0352697095435686, "grad_norm": 5.629507064819336, "learning_rate": 4.98040571692024e-05, "log_odds_chosen": 3.7559783458709717, "log_odds_ratio": -0.2629086971282959, "logits/chosen": -0.4892784655094147, "logits/rejected": -0.4466947019100189, "logps/chosen": -0.04877060279250145, "logps/rejected": -0.835755467414856, "loss": 3.1084, "nll_loss": 0.7508119940757751, "rewards/accuracies": 0.875, "rewards/chosen": -0.004877060651779175, "rewards/margins": 0.07869848608970642, "rewards/rejected": -0.0835755467414856, "step": 1497 }, { "epoch": 1.0359612724757954, "grad_norm": 5.053825855255127, "learning_rate": 4.980021515291225e-05, "log_odds_chosen": 4.632743835449219, "log_odds_ratio": -0.11885064840316772, "logits/chosen": -0.49171894788742065, "logits/rejected": -0.5932501554489136, "logps/chosen": -0.029469992965459824, "logps/rejected": -0.7572300434112549, "loss": 3.4089, "nll_loss": 0.8403362035751343, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029469996225088835, "rewards/margins": 0.07277600467205048, "rewards/rejected": -0.07572300732135773, "step": 1498 }, { "epoch": 1.0366528354080222, "grad_norm": 5.869250297546387, "learning_rate": 4.979637313662211e-05, "log_odds_chosen": 2.212305784225464, "log_odds_ratio": -0.5324792861938477, "logits/chosen": -0.38662075996398926, "logits/rejected": -0.4400796890258789, "logps/chosen": -0.11539213359355927, "logps/rejected": -0.5057278871536255, "loss": 3.3367, "nll_loss": 0.7809147834777832, "rewards/accuracies": 0.75, "rewards/chosen": -0.011539213359355927, "rewards/margins": 0.03903357312083244, "rewards/rejected": -0.05057279020547867, "step": 1499 }, { "epoch": 1.037344398340249, "grad_norm": 4.107361316680908, "learning_rate": 4.979253112033195e-05, "log_odds_chosen": 3.1276094913482666, "log_odds_ratio": -0.2214275598526001, "logits/chosen": -0.7829334735870361, "logits/rejected": -0.8359056711196899, "logps/chosen": -0.08103692531585693, "logps/rejected": -0.7872143983840942, "loss": 2.925, "nll_loss": 0.7091047167778015, "rewards/accuracies": 0.875, "rewards/chosen": -0.008103692904114723, "rewards/margins": 0.07061775028705597, "rewards/rejected": -0.07872144877910614, "step": 1500 }, { "epoch": 1.0380359612724759, "grad_norm": 7.110342502593994, "learning_rate": 4.9788689104041805e-05, "log_odds_chosen": 3.4415764808654785, "log_odds_ratio": -0.21897970139980316, "logits/chosen": -0.5591660141944885, "logits/rejected": -0.5564761161804199, "logps/chosen": -0.09651856124401093, "logps/rejected": -0.7514764666557312, "loss": 3.952, "nll_loss": 0.9661027789115906, "rewards/accuracies": 0.875, "rewards/chosen": -0.009651856496930122, "rewards/margins": 0.0654957965016365, "rewards/rejected": -0.07514764368534088, "step": 1501 }, { "epoch": 1.0387275242047027, "grad_norm": 4.894821643829346, "learning_rate": 4.978484708775166e-05, "log_odds_chosen": 5.221924781799316, "log_odds_ratio": -0.025731677189469337, "logits/chosen": -0.33677947521209717, "logits/rejected": -0.4343298077583313, "logps/chosen": -0.021347129717469215, "logps/rejected": -0.7246700525283813, "loss": 3.2321, "nll_loss": 0.8054642677307129, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021347124129533768, "rewards/margins": 0.07033228874206543, "rewards/rejected": -0.07246700674295425, "step": 1502 }, { "epoch": 1.0394190871369295, "grad_norm": 4.469569683074951, "learning_rate": 4.9781005071461504e-05, "log_odds_chosen": 5.758516788482666, "log_odds_ratio": -0.22051161527633667, "logits/chosen": -0.9311298131942749, "logits/rejected": -1.0032835006713867, "logps/chosen": -0.09000095725059509, "logps/rejected": -0.904339611530304, "loss": 2.8294, "nll_loss": 0.6853088140487671, "rewards/accuracies": 0.75, "rewards/chosen": -0.009000095538794994, "rewards/margins": 0.08143387734889984, "rewards/rejected": -0.09043397009372711, "step": 1503 }, { "epoch": 1.0401106500691564, "grad_norm": 5.033740520477295, "learning_rate": 4.9777163055171356e-05, "log_odds_chosen": 4.276998519897461, "log_odds_ratio": -0.09011676162481308, "logits/chosen": -0.8271830677986145, "logits/rejected": -0.8114771842956543, "logps/chosen": -0.043199293315410614, "logps/rejected": -0.6810850501060486, "loss": 4.096, "nll_loss": 1.014995813369751, "rewards/accuracies": 1.0, "rewards/chosen": -0.004319929517805576, "rewards/margins": 0.06378857791423798, "rewards/rejected": -0.06810849905014038, "step": 1504 }, { "epoch": 1.0408022130013832, "grad_norm": 6.084150791168213, "learning_rate": 4.977332103888121e-05, "log_odds_chosen": 4.076897621154785, "log_odds_ratio": -0.08082269877195358, "logits/chosen": -0.5791704654693604, "logits/rejected": -0.6189771294593811, "logps/chosen": -0.06673917174339294, "logps/rejected": -1.0344374179840088, "loss": 3.3458, "nll_loss": 0.8283798098564148, "rewards/accuracies": 1.0, "rewards/chosen": -0.006673917640000582, "rewards/margins": 0.09676983207464218, "rewards/rejected": -0.10344374179840088, "step": 1505 }, { "epoch": 1.04149377593361, "grad_norm": 3.709134578704834, "learning_rate": 4.976947902259106e-05, "log_odds_chosen": 7.363162994384766, "log_odds_ratio": -0.011417560279369354, "logits/chosen": -0.33076947927474976, "logits/rejected": -0.43420493602752686, "logps/chosen": -0.029158981516957283, "logps/rejected": -1.3433349132537842, "loss": 2.1727, "nll_loss": 0.5420453548431396, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029158983379602432, "rewards/margins": 0.13141758739948273, "rewards/rejected": -0.13433349132537842, "step": 1506 }, { "epoch": 1.0421853388658369, "grad_norm": 6.592757225036621, "learning_rate": 4.9765637006300907e-05, "log_odds_chosen": 3.5828444957733154, "log_odds_ratio": -0.3698550760746002, "logits/chosen": -0.7420531511306763, "logits/rejected": -0.7805401086807251, "logps/chosen": -0.10293908417224884, "logps/rejected": -0.9354575872421265, "loss": 3.6139, "nll_loss": 0.866478443145752, "rewards/accuracies": 0.875, "rewards/chosen": -0.010293908417224884, "rewards/margins": 0.08325185626745224, "rewards/rejected": -0.09354576468467712, "step": 1507 }, { "epoch": 1.0428769017980637, "grad_norm": 5.358985424041748, "learning_rate": 4.9761794990010766e-05, "log_odds_chosen": 3.160102367401123, "log_odds_ratio": -0.2559373378753662, "logits/chosen": -0.5014327764511108, "logits/rejected": -0.5404451489448547, "logps/chosen": -0.07068739831447601, "logps/rejected": -0.4019555151462555, "loss": 3.1128, "nll_loss": 0.7525964379310608, "rewards/accuracies": 0.875, "rewards/chosen": -0.007068739738315344, "rewards/margins": 0.033126816153526306, "rewards/rejected": -0.04019555449485779, "step": 1508 }, { "epoch": 1.0435684647302905, "grad_norm": 5.2016072273254395, "learning_rate": 4.975795297372061e-05, "log_odds_chosen": 3.912111520767212, "log_odds_ratio": -0.325267493724823, "logits/chosen": -0.4946413040161133, "logits/rejected": -0.5560250282287598, "logps/chosen": -0.15734195709228516, "logps/rejected": -0.8032873272895813, "loss": 3.3712, "nll_loss": 0.8102628588676453, "rewards/accuracies": 0.75, "rewards/chosen": -0.015734193846583366, "rewards/margins": 0.06459453701972961, "rewards/rejected": -0.08032873272895813, "step": 1509 }, { "epoch": 1.0442600276625174, "grad_norm": 6.495105743408203, "learning_rate": 4.9754110957430464e-05, "log_odds_chosen": 2.503323554992676, "log_odds_ratio": -0.8614378571510315, "logits/chosen": -0.8828580379486084, "logits/rejected": -0.9524918794631958, "logps/chosen": -0.1063104122877121, "logps/rejected": -0.4049009084701538, "loss": 3.8731, "nll_loss": 0.8821337223052979, "rewards/accuracies": 0.75, "rewards/chosen": -0.01063104160130024, "rewards/margins": 0.02985905110836029, "rewards/rejected": -0.04049009084701538, "step": 1510 }, { "epoch": 1.0449515905947442, "grad_norm": 6.673771858215332, "learning_rate": 4.9750268941140316e-05, "log_odds_chosen": 2.5765671730041504, "log_odds_ratio": -0.24561844766139984, "logits/chosen": -0.6686314344406128, "logits/rejected": -0.6705050468444824, "logps/chosen": -0.14821584522724152, "logps/rejected": -0.7514455914497375, "loss": 3.3937, "nll_loss": 0.8238645792007446, "rewards/accuracies": 1.0, "rewards/chosen": -0.014821582473814487, "rewards/margins": 0.06032297760248184, "rewards/rejected": -0.07514456659555435, "step": 1511 }, { "epoch": 1.045643153526971, "grad_norm": 4.638186454772949, "learning_rate": 4.974642692485016e-05, "log_odds_chosen": 5.006286144256592, "log_odds_ratio": -0.1460382491350174, "logits/chosen": -1.0378838777542114, "logits/rejected": -1.0734418630599976, "logps/chosen": -0.036317795515060425, "logps/rejected": -1.1099814176559448, "loss": 3.5714, "nll_loss": 0.8782393932342529, "rewards/accuracies": 0.875, "rewards/chosen": -0.003631779458373785, "rewards/margins": 0.10736636072397232, "rewards/rejected": -0.11099813878536224, "step": 1512 }, { "epoch": 1.0463347164591978, "grad_norm": 4.667377948760986, "learning_rate": 4.9742584908560014e-05, "log_odds_chosen": 3.783291816711426, "log_odds_ratio": -0.10481594502925873, "logits/chosen": -0.5721790790557861, "logits/rejected": -0.6407243013381958, "logps/chosen": -0.06656525284051895, "logps/rejected": -0.7319782972335815, "loss": 3.7715, "nll_loss": 0.9324032664299011, "rewards/accuracies": 1.0, "rewards/chosen": -0.006656525656580925, "rewards/margins": 0.06654130667448044, "rewards/rejected": -0.07319782674312592, "step": 1513 }, { "epoch": 1.0470262793914247, "grad_norm": 6.9356279373168945, "learning_rate": 4.973874289226986e-05, "log_odds_chosen": 3.897022008895874, "log_odds_ratio": -0.3799517750740051, "logits/chosen": -0.6409194469451904, "logits/rejected": -0.6361832618713379, "logps/chosen": -0.1504475176334381, "logps/rejected": -0.7533466219902039, "loss": 3.8866, "nll_loss": 0.9336593747138977, "rewards/accuracies": 0.75, "rewards/chosen": -0.015044751577079296, "rewards/margins": 0.060289908200502396, "rewards/rejected": -0.07533465325832367, "step": 1514 }, { "epoch": 1.0477178423236515, "grad_norm": 4.3579816818237305, "learning_rate": 4.973490087597972e-05, "log_odds_chosen": 4.464286804199219, "log_odds_ratio": -0.4597773551940918, "logits/chosen": -0.7858383655548096, "logits/rejected": -0.7813628911972046, "logps/chosen": -0.14199215173721313, "logps/rejected": -0.5497534871101379, "loss": 2.8294, "nll_loss": 0.6613786220550537, "rewards/accuracies": 0.75, "rewards/chosen": -0.014199215918779373, "rewards/margins": 0.04077612981200218, "rewards/rejected": -0.054975349456071854, "step": 1515 }, { "epoch": 1.0484094052558783, "grad_norm": 4.271621227264404, "learning_rate": 4.9731058859689565e-05, "log_odds_chosen": 4.928406238555908, "log_odds_ratio": -0.06683094799518585, "logits/chosen": -1.0744023323059082, "logits/rejected": -1.1269543170928955, "logps/chosen": -0.045706942677497864, "logps/rejected": -1.0398142337799072, "loss": 4.0336, "nll_loss": 1.001724362373352, "rewards/accuracies": 1.0, "rewards/chosen": -0.004570694640278816, "rewards/margins": 0.09941072762012482, "rewards/rejected": -0.10398142784833908, "step": 1516 }, { "epoch": 1.0491009681881052, "grad_norm": 6.2232232093811035, "learning_rate": 4.972721684339942e-05, "log_odds_chosen": 4.167832851409912, "log_odds_ratio": -0.151000514626503, "logits/chosen": -0.5242301821708679, "logits/rejected": -0.6229081153869629, "logps/chosen": -0.09259649366140366, "logps/rejected": -0.8858538866043091, "loss": 3.5524, "nll_loss": 0.8730012774467468, "rewards/accuracies": 1.0, "rewards/chosen": -0.009259650483727455, "rewards/margins": 0.07932574301958084, "rewards/rejected": -0.08858539164066315, "step": 1517 }, { "epoch": 1.049792531120332, "grad_norm": 6.540098667144775, "learning_rate": 4.972337482710927e-05, "log_odds_chosen": 3.922050714492798, "log_odds_ratio": -0.5066487789154053, "logits/chosen": -0.8467140793800354, "logits/rejected": -0.8433650732040405, "logps/chosen": -0.0856909528374672, "logps/rejected": -0.6936833262443542, "loss": 3.9991, "nll_loss": 0.9490994811058044, "rewards/accuracies": 0.875, "rewards/chosen": -0.00856909528374672, "rewards/margins": 0.060799237340688705, "rewards/rejected": -0.06936833262443542, "step": 1518 }, { "epoch": 1.0504840940525588, "grad_norm": 5.885974884033203, "learning_rate": 4.971953281081912e-05, "log_odds_chosen": 2.73998761177063, "log_odds_ratio": -0.43364351987838745, "logits/chosen": -0.48864781856536865, "logits/rejected": -0.48220106959342957, "logps/chosen": -0.12833350896835327, "logps/rejected": -0.44956284761428833, "loss": 3.5138, "nll_loss": 0.8350796699523926, "rewards/accuracies": 0.625, "rewards/chosen": -0.012833353132009506, "rewards/margins": 0.03212292864918709, "rewards/rejected": -0.044956281781196594, "step": 1519 }, { "epoch": 1.0511756569847857, "grad_norm": 4.99786901473999, "learning_rate": 4.971569079452897e-05, "log_odds_chosen": 3.619704246520996, "log_odds_ratio": -0.3211745321750641, "logits/chosen": -0.6291632652282715, "logits/rejected": -0.6346349716186523, "logps/chosen": -0.08411780744791031, "logps/rejected": -0.5153727531433105, "loss": 3.7475, "nll_loss": 0.9047644734382629, "rewards/accuracies": 0.875, "rewards/chosen": -0.00841178186237812, "rewards/margins": 0.043125495314598083, "rewards/rejected": -0.051537275314331055, "step": 1520 }, { "epoch": 1.0518672199170125, "grad_norm": 3.7386040687561035, "learning_rate": 4.971184877823882e-05, "log_odds_chosen": 4.540435791015625, "log_odds_ratio": -0.13528023660182953, "logits/chosen": -0.40115034580230713, "logits/rejected": -0.415203332901001, "logps/chosen": -0.02656002901494503, "logps/rejected": -0.8941652178764343, "loss": 2.5831, "nll_loss": 0.6322515606880188, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026560029946267605, "rewards/margins": 0.0867605209350586, "rewards/rejected": -0.08941652625799179, "step": 1521 }, { "epoch": 1.0525587828492393, "grad_norm": 4.414981365203857, "learning_rate": 4.970800676194867e-05, "log_odds_chosen": 6.532523155212402, "log_odds_ratio": -0.03326879441738129, "logits/chosen": -0.522810161113739, "logits/rejected": -0.597527027130127, "logps/chosen": -0.032224591821432114, "logps/rejected": -1.2256418466567993, "loss": 3.1706, "nll_loss": 0.7893306612968445, "rewards/accuracies": 1.0, "rewards/chosen": -0.00322245922870934, "rewards/margins": 0.11934173107147217, "rewards/rejected": -0.12256418168544769, "step": 1522 }, { "epoch": 1.0532503457814661, "grad_norm": 6.495909214019775, "learning_rate": 4.970416474565852e-05, "log_odds_chosen": 1.773823618888855, "log_odds_ratio": -0.5499988198280334, "logits/chosen": -0.6086165308952332, "logits/rejected": -0.5990644693374634, "logps/chosen": -0.1282849758863449, "logps/rejected": -0.4344036281108856, "loss": 4.7242, "nll_loss": 1.1260621547698975, "rewards/accuracies": 0.875, "rewards/chosen": -0.012828497216105461, "rewards/margins": 0.03061186708509922, "rewards/rejected": -0.04344036430120468, "step": 1523 }, { "epoch": 1.053941908713693, "grad_norm": 5.455717086791992, "learning_rate": 4.970032272936838e-05, "log_odds_chosen": 3.1658692359924316, "log_odds_ratio": -0.3641965687274933, "logits/chosen": -0.8424069285392761, "logits/rejected": -0.8386783599853516, "logps/chosen": -0.14120902121067047, "logps/rejected": -0.5841333866119385, "loss": 3.4216, "nll_loss": 0.8189803957939148, "rewards/accuracies": 0.875, "rewards/chosen": -0.014120901934802532, "rewards/margins": 0.04429244622588158, "rewards/rejected": -0.058413345366716385, "step": 1524 }, { "epoch": 1.0546334716459198, "grad_norm": 5.827929496765137, "learning_rate": 4.9696480713078223e-05, "log_odds_chosen": 2.437635898590088, "log_odds_ratio": -0.5703208446502686, "logits/chosen": -0.6680049896240234, "logits/rejected": -0.6942263841629028, "logps/chosen": -0.22066247463226318, "logps/rejected": -0.478730171918869, "loss": 3.363, "nll_loss": 0.7837151288986206, "rewards/accuracies": 0.75, "rewards/chosen": -0.02206624671816826, "rewards/margins": 0.025806769728660583, "rewards/rejected": -0.04787301644682884, "step": 1525 }, { "epoch": 1.0553250345781466, "grad_norm": 4.877053737640381, "learning_rate": 4.9692638696788076e-05, "log_odds_chosen": 3.6916658878326416, "log_odds_ratio": -0.19331425428390503, "logits/chosen": -0.19534748792648315, "logits/rejected": -0.24064458906650543, "logps/chosen": -0.08929431438446045, "logps/rejected": -0.7749847769737244, "loss": 2.8245, "nll_loss": 0.686805784702301, "rewards/accuracies": 0.875, "rewards/chosen": -0.008929431438446045, "rewards/margins": 0.06856904923915863, "rewards/rejected": -0.07749848067760468, "step": 1526 }, { "epoch": 1.0560165975103735, "grad_norm": 5.007440090179443, "learning_rate": 4.968879668049793e-05, "log_odds_chosen": 3.8080079555511475, "log_odds_ratio": -0.28198474645614624, "logits/chosen": -0.7920790314674377, "logits/rejected": -0.8061708211898804, "logps/chosen": -0.059589091688394547, "logps/rejected": -0.8138545751571655, "loss": 3.2012, "nll_loss": 0.7721074223518372, "rewards/accuracies": 0.75, "rewards/chosen": -0.005958909168839455, "rewards/margins": 0.07542654871940613, "rewards/rejected": -0.08138545602560043, "step": 1527 }, { "epoch": 1.0567081604426003, "grad_norm": 7.326679229736328, "learning_rate": 4.968495466420778e-05, "log_odds_chosen": 2.553607940673828, "log_odds_ratio": -0.20654824376106262, "logits/chosen": -0.5083712339401245, "logits/rejected": -0.5545042157173157, "logps/chosen": -0.11978105455636978, "logps/rejected": -0.673089861869812, "loss": 4.8292, "nll_loss": 1.186640739440918, "rewards/accuracies": 1.0, "rewards/chosen": -0.011978104710578918, "rewards/margins": 0.05533087998628616, "rewards/rejected": -0.06730898469686508, "step": 1528 }, { "epoch": 1.0573997233748271, "grad_norm": 6.929790496826172, "learning_rate": 4.9681112647917626e-05, "log_odds_chosen": 4.561022758483887, "log_odds_ratio": -0.0534825474023819, "logits/chosen": -0.5944857597351074, "logits/rejected": -0.6960824728012085, "logps/chosen": -0.057704515755176544, "logps/rejected": -0.9158083200454712, "loss": 4.3409, "nll_loss": 1.0798721313476562, "rewards/accuracies": 1.0, "rewards/chosen": -0.005770451854914427, "rewards/margins": 0.08581038564443588, "rewards/rejected": -0.0915808379650116, "step": 1529 }, { "epoch": 1.058091286307054, "grad_norm": 5.326831817626953, "learning_rate": 4.967727063162748e-05, "log_odds_chosen": 2.998952865600586, "log_odds_ratio": -0.31420373916625977, "logits/chosen": -0.6921024322509766, "logits/rejected": -0.7197988033294678, "logps/chosen": -0.16037589311599731, "logps/rejected": -0.823391318321228, "loss": 3.7279, "nll_loss": 0.9005526304244995, "rewards/accuracies": 0.875, "rewards/chosen": -0.01603759080171585, "rewards/margins": 0.06630153954029083, "rewards/rejected": -0.08233913779258728, "step": 1530 }, { "epoch": 1.0587828492392808, "grad_norm": 5.03424596786499, "learning_rate": 4.967342861533733e-05, "log_odds_chosen": 3.576352834701538, "log_odds_ratio": -0.19618278741836548, "logits/chosen": -0.3750036954879761, "logits/rejected": -0.3847036361694336, "logps/chosen": -0.0660768672823906, "logps/rejected": -0.5405435562133789, "loss": 3.5426, "nll_loss": 0.8660234808921814, "rewards/accuracies": 1.0, "rewards/chosen": -0.006607687100768089, "rewards/margins": 0.04744666814804077, "rewards/rejected": -0.05405435711145401, "step": 1531 }, { "epoch": 1.0594744121715076, "grad_norm": 6.5278472900390625, "learning_rate": 4.966958659904718e-05, "log_odds_chosen": 5.937303066253662, "log_odds_ratio": -0.061495859175920486, "logits/chosen": -0.7140944600105286, "logits/rejected": -0.7931259870529175, "logps/chosen": -0.03675852343440056, "logps/rejected": -1.057922124862671, "loss": 4.0925, "nll_loss": 1.0169788599014282, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036758524365723133, "rewards/margins": 0.10211636126041412, "rewards/rejected": -0.10579221695661545, "step": 1532 }, { "epoch": 1.0601659751037344, "grad_norm": 8.326800346374512, "learning_rate": 4.9665744582757036e-05, "log_odds_chosen": 4.401022434234619, "log_odds_ratio": -0.22211284935474396, "logits/chosen": -0.6128544211387634, "logits/rejected": -0.6509073972702026, "logps/chosen": -0.060591697692871094, "logps/rejected": -0.8965548276901245, "loss": 4.3236, "nll_loss": 1.0586953163146973, "rewards/accuracies": 1.0, "rewards/chosen": -0.006059169769287109, "rewards/margins": 0.08359631896018982, "rewards/rejected": -0.08965548872947693, "step": 1533 }, { "epoch": 1.0608575380359613, "grad_norm": 6.853762149810791, "learning_rate": 4.966190256646688e-05, "log_odds_chosen": 3.25315523147583, "log_odds_ratio": -0.30279356241226196, "logits/chosen": -0.45043084025382996, "logits/rejected": -0.4535585641860962, "logps/chosen": -0.060727495700120926, "logps/rejected": -0.5386378169059753, "loss": 4.0742, "nll_loss": 0.9882668256759644, "rewards/accuracies": 0.75, "rewards/chosen": -0.006072749383747578, "rewards/margins": 0.04779103398323059, "rewards/rejected": -0.053863782435655594, "step": 1534 }, { "epoch": 1.061549100968188, "grad_norm": 9.890820503234863, "learning_rate": 4.9658060550176734e-05, "log_odds_chosen": 4.420248031616211, "log_odds_ratio": -0.46526646614074707, "logits/chosen": -0.45639920234680176, "logits/rejected": -0.5076233148574829, "logps/chosen": -0.08660363405942917, "logps/rejected": -0.7423797845840454, "loss": 3.3507, "nll_loss": 0.791154146194458, "rewards/accuracies": 0.75, "rewards/chosen": -0.008660363964736462, "rewards/margins": 0.06557761132717133, "rewards/rejected": -0.07423797994852066, "step": 1535 }, { "epoch": 1.062240663900415, "grad_norm": 5.7195658683776855, "learning_rate": 4.965421853388659e-05, "log_odds_chosen": 3.7188186645507812, "log_odds_ratio": -0.18315275013446808, "logits/chosen": -1.041092872619629, "logits/rejected": -1.1016302108764648, "logps/chosen": -0.06354139000177383, "logps/rejected": -0.5816217660903931, "loss": 5.1011, "nll_loss": 1.2569714784622192, "rewards/accuracies": 0.875, "rewards/chosen": -0.006354139186441898, "rewards/margins": 0.051808036863803864, "rewards/rejected": -0.05816217511892319, "step": 1536 }, { "epoch": 1.0629322268326418, "grad_norm": 6.2236223220825195, "learning_rate": 4.965037651759644e-05, "log_odds_chosen": 2.0946202278137207, "log_odds_ratio": -0.40443798899650574, "logits/chosen": -0.6937721371650696, "logits/rejected": -0.7108144760131836, "logps/chosen": -0.14707300066947937, "logps/rejected": -0.6219024658203125, "loss": 3.6966, "nll_loss": 0.8837115168571472, "rewards/accuracies": 0.75, "rewards/chosen": -0.014707300812005997, "rewards/margins": 0.047482945024967194, "rewards/rejected": -0.06219024583697319, "step": 1537 }, { "epoch": 1.0636237897648686, "grad_norm": 6.805111408233643, "learning_rate": 4.9646534501306285e-05, "log_odds_chosen": 1.7916090488433838, "log_odds_ratio": -0.5535953044891357, "logits/chosen": -0.5534647703170776, "logits/rejected": -0.5669266581535339, "logps/chosen": -0.19315959513187408, "logps/rejected": -0.5117899775505066, "loss": 4.121, "nll_loss": 0.9748976826667786, "rewards/accuracies": 0.625, "rewards/chosen": -0.019315961748361588, "rewards/margins": 0.03186304122209549, "rewards/rejected": -0.05117899924516678, "step": 1538 }, { "epoch": 1.0643153526970954, "grad_norm": 4.819196701049805, "learning_rate": 4.964269248501614e-05, "log_odds_chosen": 6.569972038269043, "log_odds_ratio": -0.09104802459478378, "logits/chosen": -0.7071633338928223, "logits/rejected": -0.7946109175682068, "logps/chosen": -0.030333345755934715, "logps/rejected": -0.6713792085647583, "loss": 3.5293, "nll_loss": 0.8732243180274963, "rewards/accuracies": 1.0, "rewards/chosen": -0.003033334854990244, "rewards/margins": 0.0641045868396759, "rewards/rejected": -0.06713791936635971, "step": 1539 }, { "epoch": 1.0650069156293223, "grad_norm": 5.15584659576416, "learning_rate": 4.963885046872599e-05, "log_odds_chosen": 4.203775405883789, "log_odds_ratio": -0.19854849576950073, "logits/chosen": -0.2534320652484894, "logits/rejected": -0.2955503761768341, "logps/chosen": -0.03829387575387955, "logps/rejected": -0.6372541189193726, "loss": 4.1082, "nll_loss": 1.0071941614151, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038293874822556973, "rewards/margins": 0.059896018356084824, "rewards/rejected": -0.06372541189193726, "step": 1540 }, { "epoch": 1.065698478561549, "grad_norm": 6.698187828063965, "learning_rate": 4.9635008452435835e-05, "log_odds_chosen": 4.564090728759766, "log_odds_ratio": -0.19210776686668396, "logits/chosen": -0.3504962921142578, "logits/rejected": -0.4118233323097229, "logps/chosen": -0.09770353138446808, "logps/rejected": -0.9626650810241699, "loss": 3.631, "nll_loss": 0.888546347618103, "rewards/accuracies": 1.0, "rewards/chosen": -0.009770354256033897, "rewards/margins": 0.08649615198373795, "rewards/rejected": -0.09626650810241699, "step": 1541 }, { "epoch": 1.066390041493776, "grad_norm": 6.590559005737305, "learning_rate": 4.9631166436145695e-05, "log_odds_chosen": 0.892957329750061, "log_odds_ratio": -0.48052775859832764, "logits/chosen": -0.5946922898292542, "logits/rejected": -0.6033682823181152, "logps/chosen": -0.1246226578950882, "logps/rejected": -0.23645266890525818, "loss": 3.8066, "nll_loss": 0.9035993218421936, "rewards/accuracies": 0.75, "rewards/chosen": -0.01246226578950882, "rewards/margins": 0.011183001101016998, "rewards/rejected": -0.023645266890525818, "step": 1542 }, { "epoch": 1.0670816044260027, "grad_norm": 6.689427375793457, "learning_rate": 4.962732441985554e-05, "log_odds_chosen": 1.9284253120422363, "log_odds_ratio": -0.5196525454521179, "logits/chosen": -0.7123900055885315, "logits/rejected": -0.7312474846839905, "logps/chosen": -0.294888973236084, "logps/rejected": -0.5123586654663086, "loss": 3.0642, "nll_loss": 0.7140846252441406, "rewards/accuracies": 0.75, "rewards/chosen": -0.02948889695107937, "rewards/margins": 0.02174697443842888, "rewards/rejected": -0.0512358695268631, "step": 1543 }, { "epoch": 1.0677731673582296, "grad_norm": 7.583176136016846, "learning_rate": 4.962348240356539e-05, "log_odds_chosen": 2.621812105178833, "log_odds_ratio": -0.6792709827423096, "logits/chosen": -0.6956069469451904, "logits/rejected": -0.7238144874572754, "logps/chosen": -0.14746025204658508, "logps/rejected": -0.49026352167129517, "loss": 3.7706, "nll_loss": 0.8747116923332214, "rewards/accuracies": 0.625, "rewards/chosen": -0.014746023342013359, "rewards/margins": 0.03428032994270325, "rewards/rejected": -0.049026355147361755, "step": 1544 }, { "epoch": 1.0684647302904564, "grad_norm": 7.3789849281311035, "learning_rate": 4.9619640387275245e-05, "log_odds_chosen": 4.767126083374023, "log_odds_ratio": -0.3121826648712158, "logits/chosen": -0.7488409876823425, "logits/rejected": -0.8396613001823425, "logps/chosen": -0.07528477907180786, "logps/rejected": -0.9295564293861389, "loss": 4.9592, "nll_loss": 1.208593726158142, "rewards/accuracies": 0.75, "rewards/chosen": -0.007528477814048529, "rewards/margins": 0.0854271650314331, "rewards/rejected": -0.09295564889907837, "step": 1545 }, { "epoch": 1.0691562932226832, "grad_norm": 4.530989646911621, "learning_rate": 4.96157983709851e-05, "log_odds_chosen": 3.770918130874634, "log_odds_ratio": -0.25483426451683044, "logits/chosen": -0.286983460187912, "logits/rejected": -0.2833419144153595, "logps/chosen": -0.09215762466192245, "logps/rejected": -0.7823206186294556, "loss": 4.1782, "nll_loss": 1.0190585851669312, "rewards/accuracies": 0.875, "rewards/chosen": -0.009215762838721275, "rewards/margins": 0.06901630014181137, "rewards/rejected": -0.0782320648431778, "step": 1546 }, { "epoch": 1.06984785615491, "grad_norm": 7.37415885925293, "learning_rate": 4.961195635469494e-05, "log_odds_chosen": 2.621229410171509, "log_odds_ratio": -0.9123156070709229, "logits/chosen": -0.5532602667808533, "logits/rejected": -0.5623765587806702, "logps/chosen": -0.23067545890808105, "logps/rejected": -0.7292701005935669, "loss": 3.1782, "nll_loss": 0.703306257724762, "rewards/accuracies": 0.5, "rewards/chosen": -0.023067545145750046, "rewards/margins": 0.049859460443258286, "rewards/rejected": -0.07292701303958893, "step": 1547 }, { "epoch": 1.070539419087137, "grad_norm": 2.7616488933563232, "learning_rate": 4.9608114338404796e-05, "log_odds_chosen": 2.966463088989258, "log_odds_ratio": -0.22822074592113495, "logits/chosen": -0.6201618909835815, "logits/rejected": -0.6020743250846863, "logps/chosen": -0.07064022868871689, "logps/rejected": -0.6099626421928406, "loss": 3.038, "nll_loss": 0.7366786003112793, "rewards/accuracies": 0.875, "rewards/chosen": -0.0070640225894749165, "rewards/margins": 0.05393224209547043, "rewards/rejected": -0.06099626421928406, "step": 1548 }, { "epoch": 1.0712309820193637, "grad_norm": 5.657704830169678, "learning_rate": 4.960427232211465e-05, "log_odds_chosen": 5.151440620422363, "log_odds_ratio": -0.034663643687963486, "logits/chosen": -0.41759589314460754, "logits/rejected": -0.4986497759819031, "logps/chosen": -0.03338145092129707, "logps/rejected": -0.799149215221405, "loss": 3.465, "nll_loss": 0.8627803325653076, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033381450921297073, "rewards/margins": 0.07657677680253983, "rewards/rejected": -0.07991492748260498, "step": 1549 }, { "epoch": 1.0719225449515906, "grad_norm": 4.098145961761475, "learning_rate": 4.9600430305824494e-05, "log_odds_chosen": 3.907048463821411, "log_odds_ratio": -0.25910213589668274, "logits/chosen": -0.28188061714172363, "logits/rejected": -0.40207451581954956, "logps/chosen": -0.07275703549385071, "logps/rejected": -0.8932787179946899, "loss": 2.4006, "nll_loss": 0.5742417573928833, "rewards/accuracies": 0.875, "rewards/chosen": -0.007275703828781843, "rewards/margins": 0.08205216377973557, "rewards/rejected": -0.089327871799469, "step": 1550 }, { "epoch": 1.0726141078838174, "grad_norm": 6.418388843536377, "learning_rate": 4.959658828953435e-05, "log_odds_chosen": 2.885356903076172, "log_odds_ratio": -0.3832821249961853, "logits/chosen": -0.6497433185577393, "logits/rejected": -0.7168554663658142, "logps/chosen": -0.14998841285705566, "logps/rejected": -0.7326866388320923, "loss": 4.6042, "nll_loss": 1.1127233505249023, "rewards/accuracies": 0.875, "rewards/chosen": -0.014998842030763626, "rewards/margins": 0.05826983600854874, "rewards/rejected": -0.07326867431402206, "step": 1551 }, { "epoch": 1.0733056708160442, "grad_norm": 6.9985833168029785, "learning_rate": 4.95927462732442e-05, "log_odds_chosen": 1.6743195056915283, "log_odds_ratio": -0.39890745282173157, "logits/chosen": -0.5378249287605286, "logits/rejected": -0.5748019218444824, "logps/chosen": -0.11988474428653717, "logps/rejected": -0.4952792525291443, "loss": 3.4803, "nll_loss": 0.8301811814308167, "rewards/accuracies": 1.0, "rewards/chosen": -0.011988474056124687, "rewards/margins": 0.03753945231437683, "rewards/rejected": -0.04952792450785637, "step": 1552 }, { "epoch": 1.073997233748271, "grad_norm": 4.325546741485596, "learning_rate": 4.958890425695405e-05, "log_odds_chosen": 3.9147095680236816, "log_odds_ratio": -0.21128058433532715, "logits/chosen": -0.307517945766449, "logits/rejected": -0.3628294765949249, "logps/chosen": -0.0758337453007698, "logps/rejected": -0.78581303358078, "loss": 2.5512, "nll_loss": 0.6166709661483765, "rewards/accuracies": 1.0, "rewards/chosen": -0.007583374157547951, "rewards/margins": 0.0709979385137558, "rewards/rejected": -0.078581303358078, "step": 1553 }, { "epoch": 1.0746887966804979, "grad_norm": 3.2370855808258057, "learning_rate": 4.9585062240663904e-05, "log_odds_chosen": 3.607590436935425, "log_odds_ratio": -0.2519790828227997, "logits/chosen": -0.37377700209617615, "logits/rejected": -0.3569980263710022, "logps/chosen": -0.06945005059242249, "logps/rejected": -0.809980571269989, "loss": 2.6024, "nll_loss": 0.6253975033760071, "rewards/accuracies": 0.875, "rewards/chosen": -0.006945005152374506, "rewards/margins": 0.07405305653810501, "rewards/rejected": -0.08099806308746338, "step": 1554 }, { "epoch": 1.0753803596127247, "grad_norm": 5.439840793609619, "learning_rate": 4.9581220224373756e-05, "log_odds_chosen": 4.531608581542969, "log_odds_ratio": -0.266179621219635, "logits/chosen": -0.5603072047233582, "logits/rejected": -0.5774667263031006, "logps/chosen": -0.039245884865522385, "logps/rejected": -0.5564451813697815, "loss": 3.4032, "nll_loss": 0.8241841793060303, "rewards/accuracies": 0.875, "rewards/chosen": -0.003924589138478041, "rewards/margins": 0.05171992629766464, "rewards/rejected": -0.05564451962709427, "step": 1555 }, { "epoch": 1.0760719225449515, "grad_norm": 7.069887161254883, "learning_rate": 4.95773782080836e-05, "log_odds_chosen": 1.0571117401123047, "log_odds_ratio": -0.4433063864707947, "logits/chosen": -0.7253471612930298, "logits/rejected": -0.7856532335281372, "logps/chosen": -0.17213042080402374, "logps/rejected": -0.4319838881492615, "loss": 3.9714, "nll_loss": 0.9485308527946472, "rewards/accuracies": 0.75, "rewards/chosen": -0.017213044688105583, "rewards/margins": 0.025985345244407654, "rewards/rejected": -0.043198391795158386, "step": 1556 }, { "epoch": 1.0767634854771784, "grad_norm": 6.502851963043213, "learning_rate": 4.9573536191793454e-05, "log_odds_chosen": 2.863762855529785, "log_odds_ratio": -0.33171597123146057, "logits/chosen": -0.6376508474349976, "logits/rejected": -0.7480642795562744, "logps/chosen": -0.10218179225921631, "logps/rejected": -0.664842963218689, "loss": 2.8928, "nll_loss": 0.6900296807289124, "rewards/accuracies": 0.875, "rewards/chosen": -0.010218179784715176, "rewards/margins": 0.056266117841005325, "rewards/rejected": -0.06648429483175278, "step": 1557 }, { "epoch": 1.0774550484094052, "grad_norm": 6.30776834487915, "learning_rate": 4.956969417550331e-05, "log_odds_chosen": 4.367179870605469, "log_odds_ratio": -0.41399842500686646, "logits/chosen": -0.3242146968841553, "logits/rejected": -0.4026827812194824, "logps/chosen": -0.09431909769773483, "logps/rejected": -0.5847798585891724, "loss": 3.5125, "nll_loss": 0.8367260694503784, "rewards/accuracies": 0.75, "rewards/chosen": -0.009431909769773483, "rewards/margins": 0.04904608428478241, "rewards/rejected": -0.058477990329265594, "step": 1558 }, { "epoch": 1.078146611341632, "grad_norm": 8.217262268066406, "learning_rate": 4.956585215921315e-05, "log_odds_chosen": 3.6869709491729736, "log_odds_ratio": -0.5149865746498108, "logits/chosen": -0.9258292317390442, "logits/rejected": -0.9052362442016602, "logps/chosen": -0.12747938930988312, "logps/rejected": -0.7195074558258057, "loss": 4.7971, "nll_loss": 1.1477842330932617, "rewards/accuracies": 0.75, "rewards/chosen": -0.012747939676046371, "rewards/margins": 0.059202805161476135, "rewards/rejected": -0.07195074111223221, "step": 1559 }, { "epoch": 1.0788381742738589, "grad_norm": 4.424699306488037, "learning_rate": 4.956201014292301e-05, "log_odds_chosen": 3.942409038543701, "log_odds_ratio": -0.08518670499324799, "logits/chosen": -0.4492560029029846, "logits/rejected": -0.4492393732070923, "logps/chosen": -0.05345804616808891, "logps/rejected": -0.7698019742965698, "loss": 2.9677, "nll_loss": 0.7333984375, "rewards/accuracies": 1.0, "rewards/chosen": -0.005345804616808891, "rewards/margins": 0.07163438946008682, "rewards/rejected": -0.07698019593954086, "step": 1560 }, { "epoch": 1.0795297372060857, "grad_norm": 5.800602436065674, "learning_rate": 4.955816812663286e-05, "log_odds_chosen": 5.187628269195557, "log_odds_ratio": -0.12586824595928192, "logits/chosen": -0.7637317180633545, "logits/rejected": -0.8406319618225098, "logps/chosen": -0.08760840445756912, "logps/rejected": -0.8338153958320618, "loss": 3.7892, "nll_loss": 0.9347092509269714, "rewards/accuracies": 1.0, "rewards/chosen": -0.008760839700698853, "rewards/margins": 0.07462070137262344, "rewards/rejected": -0.0833815410733223, "step": 1561 }, { "epoch": 1.0802213001383125, "grad_norm": 4.874590873718262, "learning_rate": 4.955432611034271e-05, "log_odds_chosen": 3.5219225883483887, "log_odds_ratio": -0.30585017800331116, "logits/chosen": -0.8220917582511902, "logits/rejected": -0.8466977477073669, "logps/chosen": -0.0972166433930397, "logps/rejected": -0.4325979948043823, "loss": 3.2432, "nll_loss": 0.7802194952964783, "rewards/accuracies": 0.75, "rewards/chosen": -0.009721663780510426, "rewards/margins": 0.03353814035654068, "rewards/rejected": -0.04325980320572853, "step": 1562 }, { "epoch": 1.0809128630705394, "grad_norm": 6.767172336578369, "learning_rate": 4.955048409405256e-05, "log_odds_chosen": 3.9182686805725098, "log_odds_ratio": -0.2959219813346863, "logits/chosen": -0.6551793813705444, "logits/rejected": -0.7170536518096924, "logps/chosen": -0.08494836837053299, "logps/rejected": -0.7065892219543457, "loss": 3.554, "nll_loss": 0.8589138984680176, "rewards/accuracies": 0.75, "rewards/chosen": -0.008494837209582329, "rewards/margins": 0.06216409057378769, "rewards/rejected": -0.07065892964601517, "step": 1563 }, { "epoch": 1.0816044260027662, "grad_norm": 3.542426824569702, "learning_rate": 4.9546642077762415e-05, "log_odds_chosen": 4.997422695159912, "log_odds_ratio": -0.1264844536781311, "logits/chosen": -0.6658202409744263, "logits/rejected": -0.7189267873764038, "logps/chosen": -0.10747257620096207, "logps/rejected": -0.9709796905517578, "loss": 2.1569, "nll_loss": 0.5265790820121765, "rewards/accuracies": 1.0, "rewards/chosen": -0.010747257620096207, "rewards/margins": 0.0863507091999054, "rewards/rejected": -0.0970979705452919, "step": 1564 }, { "epoch": 1.082295988934993, "grad_norm": 5.220712661743164, "learning_rate": 4.954280006147226e-05, "log_odds_chosen": 2.417450428009033, "log_odds_ratio": -0.3215015232563019, "logits/chosen": -0.6923054456710815, "logits/rejected": -0.6714261770248413, "logps/chosen": -0.0629698857665062, "logps/rejected": -0.37886086106300354, "loss": 3.6002, "nll_loss": 0.8678989410400391, "rewards/accuracies": 0.875, "rewards/chosen": -0.0062969885766506195, "rewards/margins": 0.031589098274707794, "rewards/rejected": -0.037886083126068115, "step": 1565 }, { "epoch": 1.0829875518672198, "grad_norm": 5.408576488494873, "learning_rate": 4.953895804518211e-05, "log_odds_chosen": 2.9714107513427734, "log_odds_ratio": -0.2437531054019928, "logits/chosen": -0.7561995983123779, "logits/rejected": -0.7987399697303772, "logps/chosen": -0.09682756662368774, "logps/rejected": -0.8127809166908264, "loss": 2.9384, "nll_loss": 0.7102223634719849, "rewards/accuracies": 0.875, "rewards/chosen": -0.009682757779955864, "rewards/margins": 0.07159534096717834, "rewards/rejected": -0.08127809315919876, "step": 1566 }, { "epoch": 1.0836791147994467, "grad_norm": 7.883374214172363, "learning_rate": 4.9535116028891965e-05, "log_odds_chosen": 0.3866366147994995, "log_odds_ratio": -0.6445382237434387, "logits/chosen": -1.159950613975525, "logits/rejected": -1.1587477922439575, "logps/chosen": -0.1852284073829651, "logps/rejected": -0.28827083110809326, "loss": 6.2101, "nll_loss": 1.4880727529525757, "rewards/accuracies": 0.625, "rewards/chosen": -0.01852283999323845, "rewards/margins": 0.010304244235157967, "rewards/rejected": -0.028827082365751266, "step": 1567 }, { "epoch": 1.0843706777316735, "grad_norm": 7.807645797729492, "learning_rate": 4.953127401260181e-05, "log_odds_chosen": 2.2896320819854736, "log_odds_ratio": -0.406283974647522, "logits/chosen": -1.0614399909973145, "logits/rejected": -1.1143429279327393, "logps/chosen": -0.33437466621398926, "logps/rejected": -0.8825758695602417, "loss": 5.8773, "nll_loss": 1.4286930561065674, "rewards/accuracies": 0.875, "rewards/chosen": -0.033437468111515045, "rewards/margins": 0.05482013151049614, "rewards/rejected": -0.08825759589672089, "step": 1568 }, { "epoch": 1.0850622406639003, "grad_norm": 7.627025604248047, "learning_rate": 4.952743199631167e-05, "log_odds_chosen": 4.397810459136963, "log_odds_ratio": -0.48821020126342773, "logits/chosen": -0.5610281229019165, "logits/rejected": -0.611020565032959, "logps/chosen": -0.06754257529973984, "logps/rejected": -0.7311649918556213, "loss": 4.1127, "nll_loss": 0.9793562889099121, "rewards/accuracies": 0.75, "rewards/chosen": -0.006754256784915924, "rewards/margins": 0.06636224687099457, "rewards/rejected": -0.07311650365591049, "step": 1569 }, { "epoch": 1.0857538035961272, "grad_norm": 9.063570976257324, "learning_rate": 4.9523589980021516e-05, "log_odds_chosen": 3.481013298034668, "log_odds_ratio": -0.37235239148139954, "logits/chosen": -0.8404412269592285, "logits/rejected": -0.7990936040878296, "logps/chosen": -0.1394955813884735, "logps/rejected": -0.6244330406188965, "loss": 3.5202, "nll_loss": 0.8428088426589966, "rewards/accuracies": 0.875, "rewards/chosen": -0.013949558138847351, "rewards/margins": 0.04849374666810036, "rewards/rejected": -0.06244330108165741, "step": 1570 }, { "epoch": 1.086445366528354, "grad_norm": 6.26206111907959, "learning_rate": 4.951974796373137e-05, "log_odds_chosen": 3.6194040775299072, "log_odds_ratio": -0.18340617418289185, "logits/chosen": -0.8630995750427246, "logits/rejected": -0.9298602938652039, "logps/chosen": -0.10304134339094162, "logps/rejected": -0.6944648623466492, "loss": 4.4837, "nll_loss": 1.1025748252868652, "rewards/accuracies": 1.0, "rewards/chosen": -0.010304134339094162, "rewards/margins": 0.0591423474252224, "rewards/rejected": -0.06944648176431656, "step": 1571 }, { "epoch": 1.0871369294605808, "grad_norm": 5.816069602966309, "learning_rate": 4.951590594744122e-05, "log_odds_chosen": 5.30167293548584, "log_odds_ratio": -0.15548737347126007, "logits/chosen": -0.26609960198402405, "logits/rejected": -0.312613308429718, "logps/chosen": -0.041022010147571564, "logps/rejected": -0.6375818252563477, "loss": 3.3537, "nll_loss": 0.8228654861450195, "rewards/accuracies": 1.0, "rewards/chosen": -0.004102201201021671, "rewards/margins": 0.05965597555041313, "rewards/rejected": -0.06375817954540253, "step": 1572 }, { "epoch": 1.0878284923928077, "grad_norm": 8.704594612121582, "learning_rate": 4.951206393115107e-05, "log_odds_chosen": 3.002938985824585, "log_odds_ratio": -0.4912012815475464, "logits/chosen": -0.9212777614593506, "logits/rejected": -0.9318854808807373, "logps/chosen": -0.09731482714414597, "logps/rejected": -0.6755044460296631, "loss": 4.4402, "nll_loss": 1.0609227418899536, "rewards/accuracies": 0.75, "rewards/chosen": -0.009731482714414597, "rewards/margins": 0.05781896412372589, "rewards/rejected": -0.06755045056343079, "step": 1573 }, { "epoch": 1.0885200553250345, "grad_norm": 5.034717082977295, "learning_rate": 4.950822191486092e-05, "log_odds_chosen": 2.4043898582458496, "log_odds_ratio": -0.4333747625350952, "logits/chosen": -0.6324424743652344, "logits/rejected": -0.662071168422699, "logps/chosen": -0.1544969230890274, "logps/rejected": -0.5821070075035095, "loss": 3.2963, "nll_loss": 0.7807316780090332, "rewards/accuracies": 0.625, "rewards/chosen": -0.015449692495167255, "rewards/margins": 0.04276100918650627, "rewards/rejected": -0.05821070447564125, "step": 1574 }, { "epoch": 1.0892116182572613, "grad_norm": 5.510666847229004, "learning_rate": 4.950437989857077e-05, "log_odds_chosen": 5.295717716217041, "log_odds_ratio": -0.21159470081329346, "logits/chosen": -0.6146509647369385, "logits/rejected": -0.6928945779800415, "logps/chosen": -0.06159878149628639, "logps/rejected": -0.9186801314353943, "loss": 3.8207, "nll_loss": 0.9340190291404724, "rewards/accuracies": 0.875, "rewards/chosen": -0.006159878335893154, "rewards/margins": 0.0857081338763237, "rewards/rejected": -0.09186801314353943, "step": 1575 }, { "epoch": 1.0899031811894881, "grad_norm": 5.914917469024658, "learning_rate": 4.9500537882280624e-05, "log_odds_chosen": 4.013111114501953, "log_odds_ratio": -0.19167517125606537, "logits/chosen": -0.5529786348342896, "logits/rejected": -0.5917526483535767, "logps/chosen": -0.04883315786719322, "logps/rejected": -0.667991042137146, "loss": 2.7463, "nll_loss": 0.6674108505249023, "rewards/accuracies": 1.0, "rewards/chosen": -0.004883316345512867, "rewards/margins": 0.06191578879952431, "rewards/rejected": -0.0667991042137146, "step": 1576 }, { "epoch": 1.090594744121715, "grad_norm": 6.839190483093262, "learning_rate": 4.949669586599047e-05, "log_odds_chosen": 5.532386779785156, "log_odds_ratio": -0.0764627605676651, "logits/chosen": -0.4853833019733429, "logits/rejected": -0.5567290782928467, "logps/chosen": -0.03992730379104614, "logps/rejected": -1.2442706823349, "loss": 4.5269, "nll_loss": 1.124070167541504, "rewards/accuracies": 1.0, "rewards/chosen": -0.003992730751633644, "rewards/margins": 0.12043432891368866, "rewards/rejected": -0.12442706525325775, "step": 1577 }, { "epoch": 1.0912863070539418, "grad_norm": 4.283989906311035, "learning_rate": 4.949285384970033e-05, "log_odds_chosen": 5.226707458496094, "log_odds_ratio": -0.2467494159936905, "logits/chosen": -0.5581457018852234, "logits/rejected": -0.6312193870544434, "logps/chosen": -0.07011242210865021, "logps/rejected": -0.948968231678009, "loss": 2.6167, "nll_loss": 0.6295046210289001, "rewards/accuracies": 0.875, "rewards/chosen": -0.007011242676526308, "rewards/margins": 0.08788558095693588, "rewards/rejected": -0.0948968306183815, "step": 1578 }, { "epoch": 1.0919778699861689, "grad_norm": 7.571458339691162, "learning_rate": 4.9489011833410174e-05, "log_odds_chosen": 2.7708773612976074, "log_odds_ratio": -0.4572438895702362, "logits/chosen": -0.45970478653907776, "logits/rejected": -0.49945998191833496, "logps/chosen": -0.2505786120891571, "logps/rejected": -0.6798125505447388, "loss": 4.4457, "nll_loss": 1.0656884908676147, "rewards/accuracies": 0.875, "rewards/chosen": -0.02505786530673504, "rewards/margins": 0.042923398315906525, "rewards/rejected": -0.06798125803470612, "step": 1579 }, { "epoch": 1.0926694329183957, "grad_norm": 6.045781135559082, "learning_rate": 4.9485169817120027e-05, "log_odds_chosen": 1.9347407817840576, "log_odds_ratio": -0.558551549911499, "logits/chosen": -0.6301894187927246, "logits/rejected": -0.6606278419494629, "logps/chosen": -0.1072370707988739, "logps/rejected": -0.4773583710193634, "loss": 3.232, "nll_loss": 0.7521458268165588, "rewards/accuracies": 0.625, "rewards/chosen": -0.01072370633482933, "rewards/margins": 0.03701213374733925, "rewards/rejected": -0.04773584008216858, "step": 1580 }, { "epoch": 1.0933609958506225, "grad_norm": 10.899459838867188, "learning_rate": 4.948132780082988e-05, "log_odds_chosen": 5.716094970703125, "log_odds_ratio": -0.26957541704177856, "logits/chosen": -0.3576154112815857, "logits/rejected": -0.40196752548217773, "logps/chosen": -0.06499480456113815, "logps/rejected": -1.0065155029296875, "loss": 3.6276, "nll_loss": 0.8799489140510559, "rewards/accuracies": 0.875, "rewards/chosen": -0.006499480456113815, "rewards/margins": 0.09415207803249359, "rewards/rejected": -0.10065155476331711, "step": 1581 }, { "epoch": 1.0940525587828493, "grad_norm": 3.8481316566467285, "learning_rate": 4.947748578453973e-05, "log_odds_chosen": 4.175989627838135, "log_odds_ratio": -0.0638933852314949, "logits/chosen": -0.9346051216125488, "logits/rejected": -0.9710807800292969, "logps/chosen": -0.04190801829099655, "logps/rejected": -0.6167564392089844, "loss": 3.0485, "nll_loss": 0.7557366490364075, "rewards/accuracies": 1.0, "rewards/chosen": -0.00419080164283514, "rewards/margins": 0.05748484656214714, "rewards/rejected": -0.06167564168572426, "step": 1582 }, { "epoch": 1.0947441217150762, "grad_norm": 3.2850422859191895, "learning_rate": 4.947364376824958e-05, "log_odds_chosen": 4.834670543670654, "log_odds_ratio": -0.07640227675437927, "logits/chosen": -0.4402886629104614, "logits/rejected": -0.44064515829086304, "logps/chosen": -0.05761062353849411, "logps/rejected": -0.8796223998069763, "loss": 2.8049, "nll_loss": 0.6935828328132629, "rewards/accuracies": 1.0, "rewards/chosen": -0.005761062726378441, "rewards/margins": 0.08220118284225464, "rewards/rejected": -0.08796224743127823, "step": 1583 }, { "epoch": 1.095435684647303, "grad_norm": 7.48633337020874, "learning_rate": 4.946980175195943e-05, "log_odds_chosen": 2.777125597000122, "log_odds_ratio": -0.2875290513038635, "logits/chosen": -0.4086695611476898, "logits/rejected": -0.44259917736053467, "logps/chosen": -0.12264515459537506, "logps/rejected": -0.5968244075775146, "loss": 3.8138, "nll_loss": 0.924686074256897, "rewards/accuracies": 1.0, "rewards/chosen": -0.012264516204595566, "rewards/margins": 0.04741792380809784, "rewards/rejected": -0.059682440012693405, "step": 1584 }, { "epoch": 1.0961272475795298, "grad_norm": 5.133641242980957, "learning_rate": 4.946595973566928e-05, "log_odds_chosen": 0.8376002311706543, "log_odds_ratio": -0.7386839389801025, "logits/chosen": -0.661615788936615, "logits/rejected": -0.6790576577186584, "logps/chosen": -0.2840120792388916, "logps/rejected": -0.3249834179878235, "loss": 3.1574, "nll_loss": 0.7154770493507385, "rewards/accuracies": 0.75, "rewards/chosen": -0.0284012071788311, "rewards/margins": 0.0040971338748931885, "rewards/rejected": -0.03249834105372429, "step": 1585 }, { "epoch": 1.0968188105117567, "grad_norm": 5.681310653686523, "learning_rate": 4.946211771937913e-05, "log_odds_chosen": 5.562228202819824, "log_odds_ratio": -0.19052885472774506, "logits/chosen": -0.5465707778930664, "logits/rejected": -0.5676652193069458, "logps/chosen": -0.04916710406541824, "logps/rejected": -0.8330110311508179, "loss": 3.2122, "nll_loss": 0.7839978337287903, "rewards/accuracies": 0.875, "rewards/chosen": -0.004916710779070854, "rewards/margins": 0.0783843994140625, "rewards/rejected": -0.0833011046051979, "step": 1586 }, { "epoch": 1.0975103734439835, "grad_norm": 3.980339527130127, "learning_rate": 4.945827570308899e-05, "log_odds_chosen": 4.279679298400879, "log_odds_ratio": -0.30568966269493103, "logits/chosen": -0.33977723121643066, "logits/rejected": -0.33284032344818115, "logps/chosen": -0.09073701500892639, "logps/rejected": -0.6302847266197205, "loss": 2.272, "nll_loss": 0.5374258160591125, "rewards/accuracies": 0.875, "rewards/chosen": -0.009073702618479729, "rewards/margins": 0.053954772651195526, "rewards/rejected": -0.06302846968173981, "step": 1587 }, { "epoch": 1.0982019363762103, "grad_norm": 6.770112991333008, "learning_rate": 4.945443368679883e-05, "log_odds_chosen": 4.432427406311035, "log_odds_ratio": -0.5763689875602722, "logits/chosen": -0.7433520555496216, "logits/rejected": -0.7090204954147339, "logps/chosen": -0.2709885835647583, "logps/rejected": -0.893592357635498, "loss": 2.7834, "nll_loss": 0.6382165551185608, "rewards/accuracies": 0.875, "rewards/chosen": -0.02709886059165001, "rewards/margins": 0.062260378152132034, "rewards/rejected": -0.08935923129320145, "step": 1588 }, { "epoch": 1.0988934993084372, "grad_norm": 6.708295822143555, "learning_rate": 4.9450591670508685e-05, "log_odds_chosen": 3.567749500274658, "log_odds_ratio": -0.39118099212646484, "logits/chosen": -0.2514309287071228, "logits/rejected": -0.2853849232196808, "logps/chosen": -0.09239472448825836, "logps/rejected": -0.3336440920829773, "loss": 3.9691, "nll_loss": 0.9531650543212891, "rewards/accuracies": 0.75, "rewards/chosen": -0.00923947338014841, "rewards/margins": 0.024124938994646072, "rewards/rejected": -0.03336441144347191, "step": 1589 }, { "epoch": 1.099585062240664, "grad_norm": 10.586297988891602, "learning_rate": 4.944674965421854e-05, "log_odds_chosen": 4.613847732543945, "log_odds_ratio": -0.29507291316986084, "logits/chosen": -0.6949521899223328, "logits/rejected": -0.7418277859687805, "logps/chosen": -0.08647487312555313, "logps/rejected": -0.9586604237556458, "loss": 3.1212, "nll_loss": 0.750788688659668, "rewards/accuracies": 0.875, "rewards/chosen": -0.008647486567497253, "rewards/margins": 0.08721855282783508, "rewards/rejected": -0.09586603939533234, "step": 1590 }, { "epoch": 1.1002766251728908, "grad_norm": 6.597928047180176, "learning_rate": 4.944290763792839e-05, "log_odds_chosen": 1.6849793195724487, "log_odds_ratio": -0.6137993931770325, "logits/chosen": -0.20653149485588074, "logits/rejected": -0.31308573484420776, "logps/chosen": -0.1943899691104889, "logps/rejected": -0.5025385618209839, "loss": 3.4048, "nll_loss": 0.7898290157318115, "rewards/accuracies": 0.75, "rewards/chosen": -0.01943899691104889, "rewards/margins": 0.030814863741397858, "rewards/rejected": -0.05025385692715645, "step": 1591 }, { "epoch": 1.1009681881051177, "grad_norm": 7.803439140319824, "learning_rate": 4.9439065621638235e-05, "log_odds_chosen": 2.9654581546783447, "log_odds_ratio": -0.6031391620635986, "logits/chosen": -0.4306519627571106, "logits/rejected": -0.5081591010093689, "logps/chosen": -0.17305631935596466, "logps/rejected": -0.736358642578125, "loss": 4.1712, "nll_loss": 0.9824913740158081, "rewards/accuracies": 0.875, "rewards/chosen": -0.017305633053183556, "rewards/margins": 0.05633023381233215, "rewards/rejected": -0.07363586127758026, "step": 1592 }, { "epoch": 1.1016597510373445, "grad_norm": 4.649277687072754, "learning_rate": 4.943522360534809e-05, "log_odds_chosen": 4.267962455749512, "log_odds_ratio": -0.23143798112869263, "logits/chosen": -0.6768631339073181, "logits/rejected": -0.6536177396774292, "logps/chosen": -0.06421162188053131, "logps/rejected": -0.9346715807914734, "loss": 3.2449, "nll_loss": 0.7880828976631165, "rewards/accuracies": 1.0, "rewards/chosen": -0.0064211622811853886, "rewards/margins": 0.08704599738121033, "rewards/rejected": -0.09346716105937958, "step": 1593 }, { "epoch": 1.1023513139695713, "grad_norm": 23.250423431396484, "learning_rate": 4.943138158905794e-05, "log_odds_chosen": 2.1543641090393066, "log_odds_ratio": -0.31085923314094543, "logits/chosen": -0.4932831823825836, "logits/rejected": -0.4758214056491852, "logps/chosen": -0.061881616711616516, "logps/rejected": -0.518766462802887, "loss": 3.1535, "nll_loss": 0.757276713848114, "rewards/accuracies": 0.875, "rewards/chosen": -0.006188162136822939, "rewards/margins": 0.04568849131464958, "rewards/rejected": -0.051876652985811234, "step": 1594 }, { "epoch": 1.1030428769017981, "grad_norm": 6.492717266082764, "learning_rate": 4.9427539572767786e-05, "log_odds_chosen": 2.071760654449463, "log_odds_ratio": -0.36288756132125854, "logits/chosen": -0.6168765425682068, "logits/rejected": -0.6260075569152832, "logps/chosen": -0.14397895336151123, "logps/rejected": -0.5518575310707092, "loss": 4.0722, "nll_loss": 0.9817653298377991, "rewards/accuracies": 0.625, "rewards/chosen": -0.014397896826267242, "rewards/margins": 0.04078786075115204, "rewards/rejected": -0.05518575757741928, "step": 1595 }, { "epoch": 1.103734439834025, "grad_norm": 5.582458019256592, "learning_rate": 4.9423697556477645e-05, "log_odds_chosen": 4.845449924468994, "log_odds_ratio": -0.17791111767292023, "logits/chosen": -0.2792462706565857, "logits/rejected": -0.3469136357307434, "logps/chosen": -0.07031789422035217, "logps/rejected": -0.8743381500244141, "loss": 2.9334, "nll_loss": 0.7155571579933167, "rewards/accuracies": 1.0, "rewards/chosen": -0.0070317890495061874, "rewards/margins": 0.08040202409029007, "rewards/rejected": -0.0874338150024414, "step": 1596 }, { "epoch": 1.1044260027662518, "grad_norm": 5.206540107727051, "learning_rate": 4.941985554018749e-05, "log_odds_chosen": 5.46361780166626, "log_odds_ratio": -0.21550363302230835, "logits/chosen": -0.44038888812065125, "logits/rejected": -0.44270017743110657, "logps/chosen": -0.08260742574930191, "logps/rejected": -0.9547374248504639, "loss": 3.4688, "nll_loss": 0.8456557989120483, "rewards/accuracies": 0.875, "rewards/chosen": -0.008260741829872131, "rewards/margins": 0.08721300959587097, "rewards/rejected": -0.0954737514257431, "step": 1597 }, { "epoch": 1.1051175656984786, "grad_norm": 6.907774448394775, "learning_rate": 4.941601352389734e-05, "log_odds_chosen": 5.112415313720703, "log_odds_ratio": -0.15309226512908936, "logits/chosen": -0.5420889854431152, "logits/rejected": -0.6159816980361938, "logps/chosen": -0.04379098117351532, "logps/rejected": -1.0684349536895752, "loss": 3.6694, "nll_loss": 0.9020520448684692, "rewards/accuracies": 1.0, "rewards/chosen": -0.00437909783795476, "rewards/margins": 0.10246440768241882, "rewards/rejected": -0.106843501329422, "step": 1598 }, { "epoch": 1.1058091286307055, "grad_norm": 4.947270393371582, "learning_rate": 4.9412171507607196e-05, "log_odds_chosen": 4.351035118103027, "log_odds_ratio": -0.1412781924009323, "logits/chosen": -0.5069587230682373, "logits/rejected": -0.5466160178184509, "logps/chosen": -0.11968033015727997, "logps/rejected": -0.9916271567344666, "loss": 3.2332, "nll_loss": 0.79417884349823, "rewards/accuracies": 1.0, "rewards/chosen": -0.011968032456934452, "rewards/margins": 0.08719468861818314, "rewards/rejected": -0.09916272759437561, "step": 1599 }, { "epoch": 1.1065006915629323, "grad_norm": 3.7760555744171143, "learning_rate": 4.940832949131705e-05, "log_odds_chosen": 5.466026782989502, "log_odds_ratio": -0.09592100977897644, "logits/chosen": -0.661428689956665, "logits/rejected": -0.7313367128372192, "logps/chosen": -0.03490148112177849, "logps/rejected": -0.9429963231086731, "loss": 2.319, "nll_loss": 0.5701475739479065, "rewards/accuracies": 1.0, "rewards/chosen": -0.003490148112177849, "rewards/margins": 0.09080948680639267, "rewards/rejected": -0.09429963678121567, "step": 1600 }, { "epoch": 1.1071922544951591, "grad_norm": 3.3002185821533203, "learning_rate": 4.9404487475026894e-05, "log_odds_chosen": 4.276036739349365, "log_odds_ratio": -0.07263210415840149, "logits/chosen": -0.6134432554244995, "logits/rejected": -0.6024748682975769, "logps/chosen": -0.05085566267371178, "logps/rejected": -0.7766672372817993, "loss": 3.3723, "nll_loss": 0.8358083963394165, "rewards/accuracies": 1.0, "rewards/chosen": -0.00508556654676795, "rewards/margins": 0.07258116453886032, "rewards/rejected": -0.07766672968864441, "step": 1601 }, { "epoch": 1.107883817427386, "grad_norm": 7.756932735443115, "learning_rate": 4.9400645458736746e-05, "log_odds_chosen": 2.1110987663269043, "log_odds_ratio": -1.3031134605407715, "logits/chosen": -0.41942086815834045, "logits/rejected": -0.48380038142204285, "logps/chosen": -0.2565928101539612, "logps/rejected": -0.5842873454093933, "loss": 4.4358, "nll_loss": 0.9786398410797119, "rewards/accuracies": 0.625, "rewards/chosen": -0.025659281760454178, "rewards/margins": 0.032769449055194855, "rewards/rejected": -0.05842873081564903, "step": 1602 }, { "epoch": 1.1085753803596128, "grad_norm": 6.662200927734375, "learning_rate": 4.93968034424466e-05, "log_odds_chosen": 3.6231627464294434, "log_odds_ratio": -0.18868622183799744, "logits/chosen": -0.5040012001991272, "logits/rejected": -0.5356638431549072, "logps/chosen": -0.10449366271495819, "logps/rejected": -0.7985894680023193, "loss": 2.8902, "nll_loss": 0.7036839127540588, "rewards/accuracies": 0.875, "rewards/chosen": -0.010449366644024849, "rewards/margins": 0.0694095715880394, "rewards/rejected": -0.0798589363694191, "step": 1603 }, { "epoch": 1.1092669432918396, "grad_norm": 5.240853309631348, "learning_rate": 4.9392961426156444e-05, "log_odds_chosen": 1.5222573280334473, "log_odds_ratio": -0.4411067068576813, "logits/chosen": -0.6297593116760254, "logits/rejected": -0.6809091567993164, "logps/chosen": -0.0897793397307396, "logps/rejected": -0.4441949725151062, "loss": 4.194, "nll_loss": 1.0043995380401611, "rewards/accuracies": 0.875, "rewards/chosen": -0.008977933786809444, "rewards/margins": 0.0354415625333786, "rewards/rejected": -0.04441949725151062, "step": 1604 }, { "epoch": 1.1099585062240664, "grad_norm": 4.411398887634277, "learning_rate": 4.9389119409866304e-05, "log_odds_chosen": 2.3982813358306885, "log_odds_ratio": -0.3286263644695282, "logits/chosen": -0.13925503194332123, "logits/rejected": -0.11078141629695892, "logps/chosen": -0.08166931569576263, "logps/rejected": -0.3501182794570923, "loss": 3.8694, "nll_loss": 0.9344936609268188, "rewards/accuracies": 0.75, "rewards/chosen": -0.008166931569576263, "rewards/margins": 0.026844896376132965, "rewards/rejected": -0.03501182794570923, "step": 1605 }, { "epoch": 1.1106500691562933, "grad_norm": 5.958127975463867, "learning_rate": 4.938527739357615e-05, "log_odds_chosen": 3.813007354736328, "log_odds_ratio": -0.12042003870010376, "logits/chosen": -0.8060808777809143, "logits/rejected": -0.8647295832633972, "logps/chosen": -0.08172804862260818, "logps/rejected": -0.9784216284751892, "loss": 4.7046, "nll_loss": 1.1641055345535278, "rewards/accuracies": 1.0, "rewards/chosen": -0.008172805421054363, "rewards/margins": 0.0896693542599678, "rewards/rejected": -0.09784215688705444, "step": 1606 }, { "epoch": 1.11134163208852, "grad_norm": 7.819185256958008, "learning_rate": 4.9381435377286e-05, "log_odds_chosen": 2.3151051998138428, "log_odds_ratio": -0.5686360597610474, "logits/chosen": -0.5417439937591553, "logits/rejected": -0.5752671957015991, "logps/chosen": -0.1530493199825287, "logps/rejected": -0.4273419976234436, "loss": 3.5647, "nll_loss": 0.8343040943145752, "rewards/accuracies": 0.625, "rewards/chosen": -0.015304931439459324, "rewards/margins": 0.02742926776409149, "rewards/rejected": -0.04273419827222824, "step": 1607 }, { "epoch": 1.112033195020747, "grad_norm": 5.927016258239746, "learning_rate": 4.9377593360995854e-05, "log_odds_chosen": 3.2013847827911377, "log_odds_ratio": -0.33018064498901367, "logits/chosen": -0.6260197162628174, "logits/rejected": -0.6978428363800049, "logps/chosen": -0.06922274827957153, "logps/rejected": -0.8326290249824524, "loss": 3.1934, "nll_loss": 0.7653228640556335, "rewards/accuracies": 0.75, "rewards/chosen": -0.006922274827957153, "rewards/margins": 0.07634063065052032, "rewards/rejected": -0.08326290547847748, "step": 1608 }, { "epoch": 1.1127247579529738, "grad_norm": 3.3232882022857666, "learning_rate": 4.937375134470571e-05, "log_odds_chosen": 3.8514456748962402, "log_odds_ratio": -0.26576095819473267, "logits/chosen": -0.2728078067302704, "logits/rejected": -0.26904305815696716, "logps/chosen": -0.08268368989229202, "logps/rejected": -0.513020396232605, "loss": 2.8992, "nll_loss": 0.6982300877571106, "rewards/accuracies": 0.875, "rewards/chosen": -0.008268369361758232, "rewards/margins": 0.043033670634031296, "rewards/rejected": -0.05130203813314438, "step": 1609 }, { "epoch": 1.1134163208852006, "grad_norm": 3.1397273540496826, "learning_rate": 4.936990932841555e-05, "log_odds_chosen": 3.1608638763427734, "log_odds_ratio": -0.3535284996032715, "logits/chosen": -0.3686169683933258, "logits/rejected": -0.38490238785743713, "logps/chosen": -0.10619036853313446, "logps/rejected": -0.6823399662971497, "loss": 2.3135, "nll_loss": 0.5430222153663635, "rewards/accuracies": 0.75, "rewards/chosen": -0.010619036853313446, "rewards/margins": 0.05761495977640152, "rewards/rejected": -0.06823400408029556, "step": 1610 }, { "epoch": 1.1141078838174274, "grad_norm": 5.7114338874816895, "learning_rate": 4.9366067312125405e-05, "log_odds_chosen": 0.835321307182312, "log_odds_ratio": -0.516608715057373, "logits/chosen": -0.9049027562141418, "logits/rejected": -0.876893162727356, "logps/chosen": -0.149239644408226, "logps/rejected": -0.3646370470523834, "loss": 3.99, "nll_loss": 0.9458338618278503, "rewards/accuracies": 0.75, "rewards/chosen": -0.014923964627087116, "rewards/margins": 0.02153974026441574, "rewards/rejected": -0.03646370768547058, "step": 1611 }, { "epoch": 1.1147994467496543, "grad_norm": 7.030287742614746, "learning_rate": 4.936222529583526e-05, "log_odds_chosen": 3.394101619720459, "log_odds_ratio": -0.3108062744140625, "logits/chosen": -0.19622401893138885, "logits/rejected": -0.26739972829818726, "logps/chosen": -0.05431075766682625, "logps/rejected": -0.4297984838485718, "loss": 2.801, "nll_loss": 0.6691676378250122, "rewards/accuracies": 0.75, "rewards/chosen": -0.00543107558041811, "rewards/margins": 0.03754877299070358, "rewards/rejected": -0.042979851365089417, "step": 1612 }, { "epoch": 1.115491009681881, "grad_norm": 5.743906021118164, "learning_rate": 4.93583832795451e-05, "log_odds_chosen": 3.954920768737793, "log_odds_ratio": -0.15968072414398193, "logits/chosen": -0.3926483392715454, "logits/rejected": -0.47625932097435, "logps/chosen": -0.06609839200973511, "logps/rejected": -0.8475942015647888, "loss": 3.3477, "nll_loss": 0.8209632635116577, "rewards/accuracies": 0.875, "rewards/chosen": -0.006609839387238026, "rewards/margins": 0.07814957201480865, "rewards/rejected": -0.0847594141960144, "step": 1613 }, { "epoch": 1.116182572614108, "grad_norm": 7.3016510009765625, "learning_rate": 4.935454126325496e-05, "log_odds_chosen": 3.8284425735473633, "log_odds_ratio": -0.25773143768310547, "logits/chosen": -0.48374220728874207, "logits/rejected": -0.5044052004814148, "logps/chosen": -0.08846524357795715, "logps/rejected": -0.7487151026725769, "loss": 3.6295, "nll_loss": 0.8816075325012207, "rewards/accuracies": 0.75, "rewards/chosen": -0.008846525102853775, "rewards/margins": 0.06602498888969421, "rewards/rejected": -0.07487151026725769, "step": 1614 }, { "epoch": 1.1168741355463347, "grad_norm": 11.499844551086426, "learning_rate": 4.935069924696481e-05, "log_odds_chosen": 3.526914596557617, "log_odds_ratio": -0.6500409841537476, "logits/chosen": -0.8252017498016357, "logits/rejected": -0.8671191334724426, "logps/chosen": -0.13813264667987823, "logps/rejected": -0.5451509356498718, "loss": 4.4419, "nll_loss": 1.045462727546692, "rewards/accuracies": 0.75, "rewards/chosen": -0.013813264667987823, "rewards/margins": 0.04070183262228966, "rewards/rejected": -0.05451509729027748, "step": 1615 }, { "epoch": 1.1175656984785616, "grad_norm": 4.009091854095459, "learning_rate": 4.934685723067466e-05, "log_odds_chosen": 6.222552299499512, "log_odds_ratio": -0.047023192048072815, "logits/chosen": -0.5882456302642822, "logits/rejected": -0.5744817852973938, "logps/chosen": -0.03364326059818268, "logps/rejected": -0.5920311212539673, "loss": 2.6308, "nll_loss": 0.6530083417892456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033643266651779413, "rewards/margins": 0.05583879351615906, "rewards/rejected": -0.05920311436057091, "step": 1616 }, { "epoch": 1.1182572614107884, "grad_norm": 5.012261390686035, "learning_rate": 4.934301521438451e-05, "log_odds_chosen": 1.9860203266143799, "log_odds_ratio": -0.3052294850349426, "logits/chosen": -0.20576004683971405, "logits/rejected": -0.1735822856426239, "logps/chosen": -0.10027210414409637, "logps/rejected": -0.3483325242996216, "loss": 3.0656, "nll_loss": 0.7358713746070862, "rewards/accuracies": 0.875, "rewards/chosen": -0.010027211159467697, "rewards/margins": 0.02480604313313961, "rewards/rejected": -0.03483325615525246, "step": 1617 }, { "epoch": 1.1189488243430152, "grad_norm": 4.4571919441223145, "learning_rate": 4.9339173198094365e-05, "log_odds_chosen": 4.763401508331299, "log_odds_ratio": -0.167281836271286, "logits/chosen": -0.5365985631942749, "logits/rejected": -0.5737805366516113, "logps/chosen": -0.08558344095945358, "logps/rejected": -0.7303501963615417, "loss": 3.3979, "nll_loss": 0.8327397704124451, "rewards/accuracies": 1.0, "rewards/chosen": -0.008558344095945358, "rewards/margins": 0.06447667628526688, "rewards/rejected": -0.07303501665592194, "step": 1618 }, { "epoch": 1.119640387275242, "grad_norm": 5.40239953994751, "learning_rate": 4.933533118180421e-05, "log_odds_chosen": 4.589512825012207, "log_odds_ratio": -0.21769876778125763, "logits/chosen": -0.66706782579422, "logits/rejected": -0.6870805025100708, "logps/chosen": -0.036408498883247375, "logps/rejected": -0.6844362020492554, "loss": 3.0529, "nll_loss": 0.7414535284042358, "rewards/accuracies": 0.875, "rewards/chosen": -0.0036408500745892525, "rewards/margins": 0.06480278074741364, "rewards/rejected": -0.06844362616539001, "step": 1619 }, { "epoch": 1.120331950207469, "grad_norm": 4.73685884475708, "learning_rate": 4.933148916551406e-05, "log_odds_chosen": 0.8199967741966248, "log_odds_ratio": -0.4512670636177063, "logits/chosen": -0.333723247051239, "logits/rejected": -0.3674015700817108, "logps/chosen": -0.12762649357318878, "logps/rejected": -0.306660532951355, "loss": 3.2674, "nll_loss": 0.7717109322547913, "rewards/accuracies": 0.625, "rewards/chosen": -0.012762648984789848, "rewards/margins": 0.0179034024477005, "rewards/rejected": -0.030666053295135498, "step": 1620 }, { "epoch": 1.1210235131396957, "grad_norm": 5.175172328948975, "learning_rate": 4.9327647149223916e-05, "log_odds_chosen": 2.6243436336517334, "log_odds_ratio": -0.4764016568660736, "logits/chosen": -0.7795975208282471, "logits/rejected": -0.782432496547699, "logps/chosen": -0.051674984395504, "logps/rejected": -0.7004712820053101, "loss": 3.3344, "nll_loss": 0.7859505414962769, "rewards/accuracies": 0.75, "rewards/chosen": -0.005167498253285885, "rewards/margins": 0.06487962603569031, "rewards/rejected": -0.07004712522029877, "step": 1621 }, { "epoch": 1.1217150760719226, "grad_norm": 4.103938579559326, "learning_rate": 4.932380513293376e-05, "log_odds_chosen": 4.642533302307129, "log_odds_ratio": -0.101071797311306, "logits/chosen": -0.6278355717658997, "logits/rejected": -0.6976598501205444, "logps/chosen": -0.05617416650056839, "logps/rejected": -0.7917832136154175, "loss": 2.8799, "nll_loss": 0.709868848323822, "rewards/accuracies": 1.0, "rewards/chosen": -0.005617417395114899, "rewards/margins": 0.0735609158873558, "rewards/rejected": -0.0791783332824707, "step": 1622 }, { "epoch": 1.1224066390041494, "grad_norm": 4.957241058349609, "learning_rate": 4.931996311664362e-05, "log_odds_chosen": 6.685695648193359, "log_odds_ratio": -0.04919375851750374, "logits/chosen": -0.6535751223564148, "logits/rejected": -0.6850297451019287, "logps/chosen": -0.06118635833263397, "logps/rejected": -1.2937774658203125, "loss": 3.101, "nll_loss": 0.770325779914856, "rewards/accuracies": 1.0, "rewards/chosen": -0.00611863611266017, "rewards/margins": 0.12325912714004517, "rewards/rejected": -0.12937775254249573, "step": 1623 }, { "epoch": 1.1230982019363762, "grad_norm": 9.584799766540527, "learning_rate": 4.9316121100353466e-05, "log_odds_chosen": 3.091644763946533, "log_odds_ratio": -0.31044328212738037, "logits/chosen": -0.4470682740211487, "logits/rejected": -0.5123588442802429, "logps/chosen": -0.07803031802177429, "logps/rejected": -0.7579048871994019, "loss": 4.4858, "nll_loss": 1.090402603149414, "rewards/accuracies": 0.875, "rewards/chosen": -0.007803032640367746, "rewards/margins": 0.06798745691776276, "rewards/rejected": -0.07579049468040466, "step": 1624 }, { "epoch": 1.123789764868603, "grad_norm": 5.5097174644470215, "learning_rate": 4.931227908406332e-05, "log_odds_chosen": 4.695748805999756, "log_odds_ratio": -0.1543882191181183, "logits/chosen": -0.15720303356647491, "logits/rejected": -0.1915234476327896, "logps/chosen": -0.0845288410782814, "logps/rejected": -0.7179161310195923, "loss": 2.4497, "nll_loss": 0.5969738960266113, "rewards/accuracies": 1.0, "rewards/chosen": -0.008452883921563625, "rewards/margins": 0.06333872675895691, "rewards/rejected": -0.07179160416126251, "step": 1625 }, { "epoch": 1.1244813278008299, "grad_norm": 8.026557922363281, "learning_rate": 4.930843706777317e-05, "log_odds_chosen": 3.344681739807129, "log_odds_ratio": -1.0812357664108276, "logits/chosen": -0.48144322633743286, "logits/rejected": -0.4669128954410553, "logps/chosen": -0.21000587940216064, "logps/rejected": -0.7212158441543579, "loss": 4.0158, "nll_loss": 0.8958293795585632, "rewards/accuracies": 0.75, "rewards/chosen": -0.021000590175390244, "rewards/margins": 0.05112099647521973, "rewards/rejected": -0.07212159037590027, "step": 1626 }, { "epoch": 1.1251728907330567, "grad_norm": 9.976818084716797, "learning_rate": 4.9304595051483024e-05, "log_odds_chosen": 0.8328192234039307, "log_odds_ratio": -0.9626883864402771, "logits/chosen": -0.653387188911438, "logits/rejected": -0.6768147945404053, "logps/chosen": -0.20581936836242676, "logps/rejected": -0.33645424246788025, "loss": 3.9907, "nll_loss": 0.9014164805412292, "rewards/accuracies": 0.625, "rewards/chosen": -0.020581936463713646, "rewards/margins": 0.013063488528132439, "rewards/rejected": -0.03364542871713638, "step": 1627 }, { "epoch": 1.1258644536652835, "grad_norm": 5.68765926361084, "learning_rate": 4.930075303519287e-05, "log_odds_chosen": 4.571466445922852, "log_odds_ratio": -0.20608262717723846, "logits/chosen": -0.6150608658790588, "logits/rejected": -0.6334342360496521, "logps/chosen": -0.05543144419789314, "logps/rejected": -0.8365674018859863, "loss": 4.3687, "nll_loss": 1.0715715885162354, "rewards/accuracies": 0.875, "rewards/chosen": -0.005543144885450602, "rewards/margins": 0.0781136006116867, "rewards/rejected": -0.08365673571825027, "step": 1628 }, { "epoch": 1.1265560165975104, "grad_norm": 4.4451494216918945, "learning_rate": 4.929691101890272e-05, "log_odds_chosen": 3.742349863052368, "log_odds_ratio": -0.23910017311573029, "logits/chosen": -0.3370603024959564, "logits/rejected": -0.3568029999732971, "logps/chosen": -0.16225500404834747, "logps/rejected": -0.7111620903015137, "loss": 3.7668, "nll_loss": 0.9177911877632141, "rewards/accuracies": 0.875, "rewards/chosen": -0.016225501894950867, "rewards/margins": 0.0548907108604908, "rewards/rejected": -0.07111620903015137, "step": 1629 }, { "epoch": 1.1272475795297372, "grad_norm": 3.7970314025878906, "learning_rate": 4.9293069002612574e-05, "log_odds_chosen": 2.94815731048584, "log_odds_ratio": -0.15021851658821106, "logits/chosen": -0.5437855124473572, "logits/rejected": -0.5586941838264465, "logps/chosen": -0.12474802136421204, "logps/rejected": -0.7907891273498535, "loss": 2.7239, "nll_loss": 0.6659583449363708, "rewards/accuracies": 1.0, "rewards/chosen": -0.012474801391363144, "rewards/margins": 0.06660410761833191, "rewards/rejected": -0.07907891273498535, "step": 1630 }, { "epoch": 1.127939142461964, "grad_norm": 8.042040824890137, "learning_rate": 4.9289226986322427e-05, "log_odds_chosen": 3.3266515731811523, "log_odds_ratio": -0.4503074884414673, "logits/chosen": -0.45929020643234253, "logits/rejected": -0.4860483407974243, "logps/chosen": -0.11026670038700104, "logps/rejected": -0.6502195596694946, "loss": 4.1101, "nll_loss": 0.9824921488761902, "rewards/accuracies": 0.875, "rewards/chosen": -0.011026670224964619, "rewards/margins": 0.0539952851831913, "rewards/rejected": -0.06502196192741394, "step": 1631 }, { "epoch": 1.1286307053941909, "grad_norm": 4.391267776489258, "learning_rate": 4.928538497003228e-05, "log_odds_chosen": 5.245411396026611, "log_odds_ratio": -0.34077537059783936, "logits/chosen": -0.699263870716095, "logits/rejected": -0.7080675363540649, "logps/chosen": -0.10102634876966476, "logps/rejected": -0.7285547256469727, "loss": 2.3565, "nll_loss": 0.5550588369369507, "rewards/accuracies": 0.875, "rewards/chosen": -0.010102634318172932, "rewards/margins": 0.06275284290313721, "rewards/rejected": -0.07285548001527786, "step": 1632 }, { "epoch": 1.1293222683264177, "grad_norm": 4.450112819671631, "learning_rate": 4.9281542953742125e-05, "log_odds_chosen": 7.433021545410156, "log_odds_ratio": -0.0016825036145746708, "logits/chosen": -0.3040405511856079, "logits/rejected": -0.34822529554367065, "logps/chosen": -0.00632679695263505, "logps/rejected": -1.2802190780639648, "loss": 2.5501, "nll_loss": 0.637348473072052, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006326796719804406, "rewards/margins": 0.12738922238349915, "rewards/rejected": -0.12802191078662872, "step": 1633 }, { "epoch": 1.1300138312586445, "grad_norm": 5.715757369995117, "learning_rate": 4.927770093745198e-05, "log_odds_chosen": 3.635809898376465, "log_odds_ratio": -0.4132714867591858, "logits/chosen": -0.35934287309646606, "logits/rejected": -0.3480125963687897, "logps/chosen": -0.11658094823360443, "logps/rejected": -0.7394274473190308, "loss": 3.198, "nll_loss": 0.7581678032875061, "rewards/accuracies": 0.75, "rewards/chosen": -0.011658095754683018, "rewards/margins": 0.062284644693136215, "rewards/rejected": -0.07394274324178696, "step": 1634 }, { "epoch": 1.1307053941908713, "grad_norm": 7.623841762542725, "learning_rate": 4.927385892116183e-05, "log_odds_chosen": 2.3251943588256836, "log_odds_ratio": -0.43498295545578003, "logits/chosen": -0.5964546799659729, "logits/rejected": -0.5950238704681396, "logps/chosen": -0.12847104668617249, "logps/rejected": -0.5509581565856934, "loss": 4.8888, "nll_loss": 1.1787103414535522, "rewards/accuracies": 0.625, "rewards/chosen": -0.012847105041146278, "rewards/margins": 0.042248714715242386, "rewards/rejected": -0.055095817893743515, "step": 1635 }, { "epoch": 1.1313969571230982, "grad_norm": 4.627635955810547, "learning_rate": 4.927001690487168e-05, "log_odds_chosen": 6.360747337341309, "log_odds_ratio": -0.03663550317287445, "logits/chosen": -0.3691325783729553, "logits/rejected": -0.4044398069381714, "logps/chosen": -0.05212727189064026, "logps/rejected": -1.1166536808013916, "loss": 2.5455, "nll_loss": 0.6327031850814819, "rewards/accuracies": 1.0, "rewards/chosen": -0.005212727934122086, "rewards/margins": 0.10645262897014618, "rewards/rejected": -0.11166536808013916, "step": 1636 }, { "epoch": 1.132088520055325, "grad_norm": 6.78596830368042, "learning_rate": 4.926617488858153e-05, "log_odds_chosen": 2.127685070037842, "log_odds_ratio": -0.6240582466125488, "logits/chosen": -0.4540773630142212, "logits/rejected": -0.49073025584220886, "logps/chosen": -0.16882912814617157, "logps/rejected": -0.4076695740222931, "loss": 4.0789, "nll_loss": 0.9573305249214172, "rewards/accuracies": 0.625, "rewards/chosen": -0.016882915049791336, "rewards/margins": 0.02388404682278633, "rewards/rejected": -0.04076696187257767, "step": 1637 }, { "epoch": 1.1327800829875518, "grad_norm": 6.193438529968262, "learning_rate": 4.926233287229138e-05, "log_odds_chosen": 5.44583797454834, "log_odds_ratio": -0.17725984752178192, "logits/chosen": -0.003792904317378998, "logits/rejected": -0.11088617146015167, "logps/chosen": -0.043570052832365036, "logps/rejected": -1.0186293125152588, "loss": 3.391, "nll_loss": 0.8300263285636902, "rewards/accuracies": 0.875, "rewards/chosen": -0.0043570054695010185, "rewards/margins": 0.09750592708587646, "rewards/rejected": -0.10186292976140976, "step": 1638 }, { "epoch": 1.1334716459197787, "grad_norm": 7.459033489227295, "learning_rate": 4.925849085600123e-05, "log_odds_chosen": 3.1296372413635254, "log_odds_ratio": -0.23613426089286804, "logits/chosen": -0.49093225598335266, "logits/rejected": -0.5520147681236267, "logps/chosen": -0.08632385730743408, "logps/rejected": -0.6004340648651123, "loss": 3.683, "nll_loss": 0.8971471786499023, "rewards/accuracies": 0.875, "rewards/chosen": -0.008632385171949863, "rewards/margins": 0.05141102522611618, "rewards/rejected": -0.06004340946674347, "step": 1639 }, { "epoch": 1.1341632088520055, "grad_norm": 5.153171539306641, "learning_rate": 4.9254648839711085e-05, "log_odds_chosen": 4.974581718444824, "log_odds_ratio": -0.2203356772661209, "logits/chosen": -0.34750106930732727, "logits/rejected": -0.3961317241191864, "logps/chosen": -0.062355298548936844, "logps/rejected": -0.7640964388847351, "loss": 2.6511, "nll_loss": 0.6407467722892761, "rewards/accuracies": 0.75, "rewards/chosen": -0.006235530134290457, "rewards/margins": 0.07017411291599274, "rewards/rejected": -0.07640964537858963, "step": 1640 }, { "epoch": 1.1348547717842323, "grad_norm": 4.96319055557251, "learning_rate": 4.925080682342094e-05, "log_odds_chosen": 3.6856021881103516, "log_odds_ratio": -0.34094586968421936, "logits/chosen": -0.618395209312439, "logits/rejected": -0.6706283092498779, "logps/chosen": -0.17205843329429626, "logps/rejected": -0.5053707361221313, "loss": 3.246, "nll_loss": 0.777412474155426, "rewards/accuracies": 0.875, "rewards/chosen": -0.017205843701958656, "rewards/margins": 0.03333123028278351, "rewards/rejected": -0.050537072122097015, "step": 1641 }, { "epoch": 1.1355463347164592, "grad_norm": 6.023739814758301, "learning_rate": 4.924696480713078e-05, "log_odds_chosen": 2.418586492538452, "log_odds_ratio": -0.3684270977973938, "logits/chosen": -0.7153379321098328, "logits/rejected": -0.7132663130760193, "logps/chosen": -0.10732871294021606, "logps/rejected": -0.4773138761520386, "loss": 3.5234, "nll_loss": 0.8440024852752686, "rewards/accuracies": 0.75, "rewards/chosen": -0.010732870548963547, "rewards/margins": 0.03699852153658867, "rewards/rejected": -0.047731392085552216, "step": 1642 }, { "epoch": 1.136237897648686, "grad_norm": 3.6020874977111816, "learning_rate": 4.9243122790840636e-05, "log_odds_chosen": 3.060307502746582, "log_odds_ratio": -0.13144780695438385, "logits/chosen": -0.6625022888183594, "logits/rejected": -0.6797756552696228, "logps/chosen": -0.09465555101633072, "logps/rejected": -0.6426516175270081, "loss": 3.4357, "nll_loss": 0.8457767963409424, "rewards/accuracies": 1.0, "rewards/chosen": -0.009465554729104042, "rewards/margins": 0.054799605160951614, "rewards/rejected": -0.0642651617527008, "step": 1643 }, { "epoch": 1.1369294605809128, "grad_norm": 4.302525520324707, "learning_rate": 4.923928077455049e-05, "log_odds_chosen": 2.8316779136657715, "log_odds_ratio": -0.469307005405426, "logits/chosen": -0.5069953203201294, "logits/rejected": -0.511053204536438, "logps/chosen": -0.10691452026367188, "logps/rejected": -0.5705878734588623, "loss": 3.149, "nll_loss": 0.7403192520141602, "rewards/accuracies": 0.625, "rewards/chosen": -0.010691452771425247, "rewards/margins": 0.0463673360645771, "rewards/rejected": -0.05705878883600235, "step": 1644 }, { "epoch": 1.1376210235131397, "grad_norm": 7.63559103012085, "learning_rate": 4.923543875826034e-05, "log_odds_chosen": 4.579002857208252, "log_odds_ratio": -0.09951350837945938, "logits/chosen": 0.05032390356063843, "logits/rejected": -0.00823403149843216, "logps/chosen": -0.02870844677090645, "logps/rejected": -0.9091194272041321, "loss": 4.3199, "nll_loss": 1.070017695426941, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028708449099212885, "rewards/margins": 0.08804109692573547, "rewards/rejected": -0.09091193974018097, "step": 1645 }, { "epoch": 1.1383125864453665, "grad_norm": 4.535558700561523, "learning_rate": 4.9231596741970186e-05, "log_odds_chosen": 3.8096237182617188, "log_odds_ratio": -0.36003577709198, "logits/chosen": -0.8161823153495789, "logits/rejected": -0.8410078287124634, "logps/chosen": -0.07835372537374496, "logps/rejected": -0.7174544334411621, "loss": 3.0849, "nll_loss": 0.735215425491333, "rewards/accuracies": 0.75, "rewards/chosen": -0.007835373282432556, "rewards/margins": 0.06391007453203201, "rewards/rejected": -0.07174544036388397, "step": 1646 }, { "epoch": 1.1390041493775933, "grad_norm": 4.016298294067383, "learning_rate": 4.9227754725680045e-05, "log_odds_chosen": 3.7809200286865234, "log_odds_ratio": -0.2413710504770279, "logits/chosen": -0.42067304253578186, "logits/rejected": -0.37396830320358276, "logps/chosen": -0.09345149993896484, "logps/rejected": -0.6482632160186768, "loss": 2.8125, "nll_loss": 0.6789858341217041, "rewards/accuracies": 0.875, "rewards/chosen": -0.00934515055269003, "rewards/margins": 0.05548117309808731, "rewards/rejected": -0.06482632458209991, "step": 1647 }, { "epoch": 1.1396957123098201, "grad_norm": 7.45605993270874, "learning_rate": 4.922391270938989e-05, "log_odds_chosen": 2.0952701568603516, "log_odds_ratio": -0.37152040004730225, "logits/chosen": -0.580081582069397, "logits/rejected": -0.6073364019393921, "logps/chosen": -0.1313703954219818, "logps/rejected": -0.597480058670044, "loss": 4.4212, "nll_loss": 1.0681456327438354, "rewards/accuracies": 0.875, "rewards/chosen": -0.01313704065978527, "rewards/margins": 0.04661097005009651, "rewards/rejected": -0.05974800884723663, "step": 1648 }, { "epoch": 1.140387275242047, "grad_norm": 5.093718528747559, "learning_rate": 4.9220070693099743e-05, "log_odds_chosen": 1.542366623878479, "log_odds_ratio": -0.34795552492141724, "logits/chosen": -0.5907109975814819, "logits/rejected": -0.5933347940444946, "logps/chosen": -0.09473934769630432, "logps/rejected": -0.30543074011802673, "loss": 3.4758, "nll_loss": 0.834161639213562, "rewards/accuracies": 0.75, "rewards/chosen": -0.009473934769630432, "rewards/margins": 0.02106913924217224, "rewards/rejected": -0.030543074011802673, "step": 1649 }, { "epoch": 1.1410788381742738, "grad_norm": 10.639129638671875, "learning_rate": 4.921622867680959e-05, "log_odds_chosen": 2.3673408031463623, "log_odds_ratio": -0.22872783243656158, "logits/chosen": -0.2898896634578705, "logits/rejected": -0.39120471477508545, "logps/chosen": -0.18079392611980438, "logps/rejected": -0.9120924472808838, "loss": 5.7336, "nll_loss": 1.4105350971221924, "rewards/accuracies": 0.875, "rewards/chosen": -0.018079392611980438, "rewards/margins": 0.07312985509634018, "rewards/rejected": -0.09120924770832062, "step": 1650 }, { "epoch": 1.1417704011065006, "grad_norm": 6.867001533508301, "learning_rate": 4.921238666051944e-05, "log_odds_chosen": 2.202517509460449, "log_odds_ratio": -0.3454076945781708, "logits/chosen": -0.6197003126144409, "logits/rejected": -0.6169849634170532, "logps/chosen": -0.09023884683847427, "logps/rejected": -0.3344111442565918, "loss": 5.8866, "nll_loss": 1.4371070861816406, "rewards/accuracies": 0.75, "rewards/chosen": -0.009023885242640972, "rewards/margins": 0.024417227134108543, "rewards/rejected": -0.03344111517071724, "step": 1651 }, { "epoch": 1.1424619640387275, "grad_norm": 5.405807971954346, "learning_rate": 4.9208544644229294e-05, "log_odds_chosen": 2.6169772148132324, "log_odds_ratio": -0.48516252636909485, "logits/chosen": -0.5111470818519592, "logits/rejected": -0.5195670127868652, "logps/chosen": -0.12113036215305328, "logps/rejected": -0.5095824599266052, "loss": 3.1151, "nll_loss": 0.7302597761154175, "rewards/accuracies": 0.75, "rewards/chosen": -0.012113036587834358, "rewards/margins": 0.038845207542181015, "rewards/rejected": -0.05095824971795082, "step": 1652 }, { "epoch": 1.1431535269709543, "grad_norm": 5.10026741027832, "learning_rate": 4.920470262793914e-05, "log_odds_chosen": 3.596342086791992, "log_odds_ratio": -0.4324381947517395, "logits/chosen": -0.4986104369163513, "logits/rejected": -0.5461660027503967, "logps/chosen": -0.15873593091964722, "logps/rejected": -0.6344982385635376, "loss": 3.1703, "nll_loss": 0.7493206262588501, "rewards/accuracies": 0.75, "rewards/chosen": -0.015873592346906662, "rewards/margins": 0.04757623374462128, "rewards/rejected": -0.06344982981681824, "step": 1653 }, { "epoch": 1.1438450899031811, "grad_norm": 4.311457633972168, "learning_rate": 4.9200860611649e-05, "log_odds_chosen": 3.192807197570801, "log_odds_ratio": -0.24010953307151794, "logits/chosen": -0.3876314163208008, "logits/rejected": -0.4332069456577301, "logps/chosen": -0.08185150474309921, "logps/rejected": -0.5942113399505615, "loss": 2.6524, "nll_loss": 0.6390830874443054, "rewards/accuracies": 0.875, "rewards/chosen": -0.008185150101780891, "rewards/margins": 0.051235977560281754, "rewards/rejected": -0.05942113697528839, "step": 1654 }, { "epoch": 1.144536652835408, "grad_norm": 4.532189846038818, "learning_rate": 4.9197018595358845e-05, "log_odds_chosen": 3.096100091934204, "log_odds_ratio": -0.23214709758758545, "logits/chosen": -0.6746965646743774, "logits/rejected": -0.6621944308280945, "logps/chosen": -0.10050790756940842, "logps/rejected": -0.555006742477417, "loss": 3.6235, "nll_loss": 0.8826611042022705, "rewards/accuracies": 1.0, "rewards/chosen": -0.010050790384411812, "rewards/margins": 0.0454498827457428, "rewards/rejected": -0.05550067499279976, "step": 1655 }, { "epoch": 1.1452282157676348, "grad_norm": 4.945940971374512, "learning_rate": 4.91931765790687e-05, "log_odds_chosen": 2.6006646156311035, "log_odds_ratio": -0.2788270115852356, "logits/chosen": -0.6870574355125427, "logits/rejected": -0.6803454756736755, "logps/chosen": -0.06980787962675095, "logps/rejected": -0.5878862142562866, "loss": 3.3184, "nll_loss": 0.8017243146896362, "rewards/accuracies": 0.875, "rewards/chosen": -0.006980787497013807, "rewards/margins": 0.05180782452225685, "rewards/rejected": -0.058788616210222244, "step": 1656 }, { "epoch": 1.1459197786998616, "grad_norm": 10.107165336608887, "learning_rate": 4.918933456277855e-05, "log_odds_chosen": 3.2974982261657715, "log_odds_ratio": -0.4954441785812378, "logits/chosen": -0.3904740512371063, "logits/rejected": -0.378157377243042, "logps/chosen": -0.10745969414710999, "logps/rejected": -0.4751533567905426, "loss": 4.543, "nll_loss": 1.0862106084823608, "rewards/accuracies": 0.75, "rewards/chosen": -0.010745970532298088, "rewards/margins": 0.03676936775445938, "rewards/rejected": -0.04751533642411232, "step": 1657 }, { "epoch": 1.1466113416320884, "grad_norm": 5.9534173011779785, "learning_rate": 4.91854925464884e-05, "log_odds_chosen": 5.842714309692383, "log_odds_ratio": -0.09394479542970657, "logits/chosen": -0.4603232443332672, "logits/rejected": -0.5066232681274414, "logps/chosen": -0.058203209191560745, "logps/rejected": -1.0633374452590942, "loss": 3.2159, "nll_loss": 0.7945787310600281, "rewards/accuracies": 1.0, "rewards/chosen": -0.005820321384817362, "rewards/margins": 0.10051342844963074, "rewards/rejected": -0.10633374750614166, "step": 1658 }, { "epoch": 1.1473029045643153, "grad_norm": 5.841182708740234, "learning_rate": 4.918165053019825e-05, "log_odds_chosen": 4.049822807312012, "log_odds_ratio": -0.23675966262817383, "logits/chosen": -0.3512836694717407, "logits/rejected": -0.46696898341178894, "logps/chosen": -0.08876635134220123, "logps/rejected": -1.013091802597046, "loss": 4.286, "nll_loss": 1.04783034324646, "rewards/accuracies": 0.75, "rewards/chosen": -0.008876635693013668, "rewards/margins": 0.09243255108594894, "rewards/rejected": -0.10130918771028519, "step": 1659 }, { "epoch": 1.147994467496542, "grad_norm": 4.923659801483154, "learning_rate": 4.91778085139081e-05, "log_odds_chosen": 5.361041069030762, "log_odds_ratio": -0.16646316647529602, "logits/chosen": -0.47271567583084106, "logits/rejected": -0.4884890019893646, "logps/chosen": -0.0668402761220932, "logps/rejected": -0.7941054105758667, "loss": 2.6559, "nll_loss": 0.647331714630127, "rewards/accuracies": 1.0, "rewards/chosen": -0.00668402761220932, "rewards/margins": 0.07272651046514511, "rewards/rejected": -0.07941053807735443, "step": 1660 }, { "epoch": 1.148686030428769, "grad_norm": 5.230505466461182, "learning_rate": 4.917396649761795e-05, "log_odds_chosen": 4.7292022705078125, "log_odds_ratio": -0.158156618475914, "logits/chosen": -0.8374958038330078, "logits/rejected": -0.8386293649673462, "logps/chosen": -0.06256973743438721, "logps/rejected": -0.8360474705696106, "loss": 4.0505, "nll_loss": 0.9968149662017822, "rewards/accuracies": 0.875, "rewards/chosen": -0.0062569743022322655, "rewards/margins": 0.0773477777838707, "rewards/rejected": -0.08360475301742554, "step": 1661 }, { "epoch": 1.1493775933609958, "grad_norm": 6.226810455322266, "learning_rate": 4.91701244813278e-05, "log_odds_chosen": 3.0641565322875977, "log_odds_ratio": -0.2409423142671585, "logits/chosen": -0.569155216217041, "logits/rejected": -0.5948754549026489, "logps/chosen": -0.06277336925268173, "logps/rejected": -0.5739333033561707, "loss": 4.5011, "nll_loss": 1.1011693477630615, "rewards/accuracies": 1.0, "rewards/chosen": -0.006277337204664946, "rewards/margins": 0.05111599713563919, "rewards/rejected": -0.057393334805965424, "step": 1662 }, { "epoch": 1.1500691562932226, "grad_norm": 7.280765056610107, "learning_rate": 4.916628246503766e-05, "log_odds_chosen": 2.8586554527282715, "log_odds_ratio": -0.18323729932308197, "logits/chosen": -0.5155065059661865, "logits/rejected": -0.5760002732276917, "logps/chosen": -0.07034897804260254, "logps/rejected": -0.602337121963501, "loss": 5.7805, "nll_loss": 1.4267985820770264, "rewards/accuracies": 1.0, "rewards/chosen": -0.007034897338598967, "rewards/margins": 0.053198814392089844, "rewards/rejected": -0.0602337121963501, "step": 1663 }, { "epoch": 1.1507607192254494, "grad_norm": 7.845277309417725, "learning_rate": 4.91624404487475e-05, "log_odds_chosen": 5.0160417556762695, "log_odds_ratio": -0.2870984673500061, "logits/chosen": -0.3389076590538025, "logits/rejected": -0.3596407175064087, "logps/chosen": -0.055796217173337936, "logps/rejected": -1.035224437713623, "loss": 4.6075, "nll_loss": 1.1231759786605835, "rewards/accuracies": 0.875, "rewards/chosen": -0.005579621996730566, "rewards/margins": 0.09794282913208008, "rewards/rejected": -0.10352244973182678, "step": 1664 }, { "epoch": 1.1514522821576763, "grad_norm": 5.204296588897705, "learning_rate": 4.9158598432457355e-05, "log_odds_chosen": 6.408123970031738, "log_odds_ratio": -0.12883061170578003, "logits/chosen": -0.22733397781848907, "logits/rejected": -0.31913691759109497, "logps/chosen": -0.05134060978889465, "logps/rejected": -0.7932345271110535, "loss": 2.6177, "nll_loss": 0.6415426135063171, "rewards/accuracies": 0.875, "rewards/chosen": -0.005134060978889465, "rewards/margins": 0.07418939471244812, "rewards/rejected": -0.07932344824075699, "step": 1665 }, { "epoch": 1.152143845089903, "grad_norm": 4.909811973571777, "learning_rate": 4.915475641616721e-05, "log_odds_chosen": 4.934022903442383, "log_odds_ratio": -0.12989267706871033, "logits/chosen": -0.3042985796928406, "logits/rejected": -0.3891471028327942, "logps/chosen": -0.07688090205192566, "logps/rejected": -0.896817684173584, "loss": 2.7567, "nll_loss": 0.676192045211792, "rewards/accuracies": 1.0, "rewards/chosen": -0.007688090205192566, "rewards/margins": 0.08199368417263031, "rewards/rejected": -0.08968178182840347, "step": 1666 }, { "epoch": 1.15283540802213, "grad_norm": 5.710595607757568, "learning_rate": 4.915091439987706e-05, "log_odds_chosen": 3.5815577507019043, "log_odds_ratio": -0.2634783983230591, "logits/chosen": -0.7198811769485474, "logits/rejected": -0.7444949150085449, "logps/chosen": -0.11194150894880295, "logps/rejected": -0.595949113368988, "loss": 4.0449, "nll_loss": 0.9848828911781311, "rewards/accuracies": 0.75, "rewards/chosen": -0.011194150894880295, "rewards/margins": 0.04840076342225075, "rewards/rejected": -0.05959491431713104, "step": 1667 }, { "epoch": 1.1535269709543567, "grad_norm": 8.014622688293457, "learning_rate": 4.9147072383586906e-05, "log_odds_chosen": 2.484999418258667, "log_odds_ratio": -0.5048198103904724, "logits/chosen": 0.004195423796772957, "logits/rejected": -0.047088563442230225, "logps/chosen": -0.18642094731330872, "logps/rejected": -0.5310749411582947, "loss": 4.4745, "nll_loss": 1.0681509971618652, "rewards/accuracies": 0.75, "rewards/chosen": -0.018642093986272812, "rewards/margins": 0.034465398639440536, "rewards/rejected": -0.05310749262571335, "step": 1668 }, { "epoch": 1.1542185338865836, "grad_norm": 5.527405738830566, "learning_rate": 4.914323036729676e-05, "log_odds_chosen": 2.0551114082336426, "log_odds_ratio": -0.18518784642219543, "logits/chosen": -0.3637649118900299, "logits/rejected": -0.3871780037879944, "logps/chosen": -0.09218564629554749, "logps/rejected": -0.518540620803833, "loss": 3.6033, "nll_loss": 0.8823060393333435, "rewards/accuracies": 0.875, "rewards/chosen": -0.009218564257025719, "rewards/margins": 0.042635492980480194, "rewards/rejected": -0.05185405910015106, "step": 1669 }, { "epoch": 1.1549100968188104, "grad_norm": 4.045838356018066, "learning_rate": 4.913938835100661e-05, "log_odds_chosen": 5.368803977966309, "log_odds_ratio": -0.06457238644361496, "logits/chosen": -0.7588016986846924, "logits/rejected": -0.7690705060958862, "logps/chosen": -0.015393403358757496, "logps/rejected": -0.855527400970459, "loss": 2.7714, "nll_loss": 0.6863961219787598, "rewards/accuracies": 1.0, "rewards/chosen": -0.001539340359158814, "rewards/margins": 0.08401340246200562, "rewards/rejected": -0.08555274456739426, "step": 1670 }, { "epoch": 1.1556016597510372, "grad_norm": 5.965897083282471, "learning_rate": 4.9135546334716457e-05, "log_odds_chosen": 2.9094269275665283, "log_odds_ratio": -0.46307048201560974, "logits/chosen": -0.2961413860321045, "logits/rejected": -0.27826306223869324, "logps/chosen": -0.09813665598630905, "logps/rejected": -0.6231961250305176, "loss": 3.5255, "nll_loss": 0.835060715675354, "rewards/accuracies": 0.625, "rewards/chosen": -0.009813666343688965, "rewards/margins": 0.05250595137476921, "rewards/rejected": -0.062319621443748474, "step": 1671 }, { "epoch": 1.156293222683264, "grad_norm": 4.455894470214844, "learning_rate": 4.9131704318426316e-05, "log_odds_chosen": 4.973597049713135, "log_odds_ratio": -0.09037226438522339, "logits/chosen": -0.6276228427886963, "logits/rejected": -0.6904111504554749, "logps/chosen": -0.06606549024581909, "logps/rejected": -1.0546118021011353, "loss": 3.0021, "nll_loss": 0.7414852380752563, "rewards/accuracies": 1.0, "rewards/chosen": -0.006606548558920622, "rewards/margins": 0.09885463863611221, "rewards/rejected": -0.10546118021011353, "step": 1672 }, { "epoch": 1.156984785615491, "grad_norm": 4.100534439086914, "learning_rate": 4.912786230213616e-05, "log_odds_chosen": 5.852409362792969, "log_odds_ratio": -0.06742922961711884, "logits/chosen": -0.6525583863258362, "logits/rejected": -0.7228882908821106, "logps/chosen": -0.023829951882362366, "logps/rejected": -1.2559055089950562, "loss": 3.5361, "nll_loss": 0.8772757649421692, "rewards/accuracies": 1.0, "rewards/chosen": -0.002382995095103979, "rewards/margins": 0.12320756167173386, "rewards/rejected": -0.12559056282043457, "step": 1673 }, { "epoch": 1.1576763485477177, "grad_norm": 7.9378790855407715, "learning_rate": 4.9124020285846014e-05, "log_odds_chosen": 2.524151563644409, "log_odds_ratio": -0.6948474049568176, "logits/chosen": -0.7985988855361938, "logits/rejected": -0.8701484799385071, "logps/chosen": -0.11264081299304962, "logps/rejected": -0.5082597732543945, "loss": 5.014, "nll_loss": 1.1840271949768066, "rewards/accuracies": 0.75, "rewards/chosen": -0.011264080181717873, "rewards/margins": 0.03956189751625061, "rewards/rejected": -0.050825975835323334, "step": 1674 }, { "epoch": 1.1583679114799446, "grad_norm": 6.94173002243042, "learning_rate": 4.9120178269555866e-05, "log_odds_chosen": 4.101590156555176, "log_odds_ratio": -0.3209138512611389, "logits/chosen": -0.3075600564479828, "logits/rejected": -0.34465670585632324, "logps/chosen": -0.13615387678146362, "logps/rejected": -1.0777006149291992, "loss": 3.8975, "nll_loss": 0.9422775506973267, "rewards/accuracies": 0.75, "rewards/chosen": -0.013615388423204422, "rewards/margins": 0.09415467083454132, "rewards/rejected": -0.10777007043361664, "step": 1675 }, { "epoch": 1.1590594744121714, "grad_norm": 4.6008195877075195, "learning_rate": 4.911633625326572e-05, "log_odds_chosen": 4.601840972900391, "log_odds_ratio": -0.21844017505645752, "logits/chosen": -0.18146252632141113, "logits/rejected": -0.19315530359745026, "logps/chosen": -0.11097032576799393, "logps/rejected": -0.8364883661270142, "loss": 3.2465, "nll_loss": 0.7897868752479553, "rewards/accuracies": 0.75, "rewards/chosen": -0.011097033508121967, "rewards/margins": 0.07255180180072784, "rewards/rejected": -0.08364883810281754, "step": 1676 }, { "epoch": 1.1597510373443982, "grad_norm": 5.564633846282959, "learning_rate": 4.9112494236975564e-05, "log_odds_chosen": 3.663455009460449, "log_odds_ratio": -0.11242678761482239, "logits/chosen": -0.7468219995498657, "logits/rejected": -0.7474039196968079, "logps/chosen": -0.133896142244339, "logps/rejected": -1.2125940322875977, "loss": 3.4401, "nll_loss": 0.8487862348556519, "rewards/accuracies": 1.0, "rewards/chosen": -0.013389615342020988, "rewards/margins": 0.10786978900432587, "rewards/rejected": -0.12125939875841141, "step": 1677 }, { "epoch": 1.1604426002766253, "grad_norm": 5.151391506195068, "learning_rate": 4.910865222068542e-05, "log_odds_chosen": 3.6544852256774902, "log_odds_ratio": -0.3815726935863495, "logits/chosen": -0.5739408135414124, "logits/rejected": -0.6253122687339783, "logps/chosen": -0.1026638001203537, "logps/rejected": -0.4522859752178192, "loss": 3.6439, "nll_loss": 0.8728081583976746, "rewards/accuracies": 0.75, "rewards/chosen": -0.010266379453241825, "rewards/margins": 0.03496221452951431, "rewards/rejected": -0.04522860050201416, "step": 1678 }, { "epoch": 1.161134163208852, "grad_norm": 4.976283550262451, "learning_rate": 4.910481020439527e-05, "log_odds_chosen": 5.420558929443359, "log_odds_ratio": -0.09251780807971954, "logits/chosen": -0.658862292766571, "logits/rejected": -0.6868621706962585, "logps/chosen": -0.03994268178939819, "logps/rejected": -0.7436200380325317, "loss": 2.3847, "nll_loss": 0.5869274139404297, "rewards/accuracies": 1.0, "rewards/chosen": -0.003994268365204334, "rewards/margins": 0.0703677386045456, "rewards/rejected": -0.07436200976371765, "step": 1679 }, { "epoch": 1.161825726141079, "grad_norm": 4.847886562347412, "learning_rate": 4.9100968188105115e-05, "log_odds_chosen": 3.639221429824829, "log_odds_ratio": -0.24989987909793854, "logits/chosen": -0.5126394629478455, "logits/rejected": -0.5004561543464661, "logps/chosen": -0.07378698885440826, "logps/rejected": -0.6940549612045288, "loss": 3.1331, "nll_loss": 0.7582850456237793, "rewards/accuracies": 0.875, "rewards/chosen": -0.007378697860985994, "rewards/margins": 0.062026798725128174, "rewards/rejected": -0.06940549612045288, "step": 1680 }, { "epoch": 1.1625172890733058, "grad_norm": 6.826456546783447, "learning_rate": 4.9097126171814974e-05, "log_odds_chosen": 4.123432636260986, "log_odds_ratio": -0.23046131432056427, "logits/chosen": -0.5177972912788391, "logits/rejected": -0.562575101852417, "logps/chosen": -0.055708374828100204, "logps/rejected": -0.8523918390274048, "loss": 3.4889, "nll_loss": 0.8491816520690918, "rewards/accuracies": 0.875, "rewards/chosen": -0.005570837762206793, "rewards/margins": 0.07966834306716919, "rewards/rejected": -0.08523918688297272, "step": 1681 }, { "epoch": 1.1632088520055326, "grad_norm": 4.31039571762085, "learning_rate": 4.909328415552482e-05, "log_odds_chosen": 3.9521539211273193, "log_odds_ratio": -0.318380206823349, "logits/chosen": -0.30143076181411743, "logits/rejected": -0.30992391705513, "logps/chosen": -0.1237991452217102, "logps/rejected": -0.7632213234901428, "loss": 2.6952, "nll_loss": 0.6419578194618225, "rewards/accuracies": 0.875, "rewards/chosen": -0.01237991638481617, "rewards/margins": 0.06394222378730774, "rewards/rejected": -0.07632213830947876, "step": 1682 }, { "epoch": 1.1639004149377594, "grad_norm": 5.163027286529541, "learning_rate": 4.908944213923467e-05, "log_odds_chosen": 4.225794315338135, "log_odds_ratio": -0.20646844804286957, "logits/chosen": -0.170430988073349, "logits/rejected": -0.26145029067993164, "logps/chosen": -0.08614880591630936, "logps/rejected": -0.6882296204566956, "loss": 2.9342, "nll_loss": 0.7129086852073669, "rewards/accuracies": 0.875, "rewards/chosen": -0.008614880032837391, "rewards/margins": 0.06020808592438698, "rewards/rejected": -0.0688229650259018, "step": 1683 }, { "epoch": 1.1645919778699863, "grad_norm": 6.314855575561523, "learning_rate": 4.9085600122944525e-05, "log_odds_chosen": 6.348881721496582, "log_odds_ratio": -0.03455987200140953, "logits/chosen": -0.7655620574951172, "logits/rejected": -0.794634222984314, "logps/chosen": -0.01900785230100155, "logps/rejected": -1.014559030532837, "loss": 3.2221, "nll_loss": 0.8020722270011902, "rewards/accuracies": 1.0, "rewards/chosen": -0.001900785369798541, "rewards/margins": 0.0995551124215126, "rewards/rejected": -0.10145590454339981, "step": 1684 }, { "epoch": 1.165283540802213, "grad_norm": 6.513770580291748, "learning_rate": 4.908175810665438e-05, "log_odds_chosen": 4.891922473907471, "log_odds_ratio": -0.09184511005878448, "logits/chosen": -0.4809439778327942, "logits/rejected": -0.5717316269874573, "logps/chosen": -0.06514959037303925, "logps/rejected": -0.9885765314102173, "loss": 4.2231, "nll_loss": 1.0465795993804932, "rewards/accuracies": 1.0, "rewards/chosen": -0.006514958571642637, "rewards/margins": 0.09234270453453064, "rewards/rejected": -0.09885765612125397, "step": 1685 }, { "epoch": 1.16597510373444, "grad_norm": 6.499868392944336, "learning_rate": 4.907791609036422e-05, "log_odds_chosen": 5.693251609802246, "log_odds_ratio": -0.3279362618923187, "logits/chosen": -0.574987530708313, "logits/rejected": -0.6270314455032349, "logps/chosen": -0.06349683552980423, "logps/rejected": -0.9440611600875854, "loss": 1.991, "nll_loss": 0.4649551510810852, "rewards/accuracies": 0.875, "rewards/chosen": -0.006349683273583651, "rewards/margins": 0.08805642277002335, "rewards/rejected": -0.0944061130285263, "step": 1686 }, { "epoch": 1.1666666666666667, "grad_norm": 7.496671676635742, "learning_rate": 4.9074074074074075e-05, "log_odds_chosen": 3.44323992729187, "log_odds_ratio": -0.5197282433509827, "logits/chosen": -0.415819376707077, "logits/rejected": -0.40631169080734253, "logps/chosen": -0.12665203213691711, "logps/rejected": -0.8945168256759644, "loss": 3.9831, "nll_loss": 0.9438073039054871, "rewards/accuracies": 0.75, "rewards/chosen": -0.01266520470380783, "rewards/margins": 0.07678647339344025, "rewards/rejected": -0.08945168554782867, "step": 1687 }, { "epoch": 1.1673582295988936, "grad_norm": 5.101566314697266, "learning_rate": 4.907023205778393e-05, "log_odds_chosen": 3.5672669410705566, "log_odds_ratio": -0.20680952072143555, "logits/chosen": -0.42624789476394653, "logits/rejected": -0.466632604598999, "logps/chosen": -0.1136215478181839, "logps/rejected": -0.874742329120636, "loss": 3.3617, "nll_loss": 0.8197535276412964, "rewards/accuracies": 0.75, "rewards/chosen": -0.011362154968082905, "rewards/margins": 0.07611207664012909, "rewards/rejected": -0.08747424185276031, "step": 1688 }, { "epoch": 1.1680497925311204, "grad_norm": 6.7860283851623535, "learning_rate": 4.9066390041493773e-05, "log_odds_chosen": 4.040188312530518, "log_odds_ratio": -0.37084370851516724, "logits/chosen": -0.4637543261051178, "logits/rejected": -0.5015013813972473, "logps/chosen": -0.1526714712381363, "logps/rejected": -0.9633198976516724, "loss": 4.2893, "nll_loss": 1.0352481603622437, "rewards/accuracies": 0.75, "rewards/chosen": -0.015267147682607174, "rewards/margins": 0.08106484264135361, "rewards/rejected": -0.09633199125528336, "step": 1689 }, { "epoch": 1.1687413554633472, "grad_norm": 7.010456085205078, "learning_rate": 4.906254802520363e-05, "log_odds_chosen": 2.909613609313965, "log_odds_ratio": -0.18302452564239502, "logits/chosen": -0.15228617191314697, "logits/rejected": -0.10386423766613007, "logps/chosen": -0.09300586581230164, "logps/rejected": -0.5666298270225525, "loss": 4.2026, "nll_loss": 1.0323392152786255, "rewards/accuracies": 1.0, "rewards/chosen": -0.009300586767494678, "rewards/margins": 0.047362394630908966, "rewards/rejected": -0.05666298419237137, "step": 1690 }, { "epoch": 1.169432918395574, "grad_norm": 5.009253025054932, "learning_rate": 4.905870600891348e-05, "log_odds_chosen": 4.034602642059326, "log_odds_ratio": -0.22675380110740662, "logits/chosen": -0.3099116086959839, "logits/rejected": -0.34099122881889343, "logps/chosen": -0.1059737503528595, "logps/rejected": -0.7704473733901978, "loss": 3.7081, "nll_loss": 0.9043589234352112, "rewards/accuracies": 0.75, "rewards/chosen": -0.010597375221550465, "rewards/margins": 0.06644736230373383, "rewards/rejected": -0.07704474031925201, "step": 1691 }, { "epoch": 1.170124481327801, "grad_norm": 6.25744104385376, "learning_rate": 4.905486399262333e-05, "log_odds_chosen": 4.162661075592041, "log_odds_ratio": -0.06848907470703125, "logits/chosen": -0.5197206735610962, "logits/rejected": -0.5505616664886475, "logps/chosen": -0.03739035129547119, "logps/rejected": -0.6413342952728271, "loss": 4.7416, "nll_loss": 1.1785529851913452, "rewards/accuracies": 1.0, "rewards/chosen": -0.003739034989848733, "rewards/margins": 0.06039439141750336, "rewards/rejected": -0.0641334280371666, "step": 1692 }, { "epoch": 1.1708160442600277, "grad_norm": 10.99494457244873, "learning_rate": 4.905102197633318e-05, "log_odds_chosen": 5.198581218719482, "log_odds_ratio": -0.0648670345544815, "logits/chosen": -0.4666910767555237, "logits/rejected": -0.47967618703842163, "logps/chosen": -0.07335232943296432, "logps/rejected": -0.8208720684051514, "loss": 2.714, "nll_loss": 0.6720219850540161, "rewards/accuracies": 1.0, "rewards/chosen": -0.00733523303642869, "rewards/margins": 0.07475197315216064, "rewards/rejected": -0.0820872038602829, "step": 1693 }, { "epoch": 1.1715076071922546, "grad_norm": 4.8345441818237305, "learning_rate": 4.9047179960043036e-05, "log_odds_chosen": 2.570599317550659, "log_odds_ratio": -0.496211975812912, "logits/chosen": -0.5026839375495911, "logits/rejected": -0.4709765911102295, "logps/chosen": -0.16171272099018097, "logps/rejected": -0.49773257970809937, "loss": 2.9871, "nll_loss": 0.6971441507339478, "rewards/accuracies": 0.625, "rewards/chosen": -0.016171272844076157, "rewards/margins": 0.03360198438167572, "rewards/rejected": -0.04977325722575188, "step": 1694 }, { "epoch": 1.1721991701244814, "grad_norm": 5.599493980407715, "learning_rate": 4.904333794375288e-05, "log_odds_chosen": 6.313642501831055, "log_odds_ratio": -0.23186209797859192, "logits/chosen": -0.17842333018779755, "logits/rejected": -0.20784892141819, "logps/chosen": -0.10879230499267578, "logps/rejected": -0.7824175357818604, "loss": 3.2663, "nll_loss": 0.793390154838562, "rewards/accuracies": 0.875, "rewards/chosen": -0.010879230685532093, "rewards/margins": 0.06736251711845398, "rewards/rejected": -0.0782417505979538, "step": 1695 }, { "epoch": 1.1728907330567082, "grad_norm": 4.865135192871094, "learning_rate": 4.9039495927462734e-05, "log_odds_chosen": 2.9153175354003906, "log_odds_ratio": -0.29287028312683105, "logits/chosen": -0.3575715720653534, "logits/rejected": -0.3803892731666565, "logps/chosen": -0.0807889997959137, "logps/rejected": -0.5074627995491028, "loss": 3.2277, "nll_loss": 0.7776476740837097, "rewards/accuracies": 0.875, "rewards/chosen": -0.00807889923453331, "rewards/margins": 0.04266738519072533, "rewards/rejected": -0.050746284425258636, "step": 1696 }, { "epoch": 1.173582295988935, "grad_norm": 5.336679935455322, "learning_rate": 4.9035653911172586e-05, "log_odds_chosen": 7.413576602935791, "log_odds_ratio": -0.004513254389166832, "logits/chosen": -0.46858909726142883, "logits/rejected": -0.4611837863922119, "logps/chosen": -0.01939975842833519, "logps/rejected": -1.1275957822799683, "loss": 3.077, "nll_loss": 0.7687873840332031, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019399760058149695, "rewards/margins": 0.11081959307193756, "rewards/rejected": -0.11275956779718399, "step": 1697 }, { "epoch": 1.1742738589211619, "grad_norm": 4.496361255645752, "learning_rate": 4.903181189488243e-05, "log_odds_chosen": 5.660953521728516, "log_odds_ratio": -0.05301395803689957, "logits/chosen": -0.27618223428726196, "logits/rejected": -0.31661874055862427, "logps/chosen": -0.02332213707268238, "logps/rejected": -0.6976031064987183, "loss": 2.9289, "nll_loss": 0.726930558681488, "rewards/accuracies": 1.0, "rewards/chosen": -0.002332213567569852, "rewards/margins": 0.0674280971288681, "rewards/rejected": -0.06976031512022018, "step": 1698 }, { "epoch": 1.1749654218533887, "grad_norm": 6.71616268157959, "learning_rate": 4.902796987859229e-05, "log_odds_chosen": 2.6607582569122314, "log_odds_ratio": -0.28555721044540405, "logits/chosen": -0.7132636904716492, "logits/rejected": -0.7477965354919434, "logps/chosen": -0.15023541450500488, "logps/rejected": -0.6951221227645874, "loss": 4.5801, "nll_loss": 1.1164746284484863, "rewards/accuracies": 0.875, "rewards/chosen": -0.015023542568087578, "rewards/margins": 0.05448867008090019, "rewards/rejected": -0.06951221823692322, "step": 1699 }, { "epoch": 1.1756569847856155, "grad_norm": 5.5194783210754395, "learning_rate": 4.902412786230214e-05, "log_odds_chosen": 2.8025269508361816, "log_odds_ratio": -0.5562505722045898, "logits/chosen": -0.6272051334381104, "logits/rejected": -0.6220443248748779, "logps/chosen": -0.1548004150390625, "logps/rejected": -0.5177695751190186, "loss": 3.2813, "nll_loss": 0.7647000551223755, "rewards/accuracies": 0.625, "rewards/chosen": -0.01548004150390625, "rewards/margins": 0.036296918988227844, "rewards/rejected": -0.051776956766843796, "step": 1700 }, { "epoch": 1.1763485477178424, "grad_norm": 4.19633150100708, "learning_rate": 4.902028584601199e-05, "log_odds_chosen": 5.554427146911621, "log_odds_ratio": -0.12019255757331848, "logits/chosen": -0.19540315866470337, "logits/rejected": -0.2124204933643341, "logps/chosen": -0.039816707372665405, "logps/rejected": -1.157930612564087, "loss": 3.2301, "nll_loss": 0.7955155968666077, "rewards/accuracies": 0.875, "rewards/chosen": -0.003981670830398798, "rewards/margins": 0.11181138455867767, "rewards/rejected": -0.11579305678606033, "step": 1701 }, { "epoch": 1.1770401106500692, "grad_norm": 4.260463237762451, "learning_rate": 4.901644382972184e-05, "log_odds_chosen": 5.628140449523926, "log_odds_ratio": -0.08788633346557617, "logits/chosen": -0.7331647872924805, "logits/rejected": -0.7790195345878601, "logps/chosen": -0.025510985404253006, "logps/rejected": -0.7099208235740662, "loss": 2.6703, "nll_loss": 0.6587742567062378, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025510985869914293, "rewards/margins": 0.0684409886598587, "rewards/rejected": -0.07099208235740662, "step": 1702 }, { "epoch": 1.177731673582296, "grad_norm": 5.894955158233643, "learning_rate": 4.9012601813431694e-05, "log_odds_chosen": 1.5804680585861206, "log_odds_ratio": -0.6186449527740479, "logits/chosen": -0.4635010361671448, "logits/rejected": -0.5034515857696533, "logps/chosen": -0.1285967230796814, "logps/rejected": -0.4574553966522217, "loss": 3.4464, "nll_loss": 0.7997399568557739, "rewards/accuracies": 0.625, "rewards/chosen": -0.01285967230796814, "rewards/margins": 0.032885871827602386, "rewards/rejected": -0.045745544135570526, "step": 1703 }, { "epoch": 1.1784232365145229, "grad_norm": 4.228522777557373, "learning_rate": 4.900875979714154e-05, "log_odds_chosen": 5.511290550231934, "log_odds_ratio": -0.052493322640657425, "logits/chosen": -0.354582816362381, "logits/rejected": -0.3201233744621277, "logps/chosen": -0.045021433383226395, "logps/rejected": -0.7105008959770203, "loss": 2.8926, "nll_loss": 0.7179022431373596, "rewards/accuracies": 1.0, "rewards/chosen": -0.004502143245190382, "rewards/margins": 0.0665479451417923, "rewards/rejected": -0.07105008512735367, "step": 1704 }, { "epoch": 1.1791147994467497, "grad_norm": 6.06090784072876, "learning_rate": 4.900491778085139e-05, "log_odds_chosen": 4.680421352386475, "log_odds_ratio": -0.04448583722114563, "logits/chosen": -0.6964292526245117, "logits/rejected": -0.6882299780845642, "logps/chosen": -0.027521885931491852, "logps/rejected": -0.6695336699485779, "loss": 3.5434, "nll_loss": 0.881403386592865, "rewards/accuracies": 1.0, "rewards/chosen": -0.002752188825979829, "rewards/margins": 0.06420118361711502, "rewards/rejected": -0.06695336848497391, "step": 1705 }, { "epoch": 1.1798063623789765, "grad_norm": 5.982486248016357, "learning_rate": 4.9001075764561245e-05, "log_odds_chosen": 4.029968738555908, "log_odds_ratio": -0.15640224516391754, "logits/chosen": -0.42293068766593933, "logits/rejected": -0.5107755661010742, "logps/chosen": -0.08865299820899963, "logps/rejected": -0.9007152318954468, "loss": 3.6548, "nll_loss": 0.898070752620697, "rewards/accuracies": 0.875, "rewards/chosen": -0.008865299634635448, "rewards/margins": 0.08120621740818024, "rewards/rejected": -0.09007152169942856, "step": 1706 }, { "epoch": 1.1804979253112033, "grad_norm": 8.436861991882324, "learning_rate": 4.899723374827109e-05, "log_odds_chosen": 3.1094229221343994, "log_odds_ratio": -0.23974156379699707, "logits/chosen": -0.6728495955467224, "logits/rejected": -0.7456769943237305, "logps/chosen": -0.07639665901660919, "logps/rejected": -0.45884567499160767, "loss": 4.2164, "nll_loss": 1.0301259756088257, "rewards/accuracies": 0.875, "rewards/chosen": -0.007639665622264147, "rewards/margins": 0.03824490308761597, "rewards/rejected": -0.045884571969509125, "step": 1707 }, { "epoch": 1.1811894882434302, "grad_norm": 6.517073154449463, "learning_rate": 4.899339173198095e-05, "log_odds_chosen": 4.08290433883667, "log_odds_ratio": -0.45967811346054077, "logits/chosen": -0.3719734251499176, "logits/rejected": -0.4058666527271271, "logps/chosen": -0.04212799295783043, "logps/rejected": -0.5582305192947388, "loss": 3.415, "nll_loss": 0.8077764511108398, "rewards/accuracies": 0.75, "rewards/chosen": -0.004212799482047558, "rewards/margins": 0.051610250025987625, "rewards/rejected": -0.05582305043935776, "step": 1708 }, { "epoch": 1.181881051175657, "grad_norm": 6.491971969604492, "learning_rate": 4.8989549715690795e-05, "log_odds_chosen": 2.971602201461792, "log_odds_ratio": -0.25535744428634644, "logits/chosen": -0.6482892036437988, "logits/rejected": -0.642250657081604, "logps/chosen": -0.11500442028045654, "logps/rejected": -0.7281869053840637, "loss": 3.9964, "nll_loss": 0.9735685586929321, "rewards/accuracies": 0.75, "rewards/chosen": -0.011500442400574684, "rewards/margins": 0.06131824851036072, "rewards/rejected": -0.07281868904829025, "step": 1709 }, { "epoch": 1.1825726141078838, "grad_norm": 4.486275672912598, "learning_rate": 4.898570769940065e-05, "log_odds_chosen": 5.068606376647949, "log_odds_ratio": -0.21615208685398102, "logits/chosen": -0.7731133699417114, "logits/rejected": -0.7959321737289429, "logps/chosen": -0.10496488958597183, "logps/rejected": -0.5765972137451172, "loss": 3.7558, "nll_loss": 0.9173317551612854, "rewards/accuracies": 0.875, "rewards/chosen": -0.010496489703655243, "rewards/margins": 0.047163236886262894, "rewards/rejected": -0.05765972286462784, "step": 1710 }, { "epoch": 1.1832641770401107, "grad_norm": 6.759716987609863, "learning_rate": 4.89818656831105e-05, "log_odds_chosen": 4.021801948547363, "log_odds_ratio": -0.2717605531215668, "logits/chosen": -0.3650795817375183, "logits/rejected": -0.4151354432106018, "logps/chosen": -0.06702595949172974, "logps/rejected": -0.6509315967559814, "loss": 3.7085, "nll_loss": 0.899939775466919, "rewards/accuracies": 0.75, "rewards/chosen": -0.006702595390379429, "rewards/margins": 0.05839055776596069, "rewards/rejected": -0.06509315967559814, "step": 1711 }, { "epoch": 1.1839557399723375, "grad_norm": 5.515722751617432, "learning_rate": 4.897802366682035e-05, "log_odds_chosen": 2.76090145111084, "log_odds_ratio": -0.47430720925331116, "logits/chosen": -0.1472322642803192, "logits/rejected": -0.19560343027114868, "logps/chosen": -0.09127159416675568, "logps/rejected": -0.47359779477119446, "loss": 3.639, "nll_loss": 0.8623095154762268, "rewards/accuracies": 0.75, "rewards/chosen": -0.009127158671617508, "rewards/margins": 0.03823261708021164, "rewards/rejected": -0.047359779477119446, "step": 1712 }, { "epoch": 1.1846473029045643, "grad_norm": 5.288144111633301, "learning_rate": 4.89741816505302e-05, "log_odds_chosen": 4.28141975402832, "log_odds_ratio": -0.12948086857795715, "logits/chosen": 0.026375465095043182, "logits/rejected": 0.011478882282972336, "logps/chosen": -0.07874485850334167, "logps/rejected": -0.8107168674468994, "loss": 2.7414, "nll_loss": 0.6723971366882324, "rewards/accuracies": 1.0, "rewards/chosen": -0.007874486967921257, "rewards/margins": 0.07319720089435577, "rewards/rejected": -0.08107168227434158, "step": 1713 }, { "epoch": 1.1853388658367912, "grad_norm": 5.372937202453613, "learning_rate": 4.897033963424005e-05, "log_odds_chosen": 2.137545585632324, "log_odds_ratio": -0.44550761580467224, "logits/chosen": -0.6334435939788818, "logits/rejected": -0.6503807902336121, "logps/chosen": -0.10513751208782196, "logps/rejected": -0.3197248876094818, "loss": 3.7874, "nll_loss": 0.9022948741912842, "rewards/accuracies": 0.75, "rewards/chosen": -0.010513750836253166, "rewards/margins": 0.021458739414811134, "rewards/rejected": -0.031972486525774, "step": 1714 }, { "epoch": 1.186030428769018, "grad_norm": 4.750962734222412, "learning_rate": 4.89664976179499e-05, "log_odds_chosen": 3.8123714923858643, "log_odds_ratio": -0.2624880075454712, "logits/chosen": -0.5540913343429565, "logits/rejected": -0.5961364507675171, "logps/chosen": -0.08316250890493393, "logps/rejected": -0.7523815631866455, "loss": 3.0809, "nll_loss": 0.7439756989479065, "rewards/accuracies": 0.875, "rewards/chosen": -0.008316251449286938, "rewards/margins": 0.0669219046831131, "rewards/rejected": -0.07523815333843231, "step": 1715 }, { "epoch": 1.1867219917012448, "grad_norm": 8.46352481842041, "learning_rate": 4.896265560165975e-05, "log_odds_chosen": 5.548409938812256, "log_odds_ratio": -0.22372980415821075, "logits/chosen": -0.20582397282123566, "logits/rejected": -0.1781938374042511, "logps/chosen": -0.06882276386022568, "logps/rejected": -0.8869989514350891, "loss": 4.0499, "nll_loss": 0.9900984764099121, "rewards/accuracies": 0.875, "rewards/chosen": -0.006882276386022568, "rewards/margins": 0.0818176195025444, "rewards/rejected": -0.08869989961385727, "step": 1716 }, { "epoch": 1.1874135546334716, "grad_norm": 5.552541732788086, "learning_rate": 4.895881358536961e-05, "log_odds_chosen": 5.818368911743164, "log_odds_ratio": -0.052378714084625244, "logits/chosen": -0.19172891974449158, "logits/rejected": -0.352263867855072, "logps/chosen": -0.016883784905076027, "logps/rejected": -0.8089572191238403, "loss": 2.9399, "nll_loss": 0.7297303080558777, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016883786302059889, "rewards/margins": 0.07920734584331512, "rewards/rejected": -0.08089572191238403, "step": 1717 }, { "epoch": 1.1881051175656985, "grad_norm": 5.634227275848389, "learning_rate": 4.8954971569079454e-05, "log_odds_chosen": 4.411200046539307, "log_odds_ratio": -0.13465569913387299, "logits/chosen": -0.39296990633010864, "logits/rejected": -0.44110405445098877, "logps/chosen": -0.041829999536275864, "logps/rejected": -0.8756586909294128, "loss": 3.3538, "nll_loss": 0.8249937891960144, "rewards/accuracies": 1.0, "rewards/chosen": -0.004182999953627586, "rewards/margins": 0.08338287472724915, "rewards/rejected": -0.08756587654352188, "step": 1718 }, { "epoch": 1.1887966804979253, "grad_norm": 6.191216468811035, "learning_rate": 4.8951129552789306e-05, "log_odds_chosen": 4.064810752868652, "log_odds_ratio": -0.2015867829322815, "logits/chosen": -0.4229384958744049, "logits/rejected": -0.4340212047100067, "logps/chosen": -0.04063744470477104, "logps/rejected": -0.73988938331604, "loss": 2.6727, "nll_loss": 0.6480090618133545, "rewards/accuracies": 0.875, "rewards/chosen": -0.004063744563609362, "rewards/margins": 0.0699252039194107, "rewards/rejected": -0.07398894429206848, "step": 1719 }, { "epoch": 1.1894882434301521, "grad_norm": 3.5908620357513428, "learning_rate": 4.894728753649916e-05, "log_odds_chosen": 4.018005847930908, "log_odds_ratio": -0.30734917521476746, "logits/chosen": -0.5695242881774902, "logits/rejected": -0.5691094994544983, "logps/chosen": -0.08586877584457397, "logps/rejected": -0.659201979637146, "loss": 2.8098, "nll_loss": 0.6717254519462585, "rewards/accuracies": 0.75, "rewards/chosen": -0.008586877025663853, "rewards/margins": 0.057333312928676605, "rewards/rejected": -0.06592019647359848, "step": 1720 }, { "epoch": 1.190179806362379, "grad_norm": 5.031204700469971, "learning_rate": 4.894344552020901e-05, "log_odds_chosen": 3.613013505935669, "log_odds_ratio": -0.14017079770565033, "logits/chosen": -0.6082075834274292, "logits/rejected": -0.640134871006012, "logps/chosen": -0.06326667964458466, "logps/rejected": -0.5767212510108948, "loss": 2.7309, "nll_loss": 0.6687150001525879, "rewards/accuracies": 1.0, "rewards/chosen": -0.006326667964458466, "rewards/margins": 0.05134545639157295, "rewards/rejected": -0.05767212063074112, "step": 1721 }, { "epoch": 1.1908713692946058, "grad_norm": 4.486302852630615, "learning_rate": 4.8939603503918857e-05, "log_odds_chosen": 3.342613697052002, "log_odds_ratio": -0.20768174529075623, "logits/chosen": -0.2848474383354187, "logits/rejected": -0.29988640546798706, "logps/chosen": -0.09562725573778152, "logps/rejected": -0.66987544298172, "loss": 2.6715, "nll_loss": 0.6471147537231445, "rewards/accuracies": 1.0, "rewards/chosen": -0.009562725201249123, "rewards/margins": 0.057424817234277725, "rewards/rejected": -0.066987544298172, "step": 1722 }, { "epoch": 1.1915629322268326, "grad_norm": 8.49174690246582, "learning_rate": 4.893576148762871e-05, "log_odds_chosen": 3.0292856693267822, "log_odds_ratio": -0.30213072896003723, "logits/chosen": -0.6463524103164673, "logits/rejected": -0.6709821224212646, "logps/chosen": -0.15487344563007355, "logps/rejected": -0.8251013159751892, "loss": 4.1571, "nll_loss": 1.0090569257736206, "rewards/accuracies": 0.875, "rewards/chosen": -0.015487344935536385, "rewards/margins": 0.06702278554439545, "rewards/rejected": -0.08251012861728668, "step": 1723 }, { "epoch": 1.1922544951590595, "grad_norm": 7.511002063751221, "learning_rate": 4.893191947133856e-05, "log_odds_chosen": 4.571445465087891, "log_odds_ratio": -0.4141373634338379, "logits/chosen": -0.4878627359867096, "logits/rejected": -0.5303633809089661, "logps/chosen": -0.248141810297966, "logps/rejected": -0.6373559832572937, "loss": 3.8155, "nll_loss": 0.9124590158462524, "rewards/accuracies": 0.75, "rewards/chosen": -0.02481417916715145, "rewards/margins": 0.03892141580581665, "rewards/rejected": -0.06373559683561325, "step": 1724 }, { "epoch": 1.1929460580912863, "grad_norm": 7.655296802520752, "learning_rate": 4.892807745504841e-05, "log_odds_chosen": 3.3585290908813477, "log_odds_ratio": -0.39322608709335327, "logits/chosen": -0.6439844965934753, "logits/rejected": -0.6731945276260376, "logps/chosen": -0.12331248819828033, "logps/rejected": -1.0248024463653564, "loss": 5.2115, "nll_loss": 1.2635518312454224, "rewards/accuracies": 0.75, "rewards/chosen": -0.012331248261034489, "rewards/margins": 0.09014899283647537, "rewards/rejected": -0.10248024016618729, "step": 1725 }, { "epoch": 1.1936376210235131, "grad_norm": 5.599480152130127, "learning_rate": 4.8924235438758266e-05, "log_odds_chosen": 5.788207530975342, "log_odds_ratio": -0.25669771432876587, "logits/chosen": -0.7034620046615601, "logits/rejected": -0.7278566360473633, "logps/chosen": -0.03290760889649391, "logps/rejected": -0.6548459529876709, "loss": 3.2463, "nll_loss": 0.7858980298042297, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032907610293477774, "rewards/margins": 0.06219382956624031, "rewards/rejected": -0.06548459082841873, "step": 1726 }, { "epoch": 1.19432918395574, "grad_norm": 8.53808879852295, "learning_rate": 4.892039342246811e-05, "log_odds_chosen": 4.921740531921387, "log_odds_ratio": -0.2440432608127594, "logits/chosen": -0.38131314516067505, "logits/rejected": -0.44898858666419983, "logps/chosen": -0.12483307719230652, "logps/rejected": -1.2572139501571655, "loss": 4.7227, "nll_loss": 1.1562637090682983, "rewards/accuracies": 0.875, "rewards/chosen": -0.012483308091759682, "rewards/margins": 0.11323809623718262, "rewards/rejected": -0.12572139501571655, "step": 1727 }, { "epoch": 1.1950207468879668, "grad_norm": 7.684170722961426, "learning_rate": 4.8916551406177964e-05, "log_odds_chosen": 5.221076965332031, "log_odds_ratio": -0.22764191031455994, "logits/chosen": -0.4904865622520447, "logits/rejected": -0.4865496754646301, "logps/chosen": -0.08739637583494186, "logps/rejected": -0.7149273157119751, "loss": 4.4827, "nll_loss": 1.0979145765304565, "rewards/accuracies": 0.875, "rewards/chosen": -0.008739637210965157, "rewards/margins": 0.0627530962228775, "rewards/rejected": -0.0714927390217781, "step": 1728 }, { "epoch": 1.1957123098201936, "grad_norm": 4.598145961761475, "learning_rate": 4.891270938988782e-05, "log_odds_chosen": 4.581998825073242, "log_odds_ratio": -0.14316655695438385, "logits/chosen": -0.6146881580352783, "logits/rejected": -0.6451320648193359, "logps/chosen": -0.03969764709472656, "logps/rejected": -0.9337931871414185, "loss": 3.057, "nll_loss": 0.7499375343322754, "rewards/accuracies": 0.875, "rewards/chosen": -0.003969764802604914, "rewards/margins": 0.08940955996513367, "rewards/rejected": -0.09337931871414185, "step": 1729 }, { "epoch": 1.1964038727524204, "grad_norm": 5.302453517913818, "learning_rate": 4.890886737359767e-05, "log_odds_chosen": 2.8931965827941895, "log_odds_ratio": -0.29582732915878296, "logits/chosen": -0.23103663325309753, "logits/rejected": -0.19957208633422852, "logps/chosen": -0.09102919697761536, "logps/rejected": -0.4125361740589142, "loss": 3.5228, "nll_loss": 0.8511098027229309, "rewards/accuracies": 0.875, "rewards/chosen": -0.009102920070290565, "rewards/margins": 0.03215069696307182, "rewards/rejected": -0.04125361889600754, "step": 1730 }, { "epoch": 1.1970954356846473, "grad_norm": 3.398452043533325, "learning_rate": 4.8905025357307515e-05, "log_odds_chosen": 4.475583553314209, "log_odds_ratio": -0.29882586002349854, "logits/chosen": -0.21776120364665985, "logits/rejected": -0.20349350571632385, "logps/chosen": -0.10076668113470078, "logps/rejected": -0.5111818909645081, "loss": 2.7408, "nll_loss": 0.6553149819374084, "rewards/accuracies": 0.875, "rewards/chosen": -0.010076668113470078, "rewards/margins": 0.04104151949286461, "rewards/rejected": -0.051118187606334686, "step": 1731 }, { "epoch": 1.197786998616874, "grad_norm": 3.415905714035034, "learning_rate": 4.890118334101737e-05, "log_odds_chosen": 7.551507949829102, "log_odds_ratio": -0.020409587770700455, "logits/chosen": -0.6365310549736023, "logits/rejected": -0.6799354553222656, "logps/chosen": -0.01089848019182682, "logps/rejected": -0.8678996562957764, "loss": 2.3784, "nll_loss": 0.5925613641738892, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010898481123149395, "rewards/margins": 0.08570010960102081, "rewards/rejected": -0.08678996562957764, "step": 1732 }, { "epoch": 1.198478561549101, "grad_norm": 5.842837810516357, "learning_rate": 4.889734132472722e-05, "log_odds_chosen": 4.548971176147461, "log_odds_ratio": -0.4501718282699585, "logits/chosen": -0.5090807676315308, "logits/rejected": -0.524118185043335, "logps/chosen": -0.1523047387599945, "logps/rejected": -0.854877769947052, "loss": 2.9738, "nll_loss": 0.6984264254570007, "rewards/accuracies": 0.75, "rewards/chosen": -0.015230474062263966, "rewards/margins": 0.07025731354951859, "rewards/rejected": -0.08548778295516968, "step": 1733 }, { "epoch": 1.1991701244813278, "grad_norm": 5.5528082847595215, "learning_rate": 4.8893499308437066e-05, "log_odds_chosen": 4.443816184997559, "log_odds_ratio": -0.20689380168914795, "logits/chosen": -0.1926521509885788, "logits/rejected": -0.238718181848526, "logps/chosen": -0.06474921107292175, "logps/rejected": -0.8699467182159424, "loss": 3.2818, "nll_loss": 0.799752950668335, "rewards/accuracies": 0.875, "rewards/chosen": -0.006474921479821205, "rewards/margins": 0.08051975071430206, "rewards/rejected": -0.08699467033147812, "step": 1734 }, { "epoch": 1.1998616874135546, "grad_norm": 6.228503227233887, "learning_rate": 4.8889657292146925e-05, "log_odds_chosen": 3.841547727584839, "log_odds_ratio": -0.2785239517688751, "logits/chosen": -0.6458946466445923, "logits/rejected": -0.6550995707511902, "logps/chosen": -0.10351575911045074, "logps/rejected": -0.9910328388214111, "loss": 4.1892, "nll_loss": 1.019450068473816, "rewards/accuracies": 0.75, "rewards/chosen": -0.010351575911045074, "rewards/margins": 0.08875171840190887, "rewards/rejected": -0.09910327941179276, "step": 1735 }, { "epoch": 1.2005532503457814, "grad_norm": 8.065655708312988, "learning_rate": 4.888581527585677e-05, "log_odds_chosen": 4.7300567626953125, "log_odds_ratio": -0.24043934047222137, "logits/chosen": -0.40952181816101074, "logits/rejected": -0.43163684010505676, "logps/chosen": -0.12518151104450226, "logps/rejected": -1.2267175912857056, "loss": 2.9163, "nll_loss": 0.7050365209579468, "rewards/accuracies": 0.75, "rewards/chosen": -0.012518150731921196, "rewards/margins": 0.11015360057353973, "rewards/rejected": -0.12267175316810608, "step": 1736 }, { "epoch": 1.2012448132780082, "grad_norm": 6.8196282386779785, "learning_rate": 4.888197325956662e-05, "log_odds_chosen": 4.278225898742676, "log_odds_ratio": -0.2729211151599884, "logits/chosen": -0.6910278797149658, "logits/rejected": -0.7113723158836365, "logps/chosen": -0.09786369651556015, "logps/rejected": -0.619482159614563, "loss": 3.8214, "nll_loss": 0.9280692338943481, "rewards/accuracies": 0.75, "rewards/chosen": -0.009786369279026985, "rewards/margins": 0.05216185003519058, "rewards/rejected": -0.06194822117686272, "step": 1737 }, { "epoch": 1.201936376210235, "grad_norm": 5.485548973083496, "learning_rate": 4.8878131243276475e-05, "log_odds_chosen": 2.264752149581909, "log_odds_ratio": -0.496864378452301, "logits/chosen": -0.3320154547691345, "logits/rejected": -0.31849053502082825, "logps/chosen": -0.13289959728717804, "logps/rejected": -0.7237868905067444, "loss": 2.6569, "nll_loss": 0.6145286560058594, "rewards/accuracies": 0.625, "rewards/chosen": -0.013289961032569408, "rewards/margins": 0.059088729321956635, "rewards/rejected": -0.07237869501113892, "step": 1738 }, { "epoch": 1.202627939142462, "grad_norm": 6.142898082733154, "learning_rate": 4.887428922698633e-05, "log_odds_chosen": 3.9606635570526123, "log_odds_ratio": -0.31309106945991516, "logits/chosen": -0.5523556470870972, "logits/rejected": -0.635855495929718, "logps/chosen": -0.08076249063014984, "logps/rejected": -0.8161361217498779, "loss": 3.7155, "nll_loss": 0.8975592255592346, "rewards/accuracies": 0.75, "rewards/chosen": -0.008076248690485954, "rewards/margins": 0.07353736460208893, "rewards/rejected": -0.08161361515522003, "step": 1739 }, { "epoch": 1.2033195020746887, "grad_norm": 5.83258056640625, "learning_rate": 4.8870447210696173e-05, "log_odds_chosen": 2.0771408081054688, "log_odds_ratio": -0.33731359243392944, "logits/chosen": -0.679516077041626, "logits/rejected": -0.6932682991027832, "logps/chosen": -0.11285239458084106, "logps/rejected": -0.41165733337402344, "loss": 3.7447, "nll_loss": 0.902442455291748, "rewards/accuracies": 0.75, "rewards/chosen": -0.011285239830613136, "rewards/margins": 0.029880493879318237, "rewards/rejected": -0.041165731847286224, "step": 1740 }, { "epoch": 1.2040110650069156, "grad_norm": 5.038331508636475, "learning_rate": 4.8866605194406026e-05, "log_odds_chosen": 6.890632629394531, "log_odds_ratio": -0.0032947673462331295, "logits/chosen": -0.5037215948104858, "logits/rejected": -0.526785135269165, "logps/chosen": -0.007296360097825527, "logps/rejected": -1.105158805847168, "loss": 3.131, "nll_loss": 0.7824151515960693, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007296359981410205, "rewards/margins": 0.10978624224662781, "rewards/rejected": -0.11051587760448456, "step": 1741 }, { "epoch": 1.2047026279391424, "grad_norm": 5.662741184234619, "learning_rate": 4.886276317811588e-05, "log_odds_chosen": 3.73443603515625, "log_odds_ratio": -0.10789938271045685, "logits/chosen": -0.44654685258865356, "logits/rejected": -0.4576447606086731, "logps/chosen": -0.07846559584140778, "logps/rejected": -0.8716351389884949, "loss": 2.8468, "nll_loss": 0.7009090185165405, "rewards/accuracies": 1.0, "rewards/chosen": -0.007846559397876263, "rewards/margins": 0.07931695878505707, "rewards/rejected": -0.0871635228395462, "step": 1742 }, { "epoch": 1.2053941908713692, "grad_norm": 5.150761127471924, "learning_rate": 4.8858921161825724e-05, "log_odds_chosen": 2.4600987434387207, "log_odds_ratio": -0.3202195167541504, "logits/chosen": -0.2952830493450165, "logits/rejected": -0.27832943201065063, "logps/chosen": -0.10139881074428558, "logps/rejected": -0.6176132559776306, "loss": 3.2098, "nll_loss": 0.7704358100891113, "rewards/accuracies": 0.875, "rewards/chosen": -0.010139882564544678, "rewards/margins": 0.0516214445233345, "rewards/rejected": -0.06176132708787918, "step": 1743 }, { "epoch": 1.206085753803596, "grad_norm": 6.006669044494629, "learning_rate": 4.885507914553558e-05, "log_odds_chosen": 7.206621170043945, "log_odds_ratio": -0.11751189827919006, "logits/chosen": -0.11540503799915314, "logits/rejected": -0.22454169392585754, "logps/chosen": -0.04032893851399422, "logps/rejected": -1.2910562753677368, "loss": 4.2113, "nll_loss": 1.04107666015625, "rewards/accuracies": 0.875, "rewards/chosen": -0.004032894037663937, "rewards/margins": 0.12507273256778717, "rewards/rejected": -0.12910562753677368, "step": 1744 }, { "epoch": 1.206777316735823, "grad_norm": 4.959017276763916, "learning_rate": 4.885123712924543e-05, "log_odds_chosen": 1.8773472309112549, "log_odds_ratio": -0.47992217540740967, "logits/chosen": -0.6515315175056458, "logits/rejected": -0.6257120966911316, "logps/chosen": -0.17926810681819916, "logps/rejected": -0.661084771156311, "loss": 3.5644, "nll_loss": 0.8431035280227661, "rewards/accuracies": 0.875, "rewards/chosen": -0.017926812171936035, "rewards/margins": 0.04818166047334671, "rewards/rejected": -0.06610847264528275, "step": 1745 }, { "epoch": 1.2074688796680497, "grad_norm": 8.701241493225098, "learning_rate": 4.884739511295528e-05, "log_odds_chosen": 4.010853290557861, "log_odds_ratio": -0.11723016947507858, "logits/chosen": -0.568061888217926, "logits/rejected": -0.6578323841094971, "logps/chosen": -0.07474225759506226, "logps/rejected": -0.9790551662445068, "loss": 4.6718, "nll_loss": 1.1562221050262451, "rewards/accuracies": 1.0, "rewards/chosen": -0.007474225480109453, "rewards/margins": 0.09043128788471222, "rewards/rejected": -0.09790551662445068, "step": 1746 }, { "epoch": 1.2081604426002766, "grad_norm": 5.493067264556885, "learning_rate": 4.8843553096665134e-05, "log_odds_chosen": 4.669642925262451, "log_odds_ratio": -0.23646828532218933, "logits/chosen": -0.3074951171875, "logits/rejected": -0.3432926833629608, "logps/chosen": -0.06846563518047333, "logps/rejected": -0.9051128625869751, "loss": 3.3922, "nll_loss": 0.824414074420929, "rewards/accuracies": 0.875, "rewards/chosen": -0.006846563890576363, "rewards/margins": 0.08366473019123077, "rewards/rejected": -0.09051129221916199, "step": 1747 }, { "epoch": 1.2088520055325034, "grad_norm": 8.25934886932373, "learning_rate": 4.8839711080374986e-05, "log_odds_chosen": 3.223919630050659, "log_odds_ratio": -0.21303078532218933, "logits/chosen": -0.42224496603012085, "logits/rejected": -0.43888431787490845, "logps/chosen": -0.08300723135471344, "logps/rejected": -0.7980213165283203, "loss": 3.9078, "nll_loss": 0.9556349515914917, "rewards/accuracies": 1.0, "rewards/chosen": -0.0083007225766778, "rewards/margins": 0.07150140404701233, "rewards/rejected": -0.07980212569236755, "step": 1748 }, { "epoch": 1.2095435684647302, "grad_norm": 8.729022979736328, "learning_rate": 4.883586906408483e-05, "log_odds_chosen": 4.861759185791016, "log_odds_ratio": -0.23737916350364685, "logits/chosen": -0.32016968727111816, "logits/rejected": -0.3375993072986603, "logps/chosen": -0.14943954348564148, "logps/rejected": -0.7811824679374695, "loss": 3.8816, "nll_loss": 0.9466730952262878, "rewards/accuracies": 0.875, "rewards/chosen": -0.014943954534828663, "rewards/margins": 0.0631742924451828, "rewards/rejected": -0.07811824232339859, "step": 1749 }, { "epoch": 1.210235131396957, "grad_norm": 5.598297595977783, "learning_rate": 4.8832027047794684e-05, "log_odds_chosen": 3.8768386840820312, "log_odds_ratio": -0.2456800788640976, "logits/chosen": -0.5290508270263672, "logits/rejected": -0.5527206659317017, "logps/chosen": -0.10265764594078064, "logps/rejected": -0.6469843983650208, "loss": 3.3323, "nll_loss": 0.8085078001022339, "rewards/accuracies": 0.875, "rewards/chosen": -0.010265765711665154, "rewards/margins": 0.054432667791843414, "rewards/rejected": -0.06469843536615372, "step": 1750 }, { "epoch": 1.2109266943291839, "grad_norm": 6.770428657531738, "learning_rate": 4.882818503150454e-05, "log_odds_chosen": 2.9814016819000244, "log_odds_ratio": -0.2961695194244385, "logits/chosen": -0.690618634223938, "logits/rejected": -0.6883652806282043, "logps/chosen": -0.1316520869731903, "logps/rejected": -0.7826559543609619, "loss": 4.3335, "nll_loss": 1.0537523031234741, "rewards/accuracies": 0.875, "rewards/chosen": -0.01316520944237709, "rewards/margins": 0.06510038673877716, "rewards/rejected": -0.07826559990644455, "step": 1751 }, { "epoch": 1.2116182572614107, "grad_norm": 6.940248966217041, "learning_rate": 4.882434301521438e-05, "log_odds_chosen": 0.9520793557167053, "log_odds_ratio": -0.5022197961807251, "logits/chosen": -0.45700353384017944, "logits/rejected": -0.41526639461517334, "logps/chosen": -0.19162046909332275, "logps/rejected": -0.46913477778434753, "loss": 4.8986, "nll_loss": 1.174436330795288, "rewards/accuracies": 0.75, "rewards/chosen": -0.019162047654390335, "rewards/margins": 0.027751432731747627, "rewards/rejected": -0.04691348224878311, "step": 1752 }, { "epoch": 1.2123098201936375, "grad_norm": 12.57465648651123, "learning_rate": 4.882050099892424e-05, "log_odds_chosen": 1.6119085550308228, "log_odds_ratio": -1.0399413108825684, "logits/chosen": -0.6990538835525513, "logits/rejected": -0.6973117589950562, "logps/chosen": -0.2371227741241455, "logps/rejected": -0.3703542649745941, "loss": 4.8505, "nll_loss": 1.1086349487304688, "rewards/accuracies": 0.625, "rewards/chosen": -0.02371227741241455, "rewards/margins": 0.013323148712515831, "rewards/rejected": -0.03703542798757553, "step": 1753 }, { "epoch": 1.2130013831258644, "grad_norm": 6.856281757354736, "learning_rate": 4.881665898263409e-05, "log_odds_chosen": 3.3835794925689697, "log_odds_ratio": -0.37793684005737305, "logits/chosen": -0.6199455261230469, "logits/rejected": -0.6514655351638794, "logps/chosen": -0.12874896824359894, "logps/rejected": -0.5390415191650391, "loss": 3.995, "nll_loss": 0.9609636664390564, "rewards/accuracies": 0.75, "rewards/chosen": -0.012874897569417953, "rewards/margins": 0.04102925583720207, "rewards/rejected": -0.053904157131910324, "step": 1754 }, { "epoch": 1.2136929460580912, "grad_norm": 5.524672508239746, "learning_rate": 4.881281696634394e-05, "log_odds_chosen": 5.13658332824707, "log_odds_ratio": -0.03698335960507393, "logits/chosen": -0.44332507252693176, "logits/rejected": -0.4719582200050354, "logps/chosen": -0.053218670189380646, "logps/rejected": -0.8304323554039001, "loss": 2.7617, "nll_loss": 0.6867244243621826, "rewards/accuracies": 1.0, "rewards/chosen": -0.005321866367012262, "rewards/margins": 0.07772136479616165, "rewards/rejected": -0.08304324001073837, "step": 1755 }, { "epoch": 1.214384508990318, "grad_norm": 12.355244636535645, "learning_rate": 4.880897495005379e-05, "log_odds_chosen": 3.635563850402832, "log_odds_ratio": -0.7260986566543579, "logits/chosen": -0.4599088430404663, "logits/rejected": -0.42827102541923523, "logps/chosen": -0.18452264368534088, "logps/rejected": -0.5236896276473999, "loss": 3.6846, "nll_loss": 0.8485289812088013, "rewards/accuracies": 0.625, "rewards/chosen": -0.018452264368534088, "rewards/margins": 0.03391670063138008, "rewards/rejected": -0.05236896127462387, "step": 1756 }, { "epoch": 1.215076071922545, "grad_norm": 4.876830101013184, "learning_rate": 4.8805132933763645e-05, "log_odds_chosen": 5.384311676025391, "log_odds_ratio": -0.04986800625920296, "logits/chosen": -0.28661617636680603, "logits/rejected": -0.3225519061088562, "logps/chosen": -0.026972772553563118, "logps/rejected": -0.7848159670829773, "loss": 3.338, "nll_loss": 0.8295071125030518, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026972773484885693, "rewards/margins": 0.07578432559967041, "rewards/rejected": -0.07848159968852997, "step": 1757 }, { "epoch": 1.215767634854772, "grad_norm": 3.9064624309539795, "learning_rate": 4.880129091747349e-05, "log_odds_chosen": 4.191158294677734, "log_odds_ratio": -0.12977999448776245, "logits/chosen": -0.5362319946289062, "logits/rejected": -0.5842596292495728, "logps/chosen": -0.062384773045778275, "logps/rejected": -0.7664477229118347, "loss": 2.7305, "nll_loss": 0.6696509718894958, "rewards/accuracies": 1.0, "rewards/chosen": -0.0062384773045778275, "rewards/margins": 0.07040630280971527, "rewards/rejected": -0.07664477080106735, "step": 1758 }, { "epoch": 1.2164591977869987, "grad_norm": 6.504387855529785, "learning_rate": 4.879744890118334e-05, "log_odds_chosen": 1.8020520210266113, "log_odds_ratio": -0.5221161842346191, "logits/chosen": -0.6099242568016052, "logits/rejected": -0.5896930694580078, "logps/chosen": -0.11337540298700333, "logps/rejected": -0.4469042420387268, "loss": 3.8603, "nll_loss": 0.9128572344779968, "rewards/accuracies": 0.75, "rewards/chosen": -0.011337541043758392, "rewards/margins": 0.03335288166999817, "rewards/rejected": -0.04469042271375656, "step": 1759 }, { "epoch": 1.2171507607192256, "grad_norm": 4.438575267791748, "learning_rate": 4.8793606884893195e-05, "log_odds_chosen": 4.415384292602539, "log_odds_ratio": -0.12809635698795319, "logits/chosen": -1.0808501243591309, "logits/rejected": -1.1437695026397705, "logps/chosen": -0.07756942510604858, "logps/rejected": -0.9279264211654663, "loss": 4.8248, "nll_loss": 1.1933966875076294, "rewards/accuracies": 1.0, "rewards/chosen": -0.007756942883133888, "rewards/margins": 0.08503570407629013, "rewards/rejected": -0.09279264509677887, "step": 1760 }, { "epoch": 1.2178423236514524, "grad_norm": 3.9479455947875977, "learning_rate": 4.878976486860304e-05, "log_odds_chosen": 5.265148162841797, "log_odds_ratio": -0.12130524963140488, "logits/chosen": -0.12485721707344055, "logits/rejected": -0.14175529778003693, "logps/chosen": -0.05179094150662422, "logps/rejected": -1.0051532983779907, "loss": 2.4347, "nll_loss": 0.596540629863739, "rewards/accuracies": 1.0, "rewards/chosen": -0.005179094150662422, "rewards/margins": 0.09533624351024628, "rewards/rejected": -0.10051532834768295, "step": 1761 }, { "epoch": 1.2185338865836792, "grad_norm": 7.4558515548706055, "learning_rate": 4.87859228523129e-05, "log_odds_chosen": 3.7101030349731445, "log_odds_ratio": -0.14278443157672882, "logits/chosen": -0.36934995651245117, "logits/rejected": -0.403659850358963, "logps/chosen": -0.09488293528556824, "logps/rejected": -0.6350594162940979, "loss": 5.1023, "nll_loss": 1.2613072395324707, "rewards/accuracies": 1.0, "rewards/chosen": -0.009488292969763279, "rewards/margins": 0.054017651826143265, "rewards/rejected": -0.06350594758987427, "step": 1762 }, { "epoch": 1.219225449515906, "grad_norm": 5.117076873779297, "learning_rate": 4.8782080836022746e-05, "log_odds_chosen": 1.7559460401535034, "log_odds_ratio": -0.2781575322151184, "logits/chosen": -0.5458097457885742, "logits/rejected": -0.5692986845970154, "logps/chosen": -0.14074856042861938, "logps/rejected": -0.42338860034942627, "loss": 3.6414, "nll_loss": 0.8825327157974243, "rewards/accuracies": 0.875, "rewards/chosen": -0.014074856415390968, "rewards/margins": 0.0282640028744936, "rewards/rejected": -0.042338863015174866, "step": 1763 }, { "epoch": 1.2199170124481329, "grad_norm": 5.407169818878174, "learning_rate": 4.87782388197326e-05, "log_odds_chosen": 4.143532752990723, "log_odds_ratio": -0.15496212244033813, "logits/chosen": -0.6489083766937256, "logits/rejected": -0.6936314702033997, "logps/chosen": -0.034726690500974655, "logps/rejected": -0.7174232602119446, "loss": 3.3003, "nll_loss": 0.8095715045928955, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034726690500974655, "rewards/margins": 0.06826966255903244, "rewards/rejected": -0.07174232602119446, "step": 1764 }, { "epoch": 1.2206085753803597, "grad_norm": 6.849701881408691, "learning_rate": 4.877439680344245e-05, "log_odds_chosen": 4.303737163543701, "log_odds_ratio": -0.28363704681396484, "logits/chosen": -0.6390360593795776, "logits/rejected": -0.6330482959747314, "logps/chosen": -0.09742604196071625, "logps/rejected": -0.8334695100784302, "loss": 3.9848, "nll_loss": 0.9678481221199036, "rewards/accuracies": 0.75, "rewards/chosen": -0.009742604568600655, "rewards/margins": 0.07360435277223587, "rewards/rejected": -0.08334695547819138, "step": 1765 }, { "epoch": 1.2213001383125865, "grad_norm": 4.578864574432373, "learning_rate": 4.87705547871523e-05, "log_odds_chosen": 4.171421527862549, "log_odds_ratio": -0.32398200035095215, "logits/chosen": -0.3163169324398041, "logits/rejected": -0.2699214816093445, "logps/chosen": -0.07687410712242126, "logps/rejected": -0.562875509262085, "loss": 2.4355, "nll_loss": 0.5764786005020142, "rewards/accuracies": 0.75, "rewards/chosen": -0.007687410339713097, "rewards/margins": 0.04860014095902443, "rewards/rejected": -0.05628754943609238, "step": 1766 }, { "epoch": 1.2219917012448134, "grad_norm": 6.775142192840576, "learning_rate": 4.876671277086215e-05, "log_odds_chosen": 3.3114845752716064, "log_odds_ratio": -0.30798664689064026, "logits/chosen": -0.8572962284088135, "logits/rejected": -0.8945976495742798, "logps/chosen": -0.14023393392562866, "logps/rejected": -0.7111852169036865, "loss": 4.3299, "nll_loss": 1.0516669750213623, "rewards/accuracies": 0.75, "rewards/chosen": -0.014023393392562866, "rewards/margins": 0.057095129042863846, "rewards/rejected": -0.07111851871013641, "step": 1767 }, { "epoch": 1.2226832641770402, "grad_norm": 3.9804606437683105, "learning_rate": 4.8762870754572e-05, "log_odds_chosen": 4.615160942077637, "log_odds_ratio": -0.18223318457603455, "logits/chosen": -0.6927958726882935, "logits/rejected": -0.6910476088523865, "logps/chosen": -0.029494691640138626, "logps/rejected": -0.6091811656951904, "loss": 2.7294, "nll_loss": 0.6641297340393066, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029494690243154764, "rewards/margins": 0.05796864628791809, "rewards/rejected": -0.06091811880469322, "step": 1768 }, { "epoch": 1.223374827109267, "grad_norm": 7.519423484802246, "learning_rate": 4.8759028738281854e-05, "log_odds_chosen": 2.6739237308502197, "log_odds_ratio": -0.5759358406066895, "logits/chosen": -0.7101910710334778, "logits/rejected": -0.735167384147644, "logps/chosen": -0.17419900000095367, "logps/rejected": -0.5741370916366577, "loss": 3.8378, "nll_loss": 0.9018557667732239, "rewards/accuracies": 0.625, "rewards/chosen": -0.017419900745153427, "rewards/margins": 0.039993807673454285, "rewards/rejected": -0.05741371214389801, "step": 1769 }, { "epoch": 1.2240663900414939, "grad_norm": 5.838580131530762, "learning_rate": 4.87551867219917e-05, "log_odds_chosen": 4.783184051513672, "log_odds_ratio": -0.05730760842561722, "logits/chosen": -0.3828684687614441, "logits/rejected": -0.4162397086620331, "logps/chosen": -0.024466292932629585, "logps/rejected": -0.8661034107208252, "loss": 3.8114, "nll_loss": 0.947107195854187, "rewards/accuracies": 1.0, "rewards/chosen": -0.002446629572659731, "rewards/margins": 0.08416370302438736, "rewards/rejected": -0.0866103321313858, "step": 1770 }, { "epoch": 1.2247579529737207, "grad_norm": 4.113922595977783, "learning_rate": 4.875134470570156e-05, "log_odds_chosen": 6.979005336761475, "log_odds_ratio": -0.019842159003019333, "logits/chosen": -0.5436484813690186, "logits/rejected": -0.585045576095581, "logps/chosen": -0.025577958673238754, "logps/rejected": -0.8694262504577637, "loss": 2.8166, "nll_loss": 0.7021672129631042, "rewards/accuracies": 1.0, "rewards/chosen": -0.002557795960456133, "rewards/margins": 0.08438482880592346, "rewards/rejected": -0.0869426280260086, "step": 1771 }, { "epoch": 1.2254495159059475, "grad_norm": 5.46701717376709, "learning_rate": 4.8747502689411404e-05, "log_odds_chosen": 3.2045738697052, "log_odds_ratio": -0.4168136715888977, "logits/chosen": -0.47816532850265503, "logits/rejected": -0.5259116888046265, "logps/chosen": -0.08361619710922241, "logps/rejected": -0.41244637966156006, "loss": 2.8807, "nll_loss": 0.678496241569519, "rewards/accuracies": 0.875, "rewards/chosen": -0.00836162082850933, "rewards/margins": 0.03288302198052406, "rewards/rejected": -0.041244640946388245, "step": 1772 }, { "epoch": 1.2261410788381744, "grad_norm": 3.5881946086883545, "learning_rate": 4.874366067312126e-05, "log_odds_chosen": 4.681888103485107, "log_odds_ratio": -0.23527833819389343, "logits/chosen": -0.4189876317977905, "logits/rejected": -0.43972110748291016, "logps/chosen": -0.09223958849906921, "logps/rejected": -0.6895633935928345, "loss": 2.6699, "nll_loss": 0.6439555883407593, "rewards/accuracies": 0.875, "rewards/chosen": -0.009223959408700466, "rewards/margins": 0.05973239243030548, "rewards/rejected": -0.06895634531974792, "step": 1773 }, { "epoch": 1.2268326417704012, "grad_norm": 5.046339988708496, "learning_rate": 4.873981865683111e-05, "log_odds_chosen": 2.407923936843872, "log_odds_ratio": -0.16615843772888184, "logits/chosen": -0.2516796588897705, "logits/rejected": -0.2353476881980896, "logps/chosen": -0.07308045029640198, "logps/rejected": -0.49336302280426025, "loss": 2.8996, "nll_loss": 0.7082836627960205, "rewards/accuracies": 1.0, "rewards/chosen": -0.007308045402169228, "rewards/margins": 0.04202825948596001, "rewards/rejected": -0.04933629930019379, "step": 1774 }, { "epoch": 1.227524204702628, "grad_norm": 6.417335033416748, "learning_rate": 4.873597664054096e-05, "log_odds_chosen": 3.896390199661255, "log_odds_ratio": -0.1744166910648346, "logits/chosen": -0.3504815101623535, "logits/rejected": -0.3109341263771057, "logps/chosen": -0.06069917231798172, "logps/rejected": -0.7529281377792358, "loss": 3.9176, "nll_loss": 0.9619663953781128, "rewards/accuracies": 1.0, "rewards/chosen": -0.006069916766136885, "rewards/margins": 0.06922289729118347, "rewards/rejected": -0.07529281079769135, "step": 1775 }, { "epoch": 1.2282157676348548, "grad_norm": 7.379581928253174, "learning_rate": 4.873213462425081e-05, "log_odds_chosen": 3.4607551097869873, "log_odds_ratio": -0.37025904655456543, "logits/chosen": -0.5527825355529785, "logits/rejected": -0.5550543665885925, "logps/chosen": -0.10252739489078522, "logps/rejected": -0.5675938725471497, "loss": 3.6683, "nll_loss": 0.8800457715988159, "rewards/accuracies": 0.875, "rewards/chosen": -0.010252740234136581, "rewards/margins": 0.046506647020578384, "rewards/rejected": -0.056759387254714966, "step": 1776 }, { "epoch": 1.2289073305670817, "grad_norm": 9.051420211791992, "learning_rate": 4.872829260796066e-05, "log_odds_chosen": 1.5939397811889648, "log_odds_ratio": -0.34218525886535645, "logits/chosen": -0.6052234172821045, "logits/rejected": -0.6254400610923767, "logps/chosen": -0.16898562014102936, "logps/rejected": -0.6538894176483154, "loss": 3.4297, "nll_loss": 0.8231991529464722, "rewards/accuracies": 0.75, "rewards/chosen": -0.016898561269044876, "rewards/margins": 0.04849037900567055, "rewards/rejected": -0.06538893282413483, "step": 1777 }, { "epoch": 1.2295988934993085, "grad_norm": 11.628338813781738, "learning_rate": 4.872445059167051e-05, "log_odds_chosen": 3.865147590637207, "log_odds_ratio": -0.27473387122154236, "logits/chosen": -0.183550626039505, "logits/rejected": -0.26342085003852844, "logps/chosen": -0.03889723867177963, "logps/rejected": -0.5629591345787048, "loss": 3.0423, "nll_loss": 0.7331140637397766, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038897236809134483, "rewards/margins": 0.052406199276447296, "rewards/rejected": -0.05629592016339302, "step": 1778 }, { "epoch": 1.2302904564315353, "grad_norm": 5.52664852142334, "learning_rate": 4.872060857538036e-05, "log_odds_chosen": 6.228176116943359, "log_odds_ratio": -0.03211307153105736, "logits/chosen": -0.31341448426246643, "logits/rejected": -0.3368358016014099, "logps/chosen": -0.030239183455705643, "logps/rejected": -0.7390041351318359, "loss": 3.3062, "nll_loss": 0.8233508467674255, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030239184852689505, "rewards/margins": 0.07087649405002594, "rewards/rejected": -0.07390041649341583, "step": 1779 }, { "epoch": 1.2309820193637622, "grad_norm": 6.157802104949951, "learning_rate": 4.871676655909022e-05, "log_odds_chosen": 4.707504749298096, "log_odds_ratio": -0.13479316234588623, "logits/chosen": -0.5304163694381714, "logits/rejected": -0.5511650443077087, "logps/chosen": -0.052768275141716, "logps/rejected": -0.7715237140655518, "loss": 4.0619, "nll_loss": 1.0019972324371338, "rewards/accuracies": 1.0, "rewards/chosen": -0.005276828072965145, "rewards/margins": 0.07187553495168686, "rewards/rejected": -0.07715237140655518, "step": 1780 }, { "epoch": 1.231673582295989, "grad_norm": 8.27289867401123, "learning_rate": 4.871292454280006e-05, "log_odds_chosen": 4.5420122146606445, "log_odds_ratio": -0.2521488070487976, "logits/chosen": -0.5395856499671936, "logits/rejected": -0.5455723404884338, "logps/chosen": -0.1704384982585907, "logps/rejected": -0.8473212718963623, "loss": 4.0229, "nll_loss": 0.980500340461731, "rewards/accuracies": 0.875, "rewards/chosen": -0.01704385131597519, "rewards/margins": 0.06768827140331268, "rewards/rejected": -0.08473212271928787, "step": 1781 }, { "epoch": 1.2323651452282158, "grad_norm": 6.026027679443359, "learning_rate": 4.8709082526509915e-05, "log_odds_chosen": 2.95859694480896, "log_odds_ratio": -0.18901872634887695, "logits/chosen": -0.6355127096176147, "logits/rejected": -0.6376572251319885, "logps/chosen": -0.08060871064662933, "logps/rejected": -0.5930425524711609, "loss": 3.294, "nll_loss": 0.8046071529388428, "rewards/accuracies": 0.875, "rewards/chosen": -0.008060871623456478, "rewards/margins": 0.051243383437395096, "rewards/rejected": -0.05930424854159355, "step": 1782 }, { "epoch": 1.2330567081604427, "grad_norm": 5.428910732269287, "learning_rate": 4.870524051021977e-05, "log_odds_chosen": 7.058526515960693, "log_odds_ratio": -0.03021731786429882, "logits/chosen": -0.30677375197410583, "logits/rejected": -0.4313288927078247, "logps/chosen": -0.011306393891572952, "logps/rejected": -1.24741530418396, "loss": 3.2365, "nll_loss": 0.806109607219696, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011306394590064883, "rewards/margins": 0.12361088395118713, "rewards/rejected": -0.12474152445793152, "step": 1783 }, { "epoch": 1.2337482710926695, "grad_norm": 5.7618231773376465, "learning_rate": 4.870139849392962e-05, "log_odds_chosen": 6.493813991546631, "log_odds_ratio": -0.09601682424545288, "logits/chosen": -0.435533344745636, "logits/rejected": -0.4357895851135254, "logps/chosen": -0.03549773246049881, "logps/rejected": -1.1804301738739014, "loss": 2.9047, "nll_loss": 0.7165642380714417, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035497734788805246, "rewards/margins": 0.1144932359457016, "rewards/rejected": -0.11804300546646118, "step": 1784 }, { "epoch": 1.2344398340248963, "grad_norm": 9.983572959899902, "learning_rate": 4.8697556477639466e-05, "log_odds_chosen": 3.681022882461548, "log_odds_ratio": -0.39471232891082764, "logits/chosen": -0.20369184017181396, "logits/rejected": -0.2624896764755249, "logps/chosen": -0.10708246380090714, "logps/rejected": -1.0745576620101929, "loss": 4.8753, "nll_loss": 1.1793495416641235, "rewards/accuracies": 0.875, "rewards/chosen": -0.010708245448768139, "rewards/margins": 0.0967475175857544, "rewards/rejected": -0.1074557676911354, "step": 1785 }, { "epoch": 1.2351313969571232, "grad_norm": 3.3492255210876465, "learning_rate": 4.869371446134932e-05, "log_odds_chosen": 3.9649410247802734, "log_odds_ratio": -0.11080026626586914, "logits/chosen": -0.7051090002059937, "logits/rejected": -0.7223587036132812, "logps/chosen": -0.053031086921691895, "logps/rejected": -0.6676794290542603, "loss": 2.287, "nll_loss": 0.5606632828712463, "rewards/accuracies": 1.0, "rewards/chosen": -0.005303108599036932, "rewards/margins": 0.061464838683605194, "rewards/rejected": -0.06676793843507767, "step": 1786 }, { "epoch": 1.23582295988935, "grad_norm": 6.671707630157471, "learning_rate": 4.868987244505917e-05, "log_odds_chosen": 4.573537349700928, "log_odds_ratio": -0.16145706176757812, "logits/chosen": -0.5118668079376221, "logits/rejected": -0.568949818611145, "logps/chosen": -0.06815500557422638, "logps/rejected": -0.9939965009689331, "loss": 3.7013, "nll_loss": 0.9091796278953552, "rewards/accuracies": 0.875, "rewards/chosen": -0.006815500557422638, "rewards/margins": 0.09258415549993515, "rewards/rejected": -0.09939965605735779, "step": 1787 }, { "epoch": 1.2365145228215768, "grad_norm": 4.77935791015625, "learning_rate": 4.8686030428769016e-05, "log_odds_chosen": 4.834593296051025, "log_odds_ratio": -0.20173893868923187, "logits/chosen": -0.41683146357536316, "logits/rejected": -0.3714733421802521, "logps/chosen": -0.17759756743907928, "logps/rejected": -0.8496260046958923, "loss": 3.5081, "nll_loss": 0.8568394184112549, "rewards/accuracies": 0.875, "rewards/chosen": -0.01775975711643696, "rewards/margins": 0.0672028437256813, "rewards/rejected": -0.08496260643005371, "step": 1788 }, { "epoch": 1.2372060857538036, "grad_norm": 7.869295597076416, "learning_rate": 4.868218841247887e-05, "log_odds_chosen": 2.6747584342956543, "log_odds_ratio": -0.38161760568618774, "logits/chosen": -0.6707320213317871, "logits/rejected": -0.7061043977737427, "logps/chosen": -0.13883009552955627, "logps/rejected": -0.6048102974891663, "loss": 4.4771, "nll_loss": 1.081102967262268, "rewards/accuracies": 0.75, "rewards/chosen": -0.013883009552955627, "rewards/margins": 0.04659801721572876, "rewards/rejected": -0.06048102676868439, "step": 1789 }, { "epoch": 1.2378976486860305, "grad_norm": 6.983148574829102, "learning_rate": 4.867834639618872e-05, "log_odds_chosen": 5.052033424377441, "log_odds_ratio": -0.3466581404209137, "logits/chosen": -0.541464626789093, "logits/rejected": -0.6000796556472778, "logps/chosen": -0.0706629678606987, "logps/rejected": -0.9605896472930908, "loss": 3.2206, "nll_loss": 0.7704898118972778, "rewards/accuracies": 0.75, "rewards/chosen": -0.007066297344863415, "rewards/margins": 0.08899267017841339, "rewards/rejected": -0.09605896472930908, "step": 1790 }, { "epoch": 1.2385892116182573, "grad_norm": 5.895198822021484, "learning_rate": 4.8674504379898574e-05, "log_odds_chosen": 3.636683940887451, "log_odds_ratio": -0.4684436023235321, "logits/chosen": -0.18043142557144165, "logits/rejected": -0.1791577935218811, "logps/chosen": -0.10141946375370026, "logps/rejected": -0.514195442199707, "loss": 2.7713, "nll_loss": 0.6459746956825256, "rewards/accuracies": 0.75, "rewards/chosen": -0.010141946375370026, "rewards/margins": 0.04127759858965874, "rewards/rejected": -0.05141954496502876, "step": 1791 }, { "epoch": 1.2392807745504841, "grad_norm": 15.659523963928223, "learning_rate": 4.867066236360842e-05, "log_odds_chosen": 1.765541672706604, "log_odds_ratio": -0.8988336324691772, "logits/chosen": -0.36447399854660034, "logits/rejected": -0.4492056369781494, "logps/chosen": -0.14289280772209167, "logps/rejected": -0.5673074722290039, "loss": 4.6843, "nll_loss": 1.0811951160430908, "rewards/accuracies": 0.875, "rewards/chosen": -0.014289281331002712, "rewards/margins": 0.042441464960575104, "rewards/rejected": -0.05673075094819069, "step": 1792 }, { "epoch": 1.239972337482711, "grad_norm": 8.014297485351562, "learning_rate": 4.866682034731828e-05, "log_odds_chosen": 6.788247108459473, "log_odds_ratio": -0.04455922171473503, "logits/chosen": -0.45799165964126587, "logits/rejected": -0.5114096403121948, "logps/chosen": -0.026142219081521034, "logps/rejected": -1.3048663139343262, "loss": 4.0886, "nll_loss": 1.017690658569336, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026142222341150045, "rewards/margins": 0.12787240743637085, "rewards/rejected": -0.1304866373538971, "step": 1793 }, { "epoch": 1.2406639004149378, "grad_norm": 7.309274196624756, "learning_rate": 4.8662978331028124e-05, "log_odds_chosen": 6.315692901611328, "log_odds_ratio": -0.16049347817897797, "logits/chosen": -0.41751378774642944, "logits/rejected": -0.4461780786514282, "logps/chosen": -0.06197963282465935, "logps/rejected": -0.7669271230697632, "loss": 3.3334, "nll_loss": 0.817298948764801, "rewards/accuracies": 0.875, "rewards/chosen": -0.0061979638412594795, "rewards/margins": 0.07049474865198135, "rewards/rejected": -0.07669270783662796, "step": 1794 }, { "epoch": 1.2413554633471646, "grad_norm": 8.051568984985352, "learning_rate": 4.8659136314737977e-05, "log_odds_chosen": 7.974938869476318, "log_odds_ratio": -0.0025558762717992067, "logits/chosen": -0.21984058618545532, "logits/rejected": -0.3406951427459717, "logps/chosen": -0.003969233483076096, "logps/rejected": -1.5013253688812256, "loss": 4.0633, "nll_loss": 1.0155622959136963, "rewards/accuracies": 1.0, "rewards/chosen": -0.00039692336576990783, "rewards/margins": 0.14973559975624084, "rewards/rejected": -0.15013253688812256, "step": 1795 }, { "epoch": 1.2420470262793915, "grad_norm": 5.935072422027588, "learning_rate": 4.865529429844783e-05, "log_odds_chosen": 2.892843246459961, "log_odds_ratio": -0.9609073400497437, "logits/chosen": -0.4119155704975128, "logits/rejected": -0.38800469040870667, "logps/chosen": -0.07128679752349854, "logps/rejected": -0.5854979753494263, "loss": 3.2578, "nll_loss": 0.7183555364608765, "rewards/accuracies": 0.875, "rewards/chosen": -0.007128680124878883, "rewards/margins": 0.051421117037534714, "rewards/rejected": -0.05854979529976845, "step": 1796 }, { "epoch": 1.2427385892116183, "grad_norm": 3.907433032989502, "learning_rate": 4.8651452282157675e-05, "log_odds_chosen": 4.29548454284668, "log_odds_ratio": -0.18526923656463623, "logits/chosen": -0.3604997992515564, "logits/rejected": -0.3359731435775757, "logps/chosen": -0.05916924402117729, "logps/rejected": -0.606911838054657, "loss": 1.9766, "nll_loss": 0.47562935948371887, "rewards/accuracies": 0.875, "rewards/chosen": -0.005916924215853214, "rewards/margins": 0.05477425456047058, "rewards/rejected": -0.06069118157029152, "step": 1797 }, { "epoch": 1.2434301521438451, "grad_norm": 7.241527557373047, "learning_rate": 4.864761026586753e-05, "log_odds_chosen": 2.8537378311157227, "log_odds_ratio": -0.36928698420524597, "logits/chosen": -0.4368304908275604, "logits/rejected": -0.49222734570503235, "logps/chosen": -0.09692305326461792, "logps/rejected": -0.6475295424461365, "loss": 4.222, "nll_loss": 1.0185637474060059, "rewards/accuracies": 0.75, "rewards/chosen": -0.009692303836345673, "rewards/margins": 0.055060647428035736, "rewards/rejected": -0.06475295126438141, "step": 1798 }, { "epoch": 1.244121715076072, "grad_norm": 6.7715535163879395, "learning_rate": 4.864376824957738e-05, "log_odds_chosen": 4.448674201965332, "log_odds_ratio": -0.12964072823524475, "logits/chosen": -0.43957293033599854, "logits/rejected": -0.5094834566116333, "logps/chosen": -0.057148225605487823, "logps/rejected": -1.0359687805175781, "loss": 3.5742, "nll_loss": 0.8805873394012451, "rewards/accuracies": 1.0, "rewards/chosen": -0.005714822560548782, "rewards/margins": 0.09788206219673157, "rewards/rejected": -0.10359688103199005, "step": 1799 }, { "epoch": 1.2448132780082988, "grad_norm": 5.604511737823486, "learning_rate": 4.863992623328723e-05, "log_odds_chosen": 4.957757949829102, "log_odds_ratio": -0.0533502958714962, "logits/chosen": -0.6584824323654175, "logits/rejected": -0.7121330499649048, "logps/chosen": -0.0186811201274395, "logps/rejected": -0.6562873125076294, "loss": 3.541, "nll_loss": 0.8799247741699219, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018681121291592717, "rewards/margins": 0.06376062333583832, "rewards/rejected": -0.06562872976064682, "step": 1800 }, { "epoch": 1.2455048409405256, "grad_norm": 4.875192165374756, "learning_rate": 4.863608421699708e-05, "log_odds_chosen": 1.3748055696487427, "log_odds_ratio": -0.40649357438087463, "logits/chosen": -0.37381306290626526, "logits/rejected": -0.3776434063911438, "logps/chosen": -0.1655338704586029, "logps/rejected": -0.6377518177032471, "loss": 3.6855, "nll_loss": 0.8807240128517151, "rewards/accuracies": 0.75, "rewards/chosen": -0.01655338704586029, "rewards/margins": 0.047221794724464417, "rewards/rejected": -0.06377518177032471, "step": 1801 }, { "epoch": 1.2461964038727524, "grad_norm": 7.296558380126953, "learning_rate": 4.863224220070694e-05, "log_odds_chosen": 1.5173479318618774, "log_odds_ratio": -0.5433456897735596, "logits/chosen": -0.6616544127464294, "logits/rejected": -0.6958113312721252, "logps/chosen": -0.16676326096057892, "logps/rejected": -0.5516203045845032, "loss": 4.5563, "nll_loss": 1.0847339630126953, "rewards/accuracies": 0.625, "rewards/chosen": -0.016676325350999832, "rewards/margins": 0.03848570957779884, "rewards/rejected": -0.055162034928798676, "step": 1802 }, { "epoch": 1.2468879668049793, "grad_norm": 3.2413599491119385, "learning_rate": 4.862840018441678e-05, "log_odds_chosen": 5.263888835906982, "log_odds_ratio": -0.1274007260799408, "logits/chosen": -0.5223049521446228, "logits/rejected": -0.5819611549377441, "logps/chosen": -0.05425209179520607, "logps/rejected": -0.583556056022644, "loss": 3.2491, "nll_loss": 0.7995354533195496, "rewards/accuracies": 1.0, "rewards/chosen": -0.005425209179520607, "rewards/margins": 0.05293039232492447, "rewards/rejected": -0.05835559964179993, "step": 1803 }, { "epoch": 1.247579529737206, "grad_norm": 22.35890007019043, "learning_rate": 4.8624558168126635e-05, "log_odds_chosen": 2.573143720626831, "log_odds_ratio": -0.26976823806762695, "logits/chosen": -0.45450615882873535, "logits/rejected": -0.5272830128669739, "logps/chosen": -0.11467941105365753, "logps/rejected": -0.5504158139228821, "loss": 3.3287, "nll_loss": 0.8052026629447937, "rewards/accuracies": 1.0, "rewards/chosen": -0.011467942036688328, "rewards/margins": 0.043573640286922455, "rewards/rejected": -0.05504158139228821, "step": 1804 }, { "epoch": 1.248271092669433, "grad_norm": 8.21379566192627, "learning_rate": 4.862071615183649e-05, "log_odds_chosen": 4.7264862060546875, "log_odds_ratio": -0.3322974145412445, "logits/chosen": -0.6622597575187683, "logits/rejected": -0.6728526949882507, "logps/chosen": -0.08343342691659927, "logps/rejected": -0.7681043148040771, "loss": 3.1076, "nll_loss": 0.7436782121658325, "rewards/accuracies": 0.75, "rewards/chosen": -0.008343343622982502, "rewards/margins": 0.06846708059310913, "rewards/rejected": -0.07681042701005936, "step": 1805 }, { "epoch": 1.2489626556016598, "grad_norm": 6.535581588745117, "learning_rate": 4.861687413554633e-05, "log_odds_chosen": 1.8638652563095093, "log_odds_ratio": -0.7721409201622009, "logits/chosen": -0.4947623312473297, "logits/rejected": -0.4675024151802063, "logps/chosen": -0.16926667094230652, "logps/rejected": -0.5563034415245056, "loss": 3.2474, "nll_loss": 0.7346447110176086, "rewards/accuracies": 0.75, "rewards/chosen": -0.01692666858434677, "rewards/margins": 0.03870367258787155, "rewards/rejected": -0.05563034117221832, "step": 1806 }, { "epoch": 1.2496542185338866, "grad_norm": 4.28892183303833, "learning_rate": 4.8613032119256186e-05, "log_odds_chosen": 5.750433444976807, "log_odds_ratio": -0.047676898539066315, "logits/chosen": -0.4715877175331116, "logits/rejected": -0.4328291416168213, "logps/chosen": -0.0543404258787632, "logps/rejected": -1.378570556640625, "loss": 2.9613, "nll_loss": 0.7355595231056213, "rewards/accuracies": 1.0, "rewards/chosen": -0.005434042774140835, "rewards/margins": 0.1324230134487152, "rewards/rejected": -0.13785704970359802, "step": 1807 }, { "epoch": 1.2503457814661134, "grad_norm": 7.0970892906188965, "learning_rate": 4.860919010296604e-05, "log_odds_chosen": 3.2057337760925293, "log_odds_ratio": -0.27527907490730286, "logits/chosen": -0.5142180919647217, "logits/rejected": -0.5255178213119507, "logps/chosen": -0.08345197141170502, "logps/rejected": -0.6663510799407959, "loss": 4.0499, "nll_loss": 0.984959602355957, "rewards/accuracies": 0.875, "rewards/chosen": -0.008345197886228561, "rewards/margins": 0.058289919048547745, "rewards/rejected": -0.06663510948419571, "step": 1808 }, { "epoch": 1.2510373443983402, "grad_norm": 5.712761878967285, "learning_rate": 4.860534808667589e-05, "log_odds_chosen": 2.3765594959259033, "log_odds_ratio": -0.23137031495571136, "logits/chosen": -0.6019795536994934, "logits/rejected": -0.582493007183075, "logps/chosen": -0.0825355052947998, "logps/rejected": -0.5512726306915283, "loss": 4.1693, "nll_loss": 1.0191932916641235, "rewards/accuracies": 0.875, "rewards/chosen": -0.00825355015695095, "rewards/margins": 0.04687371850013733, "rewards/rejected": -0.05512726306915283, "step": 1809 }, { "epoch": 1.251728907330567, "grad_norm": 7.043604373931885, "learning_rate": 4.8601506070385736e-05, "log_odds_chosen": 2.8349339962005615, "log_odds_ratio": -0.37025701999664307, "logits/chosen": -0.6216073036193848, "logits/rejected": -0.6692014932632446, "logps/chosen": -0.12638868391513824, "logps/rejected": -0.4497242867946625, "loss": 4.7696, "nll_loss": 1.15536630153656, "rewards/accuracies": 0.875, "rewards/chosen": -0.012638869695365429, "rewards/margins": 0.03233356401324272, "rewards/rejected": -0.04497242718935013, "step": 1810 }, { "epoch": 1.252420470262794, "grad_norm": 5.4507880210876465, "learning_rate": 4.8597664054095595e-05, "log_odds_chosen": 4.4462409019470215, "log_odds_ratio": -0.18066637217998505, "logits/chosen": -0.3726177215576172, "logits/rejected": -0.4263952672481537, "logps/chosen": -0.06515246629714966, "logps/rejected": -0.7420536875724792, "loss": 2.7647, "nll_loss": 0.6730960607528687, "rewards/accuracies": 0.75, "rewards/chosen": -0.006515247281640768, "rewards/margins": 0.06769011914730072, "rewards/rejected": -0.07420536875724792, "step": 1811 }, { "epoch": 1.2531120331950207, "grad_norm": 8.727974891662598, "learning_rate": 4.859382203780544e-05, "log_odds_chosen": 3.2646877765655518, "log_odds_ratio": -0.3166475296020508, "logits/chosen": -0.5484322309494019, "logits/rejected": -0.5809099674224854, "logps/chosen": -0.07628434896469116, "logps/rejected": -0.6679997444152832, "loss": 3.6801, "nll_loss": 0.8883615732192993, "rewards/accuracies": 0.75, "rewards/chosen": -0.0076284343376755714, "rewards/margins": 0.05917154252529144, "rewards/rejected": -0.06679997593164444, "step": 1812 }, { "epoch": 1.2538035961272476, "grad_norm": 5.453410625457764, "learning_rate": 4.8589980021515293e-05, "log_odds_chosen": 4.002201557159424, "log_odds_ratio": -0.1269581913948059, "logits/chosen": -0.2462340146303177, "logits/rejected": -0.24367199838161469, "logps/chosen": -0.10391269624233246, "logps/rejected": -0.9489084482192993, "loss": 3.1281, "nll_loss": 0.7693310379981995, "rewards/accuracies": 1.0, "rewards/chosen": -0.010391268879175186, "rewards/margins": 0.08449958264827728, "rewards/rejected": -0.09489084780216217, "step": 1813 }, { "epoch": 1.2544951590594744, "grad_norm": 6.228405475616455, "learning_rate": 4.8586138005225146e-05, "log_odds_chosen": 3.509197950363159, "log_odds_ratio": -0.11182574927806854, "logits/chosen": 0.05738585814833641, "logits/rejected": -0.041131533682346344, "logps/chosen": -0.05739348381757736, "logps/rejected": -0.6539136171340942, "loss": 2.5143, "nll_loss": 0.6173902153968811, "rewards/accuracies": 1.0, "rewards/chosen": -0.005739348940551281, "rewards/margins": 0.05965201556682587, "rewards/rejected": -0.06539136916399002, "step": 1814 }, { "epoch": 1.2551867219917012, "grad_norm": 4.185488700866699, "learning_rate": 4.858229598893499e-05, "log_odds_chosen": 3.572788715362549, "log_odds_ratio": -0.15489646792411804, "logits/chosen": -0.3606988191604614, "logits/rejected": -0.391099750995636, "logps/chosen": -0.07718568295240402, "logps/rejected": -1.0063767433166504, "loss": 2.7932, "nll_loss": 0.6828101277351379, "rewards/accuracies": 1.0, "rewards/chosen": -0.0077185677364468575, "rewards/margins": 0.09291911125183105, "rewards/rejected": -0.10063768178224564, "step": 1815 }, { "epoch": 1.255878284923928, "grad_norm": 6.819009304046631, "learning_rate": 4.8578453972644844e-05, "log_odds_chosen": 2.5417683124542236, "log_odds_ratio": -0.6759305596351624, "logits/chosen": -0.1891404539346695, "logits/rejected": -0.16132640838623047, "logps/chosen": -0.13642950356006622, "logps/rejected": -0.48567137122154236, "loss": 3.4566, "nll_loss": 0.7965582013130188, "rewards/accuracies": 0.875, "rewards/chosen": -0.013642950914800167, "rewards/margins": 0.03492419049143791, "rewards/rejected": -0.048567142337560654, "step": 1816 }, { "epoch": 1.2565698478561549, "grad_norm": 4.596541404724121, "learning_rate": 4.8574611956354696e-05, "log_odds_chosen": 2.963163137435913, "log_odds_ratio": -0.4418284595012665, "logits/chosen": -0.227559432387352, "logits/rejected": -0.27367666363716125, "logps/chosen": -0.11844546347856522, "logps/rejected": -0.6489789485931396, "loss": 2.3538, "nll_loss": 0.5442634224891663, "rewards/accuracies": 0.75, "rewards/chosen": -0.011844546534121037, "rewards/margins": 0.0530533492565155, "rewards/rejected": -0.06489789485931396, "step": 1817 }, { "epoch": 1.2572614107883817, "grad_norm": 5.669462203979492, "learning_rate": 4.857076994006455e-05, "log_odds_chosen": 1.8501408100128174, "log_odds_ratio": -0.5410861968994141, "logits/chosen": -0.10962569713592529, "logits/rejected": -0.12081693857908249, "logps/chosen": -0.15979516506195068, "logps/rejected": -0.41644734144210815, "loss": 2.8134, "nll_loss": 0.6492462158203125, "rewards/accuracies": 0.75, "rewards/chosen": -0.015979517251253128, "rewards/margins": 0.025665219873189926, "rewards/rejected": -0.041644733399152756, "step": 1818 }, { "epoch": 1.2579529737206085, "grad_norm": 7.734716415405273, "learning_rate": 4.8566927923774395e-05, "log_odds_chosen": 5.519069671630859, "log_odds_ratio": -0.021997055038809776, "logits/chosen": -0.11598198860883713, "logits/rejected": -0.18683940172195435, "logps/chosen": -0.022436058148741722, "logps/rejected": -1.1067173480987549, "loss": 3.2506, "nll_loss": 0.810446560382843, "rewards/accuracies": 1.0, "rewards/chosen": -0.002243605675175786, "rewards/margins": 0.10842813551425934, "rewards/rejected": -0.1106717437505722, "step": 1819 }, { "epoch": 1.2586445366528354, "grad_norm": 6.248927593231201, "learning_rate": 4.8563085907484254e-05, "log_odds_chosen": 2.9996016025543213, "log_odds_ratio": -0.2759028375148773, "logits/chosen": -0.38097551465034485, "logits/rejected": -0.3946007490158081, "logps/chosen": -0.07410791516304016, "logps/rejected": -0.7756040096282959, "loss": 3.6204, "nll_loss": 0.8775010108947754, "rewards/accuracies": 0.875, "rewards/chosen": -0.007410791236907244, "rewards/margins": 0.07014961540699005, "rewards/rejected": -0.0775604099035263, "step": 1820 }, { "epoch": 1.2593360995850622, "grad_norm": 8.13671588897705, "learning_rate": 4.85592438911941e-05, "log_odds_chosen": 4.534608840942383, "log_odds_ratio": -0.20198392868041992, "logits/chosen": -0.511385977268219, "logits/rejected": -0.5996202826499939, "logps/chosen": -0.05032597482204437, "logps/rejected": -0.923053503036499, "loss": 4.5111, "nll_loss": 1.1075801849365234, "rewards/accuracies": 0.875, "rewards/chosen": -0.005032597575336695, "rewards/margins": 0.0872727483510971, "rewards/rejected": -0.09230534732341766, "step": 1821 }, { "epoch": 1.260027662517289, "grad_norm": 7.075880527496338, "learning_rate": 4.855540187490395e-05, "log_odds_chosen": 4.356635093688965, "log_odds_ratio": -0.11832673847675323, "logits/chosen": -0.2284286618232727, "logits/rejected": -0.2228046953678131, "logps/chosen": -0.0332304872572422, "logps/rejected": -0.4895108938217163, "loss": 3.2858, "nll_loss": 0.8096163272857666, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033230483531951904, "rewards/margins": 0.04562804475426674, "rewards/rejected": -0.04895108938217163, "step": 1822 }, { "epoch": 1.2607192254495159, "grad_norm": 4.531290054321289, "learning_rate": 4.8551559858613804e-05, "log_odds_chosen": 5.777563571929932, "log_odds_ratio": -0.20365619659423828, "logits/chosen": -0.27427423000335693, "logits/rejected": -0.3515477478504181, "logps/chosen": -0.047163330018520355, "logps/rejected": -0.9215035438537598, "loss": 2.7918, "nll_loss": 0.6775762438774109, "rewards/accuracies": 0.875, "rewards/chosen": -0.0047163330018520355, "rewards/margins": 0.08743403106927872, "rewards/rejected": -0.09215036034584045, "step": 1823 }, { "epoch": 1.2614107883817427, "grad_norm": 9.5455322265625, "learning_rate": 4.854771784232366e-05, "log_odds_chosen": 1.7303236722946167, "log_odds_ratio": -0.5304129719734192, "logits/chosen": -0.7086005806922913, "logits/rejected": -0.6558952927589417, "logps/chosen": -0.14678457379341125, "logps/rejected": -0.598397970199585, "loss": 5.3619, "nll_loss": 1.2874255180358887, "rewards/accuracies": 0.625, "rewards/chosen": -0.01467845868319273, "rewards/margins": 0.04516134038567543, "rewards/rejected": -0.059839800000190735, "step": 1824 }, { "epoch": 1.2621023513139695, "grad_norm": 4.559263706207275, "learning_rate": 4.85438758260335e-05, "log_odds_chosen": 2.271963596343994, "log_odds_ratio": -0.1910509467124939, "logits/chosen": -0.3403293490409851, "logits/rejected": -0.3471444249153137, "logps/chosen": -0.08710083365440369, "logps/rejected": -0.5700175762176514, "loss": 3.4875, "nll_loss": 0.8527782559394836, "rewards/accuracies": 0.875, "rewards/chosen": -0.008710084483027458, "rewards/margins": 0.04829167574644089, "rewards/rejected": -0.057001762092113495, "step": 1825 }, { "epoch": 1.2627939142461964, "grad_norm": 7.239500522613525, "learning_rate": 4.8540033809743355e-05, "log_odds_chosen": 3.428022623062134, "log_odds_ratio": -0.2764246463775635, "logits/chosen": -0.808087944984436, "logits/rejected": -0.8824774026870728, "logps/chosen": -0.05949515849351883, "logps/rejected": -0.5695070028305054, "loss": 4.5764, "nll_loss": 1.1164684295654297, "rewards/accuracies": 0.875, "rewards/chosen": -0.005949515849351883, "rewards/margins": 0.051001183688640594, "rewards/rejected": -0.05695069581270218, "step": 1826 }, { "epoch": 1.2634854771784232, "grad_norm": 4.512261390686035, "learning_rate": 4.853619179345321e-05, "log_odds_chosen": 4.599119186401367, "log_odds_ratio": -0.25986239314079285, "logits/chosen": -0.5794503092765808, "logits/rejected": -0.5732518434524536, "logps/chosen": -0.10974626243114471, "logps/rejected": -0.6613384485244751, "loss": 2.8689, "nll_loss": 0.6912313103675842, "rewards/accuracies": 0.875, "rewards/chosen": -0.010974626056849957, "rewards/margins": 0.055159226059913635, "rewards/rejected": -0.06613385677337646, "step": 1827 }, { "epoch": 1.26417704011065, "grad_norm": 3.2224671840667725, "learning_rate": 4.853234977716305e-05, "log_odds_chosen": 3.540921211242676, "log_odds_ratio": -0.31274497509002686, "logits/chosen": -0.36539560556411743, "logits/rejected": -0.37292957305908203, "logps/chosen": -0.12696358561515808, "logps/rejected": -0.7113257050514221, "loss": 2.2019, "nll_loss": 0.519207775592804, "rewards/accuracies": 0.75, "rewards/chosen": -0.012696359306573868, "rewards/margins": 0.058436211198568344, "rewards/rejected": -0.07113257050514221, "step": 1828 }, { "epoch": 1.2648686030428768, "grad_norm": 8.992777824401855, "learning_rate": 4.852850776087291e-05, "log_odds_chosen": 3.9111456871032715, "log_odds_ratio": -0.403731107711792, "logits/chosen": -0.2728814482688904, "logits/rejected": -0.3519219160079956, "logps/chosen": -0.1583581566810608, "logps/rejected": -0.9107358455657959, "loss": 3.9931, "nll_loss": 0.9579125642776489, "rewards/accuracies": 0.875, "rewards/chosen": -0.01583581417798996, "rewards/margins": 0.07523778080940247, "rewards/rejected": -0.09107358753681183, "step": 1829 }, { "epoch": 1.2655601659751037, "grad_norm": 6.241001605987549, "learning_rate": 4.852466574458276e-05, "log_odds_chosen": 3.549448013305664, "log_odds_ratio": -0.22406116127967834, "logits/chosen": -0.5081853866577148, "logits/rejected": -0.5429813265800476, "logps/chosen": -0.06931378692388535, "logps/rejected": -0.6130677461624146, "loss": 3.3564, "nll_loss": 0.8167012929916382, "rewards/accuracies": 0.875, "rewards/chosen": -0.00693137850612402, "rewards/margins": 0.05437539890408516, "rewards/rejected": -0.06130677089095116, "step": 1830 }, { "epoch": 1.2662517289073305, "grad_norm": 7.095801830291748, "learning_rate": 4.852082372829261e-05, "log_odds_chosen": 3.620205879211426, "log_odds_ratio": -0.22003838419914246, "logits/chosen": -0.5255930423736572, "logits/rejected": -0.5722360610961914, "logps/chosen": -0.05655446648597717, "logps/rejected": -0.4140484929084778, "loss": 3.5879, "nll_loss": 0.874961256980896, "rewards/accuracies": 0.875, "rewards/chosen": -0.005655447021126747, "rewards/margins": 0.035749401897192, "rewards/rejected": -0.0414048507809639, "step": 1831 }, { "epoch": 1.2669432918395573, "grad_norm": 7.453549861907959, "learning_rate": 4.851698171200246e-05, "log_odds_chosen": 2.598477363586426, "log_odds_ratio": -0.3083294928073883, "logits/chosen": -0.345392644405365, "logits/rejected": -0.43535852432250977, "logps/chosen": -0.06634186208248138, "logps/rejected": -0.6782900094985962, "loss": 3.3553, "nll_loss": 0.8080006241798401, "rewards/accuracies": 0.875, "rewards/chosen": -0.006634186953306198, "rewards/margins": 0.06119481474161148, "rewards/rejected": -0.06782899796962738, "step": 1832 }, { "epoch": 1.2676348547717842, "grad_norm": 4.456362724304199, "learning_rate": 4.8513139695712315e-05, "log_odds_chosen": 5.967951774597168, "log_odds_ratio": -0.03453276678919792, "logits/chosen": -0.30861398577690125, "logits/rejected": -0.31359925866127014, "logps/chosen": -0.04853855073451996, "logps/rejected": -0.9749094247817993, "loss": 2.8744, "nll_loss": 0.7151439785957336, "rewards/accuracies": 1.0, "rewards/chosen": -0.004853855352848768, "rewards/margins": 0.0926370918750763, "rewards/rejected": -0.09749095141887665, "step": 1833 }, { "epoch": 1.268326417704011, "grad_norm": 4.406580448150635, "learning_rate": 4.850929767942216e-05, "log_odds_chosen": 4.313794136047363, "log_odds_ratio": -0.24935269355773926, "logits/chosen": -0.13199511170387268, "logits/rejected": -0.11371532827615738, "logps/chosen": -0.10822973400354385, "logps/rejected": -0.9197275638580322, "loss": 2.1964, "nll_loss": 0.5241571664810181, "rewards/accuracies": 0.875, "rewards/chosen": -0.01082297321408987, "rewards/margins": 0.08114977926015854, "rewards/rejected": -0.09197275340557098, "step": 1834 }, { "epoch": 1.2690179806362378, "grad_norm": 7.473618984222412, "learning_rate": 4.850545566313201e-05, "log_odds_chosen": 5.008003234863281, "log_odds_ratio": -0.17252132296562195, "logits/chosen": -0.5653342604637146, "logits/rejected": -0.5684071779251099, "logps/chosen": -0.04174775630235672, "logps/rejected": -0.6983299255371094, "loss": 3.6188, "nll_loss": 0.8874568939208984, "rewards/accuracies": 1.0, "rewards/chosen": -0.004174775909632444, "rewards/margins": 0.06565822660923004, "rewards/rejected": -0.06983299553394318, "step": 1835 }, { "epoch": 1.2697095435684647, "grad_norm": 6.988341331481934, "learning_rate": 4.8501613646841866e-05, "log_odds_chosen": 2.817474603652954, "log_odds_ratio": -0.5724210739135742, "logits/chosen": -0.5476275682449341, "logits/rejected": -0.6104252338409424, "logps/chosen": -0.16373126208782196, "logps/rejected": -0.46579307317733765, "loss": 3.4597, "nll_loss": 0.8076732158660889, "rewards/accuracies": 0.625, "rewards/chosen": -0.016373127698898315, "rewards/margins": 0.03020618110895157, "rewards/rejected": -0.046579305082559586, "step": 1836 }, { "epoch": 1.2704011065006915, "grad_norm": 6.129537105560303, "learning_rate": 4.849777163055171e-05, "log_odds_chosen": 4.696710586547852, "log_odds_ratio": -0.20053833723068237, "logits/chosen": -0.329264372587204, "logits/rejected": -0.3493046164512634, "logps/chosen": -0.06220349669456482, "logps/rejected": -0.9139543771743774, "loss": 2.9743, "nll_loss": 0.7235198020935059, "rewards/accuracies": 1.0, "rewards/chosen": -0.006220349110662937, "rewards/margins": 0.08517508953809738, "rewards/rejected": -0.09139543771743774, "step": 1837 }, { "epoch": 1.2710926694329183, "grad_norm": 6.702199459075928, "learning_rate": 4.849392961426157e-05, "log_odds_chosen": 3.0366287231445312, "log_odds_ratio": -0.2572011649608612, "logits/chosen": -0.28276461362838745, "logits/rejected": -0.3073059022426605, "logps/chosen": -0.0854082852602005, "logps/rejected": -0.6870397329330444, "loss": 3.8337, "nll_loss": 0.9326946139335632, "rewards/accuracies": 0.875, "rewards/chosen": -0.008540828712284565, "rewards/margins": 0.060163144022226334, "rewards/rejected": -0.06870397925376892, "step": 1838 }, { "epoch": 1.2717842323651452, "grad_norm": 5.057860851287842, "learning_rate": 4.8490087597971416e-05, "log_odds_chosen": 3.52040958404541, "log_odds_ratio": -0.35257863998413086, "logits/chosen": -0.436309278011322, "logits/rejected": -0.4628809690475464, "logps/chosen": -0.07719769328832626, "logps/rejected": -0.5573811531066895, "loss": 4.0301, "nll_loss": 0.9722760915756226, "rewards/accuracies": 0.75, "rewards/chosen": -0.007719769608229399, "rewards/margins": 0.04801835119724274, "rewards/rejected": -0.05573812127113342, "step": 1839 }, { "epoch": 1.272475795297372, "grad_norm": 5.871735095977783, "learning_rate": 4.848624558168127e-05, "log_odds_chosen": 5.993706226348877, "log_odds_ratio": -0.018410231918096542, "logits/chosen": -0.5008187890052795, "logits/rejected": -0.5109366178512573, "logps/chosen": -0.02858724817633629, "logps/rejected": -0.9414122104644775, "loss": 3.1941, "nll_loss": 0.7966926097869873, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028587249107658863, "rewards/margins": 0.09128250181674957, "rewards/rejected": -0.09414122253656387, "step": 1840 }, { "epoch": 1.2731673582295988, "grad_norm": 5.2710700035095215, "learning_rate": 4.848240356539112e-05, "log_odds_chosen": 2.1282846927642822, "log_odds_ratio": -0.19243940711021423, "logits/chosen": -0.5377815961837769, "logits/rejected": -0.5604823231697083, "logps/chosen": -0.08244717121124268, "logps/rejected": -0.44714000821113586, "loss": 3.6366, "nll_loss": 0.8899109363555908, "rewards/accuracies": 1.0, "rewards/chosen": -0.008244717493653297, "rewards/margins": 0.03646928444504738, "rewards/rejected": -0.044714003801345825, "step": 1841 }, { "epoch": 1.2738589211618256, "grad_norm": 6.019529342651367, "learning_rate": 4.8478561549100974e-05, "log_odds_chosen": 2.516636610031128, "log_odds_ratio": -0.38365089893341064, "logits/chosen": -0.40743488073349, "logits/rejected": -0.4360080361366272, "logps/chosen": -0.06433766335248947, "logps/rejected": -0.4514871835708618, "loss": 3.3455, "nll_loss": 0.7980217933654785, "rewards/accuracies": 0.75, "rewards/chosen": -0.006433766335248947, "rewards/margins": 0.038714952766895294, "rewards/rejected": -0.04514871910214424, "step": 1842 }, { "epoch": 1.2745504840940525, "grad_norm": 7.783799171447754, "learning_rate": 4.847471953281082e-05, "log_odds_chosen": 1.5541954040527344, "log_odds_ratio": -0.5911738872528076, "logits/chosen": -0.7046796083450317, "logits/rejected": -0.6738811731338501, "logps/chosen": -0.1610485166311264, "logps/rejected": -0.28007322549819946, "loss": 4.0777, "nll_loss": 0.9603129625320435, "rewards/accuracies": 0.75, "rewards/chosen": -0.01610485091805458, "rewards/margins": 0.011902473866939545, "rewards/rejected": -0.028007326647639275, "step": 1843 }, { "epoch": 1.2752420470262793, "grad_norm": 6.018929958343506, "learning_rate": 4.847087751652067e-05, "log_odds_chosen": 5.784402847290039, "log_odds_ratio": -0.048644986003637314, "logits/chosen": -0.3585667610168457, "logits/rejected": -0.4079504609107971, "logps/chosen": -0.02275342121720314, "logps/rejected": -0.9510570764541626, "loss": 3.2988, "nll_loss": 0.8198418021202087, "rewards/accuracies": 1.0, "rewards/chosen": -0.002275342121720314, "rewards/margins": 0.0928303673863411, "rewards/rejected": -0.09510570764541626, "step": 1844 }, { "epoch": 1.2759336099585061, "grad_norm": 7.915690898895264, "learning_rate": 4.8467035500230524e-05, "log_odds_chosen": 5.180848121643066, "log_odds_ratio": -0.19197486340999603, "logits/chosen": -0.48925912380218506, "logits/rejected": -0.5417444705963135, "logps/chosen": -0.06355451047420502, "logps/rejected": -0.9609445333480835, "loss": 3.5779, "nll_loss": 0.8752825260162354, "rewards/accuracies": 0.875, "rewards/chosen": -0.0063554514199495316, "rewards/margins": 0.08973899483680725, "rewards/rejected": -0.09609444439411163, "step": 1845 }, { "epoch": 1.276625172890733, "grad_norm": 6.901232719421387, "learning_rate": 4.846319348394037e-05, "log_odds_chosen": 3.1123545169830322, "log_odds_ratio": -0.323904424905777, "logits/chosen": -0.568681538105011, "logits/rejected": -0.5214330554008484, "logps/chosen": -0.11305805295705795, "logps/rejected": -0.6645975112915039, "loss": 2.6877, "nll_loss": 0.6395328044891357, "rewards/accuracies": 0.75, "rewards/chosen": -0.011305805295705795, "rewards/margins": 0.055153943598270416, "rewards/rejected": -0.06645975261926651, "step": 1846 }, { "epoch": 1.2773167358229598, "grad_norm": 5.688883304595947, "learning_rate": 4.845935146765023e-05, "log_odds_chosen": 5.134402751922607, "log_odds_ratio": -0.07821536064147949, "logits/chosen": -0.6485186815261841, "logits/rejected": -0.6384646892547607, "logps/chosen": -0.06612791121006012, "logps/rejected": -0.8336848020553589, "loss": 3.464, "nll_loss": 0.8581699728965759, "rewards/accuracies": 1.0, "rewards/chosen": -0.006612791679799557, "rewards/margins": 0.07675568759441376, "rewards/rejected": -0.08336848020553589, "step": 1847 }, { "epoch": 1.2780082987551866, "grad_norm": 5.677089691162109, "learning_rate": 4.8455509451360075e-05, "log_odds_chosen": 5.470160484313965, "log_odds_ratio": -0.05512235313653946, "logits/chosen": -0.6574974656105042, "logits/rejected": -0.6757139563560486, "logps/chosen": -0.07178202271461487, "logps/rejected": -1.0133131742477417, "loss": 2.8147, "nll_loss": 0.698168933391571, "rewards/accuracies": 1.0, "rewards/chosen": -0.007178202271461487, "rewards/margins": 0.09415312111377716, "rewards/rejected": -0.10133132338523865, "step": 1848 }, { "epoch": 1.2786998616874135, "grad_norm": 7.259307384490967, "learning_rate": 4.845166743506993e-05, "log_odds_chosen": 1.1947388648986816, "log_odds_ratio": -0.4756326377391815, "logits/chosen": -0.7476555705070496, "logits/rejected": -0.7715795040130615, "logps/chosen": -0.13770818710327148, "logps/rejected": -0.2466597706079483, "loss": 3.4717, "nll_loss": 0.8203725218772888, "rewards/accuracies": 0.875, "rewards/chosen": -0.013770818710327148, "rewards/margins": 0.010895160026848316, "rewards/rejected": -0.02466597780585289, "step": 1849 }, { "epoch": 1.2793914246196403, "grad_norm": 7.345088005065918, "learning_rate": 4.844782541877978e-05, "log_odds_chosen": 2.8660197257995605, "log_odds_ratio": -0.25970110297203064, "logits/chosen": -0.5066275000572205, "logits/rejected": -0.5241155028343201, "logps/chosen": -0.08532549440860748, "logps/rejected": -0.7479270696640015, "loss": 4.1521, "nll_loss": 1.012049913406372, "rewards/accuracies": 0.875, "rewards/chosen": -0.008532550185918808, "rewards/margins": 0.06626015901565552, "rewards/rejected": -0.07479271292686462, "step": 1850 }, { "epoch": 1.2800829875518671, "grad_norm": 6.5352463722229, "learning_rate": 4.844398340248963e-05, "log_odds_chosen": 5.387038230895996, "log_odds_ratio": -0.08680490404367447, "logits/chosen": -0.38126322627067566, "logits/rejected": -0.4586794674396515, "logps/chosen": -0.025043586269021034, "logps/rejected": -0.6016091704368591, "loss": 3.5159, "nll_loss": 0.8702915906906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025043589994311333, "rewards/margins": 0.057656560093164444, "rewards/rejected": -0.06016091629862785, "step": 1851 }, { "epoch": 1.280774550484094, "grad_norm": 7.052436828613281, "learning_rate": 4.844014138619948e-05, "log_odds_chosen": 4.889477729797363, "log_odds_ratio": -0.26520976424217224, "logits/chosen": -0.6732861399650574, "logits/rejected": -0.6910228729248047, "logps/chosen": -0.058603618294000626, "logps/rejected": -0.8419814109802246, "loss": 2.993, "nll_loss": 0.7217181921005249, "rewards/accuracies": 0.875, "rewards/chosen": -0.005860361270606518, "rewards/margins": 0.07833777368068695, "rewards/rejected": -0.08419813960790634, "step": 1852 }, { "epoch": 1.2814661134163208, "grad_norm": 7.370616436004639, "learning_rate": 4.843629936990933e-05, "log_odds_chosen": 1.2901815176010132, "log_odds_ratio": -0.5043874979019165, "logits/chosen": -0.7290447950363159, "logits/rejected": -0.7297863960266113, "logps/chosen": -0.10335765779018402, "logps/rejected": -0.5296757817268372, "loss": 3.9113, "nll_loss": 0.9273877143859863, "rewards/accuracies": 0.75, "rewards/chosen": -0.010335765779018402, "rewards/margins": 0.04263181611895561, "rewards/rejected": -0.052967578172683716, "step": 1853 }, { "epoch": 1.2821576763485476, "grad_norm": 9.12453556060791, "learning_rate": 4.843245735361918e-05, "log_odds_chosen": 2.5721120834350586, "log_odds_ratio": -0.4275425970554352, "logits/chosen": -0.45946818590164185, "logits/rejected": -0.5032299757003784, "logps/chosen": -0.15334449708461761, "logps/rejected": -0.688929557800293, "loss": 2.9555, "nll_loss": 0.6961199045181274, "rewards/accuracies": 0.75, "rewards/chosen": -0.015334450639784336, "rewards/margins": 0.053558506071567535, "rewards/rejected": -0.0688929557800293, "step": 1854 }, { "epoch": 1.2828492392807744, "grad_norm": 5.654973030090332, "learning_rate": 4.842861533732903e-05, "log_odds_chosen": 4.989964485168457, "log_odds_ratio": -0.12226064503192902, "logits/chosen": -0.6214022636413574, "logits/rejected": -0.7165380120277405, "logps/chosen": -0.06140881031751633, "logps/rejected": -0.8350120186805725, "loss": 3.106, "nll_loss": 0.7642849683761597, "rewards/accuracies": 1.0, "rewards/chosen": -0.00614088075235486, "rewards/margins": 0.07736032456159592, "rewards/rejected": -0.08350120484828949, "step": 1855 }, { "epoch": 1.2835408022130013, "grad_norm": 6.4906206130981445, "learning_rate": 4.842477332103889e-05, "log_odds_chosen": 2.749014139175415, "log_odds_ratio": -0.22980672121047974, "logits/chosen": -0.42675426602363586, "logits/rejected": -0.4076400399208069, "logps/chosen": -0.09638235718011856, "logps/rejected": -0.7707810401916504, "loss": 3.0649, "nll_loss": 0.743255615234375, "rewards/accuracies": 1.0, "rewards/chosen": -0.009638235904276371, "rewards/margins": 0.06743986904621124, "rewards/rejected": -0.07707811146974564, "step": 1856 }, { "epoch": 1.284232365145228, "grad_norm": 8.228878021240234, "learning_rate": 4.842093130474873e-05, "log_odds_chosen": 2.587768316268921, "log_odds_ratio": -0.5744894742965698, "logits/chosen": -0.6129688620567322, "logits/rejected": -0.5657984018325806, "logps/chosen": -0.1795559525489807, "logps/rejected": -0.7216471433639526, "loss": 3.8535, "nll_loss": 0.9059350490570068, "rewards/accuracies": 0.875, "rewards/chosen": -0.0179555956274271, "rewards/margins": 0.05420912057161331, "rewards/rejected": -0.07216471433639526, "step": 1857 }, { "epoch": 1.284923928077455, "grad_norm": 7.017327785491943, "learning_rate": 4.8417089288458586e-05, "log_odds_chosen": 4.121495246887207, "log_odds_ratio": -0.34169018268585205, "logits/chosen": -0.6251221895217896, "logits/rejected": -0.5807482600212097, "logps/chosen": -0.0728740394115448, "logps/rejected": -0.8007559776306152, "loss": 3.7888, "nll_loss": 0.9130185842514038, "rewards/accuracies": 0.75, "rewards/chosen": -0.007287404499948025, "rewards/margins": 0.07278818637132645, "rewards/rejected": -0.08007559180259705, "step": 1858 }, { "epoch": 1.2856154910096818, "grad_norm": 6.424606800079346, "learning_rate": 4.841324727216844e-05, "log_odds_chosen": 5.630163192749023, "log_odds_ratio": -0.11693432927131653, "logits/chosen": -0.6970394849777222, "logits/rejected": -0.7171042561531067, "logps/chosen": -0.08394990861415863, "logps/rejected": -0.9980266094207764, "loss": 3.6016, "nll_loss": 0.8887089490890503, "rewards/accuracies": 1.0, "rewards/chosen": -0.008394991047680378, "rewards/margins": 0.0914076715707779, "rewards/rejected": -0.0998026579618454, "step": 1859 }, { "epoch": 1.2863070539419086, "grad_norm": 7.732998371124268, "learning_rate": 4.840940525587829e-05, "log_odds_chosen": 5.277063369750977, "log_odds_ratio": -0.08803144097328186, "logits/chosen": -0.44812747836112976, "logits/rejected": -0.43661120533943176, "logps/chosen": -0.06136512756347656, "logps/rejected": -1.2950425148010254, "loss": 3.5298, "nll_loss": 0.8736498951911926, "rewards/accuracies": 1.0, "rewards/chosen": -0.006136512849479914, "rewards/margins": 0.12336773425340652, "rewards/rejected": -0.1295042335987091, "step": 1860 }, { "epoch": 1.2869986168741354, "grad_norm": 5.051394939422607, "learning_rate": 4.8405563239588136e-05, "log_odds_chosen": 3.0801963806152344, "log_odds_ratio": -0.43634703755378723, "logits/chosen": -0.4509056508541107, "logits/rejected": -0.4946168065071106, "logps/chosen": -0.09263553470373154, "logps/rejected": -0.7222429513931274, "loss": 3.4704, "nll_loss": 0.8239755034446716, "rewards/accuracies": 0.625, "rewards/chosen": -0.009263553656637669, "rewards/margins": 0.06296074390411377, "rewards/rejected": -0.07222429662942886, "step": 1861 }, { "epoch": 1.2876901798063622, "grad_norm": 6.35806131362915, "learning_rate": 4.840172122329799e-05, "log_odds_chosen": 3.0220894813537598, "log_odds_ratio": -0.1876365840435028, "logits/chosen": -0.5654817819595337, "logits/rejected": -0.613560676574707, "logps/chosen": -0.14155136048793793, "logps/rejected": -0.7994102239608765, "loss": 3.0146, "nll_loss": 0.7348905801773071, "rewards/accuracies": 1.0, "rewards/chosen": -0.014155135490000248, "rewards/margins": 0.06578588485717773, "rewards/rejected": -0.079941026866436, "step": 1862 }, { "epoch": 1.288381742738589, "grad_norm": 6.703084468841553, "learning_rate": 4.839787920700784e-05, "log_odds_chosen": 2.794166326522827, "log_odds_ratio": -0.45009666681289673, "logits/chosen": -0.5885961055755615, "logits/rejected": -0.6103361248970032, "logps/chosen": -0.08698848634958267, "logps/rejected": -0.5582041144371033, "loss": 3.3271, "nll_loss": 0.7867544293403625, "rewards/accuracies": 0.625, "rewards/chosen": -0.008698849007487297, "rewards/margins": 0.047121562063694, "rewards/rejected": -0.05582040920853615, "step": 1863 }, { "epoch": 1.2890733056708161, "grad_norm": 5.702164173126221, "learning_rate": 4.839403719071769e-05, "log_odds_chosen": 5.4588541984558105, "log_odds_ratio": -0.14614000916481018, "logits/chosen": -0.606258749961853, "logits/rejected": -0.6877145767211914, "logps/chosen": -0.04850241541862488, "logps/rejected": -1.0530086755752563, "loss": 4.0411, "nll_loss": 0.9956707954406738, "rewards/accuracies": 1.0, "rewards/chosen": -0.00485024182125926, "rewards/margins": 0.10045063495635986, "rewards/rejected": -0.10530087351799011, "step": 1864 }, { "epoch": 1.289764868603043, "grad_norm": 7.933743476867676, "learning_rate": 4.8390195174427546e-05, "log_odds_chosen": 3.8146302700042725, "log_odds_ratio": -0.34971341490745544, "logits/chosen": -0.48540371656417847, "logits/rejected": -0.5037742853164673, "logps/chosen": -0.06129493564367294, "logps/rejected": -0.8910010457038879, "loss": 4.3695, "nll_loss": 1.0573959350585938, "rewards/accuracies": 0.75, "rewards/chosen": -0.006129493936896324, "rewards/margins": 0.08297061175107956, "rewards/rejected": -0.08910011500120163, "step": 1865 }, { "epoch": 1.2904564315352698, "grad_norm": 7.737270832061768, "learning_rate": 4.838635315813739e-05, "log_odds_chosen": 3.4090940952301025, "log_odds_ratio": -0.29580122232437134, "logits/chosen": -0.642238438129425, "logits/rejected": -0.6717078685760498, "logps/chosen": -0.10193420946598053, "logps/rejected": -0.6569085717201233, "loss": 3.8894, "nll_loss": 0.9427617192268372, "rewards/accuracies": 0.875, "rewards/chosen": -0.010193421505391598, "rewards/margins": 0.055497437715530396, "rewards/rejected": -0.06569086015224457, "step": 1866 }, { "epoch": 1.2911479944674966, "grad_norm": 8.35338020324707, "learning_rate": 4.8382511141847244e-05, "log_odds_chosen": 5.628767967224121, "log_odds_ratio": -0.34031566977500916, "logits/chosen": -0.37301352620124817, "logits/rejected": -0.4034350514411926, "logps/chosen": -0.08956724405288696, "logps/rejected": -0.8568572998046875, "loss": 2.7278, "nll_loss": 0.647929847240448, "rewards/accuracies": 0.875, "rewards/chosen": -0.008956724777817726, "rewards/margins": 0.07672901451587677, "rewards/rejected": -0.08568572998046875, "step": 1867 }, { "epoch": 1.2918395573997234, "grad_norm": 9.274822235107422, "learning_rate": 4.8378669125557096e-05, "log_odds_chosen": 5.655916213989258, "log_odds_ratio": -0.22805175185203552, "logits/chosen": -0.5578227043151855, "logits/rejected": -0.6247016191482544, "logps/chosen": -0.07201507687568665, "logps/rejected": -0.9687396883964539, "loss": 4.2172, "nll_loss": 1.0314915180206299, "rewards/accuracies": 0.875, "rewards/chosen": -0.0072015076875686646, "rewards/margins": 0.08967246115207672, "rewards/rejected": -0.09687396883964539, "step": 1868 }, { "epoch": 1.2925311203319503, "grad_norm": 4.5447893142700195, "learning_rate": 4.837482710926695e-05, "log_odds_chosen": 7.457864761352539, "log_odds_ratio": -0.08831396698951721, "logits/chosen": -0.15770329535007477, "logits/rejected": -0.22396370768547058, "logps/chosen": -0.024904444813728333, "logps/rejected": -0.9831055402755737, "loss": 2.4908, "nll_loss": 0.6138787865638733, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024904448073357344, "rewards/margins": 0.0958201140165329, "rewards/rejected": -0.09831055998802185, "step": 1869 }, { "epoch": 1.293222683264177, "grad_norm": 9.271495819091797, "learning_rate": 4.8370985092976795e-05, "log_odds_chosen": 3.8442087173461914, "log_odds_ratio": -0.3285159468650818, "logits/chosen": -0.7342332005500793, "logits/rejected": -0.7820785045623779, "logps/chosen": -0.12376535683870316, "logps/rejected": -0.7330570816993713, "loss": 3.2292, "nll_loss": 0.7744507193565369, "rewards/accuracies": 0.75, "rewards/chosen": -0.012376535683870316, "rewards/margins": 0.06092917546629906, "rewards/rejected": -0.07330571115016937, "step": 1870 }, { "epoch": 1.293914246196404, "grad_norm": 6.048008441925049, "learning_rate": 4.836714307668665e-05, "log_odds_chosen": 3.2552452087402344, "log_odds_ratio": -0.06862203031778336, "logits/chosen": -0.375715970993042, "logits/rejected": -0.44336646795272827, "logps/chosen": -0.07369048148393631, "logps/rejected": -0.7449591159820557, "loss": 4.4007, "nll_loss": 1.093301773071289, "rewards/accuracies": 1.0, "rewards/chosen": -0.007369048427790403, "rewards/margins": 0.06712686270475388, "rewards/rejected": -0.07449591159820557, "step": 1871 }, { "epoch": 1.2946058091286308, "grad_norm": 6.476336479187012, "learning_rate": 4.83633010603965e-05, "log_odds_chosen": 3.0051374435424805, "log_odds_ratio": -0.17865484952926636, "logits/chosen": -0.5499475002288818, "logits/rejected": -0.5804236531257629, "logps/chosen": -0.06498004496097565, "logps/rejected": -0.4653778374195099, "loss": 3.7613, "nll_loss": 0.922465443611145, "rewards/accuracies": 1.0, "rewards/chosen": -0.006498004775494337, "rewards/margins": 0.040039777755737305, "rewards/rejected": -0.04653778672218323, "step": 1872 }, { "epoch": 1.2952973720608576, "grad_norm": 8.106934547424316, "learning_rate": 4.8359459044106345e-05, "log_odds_chosen": 3.545710802078247, "log_odds_ratio": -0.22031505405902863, "logits/chosen": -0.7222949266433716, "logits/rejected": -0.7418359518051147, "logps/chosen": -0.060237836092710495, "logps/rejected": -0.6780364513397217, "loss": 4.4143, "nll_loss": 1.0815520286560059, "rewards/accuracies": 0.875, "rewards/chosen": -0.006023783702403307, "rewards/margins": 0.06177985668182373, "rewards/rejected": -0.06780364364385605, "step": 1873 }, { "epoch": 1.2959889349930844, "grad_norm": 3.658154010772705, "learning_rate": 4.8355617027816204e-05, "log_odds_chosen": 4.990939140319824, "log_odds_ratio": -0.10858413577079773, "logits/chosen": -0.43856334686279297, "logits/rejected": -0.5317746996879578, "logps/chosen": -0.028155002743005753, "logps/rejected": -0.6593915224075317, "loss": 2.7739, "nll_loss": 0.6826105117797852, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028155003674328327, "rewards/margins": 0.0631236582994461, "rewards/rejected": -0.06593915820121765, "step": 1874 }, { "epoch": 1.2966804979253113, "grad_norm": 6.410544395446777, "learning_rate": 4.835177501152605e-05, "log_odds_chosen": 4.552112579345703, "log_odds_ratio": -0.42514729499816895, "logits/chosen": -0.6232977509498596, "logits/rejected": -0.6269210577011108, "logps/chosen": -0.10090231895446777, "logps/rejected": -0.7650644183158875, "loss": 2.7052, "nll_loss": 0.6337778568267822, "rewards/accuracies": 0.75, "rewards/chosen": -0.010090231895446777, "rewards/margins": 0.06641621887683868, "rewards/rejected": -0.07650645077228546, "step": 1875 }, { "epoch": 1.297372060857538, "grad_norm": 6.081912517547607, "learning_rate": 4.83479329952359e-05, "log_odds_chosen": 4.995113372802734, "log_odds_ratio": -0.13507652282714844, "logits/chosen": -0.0397970974445343, "logits/rejected": -0.11730852723121643, "logps/chosen": -0.06897362321615219, "logps/rejected": -0.7518665194511414, "loss": 3.0435, "nll_loss": 0.7473784685134888, "rewards/accuracies": 1.0, "rewards/chosen": -0.006897362880408764, "rewards/margins": 0.06828928738832474, "rewards/rejected": -0.07518665492534637, "step": 1876 }, { "epoch": 1.298063623789765, "grad_norm": 8.268874168395996, "learning_rate": 4.8344090978945755e-05, "log_odds_chosen": 3.9267804622650146, "log_odds_ratio": -0.2662210464477539, "logits/chosen": -0.6300618052482605, "logits/rejected": -0.6393392086029053, "logps/chosen": -0.11472286283969879, "logps/rejected": -0.5772973299026489, "loss": 4.4849, "nll_loss": 1.0945922136306763, "rewards/accuracies": 0.875, "rewards/chosen": -0.011472285725176334, "rewards/margins": 0.04625745117664337, "rewards/rejected": -0.05772973597049713, "step": 1877 }, { "epoch": 1.2987551867219918, "grad_norm": 6.93869161605835, "learning_rate": 4.834024896265561e-05, "log_odds_chosen": 3.730520248413086, "log_odds_ratio": -0.2211248278617859, "logits/chosen": -0.16885051131248474, "logits/rejected": -0.20659935474395752, "logps/chosen": -0.09142343699932098, "logps/rejected": -0.5973372459411621, "loss": 4.1071, "nll_loss": 1.0046672821044922, "rewards/accuracies": 1.0, "rewards/chosen": -0.009142343886196613, "rewards/margins": 0.05059138312935829, "rewards/rejected": -0.05973372235894203, "step": 1878 }, { "epoch": 1.2994467496542186, "grad_norm": 4.864905834197998, "learning_rate": 4.833640694636545e-05, "log_odds_chosen": 5.531225204467773, "log_odds_ratio": -0.028478192165493965, "logits/chosen": -0.6834012866020203, "logits/rejected": -0.7350859045982361, "logps/chosen": -0.05556423217058182, "logps/rejected": -1.2433018684387207, "loss": 3.2099, "nll_loss": 0.7996299862861633, "rewards/accuracies": 1.0, "rewards/chosen": -0.005556423682719469, "rewards/margins": 0.11877376586198807, "rewards/rejected": -0.12433018535375595, "step": 1879 }, { "epoch": 1.3001383125864454, "grad_norm": 9.283565521240234, "learning_rate": 4.8332564930075305e-05, "log_odds_chosen": 4.565658092498779, "log_odds_ratio": -0.5067183375358582, "logits/chosen": -0.5782751441001892, "logits/rejected": -0.5901899933815002, "logps/chosen": -0.08600565791130066, "logps/rejected": -0.9412083029747009, "loss": 3.1232, "nll_loss": 0.7301177382469177, "rewards/accuracies": 0.875, "rewards/chosen": -0.008600565604865551, "rewards/margins": 0.08552026748657227, "rewards/rejected": -0.09412083774805069, "step": 1880 }, { "epoch": 1.3008298755186722, "grad_norm": 4.7859086990356445, "learning_rate": 4.832872291378516e-05, "log_odds_chosen": 6.456204414367676, "log_odds_ratio": -0.10142803192138672, "logits/chosen": -0.24098005890846252, "logits/rejected": -0.26826679706573486, "logps/chosen": -0.04199819266796112, "logps/rejected": -0.7708888053894043, "loss": 2.5463, "nll_loss": 0.6264212131500244, "rewards/accuracies": 0.875, "rewards/chosen": -0.004199819173663855, "rewards/margins": 0.0728890597820282, "rewards/rejected": -0.07708887755870819, "step": 1881 }, { "epoch": 1.301521438450899, "grad_norm": 8.697461128234863, "learning_rate": 4.8324880897495004e-05, "log_odds_chosen": 3.373399257659912, "log_odds_ratio": -0.2699851095676422, "logits/chosen": -0.5844525694847107, "logits/rejected": -0.5994455218315125, "logps/chosen": -0.10936583578586578, "logps/rejected": -0.8947189450263977, "loss": 3.9083, "nll_loss": 0.9500669836997986, "rewards/accuracies": 0.875, "rewards/chosen": -0.010936584323644638, "rewards/margins": 0.07853531837463379, "rewards/rejected": -0.08947189897298813, "step": 1882 }, { "epoch": 1.302213001383126, "grad_norm": 12.149765968322754, "learning_rate": 4.832103888120486e-05, "log_odds_chosen": 3.4742417335510254, "log_odds_ratio": -0.2230614721775055, "logits/chosen": -0.5748246908187866, "logits/rejected": -0.6016756296157837, "logps/chosen": -0.18775507807731628, "logps/rejected": -0.7884979248046875, "loss": 2.7704, "nll_loss": 0.6702914237976074, "rewards/accuracies": 0.875, "rewards/chosen": -0.01877550780773163, "rewards/margins": 0.06007428467273712, "rewards/rejected": -0.07884979248046875, "step": 1883 }, { "epoch": 1.3029045643153527, "grad_norm": 7.945075988769531, "learning_rate": 4.831719686491471e-05, "log_odds_chosen": 3.9197511672973633, "log_odds_ratio": -0.43578043580055237, "logits/chosen": -0.7064417600631714, "logits/rejected": -0.7176086902618408, "logps/chosen": -0.20374563336372375, "logps/rejected": -0.7481101751327515, "loss": 3.5857, "nll_loss": 0.8528434634208679, "rewards/accuracies": 0.625, "rewards/chosen": -0.020374562591314316, "rewards/margins": 0.05443645641207695, "rewards/rejected": -0.07481101900339127, "step": 1884 }, { "epoch": 1.3035961272475796, "grad_norm": 6.092682361602783, "learning_rate": 4.831335484862456e-05, "log_odds_chosen": 4.127374649047852, "log_odds_ratio": -0.10231603682041168, "logits/chosen": -0.6021102070808411, "logits/rejected": -0.5776104927062988, "logps/chosen": -0.07319527119398117, "logps/rejected": -0.9779950976371765, "loss": 2.8118, "nll_loss": 0.6927098035812378, "rewards/accuracies": 1.0, "rewards/chosen": -0.0073195272125303745, "rewards/margins": 0.09047998487949371, "rewards/rejected": -0.09779950976371765, "step": 1885 }, { "epoch": 1.3042876901798064, "grad_norm": 5.868535041809082, "learning_rate": 4.830951283233441e-05, "log_odds_chosen": 5.035343170166016, "log_odds_ratio": -0.11897246539592743, "logits/chosen": -0.5982178449630737, "logits/rejected": -0.6445438861846924, "logps/chosen": -0.033809542655944824, "logps/rejected": -0.8231789469718933, "loss": 3.7005, "nll_loss": 0.9132217764854431, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033809540327638388, "rewards/margins": 0.07893693447113037, "rewards/rejected": -0.08231788873672485, "step": 1886 }, { "epoch": 1.3049792531120332, "grad_norm": 7.5153937339782715, "learning_rate": 4.8305670816044266e-05, "log_odds_chosen": 2.3621668815612793, "log_odds_ratio": -0.3876497149467468, "logits/chosen": -0.6375263929367065, "logits/rejected": -0.6277173757553101, "logps/chosen": -0.20085851848125458, "logps/rejected": -0.7376888990402222, "loss": 3.3031, "nll_loss": 0.7870079874992371, "rewards/accuracies": 0.875, "rewards/chosen": -0.020085850730538368, "rewards/margins": 0.05368303507566452, "rewards/rejected": -0.07376889139413834, "step": 1887 }, { "epoch": 1.30567081604426, "grad_norm": 10.764470100402832, "learning_rate": 4.830182879975411e-05, "log_odds_chosen": 5.82547664642334, "log_odds_ratio": -0.808645486831665, "logits/chosen": -0.6361026763916016, "logits/rejected": -0.7336512207984924, "logps/chosen": -0.1351955085992813, "logps/rejected": -0.7599672079086304, "loss": 3.3539, "nll_loss": 0.7576218843460083, "rewards/accuracies": 0.75, "rewards/chosen": -0.013519550673663616, "rewards/margins": 0.062477171421051025, "rewards/rejected": -0.07599671930074692, "step": 1888 }, { "epoch": 1.3063623789764869, "grad_norm": 6.360213279724121, "learning_rate": 4.8297986783463964e-05, "log_odds_chosen": 7.0350661277771, "log_odds_ratio": -0.06198444962501526, "logits/chosen": -0.46543243527412415, "logits/rejected": -0.48943889141082764, "logps/chosen": -0.03179304301738739, "logps/rejected": -1.2069227695465088, "loss": 3.0007, "nll_loss": 0.7439780831336975, "rewards/accuracies": 1.0, "rewards/chosen": -0.003179304301738739, "rewards/margins": 0.11751297861337662, "rewards/rejected": -0.12069229781627655, "step": 1889 }, { "epoch": 1.3070539419087137, "grad_norm": 5.6595916748046875, "learning_rate": 4.8294144767173816e-05, "log_odds_chosen": 6.70374870300293, "log_odds_ratio": -0.014150972478091717, "logits/chosen": -0.33366501331329346, "logits/rejected": -0.31003445386886597, "logps/chosen": -0.018083132803440094, "logps/rejected": -0.8532133102416992, "loss": 3.0836, "nll_loss": 0.7694973349571228, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018083134200423956, "rewards/margins": 0.08351302146911621, "rewards/rejected": -0.0853213369846344, "step": 1890 }, { "epoch": 1.3077455048409405, "grad_norm": 6.861349105834961, "learning_rate": 4.829030275088366e-05, "log_odds_chosen": 4.63370418548584, "log_odds_ratio": -0.1361251324415207, "logits/chosen": -0.48294711112976074, "logits/rejected": -0.5271444916725159, "logps/chosen": -0.057334210723638535, "logps/rejected": -0.8996442556381226, "loss": 3.5532, "nll_loss": 0.8746891021728516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0057334210723638535, "rewards/margins": 0.08423101156949997, "rewards/rejected": -0.08996443450450897, "step": 1891 }, { "epoch": 1.3084370677731674, "grad_norm": 7.4157233238220215, "learning_rate": 4.828646073459352e-05, "log_odds_chosen": 4.082646369934082, "log_odds_ratio": -0.3548939824104309, "logits/chosen": -0.597272515296936, "logits/rejected": -0.6197874546051025, "logps/chosen": -0.07361356914043427, "logps/rejected": -0.907717764377594, "loss": 3.7981, "nll_loss": 0.9140282273292542, "rewards/accuracies": 0.875, "rewards/chosen": -0.007361356168985367, "rewards/margins": 0.08341042697429657, "rewards/rejected": -0.09077177941799164, "step": 1892 }, { "epoch": 1.3091286307053942, "grad_norm": 8.492520332336426, "learning_rate": 4.828261871830337e-05, "log_odds_chosen": 2.9942915439605713, "log_odds_ratio": -0.36342692375183105, "logits/chosen": -0.4822395145893097, "logits/rejected": -0.49275386333465576, "logps/chosen": -0.08205610513687134, "logps/rejected": -0.566351056098938, "loss": 4.007, "nll_loss": 0.9654159545898438, "rewards/accuracies": 0.875, "rewards/chosen": -0.008205609396100044, "rewards/margins": 0.048429492861032486, "rewards/rejected": -0.05663510411977768, "step": 1893 }, { "epoch": 1.309820193637621, "grad_norm": 9.320199012756348, "learning_rate": 4.827877670201322e-05, "log_odds_chosen": 3.0118792057037354, "log_odds_ratio": -0.7188223600387573, "logits/chosen": -0.4053908586502075, "logits/rejected": -0.39137327671051025, "logps/chosen": -0.13147065043449402, "logps/rejected": -0.8272587656974792, "loss": 4.1265, "nll_loss": 0.9597398042678833, "rewards/accuracies": 0.75, "rewards/chosen": -0.013147065415978432, "rewards/margins": 0.06957881152629852, "rewards/rejected": -0.0827258750796318, "step": 1894 }, { "epoch": 1.3105117565698479, "grad_norm": 5.111316204071045, "learning_rate": 4.827493468572307e-05, "log_odds_chosen": 3.6450846195220947, "log_odds_ratio": -0.08115525543689728, "logits/chosen": -0.47379356622695923, "logits/rejected": -0.43740054965019226, "logps/chosen": -0.07818682491779327, "logps/rejected": -0.911526083946228, "loss": 3.0668, "nll_loss": 0.7585898041725159, "rewards/accuracies": 1.0, "rewards/chosen": -0.007818683050572872, "rewards/margins": 0.08333393186330795, "rewards/rejected": -0.0911526158452034, "step": 1895 }, { "epoch": 1.3112033195020747, "grad_norm": 5.797129154205322, "learning_rate": 4.8271092669432924e-05, "log_odds_chosen": 4.451534271240234, "log_odds_ratio": -0.13431881368160248, "logits/chosen": -0.5808414816856384, "logits/rejected": -0.5981189608573914, "logps/chosen": -0.11908942461013794, "logps/rejected": -1.117770791053772, "loss": 3.3404, "nll_loss": 0.8216636776924133, "rewards/accuracies": 0.875, "rewards/chosen": -0.011908942833542824, "rewards/margins": 0.09986813366413116, "rewards/rejected": -0.11177708208560944, "step": 1896 }, { "epoch": 1.3118948824343015, "grad_norm": 6.0447001457214355, "learning_rate": 4.826725065314277e-05, "log_odds_chosen": 5.6041412353515625, "log_odds_ratio": -0.05469472333788872, "logits/chosen": -0.6320992112159729, "logits/rejected": -0.7025479078292847, "logps/chosen": -0.03088865801692009, "logps/rejected": -1.0616106986999512, "loss": 3.0946, "nll_loss": 0.7681707143783569, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030888658948242664, "rewards/margins": 0.10307221114635468, "rewards/rejected": -0.10616108030080795, "step": 1897 }, { "epoch": 1.3125864453665284, "grad_norm": 7.3333916664123535, "learning_rate": 4.826340863685262e-05, "log_odds_chosen": 3.174428939819336, "log_odds_ratio": -0.1787259876728058, "logits/chosen": -0.46415650844573975, "logits/rejected": -0.4987673759460449, "logps/chosen": -0.05549796670675278, "logps/rejected": -0.41620373725891113, "loss": 4.3112, "nll_loss": 1.0599231719970703, "rewards/accuracies": 1.0, "rewards/chosen": -0.005549796391278505, "rewards/margins": 0.036070577800273895, "rewards/rejected": -0.04162037372589111, "step": 1898 }, { "epoch": 1.3132780082987552, "grad_norm": 6.713944435119629, "learning_rate": 4.8259566620562475e-05, "log_odds_chosen": 5.584816932678223, "log_odds_ratio": -0.3194558322429657, "logits/chosen": -0.17112912237644196, "logits/rejected": -0.19902299344539642, "logps/chosen": -0.07132703810930252, "logps/rejected": -0.9684167504310608, "loss": 2.6484, "nll_loss": 0.6301434636116028, "rewards/accuracies": 0.875, "rewards/chosen": -0.007132704369723797, "rewards/margins": 0.08970896899700165, "rewards/rejected": -0.09684167802333832, "step": 1899 }, { "epoch": 1.313969571230982, "grad_norm": 4.772759437561035, "learning_rate": 4.825572460427232e-05, "log_odds_chosen": 4.3274641036987305, "log_odds_ratio": -0.17329102754592896, "logits/chosen": -0.5920778512954712, "logits/rejected": -0.5221872329711914, "logps/chosen": -0.06731683015823364, "logps/rejected": -0.6521579623222351, "loss": 2.7345, "nll_loss": 0.6662896871566772, "rewards/accuracies": 0.875, "rewards/chosen": -0.006731683388352394, "rewards/margins": 0.058484114706516266, "rewards/rejected": -0.06521579623222351, "step": 1900 }, { "epoch": 1.3146611341632088, "grad_norm": 5.9790472984313965, "learning_rate": 4.825188258798218e-05, "log_odds_chosen": 6.322959899902344, "log_odds_ratio": -0.09761762619018555, "logits/chosen": -0.34470367431640625, "logits/rejected": -0.3735116720199585, "logps/chosen": -0.040303681045770645, "logps/rejected": -1.1409962177276611, "loss": 3.2684, "nll_loss": 0.8073413372039795, "rewards/accuracies": 1.0, "rewards/chosen": -0.0040303682908415794, "rewards/margins": 0.11006926000118256, "rewards/rejected": -0.11409962922334671, "step": 1901 }, { "epoch": 1.3153526970954357, "grad_norm": 8.579450607299805, "learning_rate": 4.8248040571692025e-05, "log_odds_chosen": 2.434113025665283, "log_odds_ratio": -0.39737313985824585, "logits/chosen": -0.7401032447814941, "logits/rejected": -0.7689638137817383, "logps/chosen": -0.06367967277765274, "logps/rejected": -0.37234988808631897, "loss": 4.2298, "nll_loss": 1.0177040100097656, "rewards/accuracies": 0.875, "rewards/chosen": -0.006367966998368502, "rewards/margins": 0.030867021530866623, "rewards/rejected": -0.037234991788864136, "step": 1902 }, { "epoch": 1.3160442600276625, "grad_norm": 8.755030632019043, "learning_rate": 4.824419855540188e-05, "log_odds_chosen": 2.1251778602600098, "log_odds_ratio": -0.5365231037139893, "logits/chosen": -0.5605593919754028, "logits/rejected": -0.5998620986938477, "logps/chosen": -0.18675807118415833, "logps/rejected": -0.45498570799827576, "loss": 4.2634, "nll_loss": 1.0122004747390747, "rewards/accuracies": 0.625, "rewards/chosen": -0.018675807863473892, "rewards/margins": 0.026822764426469803, "rewards/rejected": -0.045498572289943695, "step": 1903 }, { "epoch": 1.3167358229598893, "grad_norm": 5.525919437408447, "learning_rate": 4.824035653911173e-05, "log_odds_chosen": 3.190709352493286, "log_odds_ratio": -0.4506668448448181, "logits/chosen": -0.524328351020813, "logits/rejected": -0.5008019804954529, "logps/chosen": -0.12892203032970428, "logps/rejected": -0.6358974575996399, "loss": 2.624, "nll_loss": 0.6109344959259033, "rewards/accuracies": 0.75, "rewards/chosen": -0.012892204336822033, "rewards/margins": 0.05069754272699356, "rewards/rejected": -0.06358975172042847, "step": 1904 }, { "epoch": 1.3174273858921162, "grad_norm": 3.6526026725769043, "learning_rate": 4.823651452282158e-05, "log_odds_chosen": 3.5043106079101562, "log_odds_ratio": -0.11030441522598267, "logits/chosen": -0.2913362383842468, "logits/rejected": -0.3034833073616028, "logps/chosen": -0.05897592753171921, "logps/rejected": -0.6736852526664734, "loss": 2.6101, "nll_loss": 0.6414985656738281, "rewards/accuracies": 1.0, "rewards/chosen": -0.0058975922875106335, "rewards/margins": 0.06147093325853348, "rewards/rejected": -0.0673685297369957, "step": 1905 }, { "epoch": 1.318118948824343, "grad_norm": 4.910558700561523, "learning_rate": 4.823267250653143e-05, "log_odds_chosen": 3.7399656772613525, "log_odds_ratio": -0.19141636788845062, "logits/chosen": -0.7501358985900879, "logits/rejected": -0.774748682975769, "logps/chosen": -0.06764288246631622, "logps/rejected": -0.5902732610702515, "loss": 2.9807, "nll_loss": 0.7260439395904541, "rewards/accuracies": 0.875, "rewards/chosen": -0.006764288060367107, "rewards/margins": 0.0522630400955677, "rewards/rejected": -0.059027329087257385, "step": 1906 }, { "epoch": 1.3188105117565698, "grad_norm": 6.183116912841797, "learning_rate": 4.822883049024128e-05, "log_odds_chosen": 1.4731816053390503, "log_odds_ratio": -0.42161351442337036, "logits/chosen": -0.7233215570449829, "logits/rejected": -0.6984033584594727, "logps/chosen": -0.09123566746711731, "logps/rejected": -0.382061243057251, "loss": 4.6328, "nll_loss": 1.1160461902618408, "rewards/accuracies": 0.75, "rewards/chosen": -0.009123566560447216, "rewards/margins": 0.029082562774419785, "rewards/rejected": -0.038206130266189575, "step": 1907 }, { "epoch": 1.3195020746887967, "grad_norm": 8.602644920349121, "learning_rate": 4.822498847395113e-05, "log_odds_chosen": 5.085760593414307, "log_odds_ratio": -0.21069833636283875, "logits/chosen": -0.24763265252113342, "logits/rejected": -0.3364519476890564, "logps/chosen": -0.15351608395576477, "logps/rejected": -1.0235706567764282, "loss": 3.9976, "nll_loss": 0.97832852602005, "rewards/accuracies": 0.875, "rewards/chosen": -0.015351608395576477, "rewards/margins": 0.08700545877218246, "rewards/rejected": -0.10235706716775894, "step": 1908 }, { "epoch": 1.3201936376210235, "grad_norm": 9.28389835357666, "learning_rate": 4.822114645766098e-05, "log_odds_chosen": 4.172635078430176, "log_odds_ratio": -0.24250559508800507, "logits/chosen": -0.46899259090423584, "logits/rejected": -0.5167728066444397, "logps/chosen": -0.0976497232913971, "logps/rejected": -0.5878530740737915, "loss": 5.0903, "nll_loss": 1.2483203411102295, "rewards/accuracies": 0.875, "rewards/chosen": -0.00976497307419777, "rewards/margins": 0.04902033507823944, "rewards/rejected": -0.05878530442714691, "step": 1909 }, { "epoch": 1.3208852005532503, "grad_norm": 6.55712366104126, "learning_rate": 4.821730444137084e-05, "log_odds_chosen": 6.226669788360596, "log_odds_ratio": -0.12685224413871765, "logits/chosen": -0.3187330961227417, "logits/rejected": -0.39868754148483276, "logps/chosen": -0.050216492265462875, "logps/rejected": -1.1293668746948242, "loss": 3.7417, "nll_loss": 0.9227307438850403, "rewards/accuracies": 1.0, "rewards/chosen": -0.0050216494128108025, "rewards/margins": 0.10791504383087158, "rewards/rejected": -0.11293669044971466, "step": 1910 }, { "epoch": 1.3215767634854771, "grad_norm": 5.095497131347656, "learning_rate": 4.8213462425080684e-05, "log_odds_chosen": 3.7288761138916016, "log_odds_ratio": -0.18773989379405975, "logits/chosen": -0.6164065003395081, "logits/rejected": -0.6328135132789612, "logps/chosen": -0.10513586550951004, "logps/rejected": -0.9961248636245728, "loss": 2.8386, "nll_loss": 0.690880298614502, "rewards/accuracies": 1.0, "rewards/chosen": -0.010513586923480034, "rewards/margins": 0.08909890055656433, "rewards/rejected": -0.09961248934268951, "step": 1911 }, { "epoch": 1.322268326417704, "grad_norm": 10.32607650756836, "learning_rate": 4.8209620408790536e-05, "log_odds_chosen": 2.625572919845581, "log_odds_ratio": -0.4675644636154175, "logits/chosen": -0.5549513101577759, "logits/rejected": -0.5509620904922485, "logps/chosen": -0.10303942859172821, "logps/rejected": -0.5751489400863647, "loss": 3.7475, "nll_loss": 0.8901306986808777, "rewards/accuracies": 0.625, "rewards/chosen": -0.010303942486643791, "rewards/margins": 0.04721095412969589, "rewards/rejected": -0.057514894753694534, "step": 1912 }, { "epoch": 1.3229598893499308, "grad_norm": 7.665313243865967, "learning_rate": 4.820577839250039e-05, "log_odds_chosen": 2.6405282020568848, "log_odds_ratio": -0.33495649695396423, "logits/chosen": -0.31856656074523926, "logits/rejected": -0.37194597721099854, "logps/chosen": -0.0918373167514801, "logps/rejected": -0.6856303215026855, "loss": 3.5346, "nll_loss": 0.8501495122909546, "rewards/accuracies": 0.75, "rewards/chosen": -0.009183731861412525, "rewards/margins": 0.059379301965236664, "rewards/rejected": -0.06856303662061691, "step": 1913 }, { "epoch": 1.3236514522821576, "grad_norm": 4.601027965545654, "learning_rate": 4.820193637621024e-05, "log_odds_chosen": 4.731778144836426, "log_odds_ratio": -0.31632643938064575, "logits/chosen": -0.053993016481399536, "logits/rejected": -0.0613970011472702, "logps/chosen": -0.12295106053352356, "logps/rejected": -0.9370225667953491, "loss": 3.0581, "nll_loss": 0.7328994274139404, "rewards/accuracies": 0.875, "rewards/chosen": -0.012295106425881386, "rewards/margins": 0.08140715211629868, "rewards/rejected": -0.0937022715806961, "step": 1914 }, { "epoch": 1.3243430152143845, "grad_norm": 6.187616348266602, "learning_rate": 4.819809435992009e-05, "log_odds_chosen": 5.130054473876953, "log_odds_ratio": -0.1377926617860794, "logits/chosen": -0.49733227491378784, "logits/rejected": -0.4652095139026642, "logps/chosen": -0.031285833567380905, "logps/rejected": -0.6109557747840881, "loss": 2.7409, "nll_loss": 0.6714452505111694, "rewards/accuracies": 1.0, "rewards/chosen": -0.003128583310171962, "rewards/margins": 0.05796699598431587, "rewards/rejected": -0.06109558045864105, "step": 1915 }, { "epoch": 1.3250345781466113, "grad_norm": 7.493089199066162, "learning_rate": 4.819425234362994e-05, "log_odds_chosen": 2.8747878074645996, "log_odds_ratio": -0.2400168925523758, "logits/chosen": -0.2644622325897217, "logits/rejected": -0.3293954133987427, "logps/chosen": -0.1101066917181015, "logps/rejected": -0.7629117369651794, "loss": 3.3065, "nll_loss": 0.802635669708252, "rewards/accuracies": 0.875, "rewards/chosen": -0.01101066917181015, "rewards/margins": 0.0652805045247078, "rewards/rejected": -0.07629118114709854, "step": 1916 }, { "epoch": 1.3257261410788381, "grad_norm": 6.159051418304443, "learning_rate": 4.819041032733979e-05, "log_odds_chosen": 4.431463241577148, "log_odds_ratio": -0.15916067361831665, "logits/chosen": -0.364975243806839, "logits/rejected": -0.3880729079246521, "logps/chosen": -0.13159070909023285, "logps/rejected": -0.9299683570861816, "loss": 3.9075, "nll_loss": 0.960968017578125, "rewards/accuracies": 1.0, "rewards/chosen": -0.013159072026610374, "rewards/margins": 0.07983776926994324, "rewards/rejected": -0.09299683570861816, "step": 1917 }, { "epoch": 1.326417704011065, "grad_norm": 5.809264659881592, "learning_rate": 4.818656831104964e-05, "log_odds_chosen": 2.516145706176758, "log_odds_ratio": -0.42379873991012573, "logits/chosen": -0.6518577337265015, "logits/rejected": -0.6944831013679504, "logps/chosen": -0.0951496809720993, "logps/rejected": -0.3088337182998657, "loss": 4.5632, "nll_loss": 1.0984179973602295, "rewards/accuracies": 0.625, "rewards/chosen": -0.009514967910945415, "rewards/margins": 0.021368402987718582, "rewards/rejected": -0.030883371829986572, "step": 1918 }, { "epoch": 1.3271092669432918, "grad_norm": 4.586781024932861, "learning_rate": 4.8182726294759497e-05, "log_odds_chosen": 2.9109044075012207, "log_odds_ratio": -0.16025152802467346, "logits/chosen": -0.4489715099334717, "logits/rejected": -0.42398160696029663, "logps/chosen": -0.05299271643161774, "logps/rejected": -0.5875262022018433, "loss": 2.9342, "nll_loss": 0.7175151109695435, "rewards/accuracies": 1.0, "rewards/chosen": -0.005299271084368229, "rewards/margins": 0.053453344851732254, "rewards/rejected": -0.05875261873006821, "step": 1919 }, { "epoch": 1.3278008298755186, "grad_norm": 3.521937608718872, "learning_rate": 4.817888427846934e-05, "log_odds_chosen": 5.2917985916137695, "log_odds_ratio": -0.03396681323647499, "logits/chosen": -0.4610143303871155, "logits/rejected": -0.47300106287002563, "logps/chosen": -0.018367256969213486, "logps/rejected": -0.6851637363433838, "loss": 2.4834, "nll_loss": 0.6174432039260864, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018367258599027991, "rewards/margins": 0.06667964160442352, "rewards/rejected": -0.06851637363433838, "step": 1920 }, { "epoch": 1.3284923928077454, "grad_norm": 6.283596038818359, "learning_rate": 4.8175042262179195e-05, "log_odds_chosen": 3.88523530960083, "log_odds_ratio": -0.1463552713394165, "logits/chosen": -0.4549104571342468, "logits/rejected": -0.4896395206451416, "logps/chosen": -0.07398514449596405, "logps/rejected": -1.0360349416732788, "loss": 4.0476, "nll_loss": 0.9972621202468872, "rewards/accuracies": 1.0, "rewards/chosen": -0.007398514077067375, "rewards/margins": 0.0962049812078476, "rewards/rejected": -0.10360349714756012, "step": 1921 }, { "epoch": 1.3291839557399723, "grad_norm": 13.937512397766113, "learning_rate": 4.817120024588904e-05, "log_odds_chosen": 3.372614860534668, "log_odds_ratio": -0.6902897357940674, "logits/chosen": -0.3406500816345215, "logits/rejected": -0.35912925004959106, "logps/chosen": -0.14384174346923828, "logps/rejected": -0.9012109041213989, "loss": 3.3868, "nll_loss": 0.777681291103363, "rewards/accuracies": 0.875, "rewards/chosen": -0.014384175650775433, "rewards/margins": 0.07573691010475159, "rewards/rejected": -0.0901210829615593, "step": 1922 }, { "epoch": 1.329875518672199, "grad_norm": 3.9251039028167725, "learning_rate": 4.81673582295989e-05, "log_odds_chosen": 3.5236854553222656, "log_odds_ratio": -0.08903735131025314, "logits/chosen": -0.3721243739128113, "logits/rejected": -0.39660120010375977, "logps/chosen": -0.05323631688952446, "logps/rejected": -0.5726852416992188, "loss": 3.2612, "nll_loss": 0.8063850402832031, "rewards/accuracies": 1.0, "rewards/chosen": -0.005323631688952446, "rewards/margins": 0.05194488912820816, "rewards/rejected": -0.057268522679805756, "step": 1923 }, { "epoch": 1.330567081604426, "grad_norm": 6.169641494750977, "learning_rate": 4.8163516213308745e-05, "log_odds_chosen": 4.691677093505859, "log_odds_ratio": -0.2546631693840027, "logits/chosen": -0.6151852011680603, "logits/rejected": -0.6133842468261719, "logps/chosen": -0.08545532077550888, "logps/rejected": -1.0763016939163208, "loss": 3.4508, "nll_loss": 0.8372362852096558, "rewards/accuracies": 0.875, "rewards/chosen": -0.008545532822608948, "rewards/margins": 0.09908463805913925, "rewards/rejected": -0.1076301708817482, "step": 1924 }, { "epoch": 1.3312586445366528, "grad_norm": 6.655059337615967, "learning_rate": 4.81596741970186e-05, "log_odds_chosen": 4.733453750610352, "log_odds_ratio": -0.13630807399749756, "logits/chosen": -0.7335551977157593, "logits/rejected": -0.7183327674865723, "logps/chosen": -0.06154067814350128, "logps/rejected": -0.9583780765533447, "loss": 3.6682, "nll_loss": 0.9034278392791748, "rewards/accuracies": 1.0, "rewards/chosen": -0.006154067814350128, "rewards/margins": 0.08968374133110046, "rewards/rejected": -0.09583780914545059, "step": 1925 }, { "epoch": 1.3319502074688796, "grad_norm": 6.028042316436768, "learning_rate": 4.815583218072845e-05, "log_odds_chosen": 1.7379109859466553, "log_odds_ratio": -0.703913152217865, "logits/chosen": -0.7907073497772217, "logits/rejected": -0.7652462720870972, "logps/chosen": -0.27896490693092346, "logps/rejected": -0.42122477293014526, "loss": 4.0062, "nll_loss": 0.9311593770980835, "rewards/accuracies": 0.625, "rewards/chosen": -0.027896491810679436, "rewards/margins": 0.014225986786186695, "rewards/rejected": -0.042122479528188705, "step": 1926 }, { "epoch": 1.3326417704011064, "grad_norm": 5.958827972412109, "learning_rate": 4.8151990164438296e-05, "log_odds_chosen": 4.64818000793457, "log_odds_ratio": -0.080184206366539, "logits/chosen": -0.43817225098609924, "logits/rejected": -0.5326857566833496, "logps/chosen": -0.03991539031267166, "logps/rejected": -0.8206549882888794, "loss": 3.7101, "nll_loss": 0.9195180535316467, "rewards/accuracies": 1.0, "rewards/chosen": -0.003991539124399424, "rewards/margins": 0.07807396352291107, "rewards/rejected": -0.08206550031900406, "step": 1927 }, { "epoch": 1.3333333333333333, "grad_norm": 7.063464641571045, "learning_rate": 4.814814814814815e-05, "log_odds_chosen": 4.256350517272949, "log_odds_ratio": -0.03917912021279335, "logits/chosen": -0.6081252098083496, "logits/rejected": -0.6156570911407471, "logps/chosen": -0.05544862151145935, "logps/rejected": -1.2835543155670166, "loss": 4.0754, "nll_loss": 1.0149255990982056, "rewards/accuracies": 1.0, "rewards/chosen": -0.0055448622442781925, "rewards/margins": 0.12281057238578796, "rewards/rejected": -0.12835542857646942, "step": 1928 }, { "epoch": 1.33402489626556, "grad_norm": 5.007072448730469, "learning_rate": 4.8144306131858e-05, "log_odds_chosen": 4.11650276184082, "log_odds_ratio": -0.08610834181308746, "logits/chosen": -0.6029285788536072, "logits/rejected": -0.7104057669639587, "logps/chosen": -0.07007205486297607, "logps/rejected": -0.8489803671836853, "loss": 3.7165, "nll_loss": 0.9205197095870972, "rewards/accuracies": 1.0, "rewards/chosen": -0.007007205858826637, "rewards/margins": 0.07789083570241928, "rewards/rejected": -0.08489803969860077, "step": 1929 }, { "epoch": 1.334716459197787, "grad_norm": 5.1880998611450195, "learning_rate": 4.814046411556785e-05, "log_odds_chosen": 5.354378700256348, "log_odds_ratio": -0.20343045890331268, "logits/chosen": -0.621367871761322, "logits/rejected": -0.6300415992736816, "logps/chosen": -0.05588001012802124, "logps/rejected": -0.9308750629425049, "loss": 3.9913, "nll_loss": 0.9774820804595947, "rewards/accuracies": 0.75, "rewards/chosen": -0.005588000640273094, "rewards/margins": 0.08749950677156448, "rewards/rejected": -0.09308750927448273, "step": 1930 }, { "epoch": 1.3354080221300137, "grad_norm": 5.457958221435547, "learning_rate": 4.81366220992777e-05, "log_odds_chosen": 4.170896530151367, "log_odds_ratio": -0.4549473524093628, "logits/chosen": -0.7111985683441162, "logits/rejected": -0.7823254466056824, "logps/chosen": -0.1055913120508194, "logps/rejected": -0.8930991888046265, "loss": 3.1366, "nll_loss": 0.7386659383773804, "rewards/accuracies": 0.75, "rewards/chosen": -0.010559131391346455, "rewards/margins": 0.07875078916549683, "rewards/rejected": -0.08930991590023041, "step": 1931 }, { "epoch": 1.3360995850622408, "grad_norm": 6.582731246948242, "learning_rate": 4.813278008298756e-05, "log_odds_chosen": 1.9933853149414062, "log_odds_ratio": -0.3615785241127014, "logits/chosen": -0.2690390348434448, "logits/rejected": -0.3522653579711914, "logps/chosen": -0.08049627393484116, "logps/rejected": -0.4732947051525116, "loss": 3.7376, "nll_loss": 0.8982344269752502, "rewards/accuracies": 0.75, "rewards/chosen": -0.008049627766013145, "rewards/margins": 0.039279840886592865, "rewards/rejected": -0.04732947051525116, "step": 1932 }, { "epoch": 1.3367911479944676, "grad_norm": 7.321381568908691, "learning_rate": 4.8128938066697404e-05, "log_odds_chosen": 3.5911612510681152, "log_odds_ratio": -0.18059605360031128, "logits/chosen": -0.468522310256958, "logits/rejected": -0.5160965919494629, "logps/chosen": -0.07987986505031586, "logps/rejected": -0.7631416320800781, "loss": 4.4929, "nll_loss": 1.1051725149154663, "rewards/accuracies": 0.875, "rewards/chosen": -0.00798798631876707, "rewards/margins": 0.0683261826634407, "rewards/rejected": -0.07631416618824005, "step": 1933 }, { "epoch": 1.3374827109266945, "grad_norm": 7.451639652252197, "learning_rate": 4.8125096050407256e-05, "log_odds_chosen": 3.3757247924804688, "log_odds_ratio": -0.5779038667678833, "logits/chosen": -0.6634786128997803, "logits/rejected": -0.7254400849342346, "logps/chosen": -0.1831435114145279, "logps/rejected": -0.7591239213943481, "loss": 3.3445, "nll_loss": 0.778323769569397, "rewards/accuracies": 0.625, "rewards/chosen": -0.01831435039639473, "rewards/margins": 0.0575980469584465, "rewards/rejected": -0.07591239362955093, "step": 1934 }, { "epoch": 1.3381742738589213, "grad_norm": 6.1455230712890625, "learning_rate": 4.812125403411711e-05, "log_odds_chosen": 3.1927881240844727, "log_odds_ratio": -0.21947617828845978, "logits/chosen": -0.6109656095504761, "logits/rejected": -0.5959261059761047, "logps/chosen": -0.08558580279350281, "logps/rejected": -0.5466667413711548, "loss": 3.9627, "nll_loss": 0.9687193036079407, "rewards/accuracies": 1.0, "rewards/chosen": -0.008558579720556736, "rewards/margins": 0.04610808938741684, "rewards/rejected": -0.054666668176651, "step": 1935 }, { "epoch": 1.3388658367911481, "grad_norm": 5.804905891418457, "learning_rate": 4.8117412017826954e-05, "log_odds_chosen": 6.877157211303711, "log_odds_ratio": -0.00704343942925334, "logits/chosen": -0.6004572510719299, "logits/rejected": -0.6538717150688171, "logps/chosen": -0.004800926893949509, "logps/rejected": -1.1645859479904175, "loss": 3.132, "nll_loss": 0.7823060750961304, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004800926835741848, "rewards/margins": 0.11597850918769836, "rewards/rejected": -0.11645859479904175, "step": 1936 }, { "epoch": 1.339557399723375, "grad_norm": 5.952755451202393, "learning_rate": 4.811357000153681e-05, "log_odds_chosen": 2.8181982040405273, "log_odds_ratio": -0.20215541124343872, "logits/chosen": -0.39430737495422363, "logits/rejected": -0.427776575088501, "logps/chosen": -0.04715566337108612, "logps/rejected": -0.5383660793304443, "loss": 4.7374, "nll_loss": 1.1641255617141724, "rewards/accuracies": 0.875, "rewards/chosen": -0.004715566523373127, "rewards/margins": 0.04912103712558746, "rewards/rejected": -0.053836606442928314, "step": 1937 }, { "epoch": 1.3402489626556018, "grad_norm": 6.488382339477539, "learning_rate": 4.810972798524666e-05, "log_odds_chosen": 3.856659412384033, "log_odds_ratio": -0.333176851272583, "logits/chosen": -0.7704707384109497, "logits/rejected": -0.869666576385498, "logps/chosen": -0.15772998332977295, "logps/rejected": -0.9237390756607056, "loss": 3.4646, "nll_loss": 0.8328379392623901, "rewards/accuracies": 0.875, "rewards/chosen": -0.015772996470332146, "rewards/margins": 0.07660090923309326, "rewards/rejected": -0.09237390756607056, "step": 1938 }, { "epoch": 1.3409405255878286, "grad_norm": 8.147760391235352, "learning_rate": 4.810588596895651e-05, "log_odds_chosen": 2.7923974990844727, "log_odds_ratio": -0.9370527267456055, "logits/chosen": -0.9870648980140686, "logits/rejected": -1.0097748041152954, "logps/chosen": -0.16981056332588196, "logps/rejected": -0.6250455379486084, "loss": 4.2305, "nll_loss": 0.9639307856559753, "rewards/accuracies": 0.625, "rewards/chosen": -0.016981055960059166, "rewards/margins": 0.045523501932621, "rewards/rejected": -0.06250455975532532, "step": 1939 }, { "epoch": 1.3416320885200554, "grad_norm": 6.496330738067627, "learning_rate": 4.810204395266636e-05, "log_odds_chosen": 2.225219249725342, "log_odds_ratio": -0.4056919515132904, "logits/chosen": -0.6805392503738403, "logits/rejected": -0.6900457143783569, "logps/chosen": -0.08912613242864609, "logps/rejected": -0.5723999738693237, "loss": 3.8992, "nll_loss": 0.9342321157455444, "rewards/accuracies": 0.875, "rewards/chosen": -0.008912613615393639, "rewards/margins": 0.04832738637924194, "rewards/rejected": -0.05724000185728073, "step": 1940 }, { "epoch": 1.3423236514522823, "grad_norm": 4.509915828704834, "learning_rate": 4.8098201936376216e-05, "log_odds_chosen": 4.331239700317383, "log_odds_ratio": -0.370481938123703, "logits/chosen": -0.3268811106681824, "logits/rejected": -0.35085660219192505, "logps/chosen": -0.12016618996858597, "logps/rejected": -0.8190144896507263, "loss": 2.3706, "nll_loss": 0.5555914044380188, "rewards/accuracies": 0.75, "rewards/chosen": -0.012016619555652142, "rewards/margins": 0.06988482177257538, "rewards/rejected": -0.0819014459848404, "step": 1941 }, { "epoch": 1.343015214384509, "grad_norm": 7.133805274963379, "learning_rate": 4.809435992008606e-05, "log_odds_chosen": 4.085857391357422, "log_odds_ratio": -0.2963946461677551, "logits/chosen": -0.45591726899147034, "logits/rejected": -0.49084967374801636, "logps/chosen": -0.10536396503448486, "logps/rejected": -1.0363645553588867, "loss": 3.7455, "nll_loss": 0.9067329168319702, "rewards/accuracies": 0.75, "rewards/chosen": -0.010536396875977516, "rewards/margins": 0.09310007095336914, "rewards/rejected": -0.10363646596670151, "step": 1942 }, { "epoch": 1.343706777316736, "grad_norm": 7.4849443435668945, "learning_rate": 4.8090517903795915e-05, "log_odds_chosen": 3.4074530601501465, "log_odds_ratio": -0.46883493661880493, "logits/chosen": -0.6866205334663391, "logits/rejected": -0.7802227139472961, "logps/chosen": -0.08575089275836945, "logps/rejected": -0.746304988861084, "loss": 3.0132, "nll_loss": 0.7064082622528076, "rewards/accuracies": 0.75, "rewards/chosen": -0.008575089275836945, "rewards/margins": 0.06605540961027145, "rewards/rejected": -0.0746304988861084, "step": 1943 }, { "epoch": 1.3443983402489628, "grad_norm": 9.744033813476562, "learning_rate": 4.808667588750577e-05, "log_odds_chosen": 5.1300787925720215, "log_odds_ratio": -0.28685262799263, "logits/chosen": -0.8690510392189026, "logits/rejected": -0.9068571329116821, "logps/chosen": -0.126407653093338, "logps/rejected": -0.8747024536132812, "loss": 5.1364, "nll_loss": 1.2554024457931519, "rewards/accuracies": 0.875, "rewards/chosen": -0.012640764936804771, "rewards/margins": 0.07482947409152985, "rewards/rejected": -0.08747023344039917, "step": 1944 }, { "epoch": 1.3450899031811896, "grad_norm": 5.100958824157715, "learning_rate": 4.808283387121561e-05, "log_odds_chosen": 5.217370986938477, "log_odds_ratio": -0.24462248384952545, "logits/chosen": -0.7892632484436035, "logits/rejected": -0.849675714969635, "logps/chosen": -0.09817850589752197, "logps/rejected": -0.787107527256012, "loss": 2.6284, "nll_loss": 0.6326299905776978, "rewards/accuracies": 0.875, "rewards/chosen": -0.009817851707339287, "rewards/margins": 0.06889290362596512, "rewards/rejected": -0.07871074974536896, "step": 1945 }, { "epoch": 1.3457814661134164, "grad_norm": 6.703667163848877, "learning_rate": 4.8078991854925465e-05, "log_odds_chosen": 6.110369682312012, "log_odds_ratio": -0.03723360225558281, "logits/chosen": -0.49276337027549744, "logits/rejected": -0.5956921577453613, "logps/chosen": -0.039471082389354706, "logps/rejected": -1.272836685180664, "loss": 3.4174, "nll_loss": 0.850635290145874, "rewards/accuracies": 1.0, "rewards/chosen": -0.003947108052670956, "rewards/margins": 0.12333656847476959, "rewards/rejected": -0.12728366255760193, "step": 1946 }, { "epoch": 1.3464730290456433, "grad_norm": 5.189427852630615, "learning_rate": 4.807514983863532e-05, "log_odds_chosen": 3.109792947769165, "log_odds_ratio": -0.244966059923172, "logits/chosen": -0.540881335735321, "logits/rejected": -0.5172264575958252, "logps/chosen": -0.05473095923662186, "logps/rejected": -0.4827151298522949, "loss": 2.852, "nll_loss": 0.6885116696357727, "rewards/accuracies": 0.875, "rewards/chosen": -0.005473096389323473, "rewards/margins": 0.04279841482639313, "rewards/rejected": -0.04827151447534561, "step": 1947 }, { "epoch": 1.34716459197787, "grad_norm": 2.6815555095672607, "learning_rate": 4.807130782234517e-05, "log_odds_chosen": 2.74289870262146, "log_odds_ratio": -0.2474522888660431, "logits/chosen": -0.16342592239379883, "logits/rejected": -0.1653198003768921, "logps/chosen": -0.14200901985168457, "logps/rejected": -0.9104002118110657, "loss": 2.6106, "nll_loss": 0.6278988122940063, "rewards/accuracies": 0.875, "rewards/chosen": -0.014200902543962002, "rewards/margins": 0.0768391340970993, "rewards/rejected": -0.09104002267122269, "step": 1948 }, { "epoch": 1.347856154910097, "grad_norm": 6.47789192199707, "learning_rate": 4.8067465806055016e-05, "log_odds_chosen": 2.62200927734375, "log_odds_ratio": -0.6260537505149841, "logits/chosen": -0.6399807929992676, "logits/rejected": -0.6780401468276978, "logps/chosen": -0.11815258115530014, "logps/rejected": -0.8514223098754883, "loss": 3.2289, "nll_loss": 0.7446192502975464, "rewards/accuracies": 0.875, "rewards/chosen": -0.011815258301794529, "rewards/margins": 0.073326975107193, "rewards/rejected": -0.08514222502708435, "step": 1949 }, { "epoch": 1.3485477178423237, "grad_norm": 6.061791896820068, "learning_rate": 4.8063623789764875e-05, "log_odds_chosen": 3.674816131591797, "log_odds_ratio": -0.19583740830421448, "logits/chosen": -0.6116560101509094, "logits/rejected": -0.6176925897598267, "logps/chosen": -0.07769626379013062, "logps/rejected": -0.6334196925163269, "loss": 4.2678, "nll_loss": 1.0473612546920776, "rewards/accuracies": 1.0, "rewards/chosen": -0.0077696265652775764, "rewards/margins": 0.055572349578142166, "rewards/rejected": -0.06334197521209717, "step": 1950 }, { "epoch": 1.3492392807745506, "grad_norm": 5.788738250732422, "learning_rate": 4.805978177347472e-05, "log_odds_chosen": 3.3159537315368652, "log_odds_ratio": -0.346087247133255, "logits/chosen": -0.11763886362314224, "logits/rejected": -0.19081079959869385, "logps/chosen": -0.0851515457034111, "logps/rejected": -0.575129508972168, "loss": 2.8582, "nll_loss": 0.6799299120903015, "rewards/accuracies": 0.875, "rewards/chosen": -0.00851515494287014, "rewards/margins": 0.04899780452251434, "rewards/rejected": -0.057512953877449036, "step": 1951 }, { "epoch": 1.3499308437067774, "grad_norm": 4.8835954666137695, "learning_rate": 4.805593975718457e-05, "log_odds_chosen": 3.579317808151245, "log_odds_ratio": -0.3033023476600647, "logits/chosen": -0.5329922437667847, "logits/rejected": -0.5394185185432434, "logps/chosen": -0.11767933517694473, "logps/rejected": -0.7724454998970032, "loss": 3.3101, "nll_loss": 0.7971964478492737, "rewards/accuracies": 0.75, "rewards/chosen": -0.011767934076488018, "rewards/margins": 0.06547661870718002, "rewards/rejected": -0.07724454998970032, "step": 1952 }, { "epoch": 1.3506224066390042, "grad_norm": 8.65053939819336, "learning_rate": 4.8052097740894425e-05, "log_odds_chosen": 1.601304054260254, "log_odds_ratio": -0.6165583729743958, "logits/chosen": -0.7664597034454346, "logits/rejected": -0.766176700592041, "logps/chosen": -0.12955564260482788, "logps/rejected": -0.4429931640625, "loss": 5.0357, "nll_loss": 1.1972750425338745, "rewards/accuracies": 0.5, "rewards/chosen": -0.012955564074218273, "rewards/margins": 0.03134375810623169, "rewards/rejected": -0.04429932311177254, "step": 1953 }, { "epoch": 1.351313969571231, "grad_norm": 7.064445972442627, "learning_rate": 4.804825572460427e-05, "log_odds_chosen": 5.0720367431640625, "log_odds_ratio": -0.2898871898651123, "logits/chosen": -0.46167534589767456, "logits/rejected": -0.45588383078575134, "logps/chosen": -0.050215303897857666, "logps/rejected": -0.6823631525039673, "loss": 3.7218, "nll_loss": 0.901453971862793, "rewards/accuracies": 0.75, "rewards/chosen": -0.005021530669182539, "rewards/margins": 0.06321477890014648, "rewards/rejected": -0.06823631376028061, "step": 1954 }, { "epoch": 1.352005532503458, "grad_norm": 6.010137557983398, "learning_rate": 4.8044413708314124e-05, "log_odds_chosen": 1.5840513706207275, "log_odds_ratio": -0.4234069883823395, "logits/chosen": -0.42211514711380005, "logits/rejected": -0.4028991162776947, "logps/chosen": -0.11243654787540436, "logps/rejected": -0.46887820959091187, "loss": 2.9777, "nll_loss": 0.7020907402038574, "rewards/accuracies": 0.75, "rewards/chosen": -0.011243656277656555, "rewards/margins": 0.03564416617155075, "rewards/rejected": -0.046887822449207306, "step": 1955 }, { "epoch": 1.3526970954356847, "grad_norm": 8.500036239624023, "learning_rate": 4.8040571692023976e-05, "log_odds_chosen": 4.498102188110352, "log_odds_ratio": -0.18591952323913574, "logits/chosen": -0.4714905619621277, "logits/rejected": -0.5314135551452637, "logps/chosen": -0.13488513231277466, "logps/rejected": -0.9827030301094055, "loss": 3.4283, "nll_loss": 0.8384861350059509, "rewards/accuracies": 0.875, "rewards/chosen": -0.013488514348864555, "rewards/margins": 0.08478179574012756, "rewards/rejected": -0.09827030450105667, "step": 1956 }, { "epoch": 1.3533886583679116, "grad_norm": 5.889149188995361, "learning_rate": 4.803672967573383e-05, "log_odds_chosen": 1.6420878171920776, "log_odds_ratio": -1.187038540840149, "logits/chosen": -0.7033360600471497, "logits/rejected": -0.6936848163604736, "logps/chosen": -0.12075044214725494, "logps/rejected": -0.6052550673484802, "loss": 3.5898, "nll_loss": 0.7787531614303589, "rewards/accuracies": 0.625, "rewards/chosen": -0.012075044214725494, "rewards/margins": 0.04845046624541283, "rewards/rejected": -0.06052550673484802, "step": 1957 }, { "epoch": 1.3540802213001384, "grad_norm": 5.764094829559326, "learning_rate": 4.8032887659443674e-05, "log_odds_chosen": 4.8118696212768555, "log_odds_ratio": -0.2559741735458374, "logits/chosen": -0.2301330417394638, "logits/rejected": -0.2537592649459839, "logps/chosen": -0.051100168377161026, "logps/rejected": -0.9901271462440491, "loss": 2.8586, "nll_loss": 0.6890623569488525, "rewards/accuracies": 0.75, "rewards/chosen": -0.0051100170239806175, "rewards/margins": 0.09390270709991455, "rewards/rejected": -0.09901271760463715, "step": 1958 }, { "epoch": 1.3547717842323652, "grad_norm": 6.235367774963379, "learning_rate": 4.802904564315353e-05, "log_odds_chosen": 4.211156368255615, "log_odds_ratio": -0.19045324623584747, "logits/chosen": -0.5956144332885742, "logits/rejected": -0.6682155132293701, "logps/chosen": -0.07565949112176895, "logps/rejected": -0.7925881743431091, "loss": 3.4797, "nll_loss": 0.8508702516555786, "rewards/accuracies": 0.875, "rewards/chosen": -0.007565949112176895, "rewards/margins": 0.07169287651777267, "rewards/rejected": -0.07925882190465927, "step": 1959 }, { "epoch": 1.355463347164592, "grad_norm": 6.1317338943481445, "learning_rate": 4.802520362686338e-05, "log_odds_chosen": 5.707240104675293, "log_odds_ratio": -0.27061474323272705, "logits/chosen": -0.36521780490875244, "logits/rejected": -0.38194626569747925, "logps/chosen": -0.037421174347400665, "logps/rejected": -1.2415099143981934, "loss": 2.8538, "nll_loss": 0.6863940954208374, "rewards/accuracies": 0.875, "rewards/chosen": -0.0037421174347400665, "rewards/margins": 0.12040887773036957, "rewards/rejected": -0.12415099143981934, "step": 1960 }, { "epoch": 1.3561549100968189, "grad_norm": 3.9908347129821777, "learning_rate": 4.802136161057323e-05, "log_odds_chosen": 2.9918770790100098, "log_odds_ratio": -0.44292670488357544, "logits/chosen": -0.6748017072677612, "logits/rejected": -0.7269718647003174, "logps/chosen": -0.14064937829971313, "logps/rejected": -0.4798199534416199, "loss": 3.3409, "nll_loss": 0.7909257411956787, "rewards/accuracies": 0.625, "rewards/chosen": -0.014064937829971313, "rewards/margins": 0.033917058259248734, "rewards/rejected": -0.04798199608922005, "step": 1961 }, { "epoch": 1.3568464730290457, "grad_norm": 6.479868412017822, "learning_rate": 4.8017519594283084e-05, "log_odds_chosen": 4.106939315795898, "log_odds_ratio": -0.2835253179073334, "logits/chosen": -0.6259688138961792, "logits/rejected": -0.7092024683952332, "logps/chosen": -0.03148230165243149, "logps/rejected": -0.7635524272918701, "loss": 3.48, "nll_loss": 0.8416469097137451, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031482302583754063, "rewards/margins": 0.07320702075958252, "rewards/rejected": -0.07635524868965149, "step": 1962 }, { "epoch": 1.3575380359612725, "grad_norm": 9.798500061035156, "learning_rate": 4.801367757799293e-05, "log_odds_chosen": 4.252465724945068, "log_odds_ratio": -0.25726497173309326, "logits/chosen": -0.5425630211830139, "logits/rejected": -0.6961837410926819, "logps/chosen": -0.04355762153863907, "logps/rejected": -0.8535860776901245, "loss": 3.9693, "nll_loss": 0.9666072130203247, "rewards/accuracies": 0.75, "rewards/chosen": -0.004355762153863907, "rewards/margins": 0.08100283890962601, "rewards/rejected": -0.08535861223936081, "step": 1963 }, { "epoch": 1.3582295988934994, "grad_norm": 7.3136305809021, "learning_rate": 4.800983556170278e-05, "log_odds_chosen": 4.736594200134277, "log_odds_ratio": -0.18831676244735718, "logits/chosen": -0.36243361234664917, "logits/rejected": -0.37697988748550415, "logps/chosen": -0.05069248378276825, "logps/rejected": -0.8661303520202637, "loss": 3.7201, "nll_loss": 0.9111894369125366, "rewards/accuracies": 0.875, "rewards/chosen": -0.005069248378276825, "rewards/margins": 0.08154379576444626, "rewards/rejected": -0.08661304414272308, "step": 1964 }, { "epoch": 1.3589211618257262, "grad_norm": 5.368043422698975, "learning_rate": 4.8005993545412634e-05, "log_odds_chosen": 2.5243210792541504, "log_odds_ratio": -0.40685027837753296, "logits/chosen": -0.40497708320617676, "logits/rejected": -0.3547658324241638, "logps/chosen": -0.06767724454402924, "logps/rejected": -0.5502372980117798, "loss": 2.753, "nll_loss": 0.6475711464881897, "rewards/accuracies": 0.75, "rewards/chosen": -0.006767723709344864, "rewards/margins": 0.048256002366542816, "rewards/rejected": -0.05502372980117798, "step": 1965 }, { "epoch": 1.359612724757953, "grad_norm": 6.931267738342285, "learning_rate": 4.800215152912249e-05, "log_odds_chosen": 5.26509952545166, "log_odds_ratio": -0.15575455129146576, "logits/chosen": -0.48398256301879883, "logits/rejected": -0.5567474961280823, "logps/chosen": -0.06316303461790085, "logps/rejected": -0.8755611181259155, "loss": 3.2898, "nll_loss": 0.8068767189979553, "rewards/accuracies": 0.875, "rewards/chosen": -0.00631630327552557, "rewards/margins": 0.08123980462551117, "rewards/rejected": -0.08755610883235931, "step": 1966 }, { "epoch": 1.3603042876901799, "grad_norm": 4.129261016845703, "learning_rate": 4.799830951283233e-05, "log_odds_chosen": 4.578372001647949, "log_odds_ratio": -0.0830615758895874, "logits/chosen": -0.4261839985847473, "logits/rejected": -0.46446114778518677, "logps/chosen": -0.0838017538189888, "logps/rejected": -0.7984384298324585, "loss": 2.2135, "nll_loss": 0.5450767874717712, "rewards/accuracies": 1.0, "rewards/chosen": -0.008380175568163395, "rewards/margins": 0.07146366685628891, "rewards/rejected": -0.07984384894371033, "step": 1967 }, { "epoch": 1.3609958506224067, "grad_norm": 7.837843894958496, "learning_rate": 4.799446749654219e-05, "log_odds_chosen": 1.4155436754226685, "log_odds_ratio": -0.6807968616485596, "logits/chosen": -0.6102103590965271, "logits/rejected": -0.6006621718406677, "logps/chosen": -0.18789350986480713, "logps/rejected": -0.41446274518966675, "loss": 3.5981, "nll_loss": 0.8314481377601624, "rewards/accuracies": 0.625, "rewards/chosen": -0.018789350986480713, "rewards/margins": 0.022656923159956932, "rewards/rejected": -0.041446272283792496, "step": 1968 }, { "epoch": 1.3616874135546335, "grad_norm": 5.931718349456787, "learning_rate": 4.799062548025204e-05, "log_odds_chosen": 4.071783542633057, "log_odds_ratio": -0.22267219424247742, "logits/chosen": -0.836118221282959, "logits/rejected": -0.8715649247169495, "logps/chosen": -0.04238056018948555, "logps/rejected": -0.7004839181900024, "loss": 3.8172, "nll_loss": 0.9320341944694519, "rewards/accuracies": 0.875, "rewards/chosen": -0.004238056018948555, "rewards/margins": 0.06581033766269684, "rewards/rejected": -0.07004839926958084, "step": 1969 }, { "epoch": 1.3623789764868603, "grad_norm": 6.518864631652832, "learning_rate": 4.798678346396189e-05, "log_odds_chosen": 4.345464706420898, "log_odds_ratio": -0.31315863132476807, "logits/chosen": -0.6527379751205444, "logits/rejected": -0.6448567509651184, "logps/chosen": -0.11621613055467606, "logps/rejected": -0.5999836921691895, "loss": 4.2446, "nll_loss": 1.0298457145690918, "rewards/accuracies": 0.875, "rewards/chosen": -0.011621613055467606, "rewards/margins": 0.04837675765156746, "rewards/rejected": -0.059998370707035065, "step": 1970 }, { "epoch": 1.3630705394190872, "grad_norm": 6.676383972167969, "learning_rate": 4.798294144767174e-05, "log_odds_chosen": 5.199181079864502, "log_odds_ratio": -0.26002073287963867, "logits/chosen": -0.46755251288414, "logits/rejected": -0.5575705766677856, "logps/chosen": -0.06826357543468475, "logps/rejected": -0.875418484210968, "loss": 2.9096, "nll_loss": 0.7013946771621704, "rewards/accuracies": 0.875, "rewards/chosen": -0.006826357915997505, "rewards/margins": 0.08071549236774445, "rewards/rejected": -0.0875418558716774, "step": 1971 }, { "epoch": 1.363762102351314, "grad_norm": 4.894266605377197, "learning_rate": 4.797909943138159e-05, "log_odds_chosen": 2.65413761138916, "log_odds_ratio": -0.24586760997772217, "logits/chosen": -0.6672540903091431, "logits/rejected": -0.6396835446357727, "logps/chosen": -0.0630340427160263, "logps/rejected": -0.6643854379653931, "loss": 2.3291, "nll_loss": 0.5576860308647156, "rewards/accuracies": 1.0, "rewards/chosen": -0.0063034044578671455, "rewards/margins": 0.06013514846563339, "rewards/rejected": -0.06643854826688766, "step": 1972 }, { "epoch": 1.3644536652835408, "grad_norm": 4.742177963256836, "learning_rate": 4.797525741509144e-05, "log_odds_chosen": 2.2191128730773926, "log_odds_ratio": -0.4817036986351013, "logits/chosen": -0.4750691056251526, "logits/rejected": -0.5359756350517273, "logps/chosen": -0.10734833031892776, "logps/rejected": -0.42450714111328125, "loss": 3.7213, "nll_loss": 0.8821476697921753, "rewards/accuracies": 0.75, "rewards/chosen": -0.010734833776950836, "rewards/margins": 0.03171588107943535, "rewards/rejected": -0.04245071858167648, "step": 1973 }, { "epoch": 1.3651452282157677, "grad_norm": 3.849248170852661, "learning_rate": 4.797141539880129e-05, "log_odds_chosen": 2.6243133544921875, "log_odds_ratio": -0.48991310596466064, "logits/chosen": -0.3240754008293152, "logits/rejected": -0.3246554136276245, "logps/chosen": -0.1363077461719513, "logps/rejected": -0.4788323938846588, "loss": 2.3044, "nll_loss": 0.527114748954773, "rewards/accuracies": 0.75, "rewards/chosen": -0.01363077387213707, "rewards/margins": 0.03425246477127075, "rewards/rejected": -0.04788323864340782, "step": 1974 }, { "epoch": 1.3658367911479945, "grad_norm": 7.029124736785889, "learning_rate": 4.7967573382511145e-05, "log_odds_chosen": 3.2474732398986816, "log_odds_ratio": -0.22893860936164856, "logits/chosen": -0.678703784942627, "logits/rejected": -0.6811733841896057, "logps/chosen": -0.06615074723958969, "logps/rejected": -0.5361521244049072, "loss": 3.8263, "nll_loss": 0.9336775541305542, "rewards/accuracies": 0.875, "rewards/chosen": -0.006615075282752514, "rewards/margins": 0.047000136226415634, "rewards/rejected": -0.05361521244049072, "step": 1975 }, { "epoch": 1.3665283540802213, "grad_norm": 4.65273904800415, "learning_rate": 4.796373136622099e-05, "log_odds_chosen": 3.6754541397094727, "log_odds_ratio": -0.12889450788497925, "logits/chosen": -0.7940229177474976, "logits/rejected": -0.7994104623794556, "logps/chosen": -0.05574585497379303, "logps/rejected": -0.6791449785232544, "loss": 3.0074, "nll_loss": 0.7389633655548096, "rewards/accuracies": 1.0, "rewards/chosen": -0.0055745854042470455, "rewards/margins": 0.062339916825294495, "rewards/rejected": -0.06791450083255768, "step": 1976 }, { "epoch": 1.3672199170124482, "grad_norm": 6.906028747558594, "learning_rate": 4.795988934993085e-05, "log_odds_chosen": 4.955214500427246, "log_odds_ratio": -0.2700616717338562, "logits/chosen": -0.33470577001571655, "logits/rejected": -0.41006895899772644, "logps/chosen": -0.060129314661026, "logps/rejected": -1.0193614959716797, "loss": 2.8268, "nll_loss": 0.679686427116394, "rewards/accuracies": 0.875, "rewards/chosen": -0.0060129314661026, "rewards/margins": 0.09592323005199432, "rewards/rejected": -0.10193616151809692, "step": 1977 }, { "epoch": 1.367911479944675, "grad_norm": 5.417724132537842, "learning_rate": 4.7956047333640696e-05, "log_odds_chosen": 3.265223979949951, "log_odds_ratio": -0.1482883095741272, "logits/chosen": -0.47248220443725586, "logits/rejected": -0.47120919823646545, "logps/chosen": -0.0854690670967102, "logps/rejected": -0.8021750450134277, "loss": 3.5534, "nll_loss": 0.873532235622406, "rewards/accuracies": 1.0, "rewards/chosen": -0.008546906523406506, "rewards/margins": 0.07167059183120728, "rewards/rejected": -0.08021750301122665, "step": 1978 }, { "epoch": 1.3686030428769018, "grad_norm": 5.667328834533691, "learning_rate": 4.795220531735055e-05, "log_odds_chosen": 2.9932069778442383, "log_odds_ratio": -0.2437632977962494, "logits/chosen": -0.6204601526260376, "logits/rejected": -0.6771796941757202, "logps/chosen": -0.09919442236423492, "logps/rejected": -0.6160801649093628, "loss": 2.8913, "nll_loss": 0.6984534859657288, "rewards/accuracies": 1.0, "rewards/chosen": -0.009919442236423492, "rewards/margins": 0.05168858543038368, "rewards/rejected": -0.061608027666807175, "step": 1979 }, { "epoch": 1.3692946058091287, "grad_norm": 5.357570648193359, "learning_rate": 4.79483633010604e-05, "log_odds_chosen": 2.7944960594177246, "log_odds_ratio": -0.25465598702430725, "logits/chosen": -0.6888865828514099, "logits/rejected": -0.7295427918434143, "logps/chosen": -0.13671831786632538, "logps/rejected": -0.6448003649711609, "loss": 3.5759, "nll_loss": 0.8685050010681152, "rewards/accuracies": 0.875, "rewards/chosen": -0.013671832159161568, "rewards/margins": 0.05080820620059967, "rewards/rejected": -0.06448003649711609, "step": 1980 }, { "epoch": 1.3699861687413555, "grad_norm": 7.210214614868164, "learning_rate": 4.7944521284770246e-05, "log_odds_chosen": 3.735138177871704, "log_odds_ratio": -0.24184566736221313, "logits/chosen": -0.759207546710968, "logits/rejected": -0.773901641368866, "logps/chosen": -0.04999281466007233, "logps/rejected": -0.5645262002944946, "loss": 3.4228, "nll_loss": 0.8315247297286987, "rewards/accuracies": 0.875, "rewards/chosen": -0.004999281372874975, "rewards/margins": 0.05145333334803581, "rewards/rejected": -0.056452613323926926, "step": 1981 }, { "epoch": 1.3706777316735823, "grad_norm": 5.053602695465088, "learning_rate": 4.79406792684801e-05, "log_odds_chosen": 3.7224245071411133, "log_odds_ratio": -0.2283429205417633, "logits/chosen": -0.6872579455375671, "logits/rejected": -0.7188456654548645, "logps/chosen": -0.09103509783744812, "logps/rejected": -0.6439297199249268, "loss": 2.5801, "nll_loss": 0.6221836805343628, "rewards/accuracies": 1.0, "rewards/chosen": -0.009103509597480297, "rewards/margins": 0.055289462208747864, "rewards/rejected": -0.06439296901226044, "step": 1982 }, { "epoch": 1.3713692946058091, "grad_norm": 4.521610736846924, "learning_rate": 4.793683725218995e-05, "log_odds_chosen": 3.951543092727661, "log_odds_ratio": -0.3210464119911194, "logits/chosen": -0.4027020335197449, "logits/rejected": -0.448123037815094, "logps/chosen": -0.0801689624786377, "logps/rejected": -0.743116557598114, "loss": 2.79, "nll_loss": 0.6653933525085449, "rewards/accuracies": 0.75, "rewards/chosen": -0.00801689550280571, "rewards/margins": 0.06629475951194763, "rewards/rejected": -0.07431165128946304, "step": 1983 }, { "epoch": 1.372060857538036, "grad_norm": 6.1983723640441895, "learning_rate": 4.7932995235899804e-05, "log_odds_chosen": 5.588142395019531, "log_odds_ratio": -0.04881987348198891, "logits/chosen": -0.22947348654270172, "logits/rejected": -0.18946635723114014, "logps/chosen": -0.04573337733745575, "logps/rejected": -1.1007776260375977, "loss": 2.6265, "nll_loss": 0.6517492532730103, "rewards/accuracies": 1.0, "rewards/chosen": -0.004573337268084288, "rewards/margins": 0.10550442337989807, "rewards/rejected": -0.11007775366306305, "step": 1984 }, { "epoch": 1.3727524204702628, "grad_norm": 8.349906921386719, "learning_rate": 4.792915321960965e-05, "log_odds_chosen": 3.1251940727233887, "log_odds_ratio": -0.6032139658927917, "logits/chosen": -0.514311671257019, "logits/rejected": -0.5597679615020752, "logps/chosen": -0.182631254196167, "logps/rejected": -0.6719191074371338, "loss": 3.0019, "nll_loss": 0.6901443004608154, "rewards/accuracies": 0.75, "rewards/chosen": -0.01826312765479088, "rewards/margins": 0.04892878234386444, "rewards/rejected": -0.06719191372394562, "step": 1985 }, { "epoch": 1.3734439834024896, "grad_norm": 7.041315078735352, "learning_rate": 4.792531120331951e-05, "log_odds_chosen": 4.945158958435059, "log_odds_ratio": -0.2092055380344391, "logits/chosen": -0.2713004946708679, "logits/rejected": -0.2928984761238098, "logps/chosen": -0.06935533881187439, "logps/rejected": -1.159374713897705, "loss": 2.9727, "nll_loss": 0.7222519516944885, "rewards/accuracies": 0.875, "rewards/chosen": -0.006935533601790667, "rewards/margins": 0.10900193452835083, "rewards/rejected": -0.11593747138977051, "step": 1986 }, { "epoch": 1.3741355463347165, "grad_norm": 5.349747180938721, "learning_rate": 4.7921469187029354e-05, "log_odds_chosen": 1.9877170324325562, "log_odds_ratio": -0.2859208583831787, "logits/chosen": -0.3041594326496124, "logits/rejected": -0.32395341992378235, "logps/chosen": -0.10919293761253357, "logps/rejected": -0.41289645433425903, "loss": 4.014, "nll_loss": 0.9749143123626709, "rewards/accuracies": 0.875, "rewards/chosen": -0.010919294320046902, "rewards/margins": 0.030370350927114487, "rewards/rejected": -0.041289642453193665, "step": 1987 }, { "epoch": 1.3748271092669433, "grad_norm": 5.41785192489624, "learning_rate": 4.791762717073921e-05, "log_odds_chosen": 5.79414176940918, "log_odds_ratio": -0.16315719485282898, "logits/chosen": -0.2273552268743515, "logits/rejected": -0.25023555755615234, "logps/chosen": -0.031983837485313416, "logps/rejected": -0.6889051198959351, "loss": 3.0326, "nll_loss": 0.7418302297592163, "rewards/accuracies": 0.875, "rewards/chosen": -0.003198383841663599, "rewards/margins": 0.06569212675094604, "rewards/rejected": -0.0688905119895935, "step": 1988 }, { "epoch": 1.3755186721991701, "grad_norm": 7.066270351409912, "learning_rate": 4.791378515444906e-05, "log_odds_chosen": 3.625401735305786, "log_odds_ratio": -0.13533686101436615, "logits/chosen": -0.33719366788864136, "logits/rejected": -0.37789204716682434, "logps/chosen": -0.05393574386835098, "logps/rejected": -0.6988438367843628, "loss": 4.1022, "nll_loss": 1.0120058059692383, "rewards/accuracies": 1.0, "rewards/chosen": -0.005393574479967356, "rewards/margins": 0.06449081003665924, "rewards/rejected": -0.06988438963890076, "step": 1989 }, { "epoch": 1.376210235131397, "grad_norm": 7.164843559265137, "learning_rate": 4.7909943138158905e-05, "log_odds_chosen": 5.247487545013428, "log_odds_ratio": -0.1880350410938263, "logits/chosen": -0.6329569816589355, "logits/rejected": -0.5379258990287781, "logps/chosen": -0.029778484255075455, "logps/rejected": -0.6082863211631775, "loss": 3.8988, "nll_loss": 0.955906093120575, "rewards/accuracies": 1.0, "rewards/chosen": -0.002977848518639803, "rewards/margins": 0.05785078555345535, "rewards/rejected": -0.06082863733172417, "step": 1990 }, { "epoch": 1.3769017980636238, "grad_norm": 5.449748516082764, "learning_rate": 4.790610112186876e-05, "log_odds_chosen": 3.5334930419921875, "log_odds_ratio": -0.21102845668792725, "logits/chosen": -0.41154390573501587, "logits/rejected": -0.38675880432128906, "logps/chosen": -0.08894149214029312, "logps/rejected": -0.6938140392303467, "loss": 3.6819, "nll_loss": 0.8993774652481079, "rewards/accuracies": 0.875, "rewards/chosen": -0.008894150145351887, "rewards/margins": 0.060487259179353714, "rewards/rejected": -0.06938140839338303, "step": 1991 }, { "epoch": 1.3775933609958506, "grad_norm": 5.393478870391846, "learning_rate": 4.790225910557861e-05, "log_odds_chosen": 3.826383590698242, "log_odds_ratio": -0.36511868238449097, "logits/chosen": -0.29448530077934265, "logits/rejected": -0.2909614145755768, "logps/chosen": -0.13047395646572113, "logps/rejected": -0.8310095071792603, "loss": 2.8987, "nll_loss": 0.6881729364395142, "rewards/accuracies": 0.625, "rewards/chosen": -0.013047396205365658, "rewards/margins": 0.07005355507135391, "rewards/rejected": -0.08310095220804214, "step": 1992 }, { "epoch": 1.3782849239280774, "grad_norm": 6.349294662475586, "learning_rate": 4.789841708928846e-05, "log_odds_chosen": 4.048874855041504, "log_odds_ratio": -0.2582094371318817, "logits/chosen": -0.5665625333786011, "logits/rejected": -0.6103119850158691, "logps/chosen": -0.09019026905298233, "logps/rejected": -0.5480784177780151, "loss": 3.733, "nll_loss": 0.907441258430481, "rewards/accuracies": 0.75, "rewards/chosen": -0.009019027464091778, "rewards/margins": 0.04578881710767746, "rewards/rejected": -0.054807838052511215, "step": 1993 }, { "epoch": 1.3789764868603043, "grad_norm": 6.726129055023193, "learning_rate": 4.789457507299831e-05, "log_odds_chosen": 3.7346792221069336, "log_odds_ratio": -0.3640691339969635, "logits/chosen": -0.3173547685146332, "logits/rejected": -0.3681148588657379, "logps/chosen": -0.14127859473228455, "logps/rejected": -0.597007155418396, "loss": 2.7151, "nll_loss": 0.642366349697113, "rewards/accuracies": 0.75, "rewards/chosen": -0.01412785891443491, "rewards/margins": 0.045572854578495026, "rewards/rejected": -0.05970071256160736, "step": 1994 }, { "epoch": 1.379668049792531, "grad_norm": 6.565385341644287, "learning_rate": 4.789073305670817e-05, "log_odds_chosen": 4.064783573150635, "log_odds_ratio": -0.21562957763671875, "logits/chosen": -0.5860500335693359, "logits/rejected": -0.688904881477356, "logps/chosen": -0.060132626444101334, "logps/rejected": -1.0031733512878418, "loss": 3.8129, "nll_loss": 0.9316558837890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.006013263016939163, "rewards/margins": 0.09430407732725143, "rewards/rejected": -0.1003173366189003, "step": 1995 }, { "epoch": 1.380359612724758, "grad_norm": 5.72561502456665, "learning_rate": 4.788689104041801e-05, "log_odds_chosen": 4.816940784454346, "log_odds_ratio": -0.2247188836336136, "logits/chosen": -0.30259183049201965, "logits/rejected": -0.2999754548072815, "logps/chosen": -0.0902637168765068, "logps/rejected": -1.0141100883483887, "loss": 2.7829, "nll_loss": 0.6732596158981323, "rewards/accuracies": 0.875, "rewards/chosen": -0.009026371873915195, "rewards/margins": 0.09238463640213013, "rewards/rejected": -0.10141100734472275, "step": 1996 }, { "epoch": 1.3810511756569848, "grad_norm": 5.127386093139648, "learning_rate": 4.7883049024127865e-05, "log_odds_chosen": 4.620459079742432, "log_odds_ratio": -0.16839353740215302, "logits/chosen": -0.416474312543869, "logits/rejected": -0.4174883961677551, "logps/chosen": -0.09087523818016052, "logps/rejected": -0.835807204246521, "loss": 3.4169, "nll_loss": 0.8373754620552063, "rewards/accuracies": 0.875, "rewards/chosen": -0.009087524376809597, "rewards/margins": 0.07449319213628769, "rewards/rejected": -0.08358071744441986, "step": 1997 }, { "epoch": 1.3817427385892116, "grad_norm": 6.234887599945068, "learning_rate": 4.787920700783772e-05, "log_odds_chosen": 4.776619911193848, "log_odds_ratio": -0.18312208354473114, "logits/chosen": -0.635537326335907, "logits/rejected": -0.6250307559967041, "logps/chosen": -0.043525900691747665, "logps/rejected": -0.9725822806358337, "loss": 2.6875, "nll_loss": 0.6535642147064209, "rewards/accuracies": 0.875, "rewards/chosen": -0.0043525900691747665, "rewards/margins": 0.09290564060211182, "rewards/rejected": -0.09725822508335114, "step": 1998 }, { "epoch": 1.3824343015214384, "grad_norm": 5.526193618774414, "learning_rate": 4.787536499154756e-05, "log_odds_chosen": 4.099743843078613, "log_odds_ratio": -0.3171989321708679, "logits/chosen": -0.8929038047790527, "logits/rejected": -0.8908524513244629, "logps/chosen": -0.09101560711860657, "logps/rejected": -0.821946382522583, "loss": 3.2194, "nll_loss": 0.7731227874755859, "rewards/accuracies": 0.75, "rewards/chosen": -0.009101560339331627, "rewards/margins": 0.07309307903051376, "rewards/rejected": -0.08219464123249054, "step": 1999 }, { "epoch": 1.3831258644536653, "grad_norm": 7.315108299255371, "learning_rate": 4.7871522975257416e-05, "log_odds_chosen": 4.748763084411621, "log_odds_ratio": -0.15661533176898956, "logits/chosen": -0.8928630948066711, "logits/rejected": -0.9290038347244263, "logps/chosen": -0.05364002287387848, "logps/rejected": -1.0081446170806885, "loss": 3.0047, "nll_loss": 0.735511064529419, "rewards/accuracies": 1.0, "rewards/chosen": -0.0053640021942555904, "rewards/margins": 0.09545045346021652, "rewards/rejected": -0.10081446170806885, "step": 2000 }, { "epoch": 1.383817427385892, "grad_norm": 7.51612663269043, "learning_rate": 4.786768095896727e-05, "log_odds_chosen": 6.139799118041992, "log_odds_ratio": -0.22766713798046112, "logits/chosen": -0.242156982421875, "logits/rejected": -0.3143760561943054, "logps/chosen": -0.018331004306674004, "logps/rejected": -0.9860979318618774, "loss": 3.02, "nll_loss": 0.7322361469268799, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018331005703657866, "rewards/margins": 0.09677669405937195, "rewards/rejected": -0.0986097976565361, "step": 2001 }, { "epoch": 1.384508990318119, "grad_norm": 6.356954574584961, "learning_rate": 4.786383894267712e-05, "log_odds_chosen": 3.1617636680603027, "log_odds_ratio": -0.3131043016910553, "logits/chosen": -0.7802184820175171, "logits/rejected": -0.7662279009819031, "logps/chosen": -0.09586849063634872, "logps/rejected": -0.48525965213775635, "loss": 3.7224, "nll_loss": 0.89928138256073, "rewards/accuracies": 0.75, "rewards/chosen": -0.009586849249899387, "rewards/margins": 0.03893912211060524, "rewards/rejected": -0.04852597042918205, "step": 2002 }, { "epoch": 1.3852005532503457, "grad_norm": 7.20344877243042, "learning_rate": 4.7859996926386966e-05, "log_odds_chosen": 3.3825900554656982, "log_odds_ratio": -0.33774444460868835, "logits/chosen": -0.7070195078849792, "logits/rejected": -0.7348042726516724, "logps/chosen": -0.08501673489809036, "logps/rejected": -0.8383290767669678, "loss": 3.6556, "nll_loss": 0.8801342844963074, "rewards/accuracies": 0.75, "rewards/chosen": -0.008501673117280006, "rewards/margins": 0.07533122599124908, "rewards/rejected": -0.08383290469646454, "step": 2003 }, { "epoch": 1.3858921161825726, "grad_norm": 11.765769958496094, "learning_rate": 4.7856154910096825e-05, "log_odds_chosen": 2.907918930053711, "log_odds_ratio": -0.6590770483016968, "logits/chosen": -0.4293488562107086, "logits/rejected": -0.47192126512527466, "logps/chosen": -0.2054803967475891, "logps/rejected": -0.6298776268959045, "loss": 5.2493, "nll_loss": 1.24642813205719, "rewards/accuracies": 0.5, "rewards/chosen": -0.02054804004728794, "rewards/margins": 0.04243971407413483, "rewards/rejected": -0.06298775970935822, "step": 2004 }, { "epoch": 1.3865836791147994, "grad_norm": 3.930716037750244, "learning_rate": 4.785231289380667e-05, "log_odds_chosen": 4.3756890296936035, "log_odds_ratio": -0.13180823624134064, "logits/chosen": -0.6425517201423645, "logits/rejected": -0.6409659385681152, "logps/chosen": -0.05571923032402992, "logps/rejected": -0.7726579904556274, "loss": 2.7233, "nll_loss": 0.6676549911499023, "rewards/accuracies": 1.0, "rewards/chosen": -0.00557192275300622, "rewards/margins": 0.07169388234615326, "rewards/rejected": -0.07726580649614334, "step": 2005 }, { "epoch": 1.3872752420470262, "grad_norm": 5.899771690368652, "learning_rate": 4.7848470877516524e-05, "log_odds_chosen": 4.994879722595215, "log_odds_ratio": -0.46346139907836914, "logits/chosen": -0.7579612135887146, "logits/rejected": -0.7698273658752441, "logps/chosen": -0.11928918212652206, "logps/rejected": -1.1728246212005615, "loss": 2.927, "nll_loss": 0.6854143142700195, "rewards/accuracies": 0.75, "rewards/chosen": -0.011928917840123177, "rewards/margins": 0.10535356402397156, "rewards/rejected": -0.11728248000144958, "step": 2006 }, { "epoch": 1.387966804979253, "grad_norm": 7.911510467529297, "learning_rate": 4.7844628861226376e-05, "log_odds_chosen": 2.2403409481048584, "log_odds_ratio": -0.45800328254699707, "logits/chosen": -0.7115573883056641, "logits/rejected": -0.736803412437439, "logps/chosen": -0.1595693677663803, "logps/rejected": -0.5836013555526733, "loss": 3.9246, "nll_loss": 0.9353399872779846, "rewards/accuracies": 0.75, "rewards/chosen": -0.015956936404109, "rewards/margins": 0.042403195053339005, "rewards/rejected": -0.058360133320093155, "step": 2007 }, { "epoch": 1.38865836791148, "grad_norm": 5.979852199554443, "learning_rate": 4.784078684493623e-05, "log_odds_chosen": 5.933134078979492, "log_odds_ratio": -0.018309567123651505, "logits/chosen": -0.5352558493614197, "logits/rejected": -0.5490735769271851, "logps/chosen": -0.024755114689469337, "logps/rejected": -0.852277398109436, "loss": 3.4945, "nll_loss": 0.8717933893203735, "rewards/accuracies": 1.0, "rewards/chosen": -0.002475511282682419, "rewards/margins": 0.08275222778320312, "rewards/rejected": -0.08522774279117584, "step": 2008 }, { "epoch": 1.3893499308437067, "grad_norm": 6.766330718994141, "learning_rate": 4.7836944828646074e-05, "log_odds_chosen": 3.526118278503418, "log_odds_ratio": -0.25739622116088867, "logits/chosen": -0.6153137683868408, "logits/rejected": -0.6079054474830627, "logps/chosen": -0.06622539460659027, "logps/rejected": -0.657639741897583, "loss": 3.1783, "nll_loss": 0.7688472270965576, "rewards/accuracies": 0.875, "rewards/chosen": -0.00662253936752677, "rewards/margins": 0.05914144217967987, "rewards/rejected": -0.06576397269964218, "step": 2009 }, { "epoch": 1.3900414937759336, "grad_norm": 4.855165481567383, "learning_rate": 4.7833102812355927e-05, "log_odds_chosen": 4.4697265625, "log_odds_ratio": -0.046659186482429504, "logits/chosen": -0.36698010563850403, "logits/rejected": -0.4080663323402405, "logps/chosen": -0.03516608104109764, "logps/rejected": -0.8239533305168152, "loss": 3.2684, "nll_loss": 0.8124281764030457, "rewards/accuracies": 1.0, "rewards/chosen": -0.003516607917845249, "rewards/margins": 0.0788787230849266, "rewards/rejected": -0.08239533007144928, "step": 2010 }, { "epoch": 1.3907330567081604, "grad_norm": 11.912924766540527, "learning_rate": 4.782926079606578e-05, "log_odds_chosen": 2.89589262008667, "log_odds_ratio": -0.2935729920864105, "logits/chosen": -0.29961317777633667, "logits/rejected": -0.36233705282211304, "logps/chosen": -0.09414010494947433, "logps/rejected": -0.6591374278068542, "loss": 4.819, "nll_loss": 1.1753848791122437, "rewards/accuracies": 0.875, "rewards/chosen": -0.009414010681211948, "rewards/margins": 0.05649973452091217, "rewards/rejected": -0.06591374427080154, "step": 2011 }, { "epoch": 1.3914246196403872, "grad_norm": 8.001628875732422, "learning_rate": 4.7825418779775625e-05, "log_odds_chosen": 4.689050674438477, "log_odds_ratio": -0.18099285662174225, "logits/chosen": -0.28261780738830566, "logits/rejected": -0.29922106862068176, "logps/chosen": -0.0453483983874321, "logps/rejected": -0.8597908616065979, "loss": 4.1907, "nll_loss": 1.0295741558074951, "rewards/accuracies": 0.875, "rewards/chosen": -0.004534840118139982, "rewards/margins": 0.08144424110651016, "rewards/rejected": -0.08597908914089203, "step": 2012 }, { "epoch": 1.392116182572614, "grad_norm": 7.775978088378906, "learning_rate": 4.7821576763485484e-05, "log_odds_chosen": 5.596604347229004, "log_odds_ratio": -0.16852453351020813, "logits/chosen": -0.5239167809486389, "logits/rejected": -0.4925086498260498, "logps/chosen": -0.045228488743305206, "logps/rejected": -0.7164972424507141, "loss": 4.5063, "nll_loss": 1.1097182035446167, "rewards/accuracies": 1.0, "rewards/chosen": -0.004522849339991808, "rewards/margins": 0.06712687015533447, "rewards/rejected": -0.07164972275495529, "step": 2013 }, { "epoch": 1.3928077455048409, "grad_norm": 6.019063949584961, "learning_rate": 4.781773474719533e-05, "log_odds_chosen": 3.483452320098877, "log_odds_ratio": -0.1692686378955841, "logits/chosen": -0.4919688403606415, "logits/rejected": -0.5416184663772583, "logps/chosen": -0.07065532356500626, "logps/rejected": -0.6099762916564941, "loss": 2.8116, "nll_loss": 0.685977578163147, "rewards/accuracies": 1.0, "rewards/chosen": -0.007065531797707081, "rewards/margins": 0.05393209308385849, "rewards/rejected": -0.060997627675533295, "step": 2014 }, { "epoch": 1.3934993084370677, "grad_norm": 4.556207656860352, "learning_rate": 4.781389273090518e-05, "log_odds_chosen": 5.876942157745361, "log_odds_ratio": -0.11567544937133789, "logits/chosen": -0.8399274349212646, "logits/rejected": -0.8773524761199951, "logps/chosen": -0.029017897322773933, "logps/rejected": -0.9330223202705383, "loss": 3.1699, "nll_loss": 0.7809014916419983, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029017897322773933, "rewards/margins": 0.09040044993162155, "rewards/rejected": -0.09330223500728607, "step": 2015 }, { "epoch": 1.3941908713692945, "grad_norm": 4.728296279907227, "learning_rate": 4.7810050714615034e-05, "log_odds_chosen": 7.070993423461914, "log_odds_ratio": -0.006337965838611126, "logits/chosen": -0.5788023471832275, "logits/rejected": -0.6077452898025513, "logps/chosen": -0.0022974973544478416, "logps/rejected": -0.7637322545051575, "loss": 2.648, "nll_loss": 0.6613626480102539, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022974974126555026, "rewards/margins": 0.07614347338676453, "rewards/rejected": -0.07637323439121246, "step": 2016 }, { "epoch": 1.3948824343015214, "grad_norm": 5.20559024810791, "learning_rate": 4.780620869832489e-05, "log_odds_chosen": 6.8388824462890625, "log_odds_ratio": -0.03591344505548477, "logits/chosen": -0.33628082275390625, "logits/rejected": -0.35934945940971375, "logps/chosen": -0.008574387989938259, "logps/rejected": -0.8044378757476807, "loss": 3.558, "nll_loss": 0.8859061002731323, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008574387757107615, "rewards/margins": 0.07958635687828064, "rewards/rejected": -0.08044379204511642, "step": 2017 }, { "epoch": 1.3955739972337482, "grad_norm": 7.665406703948975, "learning_rate": 4.780236668203473e-05, "log_odds_chosen": 6.559147834777832, "log_odds_ratio": -0.058827269822359085, "logits/chosen": -0.6809530854225159, "logits/rejected": -0.709974467754364, "logps/chosen": -0.0708513855934143, "logps/rejected": -1.248304843902588, "loss": 4.2583, "nll_loss": 1.0586938858032227, "rewards/accuracies": 1.0, "rewards/chosen": -0.007085138466209173, "rewards/margins": 0.11774535477161407, "rewards/rejected": -0.12483049929141998, "step": 2018 }, { "epoch": 1.396265560165975, "grad_norm": 5.273054599761963, "learning_rate": 4.7798524665744585e-05, "log_odds_chosen": 5.406514644622803, "log_odds_ratio": -0.049261778593063354, "logits/chosen": -0.6238724589347839, "logits/rejected": -0.616300106048584, "logps/chosen": -0.014879455789923668, "logps/rejected": -0.5857996344566345, "loss": 3.1058, "nll_loss": 0.7715163826942444, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014879456721246243, "rewards/margins": 0.05709202215075493, "rewards/rejected": -0.05857996270060539, "step": 2019 }, { "epoch": 1.3969571230982019, "grad_norm": 3.7898244857788086, "learning_rate": 4.779468264945444e-05, "log_odds_chosen": 7.883171081542969, "log_odds_ratio": -0.0038941281381994486, "logits/chosen": -0.43187326192855835, "logits/rejected": -0.44849109649658203, "logps/chosen": -0.007364689838141203, "logps/rejected": -1.025825023651123, "loss": 2.5187, "nll_loss": 0.6292957067489624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007364689372479916, "rewards/margins": 0.10184603929519653, "rewards/rejected": -0.10258250683546066, "step": 2020 }, { "epoch": 1.3976486860304287, "grad_norm": 7.709190368652344, "learning_rate": 4.779084063316428e-05, "log_odds_chosen": 4.407033443450928, "log_odds_ratio": -0.10557064414024353, "logits/chosen": -0.6062334775924683, "logits/rejected": -0.5914596915245056, "logps/chosen": -0.053902555257081985, "logps/rejected": -0.7024961709976196, "loss": 4.9311, "nll_loss": 1.2222228050231934, "rewards/accuracies": 1.0, "rewards/chosen": -0.005390255246311426, "rewards/margins": 0.06485936045646667, "rewards/rejected": -0.07024961709976196, "step": 2021 }, { "epoch": 1.3983402489626555, "grad_norm": 5.200230121612549, "learning_rate": 4.778699861687414e-05, "log_odds_chosen": 5.399484634399414, "log_odds_ratio": -0.21866726875305176, "logits/chosen": -0.4181535542011261, "logits/rejected": -0.4063791036605835, "logps/chosen": -0.060105275362730026, "logps/rejected": -1.2082123756408691, "loss": 3.6573, "nll_loss": 0.8924470543861389, "rewards/accuracies": 0.875, "rewards/chosen": -0.00601052725687623, "rewards/margins": 0.11481072008609772, "rewards/rejected": -0.12082124501466751, "step": 2022 }, { "epoch": 1.3990318118948823, "grad_norm": 6.319047451019287, "learning_rate": 4.778315660058399e-05, "log_odds_chosen": 5.292322158813477, "log_odds_ratio": -0.09680824726819992, "logits/chosen": -0.5350341796875, "logits/rejected": -0.5699993371963501, "logps/chosen": -0.04791120067238808, "logps/rejected": -0.6731879115104675, "loss": 2.9202, "nll_loss": 0.7203590869903564, "rewards/accuracies": 1.0, "rewards/chosen": -0.004791120067238808, "rewards/margins": 0.06252767145633698, "rewards/rejected": -0.06731878966093063, "step": 2023 }, { "epoch": 1.3997233748271092, "grad_norm": 8.506767272949219, "learning_rate": 4.777931458429384e-05, "log_odds_chosen": 5.026066303253174, "log_odds_ratio": -0.35519641637802124, "logits/chosen": -0.3691147565841675, "logits/rejected": -0.40069153904914856, "logps/chosen": -0.08445140719413757, "logps/rejected": -1.0374783277511597, "loss": 4.6222, "nll_loss": 1.1200363636016846, "rewards/accuracies": 0.875, "rewards/chosen": -0.008445140905678272, "rewards/margins": 0.09530268609523773, "rewards/rejected": -0.10374782979488373, "step": 2024 }, { "epoch": 1.400414937759336, "grad_norm": 3.4464073181152344, "learning_rate": 4.777547256800369e-05, "log_odds_chosen": 5.708957195281982, "log_odds_ratio": -0.05988878756761551, "logits/chosen": -0.6721937656402588, "logits/rejected": -0.635280966758728, "logps/chosen": -0.048227760940790176, "logps/rejected": -1.0062072277069092, "loss": 3.2867, "nll_loss": 0.815693199634552, "rewards/accuracies": 1.0, "rewards/chosen": -0.004822776187211275, "rewards/margins": 0.09579794853925705, "rewards/rejected": -0.10062072426080704, "step": 2025 }, { "epoch": 1.4011065006915628, "grad_norm": 5.286276817321777, "learning_rate": 4.7771630551713545e-05, "log_odds_chosen": 5.2256927490234375, "log_odds_ratio": -0.1297086924314499, "logits/chosen": -0.5953899621963501, "logits/rejected": -0.6047387719154358, "logps/chosen": -0.05348531901836395, "logps/rejected": -1.1254825592041016, "loss": 2.8011, "nll_loss": 0.6873102188110352, "rewards/accuracies": 0.875, "rewards/chosen": -0.005348531529307365, "rewards/margins": 0.10719972103834152, "rewards/rejected": -0.11254825443029404, "step": 2026 }, { "epoch": 1.4017980636237897, "grad_norm": 10.278658866882324, "learning_rate": 4.776778853542339e-05, "log_odds_chosen": 4.066064834594727, "log_odds_ratio": -0.500348687171936, "logits/chosen": -0.755004346370697, "logits/rejected": -0.8277742266654968, "logps/chosen": -0.06693331897258759, "logps/rejected": -1.083938717842102, "loss": 3.9488, "nll_loss": 0.9371687769889832, "rewards/accuracies": 0.875, "rewards/chosen": -0.006693332456052303, "rewards/margins": 0.10170053690671921, "rewards/rejected": -0.10839387029409409, "step": 2027 }, { "epoch": 1.4024896265560165, "grad_norm": 6.364788055419922, "learning_rate": 4.7763946519133243e-05, "log_odds_chosen": 3.97965669631958, "log_odds_ratio": -0.28485307097435, "logits/chosen": -0.5287357568740845, "logits/rejected": -0.5590074062347412, "logps/chosen": -0.04807409271597862, "logps/rejected": -0.5102754831314087, "loss": 3.0035, "nll_loss": 0.7223997712135315, "rewards/accuracies": 0.75, "rewards/chosen": -0.00480740936473012, "rewards/margins": 0.046220146119594574, "rewards/rejected": -0.05102755129337311, "step": 2028 }, { "epoch": 1.4031811894882433, "grad_norm": 5.334066390991211, "learning_rate": 4.7760104502843096e-05, "log_odds_chosen": 3.6514530181884766, "log_odds_ratio": -0.18523135781288147, "logits/chosen": -0.31928396224975586, "logits/rejected": -0.31118130683898926, "logps/chosen": -0.047676000744104385, "logps/rejected": -0.6466583609580994, "loss": 2.2694, "nll_loss": 0.5488160848617554, "rewards/accuracies": 1.0, "rewards/chosen": -0.004767600446939468, "rewards/margins": 0.05989823490381241, "rewards/rejected": -0.06466583907604218, "step": 2029 }, { "epoch": 1.4038727524204702, "grad_norm": 11.846880912780762, "learning_rate": 4.775626248655294e-05, "log_odds_chosen": 2.908698081970215, "log_odds_ratio": -0.4558899700641632, "logits/chosen": -0.41499459743499756, "logits/rejected": -0.4122236371040344, "logps/chosen": -0.10598790645599365, "logps/rejected": -0.6007672548294067, "loss": 3.3705, "nll_loss": 0.7970328330993652, "rewards/accuracies": 0.75, "rewards/chosen": -0.01059879083186388, "rewards/margins": 0.049477942287921906, "rewards/rejected": -0.06007672846317291, "step": 2030 }, { "epoch": 1.404564315352697, "grad_norm": 10.183661460876465, "learning_rate": 4.77524204702628e-05, "log_odds_chosen": 3.7660956382751465, "log_odds_ratio": -0.3516588807106018, "logits/chosen": -0.11453324556350708, "logits/rejected": -0.17370480298995972, "logps/chosen": -0.1562454104423523, "logps/rejected": -0.9619159698486328, "loss": 4.5479, "nll_loss": 1.1018214225769043, "rewards/accuracies": 0.875, "rewards/chosen": -0.01562454178929329, "rewards/margins": 0.08056706190109253, "rewards/rejected": -0.09619159996509552, "step": 2031 }, { "epoch": 1.4052558782849238, "grad_norm": 4.392728805541992, "learning_rate": 4.7748578453972646e-05, "log_odds_chosen": 6.245469570159912, "log_odds_ratio": -0.18948374688625336, "logits/chosen": -0.18185730278491974, "logits/rejected": -0.19521722197532654, "logps/chosen": -0.038732998073101044, "logps/rejected": -1.112351894378662, "loss": 3.3824, "nll_loss": 0.8266395926475525, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038732998073101044, "rewards/margins": 0.10736190527677536, "rewards/rejected": -0.11123519390821457, "step": 2032 }, { "epoch": 1.4059474412171507, "grad_norm": 8.62684440612793, "learning_rate": 4.77447364376825e-05, "log_odds_chosen": 3.7161660194396973, "log_odds_ratio": -0.49389269948005676, "logits/chosen": -0.5872402191162109, "logits/rejected": -0.5600318312644958, "logps/chosen": -0.07983855903148651, "logps/rejected": -0.6433324813842773, "loss": 3.2825, "nll_loss": 0.7712404131889343, "rewards/accuracies": 0.875, "rewards/chosen": -0.007983855903148651, "rewards/margins": 0.056349389255046844, "rewards/rejected": -0.0643332451581955, "step": 2033 }, { "epoch": 1.4066390041493775, "grad_norm": 4.754065990447998, "learning_rate": 4.774089442139235e-05, "log_odds_chosen": 4.764361381530762, "log_odds_ratio": -0.05447795242071152, "logits/chosen": -0.47819578647613525, "logits/rejected": -0.4906609058380127, "logps/chosen": -0.09325191378593445, "logps/rejected": -1.2406823635101318, "loss": 3.6243, "nll_loss": 0.9006204009056091, "rewards/accuracies": 1.0, "rewards/chosen": -0.009325191378593445, "rewards/margins": 0.11474305391311646, "rewards/rejected": -0.1240682452917099, "step": 2034 }, { "epoch": 1.4073305670816043, "grad_norm": 8.18529224395752, "learning_rate": 4.7737052405102204e-05, "log_odds_chosen": 4.307243824005127, "log_odds_ratio": -0.12494970858097076, "logits/chosen": -0.3749274015426636, "logits/rejected": -0.4090096056461334, "logps/chosen": -0.03940832242369652, "logps/rejected": -0.7432606220245361, "loss": 2.9987, "nll_loss": 0.7371869087219238, "rewards/accuracies": 1.0, "rewards/chosen": -0.0039408328011631966, "rewards/margins": 0.07038523256778717, "rewards/rejected": -0.0743260532617569, "step": 2035 }, { "epoch": 1.4080221300138311, "grad_norm": 4.085348606109619, "learning_rate": 4.773321038881205e-05, "log_odds_chosen": 6.709726810455322, "log_odds_ratio": -0.08220961689949036, "logits/chosen": -0.4231798052787781, "logits/rejected": -0.488666296005249, "logps/chosen": -0.029946403577923775, "logps/rejected": -1.1328915357589722, "loss": 2.6045, "nll_loss": 0.6429080367088318, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029946400318294764, "rewards/margins": 0.11029452085494995, "rewards/rejected": -0.11328916251659393, "step": 2036 }, { "epoch": 1.408713692946058, "grad_norm": 3.2187740802764893, "learning_rate": 4.77293683725219e-05, "log_odds_chosen": 6.3264288902282715, "log_odds_ratio": -0.04832165315747261, "logits/chosen": -0.22015348076820374, "logits/rejected": -0.24953240156173706, "logps/chosen": -0.013802926056087017, "logps/rejected": -0.8438926935195923, "loss": 2.8291, "nll_loss": 0.7024345993995667, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013802925823256373, "rewards/margins": 0.08300897479057312, "rewards/rejected": -0.08438926935195923, "step": 2037 }, { "epoch": 1.4094052558782848, "grad_norm": 4.500889301300049, "learning_rate": 4.7725526356231754e-05, "log_odds_chosen": 6.405463218688965, "log_odds_ratio": -0.009915841743350029, "logits/chosen": -0.5794069766998291, "logits/rejected": -0.6441061496734619, "logps/chosen": -0.014284246601164341, "logps/rejected": -1.012584924697876, "loss": 3.3985, "nll_loss": 0.8486447334289551, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014284246135503054, "rewards/margins": 0.09983007609844208, "rewards/rejected": -0.10125849395990372, "step": 2038 }, { "epoch": 1.4100968188105116, "grad_norm": 4.588141918182373, "learning_rate": 4.77216843399416e-05, "log_odds_chosen": 4.53325891494751, "log_odds_ratio": -0.21827146410942078, "logits/chosen": -0.5001474022865295, "logits/rejected": -0.5334861874580383, "logps/chosen": -0.1058742105960846, "logps/rejected": -0.67145836353302, "loss": 2.818, "nll_loss": 0.6826778054237366, "rewards/accuracies": 0.875, "rewards/chosen": -0.010587421245872974, "rewards/margins": 0.05655841529369354, "rewards/rejected": -0.06714583933353424, "step": 2039 }, { "epoch": 1.4107883817427385, "grad_norm": 9.857651710510254, "learning_rate": 4.771784232365146e-05, "log_odds_chosen": 3.348238945007324, "log_odds_ratio": -0.43746525049209595, "logits/chosen": -0.4963114857673645, "logits/rejected": -0.4507068395614624, "logps/chosen": -0.060887180268764496, "logps/rejected": -0.6050597429275513, "loss": 3.2402, "nll_loss": 0.7663080096244812, "rewards/accuracies": 0.75, "rewards/chosen": -0.006088717840611935, "rewards/margins": 0.05441725254058838, "rewards/rejected": -0.06050597131252289, "step": 2040 }, { "epoch": 1.4114799446749653, "grad_norm": 4.385861396789551, "learning_rate": 4.7714000307361305e-05, "log_odds_chosen": 4.662498474121094, "log_odds_ratio": -0.209502175450325, "logits/chosen": -0.4768851399421692, "logits/rejected": -0.5662028789520264, "logps/chosen": -0.054230280220508575, "logps/rejected": -0.6234518885612488, "loss": 2.458, "nll_loss": 0.5935608744621277, "rewards/accuracies": 0.875, "rewards/chosen": -0.0054230280220508575, "rewards/margins": 0.05692216008901596, "rewards/rejected": -0.06234519183635712, "step": 2041 }, { "epoch": 1.4121715076071921, "grad_norm": 6.921299934387207, "learning_rate": 4.771015829107116e-05, "log_odds_chosen": 4.092554092407227, "log_odds_ratio": -0.21367663145065308, "logits/chosen": -0.1800042986869812, "logits/rejected": -0.22171702980995178, "logps/chosen": -0.0716947615146637, "logps/rejected": -0.443551242351532, "loss": 2.6438, "nll_loss": 0.639578104019165, "rewards/accuracies": 0.875, "rewards/chosen": -0.007169476244598627, "rewards/margins": 0.037185654044151306, "rewards/rejected": -0.0443551279604435, "step": 2042 }, { "epoch": 1.412863070539419, "grad_norm": 6.809596538543701, "learning_rate": 4.770631627478101e-05, "log_odds_chosen": 4.3092546463012695, "log_odds_ratio": -0.18392224609851837, "logits/chosen": -0.8782827258110046, "logits/rejected": -0.8806307911872864, "logps/chosen": -0.0561148002743721, "logps/rejected": -0.8389385938644409, "loss": 3.899, "nll_loss": 0.9563499093055725, "rewards/accuracies": 0.875, "rewards/chosen": -0.005611480679363012, "rewards/margins": 0.07828238606452942, "rewards/rejected": -0.08389386534690857, "step": 2043 }, { "epoch": 1.4135546334716458, "grad_norm": 5.72255277633667, "learning_rate": 4.770247425849086e-05, "log_odds_chosen": 3.307846784591675, "log_odds_ratio": -0.20712444186210632, "logits/chosen": -0.3684505224227905, "logits/rejected": -0.3365062475204468, "logps/chosen": -0.08130621910095215, "logps/rejected": -0.6315487623214722, "loss": 3.3224, "nll_loss": 0.8098927736282349, "rewards/accuracies": 0.875, "rewards/chosen": -0.00813062209635973, "rewards/margins": 0.05502425879240036, "rewards/rejected": -0.06315487623214722, "step": 2044 }, { "epoch": 1.4142461964038728, "grad_norm": 4.594570636749268, "learning_rate": 4.769863224220071e-05, "log_odds_chosen": 4.4372711181640625, "log_odds_ratio": -0.09585855901241302, "logits/chosen": -0.2599387764930725, "logits/rejected": -0.2439916431903839, "logps/chosen": -0.04680861905217171, "logps/rejected": -0.9597396850585938, "loss": 3.1439, "nll_loss": 0.7763959765434265, "rewards/accuracies": 1.0, "rewards/chosen": -0.004680861718952656, "rewards/margins": 0.091293103992939, "rewards/rejected": -0.09597396850585938, "step": 2045 }, { "epoch": 1.4149377593360997, "grad_norm": 9.47522258758545, "learning_rate": 4.769479022591056e-05, "log_odds_chosen": 4.923094749450684, "log_odds_ratio": -0.6394574046134949, "logits/chosen": -0.4518549144268036, "logits/rejected": -0.48012202978134155, "logps/chosen": -0.23291736841201782, "logps/rejected": -1.0669751167297363, "loss": 3.2946, "nll_loss": 0.7597114443778992, "rewards/accuracies": 0.75, "rewards/chosen": -0.023291736841201782, "rewards/margins": 0.08340578526258469, "rewards/rejected": -0.10669751465320587, "step": 2046 }, { "epoch": 1.4156293222683265, "grad_norm": 6.063464641571045, "learning_rate": 4.769094820962041e-05, "log_odds_chosen": 3.17647385597229, "log_odds_ratio": -0.532335638999939, "logits/chosen": -0.29802849888801575, "logits/rejected": -0.2881256937980652, "logps/chosen": -0.17807450890541077, "logps/rejected": -0.4909724295139313, "loss": 3.0705, "nll_loss": 0.714398205280304, "rewards/accuracies": 0.75, "rewards/chosen": -0.017807450145483017, "rewards/margins": 0.03128979355096817, "rewards/rejected": -0.04909724369645119, "step": 2047 }, { "epoch": 1.4163208852005533, "grad_norm": 11.671127319335938, "learning_rate": 4.768710619333026e-05, "log_odds_chosen": 5.549150466918945, "log_odds_ratio": -0.19529500603675842, "logits/chosen": -0.340154230594635, "logits/rejected": -0.4136425852775574, "logps/chosen": -0.08357815444469452, "logps/rejected": -1.157057523727417, "loss": 3.2631, "nll_loss": 0.7962380647659302, "rewards/accuracies": 0.875, "rewards/chosen": -0.008357815444469452, "rewards/margins": 0.10734793543815613, "rewards/rejected": -0.11570574343204498, "step": 2048 }, { "epoch": 1.4170124481327802, "grad_norm": 6.643354415893555, "learning_rate": 4.768326417704012e-05, "log_odds_chosen": 5.000125408172607, "log_odds_ratio": -0.1833888292312622, "logits/chosen": -0.34129559993743896, "logits/rejected": -0.3918303847312927, "logps/chosen": -0.05015400052070618, "logps/rejected": -0.8530675172805786, "loss": 2.9937, "nll_loss": 0.7300856709480286, "rewards/accuracies": 0.875, "rewards/chosen": -0.005015400238335133, "rewards/margins": 0.08029135316610336, "rewards/rejected": -0.08530674874782562, "step": 2049 }, { "epoch": 1.417704011065007, "grad_norm": 4.964437961578369, "learning_rate": 4.767942216074996e-05, "log_odds_chosen": 6.179078102111816, "log_odds_ratio": -0.30751490592956543, "logits/chosen": -0.5061019062995911, "logits/rejected": -0.4916246831417084, "logps/chosen": -0.0694524496793747, "logps/rejected": -0.9263420104980469, "loss": 2.6063, "nll_loss": 0.6208357810974121, "rewards/accuracies": 0.875, "rewards/chosen": -0.006945244502276182, "rewards/margins": 0.08568896353244781, "rewards/rejected": -0.09263420104980469, "step": 2050 }, { "epoch": 1.4183955739972338, "grad_norm": 6.969486713409424, "learning_rate": 4.7675580144459816e-05, "log_odds_chosen": 4.743740558624268, "log_odds_ratio": -0.24729038774967194, "logits/chosen": -0.7440781593322754, "logits/rejected": -0.7566511631011963, "logps/chosen": -0.11772959679365158, "logps/rejected": -0.8475928902626038, "loss": 3.8343, "nll_loss": 0.9338378310203552, "rewards/accuracies": 0.75, "rewards/chosen": -0.011772960424423218, "rewards/margins": 0.07298632711172104, "rewards/rejected": -0.08475928753614426, "step": 2051 }, { "epoch": 1.4190871369294606, "grad_norm": 7.391602039337158, "learning_rate": 4.767173812816967e-05, "log_odds_chosen": 3.4940693378448486, "log_odds_ratio": -0.1731761395931244, "logits/chosen": -0.5935790538787842, "logits/rejected": -0.6371148228645325, "logps/chosen": -0.09643512964248657, "logps/rejected": -0.6096627116203308, "loss": 4.3386, "nll_loss": 1.067338228225708, "rewards/accuracies": 1.0, "rewards/chosen": -0.009643513709306717, "rewards/margins": 0.051322758197784424, "rewards/rejected": -0.06096626818180084, "step": 2052 }, { "epoch": 1.4197786998616875, "grad_norm": 6.038008689880371, "learning_rate": 4.766789611187952e-05, "log_odds_chosen": 5.126229763031006, "log_odds_ratio": -0.046565812081098557, "logits/chosen": -0.2556982934474945, "logits/rejected": -0.2397790253162384, "logps/chosen": -0.08712028712034225, "logps/rejected": -1.0902677774429321, "loss": 2.8909, "nll_loss": 0.7180594205856323, "rewards/accuracies": 1.0, "rewards/chosen": -0.008712029084563255, "rewards/margins": 0.10031475126743317, "rewards/rejected": -0.10902677476406097, "step": 2053 }, { "epoch": 1.4204702627939143, "grad_norm": 10.155299186706543, "learning_rate": 4.7664054095589366e-05, "log_odds_chosen": 1.5348985195159912, "log_odds_ratio": -1.1214851140975952, "logits/chosen": -0.3333180546760559, "logits/rejected": -0.31991809606552124, "logps/chosen": -0.21411769092082977, "logps/rejected": -0.6242636442184448, "loss": 3.7383, "nll_loss": 0.8224247694015503, "rewards/accuracies": 0.75, "rewards/chosen": -0.021411770954728127, "rewards/margins": 0.041014596819877625, "rewards/rejected": -0.0624263659119606, "step": 2054 }, { "epoch": 1.4211618257261411, "grad_norm": 9.23679256439209, "learning_rate": 4.766021207929922e-05, "log_odds_chosen": 5.134243965148926, "log_odds_ratio": -0.06426586210727692, "logits/chosen": -0.14200043678283691, "logits/rejected": -0.2121468186378479, "logps/chosen": -0.0550372339785099, "logps/rejected": -1.017485499382019, "loss": 3.8874, "nll_loss": 0.9654339551925659, "rewards/accuracies": 1.0, "rewards/chosen": -0.00550372339785099, "rewards/margins": 0.09624484181404114, "rewards/rejected": -0.10174856334924698, "step": 2055 }, { "epoch": 1.421853388658368, "grad_norm": 5.483739376068115, "learning_rate": 4.765637006300907e-05, "log_odds_chosen": 3.1780428886413574, "log_odds_ratio": -0.33298707008361816, "logits/chosen": -0.5277194976806641, "logits/rejected": -0.5595696568489075, "logps/chosen": -0.08533762395381927, "logps/rejected": -0.6815667152404785, "loss": 3.5452, "nll_loss": 0.8530020713806152, "rewards/accuracies": 0.875, "rewards/chosen": -0.008533762767910957, "rewards/margins": 0.05962291359901428, "rewards/rejected": -0.06815667450428009, "step": 2056 }, { "epoch": 1.4225449515905948, "grad_norm": 4.766363143920898, "learning_rate": 4.765252804671892e-05, "log_odds_chosen": 4.3871612548828125, "log_odds_ratio": -0.11903732270002365, "logits/chosen": -0.3458870053291321, "logits/rejected": -0.33200258016586304, "logps/chosen": -0.06314408779144287, "logps/rejected": -0.5942891836166382, "loss": 2.9096, "nll_loss": 0.7155048251152039, "rewards/accuracies": 1.0, "rewards/chosen": -0.006314409431070089, "rewards/margins": 0.05311451479792595, "rewards/rejected": -0.059428922832012177, "step": 2057 }, { "epoch": 1.4232365145228216, "grad_norm": 7.043921947479248, "learning_rate": 4.764868603042877e-05, "log_odds_chosen": 2.770045280456543, "log_odds_ratio": -0.3470918536186218, "logits/chosen": -0.5503393411636353, "logits/rejected": -0.5623016357421875, "logps/chosen": -0.10145130008459091, "logps/rejected": -0.6719057559967041, "loss": 3.0892, "nll_loss": 0.7375876903533936, "rewards/accuracies": 0.875, "rewards/chosen": -0.010145129635930061, "rewards/margins": 0.05704544484615326, "rewards/rejected": -0.06719058007001877, "step": 2058 }, { "epoch": 1.4239280774550485, "grad_norm": 9.261284828186035, "learning_rate": 4.764484401413862e-05, "log_odds_chosen": 3.0731759071350098, "log_odds_ratio": -0.43416914343833923, "logits/chosen": -0.798588752746582, "logits/rejected": -0.853974461555481, "logps/chosen": -0.07625366747379303, "logps/rejected": -0.7762160897254944, "loss": 2.7815, "nll_loss": 0.6519516706466675, "rewards/accuracies": 0.75, "rewards/chosen": -0.007625367492437363, "rewards/margins": 0.06999623775482178, "rewards/rejected": -0.07762160897254944, "step": 2059 }, { "epoch": 1.4246196403872753, "grad_norm": 6.812342166900635, "learning_rate": 4.7641001997848474e-05, "log_odds_chosen": 5.612292289733887, "log_odds_ratio": -0.177658811211586, "logits/chosen": -0.6069943308830261, "logits/rejected": -0.5836856365203857, "logps/chosen": -0.055021896958351135, "logps/rejected": -0.8681304454803467, "loss": 3.2148, "nll_loss": 0.7859339714050293, "rewards/accuracies": 0.875, "rewards/chosen": -0.005502189975231886, "rewards/margins": 0.08131085336208344, "rewards/rejected": -0.0868130475282669, "step": 2060 }, { "epoch": 1.4253112033195021, "grad_norm": 6.473294258117676, "learning_rate": 4.763715998155832e-05, "log_odds_chosen": 4.578250408172607, "log_odds_ratio": -0.38147497177124023, "logits/chosen": -0.39818042516708374, "logits/rejected": -0.4307783246040344, "logps/chosen": -0.02809958904981613, "logps/rejected": -0.9484551548957825, "loss": 2.2748, "nll_loss": 0.5305440425872803, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028099589981138706, "rewards/margins": 0.09203556180000305, "rewards/rejected": -0.09484551846981049, "step": 2061 }, { "epoch": 1.426002766251729, "grad_norm": 4.594994068145752, "learning_rate": 4.763331796526818e-05, "log_odds_chosen": 4.217331886291504, "log_odds_ratio": -0.08356684446334839, "logits/chosen": -0.5503053665161133, "logits/rejected": -0.5877286195755005, "logps/chosen": -0.028734585270285606, "logps/rejected": -0.7267761826515198, "loss": 3.5088, "nll_loss": 0.8688546419143677, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028734588995575905, "rewards/margins": 0.06980416178703308, "rewards/rejected": -0.0726776197552681, "step": 2062 }, { "epoch": 1.4266943291839558, "grad_norm": 3.986945867538452, "learning_rate": 4.7629475948978025e-05, "log_odds_chosen": 4.139495372772217, "log_odds_ratio": -0.15999802947044373, "logits/chosen": -0.6195761561393738, "logits/rejected": -0.668656051158905, "logps/chosen": -0.06690604239702225, "logps/rejected": -0.8718291521072388, "loss": 2.6388, "nll_loss": 0.6437124013900757, "rewards/accuracies": 0.875, "rewards/chosen": -0.006690604612231255, "rewards/margins": 0.08049231767654419, "rewards/rejected": -0.08718291670084, "step": 2063 }, { "epoch": 1.4273858921161826, "grad_norm": 11.108855247497559, "learning_rate": 4.762563393268788e-05, "log_odds_chosen": 2.0826430320739746, "log_odds_ratio": -1.056549310684204, "logits/chosen": -0.7052680253982544, "logits/rejected": -0.6390185356140137, "logps/chosen": -0.22649815678596497, "logps/rejected": -0.6641699075698853, "loss": 3.9387, "nll_loss": 0.8790209293365479, "rewards/accuracies": 0.625, "rewards/chosen": -0.022649817168712616, "rewards/margins": 0.04376717284321785, "rewards/rejected": -0.06641699373722076, "step": 2064 }, { "epoch": 1.4280774550484094, "grad_norm": 7.645991325378418, "learning_rate": 4.762179191639773e-05, "log_odds_chosen": 5.085951328277588, "log_odds_ratio": -0.10783078521490097, "logits/chosen": -0.549422562122345, "logits/rejected": -0.5912079811096191, "logps/chosen": -0.10167013108730316, "logps/rejected": -0.9657047390937805, "loss": 3.9768, "nll_loss": 0.9834229946136475, "rewards/accuracies": 1.0, "rewards/chosen": -0.010167012922465801, "rewards/margins": 0.08640345931053162, "rewards/rejected": -0.09657047688961029, "step": 2065 }, { "epoch": 1.4287690179806363, "grad_norm": 5.260788440704346, "learning_rate": 4.7617949900107575e-05, "log_odds_chosen": 4.431698322296143, "log_odds_ratio": -0.13982881605625153, "logits/chosen": -0.816025972366333, "logits/rejected": -0.8234196305274963, "logps/chosen": -0.05856183171272278, "logps/rejected": -0.8903920650482178, "loss": 2.9261, "nll_loss": 0.7175315618515015, "rewards/accuracies": 0.875, "rewards/chosen": -0.005856183357536793, "rewards/margins": 0.08318303525447845, "rewards/rejected": -0.08903920650482178, "step": 2066 }, { "epoch": 1.429460580912863, "grad_norm": 6.308394908905029, "learning_rate": 4.761410788381743e-05, "log_odds_chosen": 3.1791226863861084, "log_odds_ratio": -0.13766688108444214, "logits/chosen": -0.8693885207176208, "logits/rejected": -0.896051287651062, "logps/chosen": -0.0903547927737236, "logps/rejected": -0.5084475874900818, "loss": 3.1956, "nll_loss": 0.7851389050483704, "rewards/accuracies": 1.0, "rewards/chosen": -0.00903547927737236, "rewards/margins": 0.04180927574634552, "rewards/rejected": -0.05084475874900818, "step": 2067 }, { "epoch": 1.43015214384509, "grad_norm": 11.030671119689941, "learning_rate": 4.761026586752728e-05, "log_odds_chosen": 4.220271110534668, "log_odds_ratio": -0.23262549936771393, "logits/chosen": -0.6884947419166565, "logits/rejected": -0.7322020530700684, "logps/chosen": -0.09444166719913483, "logps/rejected": -0.5991060733795166, "loss": 3.6798, "nll_loss": 0.8966806530952454, "rewards/accuracies": 0.875, "rewards/chosen": -0.009444165974855423, "rewards/margins": 0.050466448068618774, "rewards/rejected": -0.0599106140434742, "step": 2068 }, { "epoch": 1.4308437067773168, "grad_norm": 9.617368698120117, "learning_rate": 4.760642385123713e-05, "log_odds_chosen": 3.991337299346924, "log_odds_ratio": -0.21459956467151642, "logits/chosen": -0.7082586288452148, "logits/rejected": -0.7063589096069336, "logps/chosen": -0.10309841483831406, "logps/rejected": -1.088301420211792, "loss": 3.6348, "nll_loss": 0.887231171131134, "rewards/accuracies": 0.875, "rewards/chosen": -0.010309841483831406, "rewards/margins": 0.09852030128240585, "rewards/rejected": -0.10883013904094696, "step": 2069 }, { "epoch": 1.4315352697095436, "grad_norm": 5.760544300079346, "learning_rate": 4.760258183494698e-05, "log_odds_chosen": 4.451226234436035, "log_odds_ratio": -0.08724942803382874, "logits/chosen": -0.6014732122421265, "logits/rejected": -0.6634864807128906, "logps/chosen": -0.05170953646302223, "logps/rejected": -0.8413094878196716, "loss": 2.9167, "nll_loss": 0.7204617261886597, "rewards/accuracies": 1.0, "rewards/chosen": -0.005170953925698996, "rewards/margins": 0.07895999401807785, "rewards/rejected": -0.08413094282150269, "step": 2070 }, { "epoch": 1.4322268326417704, "grad_norm": 5.429257869720459, "learning_rate": 4.759873981865684e-05, "log_odds_chosen": 6.547586441040039, "log_odds_ratio": -0.009889435023069382, "logits/chosen": -0.38951486349105835, "logits/rejected": -0.4980710744857788, "logps/chosen": -0.008761152625083923, "logps/rejected": -1.0059410333633423, "loss": 2.8611, "nll_loss": 0.7142948508262634, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008761152857914567, "rewards/margins": 0.09971798956394196, "rewards/rejected": -0.10059410333633423, "step": 2071 }, { "epoch": 1.4329183955739973, "grad_norm": 5.300156593322754, "learning_rate": 4.759489780236668e-05, "log_odds_chosen": 4.442890167236328, "log_odds_ratio": -0.10472977161407471, "logits/chosen": -0.5378776788711548, "logits/rejected": -0.5973137617111206, "logps/chosen": -0.02917708456516266, "logps/rejected": -0.5998899936676025, "loss": 2.832, "nll_loss": 0.6975303292274475, "rewards/accuracies": 1.0, "rewards/chosen": -0.002917708596214652, "rewards/margins": 0.057071294635534286, "rewards/rejected": -0.05998900532722473, "step": 2072 }, { "epoch": 1.433609958506224, "grad_norm": 8.273154258728027, "learning_rate": 4.7591055786076536e-05, "log_odds_chosen": 2.775758743286133, "log_odds_ratio": -0.4968562722206116, "logits/chosen": -0.5443849563598633, "logits/rejected": -0.5393614172935486, "logps/chosen": -0.08266573399305344, "logps/rejected": -0.4756109416484833, "loss": 3.1284, "nll_loss": 0.732426643371582, "rewards/accuracies": 0.75, "rewards/chosen": -0.008266573771834373, "rewards/margins": 0.0392945222556591, "rewards/rejected": -0.04756109416484833, "step": 2073 }, { "epoch": 1.434301521438451, "grad_norm": 5.922261714935303, "learning_rate": 4.758721376978639e-05, "log_odds_chosen": 5.002694129943848, "log_odds_ratio": -0.12283527851104736, "logits/chosen": -0.6495686769485474, "logits/rejected": -0.6880848407745361, "logps/chosen": -0.036076322197914124, "logps/rejected": -0.7622798681259155, "loss": 3.6584, "nll_loss": 0.9023165702819824, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036076316609978676, "rewards/margins": 0.07262035459280014, "rewards/rejected": -0.07622798532247543, "step": 2074 }, { "epoch": 1.4349930843706777, "grad_norm": 8.912012100219727, "learning_rate": 4.7583371753496234e-05, "log_odds_chosen": 3.8500843048095703, "log_odds_ratio": -0.18038401007652283, "logits/chosen": -0.7040153741836548, "logits/rejected": -0.696866512298584, "logps/chosen": -0.08037686347961426, "logps/rejected": -0.6211951971054077, "loss": 3.3774, "nll_loss": 0.8263095617294312, "rewards/accuracies": 0.875, "rewards/chosen": -0.008037686347961426, "rewards/margins": 0.05408182740211487, "rewards/rejected": -0.062119513750076294, "step": 2075 }, { "epoch": 1.4356846473029046, "grad_norm": 5.237964630126953, "learning_rate": 4.7579529737206086e-05, "log_odds_chosen": 4.842121124267578, "log_odds_ratio": -0.18194277584552765, "logits/chosen": -0.3374127149581909, "logits/rejected": -0.2697219252586365, "logps/chosen": -0.07888604700565338, "logps/rejected": -0.5460504293441772, "loss": 3.0352, "nll_loss": 0.7406092286109924, "rewards/accuracies": 0.875, "rewards/chosen": -0.007888603955507278, "rewards/margins": 0.04671643674373627, "rewards/rejected": -0.054605040699243546, "step": 2076 }, { "epoch": 1.4363762102351314, "grad_norm": 4.641595363616943, "learning_rate": 4.757568772091594e-05, "log_odds_chosen": 4.9412760734558105, "log_odds_ratio": -0.014735918492078781, "logits/chosen": -0.6826890707015991, "logits/rejected": -0.7102535367012024, "logps/chosen": -0.00749985920265317, "logps/rejected": -0.7612767219543457, "loss": 2.7544, "nll_loss": 0.6871151924133301, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007499859784729779, "rewards/margins": 0.0753776878118515, "rewards/rejected": -0.07612767070531845, "step": 2077 }, { "epoch": 1.4370677731673582, "grad_norm": 7.271586894989014, "learning_rate": 4.757184570462579e-05, "log_odds_chosen": 3.560127019882202, "log_odds_ratio": -0.22132518887519836, "logits/chosen": -0.6114071011543274, "logits/rejected": -0.6493667960166931, "logps/chosen": -0.06852074712514877, "logps/rejected": -0.5435910224914551, "loss": 4.1174, "nll_loss": 1.007214069366455, "rewards/accuracies": 0.875, "rewards/chosen": -0.00685207499191165, "rewards/margins": 0.04750702530145645, "rewards/rejected": -0.05435910075902939, "step": 2078 }, { "epoch": 1.437759336099585, "grad_norm": 6.2681965827941895, "learning_rate": 4.756800368833564e-05, "log_odds_chosen": 5.811890602111816, "log_odds_ratio": -0.11716453731060028, "logits/chosen": -0.12001199275255203, "logits/rejected": -0.1396545171737671, "logps/chosen": -0.01839967630803585, "logps/rejected": -0.8142108917236328, "loss": 3.3498, "nll_loss": 0.8257352709770203, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018399673281237483, "rewards/margins": 0.0795811265707016, "rewards/rejected": -0.08142108470201492, "step": 2079 }, { "epoch": 1.438450899031812, "grad_norm": 7.204479694366455, "learning_rate": 4.7564161672045496e-05, "log_odds_chosen": 5.542275905609131, "log_odds_ratio": -0.05477634072303772, "logits/chosen": -0.43915674090385437, "logits/rejected": -0.5252007246017456, "logps/chosen": -0.02469804137945175, "logps/rejected": -0.8203492760658264, "loss": 3.6115, "nll_loss": 0.8973989486694336, "rewards/accuracies": 1.0, "rewards/chosen": -0.002469804137945175, "rewards/margins": 0.0795651227235794, "rewards/rejected": -0.08203493058681488, "step": 2080 }, { "epoch": 1.4391424619640387, "grad_norm": 8.111823081970215, "learning_rate": 4.756031965575534e-05, "log_odds_chosen": 2.5070009231567383, "log_odds_ratio": -0.33566251397132874, "logits/chosen": -0.36609354615211487, "logits/rejected": -0.3842601478099823, "logps/chosen": -0.11886178702116013, "logps/rejected": -0.5776019096374512, "loss": 3.8067, "nll_loss": 0.9181172847747803, "rewards/accuracies": 0.75, "rewards/chosen": -0.011886179447174072, "rewards/margins": 0.045874010771512985, "rewards/rejected": -0.05776019021868706, "step": 2081 }, { "epoch": 1.4398340248962656, "grad_norm": 4.4983978271484375, "learning_rate": 4.7556477639465194e-05, "log_odds_chosen": 3.637857437133789, "log_odds_ratio": -0.1540573537349701, "logits/chosen": -0.6648321151733398, "logits/rejected": -0.6483675241470337, "logps/chosen": -0.06123726814985275, "logps/rejected": -0.7261663675308228, "loss": 2.5024, "nll_loss": 0.6101846098899841, "rewards/accuracies": 1.0, "rewards/chosen": -0.0061237262561917305, "rewards/margins": 0.06649291515350342, "rewards/rejected": -0.07261664420366287, "step": 2082 }, { "epoch": 1.4405255878284924, "grad_norm": 6.488410472869873, "learning_rate": 4.7552635623175047e-05, "log_odds_chosen": 4.784399032592773, "log_odds_ratio": -0.3786088526248932, "logits/chosen": -1.0810126066207886, "logits/rejected": -1.0608916282653809, "logps/chosen": -0.10673705488443375, "logps/rejected": -0.8546772003173828, "loss": 4.396, "nll_loss": 1.0611448287963867, "rewards/accuracies": 0.75, "rewards/chosen": -0.010673705488443375, "rewards/margins": 0.07479400932788849, "rewards/rejected": -0.08546771109104156, "step": 2083 }, { "epoch": 1.4412171507607192, "grad_norm": 6.633011341094971, "learning_rate": 4.754879360688489e-05, "log_odds_chosen": 4.465466499328613, "log_odds_ratio": -0.21018919348716736, "logits/chosen": -0.3606579899787903, "logits/rejected": -0.3915744423866272, "logps/chosen": -0.05982568487524986, "logps/rejected": -0.8488081097602844, "loss": 3.7668, "nll_loss": 0.9206801056861877, "rewards/accuracies": 0.875, "rewards/chosen": -0.005982568487524986, "rewards/margins": 0.07889824360609055, "rewards/rejected": -0.08488080650568008, "step": 2084 }, { "epoch": 1.441908713692946, "grad_norm": 4.620772838592529, "learning_rate": 4.7544951590594745e-05, "log_odds_chosen": 5.470822811126709, "log_odds_ratio": -0.1020824983716011, "logits/chosen": -0.6941245794296265, "logits/rejected": -0.6865053176879883, "logps/chosen": -0.049653299152851105, "logps/rejected": -0.7177039384841919, "loss": 2.4043, "nll_loss": 0.5908569097518921, "rewards/accuracies": 1.0, "rewards/chosen": -0.004965329542756081, "rewards/margins": 0.06680506467819214, "rewards/rejected": -0.07177039235830307, "step": 2085 }, { "epoch": 1.4426002766251729, "grad_norm": 9.680559158325195, "learning_rate": 4.75411095743046e-05, "log_odds_chosen": 1.9712796211242676, "log_odds_ratio": -0.47640758752822876, "logits/chosen": -0.6261113882064819, "logits/rejected": -0.6207519769668579, "logps/chosen": -0.11739076673984528, "logps/rejected": -0.43056824803352356, "loss": 2.9493, "nll_loss": 0.6896948218345642, "rewards/accuracies": 0.625, "rewards/chosen": -0.011739077046513557, "rewards/margins": 0.03131775185465813, "rewards/rejected": -0.043056827038526535, "step": 2086 }, { "epoch": 1.4432918395573997, "grad_norm": 5.5366926193237305, "learning_rate": 4.753726755801445e-05, "log_odds_chosen": 4.977890968322754, "log_odds_ratio": -0.17959055304527283, "logits/chosen": -0.6620508432388306, "logits/rejected": -0.7662349343299866, "logps/chosen": -0.056352924555540085, "logps/rejected": -0.7703478336334229, "loss": 3.3832, "nll_loss": 0.8278340697288513, "rewards/accuracies": 1.0, "rewards/chosen": -0.005635292734950781, "rewards/margins": 0.07139948755502701, "rewards/rejected": -0.07703477144241333, "step": 2087 }, { "epoch": 1.4439834024896265, "grad_norm": 6.524678707122803, "learning_rate": 4.7533425541724295e-05, "log_odds_chosen": 4.2204790115356445, "log_odds_ratio": -0.0700051486492157, "logits/chosen": -0.7024485468864441, "logits/rejected": -0.730099081993103, "logps/chosen": -0.049229852855205536, "logps/rejected": -0.8601389527320862, "loss": 3.5406, "nll_loss": 0.8781381249427795, "rewards/accuracies": 1.0, "rewards/chosen": -0.004922985564917326, "rewards/margins": 0.08109091222286224, "rewards/rejected": -0.08601390570402145, "step": 2088 }, { "epoch": 1.4446749654218534, "grad_norm": 7.487344741821289, "learning_rate": 4.7529583525434154e-05, "log_odds_chosen": 5.620451927185059, "log_odds_ratio": -0.0797644704580307, "logits/chosen": -0.5892861485481262, "logits/rejected": -0.6945428252220154, "logps/chosen": -0.03896079212427139, "logps/rejected": -0.809598445892334, "loss": 3.1271, "nll_loss": 0.7737871408462524, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038960790261626244, "rewards/margins": 0.07706376165151596, "rewards/rejected": -0.08095984905958176, "step": 2089 }, { "epoch": 1.4453665283540802, "grad_norm": 4.206676483154297, "learning_rate": 4.7525741509144e-05, "log_odds_chosen": 4.1212263107299805, "log_odds_ratio": -0.2708800137042999, "logits/chosen": -0.1651289314031601, "logits/rejected": -0.12203869968652725, "logps/chosen": -0.05846680328249931, "logps/rejected": -0.6591073274612427, "loss": 2.7409, "nll_loss": 0.658129096031189, "rewards/accuracies": 0.875, "rewards/chosen": -0.005846680607646704, "rewards/margins": 0.060064058750867844, "rewards/rejected": -0.06591074168682098, "step": 2090 }, { "epoch": 1.446058091286307, "grad_norm": 8.854192733764648, "learning_rate": 4.752189949285385e-05, "log_odds_chosen": 5.342554569244385, "log_odds_ratio": -0.11022274941205978, "logits/chosen": -0.5099636912345886, "logits/rejected": -0.6596127152442932, "logps/chosen": -0.03835906460881233, "logps/rejected": -1.042710781097412, "loss": 3.9023, "nll_loss": 0.9645450711250305, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038359067402780056, "rewards/margins": 0.10043518245220184, "rewards/rejected": -0.10427108407020569, "step": 2091 }, { "epoch": 1.4467496542185339, "grad_norm": 6.701385974884033, "learning_rate": 4.7518057476563705e-05, "log_odds_chosen": 2.987711191177368, "log_odds_ratio": -0.3907737731933594, "logits/chosen": -0.5788431167602539, "logits/rejected": -0.5822548866271973, "logps/chosen": -0.12158074975013733, "logps/rejected": -0.725666880607605, "loss": 3.1725, "nll_loss": 0.7540526390075684, "rewards/accuracies": 0.875, "rewards/chosen": -0.012158075347542763, "rewards/margins": 0.06040861830115318, "rewards/rejected": -0.0725666880607605, "step": 2092 }, { "epoch": 1.4474412171507607, "grad_norm": 8.469315528869629, "learning_rate": 4.751421546027355e-05, "log_odds_chosen": 6.235541343688965, "log_odds_ratio": -0.08917492628097534, "logits/chosen": -0.40043991804122925, "logits/rejected": -0.4467763304710388, "logps/chosen": -0.011788062751293182, "logps/rejected": -1.1630051136016846, "loss": 3.4903, "nll_loss": 0.8636683821678162, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011788064148277044, "rewards/margins": 0.11512169986963272, "rewards/rejected": -0.11630050837993622, "step": 2093 }, { "epoch": 1.4481327800829875, "grad_norm": 5.140117168426514, "learning_rate": 4.75103734439834e-05, "log_odds_chosen": 4.032228469848633, "log_odds_ratio": -0.20453763008117676, "logits/chosen": -0.6595421433448792, "logits/rejected": -0.6425448656082153, "logps/chosen": -0.07961926609277725, "logps/rejected": -0.7643399238586426, "loss": 2.5293, "nll_loss": 0.6118590831756592, "rewards/accuracies": 0.875, "rewards/chosen": -0.007961926981806755, "rewards/margins": 0.06847207248210907, "rewards/rejected": -0.07643399387598038, "step": 2094 }, { "epoch": 1.4488243430152143, "grad_norm": 6.220903396606445, "learning_rate": 4.7506531427693255e-05, "log_odds_chosen": 4.783239364624023, "log_odds_ratio": -0.35463640093803406, "logits/chosen": -0.7399685382843018, "logits/rejected": -0.7145621180534363, "logps/chosen": -0.11588143557310104, "logps/rejected": -1.126645803451538, "loss": 3.1774, "nll_loss": 0.7588942050933838, "rewards/accuracies": 0.75, "rewards/chosen": -0.011588143184781075, "rewards/margins": 0.10107642412185669, "rewards/rejected": -0.11266457289457321, "step": 2095 }, { "epoch": 1.4495159059474412, "grad_norm": 7.440333366394043, "learning_rate": 4.750268941140311e-05, "log_odds_chosen": 4.993106842041016, "log_odds_ratio": -0.2743750512599945, "logits/chosen": -0.7840232253074646, "logits/rejected": -0.7973366975784302, "logps/chosen": -0.13951562345027924, "logps/rejected": -0.9469488859176636, "loss": 4.7916, "nll_loss": 1.1704589128494263, "rewards/accuracies": 0.875, "rewards/chosen": -0.013951562345027924, "rewards/margins": 0.08074332028627396, "rewards/rejected": -0.09469488263130188, "step": 2096 }, { "epoch": 1.450207468879668, "grad_norm": 7.338708400726318, "learning_rate": 4.7498847395112954e-05, "log_odds_chosen": 4.159909248352051, "log_odds_ratio": -0.07225679606199265, "logits/chosen": -0.9430498480796814, "logits/rejected": -0.9889695644378662, "logps/chosen": -0.053130898624658585, "logps/rejected": -0.639094889163971, "loss": 4.3992, "nll_loss": 1.0925672054290771, "rewards/accuracies": 1.0, "rewards/chosen": -0.005313089583069086, "rewards/margins": 0.058596402406692505, "rewards/rejected": -0.06390949338674545, "step": 2097 }, { "epoch": 1.4508990318118948, "grad_norm": 6.4432597160339355, "learning_rate": 4.749500537882281e-05, "log_odds_chosen": 3.2537670135498047, "log_odds_ratio": -0.263638436794281, "logits/chosen": -0.6450674533843994, "logits/rejected": -0.6910425424575806, "logps/chosen": -0.1120055690407753, "logps/rejected": -0.598624587059021, "loss": 2.9496, "nll_loss": 0.7110449075698853, "rewards/accuracies": 0.875, "rewards/chosen": -0.01120055839419365, "rewards/margins": 0.04866189882159233, "rewards/rejected": -0.05986246094107628, "step": 2098 }, { "epoch": 1.4515905947441217, "grad_norm": 7.961607456207275, "learning_rate": 4.749116336253266e-05, "log_odds_chosen": 4.801853656768799, "log_odds_ratio": -0.104258693754673, "logits/chosen": -0.6197474598884583, "logits/rejected": -0.6892693638801575, "logps/chosen": -0.05515953525900841, "logps/rejected": -1.0898054838180542, "loss": 2.591, "nll_loss": 0.637330949306488, "rewards/accuracies": 1.0, "rewards/chosen": -0.005515953525900841, "rewards/margins": 0.10346459597349167, "rewards/rejected": -0.10898055136203766, "step": 2099 }, { "epoch": 1.4522821576763485, "grad_norm": 6.290823936462402, "learning_rate": 4.748732134624251e-05, "log_odds_chosen": 3.377328872680664, "log_odds_ratio": -0.12939488887786865, "logits/chosen": -0.3903263211250305, "logits/rejected": -0.3945927619934082, "logps/chosen": -0.07465854287147522, "logps/rejected": -0.6714155673980713, "loss": 3.2051, "nll_loss": 0.7883424758911133, "rewards/accuracies": 1.0, "rewards/chosen": -0.007465854287147522, "rewards/margins": 0.05967570096254349, "rewards/rejected": -0.06714155524969101, "step": 2100 }, { "epoch": 1.4529737206085753, "grad_norm": 3.876042127609253, "learning_rate": 4.748347932995236e-05, "log_odds_chosen": 6.402103900909424, "log_odds_ratio": -0.11234617233276367, "logits/chosen": -0.3926086723804474, "logits/rejected": -0.4278343915939331, "logps/chosen": -0.04511556401848793, "logps/rejected": -1.008373737335205, "loss": 2.4422, "nll_loss": 0.5993175506591797, "rewards/accuracies": 1.0, "rewards/chosen": -0.004511556122452021, "rewards/margins": 0.0963258147239685, "rewards/rejected": -0.10083737224340439, "step": 2101 }, { "epoch": 1.4536652835408022, "grad_norm": 9.223143577575684, "learning_rate": 4.747963731366221e-05, "log_odds_chosen": 3.694127082824707, "log_odds_ratio": -0.21813002228736877, "logits/chosen": -0.935250997543335, "logits/rejected": -0.9457248449325562, "logps/chosen": -0.053896158933639526, "logps/rejected": -0.49949681758880615, "loss": 3.6024, "nll_loss": 0.878780722618103, "rewards/accuracies": 0.875, "rewards/chosen": -0.00538961635902524, "rewards/margins": 0.044560063630342484, "rewards/rejected": -0.04994967579841614, "step": 2102 }, { "epoch": 1.454356846473029, "grad_norm": 4.475998878479004, "learning_rate": 4.747579529737206e-05, "log_odds_chosen": 4.9824724197387695, "log_odds_ratio": -0.3273460566997528, "logits/chosen": -0.16247451305389404, "logits/rejected": -0.21642492711544037, "logps/chosen": -0.13452918827533722, "logps/rejected": -1.0846654176712036, "loss": 2.8663, "nll_loss": 0.6838335394859314, "rewards/accuracies": 0.75, "rewards/chosen": -0.013452919200062752, "rewards/margins": 0.09501362591981888, "rewards/rejected": -0.10846654325723648, "step": 2103 }, { "epoch": 1.4550484094052558, "grad_norm": 7.579476356506348, "learning_rate": 4.7471953281081914e-05, "log_odds_chosen": 4.738933563232422, "log_odds_ratio": -0.04160004109144211, "logits/chosen": -0.44023242592811584, "logits/rejected": -0.43835222721099854, "logps/chosen": -0.07230572402477264, "logps/rejected": -1.0475863218307495, "loss": 3.3754, "nll_loss": 0.8396894335746765, "rewards/accuracies": 1.0, "rewards/chosen": -0.007230572868138552, "rewards/margins": 0.09752806276082993, "rewards/rejected": -0.10475863516330719, "step": 2104 }, { "epoch": 1.4557399723374826, "grad_norm": 6.231069564819336, "learning_rate": 4.7468111264791766e-05, "log_odds_chosen": 4.9739909172058105, "log_odds_ratio": -0.0823458880186081, "logits/chosen": -0.6578803658485413, "logits/rejected": -0.6871607303619385, "logps/chosen": -0.04676012694835663, "logps/rejected": -0.8974286317825317, "loss": 3.3229, "nll_loss": 0.8224886655807495, "rewards/accuracies": 1.0, "rewards/chosen": -0.004676013253629208, "rewards/margins": 0.08506684750318527, "rewards/rejected": -0.08974287658929825, "step": 2105 }, { "epoch": 1.4564315352697095, "grad_norm": 6.654862403869629, "learning_rate": 4.746426924850161e-05, "log_odds_chosen": 3.6491219997406006, "log_odds_ratio": -0.4037840962409973, "logits/chosen": -0.5042887330055237, "logits/rejected": -0.5388541221618652, "logps/chosen": -0.1399620920419693, "logps/rejected": -0.6792500019073486, "loss": 2.5631, "nll_loss": 0.6003901958465576, "rewards/accuracies": 0.75, "rewards/chosen": -0.013996210880577564, "rewards/margins": 0.05392879247665405, "rewards/rejected": -0.06792499870061874, "step": 2106 }, { "epoch": 1.4571230982019363, "grad_norm": 6.00809907913208, "learning_rate": 4.746042723221147e-05, "log_odds_chosen": 3.6155893802642822, "log_odds_ratio": -0.2792835831642151, "logits/chosen": -0.6958773732185364, "logits/rejected": -0.673318088054657, "logps/chosen": -0.04103608429431915, "logps/rejected": -0.6202775239944458, "loss": 2.6871, "nll_loss": 0.6438344717025757, "rewards/accuracies": 0.875, "rewards/chosen": -0.004103608429431915, "rewards/margins": 0.057924140244722366, "rewards/rejected": -0.06202774867415428, "step": 2107 }, { "epoch": 1.4578146611341631, "grad_norm": 7.503900527954102, "learning_rate": 4.745658521592132e-05, "log_odds_chosen": 5.063538074493408, "log_odds_ratio": -0.482469767332077, "logits/chosen": -0.6590060591697693, "logits/rejected": -0.6995262503623962, "logps/chosen": -0.1371098756790161, "logps/rejected": -0.7465744614601135, "loss": 3.0332, "nll_loss": 0.710058331489563, "rewards/accuracies": 0.875, "rewards/chosen": -0.013710987754166126, "rewards/margins": 0.06094646081328392, "rewards/rejected": -0.07465744763612747, "step": 2108 }, { "epoch": 1.45850622406639, "grad_norm": 4.681285858154297, "learning_rate": 4.745274319963117e-05, "log_odds_chosen": 4.333364963531494, "log_odds_ratio": -0.1369195282459259, "logits/chosen": -0.47746366262435913, "logits/rejected": -0.5346704125404358, "logps/chosen": -0.052240658551454544, "logps/rejected": -0.602260947227478, "loss": 2.7166, "nll_loss": 0.6654695868492126, "rewards/accuracies": 1.0, "rewards/chosen": -0.005224065855145454, "rewards/margins": 0.05500202625989914, "rewards/rejected": -0.060226090252399445, "step": 2109 }, { "epoch": 1.4591977869986168, "grad_norm": 6.215029716491699, "learning_rate": 4.744890118334102e-05, "log_odds_chosen": 4.594152927398682, "log_odds_ratio": -0.2969439625740051, "logits/chosen": -0.4114540219306946, "logits/rejected": -0.4823339581489563, "logps/chosen": -0.11988166719675064, "logps/rejected": -0.5423327684402466, "loss": 2.5059, "nll_loss": 0.5967724323272705, "rewards/accuracies": 0.875, "rewards/chosen": -0.011988166719675064, "rewards/margins": 0.04224511235952377, "rewards/rejected": -0.05423327907919884, "step": 2110 }, { "epoch": 1.4598893499308436, "grad_norm": 11.740045547485352, "learning_rate": 4.744505916705087e-05, "log_odds_chosen": 2.193021774291992, "log_odds_ratio": -0.4456307291984558, "logits/chosen": -0.4481427073478699, "logits/rejected": -0.5567407608032227, "logps/chosen": -0.2550775408744812, "logps/rejected": -0.9214454293251038, "loss": 3.7604, "nll_loss": 0.8955416679382324, "rewards/accuracies": 0.625, "rewards/chosen": -0.02550775557756424, "rewards/margins": 0.06663678586483002, "rewards/rejected": -0.09214454889297485, "step": 2111 }, { "epoch": 1.4605809128630705, "grad_norm": 5.726841926574707, "learning_rate": 4.744121715076072e-05, "log_odds_chosen": 4.480843544006348, "log_odds_ratio": -0.12533259391784668, "logits/chosen": -0.44382768869400024, "logits/rejected": -0.473913311958313, "logps/chosen": -0.04405241459608078, "logps/rejected": -0.5421817898750305, "loss": 3.7368, "nll_loss": 0.9216721057891846, "rewards/accuracies": 1.0, "rewards/chosen": -0.004405241459608078, "rewards/margins": 0.04981293901801109, "rewards/rejected": -0.05421818047761917, "step": 2112 }, { "epoch": 1.4612724757952975, "grad_norm": 6.996542930603027, "learning_rate": 4.743737513447057e-05, "log_odds_chosen": 4.351029396057129, "log_odds_ratio": -0.1016198918223381, "logits/chosen": -0.48450225591659546, "logits/rejected": -0.5053142309188843, "logps/chosen": -0.08305220305919647, "logps/rejected": -1.1227951049804688, "loss": 3.0431, "nll_loss": 0.7506143450737, "rewards/accuracies": 1.0, "rewards/chosen": -0.008305220864713192, "rewards/margins": 0.10397429764270782, "rewards/rejected": -0.11227951943874359, "step": 2113 }, { "epoch": 1.4619640387275243, "grad_norm": 8.82361125946045, "learning_rate": 4.7433533118180425e-05, "log_odds_chosen": 2.8261706829071045, "log_odds_ratio": -0.5516917109489441, "logits/chosen": -0.47138428688049316, "logits/rejected": -0.46295493841171265, "logps/chosen": -0.130302295088768, "logps/rejected": -0.4903410077095032, "loss": 4.3231, "nll_loss": 1.0256012678146362, "rewards/accuracies": 0.625, "rewards/chosen": -0.013030230067670345, "rewards/margins": 0.036003872752189636, "rewards/rejected": -0.049034103751182556, "step": 2114 }, { "epoch": 1.4626556016597512, "grad_norm": 7.945962905883789, "learning_rate": 4.742969110189027e-05, "log_odds_chosen": 3.673635244369507, "log_odds_ratio": -0.10550344735383987, "logits/chosen": -0.6925126314163208, "logits/rejected": -0.6828139424324036, "logps/chosen": -0.03606286272406578, "logps/rejected": -0.6843661665916443, "loss": 3.5708, "nll_loss": 0.8821461200714111, "rewards/accuracies": 1.0, "rewards/chosen": -0.003606286598369479, "rewards/margins": 0.06483032554388046, "rewards/rejected": -0.06843661516904831, "step": 2115 }, { "epoch": 1.463347164591978, "grad_norm": 6.662603378295898, "learning_rate": 4.742584908560013e-05, "log_odds_chosen": 4.126121520996094, "log_odds_ratio": -0.24083402752876282, "logits/chosen": -0.4301590621471405, "logits/rejected": -0.5066604614257812, "logps/chosen": -0.05969817191362381, "logps/rejected": -0.7975174784660339, "loss": 3.5244, "nll_loss": 0.8570234775543213, "rewards/accuracies": 0.875, "rewards/chosen": -0.005969816818833351, "rewards/margins": 0.07378192991018295, "rewards/rejected": -0.07975174486637115, "step": 2116 }, { "epoch": 1.4640387275242048, "grad_norm": 6.692433834075928, "learning_rate": 4.7422007069309975e-05, "log_odds_chosen": 5.185555934906006, "log_odds_ratio": -0.06206965073943138, "logits/chosen": -0.6158180236816406, "logits/rejected": -0.6842352747917175, "logps/chosen": -0.05131285637617111, "logps/rejected": -1.0540692806243896, "loss": 4.124, "nll_loss": 1.0247859954833984, "rewards/accuracies": 1.0, "rewards/chosen": -0.005131285637617111, "rewards/margins": 0.10027563571929932, "rewards/rejected": -0.10540692508220673, "step": 2117 }, { "epoch": 1.4647302904564317, "grad_norm": 7.510965347290039, "learning_rate": 4.741816505301983e-05, "log_odds_chosen": 3.0582268238067627, "log_odds_ratio": -0.28565216064453125, "logits/chosen": -0.2934304177761078, "logits/rejected": -0.29644107818603516, "logps/chosen": -0.054563820362091064, "logps/rejected": -0.5622140169143677, "loss": 2.8653, "nll_loss": 0.6877476572990417, "rewards/accuracies": 0.875, "rewards/chosen": -0.005456382408738136, "rewards/margins": 0.0507650226354599, "rewards/rejected": -0.056221406906843185, "step": 2118 }, { "epoch": 1.4654218533886585, "grad_norm": 9.008056640625, "learning_rate": 4.741432303672968e-05, "log_odds_chosen": 3.7784793376922607, "log_odds_ratio": -0.28400006890296936, "logits/chosen": -0.5542048811912537, "logits/rejected": -0.6849331855773926, "logps/chosen": -0.15926668047904968, "logps/rejected": -0.9513943195343018, "loss": 3.2885, "nll_loss": 0.7937213778495789, "rewards/accuracies": 0.875, "rewards/chosen": -0.015926668420433998, "rewards/margins": 0.07921276241540909, "rewards/rejected": -0.09513943642377853, "step": 2119 }, { "epoch": 1.4661134163208853, "grad_norm": 7.5393500328063965, "learning_rate": 4.7410481020439526e-05, "log_odds_chosen": 2.863239288330078, "log_odds_ratio": -0.4638535976409912, "logits/chosen": -0.30791130661964417, "logits/rejected": -0.32997122406959534, "logps/chosen": -0.11125410348176956, "logps/rejected": -0.7325018644332886, "loss": 3.2332, "nll_loss": 0.7619196176528931, "rewards/accuracies": 0.875, "rewards/chosen": -0.011125410906970501, "rewards/margins": 0.06212478503584862, "rewards/rejected": -0.0732501968741417, "step": 2120 }, { "epoch": 1.4668049792531122, "grad_norm": 4.835433006286621, "learning_rate": 4.740663900414938e-05, "log_odds_chosen": 4.400109767913818, "log_odds_ratio": -0.2901593744754791, "logits/chosen": -0.3349055051803589, "logits/rejected": -0.2996591627597809, "logps/chosen": -0.07037417590618134, "logps/rejected": -0.7848444581031799, "loss": 2.8301, "nll_loss": 0.6785147190093994, "rewards/accuracies": 0.875, "rewards/chosen": -0.007037417963147163, "rewards/margins": 0.07144702970981598, "rewards/rejected": -0.078484445810318, "step": 2121 }, { "epoch": 1.467496542185339, "grad_norm": 6.1850666999816895, "learning_rate": 4.740279698785923e-05, "log_odds_chosen": 4.053633689880371, "log_odds_ratio": -0.5489174723625183, "logits/chosen": -0.5120274424552917, "logits/rejected": -0.5325107574462891, "logps/chosen": -0.09262340515851974, "logps/rejected": -0.5417929887771606, "loss": 2.7099, "nll_loss": 0.6225718855857849, "rewards/accuracies": 0.75, "rewards/chosen": -0.009262342005968094, "rewards/margins": 0.04491695761680603, "rewards/rejected": -0.05417929217219353, "step": 2122 }, { "epoch": 1.4681881051175658, "grad_norm": 4.262668132781982, "learning_rate": 4.739895497156908e-05, "log_odds_chosen": 6.103684425354004, "log_odds_ratio": -0.007512577343732119, "logits/chosen": -0.5071601867675781, "logits/rejected": -0.50539231300354, "logps/chosen": -0.02052057720720768, "logps/rejected": -0.9985079169273376, "loss": 3.4081, "nll_loss": 0.8512747287750244, "rewards/accuracies": 1.0, "rewards/chosen": -0.002052057534456253, "rewards/margins": 0.09779874235391617, "rewards/rejected": -0.09985079616308212, "step": 2123 }, { "epoch": 1.4688796680497926, "grad_norm": 4.665466785430908, "learning_rate": 4.739511295527893e-05, "log_odds_chosen": 4.051023960113525, "log_odds_ratio": -0.09487346559762955, "logits/chosen": -0.6261661052703857, "logits/rejected": -0.6074262261390686, "logps/chosen": -0.060799550265073776, "logps/rejected": -0.6109548211097717, "loss": 3.5126, "nll_loss": 0.8686507940292358, "rewards/accuracies": 1.0, "rewards/chosen": -0.006079955492168665, "rewards/margins": 0.055015530437231064, "rewards/rejected": -0.06109548732638359, "step": 2124 }, { "epoch": 1.4695712309820195, "grad_norm": 10.238999366760254, "learning_rate": 4.739127093898879e-05, "log_odds_chosen": 4.448538303375244, "log_odds_ratio": -0.4580115079879761, "logits/chosen": -0.12140993028879166, "logits/rejected": -0.12025927007198334, "logps/chosen": -0.09281541407108307, "logps/rejected": -0.7629176378250122, "loss": 2.5392, "nll_loss": 0.5889951586723328, "rewards/accuracies": 0.625, "rewards/chosen": -0.009281541220843792, "rewards/margins": 0.06701022386550903, "rewards/rejected": -0.0762917622923851, "step": 2125 }, { "epoch": 1.4702627939142463, "grad_norm": 7.920474529266357, "learning_rate": 4.7387428922698634e-05, "log_odds_chosen": 2.344700336456299, "log_odds_ratio": -0.37472227215766907, "logits/chosen": -0.7578220367431641, "logits/rejected": -0.775606095790863, "logps/chosen": -0.20015893876552582, "logps/rejected": -0.5982450246810913, "loss": 3.5284, "nll_loss": 0.8446251153945923, "rewards/accuracies": 0.75, "rewards/chosen": -0.0200158953666687, "rewards/margins": 0.03980860486626625, "rewards/rejected": -0.05982450023293495, "step": 2126 }, { "epoch": 1.4709543568464731, "grad_norm": 5.36145544052124, "learning_rate": 4.7383586906408486e-05, "log_odds_chosen": 2.4950942993164062, "log_odds_ratio": -0.49963515996932983, "logits/chosen": -0.4623447060585022, "logits/rejected": -0.4579474925994873, "logps/chosen": -0.17350442707538605, "logps/rejected": -0.5114143490791321, "loss": 2.9987, "nll_loss": 0.699701726436615, "rewards/accuracies": 0.5, "rewards/chosen": -0.017350442707538605, "rewards/margins": 0.033790990710258484, "rewards/rejected": -0.05114143341779709, "step": 2127 }, { "epoch": 1.4716459197787, "grad_norm": 6.284639358520508, "learning_rate": 4.737974489011834e-05, "log_odds_chosen": 6.780862808227539, "log_odds_ratio": -0.015053209848701954, "logits/chosen": -0.267768919467926, "logits/rejected": -0.24615205824375153, "logps/chosen": -0.0044655827805399895, "logps/rejected": -0.8356389999389648, "loss": 3.29, "nll_loss": 0.8209894895553589, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044655834790319204, "rewards/margins": 0.08311734348535538, "rewards/rejected": -0.083563894033432, "step": 2128 }, { "epoch": 1.4723374827109268, "grad_norm": 6.118336200714111, "learning_rate": 4.7375902873828184e-05, "log_odds_chosen": 5.7112226486206055, "log_odds_ratio": -0.13390396535396576, "logits/chosen": -0.5156606435775757, "logits/rejected": -0.5889492630958557, "logps/chosen": -0.04604191705584526, "logps/rejected": -1.1898932456970215, "loss": 3.6421, "nll_loss": 0.8971379995346069, "rewards/accuracies": 0.875, "rewards/chosen": -0.004604191519320011, "rewards/margins": 0.11438513547182083, "rewards/rejected": -0.11898931860923767, "step": 2129 }, { "epoch": 1.4730290456431536, "grad_norm": 4.7239837646484375, "learning_rate": 4.737206085753804e-05, "log_odds_chosen": 3.1016533374786377, "log_odds_ratio": -0.2537999749183655, "logits/chosen": -0.40021154284477234, "logits/rejected": -0.4068134129047394, "logps/chosen": -0.09348339587450027, "logps/rejected": -0.9883545637130737, "loss": 2.8394, "nll_loss": 0.6844592094421387, "rewards/accuracies": 0.75, "rewards/chosen": -0.009348340332508087, "rewards/margins": 0.08948711305856705, "rewards/rejected": -0.09883546084165573, "step": 2130 }, { "epoch": 1.4737206085753805, "grad_norm": 10.042348861694336, "learning_rate": 4.736821884124789e-05, "log_odds_chosen": 4.484597682952881, "log_odds_ratio": -0.07834921777248383, "logits/chosen": -0.2142612487077713, "logits/rejected": -0.2503935396671295, "logps/chosen": -0.03164057806134224, "logps/rejected": -0.49265021085739136, "loss": 3.7392, "nll_loss": 0.9269661903381348, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031640580855309963, "rewards/margins": 0.04610096290707588, "rewards/rejected": -0.049265019595623016, "step": 2131 }, { "epoch": 1.4744121715076073, "grad_norm": 6.352854251861572, "learning_rate": 4.736437682495774e-05, "log_odds_chosen": 6.055075645446777, "log_odds_ratio": -0.07979589700698853, "logits/chosen": -0.4343861937522888, "logits/rejected": -0.49018430709838867, "logps/chosen": -0.020101509988307953, "logps/rejected": -0.8233213424682617, "loss": 3.0005, "nll_loss": 0.7421414852142334, "rewards/accuracies": 1.0, "rewards/chosen": -0.002010151045396924, "rewards/margins": 0.08032198995351791, "rewards/rejected": -0.08233214169740677, "step": 2132 }, { "epoch": 1.4751037344398341, "grad_norm": 8.404098510742188, "learning_rate": 4.736053480866759e-05, "log_odds_chosen": 3.932952404022217, "log_odds_ratio": -0.065504290163517, "logits/chosen": -0.5252431631088257, "logits/rejected": -0.5888339281082153, "logps/chosen": -0.0439700186252594, "logps/rejected": -0.7456878423690796, "loss": 4.4127, "nll_loss": 1.0966308116912842, "rewards/accuracies": 1.0, "rewards/chosen": -0.004397002514451742, "rewards/margins": 0.0701717883348465, "rewards/rejected": -0.07456878572702408, "step": 2133 }, { "epoch": 1.475795297372061, "grad_norm": 11.165557861328125, "learning_rate": 4.7356692792377447e-05, "log_odds_chosen": 5.398857116699219, "log_odds_ratio": -0.466510146856308, "logits/chosen": -0.056302133947610855, "logits/rejected": -0.11562386155128479, "logps/chosen": -0.09982578456401825, "logps/rejected": -1.0831027030944824, "loss": 3.7737, "nll_loss": 0.8967711925506592, "rewards/accuracies": 0.75, "rewards/chosen": -0.00998257752507925, "rewards/margins": 0.09832769632339478, "rewards/rejected": -0.1083102822303772, "step": 2134 }, { "epoch": 1.4764868603042878, "grad_norm": 7.29428243637085, "learning_rate": 4.735285077608729e-05, "log_odds_chosen": 1.775020718574524, "log_odds_ratio": -0.6045640707015991, "logits/chosen": -0.4116702675819397, "logits/rejected": -0.45533379912376404, "logps/chosen": -0.10833375155925751, "logps/rejected": -0.5464069843292236, "loss": 3.363, "nll_loss": 0.7803056240081787, "rewards/accuracies": 0.625, "rewards/chosen": -0.01083337515592575, "rewards/margins": 0.04380732774734497, "rewards/rejected": -0.054640695452690125, "step": 2135 }, { "epoch": 1.4771784232365146, "grad_norm": 6.622182369232178, "learning_rate": 4.7349008759797145e-05, "log_odds_chosen": 6.178796768188477, "log_odds_ratio": -0.07372809946537018, "logits/chosen": -0.34616512060165405, "logits/rejected": -0.4092048108577728, "logps/chosen": -0.05432863533496857, "logps/rejected": -1.0202291011810303, "loss": 3.2895, "nll_loss": 0.8149974942207336, "rewards/accuracies": 1.0, "rewards/chosen": -0.005432863254100084, "rewards/margins": 0.096590057015419, "rewards/rejected": -0.10202290862798691, "step": 2136 }, { "epoch": 1.4778699861687414, "grad_norm": 5.9264235496521, "learning_rate": 4.7345166743507e-05, "log_odds_chosen": 3.3092384338378906, "log_odds_ratio": -0.18755535781383514, "logits/chosen": -0.47509706020355225, "logits/rejected": -0.4891270101070404, "logps/chosen": -0.05454757437109947, "logps/rejected": -0.5544096231460571, "loss": 4.0193, "nll_loss": 0.9860591888427734, "rewards/accuracies": 0.875, "rewards/chosen": -0.005454757250845432, "rewards/margins": 0.049986205995082855, "rewards/rejected": -0.05544096231460571, "step": 2137 }, { "epoch": 1.4785615491009683, "grad_norm": 13.524920463562012, "learning_rate": 4.734132472721684e-05, "log_odds_chosen": 1.8216850757598877, "log_odds_ratio": -0.45454758405685425, "logits/chosen": -0.4366537034511566, "logits/rejected": -0.460040807723999, "logps/chosen": -0.2892919182777405, "logps/rejected": -0.5869209170341492, "loss": 3.0996, "nll_loss": 0.7294558882713318, "rewards/accuracies": 0.75, "rewards/chosen": -0.028929192572832108, "rewards/margins": 0.02976289950311184, "rewards/rejected": -0.0586920902132988, "step": 2138 }, { "epoch": 1.479253112033195, "grad_norm": 6.360928535461426, "learning_rate": 4.7337482710926695e-05, "log_odds_chosen": 6.825347423553467, "log_odds_ratio": -0.009228329174220562, "logits/chosen": -0.5052814483642578, "logits/rejected": -0.6166610717773438, "logps/chosen": -0.017878873273730278, "logps/rejected": -1.3889663219451904, "loss": 3.8464, "nll_loss": 0.9606884717941284, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017878874205052853, "rewards/margins": 0.1371087282896042, "rewards/rejected": -0.1388966143131256, "step": 2139 }, { "epoch": 1.479944674965422, "grad_norm": 8.675497055053711, "learning_rate": 4.733364069463655e-05, "log_odds_chosen": 5.791857719421387, "log_odds_ratio": -0.03341325372457504, "logits/chosen": -0.3276616930961609, "logits/rejected": -0.41704148054122925, "logps/chosen": -0.01462834607809782, "logps/rejected": -1.239469051361084, "loss": 3.6756, "nll_loss": 0.9155594706535339, "rewards/accuracies": 1.0, "rewards/chosen": -0.001462834537960589, "rewards/margins": 0.12248405814170837, "rewards/rejected": -0.1239469051361084, "step": 2140 }, { "epoch": 1.4806362378976488, "grad_norm": 6.080367565155029, "learning_rate": 4.73297986783464e-05, "log_odds_chosen": 5.032208442687988, "log_odds_ratio": -0.07722554355859756, "logits/chosen": -0.8022492527961731, "logits/rejected": -0.8383902311325073, "logps/chosen": -0.022705290466547012, "logps/rejected": -0.8100517988204956, "loss": 2.9716, "nll_loss": 0.7351704835891724, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022705290466547012, "rewards/margins": 0.07873465120792389, "rewards/rejected": -0.08100517839193344, "step": 2141 }, { "epoch": 1.4813278008298756, "grad_norm": 6.03106164932251, "learning_rate": 4.7325956662056246e-05, "log_odds_chosen": 7.702787399291992, "log_odds_ratio": -0.04957111179828644, "logits/chosen": -0.39969608187675476, "logits/rejected": -0.4934896230697632, "logps/chosen": -0.0072068748995661736, "logps/rejected": -1.4062868356704712, "loss": 2.4801, "nll_loss": 0.6150761246681213, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007206875015981495, "rewards/margins": 0.13990800082683563, "rewards/rejected": -0.14062868058681488, "step": 2142 }, { "epoch": 1.4820193637621024, "grad_norm": 6.338611125946045, "learning_rate": 4.7322114645766105e-05, "log_odds_chosen": 3.901794672012329, "log_odds_ratio": -0.1494646668434143, "logits/chosen": -0.9588245153427124, "logits/rejected": -1.0375213623046875, "logps/chosen": -0.032890114933252335, "logps/rejected": -0.5975766181945801, "loss": 3.4648, "nll_loss": 0.851256251335144, "rewards/accuracies": 0.875, "rewards/chosen": -0.003289011772722006, "rewards/margins": 0.056468650698661804, "rewards/rejected": -0.05975766479969025, "step": 2143 }, { "epoch": 1.4827109266943292, "grad_norm": 7.798945903778076, "learning_rate": 4.731827262947595e-05, "log_odds_chosen": 4.657999038696289, "log_odds_ratio": -0.22223106026649475, "logits/chosen": 0.04468072950839996, "logits/rejected": 0.035873137414455414, "logps/chosen": -0.08787171542644501, "logps/rejected": -1.0934221744537354, "loss": 4.3384, "nll_loss": 1.0623890161514282, "rewards/accuracies": 0.875, "rewards/chosen": -0.00878717191517353, "rewards/margins": 0.10055506229400635, "rewards/rejected": -0.10934222489595413, "step": 2144 }, { "epoch": 1.483402489626556, "grad_norm": 9.234012603759766, "learning_rate": 4.73144306131858e-05, "log_odds_chosen": 5.35285758972168, "log_odds_ratio": -0.1352691948413849, "logits/chosen": -0.47749924659729004, "logits/rejected": -0.5015690326690674, "logps/chosen": -0.04192415624856949, "logps/rejected": -0.7736974954605103, "loss": 5.0982, "nll_loss": 1.2610275745391846, "rewards/accuracies": 0.875, "rewards/chosen": -0.004192416090518236, "rewards/margins": 0.07317733764648438, "rewards/rejected": -0.07736974954605103, "step": 2145 }, { "epoch": 1.484094052558783, "grad_norm": 8.922550201416016, "learning_rate": 4.7310588596895656e-05, "log_odds_chosen": 2.317765712738037, "log_odds_ratio": -0.4278677701950073, "logits/chosen": -0.5566809177398682, "logits/rejected": -0.5125826001167297, "logps/chosen": -0.0754639282822609, "logps/rejected": -0.27378353476524353, "loss": 3.6639, "nll_loss": 0.8731777667999268, "rewards/accuracies": 0.875, "rewards/chosen": -0.007546393200755119, "rewards/margins": 0.019831961020827293, "rewards/rejected": -0.027378354221582413, "step": 2146 }, { "epoch": 1.4847856154910097, "grad_norm": 7.1663689613342285, "learning_rate": 4.73067465806055e-05, "log_odds_chosen": 2.725557327270508, "log_odds_ratio": -0.2395949810743332, "logits/chosen": -0.6971051096916199, "logits/rejected": -0.721907913684845, "logps/chosen": -0.12180915474891663, "logps/rejected": -0.7929058074951172, "loss": 4.1105, "nll_loss": 1.0036766529083252, "rewards/accuracies": 0.875, "rewards/chosen": -0.012180916965007782, "rewards/margins": 0.06710965931415558, "rewards/rejected": -0.07929057627916336, "step": 2147 }, { "epoch": 1.4854771784232366, "grad_norm": 7.183311939239502, "learning_rate": 4.7302904564315354e-05, "log_odds_chosen": 5.562891960144043, "log_odds_ratio": -0.05773504078388214, "logits/chosen": -0.45431026816368103, "logits/rejected": -0.514234185218811, "logps/chosen": -0.03870779275894165, "logps/rejected": -0.9699372053146362, "loss": 4.4011, "nll_loss": 1.0945072174072266, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038707794155925512, "rewards/margins": 0.0931229442358017, "rewards/rejected": -0.09699372202157974, "step": 2148 }, { "epoch": 1.4861687413554634, "grad_norm": 5.313976764678955, "learning_rate": 4.7299062548025206e-05, "log_odds_chosen": 5.13576602935791, "log_odds_ratio": -0.11388306319713593, "logits/chosen": -0.188670352101326, "logits/rejected": -0.2855151891708374, "logps/chosen": -0.09410841763019562, "logps/rejected": -0.9148901104927063, "loss": 2.5396, "nll_loss": 0.6235028505325317, "rewards/accuracies": 1.0, "rewards/chosen": -0.009410843253135681, "rewards/margins": 0.08207817375659943, "rewards/rejected": -0.09148901700973511, "step": 2149 }, { "epoch": 1.4868603042876902, "grad_norm": 7.928635597229004, "learning_rate": 4.729522053173506e-05, "log_odds_chosen": 3.8607382774353027, "log_odds_ratio": -0.5775615572929382, "logits/chosen": -0.21268567442893982, "logits/rejected": -0.2625402510166168, "logps/chosen": -0.13489803671836853, "logps/rejected": -0.9022601842880249, "loss": 2.8881, "nll_loss": 0.6642749309539795, "rewards/accuracies": 0.625, "rewards/chosen": -0.013489805161952972, "rewards/margins": 0.076736219227314, "rewards/rejected": -0.09022602438926697, "step": 2150 }, { "epoch": 1.487551867219917, "grad_norm": 7.397334575653076, "learning_rate": 4.7291378515444904e-05, "log_odds_chosen": 6.1395344734191895, "log_odds_ratio": -0.06367681920528412, "logits/chosen": -0.08033701777458191, "logits/rejected": -0.15627387166023254, "logps/chosen": -0.07491898536682129, "logps/rejected": -1.1956180334091187, "loss": 2.5914, "nll_loss": 0.6414797306060791, "rewards/accuracies": 1.0, "rewards/chosen": -0.007491898722946644, "rewards/margins": 0.11206988990306854, "rewards/rejected": -0.11956179141998291, "step": 2151 }, { "epoch": 1.4882434301521439, "grad_norm": 9.43634033203125, "learning_rate": 4.7287536499154763e-05, "log_odds_chosen": 2.093677282333374, "log_odds_ratio": -0.6924388408660889, "logits/chosen": -0.32470396161079407, "logits/rejected": -0.3753596544265747, "logps/chosen": -0.13023850321769714, "logps/rejected": -0.6583462357521057, "loss": 3.179, "nll_loss": 0.7255163788795471, "rewards/accuracies": 0.625, "rewards/chosen": -0.01302385050803423, "rewards/margins": 0.052810780704021454, "rewards/rejected": -0.06583462655544281, "step": 2152 }, { "epoch": 1.4889349930843707, "grad_norm": 7.570080757141113, "learning_rate": 4.728369448286461e-05, "log_odds_chosen": 4.581209182739258, "log_odds_ratio": -0.19417330622673035, "logits/chosen": -0.5809056162834167, "logits/rejected": -0.6013231873512268, "logps/chosen": -0.07302338629961014, "logps/rejected": -0.6953396797180176, "loss": 3.8762, "nll_loss": 0.9496421217918396, "rewards/accuracies": 0.875, "rewards/chosen": -0.007302338723093271, "rewards/margins": 0.0622316375374794, "rewards/rejected": -0.06953397393226624, "step": 2153 }, { "epoch": 1.4896265560165975, "grad_norm": 4.859841346740723, "learning_rate": 4.727985246657446e-05, "log_odds_chosen": 5.577225685119629, "log_odds_ratio": -0.014450366608798504, "logits/chosen": -0.4863715171813965, "logits/rejected": -0.4975162148475647, "logps/chosen": -0.008166614919900894, "logps/rejected": -0.7243944406509399, "loss": 2.3394, "nll_loss": 0.5834062695503235, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008166615734808147, "rewards/margins": 0.07162278145551682, "rewards/rejected": -0.07243944704532623, "step": 2154 }, { "epoch": 1.4903181189488244, "grad_norm": 7.439135551452637, "learning_rate": 4.7276010450284314e-05, "log_odds_chosen": 3.001149892807007, "log_odds_ratio": -0.32452264428138733, "logits/chosen": -0.40237510204315186, "logits/rejected": -0.4026588201522827, "logps/chosen": -0.09805717319250107, "logps/rejected": -0.6367148160934448, "loss": 3.4933, "nll_loss": 0.8408713340759277, "rewards/accuracies": 0.875, "rewards/chosen": -0.009805718436837196, "rewards/margins": 0.053865764290094376, "rewards/rejected": -0.06367147713899612, "step": 2155 }, { "epoch": 1.4910096818810512, "grad_norm": 7.253205299377441, "learning_rate": 4.727216843399416e-05, "log_odds_chosen": 5.118253231048584, "log_odds_ratio": -0.24667249619960785, "logits/chosen": -0.8493658900260925, "logits/rejected": -0.9143842458724976, "logps/chosen": -0.07504816353321075, "logps/rejected": -1.4404281377792358, "loss": 2.998, "nll_loss": 0.7248427867889404, "rewards/accuracies": 0.875, "rewards/chosen": -0.007504816632717848, "rewards/margins": 0.13653799891471863, "rewards/rejected": -0.14404281973838806, "step": 2156 }, { "epoch": 1.491701244813278, "grad_norm": 8.593515396118164, "learning_rate": 4.726832641770401e-05, "log_odds_chosen": 4.080145835876465, "log_odds_ratio": -0.08691144734621048, "logits/chosen": -0.7267591953277588, "logits/rejected": -0.7008885145187378, "logps/chosen": -0.03745944797992706, "logps/rejected": -0.5800659656524658, "loss": 3.0962, "nll_loss": 0.7653520107269287, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037459449376910925, "rewards/margins": 0.05426064878702164, "rewards/rejected": -0.058006592094898224, "step": 2157 }, { "epoch": 1.4923928077455049, "grad_norm": 9.480552673339844, "learning_rate": 4.7264484401413865e-05, "log_odds_chosen": 4.114963054656982, "log_odds_ratio": -0.20959897339344025, "logits/chosen": -0.6540583372116089, "logits/rejected": -0.7013518810272217, "logps/chosen": -0.10385959595441818, "logps/rejected": -0.9378431439399719, "loss": 3.4575, "nll_loss": 0.8434049487113953, "rewards/accuracies": 0.875, "rewards/chosen": -0.010385959409177303, "rewards/margins": 0.08339834958314896, "rewards/rejected": -0.09378431737422943, "step": 2158 }, { "epoch": 1.4930843706777317, "grad_norm": 5.0781474113464355, "learning_rate": 4.726064238512372e-05, "log_odds_chosen": 2.2874131202697754, "log_odds_ratio": -0.5506502389907837, "logits/chosen": -0.6341639161109924, "logits/rejected": -0.6848548650741577, "logps/chosen": -0.17891795933246613, "logps/rejected": -0.5480682849884033, "loss": 3.3959, "nll_loss": 0.7939110994338989, "rewards/accuracies": 0.5, "rewards/chosen": -0.017891794443130493, "rewards/margins": 0.03691503778100014, "rewards/rejected": -0.05480683222413063, "step": 2159 }, { "epoch": 1.4937759336099585, "grad_norm": 11.54198169708252, "learning_rate": 4.725680036883356e-05, "log_odds_chosen": 5.798181056976318, "log_odds_ratio": -0.6406501531600952, "logits/chosen": -0.4734801948070526, "logits/rejected": -0.5142822265625, "logps/chosen": -0.07387179136276245, "logps/rejected": -1.086634635925293, "loss": 3.5472, "nll_loss": 0.8227443695068359, "rewards/accuracies": 0.875, "rewards/chosen": -0.007387179881334305, "rewards/margins": 0.10127627849578857, "rewards/rejected": -0.10866345465183258, "step": 2160 }, { "epoch": 1.4944674965421854, "grad_norm": 7.3922834396362305, "learning_rate": 4.725295835254342e-05, "log_odds_chosen": 4.19404411315918, "log_odds_ratio": -0.1303994059562683, "logits/chosen": -0.45763909816741943, "logits/rejected": -0.5041736364364624, "logps/chosen": -0.07846425473690033, "logps/rejected": -0.8622405529022217, "loss": 3.7875, "nll_loss": 0.9338254332542419, "rewards/accuracies": 1.0, "rewards/chosen": -0.007846426218748093, "rewards/margins": 0.0783776268362999, "rewards/rejected": -0.08622404932975769, "step": 2161 }, { "epoch": 1.4951590594744122, "grad_norm": 6.090280532836914, "learning_rate": 4.724911633625327e-05, "log_odds_chosen": 1.9667760133743286, "log_odds_ratio": -0.20600448548793793, "logits/chosen": -0.729042649269104, "logits/rejected": -0.7240199446678162, "logps/chosen": -0.1258997917175293, "logps/rejected": -0.6925151944160461, "loss": 4.4843, "nll_loss": 1.1004818677902222, "rewards/accuracies": 1.0, "rewards/chosen": -0.012589978985488415, "rewards/margins": 0.056661538779735565, "rewards/rejected": -0.06925151497125626, "step": 2162 }, { "epoch": 1.495850622406639, "grad_norm": 8.1521577835083, "learning_rate": 4.724527431996312e-05, "log_odds_chosen": 4.232202529907227, "log_odds_ratio": -0.05224863812327385, "logits/chosen": -0.5873209238052368, "logits/rejected": -0.6442223191261292, "logps/chosen": -0.037870604544878006, "logps/rejected": -0.7845534086227417, "loss": 5.4093, "nll_loss": 1.347105622291565, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037870605010539293, "rewards/margins": 0.07466829568147659, "rewards/rejected": -0.078455351293087, "step": 2163 }, { "epoch": 1.4965421853388658, "grad_norm": 5.145297527313232, "learning_rate": 4.724143230367297e-05, "log_odds_chosen": 6.329265117645264, "log_odds_ratio": -0.11377011239528656, "logits/chosen": -0.6110424995422363, "logits/rejected": -0.6548846960067749, "logps/chosen": -0.03288734704256058, "logps/rejected": -0.841201901435852, "loss": 2.3195, "nll_loss": 0.5684930086135864, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032887347042560577, "rewards/margins": 0.08083146065473557, "rewards/rejected": -0.08412019163370132, "step": 2164 }, { "epoch": 1.4972337482710927, "grad_norm": 8.501917839050293, "learning_rate": 4.723759028738282e-05, "log_odds_chosen": 5.699545383453369, "log_odds_ratio": -0.06798206269741058, "logits/chosen": -0.06234194338321686, "logits/rejected": -0.06303275376558304, "logps/chosen": -0.03642402961850166, "logps/rejected": -0.678272008895874, "loss": 2.7021, "nll_loss": 0.6687160134315491, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036424030549824238, "rewards/margins": 0.06418479233980179, "rewards/rejected": -0.06782719492912292, "step": 2165 }, { "epoch": 1.4979253112033195, "grad_norm": 12.868005752563477, "learning_rate": 4.723374827109267e-05, "log_odds_chosen": 2.8281095027923584, "log_odds_ratio": -0.680151104927063, "logits/chosen": -0.6259391903877258, "logits/rejected": -0.6474286317825317, "logps/chosen": -0.12104423344135284, "logps/rejected": -0.4440361261367798, "loss": 3.9843, "nll_loss": 0.9280720949172974, "rewards/accuracies": 0.75, "rewards/chosen": -0.01210442278534174, "rewards/margins": 0.032299187034368515, "rewards/rejected": -0.04440361261367798, "step": 2166 }, { "epoch": 1.4986168741355463, "grad_norm": 7.35582160949707, "learning_rate": 4.722990625480252e-05, "log_odds_chosen": 7.120567321777344, "log_odds_ratio": -0.011260163970291615, "logits/chosen": -0.42110222578048706, "logits/rejected": -0.474237859249115, "logps/chosen": -0.007903838530182838, "logps/rejected": -1.129345178604126, "loss": 3.206, "nll_loss": 0.8003849387168884, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007903838413767517, "rewards/margins": 0.11214414238929749, "rewards/rejected": -0.11293452978134155, "step": 2167 }, { "epoch": 1.4993084370677732, "grad_norm": 7.784075736999512, "learning_rate": 4.7226064238512375e-05, "log_odds_chosen": 5.417253494262695, "log_odds_ratio": -0.026224004104733467, "logits/chosen": -0.7177451252937317, "logits/rejected": -0.7622504234313965, "logps/chosen": -0.03399862349033356, "logps/rejected": -0.8395058512687683, "loss": 3.8119, "nll_loss": 0.9503412246704102, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033998622093349695, "rewards/margins": 0.08055072277784348, "rewards/rejected": -0.08395059406757355, "step": 2168 }, { "epoch": 1.5, "grad_norm": 9.223710060119629, "learning_rate": 4.722222222222222e-05, "log_odds_chosen": 6.69984245300293, "log_odds_ratio": -0.0645102858543396, "logits/chosen": -0.47331032156944275, "logits/rejected": -0.547443151473999, "logps/chosen": -0.04146613925695419, "logps/rejected": -1.09946870803833, "loss": 4.3556, "nll_loss": 1.0824520587921143, "rewards/accuracies": 1.0, "rewards/chosen": -0.004146614111959934, "rewards/margins": 0.10580027103424072, "rewards/rejected": -0.10994688421487808, "step": 2169 }, { "epoch": 1.5006915629322268, "grad_norm": 5.514622688293457, "learning_rate": 4.721838020593208e-05, "log_odds_chosen": 3.6207275390625, "log_odds_ratio": -0.20964643359184265, "logits/chosen": -0.41321468353271484, "logits/rejected": -0.44356417655944824, "logps/chosen": -0.08171873539686203, "logps/rejected": -0.7008731365203857, "loss": 3.3274, "nll_loss": 0.8108948469161987, "rewards/accuracies": 1.0, "rewards/chosen": -0.008171873167157173, "rewards/margins": 0.06191544234752655, "rewards/rejected": -0.07008731365203857, "step": 2170 }, { "epoch": 1.5013831258644537, "grad_norm": 6.830862045288086, "learning_rate": 4.7214538189641926e-05, "log_odds_chosen": 2.9175610542297363, "log_odds_ratio": -0.34225520491600037, "logits/chosen": -0.5801506042480469, "logits/rejected": -0.6245124340057373, "logps/chosen": -0.19777889549732208, "logps/rejected": -0.8225585222244263, "loss": 3.689, "nll_loss": 0.8880286812782288, "rewards/accuracies": 0.75, "rewards/chosen": -0.019777892157435417, "rewards/margins": 0.062477968633174896, "rewards/rejected": -0.08225585520267487, "step": 2171 }, { "epoch": 1.5020746887966805, "grad_norm": 9.817378044128418, "learning_rate": 4.721069617335178e-05, "log_odds_chosen": 2.637803792953491, "log_odds_ratio": -0.326236367225647, "logits/chosen": -0.3923531770706177, "logits/rejected": -0.4752095937728882, "logps/chosen": -0.10277456045150757, "logps/rejected": -0.7350403666496277, "loss": 4.3623, "nll_loss": 1.0579389333724976, "rewards/accuracies": 0.875, "rewards/chosen": -0.010277455672621727, "rewards/margins": 0.06322658061981201, "rewards/rejected": -0.07350403815507889, "step": 2172 }, { "epoch": 1.5027662517289073, "grad_norm": 7.807641983032227, "learning_rate": 4.720685415706163e-05, "log_odds_chosen": 1.7504419088363647, "log_odds_ratio": -0.5149462223052979, "logits/chosen": -0.5191226005554199, "logits/rejected": -0.514976441860199, "logps/chosen": -0.1291445940732956, "logps/rejected": -0.38086992502212524, "loss": 3.6293, "nll_loss": 0.8558423519134521, "rewards/accuracies": 0.875, "rewards/chosen": -0.012914460152387619, "rewards/margins": 0.025172535330057144, "rewards/rejected": -0.03808699548244476, "step": 2173 }, { "epoch": 1.5034578146611342, "grad_norm": 5.703883171081543, "learning_rate": 4.7203012140771477e-05, "log_odds_chosen": 4.115744113922119, "log_odds_ratio": -0.2530246376991272, "logits/chosen": -0.39776185154914856, "logits/rejected": -0.42042019963264465, "logps/chosen": -0.11606866866350174, "logps/rejected": -0.5591020584106445, "loss": 2.8344, "nll_loss": 0.6832969188690186, "rewards/accuracies": 0.75, "rewards/chosen": -0.011606866493821144, "rewards/margins": 0.04430334270000458, "rewards/rejected": -0.05591020733118057, "step": 2174 }, { "epoch": 1.504149377593361, "grad_norm": 5.126632213592529, "learning_rate": 4.719917012448133e-05, "log_odds_chosen": 3.8321008682250977, "log_odds_ratio": -0.20930258929729462, "logits/chosen": -0.413907915353775, "logits/rejected": -0.4011528491973877, "logps/chosen": -0.05856021121144295, "logps/rejected": -0.8056188821792603, "loss": 3.0414, "nll_loss": 0.7394230365753174, "rewards/accuracies": 0.875, "rewards/chosen": -0.00585602130740881, "rewards/margins": 0.07470586895942688, "rewards/rejected": -0.08056189119815826, "step": 2175 }, { "epoch": 1.5048409405255878, "grad_norm": 6.326693058013916, "learning_rate": 4.719532810819118e-05, "log_odds_chosen": 4.230525970458984, "log_odds_ratio": -0.14581666886806488, "logits/chosen": -0.486900269985199, "logits/rejected": -0.4845389723777771, "logps/chosen": -0.0919831246137619, "logps/rejected": -0.8042812347412109, "loss": 3.837, "nll_loss": 0.9446582794189453, "rewards/accuracies": 1.0, "rewards/chosen": -0.009198312647640705, "rewards/margins": 0.07122981548309326, "rewards/rejected": -0.0804281234741211, "step": 2176 }, { "epoch": 1.5055325034578146, "grad_norm": 6.409120559692383, "learning_rate": 4.7191486091901034e-05, "log_odds_chosen": 2.6936960220336914, "log_odds_ratio": -0.239236980676651, "logits/chosen": -0.33274608850479126, "logits/rejected": -0.38033154606819153, "logps/chosen": -0.11048668622970581, "logps/rejected": -0.6125718355178833, "loss": 3.0156, "nll_loss": 0.7299783229827881, "rewards/accuracies": 0.875, "rewards/chosen": -0.011048668995499611, "rewards/margins": 0.05020851641893387, "rewards/rejected": -0.06125718355178833, "step": 2177 }, { "epoch": 1.5062240663900415, "grad_norm": 7.672756195068359, "learning_rate": 4.718764407561088e-05, "log_odds_chosen": 3.801034688949585, "log_odds_ratio": -0.316747784614563, "logits/chosen": -0.19822092354297638, "logits/rejected": -0.2509034276008606, "logps/chosen": -0.1539057195186615, "logps/rejected": -1.0572311878204346, "loss": 3.6783, "nll_loss": 0.8879072070121765, "rewards/accuracies": 0.75, "rewards/chosen": -0.01539057306945324, "rewards/margins": 0.09033254534006119, "rewards/rejected": -0.10572311282157898, "step": 2178 }, { "epoch": 1.5069156293222683, "grad_norm": 5.304668426513672, "learning_rate": 4.718380205932074e-05, "log_odds_chosen": 4.217876434326172, "log_odds_ratio": -0.23010079562664032, "logits/chosen": -0.6556233167648315, "logits/rejected": -0.6610323786735535, "logps/chosen": -0.05022624507546425, "logps/rejected": -0.6683301329612732, "loss": 3.1224, "nll_loss": 0.7575937509536743, "rewards/accuracies": 0.875, "rewards/chosen": -0.005022624507546425, "rewards/margins": 0.061810389161109924, "rewards/rejected": -0.0668330118060112, "step": 2179 }, { "epoch": 1.5076071922544951, "grad_norm": 3.787766933441162, "learning_rate": 4.7179960043030584e-05, "log_odds_chosen": 3.8163716793060303, "log_odds_ratio": -0.09614754468202591, "logits/chosen": -0.3028152287006378, "logits/rejected": -0.3119809329509735, "logps/chosen": -0.0463411808013916, "logps/rejected": -0.7167572379112244, "loss": 2.5819, "nll_loss": 0.6358667016029358, "rewards/accuracies": 1.0, "rewards/chosen": -0.004634118173271418, "rewards/margins": 0.06704160571098328, "rewards/rejected": -0.07167572528123856, "step": 2180 }, { "epoch": 1.508298755186722, "grad_norm": 8.500920295715332, "learning_rate": 4.717611802674044e-05, "log_odds_chosen": 4.703157424926758, "log_odds_ratio": -0.09611813724040985, "logits/chosen": -0.5946884751319885, "logits/rejected": -0.5608884692192078, "logps/chosen": -0.047219306230545044, "logps/rejected": -0.5458288192749023, "loss": 4.019, "nll_loss": 0.9951435327529907, "rewards/accuracies": 1.0, "rewards/chosen": -0.0047219302505254745, "rewards/margins": 0.04986095428466797, "rewards/rejected": -0.054582882672548294, "step": 2181 }, { "epoch": 1.5089903181189488, "grad_norm": 7.796213626861572, "learning_rate": 4.717227601045029e-05, "log_odds_chosen": 5.931513786315918, "log_odds_ratio": -0.12241919338703156, "logits/chosen": -0.402628093957901, "logits/rejected": -0.45937222242355347, "logps/chosen": -0.06165814772248268, "logps/rejected": -1.0699610710144043, "loss": 3.4985, "nll_loss": 0.8623833060264587, "rewards/accuracies": 0.875, "rewards/chosen": -0.006165814585983753, "rewards/margins": 0.10083030164241791, "rewards/rejected": -0.10699611157178879, "step": 2182 }, { "epoch": 1.5096818810511756, "grad_norm": 4.598722457885742, "learning_rate": 4.7168433994160135e-05, "log_odds_chosen": 4.163899898529053, "log_odds_ratio": -0.14226964116096497, "logits/chosen": -0.4701404571533203, "logits/rejected": -0.49784785509109497, "logps/chosen": -0.07165002077817917, "logps/rejected": -0.796354353427887, "loss": 2.391, "nll_loss": 0.5835211277008057, "rewards/accuracies": 1.0, "rewards/chosen": -0.007165002636611462, "rewards/margins": 0.07247043401002884, "rewards/rejected": -0.07963543385267258, "step": 2183 }, { "epoch": 1.5103734439834025, "grad_norm": 4.878766059875488, "learning_rate": 4.716459197786999e-05, "log_odds_chosen": 3.9604339599609375, "log_odds_ratio": -0.14563514292240143, "logits/chosen": -0.42983126640319824, "logits/rejected": -0.48781245946884155, "logps/chosen": -0.12423344701528549, "logps/rejected": -0.8960301280021667, "loss": 3.1638, "nll_loss": 0.7763780355453491, "rewards/accuracies": 1.0, "rewards/chosen": -0.012423344887793064, "rewards/margins": 0.07717966288328171, "rewards/rejected": -0.0896030068397522, "step": 2184 }, { "epoch": 1.5110650069156293, "grad_norm": 4.061890602111816, "learning_rate": 4.716074996157984e-05, "log_odds_chosen": 4.62667989730835, "log_odds_ratio": -0.09810079634189606, "logits/chosen": -0.42752885818481445, "logits/rejected": -0.45172789692878723, "logps/chosen": -0.03561442345380783, "logps/rejected": -0.48452576994895935, "loss": 2.199, "nll_loss": 0.5399402379989624, "rewards/accuracies": 1.0, "rewards/chosen": -0.003561442717909813, "rewards/margins": 0.044891130179166794, "rewards/rejected": -0.048452578485012054, "step": 2185 }, { "epoch": 1.5117565698478561, "grad_norm": 8.15449333190918, "learning_rate": 4.715690794528969e-05, "log_odds_chosen": 3.6245779991149902, "log_odds_ratio": -0.15182821452617645, "logits/chosen": -0.4021381139755249, "logits/rejected": -0.42084044218063354, "logps/chosen": -0.0873645544052124, "logps/rejected": -0.9084411859512329, "loss": 3.3647, "nll_loss": 0.8259831666946411, "rewards/accuracies": 1.0, "rewards/chosen": -0.00873645581305027, "rewards/margins": 0.08210767060518265, "rewards/rejected": -0.09084412455558777, "step": 2186 }, { "epoch": 1.512448132780083, "grad_norm": 7.220978736877441, "learning_rate": 4.715306592899954e-05, "log_odds_chosen": 1.8586406707763672, "log_odds_ratio": -0.3653057813644409, "logits/chosen": -0.5158290863037109, "logits/rejected": -0.5505003333091736, "logps/chosen": -0.09770803898572922, "logps/rejected": -0.3956950902938843, "loss": 3.5248, "nll_loss": 0.8446773290634155, "rewards/accuracies": 0.75, "rewards/chosen": -0.009770805016160011, "rewards/margins": 0.029798705130815506, "rewards/rejected": -0.03956950828433037, "step": 2187 }, { "epoch": 1.5131396957123098, "grad_norm": 9.282061576843262, "learning_rate": 4.71492239127094e-05, "log_odds_chosen": 5.534390449523926, "log_odds_ratio": -0.2007375955581665, "logits/chosen": -0.6496320962905884, "logits/rejected": -0.6339712142944336, "logps/chosen": -0.05207693949341774, "logps/rejected": -0.7376291751861572, "loss": 3.0871, "nll_loss": 0.7517117261886597, "rewards/accuracies": 0.875, "rewards/chosen": -0.005207694135606289, "rewards/margins": 0.06855522096157074, "rewards/rejected": -0.0737629160284996, "step": 2188 }, { "epoch": 1.5138312586445366, "grad_norm": 13.90990924835205, "learning_rate": 4.714538189641924e-05, "log_odds_chosen": 3.7275521755218506, "log_odds_ratio": -0.5319461226463318, "logits/chosen": -0.6782576441764832, "logits/rejected": -0.7164244651794434, "logps/chosen": -0.07063320279121399, "logps/rejected": -0.7580413818359375, "loss": 3.8182, "nll_loss": 0.9013439416885376, "rewards/accuracies": 0.75, "rewards/chosen": -0.007063319906592369, "rewards/margins": 0.0687408298254013, "rewards/rejected": -0.07580414414405823, "step": 2189 }, { "epoch": 1.5145228215767634, "grad_norm": 11.419933319091797, "learning_rate": 4.7141539880129095e-05, "log_odds_chosen": 3.8566625118255615, "log_odds_ratio": -0.6142464876174927, "logits/chosen": -0.8340749740600586, "logits/rejected": -0.8271781206130981, "logps/chosen": -0.11666074395179749, "logps/rejected": -0.5378887057304382, "loss": 2.7655, "nll_loss": 0.6299543976783752, "rewards/accuracies": 0.75, "rewards/chosen": -0.011666074395179749, "rewards/margins": 0.042122796177864075, "rewards/rejected": -0.05378887057304382, "step": 2190 }, { "epoch": 1.5152143845089903, "grad_norm": 6.894917011260986, "learning_rate": 4.713769786383895e-05, "log_odds_chosen": 5.651021480560303, "log_odds_ratio": -0.1871916502714157, "logits/chosen": -0.5743628740310669, "logits/rejected": -0.6309788823127747, "logps/chosen": -0.06064632534980774, "logps/rejected": -1.5780786275863647, "loss": 3.1899, "nll_loss": 0.7787548899650574, "rewards/accuracies": 0.875, "rewards/chosen": -0.006064632907509804, "rewards/margins": 0.15174323320388794, "rewards/rejected": -0.157807856798172, "step": 2191 }, { "epoch": 1.515905947441217, "grad_norm": 12.359020233154297, "learning_rate": 4.7133855847548793e-05, "log_odds_chosen": 3.298121213912964, "log_odds_ratio": -0.49267399311065674, "logits/chosen": -0.6871169805526733, "logits/rejected": -0.6752989888191223, "logps/chosen": -0.1996937096118927, "logps/rejected": -0.5604193210601807, "loss": 3.2892, "nll_loss": 0.7730243802070618, "rewards/accuracies": 0.75, "rewards/chosen": -0.01996937021613121, "rewards/margins": 0.036072563380002975, "rewards/rejected": -0.056041933596134186, "step": 2192 }, { "epoch": 1.516597510373444, "grad_norm": 10.040817260742188, "learning_rate": 4.7130013831258646e-05, "log_odds_chosen": 5.390373229980469, "log_odds_ratio": -0.04475773125886917, "logits/chosen": -0.41156676411628723, "logits/rejected": -0.5033215284347534, "logps/chosen": -0.046836577355861664, "logps/rejected": -1.0671862363815308, "loss": 4.4292, "nll_loss": 1.1028175354003906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0046836575493216515, "rewards/margins": 0.10203496366739273, "rewards/rejected": -0.10671862214803696, "step": 2193 }, { "epoch": 1.5172890733056708, "grad_norm": 3.939854621887207, "learning_rate": 4.71261718149685e-05, "log_odds_chosen": 5.454831123352051, "log_odds_ratio": -0.08274343609809875, "logits/chosen": -0.4129883646965027, "logits/rejected": -0.46223029494285583, "logps/chosen": -0.051142267882823944, "logps/rejected": -1.1733758449554443, "loss": 2.4585, "nll_loss": 0.6063456535339355, "rewards/accuracies": 1.0, "rewards/chosen": -0.005114227067679167, "rewards/margins": 0.11222335696220398, "rewards/rejected": -0.11733758449554443, "step": 2194 }, { "epoch": 1.5179806362378976, "grad_norm": 8.463907241821289, "learning_rate": 4.712232979867835e-05, "log_odds_chosen": 4.573187828063965, "log_odds_ratio": -0.14323106408119202, "logits/chosen": -0.7672544121742249, "logits/rejected": -0.8273261785507202, "logps/chosen": -0.07819414883852005, "logps/rejected": -0.8404219746589661, "loss": 3.8202, "nll_loss": 0.9407300353050232, "rewards/accuracies": 1.0, "rewards/chosen": -0.00781941507011652, "rewards/margins": 0.07622279226779938, "rewards/rejected": -0.08404220640659332, "step": 2195 }, { "epoch": 1.5186721991701244, "grad_norm": 5.669083118438721, "learning_rate": 4.7118487782388196e-05, "log_odds_chosen": 4.349452018737793, "log_odds_ratio": -0.23575344681739807, "logits/chosen": -0.345994234085083, "logits/rejected": -0.38687825202941895, "logps/chosen": -0.08902490884065628, "logps/rejected": -0.8375565409660339, "loss": 3.4363, "nll_loss": 0.8354873657226562, "rewards/accuracies": 0.75, "rewards/chosen": -0.008902492001652718, "rewards/margins": 0.07485316693782806, "rewards/rejected": -0.08375565707683563, "step": 2196 }, { "epoch": 1.5193637621023512, "grad_norm": 6.776609897613525, "learning_rate": 4.711464576609805e-05, "log_odds_chosen": 7.661093711853027, "log_odds_ratio": -0.00390626722946763, "logits/chosen": -0.5424741506576538, "logits/rejected": -0.66068434715271, "logps/chosen": -0.0027085847686976194, "logps/rejected": -1.221256971359253, "loss": 2.6495, "nll_loss": 0.6619873046875, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002708584943320602, "rewards/margins": 0.12185483425855637, "rewards/rejected": -0.12212569266557693, "step": 2197 }, { "epoch": 1.520055325034578, "grad_norm": 6.6352458000183105, "learning_rate": 4.71108037498079e-05, "log_odds_chosen": 3.3492088317871094, "log_odds_ratio": -0.3416593372821808, "logits/chosen": -0.6170538663864136, "logits/rejected": -0.6046271324157715, "logps/chosen": -0.09723344445228577, "logps/rejected": -0.8013380169868469, "loss": 3.7498, "nll_loss": 0.9032930135726929, "rewards/accuracies": 0.75, "rewards/chosen": -0.009723344817757607, "rewards/margins": 0.07041046023368835, "rewards/rejected": -0.08013381063938141, "step": 2198 }, { "epoch": 1.520746887966805, "grad_norm": 5.197224140167236, "learning_rate": 4.7106961733517754e-05, "log_odds_chosen": 3.2548828125, "log_odds_ratio": -0.12569035589694977, "logits/chosen": -0.8019800186157227, "logits/rejected": -0.7516960501670837, "logps/chosen": -0.10095830261707306, "logps/rejected": -0.8367016315460205, "loss": 3.6117, "nll_loss": 0.8903552889823914, "rewards/accuracies": 1.0, "rewards/chosen": -0.010095831006765366, "rewards/margins": 0.07357433438301086, "rewards/rejected": -0.08367016911506653, "step": 2199 }, { "epoch": 1.5214384508990317, "grad_norm": 6.180981636047363, "learning_rate": 4.71031197172276e-05, "log_odds_chosen": 7.052360534667969, "log_odds_ratio": -0.011196529492735863, "logits/chosen": -0.6240170001983643, "logits/rejected": -0.7029715776443481, "logps/chosen": -0.006017737090587616, "logps/rejected": -1.1081998348236084, "loss": 3.5874, "nll_loss": 0.8957244753837585, "rewards/accuracies": 1.0, "rewards/chosen": -0.000601773732341826, "rewards/margins": 0.11021822690963745, "rewards/rejected": -0.1108199954032898, "step": 2200 }, { "epoch": 1.5221300138312586, "grad_norm": 6.7219109535217285, "learning_rate": 4.709927770093746e-05, "log_odds_chosen": 2.2531862258911133, "log_odds_ratio": -0.41288065910339355, "logits/chosen": -0.3794539272785187, "logits/rejected": -0.3585187494754791, "logps/chosen": -0.18572141230106354, "logps/rejected": -0.6649050712585449, "loss": 3.0795, "nll_loss": 0.7285885810852051, "rewards/accuracies": 0.875, "rewards/chosen": -0.018572140485048294, "rewards/margins": 0.04791836813092232, "rewards/rejected": -0.06649051606655121, "step": 2201 }, { "epoch": 1.5228215767634854, "grad_norm": 6.930861949920654, "learning_rate": 4.7095435684647304e-05, "log_odds_chosen": 3.2896299362182617, "log_odds_ratio": -0.2985961437225342, "logits/chosen": -0.536874532699585, "logits/rejected": -0.55685955286026, "logps/chosen": -0.09223245084285736, "logps/rejected": -0.9117711186408997, "loss": 3.1251, "nll_loss": 0.7514032125473022, "rewards/accuracies": 0.875, "rewards/chosen": -0.009223245084285736, "rewards/margins": 0.08195386826992035, "rewards/rejected": -0.09117711335420609, "step": 2202 }, { "epoch": 1.5235131396957122, "grad_norm": 6.204575538635254, "learning_rate": 4.709159366835716e-05, "log_odds_chosen": 6.052004814147949, "log_odds_ratio": -0.183577299118042, "logits/chosen": -0.5787208080291748, "logits/rejected": -0.6404365301132202, "logps/chosen": -0.04411087930202484, "logps/rejected": -1.0143474340438843, "loss": 2.8211, "nll_loss": 0.6869177222251892, "rewards/accuracies": 0.875, "rewards/chosen": -0.004411087837070227, "rewards/margins": 0.09702365100383759, "rewards/rejected": -0.10143474489450455, "step": 2203 }, { "epoch": 1.524204702627939, "grad_norm": 7.466019153594971, "learning_rate": 4.708775165206701e-05, "log_odds_chosen": 5.644599437713623, "log_odds_ratio": -0.32767152786254883, "logits/chosen": -0.37275075912475586, "logits/rejected": -0.37881413102149963, "logps/chosen": -0.07302402704954147, "logps/rejected": -0.7124531269073486, "loss": 2.5691, "nll_loss": 0.6095045804977417, "rewards/accuracies": 0.875, "rewards/chosen": -0.007302402053028345, "rewards/margins": 0.06394290924072266, "rewards/rejected": -0.07124531269073486, "step": 2204 }, { "epoch": 1.5248962655601659, "grad_norm": 5.963090419769287, "learning_rate": 4.7083909635776855e-05, "log_odds_chosen": 4.4939069747924805, "log_odds_ratio": -0.03454150632023811, "logits/chosen": -0.6710375547409058, "logits/rejected": -0.6639552116394043, "logps/chosen": -0.03183533623814583, "logps/rejected": -0.6352487206459045, "loss": 3.6995, "nll_loss": 0.9214182496070862, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031835336703807116, "rewards/margins": 0.06034134328365326, "rewards/rejected": -0.06352487206459045, "step": 2205 }, { "epoch": 1.5255878284923927, "grad_norm": 9.382392883300781, "learning_rate": 4.708006761948671e-05, "log_odds_chosen": 4.671770095825195, "log_odds_ratio": -0.13547496497631073, "logits/chosen": -0.3547920286655426, "logits/rejected": -0.42458677291870117, "logps/chosen": -0.06325706094503403, "logps/rejected": -0.9535627365112305, "loss": 4.7383, "nll_loss": 1.171016812324524, "rewards/accuracies": 1.0, "rewards/chosen": -0.006325706373900175, "rewards/margins": 0.08903056383132935, "rewards/rejected": -0.09535627067089081, "step": 2206 }, { "epoch": 1.5262793914246195, "grad_norm": 8.332213401794434, "learning_rate": 4.707622560319656e-05, "log_odds_chosen": 2.178969383239746, "log_odds_ratio": -0.4346066117286682, "logits/chosen": -0.049312885850667953, "logits/rejected": 0.04795428365468979, "logps/chosen": -0.11472927033901215, "logps/rejected": -0.44776538014411926, "loss": 3.148, "nll_loss": 0.7435441017150879, "rewards/accuracies": 0.75, "rewards/chosen": -0.011472927406430244, "rewards/margins": 0.033303603529930115, "rewards/rejected": -0.04477653279900551, "step": 2207 }, { "epoch": 1.5269709543568464, "grad_norm": 5.1370720863342285, "learning_rate": 4.707238358690641e-05, "log_odds_chosen": 6.408810615539551, "log_odds_ratio": -0.0063616689294576645, "logits/chosen": -0.4974561333656311, "logits/rejected": -0.5476498603820801, "logps/chosen": -0.012093435041606426, "logps/rejected": -1.1643551588058472, "loss": 2.863, "nll_loss": 0.7151111364364624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012093434343114495, "rewards/margins": 0.11522617936134338, "rewards/rejected": -0.11643552035093307, "step": 2208 }, { "epoch": 1.5276625172890732, "grad_norm": 9.080206871032715, "learning_rate": 4.706854157061626e-05, "log_odds_chosen": 5.32785701751709, "log_odds_ratio": -0.6000714898109436, "logits/chosen": -0.34474441409111023, "logits/rejected": -0.39225518703460693, "logps/chosen": -0.05898641422390938, "logps/rejected": -0.8116523623466492, "loss": 3.4182, "nll_loss": 0.7945421934127808, "rewards/accuracies": 0.875, "rewards/chosen": -0.005898641422390938, "rewards/margins": 0.07526659220457077, "rewards/rejected": -0.08116523176431656, "step": 2209 }, { "epoch": 1.5283540802213, "grad_norm": 8.669232368469238, "learning_rate": 4.706469955432612e-05, "log_odds_chosen": 6.044839859008789, "log_odds_ratio": -0.020968372002243996, "logits/chosen": -0.28658148646354675, "logits/rejected": -0.33178937435150146, "logps/chosen": -0.02577211521565914, "logps/rejected": -1.0604619979858398, "loss": 4.4212, "nll_loss": 1.1032041311264038, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025772114749997854, "rewards/margins": 0.10346899181604385, "rewards/rejected": -0.10604619979858398, "step": 2210 }, { "epoch": 1.5290456431535269, "grad_norm": 4.403314113616943, "learning_rate": 4.706085753803596e-05, "log_odds_chosen": 5.636608600616455, "log_odds_ratio": -0.12528975307941437, "logits/chosen": -0.18785572052001953, "logits/rejected": -0.254210501909256, "logps/chosen": -0.042339250445365906, "logps/rejected": -0.874763548374176, "loss": 2.4197, "nll_loss": 0.5923962593078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.004233924672007561, "rewards/margins": 0.08324243128299713, "rewards/rejected": -0.08747635781764984, "step": 2211 }, { "epoch": 1.5297372060857537, "grad_norm": 10.492977142333984, "learning_rate": 4.7057015521745815e-05, "log_odds_chosen": 2.836245536804199, "log_odds_ratio": -0.3267596662044525, "logits/chosen": -0.410195916891098, "logits/rejected": -0.4073330760002136, "logps/chosen": -0.1376115381717682, "logps/rejected": -0.7667669057846069, "loss": 3.9706, "nll_loss": 0.9599714875221252, "rewards/accuracies": 0.875, "rewards/chosen": -0.013761154375970364, "rewards/margins": 0.06291554123163223, "rewards/rejected": -0.07667669653892517, "step": 2212 }, { "epoch": 1.5304287690179805, "grad_norm": 5.397129058837891, "learning_rate": 4.705317350545567e-05, "log_odds_chosen": 4.809292793273926, "log_odds_ratio": -0.054968055337667465, "logits/chosen": -0.5614354610443115, "logits/rejected": -0.5658167004585266, "logps/chosen": -0.06616536527872086, "logps/rejected": -1.0150160789489746, "loss": 2.9256, "nll_loss": 0.725896954536438, "rewards/accuracies": 1.0, "rewards/chosen": -0.006616536527872086, "rewards/margins": 0.09488508105278015, "rewards/rejected": -0.10150161385536194, "step": 2213 }, { "epoch": 1.5311203319502074, "grad_norm": 6.102870464324951, "learning_rate": 4.704933148916551e-05, "log_odds_chosen": 3.3828892707824707, "log_odds_ratio": -0.2942639887332916, "logits/chosen": -0.39553898572921753, "logits/rejected": -0.3925166130065918, "logps/chosen": -0.07648057490587234, "logps/rejected": -0.4567372798919678, "loss": 3.152, "nll_loss": 0.758583664894104, "rewards/accuracies": 0.75, "rewards/chosen": -0.007648056838661432, "rewards/margins": 0.03802567347884178, "rewards/rejected": -0.045673731714487076, "step": 2214 }, { "epoch": 1.5318118948824342, "grad_norm": 11.949549674987793, "learning_rate": 4.7045489472875366e-05, "log_odds_chosen": 3.903663396835327, "log_odds_ratio": -0.39459455013275146, "logits/chosen": -0.2356366515159607, "logits/rejected": -0.2835361659526825, "logps/chosen": -0.06455142050981522, "logps/rejected": -0.6523666977882385, "loss": 3.3108, "nll_loss": 0.7882421612739563, "rewards/accuracies": 0.75, "rewards/chosen": -0.006455142050981522, "rewards/margins": 0.05878153070807457, "rewards/rejected": -0.06523667275905609, "step": 2215 }, { "epoch": 1.532503457814661, "grad_norm": 4.589229106903076, "learning_rate": 4.704164745658522e-05, "log_odds_chosen": 2.781095504760742, "log_odds_ratio": -0.1778787225484848, "logits/chosen": -0.510430634021759, "logits/rejected": -0.5029317140579224, "logps/chosen": -0.11355285346508026, "logps/rejected": -0.7437509894371033, "loss": 3.0605, "nll_loss": 0.7473265528678894, "rewards/accuracies": 1.0, "rewards/chosen": -0.011355285532772541, "rewards/margins": 0.06301981210708618, "rewards/rejected": -0.07437510043382645, "step": 2216 }, { "epoch": 1.5331950207468878, "grad_norm": 8.02696418762207, "learning_rate": 4.703780544029507e-05, "log_odds_chosen": 3.008775472640991, "log_odds_ratio": -0.3435032367706299, "logits/chosen": -0.4846343398094177, "logits/rejected": -0.4518267810344696, "logps/chosen": -0.10005879402160645, "logps/rejected": -0.5169304013252258, "loss": 4.5717, "nll_loss": 1.1085805892944336, "rewards/accuracies": 0.875, "rewards/chosen": -0.010005880147218704, "rewards/margins": 0.04168716073036194, "rewards/rejected": -0.05169304460287094, "step": 2217 }, { "epoch": 1.5338865836791147, "grad_norm": 4.993509769439697, "learning_rate": 4.7033963424004916e-05, "log_odds_chosen": 6.149620532989502, "log_odds_ratio": -0.07025769352912903, "logits/chosen": -0.36979010701179504, "logits/rejected": -0.36081594228744507, "logps/chosen": -0.03063029982149601, "logps/rejected": -0.859117329120636, "loss": 2.776, "nll_loss": 0.686970591545105, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030630300752818584, "rewards/margins": 0.08284871280193329, "rewards/rejected": -0.08591174334287643, "step": 2218 }, { "epoch": 1.5345781466113415, "grad_norm": 7.524913311004639, "learning_rate": 4.7030121407714775e-05, "log_odds_chosen": 4.3976359367370605, "log_odds_ratio": -0.19181588292121887, "logits/chosen": -0.6076658368110657, "logits/rejected": -0.6003731489181519, "logps/chosen": -0.05698583647608757, "logps/rejected": -0.697370707988739, "loss": 4.0902, "nll_loss": 1.003367304801941, "rewards/accuracies": 0.75, "rewards/chosen": -0.005698584020137787, "rewards/margins": 0.06403848528862, "rewards/rejected": -0.06973707675933838, "step": 2219 }, { "epoch": 1.5352697095435683, "grad_norm": 9.015628814697266, "learning_rate": 4.702627939142462e-05, "log_odds_chosen": 4.4959282875061035, "log_odds_ratio": -0.49220168590545654, "logits/chosen": -0.47423118352890015, "logits/rejected": -0.489341676235199, "logps/chosen": -0.09900905936956406, "logps/rejected": -0.9558752775192261, "loss": 3.5996, "nll_loss": 0.8506906628608704, "rewards/accuracies": 0.625, "rewards/chosen": -0.009900907054543495, "rewards/margins": 0.08568662405014038, "rewards/rejected": -0.09558752924203873, "step": 2220 }, { "epoch": 1.5359612724757952, "grad_norm": 8.050044059753418, "learning_rate": 4.7022437375134474e-05, "log_odds_chosen": 2.9283671379089355, "log_odds_ratio": -0.2631620168685913, "logits/chosen": -0.7292971611022949, "logits/rejected": -0.7532137632369995, "logps/chosen": -0.07009609788656235, "logps/rejected": -0.6604401469230652, "loss": 3.9517, "nll_loss": 0.9616029262542725, "rewards/accuracies": 0.875, "rewards/chosen": -0.00700960960239172, "rewards/margins": 0.05903441086411476, "rewards/rejected": -0.06604401767253876, "step": 2221 }, { "epoch": 1.536652835408022, "grad_norm": 8.144369125366211, "learning_rate": 4.7018595358844326e-05, "log_odds_chosen": 6.381158828735352, "log_odds_ratio": -0.033019233494997025, "logits/chosen": -0.3614676594734192, "logits/rejected": -0.3959410786628723, "logps/chosen": -0.02348705753684044, "logps/rejected": -1.3655200004577637, "loss": 3.0952, "nll_loss": 0.7705005407333374, "rewards/accuracies": 1.0, "rewards/chosen": -0.00234870589338243, "rewards/margins": 0.13420329988002777, "rewards/rejected": -0.13655200600624084, "step": 2222 }, { "epoch": 1.5373443983402488, "grad_norm": 3.36130952835083, "learning_rate": 4.701475334255417e-05, "log_odds_chosen": 6.352807521820068, "log_odds_ratio": -0.04064463824033737, "logits/chosen": -0.0948086827993393, "logits/rejected": -0.13905943930149078, "logps/chosen": -0.018351389095187187, "logps/rejected": -0.7243081331253052, "loss": 2.6742, "nll_loss": 0.6644976735115051, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018351390026509762, "rewards/margins": 0.07059568166732788, "rewards/rejected": -0.0724308118224144, "step": 2223 }, { "epoch": 1.5380359612724757, "grad_norm": 5.5334954261779785, "learning_rate": 4.7010911326264024e-05, "log_odds_chosen": 4.103457450866699, "log_odds_ratio": -0.4907708168029785, "logits/chosen": -0.39433753490448, "logits/rejected": -0.41758227348327637, "logps/chosen": -0.1360062211751938, "logps/rejected": -0.6773937344551086, "loss": 3.0142, "nll_loss": 0.7044817209243774, "rewards/accuracies": 0.75, "rewards/chosen": -0.013600623235106468, "rewards/margins": 0.054138749837875366, "rewards/rejected": -0.06773936748504639, "step": 2224 }, { "epoch": 1.5387275242047025, "grad_norm": 8.210365295410156, "learning_rate": 4.7007069309973877e-05, "log_odds_chosen": 4.578831672668457, "log_odds_ratio": -0.11062193661928177, "logits/chosen": -0.503457248210907, "logits/rejected": -0.5944218635559082, "logps/chosen": -0.060358330607414246, "logps/rejected": -1.2142866849899292, "loss": 4.0134, "nll_loss": 0.9922851324081421, "rewards/accuracies": 1.0, "rewards/chosen": -0.006035833153873682, "rewards/margins": 0.11539284139871597, "rewards/rejected": -0.12142866849899292, "step": 2225 }, { "epoch": 1.5394190871369293, "grad_norm": 7.67247200012207, "learning_rate": 4.700322729368373e-05, "log_odds_chosen": 4.713946342468262, "log_odds_ratio": -0.17651237547397614, "logits/chosen": -0.8597179055213928, "logits/rejected": -0.8643999099731445, "logps/chosen": -0.04120595008134842, "logps/rejected": -0.8932948112487793, "loss": 3.6999, "nll_loss": 0.9073218703269958, "rewards/accuracies": 0.875, "rewards/chosen": -0.004120595287531614, "rewards/margins": 0.08520889282226562, "rewards/rejected": -0.08932948112487793, "step": 2226 }, { "epoch": 1.5401106500691562, "grad_norm": 10.076916694641113, "learning_rate": 4.6999385277393575e-05, "log_odds_chosen": 4.503382205963135, "log_odds_ratio": -0.19504140317440033, "logits/chosen": -0.4432103931903839, "logits/rejected": -0.4810800850391388, "logps/chosen": -0.05120420455932617, "logps/rejected": -0.8100253343582153, "loss": 4.4369, "nll_loss": 1.0897185802459717, "rewards/accuracies": 0.875, "rewards/chosen": -0.005120420828461647, "rewards/margins": 0.07588210701942444, "rewards/rejected": -0.08100253343582153, "step": 2227 }, { "epoch": 1.540802213001383, "grad_norm": 13.191364288330078, "learning_rate": 4.6995543261103434e-05, "log_odds_chosen": 3.591403007507324, "log_odds_ratio": -0.5504745841026306, "logits/chosen": -0.6312179565429688, "logits/rejected": -0.7169389724731445, "logps/chosen": -0.06760822981595993, "logps/rejected": -0.6767516732215881, "loss": 3.1659, "nll_loss": 0.7364269495010376, "rewards/accuracies": 0.875, "rewards/chosen": -0.0067608230747282505, "rewards/margins": 0.06091434881091118, "rewards/rejected": -0.06767517328262329, "step": 2228 }, { "epoch": 1.5414937759336098, "grad_norm": 5.499429702758789, "learning_rate": 4.699170124481328e-05, "log_odds_chosen": 4.832643508911133, "log_odds_ratio": -0.1555217206478119, "logits/chosen": -0.2926243841648102, "logits/rejected": -0.3150829076766968, "logps/chosen": -0.04423796385526657, "logps/rejected": -0.8635392189025879, "loss": 2.8915, "nll_loss": 0.7073196172714233, "rewards/accuracies": 0.875, "rewards/chosen": -0.0044237966649234295, "rewards/margins": 0.08193013072013855, "rewards/rejected": -0.08635392785072327, "step": 2229 }, { "epoch": 1.5421853388658366, "grad_norm": 4.754825115203857, "learning_rate": 4.698785922852313e-05, "log_odds_chosen": 5.817052364349365, "log_odds_ratio": -0.2055891752243042, "logits/chosen": -0.33507710695266724, "logits/rejected": -0.433362752199173, "logps/chosen": -0.049009814858436584, "logps/rejected": -0.8625462055206299, "loss": 2.7819, "nll_loss": 0.674911618232727, "rewards/accuracies": 0.875, "rewards/chosen": -0.004900981672108173, "rewards/margins": 0.08135364204645157, "rewards/rejected": -0.08625461906194687, "step": 2230 }, { "epoch": 1.5428769017980635, "grad_norm": 7.7743635177612305, "learning_rate": 4.6984017212232984e-05, "log_odds_chosen": 3.0345685482025146, "log_odds_ratio": -0.2999267578125, "logits/chosen": -0.336641788482666, "logits/rejected": -0.3972005844116211, "logps/chosen": -0.11239126324653625, "logps/rejected": -0.6859149932861328, "loss": 3.0472, "nll_loss": 0.7318093180656433, "rewards/accuracies": 0.75, "rewards/chosen": -0.011239126324653625, "rewards/margins": 0.057352371513843536, "rewards/rejected": -0.06859149783849716, "step": 2231 }, { "epoch": 1.5435684647302903, "grad_norm": 5.8012166023254395, "learning_rate": 4.698017519594283e-05, "log_odds_chosen": 6.982174396514893, "log_odds_ratio": -0.018287427723407745, "logits/chosen": -0.5732105374336243, "logits/rejected": -0.6470195651054382, "logps/chosen": -0.00966467335820198, "logps/rejected": -1.0621178150177002, "loss": 2.9736, "nll_loss": 0.7415661811828613, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009664673125371337, "rewards/margins": 0.10524532198905945, "rewards/rejected": -0.10621179640293121, "step": 2232 }, { "epoch": 1.5442600276625171, "grad_norm": 10.100383758544922, "learning_rate": 4.697633317965268e-05, "log_odds_chosen": 2.6996121406555176, "log_odds_ratio": -0.5876289010047913, "logits/chosen": -0.5158724784851074, "logits/rejected": -0.5607286691665649, "logps/chosen": -0.13475391268730164, "logps/rejected": -0.5057365894317627, "loss": 4.4236, "nll_loss": 1.0471446514129639, "rewards/accuracies": 0.625, "rewards/chosen": -0.013475392945110798, "rewards/margins": 0.03709826618432999, "rewards/rejected": -0.05057365447282791, "step": 2233 }, { "epoch": 1.544951590594744, "grad_norm": 7.3931145668029785, "learning_rate": 4.6972491163362535e-05, "log_odds_chosen": 3.28309965133667, "log_odds_ratio": -0.41309264302253723, "logits/chosen": 0.1753653883934021, "logits/rejected": 0.1266123354434967, "logps/chosen": -0.1575171798467636, "logps/rejected": -0.6481037735939026, "loss": 3.2442, "nll_loss": 0.7697430849075317, "rewards/accuracies": 0.625, "rewards/chosen": -0.01575171761214733, "rewards/margins": 0.04905865713953972, "rewards/rejected": -0.0648103803396225, "step": 2234 }, { "epoch": 1.5456431535269708, "grad_norm": 5.558627605438232, "learning_rate": 4.696864914707239e-05, "log_odds_chosen": 3.975231647491455, "log_odds_ratio": -0.2093396931886673, "logits/chosen": -0.7380461096763611, "logits/rejected": -0.8254467248916626, "logps/chosen": -0.04940512031316757, "logps/rejected": -0.6910010576248169, "loss": 4.2055, "nll_loss": 1.0304415225982666, "rewards/accuracies": 0.875, "rewards/chosen": -0.004940511658787727, "rewards/margins": 0.06415959447622299, "rewards/rejected": -0.06910011172294617, "step": 2235 }, { "epoch": 1.5463347164591976, "grad_norm": 6.294323444366455, "learning_rate": 4.696480713078223e-05, "log_odds_chosen": 4.579098224639893, "log_odds_ratio": -0.08531267940998077, "logits/chosen": -0.6607158780097961, "logits/rejected": -0.6623681783676147, "logps/chosen": -0.04320439323782921, "logps/rejected": -0.6710415482521057, "loss": 4.064, "nll_loss": 1.0074676275253296, "rewards/accuracies": 1.0, "rewards/chosen": -0.004320439882576466, "rewards/margins": 0.06278371810913086, "rewards/rejected": -0.06710416078567505, "step": 2236 }, { "epoch": 1.5470262793914247, "grad_norm": 4.83128023147583, "learning_rate": 4.696096511449209e-05, "log_odds_chosen": 5.596061706542969, "log_odds_ratio": -0.024458257481455803, "logits/chosen": -0.2902466952800751, "logits/rejected": -0.30807405710220337, "logps/chosen": -0.017890680581331253, "logps/rejected": -1.3433257341384888, "loss": 2.3647, "nll_loss": 0.5887378454208374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017890682211145759, "rewards/margins": 0.13254351913928986, "rewards/rejected": -0.1343325823545456, "step": 2237 }, { "epoch": 1.5477178423236515, "grad_norm": 9.162208557128906, "learning_rate": 4.695712309820194e-05, "log_odds_chosen": 5.807769775390625, "log_odds_ratio": -0.08700526505708694, "logits/chosen": -0.13000746071338654, "logits/rejected": -0.22356395423412323, "logps/chosen": -0.03778745234012604, "logps/rejected": -1.1309075355529785, "loss": 2.6584, "nll_loss": 0.6559075713157654, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037787449546158314, "rewards/margins": 0.1093120127916336, "rewards/rejected": -0.11309076845645905, "step": 2238 }, { "epoch": 1.5484094052558783, "grad_norm": 7.159018516540527, "learning_rate": 4.695328108191179e-05, "log_odds_chosen": 3.921074867248535, "log_odds_ratio": -0.20838870108127594, "logits/chosen": -0.5181608200073242, "logits/rejected": -0.5305622816085815, "logps/chosen": -0.05944810062646866, "logps/rejected": -0.8501400947570801, "loss": 3.7351, "nll_loss": 0.9129410982131958, "rewards/accuracies": 0.875, "rewards/chosen": -0.005944809876382351, "rewards/margins": 0.07906919717788696, "rewards/rejected": -0.08501400798559189, "step": 2239 }, { "epoch": 1.5491009681881052, "grad_norm": 9.407584190368652, "learning_rate": 4.694943906562164e-05, "log_odds_chosen": 4.420240879058838, "log_odds_ratio": -0.27922773361206055, "logits/chosen": -0.7489995360374451, "logits/rejected": -0.7329412698745728, "logps/chosen": -0.07872271537780762, "logps/rejected": -0.8607683777809143, "loss": 3.4904, "nll_loss": 0.844673752784729, "rewards/accuracies": 0.75, "rewards/chosen": -0.007872272282838821, "rewards/margins": 0.07820457220077515, "rewards/rejected": -0.08607684075832367, "step": 2240 }, { "epoch": 1.549792531120332, "grad_norm": 6.541446208953857, "learning_rate": 4.694559704933149e-05, "log_odds_chosen": 4.660301208496094, "log_odds_ratio": -0.12407185137271881, "logits/chosen": -0.4145255982875824, "logits/rejected": -0.48058953881263733, "logps/chosen": -0.059069301933050156, "logps/rejected": -1.0553184747695923, "loss": 2.7047, "nll_loss": 0.6637730598449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.0059069301933050156, "rewards/margins": 0.09962491691112518, "rewards/rejected": -0.10553184151649475, "step": 2241 }, { "epoch": 1.5504840940525588, "grad_norm": 4.595657825469971, "learning_rate": 4.694175503304134e-05, "log_odds_chosen": 5.710814476013184, "log_odds_ratio": -0.24719639122486115, "logits/chosen": -0.5707492232322693, "logits/rejected": -0.5625244379043579, "logps/chosen": -0.07158362865447998, "logps/rejected": -1.2810132503509521, "loss": 2.4542, "nll_loss": 0.5888248682022095, "rewards/accuracies": 0.875, "rewards/chosen": -0.007158362772315741, "rewards/margins": 0.12094297260046005, "rewards/rejected": -0.12810133397579193, "step": 2242 }, { "epoch": 1.5511756569847857, "grad_norm": 6.285073280334473, "learning_rate": 4.6937913016751193e-05, "log_odds_chosen": 4.71954870223999, "log_odds_ratio": -0.260731041431427, "logits/chosen": 0.10269571840763092, "logits/rejected": 0.06153454631567001, "logps/chosen": -0.09316494315862656, "logps/rejected": -0.9280557632446289, "loss": 2.9387, "nll_loss": 0.7085932493209839, "rewards/accuracies": 0.75, "rewards/chosen": -0.009316494688391685, "rewards/margins": 0.0834890753030777, "rewards/rejected": -0.09280557930469513, "step": 2243 }, { "epoch": 1.5518672199170125, "grad_norm": 6.047837734222412, "learning_rate": 4.6934071000461046e-05, "log_odds_chosen": 3.6565470695495605, "log_odds_ratio": -0.3122093975543976, "logits/chosen": -0.3903008997440338, "logits/rejected": -0.41671690344810486, "logps/chosen": -0.08219773322343826, "logps/rejected": -0.8850083351135254, "loss": 2.5731, "nll_loss": 0.6120545864105225, "rewards/accuracies": 0.875, "rewards/chosen": -0.008219772949814796, "rewards/margins": 0.08028105646371841, "rewards/rejected": -0.08850082755088806, "step": 2244 }, { "epoch": 1.5525587828492393, "grad_norm": 5.648904800415039, "learning_rate": 4.693022898417089e-05, "log_odds_chosen": 5.111184120178223, "log_odds_ratio": -0.11708255112171173, "logits/chosen": -0.4507385790348053, "logits/rejected": -0.4581332206726074, "logps/chosen": -0.02783753164112568, "logps/rejected": -0.6649580001831055, "loss": 2.5968, "nll_loss": 0.6374881863594055, "rewards/accuracies": 0.875, "rewards/chosen": -0.0027837532106786966, "rewards/margins": 0.06371204555034637, "rewards/rejected": -0.06649579852819443, "step": 2245 }, { "epoch": 1.5532503457814661, "grad_norm": 4.566701889038086, "learning_rate": 4.692638696788075e-05, "log_odds_chosen": 4.072457313537598, "log_odds_ratio": -0.13861241936683655, "logits/chosen": -0.4009241461753845, "logits/rejected": -0.48581573367118835, "logps/chosen": -0.06524769216775894, "logps/rejected": -0.5617847442626953, "loss": 2.1005, "nll_loss": 0.5112607479095459, "rewards/accuracies": 1.0, "rewards/chosen": -0.006524769123643637, "rewards/margins": 0.04965370520949364, "rewards/rejected": -0.05617847293615341, "step": 2246 }, { "epoch": 1.553941908713693, "grad_norm": 4.161489963531494, "learning_rate": 4.6922544951590596e-05, "log_odds_chosen": 5.0455217361450195, "log_odds_ratio": -0.08254259079694748, "logits/chosen": -0.08890029788017273, "logits/rejected": -0.14021196961402893, "logps/chosen": -0.04011262208223343, "logps/rejected": -0.823492169380188, "loss": 2.216, "nll_loss": 0.5457525849342346, "rewards/accuracies": 1.0, "rewards/chosen": -0.00401126267388463, "rewards/margins": 0.07833795249462128, "rewards/rejected": -0.08234922587871552, "step": 2247 }, { "epoch": 1.5546334716459198, "grad_norm": 12.599466323852539, "learning_rate": 4.691870293530045e-05, "log_odds_chosen": 3.1988582611083984, "log_odds_ratio": -0.2710178792476654, "logits/chosen": -0.22087787091732025, "logits/rejected": -0.2567731440067291, "logps/chosen": -0.04471834376454353, "logps/rejected": -0.5315315127372742, "loss": 2.9405, "nll_loss": 0.7080118060112, "rewards/accuracies": 0.875, "rewards/chosen": -0.004471834283322096, "rewards/margins": 0.04868132248520851, "rewards/rejected": -0.053153157234191895, "step": 2248 }, { "epoch": 1.5553250345781466, "grad_norm": 7.255458354949951, "learning_rate": 4.69148609190103e-05, "log_odds_chosen": 3.571174383163452, "log_odds_ratio": -0.19597908854484558, "logits/chosen": -0.3806026875972748, "logits/rejected": -0.4358273148536682, "logps/chosen": -0.0519745796918869, "logps/rejected": -0.746728241443634, "loss": 3.1931, "nll_loss": 0.7786867022514343, "rewards/accuracies": 0.875, "rewards/chosen": -0.00519745796918869, "rewards/margins": 0.06947536766529083, "rewards/rejected": -0.07467282563447952, "step": 2249 }, { "epoch": 1.5560165975103735, "grad_norm": 8.847131729125977, "learning_rate": 4.691101890272015e-05, "log_odds_chosen": 4.572246074676514, "log_odds_ratio": -0.1461586356163025, "logits/chosen": -0.5918107032775879, "logits/rejected": -0.5609433650970459, "logps/chosen": -0.036586739122867584, "logps/rejected": -0.8704870343208313, "loss": 4.0928, "nll_loss": 1.008584976196289, "rewards/accuracies": 1.0, "rewards/chosen": -0.003658674191683531, "rewards/margins": 0.08339002728462219, "rewards/rejected": -0.08704870939254761, "step": 2250 }, { "epoch": 1.5567081604426003, "grad_norm": 3.6108243465423584, "learning_rate": 4.690717688643e-05, "log_odds_chosen": 3.400420665740967, "log_odds_ratio": -0.20974057912826538, "logits/chosen": -0.38296398520469666, "logits/rejected": -0.3265213966369629, "logps/chosen": -0.1113806664943695, "logps/rejected": -0.82120680809021, "loss": 1.8716, "nll_loss": 0.4469362795352936, "rewards/accuracies": 0.875, "rewards/chosen": -0.011138067580759525, "rewards/margins": 0.07098261266946793, "rewards/rejected": -0.08212068676948547, "step": 2251 }, { "epoch": 1.5573997233748271, "grad_norm": 7.981571674346924, "learning_rate": 4.690333487013985e-05, "log_odds_chosen": 2.3555095195770264, "log_odds_ratio": -0.49662184715270996, "logits/chosen": -0.593062698841095, "logits/rejected": -0.615126371383667, "logps/chosen": -0.16694380342960358, "logps/rejected": -0.4941992163658142, "loss": 2.9893, "nll_loss": 0.6976538896560669, "rewards/accuracies": 0.875, "rewards/chosen": -0.016694379970431328, "rewards/margins": 0.03272554278373718, "rewards/rejected": -0.04941992461681366, "step": 2252 }, { "epoch": 1.558091286307054, "grad_norm": 7.54820442199707, "learning_rate": 4.6899492853849704e-05, "log_odds_chosen": 4.2390642166137695, "log_odds_ratio": -0.3768894374370575, "logits/chosen": -0.6133686304092407, "logits/rejected": -0.5917006731033325, "logps/chosen": -0.08966968953609467, "logps/rejected": -0.7241112589836121, "loss": 3.8115, "nll_loss": 0.9151946902275085, "rewards/accuracies": 0.75, "rewards/chosen": -0.008966969326138496, "rewards/margins": 0.06344415992498398, "rewards/rejected": -0.07241112738847733, "step": 2253 }, { "epoch": 1.5587828492392808, "grad_norm": 7.562520980834961, "learning_rate": 4.689565083755955e-05, "log_odds_chosen": 2.634389877319336, "log_odds_ratio": -0.3599710464477539, "logits/chosen": -0.5475929975509644, "logits/rejected": -0.5768888592720032, "logps/chosen": -0.0821683332324028, "logps/rejected": -0.5093742609024048, "loss": 4.1712, "nll_loss": 1.006812572479248, "rewards/accuracies": 0.75, "rewards/chosen": -0.008216832764446735, "rewards/margins": 0.04272059351205826, "rewards/rejected": -0.05093742161989212, "step": 2254 }, { "epoch": 1.5594744121715076, "grad_norm": 6.5873003005981445, "learning_rate": 4.689180882126941e-05, "log_odds_chosen": 5.166384220123291, "log_odds_ratio": -0.2231721132993698, "logits/chosen": -0.40660107135772705, "logits/rejected": -0.4643666446208954, "logps/chosen": -0.07692534476518631, "logps/rejected": -1.0464847087860107, "loss": 2.1066, "nll_loss": 0.5043294429779053, "rewards/accuracies": 0.875, "rewards/chosen": -0.007692534010857344, "rewards/margins": 0.09695594012737274, "rewards/rejected": -0.10464847087860107, "step": 2255 }, { "epoch": 1.5601659751037344, "grad_norm": 8.40011215209961, "learning_rate": 4.6887966804979255e-05, "log_odds_chosen": 4.982305526733398, "log_odds_ratio": -0.21497850120067596, "logits/chosen": -0.5718773603439331, "logits/rejected": -0.5998449325561523, "logps/chosen": -0.095095694065094, "logps/rejected": -0.9810383319854736, "loss": 3.0402, "nll_loss": 0.7385454773902893, "rewards/accuracies": 0.875, "rewards/chosen": -0.009509569965302944, "rewards/margins": 0.08859425783157349, "rewards/rejected": -0.09810382127761841, "step": 2256 }, { "epoch": 1.5608575380359613, "grad_norm": 5.619307041168213, "learning_rate": 4.688412478868911e-05, "log_odds_chosen": 6.083016872406006, "log_odds_ratio": -0.03372356668114662, "logits/chosen": -0.2072625458240509, "logits/rejected": -0.2259514033794403, "logps/chosen": -0.013912231661379337, "logps/rejected": -0.925947904586792, "loss": 3.2673, "nll_loss": 0.8134469985961914, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013912231661379337, "rewards/margins": 0.09120357036590576, "rewards/rejected": -0.09259478747844696, "step": 2257 }, { "epoch": 1.561549100968188, "grad_norm": 12.73154354095459, "learning_rate": 4.688028277239896e-05, "log_odds_chosen": 4.015732765197754, "log_odds_ratio": -0.1854289174079895, "logits/chosen": -0.4852401316165924, "logits/rejected": -0.5350650548934937, "logps/chosen": -0.04746335744857788, "logps/rejected": -0.6718555688858032, "loss": 3.5894, "nll_loss": 0.8787986636161804, "rewards/accuracies": 0.875, "rewards/chosen": -0.004746335558593273, "rewards/margins": 0.06243922561407089, "rewards/rejected": -0.06718556582927704, "step": 2258 }, { "epoch": 1.562240663900415, "grad_norm": 7.783176898956299, "learning_rate": 4.6876440756108805e-05, "log_odds_chosen": 3.907989501953125, "log_odds_ratio": -0.11575151979923248, "logits/chosen": -0.3810523748397827, "logits/rejected": -0.380711168050766, "logps/chosen": -0.07309068739414215, "logps/rejected": -0.7313439846038818, "loss": 3.4366, "nll_loss": 0.84757000207901, "rewards/accuracies": 1.0, "rewards/chosen": -0.00730906892567873, "rewards/margins": 0.06582533568143845, "rewards/rejected": -0.0731343999505043, "step": 2259 }, { "epoch": 1.5629322268326418, "grad_norm": 7.931395053863525, "learning_rate": 4.687259873981866e-05, "log_odds_chosen": 6.3931474685668945, "log_odds_ratio": -0.00962926261126995, "logits/chosen": -0.6360489130020142, "logits/rejected": -0.6455775499343872, "logps/chosen": -0.014708174392580986, "logps/rejected": -1.1883326768875122, "loss": 3.0027, "nll_loss": 0.7497127652168274, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014708174858242273, "rewards/margins": 0.11736244708299637, "rewards/rejected": -0.1188332661986351, "step": 2260 }, { "epoch": 1.5636237897648686, "grad_norm": 4.673415660858154, "learning_rate": 4.686875672352851e-05, "log_odds_chosen": 4.533776760101318, "log_odds_ratio": -0.1115853562951088, "logits/chosen": -0.534148633480072, "logits/rejected": -0.5244668126106262, "logps/chosen": -0.08479005843400955, "logps/rejected": -0.6998138427734375, "loss": 2.764, "nll_loss": 0.6798312664031982, "rewards/accuracies": 1.0, "rewards/chosen": -0.00847900565713644, "rewards/margins": 0.06150238215923309, "rewards/rejected": -0.06998138874769211, "step": 2261 }, { "epoch": 1.5643153526970954, "grad_norm": 6.692610263824463, "learning_rate": 4.686491470723836e-05, "log_odds_chosen": 7.074902057647705, "log_odds_ratio": -0.006812370382249355, "logits/chosen": -0.2236250340938568, "logits/rejected": -0.2226749062538147, "logps/chosen": -0.030843688175082207, "logps/rejected": -1.46689772605896, "loss": 3.1026, "nll_loss": 0.7749695181846619, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030843692366033792, "rewards/margins": 0.14360541105270386, "rewards/rejected": -0.146689772605896, "step": 2262 }, { "epoch": 1.5650069156293223, "grad_norm": 9.14884090423584, "learning_rate": 4.686107269094821e-05, "log_odds_chosen": 3.6463592052459717, "log_odds_ratio": -0.248472660779953, "logits/chosen": 0.002660594880580902, "logits/rejected": 0.011733196675777435, "logps/chosen": -0.06953177601099014, "logps/rejected": -0.6269186735153198, "loss": 2.9179, "nll_loss": 0.7046196460723877, "rewards/accuracies": 0.875, "rewards/chosen": -0.006953177973628044, "rewards/margins": 0.05573869124054909, "rewards/rejected": -0.06269187480211258, "step": 2263 }, { "epoch": 1.565698478561549, "grad_norm": 7.125910758972168, "learning_rate": 4.685723067465807e-05, "log_odds_chosen": 2.580953598022461, "log_odds_ratio": -0.24101954698562622, "logits/chosen": -0.57945716381073, "logits/rejected": -0.5280551910400391, "logps/chosen": -0.07494837790727615, "logps/rejected": -0.6202612519264221, "loss": 2.7215, "nll_loss": 0.6562801599502563, "rewards/accuracies": 0.875, "rewards/chosen": -0.007494837511330843, "rewards/margins": 0.054531291127204895, "rewards/rejected": -0.06202612444758415, "step": 2264 }, { "epoch": 1.566390041493776, "grad_norm": 6.528625965118408, "learning_rate": 4.685338865836791e-05, "log_odds_chosen": 5.087283134460449, "log_odds_ratio": -0.3491363823413849, "logits/chosen": -0.4980352520942688, "logits/rejected": -0.48793551325798035, "logps/chosen": -0.1593426764011383, "logps/rejected": -1.1793937683105469, "loss": 3.2584, "nll_loss": 0.7796763181686401, "rewards/accuracies": 0.875, "rewards/chosen": -0.01593426614999771, "rewards/margins": 0.10200510919094086, "rewards/rejected": -0.11793938279151917, "step": 2265 }, { "epoch": 1.5670816044260027, "grad_norm": 13.136091232299805, "learning_rate": 4.6849546642077766e-05, "log_odds_chosen": 3.9400973320007324, "log_odds_ratio": -1.1063835620880127, "logits/chosen": -0.5145586729049683, "logits/rejected": -0.543152391910553, "logps/chosen": -0.09819920361042023, "logps/rejected": -0.9681227207183838, "loss": 2.7526, "nll_loss": 0.5775208473205566, "rewards/accuracies": 0.75, "rewards/chosen": -0.009819920174777508, "rewards/margins": 0.08699235320091248, "rewards/rejected": -0.09681227803230286, "step": 2266 }, { "epoch": 1.5677731673582296, "grad_norm": 14.50251579284668, "learning_rate": 4.684570462578762e-05, "log_odds_chosen": 4.714078426361084, "log_odds_ratio": -0.19142206013202667, "logits/chosen": -0.5533724427223206, "logits/rejected": -0.5080875158309937, "logps/chosen": -0.03418285399675369, "logps/rejected": -0.9308934211730957, "loss": 3.6667, "nll_loss": 0.8975303173065186, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034182853996753693, "rewards/margins": 0.0896710455417633, "rewards/rejected": -0.09308933466672897, "step": 2267 }, { "epoch": 1.5684647302904564, "grad_norm": 6.3615570068359375, "learning_rate": 4.6841862609497464e-05, "log_odds_chosen": 4.702467918395996, "log_odds_ratio": -0.1508854329586029, "logits/chosen": -0.5412316918373108, "logits/rejected": -0.5616676807403564, "logps/chosen": -0.12236365675926208, "logps/rejected": -1.1580426692962646, "loss": 3.3303, "nll_loss": 0.8174852728843689, "rewards/accuracies": 1.0, "rewards/chosen": -0.012236366048455238, "rewards/margins": 0.10356790572404861, "rewards/rejected": -0.1158042699098587, "step": 2268 }, { "epoch": 1.5691562932226832, "grad_norm": 11.172139167785645, "learning_rate": 4.6838020593207316e-05, "log_odds_chosen": 3.8827757835388184, "log_odds_ratio": -0.2262200564146042, "logits/chosen": -0.481758177280426, "logits/rejected": -0.5334774255752563, "logps/chosen": -0.07622340321540833, "logps/rejected": -0.8563274145126343, "loss": 3.4932, "nll_loss": 0.8506813049316406, "rewards/accuracies": 0.875, "rewards/chosen": -0.007622339762747288, "rewards/margins": 0.07801041007041931, "rewards/rejected": -0.08563274890184402, "step": 2269 }, { "epoch": 1.56984785615491, "grad_norm": 7.487488746643066, "learning_rate": 4.683417857691717e-05, "log_odds_chosen": 5.401069164276123, "log_odds_ratio": -0.1360565572977066, "logits/chosen": -0.5984183549880981, "logits/rejected": -0.7007421255111694, "logps/chosen": -0.04290872812271118, "logps/rejected": -0.9781840443611145, "loss": 2.8651, "nll_loss": 0.702671229839325, "rewards/accuracies": 1.0, "rewards/chosen": -0.004290873650461435, "rewards/margins": 0.09352753311395645, "rewards/rejected": -0.09781840443611145, "step": 2270 }, { "epoch": 1.570539419087137, "grad_norm": 7.04751443862915, "learning_rate": 4.683033656062702e-05, "log_odds_chosen": 4.005356311798096, "log_odds_ratio": -0.3126870393753052, "logits/chosen": -0.23318475484848022, "logits/rejected": -0.2612619698047638, "logps/chosen": -0.07785270363092422, "logps/rejected": -0.8214019536972046, "loss": 3.8368, "nll_loss": 0.9279318451881409, "rewards/accuracies": 0.75, "rewards/chosen": -0.007785269990563393, "rewards/margins": 0.07435491681098938, "rewards/rejected": -0.08214019238948822, "step": 2271 }, { "epoch": 1.5712309820193637, "grad_norm": 6.216674327850342, "learning_rate": 4.682649454433687e-05, "log_odds_chosen": 3.7621045112609863, "log_odds_ratio": -0.5083651542663574, "logits/chosen": -0.07921777665615082, "logits/rejected": -0.09831476211547852, "logps/chosen": -0.09837433695793152, "logps/rejected": -0.5076334476470947, "loss": 2.1151, "nll_loss": 0.477946013212204, "rewards/accuracies": 0.875, "rewards/chosen": -0.009837434627115726, "rewards/margins": 0.04092591255903244, "rewards/rejected": -0.05076334625482559, "step": 2272 }, { "epoch": 1.5719225449515906, "grad_norm": 4.1684250831604, "learning_rate": 4.6822652528046726e-05, "log_odds_chosen": 4.168286323547363, "log_odds_ratio": -0.09342752397060394, "logits/chosen": -0.48797476291656494, "logits/rejected": -0.4549727439880371, "logps/chosen": -0.059577472507953644, "logps/rejected": -0.8933494091033936, "loss": 2.6642, "nll_loss": 0.656704306602478, "rewards/accuracies": 1.0, "rewards/chosen": -0.005957747809588909, "rewards/margins": 0.0833771824836731, "rewards/rejected": -0.08933493494987488, "step": 2273 }, { "epoch": 1.5726141078838174, "grad_norm": 6.8194098472595215, "learning_rate": 4.681881051175657e-05, "log_odds_chosen": 4.301138877868652, "log_odds_ratio": -0.2094355970621109, "logits/chosen": -0.023733407258987427, "logits/rejected": -0.0873059630393982, "logps/chosen": -0.026450948789715767, "logps/rejected": -0.46369093656539917, "loss": 2.8734, "nll_loss": 0.6974120140075684, "rewards/accuracies": 1.0, "rewards/chosen": -0.002645095344632864, "rewards/margins": 0.043724000453948975, "rewards/rejected": -0.04636909440159798, "step": 2274 }, { "epoch": 1.5733056708160442, "grad_norm": 3.638047218322754, "learning_rate": 4.6814968495466424e-05, "log_odds_chosen": 6.883190631866455, "log_odds_ratio": -0.01691877841949463, "logits/chosen": -0.4181588888168335, "logits/rejected": -0.4647769629955292, "logps/chosen": -0.03221792355179787, "logps/rejected": -0.7626996040344238, "loss": 2.0761, "nll_loss": 0.5173306465148926, "rewards/accuracies": 1.0, "rewards/chosen": -0.003221792634576559, "rewards/margins": 0.0730481669306755, "rewards/rejected": -0.0762699544429779, "step": 2275 }, { "epoch": 1.573997233748271, "grad_norm": 6.572237491607666, "learning_rate": 4.681112647917628e-05, "log_odds_chosen": 4.171795845031738, "log_odds_ratio": -0.33008846640586853, "logits/chosen": -0.4735686779022217, "logits/rejected": -0.5132468938827515, "logps/chosen": -0.09502127021551132, "logps/rejected": -0.559738278388977, "loss": 2.9807, "nll_loss": 0.7121750116348267, "rewards/accuracies": 0.875, "rewards/chosen": -0.009502128697931767, "rewards/margins": 0.04647170007228851, "rewards/rejected": -0.055973831564188004, "step": 2276 }, { "epoch": 1.5746887966804979, "grad_norm": 4.681520462036133, "learning_rate": 4.680728446288612e-05, "log_odds_chosen": 3.857510566711426, "log_odds_ratio": -0.20329353213310242, "logits/chosen": -0.5046722888946533, "logits/rejected": -0.4650399088859558, "logps/chosen": -0.0971132218837738, "logps/rejected": -0.9814940690994263, "loss": 3.3064, "nll_loss": 0.8062769174575806, "rewards/accuracies": 0.875, "rewards/chosen": -0.009711322374641895, "rewards/margins": 0.08843808621168137, "rewards/rejected": -0.09814940392971039, "step": 2277 }, { "epoch": 1.5753803596127247, "grad_norm": 7.279231548309326, "learning_rate": 4.6803442446595975e-05, "log_odds_chosen": 4.661543369293213, "log_odds_ratio": -0.19676269590854645, "logits/chosen": -0.6336410045623779, "logits/rejected": -0.561577320098877, "logps/chosen": -0.07620273530483246, "logps/rejected": -0.923383355140686, "loss": 3.0587, "nll_loss": 0.7449985146522522, "rewards/accuracies": 0.875, "rewards/chosen": -0.007620273623615503, "rewards/margins": 0.08471806347370148, "rewards/rejected": -0.09233833849430084, "step": 2278 }, { "epoch": 1.5760719225449515, "grad_norm": 4.930444240570068, "learning_rate": 4.679960043030583e-05, "log_odds_chosen": 4.155850887298584, "log_odds_ratio": -0.13013622164726257, "logits/chosen": -0.41056662797927856, "logits/rejected": -0.49715933203697205, "logps/chosen": -0.05985238403081894, "logps/rejected": -0.5921831727027893, "loss": 2.6141, "nll_loss": 0.6405088901519775, "rewards/accuracies": 1.0, "rewards/chosen": -0.005985238589346409, "rewards/margins": 0.053233079612255096, "rewards/rejected": -0.05921831727027893, "step": 2279 }, { "epoch": 1.5767634854771784, "grad_norm": 5.073685169219971, "learning_rate": 4.679575841401568e-05, "log_odds_chosen": 3.090080738067627, "log_odds_ratio": -0.3903737962245941, "logits/chosen": -0.5625483989715576, "logits/rejected": -0.5984906554222107, "logps/chosen": -0.08926475048065186, "logps/rejected": -0.48649924993515015, "loss": 3.0696, "nll_loss": 0.7283635139465332, "rewards/accuracies": 0.625, "rewards/chosen": -0.00892647448927164, "rewards/margins": 0.03972344845533371, "rewards/rejected": -0.048649922013282776, "step": 2280 }, { "epoch": 1.5774550484094052, "grad_norm": 6.59467887878418, "learning_rate": 4.6791916397725525e-05, "log_odds_chosen": 3.5226287841796875, "log_odds_ratio": -0.24100548028945923, "logits/chosen": -0.5984765887260437, "logits/rejected": -0.6621988415718079, "logps/chosen": -0.07654125243425369, "logps/rejected": -0.7357521653175354, "loss": 3.7518, "nll_loss": 0.913856565952301, "rewards/accuracies": 0.875, "rewards/chosen": -0.007654125336557627, "rewards/margins": 0.06592109799385071, "rewards/rejected": -0.0735752284526825, "step": 2281 }, { "epoch": 1.5781466113416323, "grad_norm": 15.683188438415527, "learning_rate": 4.6788074381435385e-05, "log_odds_chosen": 3.7196710109710693, "log_odds_ratio": -0.34569528698921204, "logits/chosen": -0.7387279272079468, "logits/rejected": -0.6877920031547546, "logps/chosen": -0.05489436909556389, "logps/rejected": -0.8038865327835083, "loss": 3.8624, "nll_loss": 0.9310262799263, "rewards/accuracies": 0.75, "rewards/chosen": -0.005489437375217676, "rewards/margins": 0.07489921152591705, "rewards/rejected": -0.08038865029811859, "step": 2282 }, { "epoch": 1.578838174273859, "grad_norm": 10.021100997924805, "learning_rate": 4.678423236514523e-05, "log_odds_chosen": 4.2374372482299805, "log_odds_ratio": -0.04714512825012207, "logits/chosen": -0.716540515422821, "logits/rejected": -0.7443221211433411, "logps/chosen": -0.055723875761032104, "logps/rejected": -0.974632203578949, "loss": 4.4345, "nll_loss": 1.103899359703064, "rewards/accuracies": 1.0, "rewards/chosen": -0.00557238794863224, "rewards/margins": 0.09189082682132721, "rewards/rejected": -0.0974632203578949, "step": 2283 }, { "epoch": 1.579529737206086, "grad_norm": 7.538951396942139, "learning_rate": 4.678039034885508e-05, "log_odds_chosen": 3.4724903106689453, "log_odds_ratio": -0.37358352541923523, "logits/chosen": -0.46461501717567444, "logits/rejected": -0.5749452114105225, "logps/chosen": -0.07608627527952194, "logps/rejected": -0.5288589000701904, "loss": 2.807, "nll_loss": 0.6643953323364258, "rewards/accuracies": 0.875, "rewards/chosen": -0.007608628366142511, "rewards/margins": 0.04527726769447327, "rewards/rejected": -0.05288589745759964, "step": 2284 }, { "epoch": 1.5802213001383127, "grad_norm": 7.384200096130371, "learning_rate": 4.6776548332564935e-05, "log_odds_chosen": 5.402228355407715, "log_odds_ratio": -0.07402972877025604, "logits/chosen": -0.5802706480026245, "logits/rejected": -0.5512796640396118, "logps/chosen": -0.042227327823638916, "logps/rejected": -0.8186689019203186, "loss": 4.1053, "nll_loss": 1.0189257860183716, "rewards/accuracies": 1.0, "rewards/chosen": -0.0042227329686284065, "rewards/margins": 0.07764415442943573, "rewards/rejected": -0.08186689019203186, "step": 2285 }, { "epoch": 1.5809128630705396, "grad_norm": 7.9935712814331055, "learning_rate": 4.677270631627478e-05, "log_odds_chosen": 3.9247207641601562, "log_odds_ratio": -0.19959238171577454, "logits/chosen": -0.4443342089653015, "logits/rejected": -0.44178879261016846, "logps/chosen": -0.08705995976924896, "logps/rejected": -0.6575961112976074, "loss": 3.2841, "nll_loss": 0.8010765314102173, "rewards/accuracies": 0.875, "rewards/chosen": -0.008705995976924896, "rewards/margins": 0.057053614407777786, "rewards/rejected": -0.06575960665941238, "step": 2286 }, { "epoch": 1.5816044260027664, "grad_norm": 6.8036909103393555, "learning_rate": 4.676886429998463e-05, "log_odds_chosen": 3.094165086746216, "log_odds_ratio": -0.21915318071842194, "logits/chosen": -0.650163471698761, "logits/rejected": -0.6991207599639893, "logps/chosen": -0.10170422494411469, "logps/rejected": -0.7241445779800415, "loss": 2.8511, "nll_loss": 0.6908689141273499, "rewards/accuracies": 0.875, "rewards/chosen": -0.010170423425734043, "rewards/margins": 0.06224404275417328, "rewards/rejected": -0.07241446524858475, "step": 2287 }, { "epoch": 1.5822959889349932, "grad_norm": 8.86994743347168, "learning_rate": 4.6765022283694486e-05, "log_odds_chosen": 4.749606132507324, "log_odds_ratio": -0.1850530505180359, "logits/chosen": -0.42290088534355164, "logits/rejected": -0.47745946049690247, "logps/chosen": -0.06721153110265732, "logps/rejected": -0.6770196557044983, "loss": 3.5533, "nll_loss": 0.8698145151138306, "rewards/accuracies": 0.875, "rewards/chosen": -0.006721153389662504, "rewards/margins": 0.06098081171512604, "rewards/rejected": -0.06770196557044983, "step": 2288 }, { "epoch": 1.58298755186722, "grad_norm": 6.188156604766846, "learning_rate": 4.676118026740434e-05, "log_odds_chosen": 4.213592052459717, "log_odds_ratio": -0.20819568634033203, "logits/chosen": -0.5084786415100098, "logits/rejected": -0.5529496669769287, "logps/chosen": -0.030551385134458542, "logps/rejected": -0.5414082407951355, "loss": 3.5748, "nll_loss": 0.8728883266448975, "rewards/accuracies": 1.0, "rewards/chosen": -0.003055138746276498, "rewards/margins": 0.051085688173770905, "rewards/rejected": -0.05414082854986191, "step": 2289 }, { "epoch": 1.583679114799447, "grad_norm": 5.629952907562256, "learning_rate": 4.6757338251114184e-05, "log_odds_chosen": 3.0484161376953125, "log_odds_ratio": -0.34847933053970337, "logits/chosen": -0.23855555057525635, "logits/rejected": -0.34040266275405884, "logps/chosen": -0.123260498046875, "logps/rejected": -0.5389373898506165, "loss": 2.3774, "nll_loss": 0.5595142841339111, "rewards/accuracies": 0.75, "rewards/chosen": -0.012326049618422985, "rewards/margins": 0.04156769439578056, "rewards/rejected": -0.053893741220235825, "step": 2290 }, { "epoch": 1.5843706777316737, "grad_norm": 20.07173728942871, "learning_rate": 4.675349623482404e-05, "log_odds_chosen": 3.135038375854492, "log_odds_ratio": -0.6684633493423462, "logits/chosen": -0.6842124462127686, "logits/rejected": -0.744235634803772, "logps/chosen": -0.19508275389671326, "logps/rejected": -0.7924327850341797, "loss": 4.8983, "nll_loss": 1.157721996307373, "rewards/accuracies": 0.875, "rewards/chosen": -0.019508276134729385, "rewards/margins": 0.0597350038588047, "rewards/rejected": -0.07924328744411469, "step": 2291 }, { "epoch": 1.5850622406639006, "grad_norm": 4.18657922744751, "learning_rate": 4.674965421853389e-05, "log_odds_chosen": 6.433959484100342, "log_odds_ratio": -0.09131443500518799, "logits/chosen": -0.3304455876350403, "logits/rejected": -0.361205518245697, "logps/chosen": -0.03453647345304489, "logps/rejected": -0.9038757085800171, "loss": 2.1664, "nll_loss": 0.5324781537055969, "rewards/accuracies": 0.875, "rewards/chosen": -0.003453647717833519, "rewards/margins": 0.0869339257478714, "rewards/rejected": -0.09038757532835007, "step": 2292 }, { "epoch": 1.5857538035961274, "grad_norm": 4.888633728027344, "learning_rate": 4.674581220224374e-05, "log_odds_chosen": 4.468114852905273, "log_odds_ratio": -0.25625795125961304, "logits/chosen": -0.31070369482040405, "logits/rejected": -0.33406126499176025, "logps/chosen": -0.08972156047821045, "logps/rejected": -0.7813112139701843, "loss": 2.4483, "nll_loss": 0.586453378200531, "rewards/accuracies": 0.875, "rewards/chosen": -0.008972156792879105, "rewards/margins": 0.06915897130966187, "rewards/rejected": -0.07813112437725067, "step": 2293 }, { "epoch": 1.5864453665283542, "grad_norm": 6.429629802703857, "learning_rate": 4.6741970185953594e-05, "log_odds_chosen": 5.878769874572754, "log_odds_ratio": -0.021112609654664993, "logits/chosen": -0.40205931663513184, "logits/rejected": -0.4707787036895752, "logps/chosen": -0.03028869815170765, "logps/rejected": -1.0147864818572998, "loss": 2.9816, "nll_loss": 0.7433009743690491, "rewards/accuracies": 1.0, "rewards/chosen": -0.00302886962890625, "rewards/margins": 0.09844978153705597, "rewards/rejected": -0.10147865861654282, "step": 2294 }, { "epoch": 1.587136929460581, "grad_norm": 6.87356424331665, "learning_rate": 4.673812816966344e-05, "log_odds_chosen": 4.551303386688232, "log_odds_ratio": -0.3128006160259247, "logits/chosen": -0.6874199509620667, "logits/rejected": -0.7475636005401611, "logps/chosen": -0.0662873387336731, "logps/rejected": -0.806043803691864, "loss": 3.8277, "nll_loss": 0.9256444573402405, "rewards/accuracies": 0.75, "rewards/chosen": -0.0066287340596318245, "rewards/margins": 0.07397565245628357, "rewards/rejected": -0.08060438185930252, "step": 2295 }, { "epoch": 1.5878284923928079, "grad_norm": 3.4723312854766846, "learning_rate": 4.673428615337329e-05, "log_odds_chosen": 5.179519176483154, "log_odds_ratio": -0.09129927307367325, "logits/chosen": -0.6070810556411743, "logits/rejected": -0.5578267574310303, "logps/chosen": -0.03832171857357025, "logps/rejected": -1.030213713645935, "loss": 2.8462, "nll_loss": 0.7024317383766174, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038321721367537975, "rewards/margins": 0.09918919950723648, "rewards/rejected": -0.10302136838436127, "step": 2296 }, { "epoch": 1.5885200553250347, "grad_norm": 5.925631046295166, "learning_rate": 4.6730444137083144e-05, "log_odds_chosen": 6.14210319519043, "log_odds_ratio": -0.0657062903046608, "logits/chosen": -0.20594017207622528, "logits/rejected": -0.23784777522087097, "logps/chosen": -0.031046025454998016, "logps/rejected": -1.0381896495819092, "loss": 2.6941, "nll_loss": 0.6669539213180542, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031046029180288315, "rewards/margins": 0.10071436315774918, "rewards/rejected": -0.10381896048784256, "step": 2297 }, { "epoch": 1.5892116182572615, "grad_norm": 3.7132484912872314, "learning_rate": 4.6726602120792997e-05, "log_odds_chosen": 4.760432720184326, "log_odds_ratio": -0.13704749941825867, "logits/chosen": 0.025912266224622726, "logits/rejected": 0.02450394444167614, "logps/chosen": -0.03368768095970154, "logps/rejected": -0.5429489612579346, "loss": 2.7401, "nll_loss": 0.6713322997093201, "rewards/accuracies": 1.0, "rewards/chosen": -0.003368767909705639, "rewards/margins": 0.050926122814416885, "rewards/rejected": -0.054294899106025696, "step": 2298 }, { "epoch": 1.5899031811894884, "grad_norm": 5.432823181152344, "learning_rate": 4.672276010450284e-05, "log_odds_chosen": 3.6638736724853516, "log_odds_ratio": -0.1098797619342804, "logits/chosen": -0.9263089299201965, "logits/rejected": -0.9255947470664978, "logps/chosen": -0.07125405222177505, "logps/rejected": -0.6752550601959229, "loss": 3.6093, "nll_loss": 0.891340970993042, "rewards/accuracies": 1.0, "rewards/chosen": -0.007125406060367823, "rewards/margins": 0.0604000985622406, "rewards/rejected": -0.06752550601959229, "step": 2299 }, { "epoch": 1.5905947441217152, "grad_norm": 4.5839080810546875, "learning_rate": 4.67189180882127e-05, "log_odds_chosen": 5.678994178771973, "log_odds_ratio": -0.06031135842204094, "logits/chosen": -0.4893847107887268, "logits/rejected": -0.445559024810791, "logps/chosen": -0.04330340772867203, "logps/rejected": -1.07259202003479, "loss": 2.6092, "nll_loss": 0.6462651491165161, "rewards/accuracies": 1.0, "rewards/chosen": -0.00433034123852849, "rewards/margins": 0.10292886942625046, "rewards/rejected": -0.10725921392440796, "step": 2300 }, { "epoch": 1.591286307053942, "grad_norm": 6.613040924072266, "learning_rate": 4.671507607192255e-05, "log_odds_chosen": 4.988267421722412, "log_odds_ratio": -0.11602865904569626, "logits/chosen": -0.5056090354919434, "logits/rejected": -0.4993463158607483, "logps/chosen": -0.0756821483373642, "logps/rejected": -1.1408820152282715, "loss": 3.2067, "nll_loss": 0.790062427520752, "rewards/accuracies": 1.0, "rewards/chosen": -0.007568215020000935, "rewards/margins": 0.10651998221874237, "rewards/rejected": -0.11408819258213043, "step": 2301 }, { "epoch": 1.5919778699861689, "grad_norm": 9.389286041259766, "learning_rate": 4.67112340556324e-05, "log_odds_chosen": 3.63946533203125, "log_odds_ratio": -0.3243602514266968, "logits/chosen": -0.49547529220581055, "logits/rejected": -0.5523756742477417, "logps/chosen": -0.06218154355883598, "logps/rejected": -0.6321157813072205, "loss": 3.8846, "nll_loss": 0.9387215375900269, "rewards/accuracies": 0.75, "rewards/chosen": -0.006218154914677143, "rewards/margins": 0.05699342489242554, "rewards/rejected": -0.06321157515048981, "step": 2302 }, { "epoch": 1.5926694329183957, "grad_norm": 11.416284561157227, "learning_rate": 4.670739203934225e-05, "log_odds_chosen": 5.384055137634277, "log_odds_ratio": -0.1621214896440506, "logits/chosen": -0.5782039165496826, "logits/rejected": -0.6556646823883057, "logps/chosen": -0.0784316435456276, "logps/rejected": -0.9207970499992371, "loss": 3.4856, "nll_loss": 0.8551783561706543, "rewards/accuracies": 1.0, "rewards/chosen": -0.007843165658414364, "rewards/margins": 0.08423653244972229, "rewards/rejected": -0.09207969903945923, "step": 2303 }, { "epoch": 1.5933609958506225, "grad_norm": 7.5037841796875, "learning_rate": 4.67035500230521e-05, "log_odds_chosen": 4.605857849121094, "log_odds_ratio": -0.03979405388236046, "logits/chosen": -0.4579419791698456, "logits/rejected": -0.5394278764724731, "logps/chosen": -0.04164456948637962, "logps/rejected": -0.9381356835365295, "loss": 2.7509, "nll_loss": 0.6837377548217773, "rewards/accuracies": 1.0, "rewards/chosen": -0.004164457321166992, "rewards/margins": 0.08964911848306656, "rewards/rejected": -0.09381356835365295, "step": 2304 }, { "epoch": 1.5940525587828493, "grad_norm": 3.53702712059021, "learning_rate": 4.669970800676195e-05, "log_odds_chosen": 5.398779392242432, "log_odds_ratio": -0.17257985472679138, "logits/chosen": -0.26726865768432617, "logits/rejected": -0.27700862288475037, "logps/chosen": -0.05751689895987511, "logps/rejected": -0.9009071588516235, "loss": 2.3245, "nll_loss": 0.5638747811317444, "rewards/accuracies": 1.0, "rewards/chosen": -0.005751689895987511, "rewards/margins": 0.08433903008699417, "rewards/rejected": -0.09009072184562683, "step": 2305 }, { "epoch": 1.5947441217150762, "grad_norm": 4.662716865539551, "learning_rate": 4.66958659904718e-05, "log_odds_chosen": 6.068386077880859, "log_odds_ratio": -0.05336064100265503, "logits/chosen": -0.10764148831367493, "logits/rejected": -0.13395418226718903, "logps/chosen": -0.013181259855628014, "logps/rejected": -1.0082037448883057, "loss": 2.8182, "nll_loss": 0.6992227435112, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013181259855628014, "rewards/margins": 0.09950225800275803, "rewards/rejected": -0.100820392370224, "step": 2306 }, { "epoch": 1.595435684647303, "grad_norm": 6.559289455413818, "learning_rate": 4.6692023974181655e-05, "log_odds_chosen": 3.6565051078796387, "log_odds_ratio": -0.20271995663642883, "logits/chosen": -0.8742144107818604, "logits/rejected": -0.8906134963035583, "logps/chosen": -0.09067210555076599, "logps/rejected": -0.8885717988014221, "loss": 4.2556, "nll_loss": 1.043622374534607, "rewards/accuracies": 0.875, "rewards/chosen": -0.009067210368812084, "rewards/margins": 0.07978996634483337, "rewards/rejected": -0.08885718882083893, "step": 2307 }, { "epoch": 1.5961272475795298, "grad_norm": 8.234404563903809, "learning_rate": 4.66881819578915e-05, "log_odds_chosen": 5.145881652832031, "log_odds_ratio": -0.3637697100639343, "logits/chosen": -0.661266565322876, "logits/rejected": -0.7174186706542969, "logps/chosen": -0.04645030200481415, "logps/rejected": -0.9846563339233398, "loss": 3.6622, "nll_loss": 0.8791638016700745, "rewards/accuracies": 0.875, "rewards/chosen": -0.004645030479878187, "rewards/margins": 0.09382060915231705, "rewards/rejected": -0.09846563637256622, "step": 2308 }, { "epoch": 1.5968188105117567, "grad_norm": 9.499542236328125, "learning_rate": 4.668433994160136e-05, "log_odds_chosen": 4.3986029624938965, "log_odds_ratio": -0.15495246648788452, "logits/chosen": -0.6596752405166626, "logits/rejected": -0.667199969291687, "logps/chosen": -0.05631183832883835, "logps/rejected": -0.6967073082923889, "loss": 4.3779, "nll_loss": 1.0789897441864014, "rewards/accuracies": 1.0, "rewards/chosen": -0.005631184205412865, "rewards/margins": 0.06403955072164536, "rewards/rejected": -0.06967072933912277, "step": 2309 }, { "epoch": 1.5975103734439835, "grad_norm": 4.345082759857178, "learning_rate": 4.6680497925311206e-05, "log_odds_chosen": 6.556073188781738, "log_odds_ratio": -0.12552745640277863, "logits/chosen": -0.3318266272544861, "logits/rejected": -0.33238643407821655, "logps/chosen": -0.019965466111898422, "logps/rejected": -0.7021166086196899, "loss": 2.5127, "nll_loss": 0.6156325340270996, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019965465180575848, "rewards/margins": 0.06821511685848236, "rewards/rejected": -0.07021167129278183, "step": 2310 }, { "epoch": 1.5982019363762103, "grad_norm": 8.282541275024414, "learning_rate": 4.667665590902106e-05, "log_odds_chosen": 7.438543319702148, "log_odds_ratio": -0.25406116247177124, "logits/chosen": -0.45235535502433777, "logits/rejected": -0.5772778391838074, "logps/chosen": -0.05688957870006561, "logps/rejected": -1.2933604717254639, "loss": 3.1258, "nll_loss": 0.7560532093048096, "rewards/accuracies": 0.875, "rewards/chosen": -0.005688957870006561, "rewards/margins": 0.12364709377288818, "rewards/rejected": -0.12933605909347534, "step": 2311 }, { "epoch": 1.5988934993084372, "grad_norm": 11.441560745239258, "learning_rate": 4.667281389273091e-05, "log_odds_chosen": 5.331631660461426, "log_odds_ratio": -0.3978029191493988, "logits/chosen": -0.4594797194004059, "logits/rejected": -0.5356467962265015, "logps/chosen": -0.06144750118255615, "logps/rejected": -0.762560248374939, "loss": 2.632, "nll_loss": 0.6182161569595337, "rewards/accuracies": 0.875, "rewards/chosen": -0.0061447499319911, "rewards/margins": 0.07011127471923828, "rewards/rejected": -0.07625602185726166, "step": 2312 }, { "epoch": 1.599585062240664, "grad_norm": 7.113100051879883, "learning_rate": 4.6668971876440756e-05, "log_odds_chosen": 5.57823371887207, "log_odds_ratio": -0.024948718026280403, "logits/chosen": -0.4611778259277344, "logits/rejected": -0.5264756679534912, "logps/chosen": -0.016496511176228523, "logps/rejected": -0.930469274520874, "loss": 3.3915, "nll_loss": 0.8453721404075623, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016496512107551098, "rewards/margins": 0.09139727056026459, "rewards/rejected": -0.09304693341255188, "step": 2313 }, { "epoch": 1.6002766251728908, "grad_norm": 8.255138397216797, "learning_rate": 4.666512986015061e-05, "log_odds_chosen": 5.406302452087402, "log_odds_ratio": -0.13809970021247864, "logits/chosen": -0.5205468535423279, "logits/rejected": -0.5812825560569763, "logps/chosen": -0.09680378437042236, "logps/rejected": -0.9270695447921753, "loss": 3.6517, "nll_loss": 0.8991074562072754, "rewards/accuracies": 1.0, "rewards/chosen": -0.009680378250777721, "rewards/margins": 0.08302658051252365, "rewards/rejected": -0.09270695596933365, "step": 2314 }, { "epoch": 1.6009681881051177, "grad_norm": 8.078280448913574, "learning_rate": 4.666128784386046e-05, "log_odds_chosen": 5.976874828338623, "log_odds_ratio": -0.13946348428726196, "logits/chosen": -0.45572811365127563, "logits/rejected": -0.4910028576850891, "logps/chosen": -0.0380314439535141, "logps/rejected": -1.0345638990402222, "loss": 4.0855, "nll_loss": 1.0074275732040405, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038031444419175386, "rewards/margins": 0.09965324401855469, "rewards/rejected": -0.10345638543367386, "step": 2315 }, { "epoch": 1.6016597510373445, "grad_norm": 9.3878173828125, "learning_rate": 4.6657445827570313e-05, "log_odds_chosen": 6.526431083679199, "log_odds_ratio": -0.07762724161148071, "logits/chosen": -0.20644918084144592, "logits/rejected": -0.2773859202861786, "logps/chosen": -0.046094466000795364, "logps/rejected": -1.1674156188964844, "loss": 3.4912, "nll_loss": 0.8650420308113098, "rewards/accuracies": 1.0, "rewards/chosen": -0.004609446506947279, "rewards/margins": 0.11213213205337524, "rewards/rejected": -0.11674156785011292, "step": 2316 }, { "epoch": 1.6023513139695713, "grad_norm": 7.8561577796936035, "learning_rate": 4.665360381128016e-05, "log_odds_chosen": 2.670375347137451, "log_odds_ratio": -0.47158828377723694, "logits/chosen": -0.4281069040298462, "logits/rejected": -0.46524304151535034, "logps/chosen": -0.1585179567337036, "logps/rejected": -0.8629575967788696, "loss": 3.2523, "nll_loss": 0.7659047245979309, "rewards/accuracies": 0.75, "rewards/chosen": -0.01585179753601551, "rewards/margins": 0.07044396549463272, "rewards/rejected": -0.08629576116800308, "step": 2317 }, { "epoch": 1.6030428769017981, "grad_norm": 5.324893474578857, "learning_rate": 4.664976179499002e-05, "log_odds_chosen": 5.277317047119141, "log_odds_ratio": -0.0963197872042656, "logits/chosen": -0.3024168908596039, "logits/rejected": -0.28410249948501587, "logps/chosen": -0.09467468410730362, "logps/rejected": -0.673798680305481, "loss": 2.8702, "nll_loss": 0.7079252004623413, "rewards/accuracies": 1.0, "rewards/chosen": -0.009467468596994877, "rewards/margins": 0.05791240185499191, "rewards/rejected": -0.06737986952066422, "step": 2318 }, { "epoch": 1.603734439834025, "grad_norm": 19.815616607666016, "learning_rate": 4.6645919778699864e-05, "log_odds_chosen": 3.646332025527954, "log_odds_ratio": -0.9083267450332642, "logits/chosen": -0.5500541925430298, "logits/rejected": -0.5560811161994934, "logps/chosen": -0.16285112500190735, "logps/rejected": -0.5417895317077637, "loss": 3.3589, "nll_loss": 0.748899519443512, "rewards/accuracies": 0.875, "rewards/chosen": -0.016285112127661705, "rewards/margins": 0.03789384663105011, "rewards/rejected": -0.05417895317077637, "step": 2319 }, { "epoch": 1.6044260027662518, "grad_norm": 6.896365642547607, "learning_rate": 4.6642077762409716e-05, "log_odds_chosen": 6.688531875610352, "log_odds_ratio": -0.03402579948306084, "logits/chosen": -0.4911445379257202, "logits/rejected": -0.5525773763656616, "logps/chosen": -0.057118237018585205, "logps/rejected": -1.2721749544143677, "loss": 2.672, "nll_loss": 0.6646054983139038, "rewards/accuracies": 1.0, "rewards/chosen": -0.005711824167519808, "rewards/margins": 0.12150569260120392, "rewards/rejected": -0.12721750140190125, "step": 2320 }, { "epoch": 1.6051175656984786, "grad_norm": 9.04888916015625, "learning_rate": 4.663823574611957e-05, "log_odds_chosen": 2.932542085647583, "log_odds_ratio": -0.4230884313583374, "logits/chosen": -0.16413822770118713, "logits/rejected": -0.09653112292289734, "logps/chosen": -0.0899929478764534, "logps/rejected": -0.42458221316337585, "loss": 2.9287, "nll_loss": 0.6898687481880188, "rewards/accuracies": 0.75, "rewards/chosen": -0.008999294601380825, "rewards/margins": 0.033458929508924484, "rewards/rejected": -0.042458221316337585, "step": 2321 }, { "epoch": 1.6058091286307055, "grad_norm": 10.678813934326172, "learning_rate": 4.6634393729829415e-05, "log_odds_chosen": 3.87385630607605, "log_odds_ratio": -0.39904534816741943, "logits/chosen": -0.43692782521247864, "logits/rejected": -0.4507974088191986, "logps/chosen": -0.20758147537708282, "logps/rejected": -0.8612111210823059, "loss": 3.9424, "nll_loss": 0.9456848502159119, "rewards/accuracies": 0.875, "rewards/chosen": -0.020758148282766342, "rewards/margins": 0.06536296010017395, "rewards/rejected": -0.08612111210823059, "step": 2322 }, { "epoch": 1.6065006915629323, "grad_norm": 11.248788833618164, "learning_rate": 4.663055171353927e-05, "log_odds_chosen": 6.452774524688721, "log_odds_ratio": -0.08504676818847656, "logits/chosen": -0.31893569231033325, "logits/rejected": -0.40124383568763733, "logps/chosen": -0.1601179838180542, "logps/rejected": -1.3543026447296143, "loss": 4.2856, "nll_loss": 1.0629040002822876, "rewards/accuracies": 1.0, "rewards/chosen": -0.01601179875433445, "rewards/margins": 0.11941846460103989, "rewards/rejected": -0.1354302614927292, "step": 2323 }, { "epoch": 1.6071922544951591, "grad_norm": 7.978420734405518, "learning_rate": 4.662670969724912e-05, "log_odds_chosen": 4.781014919281006, "log_odds_ratio": -0.07580827176570892, "logits/chosen": -0.46464425325393677, "logits/rejected": -0.5390160083770752, "logps/chosen": -0.010720476508140564, "logps/rejected": -0.6336989402770996, "loss": 2.9299, "nll_loss": 0.7249056100845337, "rewards/accuracies": 1.0, "rewards/chosen": -0.001072047627530992, "rewards/margins": 0.06229785829782486, "rewards/rejected": -0.06336989998817444, "step": 2324 }, { "epoch": 1.607883817427386, "grad_norm": 6.0682501792907715, "learning_rate": 4.662286768095897e-05, "log_odds_chosen": 7.181793212890625, "log_odds_ratio": -0.013707602396607399, "logits/chosen": -0.3983333110809326, "logits/rejected": -0.40123891830444336, "logps/chosen": -0.004066762514412403, "logps/rejected": -0.80652916431427, "loss": 3.2499, "nll_loss": 0.8111165165901184, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004066762630827725, "rewards/margins": 0.08024624735116959, "rewards/rejected": -0.08065292239189148, "step": 2325 }, { "epoch": 1.6085753803596128, "grad_norm": 9.307788848876953, "learning_rate": 4.661902566466882e-05, "log_odds_chosen": 3.275400161743164, "log_odds_ratio": -0.3779076933860779, "logits/chosen": -0.293110728263855, "logits/rejected": -0.3226720690727234, "logps/chosen": -0.11518510431051254, "logps/rejected": -0.6717798709869385, "loss": 3.6997, "nll_loss": 0.8871325254440308, "rewards/accuracies": 0.875, "rewards/chosen": -0.011518510058522224, "rewards/margins": 0.05565947666764259, "rewards/rejected": -0.06717798113822937, "step": 2326 }, { "epoch": 1.6092669432918396, "grad_norm": 5.229033470153809, "learning_rate": 4.661518364837868e-05, "log_odds_chosen": 5.4648237228393555, "log_odds_ratio": -0.09957113116979599, "logits/chosen": -0.5804209113121033, "logits/rejected": -0.5788403749465942, "logps/chosen": -0.05049334466457367, "logps/rejected": -0.8705024719238281, "loss": 2.1554, "nll_loss": 0.5288969278335571, "rewards/accuracies": 1.0, "rewards/chosen": -0.005049334838986397, "rewards/margins": 0.08200091123580933, "rewards/rejected": -0.08705024421215057, "step": 2327 }, { "epoch": 1.6099585062240664, "grad_norm": 6.291632652282715, "learning_rate": 4.661134163208852e-05, "log_odds_chosen": 4.770295143127441, "log_odds_ratio": -0.3147009313106537, "logits/chosen": -0.4132739305496216, "logits/rejected": -0.4148447811603546, "logps/chosen": -0.061191972345113754, "logps/rejected": -1.0149085521697998, "loss": 2.1178, "nll_loss": 0.4979802966117859, "rewards/accuracies": 0.875, "rewards/chosen": -0.006119197234511375, "rewards/margins": 0.09537166357040405, "rewards/rejected": -0.10149086266756058, "step": 2328 }, { "epoch": 1.6106500691562933, "grad_norm": 7.1284918785095215, "learning_rate": 4.6607499615798375e-05, "log_odds_chosen": 4.904767990112305, "log_odds_ratio": -0.08909415453672409, "logits/chosen": -0.25764161348342896, "logits/rejected": -0.3145712912082672, "logps/chosen": -0.03883281722664833, "logps/rejected": -0.816331148147583, "loss": 3.3219, "nll_loss": 0.821575403213501, "rewards/accuracies": 1.0, "rewards/chosen": -0.003883281722664833, "rewards/margins": 0.0777498334646225, "rewards/rejected": -0.08163312077522278, "step": 2329 }, { "epoch": 1.61134163208852, "grad_norm": 6.315418720245361, "learning_rate": 4.660365759950822e-05, "log_odds_chosen": 5.398120403289795, "log_odds_ratio": -0.2213883101940155, "logits/chosen": -0.631862998008728, "logits/rejected": -0.6522068977355957, "logps/chosen": -0.08457255363464355, "logps/rejected": -0.718346118927002, "loss": 2.8186, "nll_loss": 0.6825187802314758, "rewards/accuracies": 0.875, "rewards/chosen": -0.00845725554972887, "rewards/margins": 0.06337735801935196, "rewards/rejected": -0.07183460891246796, "step": 2330 }, { "epoch": 1.612033195020747, "grad_norm": 5.7349324226379395, "learning_rate": 4.659981558321807e-05, "log_odds_chosen": 6.114752769470215, "log_odds_ratio": -0.02867019921541214, "logits/chosen": -0.3078418970108032, "logits/rejected": -0.30085307359695435, "logps/chosen": -0.04859218746423721, "logps/rejected": -1.2826318740844727, "loss": 2.4187, "nll_loss": 0.6017983555793762, "rewards/accuracies": 1.0, "rewards/chosen": -0.0048592183738946915, "rewards/margins": 0.12340398877859116, "rewards/rejected": -0.1282632052898407, "step": 2331 }, { "epoch": 1.6127247579529738, "grad_norm": 17.164234161376953, "learning_rate": 4.6595973566927925e-05, "log_odds_chosen": 4.351862907409668, "log_odds_ratio": -0.16593973338603973, "logits/chosen": -0.6303444504737854, "logits/rejected": -0.6840345859527588, "logps/chosen": -0.0819045901298523, "logps/rejected": -0.631934404373169, "loss": 3.2901, "nll_loss": 0.8059207201004028, "rewards/accuracies": 0.875, "rewards/chosen": -0.008190459571778774, "rewards/margins": 0.055002983659505844, "rewards/rejected": -0.0631934404373169, "step": 2332 }, { "epoch": 1.6134163208852006, "grad_norm": 8.640056610107422, "learning_rate": 4.659213155063777e-05, "log_odds_chosen": 5.334671974182129, "log_odds_ratio": -0.06108527630567551, "logits/chosen": -0.41245636343955994, "logits/rejected": -0.4462578296661377, "logps/chosen": -0.02963314950466156, "logps/rejected": -0.7621734738349915, "loss": 3.773, "nll_loss": 0.9371405839920044, "rewards/accuracies": 1.0, "rewards/chosen": -0.002963314764201641, "rewards/margins": 0.07325402647256851, "rewards/rejected": -0.07621734589338303, "step": 2333 }, { "epoch": 1.6141078838174274, "grad_norm": 6.361103534698486, "learning_rate": 4.658828953434763e-05, "log_odds_chosen": 4.108833312988281, "log_odds_ratio": -0.08453751355409622, "logits/chosen": -0.4442722201347351, "logits/rejected": -0.45774152874946594, "logps/chosen": -0.04168268293142319, "logps/rejected": -0.7075502872467041, "loss": 3.2087, "nll_loss": 0.7937202453613281, "rewards/accuracies": 1.0, "rewards/chosen": -0.004168268758803606, "rewards/margins": 0.06658677011728287, "rewards/rejected": -0.07075503468513489, "step": 2334 }, { "epoch": 1.6147994467496543, "grad_norm": 8.147212028503418, "learning_rate": 4.6584447518057476e-05, "log_odds_chosen": 5.207988739013672, "log_odds_ratio": -0.06420108675956726, "logits/chosen": -0.5827217698097229, "logits/rejected": -0.6342862844467163, "logps/chosen": -0.02741215191781521, "logps/rejected": -0.8105305433273315, "loss": 3.8817, "nll_loss": 0.9639945030212402, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027412152849137783, "rewards/margins": 0.07831183820962906, "rewards/rejected": -0.08105304837226868, "step": 2335 }, { "epoch": 1.615491009681881, "grad_norm": 9.331297874450684, "learning_rate": 4.658060550176733e-05, "log_odds_chosen": 5.364526271820068, "log_odds_ratio": -0.6092586517333984, "logits/chosen": -0.430431991815567, "logits/rejected": -0.43262892961502075, "logps/chosen": -0.0933023989200592, "logps/rejected": -0.8510630130767822, "loss": 4.0586, "nll_loss": 0.9537221193313599, "rewards/accuracies": 0.875, "rewards/chosen": -0.009330240078270435, "rewards/margins": 0.07577606290578842, "rewards/rejected": -0.08510630577802658, "step": 2336 }, { "epoch": 1.616182572614108, "grad_norm": 4.386343955993652, "learning_rate": 4.657676348547718e-05, "log_odds_chosen": 4.233821868896484, "log_odds_ratio": -0.27742135524749756, "logits/chosen": -0.6338566541671753, "logits/rejected": -0.6705052852630615, "logps/chosen": -0.08768070489168167, "logps/rejected": -0.7955862879753113, "loss": 2.8503, "nll_loss": 0.6848416924476624, "rewards/accuracies": 0.875, "rewards/chosen": -0.008768070489168167, "rewards/margins": 0.07079055905342102, "rewards/rejected": -0.07955863326787949, "step": 2337 }, { "epoch": 1.6168741355463347, "grad_norm": 6.712360858917236, "learning_rate": 4.657292146918703e-05, "log_odds_chosen": 4.783242225646973, "log_odds_ratio": -0.3336433172225952, "logits/chosen": -0.41855043172836304, "logits/rejected": -0.474947988986969, "logps/chosen": -0.0697491243481636, "logps/rejected": -0.8092215657234192, "loss": 3.5056, "nll_loss": 0.8430478572845459, "rewards/accuracies": 0.75, "rewards/chosen": -0.00697491317987442, "rewards/margins": 0.0739472508430481, "rewards/rejected": -0.08092215657234192, "step": 2338 }, { "epoch": 1.6175656984785616, "grad_norm": 6.100496292114258, "learning_rate": 4.656907945289688e-05, "log_odds_chosen": 6.144561767578125, "log_odds_ratio": -0.15024614334106445, "logits/chosen": -0.35080528259277344, "logits/rejected": -0.36896559596061707, "logps/chosen": -0.05333162844181061, "logps/rejected": -1.3443855047225952, "loss": 3.5222, "nll_loss": 0.8655195236206055, "rewards/accuracies": 0.875, "rewards/chosen": -0.005333162844181061, "rewards/margins": 0.12910538911819458, "rewards/rejected": -0.13443854451179504, "step": 2339 }, { "epoch": 1.6182572614107884, "grad_norm": 14.227270126342773, "learning_rate": 4.656523743660673e-05, "log_odds_chosen": 3.9271492958068848, "log_odds_ratio": -0.31890344619750977, "logits/chosen": -0.368779718875885, "logits/rejected": -0.41777634620666504, "logps/chosen": -0.07359308004379272, "logps/rejected": -0.626854658126831, "loss": 4.2403, "nll_loss": 1.028173804283142, "rewards/accuracies": 0.75, "rewards/chosen": -0.007359308190643787, "rewards/margins": 0.055326156318187714, "rewards/rejected": -0.06268545985221863, "step": 2340 }, { "epoch": 1.6189488243430152, "grad_norm": 7.9860005378723145, "learning_rate": 4.6561395420316584e-05, "log_odds_chosen": 4.5483598709106445, "log_odds_ratio": -0.15386539697647095, "logits/chosen": -0.4116964042186737, "logits/rejected": -0.47240209579467773, "logps/chosen": -0.052656978368759155, "logps/rejected": -0.7900266051292419, "loss": 3.0385, "nll_loss": 0.7442415952682495, "rewards/accuracies": 0.875, "rewards/chosen": -0.0052656978368759155, "rewards/margins": 0.07373696565628052, "rewards/rejected": -0.07900266349315643, "step": 2341 }, { "epoch": 1.619640387275242, "grad_norm": 3.6560215950012207, "learning_rate": 4.655755340402643e-05, "log_odds_chosen": 5.275965690612793, "log_odds_ratio": -0.13455995917320251, "logits/chosen": -0.23229531943798065, "logits/rejected": -0.17382174730300903, "logps/chosen": -0.050933949649333954, "logps/rejected": -1.087402582168579, "loss": 2.7595, "nll_loss": 0.6764070391654968, "rewards/accuracies": 0.875, "rewards/chosen": -0.0050933947786688805, "rewards/margins": 0.10364687442779541, "rewards/rejected": -0.10874027013778687, "step": 2342 }, { "epoch": 1.620331950207469, "grad_norm": 6.554387092590332, "learning_rate": 4.655371138773629e-05, "log_odds_chosen": 5.708447456359863, "log_odds_ratio": -0.032198466360569, "logits/chosen": -0.35663536190986633, "logits/rejected": -0.4237971305847168, "logps/chosen": -0.0359690859913826, "logps/rejected": -0.9435381889343262, "loss": 2.962, "nll_loss": 0.7372850179672241, "rewards/accuracies": 1.0, "rewards/chosen": -0.003596908412873745, "rewards/margins": 0.09075691550970078, "rewards/rejected": -0.0943538174033165, "step": 2343 }, { "epoch": 1.6210235131396957, "grad_norm": 9.658689498901367, "learning_rate": 4.6549869371446134e-05, "log_odds_chosen": 5.208951950073242, "log_odds_ratio": -0.1817733198404312, "logits/chosen": -0.14689387381076813, "logits/rejected": -0.22184035181999207, "logps/chosen": -0.05039520561695099, "logps/rejected": -1.0678006410598755, "loss": 4.0494, "nll_loss": 0.9941627979278564, "rewards/accuracies": 1.0, "rewards/chosen": -0.005039520561695099, "rewards/margins": 0.10174053907394409, "rewards/rejected": -0.10678005963563919, "step": 2344 }, { "epoch": 1.6217150760719226, "grad_norm": 8.730910301208496, "learning_rate": 4.654602735515599e-05, "log_odds_chosen": 4.14716911315918, "log_odds_ratio": -0.2981174886226654, "logits/chosen": -0.8712558746337891, "logits/rejected": -0.8630931377410889, "logps/chosen": -0.07158464193344116, "logps/rejected": -0.6259889006614685, "loss": 4.7996, "nll_loss": 1.1700934171676636, "rewards/accuracies": 0.75, "rewards/chosen": -0.007158464286476374, "rewards/margins": 0.055440425872802734, "rewards/rejected": -0.06259889155626297, "step": 2345 }, { "epoch": 1.6224066390041494, "grad_norm": 5.973324775695801, "learning_rate": 4.654218533886584e-05, "log_odds_chosen": 5.6539106369018555, "log_odds_ratio": -0.31066593527793884, "logits/chosen": -0.2996593415737152, "logits/rejected": -0.3607049584388733, "logps/chosen": -0.1110413447022438, "logps/rejected": -0.9045712947845459, "loss": 2.8255, "nll_loss": 0.6753115057945251, "rewards/accuracies": 0.875, "rewards/chosen": -0.01110413484275341, "rewards/margins": 0.07935299724340439, "rewards/rejected": -0.09045712649822235, "step": 2346 }, { "epoch": 1.6230982019363762, "grad_norm": 7.056889533996582, "learning_rate": 4.653834332257569e-05, "log_odds_chosen": 4.830997943878174, "log_odds_ratio": -0.04605808109045029, "logits/chosen": -0.4083777666091919, "logits/rejected": -0.4561671316623688, "logps/chosen": -0.04745990410447121, "logps/rejected": -0.9793378114700317, "loss": 3.1702, "nll_loss": 0.7879566550254822, "rewards/accuracies": 1.0, "rewards/chosen": -0.004745990503579378, "rewards/margins": 0.09318779408931732, "rewards/rejected": -0.09793378412723541, "step": 2347 }, { "epoch": 1.623789764868603, "grad_norm": 8.9386625289917, "learning_rate": 4.653450130628554e-05, "log_odds_chosen": 3.8049674034118652, "log_odds_ratio": -0.24610795080661774, "logits/chosen": -0.6960456371307373, "logits/rejected": -0.6541339159011841, "logps/chosen": -0.05932285264134407, "logps/rejected": -0.9324048757553101, "loss": 2.891, "nll_loss": 0.6981493234634399, "rewards/accuracies": 0.875, "rewards/chosen": -0.005932285450398922, "rewards/margins": 0.08730820566415787, "rewards/rejected": -0.09324049204587936, "step": 2348 }, { "epoch": 1.6244813278008299, "grad_norm": 8.542035102844238, "learning_rate": 4.653065928999539e-05, "log_odds_chosen": 3.459298610687256, "log_odds_ratio": -0.5191382169723511, "logits/chosen": -0.5135226845741272, "logits/rejected": -0.4982113242149353, "logps/chosen": -0.11870171129703522, "logps/rejected": -0.684704601764679, "loss": 2.5091, "nll_loss": 0.5753598213195801, "rewards/accuracies": 0.75, "rewards/chosen": -0.011870170943439007, "rewards/margins": 0.05660029500722885, "rewards/rejected": -0.06847047060728073, "step": 2349 }, { "epoch": 1.6251728907330567, "grad_norm": 7.4994025230407715, "learning_rate": 4.652681727370524e-05, "log_odds_chosen": 3.385531187057495, "log_odds_ratio": -0.28039830923080444, "logits/chosen": -0.47290781140327454, "logits/rejected": -0.5128310918807983, "logps/chosen": -0.07095737010240555, "logps/rejected": -0.6914070844650269, "loss": 3.6671, "nll_loss": 0.8887372016906738, "rewards/accuracies": 0.875, "rewards/chosen": -0.007095737382769585, "rewards/margins": 0.06204497814178467, "rewards/rejected": -0.0691407173871994, "step": 2350 }, { "epoch": 1.6258644536652835, "grad_norm": 5.9747538566589355, "learning_rate": 4.652297525741509e-05, "log_odds_chosen": 4.028547286987305, "log_odds_ratio": -0.167395681142807, "logits/chosen": -0.8783102035522461, "logits/rejected": -0.9642181396484375, "logps/chosen": -0.04337505251169205, "logps/rejected": -0.6610270142555237, "loss": 3.3736, "nll_loss": 0.8266501426696777, "rewards/accuracies": 1.0, "rewards/chosen": -0.00433750543743372, "rewards/margins": 0.06176519766449928, "rewards/rejected": -0.06610269844532013, "step": 2351 }, { "epoch": 1.6265560165975104, "grad_norm": 6.233641147613525, "learning_rate": 4.651913324112495e-05, "log_odds_chosen": 4.822110176086426, "log_odds_ratio": -0.1332874894142151, "logits/chosen": -0.3468000292778015, "logits/rejected": -0.3059294819831848, "logps/chosen": -0.048264387995004654, "logps/rejected": -0.8635885715484619, "loss": 2.5327, "nll_loss": 0.6198562979698181, "rewards/accuracies": 1.0, "rewards/chosen": -0.0048264386132359505, "rewards/margins": 0.08153241872787476, "rewards/rejected": -0.08635886013507843, "step": 2352 }, { "epoch": 1.6272475795297372, "grad_norm": 7.80859375, "learning_rate": 4.651529122483479e-05, "log_odds_chosen": 6.187796115875244, "log_odds_ratio": -0.13487344980239868, "logits/chosen": -0.15797224640846252, "logits/rejected": -0.1921594738960266, "logps/chosen": -0.033555012196302414, "logps/rejected": -0.8472077250480652, "loss": 3.7709, "nll_loss": 0.9292386770248413, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033555012196302414, "rewards/margins": 0.08136527240276337, "rewards/rejected": -0.08472077548503876, "step": 2353 }, { "epoch": 1.627939142461964, "grad_norm": 7.190445423126221, "learning_rate": 4.6511449208544645e-05, "log_odds_chosen": 3.892343521118164, "log_odds_ratio": -0.6625378131866455, "logits/chosen": -0.41133368015289307, "logits/rejected": -0.43225544691085815, "logps/chosen": -0.0741141065955162, "logps/rejected": -0.48772764205932617, "loss": 4.0325, "nll_loss": 0.9418785572052002, "rewards/accuracies": 0.75, "rewards/chosen": -0.00741141103208065, "rewards/margins": 0.041361354291439056, "rewards/rejected": -0.048772767186164856, "step": 2354 }, { "epoch": 1.6286307053941909, "grad_norm": 11.111103057861328, "learning_rate": 4.65076071922545e-05, "log_odds_chosen": 3.0443787574768066, "log_odds_ratio": -0.6435021162033081, "logits/chosen": -0.6717063188552856, "logits/rejected": -0.6969423294067383, "logps/chosen": -0.1275130808353424, "logps/rejected": -0.6429640054702759, "loss": 3.7216, "nll_loss": 0.8660593628883362, "rewards/accuracies": 0.75, "rewards/chosen": -0.01275130920112133, "rewards/margins": 0.05154508352279663, "rewards/rejected": -0.06429639458656311, "step": 2355 }, { "epoch": 1.6293222683264177, "grad_norm": 7.198049545288086, "learning_rate": 4.650376517596435e-05, "log_odds_chosen": 8.301595687866211, "log_odds_ratio": -0.0018735526828095317, "logits/chosen": -0.18375827372074127, "logits/rejected": -0.2660870850086212, "logps/chosen": -0.002222995739430189, "logps/rejected": -0.9437817931175232, "loss": 2.0797, "nll_loss": 0.5197421312332153, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022229959722608328, "rewards/margins": 0.09415587782859802, "rewards/rejected": -0.09437818080186844, "step": 2356 }, { "epoch": 1.6300138312586445, "grad_norm": 4.328792572021484, "learning_rate": 4.6499923159674196e-05, "log_odds_chosen": 4.548550605773926, "log_odds_ratio": -0.16368317604064941, "logits/chosen": -0.19425427913665771, "logits/rejected": -0.26107314229011536, "logps/chosen": -0.04027582332491875, "logps/rejected": -0.6658027172088623, "loss": 2.533, "nll_loss": 0.6168872714042664, "rewards/accuracies": 0.875, "rewards/chosen": -0.004027582239359617, "rewards/margins": 0.06255269795656204, "rewards/rejected": -0.06658028066158295, "step": 2357 }, { "epoch": 1.6307053941908713, "grad_norm": 8.348052024841309, "learning_rate": 4.649608114338405e-05, "log_odds_chosen": 4.052003383636475, "log_odds_ratio": -0.42345088720321655, "logits/chosen": -0.5440030694007874, "logits/rejected": -0.5415800213813782, "logps/chosen": -0.10518966615200043, "logps/rejected": -0.7477426528930664, "loss": 3.023, "nll_loss": 0.7134120464324951, "rewards/accuracies": 0.875, "rewards/chosen": -0.010518967173993587, "rewards/margins": 0.06425529718399048, "rewards/rejected": -0.07477426528930664, "step": 2358 }, { "epoch": 1.6313969571230982, "grad_norm": 4.577966213226318, "learning_rate": 4.64922391270939e-05, "log_odds_chosen": 4.806175708770752, "log_odds_ratio": -0.07519068568944931, "logits/chosen": -0.40390050411224365, "logits/rejected": -0.44469988346099854, "logps/chosen": -0.08716096729040146, "logps/rejected": -0.8027600646018982, "loss": 2.4672, "nll_loss": 0.6092922687530518, "rewards/accuracies": 1.0, "rewards/chosen": -0.008716096170246601, "rewards/margins": 0.07155991345643997, "rewards/rejected": -0.0802760124206543, "step": 2359 }, { "epoch": 1.632088520055325, "grad_norm": 6.8378705978393555, "learning_rate": 4.6488397110803746e-05, "log_odds_chosen": 5.506162643432617, "log_odds_ratio": -0.03466683626174927, "logits/chosen": -0.32365161180496216, "logits/rejected": -0.2994399964809418, "logps/chosen": -0.01583227887749672, "logps/rejected": -0.6344969272613525, "loss": 2.9468, "nll_loss": 0.7332226037979126, "rewards/accuracies": 1.0, "rewards/chosen": -0.001583227887749672, "rewards/margins": 0.06186646223068237, "rewards/rejected": -0.0634496882557869, "step": 2360 }, { "epoch": 1.6327800829875518, "grad_norm": 6.263278007507324, "learning_rate": 4.6484555094513606e-05, "log_odds_chosen": 3.8691322803497314, "log_odds_ratio": -0.23683586716651917, "logits/chosen": -0.5043981075286865, "logits/rejected": -0.5182955265045166, "logps/chosen": -0.07492068409919739, "logps/rejected": -0.6660391092300415, "loss": 3.5599, "nll_loss": 0.8662796020507812, "rewards/accuracies": 0.875, "rewards/chosen": -0.007492068689316511, "rewards/margins": 0.05911184102296829, "rewards/rejected": -0.06660391390323639, "step": 2361 }, { "epoch": 1.6334716459197787, "grad_norm": 7.12549352645874, "learning_rate": 4.648071307822345e-05, "log_odds_chosen": 2.8235957622528076, "log_odds_ratio": -0.1785672903060913, "logits/chosen": -0.43651118874549866, "logits/rejected": -0.45994648337364197, "logps/chosen": -0.04840053990483284, "logps/rejected": -0.43135061860084534, "loss": 3.0901, "nll_loss": 0.7546652555465698, "rewards/accuracies": 1.0, "rewards/chosen": -0.004840054549276829, "rewards/margins": 0.03829500824213028, "rewards/rejected": -0.043135061860084534, "step": 2362 }, { "epoch": 1.6341632088520055, "grad_norm": 7.425775051116943, "learning_rate": 4.6476871061933304e-05, "log_odds_chosen": 4.484004020690918, "log_odds_ratio": -0.08991587907075882, "logits/chosen": -0.5775189399719238, "logits/rejected": -0.6644449234008789, "logps/chosen": -0.035481616854667664, "logps/rejected": -0.8728645443916321, "loss": 3.218, "nll_loss": 0.7955026030540466, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035481618251651525, "rewards/margins": 0.0837383046746254, "rewards/rejected": -0.08728646486997604, "step": 2363 }, { "epoch": 1.6348547717842323, "grad_norm": 9.061689376831055, "learning_rate": 4.6473029045643156e-05, "log_odds_chosen": 2.770498752593994, "log_odds_ratio": -0.5221116542816162, "logits/chosen": -0.9692656993865967, "logits/rejected": -0.9988617897033691, "logps/chosen": -0.13199469447135925, "logps/rejected": -0.6178169250488281, "loss": 4.2676, "nll_loss": 1.0146853923797607, "rewards/accuracies": 0.875, "rewards/chosen": -0.013199469074606895, "rewards/margins": 0.04858222231268883, "rewards/rejected": -0.06178169324994087, "step": 2364 }, { "epoch": 1.6355463347164592, "grad_norm": 8.996692657470703, "learning_rate": 4.646918702935301e-05, "log_odds_chosen": 6.133260250091553, "log_odds_ratio": -0.009358121082186699, "logits/chosen": -0.6405230760574341, "logits/rejected": -0.7012723088264465, "logps/chosen": -0.005448098760098219, "logps/rejected": -1.0479671955108643, "loss": 4.8104, "nll_loss": 1.2016545534133911, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005448098527267575, "rewards/margins": 0.1042519062757492, "rewards/rejected": -0.10479672253131866, "step": 2365 }, { "epoch": 1.636237897648686, "grad_norm": 9.420392990112305, "learning_rate": 4.6465345013062854e-05, "log_odds_chosen": 6.216562271118164, "log_odds_ratio": -0.010765348561108112, "logits/chosen": -0.2744053602218628, "logits/rejected": -0.40222588181495667, "logps/chosen": -0.009755797684192657, "logps/rejected": -1.333965539932251, "loss": 3.2282, "nll_loss": 0.8059628009796143, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009755798382684588, "rewards/margins": 0.13242097198963165, "rewards/rejected": -0.13339656591415405, "step": 2366 }, { "epoch": 1.6369294605809128, "grad_norm": 7.960176467895508, "learning_rate": 4.646150299677271e-05, "log_odds_chosen": 3.522726535797119, "log_odds_ratio": -0.3352411389350891, "logits/chosen": -0.41156286001205444, "logits/rejected": -0.4562080204486847, "logps/chosen": -0.09292282164096832, "logps/rejected": -0.55586838722229, "loss": 2.4371, "nll_loss": 0.5757578611373901, "rewards/accuracies": 0.875, "rewards/chosen": -0.009292282164096832, "rewards/margins": 0.04629455506801605, "rewards/rejected": -0.055586837232112885, "step": 2367 }, { "epoch": 1.6376210235131397, "grad_norm": 8.390948295593262, "learning_rate": 4.645766098048256e-05, "log_odds_chosen": 5.556153774261475, "log_odds_ratio": -0.1825868785381317, "logits/chosen": -0.26770249009132385, "logits/rejected": -0.32716527581214905, "logps/chosen": -0.024412261322140694, "logps/rejected": -0.647440493106842, "loss": 2.921, "nll_loss": 0.7120010256767273, "rewards/accuracies": 0.875, "rewards/chosen": -0.002441226039081812, "rewards/margins": 0.06230282783508301, "rewards/rejected": -0.06474405527114868, "step": 2368 }, { "epoch": 1.6383125864453665, "grad_norm": 4.715832710266113, "learning_rate": 4.6453818964192405e-05, "log_odds_chosen": 4.0443806648254395, "log_odds_ratio": -0.20312117040157318, "logits/chosen": -0.45583438873291016, "logits/rejected": -0.4988771677017212, "logps/chosen": -0.06384460628032684, "logps/rejected": -0.4571217894554138, "loss": 2.9992, "nll_loss": 0.7294943332672119, "rewards/accuracies": 0.875, "rewards/chosen": -0.0063844602555036545, "rewards/margins": 0.039327722042798996, "rewards/rejected": -0.0457121841609478, "step": 2369 }, { "epoch": 1.6390041493775933, "grad_norm": 13.881675720214844, "learning_rate": 4.6449976947902264e-05, "log_odds_chosen": 2.983891725540161, "log_odds_ratio": -0.3598352372646332, "logits/chosen": -0.08851074427366257, "logits/rejected": -0.08484543859958649, "logps/chosen": -0.11364905536174774, "logps/rejected": -0.7294566631317139, "loss": 3.5079, "nll_loss": 0.8409815430641174, "rewards/accuracies": 0.875, "rewards/chosen": -0.011364906094968319, "rewards/margins": 0.06158076599240303, "rewards/rejected": -0.07294566929340363, "step": 2370 }, { "epoch": 1.6396957123098201, "grad_norm": 8.289203643798828, "learning_rate": 4.644613493161211e-05, "log_odds_chosen": 4.033492088317871, "log_odds_ratio": -0.14057360589504242, "logits/chosen": -0.9791640043258667, "logits/rejected": -0.9675908088684082, "logps/chosen": -0.03342659771442413, "logps/rejected": -0.8204505443572998, "loss": 4.3131, "nll_loss": 1.0642057657241821, "rewards/accuracies": 1.0, "rewards/chosen": -0.003342659678310156, "rewards/margins": 0.0787023976445198, "rewards/rejected": -0.0820450633764267, "step": 2371 }, { "epoch": 1.640387275242047, "grad_norm": 8.510683059692383, "learning_rate": 4.644229291532196e-05, "log_odds_chosen": 2.96242094039917, "log_odds_ratio": -0.2382379025220871, "logits/chosen": -0.44780367612838745, "logits/rejected": -0.5181176066398621, "logps/chosen": -0.09145855158567429, "logps/rejected": -0.7005022764205933, "loss": 3.3642, "nll_loss": 0.8172252178192139, "rewards/accuracies": 0.875, "rewards/chosen": -0.009145855903625488, "rewards/margins": 0.060904376208782196, "rewards/rejected": -0.07005023211240768, "step": 2372 }, { "epoch": 1.6410788381742738, "grad_norm": 9.975122451782227, "learning_rate": 4.6438450899031815e-05, "log_odds_chosen": 2.5919246673583984, "log_odds_ratio": -0.26950156688690186, "logits/chosen": -0.5262844562530518, "logits/rejected": -0.5486704707145691, "logps/chosen": -0.11428187787532806, "logps/rejected": -0.9702275991439819, "loss": 2.8376, "nll_loss": 0.6824502944946289, "rewards/accuracies": 0.875, "rewards/chosen": -0.011428188532590866, "rewards/margins": 0.08559457957744598, "rewards/rejected": -0.09702275693416595, "step": 2373 }, { "epoch": 1.6417704011065006, "grad_norm": 12.058677673339844, "learning_rate": 4.643460888274167e-05, "log_odds_chosen": 3.0631299018859863, "log_odds_ratio": -0.7720636129379272, "logits/chosen": -0.3485577702522278, "logits/rejected": -0.4091474413871765, "logps/chosen": -0.19718411564826965, "logps/rejected": -0.6004868149757385, "loss": 4.0899, "nll_loss": 0.9452710151672363, "rewards/accuracies": 0.75, "rewards/chosen": -0.019718410447239876, "rewards/margins": 0.040330275893211365, "rewards/rejected": -0.06004868820309639, "step": 2374 }, { "epoch": 1.6424619640387275, "grad_norm": 8.720122337341309, "learning_rate": 4.643076686645151e-05, "log_odds_chosen": 3.9990530014038086, "log_odds_ratio": -0.13114123046398163, "logits/chosen": -0.5957059860229492, "logits/rejected": -0.5671588778495789, "logps/chosen": -0.06041772663593292, "logps/rejected": -0.6443151831626892, "loss": 3.8435, "nll_loss": 0.9477718472480774, "rewards/accuracies": 1.0, "rewards/chosen": -0.006041772663593292, "rewards/margins": 0.05838974937796593, "rewards/rejected": -0.06443151831626892, "step": 2375 }, { "epoch": 1.6431535269709543, "grad_norm": 6.34602689743042, "learning_rate": 4.6426924850161365e-05, "log_odds_chosen": 4.483710765838623, "log_odds_ratio": -0.13568690419197083, "logits/chosen": -0.19696210324764252, "logits/rejected": -0.2683354914188385, "logps/chosen": -0.06465273350477219, "logps/rejected": -0.5963294506072998, "loss": 2.5422, "nll_loss": 0.6219762563705444, "rewards/accuracies": 1.0, "rewards/chosen": -0.006465273909270763, "rewards/margins": 0.053167674690485, "rewards/rejected": -0.05963294953107834, "step": 2376 }, { "epoch": 1.6438450899031811, "grad_norm": 10.477910041809082, "learning_rate": 4.642308283387122e-05, "log_odds_chosen": 4.577691555023193, "log_odds_ratio": -0.2504514455795288, "logits/chosen": -0.5443359613418579, "logits/rejected": -0.6230031251907349, "logps/chosen": -0.03581617400050163, "logps/rejected": -0.7296348810195923, "loss": 3.2231, "nll_loss": 0.7807391285896301, "rewards/accuracies": 0.875, "rewards/chosen": -0.003581617260351777, "rewards/margins": 0.06938187777996063, "rewards/rejected": -0.07296349108219147, "step": 2377 }, { "epoch": 1.644536652835408, "grad_norm": 9.255958557128906, "learning_rate": 4.641924081758107e-05, "log_odds_chosen": 5.29841423034668, "log_odds_ratio": -0.05628347396850586, "logits/chosen": -0.2169831395149231, "logits/rejected": -0.35057875514030457, "logps/chosen": -0.025036348029971123, "logps/rejected": -0.8059083819389343, "loss": 4.3766, "nll_loss": 1.0885179042816162, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025036348961293697, "rewards/margins": 0.0780872032046318, "rewards/rejected": -0.08059084415435791, "step": 2378 }, { "epoch": 1.6452282157676348, "grad_norm": 6.931178569793701, "learning_rate": 4.641539880129092e-05, "log_odds_chosen": 3.6957497596740723, "log_odds_ratio": -0.12863591313362122, "logits/chosen": -0.245091512799263, "logits/rejected": -0.3163626790046692, "logps/chosen": -0.07319440692663193, "logps/rejected": -0.725760817527771, "loss": 3.671, "nll_loss": 0.9048863053321838, "rewards/accuracies": 0.875, "rewards/chosen": -0.007319441065192223, "rewards/margins": 0.06525664031505585, "rewards/rejected": -0.07257607579231262, "step": 2379 }, { "epoch": 1.6459197786998616, "grad_norm": 5.41664981842041, "learning_rate": 4.641155678500077e-05, "log_odds_chosen": 4.072056770324707, "log_odds_ratio": -0.12067017704248428, "logits/chosen": -0.7107937335968018, "logits/rejected": -0.6876804828643799, "logps/chosen": -0.05523668974637985, "logps/rejected": -0.7824287414550781, "loss": 2.8099, "nll_loss": 0.6904194951057434, "rewards/accuracies": 1.0, "rewards/chosen": -0.005523669067770243, "rewards/margins": 0.07271920889616013, "rewards/rejected": -0.07824286818504333, "step": 2380 }, { "epoch": 1.6466113416320884, "grad_norm": 7.0871052742004395, "learning_rate": 4.640771476871062e-05, "log_odds_chosen": 1.587499737739563, "log_odds_ratio": -0.522819995880127, "logits/chosen": -0.5985342860221863, "logits/rejected": -0.6404790282249451, "logps/chosen": -0.13571304082870483, "logps/rejected": -0.3021208941936493, "loss": 5.1378, "nll_loss": 1.232169270515442, "rewards/accuracies": 0.5, "rewards/chosen": -0.013571303337812424, "rewards/margins": 0.016640786081552505, "rewards/rejected": -0.03021209128201008, "step": 2381 }, { "epoch": 1.6473029045643153, "grad_norm": 15.613216400146484, "learning_rate": 4.640387275242047e-05, "log_odds_chosen": 4.4091796875, "log_odds_ratio": -0.2573304772377014, "logits/chosen": -0.7599989175796509, "logits/rejected": -0.7812910079956055, "logps/chosen": -0.07085003703832626, "logps/rejected": -0.9684122204780579, "loss": 3.8714, "nll_loss": 0.9421118497848511, "rewards/accuracies": 0.875, "rewards/chosen": -0.007085003890097141, "rewards/margins": 0.08975622057914734, "rewards/rejected": -0.0968412309885025, "step": 2382 }, { "epoch": 1.647994467496542, "grad_norm": 7.1068434715271, "learning_rate": 4.6400030736130325e-05, "log_odds_chosen": 4.6426897048950195, "log_odds_ratio": -0.13260595500469208, "logits/chosen": -0.4048321843147278, "logits/rejected": -0.4985610842704773, "logps/chosen": -0.021196218207478523, "logps/rejected": -0.729151725769043, "loss": 3.4597, "nll_loss": 0.8516725301742554, "rewards/accuracies": 0.875, "rewards/chosen": -0.002119621727615595, "rewards/margins": 0.07079555094242096, "rewards/rejected": -0.07291518151760101, "step": 2383 }, { "epoch": 1.648686030428769, "grad_norm": 7.182337760925293, "learning_rate": 4.639618871984017e-05, "log_odds_chosen": 5.572072982788086, "log_odds_ratio": -0.028026271611452103, "logits/chosen": -0.43479007482528687, "logits/rejected": -0.5537311434745789, "logps/chosen": -0.03721091151237488, "logps/rejected": -1.0600721836090088, "loss": 2.9149, "nll_loss": 0.7259154319763184, "rewards/accuracies": 1.0, "rewards/chosen": -0.00372109143063426, "rewards/margins": 0.10228613018989563, "rewards/rejected": -0.10600721836090088, "step": 2384 }, { "epoch": 1.6493775933609958, "grad_norm": 7.0679545402526855, "learning_rate": 4.639234670355003e-05, "log_odds_chosen": 5.097892761230469, "log_odds_ratio": -0.05553697049617767, "logits/chosen": -0.6244937777519226, "logits/rejected": -0.6847025156021118, "logps/chosen": -0.03268459439277649, "logps/rejected": -1.051744818687439, "loss": 3.2715, "nll_loss": 0.812329113483429, "rewards/accuracies": 1.0, "rewards/chosen": -0.003268459578976035, "rewards/margins": 0.10190602391958237, "rewards/rejected": -0.1051744893193245, "step": 2385 }, { "epoch": 1.6500691562932226, "grad_norm": 5.613284111022949, "learning_rate": 4.6388504687259876e-05, "log_odds_chosen": 4.971386909484863, "log_odds_ratio": -0.11330129951238632, "logits/chosen": -0.5442340970039368, "logits/rejected": -0.5847311615943909, "logps/chosen": -0.04058442637324333, "logps/rejected": -0.7056328654289246, "loss": 2.706, "nll_loss": 0.665163516998291, "rewards/accuracies": 0.875, "rewards/chosen": -0.004058443009853363, "rewards/margins": 0.06650485098361969, "rewards/rejected": -0.07056329399347305, "step": 2386 }, { "epoch": 1.6507607192254494, "grad_norm": 5.791971683502197, "learning_rate": 4.638466267096973e-05, "log_odds_chosen": 5.391927242279053, "log_odds_ratio": -0.04727129638195038, "logits/chosen": -0.3271331191062927, "logits/rejected": -0.35083311796188354, "logps/chosen": -0.04603290557861328, "logps/rejected": -1.006185531616211, "loss": 2.1011, "nll_loss": 0.5205403566360474, "rewards/accuracies": 1.0, "rewards/chosen": -0.004603290930390358, "rewards/margins": 0.09601525217294693, "rewards/rejected": -0.10061854869127274, "step": 2387 }, { "epoch": 1.6514522821576763, "grad_norm": 4.411108493804932, "learning_rate": 4.638082065467958e-05, "log_odds_chosen": 6.642820358276367, "log_odds_ratio": -0.024102747440338135, "logits/chosen": -0.26315993070602417, "logits/rejected": -0.2784760892391205, "logps/chosen": -0.014562083408236504, "logps/rejected": -0.8451967239379883, "loss": 2.0728, "nll_loss": 0.5157792568206787, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014562084106728435, "rewards/margins": 0.08306346833705902, "rewards/rejected": -0.08451966941356659, "step": 2388 }, { "epoch": 1.652143845089903, "grad_norm": 9.01975154876709, "learning_rate": 4.6376978638389427e-05, "log_odds_chosen": 3.984631061553955, "log_odds_ratio": -0.06261247396469116, "logits/chosen": -0.7324444055557251, "logits/rejected": -0.7960898280143738, "logps/chosen": -0.013113527558743954, "logps/rejected": -0.5568873286247253, "loss": 4.219, "nll_loss": 1.0484963655471802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013113527093082666, "rewards/margins": 0.05437737703323364, "rewards/rejected": -0.055688731372356415, "step": 2389 }, { "epoch": 1.65283540802213, "grad_norm": 11.267913818359375, "learning_rate": 4.637313662209928e-05, "log_odds_chosen": 1.683915376663208, "log_odds_ratio": -0.8660053014755249, "logits/chosen": -0.41948461532592773, "logits/rejected": -0.4554569125175476, "logps/chosen": -0.1385391354560852, "logps/rejected": -0.43766355514526367, "loss": 3.0138, "nll_loss": 0.6668479442596436, "rewards/accuracies": 0.75, "rewards/chosen": -0.01385391503572464, "rewards/margins": 0.029912445694208145, "rewards/rejected": -0.04376635700464249, "step": 2390 }, { "epoch": 1.6535269709543567, "grad_norm": 7.12009334564209, "learning_rate": 4.636929460580913e-05, "log_odds_chosen": 5.1805267333984375, "log_odds_ratio": -0.04070788249373436, "logits/chosen": -0.5637659430503845, "logits/rejected": -0.6139363050460815, "logps/chosen": -0.020192958414554596, "logps/rejected": -0.8956127166748047, "loss": 2.5298, "nll_loss": 0.6283823251724243, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020192954689264297, "rewards/margins": 0.08754197508096695, "rewards/rejected": -0.08956127613782883, "step": 2391 }, { "epoch": 1.6542185338865836, "grad_norm": 9.247042655944824, "learning_rate": 4.6365452589518984e-05, "log_odds_chosen": 5.03125, "log_odds_ratio": -0.3684726357460022, "logits/chosen": -0.8912017345428467, "logits/rejected": -0.8966810703277588, "logps/chosen": -0.04350098595023155, "logps/rejected": -0.5883774757385254, "loss": 2.8245, "nll_loss": 0.6692676544189453, "rewards/accuracies": 0.875, "rewards/chosen": -0.00435009878128767, "rewards/margins": 0.05448765307664871, "rewards/rejected": -0.05883774906396866, "step": 2392 }, { "epoch": 1.6549100968188104, "grad_norm": 9.383896827697754, "learning_rate": 4.636161057322883e-05, "log_odds_chosen": 1.8873748779296875, "log_odds_ratio": -0.37359458208084106, "logits/chosen": -0.7230343222618103, "logits/rejected": -0.683919370174408, "logps/chosen": -0.09096837043762207, "logps/rejected": -0.39955103397369385, "loss": 3.809, "nll_loss": 0.9149001240730286, "rewards/accuracies": 0.875, "rewards/chosen": -0.009096836671233177, "rewards/margins": 0.030858265236020088, "rewards/rejected": -0.039955101907253265, "step": 2393 }, { "epoch": 1.6556016597510372, "grad_norm": 7.632327079772949, "learning_rate": 4.635776855693869e-05, "log_odds_chosen": 5.199143409729004, "log_odds_ratio": -0.06454131752252579, "logits/chosen": -0.39764100313186646, "logits/rejected": -0.48015543818473816, "logps/chosen": -0.02446744777262211, "logps/rejected": -0.7504343390464783, "loss": 3.2063, "nll_loss": 0.7951316833496094, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024467448238283396, "rewards/margins": 0.07259668409824371, "rewards/rejected": -0.07504343241453171, "step": 2394 }, { "epoch": 1.656293222683264, "grad_norm": 8.815381050109863, "learning_rate": 4.6353926540648534e-05, "log_odds_chosen": 1.673638939857483, "log_odds_ratio": -0.5779725313186646, "logits/chosen": -0.6094763875007629, "logits/rejected": -0.6504048109054565, "logps/chosen": -0.16714927554130554, "logps/rejected": -0.5325227975845337, "loss": 3.5577, "nll_loss": 0.8316306471824646, "rewards/accuracies": 0.75, "rewards/chosen": -0.016714926809072495, "rewards/margins": 0.036537349224090576, "rewards/rejected": -0.05325227975845337, "step": 2395 }, { "epoch": 1.656984785615491, "grad_norm": 9.138748168945312, "learning_rate": 4.635008452435839e-05, "log_odds_chosen": 4.83054256439209, "log_odds_ratio": -0.1090451255440712, "logits/chosen": -0.5976523160934448, "logits/rejected": -0.6270238161087036, "logps/chosen": -0.04145745560526848, "logps/rejected": -0.7733452320098877, "loss": 3.669, "nll_loss": 0.9063493013381958, "rewards/accuracies": 1.0, "rewards/chosen": -0.004145745653659105, "rewards/margins": 0.07318878173828125, "rewards/rejected": -0.07733452320098877, "step": 2396 }, { "epoch": 1.6576763485477177, "grad_norm": 5.9371113777160645, "learning_rate": 4.634624250806824e-05, "log_odds_chosen": 6.901837348937988, "log_odds_ratio": -0.0913933515548706, "logits/chosen": -0.42724931240081787, "logits/rejected": -0.4567227065563202, "logps/chosen": -0.033262595534324646, "logps/rejected": -1.2060177326202393, "loss": 2.511, "nll_loss": 0.6186057925224304, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033262595534324646, "rewards/margins": 0.11727550625801086, "rewards/rejected": -0.12060176581144333, "step": 2397 }, { "epoch": 1.6583679114799446, "grad_norm": 7.880588054656982, "learning_rate": 4.6342400491778085e-05, "log_odds_chosen": 5.725263595581055, "log_odds_ratio": -0.059090036898851395, "logits/chosen": -0.5653278231620789, "logits/rejected": -0.6397727727890015, "logps/chosen": -0.027447620406746864, "logps/rejected": -1.0369272232055664, "loss": 2.8997, "nll_loss": 0.719023585319519, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027447622269392014, "rewards/margins": 0.10094796866178513, "rewards/rejected": -0.10369272530078888, "step": 2398 }, { "epoch": 1.6590594744121714, "grad_norm": 8.58713150024414, "learning_rate": 4.633855847548794e-05, "log_odds_chosen": 2.910616874694824, "log_odds_ratio": -0.32300323247909546, "logits/chosen": -0.6054437160491943, "logits/rejected": -0.6225858330726624, "logps/chosen": -0.09555420279502869, "logps/rejected": -0.6819297075271606, "loss": 4.6391, "nll_loss": 1.127477765083313, "rewards/accuracies": 0.75, "rewards/chosen": -0.009555420838296413, "rewards/margins": 0.058637551963329315, "rewards/rejected": -0.0681929737329483, "step": 2399 }, { "epoch": 1.6597510373443982, "grad_norm": 8.066679954528809, "learning_rate": 4.633471645919779e-05, "log_odds_chosen": 3.3613734245300293, "log_odds_ratio": -0.4150410592556, "logits/chosen": -0.6153652667999268, "logits/rejected": -0.6782889366149902, "logps/chosen": -0.10744252055883408, "logps/rejected": -0.7856386303901672, "loss": 2.9875, "nll_loss": 0.7053636312484741, "rewards/accuracies": 0.75, "rewards/chosen": -0.010744252242147923, "rewards/margins": 0.06781961023807526, "rewards/rejected": -0.0785638615489006, "step": 2400 }, { "epoch": 1.660442600276625, "grad_norm": 6.530764102935791, "learning_rate": 4.633087444290764e-05, "log_odds_chosen": 3.1723811626434326, "log_odds_ratio": -0.26908573508262634, "logits/chosen": -0.5162174701690674, "logits/rejected": -0.5230663418769836, "logps/chosen": -0.07934844493865967, "logps/rejected": -0.7269495725631714, "loss": 3.0732, "nll_loss": 0.741379976272583, "rewards/accuracies": 0.875, "rewards/chosen": -0.007934845052659512, "rewards/margins": 0.06476011872291565, "rewards/rejected": -0.07269495725631714, "step": 2401 }, { "epoch": 1.6611341632088519, "grad_norm": 9.620217323303223, "learning_rate": 4.632703242661749e-05, "log_odds_chosen": 1.2391670942306519, "log_odds_ratio": -0.5796663165092468, "logits/chosen": -0.7345383167266846, "logits/rejected": -0.8077020049095154, "logps/chosen": -0.23129664361476898, "logps/rejected": -0.3796681761741638, "loss": 4.2852, "nll_loss": 1.013323426246643, "rewards/accuracies": 0.625, "rewards/chosen": -0.023129666224122047, "rewards/margins": 0.014837154187262058, "rewards/rejected": -0.03796681761741638, "step": 2402 }, { "epoch": 1.6618257261410787, "grad_norm": 7.168506145477295, "learning_rate": 4.632319041032735e-05, "log_odds_chosen": 4.937325477600098, "log_odds_ratio": -0.08248946070671082, "logits/chosen": -0.7550560235977173, "logits/rejected": -0.7097613215446472, "logps/chosen": -0.02854396589100361, "logps/rejected": -0.46548527479171753, "loss": 3.3639, "nll_loss": 0.8327199220657349, "rewards/accuracies": 1.0, "rewards/chosen": -0.002854396589100361, "rewards/margins": 0.04369413107633591, "rewards/rejected": -0.04654853045940399, "step": 2403 }, { "epoch": 1.6625172890733055, "grad_norm": 5.750540733337402, "learning_rate": 4.631934839403719e-05, "log_odds_chosen": 3.0646469593048096, "log_odds_ratio": -0.09252282977104187, "logits/chosen": -0.2593063712120056, "logits/rejected": -0.2871415317058563, "logps/chosen": -0.0420021153986454, "logps/rejected": -0.5545371174812317, "loss": 2.751, "nll_loss": 0.6785101890563965, "rewards/accuracies": 1.0, "rewards/chosen": -0.004200211726129055, "rewards/margins": 0.05125350132584572, "rewards/rejected": -0.055453717708587646, "step": 2404 }, { "epoch": 1.6632088520055324, "grad_norm": 7.526987552642822, "learning_rate": 4.6315506377747045e-05, "log_odds_chosen": 5.977910995483398, "log_odds_ratio": -0.01979386806488037, "logits/chosen": -0.4784207344055176, "logits/rejected": -0.6178550720214844, "logps/chosen": -0.0163608156144619, "logps/rejected": -1.1045422554016113, "loss": 2.767, "nll_loss": 0.6897709369659424, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016360816080123186, "rewards/margins": 0.1088181585073471, "rewards/rejected": -0.11045423895120621, "step": 2405 }, { "epoch": 1.6639004149377592, "grad_norm": 9.162591934204102, "learning_rate": 4.63116643614569e-05, "log_odds_chosen": 7.101935386657715, "log_odds_ratio": -0.01733310893177986, "logits/chosen": -0.4071503281593323, "logits/rejected": -0.44532567262649536, "logps/chosen": -0.013491926714777946, "logps/rejected": -1.055969476699829, "loss": 3.5565, "nll_loss": 0.887387752532959, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013491928111761808, "rewards/margins": 0.10424774885177612, "rewards/rejected": -0.10559694468975067, "step": 2406 }, { "epoch": 1.664591977869986, "grad_norm": 4.412231922149658, "learning_rate": 4.6307822345166743e-05, "log_odds_chosen": 2.818786859512329, "log_odds_ratio": -0.29969677329063416, "logits/chosen": -0.3059843182563782, "logits/rejected": -0.3522808849811554, "logps/chosen": -0.08872860670089722, "logps/rejected": -0.8007669448852539, "loss": 2.4794, "nll_loss": 0.5898890495300293, "rewards/accuracies": 0.75, "rewards/chosen": -0.008872861042618752, "rewards/margins": 0.07120384275913239, "rewards/rejected": -0.08007669448852539, "step": 2407 }, { "epoch": 1.6652835408022129, "grad_norm": 8.510326385498047, "learning_rate": 4.6303980328876596e-05, "log_odds_chosen": 2.5440433025360107, "log_odds_ratio": -0.28360381722450256, "logits/chosen": -0.4862896800041199, "logits/rejected": -0.501765251159668, "logps/chosen": -0.10348738729953766, "logps/rejected": -0.6458582878112793, "loss": 4.2624, "nll_loss": 1.0372350215911865, "rewards/accuracies": 0.875, "rewards/chosen": -0.010348739102482796, "rewards/margins": 0.054237090051174164, "rewards/rejected": -0.06458583474159241, "step": 2408 }, { "epoch": 1.6659751037344397, "grad_norm": 17.897363662719727, "learning_rate": 4.630013831258645e-05, "log_odds_chosen": 0.06756290793418884, "log_odds_ratio": -0.9511553049087524, "logits/chosen": -0.23593562841415405, "logits/rejected": -0.2747032940387726, "logps/chosen": -0.26998746395111084, "logps/rejected": -0.21665412187576294, "loss": 2.909, "nll_loss": 0.6321403980255127, "rewards/accuracies": 0.5, "rewards/chosen": -0.026998747140169144, "rewards/margins": -0.005333336070179939, "rewards/rejected": -0.021665412932634354, "step": 2409 }, { "epoch": 1.6666666666666665, "grad_norm": 10.7543306350708, "learning_rate": 4.62962962962963e-05, "log_odds_chosen": 5.174543380737305, "log_odds_ratio": -0.14928941428661346, "logits/chosen": -0.45020610094070435, "logits/rejected": -0.44557055830955505, "logps/chosen": -0.12285329401493073, "logps/rejected": -1.2492525577545166, "loss": 2.964, "nll_loss": 0.7260616421699524, "rewards/accuracies": 0.875, "rewards/chosen": -0.012285329401493073, "rewards/margins": 0.11263993382453918, "rewards/rejected": -0.12492527067661285, "step": 2410 }, { "epoch": 1.6673582295988933, "grad_norm": 6.037133693695068, "learning_rate": 4.6292454280006146e-05, "log_odds_chosen": 3.9803225994110107, "log_odds_ratio": -0.06571348756551743, "logits/chosen": -0.21116182208061218, "logits/rejected": -0.24481798708438873, "logps/chosen": -0.037533070892095566, "logps/rejected": -0.748488187789917, "loss": 2.9994, "nll_loss": 0.7432898879051208, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037533072754740715, "rewards/margins": 0.07109551131725311, "rewards/rejected": -0.07484881579875946, "step": 2411 }, { "epoch": 1.6680497925311202, "grad_norm": 6.9043498039245605, "learning_rate": 4.6288612263716006e-05, "log_odds_chosen": 6.687690734863281, "log_odds_ratio": -0.03177565708756447, "logits/chosen": -0.5768988728523254, "logits/rejected": -0.6625084280967712, "logps/chosen": -0.010495376773178577, "logps/rejected": -1.171600341796875, "loss": 2.7179, "nll_loss": 0.6762967705726624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010495376773178577, "rewards/margins": 0.11611049622297287, "rewards/rejected": -0.11716003715991974, "step": 2412 }, { "epoch": 1.668741355463347, "grad_norm": 7.08948278427124, "learning_rate": 4.628477024742585e-05, "log_odds_chosen": 3.832082748413086, "log_odds_ratio": -0.317456990480423, "logits/chosen": -0.5864173769950867, "logits/rejected": -0.5810104012489319, "logps/chosen": -0.08993137627840042, "logps/rejected": -0.7754663825035095, "loss": 2.4216, "nll_loss": 0.5736429691314697, "rewards/accuracies": 0.75, "rewards/chosen": -0.008993137627840042, "rewards/margins": 0.06855350732803345, "rewards/rejected": -0.07754664123058319, "step": 2413 }, { "epoch": 1.6694329183955738, "grad_norm": 6.609368801116943, "learning_rate": 4.6280928231135704e-05, "log_odds_chosen": 4.299071311950684, "log_odds_ratio": -0.1827426552772522, "logits/chosen": -0.7438517212867737, "logits/rejected": -0.7562617063522339, "logps/chosen": -0.08332640677690506, "logps/rejected": -0.8302958607673645, "loss": 3.964, "nll_loss": 0.9727151393890381, "rewards/accuracies": 0.875, "rewards/chosen": -0.008332639932632446, "rewards/margins": 0.07469694316387177, "rewards/rejected": -0.08302959054708481, "step": 2414 }, { "epoch": 1.6701244813278007, "grad_norm": 8.660046577453613, "learning_rate": 4.6277086214845556e-05, "log_odds_chosen": 2.9852561950683594, "log_odds_ratio": -0.3320246934890747, "logits/chosen": -0.5786329507827759, "logits/rejected": -0.629688560962677, "logps/chosen": -0.11012428253889084, "logps/rejected": -0.7393942475318909, "loss": 3.0382, "nll_loss": 0.7263439297676086, "rewards/accuracies": 0.875, "rewards/chosen": -0.011012429371476173, "rewards/margins": 0.0629269927740097, "rewards/rejected": -0.07393942773342133, "step": 2415 }, { "epoch": 1.6708160442600275, "grad_norm": 6.166026592254639, "learning_rate": 4.62732441985554e-05, "log_odds_chosen": 5.47209358215332, "log_odds_ratio": -0.047553643584251404, "logits/chosen": -0.7303832173347473, "logits/rejected": -0.7512546181678772, "logps/chosen": -0.052451252937316895, "logps/rejected": -1.2305645942687988, "loss": 3.5086, "nll_loss": 0.8724021911621094, "rewards/accuracies": 1.0, "rewards/chosen": -0.005245125386863947, "rewards/margins": 0.11781134456396103, "rewards/rejected": -0.12305645644664764, "step": 2416 }, { "epoch": 1.6715076071922543, "grad_norm": 13.398856163024902, "learning_rate": 4.6269402182265254e-05, "log_odds_chosen": 3.6622700691223145, "log_odds_ratio": -0.12322408705949783, "logits/chosen": -0.7097039222717285, "logits/rejected": -0.7724823355674744, "logps/chosen": -0.058933380991220474, "logps/rejected": -0.8197494149208069, "loss": 4.7999, "nll_loss": 1.1876574754714966, "rewards/accuracies": 1.0, "rewards/chosen": -0.00589333800598979, "rewards/margins": 0.07608160376548767, "rewards/rejected": -0.08197493851184845, "step": 2417 }, { "epoch": 1.6721991701244814, "grad_norm": 7.694314956665039, "learning_rate": 4.626556016597511e-05, "log_odds_chosen": 3.396709442138672, "log_odds_ratio": -0.126684308052063, "logits/chosen": -0.5527836680412292, "logits/rejected": -0.635554313659668, "logps/chosen": -0.041724156588315964, "logps/rejected": -0.5722861289978027, "loss": 2.9894, "nll_loss": 0.7346909046173096, "rewards/accuracies": 1.0, "rewards/chosen": -0.004172415938228369, "rewards/margins": 0.05305619537830353, "rewards/rejected": -0.057228609919548035, "step": 2418 }, { "epoch": 1.6728907330567082, "grad_norm": 8.397724151611328, "learning_rate": 4.626171814968496e-05, "log_odds_chosen": 5.756424903869629, "log_odds_ratio": -0.25328224897384644, "logits/chosen": -0.27321773767471313, "logits/rejected": -0.2953091263771057, "logps/chosen": -0.013859656639397144, "logps/rejected": -1.0872235298156738, "loss": 2.2151, "nll_loss": 0.5284467339515686, "rewards/accuracies": 0.875, "rewards/chosen": -0.0013859656173735857, "rewards/margins": 0.1073363870382309, "rewards/rejected": -0.10872235894203186, "step": 2419 }, { "epoch": 1.673582295988935, "grad_norm": 7.1727681159973145, "learning_rate": 4.6257876133394805e-05, "log_odds_chosen": 4.892050266265869, "log_odds_ratio": -0.08339997380971909, "logits/chosen": -0.4896528124809265, "logits/rejected": -0.5178585052490234, "logps/chosen": -0.03309309482574463, "logps/rejected": -0.8350263833999634, "loss": 4.1185, "nll_loss": 1.0212879180908203, "rewards/accuracies": 1.0, "rewards/chosen": -0.003309309482574463, "rewards/margins": 0.08019333332777023, "rewards/rejected": -0.0835026428103447, "step": 2420 }, { "epoch": 1.6742738589211619, "grad_norm": 6.350358009338379, "learning_rate": 4.6254034117104664e-05, "log_odds_chosen": 4.816648483276367, "log_odds_ratio": -0.04416489228606224, "logits/chosen": -0.6494845151901245, "logits/rejected": -0.7247365117073059, "logps/chosen": -0.03898897394537926, "logps/rejected": -0.8093506097793579, "loss": 2.7141, "nll_loss": 0.6741119027137756, "rewards/accuracies": 1.0, "rewards/chosen": -0.003898897208273411, "rewards/margins": 0.07703615725040436, "rewards/rejected": -0.08093506097793579, "step": 2421 }, { "epoch": 1.6749654218533887, "grad_norm": 6.809445858001709, "learning_rate": 4.625019210081451e-05, "log_odds_chosen": 6.252318382263184, "log_odds_ratio": -0.02218172326683998, "logits/chosen": -0.20815208554267883, "logits/rejected": -0.22089838981628418, "logps/chosen": -0.03607087954878807, "logps/rejected": -0.9978293180465698, "loss": 3.4035, "nll_loss": 0.8486486077308655, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036070882342755795, "rewards/margins": 0.09617584943771362, "rewards/rejected": -0.09978292882442474, "step": 2422 }, { "epoch": 1.6756569847856155, "grad_norm": 8.937105178833008, "learning_rate": 4.624635008452436e-05, "log_odds_chosen": 4.806909561157227, "log_odds_ratio": -0.358302503824234, "logits/chosen": -0.48498135805130005, "logits/rejected": -0.5562124252319336, "logps/chosen": -0.15368816256523132, "logps/rejected": -1.0874075889587402, "loss": 2.9095, "nll_loss": 0.6915530562400818, "rewards/accuracies": 0.875, "rewards/chosen": -0.015368817374110222, "rewards/margins": 0.09337194263935089, "rewards/rejected": -0.10874076187610626, "step": 2423 }, { "epoch": 1.6763485477178424, "grad_norm": 11.30183219909668, "learning_rate": 4.6242508068234215e-05, "log_odds_chosen": 5.33615779876709, "log_odds_ratio": -0.16340480744838715, "logits/chosen": -0.8020769357681274, "logits/rejected": -0.8382692337036133, "logps/chosen": -0.08057919889688492, "logps/rejected": -0.9394962191581726, "loss": 3.2915, "nll_loss": 0.8065407276153564, "rewards/accuracies": 0.875, "rewards/chosen": -0.008057920262217522, "rewards/margins": 0.0858917087316513, "rewards/rejected": -0.09394963085651398, "step": 2424 }, { "epoch": 1.6770401106500692, "grad_norm": 7.548745632171631, "learning_rate": 4.623866605194406e-05, "log_odds_chosen": 5.641702651977539, "log_odds_ratio": -0.1668786108493805, "logits/chosen": -0.6750974059104919, "logits/rejected": -0.6827152967453003, "logps/chosen": -0.057045768946409225, "logps/rejected": -1.1087830066680908, "loss": 3.5121, "nll_loss": 0.8613306283950806, "rewards/accuracies": 0.875, "rewards/chosen": -0.0057045770809054375, "rewards/margins": 0.1051737368106842, "rewards/rejected": -0.11087830364704132, "step": 2425 }, { "epoch": 1.677731673582296, "grad_norm": 10.312768936157227, "learning_rate": 4.623482403565391e-05, "log_odds_chosen": 5.615530967712402, "log_odds_ratio": -0.10865119099617004, "logits/chosen": -0.7960000038146973, "logits/rejected": -0.8431391716003418, "logps/chosen": -0.042798854410648346, "logps/rejected": -0.9962965846061707, "loss": 3.816, "nll_loss": 0.9431357979774475, "rewards/accuracies": 1.0, "rewards/chosen": -0.004279885906726122, "rewards/margins": 0.09534977376461029, "rewards/rejected": -0.09962965548038483, "step": 2426 }, { "epoch": 1.6784232365145229, "grad_norm": 5.688529014587402, "learning_rate": 4.6230982019363765e-05, "log_odds_chosen": 6.757950782775879, "log_odds_ratio": -0.027478059753775597, "logits/chosen": -0.5317009687423706, "logits/rejected": -0.5833687782287598, "logps/chosen": -0.013652315363287926, "logps/rejected": -1.1175298690795898, "loss": 1.7655, "nll_loss": 0.4386347234249115, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013652315828949213, "rewards/margins": 0.11038775742053986, "rewards/rejected": -0.11175297945737839, "step": 2427 }, { "epoch": 1.6791147994467497, "grad_norm": 7.741587162017822, "learning_rate": 4.622714000307362e-05, "log_odds_chosen": 3.574104070663452, "log_odds_ratio": -0.1316196471452713, "logits/chosen": -0.7742865085601807, "logits/rejected": -0.7557849287986755, "logps/chosen": -0.03781423345208168, "logps/rejected": -0.6451917290687561, "loss": 3.8846, "nll_loss": 0.9579886198043823, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037814234383404255, "rewards/margins": 0.06073775142431259, "rewards/rejected": -0.06451917439699173, "step": 2428 }, { "epoch": 1.6798063623789765, "grad_norm": 18.183177947998047, "learning_rate": 4.622329798678346e-05, "log_odds_chosen": 3.759333610534668, "log_odds_ratio": -0.7283949255943298, "logits/chosen": -0.6545952558517456, "logits/rejected": -0.6655271053314209, "logps/chosen": -0.10793383419513702, "logps/rejected": -0.7000494599342346, "loss": 3.6952, "nll_loss": 0.8509517908096313, "rewards/accuracies": 0.875, "rewards/chosen": -0.010793383233249187, "rewards/margins": 0.05921155586838722, "rewards/rejected": -0.07000494748353958, "step": 2429 }, { "epoch": 1.6804979253112033, "grad_norm": 9.159822463989258, "learning_rate": 4.621945597049332e-05, "log_odds_chosen": 3.761561870574951, "log_odds_ratio": -0.26112616062164307, "logits/chosen": -0.45438137650489807, "logits/rejected": -0.5548147559165955, "logps/chosen": -0.029453417286276817, "logps/rejected": -0.7845432162284851, "loss": 2.6587, "nll_loss": 0.6385586261749268, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029453416354954243, "rewards/margins": 0.0755089819431305, "rewards/rejected": -0.07845432311296463, "step": 2430 }, { "epoch": 1.6811894882434302, "grad_norm": 10.218520164489746, "learning_rate": 4.621561395420317e-05, "log_odds_chosen": 5.348668098449707, "log_odds_ratio": -0.05290871858596802, "logits/chosen": -0.37899768352508545, "logits/rejected": -0.4649916887283325, "logps/chosen": -0.023256096988916397, "logps/rejected": -0.9848682284355164, "loss": 3.9494, "nll_loss": 0.9820523262023926, "rewards/accuracies": 1.0, "rewards/chosen": -0.002325609792023897, "rewards/margins": 0.09616121649742126, "rewards/rejected": -0.09848682582378387, "step": 2431 }, { "epoch": 1.681881051175657, "grad_norm": 8.257439613342285, "learning_rate": 4.621177193791302e-05, "log_odds_chosen": 3.942685127258301, "log_odds_ratio": -0.16259554028511047, "logits/chosen": -0.5742173194885254, "logits/rejected": -0.6128363609313965, "logps/chosen": -0.03621543198823929, "logps/rejected": -0.5569218993186951, "loss": 3.7442, "nll_loss": 0.9197925329208374, "rewards/accuracies": 0.875, "rewards/chosen": -0.0036215432919561863, "rewards/margins": 0.052070651203393936, "rewards/rejected": -0.055692195892333984, "step": 2432 }, { "epoch": 1.6825726141078838, "grad_norm": 7.5304460525512695, "learning_rate": 4.620792992162287e-05, "log_odds_chosen": 5.471726894378662, "log_odds_ratio": -0.12540628015995026, "logits/chosen": -0.29969680309295654, "logits/rejected": -0.35970044136047363, "logps/chosen": -0.07335058599710464, "logps/rejected": -0.9492495059967041, "loss": 2.1545, "nll_loss": 0.5260959267616272, "rewards/accuracies": 1.0, "rewards/chosen": -0.007335058879107237, "rewards/margins": 0.08758989721536636, "rewards/rejected": -0.09492494910955429, "step": 2433 }, { "epoch": 1.6832641770401107, "grad_norm": 7.556035041809082, "learning_rate": 4.620408790533272e-05, "log_odds_chosen": 4.8540778160095215, "log_odds_ratio": -0.09760741144418716, "logits/chosen": -0.7015647292137146, "logits/rejected": -0.7208361625671387, "logps/chosen": -0.019324539229273796, "logps/rejected": -0.9123736619949341, "loss": 2.3607, "nll_loss": 0.5804073214530945, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019324538297951221, "rewards/margins": 0.08930491656064987, "rewards/rejected": -0.091237373650074, "step": 2434 }, { "epoch": 1.6839557399723375, "grad_norm": 13.672472953796387, "learning_rate": 4.620024588904257e-05, "log_odds_chosen": 4.359309196472168, "log_odds_ratio": -0.4574434757232666, "logits/chosen": -0.6222982406616211, "logits/rejected": -0.6353976726531982, "logps/chosen": -0.08387883752584457, "logps/rejected": -0.6800822019577026, "loss": 3.1069, "nll_loss": 0.7309712171554565, "rewards/accuracies": 0.875, "rewards/chosen": -0.008387884125113487, "rewards/margins": 0.059620339423418045, "rewards/rejected": -0.06800822913646698, "step": 2435 }, { "epoch": 1.6846473029045643, "grad_norm": 10.09824275970459, "learning_rate": 4.6196403872752424e-05, "log_odds_chosen": 5.2099432945251465, "log_odds_ratio": -0.1256905496120453, "logits/chosen": -0.7678384184837341, "logits/rejected": -0.8608188033103943, "logps/chosen": -0.09678126126527786, "logps/rejected": -1.3110018968582153, "loss": 3.2771, "nll_loss": 0.806714653968811, "rewards/accuracies": 0.875, "rewards/chosen": -0.009678126312792301, "rewards/margins": 0.12142205238342285, "rewards/rejected": -0.13110017776489258, "step": 2436 }, { "epoch": 1.6853388658367912, "grad_norm": 5.360626697540283, "learning_rate": 4.6192561856462276e-05, "log_odds_chosen": 3.714616060256958, "log_odds_ratio": -0.08257103711366653, "logits/chosen": -0.642780065536499, "logits/rejected": -0.7198878526687622, "logps/chosen": -0.07956398278474808, "logps/rejected": -0.8394503593444824, "loss": 2.3946, "nll_loss": 0.5903887152671814, "rewards/accuracies": 1.0, "rewards/chosen": -0.007956398651003838, "rewards/margins": 0.07598863542079926, "rewards/rejected": -0.08394503593444824, "step": 2437 }, { "epoch": 1.686030428769018, "grad_norm": 5.9998860359191895, "learning_rate": 4.618871984017212e-05, "log_odds_chosen": 4.092153549194336, "log_odds_ratio": -0.14713715016841888, "logits/chosen": -0.4301367700099945, "logits/rejected": -0.46247437596321106, "logps/chosen": -0.05428977310657501, "logps/rejected": -0.6135324835777283, "loss": 3.679, "nll_loss": 0.9050412178039551, "rewards/accuracies": 0.875, "rewards/chosen": -0.005428977310657501, "rewards/margins": 0.055924274027347565, "rewards/rejected": -0.061353251338005066, "step": 2438 }, { "epoch": 1.6867219917012448, "grad_norm": 7.3191609382629395, "learning_rate": 4.618487782388198e-05, "log_odds_chosen": 5.921332359313965, "log_odds_ratio": -0.03245503827929497, "logits/chosen": -0.4242483675479889, "logits/rejected": -0.4684517979621887, "logps/chosen": -0.010253867134451866, "logps/rejected": -0.8007473349571228, "loss": 2.8552, "nll_loss": 0.7105446457862854, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010253868531435728, "rewards/margins": 0.07904934883117676, "rewards/rejected": -0.0800747349858284, "step": 2439 }, { "epoch": 1.6874135546334716, "grad_norm": 9.23619270324707, "learning_rate": 4.618103580759183e-05, "log_odds_chosen": 5.425571441650391, "log_odds_ratio": -0.24814097583293915, "logits/chosen": -0.44754737615585327, "logits/rejected": -0.5392141938209534, "logps/chosen": -0.052859995514154434, "logps/rejected": -1.1107946634292603, "loss": 3.7001, "nll_loss": 0.9002161026000977, "rewards/accuracies": 0.875, "rewards/chosen": -0.005285999272018671, "rewards/margins": 0.10579346120357513, "rewards/rejected": -0.11107945442199707, "step": 2440 }, { "epoch": 1.6881051175656985, "grad_norm": 6.046574115753174, "learning_rate": 4.617719379130168e-05, "log_odds_chosen": 4.892881393432617, "log_odds_ratio": -0.0677889883518219, "logits/chosen": -0.29898586869239807, "logits/rejected": -0.3219645321369171, "logps/chosen": -0.05868750810623169, "logps/rejected": -1.0086970329284668, "loss": 2.9673, "nll_loss": 0.7350428700447083, "rewards/accuracies": 1.0, "rewards/chosen": -0.005868750624358654, "rewards/margins": 0.09500095993280411, "rewards/rejected": -0.10086971521377563, "step": 2441 }, { "epoch": 1.6887966804979253, "grad_norm": 5.991647720336914, "learning_rate": 4.617335177501153e-05, "log_odds_chosen": 3.6196436882019043, "log_odds_ratio": -0.32258835434913635, "logits/chosen": -0.5833138823509216, "logits/rejected": -0.6411206126213074, "logps/chosen": -0.12731850147247314, "logps/rejected": -0.4984019994735718, "loss": 2.7453, "nll_loss": 0.6540665626525879, "rewards/accuracies": 0.875, "rewards/chosen": -0.012731850147247314, "rewards/margins": 0.03710835054516792, "rewards/rejected": -0.04984020069241524, "step": 2442 }, { "epoch": 1.6894882434301521, "grad_norm": 9.416582107543945, "learning_rate": 4.616950975872138e-05, "log_odds_chosen": 2.8410861492156982, "log_odds_ratio": -0.3418985903263092, "logits/chosen": -0.447319358587265, "logits/rejected": -0.49639326333999634, "logps/chosen": -0.1420106589794159, "logps/rejected": -0.5109445452690125, "loss": 2.9645, "nll_loss": 0.7069295644760132, "rewards/accuracies": 0.875, "rewards/chosen": -0.014201066456735134, "rewards/margins": 0.036893390119075775, "rewards/rejected": -0.051094453781843185, "step": 2443 }, { "epoch": 1.690179806362379, "grad_norm": 6.755992412567139, "learning_rate": 4.616566774243123e-05, "log_odds_chosen": 5.548319339752197, "log_odds_ratio": -0.2713317573070526, "logits/chosen": -0.22183842957019806, "logits/rejected": -0.1871333122253418, "logps/chosen": -0.032315757125616074, "logps/rejected": -0.8459647297859192, "loss": 2.0004, "nll_loss": 0.47296953201293945, "rewards/accuracies": 0.75, "rewards/chosen": -0.0032315754797309637, "rewards/margins": 0.08136489987373352, "rewards/rejected": -0.08459647744894028, "step": 2444 }, { "epoch": 1.6908713692946058, "grad_norm": 8.874152183532715, "learning_rate": 4.616182572614108e-05, "log_odds_chosen": 3.405881881713867, "log_odds_ratio": -0.4702404737472534, "logits/chosen": -0.41794833540916443, "logits/rejected": -0.4229547679424286, "logps/chosen": -0.0915827602148056, "logps/rejected": -0.6083066463470459, "loss": 2.9032, "nll_loss": 0.678780198097229, "rewards/accuracies": 0.625, "rewards/chosen": -0.00915827602148056, "rewards/margins": 0.05167239159345627, "rewards/rejected": -0.06083066761493683, "step": 2445 }, { "epoch": 1.6915629322268326, "grad_norm": 5.671358585357666, "learning_rate": 4.6157983709850935e-05, "log_odds_chosen": 4.28201961517334, "log_odds_ratio": -0.14261570572853088, "logits/chosen": -0.6459827423095703, "logits/rejected": -0.5838777422904968, "logps/chosen": -0.1007598340511322, "logps/rejected": -1.0819426774978638, "loss": 2.9352, "nll_loss": 0.7195260524749756, "rewards/accuracies": 0.875, "rewards/chosen": -0.010075983591377735, "rewards/margins": 0.09811828285455704, "rewards/rejected": -0.1081942766904831, "step": 2446 }, { "epoch": 1.6922544951590595, "grad_norm": 7.028148651123047, "learning_rate": 4.615414169356078e-05, "log_odds_chosen": 5.338274002075195, "log_odds_ratio": -0.31613755226135254, "logits/chosen": -0.38394150137901306, "logits/rejected": -0.4851318597793579, "logps/chosen": -0.04378465563058853, "logps/rejected": -0.7531493902206421, "loss": 1.9853, "nll_loss": 0.4647136628627777, "rewards/accuracies": 0.875, "rewards/chosen": -0.004378465469926596, "rewards/margins": 0.07093648612499237, "rewards/rejected": -0.0753149464726448, "step": 2447 }, { "epoch": 1.6929460580912863, "grad_norm": 6.10004186630249, "learning_rate": 4.615029967727064e-05, "log_odds_chosen": 5.716916084289551, "log_odds_ratio": -0.22165924310684204, "logits/chosen": -0.6277919411659241, "logits/rejected": -0.7794336080551147, "logps/chosen": -0.09859595447778702, "logps/rejected": -0.8822535276412964, "loss": 2.3748, "nll_loss": 0.5715370774269104, "rewards/accuracies": 0.875, "rewards/chosen": -0.009859594516456127, "rewards/margins": 0.078365758061409, "rewards/rejected": -0.088225357234478, "step": 2448 }, { "epoch": 1.6936376210235131, "grad_norm": 7.531747341156006, "learning_rate": 4.6146457660980485e-05, "log_odds_chosen": 5.302791595458984, "log_odds_ratio": -0.1415976583957672, "logits/chosen": -0.510055422782898, "logits/rejected": -0.5041730403900146, "logps/chosen": -0.06312351673841476, "logps/rejected": -1.0165313482284546, "loss": 2.7517, "nll_loss": 0.6737696528434753, "rewards/accuracies": 1.0, "rewards/chosen": -0.0063123516738414764, "rewards/margins": 0.0953407809138298, "rewards/rejected": -0.10165313631296158, "step": 2449 }, { "epoch": 1.69432918395574, "grad_norm": 9.723373413085938, "learning_rate": 4.614261564469034e-05, "log_odds_chosen": 4.904651165008545, "log_odds_ratio": -0.5076399445533752, "logits/chosen": -0.44497087597846985, "logits/rejected": -0.4860677719116211, "logps/chosen": -0.07396794855594635, "logps/rejected": -0.9484530687332153, "loss": 4.3168, "nll_loss": 1.0284472703933716, "rewards/accuracies": 0.75, "rewards/chosen": -0.007396795321255922, "rewards/margins": 0.08744850754737854, "rewards/rejected": -0.09484530985355377, "step": 2450 }, { "epoch": 1.6950207468879668, "grad_norm": 9.681573867797852, "learning_rate": 4.613877362840019e-05, "log_odds_chosen": 3.0576236248016357, "log_odds_ratio": -0.08652878552675247, "logits/chosen": -0.8884795904159546, "logits/rejected": -0.9039291143417358, "logps/chosen": -0.09103421866893768, "logps/rejected": -0.9812101721763611, "loss": 3.8463, "nll_loss": 0.9529250860214233, "rewards/accuracies": 1.0, "rewards/chosen": -0.009103422984480858, "rewards/margins": 0.0890175923705101, "rewards/rejected": -0.09812101721763611, "step": 2451 }, { "epoch": 1.6957123098201936, "grad_norm": 4.27262020111084, "learning_rate": 4.6134931612110036e-05, "log_odds_chosen": 4.734068393707275, "log_odds_ratio": -0.29768267273902893, "logits/chosen": -0.5565181970596313, "logits/rejected": -0.5211176872253418, "logps/chosen": -0.05631481856107712, "logps/rejected": -0.7082623839378357, "loss": 2.351, "nll_loss": 0.5579749345779419, "rewards/accuracies": 0.875, "rewards/chosen": -0.005631481762975454, "rewards/margins": 0.0651947632431984, "rewards/rejected": -0.07082624733448029, "step": 2452 }, { "epoch": 1.6964038727524204, "grad_norm": 10.028276443481445, "learning_rate": 4.613108959581989e-05, "log_odds_chosen": 2.7507269382476807, "log_odds_ratio": -0.4191727638244629, "logits/chosen": -0.4020436406135559, "logits/rejected": -0.4461291432380676, "logps/chosen": -0.10786256194114685, "logps/rejected": -0.7390084862709045, "loss": 3.349, "nll_loss": 0.7953333258628845, "rewards/accuracies": 0.75, "rewards/chosen": -0.01078625675290823, "rewards/margins": 0.06311459839344025, "rewards/rejected": -0.07390085607767105, "step": 2453 }, { "epoch": 1.6970954356846473, "grad_norm": 6.700299263000488, "learning_rate": 4.612724757952974e-05, "log_odds_chosen": 4.201428413391113, "log_odds_ratio": -0.072795569896698, "logits/chosen": -0.6352698802947998, "logits/rejected": -0.6624408960342407, "logps/chosen": -0.029072128236293793, "logps/rejected": -0.6273342370986938, "loss": 2.9228, "nll_loss": 0.7234126329421997, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029072128236293793, "rewards/margins": 0.05982620269060135, "rewards/rejected": -0.06273341923952103, "step": 2454 }, { "epoch": 1.697786998616874, "grad_norm": 6.6374664306640625, "learning_rate": 4.612340556323959e-05, "log_odds_chosen": 4.870717525482178, "log_odds_ratio": -0.17836816608905792, "logits/chosen": -0.7230358123779297, "logits/rejected": -0.7389208078384399, "logps/chosen": -0.058723971247673035, "logps/rejected": -0.7045331001281738, "loss": 2.9063, "nll_loss": 0.7087265253067017, "rewards/accuracies": 0.875, "rewards/chosen": -0.005872397683560848, "rewards/margins": 0.06458091735839844, "rewards/rejected": -0.07045331597328186, "step": 2455 }, { "epoch": 1.698478561549101, "grad_norm": 7.5409979820251465, "learning_rate": 4.611956354694944e-05, "log_odds_chosen": 2.510221242904663, "log_odds_ratio": -0.3023854196071625, "logits/chosen": -0.7858247756958008, "logits/rejected": -0.8371924161911011, "logps/chosen": -0.11306092888116837, "logps/rejected": -0.5818637609481812, "loss": 3.1074, "nll_loss": 0.7466117143630981, "rewards/accuracies": 0.875, "rewards/chosen": -0.011306094005703926, "rewards/margins": 0.04688028246164322, "rewards/rejected": -0.058186378329992294, "step": 2456 }, { "epoch": 1.6991701244813278, "grad_norm": 4.992187976837158, "learning_rate": 4.61157215306593e-05, "log_odds_chosen": 3.4826698303222656, "log_odds_ratio": -0.15915066003799438, "logits/chosen": -0.5362116098403931, "logits/rejected": -0.6215125918388367, "logps/chosen": -0.08510999381542206, "logps/rejected": -0.7648427486419678, "loss": 2.3932, "nll_loss": 0.5823809504508972, "rewards/accuracies": 0.875, "rewards/chosen": -0.008510999381542206, "rewards/margins": 0.06797328591346741, "rewards/rejected": -0.07648427784442902, "step": 2457 }, { "epoch": 1.6998616874135546, "grad_norm": 5.758655071258545, "learning_rate": 4.6111879514369144e-05, "log_odds_chosen": 3.8834996223449707, "log_odds_ratio": -0.20648881793022156, "logits/chosen": -0.38048383593559265, "logits/rejected": -0.33826085925102234, "logps/chosen": -0.07839153707027435, "logps/rejected": -0.5018714666366577, "loss": 2.7401, "nll_loss": 0.6643880605697632, "rewards/accuracies": 1.0, "rewards/chosen": -0.007839154452085495, "rewards/margins": 0.0423479899764061, "rewards/rejected": -0.05018714442849159, "step": 2458 }, { "epoch": 1.7005532503457814, "grad_norm": 6.271485805511475, "learning_rate": 4.6108037498078996e-05, "log_odds_chosen": 3.3183982372283936, "log_odds_ratio": -0.2391716092824936, "logits/chosen": -0.807701826095581, "logits/rejected": -0.8435157537460327, "logps/chosen": -0.08216782659292221, "logps/rejected": -0.6454704403877258, "loss": 3.6022, "nll_loss": 0.8766371011734009, "rewards/accuracies": 0.875, "rewards/chosen": -0.00821678340435028, "rewards/margins": 0.05633026361465454, "rewards/rejected": -0.06454704701900482, "step": 2459 }, { "epoch": 1.7012448132780082, "grad_norm": 12.49203109741211, "learning_rate": 4.610419548178885e-05, "log_odds_chosen": 3.415001392364502, "log_odds_ratio": -0.1704428493976593, "logits/chosen": -0.6266674995422363, "logits/rejected": -0.6652182936668396, "logps/chosen": -0.06423190236091614, "logps/rejected": -0.42358794808387756, "loss": 3.8353, "nll_loss": 0.9417771100997925, "rewards/accuracies": 1.0, "rewards/chosen": -0.006423190236091614, "rewards/margins": 0.03593560308218002, "rewards/rejected": -0.04235879331827164, "step": 2460 }, { "epoch": 1.701936376210235, "grad_norm": 5.712089538574219, "learning_rate": 4.6100353465498694e-05, "log_odds_chosen": 5.9961442947387695, "log_odds_ratio": -0.027979286387562752, "logits/chosen": -0.5057214498519897, "logits/rejected": -0.500369131565094, "logps/chosen": -0.014197605662047863, "logps/rejected": -0.7986212372779846, "loss": 2.0362, "nll_loss": 0.5062602758407593, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014197605196386576, "rewards/margins": 0.07844236493110657, "rewards/rejected": -0.07986212521791458, "step": 2461 }, { "epoch": 1.702627939142462, "grad_norm": 5.646170139312744, "learning_rate": 4.6096511449208547e-05, "log_odds_chosen": 5.234273910522461, "log_odds_ratio": -0.07074658572673798, "logits/chosen": -0.5358408093452454, "logits/rejected": -0.5804621577262878, "logps/chosen": -0.04008009284734726, "logps/rejected": -0.725631833076477, "loss": 2.6012, "nll_loss": 0.6432194709777832, "rewards/accuracies": 1.0, "rewards/chosen": -0.004008009098470211, "rewards/margins": 0.06855517625808716, "rewards/rejected": -0.07256318628787994, "step": 2462 }, { "epoch": 1.703319502074689, "grad_norm": 7.1241841316223145, "learning_rate": 4.609266943291839e-05, "log_odds_chosen": 4.2716546058654785, "log_odds_ratio": -0.23627769947052002, "logits/chosen": -0.49713873863220215, "logits/rejected": -0.47726917266845703, "logps/chosen": -0.046655893325805664, "logps/rejected": -0.7681934833526611, "loss": 2.7274, "nll_loss": 0.6582244038581848, "rewards/accuracies": 0.875, "rewards/chosen": -0.004665589425712824, "rewards/margins": 0.07215375453233719, "rewards/rejected": -0.07681934535503387, "step": 2463 }, { "epoch": 1.7040110650069158, "grad_norm": 12.598523139953613, "learning_rate": 4.608882741662825e-05, "log_odds_chosen": 4.887299060821533, "log_odds_ratio": -0.09854143112897873, "logits/chosen": -0.8739576935768127, "logits/rejected": -0.9403781294822693, "logps/chosen": -0.04767553135752678, "logps/rejected": -0.9661577343940735, "loss": 4.9117, "nll_loss": 1.2180765867233276, "rewards/accuracies": 1.0, "rewards/chosen": -0.00476755341514945, "rewards/margins": 0.091848224401474, "rewards/rejected": -0.09661578387022018, "step": 2464 }, { "epoch": 1.7047026279391426, "grad_norm": 7.02893590927124, "learning_rate": 4.60849854003381e-05, "log_odds_chosen": 3.973839282989502, "log_odds_ratio": -0.24144837260246277, "logits/chosen": -0.5743393898010254, "logits/rejected": -0.5642153024673462, "logps/chosen": -0.06009901687502861, "logps/rejected": -0.8592863082885742, "loss": 2.643, "nll_loss": 0.6366161108016968, "rewards/accuracies": 0.75, "rewards/chosen": -0.006009901873767376, "rewards/margins": 0.0799187421798706, "rewards/rejected": -0.08592863380908966, "step": 2465 }, { "epoch": 1.7053941908713695, "grad_norm": 11.561562538146973, "learning_rate": 4.608114338404795e-05, "log_odds_chosen": 5.92291259765625, "log_odds_ratio": -0.30987101793289185, "logits/chosen": -0.4808652997016907, "logits/rejected": -0.5432968139648438, "logps/chosen": -0.07293874770402908, "logps/rejected": -1.199968934059143, "loss": 3.8248, "nll_loss": 0.9252121448516846, "rewards/accuracies": 0.75, "rewards/chosen": -0.007293875329196453, "rewards/margins": 0.11270301789045334, "rewards/rejected": -0.11999689042568207, "step": 2466 }, { "epoch": 1.7060857538035963, "grad_norm": 7.464529991149902, "learning_rate": 4.60773013677578e-05, "log_odds_chosen": 4.637115478515625, "log_odds_ratio": -0.1439353972673416, "logits/chosen": -0.7529462575912476, "logits/rejected": -0.6442465782165527, "logps/chosen": -0.05325663462281227, "logps/rejected": -0.60707026720047, "loss": 2.232, "nll_loss": 0.5436094403266907, "rewards/accuracies": 1.0, "rewards/chosen": -0.005325663834810257, "rewards/margins": 0.05538136512041092, "rewards/rejected": -0.06070702522993088, "step": 2467 }, { "epoch": 1.7067773167358231, "grad_norm": 6.781388759613037, "learning_rate": 4.6073459351467654e-05, "log_odds_chosen": 3.841700553894043, "log_odds_ratio": -0.341701865196228, "logits/chosen": -0.6883922815322876, "logits/rejected": -0.7041289806365967, "logps/chosen": -0.08620139211416245, "logps/rejected": -0.857895016670227, "loss": 2.8108, "nll_loss": 0.6685272455215454, "rewards/accuracies": 0.75, "rewards/chosen": -0.008620140142738819, "rewards/margins": 0.07716936618089676, "rewards/rejected": -0.0857895016670227, "step": 2468 }, { "epoch": 1.70746887966805, "grad_norm": 7.313510894775391, "learning_rate": 4.60696173351775e-05, "log_odds_chosen": 5.199853897094727, "log_odds_ratio": -0.05814574658870697, "logits/chosen": -0.7749062776565552, "logits/rejected": -0.892104983329773, "logps/chosen": -0.05403900891542435, "logps/rejected": -0.8282055258750916, "loss": 3.1464, "nll_loss": 0.7807949781417847, "rewards/accuracies": 1.0, "rewards/chosen": -0.005403900984674692, "rewards/margins": 0.07741665095090866, "rewards/rejected": -0.08282054960727692, "step": 2469 }, { "epoch": 1.7081604426002768, "grad_norm": 6.603610038757324, "learning_rate": 4.606577531888735e-05, "log_odds_chosen": 4.6768012046813965, "log_odds_ratio": -0.18649247288703918, "logits/chosen": -0.4904458522796631, "logits/rejected": -0.5735772848129272, "logps/chosen": -0.04555944353342056, "logps/rejected": -0.6273662447929382, "loss": 2.7468, "nll_loss": 0.6680549383163452, "rewards/accuracies": 0.875, "rewards/chosen": -0.004555944819003344, "rewards/margins": 0.058180682361125946, "rewards/rejected": -0.0627366229891777, "step": 2470 }, { "epoch": 1.7088520055325036, "grad_norm": 6.438045024871826, "learning_rate": 4.6061933302597205e-05, "log_odds_chosen": 2.645942211151123, "log_odds_ratio": -0.261520653963089, "logits/chosen": -0.6434420347213745, "logits/rejected": -0.6276592016220093, "logps/chosen": -0.08992427587509155, "logps/rejected": -0.4632926881313324, "loss": 3.6067, "nll_loss": 0.8755119442939758, "rewards/accuracies": 0.875, "rewards/chosen": -0.008992427960038185, "rewards/margins": 0.037336841225624084, "rewards/rejected": -0.04632926732301712, "step": 2471 }, { "epoch": 1.7095435684647304, "grad_norm": 5.089877128601074, "learning_rate": 4.605809128630705e-05, "log_odds_chosen": 4.637329578399658, "log_odds_ratio": -0.20717129111289978, "logits/chosen": -0.3958089351654053, "logits/rejected": -0.5050152540206909, "logps/chosen": -0.06903784722089767, "logps/rejected": -0.763074517250061, "loss": 2.5772, "nll_loss": 0.62359219789505, "rewards/accuracies": 0.875, "rewards/chosen": -0.006903784349560738, "rewards/margins": 0.06940367817878723, "rewards/rejected": -0.07630746066570282, "step": 2472 }, { "epoch": 1.7102351313969573, "grad_norm": 6.6487531661987305, "learning_rate": 4.605424927001691e-05, "log_odds_chosen": 4.568889617919922, "log_odds_ratio": -0.28559839725494385, "logits/chosen": -0.49498260021209717, "logits/rejected": -0.5599215626716614, "logps/chosen": -0.0664115771651268, "logps/rejected": -0.8344486951828003, "loss": 2.6958, "nll_loss": 0.6453921794891357, "rewards/accuracies": 0.75, "rewards/chosen": -0.00664115697145462, "rewards/margins": 0.07680370658636093, "rewards/rejected": -0.08344486355781555, "step": 2473 }, { "epoch": 1.710926694329184, "grad_norm": 7.506117343902588, "learning_rate": 4.6050407253726755e-05, "log_odds_chosen": 7.32316780090332, "log_odds_ratio": -0.016720792278647423, "logits/chosen": -0.5493389964103699, "logits/rejected": -0.5543407797813416, "logps/chosen": -0.020256822928786278, "logps/rejected": -1.3681458234786987, "loss": 2.9035, "nll_loss": 0.7241946458816528, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020256820134818554, "rewards/margins": 0.1347889006137848, "rewards/rejected": -0.13681459426879883, "step": 2474 }, { "epoch": 1.711618257261411, "grad_norm": 8.684357643127441, "learning_rate": 4.604656523743661e-05, "log_odds_chosen": 1.567682147026062, "log_odds_ratio": -0.7690153121948242, "logits/chosen": -0.7396680116653442, "logits/rejected": -0.7365007400512695, "logps/chosen": -0.164411261677742, "logps/rejected": -0.343228280544281, "loss": 2.7866, "nll_loss": 0.6197413206100464, "rewards/accuracies": 0.75, "rewards/chosen": -0.01644112728536129, "rewards/margins": 0.01788170263171196, "rewards/rejected": -0.0343228280544281, "step": 2475 }, { "epoch": 1.7123098201936378, "grad_norm": 4.919681549072266, "learning_rate": 4.604272322114646e-05, "log_odds_chosen": 6.278683662414551, "log_odds_ratio": -0.04371640086174011, "logits/chosen": -0.3746539354324341, "logits/rejected": -0.35592973232269287, "logps/chosen": -0.03592051565647125, "logps/rejected": -0.9769902229309082, "loss": 2.255, "nll_loss": 0.5593817830085754, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035920515656471252, "rewards/margins": 0.09410697221755981, "rewards/rejected": -0.09769902378320694, "step": 2476 }, { "epoch": 1.7130013831258646, "grad_norm": 4.298426628112793, "learning_rate": 4.603888120485631e-05, "log_odds_chosen": 5.387601852416992, "log_odds_ratio": -0.05379874259233475, "logits/chosen": -0.4532133936882019, "logits/rejected": -0.4699724316596985, "logps/chosen": -0.028361182659864426, "logps/rejected": -0.6521701216697693, "loss": 2.3214, "nll_loss": 0.5749749541282654, "rewards/accuracies": 1.0, "rewards/chosen": -0.002836118219420314, "rewards/margins": 0.06238089129328728, "rewards/rejected": -0.06521701067686081, "step": 2477 }, { "epoch": 1.7136929460580914, "grad_norm": 6.177600860595703, "learning_rate": 4.603503918856616e-05, "log_odds_chosen": 5.090878486633301, "log_odds_ratio": -0.05117940902709961, "logits/chosen": -0.3519092798233032, "logits/rejected": -0.41055262088775635, "logps/chosen": -0.04080752655863762, "logps/rejected": -0.8973877429962158, "loss": 2.7697, "nll_loss": 0.6873171329498291, "rewards/accuracies": 1.0, "rewards/chosen": -0.0040807523764669895, "rewards/margins": 0.0856580138206482, "rewards/rejected": -0.08973877131938934, "step": 2478 }, { "epoch": 1.7143845089903182, "grad_norm": 12.071842193603516, "learning_rate": 4.603119717227601e-05, "log_odds_chosen": 2.429943323135376, "log_odds_ratio": -0.5938160419464111, "logits/chosen": -0.25275617837905884, "logits/rejected": -0.28012439608573914, "logps/chosen": -0.057525694370269775, "logps/rejected": -0.5072909593582153, "loss": 2.3098, "nll_loss": 0.5180577635765076, "rewards/accuracies": 0.875, "rewards/chosen": -0.005752569530159235, "rewards/margins": 0.044976525008678436, "rewards/rejected": -0.05072909966111183, "step": 2479 }, { "epoch": 1.715076071922545, "grad_norm": 11.65674114227295, "learning_rate": 4.602735515598586e-05, "log_odds_chosen": 3.821964979171753, "log_odds_ratio": -0.3769320249557495, "logits/chosen": -0.4692278802394867, "logits/rejected": -0.5543645024299622, "logps/chosen": -0.1797221601009369, "logps/rejected": -0.7264648675918579, "loss": 4.9171, "nll_loss": 1.1915725469589233, "rewards/accuracies": 0.75, "rewards/chosen": -0.01797221601009369, "rewards/margins": 0.05467427149415016, "rewards/rejected": -0.07264649122953415, "step": 2480 }, { "epoch": 1.715767634854772, "grad_norm": 5.816194534301758, "learning_rate": 4.602351313969571e-05, "log_odds_chosen": 4.72694206237793, "log_odds_ratio": -0.08599922060966492, "logits/chosen": -0.4541250765323639, "logits/rejected": -0.48050498962402344, "logps/chosen": -0.034682586789131165, "logps/rejected": -0.8450120687484741, "loss": 2.6775, "nll_loss": 0.6607639789581299, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034682590048760176, "rewards/margins": 0.08103295415639877, "rewards/rejected": -0.0845012217760086, "step": 2481 }, { "epoch": 1.7164591977869987, "grad_norm": 8.660858154296875, "learning_rate": 4.601967112340557e-05, "log_odds_chosen": 5.18958044052124, "log_odds_ratio": -0.10727565735578537, "logits/chosen": -0.44695162773132324, "logits/rejected": -0.5264561176300049, "logps/chosen": -0.05404742807149887, "logps/rejected": -1.119019865989685, "loss": 3.3442, "nll_loss": 0.825323760509491, "rewards/accuracies": 0.875, "rewards/chosen": -0.005404743365943432, "rewards/margins": 0.10649725049734116, "rewards/rejected": -0.11190199106931686, "step": 2482 }, { "epoch": 1.7171507607192256, "grad_norm": 3.831432342529297, "learning_rate": 4.6015829107115414e-05, "log_odds_chosen": 6.9305739402771, "log_odds_ratio": -0.011790118180215359, "logits/chosen": -0.4339551627635956, "logits/rejected": -0.4620319604873657, "logps/chosen": -0.03041076473891735, "logps/rejected": -1.066805124282837, "loss": 2.0754, "nll_loss": 0.5176640152931213, "rewards/accuracies": 1.0, "rewards/chosen": -0.003041076473891735, "rewards/margins": 0.10363944619894028, "rewards/rejected": -0.10668051987886429, "step": 2483 }, { "epoch": 1.7178423236514524, "grad_norm": 8.102904319763184, "learning_rate": 4.6011987090825266e-05, "log_odds_chosen": 2.083221435546875, "log_odds_ratio": -0.5790278911590576, "logits/chosen": -0.4810032248497009, "logits/rejected": -0.5122133493423462, "logps/chosen": -0.12283414602279663, "logps/rejected": -0.42418205738067627, "loss": 3.5874, "nll_loss": 0.838951826095581, "rewards/accuracies": 0.625, "rewards/chosen": -0.012283414602279663, "rewards/margins": 0.030134791508316994, "rewards/rejected": -0.04241820424795151, "step": 2484 }, { "epoch": 1.7185338865836792, "grad_norm": 8.226753234863281, "learning_rate": 4.600814507453512e-05, "log_odds_chosen": 5.8126068115234375, "log_odds_ratio": -0.24563762545585632, "logits/chosen": -0.5683649182319641, "logits/rejected": -0.6123848557472229, "logps/chosen": -0.06213032454252243, "logps/rejected": -0.975424587726593, "loss": 2.6634, "nll_loss": 0.6412944197654724, "rewards/accuracies": 0.875, "rewards/chosen": -0.006213032640516758, "rewards/margins": 0.0913294330239296, "rewards/rejected": -0.09754246473312378, "step": 2485 }, { "epoch": 1.719225449515906, "grad_norm": 7.954338550567627, "learning_rate": 4.600430305824497e-05, "log_odds_chosen": 6.833853721618652, "log_odds_ratio": -0.2492285668849945, "logits/chosen": -0.7041445374488831, "logits/rejected": -0.7608736753463745, "logps/chosen": -0.04681987315416336, "logps/rejected": -1.284895658493042, "loss": 3.1293, "nll_loss": 0.7574116587638855, "rewards/accuracies": 0.875, "rewards/chosen": -0.004681987222284079, "rewards/margins": 0.12380757927894592, "rewards/rejected": -0.12848955392837524, "step": 2486 }, { "epoch": 1.7199170124481329, "grad_norm": 8.005608558654785, "learning_rate": 4.600046104195482e-05, "log_odds_chosen": 2.9669289588928223, "log_odds_ratio": -0.6262913942337036, "logits/chosen": -0.3596298396587372, "logits/rejected": -0.36272570490837097, "logps/chosen": -0.08726787567138672, "logps/rejected": -0.5723020434379578, "loss": 3.5328, "nll_loss": 0.8205739259719849, "rewards/accuracies": 0.75, "rewards/chosen": -0.008726788684725761, "rewards/margins": 0.048503417521715164, "rewards/rejected": -0.057230208069086075, "step": 2487 }, { "epoch": 1.7206085753803597, "grad_norm": 5.509665012359619, "learning_rate": 4.599661902566467e-05, "log_odds_chosen": 7.698426723480225, "log_odds_ratio": -0.005875317845493555, "logits/chosen": -0.17930477857589722, "logits/rejected": -0.21657007932662964, "logps/chosen": -0.004427487496286631, "logps/rejected": -1.1619611978530884, "loss": 2.2304, "nll_loss": 0.5570050477981567, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004427487147040665, "rewards/margins": 0.11575337499380112, "rewards/rejected": -0.11619612574577332, "step": 2488 }, { "epoch": 1.7213001383125865, "grad_norm": 8.408278465270996, "learning_rate": 4.599277700937452e-05, "log_odds_chosen": 8.361098289489746, "log_odds_ratio": -0.0008353714365512133, "logits/chosen": -0.3038009703159332, "logits/rejected": -0.31657013297080994, "logps/chosen": -0.0015146147925406694, "logps/rejected": -1.2001842260360718, "loss": 2.9343, "nll_loss": 0.7334992289543152, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015146148507483304, "rewards/margins": 0.11986696720123291, "rewards/rejected": -0.12001842260360718, "step": 2489 }, { "epoch": 1.7219917012448134, "grad_norm": 10.702224731445312, "learning_rate": 4.598893499308437e-05, "log_odds_chosen": 3.5512514114379883, "log_odds_ratio": -0.2796310782432556, "logits/chosen": -0.8009122610092163, "logits/rejected": -0.7680351734161377, "logps/chosen": -0.05254250392317772, "logps/rejected": -0.5955202579498291, "loss": 3.6625, "nll_loss": 0.8876738548278809, "rewards/accuracies": 0.875, "rewards/chosen": -0.005254250951111317, "rewards/margins": 0.054297782480716705, "rewards/rejected": -0.05955202877521515, "step": 2490 }, { "epoch": 1.7226832641770402, "grad_norm": 11.43749713897705, "learning_rate": 4.598509297679423e-05, "log_odds_chosen": 3.9742114543914795, "log_odds_ratio": -0.27042850852012634, "logits/chosen": -0.34033113718032837, "logits/rejected": -0.3668830990791321, "logps/chosen": -0.19744163751602173, "logps/rejected": -1.366867184638977, "loss": 4.315, "nll_loss": 1.0516953468322754, "rewards/accuracies": 0.75, "rewards/chosen": -0.019744165241718292, "rewards/margins": 0.11694254726171494, "rewards/rejected": -0.13668671250343323, "step": 2491 }, { "epoch": 1.723374827109267, "grad_norm": 7.133487224578857, "learning_rate": 4.598125096050407e-05, "log_odds_chosen": 6.378708839416504, "log_odds_ratio": -0.04591769725084305, "logits/chosen": 0.07625691592693329, "logits/rejected": 0.002835869789123535, "logps/chosen": -0.01164622139185667, "logps/rejected": -0.8535522222518921, "loss": 2.6138, "nll_loss": 0.6488522291183472, "rewards/accuracies": 1.0, "rewards/chosen": -0.001164622139185667, "rewards/margins": 0.08419059962034225, "rewards/rejected": -0.0853552296757698, "step": 2492 }, { "epoch": 1.7240663900414939, "grad_norm": 10.00739860534668, "learning_rate": 4.5977408944213925e-05, "log_odds_chosen": 3.1018543243408203, "log_odds_ratio": -0.6838514804840088, "logits/chosen": -0.28175613284111023, "logits/rejected": -0.3513212502002716, "logps/chosen": -0.12369559705257416, "logps/rejected": -0.625062108039856, "loss": 2.9139, "nll_loss": 0.6600803732872009, "rewards/accuracies": 0.5, "rewards/chosen": -0.012369560077786446, "rewards/margins": 0.05013664811849594, "rewards/rejected": -0.06250621378421783, "step": 2493 }, { "epoch": 1.7247579529737207, "grad_norm": 5.779770851135254, "learning_rate": 4.597356692792378e-05, "log_odds_chosen": 4.564273834228516, "log_odds_ratio": -0.040960900485515594, "logits/chosen": -0.4975500702857971, "logits/rejected": -0.49774548411369324, "logps/chosen": -0.01713930070400238, "logps/rejected": -0.5751754641532898, "loss": 3.0896, "nll_loss": 0.7683084011077881, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017139300471171737, "rewards/margins": 0.05580361932516098, "rewards/rejected": -0.05751754716038704, "step": 2494 }, { "epoch": 1.7254495159059475, "grad_norm": 7.042632579803467, "learning_rate": 4.596972491163363e-05, "log_odds_chosen": 5.019662857055664, "log_odds_ratio": -0.14574462175369263, "logits/chosen": -0.11871343851089478, "logits/rejected": -0.1903182566165924, "logps/chosen": -0.037783216685056686, "logps/rejected": -0.5918722152709961, "loss": 2.5264, "nll_loss": 0.617037296295166, "rewards/accuracies": 0.875, "rewards/chosen": -0.003778322134166956, "rewards/margins": 0.05540889501571655, "rewards/rejected": -0.05918722227215767, "step": 2495 }, { "epoch": 1.7261410788381744, "grad_norm": 4.622135639190674, "learning_rate": 4.5965882895343475e-05, "log_odds_chosen": 4.588207721710205, "log_odds_ratio": -0.06134911999106407, "logits/chosen": -0.5902333855628967, "logits/rejected": -0.637090265750885, "logps/chosen": -0.03286483883857727, "logps/rejected": -0.8268821835517883, "loss": 3.1512, "nll_loss": 0.7816723585128784, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032864839304238558, "rewards/margins": 0.07940173894166946, "rewards/rejected": -0.08268822729587555, "step": 2496 }, { "epoch": 1.7268326417704012, "grad_norm": 9.341205596923828, "learning_rate": 4.596204087905333e-05, "log_odds_chosen": 4.570016384124756, "log_odds_ratio": -0.15347853302955627, "logits/chosen": -0.23321914672851562, "logits/rejected": -0.3474777340888977, "logps/chosen": -0.043549127876758575, "logps/rejected": -0.8679786920547485, "loss": 3.9859, "nll_loss": 0.9811323285102844, "rewards/accuracies": 1.0, "rewards/chosen": -0.0043549127876758575, "rewards/margins": 0.08244295418262482, "rewards/rejected": -0.08679787069559097, "step": 2497 }, { "epoch": 1.727524204702628, "grad_norm": 7.280107021331787, "learning_rate": 4.595819886276318e-05, "log_odds_chosen": 3.2914509773254395, "log_odds_ratio": -0.07923942804336548, "logits/chosen": -0.5238845348358154, "logits/rejected": -0.6023181080818176, "logps/chosen": -0.06284703314304352, "logps/rejected": -0.7727693915367126, "loss": 2.7732, "nll_loss": 0.6853671669960022, "rewards/accuracies": 1.0, "rewards/chosen": -0.006284703500568867, "rewards/margins": 0.07099223881959915, "rewards/rejected": -0.07727694511413574, "step": 2498 }, { "epoch": 1.7282157676348548, "grad_norm": 9.751381874084473, "learning_rate": 4.5954356846473026e-05, "log_odds_chosen": 3.872626304626465, "log_odds_ratio": -0.16218523681163788, "logits/chosen": -0.5514325499534607, "logits/rejected": -0.5251030921936035, "logps/chosen": -0.07056479901075363, "logps/rejected": -0.63563072681427, "loss": 4.1649, "nll_loss": 1.0250036716461182, "rewards/accuracies": 0.875, "rewards/chosen": -0.007056479807943106, "rewards/margins": 0.05650658905506134, "rewards/rejected": -0.06356307119131088, "step": 2499 }, { "epoch": 1.7289073305670817, "grad_norm": 7.159670829772949, "learning_rate": 4.5950514830182885e-05, "log_odds_chosen": 5.2221269607543945, "log_odds_ratio": -0.12021451443433762, "logits/chosen": -0.3670402765274048, "logits/rejected": -0.470436155796051, "logps/chosen": -0.05295195057988167, "logps/rejected": -0.8274115324020386, "loss": 2.5272, "nll_loss": 0.6197685599327087, "rewards/accuracies": 0.875, "rewards/chosen": -0.005295194685459137, "rewards/margins": 0.07744596153497696, "rewards/rejected": -0.0827411562204361, "step": 2500 }, { "epoch": 1.7295988934993085, "grad_norm": 7.748027801513672, "learning_rate": 4.594667281389273e-05, "log_odds_chosen": 5.541880130767822, "log_odds_ratio": -0.20395153760910034, "logits/chosen": -0.43168240785598755, "logits/rejected": -0.49034783244132996, "logps/chosen": -0.05815047025680542, "logps/rejected": -1.1151851415634155, "loss": 2.8836, "nll_loss": 0.7005079388618469, "rewards/accuracies": 0.875, "rewards/chosen": -0.005815047305077314, "rewards/margins": 0.10570347309112549, "rewards/rejected": -0.11151853203773499, "step": 2501 }, { "epoch": 1.7302904564315353, "grad_norm": 7.447516918182373, "learning_rate": 4.594283079760258e-05, "log_odds_chosen": 4.158929824829102, "log_odds_ratio": -0.6623351573944092, "logits/chosen": -0.12878236174583435, "logits/rejected": -0.11322636157274246, "logps/chosen": -0.12923133373260498, "logps/rejected": -0.6508136987686157, "loss": 3.5702, "nll_loss": 0.8263086676597595, "rewards/accuracies": 0.625, "rewards/chosen": -0.012923132628202438, "rewards/margins": 0.05215824022889137, "rewards/rejected": -0.06508137285709381, "step": 2502 }, { "epoch": 1.7309820193637622, "grad_norm": 5.580257892608643, "learning_rate": 4.5938988781312436e-05, "log_odds_chosen": 5.174313068389893, "log_odds_ratio": -0.04148080572485924, "logits/chosen": -0.5327701568603516, "logits/rejected": -0.5527817606925964, "logps/chosen": -0.024058351293206215, "logps/rejected": -0.8554421663284302, "loss": 2.5435, "nll_loss": 0.6317334175109863, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024058353155851364, "rewards/margins": 0.08313838392496109, "rewards/rejected": -0.08554422110319138, "step": 2503 }, { "epoch": 1.731673582295989, "grad_norm": 7.316047668457031, "learning_rate": 4.593514676502229e-05, "log_odds_chosen": 7.4302825927734375, "log_odds_ratio": -0.09521831572055817, "logits/chosen": -0.22343260049819946, "logits/rejected": -0.3161851167678833, "logps/chosen": -0.025817379355430603, "logps/rejected": -1.350869059562683, "loss": 2.7678, "nll_loss": 0.6824326515197754, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025817379355430603, "rewards/margins": 0.1325051635503769, "rewards/rejected": -0.13508689403533936, "step": 2504 }, { "epoch": 1.7323651452282158, "grad_norm": 7.182354927062988, "learning_rate": 4.5931304748732134e-05, "log_odds_chosen": 5.076048851013184, "log_odds_ratio": -0.07221361249685287, "logits/chosen": -0.5134769678115845, "logits/rejected": -0.5864946246147156, "logps/chosen": -0.05353981629014015, "logps/rejected": -1.074465036392212, "loss": 3.9926, "nll_loss": 0.9909363985061646, "rewards/accuracies": 1.0, "rewards/chosen": -0.005353981629014015, "rewards/margins": 0.10209251940250397, "rewards/rejected": -0.10744651407003403, "step": 2505 }, { "epoch": 1.7330567081604427, "grad_norm": 11.882872581481934, "learning_rate": 4.5927462732441986e-05, "log_odds_chosen": 2.1057169437408447, "log_odds_ratio": -0.47601622343063354, "logits/chosen": -0.45321619510650635, "logits/rejected": -0.42948225140571594, "logps/chosen": -0.18323317170143127, "logps/rejected": -0.32210710644721985, "loss": 3.7983, "nll_loss": 0.9019668698310852, "rewards/accuracies": 0.75, "rewards/chosen": -0.018323317170143127, "rewards/margins": 0.013887394219636917, "rewards/rejected": -0.032210711389780045, "step": 2506 }, { "epoch": 1.7337482710926695, "grad_norm": 8.515796661376953, "learning_rate": 4.592362071615184e-05, "log_odds_chosen": 6.995229721069336, "log_odds_ratio": -0.05257797986268997, "logits/chosen": -0.40070289373397827, "logits/rejected": -0.44888609647750854, "logps/chosen": -0.015650387853384018, "logps/rejected": -1.2345138788223267, "loss": 2.4482, "nll_loss": 0.6067959666252136, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015650388086214662, "rewards/margins": 0.12188635021448135, "rewards/rejected": -0.12345139682292938, "step": 2507 }, { "epoch": 1.7344398340248963, "grad_norm": 5.455097198486328, "learning_rate": 4.5919778699861684e-05, "log_odds_chosen": 5.574578285217285, "log_odds_ratio": -0.07917968928813934, "logits/chosen": -0.4635438323020935, "logits/rejected": -0.4907468855381012, "logps/chosen": -0.027772696688771248, "logps/rejected": -0.7599036693572998, "loss": 2.8449, "nll_loss": 0.7033053636550903, "rewards/accuracies": 1.0, "rewards/chosen": -0.002777269808575511, "rewards/margins": 0.07321310043334961, "rewards/rejected": -0.07599036395549774, "step": 2508 }, { "epoch": 1.7351313969571232, "grad_norm": 7.941450119018555, "learning_rate": 4.5915936683571544e-05, "log_odds_chosen": 5.4366536140441895, "log_odds_ratio": -0.15930068492889404, "logits/chosen": -0.21428066492080688, "logits/rejected": -0.3483664393424988, "logps/chosen": -0.03960276022553444, "logps/rejected": -1.0487326383590698, "loss": 3.0972, "nll_loss": 0.7583820223808289, "rewards/accuracies": 0.875, "rewards/chosen": -0.003960276022553444, "rewards/margins": 0.10091298818588257, "rewards/rejected": -0.10487326234579086, "step": 2509 }, { "epoch": 1.73582295988935, "grad_norm": 8.1426362991333, "learning_rate": 4.591209466728139e-05, "log_odds_chosen": 4.624601364135742, "log_odds_ratio": -0.059192001819610596, "logits/chosen": -0.5676755309104919, "logits/rejected": -0.576343834400177, "logps/chosen": -0.03207191824913025, "logps/rejected": -1.0185704231262207, "loss": 3.9412, "nll_loss": 0.9793733358383179, "rewards/accuracies": 1.0, "rewards/chosen": -0.003207192290574312, "rewards/margins": 0.09864984452724457, "rewards/rejected": -0.10185703635215759, "step": 2510 }, { "epoch": 1.7365145228215768, "grad_norm": 5.790066719055176, "learning_rate": 4.590825265099124e-05, "log_odds_chosen": 7.578642845153809, "log_odds_ratio": -0.057620033621788025, "logits/chosen": -0.5006792545318604, "logits/rejected": -0.5755428671836853, "logps/chosen": -0.013461096212267876, "logps/rejected": -1.1927460432052612, "loss": 2.1783, "nll_loss": 0.5388160943984985, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013461096677929163, "rewards/margins": 0.11792849004268646, "rewards/rejected": -0.11927460134029388, "step": 2511 }, { "epoch": 1.7372060857538036, "grad_norm": 8.553199768066406, "learning_rate": 4.5904410634701094e-05, "log_odds_chosen": 5.309554100036621, "log_odds_ratio": -0.22256025671958923, "logits/chosen": -0.4385831356048584, "logits/rejected": -0.5174022316932678, "logps/chosen": -0.05707190930843353, "logps/rejected": -0.7726631164550781, "loss": 3.2445, "nll_loss": 0.7888723611831665, "rewards/accuracies": 0.875, "rewards/chosen": -0.005707190837711096, "rewards/margins": 0.0715591236948967, "rewards/rejected": -0.07726631313562393, "step": 2512 }, { "epoch": 1.7378976486860305, "grad_norm": 9.486678123474121, "learning_rate": 4.5900568618410947e-05, "log_odds_chosen": 3.2149507999420166, "log_odds_ratio": -0.8374648094177246, "logits/chosen": -0.6298545598983765, "logits/rejected": -0.64149010181427, "logps/chosen": -0.07850378006696701, "logps/rejected": -0.8460255861282349, "loss": 3.993, "nll_loss": 0.9144986867904663, "rewards/accuracies": 0.875, "rewards/chosen": -0.007850377820432186, "rewards/margins": 0.0767521783709526, "rewards/rejected": -0.08460256457328796, "step": 2513 }, { "epoch": 1.7385892116182573, "grad_norm": 8.289212226867676, "learning_rate": 4.589672660212079e-05, "log_odds_chosen": 4.248414039611816, "log_odds_ratio": -0.08790218830108643, "logits/chosen": -0.052254606038331985, "logits/rejected": -0.05429219827055931, "logps/chosen": -0.09396925568580627, "logps/rejected": -0.9986541867256165, "loss": 2.8122, "nll_loss": 0.6942670941352844, "rewards/accuracies": 1.0, "rewards/chosen": -0.009396926499903202, "rewards/margins": 0.09046850353479385, "rewards/rejected": -0.09986542910337448, "step": 2514 }, { "epoch": 1.7392807745504841, "grad_norm": 12.42770767211914, "learning_rate": 4.5892884585830645e-05, "log_odds_chosen": 1.7273861169815063, "log_odds_ratio": -0.34406787157058716, "logits/chosen": -0.6730232238769531, "logits/rejected": -0.6579403877258301, "logps/chosen": -0.07420868426561356, "logps/rejected": -0.4276873767375946, "loss": 5.5056, "nll_loss": 1.341995120048523, "rewards/accuracies": 0.875, "rewards/chosen": -0.0074208686128258705, "rewards/margins": 0.035347871482372284, "rewards/rejected": -0.04276873916387558, "step": 2515 }, { "epoch": 1.739972337482711, "grad_norm": 16.12079429626465, "learning_rate": 4.58890425695405e-05, "log_odds_chosen": 2.7042465209960938, "log_odds_ratio": -0.30173051357269287, "logits/chosen": -0.5809437036514282, "logits/rejected": -0.6290631890296936, "logps/chosen": -0.14772537350654602, "logps/rejected": -0.7955739498138428, "loss": 3.8072, "nll_loss": 0.9216184616088867, "rewards/accuracies": 0.75, "rewards/chosen": -0.014772538095712662, "rewards/margins": 0.06478486210107803, "rewards/rejected": -0.0795573964715004, "step": 2516 }, { "epoch": 1.7406639004149378, "grad_norm": 11.333333015441895, "learning_rate": 4.588520055325034e-05, "log_odds_chosen": 4.032566547393799, "log_odds_ratio": -0.5613663196563721, "logits/chosen": -0.30087190866470337, "logits/rejected": -0.3377087116241455, "logps/chosen": -0.09402960538864136, "logps/rejected": -1.0039438009262085, "loss": 3.3515, "nll_loss": 0.7817503213882446, "rewards/accuracies": 0.875, "rewards/chosen": -0.009402960538864136, "rewards/margins": 0.09099142253398895, "rewards/rejected": -0.10039438307285309, "step": 2517 }, { "epoch": 1.7413554633471646, "grad_norm": 8.307899475097656, "learning_rate": 4.58813585369602e-05, "log_odds_chosen": 5.074389934539795, "log_odds_ratio": -0.31445711851119995, "logits/chosen": -0.6631494760513306, "logits/rejected": -0.6819051504135132, "logps/chosen": -0.0452355220913887, "logps/rejected": -0.805597186088562, "loss": 3.2989, "nll_loss": 0.7932692766189575, "rewards/accuracies": 0.875, "rewards/chosen": -0.004523552488535643, "rewards/margins": 0.07603617012500763, "rewards/rejected": -0.08055973052978516, "step": 2518 }, { "epoch": 1.7420470262793915, "grad_norm": 8.067875862121582, "learning_rate": 4.587751652067005e-05, "log_odds_chosen": 5.05952787399292, "log_odds_ratio": -0.0641229972243309, "logits/chosen": -0.5828802585601807, "logits/rejected": -0.6675068140029907, "logps/chosen": -0.041139908134937286, "logps/rejected": -0.7485602498054504, "loss": 3.6923, "nll_loss": 0.9166650772094727, "rewards/accuracies": 1.0, "rewards/chosen": -0.004113990347832441, "rewards/margins": 0.07074204087257385, "rewards/rejected": -0.07485602796077728, "step": 2519 }, { "epoch": 1.7427385892116183, "grad_norm": 7.006168842315674, "learning_rate": 4.58736745043799e-05, "log_odds_chosen": 5.627865791320801, "log_odds_ratio": -0.30353936553001404, "logits/chosen": -0.42846840620040894, "logits/rejected": -0.4686059355735779, "logps/chosen": -0.07304301857948303, "logps/rejected": -1.1873046159744263, "loss": 3.7581, "nll_loss": 0.9091646075248718, "rewards/accuracies": 0.875, "rewards/chosen": -0.007304301485419273, "rewards/margins": 0.11142615973949432, "rewards/rejected": -0.11873047053813934, "step": 2520 }, { "epoch": 1.7434301521438451, "grad_norm": 7.399003982543945, "learning_rate": 4.586983248808975e-05, "log_odds_chosen": 5.614686012268066, "log_odds_ratio": -0.025744276121258736, "logits/chosen": -0.40345659852027893, "logits/rejected": -0.4784991145133972, "logps/chosen": -0.033800702542066574, "logps/rejected": -0.9102199077606201, "loss": 2.9014, "nll_loss": 0.7227640151977539, "rewards/accuracies": 1.0, "rewards/chosen": -0.003380069974809885, "rewards/margins": 0.08764191716909409, "rewards/rejected": -0.09102199226617813, "step": 2521 }, { "epoch": 1.744121715076072, "grad_norm": 6.353837490081787, "learning_rate": 4.5865990471799605e-05, "log_odds_chosen": 5.061413764953613, "log_odds_ratio": -0.17814365029335022, "logits/chosen": -0.30245551466941833, "logits/rejected": -0.31398245692253113, "logps/chosen": -0.08056647330522537, "logps/rejected": -0.700476348400116, "loss": 2.9955, "nll_loss": 0.7310599684715271, "rewards/accuracies": 0.875, "rewards/chosen": -0.008056647144258022, "rewards/margins": 0.061990994960069656, "rewards/rejected": -0.07004763931035995, "step": 2522 }, { "epoch": 1.7448132780082988, "grad_norm": 5.448588848114014, "learning_rate": 4.586214845550945e-05, "log_odds_chosen": 4.95775842666626, "log_odds_ratio": -0.16451671719551086, "logits/chosen": -0.613614559173584, "logits/rejected": -0.6826783418655396, "logps/chosen": -0.0742364451289177, "logps/rejected": -0.9489127397537231, "loss": 2.8164, "nll_loss": 0.6876364946365356, "rewards/accuracies": 0.875, "rewards/chosen": -0.007423644419759512, "rewards/margins": 0.08746762573719025, "rewards/rejected": -0.0948912650346756, "step": 2523 }, { "epoch": 1.7455048409405256, "grad_norm": 9.476053237915039, "learning_rate": 4.58583064392193e-05, "log_odds_chosen": 3.0854625701904297, "log_odds_ratio": -0.46415698528289795, "logits/chosen": -0.7995980978012085, "logits/rejected": -0.78324955701828, "logps/chosen": -0.057111553847789764, "logps/rejected": -0.5348659753799438, "loss": 3.6078, "nll_loss": 0.8555430769920349, "rewards/accuracies": 0.875, "rewards/chosen": -0.005711155943572521, "rewards/margins": 0.047775447368621826, "rewards/rejected": -0.053486600518226624, "step": 2524 }, { "epoch": 1.7461964038727524, "grad_norm": 6.046580791473389, "learning_rate": 4.5854464422929156e-05, "log_odds_chosen": 5.830511569976807, "log_odds_ratio": -0.14367660880088806, "logits/chosen": -0.31898587942123413, "logits/rejected": -0.33586829900741577, "logps/chosen": -0.07724668085575104, "logps/rejected": -0.6755509376525879, "loss": 2.7089, "nll_loss": 0.6628577709197998, "rewards/accuracies": 1.0, "rewards/chosen": -0.007724667899310589, "rewards/margins": 0.059830423444509506, "rewards/rejected": -0.06755509227514267, "step": 2525 }, { "epoch": 1.7468879668049793, "grad_norm": 6.565101146697998, "learning_rate": 4.5850622406639e-05, "log_odds_chosen": 5.3831467628479, "log_odds_ratio": -0.08913850039243698, "logits/chosen": -0.5596224069595337, "logits/rejected": -0.6060592532157898, "logps/chosen": -0.054114218801259995, "logps/rejected": -0.911675751209259, "loss": 3.3401, "nll_loss": 0.8261107802391052, "rewards/accuracies": 1.0, "rewards/chosen": -0.0054114218801259995, "rewards/margins": 0.08575616031885147, "rewards/rejected": -0.09116758406162262, "step": 2526 }, { "epoch": 1.747579529737206, "grad_norm": 10.230347633361816, "learning_rate": 4.584678039034886e-05, "log_odds_chosen": 5.845252990722656, "log_odds_ratio": -0.1744435578584671, "logits/chosen": -0.4496798515319824, "logits/rejected": -0.5602766275405884, "logps/chosen": -0.27355867624282837, "logps/rejected": -1.4243483543395996, "loss": 2.8279, "nll_loss": 0.6895267963409424, "rewards/accuracies": 0.875, "rewards/chosen": -0.027355868369340897, "rewards/margins": 0.11507895588874817, "rewards/rejected": -0.14243483543395996, "step": 2527 }, { "epoch": 1.748271092669433, "grad_norm": 7.345275402069092, "learning_rate": 4.5842938374058706e-05, "log_odds_chosen": 4.958152770996094, "log_odds_ratio": -0.09105677157640457, "logits/chosen": -0.2806224822998047, "logits/rejected": -0.31392592191696167, "logps/chosen": -0.014766628853976727, "logps/rejected": -0.577477753162384, "loss": 3.9083, "nll_loss": 0.9679797887802124, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014766629319638014, "rewards/margins": 0.05627111345529556, "rewards/rejected": -0.057747773826122284, "step": 2528 }, { "epoch": 1.7489626556016598, "grad_norm": 4.975615978240967, "learning_rate": 4.583909635776856e-05, "log_odds_chosen": 6.15440559387207, "log_odds_ratio": -0.06456664204597473, "logits/chosen": -0.2691290080547333, "logits/rejected": -0.3199772238731384, "logps/chosen": -0.040304284542798996, "logps/rejected": -0.9820000529289246, "loss": 2.0233, "nll_loss": 0.4993680417537689, "rewards/accuracies": 1.0, "rewards/chosen": -0.004030428361147642, "rewards/margins": 0.09416957199573517, "rewards/rejected": -0.0982000008225441, "step": 2529 }, { "epoch": 1.7496542185338866, "grad_norm": 9.393540382385254, "learning_rate": 4.583525434147841e-05, "log_odds_chosen": 1.6259431838989258, "log_odds_ratio": -0.40542522072792053, "logits/chosen": -0.5042514801025391, "logits/rejected": -0.48449617624282837, "logps/chosen": -0.12315410375595093, "logps/rejected": -0.5425556302070618, "loss": 3.9706, "nll_loss": 0.9521090984344482, "rewards/accuracies": 0.75, "rewards/chosen": -0.012315411120653152, "rewards/margins": 0.041940152645111084, "rewards/rejected": -0.054255563765764236, "step": 2530 }, { "epoch": 1.7503457814661134, "grad_norm": 5.123023509979248, "learning_rate": 4.5831412325188263e-05, "log_odds_chosen": 3.471602439880371, "log_odds_ratio": -0.13706140220165253, "logits/chosen": -0.40993914008140564, "logits/rejected": -0.44549131393432617, "logps/chosen": -0.058320820331573486, "logps/rejected": -0.594856858253479, "loss": 2.147, "nll_loss": 0.5230435729026794, "rewards/accuracies": 1.0, "rewards/chosen": -0.005832082126289606, "rewards/margins": 0.05365360528230667, "rewards/rejected": -0.05948568880558014, "step": 2531 }, { "epoch": 1.7510373443983402, "grad_norm": 3.4983346462249756, "learning_rate": 4.582757030889811e-05, "log_odds_chosen": 5.438455581665039, "log_odds_ratio": -0.012369800359010696, "logits/chosen": -0.14914925396442413, "logits/rejected": -0.17695298790931702, "logps/chosen": -0.03334447368979454, "logps/rejected": -0.9888067841529846, "loss": 2.7649, "nll_loss": 0.6899773478507996, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033344475086778402, "rewards/margins": 0.09554623067378998, "rewards/rejected": -0.09888067841529846, "step": 2532 }, { "epoch": 1.751728907330567, "grad_norm": 6.337129592895508, "learning_rate": 4.582372829260796e-05, "log_odds_chosen": 5.5760579109191895, "log_odds_ratio": -0.06427756696939468, "logits/chosen": -0.48593372106552124, "logits/rejected": -0.5026462078094482, "logps/chosen": -0.06390149146318436, "logps/rejected": -1.2482969760894775, "loss": 3.1869, "nll_loss": 0.7902973890304565, "rewards/accuracies": 1.0, "rewards/chosen": -0.0063901497051119804, "rewards/margins": 0.11843954771757126, "rewards/rejected": -0.12482969462871552, "step": 2533 }, { "epoch": 1.752420470262794, "grad_norm": 6.590423107147217, "learning_rate": 4.5819886276317814e-05, "log_odds_chosen": 3.642829656600952, "log_odds_ratio": -0.17655974626541138, "logits/chosen": -0.39275485277175903, "logits/rejected": -0.4510759115219116, "logps/chosen": -0.0891493633389473, "logps/rejected": -0.8172488212585449, "loss": 3.1116, "nll_loss": 0.7602559924125671, "rewards/accuracies": 1.0, "rewards/chosen": -0.00891493633389473, "rewards/margins": 0.07280994206666946, "rewards/rejected": -0.08172488212585449, "step": 2534 }, { "epoch": 1.7531120331950207, "grad_norm": 4.0732340812683105, "learning_rate": 4.581604426002766e-05, "log_odds_chosen": 4.978409290313721, "log_odds_ratio": -0.1693851202726364, "logits/chosen": -0.43896347284317017, "logits/rejected": -0.5271863341331482, "logps/chosen": -0.040723707526922226, "logps/rejected": -0.7022296190261841, "loss": 2.0599, "nll_loss": 0.49802640080451965, "rewards/accuracies": 0.875, "rewards/chosen": -0.00407237047329545, "rewards/margins": 0.06615059077739716, "rewards/rejected": -0.07022295892238617, "step": 2535 }, { "epoch": 1.7538035961272476, "grad_norm": 6.722652435302734, "learning_rate": 4.581220224373752e-05, "log_odds_chosen": 6.762175559997559, "log_odds_ratio": -0.013489147648215294, "logits/chosen": -0.6883155703544617, "logits/rejected": -0.6428356766700745, "logps/chosen": -0.009356812573969364, "logps/rejected": -0.9700776934623718, "loss": 2.998, "nll_loss": 0.7481532692909241, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009356812224723399, "rewards/margins": 0.09607208520174026, "rewards/rejected": -0.09700776636600494, "step": 2536 }, { "epoch": 1.7544951590594744, "grad_norm": 4.753267765045166, "learning_rate": 4.5808360227447365e-05, "log_odds_chosen": 4.396501064300537, "log_odds_ratio": -0.0500371977686882, "logits/chosen": -0.6106992959976196, "logits/rejected": -0.6349448561668396, "logps/chosen": -0.04509191960096359, "logps/rejected": -0.767433226108551, "loss": 2.9094, "nll_loss": 0.7223502993583679, "rewards/accuracies": 1.0, "rewards/chosen": -0.004509191960096359, "rewards/margins": 0.0722341313958168, "rewards/rejected": -0.07674331963062286, "step": 2537 }, { "epoch": 1.7551867219917012, "grad_norm": 9.169681549072266, "learning_rate": 4.580451821115722e-05, "log_odds_chosen": 6.110998630523682, "log_odds_ratio": -0.03990146890282631, "logits/chosen": -0.3362918794155121, "logits/rejected": -0.43705520033836365, "logps/chosen": -0.014098139479756355, "logps/rejected": -1.3306076526641846, "loss": 3.4644, "nll_loss": 0.8621118068695068, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014098139945417643, "rewards/margins": 0.13165093958377838, "rewards/rejected": -0.1330607533454895, "step": 2538 }, { "epoch": 1.755878284923928, "grad_norm": 7.079245567321777, "learning_rate": 4.580067619486707e-05, "log_odds_chosen": 5.740993499755859, "log_odds_ratio": -0.05639209598302841, "logits/chosen": -0.6401171684265137, "logits/rejected": -0.6803783178329468, "logps/chosen": -0.026444023475050926, "logps/rejected": -1.019196629524231, "loss": 2.425, "nll_loss": 0.6005994081497192, "rewards/accuracies": 1.0, "rewards/chosen": -0.00264440244063735, "rewards/margins": 0.09927526116371155, "rewards/rejected": -0.10191966593265533, "step": 2539 }, { "epoch": 1.7565698478561549, "grad_norm": 9.348685264587402, "learning_rate": 4.579683417857692e-05, "log_odds_chosen": 5.047703742980957, "log_odds_ratio": -0.12630712985992432, "logits/chosen": -0.5589942336082458, "logits/rejected": -0.6259468197822571, "logps/chosen": -0.06140504777431488, "logps/rejected": -0.9298676252365112, "loss": 3.1253, "nll_loss": 0.7687047123908997, "rewards/accuracies": 1.0, "rewards/chosen": -0.006140504498034716, "rewards/margins": 0.0868462473154068, "rewards/rejected": -0.09298676252365112, "step": 2540 }, { "epoch": 1.7572614107883817, "grad_norm": 7.808840751647949, "learning_rate": 4.579299216228677e-05, "log_odds_chosen": 6.712955474853516, "log_odds_ratio": -0.028888585045933723, "logits/chosen": -0.5721656680107117, "logits/rejected": -0.6266698837280273, "logps/chosen": -0.01065114140510559, "logps/rejected": -0.8054331541061401, "loss": 4.1854, "nll_loss": 1.043459415435791, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010651140473783016, "rewards/margins": 0.07947821170091629, "rewards/rejected": -0.08054331690073013, "step": 2541 }, { "epoch": 1.7579529737206085, "grad_norm": 5.14798641204834, "learning_rate": 4.578915014599662e-05, "log_odds_chosen": 7.6750383377075195, "log_odds_ratio": -0.011463024653494358, "logits/chosen": -0.6050993204116821, "logits/rejected": -0.5772296786308289, "logps/chosen": -0.009099138900637627, "logps/rejected": -0.947097659111023, "loss": 2.5864, "nll_loss": 0.6454551219940186, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009099137969315052, "rewards/margins": 0.09379984438419342, "rewards/rejected": -0.09470976144075394, "step": 2542 }, { "epoch": 1.7586445366528354, "grad_norm": 6.0344367027282715, "learning_rate": 4.578530812970647e-05, "log_odds_chosen": 3.80947208404541, "log_odds_ratio": -0.2249820977449417, "logits/chosen": -0.905397891998291, "logits/rejected": -0.8978962898254395, "logps/chosen": -0.06969407200813293, "logps/rejected": -0.701169490814209, "loss": 3.0263, "nll_loss": 0.7340745329856873, "rewards/accuracies": 0.875, "rewards/chosen": -0.0069694072008132935, "rewards/margins": 0.06314754486083984, "rewards/rejected": -0.07011695206165314, "step": 2543 }, { "epoch": 1.7593360995850622, "grad_norm": 8.386651992797852, "learning_rate": 4.578146611341632e-05, "log_odds_chosen": 5.489584445953369, "log_odds_ratio": -0.10720404982566833, "logits/chosen": -0.622559666633606, "logits/rejected": -0.6387258172035217, "logps/chosen": -0.04162842780351639, "logps/rejected": -0.9262433052062988, "loss": 3.3995, "nll_loss": 0.8391590118408203, "rewards/accuracies": 1.0, "rewards/chosen": -0.004162842407822609, "rewards/margins": 0.0884614959359169, "rewards/rejected": -0.09262434393167496, "step": 2544 }, { "epoch": 1.760027662517289, "grad_norm": 10.090507507324219, "learning_rate": 4.577762409712618e-05, "log_odds_chosen": 4.450800895690918, "log_odds_ratio": -0.10913616418838501, "logits/chosen": -0.5928056240081787, "logits/rejected": -0.5783717632293701, "logps/chosen": -0.079688660800457, "logps/rejected": -1.0320922136306763, "loss": 3.6144, "nll_loss": 0.8926970362663269, "rewards/accuracies": 1.0, "rewards/chosen": -0.00796886719763279, "rewards/margins": 0.09524035453796387, "rewards/rejected": -0.10320921242237091, "step": 2545 }, { "epoch": 1.7607192254495159, "grad_norm": 5.601189613342285, "learning_rate": 4.577378208083602e-05, "log_odds_chosen": 6.303149223327637, "log_odds_ratio": -0.05890952795743942, "logits/chosen": -0.41871675848960876, "logits/rejected": -0.41857463121414185, "logps/chosen": -0.049030303955078125, "logps/rejected": -1.0410717725753784, "loss": 3.3528, "nll_loss": 0.8323108553886414, "rewards/accuracies": 1.0, "rewards/chosen": -0.004903030581772327, "rewards/margins": 0.0992041528224945, "rewards/rejected": -0.10410717874765396, "step": 2546 }, { "epoch": 1.7614107883817427, "grad_norm": 5.82822322845459, "learning_rate": 4.5769940064545875e-05, "log_odds_chosen": 8.01188850402832, "log_odds_ratio": -0.0016340998699888587, "logits/chosen": -0.23192644119262695, "logits/rejected": -0.22401806712150574, "logps/chosen": -0.002971423789858818, "logps/rejected": -1.180034875869751, "loss": 2.8009, "nll_loss": 0.7000521421432495, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002971423964481801, "rewards/margins": 0.11770634353160858, "rewards/rejected": -0.1180034801363945, "step": 2547 }, { "epoch": 1.7621023513139695, "grad_norm": 4.945810794830322, "learning_rate": 4.576609804825573e-05, "log_odds_chosen": 4.382489204406738, "log_odds_ratio": -0.14788955450057983, "logits/chosen": -0.3996325731277466, "logits/rejected": -0.4017907381057739, "logps/chosen": -0.04582387953996658, "logps/rejected": -0.8064547181129456, "loss": 2.4554, "nll_loss": 0.5990639328956604, "rewards/accuracies": 0.875, "rewards/chosen": -0.004582387860864401, "rewards/margins": 0.07606308907270432, "rewards/rejected": -0.08064547926187515, "step": 2548 }, { "epoch": 1.7627939142461964, "grad_norm": 3.50301456451416, "learning_rate": 4.576225603196558e-05, "log_odds_chosen": 5.088458061218262, "log_odds_ratio": -0.2111215889453888, "logits/chosen": -0.3392675518989563, "logits/rejected": -0.3433917164802551, "logps/chosen": -0.06524307280778885, "logps/rejected": -0.6364519596099854, "loss": 1.7787, "nll_loss": 0.42355847358703613, "rewards/accuracies": 0.875, "rewards/chosen": -0.006524307653307915, "rewards/margins": 0.05712088569998741, "rewards/rejected": -0.06364519149065018, "step": 2549 }, { "epoch": 1.7634854771784232, "grad_norm": 8.223228454589844, "learning_rate": 4.5758414015675426e-05, "log_odds_chosen": 3.8364288806915283, "log_odds_ratio": -0.1705276072025299, "logits/chosen": -0.4964444637298584, "logits/rejected": -0.5628746151924133, "logps/chosen": -0.05881989747285843, "logps/rejected": -0.6852390766143799, "loss": 1.9634, "nll_loss": 0.47379088401794434, "rewards/accuracies": 0.875, "rewards/chosen": -0.0058819898404181, "rewards/margins": 0.06264191120862961, "rewards/rejected": -0.06852389872074127, "step": 2550 }, { "epoch": 1.76417704011065, "grad_norm": 5.2325215339660645, "learning_rate": 4.575457199938528e-05, "log_odds_chosen": 7.47840690612793, "log_odds_ratio": -0.005898052826523781, "logits/chosen": -0.521270215511322, "logits/rejected": -0.58015376329422, "logps/chosen": -0.002618763130158186, "logps/rejected": -0.877556324005127, "loss": 2.8498, "nll_loss": 0.7118585109710693, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002618762955535203, "rewards/margins": 0.08749376237392426, "rewards/rejected": -0.08775563538074493, "step": 2551 }, { "epoch": 1.7648686030428768, "grad_norm": 8.80147933959961, "learning_rate": 4.575072998309513e-05, "log_odds_chosen": 4.112476348876953, "log_odds_ratio": -0.14841890335083008, "logits/chosen": -0.16200856864452362, "logits/rejected": -0.18064048886299133, "logps/chosen": -0.10202540457248688, "logps/rejected": -1.109443187713623, "loss": 3.2059, "nll_loss": 0.7866219282150269, "rewards/accuracies": 1.0, "rewards/chosen": -0.010202541016042233, "rewards/margins": 0.10074177384376526, "rewards/rejected": -0.11094431579113007, "step": 2552 }, { "epoch": 1.7655601659751037, "grad_norm": 8.807273864746094, "learning_rate": 4.5746887966804977e-05, "log_odds_chosen": 4.388718605041504, "log_odds_ratio": -0.15505658090114594, "logits/chosen": -0.5256962776184082, "logits/rejected": -0.5840314030647278, "logps/chosen": -0.056036874651908875, "logps/rejected": -1.0513105392456055, "loss": 3.4775, "nll_loss": 0.8538665771484375, "rewards/accuracies": 0.875, "rewards/chosen": -0.00560368737205863, "rewards/margins": 0.09952735900878906, "rewards/rejected": -0.10513104498386383, "step": 2553 }, { "epoch": 1.7662517289073305, "grad_norm": 8.24011516571045, "learning_rate": 4.5743045950514836e-05, "log_odds_chosen": 4.820762634277344, "log_odds_ratio": -0.1034751906991005, "logits/chosen": -0.4720611870288849, "logits/rejected": -0.4762382209300995, "logps/chosen": -0.043905775994062424, "logps/rejected": -0.965567946434021, "loss": 3.8933, "nll_loss": 0.9629859328269958, "rewards/accuracies": 1.0, "rewards/chosen": -0.004390577785670757, "rewards/margins": 0.0921662226319313, "rewards/rejected": -0.09655679017305374, "step": 2554 }, { "epoch": 1.7669432918395573, "grad_norm": 7.611776351928711, "learning_rate": 4.573920393422468e-05, "log_odds_chosen": 3.958975315093994, "log_odds_ratio": -0.13509047031402588, "logits/chosen": -0.35202181339263916, "logits/rejected": -0.38554438948631287, "logps/chosen": -0.06110473349690437, "logps/rejected": -0.9542187452316284, "loss": 2.7015, "nll_loss": 0.6618557572364807, "rewards/accuracies": 1.0, "rewards/chosen": -0.006110473535954952, "rewards/margins": 0.0893113911151886, "rewards/rejected": -0.09542188048362732, "step": 2555 }, { "epoch": 1.7676348547717842, "grad_norm": 8.181086540222168, "learning_rate": 4.5735361917934534e-05, "log_odds_chosen": 3.602811813354492, "log_odds_ratio": -0.40293049812316895, "logits/chosen": -0.6601470112800598, "logits/rejected": -0.6676779389381409, "logps/chosen": -0.11564220488071442, "logps/rejected": -1.2787232398986816, "loss": 3.4561, "nll_loss": 0.8237354755401611, "rewards/accuracies": 0.875, "rewards/chosen": -0.011564221233129501, "rewards/margins": 0.11630810797214508, "rewards/rejected": -0.12787233293056488, "step": 2556 }, { "epoch": 1.768326417704011, "grad_norm": 8.936256408691406, "learning_rate": 4.5731519901644386e-05, "log_odds_chosen": 2.3760743141174316, "log_odds_ratio": -0.2754179537296295, "logits/chosen": -0.9153653979301453, "logits/rejected": -0.9587001800537109, "logps/chosen": -0.16239596903324127, "logps/rejected": -0.5563870072364807, "loss": 4.001, "nll_loss": 0.9727064371109009, "rewards/accuracies": 0.875, "rewards/chosen": -0.016239596530795097, "rewards/margins": 0.039399102330207825, "rewards/rejected": -0.05563870072364807, "step": 2557 }, { "epoch": 1.7690179806362378, "grad_norm": 8.981245994567871, "learning_rate": 4.572767788535424e-05, "log_odds_chosen": 3.714677095413208, "log_odds_ratio": -0.2708074450492859, "logits/chosen": -0.3442336618900299, "logits/rejected": -0.4027746021747589, "logps/chosen": -0.065810926258564, "logps/rejected": -0.5971598029136658, "loss": 3.5891, "nll_loss": 0.8701846599578857, "rewards/accuracies": 0.875, "rewards/chosen": -0.006581092718988657, "rewards/margins": 0.05313488841056824, "rewards/rejected": -0.059715982526540756, "step": 2558 }, { "epoch": 1.7697095435684647, "grad_norm": 6.440550327301025, "learning_rate": 4.5723835869064084e-05, "log_odds_chosen": 4.327930450439453, "log_odds_ratio": -0.08816304057836533, "logits/chosen": -0.5006344318389893, "logits/rejected": -0.5131029486656189, "logps/chosen": -0.06322955340147018, "logps/rejected": -0.884222149848938, "loss": 3.8265, "nll_loss": 0.9477996826171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.006322954781353474, "rewards/margins": 0.08209925144910812, "rewards/rejected": -0.08842220902442932, "step": 2559 }, { "epoch": 1.7704011065006915, "grad_norm": 13.864747047424316, "learning_rate": 4.571999385277394e-05, "log_odds_chosen": 4.08857536315918, "log_odds_ratio": -0.33679530024528503, "logits/chosen": -0.5055022239685059, "logits/rejected": -0.5199083089828491, "logps/chosen": -0.045048221945762634, "logps/rejected": -0.4534846246242523, "loss": 3.9027, "nll_loss": 0.9419905543327332, "rewards/accuracies": 0.75, "rewards/chosen": -0.004504822660237551, "rewards/margins": 0.04084364324808121, "rewards/rejected": -0.04534846544265747, "step": 2560 }, { "epoch": 1.7710926694329183, "grad_norm": 8.000943183898926, "learning_rate": 4.571615183648379e-05, "log_odds_chosen": 3.79542875289917, "log_odds_ratio": -0.6328214406967163, "logits/chosen": -0.49562186002731323, "logits/rejected": -0.5720283389091492, "logps/chosen": -0.22840096056461334, "logps/rejected": -0.8777717351913452, "loss": 2.6406, "nll_loss": 0.5968554019927979, "rewards/accuracies": 0.625, "rewards/chosen": -0.022840095683932304, "rewards/margins": 0.06493708491325378, "rewards/rejected": -0.08777718245983124, "step": 2561 }, { "epoch": 1.7717842323651452, "grad_norm": 4.928500175476074, "learning_rate": 4.571230982019364e-05, "log_odds_chosen": 6.205010890960693, "log_odds_ratio": -0.03770516440272331, "logits/chosen": -0.5165535807609558, "logits/rejected": -0.5722185969352722, "logps/chosen": -0.04437845200300217, "logps/rejected": -1.2871813774108887, "loss": 2.2019, "nll_loss": 0.5466949343681335, "rewards/accuracies": 1.0, "rewards/chosen": -0.004437845200300217, "rewards/margins": 0.12428028881549835, "rewards/rejected": -0.12871812283992767, "step": 2562 }, { "epoch": 1.772475795297372, "grad_norm": 6.937126159667969, "learning_rate": 4.5708467803903494e-05, "log_odds_chosen": 5.473960876464844, "log_odds_ratio": -0.07916474342346191, "logits/chosen": -0.4659070372581482, "logits/rejected": -0.5119041800498962, "logps/chosen": -0.01686052419245243, "logps/rejected": -0.6619855165481567, "loss": 3.0094, "nll_loss": 0.7444390654563904, "rewards/accuracies": 1.0, "rewards/chosen": -0.001686052419245243, "rewards/margins": 0.06451250612735748, "rewards/rejected": -0.06619855761528015, "step": 2563 }, { "epoch": 1.7731673582295988, "grad_norm": 5.78624963760376, "learning_rate": 4.570462578761334e-05, "log_odds_chosen": 5.182653903961182, "log_odds_ratio": -0.04307662695646286, "logits/chosen": -0.3608715832233429, "logits/rejected": -0.4103313386440277, "logps/chosen": -0.05554118752479553, "logps/rejected": -1.0058197975158691, "loss": 2.8295, "nll_loss": 0.7030580043792725, "rewards/accuracies": 1.0, "rewards/chosen": -0.005554119125008583, "rewards/margins": 0.0950278639793396, "rewards/rejected": -0.10058198869228363, "step": 2564 }, { "epoch": 1.7738589211618256, "grad_norm": 6.86865234375, "learning_rate": 4.570078377132319e-05, "log_odds_chosen": 6.726229667663574, "log_odds_ratio": -0.0850033387541771, "logits/chosen": -0.3425213694572449, "logits/rejected": -0.43949681520462036, "logps/chosen": -0.03279908001422882, "logps/rejected": -1.065782070159912, "loss": 2.7858, "nll_loss": 0.6879481077194214, "rewards/accuracies": 1.0, "rewards/chosen": -0.003279907861724496, "rewards/margins": 0.10329829156398773, "rewards/rejected": -0.10657820105552673, "step": 2565 }, { "epoch": 1.7745504840940525, "grad_norm": 9.900083541870117, "learning_rate": 4.5696941755033045e-05, "log_odds_chosen": 3.831148147583008, "log_odds_ratio": -0.3260344862937927, "logits/chosen": -0.5216836333274841, "logits/rejected": -0.5302917957305908, "logps/chosen": -0.05193112790584564, "logps/rejected": -0.44250690937042236, "loss": 4.0738, "nll_loss": 0.9858537912368774, "rewards/accuracies": 0.875, "rewards/chosen": -0.005193112883716822, "rewards/margins": 0.03905757516622543, "rewards/rejected": -0.04425068944692612, "step": 2566 }, { "epoch": 1.7752420470262793, "grad_norm": 10.836536407470703, "learning_rate": 4.56930997387429e-05, "log_odds_chosen": 5.086477279663086, "log_odds_ratio": -0.22728855907917023, "logits/chosen": -0.6295913457870483, "logits/rejected": -0.681605339050293, "logps/chosen": -0.05664917454123497, "logps/rejected": -1.0559172630310059, "loss": 3.795, "nll_loss": 0.9260324835777283, "rewards/accuracies": 0.875, "rewards/chosen": -0.005664917174726725, "rewards/margins": 0.09992681443691254, "rewards/rejected": -0.10559172928333282, "step": 2567 }, { "epoch": 1.7759336099585061, "grad_norm": 11.188941955566406, "learning_rate": 4.568925772245274e-05, "log_odds_chosen": 5.1092705726623535, "log_odds_ratio": -0.08259230107069016, "logits/chosen": -0.6408547759056091, "logits/rejected": -0.7252624034881592, "logps/chosen": -0.01996024139225483, "logps/rejected": -0.7125002145767212, "loss": 4.0462, "nll_loss": 1.0032902956008911, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019960240460932255, "rewards/margins": 0.06925399601459503, "rewards/rejected": -0.07125002145767212, "step": 2568 }, { "epoch": 1.776625172890733, "grad_norm": 11.991272926330566, "learning_rate": 4.56854157061626e-05, "log_odds_chosen": 4.901045322418213, "log_odds_ratio": -0.9436802268028259, "logits/chosen": -0.5167893767356873, "logits/rejected": -0.5799105763435364, "logps/chosen": -0.11814716458320618, "logps/rejected": -0.9997060298919678, "loss": 3.4344, "nll_loss": 0.7642271518707275, "rewards/accuracies": 0.75, "rewards/chosen": -0.011814717203378677, "rewards/margins": 0.08815588057041168, "rewards/rejected": -0.09997060894966125, "step": 2569 }, { "epoch": 1.7773167358229598, "grad_norm": 7.425483226776123, "learning_rate": 4.568157368987245e-05, "log_odds_chosen": 4.753199577331543, "log_odds_ratio": -0.31641826033592224, "logits/chosen": -0.5371108651161194, "logits/rejected": -0.6080737113952637, "logps/chosen": -0.045531876385211945, "logps/rejected": -0.6791171431541443, "loss": 3.0047, "nll_loss": 0.7195222973823547, "rewards/accuracies": 0.875, "rewards/chosen": -0.0045531876385211945, "rewards/margins": 0.06335853040218353, "rewards/rejected": -0.06791172176599503, "step": 2570 }, { "epoch": 1.7780082987551866, "grad_norm": 6.604394435882568, "learning_rate": 4.56777316735823e-05, "log_odds_chosen": 6.296212196350098, "log_odds_ratio": -0.0063947951421141624, "logits/chosen": -0.5465483665466309, "logits/rejected": -0.5125059485435486, "logps/chosen": -0.012749740853905678, "logps/rejected": -1.0247169733047485, "loss": 2.7648, "nll_loss": 0.6905537843704224, "rewards/accuracies": 1.0, "rewards/chosen": -0.001274973968975246, "rewards/margins": 0.10119672119617462, "rewards/rejected": -0.10247169435024261, "step": 2571 }, { "epoch": 1.7786998616874135, "grad_norm": 11.984283447265625, "learning_rate": 4.567388965729215e-05, "log_odds_chosen": 2.5593433380126953, "log_odds_ratio": -0.5915682911872864, "logits/chosen": -0.699419379234314, "logits/rejected": -0.7382344603538513, "logps/chosen": -0.18622469902038574, "logps/rejected": -0.6699910759925842, "loss": 3.5449, "nll_loss": 0.8270754218101501, "rewards/accuracies": 0.625, "rewards/chosen": -0.018622469156980515, "rewards/margins": 0.04837663844227791, "rewards/rejected": -0.06699910759925842, "step": 2572 }, { "epoch": 1.7793914246196403, "grad_norm": 6.196664810180664, "learning_rate": 4.5670047641002e-05, "log_odds_chosen": 5.564531326293945, "log_odds_ratio": -0.047365687787532806, "logits/chosen": -0.6117345094680786, "logits/rejected": -0.6705157160758972, "logps/chosen": -0.03350626677274704, "logps/rejected": -0.5655863881111145, "loss": 2.5519, "nll_loss": 0.6332501173019409, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033506269101053476, "rewards/margins": 0.053208015859127045, "rewards/rejected": -0.05655864253640175, "step": 2573 }, { "epoch": 1.7800829875518671, "grad_norm": 6.181553840637207, "learning_rate": 4.566620562471185e-05, "log_odds_chosen": 3.522923469543457, "log_odds_ratio": -0.24771559238433838, "logits/chosen": -0.7219829559326172, "logits/rejected": -0.7598026990890503, "logps/chosen": -0.10694906860589981, "logps/rejected": -0.6799654364585876, "loss": 3.5333, "nll_loss": 0.8585590720176697, "rewards/accuracies": 0.875, "rewards/chosen": -0.010694906115531921, "rewards/margins": 0.05730164051055908, "rewards/rejected": -0.067996546626091, "step": 2574 }, { "epoch": 1.780774550484094, "grad_norm": 5.776702880859375, "learning_rate": 4.56623636084217e-05, "log_odds_chosen": 5.801150321960449, "log_odds_ratio": -0.13522619009017944, "logits/chosen": -0.7149621248245239, "logits/rejected": -0.7345128655433655, "logps/chosen": -0.04546257480978966, "logps/rejected": -0.755382776260376, "loss": 1.9775, "nll_loss": 0.4808577001094818, "rewards/accuracies": 0.875, "rewards/chosen": -0.004546257667243481, "rewards/margins": 0.07099202275276184, "rewards/rejected": -0.0755382776260376, "step": 2575 }, { "epoch": 1.7814661134163208, "grad_norm": 6.1580095291137695, "learning_rate": 4.5658521592131556e-05, "log_odds_chosen": 4.461126327514648, "log_odds_ratio": -0.05306124687194824, "logits/chosen": -0.3161161243915558, "logits/rejected": -0.3657586872577667, "logps/chosen": -0.07128717005252838, "logps/rejected": -1.515031099319458, "loss": 2.9739, "nll_loss": 0.7381733059883118, "rewards/accuracies": 1.0, "rewards/chosen": -0.007128716912120581, "rewards/margins": 0.14437440037727356, "rewards/rejected": -0.15150313079357147, "step": 2576 }, { "epoch": 1.7821576763485476, "grad_norm": 12.750073432922363, "learning_rate": 4.56546795758414e-05, "log_odds_chosen": 3.2818448543548584, "log_odds_ratio": -1.0118776559829712, "logits/chosen": -0.3662753105163574, "logits/rejected": -0.402810662984848, "logps/chosen": -0.09948496520519257, "logps/rejected": -0.7612845301628113, "loss": 4.6259, "nll_loss": 1.0552799701690674, "rewards/accuracies": 0.625, "rewards/chosen": -0.009948497638106346, "rewards/margins": 0.06617995351552963, "rewards/rejected": -0.07612845301628113, "step": 2577 }, { "epoch": 1.7828492392807744, "grad_norm": 9.642328262329102, "learning_rate": 4.565083755955126e-05, "log_odds_chosen": 3.5306921005249023, "log_odds_ratio": -0.5706806182861328, "logits/chosen": -0.44251155853271484, "logits/rejected": -0.45110124349594116, "logps/chosen": -0.10406813770532608, "logps/rejected": -0.5283291339874268, "loss": 3.3329, "nll_loss": 0.7761471271514893, "rewards/accuracies": 0.625, "rewards/chosen": -0.010406811721622944, "rewards/margins": 0.04242610186338425, "rewards/rejected": -0.052832912653684616, "step": 2578 }, { "epoch": 1.7835408022130013, "grad_norm": 6.969174861907959, "learning_rate": 4.5646995543261106e-05, "log_odds_chosen": 6.242532730102539, "log_odds_ratio": -0.018553482368588448, "logits/chosen": -0.6219586133956909, "logits/rejected": -0.6382424235343933, "logps/chosen": -0.01259948592633009, "logps/rejected": -0.997509241104126, "loss": 3.8471, "nll_loss": 0.9599129557609558, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012599484762176871, "rewards/margins": 0.09849098324775696, "rewards/rejected": -0.09975093603134155, "step": 2579 }, { "epoch": 1.784232365145228, "grad_norm": 7.377046585083008, "learning_rate": 4.564315352697096e-05, "log_odds_chosen": 3.4929206371307373, "log_odds_ratio": -0.19127880036830902, "logits/chosen": -0.5476193428039551, "logits/rejected": -0.6145222187042236, "logps/chosen": -0.03862868994474411, "logps/rejected": -0.6367706656455994, "loss": 3.1703, "nll_loss": 0.7734379768371582, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038628692273050547, "rewards/margins": 0.0598142072558403, "rewards/rejected": -0.06367707252502441, "step": 2580 }, { "epoch": 1.784923928077455, "grad_norm": 7.009395599365234, "learning_rate": 4.563931151068081e-05, "log_odds_chosen": 4.45806884765625, "log_odds_ratio": -0.17331717908382416, "logits/chosen": -0.2205784022808075, "logits/rejected": -0.26497140526771545, "logps/chosen": -0.06806333363056183, "logps/rejected": -0.7032272815704346, "loss": 2.761, "nll_loss": 0.6729127168655396, "rewards/accuracies": 0.875, "rewards/chosen": -0.0068063330836594105, "rewards/margins": 0.06351640075445175, "rewards/rejected": -0.07032272964715958, "step": 2581 }, { "epoch": 1.7856154910096818, "grad_norm": 8.358550071716309, "learning_rate": 4.563546949439066e-05, "log_odds_chosen": 5.872114181518555, "log_odds_ratio": -0.12831641733646393, "logits/chosen": -0.7278775572776794, "logits/rejected": -0.754865288734436, "logps/chosen": -0.017764536663889885, "logps/rejected": -0.8691182136535645, "loss": 2.4239, "nll_loss": 0.5931454300880432, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017764536896720529, "rewards/margins": 0.08513537049293518, "rewards/rejected": -0.08691181242465973, "step": 2582 }, { "epoch": 1.7863070539419086, "grad_norm": 6.859455585479736, "learning_rate": 4.563162747810051e-05, "log_odds_chosen": 7.026611804962158, "log_odds_ratio": -0.007522970903664827, "logits/chosen": -0.47302526235580444, "logits/rejected": -0.5076656341552734, "logps/chosen": -0.003296129172667861, "logps/rejected": -1.0726574659347534, "loss": 2.5881, "nll_loss": 0.6462792158126831, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003296128998044878, "rewards/margins": 0.10693613439798355, "rewards/rejected": -0.10726574808359146, "step": 2583 }, { "epoch": 1.7869986168741354, "grad_norm": 8.412336349487305, "learning_rate": 4.562778546181036e-05, "log_odds_chosen": 3.1393537521362305, "log_odds_ratio": -0.3619435727596283, "logits/chosen": -0.24041230976581573, "logits/rejected": -0.2845999002456665, "logps/chosen": -0.09177468717098236, "logps/rejected": -0.8623040914535522, "loss": 3.0168, "nll_loss": 0.7179951071739197, "rewards/accuracies": 0.75, "rewards/chosen": -0.009177468717098236, "rewards/margins": 0.07705294340848923, "rewards/rejected": -0.08623041212558746, "step": 2584 }, { "epoch": 1.7876901798063622, "grad_norm": 7.934391498565674, "learning_rate": 4.5623943445520214e-05, "log_odds_chosen": 5.535144805908203, "log_odds_ratio": -0.0946061760187149, "logits/chosen": -0.6780644655227661, "logits/rejected": -0.7226251363754272, "logps/chosen": -0.05594250559806824, "logps/rejected": -0.9195252656936646, "loss": 3.5998, "nll_loss": 0.8904974460601807, "rewards/accuracies": 0.875, "rewards/chosen": -0.005594250746071339, "rewards/margins": 0.08635827153921127, "rewards/rejected": -0.09195252507925034, "step": 2585 }, { "epoch": 1.788381742738589, "grad_norm": 9.528312683105469, "learning_rate": 4.562010142923006e-05, "log_odds_chosen": 4.408294677734375, "log_odds_ratio": -0.3562813699245453, "logits/chosen": -0.6378925442695618, "logits/rejected": -0.6505054235458374, "logps/chosen": -0.054091863334178925, "logps/rejected": -0.7761125564575195, "loss": 3.5691, "nll_loss": 0.856635332107544, "rewards/accuracies": 0.875, "rewards/chosen": -0.005409186240285635, "rewards/margins": 0.07220207899808884, "rewards/rejected": -0.07761126756668091, "step": 2586 }, { "epoch": 1.789073305670816, "grad_norm": 5.693426609039307, "learning_rate": 4.561625941293992e-05, "log_odds_chosen": 4.144665718078613, "log_odds_ratio": -0.28512609004974365, "logits/chosen": -0.5980684161186218, "logits/rejected": -0.5759909152984619, "logps/chosen": -0.08227177709341049, "logps/rejected": -0.6942697763442993, "loss": 3.113, "nll_loss": 0.7497309446334839, "rewards/accuracies": 0.75, "rewards/chosen": -0.008227178826928139, "rewards/margins": 0.06119980663061142, "rewards/rejected": -0.06942698359489441, "step": 2587 }, { "epoch": 1.7897648686030427, "grad_norm": 6.498386859893799, "learning_rate": 4.5612417396649765e-05, "log_odds_chosen": 7.378063678741455, "log_odds_ratio": -0.007801724597811699, "logits/chosen": -0.24729809165000916, "logits/rejected": -0.284311443567276, "logps/chosen": -0.01493473257869482, "logps/rejected": -1.2252931594848633, "loss": 3.0915, "nll_loss": 0.7720844149589539, "rewards/accuracies": 1.0, "rewards/chosen": -0.001493473188020289, "rewards/margins": 0.12103584408760071, "rewards/rejected": -0.12252932041883469, "step": 2588 }, { "epoch": 1.7904564315352696, "grad_norm": 6.341252326965332, "learning_rate": 4.560857538035962e-05, "log_odds_chosen": 2.9661362171173096, "log_odds_ratio": -0.21067170798778534, "logits/chosen": -0.1589818298816681, "logits/rejected": -0.20216339826583862, "logps/chosen": -0.10224173963069916, "logps/rejected": -0.7577835321426392, "loss": 2.759, "nll_loss": 0.6686722040176392, "rewards/accuracies": 0.875, "rewards/chosen": -0.010224174708127975, "rewards/margins": 0.065554179251194, "rewards/rejected": -0.07577835023403168, "step": 2589 }, { "epoch": 1.7911479944674964, "grad_norm": 8.582254409790039, "learning_rate": 4.560473336406947e-05, "log_odds_chosen": 5.644009590148926, "log_odds_ratio": -0.09102729707956314, "logits/chosen": -0.6598323583602905, "logits/rejected": -0.6340920329093933, "logps/chosen": -0.03555937111377716, "logps/rejected": -1.0442126989364624, "loss": 3.2956, "nll_loss": 0.8148072361946106, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035559372045099735, "rewards/margins": 0.10086533427238464, "rewards/rejected": -0.10442127287387848, "step": 2590 }, { "epoch": 1.7918395573997232, "grad_norm": 5.829075813293457, "learning_rate": 4.5600891347779315e-05, "log_odds_chosen": 5.671568393707275, "log_odds_ratio": -0.032739464193582535, "logits/chosen": -0.697761595249176, "logits/rejected": -0.686554491519928, "logps/chosen": -0.02588575892150402, "logps/rejected": -0.8335800170898438, "loss": 2.9027, "nll_loss": 0.7223943471908569, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025885761715471745, "rewards/margins": 0.08076941967010498, "rewards/rejected": -0.08335800468921661, "step": 2591 }, { "epoch": 1.79253112033195, "grad_norm": 9.570256233215332, "learning_rate": 4.559704933148917e-05, "log_odds_chosen": 4.774772644042969, "log_odds_ratio": -0.18960583209991455, "logits/chosen": -0.5086647868156433, "logits/rejected": -0.5806989669799805, "logps/chosen": -0.08618862181901932, "logps/rejected": -0.7194696068763733, "loss": 3.5935, "nll_loss": 0.8794207572937012, "rewards/accuracies": 0.875, "rewards/chosen": -0.008618861436843872, "rewards/margins": 0.0633281022310257, "rewards/rejected": -0.07194696366786957, "step": 2592 }, { "epoch": 1.7932226832641769, "grad_norm": 8.571864128112793, "learning_rate": 4.559320731519902e-05, "log_odds_chosen": 5.25043249130249, "log_odds_ratio": -0.21316511929035187, "logits/chosen": -0.5006336569786072, "logits/rejected": -0.5622016787528992, "logps/chosen": -0.08730829507112503, "logps/rejected": -0.9564471244812012, "loss": 3.3834, "nll_loss": 0.8245400190353394, "rewards/accuracies": 0.875, "rewards/chosen": -0.008730829693377018, "rewards/margins": 0.08691388368606567, "rewards/rejected": -0.09564471244812012, "step": 2593 }, { "epoch": 1.7939142461964037, "grad_norm": 10.458921432495117, "learning_rate": 4.558936529890887e-05, "log_odds_chosen": 3.889603614807129, "log_odds_ratio": -0.3547723889350891, "logits/chosen": -0.38273003697395325, "logits/rejected": -0.37535360455513, "logps/chosen": -0.08629313856363297, "logps/rejected": -0.7442159056663513, "loss": 2.6653, "nll_loss": 0.6308448910713196, "rewards/accuracies": 0.75, "rewards/chosen": -0.008629313670098782, "rewards/margins": 0.0657922774553299, "rewards/rejected": -0.07442159950733185, "step": 2594 }, { "epoch": 1.7946058091286305, "grad_norm": 6.538029670715332, "learning_rate": 4.558552328261872e-05, "log_odds_chosen": 3.9426791667938232, "log_odds_ratio": -0.1984841227531433, "logits/chosen": -0.4637698531150818, "logits/rejected": -0.4950888752937317, "logps/chosen": -0.05847723037004471, "logps/rejected": -0.655007004737854, "loss": 3.5605, "nll_loss": 0.8702831864356995, "rewards/accuracies": 0.875, "rewards/chosen": -0.005847723223268986, "rewards/margins": 0.05965298414230347, "rewards/rejected": -0.06550070643424988, "step": 2595 }, { "epoch": 1.7952973720608574, "grad_norm": 10.25632095336914, "learning_rate": 4.558168126632858e-05, "log_odds_chosen": 3.3027095794677734, "log_odds_ratio": -0.3819888234138489, "logits/chosen": -0.5645632743835449, "logits/rejected": -0.6149783134460449, "logps/chosen": -0.1231047660112381, "logps/rejected": -0.7725575566291809, "loss": 4.4363, "nll_loss": 1.0708774328231812, "rewards/accuracies": 0.875, "rewards/chosen": -0.01231047697365284, "rewards/margins": 0.0649452805519104, "rewards/rejected": -0.07725575566291809, "step": 2596 }, { "epoch": 1.7959889349930842, "grad_norm": 7.665077209472656, "learning_rate": 4.557783925003842e-05, "log_odds_chosen": 4.87660026550293, "log_odds_ratio": -0.040577709674835205, "logits/chosen": -0.4291841387748718, "logits/rejected": -0.47871482372283936, "logps/chosen": -0.02387285605072975, "logps/rejected": -0.7996243238449097, "loss": 3.0375, "nll_loss": 0.7553067207336426, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023872856982052326, "rewards/margins": 0.07757514715194702, "rewards/rejected": -0.07996243238449097, "step": 2597 }, { "epoch": 1.796680497925311, "grad_norm": 5.8194193840026855, "learning_rate": 4.5573997233748275e-05, "log_odds_chosen": 5.245506286621094, "log_odds_ratio": -0.1459067016839981, "logits/chosen": -0.34469327330589294, "logits/rejected": -0.39062270522117615, "logps/chosen": -0.033551059663295746, "logps/rejected": -0.7837735414505005, "loss": 2.7199, "nll_loss": 0.6653863787651062, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033551061060279608, "rewards/margins": 0.07502225041389465, "rewards/rejected": -0.07837735116481781, "step": 2598 }, { "epoch": 1.797372060857538, "grad_norm": 6.646912097930908, "learning_rate": 4.557015521745812e-05, "log_odds_chosen": 3.947014093399048, "log_odds_ratio": -0.13185060024261475, "logits/chosen": -0.5804282426834106, "logits/rejected": -0.6859478950500488, "logps/chosen": -0.0472014881670475, "logps/rejected": -0.7761104106903076, "loss": 3.5381, "nll_loss": 0.8713419437408447, "rewards/accuracies": 1.0, "rewards/chosen": -0.0047201490961015224, "rewards/margins": 0.07289090007543564, "rewards/rejected": -0.077611044049263, "step": 2599 }, { "epoch": 1.798063623789765, "grad_norm": 7.322403907775879, "learning_rate": 4.5566313201167974e-05, "log_odds_chosen": 4.652568340301514, "log_odds_ratio": -0.37490737438201904, "logits/chosen": -0.12980686128139496, "logits/rejected": -0.11926015466451645, "logps/chosen": -0.07453082501888275, "logps/rejected": -0.7865791320800781, "loss": 3.4297, "nll_loss": 0.8199406862258911, "rewards/accuracies": 0.75, "rewards/chosen": -0.00745308306068182, "rewards/margins": 0.07120483368635178, "rewards/rejected": -0.07865791022777557, "step": 2600 }, { "epoch": 1.7987551867219918, "grad_norm": 5.136075496673584, "learning_rate": 4.5562471184877826e-05, "log_odds_chosen": 6.08044958114624, "log_odds_ratio": -0.036412280052900314, "logits/chosen": -0.3810223937034607, "logits/rejected": -0.3522016108036041, "logps/chosen": -0.01769629679620266, "logps/rejected": -0.8769605159759521, "loss": 2.1277, "nll_loss": 0.5282766222953796, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017696297727525234, "rewards/margins": 0.08592642843723297, "rewards/rejected": -0.08769606053829193, "step": 2601 }, { "epoch": 1.7994467496542186, "grad_norm": 8.091934204101562, "learning_rate": 4.555862916858767e-05, "log_odds_chosen": 3.523444652557373, "log_odds_ratio": -0.23734408617019653, "logits/chosen": -0.45225799083709717, "logits/rejected": -0.5152193307876587, "logps/chosen": -0.08465392887592316, "logps/rejected": -0.6641150712966919, "loss": 3.319, "nll_loss": 0.8060159087181091, "rewards/accuracies": 0.75, "rewards/chosen": -0.008465392515063286, "rewards/margins": 0.05794611573219299, "rewards/rejected": -0.06641151010990143, "step": 2602 }, { "epoch": 1.8001383125864454, "grad_norm": 6.36898946762085, "learning_rate": 4.555478715229753e-05, "log_odds_chosen": 5.446345329284668, "log_odds_ratio": -0.07899149507284164, "logits/chosen": -0.2378370761871338, "logits/rejected": -0.2884705662727356, "logps/chosen": -0.02247701585292816, "logps/rejected": -0.6792370080947876, "loss": 2.2205, "nll_loss": 0.5472333431243896, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022477013990283012, "rewards/margins": 0.0656760036945343, "rewards/rejected": -0.06792370229959488, "step": 2603 }, { "epoch": 1.8008298755186722, "grad_norm": 9.214363098144531, "learning_rate": 4.5550945136007377e-05, "log_odds_chosen": 4.689992904663086, "log_odds_ratio": -0.08061084896326065, "logits/chosen": -0.5148239731788635, "logits/rejected": -0.6048566102981567, "logps/chosen": -0.03498592600226402, "logps/rejected": -0.7926340103149414, "loss": 3.5354, "nll_loss": 0.875788688659668, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034985924139618874, "rewards/margins": 0.07576481252908707, "rewards/rejected": -0.07926340401172638, "step": 2604 }, { "epoch": 1.801521438450899, "grad_norm": 8.10094928741455, "learning_rate": 4.554710311971723e-05, "log_odds_chosen": 5.41352653503418, "log_odds_ratio": -0.05584706366062164, "logits/chosen": -0.5215616226196289, "logits/rejected": -0.5591295957565308, "logps/chosen": -0.037766214460134506, "logps/rejected": -0.655017077922821, "loss": 3.1323, "nll_loss": 0.7774906158447266, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037766220048069954, "rewards/margins": 0.06172508746385574, "rewards/rejected": -0.06550170481204987, "step": 2605 }, { "epoch": 1.802213001383126, "grad_norm": 5.646283149719238, "learning_rate": 4.554326110342708e-05, "log_odds_chosen": 5.976848602294922, "log_odds_ratio": -0.21028871834278107, "logits/chosen": -0.4315609037876129, "logits/rejected": -0.42234447598457336, "logps/chosen": -0.023608697578310966, "logps/rejected": -0.6077236533164978, "loss": 3.1639, "nll_loss": 0.7699489593505859, "rewards/accuracies": 0.875, "rewards/chosen": -0.002360869897529483, "rewards/margins": 0.05841149389743805, "rewards/rejected": -0.0607723630964756, "step": 2606 }, { "epoch": 1.8029045643153527, "grad_norm": 5.159456729888916, "learning_rate": 4.5539419087136934e-05, "log_odds_chosen": 4.167616844177246, "log_odds_ratio": -0.12459512799978256, "logits/chosen": -0.12597954273223877, "logits/rejected": -0.18310877680778503, "logps/chosen": -0.042437825351953506, "logps/rejected": -0.6159353256225586, "loss": 2.3385, "nll_loss": 0.5721673369407654, "rewards/accuracies": 1.0, "rewards/chosen": -0.004243782255798578, "rewards/margins": 0.05734974890947342, "rewards/rejected": -0.06159352511167526, "step": 2607 }, { "epoch": 1.8035961272475796, "grad_norm": 6.154688358306885, "learning_rate": 4.553557707084678e-05, "log_odds_chosen": 5.0011677742004395, "log_odds_ratio": -0.0715303048491478, "logits/chosen": -0.651474118232727, "logits/rejected": -0.668293297290802, "logps/chosen": -0.04794564098119736, "logps/rejected": -1.0686466693878174, "loss": 2.4345, "nll_loss": 0.6014776229858398, "rewards/accuracies": 1.0, "rewards/chosen": -0.004794564098119736, "rewards/margins": 0.10207010805606842, "rewards/rejected": -0.10686466097831726, "step": 2608 }, { "epoch": 1.8042876901798064, "grad_norm": 7.125792026519775, "learning_rate": 4.553173505455663e-05, "log_odds_chosen": 5.126342296600342, "log_odds_ratio": -0.13379913568496704, "logits/chosen": -0.4011853039264679, "logits/rejected": -0.42175742983818054, "logps/chosen": -0.05009883642196655, "logps/rejected": -0.7972033619880676, "loss": 3.8232, "nll_loss": 0.9424182772636414, "rewards/accuracies": 1.0, "rewards/chosen": -0.005009883549064398, "rewards/margins": 0.07471044361591339, "rewards/rejected": -0.07972033321857452, "step": 2609 }, { "epoch": 1.8049792531120332, "grad_norm": 5.347149848937988, "learning_rate": 4.5527893038266484e-05, "log_odds_chosen": 5.037969589233398, "log_odds_ratio": -0.07694417238235474, "logits/chosen": -0.10675536841154099, "logits/rejected": -0.15652278065681458, "logps/chosen": -0.0804632306098938, "logps/rejected": -1.161550521850586, "loss": 2.5682, "nll_loss": 0.634350597858429, "rewards/accuracies": 1.0, "rewards/chosen": -0.00804632343351841, "rewards/margins": 0.10810872912406921, "rewards/rejected": -0.11615505814552307, "step": 2610 }, { "epoch": 1.80567081604426, "grad_norm": 6.408705234527588, "learning_rate": 4.552405102197633e-05, "log_odds_chosen": 7.264980316162109, "log_odds_ratio": -0.113655224442482, "logits/chosen": -0.20847178995609283, "logits/rejected": -0.1949325054883957, "logps/chosen": -0.022643744945526123, "logps/rejected": -0.7103455066680908, "loss": 3.0926, "nll_loss": 0.7617882490158081, "rewards/accuracies": 0.875, "rewards/chosen": -0.002264374401420355, "rewards/margins": 0.06877018511295319, "rewards/rejected": -0.07103455066680908, "step": 2611 }, { "epoch": 1.8063623789764869, "grad_norm": 8.062432289123535, "learning_rate": 4.552020900568619e-05, "log_odds_chosen": 5.82761287689209, "log_odds_ratio": -0.04699864983558655, "logits/chosen": -0.6141172051429749, "logits/rejected": -0.7337712049484253, "logps/chosen": -0.011600498110055923, "logps/rejected": -0.7071603536605835, "loss": 2.9428, "nll_loss": 0.7309926152229309, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011600498110055923, "rewards/margins": 0.06955599039793015, "rewards/rejected": -0.07071603834629059, "step": 2612 }, { "epoch": 1.8070539419087137, "grad_norm": 7.179983615875244, "learning_rate": 4.5516366989396035e-05, "log_odds_chosen": 5.070010662078857, "log_odds_ratio": -0.0862480029463768, "logits/chosen": -0.6351104974746704, "logits/rejected": -0.6772160530090332, "logps/chosen": -0.055110447108745575, "logps/rejected": -0.751242995262146, "loss": 3.1957, "nll_loss": 0.7902911901473999, "rewards/accuracies": 1.0, "rewards/chosen": -0.005511044524610043, "rewards/margins": 0.0696132481098175, "rewards/rejected": -0.07512429356575012, "step": 2613 }, { "epoch": 1.8077455048409405, "grad_norm": 7.030640602111816, "learning_rate": 4.551252497310589e-05, "log_odds_chosen": 4.596918106079102, "log_odds_ratio": -0.047915950417518616, "logits/chosen": -0.49445128440856934, "logits/rejected": -0.5078890323638916, "logps/chosen": -0.018410056829452515, "logps/rejected": -0.6411164999008179, "loss": 3.5244, "nll_loss": 0.8763055801391602, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018410057527944446, "rewards/margins": 0.06227065250277519, "rewards/rejected": -0.06411165744066238, "step": 2614 }, { "epoch": 1.8084370677731674, "grad_norm": 11.022759437561035, "learning_rate": 4.550868295681574e-05, "log_odds_chosen": 4.967000961303711, "log_odds_ratio": -0.027516499161720276, "logits/chosen": -0.6782200336456299, "logits/rejected": -0.6999487280845642, "logps/chosen": -0.014316966757178307, "logps/rejected": -0.7635257244110107, "loss": 4.7923, "nll_loss": 1.1953269243240356, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014316968154162169, "rewards/margins": 0.07492087781429291, "rewards/rejected": -0.0763525739312172, "step": 2615 }, { "epoch": 1.8091286307053942, "grad_norm": 6.712900638580322, "learning_rate": 4.550484094052559e-05, "log_odds_chosen": 7.674071788787842, "log_odds_ratio": -0.011318670585751534, "logits/chosen": -0.1972891092300415, "logits/rejected": -0.25815290212631226, "logps/chosen": -0.010169260203838348, "logps/rejected": -1.0386838912963867, "loss": 2.8093, "nll_loss": 0.701184868812561, "rewards/accuracies": 1.0, "rewards/chosen": -0.001016925903968513, "rewards/margins": 0.10285145789384842, "rewards/rejected": -0.10386838763952255, "step": 2616 }, { "epoch": 1.809820193637621, "grad_norm": 7.950763702392578, "learning_rate": 4.550099892423544e-05, "log_odds_chosen": 5.966031551361084, "log_odds_ratio": -0.013020548038184643, "logits/chosen": -0.31445324420928955, "logits/rejected": -0.3987734317779541, "logps/chosen": -0.03026457317173481, "logps/rejected": -1.1416288614273071, "loss": 2.969, "nll_loss": 0.7409405708312988, "rewards/accuracies": 1.0, "rewards/chosen": -0.003026457503437996, "rewards/margins": 0.11113642156124115, "rewards/rejected": -0.11416289210319519, "step": 2617 }, { "epoch": 1.8105117565698479, "grad_norm": 31.848552703857422, "learning_rate": 4.549715690794529e-05, "log_odds_chosen": 3.156466007232666, "log_odds_ratio": -0.8378832936286926, "logits/chosen": -0.22038498520851135, "logits/rejected": -0.26401734352111816, "logps/chosen": -0.12877285480499268, "logps/rejected": -0.6650040149688721, "loss": 3.0298, "nll_loss": 0.6736541986465454, "rewards/accuracies": 0.625, "rewards/chosen": -0.012877286411821842, "rewards/margins": 0.05362311750650406, "rewards/rejected": -0.06650040298700333, "step": 2618 }, { "epoch": 1.8112033195020747, "grad_norm": 4.113903045654297, "learning_rate": 4.549331489165514e-05, "log_odds_chosen": 5.889928340911865, "log_odds_ratio": -0.03983638808131218, "logits/chosen": -0.24106872081756592, "logits/rejected": -0.18745410442352295, "logps/chosen": -0.025771846994757652, "logps/rejected": -0.7980707883834839, "loss": 1.828, "nll_loss": 0.4530182182788849, "rewards/accuracies": 1.0, "rewards/chosen": -0.002577184932306409, "rewards/margins": 0.07722988724708557, "rewards/rejected": -0.07980707287788391, "step": 2619 }, { "epoch": 1.8118948824343015, "grad_norm": 7.535008430480957, "learning_rate": 4.548947287536499e-05, "log_odds_chosen": 6.170376300811768, "log_odds_ratio": -0.02469835989177227, "logits/chosen": -0.5700592994689941, "logits/rejected": -0.6413227915763855, "logps/chosen": -0.04254474118351936, "logps/rejected": -1.1719743013381958, "loss": 2.9024, "nll_loss": 0.7231208086013794, "rewards/accuracies": 1.0, "rewards/chosen": -0.004254474304616451, "rewards/margins": 0.11294296383857727, "rewards/rejected": -0.1171974241733551, "step": 2620 }, { "epoch": 1.8125864453665284, "grad_norm": 8.301804542541504, "learning_rate": 4.548563085907485e-05, "log_odds_chosen": 6.983060836791992, "log_odds_ratio": -0.027500227093696594, "logits/chosen": -0.7125990986824036, "logits/rejected": -0.7472147941589355, "logps/chosen": -0.007630965206772089, "logps/rejected": -1.0622081756591797, "loss": 3.0055, "nll_loss": 0.7486361265182495, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007630965556018054, "rewards/margins": 0.10545771569013596, "rewards/rejected": -0.10622081160545349, "step": 2621 }, { "epoch": 1.8132780082987552, "grad_norm": 6.867591857910156, "learning_rate": 4.5481788842784693e-05, "log_odds_chosen": 4.466533660888672, "log_odds_ratio": -0.2696218490600586, "logits/chosen": -0.5059961676597595, "logits/rejected": -0.5039629936218262, "logps/chosen": -0.0487339124083519, "logps/rejected": -0.6949042081832886, "loss": 2.632, "nll_loss": 0.6310458183288574, "rewards/accuracies": 0.75, "rewards/chosen": -0.00487339124083519, "rewards/margins": 0.06461702287197113, "rewards/rejected": -0.06949041783809662, "step": 2622 }, { "epoch": 1.813969571230982, "grad_norm": 6.75360107421875, "learning_rate": 4.5477946826494546e-05, "log_odds_chosen": 5.620083808898926, "log_odds_ratio": -0.22650887072086334, "logits/chosen": -0.34317028522491455, "logits/rejected": -0.3361210525035858, "logps/chosen": -0.08229245245456696, "logps/rejected": -1.3436719179153442, "loss": 2.4746, "nll_loss": 0.595992386341095, "rewards/accuracies": 0.875, "rewards/chosen": -0.00822924543172121, "rewards/margins": 0.12613794207572937, "rewards/rejected": -0.1343671977519989, "step": 2623 }, { "epoch": 1.8146611341632088, "grad_norm": 5.871620178222656, "learning_rate": 4.54741048102044e-05, "log_odds_chosen": 3.1263251304626465, "log_odds_ratio": -0.2592034637928009, "logits/chosen": -0.34768468141555786, "logits/rejected": -0.3263643980026245, "logps/chosen": -0.08932524174451828, "logps/rejected": -0.9391407370567322, "loss": 2.4419, "nll_loss": 0.5845474004745483, "rewards/accuracies": 0.875, "rewards/chosen": -0.008932523429393768, "rewards/margins": 0.08498155325651169, "rewards/rejected": -0.09391407668590546, "step": 2624 }, { "epoch": 1.8153526970954357, "grad_norm": 4.6835618019104, "learning_rate": 4.547026279391425e-05, "log_odds_chosen": 6.344972610473633, "log_odds_ratio": -0.06391476094722748, "logits/chosen": -0.3003113269805908, "logits/rejected": -0.34467703104019165, "logps/chosen": -0.04334036260843277, "logps/rejected": -1.4139169454574585, "loss": 2.7204, "nll_loss": 0.6737198233604431, "rewards/accuracies": 1.0, "rewards/chosen": -0.004334036260843277, "rewards/margins": 0.13705766201019287, "rewards/rejected": -0.14139169454574585, "step": 2625 }, { "epoch": 1.8160442600276625, "grad_norm": 6.126811981201172, "learning_rate": 4.5466420777624096e-05, "log_odds_chosen": 4.0297441482543945, "log_odds_ratio": -0.32939809560775757, "logits/chosen": -0.5913423895835876, "logits/rejected": -0.57082200050354, "logps/chosen": -0.13563945889472961, "logps/rejected": -0.8001002073287964, "loss": 3.0862, "nll_loss": 0.7386195659637451, "rewards/accuracies": 0.75, "rewards/chosen": -0.013563944958150387, "rewards/margins": 0.06644607335329056, "rewards/rejected": -0.08001002669334412, "step": 2626 }, { "epoch": 1.8167358229598893, "grad_norm": 7.478198051452637, "learning_rate": 4.546257876133395e-05, "log_odds_chosen": 7.136330604553223, "log_odds_ratio": -0.007372260093688965, "logits/chosen": -0.2368028610944748, "logits/rejected": -0.2646666467189789, "logps/chosen": -0.008087377063930035, "logps/rejected": -0.9763768911361694, "loss": 3.9637, "nll_loss": 0.9901875853538513, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008087377063930035, "rewards/margins": 0.09682895243167877, "rewards/rejected": -0.09763769060373306, "step": 2627 }, { "epoch": 1.8174273858921162, "grad_norm": 6.5756378173828125, "learning_rate": 4.54587367450438e-05, "log_odds_chosen": 6.8269548416137695, "log_odds_ratio": -0.04414502903819084, "logits/chosen": -0.35990023612976074, "logits/rejected": -0.45491692423820496, "logps/chosen": -0.007723034359514713, "logps/rejected": -0.8144005537033081, "loss": 2.5238, "nll_loss": 0.6265450119972229, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007723035523667932, "rewards/margins": 0.08066774904727936, "rewards/rejected": -0.08144006133079529, "step": 2628 }, { "epoch": 1.818118948824343, "grad_norm": 8.939872741699219, "learning_rate": 4.545489472875365e-05, "log_odds_chosen": 6.031547546386719, "log_odds_ratio": -0.1246354877948761, "logits/chosen": -0.3047257959842682, "logits/rejected": -0.3927888870239258, "logps/chosen": -0.04965946078300476, "logps/rejected": -1.3520649671554565, "loss": 2.9212, "nll_loss": 0.7178254127502441, "rewards/accuracies": 0.875, "rewards/chosen": -0.004965946078300476, "rewards/margins": 0.1302405595779419, "rewards/rejected": -0.13520650565624237, "step": 2629 }, { "epoch": 1.8188105117565698, "grad_norm": 5.710219860076904, "learning_rate": 4.5451052712463506e-05, "log_odds_chosen": 4.805438041687012, "log_odds_ratio": -0.20486658811569214, "logits/chosen": -0.15800583362579346, "logits/rejected": -0.24480903148651123, "logps/chosen": -0.09874942898750305, "logps/rejected": -1.33525550365448, "loss": 1.5654, "nll_loss": 0.37085121870040894, "rewards/accuracies": 0.875, "rewards/chosen": -0.00987494271248579, "rewards/margins": 0.12365061044692993, "rewards/rejected": -0.133525550365448, "step": 2630 }, { "epoch": 1.8195020746887967, "grad_norm": 8.432652473449707, "learning_rate": 4.544721069617335e-05, "log_odds_chosen": 6.5983076095581055, "log_odds_ratio": -0.16553828120231628, "logits/chosen": -0.528571367263794, "logits/rejected": -0.5659384727478027, "logps/chosen": -0.03578614443540573, "logps/rejected": -0.9695991277694702, "loss": 3.2697, "nll_loss": 0.8008802533149719, "rewards/accuracies": 0.875, "rewards/chosen": -0.003578614443540573, "rewards/margins": 0.09338130801916122, "rewards/rejected": -0.0969599187374115, "step": 2631 }, { "epoch": 1.8201936376210235, "grad_norm": 5.630443572998047, "learning_rate": 4.5443368679883204e-05, "log_odds_chosen": 6.879264831542969, "log_odds_ratio": -0.007554001174867153, "logits/chosen": -0.5525587797164917, "logits/rejected": -0.527340292930603, "logps/chosen": -0.014577634632587433, "logps/rejected": -1.4096137285232544, "loss": 2.5911, "nll_loss": 0.6470277309417725, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014577636029571295, "rewards/margins": 0.139503613114357, "rewards/rejected": -0.14096137881278992, "step": 2632 }, { "epoch": 1.8208852005532503, "grad_norm": 5.754599571228027, "learning_rate": 4.543952666359306e-05, "log_odds_chosen": 2.836488962173462, "log_odds_ratio": -0.3192460834980011, "logits/chosen": -0.20284788310527802, "logits/rejected": -0.2106815129518509, "logps/chosen": -0.0716264545917511, "logps/rejected": -0.42863890528678894, "loss": 2.5742, "nll_loss": 0.6116150617599487, "rewards/accuracies": 0.75, "rewards/chosen": -0.00716264545917511, "rewards/margins": 0.035701245069503784, "rewards/rejected": -0.042863890528678894, "step": 2633 }, { "epoch": 1.8215767634854771, "grad_norm": 5.475660800933838, "learning_rate": 4.543568464730291e-05, "log_odds_chosen": 5.2552595138549805, "log_odds_ratio": -0.20006102323532104, "logits/chosen": -0.5913251638412476, "logits/rejected": -0.6479060649871826, "logps/chosen": -0.04129403084516525, "logps/rejected": -1.0466065406799316, "loss": 2.4332, "nll_loss": 0.588290810585022, "rewards/accuracies": 0.875, "rewards/chosen": -0.004129402805119753, "rewards/margins": 0.10053126513957977, "rewards/rejected": -0.10466066002845764, "step": 2634 }, { "epoch": 1.822268326417704, "grad_norm": 8.928821563720703, "learning_rate": 4.5431842631012755e-05, "log_odds_chosen": 3.049790859222412, "log_odds_ratio": -0.3694280683994293, "logits/chosen": -0.30583369731903076, "logits/rejected": -0.3571898341178894, "logps/chosen": -0.08246318250894547, "logps/rejected": -0.2945728898048401, "loss": 3.2304, "nll_loss": 0.7706577181816101, "rewards/accuracies": 0.75, "rewards/chosen": -0.008246318437159061, "rewards/margins": 0.02121097221970558, "rewards/rejected": -0.029457291588187218, "step": 2635 }, { "epoch": 1.8229598893499308, "grad_norm": 5.167267322540283, "learning_rate": 4.542800061472261e-05, "log_odds_chosen": 7.10857629776001, "log_odds_ratio": -0.00941796600818634, "logits/chosen": -0.27473002672195435, "logits/rejected": -0.3904823660850525, "logps/chosen": -0.03157273679971695, "logps/rejected": -1.428621768951416, "loss": 2.5949, "nll_loss": 0.647790253162384, "rewards/accuracies": 1.0, "rewards/chosen": -0.00315727386623621, "rewards/margins": 0.1397048979997635, "rewards/rejected": -0.14286217093467712, "step": 2636 }, { "epoch": 1.8236514522821576, "grad_norm": 6.512024879455566, "learning_rate": 4.542415859843246e-05, "log_odds_chosen": 6.002936840057373, "log_odds_ratio": -0.030928250402212143, "logits/chosen": -0.5706111788749695, "logits/rejected": -0.6251883506774902, "logps/chosen": -0.036800943315029144, "logps/rejected": -1.3069647550582886, "loss": 2.7111, "nll_loss": 0.6746917366981506, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036800941452383995, "rewards/margins": 0.12701638042926788, "rewards/rejected": -0.13069647550582886, "step": 2637 }, { "epoch": 1.8243430152143845, "grad_norm": 8.315937042236328, "learning_rate": 4.5420316582142305e-05, "log_odds_chosen": 3.425870656967163, "log_odds_ratio": -0.7788676023483276, "logits/chosen": -0.43286874890327454, "logits/rejected": -0.48489269614219666, "logps/chosen": -0.05835752934217453, "logps/rejected": -0.8002941608428955, "loss": 3.263, "nll_loss": 0.7378556728363037, "rewards/accuracies": 0.875, "rewards/chosen": -0.00583575339987874, "rewards/margins": 0.07419366389513016, "rewards/rejected": -0.08002942055463791, "step": 2638 }, { "epoch": 1.8250345781466113, "grad_norm": 9.208503723144531, "learning_rate": 4.5416474565852165e-05, "log_odds_chosen": 5.260612964630127, "log_odds_ratio": -0.086390919983387, "logits/chosen": -0.7150118350982666, "logits/rejected": -0.7044198513031006, "logps/chosen": -0.03383970633149147, "logps/rejected": -0.8515818119049072, "loss": 3.5761, "nll_loss": 0.8853759169578552, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033839705865830183, "rewards/margins": 0.08177421241998672, "rewards/rejected": -0.08515818417072296, "step": 2639 }, { "epoch": 1.8257261410788381, "grad_norm": 10.198955535888672, "learning_rate": 4.541263254956201e-05, "log_odds_chosen": 9.71574878692627, "log_odds_ratio": -0.00011423486284911633, "logits/chosen": -0.32049620151519775, "logits/rejected": -0.41254082322120667, "logps/chosen": -0.00040840901783667505, "logps/rejected": -1.8789923191070557, "loss": 2.8609, "nll_loss": 0.7152112722396851, "rewards/accuracies": 1.0, "rewards/chosen": -4.084090323885903e-05, "rewards/margins": 0.18785840272903442, "rewards/rejected": -0.18789923191070557, "step": 2640 }, { "epoch": 1.826417704011065, "grad_norm": 5.202672481536865, "learning_rate": 4.540879053327186e-05, "log_odds_chosen": 6.840071201324463, "log_odds_ratio": -0.02362118288874626, "logits/chosen": -0.5052455067634583, "logits/rejected": -0.588655948638916, "logps/chosen": -0.03541530296206474, "logps/rejected": -1.1660518646240234, "loss": 2.545, "nll_loss": 0.6338983774185181, "rewards/accuracies": 1.0, "rewards/chosen": -0.003541530342772603, "rewards/margins": 0.11306366324424744, "rewards/rejected": -0.11660519242286682, "step": 2641 }, { "epoch": 1.8271092669432918, "grad_norm": 6.811013221740723, "learning_rate": 4.5404948516981715e-05, "log_odds_chosen": 6.056200981140137, "log_odds_ratio": -0.16985799372196198, "logits/chosen": -0.2569509744644165, "logits/rejected": -0.31730279326438904, "logps/chosen": -0.04153449460864067, "logps/rejected": -1.42983877658844, "loss": 2.6367, "nll_loss": 0.6422007083892822, "rewards/accuracies": 0.875, "rewards/chosen": -0.0041534495539963245, "rewards/margins": 0.13883042335510254, "rewards/rejected": -0.14298386871814728, "step": 2642 }, { "epoch": 1.8278008298755186, "grad_norm": 7.3355183601379395, "learning_rate": 4.540110650069157e-05, "log_odds_chosen": 5.2649455070495605, "log_odds_ratio": -0.04633000120520592, "logits/chosen": -0.46298545598983765, "logits/rejected": -0.5828496217727661, "logps/chosen": -0.04721803590655327, "logps/rejected": -1.0684432983398438, "loss": 3.6065, "nll_loss": 0.8969941735267639, "rewards/accuracies": 1.0, "rewards/chosen": -0.004721803590655327, "rewards/margins": 0.10212253034114838, "rewards/rejected": -0.10684433579444885, "step": 2643 }, { "epoch": 1.8284923928077457, "grad_norm": 5.123359203338623, "learning_rate": 4.539726448440141e-05, "log_odds_chosen": 7.063897609710693, "log_odds_ratio": -0.01669420301914215, "logits/chosen": -0.5784233212471008, "logits/rejected": -0.5812735557556152, "logps/chosen": -0.012818637304008007, "logps/rejected": -1.1337049007415771, "loss": 2.7851, "nll_loss": 0.6945989727973938, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012818636605516076, "rewards/margins": 0.1120886355638504, "rewards/rejected": -0.11337050795555115, "step": 2644 }, { "epoch": 1.8291839557399725, "grad_norm": 6.171611785888672, "learning_rate": 4.5393422468111266e-05, "log_odds_chosen": 2.9856228828430176, "log_odds_ratio": -0.1682049185037613, "logits/chosen": 0.5519628524780273, "logits/rejected": 0.5516259670257568, "logps/chosen": -0.08549317717552185, "logps/rejected": -0.6905692219734192, "loss": 2.8657, "nll_loss": 0.6996016502380371, "rewards/accuracies": 0.875, "rewards/chosen": -0.008549317717552185, "rewards/margins": 0.060507599264383316, "rewards/rejected": -0.0690569207072258, "step": 2645 }, { "epoch": 1.8298755186721993, "grad_norm": 14.625659942626953, "learning_rate": 4.538958045182112e-05, "log_odds_chosen": 4.857564926147461, "log_odds_ratio": -0.1095946878194809, "logits/chosen": -0.5669194459915161, "logits/rejected": -0.6688473224639893, "logps/chosen": -0.03788517788052559, "logps/rejected": -0.8945537805557251, "loss": 3.2053, "nll_loss": 0.7903628945350647, "rewards/accuracies": 0.875, "rewards/chosen": -0.003788517788052559, "rewards/margins": 0.08566686511039734, "rewards/rejected": -0.08945538103580475, "step": 2646 }, { "epoch": 1.8305670816044262, "grad_norm": 8.835652351379395, "learning_rate": 4.5385738435530964e-05, "log_odds_chosen": 4.281753063201904, "log_odds_ratio": -0.10200349986553192, "logits/chosen": -0.613962709903717, "logits/rejected": -0.6460983157157898, "logps/chosen": -0.03600337356328964, "logps/rejected": -0.9600581526756287, "loss": 3.2928, "nll_loss": 0.8130101561546326, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036003373097628355, "rewards/margins": 0.09240548312664032, "rewards/rejected": -0.09600581973791122, "step": 2647 }, { "epoch": 1.831258644536653, "grad_norm": 12.954113006591797, "learning_rate": 4.538189641924082e-05, "log_odds_chosen": 5.192785739898682, "log_odds_ratio": -0.35543930530548096, "logits/chosen": -0.04655447229743004, "logits/rejected": -0.07551144063472748, "logps/chosen": -0.07405739277601242, "logps/rejected": -1.0524441003799438, "loss": 2.6527, "nll_loss": 0.6276319026947021, "rewards/accuracies": 0.75, "rewards/chosen": -0.007405739277601242, "rewards/margins": 0.09783867746591568, "rewards/rejected": -0.10524441301822662, "step": 2648 }, { "epoch": 1.8319502074688798, "grad_norm": 10.448448181152344, "learning_rate": 4.537805440295067e-05, "log_odds_chosen": 4.813277244567871, "log_odds_ratio": -0.366511732339859, "logits/chosen": -0.32178401947021484, "logits/rejected": -0.36728590726852417, "logps/chosen": -0.08522398769855499, "logps/rejected": -0.9796061515808105, "loss": 3.6681, "nll_loss": 0.8803846836090088, "rewards/accuracies": 0.875, "rewards/chosen": -0.008522399701178074, "rewards/margins": 0.08943821489810944, "rewards/rejected": -0.09796061366796494, "step": 2649 }, { "epoch": 1.8326417704011067, "grad_norm": 13.359732627868652, "learning_rate": 4.537421238666052e-05, "log_odds_chosen": 3.3614258766174316, "log_odds_ratio": -0.47019559144973755, "logits/chosen": -0.3268583416938782, "logits/rejected": -0.2812211811542511, "logps/chosen": -0.12640827894210815, "logps/rejected": -0.7674390077590942, "loss": 3.1545, "nll_loss": 0.7416092157363892, "rewards/accuracies": 0.875, "rewards/chosen": -0.01264082733541727, "rewards/margins": 0.06410308182239532, "rewards/rejected": -0.07674390077590942, "step": 2650 }, { "epoch": 1.8333333333333335, "grad_norm": 6.244953155517578, "learning_rate": 4.5370370370370374e-05, "log_odds_chosen": 6.26675271987915, "log_odds_ratio": -0.0935230702161789, "logits/chosen": -0.4478660225868225, "logits/rejected": -0.4610947072505951, "logps/chosen": -0.054639168083667755, "logps/rejected": -1.0677608251571655, "loss": 3.3856, "nll_loss": 0.8370494842529297, "rewards/accuracies": 1.0, "rewards/chosen": -0.0054639168083667755, "rewards/margins": 0.10131216049194336, "rewards/rejected": -0.10677608847618103, "step": 2651 }, { "epoch": 1.8340248962655603, "grad_norm": 13.164037704467773, "learning_rate": 4.5366528354080226e-05, "log_odds_chosen": 5.182484149932861, "log_odds_ratio": -0.1720438152551651, "logits/chosen": -0.2972196042537689, "logits/rejected": -0.3702899217605591, "logps/chosen": -0.031756024807691574, "logps/rejected": -0.9779322743415833, "loss": 3.9197, "nll_loss": 0.9627140760421753, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031756022945046425, "rewards/margins": 0.09461762011051178, "rewards/rejected": -0.09779322892427444, "step": 2652 }, { "epoch": 1.8347164591977871, "grad_norm": 4.6243977546691895, "learning_rate": 4.536268633779007e-05, "log_odds_chosen": 7.156553745269775, "log_odds_ratio": -0.0021701371297240257, "logits/chosen": -0.4487355947494507, "logits/rejected": -0.41442787647247314, "logps/chosen": -0.01149112731218338, "logps/rejected": -1.3405351638793945, "loss": 3.0436, "nll_loss": 0.7606900334358215, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011491125915199518, "rewards/margins": 0.13290441036224365, "rewards/rejected": -0.13405351340770721, "step": 2653 }, { "epoch": 1.835408022130014, "grad_norm": 8.237411499023438, "learning_rate": 4.5358844321499924e-05, "log_odds_chosen": 3.6853272914886475, "log_odds_ratio": -0.20125997066497803, "logits/chosen": -0.3884178698062897, "logits/rejected": -0.36602675914764404, "logps/chosen": -0.14039038121700287, "logps/rejected": -0.892852246761322, "loss": 3.6356, "nll_loss": 0.8887854218482971, "rewards/accuracies": 1.0, "rewards/chosen": -0.014039037749171257, "rewards/margins": 0.0752461850643158, "rewards/rejected": -0.0892852246761322, "step": 2654 }, { "epoch": 1.8360995850622408, "grad_norm": 10.218503952026367, "learning_rate": 4.535500230520978e-05, "log_odds_chosen": 2.992558717727661, "log_odds_ratio": -0.509008526802063, "logits/chosen": -0.6339020133018494, "logits/rejected": -0.6814983487129211, "logps/chosen": -0.16128496825695038, "logps/rejected": -0.9309603571891785, "loss": 3.3514, "nll_loss": 0.7869572639465332, "rewards/accuracies": 0.75, "rewards/chosen": -0.016128497198224068, "rewards/margins": 0.07696753740310669, "rewards/rejected": -0.09309603273868561, "step": 2655 }, { "epoch": 1.8367911479944676, "grad_norm": 10.936944007873535, "learning_rate": 4.535116028891962e-05, "log_odds_chosen": 5.422421932220459, "log_odds_ratio": -0.5106878876686096, "logits/chosen": -0.40086135268211365, "logits/rejected": -0.37600135803222656, "logps/chosen": -0.036261092871427536, "logps/rejected": -0.7765040397644043, "loss": 2.378, "nll_loss": 0.543430745601654, "rewards/accuracies": 0.875, "rewards/chosen": -0.003626109566539526, "rewards/margins": 0.07402429729700089, "rewards/rejected": -0.07765040546655655, "step": 2656 }, { "epoch": 1.8374827109266945, "grad_norm": 5.705078125, "learning_rate": 4.534731827262948e-05, "log_odds_chosen": 5.254233360290527, "log_odds_ratio": -0.10906915366649628, "logits/chosen": -0.6265988349914551, "logits/rejected": -0.5938752889633179, "logps/chosen": -0.04390117898583412, "logps/rejected": -0.9048900604248047, "loss": 2.7357, "nll_loss": 0.6730280518531799, "rewards/accuracies": 1.0, "rewards/chosen": -0.0043901181779801846, "rewards/margins": 0.08609890192747116, "rewards/rejected": -0.09048901498317719, "step": 2657 }, { "epoch": 1.8381742738589213, "grad_norm": 10.237077713012695, "learning_rate": 4.534347625633933e-05, "log_odds_chosen": 5.0289106369018555, "log_odds_ratio": -0.6908766627311707, "logits/chosen": -0.41462084650993347, "logits/rejected": -0.44479289650917053, "logps/chosen": -0.1477389633655548, "logps/rejected": -0.9825767278671265, "loss": 2.718, "nll_loss": 0.6104055643081665, "rewards/accuracies": 0.875, "rewards/chosen": -0.014773895964026451, "rewards/margins": 0.08348377794027328, "rewards/rejected": -0.09825767576694489, "step": 2658 }, { "epoch": 1.8388658367911481, "grad_norm": 11.822378158569336, "learning_rate": 4.533963424004918e-05, "log_odds_chosen": 3.216097831726074, "log_odds_ratio": -0.7136033177375793, "logits/chosen": -0.7236344814300537, "logits/rejected": -0.7421097755432129, "logps/chosen": -0.15638959407806396, "logps/rejected": -0.9404903054237366, "loss": 3.3851, "nll_loss": 0.7749228477478027, "rewards/accuracies": 0.75, "rewards/chosen": -0.015638958662748337, "rewards/margins": 0.0784100666642189, "rewards/rejected": -0.09404902905225754, "step": 2659 }, { "epoch": 1.839557399723375, "grad_norm": 7.603041648864746, "learning_rate": 4.533579222375903e-05, "log_odds_chosen": 5.358530044555664, "log_odds_ratio": -0.1660924255847931, "logits/chosen": -0.4428621530532837, "logits/rejected": -0.44063064455986023, "logps/chosen": -0.028198856860399246, "logps/rejected": -1.0400341749191284, "loss": 2.7174, "nll_loss": 0.6627500057220459, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028198854997754097, "rewards/margins": 0.10118352621793747, "rewards/rejected": -0.1040034145116806, "step": 2660 }, { "epoch": 1.8402489626556018, "grad_norm": 5.782895565032959, "learning_rate": 4.5331950207468885e-05, "log_odds_chosen": 3.479762077331543, "log_odds_ratio": -0.47276097536087036, "logits/chosen": -0.4127695560455322, "logits/rejected": -0.46083658933639526, "logps/chosen": -0.1779659390449524, "logps/rejected": -0.8920935392379761, "loss": 2.6048, "nll_loss": 0.6039206981658936, "rewards/accuracies": 0.75, "rewards/chosen": -0.01779659278690815, "rewards/margins": 0.07141275703907013, "rewards/rejected": -0.08920934796333313, "step": 2661 }, { "epoch": 1.8409405255878286, "grad_norm": 12.407095909118652, "learning_rate": 4.532810819117873e-05, "log_odds_chosen": 2.9915771484375, "log_odds_ratio": -0.8544542789459229, "logits/chosen": -0.21043117344379425, "logits/rejected": -0.21721884608268738, "logps/chosen": -0.07011416554450989, "logps/rejected": -0.6056758165359497, "loss": 4.0644, "nll_loss": 0.9306578636169434, "rewards/accuracies": 0.75, "rewards/chosen": -0.007011416368186474, "rewards/margins": 0.0535561665892601, "rewards/rejected": -0.06056758388876915, "step": 2662 }, { "epoch": 1.8416320885200554, "grad_norm": 8.926106452941895, "learning_rate": 4.532426617488858e-05, "log_odds_chosen": 5.799577713012695, "log_odds_ratio": -0.032613616436719894, "logits/chosen": -0.6197344064712524, "logits/rejected": -0.6875401735305786, "logps/chosen": -0.022176966071128845, "logps/rejected": -0.9513285160064697, "loss": 2.8049, "nll_loss": 0.6979745626449585, "rewards/accuracies": 1.0, "rewards/chosen": -0.002217696513980627, "rewards/margins": 0.09291516244411469, "rewards/rejected": -0.09513285756111145, "step": 2663 }, { "epoch": 1.8423236514522823, "grad_norm": 8.99937629699707, "learning_rate": 4.5320424158598435e-05, "log_odds_chosen": 5.625021934509277, "log_odds_ratio": -0.24571014940738678, "logits/chosen": -0.10770852863788605, "logits/rejected": -0.18785730004310608, "logps/chosen": -0.06675803661346436, "logps/rejected": -1.043717861175537, "loss": 3.7772, "nll_loss": 0.9197167754173279, "rewards/accuracies": 0.875, "rewards/chosen": -0.006675804033875465, "rewards/margins": 0.0976959839463234, "rewards/rejected": -0.10437178611755371, "step": 2664 }, { "epoch": 1.843015214384509, "grad_norm": 8.712796211242676, "learning_rate": 4.531658214230828e-05, "log_odds_chosen": 4.1482439041137695, "log_odds_ratio": -0.194962278008461, "logits/chosen": -0.21983416378498077, "logits/rejected": -0.23658618330955505, "logps/chosen": -0.03656216338276863, "logps/rejected": -0.6330381631851196, "loss": 3.3435, "nll_loss": 0.816391110420227, "rewards/accuracies": 0.875, "rewards/chosen": -0.003656216664239764, "rewards/margins": 0.05964760482311249, "rewards/rejected": -0.06330382078886032, "step": 2665 }, { "epoch": 1.843706777316736, "grad_norm": 7.394927501678467, "learning_rate": 4.531274012601814e-05, "log_odds_chosen": 3.453693151473999, "log_odds_ratio": -0.24411636590957642, "logits/chosen": -0.3464747369289398, "logits/rejected": -0.4191704988479614, "logps/chosen": -0.10869094729423523, "logps/rejected": -1.0189597606658936, "loss": 2.4536, "nll_loss": 0.5889769196510315, "rewards/accuracies": 0.75, "rewards/chosen": -0.010869094170629978, "rewards/margins": 0.09102687984704971, "rewards/rejected": -0.10189597308635712, "step": 2666 }, { "epoch": 1.8443983402489628, "grad_norm": 4.406402587890625, "learning_rate": 4.5308898109727986e-05, "log_odds_chosen": 3.872445583343506, "log_odds_ratio": -0.24112261831760406, "logits/chosen": -0.3680941164493561, "logits/rejected": -0.3593483567237854, "logps/chosen": -0.08817479014396667, "logps/rejected": -0.6911728978157043, "loss": 3.0122, "nll_loss": 0.7289446592330933, "rewards/accuracies": 0.875, "rewards/chosen": -0.008817479014396667, "rewards/margins": 0.06029981002211571, "rewards/rejected": -0.06911729276180267, "step": 2667 }, { "epoch": 1.8450899031811896, "grad_norm": 6.851699352264404, "learning_rate": 4.530505609343784e-05, "log_odds_chosen": 4.621401309967041, "log_odds_ratio": -0.12672731280326843, "logits/chosen": -0.005862422287464142, "logits/rejected": -0.0212232768535614, "logps/chosen": -0.13591837882995605, "logps/rejected": -1.1643211841583252, "loss": 3.3122, "nll_loss": 0.8153849840164185, "rewards/accuracies": 1.0, "rewards/chosen": -0.013591839000582695, "rewards/margins": 0.10284027457237244, "rewards/rejected": -0.11643211543560028, "step": 2668 }, { "epoch": 1.8457814661134164, "grad_norm": 4.551556587219238, "learning_rate": 4.530121407714769e-05, "log_odds_chosen": 7.317740440368652, "log_odds_ratio": -0.017265843227505684, "logits/chosen": -0.6083251237869263, "logits/rejected": -0.5865840315818787, "logps/chosen": -0.02332671359181404, "logps/rejected": -1.1472117900848389, "loss": 3.5558, "nll_loss": 0.8872328996658325, "rewards/accuracies": 1.0, "rewards/chosen": -0.002332671545445919, "rewards/margins": 0.11238852143287659, "rewards/rejected": -0.11472119390964508, "step": 2669 }, { "epoch": 1.8464730290456433, "grad_norm": 11.547624588012695, "learning_rate": 4.529737206085754e-05, "log_odds_chosen": 2.723951816558838, "log_odds_ratio": -0.5727928876876831, "logits/chosen": -0.4375419318675995, "logits/rejected": -0.5310130715370178, "logps/chosen": -0.1666826605796814, "logps/rejected": -0.9059403538703918, "loss": 4.0289, "nll_loss": 0.9499408006668091, "rewards/accuracies": 0.875, "rewards/chosen": -0.01666826754808426, "rewards/margins": 0.07392577081918716, "rewards/rejected": -0.09059403836727142, "step": 2670 }, { "epoch": 1.84716459197787, "grad_norm": 5.87103271484375, "learning_rate": 4.529353004456739e-05, "log_odds_chosen": 3.784372568130493, "log_odds_ratio": -0.09613749384880066, "logits/chosen": -0.45019257068634033, "logits/rejected": -0.4648653268814087, "logps/chosen": -0.09225141257047653, "logps/rejected": -1.0031421184539795, "loss": 2.7772, "nll_loss": 0.684694766998291, "rewards/accuracies": 1.0, "rewards/chosen": -0.009225141257047653, "rewards/margins": 0.09108906984329224, "rewards/rejected": -0.10031421482563019, "step": 2671 }, { "epoch": 1.847856154910097, "grad_norm": 8.487081527709961, "learning_rate": 4.528968802827724e-05, "log_odds_chosen": 3.1780614852905273, "log_odds_ratio": -0.18778465688228607, "logits/chosen": -0.3719137907028198, "logits/rejected": -0.35996168851852417, "logps/chosen": -0.08501023054122925, "logps/rejected": -0.5380250215530396, "loss": 3.2701, "nll_loss": 0.7987452149391174, "rewards/accuracies": 0.875, "rewards/chosen": -0.008501023054122925, "rewards/margins": 0.04530148208141327, "rewards/rejected": -0.053802505135536194, "step": 2672 }, { "epoch": 1.8485477178423237, "grad_norm": 7.312365531921387, "learning_rate": 4.5285846011987094e-05, "log_odds_chosen": 4.829049587249756, "log_odds_ratio": -0.06223570927977562, "logits/chosen": -0.1999543011188507, "logits/rejected": -0.266481876373291, "logps/chosen": -0.030306056141853333, "logps/rejected": -0.9615808725357056, "loss": 3.1486, "nll_loss": 0.7809286117553711, "rewards/accuracies": 1.0, "rewards/chosen": -0.003030605847015977, "rewards/margins": 0.09312748908996582, "rewards/rejected": -0.09615809470415115, "step": 2673 }, { "epoch": 1.8492392807745506, "grad_norm": 7.50120210647583, "learning_rate": 4.528200399569694e-05, "log_odds_chosen": 3.278000593185425, "log_odds_ratio": -0.16125383973121643, "logits/chosen": 0.01486283540725708, "logits/rejected": -0.009005546569824219, "logps/chosen": -0.06412653625011444, "logps/rejected": -0.784261167049408, "loss": 2.7824, "nll_loss": 0.6794755458831787, "rewards/accuracies": 0.875, "rewards/chosen": -0.0064126537181437016, "rewards/margins": 0.07201346755027771, "rewards/rejected": -0.07842611521482468, "step": 2674 }, { "epoch": 1.8499308437067774, "grad_norm": 6.451777935028076, "learning_rate": 4.52781619794068e-05, "log_odds_chosen": 4.411476135253906, "log_odds_ratio": -0.10007528215646744, "logits/chosen": -0.1524033546447754, "logits/rejected": -0.1610099822282791, "logps/chosen": -0.02451336942613125, "logps/rejected": -0.7017672061920166, "loss": 2.7949, "nll_loss": 0.6887252330780029, "rewards/accuracies": 1.0, "rewards/chosen": -0.002451336942613125, "rewards/margins": 0.06772539019584656, "rewards/rejected": -0.07017672061920166, "step": 2675 }, { "epoch": 1.8506224066390042, "grad_norm": 7.678047180175781, "learning_rate": 4.5274319963116644e-05, "log_odds_chosen": 5.969219207763672, "log_odds_ratio": -0.016140181571245193, "logits/chosen": -0.49597981572151184, "logits/rejected": -0.5496968030929565, "logps/chosen": -0.0319584384560585, "logps/rejected": -1.0520793199539185, "loss": 3.7072, "nll_loss": 0.9251886606216431, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031958436593413353, "rewards/margins": 0.10201209783554077, "rewards/rejected": -0.10520793497562408, "step": 2676 }, { "epoch": 1.851313969571231, "grad_norm": 9.525699615478516, "learning_rate": 4.5270477946826497e-05, "log_odds_chosen": 4.6154656410217285, "log_odds_ratio": -0.23475691676139832, "logits/chosen": -0.7846601009368896, "logits/rejected": -0.7942330241203308, "logps/chosen": -0.0763910710811615, "logps/rejected": -0.8175442218780518, "loss": 3.1672, "nll_loss": 0.768312394618988, "rewards/accuracies": 0.875, "rewards/chosen": -0.007639107294380665, "rewards/margins": 0.0741153210401535, "rewards/rejected": -0.0817544236779213, "step": 2677 }, { "epoch": 1.852005532503458, "grad_norm": 6.886621475219727, "learning_rate": 4.526663593053635e-05, "log_odds_chosen": 3.9328413009643555, "log_odds_ratio": -0.31209760904312134, "logits/chosen": -0.5099629163742065, "logits/rejected": -0.518309473991394, "logps/chosen": -0.11288018524646759, "logps/rejected": -0.6504040360450745, "loss": 3.7584, "nll_loss": 0.9083778262138367, "rewards/accuracies": 0.75, "rewards/chosen": -0.011288018897175789, "rewards/margins": 0.053752392530441284, "rewards/rejected": -0.06504040956497192, "step": 2678 }, { "epoch": 1.8526970954356847, "grad_norm": 8.468035697937012, "learning_rate": 4.52627939142462e-05, "log_odds_chosen": 3.892465353012085, "log_odds_ratio": -0.24962744116783142, "logits/chosen": -0.44910019636154175, "logits/rejected": -0.4465304911136627, "logps/chosen": -0.09325937926769257, "logps/rejected": -0.5119410157203674, "loss": 3.5815, "nll_loss": 0.8704084753990173, "rewards/accuracies": 0.875, "rewards/chosen": -0.009325938299298286, "rewards/margins": 0.041868165135383606, "rewards/rejected": -0.05119410157203674, "step": 2679 }, { "epoch": 1.8533886583679116, "grad_norm": 7.894447326660156, "learning_rate": 4.525895189795605e-05, "log_odds_chosen": 5.502786636352539, "log_odds_ratio": -0.026815906167030334, "logits/chosen": -0.44511091709136963, "logits/rejected": -0.5357626676559448, "logps/chosen": -0.02842571772634983, "logps/rejected": -0.8142504096031189, "loss": 4.3125, "nll_loss": 1.0754499435424805, "rewards/accuracies": 1.0, "rewards/chosen": -0.002842571819201112, "rewards/margins": 0.07858247309923172, "rewards/rejected": -0.08142504841089249, "step": 2680 }, { "epoch": 1.8540802213001384, "grad_norm": 11.441069602966309, "learning_rate": 4.52551098816659e-05, "log_odds_chosen": 3.513641357421875, "log_odds_ratio": -0.535304605960846, "logits/chosen": -0.5716827511787415, "logits/rejected": -0.6022769808769226, "logps/chosen": -0.07979574799537659, "logps/rejected": -0.7356309294700623, "loss": 3.3414, "nll_loss": 0.7818148136138916, "rewards/accuracies": 0.75, "rewards/chosen": -0.007979575544595718, "rewards/margins": 0.06558351963758469, "rewards/rejected": -0.0735630989074707, "step": 2681 }, { "epoch": 1.8547717842323652, "grad_norm": 6.630998611450195, "learning_rate": 4.525126786537575e-05, "log_odds_chosen": 6.117326736450195, "log_odds_ratio": -0.03048916533589363, "logits/chosen": -0.736469030380249, "logits/rejected": -0.76263427734375, "logps/chosen": -0.024096038192510605, "logps/rejected": -1.0225584506988525, "loss": 2.7666, "nll_loss": 0.6885951161384583, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024096036795526743, "rewards/margins": 0.0998462438583374, "rewards/rejected": -0.10225585103034973, "step": 2682 }, { "epoch": 1.855463347164592, "grad_norm": 7.200371742248535, "learning_rate": 4.52474258490856e-05, "log_odds_chosen": 5.509161949157715, "log_odds_ratio": -0.02707480452954769, "logits/chosen": -0.5912322402000427, "logits/rejected": -0.6191037893295288, "logps/chosen": -0.02199496328830719, "logps/rejected": -1.120031714439392, "loss": 3.2204, "nll_loss": 0.8023803234100342, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021994966082274914, "rewards/margins": 0.10980367660522461, "rewards/rejected": -0.11200316995382309, "step": 2683 }, { "epoch": 1.8561549100968189, "grad_norm": 7.9807939529418945, "learning_rate": 4.524358383279546e-05, "log_odds_chosen": 4.948763370513916, "log_odds_ratio": -0.03615286201238632, "logits/chosen": -0.10776793956756592, "logits/rejected": -0.14994102716445923, "logps/chosen": -0.05639262869954109, "logps/rejected": -1.0054923295974731, "loss": 3.5715, "nll_loss": 0.8892561197280884, "rewards/accuracies": 1.0, "rewards/chosen": -0.005639263428747654, "rewards/margins": 0.09490996599197388, "rewards/rejected": -0.10054922848939896, "step": 2684 }, { "epoch": 1.8568464730290457, "grad_norm": 10.224892616271973, "learning_rate": 4.52397418165053e-05, "log_odds_chosen": 7.033175468444824, "log_odds_ratio": -0.005409691948443651, "logits/chosen": -0.5313670039176941, "logits/rejected": -0.5757410526275635, "logps/chosen": -0.01673893816769123, "logps/rejected": -1.2961117029190063, "loss": 4.4357, "nll_loss": 1.108377456665039, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016738938866183162, "rewards/margins": 0.12793727219104767, "rewards/rejected": -0.12961116433143616, "step": 2685 }, { "epoch": 1.8575380359612725, "grad_norm": 4.859482765197754, "learning_rate": 4.5235899800215155e-05, "log_odds_chosen": 3.9110684394836426, "log_odds_ratio": -0.108709916472435, "logits/chosen": -0.7197506427764893, "logits/rejected": -0.7499377727508545, "logps/chosen": -0.039227042347192764, "logps/rejected": -0.5742328763008118, "loss": 4.3951, "nll_loss": 1.0879111289978027, "rewards/accuracies": 1.0, "rewards/chosen": -0.003922704607248306, "rewards/margins": 0.05350058153271675, "rewards/rejected": -0.05742328613996506, "step": 2686 }, { "epoch": 1.8582295988934994, "grad_norm": 5.944377422332764, "learning_rate": 4.523205778392501e-05, "log_odds_chosen": 5.475446701049805, "log_odds_ratio": -0.0907863900065422, "logits/chosen": -0.10576437413692474, "logits/rejected": -0.1509627252817154, "logps/chosen": -0.04343018680810928, "logps/rejected": -0.7776180505752563, "loss": 2.6183, "nll_loss": 0.6454888582229614, "rewards/accuracies": 1.0, "rewards/chosen": -0.004343018867075443, "rewards/margins": 0.07341878116130829, "rewards/rejected": -0.07776179909706116, "step": 2687 }, { "epoch": 1.8589211618257262, "grad_norm": 7.269049644470215, "learning_rate": 4.522821576763486e-05, "log_odds_chosen": 5.872060775756836, "log_odds_ratio": -0.04043078050017357, "logits/chosen": -0.6755918860435486, "logits/rejected": -0.7131515741348267, "logps/chosen": -0.03322390094399452, "logps/rejected": -0.8359825015068054, "loss": 2.8901, "nll_loss": 0.7184814214706421, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033223903737962246, "rewards/margins": 0.08027586340904236, "rewards/rejected": -0.08359825611114502, "step": 2688 }, { "epoch": 1.859612724757953, "grad_norm": 11.413594245910645, "learning_rate": 4.5224373751344706e-05, "log_odds_chosen": 3.7279882431030273, "log_odds_ratio": -0.544560432434082, "logits/chosen": -0.646683931350708, "logits/rejected": -0.6721376180648804, "logps/chosen": -0.10949568450450897, "logps/rejected": -0.7356846332550049, "loss": 3.597, "nll_loss": 0.8447965979576111, "rewards/accuracies": 0.75, "rewards/chosen": -0.010949568822979927, "rewards/margins": 0.06261889636516571, "rewards/rejected": -0.07356846332550049, "step": 2689 }, { "epoch": 1.8603042876901799, "grad_norm": 11.228747367858887, "learning_rate": 4.522053173505456e-05, "log_odds_chosen": 4.0871171951293945, "log_odds_ratio": -0.22871485352516174, "logits/chosen": -0.6205450296401978, "logits/rejected": -0.6625868082046509, "logps/chosen": -0.04733623191714287, "logps/rejected": -0.8056012988090515, "loss": 3.3682, "nll_loss": 0.8191684484481812, "rewards/accuracies": 0.875, "rewards/chosen": -0.004733623005449772, "rewards/margins": 0.07582651823759079, "rewards/rejected": -0.08056013286113739, "step": 2690 }, { "epoch": 1.8609958506224067, "grad_norm": 6.608537197113037, "learning_rate": 4.521668971876441e-05, "log_odds_chosen": 5.136848449707031, "log_odds_ratio": -0.12134000658988953, "logits/chosen": -0.5894830822944641, "logits/rejected": -0.5903124809265137, "logps/chosen": -0.03736359626054764, "logps/rejected": -0.9877703785896301, "loss": 3.2075, "nll_loss": 0.7897449135780334, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037363599985837936, "rewards/margins": 0.09504067152738571, "rewards/rejected": -0.09877702593803406, "step": 2691 }, { "epoch": 1.8616874135546335, "grad_norm": 9.172867774963379, "learning_rate": 4.5212847702474256e-05, "log_odds_chosen": 4.03933048248291, "log_odds_ratio": -0.07970944046974182, "logits/chosen": -0.6346632242202759, "logits/rejected": -0.6845720410346985, "logps/chosen": -0.05896829068660736, "logps/rejected": -0.7044997811317444, "loss": 4.1825, "nll_loss": 1.0376558303833008, "rewards/accuracies": 1.0, "rewards/chosen": -0.005896829068660736, "rewards/margins": 0.0645531564950943, "rewards/rejected": -0.07044997811317444, "step": 2692 }, { "epoch": 1.8623789764868603, "grad_norm": 10.028759002685547, "learning_rate": 4.5209005686184115e-05, "log_odds_chosen": 5.194214344024658, "log_odds_ratio": -0.17836083471775055, "logits/chosen": -0.29558470845222473, "logits/rejected": -0.3286222815513611, "logps/chosen": -0.04205503687262535, "logps/rejected": -1.0484198331832886, "loss": 3.2651, "nll_loss": 0.7984365820884705, "rewards/accuracies": 0.875, "rewards/chosen": -0.00420550350099802, "rewards/margins": 0.10063648223876953, "rewards/rejected": -0.10484198480844498, "step": 2693 }, { "epoch": 1.8630705394190872, "grad_norm": 7.484879970550537, "learning_rate": 4.520516366989396e-05, "log_odds_chosen": 5.171629905700684, "log_odds_ratio": -0.17990508675575256, "logits/chosen": -0.5625219941139221, "logits/rejected": -0.5617840886116028, "logps/chosen": -0.1852400302886963, "logps/rejected": -0.7624889016151428, "loss": 2.7943, "nll_loss": 0.680594265460968, "rewards/accuracies": 0.875, "rewards/chosen": -0.01852400228381157, "rewards/margins": 0.05772489309310913, "rewards/rejected": -0.076248899102211, "step": 2694 }, { "epoch": 1.863762102351314, "grad_norm": 6.669751167297363, "learning_rate": 4.5201321653603813e-05, "log_odds_chosen": 4.229768753051758, "log_odds_ratio": -0.16075065732002258, "logits/chosen": -0.8317698240280151, "logits/rejected": -0.863510012626648, "logps/chosen": -0.0615994967520237, "logps/rejected": -0.6593600511550903, "loss": 2.3906, "nll_loss": 0.5815827250480652, "rewards/accuracies": 0.875, "rewards/chosen": -0.0061599500477313995, "rewards/margins": 0.05977606028318405, "rewards/rejected": -0.06593600660562515, "step": 2695 }, { "epoch": 1.8644536652835408, "grad_norm": 7.034972667694092, "learning_rate": 4.5197479637313666e-05, "log_odds_chosen": 3.354255199432373, "log_odds_ratio": -0.3019982576370239, "logits/chosen": -0.7325828075408936, "logits/rejected": -0.7738179564476013, "logps/chosen": -0.07212116569280624, "logps/rejected": -0.6469085812568665, "loss": 2.8238, "nll_loss": 0.6757556200027466, "rewards/accuracies": 0.875, "rewards/chosen": -0.0072121163830161095, "rewards/margins": 0.05747874453663826, "rewards/rejected": -0.06469085812568665, "step": 2696 }, { "epoch": 1.8651452282157677, "grad_norm": 7.403872966766357, "learning_rate": 4.519363762102352e-05, "log_odds_chosen": 2.895770788192749, "log_odds_ratio": -0.36901670694351196, "logits/chosen": -0.5260494947433472, "logits/rejected": -0.5574390292167664, "logps/chosen": -0.07369774580001831, "logps/rejected": -0.5763075351715088, "loss": 3.0012, "nll_loss": 0.7134076356887817, "rewards/accuracies": 0.75, "rewards/chosen": -0.007369774393737316, "rewards/margins": 0.050260983407497406, "rewards/rejected": -0.0576307512819767, "step": 2697 }, { "epoch": 1.8658367911479945, "grad_norm": 6.005950450897217, "learning_rate": 4.5189795604733364e-05, "log_odds_chosen": 5.62225341796875, "log_odds_ratio": -0.02279970608651638, "logits/chosen": -0.6721312403678894, "logits/rejected": -0.6759477853775024, "logps/chosen": -0.012295754626393318, "logps/rejected": -0.8416109085083008, "loss": 3.2932, "nll_loss": 0.821026623249054, "rewards/accuracies": 1.0, "rewards/chosen": -0.001229575602337718, "rewards/margins": 0.0829315185546875, "rewards/rejected": -0.08416110277175903, "step": 2698 }, { "epoch": 1.8665283540802213, "grad_norm": 6.691189765930176, "learning_rate": 4.5185953588443216e-05, "log_odds_chosen": 5.296922206878662, "log_odds_ratio": -0.056247636675834656, "logits/chosen": -0.7565345168113708, "logits/rejected": -0.8415186405181885, "logps/chosen": -0.04300527274608612, "logps/rejected": -1.137931227684021, "loss": 2.1701, "nll_loss": 0.5368894338607788, "rewards/accuracies": 1.0, "rewards/chosen": -0.004300527274608612, "rewards/margins": 0.10949259996414185, "rewards/rejected": -0.11379312723875046, "step": 2699 }, { "epoch": 1.8672199170124482, "grad_norm": 6.369565010070801, "learning_rate": 4.518211157215307e-05, "log_odds_chosen": 4.468364715576172, "log_odds_ratio": -0.17497983574867249, "logits/chosen": -0.8825744390487671, "logits/rejected": -0.9054487943649292, "logps/chosen": -0.06848480552434921, "logps/rejected": -0.7379632592201233, "loss": 4.083, "nll_loss": 1.0032566785812378, "rewards/accuracies": 1.0, "rewards/chosen": -0.006848481018096209, "rewards/margins": 0.06694784760475159, "rewards/rejected": -0.07379632443189621, "step": 2700 }, { "epoch": 1.867911479944675, "grad_norm": 6.025998592376709, "learning_rate": 4.5178269555862915e-05, "log_odds_chosen": 4.842930793762207, "log_odds_ratio": -0.2740570306777954, "logits/chosen": -0.7599248290061951, "logits/rejected": -0.802500307559967, "logps/chosen": -0.04903910309076309, "logps/rejected": -0.7304023504257202, "loss": 2.8573, "nll_loss": 0.6869195699691772, "rewards/accuracies": 0.75, "rewards/chosen": -0.0049039097502827644, "rewards/margins": 0.06813632696866989, "rewards/rejected": -0.07304023206233978, "step": 2701 }, { "epoch": 1.8686030428769018, "grad_norm": 5.929429054260254, "learning_rate": 4.5174427539572774e-05, "log_odds_chosen": 5.073709487915039, "log_odds_ratio": -0.3314833641052246, "logits/chosen": -0.43065565824508667, "logits/rejected": -0.45779237151145935, "logps/chosen": -0.0539717972278595, "logps/rejected": -0.6632078886032104, "loss": 2.8182, "nll_loss": 0.6714061498641968, "rewards/accuracies": 0.875, "rewards/chosen": -0.0053971800953149796, "rewards/margins": 0.060923609882593155, "rewards/rejected": -0.06632079184055328, "step": 2702 }, { "epoch": 1.8692946058091287, "grad_norm": 6.13959264755249, "learning_rate": 4.517058552328262e-05, "log_odds_chosen": 3.4868664741516113, "log_odds_ratio": -0.09228986501693726, "logits/chosen": -0.3578067421913147, "logits/rejected": -0.41644516587257385, "logps/chosen": -0.033023469150066376, "logps/rejected": -0.4936770796775818, "loss": 2.4884, "nll_loss": 0.6128824353218079, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033023469150066376, "rewards/margins": 0.04606536030769348, "rewards/rejected": -0.04936770722270012, "step": 2703 }, { "epoch": 1.8699861687413555, "grad_norm": 13.459339141845703, "learning_rate": 4.516674350699247e-05, "log_odds_chosen": 3.097822666168213, "log_odds_ratio": -0.5419131517410278, "logits/chosen": -0.8968651294708252, "logits/rejected": -0.9812121987342834, "logps/chosen": -0.12201026827096939, "logps/rejected": -0.9365118145942688, "loss": 3.0374, "nll_loss": 0.705159068107605, "rewards/accuracies": 0.75, "rewards/chosen": -0.012201027013361454, "rewards/margins": 0.08145016431808472, "rewards/rejected": -0.0936511904001236, "step": 2704 }, { "epoch": 1.8706777316735823, "grad_norm": 8.480863571166992, "learning_rate": 4.5162901490702324e-05, "log_odds_chosen": 4.1900553703308105, "log_odds_ratio": -0.059285108000040054, "logits/chosen": -0.4107376039028168, "logits/rejected": -0.4237530827522278, "logps/chosen": -0.03784055635333061, "logps/rejected": -0.7297183275222778, "loss": 3.4364, "nll_loss": 0.8531801104545593, "rewards/accuracies": 1.0, "rewards/chosen": -0.003784055821597576, "rewards/margins": 0.06918777525424957, "rewards/rejected": -0.07297183573246002, "step": 2705 }, { "epoch": 1.8713692946058091, "grad_norm": 6.559045791625977, "learning_rate": 4.515905947441218e-05, "log_odds_chosen": 4.857430458068848, "log_odds_ratio": -0.05420558527112007, "logits/chosen": -0.28830158710479736, "logits/rejected": -0.29584336280822754, "logps/chosen": -0.07617554068565369, "logps/rejected": -0.926051139831543, "loss": 2.2317, "nll_loss": 0.552515983581543, "rewards/accuracies": 1.0, "rewards/chosen": -0.007617554161697626, "rewards/margins": 0.08498755842447281, "rewards/rejected": -0.0926051139831543, "step": 2706 }, { "epoch": 1.872060857538036, "grad_norm": 5.751916885375977, "learning_rate": 4.515521745812202e-05, "log_odds_chosen": 3.4366352558135986, "log_odds_ratio": -0.453977108001709, "logits/chosen": -0.28560858964920044, "logits/rejected": -0.27950069308280945, "logps/chosen": -0.07851672917604446, "logps/rejected": -0.6064249873161316, "loss": 2.5579, "nll_loss": 0.5940800905227661, "rewards/accuracies": 0.625, "rewards/chosen": -0.007851672358810902, "rewards/margins": 0.05279082432389259, "rewards/rejected": -0.06064249947667122, "step": 2707 }, { "epoch": 1.8727524204702628, "grad_norm": 8.47996997833252, "learning_rate": 4.5151375441831875e-05, "log_odds_chosen": 3.9178547859191895, "log_odds_ratio": -0.09079738706350327, "logits/chosen": -0.5651100873947144, "logits/rejected": -0.6518626809120178, "logps/chosen": -0.03847382590174675, "logps/rejected": -0.7573758363723755, "loss": 3.5533, "nll_loss": 0.8792436122894287, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038473824970424175, "rewards/margins": 0.0718902051448822, "rewards/rejected": -0.07573758810758591, "step": 2708 }, { "epoch": 1.8734439834024896, "grad_norm": 18.4063663482666, "learning_rate": 4.514753342554173e-05, "log_odds_chosen": 3.6568000316619873, "log_odds_ratio": -0.2200811207294464, "logits/chosen": -0.38470497727394104, "logits/rejected": -0.34699082374572754, "logps/chosen": -0.04677264019846916, "logps/rejected": -0.6297637820243835, "loss": 2.9132, "nll_loss": 0.7062985897064209, "rewards/accuracies": 0.875, "rewards/chosen": -0.004677264019846916, "rewards/margins": 0.05829911679029465, "rewards/rejected": -0.06297638267278671, "step": 2709 }, { "epoch": 1.8741355463347165, "grad_norm": 19.251474380493164, "learning_rate": 4.514369140925157e-05, "log_odds_chosen": 2.494581460952759, "log_odds_ratio": -0.42333984375, "logits/chosen": -0.7448416948318481, "logits/rejected": -0.7299750447273254, "logps/chosen": -0.08245445042848587, "logps/rejected": -0.4006720781326294, "loss": 2.5965, "nll_loss": 0.6068035364151001, "rewards/accuracies": 0.625, "rewards/chosen": -0.008245445787906647, "rewards/margins": 0.03182176128029823, "rewards/rejected": -0.04006720334291458, "step": 2710 }, { "epoch": 1.8748271092669433, "grad_norm": 4.293595314025879, "learning_rate": 4.513984939296143e-05, "log_odds_chosen": 6.068905830383301, "log_odds_ratio": -0.04996497184038162, "logits/chosen": -0.5838699340820312, "logits/rejected": -0.6190752983093262, "logps/chosen": -0.00511554628610611, "logps/rejected": -0.6605459451675415, "loss": 2.7534, "nll_loss": 0.6833570003509521, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005115546518936753, "rewards/margins": 0.0655430406332016, "rewards/rejected": -0.06605459749698639, "step": 2711 }, { "epoch": 1.8755186721991701, "grad_norm": 8.421035766601562, "learning_rate": 4.513600737667128e-05, "log_odds_chosen": 4.47832727432251, "log_odds_ratio": -0.16189125180244446, "logits/chosen": -0.5043261051177979, "logits/rejected": -0.5526118278503418, "logps/chosen": -0.07159212976694107, "logps/rejected": -1.062654733657837, "loss": 3.0325, "nll_loss": 0.7419383525848389, "rewards/accuracies": 1.0, "rewards/chosen": -0.007159212604165077, "rewards/margins": 0.09910625964403152, "rewards/rejected": -0.10626547038555145, "step": 2712 }, { "epoch": 1.876210235131397, "grad_norm": 12.252899169921875, "learning_rate": 4.513216536038113e-05, "log_odds_chosen": 2.638622999191284, "log_odds_ratio": -0.30449140071868896, "logits/chosen": -0.6102049350738525, "logits/rejected": -0.5791282057762146, "logps/chosen": -0.11507735401391983, "logps/rejected": -0.7121509909629822, "loss": 3.5219, "nll_loss": 0.8500288724899292, "rewards/accuracies": 0.875, "rewards/chosen": -0.011507734656333923, "rewards/margins": 0.059707362204790115, "rewards/rejected": -0.07121509313583374, "step": 2713 }, { "epoch": 1.8769017980636238, "grad_norm": 5.714953422546387, "learning_rate": 4.512832334409098e-05, "log_odds_chosen": 4.1826934814453125, "log_odds_ratio": -0.1705974042415619, "logits/chosen": -0.7607474327087402, "logits/rejected": -0.7724503874778748, "logps/chosen": -0.04644298925995827, "logps/rejected": -0.7300114631652832, "loss": 3.2861, "nll_loss": 0.8044597506523132, "rewards/accuracies": 0.875, "rewards/chosen": -0.004644298925995827, "rewards/margins": 0.06835684180259705, "rewards/rejected": -0.07300114631652832, "step": 2714 }, { "epoch": 1.8775933609958506, "grad_norm": 8.1884126663208, "learning_rate": 4.5124481327800835e-05, "log_odds_chosen": 5.894617080688477, "log_odds_ratio": -0.07726238667964935, "logits/chosen": -0.43670010566711426, "logits/rejected": -0.5112190246582031, "logps/chosen": -0.03426039591431618, "logps/rejected": -0.8254495859146118, "loss": 3.5076, "nll_loss": 0.8691853284835815, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034260395914316177, "rewards/margins": 0.07911892235279083, "rewards/rejected": -0.0825449675321579, "step": 2715 }, { "epoch": 1.8782849239280774, "grad_norm": 7.814009189605713, "learning_rate": 4.512063931151068e-05, "log_odds_chosen": 4.36599063873291, "log_odds_ratio": -0.14571961760520935, "logits/chosen": -0.588714063167572, "logits/rejected": -0.7005572319030762, "logps/chosen": -0.032498858869075775, "logps/rejected": -0.8260935544967651, "loss": 3.3757, "nll_loss": 0.829341471195221, "rewards/accuracies": 1.0, "rewards/chosen": -0.003249885980039835, "rewards/margins": 0.07935947179794312, "rewards/rejected": -0.08260935544967651, "step": 2716 }, { "epoch": 1.8789764868603043, "grad_norm": 6.06086540222168, "learning_rate": 4.511679729522053e-05, "log_odds_chosen": 3.8938732147216797, "log_odds_ratio": -0.41503429412841797, "logits/chosen": -0.5160313844680786, "logits/rejected": -0.4538733959197998, "logps/chosen": -0.041960708796978, "logps/rejected": -0.4900236129760742, "loss": 2.833, "nll_loss": 0.6667478680610657, "rewards/accuracies": 0.875, "rewards/chosen": -0.004196071065962315, "rewards/margins": 0.04480629041790962, "rewards/rejected": -0.04900236055254936, "step": 2717 }, { "epoch": 1.879668049792531, "grad_norm": 13.422004699707031, "learning_rate": 4.5112955278930386e-05, "log_odds_chosen": 6.625287055969238, "log_odds_ratio": -0.11494994163513184, "logits/chosen": -0.5009042024612427, "logits/rejected": -0.589263916015625, "logps/chosen": -0.02857610397040844, "logps/rejected": -1.2235627174377441, "loss": 2.9235, "nll_loss": 0.7193735837936401, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028576103504747152, "rewards/margins": 0.11949865520000458, "rewards/rejected": -0.12235626578330994, "step": 2718 }, { "epoch": 1.880359612724758, "grad_norm": 9.228880882263184, "learning_rate": 4.510911326264023e-05, "log_odds_chosen": 4.821526050567627, "log_odds_ratio": -0.06934637576341629, "logits/chosen": -0.5990431308746338, "logits/rejected": -0.5863394141197205, "logps/chosen": -0.04004587605595589, "logps/rejected": -0.8961687684059143, "loss": 3.7117, "nll_loss": 0.9209888577461243, "rewards/accuracies": 1.0, "rewards/chosen": -0.004004587419331074, "rewards/margins": 0.08561229705810547, "rewards/rejected": -0.08961687982082367, "step": 2719 }, { "epoch": 1.8810511756569848, "grad_norm": 5.676540851593018, "learning_rate": 4.510527124635009e-05, "log_odds_chosen": 4.036774158477783, "log_odds_ratio": -0.12386953830718994, "logits/chosen": -0.6862848401069641, "logits/rejected": -0.6691758632659912, "logps/chosen": -0.052078358829021454, "logps/rejected": -0.5784726142883301, "loss": 2.4663, "nll_loss": 0.6041802167892456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0052078356966376305, "rewards/margins": 0.0526394248008728, "rewards/rejected": -0.05784726142883301, "step": 2720 }, { "epoch": 1.8817427385892116, "grad_norm": 6.777297496795654, "learning_rate": 4.5101429230059936e-05, "log_odds_chosen": 5.018651485443115, "log_odds_ratio": -0.13236510753631592, "logits/chosen": -0.8772146701812744, "logits/rejected": -0.8254865407943726, "logps/chosen": -0.04593784362077713, "logps/rejected": -0.8731970191001892, "loss": 2.6719, "nll_loss": 0.6547348499298096, "rewards/accuracies": 1.0, "rewards/chosen": -0.004593783989548683, "rewards/margins": 0.08272591233253479, "rewards/rejected": -0.08731970191001892, "step": 2721 }, { "epoch": 1.8824343015214384, "grad_norm": 8.613203048706055, "learning_rate": 4.509758721376979e-05, "log_odds_chosen": 5.046001434326172, "log_odds_ratio": -0.12102051824331284, "logits/chosen": -0.33736640214920044, "logits/rejected": -0.3499341607093811, "logps/chosen": -0.02708081528544426, "logps/rejected": -0.5669259428977966, "loss": 2.5795, "nll_loss": 0.632770836353302, "rewards/accuracies": 0.875, "rewards/chosen": -0.0027080816216766834, "rewards/margins": 0.053984515368938446, "rewards/rejected": -0.05669259652495384, "step": 2722 }, { "epoch": 1.8831258644536653, "grad_norm": 6.157186985015869, "learning_rate": 4.509374519747964e-05, "log_odds_chosen": 5.869537353515625, "log_odds_ratio": -0.01888483390212059, "logits/chosen": -0.37536218762397766, "logits/rejected": -0.4354729950428009, "logps/chosen": -0.05232808366417885, "logps/rejected": -1.428209900856018, "loss": 2.4493, "nll_loss": 0.6104321479797363, "rewards/accuracies": 1.0, "rewards/chosen": -0.0052328091114759445, "rewards/margins": 0.13758818805217743, "rewards/rejected": -0.14282099902629852, "step": 2723 }, { "epoch": 1.883817427385892, "grad_norm": 8.024432182312012, "learning_rate": 4.5089903181189494e-05, "log_odds_chosen": 7.1319169998168945, "log_odds_ratio": -0.12523026764392853, "logits/chosen": -0.280782014131546, "logits/rejected": -0.322892963886261, "logps/chosen": -0.020273447036743164, "logps/rejected": -0.9794613718986511, "loss": 3.4069, "nll_loss": 0.8392095565795898, "rewards/accuracies": 0.875, "rewards/chosen": -0.0020273446571081877, "rewards/margins": 0.09591878950595856, "rewards/rejected": -0.09794613718986511, "step": 2724 }, { "epoch": 1.884508990318119, "grad_norm": 13.520628929138184, "learning_rate": 4.508606116489934e-05, "log_odds_chosen": 2.8214855194091797, "log_odds_ratio": -0.5782947540283203, "logits/chosen": -0.6274783611297607, "logits/rejected": -0.6184959411621094, "logps/chosen": -0.13344895839691162, "logps/rejected": -0.893020749092102, "loss": 2.5629, "nll_loss": 0.5828902721405029, "rewards/accuracies": 0.625, "rewards/chosen": -0.013344896957278252, "rewards/margins": 0.07595717906951904, "rewards/rejected": -0.08930207788944244, "step": 2725 }, { "epoch": 1.8852005532503457, "grad_norm": 8.038005828857422, "learning_rate": 4.508221914860919e-05, "log_odds_chosen": 3.998943328857422, "log_odds_ratio": -0.2384883165359497, "logits/chosen": -0.6419277787208557, "logits/rejected": -0.6372604966163635, "logps/chosen": -0.04527169466018677, "logps/rejected": -0.5038492679595947, "loss": 1.9882, "nll_loss": 0.47319209575653076, "rewards/accuracies": 0.875, "rewards/chosen": -0.004527169745415449, "rewards/margins": 0.0458577536046505, "rewards/rejected": -0.050384923815727234, "step": 2726 }, { "epoch": 1.8858921161825726, "grad_norm": 9.993119239807129, "learning_rate": 4.5078377132319044e-05, "log_odds_chosen": 3.0417394638061523, "log_odds_ratio": -0.3634532690048218, "logits/chosen": -0.789708137512207, "logits/rejected": -0.7860262989997864, "logps/chosen": -0.08906921744346619, "logps/rejected": -0.4780641496181488, "loss": 3.6667, "nll_loss": 0.8803341388702393, "rewards/accuracies": 0.75, "rewards/chosen": -0.008906921371817589, "rewards/margins": 0.0388994924724102, "rewards/rejected": -0.04780641570687294, "step": 2727 }, { "epoch": 1.8865836791147994, "grad_norm": 5.467214584350586, "learning_rate": 4.507453511602889e-05, "log_odds_chosen": 5.641713619232178, "log_odds_ratio": -0.1424766629934311, "logits/chosen": -0.5579952001571655, "logits/rejected": -0.6143260598182678, "logps/chosen": -0.04905056208372116, "logps/rejected": -0.918487012386322, "loss": 3.252, "nll_loss": 0.7987476587295532, "rewards/accuracies": 1.0, "rewards/chosen": -0.004905056208372116, "rewards/margins": 0.08694364130496979, "rewards/rejected": -0.0918486937880516, "step": 2728 }, { "epoch": 1.8872752420470262, "grad_norm": 6.023781776428223, "learning_rate": 4.507069309973875e-05, "log_odds_chosen": 5.422904014587402, "log_odds_ratio": -0.18042154610157013, "logits/chosen": -0.21186286211013794, "logits/rejected": -0.2458893209695816, "logps/chosen": -0.042429495602846146, "logps/rejected": -0.9531780481338501, "loss": 3.054, "nll_loss": 0.7454517483711243, "rewards/accuracies": 0.875, "rewards/chosen": -0.004242949653416872, "rewards/margins": 0.0910748615860939, "rewards/rejected": -0.09531781077384949, "step": 2729 }, { "epoch": 1.887966804979253, "grad_norm": 6.529669761657715, "learning_rate": 4.5066851083448595e-05, "log_odds_chosen": 3.4735217094421387, "log_odds_ratio": -0.13795514404773712, "logits/chosen": -0.6479552388191223, "logits/rejected": -0.664212703704834, "logps/chosen": -0.08479224890470505, "logps/rejected": -0.5873520374298096, "loss": 3.1475, "nll_loss": 0.7730732560157776, "rewards/accuracies": 1.0, "rewards/chosen": -0.00847922544926405, "rewards/margins": 0.05025597661733627, "rewards/rejected": -0.0587352029979229, "step": 2730 }, { "epoch": 1.88865836791148, "grad_norm": 5.234907150268555, "learning_rate": 4.506300906715845e-05, "log_odds_chosen": 5.450425624847412, "log_odds_ratio": -0.023839298635721207, "logits/chosen": -0.4833140969276428, "logits/rejected": -0.5143532752990723, "logps/chosen": -0.03760465234518051, "logps/rejected": -1.2618212699890137, "loss": 2.6293, "nll_loss": 0.6549372673034668, "rewards/accuracies": 1.0, "rewards/chosen": -0.003760465420782566, "rewards/margins": 0.12242165207862854, "rewards/rejected": -0.12618212401866913, "step": 2731 }, { "epoch": 1.8893499308437067, "grad_norm": 9.136272430419922, "learning_rate": 4.50591670508683e-05, "log_odds_chosen": 4.215113639831543, "log_odds_ratio": -0.39656686782836914, "logits/chosen": -0.5351483821868896, "logits/rejected": -0.628555417060852, "logps/chosen": -0.0797351598739624, "logps/rejected": -0.9139702320098877, "loss": 3.6774, "nll_loss": 0.8796899318695068, "rewards/accuracies": 0.75, "rewards/chosen": -0.00797351635992527, "rewards/margins": 0.08342351019382477, "rewards/rejected": -0.09139702469110489, "step": 2732 }, { "epoch": 1.8900414937759336, "grad_norm": 5.817863464355469, "learning_rate": 4.505532503457815e-05, "log_odds_chosen": 5.436681747436523, "log_odds_ratio": -0.08999508619308472, "logits/chosen": -0.3045744299888611, "logits/rejected": -0.3256331980228424, "logps/chosen": -0.024274304509162903, "logps/rejected": -0.8689183592796326, "loss": 2.1392, "nll_loss": 0.525797963142395, "rewards/accuracies": 1.0, "rewards/chosen": -0.00242743082344532, "rewards/margins": 0.0844644159078598, "rewards/rejected": -0.08689183741807938, "step": 2733 }, { "epoch": 1.8907330567081604, "grad_norm": 12.771986961364746, "learning_rate": 4.5051483018288e-05, "log_odds_chosen": 2.040485382080078, "log_odds_ratio": -0.7644972801208496, "logits/chosen": -0.6130284070968628, "logits/rejected": -0.6401146650314331, "logps/chosen": -0.10524089634418488, "logps/rejected": -0.43873098492622375, "loss": 3.3721, "nll_loss": 0.7665740251541138, "rewards/accuracies": 0.75, "rewards/chosen": -0.010524090379476547, "rewards/margins": 0.03334901109337807, "rewards/rejected": -0.043873101472854614, "step": 2734 }, { "epoch": 1.8914246196403872, "grad_norm": 5.783315658569336, "learning_rate": 4.504764100199785e-05, "log_odds_chosen": 4.502688407897949, "log_odds_ratio": -0.18156832456588745, "logits/chosen": -0.8265919089317322, "logits/rejected": -0.8804236650466919, "logps/chosen": -0.06405682116746902, "logps/rejected": -0.7433871030807495, "loss": 2.4883, "nll_loss": 0.6039232611656189, "rewards/accuracies": 0.875, "rewards/chosen": -0.00640568183735013, "rewards/margins": 0.06793303042650223, "rewards/rejected": -0.07433871179819107, "step": 2735 }, { "epoch": 1.892116182572614, "grad_norm": 7.092947959899902, "learning_rate": 4.50437989857077e-05, "log_odds_chosen": 6.711772918701172, "log_odds_ratio": -0.028261862695217133, "logits/chosen": -0.35366034507751465, "logits/rejected": -0.4563252031803131, "logps/chosen": -0.015546809881925583, "logps/rejected": -0.8382112383842468, "loss": 2.5491, "nll_loss": 0.634446918964386, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015546809881925583, "rewards/margins": 0.08226644992828369, "rewards/rejected": -0.0838211253285408, "step": 2736 }, { "epoch": 1.8928077455048409, "grad_norm": 8.738595962524414, "learning_rate": 4.503995696941755e-05, "log_odds_chosen": 4.996340751647949, "log_odds_ratio": -0.047110073268413544, "logits/chosen": -0.6247482299804688, "logits/rejected": -0.6630555391311646, "logps/chosen": -0.036776408553123474, "logps/rejected": -1.061787486076355, "loss": 3.7177, "nll_loss": 0.92470383644104, "rewards/accuracies": 1.0, "rewards/chosen": -0.003677641274407506, "rewards/margins": 0.10250110179185867, "rewards/rejected": -0.10617874562740326, "step": 2737 }, { "epoch": 1.8934993084370677, "grad_norm": 9.703561782836914, "learning_rate": 4.50361149531274e-05, "log_odds_chosen": 5.678030967712402, "log_odds_ratio": -0.2548525929450989, "logits/chosen": -1.067615270614624, "logits/rejected": -1.1188896894454956, "logps/chosen": -0.05637247487902641, "logps/rejected": -0.8493156433105469, "loss": 4.2344, "nll_loss": 1.0331244468688965, "rewards/accuracies": 0.875, "rewards/chosen": -0.005637247581034899, "rewards/margins": 0.07929432392120361, "rewards/rejected": -0.08493156731128693, "step": 2738 }, { "epoch": 1.8941908713692945, "grad_norm": 6.3866753578186035, "learning_rate": 4.503227293683725e-05, "log_odds_chosen": 5.3257646560668945, "log_odds_ratio": -0.03938756138086319, "logits/chosen": -0.6852152347564697, "logits/rejected": -0.6986730694770813, "logps/chosen": -0.0277650635689497, "logps/rejected": -0.7782572507858276, "loss": 3.0831, "nll_loss": 0.7668415307998657, "rewards/accuracies": 1.0, "rewards/chosen": -0.00277650635689497, "rewards/margins": 0.07504921406507492, "rewards/rejected": -0.07782572507858276, "step": 2739 }, { "epoch": 1.8948824343015214, "grad_norm": 8.502422332763672, "learning_rate": 4.5028430920547106e-05, "log_odds_chosen": 4.738999843597412, "log_odds_ratio": -0.1449851542711258, "logits/chosen": -0.9557698965072632, "logits/rejected": -1.0144156217575073, "logps/chosen": -0.022453149780631065, "logps/rejected": -0.8287165760993958, "loss": 2.469, "nll_loss": 0.602742612361908, "rewards/accuracies": 1.0, "rewards/chosen": -0.002245314884930849, "rewards/margins": 0.08062634617090225, "rewards/rejected": -0.08287165313959122, "step": 2740 }, { "epoch": 1.8955739972337482, "grad_norm": 9.78346061706543, "learning_rate": 4.502458890425695e-05, "log_odds_chosen": 7.0313920974731445, "log_odds_ratio": -0.04768810421228409, "logits/chosen": -0.36658820509910583, "logits/rejected": -0.4856250584125519, "logps/chosen": -0.01616629771888256, "logps/rejected": -1.0696743726730347, "loss": 3.9104, "nll_loss": 0.9728347659111023, "rewards/accuracies": 1.0, "rewards/chosen": -0.001616629771888256, "rewards/margins": 0.10535082221031189, "rewards/rejected": -0.10696744918823242, "step": 2741 }, { "epoch": 1.896265560165975, "grad_norm": 9.929014205932617, "learning_rate": 4.502074688796681e-05, "log_odds_chosen": 5.32511043548584, "log_odds_ratio": -0.17078785598278046, "logits/chosen": -0.284779816865921, "logits/rejected": -0.40232762694358826, "logps/chosen": -0.04031224176287651, "logps/rejected": -1.167478084564209, "loss": 3.9454, "nll_loss": 0.9692734479904175, "rewards/accuracies": 0.875, "rewards/chosen": -0.004031224176287651, "rewards/margins": 0.11271660029888153, "rewards/rejected": -0.11674781143665314, "step": 2742 }, { "epoch": 1.8969571230982019, "grad_norm": 8.456881523132324, "learning_rate": 4.5016904871676656e-05, "log_odds_chosen": 5.320615291595459, "log_odds_ratio": -0.06412041187286377, "logits/chosen": -0.5623228549957275, "logits/rejected": -0.6121507287025452, "logps/chosen": -0.028262116014957428, "logps/rejected": -0.6847209930419922, "loss": 2.7537, "nll_loss": 0.6820018887519836, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028262115083634853, "rewards/margins": 0.06564588844776154, "rewards/rejected": -0.06847210228443146, "step": 2743 }, { "epoch": 1.8976486860304287, "grad_norm": 9.6913480758667, "learning_rate": 4.501306285538651e-05, "log_odds_chosen": 4.994526386260986, "log_odds_ratio": -0.14609560370445251, "logits/chosen": -0.602185070514679, "logits/rejected": -0.6847248673439026, "logps/chosen": -0.09153126180171967, "logps/rejected": -1.0046964883804321, "loss": 2.9175, "nll_loss": 0.7147679328918457, "rewards/accuracies": 1.0, "rewards/chosen": -0.009153125807642937, "rewards/margins": 0.09131652861833572, "rewards/rejected": -0.10046965628862381, "step": 2744 }, { "epoch": 1.8983402489626555, "grad_norm": 4.877399921417236, "learning_rate": 4.500922083909636e-05, "log_odds_chosen": 5.853466987609863, "log_odds_ratio": -0.06557883322238922, "logits/chosen": -0.3120821714401245, "logits/rejected": -0.3295907974243164, "logps/chosen": -0.030897993594408035, "logps/rejected": -1.2178821563720703, "loss": 2.5115, "nll_loss": 0.6213077902793884, "rewards/accuracies": 1.0, "rewards/chosen": -0.003089799080044031, "rewards/margins": 0.11869841814041138, "rewards/rejected": -0.12178821861743927, "step": 2745 }, { "epoch": 1.8990318118948823, "grad_norm": 9.56112289428711, "learning_rate": 4.5005378822806213e-05, "log_odds_chosen": 5.372619152069092, "log_odds_ratio": -0.2080005705356598, "logits/chosen": -0.4588029086589813, "logits/rejected": -0.5576678514480591, "logps/chosen": -0.035982996225357056, "logps/rejected": -0.8040719032287598, "loss": 2.6873, "nll_loss": 0.6510368585586548, "rewards/accuracies": 0.875, "rewards/chosen": -0.003598299575969577, "rewards/margins": 0.07680889219045639, "rewards/rejected": -0.08040718734264374, "step": 2746 }, { "epoch": 1.8997233748271092, "grad_norm": 7.170275688171387, "learning_rate": 4.500153680651606e-05, "log_odds_chosen": 5.3207597732543945, "log_odds_ratio": -0.05614379793405533, "logits/chosen": -0.22629055380821228, "logits/rejected": -0.2640830874443054, "logps/chosen": -0.02286584861576557, "logps/rejected": -0.8797377347946167, "loss": 2.8085, "nll_loss": 0.6965088844299316, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022865845821797848, "rewards/margins": 0.08568719029426575, "rewards/rejected": -0.08797377347946167, "step": 2747 }, { "epoch": 1.900414937759336, "grad_norm": 7.8816447257995605, "learning_rate": 4.499769479022591e-05, "log_odds_chosen": 7.205872535705566, "log_odds_ratio": -0.2536963224411011, "logits/chosen": -0.333590030670166, "logits/rejected": -0.39955171942710876, "logps/chosen": -0.0653318241238594, "logps/rejected": -1.3166921138763428, "loss": 2.4678, "nll_loss": 0.5915802717208862, "rewards/accuracies": 0.875, "rewards/chosen": -0.006533183157444, "rewards/margins": 0.12513601779937744, "rewards/rejected": -0.13166922330856323, "step": 2748 }, { "epoch": 1.9011065006915628, "grad_norm": 10.401483535766602, "learning_rate": 4.4993852773935764e-05, "log_odds_chosen": 3.9236044883728027, "log_odds_ratio": -0.4264339804649353, "logits/chosen": -0.6277395486831665, "logits/rejected": -0.5958032608032227, "logps/chosen": -0.15293143689632416, "logps/rejected": -0.730241060256958, "loss": 4.6277, "nll_loss": 1.1142935752868652, "rewards/accuracies": 0.875, "rewards/chosen": -0.015293142758309841, "rewards/margins": 0.057730965316295624, "rewards/rejected": -0.07302410900592804, "step": 2749 }, { "epoch": 1.9017980636237897, "grad_norm": 8.748340606689453, "learning_rate": 4.499001075764561e-05, "log_odds_chosen": 6.704047203063965, "log_odds_ratio": -0.06408537179231644, "logits/chosen": -0.3179192543029785, "logits/rejected": -0.4334818124771118, "logps/chosen": -0.06114349886775017, "logps/rejected": -1.5072216987609863, "loss": 2.3598, "nll_loss": 0.5835462808609009, "rewards/accuracies": 1.0, "rewards/chosen": -0.006114350166171789, "rewards/margins": 0.144607812166214, "rewards/rejected": -0.1507221758365631, "step": 2750 }, { "epoch": 1.9024896265560165, "grad_norm": 9.328351974487305, "learning_rate": 4.498616874135547e-05, "log_odds_chosen": 3.970728874206543, "log_odds_ratio": -0.11508572101593018, "logits/chosen": -0.657474935054779, "logits/rejected": -0.6981061100959778, "logps/chosen": -0.05146617442369461, "logps/rejected": -0.6390859484672546, "loss": 3.8184, "nll_loss": 0.9430928230285645, "rewards/accuracies": 1.0, "rewards/chosen": -0.005146617069840431, "rewards/margins": 0.05876198038458824, "rewards/rejected": -0.06390859931707382, "step": 2751 }, { "epoch": 1.9031811894882433, "grad_norm": 4.731758117675781, "learning_rate": 4.4982326725065315e-05, "log_odds_chosen": 4.844266891479492, "log_odds_ratio": -0.4429481625556946, "logits/chosen": -0.22525936365127563, "logits/rejected": -0.18493236601352692, "logps/chosen": -0.1286071240901947, "logps/rejected": -0.9103085398674011, "loss": 2.6905, "nll_loss": 0.6283376216888428, "rewards/accuracies": 0.75, "rewards/chosen": -0.01286071166396141, "rewards/margins": 0.07817014306783676, "rewards/rejected": -0.09103085100650787, "step": 2752 }, { "epoch": 1.9038727524204702, "grad_norm": 7.517221927642822, "learning_rate": 4.497848470877517e-05, "log_odds_chosen": 7.291058540344238, "log_odds_ratio": -0.01622098684310913, "logits/chosen": -0.7878361940383911, "logits/rejected": -0.8620985150337219, "logps/chosen": -0.008632284589111805, "logps/rejected": -1.2846453189849854, "loss": 3.7779, "nll_loss": 0.9428427219390869, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008632285171188414, "rewards/margins": 0.12760131061077118, "rewards/rejected": -0.12846453487873077, "step": 2753 }, { "epoch": 1.904564315352697, "grad_norm": 5.184489727020264, "learning_rate": 4.497464269248502e-05, "log_odds_chosen": 5.2329020500183105, "log_odds_ratio": -0.12184281647205353, "logits/chosen": -0.34280192852020264, "logits/rejected": -0.42546650767326355, "logps/chosen": -0.05324007198214531, "logps/rejected": -0.8889765739440918, "loss": 2.5912, "nll_loss": 0.6356096863746643, "rewards/accuracies": 1.0, "rewards/chosen": -0.005324007477611303, "rewards/margins": 0.08357363939285278, "rewards/rejected": -0.08889764547348022, "step": 2754 }, { "epoch": 1.9052558782849238, "grad_norm": 6.808511257171631, "learning_rate": 4.497080067619487e-05, "log_odds_chosen": 5.882111549377441, "log_odds_ratio": -0.008104901760816574, "logits/chosen": -0.4373306632041931, "logits/rejected": -0.4659738838672638, "logps/chosen": -0.017158400267362595, "logps/rejected": -1.082463264465332, "loss": 2.5273, "nll_loss": 0.6310088634490967, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017158400733023882, "rewards/margins": 0.10653049498796463, "rewards/rejected": -0.1082463338971138, "step": 2755 }, { "epoch": 1.9059474412171507, "grad_norm": 4.522149085998535, "learning_rate": 4.496695865990472e-05, "log_odds_chosen": 6.7430243492126465, "log_odds_ratio": -0.009294845163822174, "logits/chosen": -0.5068034529685974, "logits/rejected": -0.5801413059234619, "logps/chosen": -0.02989388071000576, "logps/rejected": -1.3248211145401, "loss": 2.9167, "nll_loss": 0.728242814540863, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029893883038312197, "rewards/margins": 0.12949271500110626, "rewards/rejected": -0.13248211145401, "step": 2756 }, { "epoch": 1.9066390041493775, "grad_norm": 12.909613609313965, "learning_rate": 4.496311664361457e-05, "log_odds_chosen": 5.796242713928223, "log_odds_ratio": -0.4076513648033142, "logits/chosen": -0.40431565046310425, "logits/rejected": -0.4224545359611511, "logps/chosen": -0.048604223877191544, "logps/rejected": -0.843641996383667, "loss": 3.2017, "nll_loss": 0.7596707344055176, "rewards/accuracies": 0.875, "rewards/chosen": -0.004860422573983669, "rewards/margins": 0.07950377464294434, "rewards/rejected": -0.08436420559883118, "step": 2757 }, { "epoch": 1.9073305670816043, "grad_norm": 4.586458683013916, "learning_rate": 4.495927462732442e-05, "log_odds_chosen": 4.613317012786865, "log_odds_ratio": -0.13756367564201355, "logits/chosen": -0.06619075685739517, "logits/rejected": -0.08160945028066635, "logps/chosen": -0.04854509234428406, "logps/rejected": -0.4217030107975006, "loss": 2.2961, "nll_loss": 0.5602596402168274, "rewards/accuracies": 0.875, "rewards/chosen": -0.004854509606957436, "rewards/margins": 0.037315793335437775, "rewards/rejected": -0.04217030107975006, "step": 2758 }, { "epoch": 1.9080221300138311, "grad_norm": 4.820953845977783, "learning_rate": 4.495543261103427e-05, "log_odds_chosen": 4.910735607147217, "log_odds_ratio": -0.09200502187013626, "logits/chosen": -0.5208714008331299, "logits/rejected": -0.5347784757614136, "logps/chosen": -0.04374665021896362, "logps/rejected": -0.9003453254699707, "loss": 2.9172, "nll_loss": 0.7201057076454163, "rewards/accuracies": 1.0, "rewards/chosen": -0.004374665208160877, "rewards/margins": 0.08565986901521683, "rewards/rejected": -0.09003454446792603, "step": 2759 }, { "epoch": 1.908713692946058, "grad_norm": 4.447933673858643, "learning_rate": 4.495159059474413e-05, "log_odds_chosen": 5.480074882507324, "log_odds_ratio": -0.02876094914972782, "logits/chosen": -0.3962971568107605, "logits/rejected": -0.4510257840156555, "logps/chosen": -0.015622947365045547, "logps/rejected": -0.7562682032585144, "loss": 2.493, "nll_loss": 0.6203768253326416, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015622947830706835, "rewards/margins": 0.07406453043222427, "rewards/rejected": -0.07562682032585144, "step": 2760 }, { "epoch": 1.9094052558782848, "grad_norm": 7.416686058044434, "learning_rate": 4.494774857845397e-05, "log_odds_chosen": 4.5373969078063965, "log_odds_ratio": -0.0510084293782711, "logits/chosen": -0.6713343262672424, "logits/rejected": -0.6580287218093872, "logps/chosen": -0.02951555699110031, "logps/rejected": -0.8437892198562622, "loss": 3.0495, "nll_loss": 0.7572864294052124, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029515554197132587, "rewards/margins": 0.08142737299203873, "rewards/rejected": -0.0843789279460907, "step": 2761 }, { "epoch": 1.9100968188105116, "grad_norm": 10.607845306396484, "learning_rate": 4.4943906562163825e-05, "log_odds_chosen": 2.8581295013427734, "log_odds_ratio": -0.6374597549438477, "logits/chosen": -0.5547253489494324, "logits/rejected": -0.5826330780982971, "logps/chosen": -0.09037734568119049, "logps/rejected": -0.7486076354980469, "loss": 3.3589, "nll_loss": 0.7759672403335571, "rewards/accuracies": 0.875, "rewards/chosen": -0.009037734940648079, "rewards/margins": 0.065823033452034, "rewards/rejected": -0.07486076653003693, "step": 2762 }, { "epoch": 1.9107883817427385, "grad_norm": 3.6539220809936523, "learning_rate": 4.494006454587368e-05, "log_odds_chosen": 7.207740783691406, "log_odds_ratio": -0.004129151813685894, "logits/chosen": -0.27698051929473877, "logits/rejected": -0.32458335161209106, "logps/chosen": -0.0025564860552549362, "logps/rejected": -0.8559420108795166, "loss": 2.4033, "nll_loss": 0.6004086136817932, "rewards/accuracies": 1.0, "rewards/chosen": -0.000255648628808558, "rewards/margins": 0.08533856272697449, "rewards/rejected": -0.08559420704841614, "step": 2763 }, { "epoch": 1.9114799446749653, "grad_norm": 8.975886344909668, "learning_rate": 4.493622252958353e-05, "log_odds_chosen": 4.372048377990723, "log_odds_ratio": -0.060153741389513016, "logits/chosen": -0.40431973338127136, "logits/rejected": -0.4270198941230774, "logps/chosen": -0.05016879737377167, "logps/rejected": -0.8772868514060974, "loss": 3.4937, "nll_loss": 0.8674010038375854, "rewards/accuracies": 1.0, "rewards/chosen": -0.005016880109906197, "rewards/margins": 0.08271180838346481, "rewards/rejected": -0.08772867918014526, "step": 2764 }, { "epoch": 1.9121715076071921, "grad_norm": 7.6837382316589355, "learning_rate": 4.4932380513293376e-05, "log_odds_chosen": 3.9830479621887207, "log_odds_ratio": -0.1646263599395752, "logits/chosen": -0.6387200355529785, "logits/rejected": -0.6545137763023376, "logps/chosen": -0.03771314024925232, "logps/rejected": -0.5626676678657532, "loss": 2.6703, "nll_loss": 0.6511242985725403, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037713139317929745, "rewards/margins": 0.052495453506708145, "rewards/rejected": -0.05626676604151726, "step": 2765 }, { "epoch": 1.912863070539419, "grad_norm": 9.061310768127441, "learning_rate": 4.492853849700323e-05, "log_odds_chosen": 5.80774450302124, "log_odds_ratio": -0.3405974209308624, "logits/chosen": -0.2839244604110718, "logits/rejected": -0.29523801803588867, "logps/chosen": -0.052610985934734344, "logps/rejected": -1.217494010925293, "loss": 2.6866, "nll_loss": 0.6375950574874878, "rewards/accuracies": 0.75, "rewards/chosen": -0.005261098966002464, "rewards/margins": 0.11648830771446228, "rewards/rejected": -0.1217494010925293, "step": 2766 }, { "epoch": 1.9135546334716458, "grad_norm": 7.12164306640625, "learning_rate": 4.492469648071308e-05, "log_odds_chosen": 5.14296817779541, "log_odds_ratio": -0.17003518342971802, "logits/chosen": -0.15876546502113342, "logits/rejected": -0.1746135652065277, "logps/chosen": -0.035621292889118195, "logps/rejected": -0.6235146522521973, "loss": 3.1877, "nll_loss": 0.7799243927001953, "rewards/accuracies": 0.875, "rewards/chosen": -0.0035621291026473045, "rewards/margins": 0.058789342641830444, "rewards/rejected": -0.062351472675800323, "step": 2767 }, { "epoch": 1.9142461964038726, "grad_norm": 19.558931350708008, "learning_rate": 4.4920854464422927e-05, "log_odds_chosen": 1.8244339227676392, "log_odds_ratio": -0.6450532078742981, "logits/chosen": -0.20103971660137177, "logits/rejected": -0.17665702104568481, "logps/chosen": -0.12792760133743286, "logps/rejected": -0.4163038730621338, "loss": 3.523, "nll_loss": 0.8162442445755005, "rewards/accuracies": 0.75, "rewards/chosen": -0.01279276143759489, "rewards/margins": 0.028837626799941063, "rewards/rejected": -0.04163038730621338, "step": 2768 }, { "epoch": 1.9149377593360994, "grad_norm": 5.6358962059021, "learning_rate": 4.4917012448132786e-05, "log_odds_chosen": 2.602464199066162, "log_odds_ratio": -0.23151105642318726, "logits/chosen": -0.6144603490829468, "logits/rejected": -0.6773332357406616, "logps/chosen": -0.09697036445140839, "logps/rejected": -0.673506498336792, "loss": 2.4692, "nll_loss": 0.5941553115844727, "rewards/accuracies": 0.875, "rewards/chosen": -0.009697036817669868, "rewards/margins": 0.05765360966324806, "rewards/rejected": -0.06735064834356308, "step": 2769 }, { "epoch": 1.9156293222683263, "grad_norm": 10.034943580627441, "learning_rate": 4.491317043184263e-05, "log_odds_chosen": 3.2985281944274902, "log_odds_ratio": -0.25245070457458496, "logits/chosen": -0.6084011197090149, "logits/rejected": -0.6075975298881531, "logps/chosen": -0.07471877336502075, "logps/rejected": -0.7278150320053101, "loss": 3.1846, "nll_loss": 0.7709120512008667, "rewards/accuracies": 0.875, "rewards/chosen": -0.007471877615898848, "rewards/margins": 0.06530961394309998, "rewards/rejected": -0.07278149574995041, "step": 2770 }, { "epoch": 1.916320885200553, "grad_norm": 7.76045036315918, "learning_rate": 4.4909328415552484e-05, "log_odds_chosen": 4.8657941818237305, "log_odds_ratio": -0.16628439724445343, "logits/chosen": -0.5928363800048828, "logits/rejected": -0.620124340057373, "logps/chosen": -0.06428052484989166, "logps/rejected": -0.8373928070068359, "loss": 2.705, "nll_loss": 0.6596198081970215, "rewards/accuracies": 0.875, "rewards/chosen": -0.0064280531369149685, "rewards/margins": 0.07731122523546219, "rewards/rejected": -0.0837392807006836, "step": 2771 }, { "epoch": 1.91701244813278, "grad_norm": 6.2884955406188965, "learning_rate": 4.4905486399262336e-05, "log_odds_chosen": 5.502465724945068, "log_odds_ratio": -0.1859067976474762, "logits/chosen": -0.588131308555603, "logits/rejected": -0.6590185165405273, "logps/chosen": -0.043672025203704834, "logps/rejected": -0.8826485872268677, "loss": 2.5943, "nll_loss": 0.6299718618392944, "rewards/accuracies": 0.875, "rewards/chosen": -0.004367202520370483, "rewards/margins": 0.083897665143013, "rewards/rejected": -0.08826486766338348, "step": 2772 }, { "epoch": 1.9177040110650068, "grad_norm": 5.941636562347412, "learning_rate": 4.490164438297219e-05, "log_odds_chosen": 4.905325412750244, "log_odds_ratio": -0.022108597680926323, "logits/chosen": -0.37843644618988037, "logits/rejected": -0.43814554810523987, "logps/chosen": -0.03972003236413002, "logps/rejected": -1.0460373163223267, "loss": 2.7761, "nll_loss": 0.6918151378631592, "rewards/accuracies": 1.0, "rewards/chosen": -0.003972003236413002, "rewards/margins": 0.1006317287683487, "rewards/rejected": -0.10460373759269714, "step": 2773 }, { "epoch": 1.9183955739972336, "grad_norm": 14.511189460754395, "learning_rate": 4.4897802366682034e-05, "log_odds_chosen": 2.286804676055908, "log_odds_ratio": -0.4922664761543274, "logits/chosen": -0.5373677015304565, "logits/rejected": -0.5104278326034546, "logps/chosen": -0.06641744822263718, "logps/rejected": -0.5359044075012207, "loss": 3.1558, "nll_loss": 0.7397192716598511, "rewards/accuracies": 0.625, "rewards/chosen": -0.00664174510166049, "rewards/margins": 0.046948693692684174, "rewards/rejected": -0.05359043926000595, "step": 2774 }, { "epoch": 1.9190871369294604, "grad_norm": 8.476889610290527, "learning_rate": 4.489396035039189e-05, "log_odds_chosen": 5.486364364624023, "log_odds_ratio": -0.040458664298057556, "logits/chosen": -0.39343497157096863, "logits/rejected": -0.4808794856071472, "logps/chosen": -0.029638398438692093, "logps/rejected": -0.981798529624939, "loss": 3.5648, "nll_loss": 0.8871630430221558, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029638397973030806, "rewards/margins": 0.09521600604057312, "rewards/rejected": -0.09817984700202942, "step": 2775 }, { "epoch": 1.9197786998616873, "grad_norm": 4.909126281738281, "learning_rate": 4.489011833410174e-05, "log_odds_chosen": 5.147785186767578, "log_odds_ratio": -0.115696981549263, "logits/chosen": -0.7142544388771057, "logits/rejected": -0.7357014417648315, "logps/chosen": -0.03013111650943756, "logps/rejected": -0.8791296482086182, "loss": 3.1168, "nll_loss": 0.7676218748092651, "rewards/accuracies": 1.0, "rewards/chosen": -0.003013111650943756, "rewards/margins": 0.08489985764026642, "rewards/rejected": -0.08791296929121017, "step": 2776 }, { "epoch": 1.920470262793914, "grad_norm": 3.1792287826538086, "learning_rate": 4.4886276317811585e-05, "log_odds_chosen": 2.745725393295288, "log_odds_ratio": -0.17853854596614838, "logits/chosen": -0.5553758144378662, "logits/rejected": -0.5793541669845581, "logps/chosen": -0.08622580021619797, "logps/rejected": -0.5508575439453125, "loss": 1.9488, "nll_loss": 0.46934592723846436, "rewards/accuracies": 1.0, "rewards/chosen": -0.008622580207884312, "rewards/margins": 0.046463172882795334, "rewards/rejected": -0.05508575215935707, "step": 2777 }, { "epoch": 1.921161825726141, "grad_norm": 8.015707015991211, "learning_rate": 4.4882434301521444e-05, "log_odds_chosen": 3.6785333156585693, "log_odds_ratio": -0.19257181882858276, "logits/chosen": -0.2816515862941742, "logits/rejected": -0.30312466621398926, "logps/chosen": -0.04474589228630066, "logps/rejected": -0.4880346655845642, "loss": 3.1441, "nll_loss": 0.7667652368545532, "rewards/accuracies": 0.875, "rewards/chosen": -0.004474589601159096, "rewards/margins": 0.044328875839710236, "rewards/rejected": -0.04880346730351448, "step": 2778 }, { "epoch": 1.9218533886583677, "grad_norm": 10.081058502197266, "learning_rate": 4.487859228523129e-05, "log_odds_chosen": 3.226313829421997, "log_odds_ratio": -0.3414326608181, "logits/chosen": -0.47777923941612244, "logits/rejected": -0.521342396736145, "logps/chosen": -0.08785060048103333, "logps/rejected": -0.7554973363876343, "loss": 2.8479, "nll_loss": 0.6778291463851929, "rewards/accuracies": 0.625, "rewards/chosen": -0.008785059675574303, "rewards/margins": 0.06676468253135681, "rewards/rejected": -0.07554973661899567, "step": 2779 }, { "epoch": 1.9225449515905948, "grad_norm": 6.341608047485352, "learning_rate": 4.487475026894114e-05, "log_odds_chosen": 4.8398966789245605, "log_odds_ratio": -0.04216049984097481, "logits/chosen": -0.4660530686378479, "logits/rejected": -0.49698352813720703, "logps/chosen": -0.019141169264912605, "logps/rejected": -0.7642238736152649, "loss": 2.6743, "nll_loss": 0.6643602848052979, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019141167867928743, "rewards/margins": 0.07450827956199646, "rewards/rejected": -0.07642239332199097, "step": 2780 }, { "epoch": 1.9232365145228216, "grad_norm": 13.689393043518066, "learning_rate": 4.4870908252650995e-05, "log_odds_chosen": 3.5703673362731934, "log_odds_ratio": -0.25399351119995117, "logits/chosen": -0.32772380113601685, "logits/rejected": -0.4199979603290558, "logps/chosen": -0.049331896007061005, "logps/rejected": -0.6844727993011475, "loss": 3.7072, "nll_loss": 0.901390552520752, "rewards/accuracies": 0.875, "rewards/chosen": -0.0049331896007061005, "rewards/margins": 0.0635140985250473, "rewards/rejected": -0.0684472918510437, "step": 2781 }, { "epoch": 1.9239280774550485, "grad_norm": 16.07459259033203, "learning_rate": 4.486706623636085e-05, "log_odds_chosen": 1.7226035594940186, "log_odds_ratio": -1.1212856769561768, "logits/chosen": -0.5351619124412537, "logits/rejected": -0.6109406352043152, "logps/chosen": -0.19895480573177338, "logps/rejected": -0.5249719023704529, "loss": 4.4027, "nll_loss": 0.9885578155517578, "rewards/accuracies": 0.75, "rewards/chosen": -0.019895480945706367, "rewards/margins": 0.03260171413421631, "rewards/rejected": -0.05249718949198723, "step": 2782 }, { "epoch": 1.9246196403872753, "grad_norm": 18.613061904907227, "learning_rate": 4.486322422007069e-05, "log_odds_chosen": 4.626638889312744, "log_odds_ratio": -0.2957945168018341, "logits/chosen": -0.607136607170105, "logits/rejected": -0.6890389323234558, "logps/chosen": -0.044126056134700775, "logps/rejected": -0.6813442707061768, "loss": 2.5626, "nll_loss": 0.6110590100288391, "rewards/accuracies": 0.75, "rewards/chosen": -0.004412606358528137, "rewards/margins": 0.06372182816267014, "rewards/rejected": -0.06813443452119827, "step": 2783 }, { "epoch": 1.9253112033195021, "grad_norm": 7.884077072143555, "learning_rate": 4.4859382203780545e-05, "log_odds_chosen": 5.40144157409668, "log_odds_ratio": -0.016243906691670418, "logits/chosen": -0.4148845970630646, "logits/rejected": -0.4984428882598877, "logps/chosen": -0.019348058849573135, "logps/rejected": -1.0130583047866821, "loss": 2.9824, "nll_loss": 0.743984580039978, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019348057685419917, "rewards/margins": 0.0993710309267044, "rewards/rejected": -0.10130582749843597, "step": 2784 }, { "epoch": 1.926002766251729, "grad_norm": 9.836116790771484, "learning_rate": 4.48555401874904e-05, "log_odds_chosen": 4.070340633392334, "log_odds_ratio": -0.5315168499946594, "logits/chosen": -0.34487390518188477, "logits/rejected": -0.3882456421852112, "logps/chosen": -0.13489031791687012, "logps/rejected": -0.7666717171669006, "loss": 3.333, "nll_loss": 0.7800894379615784, "rewards/accuracies": 0.75, "rewards/chosen": -0.013489031232893467, "rewards/margins": 0.06317814439535141, "rewards/rejected": -0.0766671746969223, "step": 2785 }, { "epoch": 1.9266943291839558, "grad_norm": 9.22327995300293, "learning_rate": 4.4851698171200243e-05, "log_odds_chosen": 6.947870254516602, "log_odds_ratio": -0.006460743024945259, "logits/chosen": -0.16602101922035217, "logits/rejected": -0.21374446153640747, "logps/chosen": -0.01367330364882946, "logps/rejected": -1.1837010383605957, "loss": 3.0261, "nll_loss": 0.7558853626251221, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013673304347321391, "rewards/margins": 0.11700277030467987, "rewards/rejected": -0.11837010085582733, "step": 2786 }, { "epoch": 1.9273858921161826, "grad_norm": 10.728938102722168, "learning_rate": 4.48478561549101e-05, "log_odds_chosen": 4.983241558074951, "log_odds_ratio": -0.04518432915210724, "logits/chosen": -0.7207584381103516, "logits/rejected": -0.7739526629447937, "logps/chosen": -0.03859318047761917, "logps/rejected": -0.9132634401321411, "loss": 4.3894, "nll_loss": 1.0928202867507935, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038593183271586895, "rewards/margins": 0.0874670222401619, "rewards/rejected": -0.09132634103298187, "step": 2787 }, { "epoch": 1.9280774550484094, "grad_norm": 5.109100818634033, "learning_rate": 4.484401413861995e-05, "log_odds_chosen": 2.9227466583251953, "log_odds_ratio": -0.10756994783878326, "logits/chosen": -0.6498531103134155, "logits/rejected": -0.7055495381355286, "logps/chosen": -0.06583870947360992, "logps/rejected": -0.7072476148605347, "loss": 2.6467, "nll_loss": 0.6509165167808533, "rewards/accuracies": 1.0, "rewards/chosen": -0.006583871319890022, "rewards/margins": 0.06414090096950531, "rewards/rejected": -0.07072477042675018, "step": 2788 }, { "epoch": 1.9287690179806363, "grad_norm": 5.672557830810547, "learning_rate": 4.48401721223298e-05, "log_odds_chosen": 3.876668691635132, "log_odds_ratio": -0.19308790564537048, "logits/chosen": -0.4066796600818634, "logits/rejected": -0.40035372972488403, "logps/chosen": -0.059991996735334396, "logps/rejected": -0.8340362310409546, "loss": 2.1607, "nll_loss": 0.5208736062049866, "rewards/accuracies": 0.875, "rewards/chosen": -0.005999199580401182, "rewards/margins": 0.07740442454814911, "rewards/rejected": -0.08340362459421158, "step": 2789 }, { "epoch": 1.929460580912863, "grad_norm": 5.521088600158691, "learning_rate": 4.483633010603965e-05, "log_odds_chosen": 4.913776874542236, "log_odds_ratio": -0.0815575122833252, "logits/chosen": -0.40390971302986145, "logits/rejected": -0.3940833508968353, "logps/chosen": -0.04151315614581108, "logps/rejected": -0.8694093227386475, "loss": 2.8957, "nll_loss": 0.7157632112503052, "rewards/accuracies": 1.0, "rewards/chosen": -0.0041513158939778805, "rewards/margins": 0.08278961479663849, "rewards/rejected": -0.0869409367442131, "step": 2790 }, { "epoch": 1.93015214384509, "grad_norm": 11.908353805541992, "learning_rate": 4.4832488089749506e-05, "log_odds_chosen": 3.066312074661255, "log_odds_ratio": -0.3459596633911133, "logits/chosen": -0.5527938008308411, "logits/rejected": -0.5647131204605103, "logps/chosen": -0.18038421869277954, "logps/rejected": -0.8058091998100281, "loss": 2.8924, "nll_loss": 0.6885152459144592, "rewards/accuracies": 0.875, "rewards/chosen": -0.018038421869277954, "rewards/margins": 0.06254249811172485, "rewards/rejected": -0.08058091998100281, "step": 2791 }, { "epoch": 1.9308437067773168, "grad_norm": 10.135628700256348, "learning_rate": 4.482864607345935e-05, "log_odds_chosen": 3.6124460697174072, "log_odds_ratio": -0.32742828130722046, "logits/chosen": -0.38191068172454834, "logits/rejected": -0.43758589029312134, "logps/chosen": -0.11376246809959412, "logps/rejected": -1.071624755859375, "loss": 3.1569, "nll_loss": 0.756482720375061, "rewards/accuracies": 0.75, "rewards/chosen": -0.011376245878636837, "rewards/margins": 0.09578622877597809, "rewards/rejected": -0.1071624681353569, "step": 2792 }, { "epoch": 1.9315352697095436, "grad_norm": 8.463871002197266, "learning_rate": 4.4824804057169204e-05, "log_odds_chosen": 2.9357008934020996, "log_odds_ratio": -0.27367833256721497, "logits/chosen": -0.5386611819267273, "logits/rejected": -0.5462771654129028, "logps/chosen": -0.08388536423444748, "logps/rejected": -0.6182925701141357, "loss": 3.1276, "nll_loss": 0.7545251846313477, "rewards/accuracies": 0.75, "rewards/chosen": -0.008388536050915718, "rewards/margins": 0.05344071984291077, "rewards/rejected": -0.061829254031181335, "step": 2793 }, { "epoch": 1.9322268326417704, "grad_norm": 5.5890421867370605, "learning_rate": 4.4820962040879056e-05, "log_odds_chosen": 3.97865629196167, "log_odds_ratio": -0.06815844029188156, "logits/chosen": -1.0436514616012573, "logits/rejected": -1.0876836776733398, "logps/chosen": -0.02249925397336483, "logps/rejected": -0.4423280954360962, "loss": 3.9761, "nll_loss": 0.9872040748596191, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022499256301671267, "rewards/margins": 0.04198288917541504, "rewards/rejected": -0.0442328155040741, "step": 2794 }, { "epoch": 1.9329183955739973, "grad_norm": 8.73758602142334, "learning_rate": 4.48171200245889e-05, "log_odds_chosen": 3.9699978828430176, "log_odds_ratio": -0.24298183619976044, "logits/chosen": -0.7401033639907837, "logits/rejected": -0.7354604005813599, "logps/chosen": -0.05277275666594505, "logps/rejected": -0.5997828245162964, "loss": 3.7816, "nll_loss": 0.9210982322692871, "rewards/accuracies": 0.875, "rewards/chosen": -0.005277275573462248, "rewards/margins": 0.054701004177331924, "rewards/rejected": -0.05997828394174576, "step": 2795 }, { "epoch": 1.933609958506224, "grad_norm": 7.679773330688477, "learning_rate": 4.481327800829876e-05, "log_odds_chosen": 5.230242729187012, "log_odds_ratio": -0.027260133996605873, "logits/chosen": -0.928388237953186, "logits/rejected": -0.9680888652801514, "logps/chosen": -0.025700656697154045, "logps/rejected": -1.1047217845916748, "loss": 3.5528, "nll_loss": 0.885481595993042, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025700656697154045, "rewards/margins": 0.10790210962295532, "rewards/rejected": -0.110472172498703, "step": 2796 }, { "epoch": 1.934301521438451, "grad_norm": 10.935383796691895, "learning_rate": 4.480943599200861e-05, "log_odds_chosen": 5.8976030349731445, "log_odds_ratio": -0.19832640886306763, "logits/chosen": -0.550868570804596, "logits/rejected": -0.6094168424606323, "logps/chosen": -0.02254554256796837, "logps/rejected": -0.7352597117424011, "loss": 3.0141, "nll_loss": 0.7336862683296204, "rewards/accuracies": 0.875, "rewards/chosen": -0.0022545545361936092, "rewards/margins": 0.07127141952514648, "rewards/rejected": -0.07352596521377563, "step": 2797 }, { "epoch": 1.9349930843706777, "grad_norm": 8.525611877441406, "learning_rate": 4.480559397571846e-05, "log_odds_chosen": 4.466034412384033, "log_odds_ratio": -0.09100319445133209, "logits/chosen": -0.5881481170654297, "logits/rejected": -0.5896488428115845, "logps/chosen": -0.040058400481939316, "logps/rejected": -0.8737333416938782, "loss": 2.5324, "nll_loss": 0.6240019798278809, "rewards/accuracies": 1.0, "rewards/chosen": -0.004005840048193932, "rewards/margins": 0.0833674967288971, "rewards/rejected": -0.08737333118915558, "step": 2798 }, { "epoch": 1.9356846473029046, "grad_norm": 6.885192394256592, "learning_rate": 4.480175195942831e-05, "log_odds_chosen": 3.733163356781006, "log_odds_ratio": -0.11658845096826553, "logits/chosen": -0.5719266533851624, "logits/rejected": -0.595264732837677, "logps/chosen": -0.056412748992443085, "logps/rejected": -1.1346626281738281, "loss": 2.8544, "nll_loss": 0.7019389271736145, "rewards/accuracies": 1.0, "rewards/chosen": -0.005641274619847536, "rewards/margins": 0.10782498866319656, "rewards/rejected": -0.11346626281738281, "step": 2799 }, { "epoch": 1.9363762102351314, "grad_norm": 8.165283203125, "learning_rate": 4.4797909943138164e-05, "log_odds_chosen": 3.9729034900665283, "log_odds_ratio": -0.15297673642635345, "logits/chosen": -0.927630603313446, "logits/rejected": -0.9283353090286255, "logps/chosen": -0.05737540125846863, "logps/rejected": -0.726395845413208, "loss": 3.6105, "nll_loss": 0.887338399887085, "rewards/accuracies": 0.875, "rewards/chosen": -0.005737540312111378, "rewards/margins": 0.0669020488858223, "rewards/rejected": -0.0726395919919014, "step": 2800 }, { "epoch": 1.9370677731673582, "grad_norm": 8.619080543518066, "learning_rate": 4.479406792684801e-05, "log_odds_chosen": 5.21150016784668, "log_odds_ratio": -0.023713568225502968, "logits/chosen": -0.5210046768188477, "logits/rejected": -0.5669018030166626, "logps/chosen": -0.04111271724104881, "logps/rejected": -1.0991835594177246, "loss": 2.6817, "nll_loss": 0.6680553555488586, "rewards/accuracies": 1.0, "rewards/chosen": -0.004111272282898426, "rewards/margins": 0.1058070957660675, "rewards/rejected": -0.10991836339235306, "step": 2801 }, { "epoch": 1.937759336099585, "grad_norm": 8.75975513458252, "learning_rate": 4.479022591055786e-05, "log_odds_chosen": 6.741459846496582, "log_odds_ratio": -0.010524172335863113, "logits/chosen": -0.4283122420310974, "logits/rejected": -0.45947590470314026, "logps/chosen": -0.006821490824222565, "logps/rejected": -1.2476394176483154, "loss": 2.4996, "nll_loss": 0.6238444447517395, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006821490824222565, "rewards/margins": 0.1240818053483963, "rewards/rejected": -0.12476395070552826, "step": 2802 }, { "epoch": 1.938450899031812, "grad_norm": 6.204493999481201, "learning_rate": 4.4786383894267715e-05, "log_odds_chosen": 6.076966285705566, "log_odds_ratio": -0.10558860003948212, "logits/chosen": -0.5615969300270081, "logits/rejected": -0.5650418400764465, "logps/chosen": -0.03752349689602852, "logps/rejected": -1.1340097188949585, "loss": 2.4582, "nll_loss": 0.603987455368042, "rewards/accuracies": 1.0, "rewards/chosen": -0.003752349875867367, "rewards/margins": 0.10964862257242203, "rewards/rejected": -0.11340098083019257, "step": 2803 }, { "epoch": 1.9391424619640387, "grad_norm": 4.198095321655273, "learning_rate": 4.478254187797756e-05, "log_odds_chosen": 6.323598861694336, "log_odds_ratio": -0.00858142040669918, "logits/chosen": -0.3735266923904419, "logits/rejected": -0.4268512725830078, "logps/chosen": -0.009092532098293304, "logps/rejected": -1.0283095836639404, "loss": 2.2402, "nll_loss": 0.5591831207275391, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009092532563954592, "rewards/margins": 0.10192171484231949, "rewards/rejected": -0.10283096134662628, "step": 2804 }, { "epoch": 1.9398340248962656, "grad_norm": 7.274281978607178, "learning_rate": 4.477869986168742e-05, "log_odds_chosen": 6.051234245300293, "log_odds_ratio": -0.022627316415309906, "logits/chosen": -0.6564967036247253, "logits/rejected": -0.7128115892410278, "logps/chosen": -0.015038514509797096, "logps/rejected": -0.9913001656532288, "loss": 2.6809, "nll_loss": 0.6679600477218628, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015038514975458384, "rewards/margins": 0.09762617200613022, "rewards/rejected": -0.09913001954555511, "step": 2805 }, { "epoch": 1.9405255878284924, "grad_norm": 8.436661720275879, "learning_rate": 4.4774857845397265e-05, "log_odds_chosen": 5.065516948699951, "log_odds_ratio": -0.21357516944408417, "logits/chosen": -0.514738142490387, "logits/rejected": -0.5642529726028442, "logps/chosen": -0.04693892225623131, "logps/rejected": -0.9647188782691956, "loss": 2.5694, "nll_loss": 0.620995283126831, "rewards/accuracies": 0.875, "rewards/chosen": -0.004693892784416676, "rewards/margins": 0.0917779952287674, "rewards/rejected": -0.0964718908071518, "step": 2806 }, { "epoch": 1.9412171507607192, "grad_norm": 9.0737886428833, "learning_rate": 4.477101582910712e-05, "log_odds_chosen": 6.683316707611084, "log_odds_ratio": -0.03487955406308174, "logits/chosen": -0.4747570753097534, "logits/rejected": -0.4955167770385742, "logps/chosen": -0.011380588635802269, "logps/rejected": -0.9118317365646362, "loss": 2.7271, "nll_loss": 0.6782808899879456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011380589567124844, "rewards/margins": 0.09004510939121246, "rewards/rejected": -0.09118317067623138, "step": 2807 }, { "epoch": 1.941908713692946, "grad_norm": 5.534692287445068, "learning_rate": 4.476717381281697e-05, "log_odds_chosen": 7.164931774139404, "log_odds_ratio": -0.009051812812685966, "logits/chosen": -0.5594828128814697, "logits/rejected": -0.5811551809310913, "logps/chosen": -0.005597162526100874, "logps/rejected": -0.8697156310081482, "loss": 2.5355, "nll_loss": 0.6329704523086548, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005597162526100874, "rewards/margins": 0.08641184866428375, "rewards/rejected": -0.08697156608104706, "step": 2808 }, { "epoch": 1.9426002766251729, "grad_norm": 8.621345520019531, "learning_rate": 4.476333179652682e-05, "log_odds_chosen": 4.592233657836914, "log_odds_ratio": -0.15282797813415527, "logits/chosen": -0.3370625376701355, "logits/rejected": -0.389223575592041, "logps/chosen": -0.07554537057876587, "logps/rejected": -1.1386663913726807, "loss": 3.5834, "nll_loss": 0.8805585503578186, "rewards/accuracies": 0.875, "rewards/chosen": -0.007554537151008844, "rewards/margins": 0.1063121035695076, "rewards/rejected": -0.11386663466691971, "step": 2809 }, { "epoch": 1.9432918395573997, "grad_norm": 11.15650463104248, "learning_rate": 4.475948978023667e-05, "log_odds_chosen": 5.505188465118408, "log_odds_ratio": -0.10849100351333618, "logits/chosen": -0.38597604632377625, "logits/rejected": -0.4197837710380554, "logps/chosen": -0.03493032231926918, "logps/rejected": -1.2237557172775269, "loss": 4.1632, "nll_loss": 1.0299582481384277, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034930319525301456, "rewards/margins": 0.11888253688812256, "rewards/rejected": -0.12237557768821716, "step": 2810 }, { "epoch": 1.9439834024896265, "grad_norm": 6.0465922355651855, "learning_rate": 4.475564776394652e-05, "log_odds_chosen": 7.783335208892822, "log_odds_ratio": -0.0031097978353500366, "logits/chosen": -0.17682109773159027, "logits/rejected": -0.22807860374450684, "logps/chosen": -0.001854907488450408, "logps/rejected": -1.069655418395996, "loss": 2.3694, "nll_loss": 0.5920292139053345, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018549074593465775, "rewards/margins": 0.1067800521850586, "rewards/rejected": -0.10696554183959961, "step": 2811 }, { "epoch": 1.9446749654218534, "grad_norm": 5.259706020355225, "learning_rate": 4.475180574765637e-05, "log_odds_chosen": 5.722519397735596, "log_odds_ratio": -0.015401207841932774, "logits/chosen": -0.28977158665657043, "logits/rejected": -0.3090120553970337, "logps/chosen": -0.0432988703250885, "logps/rejected": -0.9075368642807007, "loss": 2.691, "nll_loss": 0.6712194681167603, "rewards/accuracies": 1.0, "rewards/chosen": -0.004329887684434652, "rewards/margins": 0.08642380684614182, "rewards/rejected": -0.0907536968588829, "step": 2812 }, { "epoch": 1.9453665283540802, "grad_norm": 4.855476379394531, "learning_rate": 4.474796373136622e-05, "log_odds_chosen": 5.955103397369385, "log_odds_ratio": -0.02168934792280197, "logits/chosen": -0.311231791973114, "logits/rejected": -0.38722100853919983, "logps/chosen": -0.015130783431231976, "logps/rejected": -0.5940335988998413, "loss": 2.3921, "nll_loss": 0.5958442091941833, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015130782267078757, "rewards/margins": 0.057890281081199646, "rewards/rejected": -0.05940335988998413, "step": 2813 }, { "epoch": 1.946058091286307, "grad_norm": 5.340009689331055, "learning_rate": 4.474412171507608e-05, "log_odds_chosen": 6.406757354736328, "log_odds_ratio": -0.009401117451488972, "logits/chosen": -0.700808584690094, "logits/rejected": -0.7515424489974976, "logps/chosen": -0.020106710493564606, "logps/rejected": -1.5756630897521973, "loss": 2.7495, "nll_loss": 0.6864414811134338, "rewards/accuracies": 1.0, "rewards/chosen": -0.002010670956224203, "rewards/margins": 0.15555565059185028, "rewards/rejected": -0.15756630897521973, "step": 2814 }, { "epoch": 1.9467496542185339, "grad_norm": 7.415836334228516, "learning_rate": 4.4740279698785924e-05, "log_odds_chosen": 3.2339749336242676, "log_odds_ratio": -0.16565166413784027, "logits/chosen": -0.265865683555603, "logits/rejected": -0.2878696918487549, "logps/chosen": -0.05613917112350464, "logps/rejected": -0.42157119512557983, "loss": 3.0349, "nll_loss": 0.7421600818634033, "rewards/accuracies": 1.0, "rewards/chosen": -0.005613917484879494, "rewards/margins": 0.03654320538043976, "rewards/rejected": -0.0421571210026741, "step": 2815 }, { "epoch": 1.9474412171507607, "grad_norm": 5.433392524719238, "learning_rate": 4.4736437682495776e-05, "log_odds_chosen": 4.738263130187988, "log_odds_ratio": -0.29604262113571167, "logits/chosen": -0.4162006974220276, "logits/rejected": -0.43315163254737854, "logps/chosen": -0.08273541927337646, "logps/rejected": -1.1874265670776367, "loss": 2.7732, "nll_loss": 0.6636911034584045, "rewards/accuracies": 0.75, "rewards/chosen": -0.008273541927337646, "rewards/margins": 0.11046911776065826, "rewards/rejected": -0.11874265968799591, "step": 2816 }, { "epoch": 1.9481327800829875, "grad_norm": 18.027544021606445, "learning_rate": 4.473259566620563e-05, "log_odds_chosen": 2.0296730995178223, "log_odds_ratio": -1.2344518899917603, "logits/chosen": -0.6975828409194946, "logits/rejected": -0.582709550857544, "logps/chosen": -0.16763977706432343, "logps/rejected": -0.5025269389152527, "loss": 4.5973, "nll_loss": 1.025890588760376, "rewards/accuracies": 0.625, "rewards/chosen": -0.016763977706432343, "rewards/margins": 0.03348871320486069, "rewards/rejected": -0.05025269091129303, "step": 2817 }, { "epoch": 1.9488243430152143, "grad_norm": 8.576213836669922, "learning_rate": 4.472875364991548e-05, "log_odds_chosen": 8.456327438354492, "log_odds_ratio": -0.00033374037593603134, "logits/chosen": 0.07133971899747849, "logits/rejected": 0.011340849101543427, "logps/chosen": -0.000913599447812885, "logps/rejected": -1.2824742794036865, "loss": 3.5978, "nll_loss": 0.8994190692901611, "rewards/accuracies": 1.0, "rewards/chosen": -9.135994332609698e-05, "rewards/margins": 0.12815608084201813, "rewards/rejected": -0.1282474398612976, "step": 2818 }, { "epoch": 1.9495159059474412, "grad_norm": 12.02246379852295, "learning_rate": 4.472491163362533e-05, "log_odds_chosen": 6.741928577423096, "log_odds_ratio": -0.006194033194333315, "logits/chosen": -0.3347575068473816, "logits/rejected": -0.43152832984924316, "logps/chosen": -0.005848361644893885, "logps/rejected": -1.3612442016601562, "loss": 4.2105, "nll_loss": 1.0520122051239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005848361761309206, "rewards/margins": 0.13553959131240845, "rewards/rejected": -0.13612443208694458, "step": 2819 }, { "epoch": 1.950207468879668, "grad_norm": 6.350009441375732, "learning_rate": 4.472106961733518e-05, "log_odds_chosen": 6.244994163513184, "log_odds_ratio": -0.02147325873374939, "logits/chosen": -0.4485066533088684, "logits/rejected": -0.4813869297504425, "logps/chosen": -0.028115058317780495, "logps/rejected": -1.2422107458114624, "loss": 2.8348, "nll_loss": 0.7065523266792297, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028115056920796633, "rewards/margins": 0.12140956521034241, "rewards/rejected": -0.1242210865020752, "step": 2820 }, { "epoch": 1.9508990318118948, "grad_norm": 16.922008514404297, "learning_rate": 4.471722760104503e-05, "log_odds_chosen": 4.798357009887695, "log_odds_ratio": -0.4773716330528259, "logits/chosen": -0.6690393686294556, "logits/rejected": -0.714600145816803, "logps/chosen": -0.08855325728654861, "logps/rejected": -0.8425127267837524, "loss": 3.0157, "nll_loss": 0.7061989307403564, "rewards/accuracies": 0.875, "rewards/chosen": -0.008855325169861317, "rewards/margins": 0.07539595663547516, "rewards/rejected": -0.0842512845993042, "step": 2821 }, { "epoch": 1.9515905947441217, "grad_norm": 6.992642402648926, "learning_rate": 4.471338558475488e-05, "log_odds_chosen": 4.1904497146606445, "log_odds_ratio": -0.4717223644256592, "logits/chosen": -0.44422727823257446, "logits/rejected": -0.4630334973335266, "logps/chosen": -0.08977383375167847, "logps/rejected": -1.0111335515975952, "loss": 3.1006, "nll_loss": 0.7279874682426453, "rewards/accuracies": 0.875, "rewards/chosen": -0.008977384306490421, "rewards/margins": 0.0921359732747078, "rewards/rejected": -0.10111334919929504, "step": 2822 }, { "epoch": 1.9522821576763485, "grad_norm": 10.742497444152832, "learning_rate": 4.4709543568464736e-05, "log_odds_chosen": 3.4371228218078613, "log_odds_ratio": -0.44192981719970703, "logits/chosen": -0.6044036746025085, "logits/rejected": -0.5718508958816528, "logps/chosen": -0.0498533770442009, "logps/rejected": -0.4369697570800781, "loss": 2.8645, "nll_loss": 0.6719335317611694, "rewards/accuracies": 0.75, "rewards/chosen": -0.004985337611287832, "rewards/margins": 0.03871164098381996, "rewards/rejected": -0.04369697719812393, "step": 2823 }, { "epoch": 1.9529737206085753, "grad_norm": 7.843807220458984, "learning_rate": 4.470570155217458e-05, "log_odds_chosen": 4.21707820892334, "log_odds_ratio": -0.17529143393039703, "logits/chosen": -0.5330499410629272, "logits/rejected": -0.5987671613693237, "logps/chosen": -0.05731034651398659, "logps/rejected": -0.6517893075942993, "loss": 3.3395, "nll_loss": 0.8173424005508423, "rewards/accuracies": 0.875, "rewards/chosen": -0.005731034558266401, "rewards/margins": 0.059447892010211945, "rewards/rejected": -0.06517893075942993, "step": 2824 }, { "epoch": 1.9536652835408024, "grad_norm": 7.9400739669799805, "learning_rate": 4.4701859535884435e-05, "log_odds_chosen": 6.131689071655273, "log_odds_ratio": -0.041180334985256195, "logits/chosen": -0.3131115734577179, "logits/rejected": -0.4246234595775604, "logps/chosen": -0.03072253428399563, "logps/rejected": -1.005305528640747, "loss": 2.8949, "nll_loss": 0.7196011543273926, "rewards/accuracies": 1.0, "rewards/chosen": -0.003072253428399563, "rewards/margins": 0.09745831787586212, "rewards/rejected": -0.10053056478500366, "step": 2825 }, { "epoch": 1.9543568464730292, "grad_norm": 8.393575668334961, "learning_rate": 4.469801751959429e-05, "log_odds_chosen": 5.061415195465088, "log_odds_ratio": -0.032158877700567245, "logits/chosen": -0.6719424724578857, "logits/rejected": -0.6707208156585693, "logps/chosen": -0.03238382935523987, "logps/rejected": -0.8522112369537354, "loss": 4.3141, "nll_loss": 1.075303077697754, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032383829820901155, "rewards/margins": 0.08198274672031403, "rewards/rejected": -0.08522112667560577, "step": 2826 }, { "epoch": 1.955048409405256, "grad_norm": 5.415279865264893, "learning_rate": 4.469417550330414e-05, "log_odds_chosen": 4.250751972198486, "log_odds_ratio": -0.11042823642492294, "logits/chosen": -0.5419958233833313, "logits/rejected": -0.5159631967544556, "logps/chosen": -0.08708186447620392, "logps/rejected": -1.1190032958984375, "loss": 2.5718, "nll_loss": 0.6319047808647156, "rewards/accuracies": 1.0, "rewards/chosen": -0.008708186447620392, "rewards/margins": 0.10319213569164276, "rewards/rejected": -0.11190032958984375, "step": 2827 }, { "epoch": 1.9557399723374829, "grad_norm": 6.0877838134765625, "learning_rate": 4.4690333487013985e-05, "log_odds_chosen": 5.14921760559082, "log_odds_ratio": -0.09397347271442413, "logits/chosen": -0.05813811719417572, "logits/rejected": -0.11568836122751236, "logps/chosen": -0.05785089358687401, "logps/rejected": -0.926654040813446, "loss": 2.6938, "nll_loss": 0.66404128074646, "rewards/accuracies": 1.0, "rewards/chosen": -0.005785089451819658, "rewards/margins": 0.08688031882047653, "rewards/rejected": -0.0926654040813446, "step": 2828 }, { "epoch": 1.9564315352697097, "grad_norm": 9.632216453552246, "learning_rate": 4.468649147072384e-05, "log_odds_chosen": 6.0009660720825195, "log_odds_ratio": -0.36622923612594604, "logits/chosen": -0.5198516845703125, "logits/rejected": -0.6222413182258606, "logps/chosen": -0.027646349743008614, "logps/rejected": -0.8318744897842407, "loss": 3.4529, "nll_loss": 0.8265994787216187, "rewards/accuracies": 0.875, "rewards/chosen": -0.0027646352536976337, "rewards/margins": 0.08042281866073608, "rewards/rejected": -0.08318745344877243, "step": 2829 }, { "epoch": 1.9571230982019365, "grad_norm": 4.735780715942383, "learning_rate": 4.468264945443369e-05, "log_odds_chosen": 5.2941789627075195, "log_odds_ratio": -0.14041371643543243, "logits/chosen": -0.14039883017539978, "logits/rejected": -0.15818633139133453, "logps/chosen": -0.03495385870337486, "logps/rejected": -0.6268692016601562, "loss": 2.6105, "nll_loss": 0.6385937333106995, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034953858703374863, "rewards/margins": 0.05919152498245239, "rewards/rejected": -0.06268692016601562, "step": 2830 }, { "epoch": 1.9578146611341634, "grad_norm": 14.99392032623291, "learning_rate": 4.4678807438143536e-05, "log_odds_chosen": 5.380325794219971, "log_odds_ratio": -0.08194153010845184, "logits/chosen": -0.45967763662338257, "logits/rejected": -0.5550145506858826, "logps/chosen": -0.050651248544454575, "logps/rejected": -1.1018568277359009, "loss": 4.1651, "nll_loss": 1.0330907106399536, "rewards/accuracies": 1.0, "rewards/chosen": -0.005065124947577715, "rewards/margins": 0.10512056201696396, "rewards/rejected": -0.11018568277359009, "step": 2831 }, { "epoch": 1.9585062240663902, "grad_norm": 4.958186626434326, "learning_rate": 4.4674965421853395e-05, "log_odds_chosen": 6.433233261108398, "log_odds_ratio": -0.04056533798575401, "logits/chosen": -0.49270355701446533, "logits/rejected": -0.5152170658111572, "logps/chosen": -0.03315674886107445, "logps/rejected": -1.1400666236877441, "loss": 2.5399, "nll_loss": 0.6309077739715576, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033156750723719597, "rewards/margins": 0.11069098860025406, "rewards/rejected": -0.11400666832923889, "step": 2832 }, { "epoch": 1.959197786998617, "grad_norm": 6.792165756225586, "learning_rate": 4.467112340556324e-05, "log_odds_chosen": 5.845515251159668, "log_odds_ratio": -0.07941299676895142, "logits/chosen": -0.24126091599464417, "logits/rejected": -0.28594672679901123, "logps/chosen": -0.016813665628433228, "logps/rejected": -0.8013637661933899, "loss": 2.7251, "nll_loss": 0.6733429431915283, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016813663532957435, "rewards/margins": 0.07845500856637955, "rewards/rejected": -0.08013637363910675, "step": 2833 }, { "epoch": 1.9598893499308438, "grad_norm": 5.078237533569336, "learning_rate": 4.466728138927309e-05, "log_odds_chosen": 2.8996164798736572, "log_odds_ratio": -0.2707454264163971, "logits/chosen": -0.2614973783493042, "logits/rejected": -0.291960746049881, "logps/chosen": -0.09858616441488266, "logps/rejected": -0.6153691411018372, "loss": 2.5298, "nll_loss": 0.6053740978240967, "rewards/accuracies": 0.875, "rewards/chosen": -0.009858615696430206, "rewards/margins": 0.05167829990386963, "rewards/rejected": -0.06153691187500954, "step": 2834 }, { "epoch": 1.9605809128630707, "grad_norm": 6.119528293609619, "learning_rate": 4.4663439372982945e-05, "log_odds_chosen": 3.4581170082092285, "log_odds_ratio": -0.26008594036102295, "logits/chosen": -0.49485692381858826, "logits/rejected": -0.48848897218704224, "logps/chosen": -0.07028748095035553, "logps/rejected": -0.5969229340553284, "loss": 3.2002, "nll_loss": 0.7740364074707031, "rewards/accuracies": 0.875, "rewards/chosen": -0.007028748281300068, "rewards/margins": 0.052663546055555344, "rewards/rejected": -0.05969228968024254, "step": 2835 }, { "epoch": 1.9612724757952975, "grad_norm": 7.903665542602539, "learning_rate": 4.46595973566928e-05, "log_odds_chosen": 5.2713799476623535, "log_odds_ratio": -0.165093794465065, "logits/chosen": -0.338885098695755, "logits/rejected": -0.3735104203224182, "logps/chosen": -0.06486129760742188, "logps/rejected": -0.9292372465133667, "loss": 3.3155, "nll_loss": 0.8123664855957031, "rewards/accuracies": 0.875, "rewards/chosen": -0.006486129947006702, "rewards/margins": 0.08643759787082672, "rewards/rejected": -0.09292373061180115, "step": 2836 }, { "epoch": 1.9619640387275243, "grad_norm": 9.878509521484375, "learning_rate": 4.4655755340402644e-05, "log_odds_chosen": 5.400105953216553, "log_odds_ratio": -0.7512086033821106, "logits/chosen": -0.558991551399231, "logits/rejected": -0.550513744354248, "logps/chosen": -0.11486005038022995, "logps/rejected": -1.0291719436645508, "loss": 2.7288, "nll_loss": 0.6070804595947266, "rewards/accuracies": 0.875, "rewards/chosen": -0.01148600596934557, "rewards/margins": 0.09143118560314178, "rewards/rejected": -0.10291719436645508, "step": 2837 }, { "epoch": 1.9626556016597512, "grad_norm": 21.60213279724121, "learning_rate": 4.4651913324112496e-05, "log_odds_chosen": 3.2322263717651367, "log_odds_ratio": -1.070180058479309, "logits/chosen": -0.4052848815917969, "logits/rejected": -0.49535343050956726, "logps/chosen": -0.12277340888977051, "logps/rejected": -0.6704703569412231, "loss": 3.4206, "nll_loss": 0.748128354549408, "rewards/accuracies": 0.75, "rewards/chosen": -0.012277341447770596, "rewards/margins": 0.054769691079854965, "rewards/rejected": -0.06704703718423843, "step": 2838 }, { "epoch": 1.963347164591978, "grad_norm": 8.996484756469727, "learning_rate": 4.464807130782235e-05, "log_odds_chosen": 5.494966506958008, "log_odds_ratio": -0.14851421117782593, "logits/chosen": -0.5047599077224731, "logits/rejected": -0.4936036467552185, "logps/chosen": -0.03942044824361801, "logps/rejected": -0.8090762495994568, "loss": 2.9505, "nll_loss": 0.7227743864059448, "rewards/accuracies": 0.875, "rewards/chosen": -0.003942045383155346, "rewards/margins": 0.0769655853509903, "rewards/rejected": -0.08090762794017792, "step": 2839 }, { "epoch": 1.9640387275242048, "grad_norm": 6.9417243003845215, "learning_rate": 4.4644229291532194e-05, "log_odds_chosen": 5.448168754577637, "log_odds_ratio": -0.04826320335268974, "logits/chosen": -0.6604098081588745, "logits/rejected": -0.6737585067749023, "logps/chosen": -0.03252304345369339, "logps/rejected": -0.96895432472229, "loss": 4.2249, "nll_loss": 1.0514090061187744, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032523043919354677, "rewards/margins": 0.09364312887191772, "rewards/rejected": -0.09689544141292572, "step": 2840 }, { "epoch": 1.9647302904564317, "grad_norm": 8.161296844482422, "learning_rate": 4.464038727524205e-05, "log_odds_chosen": 4.60945987701416, "log_odds_ratio": -0.13581793010234833, "logits/chosen": -0.5580368041992188, "logits/rejected": -0.5402668714523315, "logps/chosen": -0.05534191057085991, "logps/rejected": -0.8401125073432922, "loss": 2.6204, "nll_loss": 0.6415296196937561, "rewards/accuracies": 0.875, "rewards/chosen": -0.005534191615879536, "rewards/margins": 0.07847706973552704, "rewards/rejected": -0.0840112566947937, "step": 2841 }, { "epoch": 1.9654218533886585, "grad_norm": 5.922436714172363, "learning_rate": 4.46365452589519e-05, "log_odds_chosen": 6.903104305267334, "log_odds_ratio": -0.17947496473789215, "logits/chosen": 0.05723372474312782, "logits/rejected": 0.07201800495386124, "logps/chosen": -0.03924143314361572, "logps/rejected": -0.9726995229721069, "loss": 2.2232, "nll_loss": 0.5378566980361938, "rewards/accuracies": 0.875, "rewards/chosen": -0.003924143500626087, "rewards/margins": 0.09334582090377808, "rewards/rejected": -0.09726995974779129, "step": 2842 }, { "epoch": 1.9661134163208853, "grad_norm": 10.01150131225586, "learning_rate": 4.463270324266175e-05, "log_odds_chosen": 4.400213718414307, "log_odds_ratio": -0.35011982917785645, "logits/chosen": -0.46392229199409485, "logits/rejected": -0.44696903228759766, "logps/chosen": -0.028617514297366142, "logps/rejected": -0.830445408821106, "loss": 3.6599, "nll_loss": 0.8799704313278198, "rewards/accuracies": 0.75, "rewards/chosen": -0.0028617512434720993, "rewards/margins": 0.08018279075622559, "rewards/rejected": -0.08304454386234283, "step": 2843 }, { "epoch": 1.9668049792531122, "grad_norm": 5.51869010925293, "learning_rate": 4.4628861226371604e-05, "log_odds_chosen": 4.802142143249512, "log_odds_ratio": -0.03285137936472893, "logits/chosen": -0.004739582538604736, "logits/rejected": -0.07433182001113892, "logps/chosen": -0.021929241716861725, "logps/rejected": -0.8128564357757568, "loss": 2.5213, "nll_loss": 0.6270398497581482, "rewards/accuracies": 1.0, "rewards/chosen": -0.00219292426481843, "rewards/margins": 0.07909271866083145, "rewards/rejected": -0.08128564804792404, "step": 2844 }, { "epoch": 1.967496542185339, "grad_norm": 6.1920976638793945, "learning_rate": 4.4625019210081456e-05, "log_odds_chosen": 4.352306842803955, "log_odds_ratio": -0.19127348065376282, "logits/chosen": -0.4722940921783447, "logits/rejected": -0.5073493123054504, "logps/chosen": -0.06751982867717743, "logps/rejected": -1.009918451309204, "loss": 2.608, "nll_loss": 0.6328845024108887, "rewards/accuracies": 0.875, "rewards/chosen": -0.006751983426511288, "rewards/margins": 0.09423987567424774, "rewards/rejected": -0.10099184513092041, "step": 2845 }, { "epoch": 1.9681881051175658, "grad_norm": 11.177550315856934, "learning_rate": 4.46211771937913e-05, "log_odds_chosen": 3.4274983406066895, "log_odds_ratio": -0.48501288890838623, "logits/chosen": -0.7478051781654358, "logits/rejected": -0.7823911309242249, "logps/chosen": -0.0978512093424797, "logps/rejected": -0.8678635358810425, "loss": 3.7819, "nll_loss": 0.8969804048538208, "rewards/accuracies": 0.75, "rewards/chosen": -0.009785121306777, "rewards/margins": 0.07700122892856598, "rewards/rejected": -0.08678635209798813, "step": 2846 }, { "epoch": 1.9688796680497926, "grad_norm": 7.19655704498291, "learning_rate": 4.4617335177501154e-05, "log_odds_chosen": 3.085355520248413, "log_odds_ratio": -0.2254747599363327, "logits/chosen": -0.6109316945075989, "logits/rejected": -0.6778422594070435, "logps/chosen": -0.14739884436130524, "logps/rejected": -0.8291506767272949, "loss": 3.3712, "nll_loss": 0.8202444911003113, "rewards/accuracies": 0.875, "rewards/chosen": -0.014739884994924068, "rewards/margins": 0.06817518174648285, "rewards/rejected": -0.08291506767272949, "step": 2847 }, { "epoch": 1.9695712309820195, "grad_norm": 6.391579627990723, "learning_rate": 4.461349316121101e-05, "log_odds_chosen": 5.636587142944336, "log_odds_ratio": -0.05788606405258179, "logits/chosen": -0.44978466629981995, "logits/rejected": -0.4869334399700165, "logps/chosen": -0.037404656410217285, "logps/rejected": -1.0483611822128296, "loss": 2.4287, "nll_loss": 0.6013800501823425, "rewards/accuracies": 1.0, "rewards/chosen": -0.003740465734153986, "rewards/margins": 0.10109566152095795, "rewards/rejected": -0.1048361212015152, "step": 2848 }, { "epoch": 1.9702627939142463, "grad_norm": 7.4656548500061035, "learning_rate": 4.460965114492085e-05, "log_odds_chosen": 4.645669937133789, "log_odds_ratio": -0.22266227006912231, "logits/chosen": -0.49918222427368164, "logits/rejected": -0.6102890968322754, "logps/chosen": -0.06597128510475159, "logps/rejected": -0.750339150428772, "loss": 3.5255, "nll_loss": 0.8591197729110718, "rewards/accuracies": 0.875, "rewards/chosen": -0.006597128696739674, "rewards/margins": 0.06843678653240204, "rewards/rejected": -0.07503391802310944, "step": 2849 }, { "epoch": 1.9709543568464731, "grad_norm": 5.622032165527344, "learning_rate": 4.460580912863071e-05, "log_odds_chosen": 3.8457815647125244, "log_odds_ratio": -0.17395474016666412, "logits/chosen": -0.5099331140518188, "logits/rejected": -0.5281578898429871, "logps/chosen": -0.062186792492866516, "logps/rejected": -0.627037525177002, "loss": 2.807, "nll_loss": 0.6843648552894592, "rewards/accuracies": 0.875, "rewards/chosen": -0.006218679249286652, "rewards/margins": 0.056485071778297424, "rewards/rejected": -0.06270375102758408, "step": 2850 }, { "epoch": 1.9716459197787, "grad_norm": 4.130119323730469, "learning_rate": 4.460196711234056e-05, "log_odds_chosen": 8.39101505279541, "log_odds_ratio": -0.0006186411483213305, "logits/chosen": -0.17172306776046753, "logits/rejected": -0.1964360475540161, "logps/chosen": -0.0007562484825029969, "logps/rejected": -1.0518345832824707, "loss": 2.7238, "nll_loss": 0.6808990240097046, "rewards/accuracies": 1.0, "rewards/chosen": -7.562484825029969e-05, "rewards/margins": 0.10510782897472382, "rewards/rejected": -0.105183444917202, "step": 2851 }, { "epoch": 1.9723374827109268, "grad_norm": 10.317930221557617, "learning_rate": 4.459812509605041e-05, "log_odds_chosen": 6.341769218444824, "log_odds_ratio": -0.04393875226378441, "logits/chosen": -0.27549225091934204, "logits/rejected": -0.3653901219367981, "logps/chosen": -0.019962439313530922, "logps/rejected": -0.8477230072021484, "loss": 3.417, "nll_loss": 0.8498455882072449, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019962440710514784, "rewards/margins": 0.08277606219053268, "rewards/rejected": -0.08477230370044708, "step": 2852 }, { "epoch": 1.9730290456431536, "grad_norm": 8.948822021484375, "learning_rate": 4.459428307976026e-05, "log_odds_chosen": 2.1581223011016846, "log_odds_ratio": -0.3025994896888733, "logits/chosen": -0.6348332166671753, "logits/rejected": -0.5737580060958862, "logps/chosen": -0.1140579879283905, "logps/rejected": -0.44779258966445923, "loss": 3.7934, "nll_loss": 0.9181008338928223, "rewards/accuracies": 0.875, "rewards/chosen": -0.011405798606574535, "rewards/margins": 0.03337346017360687, "rewards/rejected": -0.04477925971150398, "step": 2853 }, { "epoch": 1.9737206085753805, "grad_norm": 7.595555305480957, "learning_rate": 4.4590441063470115e-05, "log_odds_chosen": 5.8971028327941895, "log_odds_ratio": -0.1407633274793625, "logits/chosen": -0.4646201729774475, "logits/rejected": -0.5450406074523926, "logps/chosen": -0.07197277247905731, "logps/rejected": -1.3927878141403198, "loss": 2.8903, "nll_loss": 0.7084888219833374, "rewards/accuracies": 0.875, "rewards/chosen": -0.007197277620434761, "rewards/margins": 0.1320815086364746, "rewards/rejected": -0.13927876949310303, "step": 2854 }, { "epoch": 1.9744121715076073, "grad_norm": 8.590985298156738, "learning_rate": 4.458659904717996e-05, "log_odds_chosen": 5.99652099609375, "log_odds_ratio": -0.06401261687278748, "logits/chosen": -0.3496834635734558, "logits/rejected": -0.4178650379180908, "logps/chosen": -0.023707207292318344, "logps/rejected": -0.9959208369255066, "loss": 3.0665, "nll_loss": 0.7602324485778809, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023707207292318344, "rewards/margins": 0.09722136706113815, "rewards/rejected": -0.09959208220243454, "step": 2855 }, { "epoch": 1.9751037344398341, "grad_norm": 8.18436336517334, "learning_rate": 4.458275703088981e-05, "log_odds_chosen": 5.816441059112549, "log_odds_ratio": -0.1955913007259369, "logits/chosen": -0.29734522104263306, "logits/rejected": -0.3642163872718811, "logps/chosen": -0.050296586006879807, "logps/rejected": -0.8719285726547241, "loss": 3.4962, "nll_loss": 0.8544830679893494, "rewards/accuracies": 0.875, "rewards/chosen": -0.005029658786952496, "rewards/margins": 0.08216319978237152, "rewards/rejected": -0.08719285577535629, "step": 2856 }, { "epoch": 1.975795297372061, "grad_norm": 6.27744197845459, "learning_rate": 4.4578915014599665e-05, "log_odds_chosen": 8.145572662353516, "log_odds_ratio": -0.0036865519359707832, "logits/chosen": -0.3854920566082001, "logits/rejected": -0.48750007152557373, "logps/chosen": -0.013279465027153492, "logps/rejected": -1.5056675672531128, "loss": 2.6731, "nll_loss": 0.6679179668426514, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013279466656967998, "rewards/margins": 0.14923880994319916, "rewards/rejected": -0.15056675672531128, "step": 2857 }, { "epoch": 1.9764868603042878, "grad_norm": 8.208527565002441, "learning_rate": 4.457507299830951e-05, "log_odds_chosen": 6.305216312408447, "log_odds_ratio": -0.020527303218841553, "logits/chosen": -0.17919857800006866, "logits/rejected": -0.2800734043121338, "logps/chosen": -0.02931048907339573, "logps/rejected": -1.0246630907058716, "loss": 4.0488, "nll_loss": 1.0101357698440552, "rewards/accuracies": 1.0, "rewards/chosen": -0.002931049093604088, "rewards/margins": 0.0995352566242218, "rewards/rejected": -0.10246631503105164, "step": 2858 }, { "epoch": 1.9771784232365146, "grad_norm": 6.1647844314575195, "learning_rate": 4.457123098201937e-05, "log_odds_chosen": 5.047560214996338, "log_odds_ratio": -0.14424802362918854, "logits/chosen": -0.6080353260040283, "logits/rejected": -0.7011988759040833, "logps/chosen": -0.06074054166674614, "logps/rejected": -1.0708612203598022, "loss": 2.1104, "nll_loss": 0.5131810307502747, "rewards/accuracies": 0.875, "rewards/chosen": -0.006074054166674614, "rewards/margins": 0.10101206600666046, "rewards/rejected": -0.10708612203598022, "step": 2859 }, { "epoch": 1.9778699861687414, "grad_norm": 6.896218776702881, "learning_rate": 4.4567388965729216e-05, "log_odds_chosen": 4.218295574188232, "log_odds_ratio": -0.23091116547584534, "logits/chosen": -0.2940022945404053, "logits/rejected": -0.3114502429962158, "logps/chosen": -0.06656209379434586, "logps/rejected": -0.5783684253692627, "loss": 2.5845, "nll_loss": 0.6230310797691345, "rewards/accuracies": 0.75, "rewards/chosen": -0.00665620993822813, "rewards/margins": 0.0511806383728981, "rewards/rejected": -0.05783684551715851, "step": 2860 }, { "epoch": 1.9785615491009683, "grad_norm": 6.326053619384766, "learning_rate": 4.456354694943907e-05, "log_odds_chosen": 5.043857097625732, "log_odds_ratio": -0.07648416608572006, "logits/chosen": -0.7028237581253052, "logits/rejected": -0.7462775707244873, "logps/chosen": -0.04524366185069084, "logps/rejected": -0.6462970972061157, "loss": 2.5193, "nll_loss": 0.622175395488739, "rewards/accuracies": 1.0, "rewards/chosen": -0.004524365998804569, "rewards/margins": 0.0601053424179554, "rewards/rejected": -0.0646297037601471, "step": 2861 }, { "epoch": 1.979253112033195, "grad_norm": 8.298463821411133, "learning_rate": 4.455970493314892e-05, "log_odds_chosen": 4.941799163818359, "log_odds_ratio": -0.2625234127044678, "logits/chosen": -0.6671578884124756, "logits/rejected": -0.6977245211601257, "logps/chosen": -0.05639420822262764, "logps/rejected": -0.5281553864479065, "loss": 3.7381, "nll_loss": 0.9082846641540527, "rewards/accuracies": 0.875, "rewards/chosen": -0.005639421287924051, "rewards/margins": 0.047176118940114975, "rewards/rejected": -0.05281553789973259, "step": 2862 }, { "epoch": 1.979944674965422, "grad_norm": 5.313868522644043, "learning_rate": 4.455586291685877e-05, "log_odds_chosen": 4.31884241104126, "log_odds_ratio": -0.13699235022068024, "logits/chosen": -0.7914988398551941, "logits/rejected": -0.8392302989959717, "logps/chosen": -0.08075849711894989, "logps/rejected": -0.9601966142654419, "loss": 3.7196, "nll_loss": 0.916207492351532, "rewards/accuracies": 1.0, "rewards/chosen": -0.008075850084424019, "rewards/margins": 0.08794382214546204, "rewards/rejected": -0.0960196703672409, "step": 2863 }, { "epoch": 1.9806362378976488, "grad_norm": 4.304253101348877, "learning_rate": 4.455202090056862e-05, "log_odds_chosen": 5.359724044799805, "log_odds_ratio": -0.07342061400413513, "logits/chosen": -0.6200317144393921, "logits/rejected": -0.6785701513290405, "logps/chosen": -0.03738350793719292, "logps/rejected": -0.8588244318962097, "loss": 2.112, "nll_loss": 0.5206697583198547, "rewards/accuracies": 1.0, "rewards/chosen": -0.003738350933417678, "rewards/margins": 0.08214408159255981, "rewards/rejected": -0.08588244020938873, "step": 2864 }, { "epoch": 1.9813278008298756, "grad_norm": 4.600732803344727, "learning_rate": 4.454817888427847e-05, "log_odds_chosen": 5.447720527648926, "log_odds_ratio": -0.0366494357585907, "logits/chosen": -0.2615331709384918, "logits/rejected": -0.18944472074508667, "logps/chosen": -0.03816407918930054, "logps/rejected": -1.2567387819290161, "loss": 3.6339, "nll_loss": 0.9048019051551819, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038164081051945686, "rewards/margins": 0.12185746431350708, "rewards/rejected": -0.12567389011383057, "step": 2865 }, { "epoch": 1.9820193637621024, "grad_norm": 8.667045593261719, "learning_rate": 4.4544336867988324e-05, "log_odds_chosen": 5.813375473022461, "log_odds_ratio": -0.20894940197467804, "logits/chosen": -0.22451533377170563, "logits/rejected": -0.24346986413002014, "logps/chosen": -0.038732875138521194, "logps/rejected": -0.8478078842163086, "loss": 2.7926, "nll_loss": 0.6772516965866089, "rewards/accuracies": 0.75, "rewards/chosen": -0.0038732877001166344, "rewards/margins": 0.08090750873088837, "rewards/rejected": -0.08478079736232758, "step": 2866 }, { "epoch": 1.9827109266943292, "grad_norm": 10.199563980102539, "learning_rate": 4.454049485169817e-05, "log_odds_chosen": 4.983705997467041, "log_odds_ratio": -0.24623095989227295, "logits/chosen": -0.5467318892478943, "logits/rejected": -0.5973883271217346, "logps/chosen": -0.06223399564623833, "logps/rejected": -1.1259304285049438, "loss": 3.4424, "nll_loss": 0.8359812498092651, "rewards/accuracies": 0.875, "rewards/chosen": -0.00622339965775609, "rewards/margins": 0.10636964440345764, "rewards/rejected": -0.11259303987026215, "step": 2867 }, { "epoch": 1.983402489626556, "grad_norm": 8.570449829101562, "learning_rate": 4.453665283540803e-05, "log_odds_chosen": 6.765602111816406, "log_odds_ratio": -0.03605763614177704, "logits/chosen": -0.22504515945911407, "logits/rejected": -0.267051637172699, "logps/chosen": -0.0312674380838871, "logps/rejected": -1.280580997467041, "loss": 2.6459, "nll_loss": 0.6578635573387146, "rewards/accuracies": 1.0, "rewards/chosen": -0.003126743948087096, "rewards/margins": 0.12493135035037994, "rewards/rejected": -0.12805810570716858, "step": 2868 }, { "epoch": 1.984094052558783, "grad_norm": 11.911445617675781, "learning_rate": 4.4532810819117874e-05, "log_odds_chosen": 4.111666202545166, "log_odds_ratio": -0.16683000326156616, "logits/chosen": -0.38888388872146606, "logits/rejected": -0.4704105854034424, "logps/chosen": -0.061243414878845215, "logps/rejected": -0.829252302646637, "loss": 3.105, "nll_loss": 0.7595645189285278, "rewards/accuracies": 1.0, "rewards/chosen": -0.006124341860413551, "rewards/margins": 0.0768008902668953, "rewards/rejected": -0.0829252377152443, "step": 2869 }, { "epoch": 1.9847856154910097, "grad_norm": 7.618926048278809, "learning_rate": 4.452896880282773e-05, "log_odds_chosen": 6.063479423522949, "log_odds_ratio": -0.06398558616638184, "logits/chosen": -0.807996928691864, "logits/rejected": -0.8398293852806091, "logps/chosen": -0.022773319855332375, "logps/rejected": -0.9269109964370728, "loss": 2.6953, "nll_loss": 0.6674139499664307, "rewards/accuracies": 1.0, "rewards/chosen": -0.00227733189240098, "rewards/margins": 0.09041377902030945, "rewards/rejected": -0.0926911011338234, "step": 2870 }, { "epoch": 1.9854771784232366, "grad_norm": 7.750586032867432, "learning_rate": 4.452512678653757e-05, "log_odds_chosen": 4.687612056732178, "log_odds_ratio": -0.32614877820014954, "logits/chosen": -0.3697401285171509, "logits/rejected": -0.43365412950515747, "logps/chosen": -0.05747876688838005, "logps/rejected": -0.8642250299453735, "loss": 2.7228, "nll_loss": 0.6480889320373535, "rewards/accuracies": 0.75, "rewards/chosen": -0.005747877061367035, "rewards/margins": 0.08067463338375092, "rewards/rejected": -0.08642250299453735, "step": 2871 }, { "epoch": 1.9861687413554634, "grad_norm": 6.014692306518555, "learning_rate": 4.452128477024743e-05, "log_odds_chosen": 3.9112367630004883, "log_odds_ratio": -0.13464248180389404, "logits/chosen": -0.8118262887001038, "logits/rejected": -0.8243058323860168, "logps/chosen": -0.029838457703590393, "logps/rejected": -0.5436880588531494, "loss": 3.0423, "nll_loss": 0.7471101880073547, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029838457703590393, "rewards/margins": 0.05138496309518814, "rewards/rejected": -0.05436880886554718, "step": 2872 }, { "epoch": 1.9868603042876902, "grad_norm": 3.0963494777679443, "learning_rate": 4.451744275395728e-05, "log_odds_chosen": 6.304614067077637, "log_odds_ratio": -0.021528642624616623, "logits/chosen": -0.40912091732025146, "logits/rejected": -0.42527052760124207, "logps/chosen": -0.018766680732369423, "logps/rejected": -0.8206733465194702, "loss": 2.6346, "nll_loss": 0.6564869284629822, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018766680732369423, "rewards/margins": 0.08019067347049713, "rewards/rejected": -0.0820673406124115, "step": 2873 }, { "epoch": 1.987551867219917, "grad_norm": 7.448980808258057, "learning_rate": 4.451360073766713e-05, "log_odds_chosen": 4.426904678344727, "log_odds_ratio": -0.3069632649421692, "logits/chosen": -0.7302984595298767, "logits/rejected": -0.7961975336074829, "logps/chosen": -0.06175190210342407, "logps/rejected": -0.7879979610443115, "loss": 2.9033, "nll_loss": 0.6951185464859009, "rewards/accuracies": 0.875, "rewards/chosen": -0.0061751906760036945, "rewards/margins": 0.07262460887432098, "rewards/rejected": -0.07879979908466339, "step": 2874 }, { "epoch": 1.9882434301521439, "grad_norm": 8.628366470336914, "learning_rate": 4.450975872137698e-05, "log_odds_chosen": 6.7540740966796875, "log_odds_ratio": -0.06287705898284912, "logits/chosen": -0.4166528582572937, "logits/rejected": -0.4531726539134979, "logps/chosen": -0.029345639050006866, "logps/rejected": -1.141418695449829, "loss": 2.5816, "nll_loss": 0.6391215324401855, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029345639050006866, "rewards/margins": 0.11120730638504028, "rewards/rejected": -0.11414187401533127, "step": 2875 }, { "epoch": 1.9889349930843707, "grad_norm": 7.2847137451171875, "learning_rate": 4.450591670508683e-05, "log_odds_chosen": 5.661350727081299, "log_odds_ratio": -0.01193216722458601, "logits/chosen": -0.9701260328292847, "logits/rejected": -1.0334163904190063, "logps/chosen": -0.029496140778064728, "logps/rejected": -1.305067539215088, "loss": 3.591, "nll_loss": 0.8965597152709961, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029496143106371164, "rewards/margins": 0.12755712866783142, "rewards/rejected": -0.1305067539215088, "step": 2876 }, { "epoch": 1.9896265560165975, "grad_norm": 9.756966590881348, "learning_rate": 4.450207468879668e-05, "log_odds_chosen": 6.483921527862549, "log_odds_ratio": -0.011629382148385048, "logits/chosen": -0.3076292872428894, "logits/rejected": -0.39227768778800964, "logps/chosen": -0.03447824716567993, "logps/rejected": -1.3238623142242432, "loss": 2.9971, "nll_loss": 0.7481008768081665, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034478246234357357, "rewards/margins": 0.12893840670585632, "rewards/rejected": -0.1323862224817276, "step": 2877 }, { "epoch": 1.9903181189488244, "grad_norm": 6.210879325866699, "learning_rate": 4.449823267250653e-05, "log_odds_chosen": 3.429314136505127, "log_odds_ratio": -0.16521140933036804, "logits/chosen": -0.35070574283599854, "logits/rejected": -0.2949613332748413, "logps/chosen": -0.0705675259232521, "logps/rejected": -0.8604551553726196, "loss": 2.848, "nll_loss": 0.6954702138900757, "rewards/accuracies": 1.0, "rewards/chosen": -0.007056752685457468, "rewards/margins": 0.07898876816034317, "rewards/rejected": -0.0860455185174942, "step": 2878 }, { "epoch": 1.9910096818810512, "grad_norm": 6.387576580047607, "learning_rate": 4.4494390656216385e-05, "log_odds_chosen": 6.673967361450195, "log_odds_ratio": -0.09716249257326126, "logits/chosen": -0.4616803526878357, "logits/rejected": -0.5385611057281494, "logps/chosen": -0.026279207319021225, "logps/rejected": -1.023545742034912, "loss": 2.9748, "nll_loss": 0.7339800596237183, "rewards/accuracies": 1.0, "rewards/chosen": -0.00262792082503438, "rewards/margins": 0.09972664713859558, "rewards/rejected": -0.10235457122325897, "step": 2879 }, { "epoch": 1.991701244813278, "grad_norm": 9.775636672973633, "learning_rate": 4.449054863992623e-05, "log_odds_chosen": 3.4497570991516113, "log_odds_ratio": -0.7726833820343018, "logits/chosen": -0.5093398690223694, "logits/rejected": -0.5379137992858887, "logps/chosen": -0.12793438136577606, "logps/rejected": -0.9378427267074585, "loss": 3.4558, "nll_loss": 0.7866874933242798, "rewards/accuracies": 0.625, "rewards/chosen": -0.012793438509106636, "rewards/margins": 0.08099082857370377, "rewards/rejected": -0.09378427267074585, "step": 2880 }, { "epoch": 1.9923928077455049, "grad_norm": 3.651247262954712, "learning_rate": 4.448670662363609e-05, "log_odds_chosen": 5.562066555023193, "log_odds_ratio": -0.03729052469134331, "logits/chosen": -0.5071220993995667, "logits/rejected": -0.6439098119735718, "logps/chosen": -0.03577201068401337, "logps/rejected": -1.1652424335479736, "loss": 1.8377, "nll_loss": 0.4556844234466553, "rewards/accuracies": 1.0, "rewards/chosen": -0.003577201394364238, "rewards/margins": 0.11294703930616379, "rewards/rejected": -0.11652424931526184, "step": 2881 }, { "epoch": 1.9930843706777317, "grad_norm": 4.731902599334717, "learning_rate": 4.4482864607345936e-05, "log_odds_chosen": 6.594812393188477, "log_odds_ratio": -0.0028282294515520334, "logits/chosen": -0.341488778591156, "logits/rejected": -0.3427751362323761, "logps/chosen": -0.03473107889294624, "logps/rejected": -1.6951305866241455, "loss": 2.6016, "nll_loss": 0.6501142382621765, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034731074701994658, "rewards/margins": 0.16603994369506836, "rewards/rejected": -0.1695130616426468, "step": 2882 }, { "epoch": 1.9937759336099585, "grad_norm": 7.184812545776367, "learning_rate": 4.447902259105579e-05, "log_odds_chosen": 5.885580062866211, "log_odds_ratio": -0.13160696625709534, "logits/chosen": -0.42819827795028687, "logits/rejected": -0.45641326904296875, "logps/chosen": -0.1743771880865097, "logps/rejected": -0.9931154251098633, "loss": 2.0322, "nll_loss": 0.4948911666870117, "rewards/accuracies": 0.875, "rewards/chosen": -0.017437715083360672, "rewards/margins": 0.0818738341331482, "rewards/rejected": -0.09931155294179916, "step": 2883 }, { "epoch": 1.9944674965421854, "grad_norm": 7.710697650909424, "learning_rate": 4.447518057476564e-05, "log_odds_chosen": 5.005013465881348, "log_odds_ratio": -0.09908229857683182, "logits/chosen": -0.8851842284202576, "logits/rejected": -0.960582971572876, "logps/chosen": -0.031020062044262886, "logps/rejected": -0.5218628644943237, "loss": 3.4158, "nll_loss": 0.8440319299697876, "rewards/accuracies": 1.0, "rewards/chosen": -0.00310200615786016, "rewards/margins": 0.04908428341150284, "rewards/rejected": -0.05218628793954849, "step": 2884 }, { "epoch": 1.9951590594744122, "grad_norm": 6.744011878967285, "learning_rate": 4.4471338558475486e-05, "log_odds_chosen": 6.812548637390137, "log_odds_ratio": -0.012456808239221573, "logits/chosen": -0.9489343762397766, "logits/rejected": -0.8417586088180542, "logps/chosen": -0.0055020651780068874, "logps/rejected": -1.1910438537597656, "loss": 2.7535, "nll_loss": 0.6871209144592285, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005502065760083497, "rewards/margins": 0.11855418235063553, "rewards/rejected": -0.11910439282655716, "step": 2885 }, { "epoch": 1.995850622406639, "grad_norm": 9.45841121673584, "learning_rate": 4.446749654218534e-05, "log_odds_chosen": 5.1840009689331055, "log_odds_ratio": -0.05699038878083229, "logits/chosen": -0.7790420055389404, "logits/rejected": -0.8795222043991089, "logps/chosen": -0.055796995759010315, "logps/rejected": -1.1143743991851807, "loss": 3.517, "nll_loss": 0.8735427260398865, "rewards/accuracies": 1.0, "rewards/chosen": -0.005579699762165546, "rewards/margins": 0.1058577448129654, "rewards/rejected": -0.11143743991851807, "step": 2886 }, { "epoch": 1.9965421853388658, "grad_norm": 7.960890293121338, "learning_rate": 4.446365452589519e-05, "log_odds_chosen": 5.083607196807861, "log_odds_ratio": -0.02815091609954834, "logits/chosen": -0.7035555243492126, "logits/rejected": -0.7305249571800232, "logps/chosen": -0.026452593505382538, "logps/rejected": -1.0385524034500122, "loss": 3.0024, "nll_loss": 0.7477902770042419, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026452592574059963, "rewards/margins": 0.10120998322963715, "rewards/rejected": -0.10385524481534958, "step": 2887 }, { "epoch": 1.9972337482710927, "grad_norm": 8.027430534362793, "learning_rate": 4.4459812509605044e-05, "log_odds_chosen": 7.032285213470459, "log_odds_ratio": -0.01024580467492342, "logits/chosen": -0.7812651991844177, "logits/rejected": -0.8479325771331787, "logps/chosen": -0.018492156639695168, "logps/rejected": -1.0106534957885742, "loss": 2.4929, "nll_loss": 0.6222111582756042, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018492157105356455, "rewards/margins": 0.09921613335609436, "rewards/rejected": -0.10106535255908966, "step": 2888 }, { "epoch": 1.9979253112033195, "grad_norm": 8.134820938110352, "learning_rate": 4.445597049331489e-05, "log_odds_chosen": 4.335086345672607, "log_odds_ratio": -0.3089689314365387, "logits/chosen": -0.7933206558227539, "logits/rejected": -0.7895016074180603, "logps/chosen": -0.07977023720741272, "logps/rejected": -0.6772675514221191, "loss": 2.6851, "nll_loss": 0.6403782367706299, "rewards/accuracies": 0.75, "rewards/chosen": -0.007977023720741272, "rewards/margins": 0.05974973365664482, "rewards/rejected": -0.0677267536520958, "step": 2889 }, { "epoch": 1.9986168741355463, "grad_norm": 9.249191284179688, "learning_rate": 4.445212847702475e-05, "log_odds_chosen": 4.665498733520508, "log_odds_ratio": -0.2790174186229706, "logits/chosen": -0.8788178563117981, "logits/rejected": -0.9095785021781921, "logps/chosen": -0.053846150636672974, "logps/rejected": -1.1634886264801025, "loss": 2.9982, "nll_loss": 0.7216591835021973, "rewards/accuracies": 0.75, "rewards/chosen": -0.005384615156799555, "rewards/margins": 0.11096425354480743, "rewards/rejected": -0.11634886264801025, "step": 2890 }, { "epoch": 1.9993084370677732, "grad_norm": 11.595385551452637, "learning_rate": 4.4448286460734594e-05, "log_odds_chosen": 4.811706066131592, "log_odds_ratio": -0.3877679705619812, "logits/chosen": -0.6049620509147644, "logits/rejected": -0.6957690715789795, "logps/chosen": -0.07414204627275467, "logps/rejected": -1.0122088193893433, "loss": 2.8433, "nll_loss": 0.6720539927482605, "rewards/accuracies": 0.875, "rewards/chosen": -0.007414204999804497, "rewards/margins": 0.0938066840171814, "rewards/rejected": -0.10122088342905045, "step": 2891 }, { "epoch": 2.0, "grad_norm": 12.436097145080566, "learning_rate": 4.4444444444444447e-05, "log_odds_chosen": 6.016121864318848, "log_odds_ratio": -0.031811460852622986, "logits/chosen": -0.6459561586380005, "logits/rejected": -0.6764639616012573, "logps/chosen": -0.021977191790938377, "logps/rejected": -0.9659074544906616, "loss": 4.1095, "nll_loss": 1.024193286895752, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021977191790938377, "rewards/margins": 0.09439302980899811, "rewards/rejected": -0.09659074991941452, "step": 2892 }, { "epoch": 2.000691562932227, "grad_norm": 7.805417537689209, "learning_rate": 4.44406024281543e-05, "log_odds_chosen": 6.162633895874023, "log_odds_ratio": -0.01756115071475506, "logits/chosen": -0.8467515110969543, "logits/rejected": -0.9036287665367126, "logps/chosen": -0.02380123734474182, "logps/rejected": -1.1052594184875488, "loss": 2.7652, "nll_loss": 0.6895546913146973, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023801238276064396, "rewards/margins": 0.1081458106637001, "rewards/rejected": -0.1105259358882904, "step": 2893 }, { "epoch": 2.0013831258644537, "grad_norm": 7.53794527053833, "learning_rate": 4.4436760411864145e-05, "log_odds_chosen": 3.3358545303344727, "log_odds_ratio": -0.15635338425636292, "logits/chosen": -0.5617286562919617, "logits/rejected": -0.569664716720581, "logps/chosen": -0.07381202280521393, "logps/rejected": -0.4878903031349182, "loss": 2.5615, "nll_loss": 0.6247285604476929, "rewards/accuracies": 1.0, "rewards/chosen": -0.007381202653050423, "rewards/margins": 0.04140783101320267, "rewards/rejected": -0.04878903180360794, "step": 2894 }, { "epoch": 2.0020746887966805, "grad_norm": 4.7608232498168945, "learning_rate": 4.4432918395574e-05, "log_odds_chosen": 5.928215026855469, "log_odds_ratio": -0.2225520759820938, "logits/chosen": -0.4803347587585449, "logits/rejected": -0.4999226927757263, "logps/chosen": -0.03862200677394867, "logps/rejected": -0.9687985777854919, "loss": 2.1594, "nll_loss": 0.517595648765564, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038622005376964808, "rewards/margins": 0.09301765263080597, "rewards/rejected": -0.09687986224889755, "step": 2895 }, { "epoch": 2.0027662517289073, "grad_norm": 12.858072280883789, "learning_rate": 4.442907637928385e-05, "log_odds_chosen": 5.557333469390869, "log_odds_ratio": -0.21996933221817017, "logits/chosen": -0.7167026996612549, "logits/rejected": -0.7696047425270081, "logps/chosen": -0.04461139813065529, "logps/rejected": -0.7744177579879761, "loss": 3.0493, "nll_loss": 0.7403295040130615, "rewards/accuracies": 0.875, "rewards/chosen": -0.004461139906197786, "rewards/margins": 0.07298063486814499, "rewards/rejected": -0.07744176685810089, "step": 2896 }, { "epoch": 2.003457814661134, "grad_norm": 3.1132564544677734, "learning_rate": 4.44252343629937e-05, "log_odds_chosen": 5.70494270324707, "log_odds_ratio": -0.023692548274993896, "logits/chosen": -0.707499623298645, "logits/rejected": -0.7106534242630005, "logps/chosen": -0.02470366843044758, "logps/rejected": -0.9739349484443665, "loss": 2.3563, "nll_loss": 0.5866976976394653, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024703668896108866, "rewards/margins": 0.09492312371730804, "rewards/rejected": -0.09739349037408829, "step": 2897 }, { "epoch": 2.004149377593361, "grad_norm": 6.3205718994140625, "learning_rate": 4.442139234670355e-05, "log_odds_chosen": 5.740413188934326, "log_odds_ratio": -0.0508279912173748, "logits/chosen": -0.6263430714607239, "logits/rejected": -0.6554606556892395, "logps/chosen": -0.04459373652935028, "logps/rejected": -1.0038213729858398, "loss": 3.1485, "nll_loss": 0.7820312976837158, "rewards/accuracies": 1.0, "rewards/chosen": -0.004459373652935028, "rewards/margins": 0.09592276811599731, "rewards/rejected": -0.10038213431835175, "step": 2898 }, { "epoch": 2.004840940525588, "grad_norm": 8.48741340637207, "learning_rate": 4.441755033041341e-05, "log_odds_chosen": 5.935898780822754, "log_odds_ratio": -0.04259810596704483, "logits/chosen": -0.6093939542770386, "logits/rejected": -0.6718311309814453, "logps/chosen": -0.02856658585369587, "logps/rejected": -1.0036630630493164, "loss": 2.3201, "nll_loss": 0.5757532119750977, "rewards/accuracies": 1.0, "rewards/chosen": -0.002856658538803458, "rewards/margins": 0.09750963747501373, "rewards/rejected": -0.10036630183458328, "step": 2899 }, { "epoch": 2.0055325034578146, "grad_norm": 6.241588115692139, "learning_rate": 4.441370831412325e-05, "log_odds_chosen": 6.6179304122924805, "log_odds_ratio": -0.06559796631336212, "logits/chosen": -0.494695246219635, "logits/rejected": -0.5800277590751648, "logps/chosen": -0.04796488955616951, "logps/rejected": -1.256560206413269, "loss": 2.1233, "nll_loss": 0.5242593288421631, "rewards/accuracies": 1.0, "rewards/chosen": -0.004796489141881466, "rewards/margins": 0.1208595335483551, "rewards/rejected": -0.12565602362155914, "step": 2900 }, { "epoch": 2.0062240663900415, "grad_norm": 9.595657348632812, "learning_rate": 4.4409866297833105e-05, "log_odds_chosen": 6.480893135070801, "log_odds_ratio": -0.05753350630402565, "logits/chosen": -0.3866846561431885, "logits/rejected": -0.5218240022659302, "logps/chosen": -0.014257104136049747, "logps/rejected": -1.110721230506897, "loss": 2.3186, "nll_loss": 0.5738852024078369, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014257101574912667, "rewards/margins": 0.10964640974998474, "rewards/rejected": -0.1110721156001091, "step": 2901 }, { "epoch": 2.0069156293222683, "grad_norm": 5.194662570953369, "learning_rate": 4.440602428154296e-05, "log_odds_chosen": 4.608372211456299, "log_odds_ratio": -0.07409907132387161, "logits/chosen": -0.4550461173057556, "logits/rejected": -0.5282151699066162, "logps/chosen": -0.05694880336523056, "logps/rejected": -0.940022349357605, "loss": 2.1178, "nll_loss": 0.5220479965209961, "rewards/accuracies": 1.0, "rewards/chosen": -0.005694880615919828, "rewards/margins": 0.08830735832452774, "rewards/rejected": -0.09400224685668945, "step": 2902 }, { "epoch": 2.007607192254495, "grad_norm": 4.882895469665527, "learning_rate": 4.44021822652528e-05, "log_odds_chosen": 6.601312637329102, "log_odds_ratio": -0.11335127055644989, "logits/chosen": -0.38558921217918396, "logits/rejected": -0.40306785702705383, "logps/chosen": -0.05045357346534729, "logps/rejected": -1.3800384998321533, "loss": 2.2444, "nll_loss": 0.5497696399688721, "rewards/accuracies": 1.0, "rewards/chosen": -0.005045357625931501, "rewards/margins": 0.13295850157737732, "rewards/rejected": -0.1380038559436798, "step": 2903 }, { "epoch": 2.008298755186722, "grad_norm": 4.80790901184082, "learning_rate": 4.4398340248962656e-05, "log_odds_chosen": 5.533296585083008, "log_odds_ratio": -0.053497496992349625, "logits/chosen": -0.570841908454895, "logits/rejected": -0.5967553853988647, "logps/chosen": -0.021107885986566544, "logps/rejected": -0.9879939556121826, "loss": 1.6265, "nll_loss": 0.40128064155578613, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021107885986566544, "rewards/margins": 0.09668861329555511, "rewards/rejected": -0.09879939258098602, "step": 2904 }, { "epoch": 2.008990318118949, "grad_norm": 9.519055366516113, "learning_rate": 4.439449823267251e-05, "log_odds_chosen": 7.151142120361328, "log_odds_ratio": -0.010571147315204144, "logits/chosen": -0.8588684797286987, "logits/rejected": -0.9364704489707947, "logps/chosen": -0.017046969383955002, "logps/rejected": -1.5156244039535522, "loss": 4.0667, "nll_loss": 1.0156068801879883, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017046969151124358, "rewards/margins": 0.14985774457454681, "rewards/rejected": -0.15156245231628418, "step": 2905 }, { "epoch": 2.0096818810511756, "grad_norm": 11.029942512512207, "learning_rate": 4.439065621638236e-05, "log_odds_chosen": 6.913255214691162, "log_odds_ratio": -0.0055812327191233635, "logits/chosen": -0.7851934432983398, "logits/rejected": -0.8364708423614502, "logps/chosen": -0.01619294472038746, "logps/rejected": -1.1548116207122803, "loss": 3.5259, "nll_loss": 0.8809195756912231, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016192945186048746, "rewards/margins": 0.11386187374591827, "rewards/rejected": -0.1154811680316925, "step": 2906 }, { "epoch": 2.0103734439834025, "grad_norm": 9.97729778289795, "learning_rate": 4.4386814200092206e-05, "log_odds_chosen": 5.829092979431152, "log_odds_ratio": -0.023012571036815643, "logits/chosen": -0.983071506023407, "logits/rejected": -1.1034585237503052, "logps/chosen": -0.014948589727282524, "logps/rejected": -1.1066315174102783, "loss": 4.3768, "nll_loss": 1.0918893814086914, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014948589960113168, "rewards/margins": 0.10916829109191895, "rewards/rejected": -0.11066314578056335, "step": 2907 }, { "epoch": 2.0110650069156293, "grad_norm": 10.478568077087402, "learning_rate": 4.4382972183802065e-05, "log_odds_chosen": 6.799100875854492, "log_odds_ratio": -0.09093787521123886, "logits/chosen": -0.7140034437179565, "logits/rejected": -0.7645697593688965, "logps/chosen": -0.07351753115653992, "logps/rejected": -1.2636497020721436, "loss": 3.1697, "nll_loss": 0.7833304405212402, "rewards/accuracies": 1.0, "rewards/chosen": -0.007351753301918507, "rewards/margins": 0.1190132349729538, "rewards/rejected": -0.12636497616767883, "step": 2908 }, { "epoch": 2.011756569847856, "grad_norm": 9.869280815124512, "learning_rate": 4.437913016751191e-05, "log_odds_chosen": 7.119072914123535, "log_odds_ratio": -0.004749711137264967, "logits/chosen": -0.5111950039863586, "logits/rejected": -0.6186168193817139, "logps/chosen": -0.013184929266571999, "logps/rejected": -1.3745231628417969, "loss": 2.8304, "nll_loss": 0.7071370482444763, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013184929266571999, "rewards/margins": 0.1361338198184967, "rewards/rejected": -0.13745230436325073, "step": 2909 }, { "epoch": 2.012448132780083, "grad_norm": 10.091440200805664, "learning_rate": 4.4375288151221763e-05, "log_odds_chosen": 4.885725021362305, "log_odds_ratio": -0.4392738938331604, "logits/chosen": -0.5816129446029663, "logits/rejected": -0.6979354619979858, "logps/chosen": -0.1722060739994049, "logps/rejected": -0.7426496744155884, "loss": 3.1504, "nll_loss": 0.7436795830726624, "rewards/accuracies": 0.75, "rewards/chosen": -0.01722060702741146, "rewards/margins": 0.0570443719625473, "rewards/rejected": -0.07426498085260391, "step": 2910 }, { "epoch": 2.0131396957123098, "grad_norm": 6.289185047149658, "learning_rate": 4.4371446134931616e-05, "log_odds_chosen": 6.991199016571045, "log_odds_ratio": -0.2730942964553833, "logits/chosen": -0.4855737090110779, "logits/rejected": -0.5304062366485596, "logps/chosen": -0.036621369421482086, "logps/rejected": -1.2104086875915527, "loss": 1.8411, "nll_loss": 0.4329620599746704, "rewards/accuracies": 0.875, "rewards/chosen": -0.003662137081846595, "rewards/margins": 0.11737874150276184, "rewards/rejected": -0.12104088068008423, "step": 2911 }, { "epoch": 2.0138312586445366, "grad_norm": 12.436087608337402, "learning_rate": 4.436760411864146e-05, "log_odds_chosen": 6.602833271026611, "log_odds_ratio": -0.05370119586586952, "logits/chosen": -0.8386690616607666, "logits/rejected": -0.9121941328048706, "logps/chosen": -0.17093002796173096, "logps/rejected": -1.591941475868225, "loss": 2.9952, "nll_loss": 0.7434421181678772, "rewards/accuracies": 1.0, "rewards/chosen": -0.017093002796173096, "rewards/margins": 0.14210115373134613, "rewards/rejected": -0.15919415652751923, "step": 2912 }, { "epoch": 2.0145228215767634, "grad_norm": 5.664691925048828, "learning_rate": 4.4363762102351314e-05, "log_odds_chosen": 7.527497291564941, "log_odds_ratio": -0.02149026468396187, "logits/chosen": -0.5523714423179626, "logits/rejected": -0.5327743291854858, "logps/chosen": -0.006375204771757126, "logps/rejected": -1.0944676399230957, "loss": 2.1397, "nll_loss": 0.5327867269515991, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006375204538926482, "rewards/margins": 0.10880924761295319, "rewards/rejected": -0.10944677144289017, "step": 2913 }, { "epoch": 2.0152143845089903, "grad_norm": 6.964792728424072, "learning_rate": 4.4359920086061166e-05, "log_odds_chosen": 7.369643211364746, "log_odds_ratio": -0.0036067054606974125, "logits/chosen": -1.052027940750122, "logits/rejected": -1.034885048866272, "logps/chosen": -0.003161386586725712, "logps/rejected": -1.0445643663406372, "loss": 2.7793, "nll_loss": 0.6944639682769775, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003161386412102729, "rewards/margins": 0.10414029657840729, "rewards/rejected": -0.10445643216371536, "step": 2914 }, { "epoch": 2.015905947441217, "grad_norm": 7.805403232574463, "learning_rate": 4.435607806977102e-05, "log_odds_chosen": 7.5195770263671875, "log_odds_ratio": -0.01669224351644516, "logits/chosen": -0.5986104011535645, "logits/rejected": -0.6391288042068481, "logps/chosen": -0.009707896038889885, "logps/rejected": -1.4273556470870972, "loss": 2.5347, "nll_loss": 0.6320062875747681, "rewards/accuracies": 1.0, "rewards/chosen": -0.000970789638813585, "rewards/margins": 0.1417647898197174, "rewards/rejected": -0.1427355706691742, "step": 2915 }, { "epoch": 2.016597510373444, "grad_norm": 9.950371742248535, "learning_rate": 4.4352236053480865e-05, "log_odds_chosen": 4.98217248916626, "log_odds_ratio": -0.5378924608230591, "logits/chosen": -0.4886634349822998, "logits/rejected": -0.5422289371490479, "logps/chosen": -0.25005918741226196, "logps/rejected": -0.954350471496582, "loss": 2.733, "nll_loss": 0.6294622421264648, "rewards/accuracies": 0.875, "rewards/chosen": -0.025005917996168137, "rewards/margins": 0.07042913138866425, "rewards/rejected": -0.09543504565954208, "step": 2916 }, { "epoch": 2.0172890733056708, "grad_norm": 6.791604995727539, "learning_rate": 4.4348394037190724e-05, "log_odds_chosen": 6.250939846038818, "log_odds_ratio": -0.01960081048309803, "logits/chosen": -0.5769975185394287, "logits/rejected": -0.6699945330619812, "logps/chosen": -0.04944942891597748, "logps/rejected": -1.7604033946990967, "loss": 2.1644, "nll_loss": 0.5391305685043335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0049449424259364605, "rewards/margins": 0.17109540104866028, "rewards/rejected": -0.17604035139083862, "step": 2917 }, { "epoch": 2.0179806362378976, "grad_norm": 6.165281772613525, "learning_rate": 4.434455202090057e-05, "log_odds_chosen": 4.771698951721191, "log_odds_ratio": -0.539745032787323, "logits/chosen": -0.3371831178665161, "logits/rejected": -0.29655882716178894, "logps/chosen": -0.2672892212867737, "logps/rejected": -0.9545475244522095, "loss": 2.1533, "nll_loss": 0.48434799909591675, "rewards/accuracies": 0.875, "rewards/chosen": -0.026728922501206398, "rewards/margins": 0.0687258318066597, "rewards/rejected": -0.09545475244522095, "step": 2918 }, { "epoch": 2.0186721991701244, "grad_norm": 8.98730182647705, "learning_rate": 4.434071000461042e-05, "log_odds_chosen": 7.4459333419799805, "log_odds_ratio": -0.01685933582484722, "logits/chosen": -0.782569408416748, "logits/rejected": -0.8473621010780334, "logps/chosen": -0.018038183450698853, "logps/rejected": -1.451753854751587, "loss": 2.1227, "nll_loss": 0.5289920568466187, "rewards/accuracies": 1.0, "rewards/chosen": -0.001803818391636014, "rewards/margins": 0.14337158203125, "rewards/rejected": -0.14517538249492645, "step": 2919 }, { "epoch": 2.0193637621023512, "grad_norm": 6.880577087402344, "learning_rate": 4.4336867988320274e-05, "log_odds_chosen": 7.7974348068237305, "log_odds_ratio": -0.004169912077486515, "logits/chosen": -0.5809128284454346, "logits/rejected": -0.5881280303001404, "logps/chosen": -0.007793866563588381, "logps/rejected": -1.1953469514846802, "loss": 2.6271, "nll_loss": 0.6563675999641418, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007793866680003703, "rewards/margins": 0.11875531077384949, "rewards/rejected": -0.1195346936583519, "step": 2920 }, { "epoch": 2.020055325034578, "grad_norm": 8.577593803405762, "learning_rate": 4.433302597203012e-05, "log_odds_chosen": 6.973368167877197, "log_odds_ratio": -0.0022561801597476006, "logits/chosen": -0.49948495626449585, "logits/rejected": -0.5469887852668762, "logps/chosen": -0.00309023167937994, "logps/rejected": -0.9119285345077515, "loss": 2.5148, "nll_loss": 0.6284716129302979, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003090231621172279, "rewards/margins": 0.09088382124900818, "rewards/rejected": -0.09119285643100739, "step": 2921 }, { "epoch": 2.020746887966805, "grad_norm": 7.7070722579956055, "learning_rate": 4.432918395573997e-05, "log_odds_chosen": 5.305390357971191, "log_odds_ratio": -0.12080781906843185, "logits/chosen": -0.8089467883110046, "logits/rejected": -0.8239259719848633, "logps/chosen": -0.02970268949866295, "logps/rejected": -0.7682185769081116, "loss": 2.6354, "nll_loss": 0.6467616558074951, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029702691826969385, "rewards/margins": 0.07385159283876419, "rewards/rejected": -0.07682186365127563, "step": 2922 }, { "epoch": 2.0214384508990317, "grad_norm": 9.248912811279297, "learning_rate": 4.4325341939449825e-05, "log_odds_chosen": 4.687819004058838, "log_odds_ratio": -0.03429641202092171, "logits/chosen": -0.49940192699432373, "logits/rejected": -0.5568559169769287, "logps/chosen": -0.07113655656576157, "logps/rejected": -0.9501205086708069, "loss": 2.7174, "nll_loss": 0.6759278178215027, "rewards/accuracies": 1.0, "rewards/chosen": -0.007113655563443899, "rewards/margins": 0.087898388504982, "rewards/rejected": -0.09501205384731293, "step": 2923 }, { "epoch": 2.0221300138312586, "grad_norm": 11.791963577270508, "learning_rate": 4.432149992315968e-05, "log_odds_chosen": 3.852811098098755, "log_odds_ratio": -0.19490672647953033, "logits/chosen": -0.4790309965610504, "logits/rejected": -0.5375632643699646, "logps/chosen": -0.18811428546905518, "logps/rejected": -0.8418582081794739, "loss": 3.0405, "nll_loss": 0.7406342625617981, "rewards/accuracies": 0.875, "rewards/chosen": -0.018811428919434547, "rewards/margins": 0.06537439674139023, "rewards/rejected": -0.08418582379817963, "step": 2924 }, { "epoch": 2.0228215767634854, "grad_norm": 7.580800533294678, "learning_rate": 4.431765790686952e-05, "log_odds_chosen": 5.117754936218262, "log_odds_ratio": -0.01622222363948822, "logits/chosen": -0.7211370468139648, "logits/rejected": -0.7603697776794434, "logps/chosen": -0.018140438944101334, "logps/rejected": -0.9220280051231384, "loss": 2.7227, "nll_loss": 0.6790465712547302, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018140438478440046, "rewards/margins": 0.09038875252008438, "rewards/rejected": -0.0922027975320816, "step": 2925 }, { "epoch": 2.0235131396957122, "grad_norm": 7.705148220062256, "learning_rate": 4.431381589057938e-05, "log_odds_chosen": 6.213085651397705, "log_odds_ratio": -0.033481746912002563, "logits/chosen": -0.9892230033874512, "logits/rejected": -1.0106509923934937, "logps/chosen": -0.03902255743741989, "logps/rejected": -1.2663230895996094, "loss": 3.1659, "nll_loss": 0.7881351113319397, "rewards/accuracies": 1.0, "rewards/chosen": -0.003902255790308118, "rewards/margins": 0.1227300614118576, "rewards/rejected": -0.12663230299949646, "step": 2926 }, { "epoch": 2.024204702627939, "grad_norm": 6.366061210632324, "learning_rate": 4.430997387428923e-05, "log_odds_chosen": 7.32904577255249, "log_odds_ratio": -0.006772264838218689, "logits/chosen": -0.19414487481117249, "logits/rejected": -0.20445206761360168, "logps/chosen": -0.004846815951168537, "logps/rejected": -1.157535195350647, "loss": 3.0902, "nll_loss": 0.771882176399231, "rewards/accuracies": 1.0, "rewards/chosen": -0.00048468157183378935, "rewards/margins": 0.11526884138584137, "rewards/rejected": -0.11575351655483246, "step": 2927 }, { "epoch": 2.024896265560166, "grad_norm": 6.209042549133301, "learning_rate": 4.430613185799908e-05, "log_odds_chosen": 5.014216899871826, "log_odds_ratio": -0.05102992802858353, "logits/chosen": -0.8243149518966675, "logits/rejected": -0.8707647323608398, "logps/chosen": -0.04010523855686188, "logps/rejected": -0.9312438368797302, "loss": 2.5693, "nll_loss": 0.6372247934341431, "rewards/accuracies": 1.0, "rewards/chosen": -0.004010523669421673, "rewards/margins": 0.08911386132240295, "rewards/rejected": -0.0931243821978569, "step": 2928 }, { "epoch": 2.0255878284923927, "grad_norm": 8.702845573425293, "learning_rate": 4.430228984170893e-05, "log_odds_chosen": 5.299887180328369, "log_odds_ratio": -0.1380869746208191, "logits/chosen": -0.4525139331817627, "logits/rejected": -0.48863327503204346, "logps/chosen": -0.07026822865009308, "logps/rejected": -1.1539911031723022, "loss": 2.3965, "nll_loss": 0.5853177309036255, "rewards/accuracies": 0.875, "rewards/chosen": -0.00702682277187705, "rewards/margins": 0.1083722934126854, "rewards/rejected": -0.11539912223815918, "step": 2929 }, { "epoch": 2.0262793914246195, "grad_norm": 6.655099868774414, "learning_rate": 4.429844782541878e-05, "log_odds_chosen": 6.435043811798096, "log_odds_ratio": -0.039562076330184937, "logits/chosen": -0.563301146030426, "logits/rejected": -0.6276003122329712, "logps/chosen": -0.03223853558301926, "logps/rejected": -1.3018076419830322, "loss": 2.116, "nll_loss": 0.5250539183616638, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032238538842648268, "rewards/margins": 0.12695690989494324, "rewards/rejected": -0.13018076121807098, "step": 2930 }, { "epoch": 2.0269709543568464, "grad_norm": 8.373746871948242, "learning_rate": 4.429460580912863e-05, "log_odds_chosen": 6.052464485168457, "log_odds_ratio": -0.06993289291858673, "logits/chosen": -0.48515585064888, "logits/rejected": -0.5455655455589294, "logps/chosen": -0.03491077572107315, "logps/rejected": -1.339547038078308, "loss": 2.9546, "nll_loss": 0.7316581010818481, "rewards/accuracies": 1.0, "rewards/chosen": -0.003491077572107315, "rewards/margins": 0.1304636299610138, "rewards/rejected": -0.1339547038078308, "step": 2931 }, { "epoch": 2.027662517289073, "grad_norm": 4.205761432647705, "learning_rate": 4.429076379283848e-05, "log_odds_chosen": 6.216808319091797, "log_odds_ratio": -0.05835818499326706, "logits/chosen": -0.7330908179283142, "logits/rejected": -0.7898491024971008, "logps/chosen": -0.030182205140590668, "logps/rejected": -1.4196422100067139, "loss": 3.3178, "nll_loss": 0.8236026763916016, "rewards/accuracies": 1.0, "rewards/chosen": -0.003018220653757453, "rewards/margins": 0.13894601166248322, "rewards/rejected": -0.14196424186229706, "step": 2932 }, { "epoch": 2.0283540802213, "grad_norm": 7.031354904174805, "learning_rate": 4.4286921776548336e-05, "log_odds_chosen": 6.888822555541992, "log_odds_ratio": -0.05103730410337448, "logits/chosen": -0.4072624146938324, "logits/rejected": -0.41956576704978943, "logps/chosen": -0.011635039933025837, "logps/rejected": -0.923719584941864, "loss": 2.3398, "nll_loss": 0.579850435256958, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011635040864348412, "rewards/margins": 0.09120845049619675, "rewards/rejected": -0.09237195551395416, "step": 2933 }, { "epoch": 2.029045643153527, "grad_norm": 8.670273780822754, "learning_rate": 4.428307976025818e-05, "log_odds_chosen": 8.007255554199219, "log_odds_ratio": -0.004472412634640932, "logits/chosen": -0.5988377332687378, "logits/rejected": -0.6525205969810486, "logps/chosen": -0.0038377277087420225, "logps/rejected": -0.9639263153076172, "loss": 2.4431, "nll_loss": 0.6103239059448242, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038377277087420225, "rewards/margins": 0.09600885212421417, "rewards/rejected": -0.09639262408018112, "step": 2934 }, { "epoch": 2.0297372060857537, "grad_norm": 5.9609551429748535, "learning_rate": 4.427923774396804e-05, "log_odds_chosen": 5.167479515075684, "log_odds_ratio": -0.0625988095998764, "logits/chosen": -0.29511865973472595, "logits/rejected": -0.3535918891429901, "logps/chosen": -0.023435872048139572, "logps/rejected": -0.6343368291854858, "loss": 2.2397, "nll_loss": 0.5536573529243469, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023435871116816998, "rewards/margins": 0.061090096831321716, "rewards/rejected": -0.0634336918592453, "step": 2935 }, { "epoch": 2.0304287690179805, "grad_norm": 12.973045349121094, "learning_rate": 4.4275395727677886e-05, "log_odds_chosen": 5.455376148223877, "log_odds_ratio": -0.13886310160160065, "logits/chosen": -0.41711336374282837, "logits/rejected": -0.4392683207988739, "logps/chosen": -0.013393568806350231, "logps/rejected": -0.8761259317398071, "loss": 3.4505, "nll_loss": 0.8487340211868286, "rewards/accuracies": 0.875, "rewards/chosen": -0.0013393567642197013, "rewards/margins": 0.08627323806285858, "rewards/rejected": -0.0876125916838646, "step": 2936 }, { "epoch": 2.0311203319502074, "grad_norm": 5.99356746673584, "learning_rate": 4.427155371138774e-05, "log_odds_chosen": 5.800067901611328, "log_odds_ratio": -0.02246464043855667, "logits/chosen": -0.39779436588287354, "logits/rejected": -0.4212380051612854, "logps/chosen": -0.03113883174955845, "logps/rejected": -0.7767848372459412, "loss": 2.3202, "nll_loss": 0.5778111815452576, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031138835474848747, "rewards/margins": 0.0745646059513092, "rewards/rejected": -0.07767849415540695, "step": 2937 }, { "epoch": 2.031811894882434, "grad_norm": 8.833569526672363, "learning_rate": 4.426771169509759e-05, "log_odds_chosen": 8.39259147644043, "log_odds_ratio": -0.00498524634167552, "logits/chosen": -0.5085591673851013, "logits/rejected": -0.6033196449279785, "logps/chosen": -0.0022648456506431103, "logps/rejected": -1.2173972129821777, "loss": 3.0348, "nll_loss": 0.7582062482833862, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022648456797469407, "rewards/margins": 0.1215132474899292, "rewards/rejected": -0.12173973768949509, "step": 2938 }, { "epoch": 2.032503457814661, "grad_norm": 6.527597904205322, "learning_rate": 4.4263869678807444e-05, "log_odds_chosen": 6.323877334594727, "log_odds_ratio": -0.012947482988238335, "logits/chosen": -0.5131242871284485, "logits/rejected": -0.5534065961837769, "logps/chosen": -0.02248767577111721, "logps/rejected": -0.9359084367752075, "loss": 3.1013, "nll_loss": 0.7740212082862854, "rewards/accuracies": 1.0, "rewards/chosen": -0.002248767763376236, "rewards/margins": 0.09134207665920258, "rewards/rejected": -0.09359084069728851, "step": 2939 }, { "epoch": 2.033195020746888, "grad_norm": 5.964481353759766, "learning_rate": 4.426002766251729e-05, "log_odds_chosen": 6.230679512023926, "log_odds_ratio": -0.0629938542842865, "logits/chosen": -0.7436071634292603, "logits/rejected": -0.7987266778945923, "logps/chosen": -0.02044205367565155, "logps/rejected": -0.8582735061645508, "loss": 2.7018, "nll_loss": 0.6691381931304932, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020442053209990263, "rewards/margins": 0.08378314971923828, "rewards/rejected": -0.08582735806703568, "step": 2940 }, { "epoch": 2.0338865836791147, "grad_norm": 8.997940063476562, "learning_rate": 4.425618564622714e-05, "log_odds_chosen": 6.974672794342041, "log_odds_ratio": -0.006081899628043175, "logits/chosen": -0.6376204490661621, "logits/rejected": -0.6679280400276184, "logps/chosen": -0.03785526379942894, "logps/rejected": -1.6801406145095825, "loss": 2.733, "nll_loss": 0.6826443672180176, "rewards/accuracies": 1.0, "rewards/chosen": -0.003785526379942894, "rewards/margins": 0.16422852873802185, "rewards/rejected": -0.16801407933235168, "step": 2941 }, { "epoch": 2.0345781466113415, "grad_norm": 9.944624900817871, "learning_rate": 4.4252343629936994e-05, "log_odds_chosen": 7.224353790283203, "log_odds_ratio": -0.00259740324690938, "logits/chosen": -0.7675692439079285, "logits/rejected": -0.840691089630127, "logps/chosen": -0.001974244136363268, "logps/rejected": -1.0744328498840332, "loss": 3.8919, "nll_loss": 0.9727070331573486, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019742443691939116, "rewards/margins": 0.10724586248397827, "rewards/rejected": -0.10744328051805496, "step": 2942 }, { "epoch": 2.0352697095435683, "grad_norm": 8.882083892822266, "learning_rate": 4.424850161364684e-05, "log_odds_chosen": 6.137880325317383, "log_odds_ratio": -0.009068233892321587, "logits/chosen": -0.5883402228355408, "logits/rejected": -0.5579231381416321, "logps/chosen": -0.05238615721464157, "logps/rejected": -1.8226776123046875, "loss": 2.9676, "nll_loss": 0.741002082824707, "rewards/accuracies": 1.0, "rewards/chosen": -0.005238616373389959, "rewards/margins": 0.17702913284301758, "rewards/rejected": -0.18226775527000427, "step": 2943 }, { "epoch": 2.035961272475795, "grad_norm": 7.656317710876465, "learning_rate": 4.42446595973567e-05, "log_odds_chosen": 7.767665863037109, "log_odds_ratio": -0.05336516350507736, "logits/chosen": -0.5125993490219116, "logits/rejected": -0.5610659718513489, "logps/chosen": -0.005388497840613127, "logps/rejected": -1.1172276735305786, "loss": 2.6554, "nll_loss": 0.6585062742233276, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005388497374951839, "rewards/margins": 0.11118391901254654, "rewards/rejected": -0.11172277480363846, "step": 2944 }, { "epoch": 2.036652835408022, "grad_norm": 5.670093536376953, "learning_rate": 4.4240817581066545e-05, "log_odds_chosen": 5.91354513168335, "log_odds_ratio": -0.0248207226395607, "logits/chosen": -0.49000585079193115, "logits/rejected": -0.515639066696167, "logps/chosen": -0.012547427788376808, "logps/rejected": -0.6958021521568298, "loss": 2.2802, "nll_loss": 0.5675714015960693, "rewards/accuracies": 1.0, "rewards/chosen": -0.001254742732271552, "rewards/margins": 0.068325474858284, "rewards/rejected": -0.06958021968603134, "step": 2945 }, { "epoch": 2.037344398340249, "grad_norm": 7.737858772277832, "learning_rate": 4.42369755647764e-05, "log_odds_chosen": 7.393893241882324, "log_odds_ratio": -0.018279001116752625, "logits/chosen": -0.422544002532959, "logits/rejected": -0.47324442863464355, "logps/chosen": -0.011747865006327629, "logps/rejected": -1.5627875328063965, "loss": 2.8876, "nll_loss": 0.7200790643692017, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011747865937650204, "rewards/margins": 0.15510396659374237, "rewards/rejected": -0.15627875924110413, "step": 2946 }, { "epoch": 2.0380359612724757, "grad_norm": 9.531057357788086, "learning_rate": 4.423313354848625e-05, "log_odds_chosen": 6.561200141906738, "log_odds_ratio": -0.04572358354926109, "logits/chosen": -0.40563100576400757, "logits/rejected": -0.4432332515716553, "logps/chosen": -0.023076066747307777, "logps/rejected": -0.9452800750732422, "loss": 3.9257, "nll_loss": 0.976864218711853, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023076068609952927, "rewards/margins": 0.09222040325403214, "rewards/rejected": -0.09452801197767258, "step": 2947 }, { "epoch": 2.0387275242047025, "grad_norm": 7.236406326293945, "learning_rate": 4.42292915321961e-05, "log_odds_chosen": 7.733157157897949, "log_odds_ratio": -0.0020131170749664307, "logits/chosen": -0.5212618708610535, "logits/rejected": -0.511024534702301, "logps/chosen": -0.001999392407014966, "logps/rejected": -0.9011251926422119, "loss": 1.9601, "nll_loss": 0.4898112416267395, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001999392407014966, "rewards/margins": 0.08991258591413498, "rewards/rejected": -0.09011252224445343, "step": 2948 }, { "epoch": 2.0394190871369293, "grad_norm": 7.766987323760986, "learning_rate": 4.422544951590595e-05, "log_odds_chosen": 8.511279106140137, "log_odds_ratio": -0.02912795916199684, "logits/chosen": -0.27267301082611084, "logits/rejected": -0.3947067856788635, "logps/chosen": -0.012451526708900928, "logps/rejected": -2.0573956966400146, "loss": 1.7155, "nll_loss": 0.42596328258514404, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012451526708900928, "rewards/margins": 0.2044944167137146, "rewards/rejected": -0.2057395577430725, "step": 2949 }, { "epoch": 2.040110650069156, "grad_norm": 8.35403060913086, "learning_rate": 4.42216074996158e-05, "log_odds_chosen": 5.819576263427734, "log_odds_ratio": -0.15582507848739624, "logits/chosen": -0.5156236886978149, "logits/rejected": -0.5940700173377991, "logps/chosen": -0.045006606727838516, "logps/rejected": -1.1011898517608643, "loss": 2.0249, "nll_loss": 0.4906438887119293, "rewards/accuracies": 0.875, "rewards/chosen": -0.004500660579651594, "rewards/margins": 0.10561832785606384, "rewards/rejected": -0.11011898517608643, "step": 2950 }, { "epoch": 2.040802213001383, "grad_norm": 4.178206443786621, "learning_rate": 4.421776548332565e-05, "log_odds_chosen": 5.757562637329102, "log_odds_ratio": -0.013421890325844288, "logits/chosen": -0.43508225679397583, "logits/rejected": -0.4227558374404907, "logps/chosen": -0.050725605338811874, "logps/rejected": -1.2691094875335693, "loss": 1.6484, "nll_loss": 0.4107661843299866, "rewards/accuracies": 1.0, "rewards/chosen": -0.005072561092674732, "rewards/margins": 0.12183839082717896, "rewards/rejected": -0.1269109547138214, "step": 2951 }, { "epoch": 2.04149377593361, "grad_norm": 8.70975112915039, "learning_rate": 4.42139234670355e-05, "log_odds_chosen": 8.446166038513184, "log_odds_ratio": -0.0025777772534638643, "logits/chosen": -0.6618889570236206, "logits/rejected": -0.7927266359329224, "logps/chosen": -0.0014678852166980505, "logps/rejected": -1.5695796012878418, "loss": 2.0736, "nll_loss": 0.5181523561477661, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014678851584903896, "rewards/margins": 0.1568111628293991, "rewards/rejected": -0.1569579690694809, "step": 2952 }, { "epoch": 2.0421853388658366, "grad_norm": 9.313553810119629, "learning_rate": 4.421008145074536e-05, "log_odds_chosen": 7.796962261199951, "log_odds_ratio": -0.0014820595970377326, "logits/chosen": -0.8346244096755981, "logits/rejected": -0.8700048923492432, "logps/chosen": -0.0053040627390146255, "logps/rejected": -1.5605573654174805, "loss": 3.1736, "nll_loss": 0.7932461500167847, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005304062506183982, "rewards/margins": 0.1555253267288208, "rewards/rejected": -0.1560557335615158, "step": 2953 }, { "epoch": 2.0428769017980635, "grad_norm": 9.841303825378418, "learning_rate": 4.42062394344552e-05, "log_odds_chosen": 9.34194564819336, "log_odds_ratio": -0.0003495306591503322, "logits/chosen": -0.23278909921646118, "logits/rejected": -0.24929757416248322, "logps/chosen": -0.0005623494507744908, "logps/rejected": -1.7412278652191162, "loss": 2.8978, "nll_loss": 0.7244049310684204, "rewards/accuracies": 1.0, "rewards/chosen": -5.6234941439470276e-05, "rewards/margins": 0.17406655848026276, "rewards/rejected": -0.17412279546260834, "step": 2954 }, { "epoch": 2.0435684647302903, "grad_norm": 5.997377395629883, "learning_rate": 4.4202397418165056e-05, "log_odds_chosen": 6.477039337158203, "log_odds_ratio": -0.025202833116054535, "logits/chosen": -0.9479755163192749, "logits/rejected": -0.9774594306945801, "logps/chosen": -0.044182758778333664, "logps/rejected": -1.0433984994888306, "loss": 2.3557, "nll_loss": 0.5864126682281494, "rewards/accuracies": 1.0, "rewards/chosen": -0.004418275784701109, "rewards/margins": 0.0999215766787529, "rewards/rejected": -0.1043398529291153, "step": 2955 }, { "epoch": 2.044260027662517, "grad_norm": 7.110849857330322, "learning_rate": 4.419855540187491e-05, "log_odds_chosen": 6.030847072601318, "log_odds_ratio": -0.037585724145174026, "logits/chosen": -0.6416101455688477, "logits/rejected": -0.6577980518341064, "logps/chosen": -0.06431546062231064, "logps/rejected": -1.2889031171798706, "loss": 3.1393, "nll_loss": 0.7810727953910828, "rewards/accuracies": 1.0, "rewards/chosen": -0.0064315455965697765, "rewards/margins": 0.12245876342058182, "rewards/rejected": -0.12889030575752258, "step": 2956 }, { "epoch": 2.044951590594744, "grad_norm": 5.512500286102295, "learning_rate": 4.419471338558476e-05, "log_odds_chosen": 7.396589756011963, "log_odds_ratio": -0.013286417350172997, "logits/chosen": -0.47500455379486084, "logits/rejected": -0.5753237009048462, "logps/chosen": -0.04434991627931595, "logps/rejected": -1.3787543773651123, "loss": 2.2678, "nll_loss": 0.5656318068504333, "rewards/accuracies": 1.0, "rewards/chosen": -0.004434991627931595, "rewards/margins": 0.13344044983386993, "rewards/rejected": -0.13787545263767242, "step": 2957 }, { "epoch": 2.045643153526971, "grad_norm": 5.134779930114746, "learning_rate": 4.4190871369294606e-05, "log_odds_chosen": 6.285913944244385, "log_odds_ratio": -0.14252759516239166, "logits/chosen": -0.45119357109069824, "logits/rejected": -0.5126819014549255, "logps/chosen": -0.03729405999183655, "logps/rejected": -1.1003504991531372, "loss": 1.8283, "nll_loss": 0.44282904267311096, "rewards/accuracies": 0.875, "rewards/chosen": -0.003729406511411071, "rewards/margins": 0.10630562901496887, "rewards/rejected": -0.11003503203392029, "step": 2958 }, { "epoch": 2.0463347164591976, "grad_norm": 7.935320854187012, "learning_rate": 4.418702935300446e-05, "log_odds_chosen": 7.760144233703613, "log_odds_ratio": -0.005842843558639288, "logits/chosen": -0.8411372900009155, "logits/rejected": -0.9680566191673279, "logps/chosen": -0.017029302194714546, "logps/rejected": -1.6215450763702393, "loss": 3.1712, "nll_loss": 0.7922165393829346, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017029301961883903, "rewards/margins": 0.16045159101486206, "rewards/rejected": -0.16215452551841736, "step": 2959 }, { "epoch": 2.0470262793914245, "grad_norm": 6.970455646514893, "learning_rate": 4.418318733671431e-05, "log_odds_chosen": 6.338698387145996, "log_odds_ratio": -0.03856229782104492, "logits/chosen": -0.4592578709125519, "logits/rejected": -0.5230832099914551, "logps/chosen": -0.02689545601606369, "logps/rejected": -0.9274243116378784, "loss": 2.8688, "nll_loss": 0.713355541229248, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026895455084741116, "rewards/margins": 0.09005288779735565, "rewards/rejected": -0.0927424281835556, "step": 2960 }, { "epoch": 2.0477178423236513, "grad_norm": 6.369350910186768, "learning_rate": 4.417934532042416e-05, "log_odds_chosen": 7.189737796783447, "log_odds_ratio": -0.12329629063606262, "logits/chosen": -0.6698348522186279, "logits/rejected": -0.7852696180343628, "logps/chosen": -0.028546592220664024, "logps/rejected": -1.4175446033477783, "loss": 1.6068, "nll_loss": 0.38936716318130493, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028546589892357588, "rewards/margins": 0.1388998031616211, "rewards/rejected": -0.14175444841384888, "step": 2961 }, { "epoch": 2.048409405255878, "grad_norm": 8.8323392868042, "learning_rate": 4.4175503304134016e-05, "log_odds_chosen": 4.420435905456543, "log_odds_ratio": -0.3130551874637604, "logits/chosen": -0.4343242347240448, "logits/rejected": -0.4906140863895416, "logps/chosen": -0.07207857817411423, "logps/rejected": -0.7014517784118652, "loss": 2.3546, "nll_loss": 0.5573325753211975, "rewards/accuracies": 0.75, "rewards/chosen": -0.0072078583762049675, "rewards/margins": 0.06293731927871704, "rewards/rejected": -0.07014517486095428, "step": 2962 }, { "epoch": 2.049100968188105, "grad_norm": 7.881039142608643, "learning_rate": 4.417166128784386e-05, "log_odds_chosen": 7.461102485656738, "log_odds_ratio": -0.05037471279501915, "logits/chosen": -0.6518298387527466, "logits/rejected": -0.6640526652336121, "logps/chosen": -0.015996096655726433, "logps/rejected": -1.207945466041565, "loss": 3.1082, "nll_loss": 0.7720248103141785, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015996096190065145, "rewards/margins": 0.11919493228197098, "rewards/rejected": -0.12079453468322754, "step": 2963 }, { "epoch": 2.0497925311203318, "grad_norm": 8.567526817321777, "learning_rate": 4.4167819271553714e-05, "log_odds_chosen": 7.342993259429932, "log_odds_ratio": -0.04448270797729492, "logits/chosen": -0.610135555267334, "logits/rejected": -0.6820641160011292, "logps/chosen": -0.10857971012592316, "logps/rejected": -1.3978261947631836, "loss": 2.6709, "nll_loss": 0.6632830500602722, "rewards/accuracies": 1.0, "rewards/chosen": -0.01085797231644392, "rewards/margins": 0.1289246529340744, "rewards/rejected": -0.1397826224565506, "step": 2964 }, { "epoch": 2.0504840940525586, "grad_norm": 11.050207138061523, "learning_rate": 4.4163977255263567e-05, "log_odds_chosen": 5.7598795890808105, "log_odds_ratio": -0.3220196068286896, "logits/chosen": -0.7325015068054199, "logits/rejected": -0.8497984409332275, "logps/chosen": -0.12142018973827362, "logps/rejected": -1.4437401294708252, "loss": 3.1083, "nll_loss": 0.744879424571991, "rewards/accuracies": 0.875, "rewards/chosen": -0.012142017483711243, "rewards/margins": 0.13223199546337128, "rewards/rejected": -0.14437401294708252, "step": 2965 }, { "epoch": 2.0511756569847854, "grad_norm": 6.123977184295654, "learning_rate": 4.416013523897342e-05, "log_odds_chosen": 8.347021102905273, "log_odds_ratio": -0.0005506742745637894, "logits/chosen": -0.625741720199585, "logits/rejected": -0.6533139944076538, "logps/chosen": -0.0005300822667777538, "logps/rejected": -0.7603899240493774, "loss": 3.1846, "nll_loss": 0.7960931062698364, "rewards/accuracies": 1.0, "rewards/chosen": -5.300822886056267e-05, "rewards/margins": 0.07598598301410675, "rewards/rejected": -0.07603899389505386, "step": 2966 }, { "epoch": 2.0518672199170123, "grad_norm": 7.458584308624268, "learning_rate": 4.4156293222683265e-05, "log_odds_chosen": 5.091979026794434, "log_odds_ratio": -0.9392639398574829, "logits/chosen": -0.37719377875328064, "logits/rejected": -0.4038720428943634, "logps/chosen": -0.17346209287643433, "logps/rejected": -0.9408200979232788, "loss": 2.9051, "nll_loss": 0.6323404908180237, "rewards/accuracies": 0.75, "rewards/chosen": -0.017346208915114403, "rewards/margins": 0.07673580199480057, "rewards/rejected": -0.09408202022314072, "step": 2967 }, { "epoch": 2.052558782849239, "grad_norm": 5.1775665283203125, "learning_rate": 4.415245120639312e-05, "log_odds_chosen": 7.053496837615967, "log_odds_ratio": -0.07024918496608734, "logits/chosen": -0.5811555981636047, "logits/rejected": -0.5938743948936462, "logps/chosen": -0.015003536827862263, "logps/rejected": -0.9554708003997803, "loss": 2.0713, "nll_loss": 0.5108081698417664, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015003536827862263, "rewards/margins": 0.09404672682285309, "rewards/rejected": -0.09554708003997803, "step": 2968 }, { "epoch": 2.053250345781466, "grad_norm": 12.167163848876953, "learning_rate": 4.414860919010297e-05, "log_odds_chosen": 6.978175640106201, "log_odds_ratio": -0.0033219067845493555, "logits/chosen": -0.8989130258560181, "logits/rejected": -0.8963654041290283, "logps/chosen": -0.0031070064287632704, "logps/rejected": -0.9147448539733887, "loss": 3.3015, "nll_loss": 0.8250552415847778, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003107006778009236, "rewards/margins": 0.09116378426551819, "rewards/rejected": -0.0914744883775711, "step": 2969 }, { "epoch": 2.0539419087136928, "grad_norm": 6.647165298461914, "learning_rate": 4.4144767173812815e-05, "log_odds_chosen": 8.044532775878906, "log_odds_ratio": -0.02352350763976574, "logits/chosen": -0.7308659553527832, "logits/rejected": -0.7580546140670776, "logps/chosen": -0.008968767710030079, "logps/rejected": -1.5896055698394775, "loss": 2.3966, "nll_loss": 0.5967886447906494, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008968767942860723, "rewards/margins": 0.15806369483470917, "rewards/rejected": -0.15896056592464447, "step": 2970 }, { "epoch": 2.0546334716459196, "grad_norm": 4.377399921417236, "learning_rate": 4.4140925157522674e-05, "log_odds_chosen": 8.596623420715332, "log_odds_ratio": -0.0039055885281413794, "logits/chosen": -0.5529035329818726, "logits/rejected": -0.5938980579376221, "logps/chosen": -0.01188119500875473, "logps/rejected": -1.5860004425048828, "loss": 1.7488, "nll_loss": 0.43680238723754883, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011881195241585374, "rewards/margins": 0.15741194784641266, "rewards/rejected": -0.15860004723072052, "step": 2971 }, { "epoch": 2.0553250345781464, "grad_norm": 5.296402454376221, "learning_rate": 4.413708314123252e-05, "log_odds_chosen": 7.072319030761719, "log_odds_ratio": -0.008747157640755177, "logits/chosen": -0.9367965459823608, "logits/rejected": -0.9441543817520142, "logps/chosen": -0.01353341992944479, "logps/rejected": -1.0854418277740479, "loss": 3.3855, "nll_loss": 0.8455039858818054, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013533420860767365, "rewards/margins": 0.10719083249568939, "rewards/rejected": -0.10854417830705643, "step": 2972 }, { "epoch": 2.0560165975103732, "grad_norm": 9.635480880737305, "learning_rate": 4.413324112494237e-05, "log_odds_chosen": 5.131409645080566, "log_odds_ratio": -0.15523457527160645, "logits/chosen": -0.8282453417778015, "logits/rejected": -0.8770512938499451, "logps/chosen": -0.06053111329674721, "logps/rejected": -1.2064192295074463, "loss": 2.7522, "nll_loss": 0.6725161075592041, "rewards/accuracies": 0.875, "rewards/chosen": -0.006053111050277948, "rewards/margins": 0.11458881199359894, "rewards/rejected": -0.12064193189144135, "step": 2973 }, { "epoch": 2.0567081604426, "grad_norm": 8.57661247253418, "learning_rate": 4.4129399108652225e-05, "log_odds_chosen": 4.66064453125, "log_odds_ratio": -0.11506712436676025, "logits/chosen": -0.6556863188743591, "logits/rejected": -0.6985541582107544, "logps/chosen": -0.03705421835184097, "logps/rejected": -0.6838787794113159, "loss": 2.855, "nll_loss": 0.7022481560707092, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037054221611469984, "rewards/margins": 0.06468245387077332, "rewards/rejected": -0.06838788092136383, "step": 2974 }, { "epoch": 2.057399723374827, "grad_norm": 3.915243148803711, "learning_rate": 4.412555709236208e-05, "log_odds_chosen": 6.537201404571533, "log_odds_ratio": -0.006676637101918459, "logits/chosen": -0.4104037880897522, "logits/rejected": -0.4930589199066162, "logps/chosen": -0.036588139832019806, "logps/rejected": -1.1748409271240234, "loss": 1.939, "nll_loss": 0.4840763211250305, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036588143557310104, "rewards/margins": 0.11382529139518738, "rewards/rejected": -0.11748410016298294, "step": 2975 }, { "epoch": 2.0580912863070537, "grad_norm": 4.104969501495361, "learning_rate": 4.412171507607192e-05, "log_odds_chosen": 7.7819671630859375, "log_odds_ratio": -0.00965783093124628, "logits/chosen": -0.36368995904922485, "logits/rejected": -0.40193721652030945, "logps/chosen": -0.008413492701947689, "logps/rejected": -1.0480165481567383, "loss": 1.8093, "nll_loss": 0.45135873556137085, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008413493051193655, "rewards/margins": 0.1039603054523468, "rewards/rejected": -0.10480165481567383, "step": 2976 }, { "epoch": 2.0587828492392806, "grad_norm": 5.845517635345459, "learning_rate": 4.4117873059781775e-05, "log_odds_chosen": 6.87288761138916, "log_odds_ratio": -0.02248889021575451, "logits/chosen": -0.6869525909423828, "logits/rejected": -0.761170506477356, "logps/chosen": -0.08527921140193939, "logps/rejected": -1.6256699562072754, "loss": 2.9241, "nll_loss": 0.7287745475769043, "rewards/accuracies": 1.0, "rewards/chosen": -0.008527921512722969, "rewards/margins": 0.15403907001018524, "rewards/rejected": -0.16256700456142426, "step": 2977 }, { "epoch": 2.0594744121715074, "grad_norm": 8.611862182617188, "learning_rate": 4.411403104349163e-05, "log_odds_chosen": 6.731040954589844, "log_odds_ratio": -0.0147066880017519, "logits/chosen": -0.5096957087516785, "logits/rejected": -0.6066713929176331, "logps/chosen": -0.025971077382564545, "logps/rejected": -1.575836181640625, "loss": 2.8274, "nll_loss": 0.70537930727005, "rewards/accuracies": 1.0, "rewards/chosen": -0.002597107784822583, "rewards/margins": 0.15498653054237366, "rewards/rejected": -0.15758362412452698, "step": 2978 }, { "epoch": 2.0601659751037342, "grad_norm": 5.3427863121032715, "learning_rate": 4.4110189027201474e-05, "log_odds_chosen": 4.761931419372559, "log_odds_ratio": -0.08862251043319702, "logits/chosen": -0.3233840763568878, "logits/rejected": -0.41529786586761475, "logps/chosen": -0.052310310304164886, "logps/rejected": -1.16804039478302, "loss": 2.4812, "nll_loss": 0.6114499568939209, "rewards/accuracies": 1.0, "rewards/chosen": -0.005231031216681004, "rewards/margins": 0.11157301068305969, "rewards/rejected": -0.11680404841899872, "step": 2979 }, { "epoch": 2.060857538035961, "grad_norm": 4.2136125564575195, "learning_rate": 4.410634701091133e-05, "log_odds_chosen": 6.927209854125977, "log_odds_ratio": -0.01120884157717228, "logits/chosen": -0.4469285011291504, "logits/rejected": -0.5098540782928467, "logps/chosen": -0.0214360561221838, "logps/rejected": -1.3919093608856201, "loss": 2.3812, "nll_loss": 0.5941844582557678, "rewards/accuracies": 1.0, "rewards/chosen": -0.00214360561221838, "rewards/margins": 0.13704732060432434, "rewards/rejected": -0.1391909271478653, "step": 2980 }, { "epoch": 2.061549100968188, "grad_norm": 10.168533325195312, "learning_rate": 4.410250499462118e-05, "log_odds_chosen": 5.533295154571533, "log_odds_ratio": -0.11421045660972595, "logits/chosen": -0.6414197683334351, "logits/rejected": -0.6973565816879272, "logps/chosen": -0.030152834951877594, "logps/rejected": -0.9524630308151245, "loss": 3.0553, "nll_loss": 0.7524070143699646, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030152834951877594, "rewards/margins": 0.09223102033138275, "rewards/rejected": -0.09524630755186081, "step": 2981 }, { "epoch": 2.0622406639004147, "grad_norm": 8.804760932922363, "learning_rate": 4.409866297833103e-05, "log_odds_chosen": 7.190434455871582, "log_odds_ratio": -0.01756965182721615, "logits/chosen": -0.7176415324211121, "logits/rejected": -0.7547638416290283, "logps/chosen": -0.02191227488219738, "logps/rejected": -1.1347317695617676, "loss": 2.602, "nll_loss": 0.6487484574317932, "rewards/accuracies": 1.0, "rewards/chosen": -0.002191227627918124, "rewards/margins": 0.11128196120262146, "rewards/rejected": -0.11347319185733795, "step": 2982 }, { "epoch": 2.0629322268326415, "grad_norm": 6.621882438659668, "learning_rate": 4.409482096204088e-05, "log_odds_chosen": 8.03929615020752, "log_odds_ratio": -0.003591003129258752, "logits/chosen": -0.6009180545806885, "logits/rejected": -0.6958531737327576, "logps/chosen": -0.003013760782778263, "logps/rejected": -1.226391315460205, "loss": 2.0684, "nll_loss": 0.5167450904846191, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003013760724570602, "rewards/margins": 0.12233775854110718, "rewards/rejected": -0.12263913452625275, "step": 2983 }, { "epoch": 2.0636237897648684, "grad_norm": 10.417600631713867, "learning_rate": 4.4090978945750736e-05, "log_odds_chosen": 6.877012729644775, "log_odds_ratio": -0.10523069649934769, "logits/chosen": -0.5266302227973938, "logits/rejected": -0.6316937804222107, "logps/chosen": -0.034719862043857574, "logps/rejected": -1.2791069746017456, "loss": 1.802, "nll_loss": 0.439973920583725, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034719863906502724, "rewards/margins": 0.12443870306015015, "rewards/rejected": -0.12791068851947784, "step": 2984 }, { "epoch": 2.064315352697095, "grad_norm": 7.508418560028076, "learning_rate": 4.408713692946058e-05, "log_odds_chosen": 7.389313697814941, "log_odds_ratio": -0.0030453759245574474, "logits/chosen": -0.6497970223426819, "logits/rejected": -0.7042725682258606, "logps/chosen": -0.002656069817021489, "logps/rejected": -1.0196506977081299, "loss": 2.5648, "nll_loss": 0.6408883929252625, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026560697006061673, "rewards/margins": 0.10169944912195206, "rewards/rejected": -0.10196506232023239, "step": 2985 }, { "epoch": 2.0650069156293225, "grad_norm": 9.812541007995605, "learning_rate": 4.4083294913170434e-05, "log_odds_chosen": 7.382270336151123, "log_odds_ratio": -0.036466456949710846, "logits/chosen": -0.7856600284576416, "logits/rejected": -0.8382467031478882, "logps/chosen": -0.020869005471467972, "logps/rejected": -1.2076259851455688, "loss": 3.1957, "nll_loss": 0.7952706813812256, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020869006402790546, "rewards/margins": 0.11867569386959076, "rewards/rejected": -0.12076259404420853, "step": 2986 }, { "epoch": 2.0656984785615493, "grad_norm": 5.368399143218994, "learning_rate": 4.4079452896880286e-05, "log_odds_chosen": 7.302699089050293, "log_odds_ratio": -0.0016621847171336412, "logits/chosen": -0.5824560523033142, "logits/rejected": -0.632736086845398, "logps/chosen": -0.0023292091209441423, "logps/rejected": -1.1314865350723267, "loss": 1.9341, "nll_loss": 0.4833478331565857, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023292092373594642, "rewards/margins": 0.11291573196649551, "rewards/rejected": -0.11314865946769714, "step": 2987 }, { "epoch": 2.066390041493776, "grad_norm": 6.796201705932617, "learning_rate": 4.407561088059013e-05, "log_odds_chosen": 7.102804660797119, "log_odds_ratio": -0.02306043915450573, "logits/chosen": -0.41754207015037537, "logits/rejected": -0.51899653673172, "logps/chosen": -0.028687093406915665, "logps/rejected": -1.1891157627105713, "loss": 2.5866, "nll_loss": 0.644343376159668, "rewards/accuracies": 1.0, "rewards/chosen": -0.002868709387257695, "rewards/margins": 0.11604287475347519, "rewards/rejected": -0.11891157925128937, "step": 2988 }, { "epoch": 2.067081604426003, "grad_norm": 9.230788230895996, "learning_rate": 4.407176886429999e-05, "log_odds_chosen": 5.161102294921875, "log_odds_ratio": -0.2189641296863556, "logits/chosen": -0.5901562571525574, "logits/rejected": -0.6268595457077026, "logps/chosen": -0.10483792424201965, "logps/rejected": -1.382750153541565, "loss": 2.6292, "nll_loss": 0.6354115605354309, "rewards/accuracies": 0.875, "rewards/chosen": -0.01048379298299551, "rewards/margins": 0.12779122591018677, "rewards/rejected": -0.13827502727508545, "step": 2989 }, { "epoch": 2.06777316735823, "grad_norm": 4.151891708374023, "learning_rate": 4.406792684800984e-05, "log_odds_chosen": 5.546518325805664, "log_odds_ratio": -0.1175304651260376, "logits/chosen": -0.41287970542907715, "logits/rejected": -0.4530215859413147, "logps/chosen": -0.025290869176387787, "logps/rejected": -0.804233193397522, "loss": 2.3301, "nll_loss": 0.5707738995552063, "rewards/accuracies": 0.875, "rewards/chosen": -0.002529087010771036, "rewards/margins": 0.07789423316717148, "rewards/rejected": -0.08042332530021667, "step": 2990 }, { "epoch": 2.0684647302904566, "grad_norm": 7.8981757164001465, "learning_rate": 4.406408483171969e-05, "log_odds_chosen": 8.113343238830566, "log_odds_ratio": -0.005146768409758806, "logits/chosen": -0.7099666595458984, "logits/rejected": -0.8495659232139587, "logps/chosen": -0.024176517501473427, "logps/rejected": -1.4046846628189087, "loss": 2.4403, "nll_loss": 0.609563946723938, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024176519364118576, "rewards/margins": 0.13805082440376282, "rewards/rejected": -0.14046847820281982, "step": 2991 }, { "epoch": 2.0691562932226835, "grad_norm": 8.642290115356445, "learning_rate": 4.406024281542954e-05, "log_odds_chosen": 7.045675754547119, "log_odds_ratio": -0.01596110127866268, "logits/chosen": -0.534246027469635, "logits/rejected": -0.5317660570144653, "logps/chosen": -0.040739212185144424, "logps/rejected": -1.5891478061676025, "loss": 2.1967, "nll_loss": 0.5475837588310242, "rewards/accuracies": 1.0, "rewards/chosen": -0.004073921591043472, "rewards/margins": 0.1548408567905426, "rewards/rejected": -0.15891478955745697, "step": 2992 }, { "epoch": 2.0698478561549103, "grad_norm": 7.404383659362793, "learning_rate": 4.4056400799139394e-05, "log_odds_chosen": 5.8501434326171875, "log_odds_ratio": -0.00894884578883648, "logits/chosen": -0.43841439485549927, "logits/rejected": -0.47529149055480957, "logps/chosen": -0.015168728306889534, "logps/rejected": -0.8849344849586487, "loss": 2.0077, "nll_loss": 0.5010417699813843, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015168729005381465, "rewards/margins": 0.08697657287120819, "rewards/rejected": -0.08849343657493591, "step": 2993 }, { "epoch": 2.070539419087137, "grad_norm": 7.639882564544678, "learning_rate": 4.405255878284924e-05, "log_odds_chosen": 9.027555465698242, "log_odds_ratio": -0.0001843296631705016, "logits/chosen": -0.3901001811027527, "logits/rejected": -0.3636050820350647, "logps/chosen": -0.0006443070597015321, "logps/rejected": -1.427654504776001, "loss": 2.051, "nll_loss": 0.5127426981925964, "rewards/accuracies": 1.0, "rewards/chosen": -6.443070742534474e-05, "rewards/margins": 0.14270102977752686, "rewards/rejected": -0.14276546239852905, "step": 2994 }, { "epoch": 2.071230982019364, "grad_norm": 14.118929862976074, "learning_rate": 4.404871676655909e-05, "log_odds_chosen": 6.0568623542785645, "log_odds_ratio": -0.5541839599609375, "logits/chosen": -0.5042297840118408, "logits/rejected": -0.5497844219207764, "logps/chosen": -0.07083805650472641, "logps/rejected": -1.3127648830413818, "loss": 3.5312, "nll_loss": 0.8273938894271851, "rewards/accuracies": 0.875, "rewards/chosen": -0.007083804812282324, "rewards/margins": 0.12419269233942032, "rewards/rejected": -0.13127650320529938, "step": 2995 }, { "epoch": 2.071922544951591, "grad_norm": 5.028932571411133, "learning_rate": 4.4044874750268945e-05, "log_odds_chosen": 6.680631637573242, "log_odds_ratio": -0.13834092020988464, "logits/chosen": -0.1440374255180359, "logits/rejected": -0.12740664184093475, "logps/chosen": -0.09567593038082123, "logps/rejected": -1.1710020303726196, "loss": 2.5045, "nll_loss": 0.6122902035713196, "rewards/accuracies": 0.875, "rewards/chosen": -0.009567593224346638, "rewards/margins": 0.10753262042999268, "rewards/rejected": -0.11710020899772644, "step": 2996 }, { "epoch": 2.0726141078838176, "grad_norm": 6.292712211608887, "learning_rate": 4.404103273397879e-05, "log_odds_chosen": 9.41784954071045, "log_odds_ratio": -0.0002688511158339679, "logits/chosen": -0.6236108541488647, "logits/rejected": -0.7072266340255737, "logps/chosen": -0.0007091419538483024, "logps/rejected": -1.6054515838623047, "loss": 2.2208, "nll_loss": 0.5551624894142151, "rewards/accuracies": 1.0, "rewards/chosen": -7.091420411597937e-05, "rewards/margins": 0.1604742556810379, "rewards/rejected": -0.16054517030715942, "step": 2997 }, { "epoch": 2.0733056708160444, "grad_norm": 5.728390216827393, "learning_rate": 4.403719071768865e-05, "log_odds_chosen": 5.915194988250732, "log_odds_ratio": -0.00786438025534153, "logits/chosen": -0.6329678893089294, "logits/rejected": -0.6185204982757568, "logps/chosen": -0.010668737813830376, "logps/rejected": -1.0165667533874512, "loss": 2.9553, "nll_loss": 0.7380290627479553, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010668738977983594, "rewards/margins": 0.1005897969007492, "rewards/rejected": -0.10165666788816452, "step": 2998 }, { "epoch": 2.0739972337482713, "grad_norm": 8.11888599395752, "learning_rate": 4.4033348701398495e-05, "log_odds_chosen": 7.804757118225098, "log_odds_ratio": -0.0029276611749082804, "logits/chosen": -0.8999743461608887, "logits/rejected": -0.8988440036773682, "logps/chosen": -0.0017784256488084793, "logps/rejected": -1.1857937574386597, "loss": 2.9097, "nll_loss": 0.727120578289032, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017784256488084793, "rewards/margins": 0.11840153485536575, "rewards/rejected": -0.11857938021421432, "step": 2999 }, { "epoch": 2.074688796680498, "grad_norm": 7.151800155639648, "learning_rate": 4.402950668510835e-05, "log_odds_chosen": 4.465782165527344, "log_odds_ratio": -0.09260845929384232, "logits/chosen": -0.7498218417167664, "logits/rejected": -0.7401602864265442, "logps/chosen": -0.08501623570919037, "logps/rejected": -1.4172019958496094, "loss": 3.1935, "nll_loss": 0.7891181111335754, "rewards/accuracies": 1.0, "rewards/chosen": -0.008501622825860977, "rewards/margins": 0.13321858644485474, "rewards/rejected": -0.14172020554542542, "step": 3000 }, { "epoch": 2.075380359612725, "grad_norm": 6.480521202087402, "learning_rate": 4.40256646688182e-05, "log_odds_chosen": 7.112648010253906, "log_odds_ratio": -0.03398454934358597, "logits/chosen": -0.7977691888809204, "logits/rejected": -0.8164126873016357, "logps/chosen": -0.02818971686065197, "logps/rejected": -1.5443800687789917, "loss": 2.9939, "nll_loss": 0.7450749278068542, "rewards/accuracies": 1.0, "rewards/chosen": -0.002818971872329712, "rewards/margins": 0.1516190469264984, "rewards/rejected": -0.15443801879882812, "step": 3001 }, { "epoch": 2.0760719225449518, "grad_norm": 7.503549575805664, "learning_rate": 4.402182265252805e-05, "log_odds_chosen": 7.291573524475098, "log_odds_ratio": -0.0050234016962349415, "logits/chosen": -0.8397652506828308, "logits/rejected": -0.9112769961357117, "logps/chosen": -0.011641757562756538, "logps/rejected": -1.3105417490005493, "loss": 2.9127, "nll_loss": 0.7276700735092163, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011641758028417826, "rewards/margins": 0.12989000976085663, "rewards/rejected": -0.13105419278144836, "step": 3002 }, { "epoch": 2.0767634854771786, "grad_norm": 4.157247543334961, "learning_rate": 4.40179806362379e-05, "log_odds_chosen": 6.718463897705078, "log_odds_ratio": -0.05722730606794357, "logits/chosen": -0.07942191511392593, "logits/rejected": -0.15350845456123352, "logps/chosen": -0.0334065780043602, "logps/rejected": -0.8967019319534302, "loss": 2.0644, "nll_loss": 0.5103872418403625, "rewards/accuracies": 1.0, "rewards/chosen": -0.00334065780043602, "rewards/margins": 0.08632953464984894, "rewards/rejected": -0.08967019617557526, "step": 3003 }, { "epoch": 2.0774550484094054, "grad_norm": 5.905579090118408, "learning_rate": 4.401413861994775e-05, "log_odds_chosen": 5.6199469566345215, "log_odds_ratio": -0.07426194846630096, "logits/chosen": -0.4605045020580292, "logits/rejected": -0.5288490653038025, "logps/chosen": -0.03111214004456997, "logps/rejected": -0.9659241437911987, "loss": 1.8886, "nll_loss": 0.46473127603530884, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031112139113247395, "rewards/margins": 0.09348119795322418, "rewards/rejected": -0.09659241884946823, "step": 3004 }, { "epoch": 2.0781466113416323, "grad_norm": 6.317102432250977, "learning_rate": 4.40102966036576e-05, "log_odds_chosen": 7.723196029663086, "log_odds_ratio": -0.002818305743858218, "logits/chosen": -0.5551360845565796, "logits/rejected": -0.6236424446105957, "logps/chosen": -0.0045334878377616405, "logps/rejected": -1.4982926845550537, "loss": 2.474, "nll_loss": 0.6182283163070679, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004533488245215267, "rewards/margins": 0.14937594532966614, "rewards/rejected": -0.14982928335666656, "step": 3005 }, { "epoch": 2.078838174273859, "grad_norm": 6.065270900726318, "learning_rate": 4.400645458736745e-05, "log_odds_chosen": 7.245368003845215, "log_odds_ratio": -0.004873716738075018, "logits/chosen": -0.5570109486579895, "logits/rejected": -0.5704992413520813, "logps/chosen": -0.0028182133100926876, "logps/rejected": -0.9843683242797852, "loss": 2.09, "nll_loss": 0.5220032930374146, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002818213542923331, "rewards/margins": 0.09815500676631927, "rewards/rejected": -0.09843683242797852, "step": 3006 }, { "epoch": 2.079529737206086, "grad_norm": 6.175274848937988, "learning_rate": 4.40026125710773e-05, "log_odds_chosen": 7.454465866088867, "log_odds_ratio": -0.008208566345274448, "logits/chosen": -0.45918309688568115, "logits/rejected": -0.4840087890625, "logps/chosen": -0.005108486860990524, "logps/rejected": -1.2046600580215454, "loss": 1.7959, "nll_loss": 0.4481605887413025, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005108487675897777, "rewards/margins": 0.1199551522731781, "rewards/rejected": -0.12046600878238678, "step": 3007 }, { "epoch": 2.0802213001383127, "grad_norm": 7.142673492431641, "learning_rate": 4.3998770554787154e-05, "log_odds_chosen": 4.702856540679932, "log_odds_ratio": -0.34338873624801636, "logits/chosen": -0.3096299171447754, "logits/rejected": -0.3472004532814026, "logps/chosen": -0.10073808580636978, "logps/rejected": -1.0663481950759888, "loss": 1.6838, "nll_loss": 0.38661855459213257, "rewards/accuracies": 0.875, "rewards/chosen": -0.010073808953166008, "rewards/margins": 0.0965610146522522, "rewards/rejected": -0.10663482546806335, "step": 3008 }, { "epoch": 2.0809128630705396, "grad_norm": 9.609098434448242, "learning_rate": 4.3994928538497006e-05, "log_odds_chosen": 7.715497016906738, "log_odds_ratio": -0.003823342267423868, "logits/chosen": -0.43299996852874756, "logits/rejected": -0.49476325511932373, "logps/chosen": -0.007047053426504135, "logps/rejected": -1.4771684408187866, "loss": 3.3901, "nll_loss": 0.8471373319625854, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007047054241411388, "rewards/margins": 0.1470121443271637, "rewards/rejected": -0.14771683514118195, "step": 3009 }, { "epoch": 2.0816044260027664, "grad_norm": 9.416679382324219, "learning_rate": 4.399108652220685e-05, "log_odds_chosen": 7.42830753326416, "log_odds_ratio": -0.022011570632457733, "logits/chosen": -0.3753679692745209, "logits/rejected": -0.47189861536026, "logps/chosen": -0.016750670969486237, "logps/rejected": -0.9741970300674438, "loss": 2.1602, "nll_loss": 0.5378579497337341, "rewards/accuracies": 1.0, "rewards/chosen": -0.001675067120231688, "rewards/margins": 0.09574463218450546, "rewards/rejected": -0.09741970151662827, "step": 3010 }, { "epoch": 2.0822959889349932, "grad_norm": 11.991842269897461, "learning_rate": 4.398724450591671e-05, "log_odds_chosen": 5.96860933303833, "log_odds_ratio": -0.2523881793022156, "logits/chosen": -0.1894284188747406, "logits/rejected": -0.2235884815454483, "logps/chosen": -0.036848284304142, "logps/rejected": -0.9814808368682861, "loss": 2.2428, "nll_loss": 0.5354623198509216, "rewards/accuracies": 0.75, "rewards/chosen": -0.0036848282907158136, "rewards/margins": 0.09446325898170471, "rewards/rejected": -0.09814808517694473, "step": 3011 }, { "epoch": 2.08298755186722, "grad_norm": 5.714529514312744, "learning_rate": 4.398340248962656e-05, "log_odds_chosen": 5.944969177246094, "log_odds_ratio": -0.19645489752292633, "logits/chosen": -0.07002492249011993, "logits/rejected": -0.11378204077482224, "logps/chosen": -0.06899984925985336, "logps/rejected": -1.3910918235778809, "loss": 2.6087, "nll_loss": 0.632527768611908, "rewards/accuracies": 0.875, "rewards/chosen": -0.0068999845534563065, "rewards/margins": 0.1322091966867447, "rewards/rejected": -0.13910917937755585, "step": 3012 }, { "epoch": 2.083679114799447, "grad_norm": 7.844751358032227, "learning_rate": 4.397956047333641e-05, "log_odds_chosen": 3.936131715774536, "log_odds_ratio": -0.30492010712623596, "logits/chosen": -0.40647411346435547, "logits/rejected": -0.41465991735458374, "logps/chosen": -0.0794655904173851, "logps/rejected": -0.8100540637969971, "loss": 3.0597, "nll_loss": 0.7344415783882141, "rewards/accuracies": 0.75, "rewards/chosen": -0.007946559228003025, "rewards/margins": 0.0730588510632515, "rewards/rejected": -0.08100540935993195, "step": 3013 }, { "epoch": 2.0843706777316737, "grad_norm": 8.143545150756836, "learning_rate": 4.397571845704626e-05, "log_odds_chosen": 6.678995132446289, "log_odds_ratio": -0.020952634513378143, "logits/chosen": -0.488328754901886, "logits/rejected": -0.5505259037017822, "logps/chosen": -0.03460100293159485, "logps/rejected": -1.2816894054412842, "loss": 2.2789, "nll_loss": 0.5676225423812866, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034601001534610987, "rewards/margins": 0.12470885366201401, "rewards/rejected": -0.1281689554452896, "step": 3014 }, { "epoch": 2.0850622406639006, "grad_norm": 7.773388385772705, "learning_rate": 4.397187644075611e-05, "log_odds_chosen": 7.691577434539795, "log_odds_ratio": -0.0022774143144488335, "logits/chosen": -0.5185723304748535, "logits/rejected": -0.5329963564872742, "logps/chosen": -0.0031612124294042587, "logps/rejected": -1.4423391819000244, "loss": 2.1191, "nll_loss": 0.529556393623352, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031612126622349024, "rewards/margins": 0.14391779899597168, "rewards/rejected": -0.14423391222953796, "step": 3015 }, { "epoch": 2.0857538035961274, "grad_norm": 6.006858825683594, "learning_rate": 4.396803442446596e-05, "log_odds_chosen": 7.957676887512207, "log_odds_ratio": -0.04760212451219559, "logits/chosen": 0.1509585678577423, "logits/rejected": 0.08457720279693604, "logps/chosen": -0.014640584588050842, "logps/rejected": -1.7319622039794922, "loss": 1.9171, "nll_loss": 0.4745044708251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014640585286542773, "rewards/margins": 0.17173215746879578, "rewards/rejected": -0.1731962263584137, "step": 3016 }, { "epoch": 2.086445366528354, "grad_norm": 84.41448211669922, "learning_rate": 4.396419240817581e-05, "log_odds_chosen": 8.378778457641602, "log_odds_ratio": -0.29373475909233093, "logits/chosen": -0.27526938915252686, "logits/rejected": -0.3401549160480499, "logps/chosen": -0.013904515653848648, "logps/rejected": -1.756449818611145, "loss": 3.8393, "nll_loss": 0.9304454326629639, "rewards/accuracies": 0.875, "rewards/chosen": -0.001390451448969543, "rewards/margins": 0.17425453662872314, "rewards/rejected": -0.17564497888088226, "step": 3017 }, { "epoch": 2.087136929460581, "grad_norm": 8.896300315856934, "learning_rate": 4.3960350391885665e-05, "log_odds_chosen": 5.943868160247803, "log_odds_ratio": -0.09057852625846863, "logits/chosen": -0.29931822419166565, "logits/rejected": -0.28687506914138794, "logps/chosen": -0.018265876919031143, "logps/rejected": -0.9328300952911377, "loss": 2.5869, "nll_loss": 0.6376628279685974, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018265878316015005, "rewards/margins": 0.09145642817020416, "rewards/rejected": -0.09328301250934601, "step": 3018 }, { "epoch": 2.087828492392808, "grad_norm": 9.654539108276367, "learning_rate": 4.395650837559551e-05, "log_odds_chosen": 6.187833786010742, "log_odds_ratio": -0.17915089428424835, "logits/chosen": -0.13763611018657684, "logits/rejected": -0.1429610550403595, "logps/chosen": -0.10990156978368759, "logps/rejected": -2.195013999938965, "loss": 2.9133, "nll_loss": 0.7104144096374512, "rewards/accuracies": 0.875, "rewards/chosen": -0.010990156792104244, "rewards/margins": 0.20851124823093414, "rewards/rejected": -0.21950142085552216, "step": 3019 }, { "epoch": 2.0885200553250347, "grad_norm": 5.395124435424805, "learning_rate": 4.395266635930537e-05, "log_odds_chosen": 5.991332054138184, "log_odds_ratio": -0.06997621804475784, "logits/chosen": -0.45609647035598755, "logits/rejected": -0.4964160919189453, "logps/chosen": -0.03144243732094765, "logps/rejected": -1.150253176689148, "loss": 2.0452, "nll_loss": 0.5043097734451294, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031442439649254084, "rewards/margins": 0.1118810772895813, "rewards/rejected": -0.11502531915903091, "step": 3020 }, { "epoch": 2.0892116182572615, "grad_norm": 9.584518432617188, "learning_rate": 4.3948824343015215e-05, "log_odds_chosen": 5.3715128898620605, "log_odds_ratio": -0.03806905820965767, "logits/chosen": -0.29475364089012146, "logits/rejected": -0.3395359218120575, "logps/chosen": -0.08184289932250977, "logps/rejected": -1.1609790325164795, "loss": 2.3293, "nll_loss": 0.5785099864006042, "rewards/accuracies": 1.0, "rewards/chosen": -0.008184290491044521, "rewards/margins": 0.10791360586881638, "rewards/rejected": -0.11609790474176407, "step": 3021 }, { "epoch": 2.0899031811894884, "grad_norm": 8.217158317565918, "learning_rate": 4.394498232672507e-05, "log_odds_chosen": 6.2889580726623535, "log_odds_ratio": -0.016910960897803307, "logits/chosen": -0.6262481808662415, "logits/rejected": -0.6448217630386353, "logps/chosen": -0.024054640904068947, "logps/rejected": -1.1732820272445679, "loss": 2.7227, "nll_loss": 0.6789869070053101, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024054639507085085, "rewards/margins": 0.11492274701595306, "rewards/rejected": -0.11732819676399231, "step": 3022 }, { "epoch": 2.090594744121715, "grad_norm": 9.62985897064209, "learning_rate": 4.394114031043492e-05, "log_odds_chosen": 5.910818099975586, "log_odds_ratio": -0.05642978847026825, "logits/chosen": -0.41197532415390015, "logits/rejected": -0.4623297452926636, "logps/chosen": -0.025548553094267845, "logps/rejected": -1.2217142581939697, "loss": 3.3591, "nll_loss": 0.8341245055198669, "rewards/accuracies": 1.0, "rewards/chosen": -0.002554855542257428, "rewards/margins": 0.1196165680885315, "rewards/rejected": -0.12217142432928085, "step": 3023 }, { "epoch": 2.091286307053942, "grad_norm": 6.7654709815979, "learning_rate": 4.3937298294144766e-05, "log_odds_chosen": 6.483285427093506, "log_odds_ratio": -0.045767199248075485, "logits/chosen": -0.5368460416793823, "logits/rejected": -0.5440640449523926, "logps/chosen": -0.05254742503166199, "logps/rejected": -1.434328317642212, "loss": 2.4722, "nll_loss": 0.6134682893753052, "rewards/accuracies": 1.0, "rewards/chosen": -0.005254742689430714, "rewards/margins": 0.13817809522151947, "rewards/rejected": -0.1434328258037567, "step": 3024 }, { "epoch": 2.091977869986169, "grad_norm": 7.35762357711792, "learning_rate": 4.393345627785462e-05, "log_odds_chosen": 6.339735507965088, "log_odds_ratio": -0.044218193739652634, "logits/chosen": -0.454628050327301, "logits/rejected": -0.5306775569915771, "logps/chosen": -0.0601518377661705, "logps/rejected": -1.0033906698226929, "loss": 2.0879, "nll_loss": 0.5175583958625793, "rewards/accuracies": 1.0, "rewards/chosen": -0.006015183869749308, "rewards/margins": 0.09432388097047806, "rewards/rejected": -0.10033906996250153, "step": 3025 }, { "epoch": 2.0926694329183957, "grad_norm": 11.857747077941895, "learning_rate": 4.392961426156447e-05, "log_odds_chosen": 6.992668628692627, "log_odds_ratio": -0.021204454824328423, "logits/chosen": -0.3737304210662842, "logits/rejected": -0.436320960521698, "logps/chosen": -0.06880363821983337, "logps/rejected": -1.2175607681274414, "loss": 2.5603, "nll_loss": 0.6379530429840088, "rewards/accuracies": 1.0, "rewards/chosen": -0.006880364380776882, "rewards/margins": 0.11487573385238647, "rewards/rejected": -0.12175609171390533, "step": 3026 }, { "epoch": 2.0933609958506225, "grad_norm": 10.206303596496582, "learning_rate": 4.392577224527432e-05, "log_odds_chosen": 8.618885040283203, "log_odds_ratio": -0.023046551272273064, "logits/chosen": -0.2979205846786499, "logits/rejected": -0.39356330037117004, "logps/chosen": -0.022700179368257523, "logps/rejected": -2.0342507362365723, "loss": 1.8279, "nll_loss": 0.4546803832054138, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022700177505612373, "rewards/margins": 0.20115506649017334, "rewards/rejected": -0.2034250795841217, "step": 3027 }, { "epoch": 2.0940525587828493, "grad_norm": 6.319029808044434, "learning_rate": 4.392193022898417e-05, "log_odds_chosen": 5.630053520202637, "log_odds_ratio": -0.29856622219085693, "logits/chosen": -0.4813880920410156, "logits/rejected": -0.5345386266708374, "logps/chosen": -0.05630096048116684, "logps/rejected": -0.8578755855560303, "loss": 1.5996, "nll_loss": 0.37004461884498596, "rewards/accuracies": 0.875, "rewards/chosen": -0.005630096886307001, "rewards/margins": 0.08015745878219604, "rewards/rejected": -0.08578755706548691, "step": 3028 }, { "epoch": 2.094744121715076, "grad_norm": 7.860509872436523, "learning_rate": 4.391808821269403e-05, "log_odds_chosen": 7.750638008117676, "log_odds_ratio": -0.0035974266938865185, "logits/chosen": -0.4576967656612396, "logits/rejected": -0.5285370945930481, "logps/chosen": -0.018102500587701797, "logps/rejected": -1.5745806694030762, "loss": 2.7199, "nll_loss": 0.6796204447746277, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018102501053363085, "rewards/margins": 0.15564781427383423, "rewards/rejected": -0.15745806694030762, "step": 3029 }, { "epoch": 2.095435684647303, "grad_norm": 7.408651828765869, "learning_rate": 4.3914246196403874e-05, "log_odds_chosen": 6.946645736694336, "log_odds_ratio": -0.02821875549852848, "logits/chosen": -0.214560866355896, "logits/rejected": -0.2366018295288086, "logps/chosen": -0.012655358761548996, "logps/rejected": -1.053880214691162, "loss": 1.9313, "nll_loss": 0.48000746965408325, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012655358295887709, "rewards/margins": 0.10412249714136124, "rewards/rejected": -0.10538802295923233, "step": 3030 }, { "epoch": 2.09612724757953, "grad_norm": 7.017563819885254, "learning_rate": 4.3910404180113726e-05, "log_odds_chosen": 8.451176643371582, "log_odds_ratio": -0.0035266894847154617, "logits/chosen": -0.2520374059677124, "logits/rejected": -0.32749876379966736, "logps/chosen": -0.007094074506312609, "logps/rejected": -1.1015607118606567, "loss": 2.426, "nll_loss": 0.6061570644378662, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007094074971973896, "rewards/margins": 0.10944667458534241, "rewards/rejected": -0.11015607416629791, "step": 3031 }, { "epoch": 2.0968188105117567, "grad_norm": 5.60612154006958, "learning_rate": 4.390656216382358e-05, "log_odds_chosen": 6.388954162597656, "log_odds_ratio": -0.03232260048389435, "logits/chosen": -0.354230135679245, "logits/rejected": -0.5040380954742432, "logps/chosen": -0.043253205716609955, "logps/rejected": -0.8980000019073486, "loss": 1.9146, "nll_loss": 0.47541293501853943, "rewards/accuracies": 1.0, "rewards/chosen": -0.004325320012867451, "rewards/margins": 0.08547468483448029, "rewards/rejected": -0.08980000019073486, "step": 3032 }, { "epoch": 2.0975103734439835, "grad_norm": 9.741111755371094, "learning_rate": 4.3902720147533424e-05, "log_odds_chosen": 5.344499588012695, "log_odds_ratio": -0.20701748132705688, "logits/chosen": -0.30035096406936646, "logits/rejected": -0.2930985689163208, "logps/chosen": -0.0543060302734375, "logps/rejected": -1.173159122467041, "loss": 3.3527, "nll_loss": 0.8174670934677124, "rewards/accuracies": 0.875, "rewards/chosen": -0.0054306029342114925, "rewards/margins": 0.11188530176877975, "rewards/rejected": -0.11731590330600739, "step": 3033 }, { "epoch": 2.0982019363762103, "grad_norm": 8.195241928100586, "learning_rate": 4.389887813124328e-05, "log_odds_chosen": 5.892043590545654, "log_odds_ratio": -0.021361295133829117, "logits/chosen": -0.43137770891189575, "logits/rejected": -0.49864283204078674, "logps/chosen": -0.049551934003829956, "logps/rejected": -1.3703354597091675, "loss": 2.82, "nll_loss": 0.7028669714927673, "rewards/accuracies": 1.0, "rewards/chosen": -0.004955193493515253, "rewards/margins": 0.1320783495903015, "rewards/rejected": -0.13703353703022003, "step": 3034 }, { "epoch": 2.098893499308437, "grad_norm": 10.782358169555664, "learning_rate": 4.389503611495313e-05, "log_odds_chosen": 6.741562843322754, "log_odds_ratio": -0.02253532223403454, "logits/chosen": -0.5028831362724304, "logits/rejected": -0.5559301376342773, "logps/chosen": -0.008717098273336887, "logps/rejected": -0.9764584898948669, "loss": 2.835, "nll_loss": 0.7064973711967468, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008717098389752209, "rewards/margins": 0.0967741385102272, "rewards/rejected": -0.0976458415389061, "step": 3035 }, { "epoch": 2.099585062240664, "grad_norm": 8.789740562438965, "learning_rate": 4.389119409866298e-05, "log_odds_chosen": 7.403459548950195, "log_odds_ratio": -0.022690054029226303, "logits/chosen": -0.4163126051425934, "logits/rejected": -0.45792490243911743, "logps/chosen": -0.014534620568156242, "logps/rejected": -1.4380933046340942, "loss": 2.1172, "nll_loss": 0.5270276665687561, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014534619404003024, "rewards/margins": 0.14235585927963257, "rewards/rejected": -0.14380933344364166, "step": 3036 }, { "epoch": 2.100276625172891, "grad_norm": 12.233290672302246, "learning_rate": 4.388735208237283e-05, "log_odds_chosen": 3.0194010734558105, "log_odds_ratio": -0.5815798044204712, "logits/chosen": -0.6315550804138184, "logits/rejected": -0.6456758975982666, "logps/chosen": -0.10260287672281265, "logps/rejected": -0.5657974481582642, "loss": 2.8741, "nll_loss": 0.6603554487228394, "rewards/accuracies": 0.875, "rewards/chosen": -0.01026028860360384, "rewards/margins": 0.04631945490837097, "rewards/rejected": -0.05657974258065224, "step": 3037 }, { "epoch": 2.1009681881051177, "grad_norm": 3.961130142211914, "learning_rate": 4.3883510066082686e-05, "log_odds_chosen": 6.792495250701904, "log_odds_ratio": -0.010656296275556087, "logits/chosen": -0.26495587825775146, "logits/rejected": -0.4072640836238861, "logps/chosen": -0.005022024270147085, "logps/rejected": -0.6567428112030029, "loss": 1.9578, "nll_loss": 0.4883909225463867, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005022024270147085, "rewards/margins": 0.06517207622528076, "rewards/rejected": -0.06567428261041641, "step": 3038 }, { "epoch": 2.1016597510373445, "grad_norm": 9.593063354492188, "learning_rate": 4.387966804979253e-05, "log_odds_chosen": 7.462541580200195, "log_odds_ratio": -0.0023812439758330584, "logits/chosen": -0.4061529040336609, "logits/rejected": -0.5284491777420044, "logps/chosen": -0.0027313565369695425, "logps/rejected": -1.1370166540145874, "loss": 2.463, "nll_loss": 0.615522563457489, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027313566533848643, "rewards/margins": 0.11342853307723999, "rewards/rejected": -0.11370167136192322, "step": 3039 }, { "epoch": 2.1023513139695713, "grad_norm": 3.8307673931121826, "learning_rate": 4.3875826033502385e-05, "log_odds_chosen": 7.653198719024658, "log_odds_ratio": -0.03865279629826546, "logits/chosen": -0.058998480439186096, "logits/rejected": -0.08295667916536331, "logps/chosen": -0.013944515958428383, "logps/rejected": -1.039737582206726, "loss": 2.3333, "nll_loss": 0.5794506072998047, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013944517122581601, "rewards/margins": 0.10257931053638458, "rewards/rejected": -0.10397376120090485, "step": 3040 }, { "epoch": 2.103042876901798, "grad_norm": 11.61815071105957, "learning_rate": 4.387198401721224e-05, "log_odds_chosen": 7.312662124633789, "log_odds_ratio": -0.004587090574204922, "logits/chosen": -0.631591796875, "logits/rejected": -0.6536903381347656, "logps/chosen": -0.02399228885769844, "logps/rejected": -1.5654449462890625, "loss": 3.4732, "nll_loss": 0.8678387403488159, "rewards/accuracies": 1.0, "rewards/chosen": -0.002399228746071458, "rewards/margins": 0.1541452705860138, "rewards/rejected": -0.156544491648674, "step": 3041 }, { "epoch": 2.103734439834025, "grad_norm": 5.323784351348877, "learning_rate": 4.386814200092208e-05, "log_odds_chosen": 6.599758148193359, "log_odds_ratio": -0.11133424937725067, "logits/chosen": -0.5072517991065979, "logits/rejected": -0.5091153383255005, "logps/chosen": -0.0216111671179533, "logps/rejected": -1.1363182067871094, "loss": 2.4531, "nll_loss": 0.6021370887756348, "rewards/accuracies": 0.875, "rewards/chosen": -0.0021611163392663, "rewards/margins": 0.11147069931030273, "rewards/rejected": -0.11363181471824646, "step": 3042 }, { "epoch": 2.104426002766252, "grad_norm": 6.221068382263184, "learning_rate": 4.3864299984631935e-05, "log_odds_chosen": 7.854094505310059, "log_odds_ratio": -0.0015729665756225586, "logits/chosen": -0.6950536370277405, "logits/rejected": -0.7626223564147949, "logps/chosen": -0.001281562028452754, "logps/rejected": -1.0853941440582275, "loss": 2.3986, "nll_loss": 0.5995044708251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012815619993489236, "rewards/margins": 0.10841125249862671, "rewards/rejected": -0.10853941738605499, "step": 3043 }, { "epoch": 2.1051175656984786, "grad_norm": 9.613995552062988, "learning_rate": 4.386045796834179e-05, "log_odds_chosen": 5.732051849365234, "log_odds_ratio": -0.03842860832810402, "logits/chosen": -0.36531874537467957, "logits/rejected": -0.38595065474510193, "logps/chosen": -0.03054799698293209, "logps/rejected": -1.0037133693695068, "loss": 2.823, "nll_loss": 0.7019094824790955, "rewards/accuracies": 1.0, "rewards/chosen": -0.003054799512028694, "rewards/margins": 0.09731653332710266, "rewards/rejected": -0.1003713309764862, "step": 3044 }, { "epoch": 2.1058091286307055, "grad_norm": 7.104485511779785, "learning_rate": 4.385661595205164e-05, "log_odds_chosen": 7.818532943725586, "log_odds_ratio": -0.0011651016538962722, "logits/chosen": -0.6283485889434814, "logits/rejected": -0.6749677658081055, "logps/chosen": -0.0066756028681993484, "logps/rejected": -1.2415318489074707, "loss": 2.2926, "nll_loss": 0.5730224847793579, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006675602635368705, "rewards/margins": 0.12348562479019165, "rewards/rejected": -0.12415318191051483, "step": 3045 }, { "epoch": 2.1065006915629323, "grad_norm": 8.063847541809082, "learning_rate": 4.3852773935761486e-05, "log_odds_chosen": 7.581356048583984, "log_odds_ratio": -0.0035385629162192345, "logits/chosen": -0.46461209654808044, "logits/rejected": -0.5649842023849487, "logps/chosen": -0.01552728284150362, "logps/rejected": -1.3513633012771606, "loss": 2.725, "nll_loss": 0.6808865070343018, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015527281211689115, "rewards/margins": 0.13358360528945923, "rewards/rejected": -0.13513632118701935, "step": 3046 }, { "epoch": 2.107192254495159, "grad_norm": 10.078218460083008, "learning_rate": 4.3848931919471345e-05, "log_odds_chosen": 6.767643928527832, "log_odds_ratio": -0.0026630214415490627, "logits/chosen": -0.5481566786766052, "logits/rejected": -0.6565913558006287, "logps/chosen": -0.004768464248627424, "logps/rejected": -1.2390985488891602, "loss": 3.2571, "nll_loss": 0.8140066862106323, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047684641322121024, "rewards/margins": 0.12343300879001617, "rewards/rejected": -0.1239098608493805, "step": 3047 }, { "epoch": 2.107883817427386, "grad_norm": 12.023402214050293, "learning_rate": 4.384508990318119e-05, "log_odds_chosen": 4.712347984313965, "log_odds_ratio": -0.2186054289340973, "logits/chosen": -0.5743224024772644, "logits/rejected": -0.5981598496437073, "logps/chosen": -0.04524346441030502, "logps/rejected": -0.4811258614063263, "loss": 2.2951, "nll_loss": 0.5519220232963562, "rewards/accuracies": 0.875, "rewards/chosen": -0.004524346441030502, "rewards/margins": 0.04358823969960213, "rewards/rejected": -0.04811258241534233, "step": 3048 }, { "epoch": 2.108575380359613, "grad_norm": 6.6201677322387695, "learning_rate": 4.384124788689104e-05, "log_odds_chosen": 8.445602416992188, "log_odds_ratio": -0.00592130608856678, "logits/chosen": -0.5682473182678223, "logits/rejected": -0.5630465149879456, "logps/chosen": -0.0022103991359472275, "logps/rejected": -1.4159787893295288, "loss": 2.651, "nll_loss": 0.6621494889259338, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022103989613242447, "rewards/margins": 0.14137683808803558, "rewards/rejected": -0.14159788191318512, "step": 3049 }, { "epoch": 2.1092669432918396, "grad_norm": 10.28715991973877, "learning_rate": 4.3837405870600895e-05, "log_odds_chosen": 6.215579032897949, "log_odds_ratio": -0.005182541906833649, "logits/chosen": -0.6934223771095276, "logits/rejected": -0.7401363849639893, "logps/chosen": -0.006744416896253824, "logps/rejected": -0.9159350991249084, "loss": 3.3619, "nll_loss": 0.8399689197540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006744416896253824, "rewards/margins": 0.09091907739639282, "rewards/rejected": -0.09159351885318756, "step": 3050 }, { "epoch": 2.1099585062240664, "grad_norm": 6.814821243286133, "learning_rate": 4.383356385431074e-05, "log_odds_chosen": 7.128850936889648, "log_odds_ratio": -0.09054242074489594, "logits/chosen": -0.1573888510465622, "logits/rejected": -0.19654060900211334, "logps/chosen": -0.028513900935649872, "logps/rejected": -1.0100467205047607, "loss": 1.8336, "nll_loss": 0.4493419826030731, "rewards/accuracies": 1.0, "rewards/chosen": -0.002851390279829502, "rewards/margins": 0.09815328568220139, "rewards/rejected": -0.10100467503070831, "step": 3051 }, { "epoch": 2.1106500691562933, "grad_norm": 4.588749408721924, "learning_rate": 4.3829721838020594e-05, "log_odds_chosen": 6.3818359375, "log_odds_ratio": -0.09973902255296707, "logits/chosen": -0.4939768314361572, "logits/rejected": -0.46895715594291687, "logps/chosen": -0.0433577224612236, "logps/rejected": -1.110426664352417, "loss": 1.7762, "nll_loss": 0.43407464027404785, "rewards/accuracies": 0.875, "rewards/chosen": -0.0043357727117836475, "rewards/margins": 0.106706902384758, "rewards/rejected": -0.11104267090559006, "step": 3052 }, { "epoch": 2.11134163208852, "grad_norm": 10.691590309143066, "learning_rate": 4.3825879821730446e-05, "log_odds_chosen": 8.708065032958984, "log_odds_ratio": -0.003415337298065424, "logits/chosen": -0.9307934641838074, "logits/rejected": -0.9371286034584045, "logps/chosen": -0.0015783170238137245, "logps/rejected": -1.419217586517334, "loss": 2.508, "nll_loss": 0.6266547441482544, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015783170238137245, "rewards/margins": 0.14176392555236816, "rewards/rejected": -0.1419217586517334, "step": 3053 }, { "epoch": 2.112033195020747, "grad_norm": 14.266855239868164, "learning_rate": 4.38220378054403e-05, "log_odds_chosen": 8.21017837524414, "log_odds_ratio": -0.010857914574444294, "logits/chosen": -0.5948159694671631, "logits/rejected": -0.7014087438583374, "logps/chosen": -0.005287535954266787, "logps/rejected": -1.3881491422653198, "loss": 2.6336, "nll_loss": 0.6573028564453125, "rewards/accuracies": 1.0, "rewards/chosen": -0.000528753618709743, "rewards/margins": 0.13828615844249725, "rewards/rejected": -0.13881491124629974, "step": 3054 }, { "epoch": 2.1127247579529738, "grad_norm": 11.680485725402832, "learning_rate": 4.3818195789150144e-05, "log_odds_chosen": 6.707440376281738, "log_odds_ratio": -0.6255326867103577, "logits/chosen": -0.5511981844902039, "logits/rejected": -0.5761828422546387, "logps/chosen": -0.016642756760120392, "logps/rejected": -1.5115129947662354, "loss": 3.3974, "nll_loss": 0.7867913246154785, "rewards/accuracies": 0.875, "rewards/chosen": -0.0016642757691442966, "rewards/margins": 0.14948701858520508, "rewards/rejected": -0.15115129947662354, "step": 3055 }, { "epoch": 2.1134163208852006, "grad_norm": 7.237359523773193, "learning_rate": 4.381435377286e-05, "log_odds_chosen": 7.038687705993652, "log_odds_ratio": -0.004269938915967941, "logits/chosen": -0.7024118900299072, "logits/rejected": -0.7122198343276978, "logps/chosen": -0.01284304354339838, "logps/rejected": -0.9574891328811646, "loss": 2.6331, "nll_loss": 0.6578375101089478, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012843044241890311, "rewards/margins": 0.09446461498737335, "rewards/rejected": -0.0957489162683487, "step": 3056 }, { "epoch": 2.1141078838174274, "grad_norm": 4.956737041473389, "learning_rate": 4.381051175656985e-05, "log_odds_chosen": 7.141617774963379, "log_odds_ratio": -0.12717103958129883, "logits/chosen": -0.32736361026763916, "logits/rejected": -0.3319385051727295, "logps/chosen": -0.036458250135183334, "logps/rejected": -1.1596159934997559, "loss": 1.3879, "nll_loss": 0.33425214886665344, "rewards/accuracies": 0.875, "rewards/chosen": -0.003645824734121561, "rewards/margins": 0.11231578886508942, "rewards/rejected": -0.11596160382032394, "step": 3057 }, { "epoch": 2.1147994467496543, "grad_norm": 7.007951736450195, "learning_rate": 4.38066697402797e-05, "log_odds_chosen": 6.820135116577148, "log_odds_ratio": -0.06915004551410675, "logits/chosen": -0.6815324425697327, "logits/rejected": -0.7533388733863831, "logps/chosen": -0.02807777002453804, "logps/rejected": -1.120797872543335, "loss": 2.661, "nll_loss": 0.6583237051963806, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028077769093215466, "rewards/margins": 0.10927200317382812, "rewards/rejected": -0.11207978427410126, "step": 3058 }, { "epoch": 2.115491009681881, "grad_norm": 10.030107498168945, "learning_rate": 4.3802827723989554e-05, "log_odds_chosen": 6.217621803283691, "log_odds_ratio": -0.040066175162792206, "logits/chosen": -0.6635290384292603, "logits/rejected": -0.7061796188354492, "logps/chosen": -0.024360649287700653, "logps/rejected": -1.495863914489746, "loss": 3.3484, "nll_loss": 0.8330943584442139, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024360651150345802, "rewards/margins": 0.14715032279491425, "rewards/rejected": -0.14958639442920685, "step": 3059 }, { "epoch": 2.116182572614108, "grad_norm": 8.80070686340332, "learning_rate": 4.37989857076994e-05, "log_odds_chosen": 6.714267730712891, "log_odds_ratio": -0.007191003765910864, "logits/chosen": -0.5821347832679749, "logits/rejected": -0.619955837726593, "logps/chosen": -0.019215121865272522, "logps/rejected": -1.294236660003662, "loss": 2.5476, "nll_loss": 0.636172354221344, "rewards/accuracies": 1.0, "rewards/chosen": -0.001921512302942574, "rewards/margins": 0.12750215828418732, "rewards/rejected": -0.12942366302013397, "step": 3060 }, { "epoch": 2.1168741355463347, "grad_norm": 7.5158514976501465, "learning_rate": 4.379514369140925e-05, "log_odds_chosen": 7.826415538787842, "log_odds_ratio": -0.02342759445309639, "logits/chosen": -0.2783409059047699, "logits/rejected": -0.3393867015838623, "logps/chosen": -0.01329967100173235, "logps/rejected": -2.1055209636688232, "loss": 1.9379, "nll_loss": 0.4821318984031677, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013299670536071062, "rewards/margins": 0.20922213792800903, "rewards/rejected": -0.21055209636688232, "step": 3061 }, { "epoch": 2.1175656984785616, "grad_norm": 4.418914318084717, "learning_rate": 4.3791301675119104e-05, "log_odds_chosen": 6.063641548156738, "log_odds_ratio": -0.015754833817481995, "logits/chosen": -0.22157390415668488, "logits/rejected": -0.30585265159606934, "logps/chosen": -0.058802567422389984, "logps/rejected": -1.6046185493469238, "loss": 2.4442, "nll_loss": 0.6094802618026733, "rewards/accuracies": 1.0, "rewards/chosen": -0.005880257114768028, "rewards/margins": 0.15458157658576965, "rewards/rejected": -0.16046184301376343, "step": 3062 }, { "epoch": 2.1182572614107884, "grad_norm": 9.49417781829834, "learning_rate": 4.378745965882896e-05, "log_odds_chosen": 6.562379360198975, "log_odds_ratio": -0.07851752638816833, "logits/chosen": -0.41256842017173767, "logits/rejected": -0.4614778161048889, "logps/chosen": -0.018696602433919907, "logps/rejected": -1.1838551759719849, "loss": 3.4774, "nll_loss": 0.8615073561668396, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018696601036936045, "rewards/margins": 0.1165158599615097, "rewards/rejected": -0.11838552355766296, "step": 3063 }, { "epoch": 2.1189488243430152, "grad_norm": 6.9558515548706055, "learning_rate": 4.37836176425388e-05, "log_odds_chosen": 7.499703884124756, "log_odds_ratio": -0.004096648655831814, "logits/chosen": -0.224429190158844, "logits/rejected": -0.3123741149902344, "logps/chosen": -0.012242062948644161, "logps/rejected": -1.5422005653381348, "loss": 2.1784, "nll_loss": 0.5441972017288208, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012242061784490943, "rewards/margins": 0.15299585461616516, "rewards/rejected": -0.15422005951404572, "step": 3064 }, { "epoch": 2.119640387275242, "grad_norm": 6.659056186676025, "learning_rate": 4.377977562624866e-05, "log_odds_chosen": 6.294110298156738, "log_odds_ratio": -0.1503944993019104, "logits/chosen": -0.1056843176484108, "logits/rejected": -0.10444828867912292, "logps/chosen": -0.037452977150678635, "logps/rejected": -1.0243463516235352, "loss": 2.2648, "nll_loss": 0.5511507391929626, "rewards/accuracies": 0.875, "rewards/chosen": -0.0037452979013323784, "rewards/margins": 0.09868934750556946, "rewards/rejected": -0.10243464261293411, "step": 3065 }, { "epoch": 2.120331950207469, "grad_norm": 6.566095352172852, "learning_rate": 4.377593360995851e-05, "log_odds_chosen": 4.200222969055176, "log_odds_ratio": -0.04109200835227966, "logits/chosen": -0.42699283361434937, "logits/rejected": -0.46811172366142273, "logps/chosen": -0.04961252957582474, "logps/rejected": -1.147521734237671, "loss": 3.3842, "nll_loss": 0.8419307470321655, "rewards/accuracies": 1.0, "rewards/chosen": -0.004961253143846989, "rewards/margins": 0.10979092121124268, "rewards/rejected": -0.11475217342376709, "step": 3066 }, { "epoch": 2.1210235131396957, "grad_norm": 7.800382137298584, "learning_rate": 4.377209159366836e-05, "log_odds_chosen": 3.6615090370178223, "log_odds_ratio": -0.11077920347452164, "logits/chosen": -0.49535033106803894, "logits/rejected": -0.5360032320022583, "logps/chosen": -0.09559698402881622, "logps/rejected": -1.1526672840118408, "loss": 1.8968, "nll_loss": 0.46311768889427185, "rewards/accuracies": 1.0, "rewards/chosen": -0.009559698402881622, "rewards/margins": 0.10570703446865082, "rewards/rejected": -0.11526672542095184, "step": 3067 }, { "epoch": 2.1217150760719226, "grad_norm": 13.618182182312012, "learning_rate": 4.376824957737821e-05, "log_odds_chosen": 7.400300979614258, "log_odds_ratio": -0.23561367392539978, "logits/chosen": -0.46377870440483093, "logits/rejected": -0.48604080080986023, "logps/chosen": -0.03631383925676346, "logps/rejected": -1.319928526878357, "loss": 3.283, "nll_loss": 0.7972000241279602, "rewards/accuracies": 0.875, "rewards/chosen": -0.003631383879110217, "rewards/margins": 0.12836146354675293, "rewards/rejected": -0.13199284672737122, "step": 3068 }, { "epoch": 2.1224066390041494, "grad_norm": 8.135883331298828, "learning_rate": 4.376440756108806e-05, "log_odds_chosen": 6.368939399719238, "log_odds_ratio": -0.22905480861663818, "logits/chosen": -0.6161007881164551, "logits/rejected": -0.6187179088592529, "logps/chosen": -0.035729553550481796, "logps/rejected": -0.9610961079597473, "loss": 1.7314, "nll_loss": 0.4099564850330353, "rewards/accuracies": 0.875, "rewards/chosen": -0.003572955960407853, "rewards/margins": 0.09253665804862976, "rewards/rejected": -0.09610961377620697, "step": 3069 }, { "epoch": 2.123098201936376, "grad_norm": 9.88579273223877, "learning_rate": 4.376056554479791e-05, "log_odds_chosen": 6.737878799438477, "log_odds_ratio": -0.027707181870937347, "logits/chosen": -0.20080123841762543, "logits/rejected": -0.25162220001220703, "logps/chosen": -0.014487730339169502, "logps/rejected": -1.53798246383667, "loss": 2.5172, "nll_loss": 0.6265227794647217, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014487729640677571, "rewards/margins": 0.15234947204589844, "rewards/rejected": -0.15379825234413147, "step": 3070 }, { "epoch": 2.123789764868603, "grad_norm": 7.084987640380859, "learning_rate": 4.375672352850776e-05, "log_odds_chosen": 6.6261749267578125, "log_odds_ratio": -0.028139235451817513, "logits/chosen": -0.6384437084197998, "logits/rejected": -0.6909653544425964, "logps/chosen": -0.021311424672603607, "logps/rejected": -1.0407835245132446, "loss": 2.2195, "nll_loss": 0.5520578622817993, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021311421878635883, "rewards/margins": 0.10194721817970276, "rewards/rejected": -0.10407835245132446, "step": 3071 }, { "epoch": 2.12448132780083, "grad_norm": 10.54240894317627, "learning_rate": 4.3752881512217615e-05, "log_odds_chosen": 4.758946418762207, "log_odds_ratio": -0.662519633769989, "logits/chosen": -0.6168556809425354, "logits/rejected": -0.7287572622299194, "logps/chosen": -0.06607799232006073, "logps/rejected": -0.7929989099502563, "loss": 2.9218, "nll_loss": 0.6641974449157715, "rewards/accuracies": 0.625, "rewards/chosen": -0.006607798859477043, "rewards/margins": 0.07269209623336792, "rewards/rejected": -0.07929988205432892, "step": 3072 }, { "epoch": 2.1251728907330567, "grad_norm": 12.808792114257812, "learning_rate": 4.374903949592746e-05, "log_odds_chosen": 3.1861891746520996, "log_odds_ratio": -0.6998291015625, "logits/chosen": -0.7169103026390076, "logits/rejected": -0.7344023585319519, "logps/chosen": -0.1427215337753296, "logps/rejected": -0.6728063821792603, "loss": 3.1134, "nll_loss": 0.7083545923233032, "rewards/accuracies": 0.625, "rewards/chosen": -0.014272153377532959, "rewards/margins": 0.05300847813487053, "rewards/rejected": -0.06728063523769379, "step": 3073 }, { "epoch": 2.1258644536652835, "grad_norm": 6.509385585784912, "learning_rate": 4.374519747963732e-05, "log_odds_chosen": 7.967581748962402, "log_odds_ratio": -0.017956677824258804, "logits/chosen": -0.6940062046051025, "logits/rejected": -0.840277910232544, "logps/chosen": -0.06229571998119354, "logps/rejected": -1.6665977239608765, "loss": 2.5872, "nll_loss": 0.645008385181427, "rewards/accuracies": 1.0, "rewards/chosen": -0.0062295724637806416, "rewards/margins": 0.1604302078485489, "rewards/rejected": -0.16665977239608765, "step": 3074 }, { "epoch": 2.1265560165975104, "grad_norm": 7.396805763244629, "learning_rate": 4.3741355463347166e-05, "log_odds_chosen": 6.3950324058532715, "log_odds_ratio": -0.05477265268564224, "logits/chosen": -0.3604324162006378, "logits/rejected": -0.4033384323120117, "logps/chosen": -0.018422357738018036, "logps/rejected": -0.9343924522399902, "loss": 2.7091, "nll_loss": 0.671809196472168, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018422356806695461, "rewards/margins": 0.09159701317548752, "rewards/rejected": -0.09343923628330231, "step": 3075 }, { "epoch": 2.127247579529737, "grad_norm": 7.560407638549805, "learning_rate": 4.373751344705702e-05, "log_odds_chosen": 7.058584690093994, "log_odds_ratio": -0.06325285881757736, "logits/chosen": -0.8072975277900696, "logits/rejected": -0.8635538816452026, "logps/chosen": -0.024707302451133728, "logps/rejected": -1.4035589694976807, "loss": 2.4432, "nll_loss": 0.6044746041297913, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024707301054149866, "rewards/margins": 0.1378851681947708, "rewards/rejected": -0.1403558999300003, "step": 3076 }, { "epoch": 2.127939142461964, "grad_norm": 5.632557392120361, "learning_rate": 4.373367143076687e-05, "log_odds_chosen": 6.698379993438721, "log_odds_ratio": -0.05778937041759491, "logits/chosen": -0.690493106842041, "logits/rejected": -0.6445252895355225, "logps/chosen": -0.016724741086363792, "logps/rejected": -1.3040034770965576, "loss": 2.5376, "nll_loss": 0.6286225318908691, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016724740853533149, "rewards/margins": 0.12872786819934845, "rewards/rejected": -0.13040034472942352, "step": 3077 }, { "epoch": 2.128630705394191, "grad_norm": 68.70565795898438, "learning_rate": 4.3729829414476716e-05, "log_odds_chosen": 5.167508602142334, "log_odds_ratio": -0.539081871509552, "logits/chosen": -0.37192028760910034, "logits/rejected": -0.38595011830329895, "logps/chosen": -0.0928657129406929, "logps/rejected": -1.187703251838684, "loss": 2.6988, "nll_loss": 0.6207873821258545, "rewards/accuracies": 0.875, "rewards/chosen": -0.00928657315671444, "rewards/margins": 0.1094837561249733, "rewards/rejected": -0.11877032369375229, "step": 3078 }, { "epoch": 2.1293222683264177, "grad_norm": 4.5042219161987305, "learning_rate": 4.372598739818657e-05, "log_odds_chosen": 6.700753211975098, "log_odds_ratio": -0.0060334340669214725, "logits/chosen": -0.6424789428710938, "logits/rejected": -0.6247678995132446, "logps/chosen": -0.009268735535442829, "logps/rejected": -0.9170104265213013, "loss": 1.6578, "nll_loss": 0.41384565830230713, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009268735884688795, "rewards/margins": 0.09077417105436325, "rewards/rejected": -0.09170104563236237, "step": 3079 }, { "epoch": 2.1300138312586445, "grad_norm": 8.544678688049316, "learning_rate": 4.372214538189642e-05, "log_odds_chosen": 6.743369102478027, "log_odds_ratio": -0.025003895163536072, "logits/chosen": -0.7757008075714111, "logits/rejected": -0.8247724771499634, "logps/chosen": -0.027958286926150322, "logps/rejected": -0.9012587070465088, "loss": 3.0383, "nll_loss": 0.757074236869812, "rewards/accuracies": 1.0, "rewards/chosen": -0.002795828739181161, "rewards/margins": 0.08733003586530685, "rewards/rejected": -0.09012586623430252, "step": 3080 }, { "epoch": 2.1307053941908713, "grad_norm": 7.25593376159668, "learning_rate": 4.3718303365606274e-05, "log_odds_chosen": 6.274052619934082, "log_odds_ratio": -0.05131087079644203, "logits/chosen": -0.46268534660339355, "logits/rejected": -0.5040990114212036, "logps/chosen": -0.020884795114398003, "logps/rejected": -1.282250165939331, "loss": 2.0169, "nll_loss": 0.49910321831703186, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020884794648736715, "rewards/margins": 0.12613654136657715, "rewards/rejected": -0.12822501361370087, "step": 3081 }, { "epoch": 2.131396957123098, "grad_norm": 8.337603569030762, "learning_rate": 4.371446134931612e-05, "log_odds_chosen": 8.045146942138672, "log_odds_ratio": -0.008465875871479511, "logits/chosen": -0.40172910690307617, "logits/rejected": -0.48722216486930847, "logps/chosen": -0.0039009880274534225, "logps/rejected": -1.5526306629180908, "loss": 2.1899, "nll_loss": 0.5466317534446716, "rewards/accuracies": 1.0, "rewards/chosen": -0.00039009880856610835, "rewards/margins": 0.15487296879291534, "rewards/rejected": -0.15526306629180908, "step": 3082 }, { "epoch": 2.132088520055325, "grad_norm": 14.335528373718262, "learning_rate": 4.371061933302598e-05, "log_odds_chosen": 7.333463668823242, "log_odds_ratio": -0.25078287720680237, "logits/chosen": -0.5369386076927185, "logits/rejected": -0.5791423916816711, "logps/chosen": -0.04182959720492363, "logps/rejected": -1.1157071590423584, "loss": 3.0666, "nll_loss": 0.7415834069252014, "rewards/accuracies": 0.875, "rewards/chosen": -0.004182959441095591, "rewards/margins": 0.10738775134086609, "rewards/rejected": -0.11157071590423584, "step": 3083 }, { "epoch": 2.132780082987552, "grad_norm": 8.54443359375, "learning_rate": 4.3706777316735824e-05, "log_odds_chosen": 7.05941104888916, "log_odds_ratio": -0.0036911824718117714, "logits/chosen": -1.0558934211730957, "logits/rejected": -1.1036031246185303, "logps/chosen": -0.003986523952335119, "logps/rejected": -1.0695551633834839, "loss": 3.1301, "nll_loss": 0.7821628451347351, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003986524243373424, "rewards/margins": 0.10655686259269714, "rewards/rejected": -0.10695552080869675, "step": 3084 }, { "epoch": 2.1334716459197787, "grad_norm": 10.675700187683105, "learning_rate": 4.370293530044568e-05, "log_odds_chosen": 7.191103935241699, "log_odds_ratio": -0.05402178317308426, "logits/chosen": -0.6177210211753845, "logits/rejected": -0.7148678302764893, "logps/chosen": -0.01665673218667507, "logps/rejected": -1.175557255744934, "loss": 2.7801, "nll_loss": 0.6896116733551025, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016656732186675072, "rewards/margins": 0.11589005589485168, "rewards/rejected": -0.11755572259426117, "step": 3085 }, { "epoch": 2.1341632088520055, "grad_norm": 12.264751434326172, "learning_rate": 4.369909328415553e-05, "log_odds_chosen": 7.793338298797607, "log_odds_ratio": -0.003364799777045846, "logits/chosen": -0.5212658643722534, "logits/rejected": -0.6233722567558289, "logps/chosen": -0.00235918746329844, "logps/rejected": -1.2549175024032593, "loss": 3.3469, "nll_loss": 0.8363955616950989, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002359187783440575, "rewards/margins": 0.12525583803653717, "rewards/rejected": -0.12549175322055817, "step": 3086 }, { "epoch": 2.1348547717842323, "grad_norm": 5.636877059936523, "learning_rate": 4.3695251267865375e-05, "log_odds_chosen": 5.239736080169678, "log_odds_ratio": -0.030729567632079124, "logits/chosen": -0.47833728790283203, "logits/rejected": -0.5737687945365906, "logps/chosen": -0.050199344754219055, "logps/rejected": -1.2572249174118042, "loss": 2.0981, "nll_loss": 0.5214625597000122, "rewards/accuracies": 1.0, "rewards/chosen": -0.005019934847950935, "rewards/margins": 0.12070255726575851, "rewards/rejected": -0.1257224977016449, "step": 3087 }, { "epoch": 2.135546334716459, "grad_norm": 8.218557357788086, "learning_rate": 4.369140925157523e-05, "log_odds_chosen": 6.637486457824707, "log_odds_ratio": -0.033020954579114914, "logits/chosen": -0.6227655410766602, "logits/rejected": -0.6597899794578552, "logps/chosen": -0.019715534523129463, "logps/rejected": -1.0135293006896973, "loss": 3.0103, "nll_loss": 0.7492808103561401, "rewards/accuracies": 1.0, "rewards/chosen": -0.00197155331261456, "rewards/margins": 0.09938137233257294, "rewards/rejected": -0.10135292261838913, "step": 3088 }, { "epoch": 2.136237897648686, "grad_norm": 10.687223434448242, "learning_rate": 4.368756723528508e-05, "log_odds_chosen": 5.657548904418945, "log_odds_ratio": -0.20293311774730682, "logits/chosen": -0.6749211549758911, "logits/rejected": -0.7769599556922913, "logps/chosen": -0.03227122128009796, "logps/rejected": -1.0073554515838623, "loss": 3.2338, "nll_loss": 0.788148045539856, "rewards/accuracies": 0.875, "rewards/chosen": -0.003227122128009796, "rewards/margins": 0.09750843048095703, "rewards/rejected": -0.10073556005954742, "step": 3089 }, { "epoch": 2.136929460580913, "grad_norm": 9.447967529296875, "learning_rate": 4.368372521899493e-05, "log_odds_chosen": 5.053914546966553, "log_odds_ratio": -0.16671393811702728, "logits/chosen": -0.41087472438812256, "logits/rejected": -0.4866830110549927, "logps/chosen": -0.055432647466659546, "logps/rejected": -1.1628947257995605, "loss": 2.8171, "nll_loss": 0.687608540058136, "rewards/accuracies": 0.875, "rewards/chosen": -0.00554326456040144, "rewards/margins": 0.11074619740247726, "rewards/rejected": -0.11628946661949158, "step": 3090 }, { "epoch": 2.1376210235131397, "grad_norm": 6.8454203605651855, "learning_rate": 4.367988320270478e-05, "log_odds_chosen": 7.090332508087158, "log_odds_ratio": -0.013906768523156643, "logits/chosen": -0.6725280284881592, "logits/rejected": -0.7864277362823486, "logps/chosen": -0.012035176157951355, "logps/rejected": -0.9701966047286987, "loss": 2.2742, "nll_loss": 0.5671637058258057, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012035175459459424, "rewards/margins": 0.09581615030765533, "rewards/rejected": -0.09701967239379883, "step": 3091 }, { "epoch": 2.1383125864453665, "grad_norm": 5.8847975730896, "learning_rate": 4.367604118641464e-05, "log_odds_chosen": 5.034954071044922, "log_odds_ratio": -0.1259640008211136, "logits/chosen": -0.524029552936554, "logits/rejected": -0.5001509189605713, "logps/chosen": -0.10554247349500656, "logps/rejected": -0.838758647441864, "loss": 2.4572, "nll_loss": 0.6016969680786133, "rewards/accuracies": 1.0, "rewards/chosen": -0.010554247535765171, "rewards/margins": 0.0733216181397438, "rewards/rejected": -0.0838758647441864, "step": 3092 }, { "epoch": 2.1390041493775933, "grad_norm": 4.628413677215576, "learning_rate": 4.367219917012448e-05, "log_odds_chosen": 4.764793395996094, "log_odds_ratio": -0.023185372352600098, "logits/chosen": -0.4627071022987366, "logits/rejected": -0.5550016760826111, "logps/chosen": -0.03563341498374939, "logps/rejected": -0.6548776626586914, "loss": 2.7578, "nll_loss": 0.6871291399002075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035633419174700975, "rewards/margins": 0.061924420297145844, "rewards/rejected": -0.06548776477575302, "step": 3093 }, { "epoch": 2.13969571230982, "grad_norm": 10.076739311218262, "learning_rate": 4.3668357153834335e-05, "log_odds_chosen": 5.545009613037109, "log_odds_ratio": -0.26811662316322327, "logits/chosen": -0.7443239688873291, "logits/rejected": -0.7627904415130615, "logps/chosen": -0.041051093488931656, "logps/rejected": -1.1456853151321411, "loss": 4.2332, "nll_loss": 1.031481146812439, "rewards/accuracies": 0.875, "rewards/chosen": -0.0041051097214221954, "rewards/margins": 0.11046342551708221, "rewards/rejected": -0.11456853896379471, "step": 3094 }, { "epoch": 2.140387275242047, "grad_norm": 8.096652030944824, "learning_rate": 4.366451513754419e-05, "log_odds_chosen": 8.124271392822266, "log_odds_ratio": -0.012365620583295822, "logits/chosen": -0.7031798958778381, "logits/rejected": -0.833611011505127, "logps/chosen": -0.0064608375541865826, "logps/rejected": -1.580578327178955, "loss": 3.3684, "nll_loss": 0.8408694267272949, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006460837321355939, "rewards/margins": 0.15741175413131714, "rewards/rejected": -0.15805783867835999, "step": 3095 }, { "epoch": 2.141078838174274, "grad_norm": 8.342835426330566, "learning_rate": 4.366067312125403e-05, "log_odds_chosen": 4.668115139007568, "log_odds_ratio": -0.02511441893875599, "logits/chosen": -0.5143285393714905, "logits/rejected": -0.5451970100402832, "logps/chosen": -0.03286014497280121, "logps/rejected": -0.9364689588546753, "loss": 2.5867, "nll_loss": 0.6441757678985596, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032860147766768932, "rewards/margins": 0.09036087989807129, "rewards/rejected": -0.09364689141511917, "step": 3096 }, { "epoch": 2.1417704011065006, "grad_norm": 8.438102722167969, "learning_rate": 4.3656831104963886e-05, "log_odds_chosen": 5.5746002197265625, "log_odds_ratio": -0.021498534828424454, "logits/chosen": -0.3380069136619568, "logits/rejected": -0.358273983001709, "logps/chosen": -0.020813195034861565, "logps/rejected": -0.9423534274101257, "loss": 2.385, "nll_loss": 0.5941092371940613, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020813194569200277, "rewards/margins": 0.09215402603149414, "rewards/rejected": -0.09423534572124481, "step": 3097 }, { "epoch": 2.1424619640387275, "grad_norm": 5.3521037101745605, "learning_rate": 4.365298908867374e-05, "log_odds_chosen": 6.82499885559082, "log_odds_ratio": -0.004994858056306839, "logits/chosen": -0.49015307426452637, "logits/rejected": -0.530591607093811, "logps/chosen": -0.0181460902094841, "logps/rejected": -1.093785047531128, "loss": 2.2152, "nll_loss": 0.5533087253570557, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018146091606467962, "rewards/margins": 0.10756390541791916, "rewards/rejected": -0.10937851667404175, "step": 3098 }, { "epoch": 2.1431535269709543, "grad_norm": 5.270591735839844, "learning_rate": 4.364914707238359e-05, "log_odds_chosen": 6.464059829711914, "log_odds_ratio": -0.039618462324142456, "logits/chosen": -0.5720619559288025, "logits/rejected": -0.5675368309020996, "logps/chosen": -0.027832329273223877, "logps/rejected": -1.39834725856781, "loss": 2.5972, "nll_loss": 0.6453447937965393, "rewards/accuracies": 1.0, "rewards/chosen": -0.002783233067020774, "rewards/margins": 0.13705149292945862, "rewards/rejected": -0.13983473181724548, "step": 3099 }, { "epoch": 2.143845089903181, "grad_norm": 9.967931747436523, "learning_rate": 4.3645305056093436e-05, "log_odds_chosen": 5.426502704620361, "log_odds_ratio": -0.5206568241119385, "logits/chosen": -0.6620529890060425, "logits/rejected": -0.6964467763900757, "logps/chosen": -0.03018881566822529, "logps/rejected": -0.8304793238639832, "loss": 3.2589, "nll_loss": 0.7626627683639526, "rewards/accuracies": 0.875, "rewards/chosen": -0.0030188814271241426, "rewards/margins": 0.08002904802560806, "rewards/rejected": -0.08304793387651443, "step": 3100 }, { "epoch": 2.144536652835408, "grad_norm": 5.0591020584106445, "learning_rate": 4.3641463039803296e-05, "log_odds_chosen": 6.7463483810424805, "log_odds_ratio": -0.010486850515007973, "logits/chosen": -0.3661188781261444, "logits/rejected": -0.32782259583473206, "logps/chosen": -0.02775876224040985, "logps/rejected": -1.2433809041976929, "loss": 1.8088, "nll_loss": 0.4511429965496063, "rewards/accuracies": 1.0, "rewards/chosen": -0.002775876084342599, "rewards/margins": 0.12156221270561218, "rewards/rejected": -0.12433809041976929, "step": 3101 }, { "epoch": 2.145228215767635, "grad_norm": 6.19116735458374, "learning_rate": 4.363762102351314e-05, "log_odds_chosen": 6.955341815948486, "log_odds_ratio": -0.01190902478992939, "logits/chosen": -0.6434040069580078, "logits/rejected": -0.6938708424568176, "logps/chosen": -0.016568060964345932, "logps/rejected": -1.0684852600097656, "loss": 2.4648, "nll_loss": 0.6150209903717041, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016568060964345932, "rewards/margins": 0.10519173741340637, "rewards/rejected": -0.10684853047132492, "step": 3102 }, { "epoch": 2.1459197786998616, "grad_norm": 6.833184242248535, "learning_rate": 4.3633779007222994e-05, "log_odds_chosen": 5.31828498840332, "log_odds_ratio": -0.09289534389972687, "logits/chosen": -0.6373336315155029, "logits/rejected": -0.6737222075462341, "logps/chosen": -0.03717003017663956, "logps/rejected": -0.8076385855674744, "loss": 2.3527, "nll_loss": 0.5788832306861877, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037170026917010546, "rewards/margins": 0.07704685628414154, "rewards/rejected": -0.08076386153697968, "step": 3103 }, { "epoch": 2.1466113416320884, "grad_norm": 5.522740364074707, "learning_rate": 4.3629936990932846e-05, "log_odds_chosen": 8.604452133178711, "log_odds_ratio": -0.0005866825231350958, "logits/chosen": -0.1628008484840393, "logits/rejected": -0.1629152148962021, "logps/chosen": -0.0003812982467934489, "logps/rejected": -0.9394527673721313, "loss": 2.0881, "nll_loss": 0.5219618678092957, "rewards/accuracies": 1.0, "rewards/chosen": -3.812982322415337e-05, "rewards/margins": 0.09390714764595032, "rewards/rejected": -0.09394527971744537, "step": 3104 }, { "epoch": 2.1473029045643153, "grad_norm": 8.878650665283203, "learning_rate": 4.362609497464269e-05, "log_odds_chosen": 7.132782459259033, "log_odds_ratio": -0.0020438514184206724, "logits/chosen": -0.4118482768535614, "logits/rejected": -0.41403189301490784, "logps/chosen": -0.0040047611109912395, "logps/rejected": -0.7762624025344849, "loss": 2.7574, "nll_loss": 0.6891574859619141, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040047609945759177, "rewards/margins": 0.07722576707601547, "rewards/rejected": -0.07762623578310013, "step": 3105 }, { "epoch": 2.147994467496542, "grad_norm": 7.991227626800537, "learning_rate": 4.3622252958352544e-05, "log_odds_chosen": 7.156825065612793, "log_odds_ratio": -0.030668115243315697, "logits/chosen": -0.40880826115608215, "logits/rejected": -0.4779745936393738, "logps/chosen": -0.01195601373910904, "logps/rejected": -1.2787102460861206, "loss": 2.6504, "nll_loss": 0.6595234274864197, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011956014204770327, "rewards/margins": 0.12667542695999146, "rewards/rejected": -0.12787103652954102, "step": 3106 }, { "epoch": 2.148686030428769, "grad_norm": 6.472316265106201, "learning_rate": 4.3618410942062397e-05, "log_odds_chosen": 7.069546699523926, "log_odds_ratio": -0.16113081574440002, "logits/chosen": -0.6364884972572327, "logits/rejected": -0.6532797813415527, "logps/chosen": -0.029024068266153336, "logps/rejected": -1.284508228302002, "loss": 2.3296, "nll_loss": 0.5662857890129089, "rewards/accuracies": 0.875, "rewards/chosen": -0.002902406733483076, "rewards/margins": 0.12554842233657837, "rewards/rejected": -0.12845084071159363, "step": 3107 }, { "epoch": 2.1493775933609958, "grad_norm": 10.088801383972168, "learning_rate": 4.361456892577225e-05, "log_odds_chosen": 6.0846967697143555, "log_odds_ratio": -0.06640740483999252, "logits/chosen": -0.5729522705078125, "logits/rejected": -0.6283227205276489, "logps/chosen": -0.029298869892954826, "logps/rejected": -1.473647117614746, "loss": 2.8737, "nll_loss": 0.7117915153503418, "rewards/accuracies": 1.0, "rewards/chosen": -0.002929887268692255, "rewards/margins": 0.1444348245859146, "rewards/rejected": -0.14736472070217133, "step": 3108 }, { "epoch": 2.1500691562932226, "grad_norm": 9.722908973693848, "learning_rate": 4.3610726909482095e-05, "log_odds_chosen": 5.92418098449707, "log_odds_ratio": -0.29958927631378174, "logits/chosen": -0.45902007818222046, "logits/rejected": -0.5431629419326782, "logps/chosen": -0.03596596419811249, "logps/rejected": -1.0880738496780396, "loss": 2.997, "nll_loss": 0.7192915678024292, "rewards/accuracies": 0.875, "rewards/chosen": -0.003596596186980605, "rewards/margins": 0.1052107885479927, "rewards/rejected": -0.10880738496780396, "step": 3109 }, { "epoch": 2.1507607192254494, "grad_norm": 5.922492980957031, "learning_rate": 4.3606884893191954e-05, "log_odds_chosen": 6.920762538909912, "log_odds_ratio": -0.004645219072699547, "logits/chosen": -0.6970812082290649, "logits/rejected": -0.7362354397773743, "logps/chosen": -0.01976136490702629, "logps/rejected": -1.1406362056732178, "loss": 3.0133, "nll_loss": 0.7528709173202515, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019761365838348866, "rewards/margins": 0.11208748817443848, "rewards/rejected": -0.11406362056732178, "step": 3110 }, { "epoch": 2.1514522821576763, "grad_norm": 5.159032821655273, "learning_rate": 4.36030428769018e-05, "log_odds_chosen": 3.858637809753418, "log_odds_ratio": -0.22229480743408203, "logits/chosen": -0.6306631565093994, "logits/rejected": -0.6449373960494995, "logps/chosen": -0.10265744477510452, "logps/rejected": -1.142212986946106, "loss": 2.3788, "nll_loss": 0.5724680423736572, "rewards/accuracies": 0.875, "rewards/chosen": -0.010265744291245937, "rewards/margins": 0.10395555198192596, "rewards/rejected": -0.11422129720449448, "step": 3111 }, { "epoch": 2.152143845089903, "grad_norm": 7.787853717803955, "learning_rate": 4.359920086061165e-05, "log_odds_chosen": 6.308670997619629, "log_odds_ratio": -0.008247793652117252, "logits/chosen": -0.37978595495224, "logits/rejected": -0.432533860206604, "logps/chosen": -0.012554554268717766, "logps/rejected": -1.0429201126098633, "loss": 2.0206, "nll_loss": 0.5043319463729858, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012554554268717766, "rewards/margins": 0.1030365526676178, "rewards/rejected": -0.10429200530052185, "step": 3112 }, { "epoch": 2.15283540802213, "grad_norm": 8.554698944091797, "learning_rate": 4.3595358844321504e-05, "log_odds_chosen": 6.984411716461182, "log_odds_ratio": -0.005794777534902096, "logits/chosen": -0.4820101261138916, "logits/rejected": -0.6359446048736572, "logps/chosen": -0.013413993641734123, "logps/rejected": -1.2459449768066406, "loss": 2.9193, "nll_loss": 0.7292553186416626, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013413995038717985, "rewards/margins": 0.12325310707092285, "rewards/rejected": -0.12459450215101242, "step": 3113 }, { "epoch": 2.1535269709543567, "grad_norm": 12.569887161254883, "learning_rate": 4.359151682803135e-05, "log_odds_chosen": 6.896792411804199, "log_odds_ratio": -0.009464538656175137, "logits/chosen": -0.2823646068572998, "logits/rejected": -0.3563285768032074, "logps/chosen": -0.028261443600058556, "logps/rejected": -1.547232985496521, "loss": 2.4172, "nll_loss": 0.6033486723899841, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028261442203074694, "rewards/margins": 0.15189716219902039, "rewards/rejected": -0.15472330152988434, "step": 3114 }, { "epoch": 2.1542185338865836, "grad_norm": 12.023890495300293, "learning_rate": 4.35876748117412e-05, "log_odds_chosen": 7.172085285186768, "log_odds_ratio": -0.012960941530764103, "logits/chosen": -0.6027337312698364, "logits/rejected": -0.6425750851631165, "logps/chosen": -0.01669890806078911, "logps/rejected": -1.2620288133621216, "loss": 3.2111, "nll_loss": 0.8014745712280273, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016698910621926188, "rewards/margins": 0.12453299760818481, "rewards/rejected": -0.12620288133621216, "step": 3115 }, { "epoch": 2.1549100968188104, "grad_norm": 12.55388069152832, "learning_rate": 4.3583832795451055e-05, "log_odds_chosen": 7.494009971618652, "log_odds_ratio": -0.02409595623612404, "logits/chosen": -0.3516850769519806, "logits/rejected": -0.4178076386451721, "logps/chosen": -0.009373624809086323, "logps/rejected": -1.325321078300476, "loss": 2.9038, "nll_loss": 0.7235294580459595, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009373623761348426, "rewards/margins": 0.13159474730491638, "rewards/rejected": -0.13253211975097656, "step": 3116 }, { "epoch": 2.1556016597510372, "grad_norm": 6.692192554473877, "learning_rate": 4.357999077916091e-05, "log_odds_chosen": 8.052202224731445, "log_odds_ratio": -0.08095759153366089, "logits/chosen": -0.4351692795753479, "logits/rejected": -0.5287215113639832, "logps/chosen": -0.036751508712768555, "logps/rejected": -1.66717529296875, "loss": 2.1873, "nll_loss": 0.5387200117111206, "rewards/accuracies": 1.0, "rewards/chosen": -0.003675150917842984, "rewards/margins": 0.16304238140583038, "rewards/rejected": -0.166717529296875, "step": 3117 }, { "epoch": 2.156293222683264, "grad_norm": 5.936384677886963, "learning_rate": 4.357614876287075e-05, "log_odds_chosen": 6.1470746994018555, "log_odds_ratio": -0.04051423817873001, "logits/chosen": -0.3913887143135071, "logits/rejected": -0.4102795720100403, "logps/chosen": -0.033366814255714417, "logps/rejected": -1.026632308959961, "loss": 2.2855, "nll_loss": 0.5673317909240723, "rewards/accuracies": 1.0, "rewards/chosen": -0.003336681518703699, "rewards/margins": 0.09932655096054077, "rewards/rejected": -0.10266323387622833, "step": 3118 }, { "epoch": 2.156984785615491, "grad_norm": 11.439972877502441, "learning_rate": 4.357230674658061e-05, "log_odds_chosen": 7.716203689575195, "log_odds_ratio": -0.002041627187281847, "logits/chosen": -0.4915839433670044, "logits/rejected": -0.5531714558601379, "logps/chosen": -0.0034083856735378504, "logps/rejected": -1.5144751071929932, "loss": 2.4189, "nll_loss": 0.6045123934745789, "rewards/accuracies": 1.0, "rewards/chosen": -0.00034083856735378504, "rewards/margins": 0.15110668540000916, "rewards/rejected": -0.15144753456115723, "step": 3119 }, { "epoch": 2.1576763485477177, "grad_norm": 7.074250221252441, "learning_rate": 4.356846473029046e-05, "log_odds_chosen": 4.405117034912109, "log_odds_ratio": -0.1953820288181305, "logits/chosen": -0.43208175897598267, "logits/rejected": -0.4175960123538971, "logps/chosen": -0.13776032626628876, "logps/rejected": -1.0861680507659912, "loss": 1.9592, "nll_loss": 0.4702651798725128, "rewards/accuracies": 0.875, "rewards/chosen": -0.013776032254099846, "rewards/margins": 0.09484077990055084, "rewards/rejected": -0.10861679911613464, "step": 3120 }, { "epoch": 2.1583679114799446, "grad_norm": 5.285325527191162, "learning_rate": 4.356462271400031e-05, "log_odds_chosen": 7.057203769683838, "log_odds_ratio": -0.005945372395217419, "logits/chosen": -0.3348833918571472, "logits/rejected": -0.41188618540763855, "logps/chosen": -0.012273477390408516, "logps/rejected": -1.1557308435440063, "loss": 2.3991, "nll_loss": 0.599189817905426, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012273476459085941, "rewards/margins": 0.1143457442522049, "rewards/rejected": -0.11557309329509735, "step": 3121 }, { "epoch": 2.1590594744121714, "grad_norm": 7.700890064239502, "learning_rate": 4.356078069771016e-05, "log_odds_chosen": 6.288533687591553, "log_odds_ratio": -0.031821660697460175, "logits/chosen": -0.9171697497367859, "logits/rejected": -0.9709770679473877, "logps/chosen": -0.018146559596061707, "logps/rejected": -1.7577550411224365, "loss": 2.243, "nll_loss": 0.5575743913650513, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018146559596061707, "rewards/margins": 0.1739608645439148, "rewards/rejected": -0.17577551305294037, "step": 3122 }, { "epoch": 2.159751037344398, "grad_norm": 8.697759628295898, "learning_rate": 4.3556938681420015e-05, "log_odds_chosen": 8.340778350830078, "log_odds_ratio": -0.003053261898458004, "logits/chosen": -0.5757359266281128, "logits/rejected": -0.6766760349273682, "logps/chosen": -0.013043480925261974, "logps/rejected": -1.9186817407608032, "loss": 2.648, "nll_loss": 0.6617018580436707, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013043482322245836, "rewards/margins": 0.19056381285190582, "rewards/rejected": -0.19186817109584808, "step": 3123 }, { "epoch": 2.160442600276625, "grad_norm": 6.629286289215088, "learning_rate": 4.355309666512986e-05, "log_odds_chosen": 7.6335368156433105, "log_odds_ratio": -0.11332618445158005, "logits/chosen": -0.316942036151886, "logits/rejected": -0.31863781809806824, "logps/chosen": -0.021302711218595505, "logps/rejected": -0.7485167980194092, "loss": 2.1456, "nll_loss": 0.5250740647315979, "rewards/accuracies": 0.875, "rewards/chosen": -0.002130271168425679, "rewards/margins": 0.07272141426801682, "rewards/rejected": -0.07485168427228928, "step": 3124 }, { "epoch": 2.161134163208852, "grad_norm": 4.9336323738098145, "learning_rate": 4.3549254648839713e-05, "log_odds_chosen": 5.858908176422119, "log_odds_ratio": -0.13920627534389496, "logits/chosen": -0.09984740614891052, "logits/rejected": -0.13050922751426697, "logps/chosen": -0.1269267499446869, "logps/rejected": -1.3792228698730469, "loss": 2.5293, "nll_loss": 0.6184045672416687, "rewards/accuracies": 0.875, "rewards/chosen": -0.012692674063146114, "rewards/margins": 0.125229611992836, "rewards/rejected": -0.1379222869873047, "step": 3125 }, { "epoch": 2.1618257261410787, "grad_norm": 7.739724636077881, "learning_rate": 4.3545412632549566e-05, "log_odds_chosen": 6.311422348022461, "log_odds_ratio": -0.011560730636119843, "logits/chosen": -0.6573699116706848, "logits/rejected": -0.6942727565765381, "logps/chosen": -0.027487125247716904, "logps/rejected": -1.0137187242507935, "loss": 3.0823, "nll_loss": 0.7694215178489685, "rewards/accuracies": 1.0, "rewards/chosen": -0.00274871289730072, "rewards/margins": 0.09862315654754639, "rewards/rejected": -0.1013718843460083, "step": 3126 }, { "epoch": 2.1625172890733055, "grad_norm": 5.675347328186035, "learning_rate": 4.354157061625941e-05, "log_odds_chosen": 6.896705627441406, "log_odds_ratio": -0.012512540444731712, "logits/chosen": -0.49224478006362915, "logits/rejected": -0.6597970128059387, "logps/chosen": -0.014489945955574512, "logps/rejected": -1.1900012493133545, "loss": 1.9826, "nll_loss": 0.4943990111351013, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014489946188405156, "rewards/margins": 0.11755111813545227, "rewards/rejected": -0.11900011450052261, "step": 3127 }, { "epoch": 2.1632088520055324, "grad_norm": 7.413966655731201, "learning_rate": 4.353772859996927e-05, "log_odds_chosen": 6.049656867980957, "log_odds_ratio": -0.06955083459615707, "logits/chosen": -0.37585240602493286, "logits/rejected": -0.3701566457748413, "logps/chosen": -0.034673117101192474, "logps/rejected": -1.2663806676864624, "loss": 2.9125, "nll_loss": 0.721172571182251, "rewards/accuracies": 1.0, "rewards/chosen": -0.00346731161698699, "rewards/margins": 0.12317074835300446, "rewards/rejected": -0.12663805484771729, "step": 3128 }, { "epoch": 2.163900414937759, "grad_norm": 4.2426042556762695, "learning_rate": 4.3533886583679116e-05, "log_odds_chosen": 6.2769880294799805, "log_odds_ratio": -0.04037241265177727, "logits/chosen": -0.38149911165237427, "logits/rejected": -0.46072056889533997, "logps/chosen": -0.030505575239658356, "logps/rejected": -1.0659232139587402, "loss": 2.2097, "nll_loss": 0.5483903884887695, "rewards/accuracies": 1.0, "rewards/chosen": -0.003050557803362608, "rewards/margins": 0.10354176163673401, "rewards/rejected": -0.1065923273563385, "step": 3129 }, { "epoch": 2.164591977869986, "grad_norm": 5.698359489440918, "learning_rate": 4.353004456738897e-05, "log_odds_chosen": 6.9791436195373535, "log_odds_ratio": -0.0023404499515891075, "logits/chosen": -0.4339294731616974, "logits/rejected": -0.439035564661026, "logps/chosen": -0.007990580052137375, "logps/rejected": -1.0161094665527344, "loss": 2.305, "nll_loss": 0.5760195255279541, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007990580052137375, "rewards/margins": 0.1008118987083435, "rewards/rejected": -0.1016109511256218, "step": 3130 }, { "epoch": 2.165283540802213, "grad_norm": 8.642672538757324, "learning_rate": 4.352620255109882e-05, "log_odds_chosen": 7.645012378692627, "log_odds_ratio": -0.0010081573855131865, "logits/chosen": -0.40549156069755554, "logits/rejected": -0.5037175416946411, "logps/chosen": -0.002098255092278123, "logps/rejected": -1.2280879020690918, "loss": 2.5388, "nll_loss": 0.6345977187156677, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020982550631742924, "rewards/margins": 0.12259896099567413, "rewards/rejected": -0.1228087916970253, "step": 3131 }, { "epoch": 2.1659751037344397, "grad_norm": 8.14933967590332, "learning_rate": 4.3522360534808674e-05, "log_odds_chosen": 8.192554473876953, "log_odds_ratio": -0.0013432309497147799, "logits/chosen": -0.6731882691383362, "logits/rejected": -0.6550188064575195, "logps/chosen": -0.00395369715988636, "logps/rejected": -1.4148451089859009, "loss": 1.9726, "nll_loss": 0.49301496148109436, "rewards/accuracies": 1.0, "rewards/chosen": -0.000395369715988636, "rewards/margins": 0.14108915627002716, "rewards/rejected": -0.14148451387882233, "step": 3132 }, { "epoch": 2.1666666666666665, "grad_norm": 11.413957595825195, "learning_rate": 4.351851851851852e-05, "log_odds_chosen": 5.338525772094727, "log_odds_ratio": -0.5185012221336365, "logits/chosen": -0.24774277210235596, "logits/rejected": -0.31962743401527405, "logps/chosen": -0.05943586304783821, "logps/rejected": -1.091323971748352, "loss": 2.7867, "nll_loss": 0.6448211669921875, "rewards/accuracies": 0.875, "rewards/chosen": -0.005943586118519306, "rewards/margins": 0.10318881273269653, "rewards/rejected": -0.10913239419460297, "step": 3133 }, { "epoch": 2.1673582295988933, "grad_norm": 7.512145519256592, "learning_rate": 4.351467650222837e-05, "log_odds_chosen": 6.613034725189209, "log_odds_ratio": -0.16497647762298584, "logits/chosen": -0.5425326824188232, "logits/rejected": -0.55716472864151, "logps/chosen": -0.03287721425294876, "logps/rejected": -1.1185500621795654, "loss": 2.4417, "nll_loss": 0.5939371585845947, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032877211924642324, "rewards/margins": 0.10856729000806808, "rewards/rejected": -0.11185500025749207, "step": 3134 }, { "epoch": 2.16804979253112, "grad_norm": 9.989569664001465, "learning_rate": 4.3510834485938224e-05, "log_odds_chosen": 5.303834915161133, "log_odds_ratio": -0.14418858289718628, "logits/chosen": -0.38814419507980347, "logits/rejected": -0.42610323429107666, "logps/chosen": -0.06320082396268845, "logps/rejected": -1.138200283050537, "loss": 3.2482, "nll_loss": 0.7976294159889221, "rewards/accuracies": 0.875, "rewards/chosen": -0.006320081651210785, "rewards/margins": 0.10749995708465576, "rewards/rejected": -0.11382003128528595, "step": 3135 }, { "epoch": 2.168741355463347, "grad_norm": 14.738361358642578, "learning_rate": 4.350699246964807e-05, "log_odds_chosen": 5.855432987213135, "log_odds_ratio": -0.2356831133365631, "logits/chosen": -0.7010008692741394, "logits/rejected": -0.7187799215316772, "logps/chosen": -0.030093370005488396, "logps/rejected": -1.0057425498962402, "loss": 3.5095, "nll_loss": 0.853812575340271, "rewards/accuracies": 0.875, "rewards/chosen": -0.0030093372333794832, "rewards/margins": 0.0975649282336235, "rewards/rejected": -0.10057426989078522, "step": 3136 }, { "epoch": 2.169432918395574, "grad_norm": 8.910038948059082, "learning_rate": 4.350315045335793e-05, "log_odds_chosen": 7.2788405418396, "log_odds_ratio": -0.002646538894623518, "logits/chosen": -0.22019320726394653, "logits/rejected": -0.28553158044815063, "logps/chosen": -0.0055422307923436165, "logps/rejected": -1.1429450511932373, "loss": 2.859, "nll_loss": 0.7144944667816162, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005542230792343616, "rewards/margins": 0.11374028027057648, "rewards/rejected": -0.11429451406002045, "step": 3137 }, { "epoch": 2.1701244813278007, "grad_norm": 8.61752700805664, "learning_rate": 4.3499308437067775e-05, "log_odds_chosen": 7.681922912597656, "log_odds_ratio": -0.003510521724820137, "logits/chosen": -0.4615509510040283, "logits/rejected": -0.5451355576515198, "logps/chosen": -0.009908279404044151, "logps/rejected": -1.4224928617477417, "loss": 2.6812, "nll_loss": 0.6699454188346863, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009908280335366726, "rewards/margins": 0.14125844836235046, "rewards/rejected": -0.14224928617477417, "step": 3138 }, { "epoch": 2.1708160442600275, "grad_norm": 5.192074775695801, "learning_rate": 4.349546642077763e-05, "log_odds_chosen": 8.567987442016602, "log_odds_ratio": -0.005496464669704437, "logits/chosen": -0.4235052466392517, "logits/rejected": -0.49524080753326416, "logps/chosen": -0.019714895635843277, "logps/rejected": -1.7913901805877686, "loss": 2.0488, "nll_loss": 0.5116626024246216, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019714897498488426, "rewards/margins": 0.17716753482818604, "rewards/rejected": -0.17913901805877686, "step": 3139 }, { "epoch": 2.1715076071922543, "grad_norm": 8.280911445617676, "learning_rate": 4.349162440448748e-05, "log_odds_chosen": 5.522249221801758, "log_odds_ratio": -0.041618652641773224, "logits/chosen": -0.22748862206935883, "logits/rejected": -0.18105798959732056, "logps/chosen": -0.028005346655845642, "logps/rejected": -1.168099045753479, "loss": 2.8935, "nll_loss": 0.7192178964614868, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028005349449813366, "rewards/margins": 0.11400936543941498, "rewards/rejected": -0.1168099045753479, "step": 3140 }, { "epoch": 2.172199170124481, "grad_norm": 9.586698532104492, "learning_rate": 4.348778238819733e-05, "log_odds_chosen": 5.8105058670043945, "log_odds_ratio": -0.25834015011787415, "logits/chosen": -0.427727073431015, "logits/rejected": -0.4316749572753906, "logps/chosen": -0.05228612199425697, "logps/rejected": -1.0495209693908691, "loss": 1.6904, "nll_loss": 0.39677131175994873, "rewards/accuracies": 0.875, "rewards/chosen": -0.00522861210629344, "rewards/margins": 0.09972350299358368, "rewards/rejected": -0.10495211184024811, "step": 3141 }, { "epoch": 2.172890733056708, "grad_norm": 9.963493347167969, "learning_rate": 4.348394037190718e-05, "log_odds_chosen": 5.8443074226379395, "log_odds_ratio": -0.05553425848484039, "logits/chosen": -0.5284481048583984, "logits/rejected": -0.5299968719482422, "logps/chosen": -0.021340377628803253, "logps/rejected": -0.9841907024383545, "loss": 3.4941, "nll_loss": 0.8679712414741516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021340379025787115, "rewards/margins": 0.09628503769636154, "rewards/rejected": -0.09841907024383545, "step": 3142 }, { "epoch": 2.173582295988935, "grad_norm": 7.235234260559082, "learning_rate": 4.348009835561703e-05, "log_odds_chosen": 5.568575382232666, "log_odds_ratio": -0.010355843231081963, "logits/chosen": -0.43284177780151367, "logits/rejected": -0.4781281650066376, "logps/chosen": -0.014164619147777557, "logps/rejected": -0.929655909538269, "loss": 1.9955, "nll_loss": 0.49784544110298157, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014164620079100132, "rewards/margins": 0.09154912829399109, "rewards/rejected": -0.09296559542417526, "step": 3143 }, { "epoch": 2.1742738589211617, "grad_norm": 6.723057746887207, "learning_rate": 4.347625633932688e-05, "log_odds_chosen": 6.059225559234619, "log_odds_ratio": -0.050255246460437775, "logits/chosen": -0.4582567811012268, "logits/rejected": -0.539722204208374, "logps/chosen": -0.031468067318201065, "logps/rejected": -1.1873829364776611, "loss": 2.8291, "nll_loss": 0.7022486925125122, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031468067318201065, "rewards/margins": 0.11559149622917175, "rewards/rejected": -0.11873829364776611, "step": 3144 }, { "epoch": 2.1749654218533885, "grad_norm": 4.7158203125, "learning_rate": 4.347241432303673e-05, "log_odds_chosen": 4.731280326843262, "log_odds_ratio": -0.03173203766345978, "logits/chosen": -0.48660892248153687, "logits/rejected": -0.515285074710846, "logps/chosen": -0.056296419352293015, "logps/rejected": -0.9932827949523926, "loss": 2.2232, "nll_loss": 0.5526218414306641, "rewards/accuracies": 1.0, "rewards/chosen": -0.0056296419352293015, "rewards/margins": 0.093698650598526, "rewards/rejected": -0.09932827949523926, "step": 3145 }, { "epoch": 2.1756569847856153, "grad_norm": 5.949375629425049, "learning_rate": 4.346857230674658e-05, "log_odds_chosen": 6.329002380371094, "log_odds_ratio": -0.013269197195768356, "logits/chosen": -0.4359148442745209, "logits/rejected": -0.42959028482437134, "logps/chosen": -0.028586188331246376, "logps/rejected": -1.123631238937378, "loss": 3.5716, "nll_loss": 0.8915849924087524, "rewards/accuracies": 1.0, "rewards/chosen": -0.002858618972823024, "rewards/margins": 0.10950449109077454, "rewards/rejected": -0.11236311495304108, "step": 3146 }, { "epoch": 2.176348547717842, "grad_norm": 7.0173845291137695, "learning_rate": 4.346473029045643e-05, "log_odds_chosen": 7.977410316467285, "log_odds_ratio": -0.00586349843069911, "logits/chosen": 0.18461117148399353, "logits/rejected": 0.1510668843984604, "logps/chosen": -0.0582503117620945, "logps/rejected": -2.1898932456970215, "loss": 2.341, "nll_loss": 0.5846550464630127, "rewards/accuracies": 1.0, "rewards/chosen": -0.00582503154873848, "rewards/margins": 0.2131642997264862, "rewards/rejected": -0.2189893275499344, "step": 3147 }, { "epoch": 2.177040110650069, "grad_norm": 7.535158157348633, "learning_rate": 4.3460888274166286e-05, "log_odds_chosen": 6.55058479309082, "log_odds_ratio": -0.009487954899668694, "logits/chosen": -0.4937528967857361, "logits/rejected": -0.5068577527999878, "logps/chosen": -0.03335797041654587, "logps/rejected": -1.729652762413025, "loss": 2.6839, "nll_loss": 0.670030415058136, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033357972279191017, "rewards/margins": 0.16962948441505432, "rewards/rejected": -0.17296528816223145, "step": 3148 }, { "epoch": 2.177731673582296, "grad_norm": 4.840470790863037, "learning_rate": 4.345704625787613e-05, "log_odds_chosen": 5.8305511474609375, "log_odds_ratio": -0.0832158774137497, "logits/chosen": -0.1915377974510193, "logits/rejected": -0.14588585495948792, "logps/chosen": -0.017783522605895996, "logps/rejected": -0.8130568265914917, "loss": 1.8575, "nll_loss": 0.4560588598251343, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017783523071557283, "rewards/margins": 0.07952733337879181, "rewards/rejected": -0.08130568265914917, "step": 3149 }, { "epoch": 2.1784232365145226, "grad_norm": 6.872931003570557, "learning_rate": 4.345320424158599e-05, "log_odds_chosen": 4.3958892822265625, "log_odds_ratio": -0.44033169746398926, "logits/chosen": -0.5839468240737915, "logits/rejected": -0.6039784550666809, "logps/chosen": -0.09881820529699326, "logps/rejected": -0.9070853590965271, "loss": 2.3977, "nll_loss": 0.555400550365448, "rewards/accuracies": 0.75, "rewards/chosen": -0.009881820529699326, "rewards/margins": 0.08082671463489532, "rewards/rejected": -0.09070853888988495, "step": 3150 }, { "epoch": 2.1791147994467495, "grad_norm": 7.127779006958008, "learning_rate": 4.3449362225295836e-05, "log_odds_chosen": 5.093412399291992, "log_odds_ratio": -0.0317099504172802, "logits/chosen": -0.38323113322257996, "logits/rejected": -0.4130544662475586, "logps/chosen": -0.028745543211698532, "logps/rejected": -1.1016664505004883, "loss": 2.6644, "nll_loss": 0.6629339456558228, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028745546005666256, "rewards/margins": 0.10729208588600159, "rewards/rejected": -0.11016664654016495, "step": 3151 }, { "epoch": 2.1798063623789763, "grad_norm": 10.824358940124512, "learning_rate": 4.344552020900569e-05, "log_odds_chosen": 5.998074531555176, "log_odds_ratio": -0.14802958071231842, "logits/chosen": -0.4905872344970703, "logits/rejected": -0.5300999283790588, "logps/chosen": -0.046079330146312714, "logps/rejected": -1.1805709600448608, "loss": 3.2875, "nll_loss": 0.8070785999298096, "rewards/accuracies": 1.0, "rewards/chosen": -0.004607933573424816, "rewards/margins": 0.11344917863607407, "rewards/rejected": -0.11805711686611176, "step": 3152 }, { "epoch": 2.180497925311203, "grad_norm": 6.493251323699951, "learning_rate": 4.344167819271554e-05, "log_odds_chosen": 7.639892101287842, "log_odds_ratio": -0.05055145174264908, "logits/chosen": -0.6294256448745728, "logits/rejected": -0.7311701774597168, "logps/chosen": -0.01981090009212494, "logps/rejected": -1.5551573038101196, "loss": 2.365, "nll_loss": 0.5861850380897522, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019810900557786226, "rewards/margins": 0.1535346359014511, "rewards/rejected": -0.15551573038101196, "step": 3153 }, { "epoch": 2.18118948824343, "grad_norm": 8.375, "learning_rate": 4.343783617642539e-05, "log_odds_chosen": 6.690349578857422, "log_odds_ratio": -0.008571420796215534, "logits/chosen": -0.6027698516845703, "logits/rejected": -0.6378378868103027, "logps/chosen": -0.005035060923546553, "logps/rejected": -0.8536246418952942, "loss": 2.751, "nll_loss": 0.6869030594825745, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005035061039961874, "rewards/margins": 0.0848589539527893, "rewards/rejected": -0.08536246418952942, "step": 3154 }, { "epoch": 2.181881051175657, "grad_norm": 5.154770851135254, "learning_rate": 4.343399416013524e-05, "log_odds_chosen": 5.609927177429199, "log_odds_ratio": -0.0658092275261879, "logits/chosen": -0.2716918885707855, "logits/rejected": -0.33563169836997986, "logps/chosen": -0.027060627937316895, "logps/rejected": -0.7668888568878174, "loss": 2.1289, "nll_loss": 0.5256554484367371, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027060629799962044, "rewards/margins": 0.07398282736539841, "rewards/rejected": -0.07668889313936234, "step": 3155 }, { "epoch": 2.1825726141078836, "grad_norm": 9.937250137329102, "learning_rate": 4.343015214384509e-05, "log_odds_chosen": 7.801394462585449, "log_odds_ratio": -0.004217131529003382, "logits/chosen": -0.5749065279960632, "logits/rejected": -0.6880556344985962, "logps/chosen": -0.005276396404951811, "logps/rejected": -1.3525454998016357, "loss": 3.1324, "nll_loss": 0.7826860547065735, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005276396404951811, "rewards/margins": 0.13472691178321838, "rewards/rejected": -0.13525454699993134, "step": 3156 }, { "epoch": 2.1832641770401104, "grad_norm": 5.070712566375732, "learning_rate": 4.3426310127554944e-05, "log_odds_chosen": 6.111451625823975, "log_odds_ratio": -0.1284855157136917, "logits/chosen": -0.7218311429023743, "logits/rejected": -0.7740025520324707, "logps/chosen": -0.03459743782877922, "logps/rejected": -1.094872236251831, "loss": 2.0958, "nll_loss": 0.5111120939254761, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034597436897456646, "rewards/margins": 0.10602748394012451, "rewards/rejected": -0.10948723554611206, "step": 3157 }, { "epoch": 2.1839557399723377, "grad_norm": 10.03350830078125, "learning_rate": 4.342246811126479e-05, "log_odds_chosen": 5.510814189910889, "log_odds_ratio": -0.17792275547981262, "logits/chosen": -0.505163311958313, "logits/rejected": -0.5847234129905701, "logps/chosen": -0.040842242538928986, "logps/rejected": -1.2754498720169067, "loss": 3.4138, "nll_loss": 0.8356497883796692, "rewards/accuracies": 0.875, "rewards/chosen": -0.004084224347025156, "rewards/margins": 0.12346076965332031, "rewards/rejected": -0.12754499912261963, "step": 3158 }, { "epoch": 2.1846473029045645, "grad_norm": 4.863821983337402, "learning_rate": 4.341862609497465e-05, "log_odds_chosen": 6.666403293609619, "log_odds_ratio": -0.012772869318723679, "logits/chosen": -0.48330166935920715, "logits/rejected": -0.5371727347373962, "logps/chosen": -0.033257968723773956, "logps/rejected": -1.3910984992980957, "loss": 1.819, "nll_loss": 0.4534626603126526, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033257966861128807, "rewards/margins": 0.1357840597629547, "rewards/rejected": -0.13910984992980957, "step": 3159 }, { "epoch": 2.1853388658367914, "grad_norm": 6.057682037353516, "learning_rate": 4.3414784078684495e-05, "log_odds_chosen": 6.663447380065918, "log_odds_ratio": -0.05722741037607193, "logits/chosen": -0.7273718118667603, "logits/rejected": -0.7586309313774109, "logps/chosen": -0.03430384770035744, "logps/rejected": -1.0461488962173462, "loss": 2.3148, "nll_loss": 0.5729818940162659, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034303846769034863, "rewards/margins": 0.10118450969457626, "rewards/rejected": -0.10461489111185074, "step": 3160 }, { "epoch": 2.186030428769018, "grad_norm": 11.682771682739258, "learning_rate": 4.341094206239435e-05, "log_odds_chosen": 8.383731842041016, "log_odds_ratio": -0.0027355810161679983, "logits/chosen": -0.29401373863220215, "logits/rejected": -0.40509486198425293, "logps/chosen": -0.0017849082360044122, "logps/rejected": -1.3605860471725464, "loss": 2.7221, "nll_loss": 0.6802555322647095, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017849083815235645, "rewards/margins": 0.13588011264801025, "rewards/rejected": -0.13605859875679016, "step": 3161 }, { "epoch": 2.186721991701245, "grad_norm": 11.995657920837402, "learning_rate": 4.34071000461042e-05, "log_odds_chosen": 5.557642936706543, "log_odds_ratio": -0.14004188776016235, "logits/chosen": -0.2498472034931183, "logits/rejected": -0.259319543838501, "logps/chosen": -0.07155990600585938, "logps/rejected": -1.4202849864959717, "loss": 2.6449, "nll_loss": 0.6472086906433105, "rewards/accuracies": 0.875, "rewards/chosen": -0.007155990693718195, "rewards/margins": 0.13487249612808228, "rewards/rejected": -0.14202848076820374, "step": 3162 }, { "epoch": 2.187413554633472, "grad_norm": 9.4674654006958, "learning_rate": 4.3403258029814045e-05, "log_odds_chosen": 6.2325825691223145, "log_odds_ratio": -0.1332755982875824, "logits/chosen": -0.5602701306343079, "logits/rejected": -0.6629458665847778, "logps/chosen": -0.03466428816318512, "logps/rejected": -1.2542963027954102, "loss": 1.4881, "nll_loss": 0.35869354009628296, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034664287231862545, "rewards/margins": 0.12196320295333862, "rewards/rejected": -0.12542963027954102, "step": 3163 }, { "epoch": 2.1881051175656987, "grad_norm": 10.65891170501709, "learning_rate": 4.33994160135239e-05, "log_odds_chosen": 7.232670783996582, "log_odds_ratio": -0.021584775298833847, "logits/chosen": -0.6576371788978577, "logits/rejected": -0.6923035979270935, "logps/chosen": -0.01624915562570095, "logps/rejected": -1.8470706939697266, "loss": 3.5342, "nll_loss": 0.8813962936401367, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016249155160039663, "rewards/margins": 0.18308216333389282, "rewards/rejected": -0.18470707535743713, "step": 3164 }, { "epoch": 2.1887966804979255, "grad_norm": 4.825099945068359, "learning_rate": 4.339557399723375e-05, "log_odds_chosen": 5.801044940948486, "log_odds_ratio": -0.16033858060836792, "logits/chosen": -0.3030264377593994, "logits/rejected": -0.3055965304374695, "logps/chosen": -0.05437358096241951, "logps/rejected": -1.6448538303375244, "loss": 3.0495, "nll_loss": 0.7463371753692627, "rewards/accuracies": 0.875, "rewards/chosen": -0.005437357816845179, "rewards/margins": 0.15904802083969116, "rewards/rejected": -0.1644853949546814, "step": 3165 }, { "epoch": 2.1894882434301524, "grad_norm": 6.770928382873535, "learning_rate": 4.33917319809436e-05, "log_odds_chosen": 6.155390739440918, "log_odds_ratio": -0.030024850741028786, "logits/chosen": -0.48152872920036316, "logits/rejected": -0.4823768436908722, "logps/chosen": -0.020001396536827087, "logps/rejected": -0.9744084477424622, "loss": 2.3497, "nll_loss": 0.5844292640686035, "rewards/accuracies": 1.0, "rewards/chosen": -0.002000139793381095, "rewards/margins": 0.09544070065021515, "rewards/rejected": -0.09744083881378174, "step": 3166 }, { "epoch": 2.190179806362379, "grad_norm": 7.0886640548706055, "learning_rate": 4.338788996465345e-05, "log_odds_chosen": 6.047344207763672, "log_odds_ratio": -0.231770858168602, "logits/chosen": -0.5151282548904419, "logits/rejected": -0.5332531929016113, "logps/chosen": -0.06000122055411339, "logps/rejected": -0.7603382468223572, "loss": 2.0776, "nll_loss": 0.4962257444858551, "rewards/accuracies": 0.875, "rewards/chosen": -0.006000122055411339, "rewards/margins": 0.07003369927406311, "rewards/rejected": -0.0760338231921196, "step": 3167 }, { "epoch": 2.190871369294606, "grad_norm": 6.543388366699219, "learning_rate": 4.338404794836331e-05, "log_odds_chosen": 7.2738542556762695, "log_odds_ratio": -0.04685303941369057, "logits/chosen": -0.5018479228019714, "logits/rejected": -0.5226489305496216, "logps/chosen": -0.02508268505334854, "logps/rejected": -1.4995683431625366, "loss": 2.4453, "nll_loss": 0.6066345572471619, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025082684587687254, "rewards/margins": 0.1474485844373703, "rewards/rejected": -0.1499568521976471, "step": 3168 }, { "epoch": 2.191562932226833, "grad_norm": 11.554959297180176, "learning_rate": 4.338020593207315e-05, "log_odds_chosen": 7.814593315124512, "log_odds_ratio": -0.0014177625998854637, "logits/chosen": -0.47919517755508423, "logits/rejected": -0.5403696894645691, "logps/chosen": -0.004644293338060379, "logps/rejected": -1.5748608112335205, "loss": 3.3544, "nll_loss": 0.8384552001953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004644293221645057, "rewards/margins": 0.1570216715335846, "rewards/rejected": -0.15748609602451324, "step": 3169 }, { "epoch": 2.1922544951590597, "grad_norm": 5.401383876800537, "learning_rate": 4.3376363915783006e-05, "log_odds_chosen": 4.879332542419434, "log_odds_ratio": -0.09349296241998672, "logits/chosen": -0.6353992819786072, "logits/rejected": -0.6436095833778381, "logps/chosen": -0.03518253192305565, "logps/rejected": -0.8030743598937988, "loss": 2.365, "nll_loss": 0.5818997621536255, "rewards/accuracies": 1.0, "rewards/chosen": -0.003518253332003951, "rewards/margins": 0.07678918540477753, "rewards/rejected": -0.08030743151903152, "step": 3170 }, { "epoch": 2.1929460580912865, "grad_norm": 14.059950828552246, "learning_rate": 4.337252189949286e-05, "log_odds_chosen": 6.713525295257568, "log_odds_ratio": -0.08972524106502533, "logits/chosen": -0.9425017833709717, "logits/rejected": -0.9960139393806458, "logps/chosen": -0.03543572127819061, "logps/rejected": -1.2302966117858887, "loss": 3.1483, "nll_loss": 0.7781030535697937, "rewards/accuracies": 1.0, "rewards/chosen": -0.003543572500348091, "rewards/margins": 0.11948608607053757, "rewards/rejected": -0.1230296641588211, "step": 3171 }, { "epoch": 2.1936376210235133, "grad_norm": 8.6504545211792, "learning_rate": 4.3368679883202704e-05, "log_odds_chosen": 7.600931167602539, "log_odds_ratio": -0.0028964250814169645, "logits/chosen": -0.5214453935623169, "logits/rejected": -0.6939715147018433, "logps/chosen": -0.014445780776441097, "logps/rejected": -1.601884126663208, "loss": 2.5539, "nll_loss": 0.63817298412323, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014445781707763672, "rewards/margins": 0.15874382853507996, "rewards/rejected": -0.16018840670585632, "step": 3172 }, { "epoch": 2.19432918395574, "grad_norm": 11.834299087524414, "learning_rate": 4.3364837866912556e-05, "log_odds_chosen": 7.545048713684082, "log_odds_ratio": -0.004208111669868231, "logits/chosen": -0.34441643953323364, "logits/rejected": -0.4657328128814697, "logps/chosen": -0.010565445758402348, "logps/rejected": -1.5098049640655518, "loss": 2.9481, "nll_loss": 0.7365975379943848, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010565445991232991, "rewards/margins": 0.14992395043373108, "rewards/rejected": -0.15098050236701965, "step": 3173 }, { "epoch": 2.195020746887967, "grad_norm": 5.100581169128418, "learning_rate": 4.336099585062241e-05, "log_odds_chosen": 6.659306526184082, "log_odds_ratio": -0.045205675065517426, "logits/chosen": -0.47770747542381287, "logits/rejected": -0.43313068151474, "logps/chosen": -0.015689756721258163, "logps/rejected": -0.8002600073814392, "loss": 1.6891, "nll_loss": 0.41774749755859375, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015689757419750094, "rewards/margins": 0.07845702022314072, "rewards/rejected": -0.08002599328756332, "step": 3174 }, { "epoch": 2.195712309820194, "grad_norm": 5.566988468170166, "learning_rate": 4.335715383433226e-05, "log_odds_chosen": 7.460770130157471, "log_odds_ratio": -0.07459472864866257, "logits/chosen": -0.44164443016052246, "logits/rejected": -0.47057870030403137, "logps/chosen": -0.02058670111000538, "logps/rejected": -1.0957387685775757, "loss": 2.013, "nll_loss": 0.495780348777771, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020586701575666666, "rewards/margins": 0.10751520097255707, "rewards/rejected": -0.10957387089729309, "step": 3175 }, { "epoch": 2.1964038727524207, "grad_norm": 8.06294059753418, "learning_rate": 4.335331181804211e-05, "log_odds_chosen": 7.625150203704834, "log_odds_ratio": -0.005710828583687544, "logits/chosen": -0.643086314201355, "logits/rejected": -0.6431131958961487, "logps/chosen": -0.016301624476909637, "logps/rejected": -1.1473497152328491, "loss": 2.9075, "nll_loss": 0.7262943983078003, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016301622381433845, "rewards/margins": 0.11310481280088425, "rewards/rejected": -0.11473497003316879, "step": 3176 }, { "epoch": 2.1970954356846475, "grad_norm": 7.399056434631348, "learning_rate": 4.3349469801751966e-05, "log_odds_chosen": 6.73783540725708, "log_odds_ratio": -0.18007498979568481, "logits/chosen": -0.7180381417274475, "logits/rejected": -0.7221200466156006, "logps/chosen": -0.028597483411431313, "logps/rejected": -0.8597887754440308, "loss": 2.8968, "nll_loss": 0.7061822414398193, "rewards/accuracies": 0.875, "rewards/chosen": -0.002859748201444745, "rewards/margins": 0.08311913162469864, "rewards/rejected": -0.08597888052463531, "step": 3177 }, { "epoch": 2.1977869986168743, "grad_norm": 12.78672981262207, "learning_rate": 4.334562778546181e-05, "log_odds_chosen": 8.832401275634766, "log_odds_ratio": -0.00019618018995970488, "logits/chosen": -0.592092752456665, "logits/rejected": -0.6911430954933167, "logps/chosen": -0.0007649950566701591, "logps/rejected": -1.6313860416412354, "loss": 2.6642, "nll_loss": 0.6660318970680237, "rewards/accuracies": 1.0, "rewards/chosen": -7.649950566701591e-05, "rewards/margins": 0.16306212544441223, "rewards/rejected": -0.16313862800598145, "step": 3178 }, { "epoch": 2.198478561549101, "grad_norm": 9.519674301147461, "learning_rate": 4.3341785769171664e-05, "log_odds_chosen": 8.840486526489258, "log_odds_ratio": -0.0005940008559264243, "logits/chosen": -0.784791886806488, "logits/rejected": -0.7828980684280396, "logps/chosen": -0.0008778494084253907, "logps/rejected": -1.399017572402954, "loss": 2.7933, "nll_loss": 0.698258101940155, "rewards/accuracies": 1.0, "rewards/chosen": -8.778493793215603e-05, "rewards/margins": 0.13981398940086365, "rewards/rejected": -0.1399017721414566, "step": 3179 }, { "epoch": 2.199170124481328, "grad_norm": 7.758335113525391, "learning_rate": 4.3337943752881517e-05, "log_odds_chosen": 8.568967819213867, "log_odds_ratio": -0.0028527555987238884, "logits/chosen": -0.9076350927352905, "logits/rejected": -0.968063473701477, "logps/chosen": -0.005145165137946606, "logps/rejected": -1.6919078826904297, "loss": 2.2237, "nll_loss": 0.5556411147117615, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005145165487192571, "rewards/margins": 0.16867627203464508, "rewards/rejected": -0.16919079422950745, "step": 3180 }, { "epoch": 2.199861687413555, "grad_norm": 6.658428192138672, "learning_rate": 4.333410173659136e-05, "log_odds_chosen": 6.849039077758789, "log_odds_ratio": -0.1127195730805397, "logits/chosen": -0.6284754276275635, "logits/rejected": -0.653616726398468, "logps/chosen": -0.02700984664261341, "logps/rejected": -1.5343716144561768, "loss": 2.2461, "nll_loss": 0.5502545833587646, "rewards/accuracies": 0.875, "rewards/chosen": -0.00270098471082747, "rewards/margins": 0.15073618292808533, "rewards/rejected": -0.15343716740608215, "step": 3181 }, { "epoch": 2.2005532503457816, "grad_norm": 6.70884895324707, "learning_rate": 4.3330259720301215e-05, "log_odds_chosen": 7.267721652984619, "log_odds_ratio": -0.00911356508731842, "logits/chosen": -0.41547131538391113, "logits/rejected": -0.4822637736797333, "logps/chosen": -0.02417285554111004, "logps/rejected": -1.272136926651001, "loss": 2.368, "nll_loss": 0.5911010503768921, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024172854609787464, "rewards/margins": 0.12479639798402786, "rewards/rejected": -0.12721368670463562, "step": 3182 }, { "epoch": 2.2012448132780085, "grad_norm": 7.333751201629639, "learning_rate": 4.332641770401107e-05, "log_odds_chosen": 5.247653007507324, "log_odds_ratio": -0.06348458677530289, "logits/chosen": -0.7271361351013184, "logits/rejected": -0.7556310892105103, "logps/chosen": -0.17119011282920837, "logps/rejected": -1.9269461631774902, "loss": 2.6175, "nll_loss": 0.6480197310447693, "rewards/accuracies": 1.0, "rewards/chosen": -0.017119012773036957, "rewards/margins": 0.1755756139755249, "rewards/rejected": -0.19269460439682007, "step": 3183 }, { "epoch": 2.2019363762102353, "grad_norm": 6.730661392211914, "learning_rate": 4.332257568772092e-05, "log_odds_chosen": 7.7098894119262695, "log_odds_ratio": -0.17441943287849426, "logits/chosen": -0.4983978867530823, "logits/rejected": -0.5662646293640137, "logps/chosen": -0.024879546836018562, "logps/rejected": -0.975651741027832, "loss": 1.9166, "nll_loss": 0.46170371770858765, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024879546836018562, "rewards/margins": 0.09507721662521362, "rewards/rejected": -0.0975651741027832, "step": 3184 }, { "epoch": 2.202627939142462, "grad_norm": 8.689640045166016, "learning_rate": 4.3318733671430765e-05, "log_odds_chosen": 5.190764904022217, "log_odds_ratio": -0.04406759887933731, "logits/chosen": -0.7414398193359375, "logits/rejected": -0.7421854734420776, "logps/chosen": -0.07408274710178375, "logps/rejected": -1.9030416011810303, "loss": 2.7025, "nll_loss": 0.6712265014648438, "rewards/accuracies": 1.0, "rewards/chosen": -0.007408274337649345, "rewards/margins": 0.18289589881896973, "rewards/rejected": -0.19030416011810303, "step": 3185 }, { "epoch": 2.203319502074689, "grad_norm": 10.43053150177002, "learning_rate": 4.3314891655140624e-05, "log_odds_chosen": 8.137721061706543, "log_odds_ratio": -0.007064457517117262, "logits/chosen": -0.40223050117492676, "logits/rejected": -0.49093905091285706, "logps/chosen": -0.001537282601930201, "logps/rejected": -1.422028660774231, "loss": 2.1063, "nll_loss": 0.5258598327636719, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015372825146187097, "rewards/margins": 0.1420491486787796, "rewards/rejected": -0.14220286905765533, "step": 3186 }, { "epoch": 2.204011065006916, "grad_norm": 9.425034523010254, "learning_rate": 4.331104963885047e-05, "log_odds_chosen": 9.267492294311523, "log_odds_ratio": -0.002203315496444702, "logits/chosen": -0.7236309051513672, "logits/rejected": -0.8363832235336304, "logps/chosen": -0.008194814436137676, "logps/rejected": -1.8639302253723145, "loss": 2.013, "nll_loss": 0.5030335187911987, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008194815600290895, "rewards/margins": 0.1855735182762146, "rewards/rejected": -0.18639302253723145, "step": 3187 }, { "epoch": 2.2047026279391426, "grad_norm": 6.292108058929443, "learning_rate": 4.330720762256032e-05, "log_odds_chosen": 5.8964643478393555, "log_odds_ratio": -0.04489860683679581, "logits/chosen": -0.4235934019088745, "logits/rejected": -0.48958835005760193, "logps/chosen": -0.018647415563464165, "logps/rejected": -1.075378179550171, "loss": 2.4478, "nll_loss": 0.6074610948562622, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018647415563464165, "rewards/margins": 0.10567308962345123, "rewards/rejected": -0.10753783583641052, "step": 3188 }, { "epoch": 2.2053941908713695, "grad_norm": 8.567946434020996, "learning_rate": 4.3303365606270175e-05, "log_odds_chosen": 8.469558715820312, "log_odds_ratio": -0.0012380550615489483, "logits/chosen": -0.5258292555809021, "logits/rejected": -0.5719144344329834, "logps/chosen": -0.004365398548543453, "logps/rejected": -1.2858614921569824, "loss": 2.6561, "nll_loss": 0.6639001369476318, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004365398781374097, "rewards/margins": 0.12814961373806, "rewards/rejected": -0.12858614325523376, "step": 3189 }, { "epoch": 2.2060857538035963, "grad_norm": 7.210387706756592, "learning_rate": 4.329952358998002e-05, "log_odds_chosen": 6.826498985290527, "log_odds_ratio": -0.052564047276973724, "logits/chosen": -0.3505515456199646, "logits/rejected": -0.37268006801605225, "logps/chosen": -0.05025108531117439, "logps/rejected": -1.5903440713882446, "loss": 2.7754, "nll_loss": 0.6885868310928345, "rewards/accuracies": 1.0, "rewards/chosen": -0.005025108344852924, "rewards/margins": 0.15400929749011993, "rewards/rejected": -0.15903441607952118, "step": 3190 }, { "epoch": 2.206777316735823, "grad_norm": 6.022358417510986, "learning_rate": 4.329568157368987e-05, "log_odds_chosen": 5.189307689666748, "log_odds_ratio": -0.2198401540517807, "logits/chosen": -0.7311754822731018, "logits/rejected": -0.8276402354240417, "logps/chosen": -0.051636867225170135, "logps/rejected": -0.7299488186836243, "loss": 3.4755, "nll_loss": 0.8468843102455139, "rewards/accuracies": 0.875, "rewards/chosen": -0.005163686349987984, "rewards/margins": 0.06783119589090347, "rewards/rejected": -0.07299488037824631, "step": 3191 }, { "epoch": 2.20746887966805, "grad_norm": 5.002099514007568, "learning_rate": 4.3291839557399726e-05, "log_odds_chosen": 5.58121395111084, "log_odds_ratio": -0.09256982058286667, "logits/chosen": -0.7110233306884766, "logits/rejected": -0.6769241094589233, "logps/chosen": -0.04848054423928261, "logps/rejected": -0.9648277163505554, "loss": 2.7033, "nll_loss": 0.6665750741958618, "rewards/accuracies": 1.0, "rewards/chosen": -0.004848054610192776, "rewards/margins": 0.09163472056388855, "rewards/rejected": -0.0964827761054039, "step": 3192 }, { "epoch": 2.2081604426002768, "grad_norm": 5.96661376953125, "learning_rate": 4.328799754110958e-05, "log_odds_chosen": 3.965442657470703, "log_odds_ratio": -0.117483951151371, "logits/chosen": -0.41260385513305664, "logits/rejected": -0.45036524534225464, "logps/chosen": -0.07093721628189087, "logps/rejected": -1.108747959136963, "loss": 1.993, "nll_loss": 0.4865078926086426, "rewards/accuracies": 1.0, "rewards/chosen": -0.007093721069395542, "rewards/margins": 0.1037810817360878, "rewards/rejected": -0.11087480187416077, "step": 3193 }, { "epoch": 2.2088520055325036, "grad_norm": 8.659708976745605, "learning_rate": 4.3284155524819424e-05, "log_odds_chosen": 7.016357421875, "log_odds_ratio": -0.006338158156722784, "logits/chosen": -0.5837326049804688, "logits/rejected": -0.6578401923179626, "logps/chosen": -0.003191157942637801, "logps/rejected": -1.1636102199554443, "loss": 2.2925, "nll_loss": 0.5724791288375854, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003191157302353531, "rewards/margins": 0.11604189872741699, "rewards/rejected": -0.11636102199554443, "step": 3194 }, { "epoch": 2.2095435684647304, "grad_norm": 9.927667617797852, "learning_rate": 4.328031350852928e-05, "log_odds_chosen": 8.560630798339844, "log_odds_ratio": -0.0008007477736100554, "logits/chosen": -0.42888209223747253, "logits/rejected": -0.4656681418418884, "logps/chosen": -0.0037196280900388956, "logps/rejected": -1.6876860857009888, "loss": 2.4, "nll_loss": 0.5999287962913513, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003719627857208252, "rewards/margins": 0.16839665174484253, "rewards/rejected": -0.16876859962940216, "step": 3195 }, { "epoch": 2.2102351313969573, "grad_norm": 6.38437557220459, "learning_rate": 4.327647149223913e-05, "log_odds_chosen": 7.007352352142334, "log_odds_ratio": -0.009551974013447762, "logits/chosen": -0.7193029522895813, "logits/rejected": -0.759920597076416, "logps/chosen": -0.03922717645764351, "logps/rejected": -1.365323543548584, "loss": 2.1614, "nll_loss": 0.5394060611724854, "rewards/accuracies": 1.0, "rewards/chosen": -0.003922717645764351, "rewards/margins": 0.13260963559150696, "rewards/rejected": -0.13653233647346497, "step": 3196 }, { "epoch": 2.210926694329184, "grad_norm": 6.613427639007568, "learning_rate": 4.327262947594898e-05, "log_odds_chosen": 7.841188430786133, "log_odds_ratio": -0.0046505313366651535, "logits/chosen": -0.6819300651550293, "logits/rejected": -0.7347622513771057, "logps/chosen": -0.0033948104828596115, "logps/rejected": -1.0823031663894653, "loss": 2.5261, "nll_loss": 0.6310635209083557, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033948104828596115, "rewards/margins": 0.10789082944393158, "rewards/rejected": -0.10823030769824982, "step": 3197 }, { "epoch": 2.211618257261411, "grad_norm": 7.287887096405029, "learning_rate": 4.3268787459658833e-05, "log_odds_chosen": 8.129772186279297, "log_odds_ratio": -0.002522420370951295, "logits/chosen": -0.7787665724754333, "logits/rejected": -0.8593880534172058, "logps/chosen": -0.0031739026308059692, "logps/rejected": -1.6081467866897583, "loss": 1.8657, "nll_loss": 0.4661679267883301, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031739025143906474, "rewards/margins": 0.1604972928762436, "rewards/rejected": -0.16081468760967255, "step": 3198 }, { "epoch": 2.2123098201936378, "grad_norm": 5.78539514541626, "learning_rate": 4.326494544336868e-05, "log_odds_chosen": 8.055619239807129, "log_odds_ratio": -0.0013233129866421223, "logits/chosen": -0.6449908018112183, "logits/rejected": -0.7909893989562988, "logps/chosen": -0.00542853306978941, "logps/rejected": -1.635412335395813, "loss": 1.5937, "nll_loss": 0.39829838275909424, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005428533768281341, "rewards/margins": 0.16299840807914734, "rewards/rejected": -0.1635412573814392, "step": 3199 }, { "epoch": 2.2130013831258646, "grad_norm": 8.962825775146484, "learning_rate": 4.326110342707853e-05, "log_odds_chosen": 6.840261459350586, "log_odds_ratio": -0.08905114978551865, "logits/chosen": -0.623429000377655, "logits/rejected": -0.6256603598594666, "logps/chosen": -0.021636225283145905, "logps/rejected": -1.1779842376708984, "loss": 1.8908, "nll_loss": 0.4637908339500427, "rewards/accuracies": 0.875, "rewards/chosen": -0.0021636225283145905, "rewards/margins": 0.11563479900360107, "rewards/rejected": -0.11779841780662537, "step": 3200 }, { "epoch": 2.2136929460580914, "grad_norm": 6.071709156036377, "learning_rate": 4.3257261410788384e-05, "log_odds_chosen": 6.589384078979492, "log_odds_ratio": -0.06430207192897797, "logits/chosen": -0.5658938884735107, "logits/rejected": -0.5847985744476318, "logps/chosen": -0.02275737375020981, "logps/rejected": -1.0458507537841797, "loss": 2.4343, "nll_loss": 0.6021410226821899, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022757374681532383, "rewards/margins": 0.10230933874845505, "rewards/rejected": -0.10458506643772125, "step": 3201 }, { "epoch": 2.2143845089903182, "grad_norm": 9.392425537109375, "learning_rate": 4.3253419394498236e-05, "log_odds_chosen": 8.317364692687988, "log_odds_ratio": -0.0019505569944158196, "logits/chosen": -0.3586891293525696, "logits/rejected": -0.41375863552093506, "logps/chosen": -0.013265244662761688, "logps/rejected": -1.7743667364120483, "loss": 2.3781, "nll_loss": 0.5943280458450317, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013265246525406837, "rewards/margins": 0.1761101484298706, "rewards/rejected": -0.1774366796016693, "step": 3202 }, { "epoch": 2.215076071922545, "grad_norm": 11.797636032104492, "learning_rate": 4.324957737820808e-05, "log_odds_chosen": 7.6043806076049805, "log_odds_ratio": -0.04728805273771286, "logits/chosen": -0.5296475291252136, "logits/rejected": -0.6080471277236938, "logps/chosen": -0.010652073659002781, "logps/rejected": -1.2598812580108643, "loss": 2.3432, "nll_loss": 0.5810590982437134, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010652071796357632, "rewards/margins": 0.12492291629314423, "rewards/rejected": -0.12598812580108643, "step": 3203 }, { "epoch": 2.215767634854772, "grad_norm": 7.488215923309326, "learning_rate": 4.324573536191794e-05, "log_odds_chosen": 7.223307132720947, "log_odds_ratio": -0.0028760689310729504, "logits/chosen": -0.8060204982757568, "logits/rejected": -0.8052228689193726, "logps/chosen": -0.0104671660810709, "logps/rejected": -1.5643423795700073, "loss": 3.5774, "nll_loss": 0.8940551280975342, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010467165848240256, "rewards/margins": 0.1553875207901001, "rewards/rejected": -0.15643423795700073, "step": 3204 }, { "epoch": 2.2164591977869987, "grad_norm": 7.110921859741211, "learning_rate": 4.324189334562779e-05, "log_odds_chosen": 7.80830717086792, "log_odds_ratio": -0.018234528601169586, "logits/chosen": -0.8252729773521423, "logits/rejected": -0.8693796396255493, "logps/chosen": -0.02617500349879265, "logps/rejected": -1.7985427379608154, "loss": 2.2648, "nll_loss": 0.5643655061721802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026175002567470074, "rewards/margins": 0.17723676562309265, "rewards/rejected": -0.17985425889492035, "step": 3205 }, { "epoch": 2.2171507607192256, "grad_norm": 5.993846893310547, "learning_rate": 4.323805132933764e-05, "log_odds_chosen": 7.54710578918457, "log_odds_ratio": -0.19196613132953644, "logits/chosen": -0.11485698819160461, "logits/rejected": -0.1784641146659851, "logps/chosen": -0.026814231649041176, "logps/rejected": -1.0498394966125488, "loss": 2.0394, "nll_loss": 0.4906499981880188, "rewards/accuracies": 0.875, "rewards/chosen": -0.0026814232114702463, "rewards/margins": 0.10230252146720886, "rewards/rejected": -0.10498394072055817, "step": 3206 }, { "epoch": 2.2178423236514524, "grad_norm": 10.639652252197266, "learning_rate": 4.323420931304749e-05, "log_odds_chosen": 8.695178031921387, "log_odds_ratio": -0.00283455359749496, "logits/chosen": -0.5208930969238281, "logits/rejected": -0.571614146232605, "logps/chosen": -0.00851532258093357, "logps/rejected": -1.6688697338104248, "loss": 2.6946, "nll_loss": 0.6733658909797668, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008515321533195674, "rewards/margins": 0.16603544354438782, "rewards/rejected": -0.16688698530197144, "step": 3207 }, { "epoch": 2.2185338865836792, "grad_norm": 7.5925798416137695, "learning_rate": 4.323036729675734e-05, "log_odds_chosen": 8.466772079467773, "log_odds_ratio": -0.0017478003865107894, "logits/chosen": -0.5638378262519836, "logits/rejected": -0.6701875925064087, "logps/chosen": -0.002004731446504593, "logps/rejected": -1.594688892364502, "loss": 2.7888, "nll_loss": 0.697022020816803, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020047312136739492, "rewards/margins": 0.15926840901374817, "rewards/rejected": -0.1594688892364502, "step": 3208 }, { "epoch": 2.219225449515906, "grad_norm": 6.606655597686768, "learning_rate": 4.322652528046719e-05, "log_odds_chosen": 8.772965431213379, "log_odds_ratio": -0.0006073166732676327, "logits/chosen": -0.5933998823165894, "logits/rejected": -0.616485595703125, "logps/chosen": -0.0005924435099586844, "logps/rejected": -1.153617262840271, "loss": 1.9326, "nll_loss": 0.4830939769744873, "rewards/accuracies": 1.0, "rewards/chosen": -5.9244346630293876e-05, "rewards/margins": 0.11530248820781708, "rewards/rejected": -0.11536173522472382, "step": 3209 }, { "epoch": 2.219917012448133, "grad_norm": 7.080139636993408, "learning_rate": 4.322268326417704e-05, "log_odds_chosen": 7.402169227600098, "log_odds_ratio": -0.04810430854558945, "logits/chosen": -0.6404480338096619, "logits/rejected": -0.699675440788269, "logps/chosen": -0.021948248147964478, "logps/rejected": -1.3484892845153809, "loss": 2.6392, "nll_loss": 0.6549916863441467, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021948248613625765, "rewards/margins": 0.1326541006565094, "rewards/rejected": -0.1348489373922348, "step": 3210 }, { "epoch": 2.2206085753803597, "grad_norm": 8.870227813720703, "learning_rate": 4.3218841247886895e-05, "log_odds_chosen": 6.025036334991455, "log_odds_ratio": -0.06660658121109009, "logits/chosen": -0.6402832865715027, "logits/rejected": -0.6744290590286255, "logps/chosen": -0.03994206339120865, "logps/rejected": -1.503450870513916, "loss": 2.9903, "nll_loss": 0.7409057021141052, "rewards/accuracies": 1.0, "rewards/chosen": -0.00399420689791441, "rewards/margins": 0.1463508903980255, "rewards/rejected": -0.1503450721502304, "step": 3211 }, { "epoch": 2.2213001383125865, "grad_norm": 8.091143608093262, "learning_rate": 4.321499923159674e-05, "log_odds_chosen": 6.035751819610596, "log_odds_ratio": -0.014101025648415089, "logits/chosen": -0.5790939927101135, "logits/rejected": -0.6447158455848694, "logps/chosen": -0.15419796109199524, "logps/rejected": -1.8588969707489014, "loss": 2.8139, "nll_loss": 0.7020571231842041, "rewards/accuracies": 1.0, "rewards/chosen": -0.015419796109199524, "rewards/margins": 0.17046990990638733, "rewards/rejected": -0.18588969111442566, "step": 3212 }, { "epoch": 2.2219917012448134, "grad_norm": 9.204658508300781, "learning_rate": 4.32111572153066e-05, "log_odds_chosen": 8.189249038696289, "log_odds_ratio": -0.0007342756725847721, "logits/chosen": -0.5341463685035706, "logits/rejected": -0.541786789894104, "logps/chosen": -0.0009174979059025645, "logps/rejected": -1.1954847574234009, "loss": 2.9619, "nll_loss": 0.7404070496559143, "rewards/accuracies": 1.0, "rewards/chosen": -9.174978913506493e-05, "rewards/margins": 0.11945672333240509, "rewards/rejected": -0.11954847723245621, "step": 3213 }, { "epoch": 2.22268326417704, "grad_norm": 6.102440357208252, "learning_rate": 4.3207315199016445e-05, "log_odds_chosen": 8.242511749267578, "log_odds_ratio": -0.0031344012822955847, "logits/chosen": -0.5629912614822388, "logits/rejected": -0.6202033758163452, "logps/chosen": -0.010664651170372963, "logps/rejected": -1.4259583950042725, "loss": 2.9102, "nll_loss": 0.727225661277771, "rewards/accuracies": 1.0, "rewards/chosen": -0.001066465163603425, "rewards/margins": 0.141529381275177, "rewards/rejected": -0.14259584248065948, "step": 3214 }, { "epoch": 2.223374827109267, "grad_norm": 10.480937957763672, "learning_rate": 4.32034731827263e-05, "log_odds_chosen": 7.406135559082031, "log_odds_ratio": -0.09118548780679703, "logits/chosen": -0.3470768332481384, "logits/rejected": -0.44605544209480286, "logps/chosen": -0.06514297425746918, "logps/rejected": -1.6992648839950562, "loss": 2.9795, "nll_loss": 0.7357611060142517, "rewards/accuracies": 1.0, "rewards/chosen": -0.00651429733261466, "rewards/margins": 0.1634121984243393, "rewards/rejected": -0.1699264943599701, "step": 3215 }, { "epoch": 2.224066390041494, "grad_norm": 10.333035469055176, "learning_rate": 4.319963116643615e-05, "log_odds_chosen": 7.455963134765625, "log_odds_ratio": -0.016867658123373985, "logits/chosen": -0.5451837182044983, "logits/rejected": -0.5722004771232605, "logps/chosen": -0.007615362759679556, "logps/rejected": -1.1253387928009033, "loss": 2.4731, "nll_loss": 0.6165924668312073, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007615363574586809, "rewards/margins": 0.11177234351634979, "rewards/rejected": -0.11253388226032257, "step": 3216 }, { "epoch": 2.2247579529737207, "grad_norm": 7.6712517738342285, "learning_rate": 4.3195789150145996e-05, "log_odds_chosen": 7.022882461547852, "log_odds_ratio": -0.06982903182506561, "logits/chosen": -0.428468257188797, "logits/rejected": -0.46561765670776367, "logps/chosen": -0.046739161014556885, "logps/rejected": -1.5080595016479492, "loss": 2.43, "nll_loss": 0.6005263328552246, "rewards/accuracies": 1.0, "rewards/chosen": -0.0046739159151911736, "rewards/margins": 0.14613203704357147, "rewards/rejected": -0.15080595016479492, "step": 3217 }, { "epoch": 2.2254495159059475, "grad_norm": 6.225061416625977, "learning_rate": 4.319194713385585e-05, "log_odds_chosen": 6.340688705444336, "log_odds_ratio": -0.019213810563087463, "logits/chosen": -0.4877764880657196, "logits/rejected": -0.5567202568054199, "logps/chosen": -0.06046876683831215, "logps/rejected": -1.2595614194869995, "loss": 2.7736, "nll_loss": 0.6914803385734558, "rewards/accuracies": 1.0, "rewards/chosen": -0.006046876776963472, "rewards/margins": 0.11990926414728165, "rewards/rejected": -0.12595614790916443, "step": 3218 }, { "epoch": 2.2261410788381744, "grad_norm": 11.778791427612305, "learning_rate": 4.31881051175657e-05, "log_odds_chosen": 8.48779296875, "log_odds_ratio": -0.0030446574091911316, "logits/chosen": -0.41129714250564575, "logits/rejected": -0.42171400785446167, "logps/chosen": -0.009282330051064491, "logps/rejected": -1.4498590230941772, "loss": 2.7957, "nll_loss": 0.6986181735992432, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009282330865971744, "rewards/margins": 0.14405766129493713, "rewards/rejected": -0.1449858844280243, "step": 3219 }, { "epoch": 2.226832641770401, "grad_norm": 8.739641189575195, "learning_rate": 4.318426310127555e-05, "log_odds_chosen": 6.724615097045898, "log_odds_ratio": -0.20382250845432281, "logits/chosen": -0.6981180906295776, "logits/rejected": -0.7419140338897705, "logps/chosen": -0.025747288018465042, "logps/rejected": -1.162453532218933, "loss": 2.2124, "nll_loss": 0.5327064394950867, "rewards/accuracies": 0.875, "rewards/chosen": -0.002574728801846504, "rewards/margins": 0.11367063224315643, "rewards/rejected": -0.11624535918235779, "step": 3220 }, { "epoch": 2.227524204702628, "grad_norm": 10.79511833190918, "learning_rate": 4.31804210849854e-05, "log_odds_chosen": 6.931116580963135, "log_odds_ratio": -0.022382281720638275, "logits/chosen": -0.3587380051612854, "logits/rejected": -0.408026784658432, "logps/chosen": -0.03245670348405838, "logps/rejected": -1.308230996131897, "loss": 2.5902, "nll_loss": 0.6453030109405518, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032456705812364817, "rewards/margins": 0.12757742404937744, "rewards/rejected": -0.13082310557365417, "step": 3221 }, { "epoch": 2.228215767634855, "grad_norm": 11.780632019042969, "learning_rate": 4.317657906869526e-05, "log_odds_chosen": 9.010337829589844, "log_odds_ratio": -0.0002952085924334824, "logits/chosen": -0.4327678382396698, "logits/rejected": -0.5492873191833496, "logps/chosen": -0.0007466014940291643, "logps/rejected": -1.6293330192565918, "loss": 3.5053, "nll_loss": 0.8762847185134888, "rewards/accuracies": 1.0, "rewards/chosen": -7.466015813406557e-05, "rewards/margins": 0.16285865008831024, "rewards/rejected": -0.16293330490589142, "step": 3222 }, { "epoch": 2.2289073305670817, "grad_norm": 7.237020492553711, "learning_rate": 4.3172737052405104e-05, "log_odds_chosen": 7.4671525955200195, "log_odds_ratio": -0.01417066901922226, "logits/chosen": -0.6515873670578003, "logits/rejected": -0.7530137896537781, "logps/chosen": -0.1246192455291748, "logps/rejected": -1.8631937503814697, "loss": 2.5728, "nll_loss": 0.6417914628982544, "rewards/accuracies": 1.0, "rewards/chosen": -0.012461923994123936, "rewards/margins": 0.17385748028755188, "rewards/rejected": -0.18631939589977264, "step": 3223 }, { "epoch": 2.2295988934993085, "grad_norm": 7.527968883514404, "learning_rate": 4.3168895036114956e-05, "log_odds_chosen": 6.418063640594482, "log_odds_ratio": -0.1917363405227661, "logits/chosen": -0.35648202896118164, "logits/rejected": -0.47666841745376587, "logps/chosen": -0.09514914453029633, "logps/rejected": -1.2580252885818481, "loss": 2.0882, "nll_loss": 0.5028823018074036, "rewards/accuracies": 0.875, "rewards/chosen": -0.009514914825558662, "rewards/margins": 0.11628760397434235, "rewards/rejected": -0.12580251693725586, "step": 3224 }, { "epoch": 2.2302904564315353, "grad_norm": 8.227404594421387, "learning_rate": 4.316505301982481e-05, "log_odds_chosen": 8.37899398803711, "log_odds_ratio": -0.0015768279554322362, "logits/chosen": -0.7162021398544312, "logits/rejected": -0.7547247409820557, "logps/chosen": -0.0065619018860161304, "logps/rejected": -1.9897502660751343, "loss": 2.6541, "nll_loss": 0.6633625030517578, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006561902118846774, "rewards/margins": 0.19831883907318115, "rewards/rejected": -0.19897502660751343, "step": 3225 }, { "epoch": 2.230982019363762, "grad_norm": 8.16875171661377, "learning_rate": 4.3161211003534654e-05, "log_odds_chosen": 6.907095909118652, "log_odds_ratio": -0.03357435390353203, "logits/chosen": -0.6958314776420593, "logits/rejected": -0.7136378288269043, "logps/chosen": -0.016598278656601906, "logps/rejected": -1.4084336757659912, "loss": 2.3213, "nll_loss": 0.5769670605659485, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016598280053585768, "rewards/margins": 0.13918353617191315, "rewards/rejected": -0.14084336161613464, "step": 3226 }, { "epoch": 2.231673582295989, "grad_norm": 12.13620662689209, "learning_rate": 4.315736898724451e-05, "log_odds_chosen": 7.031831741333008, "log_odds_ratio": -0.21214856207370758, "logits/chosen": -0.37228450179100037, "logits/rejected": -0.4435897469520569, "logps/chosen": -0.06253484636545181, "logps/rejected": -1.210754632949829, "loss": 2.4154, "nll_loss": 0.5826359391212463, "rewards/accuracies": 0.875, "rewards/chosen": -0.006253485102206469, "rewards/margins": 0.11482198536396027, "rewards/rejected": -0.12107546627521515, "step": 3227 }, { "epoch": 2.232365145228216, "grad_norm": 8.838326454162598, "learning_rate": 4.315352697095436e-05, "log_odds_chosen": 6.4838547706604, "log_odds_ratio": -0.07985038310289383, "logits/chosen": -0.7065879106521606, "logits/rejected": -0.7691741585731506, "logps/chosen": -0.01937960647046566, "logps/rejected": -0.8377599716186523, "loss": 2.8604, "nll_loss": 0.7071273326873779, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019379606237635016, "rewards/margins": 0.08183804154396057, "rewards/rejected": -0.08377599716186523, "step": 3228 }, { "epoch": 2.2330567081604427, "grad_norm": 10.37695026397705, "learning_rate": 4.314968495466421e-05, "log_odds_chosen": 7.060420513153076, "log_odds_ratio": -0.003771477611735463, "logits/chosen": -0.5641513466835022, "logits/rejected": -0.7125513553619385, "logps/chosen": -0.10711301118135452, "logps/rejected": -1.5135568380355835, "loss": 2.4957, "nll_loss": 0.6235363483428955, "rewards/accuracies": 1.0, "rewards/chosen": -0.010711301118135452, "rewards/margins": 0.140644371509552, "rewards/rejected": -0.15135568380355835, "step": 3229 }, { "epoch": 2.2337482710926695, "grad_norm": 6.700646877288818, "learning_rate": 4.314584293837406e-05, "log_odds_chosen": 5.052937030792236, "log_odds_ratio": -0.2864532768726349, "logits/chosen": -0.41295093297958374, "logits/rejected": -0.538131594657898, "logps/chosen": -0.05101025477051735, "logps/rejected": -1.1042132377624512, "loss": 2.5972, "nll_loss": 0.6206562519073486, "rewards/accuracies": 0.875, "rewards/chosen": -0.005101025104522705, "rewards/margins": 0.10532030463218689, "rewards/rejected": -0.1104213297367096, "step": 3230 }, { "epoch": 2.2344398340248963, "grad_norm": 12.453567504882812, "learning_rate": 4.3142000922083917e-05, "log_odds_chosen": 6.392004013061523, "log_odds_ratio": -0.27353009581565857, "logits/chosen": -0.4739932417869568, "logits/rejected": -0.5234993696212769, "logps/chosen": -0.03688199445605278, "logps/rejected": -1.5051337480545044, "loss": 2.8271, "nll_loss": 0.6794204711914062, "rewards/accuracies": 0.875, "rewards/chosen": -0.0036881992127746344, "rewards/margins": 0.1468251794576645, "rewards/rejected": -0.15051338076591492, "step": 3231 }, { "epoch": 2.235131396957123, "grad_norm": 11.653568267822266, "learning_rate": 4.313815890579376e-05, "log_odds_chosen": 7.194883346557617, "log_odds_ratio": -0.005872397683560848, "logits/chosen": -0.10072185844182968, "logits/rejected": -0.15041132271289825, "logps/chosen": -0.017534758895635605, "logps/rejected": -1.875885248184204, "loss": 3.2902, "nll_loss": 0.8219622373580933, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017534758662804961, "rewards/margins": 0.18583504855632782, "rewards/rejected": -0.18758851289749146, "step": 3232 }, { "epoch": 2.23582295988935, "grad_norm": 4.405982494354248, "learning_rate": 4.3134316889503615e-05, "log_odds_chosen": 9.255558013916016, "log_odds_ratio": -0.00452328659594059, "logits/chosen": -0.003129318356513977, "logits/rejected": 0.01258845254778862, "logps/chosen": -0.010522023774683475, "logps/rejected": -1.2999018430709839, "loss": 2.0627, "nll_loss": 0.5152261257171631, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010522024240344763, "rewards/margins": 0.1289379894733429, "rewards/rejected": -0.12999019026756287, "step": 3233 }, { "epoch": 2.236514522821577, "grad_norm": 5.9882941246032715, "learning_rate": 4.313047487321347e-05, "log_odds_chosen": 7.687836647033691, "log_odds_ratio": -0.0037897920701652765, "logits/chosen": -0.625095009803772, "logits/rejected": -0.5738711953163147, "logps/chosen": -0.005181067157536745, "logps/rejected": -1.1728625297546387, "loss": 2.7346, "nll_loss": 0.6832716464996338, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005181067390367389, "rewards/margins": 0.11676815152168274, "rewards/rejected": -0.11728626489639282, "step": 3234 }, { "epoch": 2.2372060857538036, "grad_norm": 9.690523147583008, "learning_rate": 4.312663285692331e-05, "log_odds_chosen": 6.730501174926758, "log_odds_ratio": -0.04687266796827316, "logits/chosen": -0.4622930884361267, "logits/rejected": -0.4392380714416504, "logps/chosen": -0.003654046915471554, "logps/rejected": -0.8899112343788147, "loss": 3.1119, "nll_loss": 0.7732935547828674, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036540470318868756, "rewards/margins": 0.0886257141828537, "rewards/rejected": -0.08899112045764923, "step": 3235 }, { "epoch": 2.2378976486860305, "grad_norm": 12.868103981018066, "learning_rate": 4.3122790840633165e-05, "log_odds_chosen": 7.857724189758301, "log_odds_ratio": -0.08621303737163544, "logits/chosen": -0.08840826898813248, "logits/rejected": -0.20376858115196228, "logps/chosen": -0.017540447413921356, "logps/rejected": -1.723393201828003, "loss": 3.3885, "nll_loss": 0.8385149240493774, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017540447879582644, "rewards/margins": 0.1705852895975113, "rewards/rejected": -0.1723393201828003, "step": 3236 }, { "epoch": 2.2385892116182573, "grad_norm": 8.59496784210205, "learning_rate": 4.311894882434302e-05, "log_odds_chosen": 7.627242088317871, "log_odds_ratio": -0.003344690427184105, "logits/chosen": -0.5222266912460327, "logits/rejected": -0.507487952709198, "logps/chosen": -0.0037036265712231398, "logps/rejected": -1.4348680973052979, "loss": 1.8954, "nll_loss": 0.47351786494255066, "rewards/accuracies": 1.0, "rewards/chosen": -0.000370362657122314, "rewards/margins": 0.14311645925045013, "rewards/rejected": -0.14348681271076202, "step": 3237 }, { "epoch": 2.239280774550484, "grad_norm": 6.6844072341918945, "learning_rate": 4.311510680805287e-05, "log_odds_chosen": 6.948589324951172, "log_odds_ratio": -0.007444444112479687, "logits/chosen": -0.5402169227600098, "logits/rejected": -0.5486041307449341, "logps/chosen": -0.01570907235145569, "logps/rejected": -1.117315411567688, "loss": 2.0259, "nll_loss": 0.5057216882705688, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015709070721641183, "rewards/margins": 0.11016062647104263, "rewards/rejected": -0.11173154413700104, "step": 3238 }, { "epoch": 2.239972337482711, "grad_norm": 10.699847221374512, "learning_rate": 4.3111264791762716e-05, "log_odds_chosen": 7.9485344886779785, "log_odds_ratio": -0.001679889508523047, "logits/chosen": -0.6571987867355347, "logits/rejected": -0.7422472238540649, "logps/chosen": -0.004468269646167755, "logps/rejected": -1.2286795377731323, "loss": 2.6932, "nll_loss": 0.6731306910514832, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044682700536213815, "rewards/margins": 0.12242113053798676, "rewards/rejected": -0.12286796420812607, "step": 3239 }, { "epoch": 2.240663900414938, "grad_norm": 10.12278938293457, "learning_rate": 4.3107422775472575e-05, "log_odds_chosen": 7.134811878204346, "log_odds_ratio": -0.009079055860638618, "logits/chosen": -0.6396535634994507, "logits/rejected": -0.7066131234169006, "logps/chosen": -0.008538391441106796, "logps/rejected": -1.2243645191192627, "loss": 2.8026, "nll_loss": 0.6997353434562683, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008538390975445509, "rewards/margins": 0.12158262729644775, "rewards/rejected": -0.12243646383285522, "step": 3240 }, { "epoch": 2.2413554633471646, "grad_norm": 8.627345085144043, "learning_rate": 4.310358075918242e-05, "log_odds_chosen": 5.974154472351074, "log_odds_ratio": -0.21858333051204681, "logits/chosen": -0.43080878257751465, "logits/rejected": -0.45782575011253357, "logps/chosen": -0.04417749494314194, "logps/rejected": -1.6393053531646729, "loss": 2.274, "nll_loss": 0.5466482639312744, "rewards/accuracies": 0.875, "rewards/chosen": -0.004417749587446451, "rewards/margins": 0.15951277315616608, "rewards/rejected": -0.16393053531646729, "step": 3241 }, { "epoch": 2.2420470262793915, "grad_norm": 9.293315887451172, "learning_rate": 4.309973874289227e-05, "log_odds_chosen": 6.996009826660156, "log_odds_ratio": -0.05223194509744644, "logits/chosen": -0.5507470369338989, "logits/rejected": -0.6064082980155945, "logps/chosen": -0.024966636672616005, "logps/rejected": -1.1820029020309448, "loss": 3.2471, "nll_loss": 0.8065450191497803, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024966637138277292, "rewards/margins": 0.11570362746715546, "rewards/rejected": -0.11820029467344284, "step": 3242 }, { "epoch": 2.2427385892116183, "grad_norm": 8.495476722717285, "learning_rate": 4.3095896726602126e-05, "log_odds_chosen": 7.876456260681152, "log_odds_ratio": -0.0026620151475071907, "logits/chosen": -0.5941371917724609, "logits/rejected": -0.7753130793571472, "logps/chosen": -0.013255937024950981, "logps/rejected": -1.70023512840271, "loss": 2.64, "nll_loss": 0.6597373485565186, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013255936792120337, "rewards/margins": 0.16869792342185974, "rewards/rejected": -0.17002353072166443, "step": 3243 }, { "epoch": 2.243430152143845, "grad_norm": 3.8010642528533936, "learning_rate": 4.309205471031197e-05, "log_odds_chosen": 5.32291841506958, "log_odds_ratio": -0.3219887912273407, "logits/chosen": -0.490628182888031, "logits/rejected": -0.5665719509124756, "logps/chosen": -0.09193715453147888, "logps/rejected": -0.8743967413902283, "loss": 2.4086, "nll_loss": 0.5699490308761597, "rewards/accuracies": 0.875, "rewards/chosen": -0.009193716570734978, "rewards/margins": 0.07824596017599106, "rewards/rejected": -0.08743968605995178, "step": 3244 }, { "epoch": 2.244121715076072, "grad_norm": 6.200137138366699, "learning_rate": 4.3088212694021824e-05, "log_odds_chosen": 7.523862361907959, "log_odds_ratio": -0.02580084837973118, "logits/chosen": -0.7987110614776611, "logits/rejected": -0.8353661894798279, "logps/chosen": -0.03375190123915672, "logps/rejected": -1.976253628730774, "loss": 2.1664, "nll_loss": 0.539008617401123, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033751900773495436, "rewards/margins": 0.1942501664161682, "rewards/rejected": -0.19762536883354187, "step": 3245 }, { "epoch": 2.2448132780082988, "grad_norm": 8.950324058532715, "learning_rate": 4.3084370677731676e-05, "log_odds_chosen": 8.77133560180664, "log_odds_ratio": -0.0008398180943913758, "logits/chosen": -0.801625669002533, "logits/rejected": -0.8420397043228149, "logps/chosen": -0.0011170408688485622, "logps/rejected": -1.5125846862792969, "loss": 2.4786, "nll_loss": 0.6195661425590515, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011170408106409013, "rewards/margins": 0.1511467546224594, "rewards/rejected": -0.1512584686279297, "step": 3246 }, { "epoch": 2.2455048409405256, "grad_norm": 11.53207015991211, "learning_rate": 4.308052866144153e-05, "log_odds_chosen": 7.653658866882324, "log_odds_ratio": -0.05250634253025055, "logits/chosen": -0.7041653394699097, "logits/rejected": -0.8292118906974792, "logps/chosen": -0.028626399114727974, "logps/rejected": -1.410700798034668, "loss": 2.655, "nll_loss": 0.6584897637367249, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028626397252082825, "rewards/margins": 0.13820745050907135, "rewards/rejected": -0.14107009768486023, "step": 3247 }, { "epoch": 2.2461964038727524, "grad_norm": 7.0916829109191895, "learning_rate": 4.3076686645151374e-05, "log_odds_chosen": 7.575862407684326, "log_odds_ratio": -0.0012360899709165096, "logits/chosen": -0.537795901298523, "logits/rejected": -0.5750592947006226, "logps/chosen": -0.009833377785980701, "logps/rejected": -1.325676441192627, "loss": 2.8621, "nll_loss": 0.7153981924057007, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009833378717303276, "rewards/margins": 0.1315843164920807, "rewards/rejected": -0.1325676441192627, "step": 3248 }, { "epoch": 2.2468879668049793, "grad_norm": 7.13214111328125, "learning_rate": 4.3072844628861233e-05, "log_odds_chosen": 7.926778793334961, "log_odds_ratio": -0.02117532305419445, "logits/chosen": -0.764047384262085, "logits/rejected": -0.8071606755256653, "logps/chosen": -0.016845818608999252, "logps/rejected": -1.3062502145767212, "loss": 2.505, "nll_loss": 0.6241413950920105, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016845817444846034, "rewards/margins": 0.1289404332637787, "rewards/rejected": -0.13062502443790436, "step": 3249 }, { "epoch": 2.247579529737206, "grad_norm": 10.385427474975586, "learning_rate": 4.306900261257108e-05, "log_odds_chosen": 7.289114952087402, "log_odds_ratio": -0.011439472436904907, "logits/chosen": -0.7682449817657471, "logits/rejected": -0.7987627983093262, "logps/chosen": -0.04962502047419548, "logps/rejected": -1.5088868141174316, "loss": 2.6635, "nll_loss": 0.6647320985794067, "rewards/accuracies": 1.0, "rewards/chosen": -0.004962501581758261, "rewards/margins": 0.14592617750167847, "rewards/rejected": -0.15088868141174316, "step": 3250 }, { "epoch": 2.248271092669433, "grad_norm": 9.709715843200684, "learning_rate": 4.306516059628093e-05, "log_odds_chosen": 5.992304801940918, "log_odds_ratio": -0.07425712794065475, "logits/chosen": -0.8729088306427002, "logits/rejected": -0.9067466259002686, "logps/chosen": -0.023998547345399857, "logps/rejected": -1.020616054534912, "loss": 2.7299, "nll_loss": 0.6750485897064209, "rewards/accuracies": 1.0, "rewards/chosen": -0.002399854827672243, "rewards/margins": 0.09966175258159637, "rewards/rejected": -0.10206159949302673, "step": 3251 }, { "epoch": 2.2489626556016598, "grad_norm": 8.44819450378418, "learning_rate": 4.3061318579990784e-05, "log_odds_chosen": 9.362162590026855, "log_odds_ratio": -0.000212931539863348, "logits/chosen": -0.7932662963867188, "logits/rejected": -0.8243728876113892, "logps/chosen": -0.0005738566978834569, "logps/rejected": -1.580110788345337, "loss": 2.2292, "nll_loss": 0.5572723150253296, "rewards/accuracies": 1.0, "rewards/chosen": -5.7385670515941456e-05, "rewards/margins": 0.15795369446277618, "rewards/rejected": -0.1580110788345337, "step": 3252 }, { "epoch": 2.2496542185338866, "grad_norm": 6.70921516418457, "learning_rate": 4.305747656370063e-05, "log_odds_chosen": 8.296453475952148, "log_odds_ratio": -0.0009130655089393258, "logits/chosen": -0.5469620823860168, "logits/rejected": -0.7344156503677368, "logps/chosen": -0.0072532035410404205, "logps/rejected": -1.434815526008606, "loss": 1.8735, "nll_loss": 0.4682943820953369, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007253203075379133, "rewards/margins": 0.14275622367858887, "rewards/rejected": -0.1434815526008606, "step": 3253 }, { "epoch": 2.2503457814661134, "grad_norm": 10.811256408691406, "learning_rate": 4.305363454741048e-05, "log_odds_chosen": 7.021681785583496, "log_odds_ratio": -0.006112986709922552, "logits/chosen": -0.5054683685302734, "logits/rejected": -0.5628206133842468, "logps/chosen": -0.013626248575747013, "logps/rejected": -0.9956163167953491, "loss": 2.7577, "nll_loss": 0.688805103302002, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013626248110085726, "rewards/margins": 0.09819900244474411, "rewards/rejected": -0.09956163913011551, "step": 3254 }, { "epoch": 2.2510373443983402, "grad_norm": 10.7681884765625, "learning_rate": 4.3049792531120335e-05, "log_odds_chosen": 6.661191940307617, "log_odds_ratio": -0.055751726031303406, "logits/chosen": -0.8494280576705933, "logits/rejected": -0.9281111359596252, "logps/chosen": -0.025588368996977806, "logps/rejected": -1.560171127319336, "loss": 3.1838, "nll_loss": 0.7903729677200317, "rewards/accuracies": 1.0, "rewards/chosen": -0.002558837179094553, "rewards/margins": 0.15345828235149384, "rewards/rejected": -0.15601710975170135, "step": 3255 }, { "epoch": 2.251728907330567, "grad_norm": 11.42106819152832, "learning_rate": 4.304595051483019e-05, "log_odds_chosen": 6.612816333770752, "log_odds_ratio": -0.2027987539768219, "logits/chosen": -0.3843982517719269, "logits/rejected": -0.5736691951751709, "logps/chosen": -0.13335944712162018, "logps/rejected": -1.2244727611541748, "loss": 2.5032, "nll_loss": 0.6055225133895874, "rewards/accuracies": 0.875, "rewards/chosen": -0.013335946016013622, "rewards/margins": 0.10911132395267487, "rewards/rejected": -0.12244727462530136, "step": 3256 }, { "epoch": 2.252420470262794, "grad_norm": 8.008171081542969, "learning_rate": 4.304210849854003e-05, "log_odds_chosen": 6.746915817260742, "log_odds_ratio": -0.010379820130765438, "logits/chosen": -0.6378681063652039, "logits/rejected": -0.6194570064544678, "logps/chosen": -0.037365447729825974, "logps/rejected": -1.6308256387710571, "loss": 2.3197, "nll_loss": 0.5788797736167908, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037365450989454985, "rewards/margins": 0.15934602916240692, "rewards/rejected": -0.1630825698375702, "step": 3257 }, { "epoch": 2.2531120331950207, "grad_norm": 7.668332099914551, "learning_rate": 4.303826648224989e-05, "log_odds_chosen": 7.569102764129639, "log_odds_ratio": -0.025676576420664787, "logits/chosen": -0.661882758140564, "logits/rejected": -0.6706777811050415, "logps/chosen": -0.030474940314888954, "logps/rejected": -1.866816759109497, "loss": 1.7545, "nll_loss": 0.436055988073349, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030474942177534103, "rewards/margins": 0.1836341917514801, "rewards/rejected": -0.18668167293071747, "step": 3258 }, { "epoch": 2.2538035961272476, "grad_norm": 7.231141090393066, "learning_rate": 4.303442446595974e-05, "log_odds_chosen": 9.443159103393555, "log_odds_ratio": -0.0002675468276720494, "logits/chosen": -0.6568310856819153, "logits/rejected": -0.6061131954193115, "logps/chosen": -0.00038531774771399796, "logps/rejected": -1.2876076698303223, "loss": 1.9718, "nll_loss": 0.4929143190383911, "rewards/accuracies": 1.0, "rewards/chosen": -3.853177622659132e-05, "rewards/margins": 0.12872223556041718, "rewards/rejected": -0.12876076996326447, "step": 3259 }, { "epoch": 2.2544951590594744, "grad_norm": 11.41521167755127, "learning_rate": 4.303058244966959e-05, "log_odds_chosen": 7.185343265533447, "log_odds_ratio": -0.009479331783950329, "logits/chosen": -0.7706685662269592, "logits/rejected": -0.9025553464889526, "logps/chosen": -0.024524778127670288, "logps/rejected": -1.3457211256027222, "loss": 2.4802, "nll_loss": 0.6191108226776123, "rewards/accuracies": 1.0, "rewards/chosen": -0.002452477812767029, "rewards/margins": 0.13211962580680847, "rewards/rejected": -0.1345721185207367, "step": 3260 }, { "epoch": 2.2551867219917012, "grad_norm": 5.6372456550598145, "learning_rate": 4.302674043337944e-05, "log_odds_chosen": 6.075026035308838, "log_odds_ratio": -0.04334619268774986, "logits/chosen": -0.6701595783233643, "logits/rejected": -0.5777078866958618, "logps/chosen": -0.10955867916345596, "logps/rejected": -1.360260248184204, "loss": 2.4343, "nll_loss": 0.6042494177818298, "rewards/accuracies": 1.0, "rewards/chosen": -0.010955867357552052, "rewards/margins": 0.12507015466690063, "rewards/rejected": -0.1360260248184204, "step": 3261 }, { "epoch": 2.255878284923928, "grad_norm": 5.7197723388671875, "learning_rate": 4.302289841708929e-05, "log_odds_chosen": 4.844676971435547, "log_odds_ratio": -0.0366857573390007, "logits/chosen": -0.4729902446269989, "logits/rejected": -0.44772809743881226, "logps/chosen": -0.03843897208571434, "logps/rejected": -0.9760329723358154, "loss": 2.5159, "nll_loss": 0.6253054738044739, "rewards/accuracies": 1.0, "rewards/chosen": -0.003843897022306919, "rewards/margins": 0.09375940263271332, "rewards/rejected": -0.09760329872369766, "step": 3262 }, { "epoch": 2.256569847856155, "grad_norm": 5.996796131134033, "learning_rate": 4.301905640079914e-05, "log_odds_chosen": 6.512656211853027, "log_odds_ratio": -0.018681341782212257, "logits/chosen": 0.005796484649181366, "logits/rejected": -0.04244758188724518, "logps/chosen": -0.033937402069568634, "logps/rejected": -1.1949613094329834, "loss": 2.3876, "nll_loss": 0.5950331687927246, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033937403932213783, "rewards/margins": 0.11610238999128342, "rewards/rejected": -0.11949612945318222, "step": 3263 }, { "epoch": 2.2572614107883817, "grad_norm": 9.408733367919922, "learning_rate": 4.301521438450899e-05, "log_odds_chosen": 7.253141403198242, "log_odds_ratio": -0.11470719426870346, "logits/chosen": -0.7124958634376526, "logits/rejected": -0.7846969962120056, "logps/chosen": -0.04086989909410477, "logps/rejected": -1.3902311325073242, "loss": 3.3567, "nll_loss": 0.8277014493942261, "rewards/accuracies": 0.875, "rewards/chosen": -0.004086989909410477, "rewards/margins": 0.1349361389875412, "rewards/rejected": -0.13902312517166138, "step": 3264 }, { "epoch": 2.2579529737206085, "grad_norm": 4.989302635192871, "learning_rate": 4.3011372368218845e-05, "log_odds_chosen": 6.661134243011475, "log_odds_ratio": -0.055529430508613586, "logits/chosen": -0.5685741901397705, "logits/rejected": -0.6001918315887451, "logps/chosen": -0.04951227083802223, "logps/rejected": -1.53061842918396, "loss": 2.2204, "nll_loss": 0.5495560765266418, "rewards/accuracies": 1.0, "rewards/chosen": -0.004951227456331253, "rewards/margins": 0.14811062812805176, "rewards/rejected": -0.15306183695793152, "step": 3265 }, { "epoch": 2.2586445366528354, "grad_norm": 5.90012264251709, "learning_rate": 4.300753035192869e-05, "log_odds_chosen": 7.330995082855225, "log_odds_ratio": -0.004380673170089722, "logits/chosen": -0.5569080710411072, "logits/rejected": -0.5699841976165771, "logps/chosen": -0.0508880577981472, "logps/rejected": -2.003079891204834, "loss": 3.2612, "nll_loss": 0.8148605823516846, "rewards/accuracies": 1.0, "rewards/chosen": -0.00508880615234375, "rewards/margins": 0.19521918892860413, "rewards/rejected": -0.20030798017978668, "step": 3266 }, { "epoch": 2.259336099585062, "grad_norm": 11.648260116577148, "learning_rate": 4.300368833563855e-05, "log_odds_chosen": 7.323399543762207, "log_odds_ratio": -0.030352065339684486, "logits/chosen": -0.6574798226356506, "logits/rejected": -0.7213975787162781, "logps/chosen": -0.05200228840112686, "logps/rejected": -1.6685948371887207, "loss": 3.7964, "nll_loss": 0.9460740089416504, "rewards/accuracies": 1.0, "rewards/chosen": -0.005200228653848171, "rewards/margins": 0.16165924072265625, "rewards/rejected": -0.1668594777584076, "step": 3267 }, { "epoch": 2.260027662517289, "grad_norm": 12.301355361938477, "learning_rate": 4.2999846319348396e-05, "log_odds_chosen": 9.659789085388184, "log_odds_ratio": -0.013645894825458527, "logits/chosen": -0.4796256422996521, "logits/rejected": -0.5424432158470154, "logps/chosen": -0.022504033520817757, "logps/rejected": -2.019725799560547, "loss": 3.1548, "nll_loss": 0.7873326539993286, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022504031658172607, "rewards/margins": 0.19972217082977295, "rewards/rejected": -0.2019725739955902, "step": 3268 }, { "epoch": 2.260719225449516, "grad_norm": 10.616440773010254, "learning_rate": 4.299600430305825e-05, "log_odds_chosen": 9.40095329284668, "log_odds_ratio": -0.0002967410546261817, "logits/chosen": -0.5349110960960388, "logits/rejected": -0.6757592558860779, "logps/chosen": -0.01788119412958622, "logps/rejected": -2.1804044246673584, "loss": 3.1055, "nll_loss": 0.7763397097587585, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017881194362416863, "rewards/margins": 0.21625235676765442, "rewards/rejected": -0.21804045140743256, "step": 3269 }, { "epoch": 2.2614107883817427, "grad_norm": 6.400076389312744, "learning_rate": 4.29921622867681e-05, "log_odds_chosen": 6.685382843017578, "log_odds_ratio": -0.15850648283958435, "logits/chosen": -0.45440176129341125, "logits/rejected": -0.4439389109611511, "logps/chosen": -0.025936201214790344, "logps/rejected": -0.8457040786743164, "loss": 2.7164, "nll_loss": 0.6632498502731323, "rewards/accuracies": 0.875, "rewards/chosen": -0.002593620214611292, "rewards/margins": 0.08197679370641708, "rewards/rejected": -0.08457040786743164, "step": 3270 }, { "epoch": 2.2621023513139695, "grad_norm": 6.308727264404297, "learning_rate": 4.2988320270477947e-05, "log_odds_chosen": 6.813342571258545, "log_odds_ratio": -0.07909463346004486, "logits/chosen": -0.6073681116104126, "logits/rejected": -0.6416606903076172, "logps/chosen": -0.01722707785665989, "logps/rejected": -1.173327922821045, "loss": 1.9741, "nll_loss": 0.4856259226799011, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017227077623829246, "rewards/margins": 0.11561009287834167, "rewards/rejected": -0.1173328086733818, "step": 3271 }, { "epoch": 2.2627939142461964, "grad_norm": 7.209175109863281, "learning_rate": 4.29844782541878e-05, "log_odds_chosen": 9.962041854858398, "log_odds_ratio": -0.0001319254224654287, "logits/chosen": -0.6178686618804932, "logits/rejected": -0.792114794254303, "logps/chosen": -0.0007171865436248481, "logps/rejected": -1.918930172920227, "loss": 1.7354, "nll_loss": 0.43383434414863586, "rewards/accuracies": 1.0, "rewards/chosen": -7.171865581767634e-05, "rewards/margins": 0.19182130694389343, "rewards/rejected": -0.19189301133155823, "step": 3272 }, { "epoch": 2.263485477178423, "grad_norm": 5.474750995635986, "learning_rate": 4.298063623789765e-05, "log_odds_chosen": 7.85467529296875, "log_odds_ratio": -0.0014620490837842226, "logits/chosen": -0.4752342998981476, "logits/rejected": -0.4551814794540405, "logps/chosen": -0.004117067903280258, "logps/rejected": -1.1596763134002686, "loss": 2.1883, "nll_loss": 0.5469228625297546, "rewards/accuracies": 1.0, "rewards/chosen": -0.000411706801969558, "rewards/margins": 0.11555592715740204, "rewards/rejected": -0.11596763134002686, "step": 3273 }, { "epoch": 2.26417704011065, "grad_norm": 7.661037445068359, "learning_rate": 4.2976794221607504e-05, "log_odds_chosen": 7.732230186462402, "log_odds_ratio": -0.018154030665755272, "logits/chosen": -0.6091871857643127, "logits/rejected": -0.7187412977218628, "logps/chosen": -0.008824712596833706, "logps/rejected": -1.1421221494674683, "loss": 2.5441, "nll_loss": 0.6342145204544067, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008824712713249028, "rewards/margins": 0.11332975327968597, "rewards/rejected": -0.11421222984790802, "step": 3274 }, { "epoch": 2.264868603042877, "grad_norm": 8.103368759155273, "learning_rate": 4.297295220531735e-05, "log_odds_chosen": 7.086808204650879, "log_odds_ratio": -0.00975433737039566, "logits/chosen": -0.7722989916801453, "logits/rejected": -0.7403442859649658, "logps/chosen": -0.028213070705533028, "logps/rejected": -1.6213114261627197, "loss": 3.1533, "nll_loss": 0.7873413562774658, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028213071636855602, "rewards/margins": 0.15930984914302826, "rewards/rejected": -0.1621311455965042, "step": 3275 }, { "epoch": 2.2655601659751037, "grad_norm": 11.01082992553711, "learning_rate": 4.296911018902721e-05, "log_odds_chosen": 6.675724983215332, "log_odds_ratio": -0.3206350803375244, "logits/chosen": -0.5880841016769409, "logits/rejected": -0.6000953316688538, "logps/chosen": -0.05225434899330139, "logps/rejected": -1.3412305116653442, "loss": 2.7484, "nll_loss": 0.6550301909446716, "rewards/accuracies": 0.875, "rewards/chosen": -0.005225434899330139, "rewards/margins": 0.12889762222766876, "rewards/rejected": -0.1341230571269989, "step": 3276 }, { "epoch": 2.2662517289073305, "grad_norm": 10.304984092712402, "learning_rate": 4.2965268172737054e-05, "log_odds_chosen": 4.339324951171875, "log_odds_ratio": -0.11746401339769363, "logits/chosen": -0.5929984450340271, "logits/rejected": -0.5827913284301758, "logps/chosen": -0.07616761326789856, "logps/rejected": -0.944993793964386, "loss": 2.7198, "nll_loss": 0.6682088375091553, "rewards/accuracies": 1.0, "rewards/chosen": -0.007616761140525341, "rewards/margins": 0.08688262850046158, "rewards/rejected": -0.0944993868470192, "step": 3277 }, { "epoch": 2.2669432918395573, "grad_norm": 8.508423805236816, "learning_rate": 4.296142615644691e-05, "log_odds_chosen": 7.036334037780762, "log_odds_ratio": -0.026559626683592796, "logits/chosen": -0.6788300275802612, "logits/rejected": -0.6941066384315491, "logps/chosen": -0.08082263916730881, "logps/rejected": -1.114711880683899, "loss": 2.1955, "nll_loss": 0.5462226271629333, "rewards/accuracies": 1.0, "rewards/chosen": -0.008082263171672821, "rewards/margins": 0.10338892042636871, "rewards/rejected": -0.11147119104862213, "step": 3278 }, { "epoch": 2.267634854771784, "grad_norm": 14.72610855102539, "learning_rate": 4.295758414015675e-05, "log_odds_chosen": 8.000221252441406, "log_odds_ratio": -0.11321330070495605, "logits/chosen": -0.512886106967926, "logits/rejected": -0.5639876127243042, "logps/chosen": -0.02386327274143696, "logps/rejected": -1.4123374223709106, "loss": 2.587, "nll_loss": 0.6354241371154785, "rewards/accuracies": 0.875, "rewards/chosen": -0.0023863273672759533, "rewards/margins": 0.1388474404811859, "rewards/rejected": -0.14123375713825226, "step": 3279 }, { "epoch": 2.268326417704011, "grad_norm": 9.623932838439941, "learning_rate": 4.2953742123866605e-05, "log_odds_chosen": 8.210575103759766, "log_odds_ratio": -0.001431704848073423, "logits/chosen": -0.9136925935745239, "logits/rejected": -0.9186261296272278, "logps/chosen": -0.0012273931642994285, "logps/rejected": -1.3958138227462769, "loss": 3.3816, "nll_loss": 0.845266580581665, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012273932225070894, "rewards/margins": 0.13945864140987396, "rewards/rejected": -0.13958138227462769, "step": 3280 }, { "epoch": 2.269017980636238, "grad_norm": 18.497825622558594, "learning_rate": 4.294990010757646e-05, "log_odds_chosen": 5.381030082702637, "log_odds_ratio": -0.511152982711792, "logits/chosen": -0.505366861820221, "logits/rejected": -0.5537108778953552, "logps/chosen": -0.06397618353366852, "logps/rejected": -0.9307079911231995, "loss": 1.8231, "nll_loss": 0.40466052293777466, "rewards/accuracies": 0.875, "rewards/chosen": -0.006397617980837822, "rewards/margins": 0.08667318522930145, "rewards/rejected": -0.09307080507278442, "step": 3281 }, { "epoch": 2.2697095435684647, "grad_norm": 7.17557954788208, "learning_rate": 4.29460580912863e-05, "log_odds_chosen": 8.549341201782227, "log_odds_ratio": -0.0057712700217962265, "logits/chosen": -0.8990902304649353, "logits/rejected": -0.9997949600219727, "logps/chosen": -0.001989273354411125, "logps/rejected": -1.7681491374969482, "loss": 2.0357, "nll_loss": 0.5083510875701904, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019892734417226166, "rewards/margins": 0.17661598324775696, "rewards/rejected": -0.17681491374969482, "step": 3282 }, { "epoch": 2.2704011065006915, "grad_norm": 6.074166774749756, "learning_rate": 4.294221607499616e-05, "log_odds_chosen": 8.438508033752441, "log_odds_ratio": -0.00044998922385275364, "logits/chosen": -0.5825424790382385, "logits/rejected": -0.5944786071777344, "logps/chosen": -0.00998244434595108, "logps/rejected": -1.5468660593032837, "loss": 1.8963, "nll_loss": 0.47402337193489075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009982445044443011, "rewards/margins": 0.15368834137916565, "rewards/rejected": -0.1546865999698639, "step": 3283 }, { "epoch": 2.2710926694329183, "grad_norm": 13.932870864868164, "learning_rate": 4.293837405870601e-05, "log_odds_chosen": 7.090470790863037, "log_odds_ratio": -0.0042477683164179325, "logits/chosen": -0.8459784388542175, "logits/rejected": -0.8491066694259644, "logps/chosen": -0.018670564517378807, "logps/rejected": -1.428104043006897, "loss": 3.267, "nll_loss": 0.8163129091262817, "rewards/accuracies": 1.0, "rewards/chosen": -0.001867056475020945, "rewards/margins": 0.14094334840774536, "rewards/rejected": -0.1428104043006897, "step": 3284 }, { "epoch": 2.271784232365145, "grad_norm": 7.576010704040527, "learning_rate": 4.293453204241586e-05, "log_odds_chosen": 6.226518630981445, "log_odds_ratio": -0.0069326963275671005, "logits/chosen": -0.7165563106536865, "logits/rejected": -0.7860947847366333, "logps/chosen": -0.021648811176419258, "logps/rejected": -1.109487533569336, "loss": 2.6907, "nll_loss": 0.6719880104064941, "rewards/accuracies": 1.0, "rewards/chosen": -0.002164881443604827, "rewards/margins": 0.10878387093544006, "rewards/rejected": -0.11094875633716583, "step": 3285 }, { "epoch": 2.272475795297372, "grad_norm": 10.582438468933105, "learning_rate": 4.293069002612571e-05, "log_odds_chosen": 7.673453330993652, "log_odds_ratio": -0.00417360058054328, "logits/chosen": -0.9730564951896667, "logits/rejected": -1.0138814449310303, "logps/chosen": -0.004357243422418833, "logps/rejected": -1.4962083101272583, "loss": 2.5051, "nll_loss": 0.625868558883667, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043572435970418155, "rewards/margins": 0.14918510615825653, "rewards/rejected": -0.14962083101272583, "step": 3286 }, { "epoch": 2.273167358229599, "grad_norm": 12.714532852172852, "learning_rate": 4.2926848009835565e-05, "log_odds_chosen": 8.081611633300781, "log_odds_ratio": -0.0064338194206357, "logits/chosen": -0.6164727807044983, "logits/rejected": -0.6589667797088623, "logps/chosen": -0.004069966729730368, "logps/rejected": -1.4070924520492554, "loss": 2.7513, "nll_loss": 0.6871762275695801, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004069966671522707, "rewards/margins": 0.14030225574970245, "rewards/rejected": -0.14070925116539001, "step": 3287 }, { "epoch": 2.2738589211618256, "grad_norm": 8.455578804016113, "learning_rate": 4.292300599354541e-05, "log_odds_chosen": 8.7753267288208, "log_odds_ratio": -0.0006104600615799427, "logits/chosen": -0.547274649143219, "logits/rejected": -0.6455528140068054, "logps/chosen": -0.0025969611015170813, "logps/rejected": -1.232614517211914, "loss": 2.6083, "nll_loss": 0.6520036458969116, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025969609851017594, "rewards/margins": 0.12300176173448563, "rewards/rejected": -0.1232614517211914, "step": 3288 }, { "epoch": 2.2745504840940525, "grad_norm": 16.860441207885742, "learning_rate": 4.2919163977255263e-05, "log_odds_chosen": 7.584886074066162, "log_odds_ratio": -0.05619276314973831, "logits/chosen": -0.2892361283302307, "logits/rejected": -0.36306679248809814, "logps/chosen": -0.010725020430982113, "logps/rejected": -1.1932777166366577, "loss": 3.1154, "nll_loss": 0.7732195854187012, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010725021129474044, "rewards/margins": 0.11825526505708694, "rewards/rejected": -0.11932776868343353, "step": 3289 }, { "epoch": 2.2752420470262793, "grad_norm": 16.079126358032227, "learning_rate": 4.2915321960965116e-05, "log_odds_chosen": 7.958521366119385, "log_odds_ratio": -0.0023251264356076717, "logits/chosen": -0.20205731689929962, "logits/rejected": -0.3348516523838043, "logps/chosen": -0.0021186298690736294, "logps/rejected": -1.3939123153686523, "loss": 3.0566, "nll_loss": 0.7639187574386597, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021186300728004426, "rewards/margins": 0.13917936384677887, "rewards/rejected": -0.139391228556633, "step": 3290 }, { "epoch": 2.275933609958506, "grad_norm": 8.559741973876953, "learning_rate": 4.291147994467496e-05, "log_odds_chosen": 7.290081024169922, "log_odds_ratio": -0.003927894867956638, "logits/chosen": -0.7065256237983704, "logits/rejected": -0.6933072209358215, "logps/chosen": -0.01851980946958065, "logps/rejected": -1.1994181871414185, "loss": 2.6089, "nll_loss": 0.6518220901489258, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018519810400903225, "rewards/margins": 0.11808983981609344, "rewards/rejected": -0.1199418231844902, "step": 3291 }, { "epoch": 2.276625172890733, "grad_norm": 6.953580379486084, "learning_rate": 4.290763792838482e-05, "log_odds_chosen": 5.8773016929626465, "log_odds_ratio": -0.015241350047290325, "logits/chosen": -0.986510157585144, "logits/rejected": -0.9865601062774658, "logps/chosen": -0.03149587661027908, "logps/rejected": -1.2142492532730103, "loss": 3.2376, "nll_loss": 0.8078746795654297, "rewards/accuracies": 1.0, "rewards/chosen": -0.003149587893858552, "rewards/margins": 0.11827534437179565, "rewards/rejected": -0.12142492830753326, "step": 3292 }, { "epoch": 2.27731673582296, "grad_norm": 6.562819480895996, "learning_rate": 4.2903795912094666e-05, "log_odds_chosen": 6.810787200927734, "log_odds_ratio": -0.005457735620439053, "logits/chosen": -0.3480616807937622, "logits/rejected": -0.31973057985305786, "logps/chosen": -0.0210685096681118, "logps/rejected": -1.0529942512512207, "loss": 2.3147, "nll_loss": 0.578117847442627, "rewards/accuracies": 1.0, "rewards/chosen": -0.00210685096681118, "rewards/margins": 0.10319257527589798, "rewards/rejected": -0.10529942810535431, "step": 3293 }, { "epoch": 2.2780082987551866, "grad_norm": 6.798682689666748, "learning_rate": 4.289995389580452e-05, "log_odds_chosen": 8.399381637573242, "log_odds_ratio": -0.008013543672859669, "logits/chosen": -0.9271516799926758, "logits/rejected": -0.969430685043335, "logps/chosen": -0.02214404195547104, "logps/rejected": -1.5581920146942139, "loss": 1.5441, "nll_loss": 0.3852202892303467, "rewards/accuracies": 1.0, "rewards/chosen": -0.002214404521510005, "rewards/margins": 0.15360480546951294, "rewards/rejected": -0.15581920742988586, "step": 3294 }, { "epoch": 2.2786998616874135, "grad_norm": 36.96128845214844, "learning_rate": 4.289611187951437e-05, "log_odds_chosen": 6.635312557220459, "log_odds_ratio": -0.13570347428321838, "logits/chosen": -0.6005826592445374, "logits/rejected": -0.6524233818054199, "logps/chosen": -0.04530520737171173, "logps/rejected": -1.0357275009155273, "loss": 2.2325, "nll_loss": 0.5445461869239807, "rewards/accuracies": 0.875, "rewards/chosen": -0.004530521109700203, "rewards/margins": 0.09904223680496216, "rewards/rejected": -0.10357275605201721, "step": 3295 }, { "epoch": 2.2793914246196403, "grad_norm": 10.540643692016602, "learning_rate": 4.2892269863224224e-05, "log_odds_chosen": 6.827335357666016, "log_odds_ratio": -0.002269915770739317, "logits/chosen": -0.46423041820526123, "logits/rejected": -0.5512615442276001, "logps/chosen": -0.008189619518816471, "logps/rejected": -1.168349266052246, "loss": 2.3288, "nll_loss": 0.5819670557975769, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008189620566554368, "rewards/margins": 0.1160159707069397, "rewards/rejected": -0.11683492362499237, "step": 3296 }, { "epoch": 2.280082987551867, "grad_norm": 15.574295043945312, "learning_rate": 4.288842784693407e-05, "log_odds_chosen": 7.315655708312988, "log_odds_ratio": -0.30651018023490906, "logits/chosen": -0.4777139723300934, "logits/rejected": -0.4968945384025574, "logps/chosen": -0.1229388415813446, "logps/rejected": -1.1089379787445068, "loss": 1.7984, "nll_loss": 0.41893768310546875, "rewards/accuracies": 0.875, "rewards/chosen": -0.01229388453066349, "rewards/margins": 0.09859991073608398, "rewards/rejected": -0.11089379340410233, "step": 3297 }, { "epoch": 2.280774550484094, "grad_norm": 8.10746955871582, "learning_rate": 4.288458583064392e-05, "log_odds_chosen": 8.925813674926758, "log_odds_ratio": -0.0036792377941310406, "logits/chosen": -0.5411398410797119, "logits/rejected": -0.567682147026062, "logps/chosen": -0.03685717657208443, "logps/rejected": -1.8378243446350098, "loss": 1.8082, "nll_loss": 0.4516713619232178, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036857177037745714, "rewards/margins": 0.18009671568870544, "rewards/rejected": -0.1837824285030365, "step": 3298 }, { "epoch": 2.2814661134163208, "grad_norm": 12.693647384643555, "learning_rate": 4.2880743814353774e-05, "log_odds_chosen": 7.668078422546387, "log_odds_ratio": -0.06813301891088486, "logits/chosen": -0.4694977402687073, "logits/rejected": -0.5269255638122559, "logps/chosen": -0.02033541537821293, "logps/rejected": -1.7283085584640503, "loss": 2.4857, "nll_loss": 0.6146007776260376, "rewards/accuracies": 1.0, "rewards/chosen": -0.002033541677519679, "rewards/margins": 0.17079731822013855, "rewards/rejected": -0.17283086478710175, "step": 3299 }, { "epoch": 2.2821576763485476, "grad_norm": 6.430253982543945, "learning_rate": 4.287690179806363e-05, "log_odds_chosen": 7.170631408691406, "log_odds_ratio": -0.012828205712139606, "logits/chosen": -0.4530397653579712, "logits/rejected": -0.49351730942726135, "logps/chosen": -0.026906395331025124, "logps/rejected": -1.5824973583221436, "loss": 2.0449, "nll_loss": 0.5099424719810486, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026906393468379974, "rewards/margins": 0.15555909276008606, "rewards/rejected": -0.15824973583221436, "step": 3300 }, { "epoch": 2.2828492392807744, "grad_norm": 7.007138729095459, "learning_rate": 4.287305978177348e-05, "log_odds_chosen": 9.538039207458496, "log_odds_ratio": -0.00013177035725675523, "logits/chosen": -0.6366586089134216, "logits/rejected": -0.5977941751480103, "logps/chosen": -0.0002648232621140778, "logps/rejected": -1.5456188917160034, "loss": 2.3247, "nll_loss": 0.5811559557914734, "rewards/accuracies": 1.0, "rewards/chosen": -2.6482328394195065e-05, "rewards/margins": 0.1545354127883911, "rewards/rejected": -0.15456190705299377, "step": 3301 }, { "epoch": 2.2835408022130013, "grad_norm": 10.941584587097168, "learning_rate": 4.2869217765483325e-05, "log_odds_chosen": 7.814338207244873, "log_odds_ratio": -0.12133978307247162, "logits/chosen": -0.515864372253418, "logits/rejected": -0.5393328070640564, "logps/chosen": -0.03073921799659729, "logps/rejected": -1.3862650394439697, "loss": 3.2848, "nll_loss": 0.8090637922286987, "rewards/accuracies": 0.875, "rewards/chosen": -0.003073921659961343, "rewards/margins": 0.13555258512496948, "rewards/rejected": -0.13862650096416473, "step": 3302 }, { "epoch": 2.284232365145228, "grad_norm": 11.696934700012207, "learning_rate": 4.286537574919318e-05, "log_odds_chosen": 7.683901309967041, "log_odds_ratio": -0.013870958238840103, "logits/chosen": -0.7030923962593079, "logits/rejected": -0.7184832692146301, "logps/chosen": -0.027227070182561874, "logps/rejected": -1.4573798179626465, "loss": 3.7154, "nll_loss": 0.927453875541687, "rewards/accuracies": 1.0, "rewards/chosen": -0.002722707111388445, "rewards/margins": 0.1430152803659439, "rewards/rejected": -0.14573800563812256, "step": 3303 }, { "epoch": 2.284923928077455, "grad_norm": 7.419434070587158, "learning_rate": 4.286153373290303e-05, "log_odds_chosen": 8.207198143005371, "log_odds_ratio": -0.017640601843595505, "logits/chosen": -0.6689510941505432, "logits/rejected": -0.6708952188491821, "logps/chosen": -0.014345421455800533, "logps/rejected": -1.242602825164795, "loss": 2.4851, "nll_loss": 0.6195045709609985, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014345422387123108, "rewards/margins": 0.1228257417678833, "rewards/rejected": -0.12426027655601501, "step": 3304 }, { "epoch": 2.2856154910096818, "grad_norm": 7.105348587036133, "learning_rate": 4.285769171661288e-05, "log_odds_chosen": 7.531919002532959, "log_odds_ratio": -0.07848531007766724, "logits/chosen": -0.8401054739952087, "logits/rejected": -0.9255817532539368, "logps/chosen": -0.014528581872582436, "logps/rejected": -0.8990166783332825, "loss": 3.1558, "nll_loss": 0.7811073064804077, "rewards/accuracies": 1.0, "rewards/chosen": -0.001452858094125986, "rewards/margins": 0.08844882249832153, "rewards/rejected": -0.08990167081356049, "step": 3305 }, { "epoch": 2.2863070539419086, "grad_norm": 6.265262603759766, "learning_rate": 4.285384970032273e-05, "log_odds_chosen": 8.80881118774414, "log_odds_ratio": -0.00043272125185467303, "logits/chosen": -0.4315575957298279, "logits/rejected": -0.5561200976371765, "logps/chosen": -0.0004981325473636389, "logps/rejected": -1.1576604843139648, "loss": 2.6085, "nll_loss": 0.6520813703536987, "rewards/accuracies": 1.0, "rewards/chosen": -4.9813254008768126e-05, "rewards/margins": 0.11571623384952545, "rewards/rejected": -0.11576604843139648, "step": 3306 }, { "epoch": 2.2869986168741354, "grad_norm": 6.479380130767822, "learning_rate": 4.285000768403258e-05, "log_odds_chosen": 6.527117729187012, "log_odds_ratio": -0.058632515370845795, "logits/chosen": -0.519963800907135, "logits/rejected": -0.5593026280403137, "logps/chosen": -0.042398929595947266, "logps/rejected": -1.2286722660064697, "loss": 1.8583, "nll_loss": 0.458715558052063, "rewards/accuracies": 1.0, "rewards/chosen": -0.004239893052726984, "rewards/margins": 0.11862733215093613, "rewards/rejected": -0.12286723405122757, "step": 3307 }, { "epoch": 2.2876901798063622, "grad_norm": 8.754281044006348, "learning_rate": 4.284616566774243e-05, "log_odds_chosen": 6.8843255043029785, "log_odds_ratio": -0.02043495699763298, "logits/chosen": -0.46738386154174805, "logits/rejected": -0.4485289454460144, "logps/chosen": -0.04600503295660019, "logps/rejected": -1.3759292364120483, "loss": 2.411, "nll_loss": 0.600706934928894, "rewards/accuracies": 1.0, "rewards/chosen": -0.0046005030162632465, "rewards/margins": 0.13299241662025452, "rewards/rejected": -0.13759291172027588, "step": 3308 }, { "epoch": 2.288381742738589, "grad_norm": 7.990466594696045, "learning_rate": 4.2842323651452285e-05, "log_odds_chosen": 6.636929512023926, "log_odds_ratio": -0.009947605431079865, "logits/chosen": -0.5649703741073608, "logits/rejected": -0.6036151051521301, "logps/chosen": -0.019645478576421738, "logps/rejected": -1.340593695640564, "loss": 2.5821, "nll_loss": 0.6445379853248596, "rewards/accuracies": 1.0, "rewards/chosen": -0.001964548137038946, "rewards/margins": 0.13209481537342072, "rewards/rejected": -0.1340593695640564, "step": 3309 }, { "epoch": 2.289073305670816, "grad_norm": 5.59704065322876, "learning_rate": 4.283848163516214e-05, "log_odds_chosen": 7.1342620849609375, "log_odds_ratio": -0.025569546967744827, "logits/chosen": -0.5936905145645142, "logits/rejected": -0.5649482011795044, "logps/chosen": -0.024222206324338913, "logps/rejected": -1.1674721240997314, "loss": 2.0807, "nll_loss": 0.5176059007644653, "rewards/accuracies": 1.0, "rewards/chosen": -0.002422221004962921, "rewards/margins": 0.11432498693466187, "rewards/rejected": -0.11674721539020538, "step": 3310 }, { "epoch": 2.2897648686030427, "grad_norm": 9.29557991027832, "learning_rate": 4.283463961887198e-05, "log_odds_chosen": 7.460236072540283, "log_odds_ratio": -0.009771870449185371, "logits/chosen": -0.8589206337928772, "logits/rejected": -0.8911477327346802, "logps/chosen": -0.021632924675941467, "logps/rejected": -1.742672324180603, "loss": 2.977, "nll_loss": 0.7432758212089539, "rewards/accuracies": 1.0, "rewards/chosen": -0.002163292607292533, "rewards/margins": 0.17210394144058228, "rewards/rejected": -0.1742672175168991, "step": 3311 }, { "epoch": 2.2904564315352696, "grad_norm": 7.1764092445373535, "learning_rate": 4.2830797602581836e-05, "log_odds_chosen": 6.939169883728027, "log_odds_ratio": -0.018409615382552147, "logits/chosen": -0.23254843056201935, "logits/rejected": -0.3019201159477234, "logps/chosen": -0.05554074048995972, "logps/rejected": -1.724541425704956, "loss": 2.4416, "nll_loss": 0.6085711717605591, "rewards/accuracies": 1.0, "rewards/chosen": -0.0055540744215250015, "rewards/margins": 0.16690006852149963, "rewards/rejected": -0.17245414853096008, "step": 3312 }, { "epoch": 2.2911479944674964, "grad_norm": 8.69416618347168, "learning_rate": 4.282695558629169e-05, "log_odds_chosen": 8.617881774902344, "log_odds_ratio": -0.004176803398877382, "logits/chosen": -0.6528822779655457, "logits/rejected": -0.7093425989151001, "logps/chosen": -0.04139639809727669, "logps/rejected": -1.784732699394226, "loss": 2.0386, "nll_loss": 0.5092415809631348, "rewards/accuracies": 1.0, "rewards/chosen": -0.004139639437198639, "rewards/margins": 0.1743336319923401, "rewards/rejected": -0.17847327888011932, "step": 3313 }, { "epoch": 2.2918395573997232, "grad_norm": 9.222074508666992, "learning_rate": 4.282311357000154e-05, "log_odds_chosen": 6.117391109466553, "log_odds_ratio": -0.3925205171108246, "logits/chosen": -0.5966262817382812, "logits/rejected": -0.63462895154953, "logps/chosen": -0.07529162615537643, "logps/rejected": -1.2157065868377686, "loss": 2.2904, "nll_loss": 0.5333395004272461, "rewards/accuracies": 0.875, "rewards/chosen": -0.007529162336140871, "rewards/margins": 0.11404149234294891, "rewards/rejected": -0.1215706616640091, "step": 3314 }, { "epoch": 2.29253112033195, "grad_norm": 5.440239429473877, "learning_rate": 4.2819271553711386e-05, "log_odds_chosen": 5.083359718322754, "log_odds_ratio": -0.09348893165588379, "logits/chosen": -0.5668821930885315, "logits/rejected": -0.6814877986907959, "logps/chosen": -0.06102047860622406, "logps/rejected": -1.2425282001495361, "loss": 2.8531, "nll_loss": 0.7039321660995483, "rewards/accuracies": 0.875, "rewards/chosen": -0.006102047860622406, "rewards/margins": 0.11815077811479568, "rewards/rejected": -0.12425282597541809, "step": 3315 }, { "epoch": 2.293222683264177, "grad_norm": 9.030952453613281, "learning_rate": 4.2815429537421246e-05, "log_odds_chosen": 6.790431499481201, "log_odds_ratio": -0.16002391278743744, "logits/chosen": -0.5022440552711487, "logits/rejected": -0.5329065322875977, "logps/chosen": -0.0351906344294548, "logps/rejected": -1.013128399848938, "loss": 2.4143, "nll_loss": 0.587579071521759, "rewards/accuracies": 0.875, "rewards/chosen": -0.0035190628841519356, "rewards/margins": 0.09779377281665802, "rewards/rejected": -0.10131284594535828, "step": 3316 }, { "epoch": 2.2939142461964037, "grad_norm": 7.451211929321289, "learning_rate": 4.281158752113109e-05, "log_odds_chosen": 9.210694313049316, "log_odds_ratio": -0.00048021538532339036, "logits/chosen": -0.33806172013282776, "logits/rejected": -0.36309614777565, "logps/chosen": -0.0005009582964703441, "logps/rejected": -1.570671558380127, "loss": 1.9924, "nll_loss": 0.4980572760105133, "rewards/accuracies": 1.0, "rewards/chosen": -5.009582673665136e-05, "rewards/margins": 0.1570170819759369, "rewards/rejected": -0.1570671796798706, "step": 3317 }, { "epoch": 2.2946058091286305, "grad_norm": 5.859487533569336, "learning_rate": 4.2807745504840944e-05, "log_odds_chosen": 8.068329811096191, "log_odds_ratio": -0.001363530638627708, "logits/chosen": -1.121833086013794, "logits/rejected": -1.131767749786377, "logps/chosen": -0.0072917938232421875, "logps/rejected": -1.3566069602966309, "loss": 2.5084, "nll_loss": 0.6269651055335999, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007291793590411544, "rewards/margins": 0.1349315196275711, "rewards/rejected": -0.13566069304943085, "step": 3318 }, { "epoch": 2.2952973720608574, "grad_norm": 10.225635528564453, "learning_rate": 4.2803903488550796e-05, "log_odds_chosen": 8.81074047088623, "log_odds_ratio": -0.0011985604651272297, "logits/chosen": -0.9156633615493774, "logits/rejected": -1.0778508186340332, "logps/chosen": -0.0008523757569491863, "logps/rejected": -1.6153143644332886, "loss": 2.0688, "nll_loss": 0.5170796513557434, "rewards/accuracies": 1.0, "rewards/chosen": -8.523757423972711e-05, "rewards/margins": 0.16144618391990662, "rewards/rejected": -0.16153143346309662, "step": 3319 }, { "epoch": 2.295988934993084, "grad_norm": 7.461869716644287, "learning_rate": 4.280006147226064e-05, "log_odds_chosen": 6.207457542419434, "log_odds_ratio": -0.020755963400006294, "logits/chosen": -0.6278154850006104, "logits/rejected": -0.6054253578186035, "logps/chosen": -0.01028019841760397, "logps/rejected": -1.0288918018341064, "loss": 2.6409, "nll_loss": 0.6581598520278931, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010280199348926544, "rewards/margins": 0.10186116397380829, "rewards/rejected": -0.10288918763399124, "step": 3320 }, { "epoch": 2.296680497925311, "grad_norm": 9.767592430114746, "learning_rate": 4.2796219455970494e-05, "log_odds_chosen": 6.164911270141602, "log_odds_ratio": -0.19356755912303925, "logits/chosen": -1.0346119403839111, "logits/rejected": -1.0217839479446411, "logps/chosen": -0.06772316247224808, "logps/rejected": -1.1891506910324097, "loss": 2.9357, "nll_loss": 0.7145782709121704, "rewards/accuracies": 0.875, "rewards/chosen": -0.006772316992282867, "rewards/margins": 0.11214275658130646, "rewards/rejected": -0.11891507357358932, "step": 3321 }, { "epoch": 2.297372060857538, "grad_norm": 3.739311456680298, "learning_rate": 4.279237743968035e-05, "log_odds_chosen": 5.006280422210693, "log_odds_ratio": -0.11216719448566437, "logits/chosen": -0.5570608973503113, "logits/rejected": -0.5480868816375732, "logps/chosen": -0.05582691729068756, "logps/rejected": -1.1256422996520996, "loss": 2.4771, "nll_loss": 0.6080511212348938, "rewards/accuracies": 1.0, "rewards/chosen": -0.005582691170275211, "rewards/margins": 0.10698153078556061, "rewards/rejected": -0.11256422102451324, "step": 3322 }, { "epoch": 2.2980636237897647, "grad_norm": 4.676652908325195, "learning_rate": 4.27885354233902e-05, "log_odds_chosen": 5.39848518371582, "log_odds_ratio": -0.1628534346818924, "logits/chosen": -0.2611388564109802, "logits/rejected": -0.3353046774864197, "logps/chosen": -0.052117325365543365, "logps/rejected": -1.071663737297058, "loss": 1.9026, "nll_loss": 0.4593610465526581, "rewards/accuracies": 0.875, "rewards/chosen": -0.005211732815951109, "rewards/margins": 0.1019546389579773, "rewards/rejected": -0.10716637223958969, "step": 3323 }, { "epoch": 2.2987551867219915, "grad_norm": 9.791510581970215, "learning_rate": 4.2784693407100045e-05, "log_odds_chosen": 5.421011447906494, "log_odds_ratio": -0.15795129537582397, "logits/chosen": -0.5889769792556763, "logits/rejected": -0.6601840257644653, "logps/chosen": -0.05094009265303612, "logps/rejected": -0.8651940226554871, "loss": 3.0226, "nll_loss": 0.7398436665534973, "rewards/accuracies": 0.875, "rewards/chosen": -0.005094009451568127, "rewards/margins": 0.08142539113759995, "rewards/rejected": -0.08651940524578094, "step": 3324 }, { "epoch": 2.2994467496542184, "grad_norm": 5.987621784210205, "learning_rate": 4.2780851390809904e-05, "log_odds_chosen": 6.50314474105835, "log_odds_ratio": -0.015520547516644001, "logits/chosen": -0.582254946231842, "logits/rejected": -0.6059767007827759, "logps/chosen": -0.0028326380997896194, "logps/rejected": -0.8199695348739624, "loss": 1.4449, "nll_loss": 0.35967934131622314, "rewards/accuracies": 1.0, "rewards/chosen": -0.00028326379833742976, "rewards/margins": 0.08171369135379791, "rewards/rejected": -0.08199695497751236, "step": 3325 }, { "epoch": 2.300138312586445, "grad_norm": 9.296241760253906, "learning_rate": 4.277700937451975e-05, "log_odds_chosen": 9.585782051086426, "log_odds_ratio": -0.000565931259188801, "logits/chosen": -0.5659375190734863, "logits/rejected": -0.6192294359207153, "logps/chosen": -0.0011265147477388382, "logps/rejected": -1.658210277557373, "loss": 2.6348, "nll_loss": 0.6586532592773438, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011265148350503296, "rewards/margins": 0.16570837795734406, "rewards/rejected": -0.16582103073596954, "step": 3326 }, { "epoch": 2.300829875518672, "grad_norm": 5.303914546966553, "learning_rate": 4.27731673582296e-05, "log_odds_chosen": 6.517918109893799, "log_odds_ratio": -0.12337314337491989, "logits/chosen": -0.6629981398582458, "logits/rejected": -0.6698999404907227, "logps/chosen": -0.10352815687656403, "logps/rejected": -0.9165140390396118, "loss": 2.3389, "nll_loss": 0.5723835229873657, "rewards/accuracies": 0.875, "rewards/chosen": -0.010352815501391888, "rewards/margins": 0.0812985822558403, "rewards/rejected": -0.09165140241384506, "step": 3327 }, { "epoch": 2.301521438450899, "grad_norm": 11.455277442932129, "learning_rate": 4.2769325341939455e-05, "log_odds_chosen": 8.0078706741333, "log_odds_ratio": -0.12266886234283447, "logits/chosen": -0.8499240875244141, "logits/rejected": -0.8820198774337769, "logps/chosen": -0.02486070990562439, "logps/rejected": -1.4918012619018555, "loss": 2.8902, "nll_loss": 0.7102892994880676, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024860710836946964, "rewards/margins": 0.14669406414031982, "rewards/rejected": -0.14918012917041779, "step": 3328 }, { "epoch": 2.3022130013831257, "grad_norm": 9.202096939086914, "learning_rate": 4.27654833256493e-05, "log_odds_chosen": 7.53227424621582, "log_odds_ratio": -0.08717505633831024, "logits/chosen": -0.7595465779304504, "logits/rejected": -0.8716880083084106, "logps/chosen": -0.019876958802342415, "logps/rejected": -1.5796029567718506, "loss": 2.1861, "nll_loss": 0.5378171801567078, "rewards/accuracies": 1.0, "rewards/chosen": -0.00198769592680037, "rewards/margins": 0.15597259998321533, "rewards/rejected": -0.15796029567718506, "step": 3329 }, { "epoch": 2.3029045643153525, "grad_norm": 8.945493698120117, "learning_rate": 4.276164130935915e-05, "log_odds_chosen": 6.195101261138916, "log_odds_ratio": -0.008044025860726833, "logits/chosen": -0.3661419153213501, "logits/rejected": -0.40794235467910767, "logps/chosen": -0.029904384166002274, "logps/rejected": -1.367875099182129, "loss": 2.7449, "nll_loss": 0.6854293346405029, "rewards/accuracies": 1.0, "rewards/chosen": -0.002990438835695386, "rewards/margins": 0.1337970793247223, "rewards/rejected": -0.1367875039577484, "step": 3330 }, { "epoch": 2.3035961272475793, "grad_norm": 8.282926559448242, "learning_rate": 4.2757799293069005e-05, "log_odds_chosen": 8.08902359008789, "log_odds_ratio": -0.012745675630867481, "logits/chosen": -0.3566465377807617, "logits/rejected": -0.3846319317817688, "logps/chosen": -0.04099631682038307, "logps/rejected": -1.4674735069274902, "loss": 2.3052, "nll_loss": 0.5750317573547363, "rewards/accuracies": 1.0, "rewards/chosen": -0.004099631682038307, "rewards/margins": 0.14264771342277527, "rewards/rejected": -0.14674735069274902, "step": 3331 }, { "epoch": 2.304287690179806, "grad_norm": 11.868744850158691, "learning_rate": 4.275395727677886e-05, "log_odds_chosen": 7.529213905334473, "log_odds_ratio": -0.005522570572793484, "logits/chosen": -0.42124423384666443, "logits/rejected": -0.5007656812667847, "logps/chosen": -0.020202763378620148, "logps/rejected": -1.7953150272369385, "loss": 2.9072, "nll_loss": 0.7262435555458069, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020202761515975, "rewards/margins": 0.17751124501228333, "rewards/rejected": -0.1795315146446228, "step": 3332 }, { "epoch": 2.304979253112033, "grad_norm": 13.092118263244629, "learning_rate": 4.27501152604887e-05, "log_odds_chosen": 7.509737014770508, "log_odds_ratio": -0.03149031475186348, "logits/chosen": -0.6241152286529541, "logits/rejected": -0.6165431141853333, "logps/chosen": -0.06661160290241241, "logps/rejected": -1.4761927127838135, "loss": 3.2141, "nll_loss": 0.8003746271133423, "rewards/accuracies": 1.0, "rewards/chosen": -0.006661160849034786, "rewards/margins": 0.14095811545848846, "rewards/rejected": -0.14761927723884583, "step": 3333 }, { "epoch": 2.30567081604426, "grad_norm": 7.950167179107666, "learning_rate": 4.274627324419856e-05, "log_odds_chosen": 7.4247660636901855, "log_odds_ratio": -0.015696687623858452, "logits/chosen": -0.5261014699935913, "logits/rejected": -0.6262849569320679, "logps/chosen": -0.01380687952041626, "logps/rejected": -1.6431702375411987, "loss": 2.1106, "nll_loss": 0.5260758399963379, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013806880451738834, "rewards/margins": 0.16293632984161377, "rewards/rejected": -0.1643170267343521, "step": 3334 }, { "epoch": 2.3063623789764867, "grad_norm": 3.703700542449951, "learning_rate": 4.274243122790841e-05, "log_odds_chosen": 6.943711280822754, "log_odds_ratio": -0.03206576034426689, "logits/chosen": -0.31782206892967224, "logits/rejected": -0.30885663628578186, "logps/chosen": -0.037259530276060104, "logps/rejected": -1.4616565704345703, "loss": 1.9969, "nll_loss": 0.4960177540779114, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037259531673043966, "rewards/margins": 0.14243970811367035, "rewards/rejected": -0.1461656540632248, "step": 3335 }, { "epoch": 2.3070539419087135, "grad_norm": 5.592972755432129, "learning_rate": 4.273858921161826e-05, "log_odds_chosen": 6.800052642822266, "log_odds_ratio": -0.0781567394733429, "logits/chosen": -0.18893122673034668, "logits/rejected": -0.23857125639915466, "logps/chosen": -0.0250327717512846, "logps/rejected": -1.233428716659546, "loss": 2.063, "nll_loss": 0.5079275965690613, "rewards/accuracies": 1.0, "rewards/chosen": -0.002503277501091361, "rewards/margins": 0.12083958834409714, "rewards/rejected": -0.12334287166595459, "step": 3336 }, { "epoch": 2.3077455048409403, "grad_norm": 9.436056137084961, "learning_rate": 4.273474719532811e-05, "log_odds_chosen": 4.679429054260254, "log_odds_ratio": -0.05722453072667122, "logits/chosen": -0.5767453908920288, "logits/rejected": -0.6028765439987183, "logps/chosen": -0.024937059730291367, "logps/rejected": -0.6621779799461365, "loss": 2.9017, "nll_loss": 0.7196928262710571, "rewards/accuracies": 1.0, "rewards/chosen": -0.002493706066161394, "rewards/margins": 0.0637240931391716, "rewards/rejected": -0.06621779501438141, "step": 3337 }, { "epoch": 2.308437067773167, "grad_norm": 8.038519859313965, "learning_rate": 4.273090517903796e-05, "log_odds_chosen": 7.326345920562744, "log_odds_ratio": -0.0033848145976662636, "logits/chosen": -0.5330811142921448, "logits/rejected": -0.5206597447395325, "logps/chosen": -0.005207228474318981, "logps/rejected": -0.8569300174713135, "loss": 2.671, "nll_loss": 0.6674127578735352, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005207228241488338, "rewards/margins": 0.08517228811979294, "rewards/rejected": -0.08569300919771194, "step": 3338 }, { "epoch": 2.309128630705394, "grad_norm": 5.706264972686768, "learning_rate": 4.272706316274781e-05, "log_odds_chosen": 4.299847602844238, "log_odds_ratio": -0.45109936594963074, "logits/chosen": -0.4020465910434723, "logits/rejected": -0.45422112941741943, "logps/chosen": -0.20991401374340057, "logps/rejected": -1.2692890167236328, "loss": 2.4653, "nll_loss": 0.5712153911590576, "rewards/accuracies": 0.875, "rewards/chosen": -0.020991403609514236, "rewards/margins": 0.10593751072883606, "rewards/rejected": -0.12692891061306, "step": 3339 }, { "epoch": 2.309820193637621, "grad_norm": 11.377758979797363, "learning_rate": 4.2723221146457664e-05, "log_odds_chosen": 7.025807857513428, "log_odds_ratio": -0.06139063090085983, "logits/chosen": -0.5835516452789307, "logits/rejected": -0.6491535902023315, "logps/chosen": -0.059739407151937485, "logps/rejected": -1.3370709419250488, "loss": 2.5576, "nll_loss": 0.6332700848579407, "rewards/accuracies": 1.0, "rewards/chosen": -0.0059739407151937485, "rewards/margins": 0.12773315608501434, "rewards/rejected": -0.13370709121227264, "step": 3340 }, { "epoch": 2.3105117565698476, "grad_norm": 9.424481391906738, "learning_rate": 4.2719379130167516e-05, "log_odds_chosen": 6.853867053985596, "log_odds_ratio": -0.01839285157620907, "logits/chosen": -0.599707841873169, "logits/rejected": -0.6209062337875366, "logps/chosen": -0.0179190244525671, "logps/rejected": -0.9835525751113892, "loss": 2.3245, "nll_loss": 0.5792940258979797, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017919024685397744, "rewards/margins": 0.09656335413455963, "rewards/rejected": -0.0983552485704422, "step": 3341 }, { "epoch": 2.3112033195020745, "grad_norm": 8.705497741699219, "learning_rate": 4.271553711387736e-05, "log_odds_chosen": 8.515935897827148, "log_odds_ratio": -0.002595825819298625, "logits/chosen": -0.5556164979934692, "logits/rejected": -0.5878057479858398, "logps/chosen": -0.013318197801709175, "logps/rejected": -1.6672247648239136, "loss": 2.3244, "nll_loss": 0.5808513164520264, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013318199198693037, "rewards/margins": 0.16539067029953003, "rewards/rejected": -0.16672247648239136, "step": 3342 }, { "epoch": 2.3118948824343013, "grad_norm": 11.22912311553955, "learning_rate": 4.271169509758722e-05, "log_odds_chosen": 4.5884246826171875, "log_odds_ratio": -0.17706415057182312, "logits/chosen": -0.3864899277687073, "logits/rejected": -0.4442172050476074, "logps/chosen": -0.09313705563545227, "logps/rejected": -0.860073447227478, "loss": 2.2343, "nll_loss": 0.5408572554588318, "rewards/accuracies": 1.0, "rewards/chosen": -0.009313706308603287, "rewards/margins": 0.07669363915920258, "rewards/rejected": -0.08600734919309616, "step": 3343 }, { "epoch": 2.312586445366528, "grad_norm": 7.984221458435059, "learning_rate": 4.2707853081297067e-05, "log_odds_chosen": 7.36676025390625, "log_odds_ratio": -0.1042494997382164, "logits/chosen": -0.49889159202575684, "logits/rejected": -0.6104129552841187, "logps/chosen": -0.02517927810549736, "logps/rejected": -1.2142199277877808, "loss": 1.9779, "nll_loss": 0.4840526580810547, "rewards/accuracies": 0.875, "rewards/chosen": -0.0025179279036819935, "rewards/margins": 0.11890406906604767, "rewards/rejected": -0.12142199277877808, "step": 3344 }, { "epoch": 2.313278008298755, "grad_norm": 9.529037475585938, "learning_rate": 4.270401106500692e-05, "log_odds_chosen": 8.3910551071167, "log_odds_ratio": -0.0006391934584826231, "logits/chosen": -0.32860779762268066, "logits/rejected": -0.4095700979232788, "logps/chosen": -0.008856563828885555, "logps/rejected": -1.5362614393234253, "loss": 2.9869, "nll_loss": 0.7466493844985962, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008856563363224268, "rewards/margins": 0.1527405083179474, "rewards/rejected": -0.15362614393234253, "step": 3345 }, { "epoch": 2.313969571230982, "grad_norm": 9.688497543334961, "learning_rate": 4.270016904871677e-05, "log_odds_chosen": 6.905933856964111, "log_odds_ratio": -0.052650876343250275, "logits/chosen": -0.6683744788169861, "logits/rejected": -0.7596943378448486, "logps/chosen": -0.035936955362558365, "logps/rejected": -1.0634468793869019, "loss": 3.1083, "nll_loss": 0.7718011140823364, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035936955828219652, "rewards/margins": 0.1027509942650795, "rewards/rejected": -0.10634469240903854, "step": 3346 }, { "epoch": 2.3146611341632086, "grad_norm": 7.8678507804870605, "learning_rate": 4.269632703242662e-05, "log_odds_chosen": 6.931734085083008, "log_odds_ratio": -0.015753693878650665, "logits/chosen": -0.5395713448524475, "logits/rejected": -0.6051682233810425, "logps/chosen": -0.013178205117583275, "logps/rejected": -0.8928380012512207, "loss": 2.3201, "nll_loss": 0.5784523487091064, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013178205117583275, "rewards/margins": 0.08796598762273788, "rewards/rejected": -0.08928380161523819, "step": 3347 }, { "epoch": 2.3153526970954355, "grad_norm": 8.95217514038086, "learning_rate": 4.269248501613647e-05, "log_odds_chosen": 8.627083778381348, "log_odds_ratio": -0.0007175215287134051, "logits/chosen": -0.5515803098678589, "logits/rejected": -0.6366013884544373, "logps/chosen": -0.009073970839381218, "logps/rejected": -1.704122543334961, "loss": 2.1583, "nll_loss": 0.5395137667655945, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009073971305042505, "rewards/margins": 0.16950486600399017, "rewards/rejected": -0.17041227221488953, "step": 3348 }, { "epoch": 2.3160442600276623, "grad_norm": 5.506462574005127, "learning_rate": 4.268864299984632e-05, "log_odds_chosen": 8.146820068359375, "log_odds_ratio": -0.0020080609247088432, "logits/chosen": -0.4577869176864624, "logits/rejected": -0.5449948906898499, "logps/chosen": -0.004896472208201885, "logps/rejected": -1.3375948667526245, "loss": 1.7863, "nll_loss": 0.44636422395706177, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004896472091786563, "rewards/margins": 0.13326984643936157, "rewards/rejected": -0.1337594985961914, "step": 3349 }, { "epoch": 2.316735822959889, "grad_norm": 7.025913715362549, "learning_rate": 4.2684800983556174e-05, "log_odds_chosen": 9.591825485229492, "log_odds_ratio": -0.0010005512740463018, "logits/chosen": -0.7512106895446777, "logits/rejected": -0.8099105358123779, "logps/chosen": -0.010130836628377438, "logps/rejected": -2.046741008758545, "loss": 1.7075, "nll_loss": 0.42677560448646545, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010130837326869369, "rewards/margins": 0.2036610245704651, "rewards/rejected": -0.20467409491539001, "step": 3350 }, { "epoch": 2.317427385892116, "grad_norm": 9.430314064025879, "learning_rate": 4.268095896726602e-05, "log_odds_chosen": 8.802265167236328, "log_odds_ratio": -0.00432342104613781, "logits/chosen": -0.6346014142036438, "logits/rejected": -0.6634232401847839, "logps/chosen": -0.010224283672869205, "logps/rejected": -1.2433936595916748, "loss": 2.379, "nll_loss": 0.5943161249160767, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010224285069853067, "rewards/margins": 0.1233169287443161, "rewards/rejected": -0.12433935701847076, "step": 3351 }, { "epoch": 2.3181189488243428, "grad_norm": 7.456939697265625, "learning_rate": 4.267711695097588e-05, "log_odds_chosen": 7.387084007263184, "log_odds_ratio": -0.014544477686285973, "logits/chosen": -0.7580875158309937, "logits/rejected": -0.7801265716552734, "logps/chosen": -0.008447481319308281, "logps/rejected": -1.2176425457000732, "loss": 2.5504, "nll_loss": 0.6361559629440308, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008447481086477637, "rewards/margins": 0.12091951072216034, "rewards/rejected": -0.12176425755023956, "step": 3352 }, { "epoch": 2.3188105117565696, "grad_norm": 9.283626556396484, "learning_rate": 4.2673274934685725e-05, "log_odds_chosen": 7.965545654296875, "log_odds_ratio": -0.0008849737932905555, "logits/chosen": -0.2704313397407532, "logits/rejected": -0.2850708067417145, "logps/chosen": -0.002565020928159356, "logps/rejected": -1.3778682947158813, "loss": 2.3377, "nll_loss": 0.5843430161476135, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025650212774053216, "rewards/margins": 0.13753032684326172, "rewards/rejected": -0.1377868354320526, "step": 3353 }, { "epoch": 2.3195020746887964, "grad_norm": 11.757554054260254, "learning_rate": 4.266943291839558e-05, "log_odds_chosen": 7.557385444641113, "log_odds_ratio": -0.05157068371772766, "logits/chosen": -0.40662682056427, "logits/rejected": -0.4159397780895233, "logps/chosen": -0.01183061208575964, "logps/rejected": -1.2338945865631104, "loss": 3.4943, "nll_loss": 0.868411660194397, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011830611620098352, "rewards/margins": 0.12220640480518341, "rewards/rejected": -0.12338946759700775, "step": 3354 }, { "epoch": 2.3201936376210233, "grad_norm": 8.072080612182617, "learning_rate": 4.266559090210543e-05, "log_odds_chosen": 8.094703674316406, "log_odds_ratio": -0.002076277043670416, "logits/chosen": -0.5662438869476318, "logits/rejected": -0.6120657920837402, "logps/chosen": -0.01660882495343685, "logps/rejected": -1.4540126323699951, "loss": 2.4049, "nll_loss": 0.6010183095932007, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016608824953436852, "rewards/margins": 0.14374037086963654, "rewards/rejected": -0.1454012542963028, "step": 3355 }, { "epoch": 2.3208852005532505, "grad_norm": 12.50888729095459, "learning_rate": 4.2661748885815276e-05, "log_odds_chosen": 6.450561046600342, "log_odds_ratio": -0.26917147636413574, "logits/chosen": -0.5345016717910767, "logits/rejected": -0.5320403575897217, "logps/chosen": -0.007884496822953224, "logps/rejected": -0.9485344290733337, "loss": 2.1652, "nll_loss": 0.5143883228302002, "rewards/accuracies": 0.875, "rewards/chosen": -0.000788449659012258, "rewards/margins": 0.09406498819589615, "rewards/rejected": -0.09485343843698502, "step": 3356 }, { "epoch": 2.3215767634854774, "grad_norm": 8.556360244750977, "learning_rate": 4.265790686952513e-05, "log_odds_chosen": 6.606521129608154, "log_odds_ratio": -0.016001202166080475, "logits/chosen": -0.8234816789627075, "logits/rejected": -0.8304534554481506, "logps/chosen": -0.03415491804480553, "logps/rejected": -1.4321098327636719, "loss": 3.307, "nll_loss": 0.8251463174819946, "rewards/accuracies": 1.0, "rewards/chosen": -0.00341549189761281, "rewards/margins": 0.1397954821586609, "rewards/rejected": -0.1432109773159027, "step": 3357 }, { "epoch": 2.322268326417704, "grad_norm": 9.19092082977295, "learning_rate": 4.265406485323498e-05, "log_odds_chosen": 4.379786968231201, "log_odds_ratio": -0.23112638294696808, "logits/chosen": -0.5057024359703064, "logits/rejected": -0.5283100008964539, "logps/chosen": -0.09777574241161346, "logps/rejected": -1.1245331764221191, "loss": 2.7133, "nll_loss": 0.6552160978317261, "rewards/accuracies": 0.875, "rewards/chosen": -0.009777573868632317, "rewards/margins": 0.10267573595046997, "rewards/rejected": -0.11245331168174744, "step": 3358 }, { "epoch": 2.322959889349931, "grad_norm": 10.96688175201416, "learning_rate": 4.265022283694483e-05, "log_odds_chosen": 7.878183364868164, "log_odds_ratio": -0.005137327127158642, "logits/chosen": -0.4825310707092285, "logits/rejected": -0.5595512390136719, "logps/chosen": -0.007061135023832321, "logps/rejected": -1.8817293643951416, "loss": 2.8067, "nll_loss": 0.7011556625366211, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007061135256662965, "rewards/margins": 0.1874668300151825, "rewards/rejected": -0.18817293643951416, "step": 3359 }, { "epoch": 2.323651452282158, "grad_norm": 9.870452880859375, "learning_rate": 4.264638082065468e-05, "log_odds_chosen": 9.459579467773438, "log_odds_ratio": -0.00026476330822333694, "logits/chosen": -0.5286309719085693, "logits/rejected": -0.6351138949394226, "logps/chosen": -0.000664436724036932, "logps/rejected": -1.7894325256347656, "loss": 3.2136, "nll_loss": 0.8033857345581055, "rewards/accuracies": 1.0, "rewards/chosen": -6.644366658292711e-05, "rewards/margins": 0.17887680232524872, "rewards/rejected": -0.17894324660301208, "step": 3360 }, { "epoch": 2.3243430152143847, "grad_norm": 5.560983180999756, "learning_rate": 4.264253880436454e-05, "log_odds_chosen": 8.930059432983398, "log_odds_ratio": -0.0022857023868709803, "logits/chosen": -0.9240604043006897, "logits/rejected": -0.8908398747444153, "logps/chosen": -0.01126360148191452, "logps/rejected": -1.5385799407958984, "loss": 1.9848, "nll_loss": 0.49597764015197754, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011263600317761302, "rewards/margins": 0.15273164212703705, "rewards/rejected": -0.1538580060005188, "step": 3361 }, { "epoch": 2.3250345781466115, "grad_norm": 7.829687595367432, "learning_rate": 4.263869678807438e-05, "log_odds_chosen": 9.804492950439453, "log_odds_ratio": -0.00015975243877619505, "logits/chosen": -0.2846134901046753, "logits/rejected": -0.36845675110816956, "logps/chosen": -0.0004424336366355419, "logps/rejected": -1.7838191986083984, "loss": 1.8686, "nll_loss": 0.4671386778354645, "rewards/accuracies": 1.0, "rewards/chosen": -4.424336657393724e-05, "rewards/margins": 0.1783376783132553, "rewards/rejected": -0.17838191986083984, "step": 3362 }, { "epoch": 2.3257261410788383, "grad_norm": 13.060300827026367, "learning_rate": 4.2634854771784236e-05, "log_odds_chosen": 9.596633911132812, "log_odds_ratio": -0.0004242879222147167, "logits/chosen": -0.2015463262796402, "logits/rejected": -0.468197762966156, "logps/chosen": -0.0012442361330613494, "logps/rejected": -2.1529362201690674, "loss": 3.0141, "nll_loss": 0.7534917593002319, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012442361912690103, "rewards/margins": 0.21516920626163483, "rewards/rejected": -0.21529361605644226, "step": 3363 }, { "epoch": 2.326417704011065, "grad_norm": 5.3858232498168945, "learning_rate": 4.263101275549409e-05, "log_odds_chosen": 6.65142297744751, "log_odds_ratio": -0.11172687262296677, "logits/chosen": -0.5483109951019287, "logits/rejected": -0.5711467266082764, "logps/chosen": -0.03660149127244949, "logps/rejected": -0.868246853351593, "loss": 1.6949, "nll_loss": 0.4125487804412842, "rewards/accuracies": 0.875, "rewards/chosen": -0.0036601494066417217, "rewards/margins": 0.08316454291343689, "rewards/rejected": -0.0868246853351593, "step": 3364 }, { "epoch": 2.327109266943292, "grad_norm": 9.577594757080078, "learning_rate": 4.2627170739203934e-05, "log_odds_chosen": 6.5316572189331055, "log_odds_ratio": -0.023967696353793144, "logits/chosen": -0.6139042377471924, "logits/rejected": -0.7178165316581726, "logps/chosen": -0.010692340321838856, "logps/rejected": -0.900058388710022, "loss": 2.6979, "nll_loss": 0.6720776557922363, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010692339856177568, "rewards/margins": 0.08893661201000214, "rewards/rejected": -0.09000584483146667, "step": 3365 }, { "epoch": 2.327800829875519, "grad_norm": 11.827892303466797, "learning_rate": 4.2623328722913786e-05, "log_odds_chosen": 7.095213890075684, "log_odds_ratio": -0.007561472710222006, "logits/chosen": -0.701814591884613, "logits/rejected": -0.7703323364257812, "logps/chosen": -0.005921615287661552, "logps/rejected": -1.0419270992279053, "loss": 3.8682, "nll_loss": 0.966285765171051, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005921615520492196, "rewards/margins": 0.10360054671764374, "rewards/rejected": -0.10419271886348724, "step": 3366 }, { "epoch": 2.3284923928077457, "grad_norm": 8.764759063720703, "learning_rate": 4.261948670662364e-05, "log_odds_chosen": 6.575815200805664, "log_odds_ratio": -0.09238407015800476, "logits/chosen": -0.5462766885757446, "logits/rejected": -0.4617829918861389, "logps/chosen": -0.026065904647111893, "logps/rejected": -1.1544698476791382, "loss": 2.4102, "nll_loss": 0.5933218002319336, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026065902784466743, "rewards/margins": 0.11284039914608002, "rewards/rejected": -0.11544698476791382, "step": 3367 }, { "epoch": 2.3291839557399725, "grad_norm": 8.00680160522461, "learning_rate": 4.261564469033349e-05, "log_odds_chosen": 6.982217311859131, "log_odds_ratio": -0.007230084389448166, "logits/chosen": -0.35788673162460327, "logits/rejected": -0.35731175541877747, "logps/chosen": -0.017076268792152405, "logps/rejected": -1.1515657901763916, "loss": 2.0273, "nll_loss": 0.5060920715332031, "rewards/accuracies": 1.0, "rewards/chosen": -0.001707626972347498, "rewards/margins": 0.11344894766807556, "rewards/rejected": -0.11515657603740692, "step": 3368 }, { "epoch": 2.3298755186721993, "grad_norm": 8.321171760559082, "learning_rate": 4.261180267404334e-05, "log_odds_chosen": 7.154203414916992, "log_odds_ratio": -0.15566202998161316, "logits/chosen": -0.24052327871322632, "logits/rejected": -0.26545450091362, "logps/chosen": -0.04068966582417488, "logps/rejected": -1.7842777967453003, "loss": 2.2312, "nll_loss": 0.542233407497406, "rewards/accuracies": 0.875, "rewards/chosen": -0.004068966023623943, "rewards/margins": 0.1743588149547577, "rewards/rejected": -0.1784277707338333, "step": 3369 }, { "epoch": 2.330567081604426, "grad_norm": 8.19678783416748, "learning_rate": 4.2607960657753196e-05, "log_odds_chosen": 7.805452823638916, "log_odds_ratio": -0.07030778378248215, "logits/chosen": -0.2236385941505432, "logits/rejected": -0.3405549228191376, "logps/chosen": -0.029319610446691513, "logps/rejected": -1.758199691772461, "loss": 2.6445, "nll_loss": 0.6540853381156921, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029319613240659237, "rewards/margins": 0.17288801074028015, "rewards/rejected": -0.17581996321678162, "step": 3370 }, { "epoch": 2.331258644536653, "grad_norm": 6.791223049163818, "learning_rate": 4.260411864146304e-05, "log_odds_chosen": 9.60167407989502, "log_odds_ratio": -0.0004270426870789379, "logits/chosen": -0.7064838409423828, "logits/rejected": -0.7003648281097412, "logps/chosen": -0.015431979671120644, "logps/rejected": -2.0963101387023926, "loss": 2.5563, "nll_loss": 0.6390376091003418, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015431978972628713, "rewards/margins": 0.20808780193328857, "rewards/rejected": -0.2096310257911682, "step": 3371 }, { "epoch": 2.33195020746888, "grad_norm": 11.994937896728516, "learning_rate": 4.2600276625172894e-05, "log_odds_chosen": 8.312555313110352, "log_odds_ratio": -0.0009366670856252313, "logits/chosen": -0.4604548513889313, "logits/rejected": -0.5908196568489075, "logps/chosen": -0.020090028643608093, "logps/rejected": -1.9958922863006592, "loss": 2.9871, "nll_loss": 0.7466820478439331, "rewards/accuracies": 1.0, "rewards/chosen": -0.002009002724662423, "rewards/margins": 0.1975802183151245, "rewards/rejected": -0.19958922266960144, "step": 3372 }, { "epoch": 2.3326417704011067, "grad_norm": 10.920364379882812, "learning_rate": 4.259643460888275e-05, "log_odds_chosen": 7.596035003662109, "log_odds_ratio": -0.005939878057688475, "logits/chosen": -0.49765828251838684, "logits/rejected": -0.6091522574424744, "logps/chosen": -0.017343439161777496, "logps/rejected": -1.555943250656128, "loss": 3.4278, "nll_loss": 0.8563669919967651, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017343438230454922, "rewards/margins": 0.15386000275611877, "rewards/rejected": -0.1555943489074707, "step": 3373 }, { "epoch": 2.3333333333333335, "grad_norm": 53.26963806152344, "learning_rate": 4.259259259259259e-05, "log_odds_chosen": 6.723474025726318, "log_odds_ratio": -0.45630714297294617, "logits/chosen": -0.5959466695785522, "logits/rejected": -0.6924944519996643, "logps/chosen": -0.09198511391878128, "logps/rejected": -1.44384765625, "loss": 2.6244, "nll_loss": 0.6104597449302673, "rewards/accuracies": 0.875, "rewards/chosen": -0.009198511950671673, "rewards/margins": 0.13518625497817993, "rewards/rejected": -0.14438477158546448, "step": 3374 }, { "epoch": 2.3340248962655603, "grad_norm": 7.871276378631592, "learning_rate": 4.2588750576302445e-05, "log_odds_chosen": 6.338235855102539, "log_odds_ratio": -0.1509992927312851, "logits/chosen": -0.4784778952598572, "logits/rejected": -0.48658180236816406, "logps/chosen": -0.03482593595981598, "logps/rejected": -1.3916345834732056, "loss": 3.3691, "nll_loss": 0.8271795511245728, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034825934562832117, "rewards/margins": 0.13568086922168732, "rewards/rejected": -0.13916344940662384, "step": 3375 }, { "epoch": 2.334716459197787, "grad_norm": 6.738348007202148, "learning_rate": 4.25849085600123e-05, "log_odds_chosen": 6.788658142089844, "log_odds_ratio": -0.042201511561870575, "logits/chosen": -0.833795428276062, "logits/rejected": -0.8814170360565186, "logps/chosen": -0.028427157551050186, "logps/rejected": -1.651319980621338, "loss": 3.5604, "nll_loss": 0.8858755826950073, "rewards/accuracies": 1.0, "rewards/chosen": -0.00284271570853889, "rewards/margins": 0.16228929162025452, "rewards/rejected": -0.16513200104236603, "step": 3376 }, { "epoch": 2.335408022130014, "grad_norm": 10.640534400939941, "learning_rate": 4.258106654372215e-05, "log_odds_chosen": 8.235334396362305, "log_odds_ratio": -0.003731532720848918, "logits/chosen": -0.8289970755577087, "logits/rejected": -0.9308689832687378, "logps/chosen": -0.017879465594887733, "logps/rejected": -1.359239101409912, "loss": 2.9657, "nll_loss": 0.7410598993301392, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017879465594887733, "rewards/margins": 0.13413597643375397, "rewards/rejected": -0.13592392206192017, "step": 3377 }, { "epoch": 2.336099585062241, "grad_norm": 7.650116443634033, "learning_rate": 4.2577224527431995e-05, "log_odds_chosen": 7.192252159118652, "log_odds_ratio": -0.012806126847863197, "logits/chosen": -0.7580456137657166, "logits/rejected": -0.7242326736450195, "logps/chosen": -0.03407813236117363, "logps/rejected": -1.6755526065826416, "loss": 2.846, "nll_loss": 0.7102081179618835, "rewards/accuracies": 1.0, "rewards/chosen": -0.003407812910154462, "rewards/margins": 0.16414742171764374, "rewards/rejected": -0.16755524277687073, "step": 3378 }, { "epoch": 2.3367911479944676, "grad_norm": 9.497821807861328, "learning_rate": 4.2573382511141855e-05, "log_odds_chosen": 7.8366498947143555, "log_odds_ratio": -0.0061141084879636765, "logits/chosen": -0.8389533758163452, "logits/rejected": -0.8879673480987549, "logps/chosen": -0.024966338649392128, "logps/rejected": -1.4060308933258057, "loss": 2.5736, "nll_loss": 0.6428003907203674, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024966339115053415, "rewards/margins": 0.13810646533966064, "rewards/rejected": -0.14060309529304504, "step": 3379 }, { "epoch": 2.3374827109266945, "grad_norm": 5.044812202453613, "learning_rate": 4.25695404948517e-05, "log_odds_chosen": 5.910860061645508, "log_odds_ratio": -0.1787855178117752, "logits/chosen": -0.4667434096336365, "logits/rejected": -0.4716196656227112, "logps/chosen": -0.043792542070150375, "logps/rejected": -1.2373631000518799, "loss": 2.0394, "nll_loss": 0.491960346698761, "rewards/accuracies": 0.875, "rewards/chosen": -0.004379254300147295, "rewards/margins": 0.11935704946517944, "rewards/rejected": -0.12373629957437515, "step": 3380 }, { "epoch": 2.3381742738589213, "grad_norm": 8.184005737304688, "learning_rate": 4.256569847856155e-05, "log_odds_chosen": 7.020155429840088, "log_odds_ratio": -0.15846848487854004, "logits/chosen": -0.07825784385204315, "logits/rejected": -0.1667136251926422, "logps/chosen": -0.030404016375541687, "logps/rejected": -1.6403634548187256, "loss": 2.0926, "nll_loss": 0.5073078870773315, "rewards/accuracies": 0.875, "rewards/chosen": -0.003040401265025139, "rewards/margins": 0.16099593043327332, "rewards/rejected": -0.1640363484621048, "step": 3381 }, { "epoch": 2.338865836791148, "grad_norm": 5.734543323516846, "learning_rate": 4.2561856462271405e-05, "log_odds_chosen": 7.248272895812988, "log_odds_ratio": -0.060873474925756454, "logits/chosen": -0.5805363655090332, "logits/rejected": -0.6600069999694824, "logps/chosen": -0.05188342556357384, "logps/rejected": -1.8321489095687866, "loss": 2.1616, "nll_loss": 0.5343201756477356, "rewards/accuracies": 1.0, "rewards/chosen": -0.005188342649489641, "rewards/margins": 0.17802655696868896, "rewards/rejected": -0.18321490287780762, "step": 3382 }, { "epoch": 2.339557399723375, "grad_norm": 5.963815212249756, "learning_rate": 4.255801444598125e-05, "log_odds_chosen": 7.02641487121582, "log_odds_ratio": -0.10145469754934311, "logits/chosen": -0.6014857292175293, "logits/rejected": -0.5714874267578125, "logps/chosen": -0.05983032286167145, "logps/rejected": -1.6911689043045044, "loss": 2.3081, "nll_loss": 0.5668916702270508, "rewards/accuracies": 0.875, "rewards/chosen": -0.005983032286167145, "rewards/margins": 0.16313385963439941, "rewards/rejected": -0.16911689937114716, "step": 3383 }, { "epoch": 2.340248962655602, "grad_norm": 98.84029388427734, "learning_rate": 4.25541724296911e-05, "log_odds_chosen": 6.901886940002441, "log_odds_ratio": -0.3027273416519165, "logits/chosen": -0.7416228652000427, "logits/rejected": -0.7577385902404785, "logps/chosen": -0.01666867919266224, "logps/rejected": -0.819347620010376, "loss": 2.099, "nll_loss": 0.4944872260093689, "rewards/accuracies": 0.875, "rewards/chosen": -0.001666867989115417, "rewards/margins": 0.08026789873838425, "rewards/rejected": -0.08193476498126984, "step": 3384 }, { "epoch": 2.3409405255878286, "grad_norm": 7.437020778656006, "learning_rate": 4.2550330413400956e-05, "log_odds_chosen": 7.493680000305176, "log_odds_ratio": -0.04263937845826149, "logits/chosen": -0.6599336862564087, "logits/rejected": -0.7314375638961792, "logps/chosen": -0.011237611062824726, "logps/rejected": -1.0297126770019531, "loss": 2.617, "nll_loss": 0.6499974131584167, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011237610597163439, "rewards/margins": 0.10184749215841293, "rewards/rejected": -0.10297125577926636, "step": 3385 }, { "epoch": 2.3416320885200554, "grad_norm": 8.780964851379395, "learning_rate": 4.254648839711081e-05, "log_odds_chosen": 7.680317401885986, "log_odds_ratio": -0.062225475907325745, "logits/chosen": -0.587841808795929, "logits/rejected": -0.6148817539215088, "logps/chosen": -0.0225834921002388, "logps/rejected": -1.806999921798706, "loss": 1.9738, "nll_loss": 0.48721805214881897, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022583494428545237, "rewards/margins": 0.17844164371490479, "rewards/rejected": -0.18070000410079956, "step": 3386 }, { "epoch": 2.3423236514522823, "grad_norm": 9.550161361694336, "learning_rate": 4.2542646380820654e-05, "log_odds_chosen": 7.595914363861084, "log_odds_ratio": -0.0024608231615275145, "logits/chosen": -0.7636619806289673, "logits/rejected": -0.7690795063972473, "logps/chosen": -0.002205885713919997, "logps/rejected": -1.0934929847717285, "loss": 2.2587, "nll_loss": 0.5644301176071167, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022058856848161668, "rewards/margins": 0.10912871360778809, "rewards/rejected": -0.10934930294752121, "step": 3387 }, { "epoch": 2.343015214384509, "grad_norm": 10.059521675109863, "learning_rate": 4.253880436453051e-05, "log_odds_chosen": 9.377660751342773, "log_odds_ratio": -0.00010987836867570877, "logits/chosen": -0.5637887716293335, "logits/rejected": -0.6180630922317505, "logps/chosen": -0.0005490887560881674, "logps/rejected": -1.5444753170013428, "loss": 2.1725, "nll_loss": 0.5431035161018372, "rewards/accuracies": 1.0, "rewards/chosen": -5.49088726984337e-05, "rewards/margins": 0.15439262986183167, "rewards/rejected": -0.1544475257396698, "step": 3388 }, { "epoch": 2.343706777316736, "grad_norm": 7.881161212921143, "learning_rate": 4.253496234824036e-05, "log_odds_chosen": 5.156569957733154, "log_odds_ratio": -0.16542679071426392, "logits/chosen": -0.14296261966228485, "logits/rejected": -0.2547076344490051, "logps/chosen": -0.07005628198385239, "logps/rejected": -0.8354336619377136, "loss": 2.1545, "nll_loss": 0.5220921635627747, "rewards/accuracies": 0.875, "rewards/chosen": -0.007005628198385239, "rewards/margins": 0.07653774321079254, "rewards/rejected": -0.08354337513446808, "step": 3389 }, { "epoch": 2.3443983402489628, "grad_norm": 9.925989151000977, "learning_rate": 4.253112033195021e-05, "log_odds_chosen": 7.2002058029174805, "log_odds_ratio": -0.2609242796897888, "logits/chosen": -0.3674536347389221, "logits/rejected": -0.41105300188064575, "logps/chosen": -0.07734145224094391, "logps/rejected": -1.5246528387069702, "loss": 2.3385, "nll_loss": 0.5585339069366455, "rewards/accuracies": 0.875, "rewards/chosen": -0.007734145503491163, "rewards/margins": 0.14473114907741547, "rewards/rejected": -0.15246528387069702, "step": 3390 }, { "epoch": 2.3450899031811896, "grad_norm": 7.965763092041016, "learning_rate": 4.2527278315660064e-05, "log_odds_chosen": 6.444962501525879, "log_odds_ratio": -0.21917779743671417, "logits/chosen": -0.5414268970489502, "logits/rejected": -0.603859007358551, "logps/chosen": -0.06609084457159042, "logps/rejected": -1.0937172174453735, "loss": 2.0623, "nll_loss": 0.4936527609825134, "rewards/accuracies": 0.875, "rewards/chosen": -0.0066090840846300125, "rewards/margins": 0.10276263952255249, "rewards/rejected": -0.10937172919511795, "step": 3391 }, { "epoch": 2.3457814661134164, "grad_norm": 21.98008918762207, "learning_rate": 4.252343629936991e-05, "log_odds_chosen": 5.816531658172607, "log_odds_ratio": -0.6510119438171387, "logits/chosen": -0.5702582001686096, "logits/rejected": -0.6187250018119812, "logps/chosen": -0.11002440005540848, "logps/rejected": -1.22270929813385, "loss": 3.7288, "nll_loss": 0.8671107292175293, "rewards/accuracies": 0.875, "rewards/chosen": -0.011002440005540848, "rewards/margins": 0.11126849800348282, "rewards/rejected": -0.12227094173431396, "step": 3392 }, { "epoch": 2.3464730290456433, "grad_norm": 9.141814231872559, "learning_rate": 4.251959428307976e-05, "log_odds_chosen": 7.8273210525512695, "log_odds_ratio": -0.0032731585670262575, "logits/chosen": -0.28313112258911133, "logits/rejected": -0.34095481038093567, "logps/chosen": -0.00457628583535552, "logps/rejected": -1.3544855117797852, "loss": 2.2227, "nll_loss": 0.5553531646728516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004576286009978503, "rewards/margins": 0.13499093055725098, "rewards/rejected": -0.13544854521751404, "step": 3393 }, { "epoch": 2.34716459197787, "grad_norm": 9.178505897521973, "learning_rate": 4.2515752266789614e-05, "log_odds_chosen": 8.25627613067627, "log_odds_ratio": -0.001994677586480975, "logits/chosen": -0.4252711534500122, "logits/rejected": -0.44464176893234253, "logps/chosen": -0.0024501513689756393, "logps/rejected": -1.3708351850509644, "loss": 2.0642, "nll_loss": 0.5158490538597107, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024501513689756393, "rewards/margins": 0.13683849573135376, "rewards/rejected": -0.1370835304260254, "step": 3394 }, { "epoch": 2.347856154910097, "grad_norm": 4.448156356811523, "learning_rate": 4.2511910250499467e-05, "log_odds_chosen": 7.523759841918945, "log_odds_ratio": -0.055687472224235535, "logits/chosen": -0.6664091944694519, "logits/rejected": -0.7099178433418274, "logps/chosen": -0.02036425471305847, "logps/rejected": -1.0317578315734863, "loss": 1.8873, "nll_loss": 0.46624624729156494, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020364252850413322, "rewards/margins": 0.10113934427499771, "rewards/rejected": -0.10317577421665192, "step": 3395 }, { "epoch": 2.3485477178423237, "grad_norm": 9.876389503479004, "learning_rate": 4.250806823420931e-05, "log_odds_chosen": 8.495573043823242, "log_odds_ratio": -0.045671653002500534, "logits/chosen": -0.7123823165893555, "logits/rejected": -0.7269488573074341, "logps/chosen": -0.013587714172899723, "logps/rejected": -1.3124465942382812, "loss": 2.8378, "nll_loss": 0.7048929929733276, "rewards/accuracies": 1.0, "rewards/chosen": -0.001358771463856101, "rewards/margins": 0.12988589704036713, "rewards/rejected": -0.13124465942382812, "step": 3396 }, { "epoch": 2.3492392807745506, "grad_norm": 10.173689842224121, "learning_rate": 4.250422621791917e-05, "log_odds_chosen": 8.026592254638672, "log_odds_ratio": -0.002193247899413109, "logits/chosen": -0.9392632246017456, "logits/rejected": -0.9983953833580017, "logps/chosen": -0.011764070950448513, "logps/rejected": -1.7628229856491089, "loss": 2.3371, "nll_loss": 0.5840455889701843, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011764070950448513, "rewards/margins": 0.17510589957237244, "rewards/rejected": -0.17628228664398193, "step": 3397 }, { "epoch": 2.3499308437067774, "grad_norm": 9.759671211242676, "learning_rate": 4.250038420162902e-05, "log_odds_chosen": 7.043476581573486, "log_odds_ratio": -0.4816550314426422, "logits/chosen": -0.6363857388496399, "logits/rejected": -0.64066481590271, "logps/chosen": -0.07748615741729736, "logps/rejected": -0.8302817344665527, "loss": 2.5485, "nll_loss": 0.5889508724212646, "rewards/accuracies": 0.875, "rewards/chosen": -0.0077486163936555386, "rewards/margins": 0.07527956366539001, "rewards/rejected": -0.08302817493677139, "step": 3398 }, { "epoch": 2.3506224066390042, "grad_norm": 7.589184284210205, "learning_rate": 4.249654218533887e-05, "log_odds_chosen": 7.6665849685668945, "log_odds_ratio": -0.00181456352584064, "logits/chosen": -0.5171942710876465, "logits/rejected": -0.5481059551239014, "logps/chosen": -0.0013496269239112735, "logps/rejected": -0.8110032677650452, "loss": 2.1699, "nll_loss": 0.542304515838623, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013496269821189344, "rewards/margins": 0.08096536993980408, "rewards/rejected": -0.08110032975673676, "step": 3399 }, { "epoch": 2.351313969571231, "grad_norm": 10.063977241516113, "learning_rate": 4.249270016904872e-05, "log_odds_chosen": 6.615221977233887, "log_odds_ratio": -0.10633889585733414, "logits/chosen": -0.42302262783050537, "logits/rejected": -0.48643821477890015, "logps/chosen": -0.039021894335746765, "logps/rejected": -1.403071403503418, "loss": 1.9831, "nll_loss": 0.48514240980148315, "rewards/accuracies": 0.875, "rewards/chosen": -0.003902189899235964, "rewards/margins": 0.13640496134757996, "rewards/rejected": -0.14030715823173523, "step": 3400 }, { "epoch": 2.352005532503458, "grad_norm": 11.160541534423828, "learning_rate": 4.248885815275857e-05, "log_odds_chosen": 7.308645725250244, "log_odds_ratio": -0.03402864187955856, "logits/chosen": -0.5057591795921326, "logits/rejected": -0.5204235911369324, "logps/chosen": -0.007180843036621809, "logps/rejected": -1.547482967376709, "loss": 2.3467, "nll_loss": 0.5832793712615967, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007180843385867774, "rewards/margins": 0.1540302038192749, "rewards/rejected": -0.15474829077720642, "step": 3401 }, { "epoch": 2.3526970954356847, "grad_norm": 11.588892936706543, "learning_rate": 4.248501613646842e-05, "log_odds_chosen": 6.584766387939453, "log_odds_ratio": -0.03737543523311615, "logits/chosen": -0.587253749370575, "logits/rejected": -0.659322202205658, "logps/chosen": -0.019411414861679077, "logps/rejected": -1.6802135705947876, "loss": 2.1524, "nll_loss": 0.5343660712242126, "rewards/accuracies": 1.0, "rewards/chosen": -0.001941141439601779, "rewards/margins": 0.16608020663261414, "rewards/rejected": -0.16802135109901428, "step": 3402 }, { "epoch": 2.3533886583679116, "grad_norm": 9.891732215881348, "learning_rate": 4.248117412017827e-05, "log_odds_chosen": 6.61793851852417, "log_odds_ratio": -0.17892657220363617, "logits/chosen": -0.5233025550842285, "logits/rejected": -0.5984399318695068, "logps/chosen": -0.026846522465348244, "logps/rejected": -1.040014386177063, "loss": 2.8344, "nll_loss": 0.690710723400116, "rewards/accuracies": 0.875, "rewards/chosen": -0.002684652339667082, "rewards/margins": 0.10131677985191345, "rewards/rejected": -0.10400144010782242, "step": 3403 }, { "epoch": 2.3540802213001384, "grad_norm": 11.967804908752441, "learning_rate": 4.2477332103888125e-05, "log_odds_chosen": 7.058257102966309, "log_odds_ratio": -0.05008646100759506, "logits/chosen": -0.23758041858673096, "logits/rejected": -0.26495781540870667, "logps/chosen": -0.005382682662457228, "logps/rejected": -0.8401252031326294, "loss": 2.3625, "nll_loss": 0.5856223106384277, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005382683011703193, "rewards/margins": 0.08347424864768982, "rewards/rejected": -0.08401252329349518, "step": 3404 }, { "epoch": 2.354771784232365, "grad_norm": 9.493339538574219, "learning_rate": 4.247349008759797e-05, "log_odds_chosen": 5.9872236251831055, "log_odds_ratio": -0.04331246018409729, "logits/chosen": -0.8071039319038391, "logits/rejected": -0.851711094379425, "logps/chosen": -0.018634863197803497, "logps/rejected": -1.080324649810791, "loss": 2.2317, "nll_loss": 0.5535867810249329, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018634865991771221, "rewards/margins": 0.1061689704656601, "rewards/rejected": -0.10803245007991791, "step": 3405 }, { "epoch": 2.355463347164592, "grad_norm": 12.45865535736084, "learning_rate": 4.246964807130783e-05, "log_odds_chosen": 7.26013708114624, "log_odds_ratio": -0.015082553029060364, "logits/chosen": -0.2669753134250641, "logits/rejected": -0.3914386034011841, "logps/chosen": -0.02450769767165184, "logps/rejected": -2.022952079772949, "loss": 2.6149, "nll_loss": 0.6522207260131836, "rewards/accuracies": 1.0, "rewards/chosen": -0.002450769767165184, "rewards/margins": 0.19984443485736847, "rewards/rejected": -0.2022952139377594, "step": 3406 }, { "epoch": 2.356154910096819, "grad_norm": 12.195465087890625, "learning_rate": 4.2465806055017676e-05, "log_odds_chosen": 9.071712493896484, "log_odds_ratio": -0.000601945910602808, "logits/chosen": -0.23555348813533783, "logits/rejected": -0.28923851251602173, "logps/chosen": -0.0022937441244721413, "logps/rejected": -2.087567090988159, "loss": 1.9265, "nll_loss": 0.4815739691257477, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022937438916414976, "rewards/margins": 0.2085273265838623, "rewards/rejected": -0.208756685256958, "step": 3407 }, { "epoch": 2.3568464730290457, "grad_norm": 8.689661026000977, "learning_rate": 4.246196403872753e-05, "log_odds_chosen": 8.243035316467285, "log_odds_ratio": -0.047890111804008484, "logits/chosen": -0.37406691908836365, "logits/rejected": -0.44183045625686646, "logps/chosen": -0.012632353231310844, "logps/rejected": -1.2202600240707397, "loss": 1.9614, "nll_loss": 0.48556771874427795, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012632354628294706, "rewards/margins": 0.12076276540756226, "rewards/rejected": -0.1220259964466095, "step": 3408 }, { "epoch": 2.3575380359612725, "grad_norm": 8.059005737304688, "learning_rate": 4.245812202243738e-05, "log_odds_chosen": 7.652141571044922, "log_odds_ratio": -0.09811528772115707, "logits/chosen": -0.6067800521850586, "logits/rejected": -0.7010886073112488, "logps/chosen": -0.019398357719182968, "logps/rejected": -1.3128105401992798, "loss": 1.9711, "nll_loss": 0.4829697608947754, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019398359581828117, "rewards/margins": 0.1293412148952484, "rewards/rejected": -0.1312810480594635, "step": 3409 }, { "epoch": 2.3582295988934994, "grad_norm": 8.750677108764648, "learning_rate": 4.2454280006147226e-05, "log_odds_chosen": 8.003756523132324, "log_odds_ratio": -0.006866155192255974, "logits/chosen": -0.5898510217666626, "logits/rejected": -0.5858031511306763, "logps/chosen": -0.01122718770056963, "logps/rejected": -1.737359881401062, "loss": 2.2807, "nll_loss": 0.5694831013679504, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011227187933400273, "rewards/margins": 0.172613263130188, "rewards/rejected": -0.17373599112033844, "step": 3410 }, { "epoch": 2.358921161825726, "grad_norm": 11.310158729553223, "learning_rate": 4.245043798985708e-05, "log_odds_chosen": 7.99793004989624, "log_odds_ratio": -0.0029664812609553337, "logits/chosen": -0.3674885928630829, "logits/rejected": -0.4538128972053528, "logps/chosen": -0.011136957444250584, "logps/rejected": -1.4392294883728027, "loss": 2.2423, "nll_loss": 0.5602694749832153, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011136956745758653, "rewards/margins": 0.14280925691127777, "rewards/rejected": -0.14392295479774475, "step": 3411 }, { "epoch": 2.359612724757953, "grad_norm": 6.208254337310791, "learning_rate": 4.2446595973566924e-05, "log_odds_chosen": 8.18211555480957, "log_odds_ratio": -0.006016615778207779, "logits/chosen": -0.4709635376930237, "logits/rejected": -0.4899592101573944, "logps/chosen": -0.01079797837883234, "logps/rejected": -0.919940710067749, "loss": 1.9933, "nll_loss": 0.4977228343486786, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010797978611662984, "rewards/margins": 0.0909142792224884, "rewards/rejected": -0.09199407696723938, "step": 3412 }, { "epoch": 2.36030428769018, "grad_norm": 6.6337480545043945, "learning_rate": 4.2442753957276783e-05, "log_odds_chosen": 7.813111305236816, "log_odds_ratio": -0.005881158635020256, "logits/chosen": -0.67177414894104, "logits/rejected": -0.7192281484603882, "logps/chosen": -0.0034163526725023985, "logps/rejected": -1.134279727935791, "loss": 2.1709, "nll_loss": 0.5421328544616699, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003416352847125381, "rewards/margins": 0.11308634281158447, "rewards/rejected": -0.11342797428369522, "step": 3413 }, { "epoch": 2.3609958506224067, "grad_norm": 12.27839469909668, "learning_rate": 4.243891194098663e-05, "log_odds_chosen": 8.821434020996094, "log_odds_ratio": -0.0011850158916786313, "logits/chosen": -0.6717657446861267, "logits/rejected": -0.7319881916046143, "logps/chosen": -0.0052223182283341885, "logps/rejected": -1.7572263479232788, "loss": 2.1305, "nll_loss": 0.5324950814247131, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005222317995503545, "rewards/margins": 0.17520040273666382, "rewards/rejected": -0.1757226288318634, "step": 3414 }, { "epoch": 2.3616874135546335, "grad_norm": 7.977497100830078, "learning_rate": 4.243506992469648e-05, "log_odds_chosen": 7.292098045349121, "log_odds_ratio": -0.004010710399597883, "logits/chosen": -0.5028542876243591, "logits/rejected": -0.53304123878479, "logps/chosen": -0.01968352310359478, "logps/rejected": -1.1983541250228882, "loss": 2.8042, "nll_loss": 0.7006375789642334, "rewards/accuracies": 1.0, "rewards/chosen": -0.001968352124094963, "rewards/margins": 0.11786707490682602, "rewards/rejected": -0.11983540654182434, "step": 3415 }, { "epoch": 2.3623789764868603, "grad_norm": 6.613243103027344, "learning_rate": 4.2431227908406334e-05, "log_odds_chosen": 8.827180862426758, "log_odds_ratio": -0.019252749159932137, "logits/chosen": -0.7933902144432068, "logits/rejected": -0.9102729558944702, "logps/chosen": -0.005132139194756746, "logps/rejected": -1.7154276371002197, "loss": 1.4716, "nll_loss": 0.36596986651420593, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005132139194756746, "rewards/margins": 0.17102953791618347, "rewards/rejected": -0.17154276371002197, "step": 3416 }, { "epoch": 2.363070539419087, "grad_norm": 6.68892765045166, "learning_rate": 4.2427385892116186e-05, "log_odds_chosen": 5.407261848449707, "log_odds_ratio": -0.06341204047203064, "logits/chosen": -0.5899242758750916, "logits/rejected": -0.6239021420478821, "logps/chosen": -0.05954143777489662, "logps/rejected": -1.1532893180847168, "loss": 2.598, "nll_loss": 0.6431571245193481, "rewards/accuracies": 1.0, "rewards/chosen": -0.0059541440568864346, "rewards/margins": 0.10937478393316269, "rewards/rejected": -0.11532893031835556, "step": 3417 }, { "epoch": 2.363762102351314, "grad_norm": 6.749809741973877, "learning_rate": 4.242354387582603e-05, "log_odds_chosen": 6.653587341308594, "log_odds_ratio": -0.09195633977651596, "logits/chosen": -0.6077258586883545, "logits/rejected": -0.6534870862960815, "logps/chosen": -0.029678650200366974, "logps/rejected": -1.3541957139968872, "loss": 2.3267, "nll_loss": 0.5724772214889526, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029678652063012123, "rewards/margins": 0.13245171308517456, "rewards/rejected": -0.1354195773601532, "step": 3418 }, { "epoch": 2.364453665283541, "grad_norm": 7.185380458831787, "learning_rate": 4.2419701859535885e-05, "log_odds_chosen": 6.369234085083008, "log_odds_ratio": -0.03255006670951843, "logits/chosen": -0.7959968447685242, "logits/rejected": -0.9139914512634277, "logps/chosen": -0.02814808301627636, "logps/rejected": -1.2766046524047852, "loss": 2.2784, "nll_loss": 0.5663396120071411, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028148081619292498, "rewards/margins": 0.12484566122293472, "rewards/rejected": -0.12766046822071075, "step": 3419 }, { "epoch": 2.3651452282157677, "grad_norm": 7.040897846221924, "learning_rate": 4.241585984324574e-05, "log_odds_chosen": 7.790557384490967, "log_odds_ratio": -0.013517394661903381, "logits/chosen": -0.758858859539032, "logits/rejected": -0.7736672163009644, "logps/chosen": -0.02057792991399765, "logps/rejected": -1.3754760026931763, "loss": 2.202, "nll_loss": 0.5491434335708618, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020577930845320225, "rewards/margins": 0.135489821434021, "rewards/rejected": -0.1375475972890854, "step": 3420 }, { "epoch": 2.3658367911479945, "grad_norm": 5.4301252365112305, "learning_rate": 4.241201782695558e-05, "log_odds_chosen": 6.388769626617432, "log_odds_ratio": -0.09578309953212738, "logits/chosen": -0.2594180405139923, "logits/rejected": -0.24878433346748352, "logps/chosen": -0.02873176522552967, "logps/rejected": -1.0656076669692993, "loss": 2.0233, "nll_loss": 0.49625498056411743, "rewards/accuracies": 1.0, "rewards/chosen": -0.002873176708817482, "rewards/margins": 0.103687584400177, "rewards/rejected": -0.10656075924634933, "step": 3421 }, { "epoch": 2.3665283540802213, "grad_norm": 14.583373069763184, "learning_rate": 4.240817581066544e-05, "log_odds_chosen": 6.067316055297852, "log_odds_ratio": -0.5061472654342651, "logits/chosen": -0.438271164894104, "logits/rejected": -0.5065256357192993, "logps/chosen": -0.0877622589468956, "logps/rejected": -1.3191258907318115, "loss": 2.9576, "nll_loss": 0.6887795329093933, "rewards/accuracies": 0.75, "rewards/chosen": -0.008776226080954075, "rewards/margins": 0.12313637137413025, "rewards/rejected": -0.13191257417201996, "step": 3422 }, { "epoch": 2.367219917012448, "grad_norm": 6.707622051239014, "learning_rate": 4.240433379437529e-05, "log_odds_chosen": 6.3979363441467285, "log_odds_ratio": -0.11842715740203857, "logits/chosen": -0.6803793907165527, "logits/rejected": -0.6929797530174255, "logps/chosen": -0.02465580217540264, "logps/rejected": -1.0064455270767212, "loss": 2.5078, "nll_loss": 0.6151173114776611, "rewards/accuracies": 0.875, "rewards/chosen": -0.002465580590069294, "rewards/margins": 0.09817897528409958, "rewards/rejected": -0.1006445586681366, "step": 3423 }, { "epoch": 2.367911479944675, "grad_norm": 8.288269996643066, "learning_rate": 4.240049177808514e-05, "log_odds_chosen": 7.2223219871521, "log_odds_ratio": -0.015759773552417755, "logits/chosen": -0.6505395174026489, "logits/rejected": -0.6855196952819824, "logps/chosen": -0.04627052694559097, "logps/rejected": -1.183556318283081, "loss": 2.4063, "nll_loss": 0.6000023484230042, "rewards/accuracies": 1.0, "rewards/chosen": -0.004627052694559097, "rewards/margins": 0.11372856795787811, "rewards/rejected": -0.11835562437772751, "step": 3424 }, { "epoch": 2.368603042876902, "grad_norm": 6.999942302703857, "learning_rate": 4.239664976179499e-05, "log_odds_chosen": 8.207736015319824, "log_odds_ratio": -0.008073610253632069, "logits/chosen": -0.5741896629333496, "logits/rejected": -0.6363852024078369, "logps/chosen": -0.02019553631544113, "logps/rejected": -1.5705045461654663, "loss": 2.1859, "nll_loss": 0.5456606149673462, "rewards/accuracies": 1.0, "rewards/chosen": -0.002019553678110242, "rewards/margins": 0.15503089129924774, "rewards/rejected": -0.15705044567584991, "step": 3425 }, { "epoch": 2.3692946058091287, "grad_norm": 7.205198287963867, "learning_rate": 4.2392807745504845e-05, "log_odds_chosen": 7.496864318847656, "log_odds_ratio": -0.027593035250902176, "logits/chosen": -0.0906713455915451, "logits/rejected": -0.1560470461845398, "logps/chosen": -0.011206595227122307, "logps/rejected": -1.5430270433425903, "loss": 1.8617, "nll_loss": 0.4626578688621521, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011206595227122307, "rewards/margins": 0.1531820446252823, "rewards/rejected": -0.154302716255188, "step": 3426 }, { "epoch": 2.3699861687413555, "grad_norm": 9.317922592163086, "learning_rate": 4.238896572921469e-05, "log_odds_chosen": 8.463835716247559, "log_odds_ratio": -0.002514339517802, "logits/chosen": -0.8520632386207581, "logits/rejected": -0.8247945308685303, "logps/chosen": -0.0037631026934832335, "logps/rejected": -2.1205391883850098, "loss": 2.2781, "nll_loss": 0.5692840218544006, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003763103159144521, "rewards/margins": 0.21167761087417603, "rewards/rejected": -0.21205392479896545, "step": 3427 }, { "epoch": 2.3706777316735823, "grad_norm": 12.661592483520508, "learning_rate": 4.238512371292454e-05, "log_odds_chosen": 7.507530212402344, "log_odds_ratio": -0.002025590743869543, "logits/chosen": -0.5045261383056641, "logits/rejected": -0.5771041512489319, "logps/chosen": -0.008932366967201233, "logps/rejected": -1.6672617197036743, "loss": 3.6596, "nll_loss": 0.9147093892097473, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008932367200031877, "rewards/margins": 0.16583293676376343, "rewards/rejected": -0.16672618687152863, "step": 3428 }, { "epoch": 2.371369294605809, "grad_norm": 7.28396463394165, "learning_rate": 4.2381281696634395e-05, "log_odds_chosen": 5.50955867767334, "log_odds_ratio": -0.18129563331604004, "logits/chosen": -0.43971797823905945, "logits/rejected": -0.42192983627319336, "logps/chosen": -0.05782304331660271, "logps/rejected": -1.093947410583496, "loss": 2.5992, "nll_loss": 0.6316773891448975, "rewards/accuracies": 0.875, "rewards/chosen": -0.005782304331660271, "rewards/margins": 0.10361243784427643, "rewards/rejected": -0.10939474403858185, "step": 3429 }, { "epoch": 2.372060857538036, "grad_norm": 19.657962799072266, "learning_rate": 4.237743968034424e-05, "log_odds_chosen": 7.222588539123535, "log_odds_ratio": -0.03204096108675003, "logits/chosen": -0.7179023027420044, "logits/rejected": -0.8604851365089417, "logps/chosen": -0.012024748139083385, "logps/rejected": -1.5378297567367554, "loss": 2.6524, "nll_loss": 0.6599072217941284, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012024750467389822, "rewards/margins": 0.15258049964904785, "rewards/rejected": -0.15378296375274658, "step": 3430 }, { "epoch": 2.372752420470263, "grad_norm": 9.396291732788086, "learning_rate": 4.23735976640541e-05, "log_odds_chosen": 6.863970756530762, "log_odds_ratio": -0.008415119722485542, "logits/chosen": -0.6852945685386658, "logits/rejected": -0.7253008484840393, "logps/chosen": -0.026889167726039886, "logps/rejected": -1.6196262836456299, "loss": 2.5258, "nll_loss": 0.6306195259094238, "rewards/accuracies": 1.0, "rewards/chosen": -0.002688916865736246, "rewards/margins": 0.15927371382713318, "rewards/rejected": -0.161962628364563, "step": 3431 }, { "epoch": 2.3734439834024896, "grad_norm": 10.495866775512695, "learning_rate": 4.2369755647763946e-05, "log_odds_chosen": 6.5691633224487305, "log_odds_ratio": -0.08497857302427292, "logits/chosen": -0.30856412649154663, "logits/rejected": -0.33345896005630493, "logps/chosen": -0.028544174507260323, "logps/rejected": -1.3216798305511475, "loss": 2.7573, "nll_loss": 0.6808239221572876, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028544177766889334, "rewards/margins": 0.12931357324123383, "rewards/rejected": -0.1321679949760437, "step": 3432 }, { "epoch": 2.3741355463347165, "grad_norm": 6.415916919708252, "learning_rate": 4.23659136314738e-05, "log_odds_chosen": 6.058993339538574, "log_odds_ratio": -0.006088991649448872, "logits/chosen": -0.6415539979934692, "logits/rejected": -0.6294801235198975, "logps/chosen": -0.025170328095555305, "logps/rejected": -1.4109984636306763, "loss": 1.6702, "nll_loss": 0.41693025827407837, "rewards/accuracies": 1.0, "rewards/chosen": -0.002517032902687788, "rewards/margins": 0.13858281075954437, "rewards/rejected": -0.14109984040260315, "step": 3433 }, { "epoch": 2.3748271092669433, "grad_norm": 4.665531635284424, "learning_rate": 4.236207161518365e-05, "log_odds_chosen": 7.872640132904053, "log_odds_ratio": -0.002072228817269206, "logits/chosen": -0.19979572296142578, "logits/rejected": -0.26814359426498413, "logps/chosen": -0.024378551170229912, "logps/rejected": -1.237983226776123, "loss": 2.4034, "nll_loss": 0.6006313562393188, "rewards/accuracies": 1.0, "rewards/chosen": -0.002437855117022991, "rewards/margins": 0.12136045098304749, "rewards/rejected": -0.12379831075668335, "step": 3434 }, { "epoch": 2.37551867219917, "grad_norm": 7.580257415771484, "learning_rate": 4.23582295988935e-05, "log_odds_chosen": 6.102107048034668, "log_odds_ratio": -0.2961543798446655, "logits/chosen": -0.10959547758102417, "logits/rejected": -0.15983079373836517, "logps/chosen": -0.07290703058242798, "logps/rejected": -1.5514822006225586, "loss": 2.3746, "nll_loss": 0.5640414953231812, "rewards/accuracies": 0.75, "rewards/chosen": -0.007290703244507313, "rewards/margins": 0.14785751700401306, "rewards/rejected": -0.1551482081413269, "step": 3435 }, { "epoch": 2.376210235131397, "grad_norm": 10.682846069335938, "learning_rate": 4.235438758260335e-05, "log_odds_chosen": 8.229127883911133, "log_odds_ratio": -0.0005710788536816835, "logits/chosen": -0.6116784811019897, "logits/rejected": -0.6720350384712219, "logps/chosen": -0.007531903684139252, "logps/rejected": -1.4279307126998901, "loss": 4.3613, "nll_loss": 1.090259075164795, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007531904266215861, "rewards/margins": 0.14203989505767822, "rewards/rejected": -0.14279307425022125, "step": 3436 }, { "epoch": 2.376901798063624, "grad_norm": 4.78339958190918, "learning_rate": 4.23505455663132e-05, "log_odds_chosen": 6.552225112915039, "log_odds_ratio": -0.1655147522687912, "logits/chosen": -0.2748771905899048, "logits/rejected": -0.28989773988723755, "logps/chosen": -0.04578880965709686, "logps/rejected": -1.4036614894866943, "loss": 2.4327, "nll_loss": 0.591616690158844, "rewards/accuracies": 0.875, "rewards/chosen": -0.004578881431370974, "rewards/margins": 0.1357872635126114, "rewards/rejected": -0.14036613702774048, "step": 3437 }, { "epoch": 2.3775933609958506, "grad_norm": 5.989414691925049, "learning_rate": 4.2346703550023054e-05, "log_odds_chosen": 7.069077968597412, "log_odds_ratio": -0.014767002314329147, "logits/chosen": -0.3627806305885315, "logits/rejected": -0.3392050564289093, "logps/chosen": -0.02562572993338108, "logps/rejected": -2.0423498153686523, "loss": 2.2831, "nll_loss": 0.5692924857139587, "rewards/accuracies": 1.0, "rewards/chosen": -0.002562573179602623, "rewards/margins": 0.20167241990566254, "rewards/rejected": -0.2042349874973297, "step": 3438 }, { "epoch": 2.3782849239280774, "grad_norm": 12.863221168518066, "learning_rate": 4.23428615337329e-05, "log_odds_chosen": 9.35482406616211, "log_odds_ratio": -0.028638150542974472, "logits/chosen": -0.42819708585739136, "logits/rejected": -0.5285694003105164, "logps/chosen": -0.00422546686604619, "logps/rejected": -1.748453140258789, "loss": 3.1993, "nll_loss": 0.7969600558280945, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004225466982461512, "rewards/margins": 0.17442278563976288, "rewards/rejected": -0.17484530806541443, "step": 3439 }, { "epoch": 2.3789764868603043, "grad_norm": 7.027946949005127, "learning_rate": 4.233901951744276e-05, "log_odds_chosen": 7.778133869171143, "log_odds_ratio": -0.0407455638051033, "logits/chosen": -0.663629949092865, "logits/rejected": -0.7393888235092163, "logps/chosen": -0.012316934764385223, "logps/rejected": -1.5184440612792969, "loss": 2.6779, "nll_loss": 0.6653945446014404, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012316934298723936, "rewards/margins": 0.1506127119064331, "rewards/rejected": -0.15184439718723297, "step": 3440 }, { "epoch": 2.379668049792531, "grad_norm": 13.124755859375, "learning_rate": 4.2335177501152604e-05, "log_odds_chosen": 5.84478759765625, "log_odds_ratio": -0.586132287979126, "logits/chosen": -0.6295636296272278, "logits/rejected": -0.6671774387359619, "logps/chosen": -0.1445447951555252, "logps/rejected": -1.2849104404449463, "loss": 2.3785, "nll_loss": 0.5360167026519775, "rewards/accuracies": 0.875, "rewards/chosen": -0.014454478397965431, "rewards/margins": 0.11403656005859375, "rewards/rejected": -0.12849104404449463, "step": 3441 }, { "epoch": 2.380359612724758, "grad_norm": 8.439517974853516, "learning_rate": 4.233133548486246e-05, "log_odds_chosen": 6.201792240142822, "log_odds_ratio": -0.06423471868038177, "logits/chosen": -0.31344470381736755, "logits/rejected": -0.3481372594833374, "logps/chosen": -0.023115266114473343, "logps/rejected": -1.3296178579330444, "loss": 2.4696, "nll_loss": 0.6109641790390015, "rewards/accuracies": 1.0, "rewards/chosen": -0.002311526797711849, "rewards/margins": 0.13065025210380554, "rewards/rejected": -0.13296179473400116, "step": 3442 }, { "epoch": 2.3810511756569848, "grad_norm": 5.6978020668029785, "learning_rate": 4.232749346857231e-05, "log_odds_chosen": 8.566123962402344, "log_odds_ratio": -0.0043657380156219006, "logits/chosen": -0.17997238039970398, "logits/rejected": -0.23091138899326324, "logps/chosen": -0.005184969864785671, "logps/rejected": -1.4944618940353394, "loss": 2.1032, "nll_loss": 0.5253602266311646, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005184969631955028, "rewards/margins": 0.1489276885986328, "rewards/rejected": -0.14944618940353394, "step": 3443 }, { "epoch": 2.3817427385892116, "grad_norm": 5.119330406188965, "learning_rate": 4.232365145228216e-05, "log_odds_chosen": 5.514773845672607, "log_odds_ratio": -0.09987065941095352, "logits/chosen": -0.41642263531684875, "logits/rejected": -0.4259873926639557, "logps/chosen": -0.0746447816491127, "logps/rejected": -0.8729536533355713, "loss": 2.3954, "nll_loss": 0.5888611674308777, "rewards/accuracies": 1.0, "rewards/chosen": -0.00746447779238224, "rewards/margins": 0.07983088493347168, "rewards/rejected": -0.08729536831378937, "step": 3444 }, { "epoch": 2.3824343015214384, "grad_norm": 6.080493927001953, "learning_rate": 4.231980943599201e-05, "log_odds_chosen": 6.873577117919922, "log_odds_ratio": -0.08558716624975204, "logits/chosen": -0.35207927227020264, "logits/rejected": -0.4364143908023834, "logps/chosen": -0.026298439130187035, "logps/rejected": -2.3086209297180176, "loss": 2.4776, "nll_loss": 0.6108462810516357, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026298437733203173, "rewards/margins": 0.2282322347164154, "rewards/rejected": -0.2308620810508728, "step": 3445 }, { "epoch": 2.3831258644536653, "grad_norm": 12.112558364868164, "learning_rate": 4.231596741970186e-05, "log_odds_chosen": 9.93198013305664, "log_odds_ratio": -0.006665684282779694, "logits/chosen": -0.46666690707206726, "logits/rejected": -0.5174428224563599, "logps/chosen": -0.006805818993598223, "logps/rejected": -2.516042470932007, "loss": 2.0792, "nll_loss": 0.519120991230011, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006805819575674832, "rewards/margins": 0.25092369318008423, "rewards/rejected": -0.25160425901412964, "step": 3446 }, { "epoch": 2.383817427385892, "grad_norm": 11.87564754486084, "learning_rate": 4.231212540341171e-05, "log_odds_chosen": 7.434047222137451, "log_odds_ratio": -0.04765244945883751, "logits/chosen": -0.36915624141693115, "logits/rejected": -0.42544883489608765, "logps/chosen": -0.008615016005933285, "logps/rejected": -1.1400610208511353, "loss": 3.0075, "nll_loss": 0.7471170425415039, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008615015540271997, "rewards/margins": 0.11314460635185242, "rewards/rejected": -0.11400610208511353, "step": 3447 }, { "epoch": 2.384508990318119, "grad_norm": 9.698756217956543, "learning_rate": 4.230828338712156e-05, "log_odds_chosen": 7.496614456176758, "log_odds_ratio": -0.00677049346268177, "logits/chosen": -0.5656105875968933, "logits/rejected": -0.6477078199386597, "logps/chosen": -0.008299939334392548, "logps/rejected": -1.4475886821746826, "loss": 3.4122, "nll_loss": 0.8523802757263184, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008299939800053835, "rewards/margins": 0.1439288854598999, "rewards/rejected": -0.14475886523723602, "step": 3448 }, { "epoch": 2.3852005532503457, "grad_norm": 10.093292236328125, "learning_rate": 4.230444137083142e-05, "log_odds_chosen": 4.253750324249268, "log_odds_ratio": -0.2802105247974396, "logits/chosen": -0.6729620099067688, "logits/rejected": -0.7065836787223816, "logps/chosen": -0.0628410279750824, "logps/rejected": -0.6742812395095825, "loss": 3.1062, "nll_loss": 0.7485273480415344, "rewards/accuracies": 0.875, "rewards/chosen": -0.006284103263169527, "rewards/margins": 0.06114402785897255, "rewards/rejected": -0.06742812693119049, "step": 3449 }, { "epoch": 2.3858921161825726, "grad_norm": 10.238203048706055, "learning_rate": 4.230059935454126e-05, "log_odds_chosen": 6.280107021331787, "log_odds_ratio": -0.08056508749723434, "logits/chosen": -0.7012012600898743, "logits/rejected": -0.7339482307434082, "logps/chosen": -0.041637249290943146, "logps/rejected": -1.1578660011291504, "loss": 3.0957, "nll_loss": 0.7658792734146118, "rewards/accuracies": 1.0, "rewards/chosen": -0.004163725301623344, "rewards/margins": 0.1116228848695755, "rewards/rejected": -0.1157865971326828, "step": 3450 }, { "epoch": 2.3865836791147994, "grad_norm": 5.492684364318848, "learning_rate": 4.2296757338251115e-05, "log_odds_chosen": 7.788609981536865, "log_odds_ratio": -0.005910110659897327, "logits/chosen": -0.5513278245925903, "logits/rejected": -0.6317500472068787, "logps/chosen": -0.04046875983476639, "logps/rejected": -1.576035737991333, "loss": 2.3057, "nll_loss": 0.5758423209190369, "rewards/accuracies": 1.0, "rewards/chosen": -0.004046875983476639, "rewards/margins": 0.1535567045211792, "rewards/rejected": -0.15760357677936554, "step": 3451 }, { "epoch": 2.3872752420470262, "grad_norm": 9.929596900939941, "learning_rate": 4.229291532196097e-05, "log_odds_chosen": 7.062018394470215, "log_odds_ratio": -0.007457717787474394, "logits/chosen": -0.6636491417884827, "logits/rejected": -0.6853954792022705, "logps/chosen": -0.028744446113705635, "logps/rejected": -1.737699031829834, "loss": 2.9979, "nll_loss": 0.7487397789955139, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028744444716721773, "rewards/margins": 0.17089545726776123, "rewards/rejected": -0.17376990616321564, "step": 3452 }, { "epoch": 2.387966804979253, "grad_norm": 10.12405776977539, "learning_rate": 4.228907330567082e-05, "log_odds_chosen": 7.498879432678223, "log_odds_ratio": -0.014232144691050053, "logits/chosen": -0.513949453830719, "logits/rejected": -0.5723739862442017, "logps/chosen": -0.046775419265031815, "logps/rejected": -1.9690231084823608, "loss": 2.5009, "nll_loss": 0.6237916946411133, "rewards/accuracies": 1.0, "rewards/chosen": -0.004677542019635439, "rewards/margins": 0.19222477078437805, "rewards/rejected": -0.1969023048877716, "step": 3453 }, { "epoch": 2.38865836791148, "grad_norm": 7.1505608558654785, "learning_rate": 4.2285231289380666e-05, "log_odds_chosen": 7.8512282371521, "log_odds_ratio": -0.0028052683919668198, "logits/chosen": -0.6594638824462891, "logits/rejected": -0.6560357809066772, "logps/chosen": -0.010988444089889526, "logps/rejected": -1.8420581817626953, "loss": 2.0855, "nll_loss": 0.5210833549499512, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010988445719704032, "rewards/margins": 0.18310695886611938, "rewards/rejected": -0.1842058002948761, "step": 3454 }, { "epoch": 2.3893499308437067, "grad_norm": 10.518378257751465, "learning_rate": 4.228138927309052e-05, "log_odds_chosen": 7.85250186920166, "log_odds_ratio": -0.059666577726602554, "logits/chosen": -0.5748401880264282, "logits/rejected": -0.6480482220649719, "logps/chosen": -0.016306307166814804, "logps/rejected": -1.607426404953003, "loss": 3.279, "nll_loss": 0.8137713670730591, "rewards/accuracies": 1.0, "rewards/chosen": -0.001630630693398416, "rewards/margins": 0.15911200642585754, "rewards/rejected": -0.1607426404953003, "step": 3455 }, { "epoch": 2.3900414937759336, "grad_norm": 9.05886459350586, "learning_rate": 4.227754725680037e-05, "log_odds_chosen": 8.22207260131836, "log_odds_ratio": -0.002946071792393923, "logits/chosen": -0.8173617124557495, "logits/rejected": -0.9066953063011169, "logps/chosen": -0.007910683751106262, "logps/rejected": -1.566526174545288, "loss": 2.5319, "nll_loss": 0.6326807141304016, "rewards/accuracies": 1.0, "rewards/chosen": -0.000791068421676755, "rewards/margins": 0.15586155652999878, "rewards/rejected": -0.15665262937545776, "step": 3456 }, { "epoch": 2.3907330567081604, "grad_norm": 11.151406288146973, "learning_rate": 4.2273705240510216e-05, "log_odds_chosen": 6.041598320007324, "log_odds_ratio": -0.27977150678634644, "logits/chosen": -0.2281467467546463, "logits/rejected": -0.3184647262096405, "logps/chosen": -0.037950512021780014, "logps/rejected": -1.3815393447875977, "loss": 2.4032, "nll_loss": 0.5728345513343811, "rewards/accuracies": 0.75, "rewards/chosen": -0.00379505124874413, "rewards/margins": 0.13435888290405273, "rewards/rejected": -0.13815394043922424, "step": 3457 }, { "epoch": 2.391424619640387, "grad_norm": 5.956516265869141, "learning_rate": 4.2269863224220076e-05, "log_odds_chosen": 8.76341724395752, "log_odds_ratio": -0.005829709582030773, "logits/chosen": -0.8177924156188965, "logits/rejected": -0.8164641857147217, "logps/chosen": -0.007455950137227774, "logps/rejected": -1.714500904083252, "loss": 2.3196, "nll_loss": 0.5793288946151733, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007455950835719705, "rewards/margins": 0.17070451378822327, "rewards/rejected": -0.17145009338855743, "step": 3458 }, { "epoch": 2.392116182572614, "grad_norm": 8.454880714416504, "learning_rate": 4.226602120792992e-05, "log_odds_chosen": 8.373088836669922, "log_odds_ratio": -0.006798881571739912, "logits/chosen": -0.7450951337814331, "logits/rejected": -0.7184380888938904, "logps/chosen": -0.0014081236440688372, "logps/rejected": -0.927725613117218, "loss": 2.5047, "nll_loss": 0.625499427318573, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014081236440688372, "rewards/margins": 0.0926317572593689, "rewards/rejected": -0.09277257323265076, "step": 3459 }, { "epoch": 2.392807745504841, "grad_norm": 6.365279674530029, "learning_rate": 4.2262179191639774e-05, "log_odds_chosen": 7.5471720695495605, "log_odds_ratio": -0.15343418717384338, "logits/chosen": -0.733639657497406, "logits/rejected": -0.7661193013191223, "logps/chosen": -0.038739293813705444, "logps/rejected": -1.47605562210083, "loss": 2.1541, "nll_loss": 0.5231756567955017, "rewards/accuracies": 0.875, "rewards/chosen": -0.003873929614201188, "rewards/margins": 0.14373163878917694, "rewards/rejected": -0.14760556817054749, "step": 3460 }, { "epoch": 2.3934993084370677, "grad_norm": 5.454896926879883, "learning_rate": 4.2258337175349626e-05, "log_odds_chosen": 8.599830627441406, "log_odds_ratio": -0.000444973586127162, "logits/chosen": -0.5653376579284668, "logits/rejected": -0.5564531087875366, "logps/chosen": -0.018016574904322624, "logps/rejected": -1.6302697658538818, "loss": 2.4157, "nll_loss": 0.603872537612915, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018016574904322624, "rewards/margins": 0.1612253189086914, "rewards/rejected": -0.16302695870399475, "step": 3461 }, { "epoch": 2.3941908713692945, "grad_norm": 6.181629180908203, "learning_rate": 4.225449515905948e-05, "log_odds_chosen": 6.0642008781433105, "log_odds_ratio": -0.38111788034439087, "logits/chosen": -0.5761101841926575, "logits/rejected": -0.6472548246383667, "logps/chosen": -0.141601100564003, "logps/rejected": -1.98850679397583, "loss": 1.897, "nll_loss": 0.43614256381988525, "rewards/accuracies": 0.875, "rewards/chosen": -0.01416010968387127, "rewards/margins": 0.18469057977199554, "rewards/rejected": -0.19885067641735077, "step": 3462 }, { "epoch": 2.3948824343015214, "grad_norm": 9.098873138427734, "learning_rate": 4.2250653142769324e-05, "log_odds_chosen": 6.658843040466309, "log_odds_ratio": -0.08088690787553787, "logits/chosen": -0.36394232511520386, "logits/rejected": -0.4369269609451294, "logps/chosen": -0.019272904843091965, "logps/rejected": -1.2084076404571533, "loss": 2.163, "nll_loss": 0.53264981508255, "rewards/accuracies": 1.0, "rewards/chosen": -0.001927290461026132, "rewards/margins": 0.11891347914934158, "rewards/rejected": -0.12084076553583145, "step": 3463 }, { "epoch": 2.395573997233748, "grad_norm": 9.867566108703613, "learning_rate": 4.224681112647918e-05, "log_odds_chosen": 5.931120872497559, "log_odds_ratio": -0.029134787619113922, "logits/chosen": -0.5077770948410034, "logits/rejected": -0.5073307752609253, "logps/chosen": -0.0129080293700099, "logps/rejected": -1.1946752071380615, "loss": 2.1006, "nll_loss": 0.5222440958023071, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012908029602840543, "rewards/margins": 0.11817672848701477, "rewards/rejected": -0.11946753412485123, "step": 3464 }, { "epoch": 2.396265560165975, "grad_norm": 11.358476638793945, "learning_rate": 4.224296911018903e-05, "log_odds_chosen": 4.821341514587402, "log_odds_ratio": -0.3646850287914276, "logits/chosen": -0.519396185874939, "logits/rejected": -0.5581240653991699, "logps/chosen": -0.0811510905623436, "logps/rejected": -0.7058289051055908, "loss": 2.3962, "nll_loss": 0.5625927448272705, "rewards/accuracies": 0.75, "rewards/chosen": -0.00811510905623436, "rewards/margins": 0.0624677836894989, "rewards/rejected": -0.07058288902044296, "step": 3465 }, { "epoch": 2.396957123098202, "grad_norm": 7.898974895477295, "learning_rate": 4.2239127093898875e-05, "log_odds_chosen": 8.102975845336914, "log_odds_ratio": -0.007673066109418869, "logits/chosen": -0.388555109500885, "logits/rejected": -0.4589465856552124, "logps/chosen": -0.009603948332369328, "logps/rejected": -1.1193115711212158, "loss": 2.0014, "nll_loss": 0.4995794892311096, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009603948565199971, "rewards/margins": 0.11097076535224915, "rewards/rejected": -0.11193115264177322, "step": 3466 }, { "epoch": 2.3976486860304287, "grad_norm": 9.023910522460938, "learning_rate": 4.2235285077608734e-05, "log_odds_chosen": 5.483177185058594, "log_odds_ratio": -0.30498525500297546, "logits/chosen": -0.40885457396507263, "logits/rejected": -0.45225241780281067, "logps/chosen": -0.05430954322218895, "logps/rejected": -0.9977082014083862, "loss": 2.5177, "nll_loss": 0.5989183783531189, "rewards/accuracies": 0.875, "rewards/chosen": -0.00543095450848341, "rewards/margins": 0.09433987736701965, "rewards/rejected": -0.09977082908153534, "step": 3467 }, { "epoch": 2.3983402489626555, "grad_norm": 10.120564460754395, "learning_rate": 4.223144306131858e-05, "log_odds_chosen": 6.483001708984375, "log_odds_ratio": -0.017802314832806587, "logits/chosen": -0.8806463479995728, "logits/rejected": -0.8878389596939087, "logps/chosen": -0.01730438694357872, "logps/rejected": -1.4190657138824463, "loss": 2.5606, "nll_loss": 0.6383647918701172, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017304387874901295, "rewards/margins": 0.14017613232135773, "rewards/rejected": -0.14190655946731567, "step": 3468 }, { "epoch": 2.3990318118948823, "grad_norm": 82.23304748535156, "learning_rate": 4.222760104502843e-05, "log_odds_chosen": 5.96318244934082, "log_odds_ratio": -0.2941014766693115, "logits/chosen": -0.9196685552597046, "logits/rejected": -0.9675211906433105, "logps/chosen": -0.08452773094177246, "logps/rejected": -1.7398574352264404, "loss": 2.2949, "nll_loss": 0.5443228483200073, "rewards/accuracies": 0.875, "rewards/chosen": -0.008452773094177246, "rewards/margins": 0.16553297638893127, "rewards/rejected": -0.17398573458194733, "step": 3469 }, { "epoch": 2.399723374827109, "grad_norm": 8.60822582244873, "learning_rate": 4.2223759028738285e-05, "log_odds_chosen": 7.1528096199035645, "log_odds_ratio": -0.01803523115813732, "logits/chosen": -0.9783897995948792, "logits/rejected": -1.034075379371643, "logps/chosen": -0.023058656603097916, "logps/rejected": -1.163728952407837, "loss": 2.1729, "nll_loss": 0.5414154529571533, "rewards/accuracies": 1.0, "rewards/chosen": -0.002305865753442049, "rewards/margins": 0.11406703293323517, "rewards/rejected": -0.11637289822101593, "step": 3470 }, { "epoch": 2.400414937759336, "grad_norm": 8.744670867919922, "learning_rate": 4.221991701244814e-05, "log_odds_chosen": 4.124874114990234, "log_odds_ratio": -0.30007296800613403, "logits/chosen": -0.20929817855358124, "logits/rejected": -0.24133385717868805, "logps/chosen": -0.0663536861538887, "logps/rejected": -0.3404579758644104, "loss": 3.041, "nll_loss": 0.7302361726760864, "rewards/accuracies": 0.75, "rewards/chosen": -0.006635368801653385, "rewards/margins": 0.02741042897105217, "rewards/rejected": -0.03404579684138298, "step": 3471 }, { "epoch": 2.401106500691563, "grad_norm": 11.96430778503418, "learning_rate": 4.221607499615798e-05, "log_odds_chosen": 9.046222686767578, "log_odds_ratio": -0.001090765930712223, "logits/chosen": -0.8712348937988281, "logits/rejected": -1.0210875272750854, "logps/chosen": -0.0011071586050093174, "logps/rejected": -1.683934211730957, "loss": 2.9123, "nll_loss": 0.7279656529426575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011071586050093174, "rewards/margins": 0.16828271746635437, "rewards/rejected": -0.16839343309402466, "step": 3472 }, { "epoch": 2.4017980636237897, "grad_norm": 7.895980358123779, "learning_rate": 4.2212232979867835e-05, "log_odds_chosen": 8.111176490783691, "log_odds_ratio": -0.0013888756511732936, "logits/chosen": -0.8593344688415527, "logits/rejected": -0.7808473110198975, "logps/chosen": -0.0021940222941339016, "logps/rejected": -1.2674121856689453, "loss": 2.1754, "nll_loss": 0.5437151193618774, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021940222359262407, "rewards/margins": 0.12652181088924408, "rewards/rejected": -0.1267412155866623, "step": 3473 }, { "epoch": 2.4024896265560165, "grad_norm": 8.358758926391602, "learning_rate": 4.220839096357769e-05, "log_odds_chosen": 6.501728534698486, "log_odds_ratio": -0.021476784721016884, "logits/chosen": -1.0236040353775024, "logits/rejected": -1.0076302289962769, "logps/chosen": -0.023863408714532852, "logps/rejected": -1.1087217330932617, "loss": 2.3159, "nll_loss": 0.5768356919288635, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023863406386226416, "rewards/margins": 0.10848583281040192, "rewards/rejected": -0.11087217181921005, "step": 3474 }, { "epoch": 2.4031811894882433, "grad_norm": 7.01558256149292, "learning_rate": 4.220454894728753e-05, "log_odds_chosen": 5.170758247375488, "log_odds_ratio": -0.13841156661510468, "logits/chosen": -0.5865446925163269, "logits/rejected": -0.612939178943634, "logps/chosen": -0.07330407202243805, "logps/rejected": -1.681246042251587, "loss": 2.7987, "nll_loss": 0.685846209526062, "rewards/accuracies": 0.875, "rewards/chosen": -0.00733040738850832, "rewards/margins": 0.160794198513031, "rewards/rejected": -0.16812460124492645, "step": 3475 }, { "epoch": 2.40387275242047, "grad_norm": 7.598639965057373, "learning_rate": 4.220070693099739e-05, "log_odds_chosen": 7.575196743011475, "log_odds_ratio": -0.0028736027888953686, "logits/chosen": -0.8495464324951172, "logits/rejected": -0.847161591053009, "logps/chosen": -0.02533099614083767, "logps/rejected": -1.672313928604126, "loss": 2.3675, "nll_loss": 0.5915927886962891, "rewards/accuracies": 1.0, "rewards/chosen": -0.002533099614083767, "rewards/margins": 0.1646983027458191, "rewards/rejected": -0.16723139584064484, "step": 3476 }, { "epoch": 2.404564315352697, "grad_norm": 7.659574031829834, "learning_rate": 4.219686491470724e-05, "log_odds_chosen": 5.771527290344238, "log_odds_ratio": -0.05470234900712967, "logits/chosen": -0.5478336215019226, "logits/rejected": -0.634901762008667, "logps/chosen": -0.02737801894545555, "logps/rejected": -0.8024625778198242, "loss": 2.5849, "nll_loss": 0.6407510042190552, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027378019876778126, "rewards/margins": 0.07750844955444336, "rewards/rejected": -0.08024625480175018, "step": 3477 }, { "epoch": 2.405255878284924, "grad_norm": 10.186736106872559, "learning_rate": 4.219302289841709e-05, "log_odds_chosen": 5.86126708984375, "log_odds_ratio": -0.1267208307981491, "logits/chosen": -0.6385801434516907, "logits/rejected": -0.6514959931373596, "logps/chosen": -0.038592059165239334, "logps/rejected": -0.8937729597091675, "loss": 2.1297, "nll_loss": 0.5197412371635437, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038592061027884483, "rewards/margins": 0.08551809191703796, "rewards/rejected": -0.08937729895114899, "step": 3478 }, { "epoch": 2.4059474412171507, "grad_norm": 12.728795051574707, "learning_rate": 4.218918088212694e-05, "log_odds_chosen": 8.360170364379883, "log_odds_ratio": -0.05475056543946266, "logits/chosen": -0.445570170879364, "logits/rejected": -0.5104779601097107, "logps/chosen": -0.019982457160949707, "logps/rejected": -1.60440993309021, "loss": 1.9802, "nll_loss": 0.4895625710487366, "rewards/accuracies": 1.0, "rewards/chosen": -0.001998245483264327, "rewards/margins": 0.15844275057315826, "rewards/rejected": -0.16044099628925323, "step": 3479 }, { "epoch": 2.4066390041493775, "grad_norm": 6.73025369644165, "learning_rate": 4.2185338865836796e-05, "log_odds_chosen": 7.225137710571289, "log_odds_ratio": -0.025533132255077362, "logits/chosen": -0.6241756677627563, "logits/rejected": -0.6339388489723206, "logps/chosen": -0.010463009588420391, "logps/rejected": -1.1056491136550903, "loss": 2.7345, "nll_loss": 0.6810735464096069, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010463010985404253, "rewards/margins": 0.10951860249042511, "rewards/rejected": -0.11056490242481232, "step": 3480 }, { "epoch": 2.4073305670816043, "grad_norm": 6.189925670623779, "learning_rate": 4.218149684954664e-05, "log_odds_chosen": 6.061086177825928, "log_odds_ratio": -0.16295485198497772, "logits/chosen": -0.5748433470726013, "logits/rejected": -0.5821690559387207, "logps/chosen": -0.04194222018122673, "logps/rejected": -0.7503185272216797, "loss": 2.334, "nll_loss": 0.5672000050544739, "rewards/accuracies": 0.875, "rewards/chosen": -0.004194222390651703, "rewards/margins": 0.07083762437105179, "rewards/rejected": -0.07503185421228409, "step": 3481 }, { "epoch": 2.408022130013831, "grad_norm": 8.091987609863281, "learning_rate": 4.2177654833256494e-05, "log_odds_chosen": 6.556269645690918, "log_odds_ratio": -0.05733267217874527, "logits/chosen": -0.47325581312179565, "logits/rejected": -0.45619022846221924, "logps/chosen": -0.021132618188858032, "logps/rejected": -1.0394395589828491, "loss": 1.7748, "nll_loss": 0.43796780705451965, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021132619585841894, "rewards/margins": 0.10183070600032806, "rewards/rejected": -0.10394395887851715, "step": 3482 }, { "epoch": 2.408713692946058, "grad_norm": 9.280055046081543, "learning_rate": 4.2173812816966346e-05, "log_odds_chosen": 8.679950714111328, "log_odds_ratio": -0.0015581633197143674, "logits/chosen": -0.7012354731559753, "logits/rejected": -0.7508944272994995, "logps/chosen": -0.013607164844870567, "logps/rejected": -1.7001599073410034, "loss": 2.3659, "nll_loss": 0.5913158655166626, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013607164146378636, "rewards/margins": 0.16865527629852295, "rewards/rejected": -0.17001599073410034, "step": 3483 }, { "epoch": 2.409405255878285, "grad_norm": 13.053488731384277, "learning_rate": 4.216997080067619e-05, "log_odds_chosen": 7.968313217163086, "log_odds_ratio": -0.06880037486553192, "logits/chosen": -0.5693072080612183, "logits/rejected": -0.6625873446464539, "logps/chosen": -0.02932230569422245, "logps/rejected": -1.8904154300689697, "loss": 3.0299, "nll_loss": 0.7505956888198853, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029322307091206312, "rewards/margins": 0.18610931932926178, "rewards/rejected": -0.18904155492782593, "step": 3484 }, { "epoch": 2.4100968188105116, "grad_norm": 7.838770389556885, "learning_rate": 4.216612878438605e-05, "log_odds_chosen": 7.503055572509766, "log_odds_ratio": -0.07354681938886642, "logits/chosen": -0.6398206949234009, "logits/rejected": -0.5904887914657593, "logps/chosen": -0.022758491337299347, "logps/rejected": -1.350595235824585, "loss": 2.1233, "nll_loss": 0.5234798192977905, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022758489940315485, "rewards/margins": 0.1327836811542511, "rewards/rejected": -0.13505952060222626, "step": 3485 }, { "epoch": 2.4107883817427385, "grad_norm": 9.07406234741211, "learning_rate": 4.2162286768095897e-05, "log_odds_chosen": 6.529168128967285, "log_odds_ratio": -0.09720098227262497, "logits/chosen": -0.5522468090057373, "logits/rejected": -0.5980552434921265, "logps/chosen": -0.025193000212311745, "logps/rejected": -1.235506534576416, "loss": 2.4523, "nll_loss": 0.6033427119255066, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025192999746650457, "rewards/margins": 0.12103135138750076, "rewards/rejected": -0.1235506534576416, "step": 3486 }, { "epoch": 2.4114799446749653, "grad_norm": 7.101974010467529, "learning_rate": 4.215844475180575e-05, "log_odds_chosen": 6.016046524047852, "log_odds_ratio": -0.020641181617975235, "logits/chosen": -0.7464077472686768, "logits/rejected": -0.7497326135635376, "logps/chosen": -0.056564487516880035, "logps/rejected": -1.5879918336868286, "loss": 2.3461, "nll_loss": 0.5844558477401733, "rewards/accuracies": 1.0, "rewards/chosen": -0.005656449124217033, "rewards/margins": 0.15314273536205292, "rewards/rejected": -0.1587991863489151, "step": 3487 }, { "epoch": 2.412171507607192, "grad_norm": 7.258253574371338, "learning_rate": 4.21546027355156e-05, "log_odds_chosen": 8.490350723266602, "log_odds_ratio": -0.0014901505783200264, "logits/chosen": -0.9239876866340637, "logits/rejected": -0.9879686832427979, "logps/chosen": -0.009935002774000168, "logps/rejected": -1.9407963752746582, "loss": 2.2881, "nll_loss": 0.5718832015991211, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009935003472492099, "rewards/margins": 0.19308611750602722, "rewards/rejected": -0.19407962262630463, "step": 3488 }, { "epoch": 2.412863070539419, "grad_norm": 7.0164289474487305, "learning_rate": 4.2150760719225454e-05, "log_odds_chosen": 8.137097358703613, "log_odds_ratio": -0.012940660119056702, "logits/chosen": -0.7661728858947754, "logits/rejected": -0.7889710664749146, "logps/chosen": -0.014242495410144329, "logps/rejected": -1.191624402999878, "loss": 2.1544, "nll_loss": 0.5372986197471619, "rewards/accuracies": 1.0, "rewards/chosen": -0.001424249610863626, "rewards/margins": 0.11773819476366043, "rewards/rejected": -0.11916244775056839, "step": 3489 }, { "epoch": 2.413554633471646, "grad_norm": 4.6547160148620605, "learning_rate": 4.21469187029353e-05, "log_odds_chosen": 7.072702407836914, "log_odds_ratio": -0.027786776423454285, "logits/chosen": -0.6046419143676758, "logits/rejected": -0.6040664911270142, "logps/chosen": -0.029178176075220108, "logps/rejected": -1.7043691873550415, "loss": 2.238, "nll_loss": 0.5567184090614319, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029178177937865257, "rewards/margins": 0.16751909255981445, "rewards/rejected": -0.17043691873550415, "step": 3490 }, { "epoch": 2.4142461964038726, "grad_norm": 11.417491912841797, "learning_rate": 4.214307668664515e-05, "log_odds_chosen": 5.747222423553467, "log_odds_ratio": -0.08429299294948578, "logits/chosen": -0.7581441402435303, "logits/rejected": -0.7898090481758118, "logps/chosen": -0.0240895077586174, "logps/rejected": -0.8935565948486328, "loss": 2.6229, "nll_loss": 0.6472893357276917, "rewards/accuracies": 1.0, "rewards/chosen": -0.002408950822427869, "rewards/margins": 0.08694671094417572, "rewards/rejected": -0.08935566246509552, "step": 3491 }, { "epoch": 2.4149377593360994, "grad_norm": 9.846121788024902, "learning_rate": 4.2139234670355004e-05, "log_odds_chosen": 7.643199443817139, "log_odds_ratio": -0.1389947384595871, "logits/chosen": -0.8550637364387512, "logits/rejected": -0.9452530145645142, "logps/chosen": -0.013365296646952629, "logps/rejected": -1.3257532119750977, "loss": 2.4252, "nll_loss": 0.5924007892608643, "rewards/accuracies": 0.875, "rewards/chosen": -0.0013365296181291342, "rewards/margins": 0.13123878836631775, "rewards/rejected": -0.13257533311843872, "step": 3492 }, { "epoch": 2.4156293222683263, "grad_norm": 4.824674606323242, "learning_rate": 4.213539265406486e-05, "log_odds_chosen": 5.845300674438477, "log_odds_ratio": -0.03971134126186371, "logits/chosen": -0.853974461555481, "logits/rejected": -0.8486250042915344, "logps/chosen": -0.023859363049268723, "logps/rejected": -0.889519453048706, "loss": 2.4991, "nll_loss": 0.620795488357544, "rewards/accuracies": 1.0, "rewards/chosen": -0.002385936211794615, "rewards/margins": 0.0865660086274147, "rewards/rejected": -0.0889519453048706, "step": 3493 }, { "epoch": 2.416320885200553, "grad_norm": 6.13293981552124, "learning_rate": 4.213155063777471e-05, "log_odds_chosen": 8.28451919555664, "log_odds_ratio": -0.00443453062325716, "logits/chosen": -0.6030639410018921, "logits/rejected": -0.6766177415847778, "logps/chosen": -0.005843472667038441, "logps/rejected": -1.6858534812927246, "loss": 2.6302, "nll_loss": 0.6570960283279419, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005843472899869084, "rewards/margins": 0.16800101101398468, "rewards/rejected": -0.16858534514904022, "step": 3494 }, { "epoch": 2.41701244813278, "grad_norm": 10.260222434997559, "learning_rate": 4.2127708621484555e-05, "log_odds_chosen": 7.628552436828613, "log_odds_ratio": -0.005517259240150452, "logits/chosen": -0.7729060649871826, "logits/rejected": -0.7599183917045593, "logps/chosen": -0.014096952974796295, "logps/rejected": -1.4275188446044922, "loss": 2.4221, "nll_loss": 0.6049808859825134, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014096952509135008, "rewards/margins": 0.1413421928882599, "rewards/rejected": -0.14275188744068146, "step": 3495 }, { "epoch": 2.4177040110650068, "grad_norm": 10.477607727050781, "learning_rate": 4.212386660519441e-05, "log_odds_chosen": 8.311507225036621, "log_odds_ratio": -0.0017723742639645934, "logits/chosen": -0.7644646763801575, "logits/rejected": -0.795981764793396, "logps/chosen": -0.0015009690541774035, "logps/rejected": -1.2311259508132935, "loss": 2.3773, "nll_loss": 0.5941388607025146, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001500969083281234, "rewards/margins": 0.12296249717473984, "rewards/rejected": -0.12311260402202606, "step": 3496 }, { "epoch": 2.4183955739972336, "grad_norm": 10.727534294128418, "learning_rate": 4.212002458890426e-05, "log_odds_chosen": 8.098236083984375, "log_odds_ratio": -0.02444791980087757, "logits/chosen": -0.6742796897888184, "logits/rejected": -0.7470793128013611, "logps/chosen": -0.02403697744011879, "logps/rejected": -1.4629892110824585, "loss": 3.3694, "nll_loss": 0.8398998379707336, "rewards/accuracies": 1.0, "rewards/chosen": -0.002403697930276394, "rewards/margins": 0.143895223736763, "rewards/rejected": -0.14629891514778137, "step": 3497 }, { "epoch": 2.4190871369294604, "grad_norm": 11.279629707336426, "learning_rate": 4.211618257261411e-05, "log_odds_chosen": 7.625337600708008, "log_odds_ratio": -0.005302184261381626, "logits/chosen": -0.9389767050743103, "logits/rejected": -1.0070160627365112, "logps/chosen": -0.00925231259316206, "logps/rejected": -1.2491271495819092, "loss": 3.1125, "nll_loss": 0.7776023745536804, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009252313175238669, "rewards/margins": 0.12398748099803925, "rewards/rejected": -0.12491270899772644, "step": 3498 }, { "epoch": 2.4197786998616873, "grad_norm": 6.170611381530762, "learning_rate": 4.211234055632396e-05, "log_odds_chosen": 6.709138870239258, "log_odds_ratio": -0.046844013035297394, "logits/chosen": -0.5986511707305908, "logits/rejected": -0.5998558402061462, "logps/chosen": -0.013723606243729591, "logps/rejected": -1.2466332912445068, "loss": 2.3662, "nll_loss": 0.5868594646453857, "rewards/accuracies": 1.0, "rewards/chosen": -0.001372360740788281, "rewards/margins": 0.12329097092151642, "rewards/rejected": -0.1246633380651474, "step": 3499 }, { "epoch": 2.420470262793914, "grad_norm": 9.391353607177734, "learning_rate": 4.210849854003382e-05, "log_odds_chosen": 5.222013473510742, "log_odds_ratio": -0.22473469376564026, "logits/chosen": -0.480400025844574, "logits/rejected": -0.5001802444458008, "logps/chosen": -0.056873537600040436, "logps/rejected": -0.8878986239433289, "loss": 2.4718, "nll_loss": 0.5954755544662476, "rewards/accuracies": 0.75, "rewards/chosen": -0.005687354132533073, "rewards/margins": 0.0831025093793869, "rewards/rejected": -0.08878986537456512, "step": 3500 }, { "epoch": 2.421161825726141, "grad_norm": 11.657116889953613, "learning_rate": 4.210465652374366e-05, "log_odds_chosen": 8.136720657348633, "log_odds_ratio": -0.0016932344296947122, "logits/chosen": -0.6791071891784668, "logits/rejected": -0.8140288591384888, "logps/chosen": -0.005484652239829302, "logps/rejected": -1.6490072011947632, "loss": 2.8094, "nll_loss": 0.7021803855895996, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005484651774168015, "rewards/margins": 0.16435226798057556, "rewards/rejected": -0.16490072011947632, "step": 3501 }, { "epoch": 2.4218533886583677, "grad_norm": 9.51025676727295, "learning_rate": 4.2100814507453515e-05, "log_odds_chosen": 8.480232238769531, "log_odds_ratio": -0.04696385934948921, "logits/chosen": -0.5885998010635376, "logits/rejected": -0.6898617148399353, "logps/chosen": -0.05135025084018707, "logps/rejected": -2.130434989929199, "loss": 2.084, "nll_loss": 0.5163017511367798, "rewards/accuracies": 1.0, "rewards/chosen": -0.005135024897754192, "rewards/margins": 0.2079084813594818, "rewards/rejected": -0.21304351091384888, "step": 3502 }, { "epoch": 2.4225449515905946, "grad_norm": 8.47912883758545, "learning_rate": 4.209697249116337e-05, "log_odds_chosen": 8.443939208984375, "log_odds_ratio": -0.0013600240927189589, "logits/chosen": -0.7223008871078491, "logits/rejected": -0.7537130117416382, "logps/chosen": -0.0009003658778965473, "logps/rejected": -1.134358525276184, "loss": 2.8194, "nll_loss": 0.7047219276428223, "rewards/accuracies": 1.0, "rewards/chosen": -9.003658487927169e-05, "rewards/margins": 0.11334581673145294, "rewards/rejected": -0.11343584954738617, "step": 3503 }, { "epoch": 2.4232365145228214, "grad_norm": 8.608758926391602, "learning_rate": 4.2093130474873213e-05, "log_odds_chosen": 7.91787576675415, "log_odds_ratio": -0.08348464220762253, "logits/chosen": -0.559022068977356, "logits/rejected": -0.6558287143707275, "logps/chosen": -0.013709326274693012, "logps/rejected": -1.163172960281372, "loss": 1.8885, "nll_loss": 0.46378472447395325, "rewards/accuracies": 1.0, "rewards/chosen": -0.00137093267403543, "rewards/margins": 0.11494636535644531, "rewards/rejected": -0.11631729453802109, "step": 3504 }, { "epoch": 2.4239280774550482, "grad_norm": 12.582856178283691, "learning_rate": 4.2089288458583066e-05, "log_odds_chosen": 9.710137367248535, "log_odds_ratio": -0.00018082182214129716, "logits/chosen": -0.7905318737030029, "logits/rejected": -0.8423488140106201, "logps/chosen": -0.0003087608201894909, "logps/rejected": -1.664839267730713, "loss": 2.9246, "nll_loss": 0.7311212420463562, "rewards/accuracies": 1.0, "rewards/chosen": -3.0876079108566046e-05, "rewards/margins": 0.1664530634880066, "rewards/rejected": -0.16648393869400024, "step": 3505 }, { "epoch": 2.424619640387275, "grad_norm": 6.5926594734191895, "learning_rate": 4.208544644229292e-05, "log_odds_chosen": 7.102262496948242, "log_odds_ratio": -0.005528897512704134, "logits/chosen": -0.6839585304260254, "logits/rejected": -0.5873481631278992, "logps/chosen": -0.008574813604354858, "logps/rejected": -0.9425464868545532, "loss": 2.1099, "nll_loss": 0.5269334316253662, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008574813837185502, "rewards/margins": 0.09339717775583267, "rewards/rejected": -0.09425465762615204, "step": 3506 }, { "epoch": 2.425311203319502, "grad_norm": 8.038623809814453, "learning_rate": 4.208160442600277e-05, "log_odds_chosen": 7.128605365753174, "log_odds_ratio": -0.003702126909047365, "logits/chosen": -0.7105140089988708, "logits/rejected": -0.7407926917076111, "logps/chosen": -0.010936792008578777, "logps/rejected": -1.22905695438385, "loss": 2.5495, "nll_loss": 0.6369987726211548, "rewards/accuracies": 1.0, "rewards/chosen": -0.001093679224140942, "rewards/margins": 0.12181201577186584, "rewards/rejected": -0.12290570139884949, "step": 3507 }, { "epoch": 2.4260027662517287, "grad_norm": 9.31099796295166, "learning_rate": 4.2077762409712616e-05, "log_odds_chosen": 7.37070369720459, "log_odds_ratio": -0.043804366141557693, "logits/chosen": -0.39794284105300903, "logits/rejected": -0.46229493618011475, "logps/chosen": -0.022990640252828598, "logps/rejected": -1.276123285293579, "loss": 2.4207, "nll_loss": 0.6007853150367737, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022990640718489885, "rewards/margins": 0.12531326711177826, "rewards/rejected": -0.12761233747005463, "step": 3508 }, { "epoch": 2.4266943291839556, "grad_norm": 6.147286415100098, "learning_rate": 4.2073920393422476e-05, "log_odds_chosen": 7.822290420532227, "log_odds_ratio": -0.0024672728031873703, "logits/chosen": -0.8302035927772522, "logits/rejected": -0.9225561618804932, "logps/chosen": -0.005152544938027859, "logps/rejected": -1.4149426221847534, "loss": 1.9211, "nll_loss": 0.4800260365009308, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005152545054443181, "rewards/margins": 0.14097902178764343, "rewards/rejected": -0.1414942592382431, "step": 3509 }, { "epoch": 2.4273858921161824, "grad_norm": 4.319844722747803, "learning_rate": 4.207007837713232e-05, "log_odds_chosen": 7.981142044067383, "log_odds_ratio": -0.0016685464652255177, "logits/chosen": -0.8217602372169495, "logits/rejected": -0.8055883049964905, "logps/chosen": -0.00772473169490695, "logps/rejected": -1.4985321760177612, "loss": 1.509, "nll_loss": 0.37708476185798645, "rewards/accuracies": 1.0, "rewards/chosen": -0.000772473169490695, "rewards/margins": 0.14908073842525482, "rewards/rejected": -0.14985322952270508, "step": 3510 }, { "epoch": 2.428077455048409, "grad_norm": 9.382851600646973, "learning_rate": 4.2066236360842174e-05, "log_odds_chosen": 7.408436298370361, "log_odds_ratio": -0.004545638337731361, "logits/chosen": -0.6942574381828308, "logits/rejected": -0.6026928424835205, "logps/chosen": -0.014898409135639668, "logps/rejected": -1.4916660785675049, "loss": 2.2225, "nll_loss": 0.5551747679710388, "rewards/accuracies": 1.0, "rewards/chosen": -0.001489840797148645, "rewards/margins": 0.1476767659187317, "rewards/rejected": -0.14916659891605377, "step": 3511 }, { "epoch": 2.428769017980636, "grad_norm": 6.530588150024414, "learning_rate": 4.2062394344552026e-05, "log_odds_chosen": 7.621374130249023, "log_odds_ratio": -0.020029377192258835, "logits/chosen": -0.36985185742378235, "logits/rejected": -0.4080933928489685, "logps/chosen": -0.010694924741983414, "logps/rejected": -1.1310560703277588, "loss": 2.2096, "nll_loss": 0.550404965877533, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010694925440475345, "rewards/margins": 0.11203610897064209, "rewards/rejected": -0.11310561001300812, "step": 3512 }, { "epoch": 2.429460580912863, "grad_norm": 9.551876068115234, "learning_rate": 4.205855232826187e-05, "log_odds_chosen": 6.510831356048584, "log_odds_ratio": -0.10963248461484909, "logits/chosen": -0.7897687554359436, "logits/rejected": -0.8556662201881409, "logps/chosen": -0.02455962263047695, "logps/rejected": -1.2443952560424805, "loss": 2.7122, "nll_loss": 0.6670763492584229, "rewards/accuracies": 1.0, "rewards/chosen": -0.002455962123349309, "rewards/margins": 0.12198356539011002, "rewards/rejected": -0.12443952262401581, "step": 3513 }, { "epoch": 2.43015214384509, "grad_norm": 8.177549362182617, "learning_rate": 4.2054710311971724e-05, "log_odds_chosen": 7.105329513549805, "log_odds_ratio": -0.0745643824338913, "logits/chosen": -0.7847766876220703, "logits/rejected": -0.8372522592544556, "logps/chosen": -0.02346830442547798, "logps/rejected": -1.133172631263733, "loss": 2.8405, "nll_loss": 0.702670156955719, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023468302097171545, "rewards/margins": 0.11097043752670288, "rewards/rejected": -0.11331726610660553, "step": 3514 }, { "epoch": 2.430843706777317, "grad_norm": 6.872097492218018, "learning_rate": 4.205086829568158e-05, "log_odds_chosen": 7.203080654144287, "log_odds_ratio": -0.014032398350536823, "logits/chosen": -0.7077941298484802, "logits/rejected": -0.7775839567184448, "logps/chosen": -0.010666534304618835, "logps/rejected": -1.1262292861938477, "loss": 2.1964, "nll_loss": 0.5477020740509033, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010666532907634974, "rewards/margins": 0.1115562841296196, "rewards/rejected": -0.112622931599617, "step": 3515 }, { "epoch": 2.431535269709544, "grad_norm": 9.27294921875, "learning_rate": 4.204702627939143e-05, "log_odds_chosen": 9.278311729431152, "log_odds_ratio": -0.014651588164269924, "logits/chosen": -0.4278988242149353, "logits/rejected": -0.505477786064148, "logps/chosen": -0.0059097036719322205, "logps/rejected": -1.815339207649231, "loss": 2.4822, "nll_loss": 0.6190872192382812, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005909703322686255, "rewards/margins": 0.18094295263290405, "rewards/rejected": -0.18153391778469086, "step": 3516 }, { "epoch": 2.4322268326417706, "grad_norm": 12.633123397827148, "learning_rate": 4.2043184263101275e-05, "log_odds_chosen": 7.5888190269470215, "log_odds_ratio": -0.12230602651834488, "logits/chosen": -0.7426217794418335, "logits/rejected": -0.8118423223495483, "logps/chosen": -0.02818923629820347, "logps/rejected": -1.1682384014129639, "loss": 3.2121, "nll_loss": 0.7908050417900085, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028189236763864756, "rewards/margins": 0.11400490999221802, "rewards/rejected": -0.11682382971048355, "step": 3517 }, { "epoch": 2.4329183955739975, "grad_norm": 8.882617950439453, "learning_rate": 4.2039342246811134e-05, "log_odds_chosen": 8.545615196228027, "log_odds_ratio": -0.0015734564512968063, "logits/chosen": -0.6913523077964783, "logits/rejected": -0.6268677115440369, "logps/chosen": -0.01377897709608078, "logps/rejected": -2.0227184295654297, "loss": 3.4862, "nll_loss": 0.8713939189910889, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013778976863250136, "rewards/margins": 0.20089393854141235, "rewards/rejected": -0.20227184891700745, "step": 3518 }, { "epoch": 2.4336099585062243, "grad_norm": 12.85490894317627, "learning_rate": 4.203550023052098e-05, "log_odds_chosen": 6.209042549133301, "log_odds_ratio": -0.16449643671512604, "logits/chosen": -0.46550968289375305, "logits/rejected": -0.47567084431648254, "logps/chosen": -0.06882783770561218, "logps/rejected": -0.9017906188964844, "loss": 2.3628, "nll_loss": 0.5742617845535278, "rewards/accuracies": 0.875, "rewards/chosen": -0.006882783956825733, "rewards/margins": 0.0832962840795517, "rewards/rejected": -0.09017906337976456, "step": 3519 }, { "epoch": 2.434301521438451, "grad_norm": 9.354718208312988, "learning_rate": 4.203165821423083e-05, "log_odds_chosen": 7.354831695556641, "log_odds_ratio": -0.0032965652644634247, "logits/chosen": -0.7076842784881592, "logits/rejected": -0.6892897486686707, "logps/chosen": -0.02614540606737137, "logps/rejected": -1.610516905784607, "loss": 2.6776, "nll_loss": 0.6690692901611328, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026145405136048794, "rewards/margins": 0.15843714773654938, "rewards/rejected": -0.1610516905784607, "step": 3520 }, { "epoch": 2.434993084370678, "grad_norm": 14.583526611328125, "learning_rate": 4.2027816197940685e-05, "log_odds_chosen": 7.048150539398193, "log_odds_ratio": -0.45794180035591125, "logits/chosen": -0.5279990434646606, "logits/rejected": -0.551681637763977, "logps/chosen": -0.06697956472635269, "logps/rejected": -1.5029540061950684, "loss": 2.3421, "nll_loss": 0.5397307276725769, "rewards/accuracies": 0.875, "rewards/chosen": -0.006697956472635269, "rewards/margins": 0.14359745383262634, "rewards/rejected": -0.1502954065799713, "step": 3521 }, { "epoch": 2.435684647302905, "grad_norm": 8.86139965057373, "learning_rate": 4.202397418165053e-05, "log_odds_chosen": 8.100759506225586, "log_odds_ratio": -0.0019487441750243306, "logits/chosen": -0.4604555070400238, "logits/rejected": -0.5274443030357361, "logps/chosen": -0.0014906483702361584, "logps/rejected": -1.226670742034912, "loss": 2.3142, "nll_loss": 0.5783494114875793, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014906484284438193, "rewards/margins": 0.12251799553632736, "rewards/rejected": -0.12266705930233002, "step": 3522 }, { "epoch": 2.4363762102351316, "grad_norm": 5.555274486541748, "learning_rate": 4.202013216536038e-05, "log_odds_chosen": 6.237675666809082, "log_odds_ratio": -0.018100908026099205, "logits/chosen": -0.7315844893455505, "logits/rejected": -0.810881495475769, "logps/chosen": -0.03815982863306999, "logps/rejected": -1.432349443435669, "loss": 3.2658, "nll_loss": 0.8146426677703857, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038159831892699003, "rewards/margins": 0.13941895961761475, "rewards/rejected": -0.14323493838310242, "step": 3523 }, { "epoch": 2.4370677731673585, "grad_norm": 3.357123851776123, "learning_rate": 4.2016290149070235e-05, "log_odds_chosen": 8.570686340332031, "log_odds_ratio": -0.011956355534493923, "logits/chosen": -0.3522316813468933, "logits/rejected": -0.29090994596481323, "logps/chosen": -0.006132098380476236, "logps/rejected": -1.357385277748108, "loss": 2.189, "nll_loss": 0.5460516810417175, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006132098496891558, "rewards/margins": 0.1351253092288971, "rewards/rejected": -0.1357385218143463, "step": 3524 }, { "epoch": 2.4377593360995853, "grad_norm": 10.35288143157959, "learning_rate": 4.201244813278009e-05, "log_odds_chosen": 9.286419868469238, "log_odds_ratio": -0.002155565656721592, "logits/chosen": -0.15422987937927246, "logits/rejected": -0.22997677326202393, "logps/chosen": -0.000969383807387203, "logps/rejected": -1.7188987731933594, "loss": 2.9679, "nll_loss": 0.7417480945587158, "rewards/accuracies": 1.0, "rewards/chosen": -9.693838364910334e-05, "rewards/margins": 0.17179293930530548, "rewards/rejected": -0.17188987135887146, "step": 3525 }, { "epoch": 2.438450899031812, "grad_norm": 5.98746919631958, "learning_rate": 4.200860611648993e-05, "log_odds_chosen": 7.209506988525391, "log_odds_ratio": -0.01720517687499523, "logits/chosen": -0.255402535200119, "logits/rejected": -0.2656596004962921, "logps/chosen": -0.019557084888219833, "logps/rejected": -1.6097584962844849, "loss": 2.8255, "nll_loss": 0.7046565413475037, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019557084888219833, "rewards/margins": 0.15902015566825867, "rewards/rejected": -0.1609758585691452, "step": 3526 }, { "epoch": 2.439142461964039, "grad_norm": 6.258415222167969, "learning_rate": 4.200476410019979e-05, "log_odds_chosen": 6.269476890563965, "log_odds_ratio": -0.038511913269758224, "logits/chosen": -0.18216294050216675, "logits/rejected": -0.20947065949440002, "logps/chosen": -0.0269983671605587, "logps/rejected": -1.0320063829421997, "loss": 2.2651, "nll_loss": 0.562411904335022, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026998370885849, "rewards/margins": 0.10050080716609955, "rewards/rejected": -0.10320064425468445, "step": 3527 }, { "epoch": 2.4398340248962658, "grad_norm": 6.181754112243652, "learning_rate": 4.200092208390964e-05, "log_odds_chosen": 7.249432563781738, "log_odds_ratio": -0.002386817242950201, "logits/chosen": -0.22067557275295258, "logits/rejected": -0.26951614022254944, "logps/chosen": -0.007044011726975441, "logps/rejected": -1.532610535621643, "loss": 1.9815, "nll_loss": 0.49513524770736694, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007044011726975441, "rewards/margins": 0.1525566577911377, "rewards/rejected": -0.15326105058193207, "step": 3528 }, { "epoch": 2.4405255878284926, "grad_norm": 10.975595474243164, "learning_rate": 4.199708006761949e-05, "log_odds_chosen": 5.667407512664795, "log_odds_ratio": -0.10486754029989243, "logits/chosen": -0.21335190534591675, "logits/rejected": -0.21384568512439728, "logps/chosen": -0.018393559381365776, "logps/rejected": -1.1246503591537476, "loss": 2.6217, "nll_loss": 0.6449326276779175, "rewards/accuracies": 0.875, "rewards/chosen": -0.001839356031268835, "rewards/margins": 0.11062569171190262, "rewards/rejected": -0.112465038895607, "step": 3529 }, { "epoch": 2.4412171507607194, "grad_norm": 6.723755359649658, "learning_rate": 4.199323805132934e-05, "log_odds_chosen": 9.1740140914917, "log_odds_ratio": -0.0013784350594505668, "logits/chosen": -0.5831416845321655, "logits/rejected": -0.6475759744644165, "logps/chosen": -0.0018314392073079944, "logps/rejected": -1.3935047388076782, "loss": 2.3557, "nll_loss": 0.5887914896011353, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018314392946194857, "rewards/margins": 0.13916733860969543, "rewards/rejected": -0.13935047388076782, "step": 3530 }, { "epoch": 2.4419087136929463, "grad_norm": 5.532959938049316, "learning_rate": 4.198939603503919e-05, "log_odds_chosen": 8.193620681762695, "log_odds_ratio": -0.08815432339906693, "logits/chosen": -0.32796207070350647, "logits/rejected": -0.309958815574646, "logps/chosen": -0.0200906153768301, "logps/rejected": -1.7347533702850342, "loss": 2.6446, "nll_loss": 0.6523411870002747, "rewards/accuracies": 0.875, "rewards/chosen": -0.0020090616308152676, "rewards/margins": 0.17146629095077515, "rewards/rejected": -0.17347534000873566, "step": 3531 }, { "epoch": 2.442600276625173, "grad_norm": 8.325709342956543, "learning_rate": 4.198555401874904e-05, "log_odds_chosen": 9.631030082702637, "log_odds_ratio": -0.0007575347553938627, "logits/chosen": 0.05261028930544853, "logits/rejected": -0.10341690480709076, "logps/chosen": -0.0013362450990825891, "logps/rejected": -1.930006980895996, "loss": 1.9346, "nll_loss": 0.48357462882995605, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013362453319132328, "rewards/margins": 0.19286708533763885, "rewards/rejected": -0.1930007040500641, "step": 3532 }, { "epoch": 2.4432918395574, "grad_norm": 42.79154586791992, "learning_rate": 4.1981712002458894e-05, "log_odds_chosen": 7.646982192993164, "log_odds_ratio": -0.18479464948177338, "logits/chosen": -0.06726447492837906, "logits/rejected": -0.2156417965888977, "logps/chosen": -0.017851902171969414, "logps/rejected": -1.2830493450164795, "loss": 2.2222, "nll_loss": 0.5370683670043945, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017851904267445207, "rewards/margins": 0.12651973962783813, "rewards/rejected": -0.12830494344234467, "step": 3533 }, { "epoch": 2.4439834024896268, "grad_norm": 6.985574245452881, "learning_rate": 4.1977869986168746e-05, "log_odds_chosen": 8.568099975585938, "log_odds_ratio": -0.009587760083377361, "logits/chosen": -0.29672423005104065, "logits/rejected": -0.30285751819610596, "logps/chosen": -0.01729811169207096, "logps/rejected": -1.8276588916778564, "loss": 2.1486, "nll_loss": 0.5361930727958679, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017298111924901605, "rewards/margins": 0.1810360699892044, "rewards/rejected": -0.1827658861875534, "step": 3534 }, { "epoch": 2.4446749654218536, "grad_norm": 5.899697780609131, "learning_rate": 4.197402796987859e-05, "log_odds_chosen": 7.62744140625, "log_odds_ratio": -0.029710203409194946, "logits/chosen": -0.30902692675590515, "logits/rejected": -0.35745781660079956, "logps/chosen": -0.019765477627515793, "logps/rejected": -1.4128010272979736, "loss": 1.9326, "nll_loss": 0.4801683723926544, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019765477627515793, "rewards/margins": 0.1393035501241684, "rewards/rejected": -0.14128009974956512, "step": 3535 }, { "epoch": 2.4453665283540804, "grad_norm": 8.331310272216797, "learning_rate": 4.197018595358845e-05, "log_odds_chosen": 7.946462631225586, "log_odds_ratio": -0.002366039901971817, "logits/chosen": -0.227493554353714, "logits/rejected": -0.2392466962337494, "logps/chosen": -0.02204231731593609, "logps/rejected": -1.4582772254943848, "loss": 2.4526, "nll_loss": 0.6129144430160522, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022042319178581238, "rewards/margins": 0.1436235010623932, "rewards/rejected": -0.14582772552967072, "step": 3536 }, { "epoch": 2.4460580912863072, "grad_norm": 10.663069725036621, "learning_rate": 4.19663439372983e-05, "log_odds_chosen": 8.45026969909668, "log_odds_ratio": -0.0037501309998333454, "logits/chosen": -0.5676348209381104, "logits/rejected": -0.6324989795684814, "logps/chosen": -0.003062083385884762, "logps/rejected": -1.8823702335357666, "loss": 2.9366, "nll_loss": 0.7337688207626343, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030620835605077446, "rewards/margins": 0.18793082237243652, "rewards/rejected": -0.1882370412349701, "step": 3537 }, { "epoch": 2.446749654218534, "grad_norm": 6.520402431488037, "learning_rate": 4.196250192100815e-05, "log_odds_chosen": 6.4279375076293945, "log_odds_ratio": -0.06751563400030136, "logits/chosen": -0.5972424745559692, "logits/rejected": -0.6959658861160278, "logps/chosen": -0.026075618341565132, "logps/rejected": -0.9391674995422363, "loss": 2.8835, "nll_loss": 0.714113712310791, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026075621135532856, "rewards/margins": 0.09130918234586716, "rewards/rejected": -0.09391674399375916, "step": 3538 }, { "epoch": 2.447441217150761, "grad_norm": 7.875150680541992, "learning_rate": 4.1958659904718e-05, "log_odds_chosen": 8.601234436035156, "log_odds_ratio": -0.023501534014940262, "logits/chosen": -0.28037527203559875, "logits/rejected": -0.25492340326309204, "logps/chosen": -0.024181626737117767, "logps/rejected": -1.3170043230056763, "loss": 2.0807, "nll_loss": 0.5178147554397583, "rewards/accuracies": 1.0, "rewards/chosen": -0.002418162766844034, "rewards/margins": 0.12928228080272675, "rewards/rejected": -0.13170044124126434, "step": 3539 }, { "epoch": 2.4481327800829877, "grad_norm": 32.92366409301758, "learning_rate": 4.195481788842785e-05, "log_odds_chosen": 6.287031173706055, "log_odds_ratio": -0.3081459701061249, "logits/chosen": -0.6735745668411255, "logits/rejected": -0.6229374408721924, "logps/chosen": -0.10165555775165558, "logps/rejected": -1.1201739311218262, "loss": 2.6028, "nll_loss": 0.6198921203613281, "rewards/accuracies": 0.875, "rewards/chosen": -0.010165555402636528, "rewards/margins": 0.10185182839632034, "rewards/rejected": -0.11201739311218262, "step": 3540 }, { "epoch": 2.4488243430152146, "grad_norm": 28.747129440307617, "learning_rate": 4.19509758721377e-05, "log_odds_chosen": 6.925184726715088, "log_odds_ratio": -0.44749465584754944, "logits/chosen": -0.7733160257339478, "logits/rejected": -0.6954240798950195, "logps/chosen": -0.08167851716279984, "logps/rejected": -1.769601583480835, "loss": 2.2594, "nll_loss": 0.5201063752174377, "rewards/accuracies": 0.875, "rewards/chosen": -0.008167851716279984, "rewards/margins": 0.16879230737686157, "rewards/rejected": -0.17696017026901245, "step": 3541 }, { "epoch": 2.4495159059474414, "grad_norm": 10.873333930969238, "learning_rate": 4.194713385584755e-05, "log_odds_chosen": 8.917963981628418, "log_odds_ratio": -0.0018444982124492526, "logits/chosen": -0.2584773302078247, "logits/rejected": -0.37781739234924316, "logps/chosen": -0.001504393294453621, "logps/rejected": -1.7596986293792725, "loss": 2.4579, "nll_loss": 0.6142921447753906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001504393294453621, "rewards/margins": 0.17581942677497864, "rewards/rejected": -0.17596986889839172, "step": 3542 }, { "epoch": 2.4502074688796682, "grad_norm": 8.034497261047363, "learning_rate": 4.1943291839557405e-05, "log_odds_chosen": 8.216196060180664, "log_odds_ratio": -0.07497686892747879, "logits/chosen": -0.32882681488990784, "logits/rejected": -0.3448234498500824, "logps/chosen": -0.011722751893103123, "logps/rejected": -1.3132429122924805, "loss": 1.8573, "nll_loss": 0.4568219780921936, "rewards/accuracies": 1.0, "rewards/chosen": -0.001172275166027248, "rewards/margins": 0.13015201687812805, "rewards/rejected": -0.13132429122924805, "step": 3543 }, { "epoch": 2.450899031811895, "grad_norm": 16.778364181518555, "learning_rate": 4.193944982326725e-05, "log_odds_chosen": 7.608541011810303, "log_odds_ratio": -0.09194928407669067, "logits/chosen": -0.5622957944869995, "logits/rejected": -0.66969233751297, "logps/chosen": -0.02298230305314064, "logps/rejected": -1.5121748447418213, "loss": 2.6808, "nll_loss": 0.6610052585601807, "rewards/accuracies": 0.875, "rewards/chosen": -0.002298230305314064, "rewards/margins": 0.1489192545413971, "rewards/rejected": -0.1512174904346466, "step": 3544 }, { "epoch": 2.451590594744122, "grad_norm": 5.129400253295898, "learning_rate": 4.193560780697711e-05, "log_odds_chosen": 6.8556013107299805, "log_odds_ratio": -0.06903890520334244, "logits/chosen": -0.09445610642433167, "logits/rejected": -0.1201615259051323, "logps/chosen": -0.0447566993534565, "logps/rejected": -1.1922276020050049, "loss": 2.4929, "nll_loss": 0.6163094639778137, "rewards/accuracies": 1.0, "rewards/chosen": -0.00447566993534565, "rewards/margins": 0.11474709212779999, "rewards/rejected": -0.11922276020050049, "step": 3545 }, { "epoch": 2.4522821576763487, "grad_norm": 19.428516387939453, "learning_rate": 4.1931765790686955e-05, "log_odds_chosen": 7.740385055541992, "log_odds_ratio": -0.1259237378835678, "logits/chosen": -0.6805443167686462, "logits/rejected": -0.745235025882721, "logps/chosen": -0.029267750680446625, "logps/rejected": -1.4361294507980347, "loss": 3.2265, "nll_loss": 0.7940384149551392, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029267752543091774, "rewards/margins": 0.14068616926670074, "rewards/rejected": -0.14361293613910675, "step": 3546 }, { "epoch": 2.4529737206085755, "grad_norm": 7.1293206214904785, "learning_rate": 4.192792377439681e-05, "log_odds_chosen": 8.598723411560059, "log_odds_ratio": -0.002772655338048935, "logits/chosen": -0.4891475439071655, "logits/rejected": -0.5739641189575195, "logps/chosen": -0.06681334227323532, "logps/rejected": -1.9241998195648193, "loss": 2.3965, "nll_loss": 0.5988386869430542, "rewards/accuracies": 1.0, "rewards/chosen": -0.006681334227323532, "rewards/margins": 0.18573863804340363, "rewards/rejected": -0.19241997599601746, "step": 3547 }, { "epoch": 2.4536652835408024, "grad_norm": 7.23325777053833, "learning_rate": 4.192408175810665e-05, "log_odds_chosen": 7.28911828994751, "log_odds_ratio": -0.011899770237505436, "logits/chosen": -0.3867855668067932, "logits/rejected": -0.42984023690223694, "logps/chosen": -0.01793701946735382, "logps/rejected": -1.2952455282211304, "loss": 1.6492, "nll_loss": 0.4111155569553375, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017937019001692533, "rewards/margins": 0.1277308613061905, "rewards/rejected": -0.12952455878257751, "step": 3548 }, { "epoch": 2.454356846473029, "grad_norm": 5.491495132446289, "learning_rate": 4.1920239741816506e-05, "log_odds_chosen": 9.01888656616211, "log_odds_ratio": -0.0014132228679955006, "logits/chosen": -0.6249552965164185, "logits/rejected": -0.7304789423942566, "logps/chosen": -0.0013792149256914854, "logps/rejected": -1.6873981952667236, "loss": 2.1958, "nll_loss": 0.5488159656524658, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013792149547953159, "rewards/margins": 0.1686019003391266, "rewards/rejected": -0.16873982548713684, "step": 3549 }, { "epoch": 2.455048409405256, "grad_norm": 11.909871101379395, "learning_rate": 4.191639772552636e-05, "log_odds_chosen": 7.287874698638916, "log_odds_ratio": -0.013897876255214214, "logits/chosen": -0.5943728089332581, "logits/rejected": -0.6236305832862854, "logps/chosen": -0.08704106509685516, "logps/rejected": -1.2860745191574097, "loss": 2.9799, "nll_loss": 0.7435942888259888, "rewards/accuracies": 1.0, "rewards/chosen": -0.008704107254743576, "rewards/margins": 0.11990335583686829, "rewards/rejected": -0.12860745191574097, "step": 3550 }, { "epoch": 2.455739972337483, "grad_norm": 6.244982719421387, "learning_rate": 4.1912555709236204e-05, "log_odds_chosen": 9.310818672180176, "log_odds_ratio": -0.0022720363922417164, "logits/chosen": -0.31200623512268066, "logits/rejected": -0.3407549262046814, "logps/chosen": -0.0016210743924602866, "logps/rejected": -1.4185664653778076, "loss": 2.4014, "nll_loss": 0.6001317501068115, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016210746252909303, "rewards/margins": 0.14169453084468842, "rewards/rejected": -0.14185664057731628, "step": 3551 }, { "epoch": 2.4564315352697097, "grad_norm": 5.678574085235596, "learning_rate": 4.190871369294606e-05, "log_odds_chosen": 9.565153121948242, "log_odds_ratio": -0.00012046610208926722, "logits/chosen": -0.41298067569732666, "logits/rejected": -0.3656235933303833, "logps/chosen": -0.00019133117166347802, "logps/rejected": -1.162304401397705, "loss": 1.3996, "nll_loss": 0.34988972544670105, "rewards/accuracies": 1.0, "rewards/chosen": -1.913311643875204e-05, "rewards/margins": 0.11621131747961044, "rewards/rejected": -0.11623044312000275, "step": 3552 }, { "epoch": 2.4571230982019365, "grad_norm": 9.438420295715332, "learning_rate": 4.190487167665591e-05, "log_odds_chosen": 9.713960647583008, "log_odds_ratio": -0.0001747915375744924, "logits/chosen": -0.29356127977371216, "logits/rejected": -0.35049235820770264, "logps/chosen": -0.001048873527906835, "logps/rejected": -1.8610334396362305, "loss": 2.2306, "nll_loss": 0.5576305389404297, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010488735279068351, "rewards/margins": 0.18599846959114075, "rewards/rejected": -0.18610334396362305, "step": 3553 }, { "epoch": 2.4578146611341634, "grad_norm": 8.732086181640625, "learning_rate": 4.190102966036576e-05, "log_odds_chosen": 8.62548828125, "log_odds_ratio": -0.003063853830099106, "logits/chosen": -0.4967626929283142, "logits/rejected": -0.513431966304779, "logps/chosen": -0.0021129597444087267, "logps/rejected": -1.5165539979934692, "loss": 2.2112, "nll_loss": 0.5525052547454834, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002112959627993405, "rewards/margins": 0.15144410729408264, "rewards/rejected": -0.1516554057598114, "step": 3554 }, { "epoch": 2.45850622406639, "grad_norm": 4.8293890953063965, "learning_rate": 4.1897187644075614e-05, "log_odds_chosen": 7.02392578125, "log_odds_ratio": -0.004108362831175327, "logits/chosen": -0.42433008551597595, "logits/rejected": -0.46287766098976135, "logps/chosen": -0.005805987864732742, "logps/rejected": -0.9526754021644592, "loss": 1.7432, "nll_loss": 0.4353874921798706, "rewards/accuracies": 1.0, "rewards/chosen": -0.000580598774831742, "rewards/margins": 0.09468695521354675, "rewards/rejected": -0.09526754915714264, "step": 3555 }, { "epoch": 2.459197786998617, "grad_norm": 8.237462043762207, "learning_rate": 4.1893345627785466e-05, "log_odds_chosen": 5.931075096130371, "log_odds_ratio": -0.18810392916202545, "logits/chosen": -0.5654093623161316, "logits/rejected": -0.6350131034851074, "logps/chosen": -0.12591242790222168, "logps/rejected": -1.0404949188232422, "loss": 2.4037, "nll_loss": 0.5821194648742676, "rewards/accuracies": 0.875, "rewards/chosen": -0.012591241858899593, "rewards/margins": 0.09145824611186981, "rewards/rejected": -0.10404949635267258, "step": 3556 }, { "epoch": 2.459889349930844, "grad_norm": 11.721480369567871, "learning_rate": 4.188950361149531e-05, "log_odds_chosen": 9.411944389343262, "log_odds_ratio": -0.0009381014388054609, "logits/chosen": -0.695732057094574, "logits/rejected": -0.733596920967102, "logps/chosen": -0.0009364191209897399, "logps/rejected": -1.7959481477737427, "loss": 2.5884, "nll_loss": 0.6470180153846741, "rewards/accuracies": 1.0, "rewards/chosen": -9.364191646454856e-05, "rewards/margins": 0.17950116097927094, "rewards/rejected": -0.17959479987621307, "step": 3557 }, { "epoch": 2.4605809128630707, "grad_norm": 11.841344833374023, "learning_rate": 4.1885661595205164e-05, "log_odds_chosen": 5.187502861022949, "log_odds_ratio": -0.566064715385437, "logits/chosen": -0.46774718165397644, "logits/rejected": -0.48095083236694336, "logps/chosen": -0.0918060690164566, "logps/rejected": -1.4666005373001099, "loss": 3.5646, "nll_loss": 0.834552526473999, "rewards/accuracies": 0.75, "rewards/chosen": -0.00918060727417469, "rewards/margins": 0.13747945427894592, "rewards/rejected": -0.14666005969047546, "step": 3558 }, { "epoch": 2.4612724757952975, "grad_norm": 9.315000534057617, "learning_rate": 4.1881819578915017e-05, "log_odds_chosen": 7.865085601806641, "log_odds_ratio": -0.0006331136683002114, "logits/chosen": -0.5515438914299011, "logits/rejected": -0.6229417324066162, "logps/chosen": -0.0010849256068468094, "logps/rejected": -1.1749250888824463, "loss": 2.4928, "nll_loss": 0.6231462955474854, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010849256068468094, "rewards/margins": 0.11738403141498566, "rewards/rejected": -0.11749253422021866, "step": 3559 }, { "epoch": 2.4619640387275243, "grad_norm": 9.650796890258789, "learning_rate": 4.187797756262486e-05, "log_odds_chosen": 6.812906265258789, "log_odds_ratio": -0.07138875871896744, "logits/chosen": -0.46891918778419495, "logits/rejected": -0.45208626985549927, "logps/chosen": -0.011670373380184174, "logps/rejected": -1.3647243976593018, "loss": 2.8237, "nll_loss": 0.698790431022644, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011670372914522886, "rewards/margins": 0.13530540466308594, "rewards/rejected": -0.1364724338054657, "step": 3560 }, { "epoch": 2.462655601659751, "grad_norm": 7.182980537414551, "learning_rate": 4.187413554633472e-05, "log_odds_chosen": 5.552410125732422, "log_odds_ratio": -0.07222361117601395, "logits/chosen": -0.4027874171733856, "logits/rejected": -0.4223852753639221, "logps/chosen": -0.03922223672270775, "logps/rejected": -0.9920768141746521, "loss": 2.3507, "nll_loss": 0.580459713935852, "rewards/accuracies": 1.0, "rewards/chosen": -0.003922224044799805, "rewards/margins": 0.09528546035289764, "rewards/rejected": -0.09920768439769745, "step": 3561 }, { "epoch": 2.463347164591978, "grad_norm": 10.363471984863281, "learning_rate": 4.187029353004457e-05, "log_odds_chosen": 9.871475219726562, "log_odds_ratio": -0.00011123805597890168, "logits/chosen": -0.625693678855896, "logits/rejected": -0.6865161061286926, "logps/chosen": -0.00019468393293209374, "logps/rejected": -1.5656342506408691, "loss": 2.7754, "nll_loss": 0.6938341856002808, "rewards/accuracies": 1.0, "rewards/chosen": -1.9468394384603016e-05, "rewards/margins": 0.15654397010803223, "rewards/rejected": -0.1565634310245514, "step": 3562 }, { "epoch": 2.464038727524205, "grad_norm": 8.084590911865234, "learning_rate": 4.186645151375442e-05, "log_odds_chosen": 8.47359561920166, "log_odds_ratio": -0.0016386422794312239, "logits/chosen": -0.30302852392196655, "logits/rejected": -0.3554832339286804, "logps/chosen": -0.0012318093795329332, "logps/rejected": -1.1712515354156494, "loss": 2.147, "nll_loss": 0.5365808606147766, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012318094377405941, "rewards/margins": 0.1170019656419754, "rewards/rejected": -0.11712514609098434, "step": 3563 }, { "epoch": 2.4647302904564317, "grad_norm": 9.11082649230957, "learning_rate": 4.186260949746427e-05, "log_odds_chosen": 5.911943435668945, "log_odds_ratio": -0.5747730135917664, "logits/chosen": -0.7678748369216919, "logits/rejected": -0.7210307121276855, "logps/chosen": -0.1222434714436531, "logps/rejected": -0.7873757481575012, "loss": 2.8083, "nll_loss": 0.6446093916893005, "rewards/accuracies": 0.75, "rewards/chosen": -0.012224346399307251, "rewards/margins": 0.06651322543621063, "rewards/rejected": -0.07873757183551788, "step": 3564 }, { "epoch": 2.4654218533886585, "grad_norm": 9.616727828979492, "learning_rate": 4.1858767481174124e-05, "log_odds_chosen": 7.258073329925537, "log_odds_ratio": -0.04513192176818848, "logits/chosen": 0.04711649566888809, "logits/rejected": -0.013913527131080627, "logps/chosen": -0.03416390344500542, "logps/rejected": -0.891162633895874, "loss": 2.1688, "nll_loss": 0.5376936197280884, "rewards/accuracies": 1.0, "rewards/chosen": -0.003416390623897314, "rewards/margins": 0.08569987118244171, "rewards/rejected": -0.08911626040935516, "step": 3565 }, { "epoch": 2.4661134163208853, "grad_norm": 6.7814836502075195, "learning_rate": 4.185492546488397e-05, "log_odds_chosen": 7.401462554931641, "log_odds_ratio": -0.07456041872501373, "logits/chosen": -0.8305421471595764, "logits/rejected": -0.8727108240127563, "logps/chosen": -0.008470169268548489, "logps/rejected": -1.1675711870193481, "loss": 1.6602, "nll_loss": 0.40759918093681335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008470169268548489, "rewards/margins": 0.11591009795665741, "rewards/rejected": -0.1167571172118187, "step": 3566 }, { "epoch": 2.466804979253112, "grad_norm": 8.304616928100586, "learning_rate": 4.185108344859382e-05, "log_odds_chosen": 7.286868095397949, "log_odds_ratio": -0.024223200976848602, "logits/chosen": -0.8337805271148682, "logits/rejected": -0.8922019600868225, "logps/chosen": -0.03300413489341736, "logps/rejected": -1.7029643058776855, "loss": 2.0032, "nll_loss": 0.49838027358055115, "rewards/accuracies": 1.0, "rewards/chosen": -0.003300413955003023, "rewards/margins": 0.16699601709842682, "rewards/rejected": -0.17029643058776855, "step": 3567 }, { "epoch": 2.467496542185339, "grad_norm": 7.218667984008789, "learning_rate": 4.1847241432303675e-05, "log_odds_chosen": 7.885411262512207, "log_odds_ratio": -0.060036953538656235, "logits/chosen": -0.47863897681236267, "logits/rejected": -0.4377424716949463, "logps/chosen": -0.019913654774427414, "logps/rejected": -1.4336100816726685, "loss": 1.865, "nll_loss": 0.46025776863098145, "rewards/accuracies": 1.0, "rewards/chosen": -0.001991365570574999, "rewards/margins": 0.14136964082717896, "rewards/rejected": -0.14336100220680237, "step": 3568 }, { "epoch": 2.468188105117566, "grad_norm": 6.102696895599365, "learning_rate": 4.184339941601352e-05, "log_odds_chosen": 8.722169876098633, "log_odds_ratio": -0.0007616700604557991, "logits/chosen": -0.11960343271493912, "logits/rejected": -0.15465594828128815, "logps/chosen": -0.025594644248485565, "logps/rejected": -1.8997677564620972, "loss": 2.0682, "nll_loss": 0.5169663429260254, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025594644248485565, "rewards/margins": 0.18741732835769653, "rewards/rejected": -0.1899767965078354, "step": 3569 }, { "epoch": 2.4688796680497926, "grad_norm": 11.712726593017578, "learning_rate": 4.183955739972338e-05, "log_odds_chosen": 8.480051040649414, "log_odds_ratio": -0.003982068505138159, "logits/chosen": -0.550246000289917, "logits/rejected": -0.5758745074272156, "logps/chosen": -0.0030394475907087326, "logps/rejected": -1.7417186498641968, "loss": 2.3966, "nll_loss": 0.5987579822540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003039447474293411, "rewards/margins": 0.17386791110038757, "rewards/rejected": -0.17417186498641968, "step": 3570 }, { "epoch": 2.4695712309820195, "grad_norm": 11.296751022338867, "learning_rate": 4.1835715383433226e-05, "log_odds_chosen": 6.232704162597656, "log_odds_ratio": -0.18135468661785126, "logits/chosen": -0.5195183753967285, "logits/rejected": -0.5521309971809387, "logps/chosen": -0.03475148230791092, "logps/rejected": -1.137387990951538, "loss": 2.0156, "nll_loss": 0.48577266931533813, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034751484636217356, "rewards/margins": 0.1102636530995369, "rewards/rejected": -0.11373881250619888, "step": 3571 }, { "epoch": 2.4702627939142463, "grad_norm": 14.958809852600098, "learning_rate": 4.183187336714308e-05, "log_odds_chosen": 7.303866386413574, "log_odds_ratio": -0.2045290172100067, "logits/chosen": -0.3235791027545929, "logits/rejected": -0.3822289705276489, "logps/chosen": -0.043132223188877106, "logps/rejected": -1.458284616470337, "loss": 1.892, "nll_loss": 0.45253604650497437, "rewards/accuracies": 0.875, "rewards/chosen": -0.004313222132623196, "rewards/margins": 0.14151525497436523, "rewards/rejected": -0.1458284705877304, "step": 3572 }, { "epoch": 2.470954356846473, "grad_norm": 11.448431968688965, "learning_rate": 4.182803135085293e-05, "log_odds_chosen": 8.554526329040527, "log_odds_ratio": -0.004633777309209108, "logits/chosen": -0.35143518447875977, "logits/rejected": -0.4527926445007324, "logps/chosen": -0.0009376034140586853, "logps/rejected": -1.0931717157363892, "loss": 2.191, "nll_loss": 0.5472741723060608, "rewards/accuracies": 1.0, "rewards/chosen": -9.376034722663462e-05, "rewards/margins": 0.10922341048717499, "rewards/rejected": -0.10931716859340668, "step": 3573 }, { "epoch": 2.4716459197787, "grad_norm": 9.666537284851074, "learning_rate": 4.182418933456278e-05, "log_odds_chosen": 7.457915782928467, "log_odds_ratio": -0.11223464459180832, "logits/chosen": -0.7632264494895935, "logits/rejected": -0.7934819459915161, "logps/chosen": -0.07048378139734268, "logps/rejected": -1.9534372091293335, "loss": 2.2209, "nll_loss": 0.5439975261688232, "rewards/accuracies": 0.875, "rewards/chosen": -0.007048378232866526, "rewards/margins": 0.1882953643798828, "rewards/rejected": -0.1953437328338623, "step": 3574 }, { "epoch": 2.472337482710927, "grad_norm": 9.697028160095215, "learning_rate": 4.182034731827263e-05, "log_odds_chosen": 8.214350700378418, "log_odds_ratio": -0.0024849059991538525, "logits/chosen": -0.23068474233150482, "logits/rejected": -0.2544393837451935, "logps/chosen": -0.0016188130248337984, "logps/rejected": -1.2002190351486206, "loss": 2.3922, "nll_loss": 0.597804069519043, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016188131121452898, "rewards/margins": 0.11986003071069717, "rewards/rejected": -0.12002190947532654, "step": 3575 }, { "epoch": 2.4730290456431536, "grad_norm": 5.239798069000244, "learning_rate": 4.181650530198248e-05, "log_odds_chosen": 6.941335678100586, "log_odds_ratio": -0.00915892980992794, "logits/chosen": -0.027788877487182617, "logits/rejected": -0.049438588321208954, "logps/chosen": -0.0346873477101326, "logps/rejected": -1.5081522464752197, "loss": 1.4903, "nll_loss": 0.37166211009025574, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034687344450503588, "rewards/margins": 0.14734649658203125, "rewards/rejected": -0.1508152186870575, "step": 3576 }, { "epoch": 2.4737206085753805, "grad_norm": 9.928712844848633, "learning_rate": 4.1812663285692333e-05, "log_odds_chosen": 5.2237091064453125, "log_odds_ratio": -0.2726137042045593, "logits/chosen": -0.5916513204574585, "logits/rejected": -0.5914942026138306, "logps/chosen": -0.20390404760837555, "logps/rejected": -1.1321120262145996, "loss": 3.9991, "nll_loss": 0.9725254774093628, "rewards/accuracies": 0.875, "rewards/chosen": -0.020390406250953674, "rewards/margins": 0.09282079339027405, "rewards/rejected": -0.11321119964122772, "step": 3577 }, { "epoch": 2.4744121715076073, "grad_norm": 5.80659818649292, "learning_rate": 4.180882126940218e-05, "log_odds_chosen": 5.961888313293457, "log_odds_ratio": -0.009627663530409336, "logits/chosen": -0.3582921326160431, "logits/rejected": -0.36567357182502747, "logps/chosen": -0.02949088253080845, "logps/rejected": -1.1007611751556396, "loss": 2.4407, "nll_loss": 0.6092128753662109, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029490883462131023, "rewards/margins": 0.10712704807519913, "rewards/rejected": -0.11007612943649292, "step": 3578 }, { "epoch": 2.475103734439834, "grad_norm": 4.562489032745361, "learning_rate": 4.180497925311204e-05, "log_odds_chosen": 6.75181245803833, "log_odds_ratio": -0.0999455451965332, "logits/chosen": -0.38527029752731323, "logits/rejected": -0.388009250164032, "logps/chosen": -0.0627438947558403, "logps/rejected": -1.3990108966827393, "loss": 2.3529, "nll_loss": 0.578228771686554, "rewards/accuracies": 0.875, "rewards/chosen": -0.006274389568716288, "rewards/margins": 0.13362669944763184, "rewards/rejected": -0.13990110158920288, "step": 3579 }, { "epoch": 2.475795297372061, "grad_norm": 12.828277587890625, "learning_rate": 4.1801137236821884e-05, "log_odds_chosen": 9.04585075378418, "log_odds_ratio": -0.00040327146416530013, "logits/chosen": -0.14115282893180847, "logits/rejected": -0.23341891169548035, "logps/chosen": -0.0007533514872193336, "logps/rejected": -1.5589354038238525, "loss": 2.4261, "nll_loss": 0.6064795851707458, "rewards/accuracies": 1.0, "rewards/chosen": -7.533514872193336e-05, "rewards/margins": 0.15581819415092468, "rewards/rejected": -0.15589353442192078, "step": 3580 }, { "epoch": 2.4764868603042878, "grad_norm": 9.210855484008789, "learning_rate": 4.1797295220531736e-05, "log_odds_chosen": 6.8370819091796875, "log_odds_ratio": -0.12023650854825974, "logits/chosen": -0.5468093752861023, "logits/rejected": -0.5404381155967712, "logps/chosen": -0.06064155325293541, "logps/rejected": -1.6137638092041016, "loss": 3.0733, "nll_loss": 0.756309986114502, "rewards/accuracies": 0.875, "rewards/chosen": -0.006064155604690313, "rewards/margins": 0.15531222522258759, "rewards/rejected": -0.16137638688087463, "step": 3581 }, { "epoch": 2.4771784232365146, "grad_norm": 6.40119743347168, "learning_rate": 4.179345320424159e-05, "log_odds_chosen": 7.077298641204834, "log_odds_ratio": -0.010537205263972282, "logits/chosen": -0.4514099359512329, "logits/rejected": -0.4493291974067688, "logps/chosen": -0.031477462500333786, "logps/rejected": -1.365896463394165, "loss": 2.946, "nll_loss": 0.7354586124420166, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031477464362978935, "rewards/margins": 0.13344189524650574, "rewards/rejected": -0.1365896463394165, "step": 3582 }, { "epoch": 2.4778699861687414, "grad_norm": 8.285335540771484, "learning_rate": 4.178961118795144e-05, "log_odds_chosen": 7.359831809997559, "log_odds_ratio": -0.03024285100400448, "logits/chosen": -0.49037089943885803, "logits/rejected": -0.45868930220603943, "logps/chosen": -0.00882015936076641, "logps/rejected": -0.8284816741943359, "loss": 1.7684, "nll_loss": 0.4390672445297241, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008820158545859158, "rewards/margins": 0.08196616172790527, "rewards/rejected": -0.08284817636013031, "step": 3583 }, { "epoch": 2.4785615491009683, "grad_norm": 9.757538795471191, "learning_rate": 4.178576917166129e-05, "log_odds_chosen": 6.399682998657227, "log_odds_ratio": -0.10057688504457474, "logits/chosen": -0.3511509895324707, "logits/rejected": -0.35170474648475647, "logps/chosen": -0.02771635912358761, "logps/rejected": -1.0118992328643799, "loss": 2.0764, "nll_loss": 0.5090445280075073, "rewards/accuracies": 1.0, "rewards/chosen": -0.002771636238321662, "rewards/margins": 0.09841828048229218, "rewards/rejected": -0.10118991881608963, "step": 3584 }, { "epoch": 2.479253112033195, "grad_norm": 6.219103813171387, "learning_rate": 4.178192715537114e-05, "log_odds_chosen": 7.042239189147949, "log_odds_ratio": -0.0017283523920923471, "logits/chosen": -0.3467369079589844, "logits/rejected": -0.33279949426651, "logps/chosen": -0.009448968805372715, "logps/rejected": -1.1239537000656128, "loss": 2.2031, "nll_loss": 0.550603985786438, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009448969503864646, "rewards/margins": 0.11145047843456268, "rewards/rejected": -0.11239536851644516, "step": 3585 }, { "epoch": 2.479944674965422, "grad_norm": 8.289338111877441, "learning_rate": 4.177808513908099e-05, "log_odds_chosen": 7.68574333190918, "log_odds_ratio": -0.002449170919135213, "logits/chosen": -0.4363767206668854, "logits/rejected": -0.5026494264602661, "logps/chosen": -0.013567497953772545, "logps/rejected": -1.48887038230896, "loss": 3.3617, "nll_loss": 0.8401690721511841, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013567497953772545, "rewards/margins": 0.14753028750419617, "rewards/rejected": -0.148887038230896, "step": 3586 }, { "epoch": 2.4806362378976488, "grad_norm": 8.901142120361328, "learning_rate": 4.177424312279084e-05, "log_odds_chosen": 6.479077339172363, "log_odds_ratio": -0.03187675029039383, "logits/chosen": -0.4941876530647278, "logits/rejected": -0.5805838704109192, "logps/chosen": -0.007656537927687168, "logps/rejected": -1.0773801803588867, "loss": 1.8931, "nll_loss": 0.4700874984264374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007656537345610559, "rewards/margins": 0.10697236657142639, "rewards/rejected": -0.10773801803588867, "step": 3587 }, { "epoch": 2.4813278008298756, "grad_norm": 10.053321838378906, "learning_rate": 4.17704011065007e-05, "log_odds_chosen": 7.308525085449219, "log_odds_ratio": -0.006363980006426573, "logits/chosen": -0.9116455316543579, "logits/rejected": -0.9546210169792175, "logps/chosen": -0.018561337143182755, "logps/rejected": -1.8791520595550537, "loss": 2.5481, "nll_loss": 0.6363804936408997, "rewards/accuracies": 1.0, "rewards/chosen": -0.001856133807450533, "rewards/margins": 0.18605907261371613, "rewards/rejected": -0.18791520595550537, "step": 3588 }, { "epoch": 2.4820193637621024, "grad_norm": 7.588259696960449, "learning_rate": 4.176655909021054e-05, "log_odds_chosen": 6.818621635437012, "log_odds_ratio": -0.02121109515428543, "logits/chosen": -0.714361310005188, "logits/rejected": -0.7343268394470215, "logps/chosen": -0.042965568602085114, "logps/rejected": -1.5659581422805786, "loss": 1.9111, "nll_loss": 0.4756477475166321, "rewards/accuracies": 1.0, "rewards/chosen": -0.004296557046473026, "rewards/margins": 0.15229925513267517, "rewards/rejected": -0.15659581124782562, "step": 3589 }, { "epoch": 2.4827109266943292, "grad_norm": 6.362364768981934, "learning_rate": 4.1762717073920395e-05, "log_odds_chosen": 8.199535369873047, "log_odds_ratio": -0.0014860157389193773, "logits/chosen": -0.5204131603240967, "logits/rejected": -0.5123761892318726, "logps/chosen": -0.015633605420589447, "logps/rejected": -1.625710129737854, "loss": 1.7413, "nll_loss": 0.43518710136413574, "rewards/accuracies": 1.0, "rewards/chosen": -0.001563360681757331, "rewards/margins": 0.16100767254829407, "rewards/rejected": -0.1625710278749466, "step": 3590 }, { "epoch": 2.483402489626556, "grad_norm": 10.019575119018555, "learning_rate": 4.175887505763025e-05, "log_odds_chosen": 7.701471328735352, "log_odds_ratio": -0.09589733183383942, "logits/chosen": -0.6089304685592651, "logits/rejected": -0.6508186459541321, "logps/chosen": -0.029153263196349144, "logps/rejected": -1.3184267282485962, "loss": 3.1084, "nll_loss": 0.7675221562385559, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029153262730687857, "rewards/margins": 0.1289273500442505, "rewards/rejected": -0.13184267282485962, "step": 3591 }, { "epoch": 2.484094052558783, "grad_norm": 11.550004005432129, "learning_rate": 4.17550330413401e-05, "log_odds_chosen": 7.115861892700195, "log_odds_ratio": -0.07433371245861053, "logits/chosen": -0.6918562054634094, "logits/rejected": -0.7228385210037231, "logps/chosen": -0.02212700992822647, "logps/rejected": -1.7282590866088867, "loss": 2.6529, "nll_loss": 0.6557831168174744, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022127011325210333, "rewards/margins": 0.17061319947242737, "rewards/rejected": -0.1728259027004242, "step": 3592 }, { "epoch": 2.4847856154910097, "grad_norm": 9.391145706176758, "learning_rate": 4.1751191025049945e-05, "log_odds_chosen": 5.340620517730713, "log_odds_ratio": -0.19077332317829132, "logits/chosen": -0.24847255647182465, "logits/rejected": -0.29707443714141846, "logps/chosen": -0.07398758828639984, "logps/rejected": -1.1074838638305664, "loss": 2.5396, "nll_loss": 0.6158105134963989, "rewards/accuracies": 0.875, "rewards/chosen": -0.007398759014904499, "rewards/margins": 0.10334962606430054, "rewards/rejected": -0.11074838787317276, "step": 3593 }, { "epoch": 2.4854771784232366, "grad_norm": 11.480371475219727, "learning_rate": 4.17473490087598e-05, "log_odds_chosen": 7.875348091125488, "log_odds_ratio": -0.001167620182968676, "logits/chosen": -0.6042084097862244, "logits/rejected": -0.6367383599281311, "logps/chosen": -0.004173867404460907, "logps/rejected": -1.4751344919204712, "loss": 3.0375, "nll_loss": 0.7592536211013794, "rewards/accuracies": 1.0, "rewards/chosen": -0.00041738676372915506, "rewards/margins": 0.14709606766700745, "rewards/rejected": -0.1475134640932083, "step": 3594 }, { "epoch": 2.4861687413554634, "grad_norm": 6.479579448699951, "learning_rate": 4.174350699246965e-05, "log_odds_chosen": 6.573331832885742, "log_odds_ratio": -0.056094568222761154, "logits/chosen": -0.6127046346664429, "logits/rejected": -0.5991913676261902, "logps/chosen": -0.028692560270428658, "logps/rejected": -1.1372803449630737, "loss": 2.9897, "nll_loss": 0.741807222366333, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028692560736089945, "rewards/margins": 0.11085877567529678, "rewards/rejected": -0.11372803151607513, "step": 3595 }, { "epoch": 2.4868603042876902, "grad_norm": 9.766852378845215, "learning_rate": 4.1739664976179496e-05, "log_odds_chosen": 7.247687816619873, "log_odds_ratio": -0.012085276655852795, "logits/chosen": -0.5809255838394165, "logits/rejected": -0.607845664024353, "logps/chosen": -0.01595086231827736, "logps/rejected": -1.7827832698822021, "loss": 2.4924, "nll_loss": 0.6218916177749634, "rewards/accuracies": 1.0, "rewards/chosen": -0.001595086301676929, "rewards/margins": 0.176683247089386, "rewards/rejected": -0.17827832698822021, "step": 3596 }, { "epoch": 2.487551867219917, "grad_norm": 7.794604301452637, "learning_rate": 4.1735822959889355e-05, "log_odds_chosen": 6.11072301864624, "log_odds_ratio": -0.013152681291103363, "logits/chosen": -0.7504695653915405, "logits/rejected": -0.8083955645561218, "logps/chosen": -0.01121857576072216, "logps/rejected": -1.0678339004516602, "loss": 3.1961, "nll_loss": 0.7977035045623779, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011218574363738298, "rewards/margins": 0.1056615337729454, "rewards/rejected": -0.10678339004516602, "step": 3597 }, { "epoch": 2.488243430152144, "grad_norm": 10.555513381958008, "learning_rate": 4.17319809435992e-05, "log_odds_chosen": 5.213860511779785, "log_odds_ratio": -0.4782610535621643, "logits/chosen": -0.3444117307662964, "logits/rejected": -0.32682672142982483, "logps/chosen": -0.053420089185237885, "logps/rejected": -0.8151041269302368, "loss": 2.8929, "nll_loss": 0.6753936409950256, "rewards/accuracies": 0.875, "rewards/chosen": -0.005342008545994759, "rewards/margins": 0.07616840302944183, "rewards/rejected": -0.08151040971279144, "step": 3598 }, { "epoch": 2.4889349930843707, "grad_norm": 5.460208892822266, "learning_rate": 4.172813892730905e-05, "log_odds_chosen": 7.005800724029541, "log_odds_ratio": -0.004268847871571779, "logits/chosen": -0.388731986284256, "logits/rejected": -0.36822617053985596, "logps/chosen": -0.0035434365272521973, "logps/rejected": -0.8111451268196106, "loss": 1.8473, "nll_loss": 0.4613950252532959, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035434364690445364, "rewards/margins": 0.0807601809501648, "rewards/rejected": -0.0811145156621933, "step": 3599 }, { "epoch": 2.4896265560165975, "grad_norm": 9.794899940490723, "learning_rate": 4.1724296911018906e-05, "log_odds_chosen": 6.337307453155518, "log_odds_ratio": -0.060170188546180725, "logits/chosen": -0.3038662075996399, "logits/rejected": -0.3344561755657196, "logps/chosen": -0.018794970586895943, "logps/rejected": -0.899629533290863, "loss": 2.4075, "nll_loss": 0.5958477258682251, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018794970819726586, "rewards/margins": 0.08808346092700958, "rewards/rejected": -0.08996295928955078, "step": 3600 }, { "epoch": 2.4903181189488244, "grad_norm": 7.350867748260498, "learning_rate": 4.172045489472876e-05, "log_odds_chosen": 7.752102375030518, "log_odds_ratio": -0.0015861605061218143, "logits/chosen": -0.4283638596534729, "logits/rejected": -0.4642946124076843, "logps/chosen": -0.0022205570712685585, "logps/rejected": -1.1298866271972656, "loss": 2.6427, "nll_loss": 0.6605045199394226, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022205570712685585, "rewards/margins": 0.11276662349700928, "rewards/rejected": -0.11298868060112, "step": 3601 }, { "epoch": 2.491009681881051, "grad_norm": 8.670126914978027, "learning_rate": 4.1716612878438604e-05, "log_odds_chosen": 8.038167953491211, "log_odds_ratio": -0.0027964513283222914, "logits/chosen": -0.3971180319786072, "logits/rejected": -0.4683905839920044, "logps/chosen": -0.010728216730058193, "logps/rejected": -1.5991425514221191, "loss": 1.7308, "nll_loss": 0.43241745233535767, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010728216730058193, "rewards/margins": 0.15884143114089966, "rewards/rejected": -0.15991425514221191, "step": 3602 }, { "epoch": 2.491701244813278, "grad_norm": 11.873007774353027, "learning_rate": 4.1712770862148456e-05, "log_odds_chosen": 7.145326614379883, "log_odds_ratio": -0.01985153742134571, "logits/chosen": -0.5149418115615845, "logits/rejected": -0.5132201910018921, "logps/chosen": -0.027801502496004105, "logps/rejected": -1.4563482999801636, "loss": 2.5473, "nll_loss": 0.6348486542701721, "rewards/accuracies": 1.0, "rewards/chosen": -0.002780150156468153, "rewards/margins": 0.1428546905517578, "rewards/rejected": -0.14563484489917755, "step": 3603 }, { "epoch": 2.492392807745505, "grad_norm": 6.793402194976807, "learning_rate": 4.170892884585831e-05, "log_odds_chosen": 6.222506046295166, "log_odds_ratio": -0.09904603660106659, "logits/chosen": -0.5928165912628174, "logits/rejected": -0.5925490856170654, "logps/chosen": -0.03379706293344498, "logps/rejected": -1.099259853363037, "loss": 3.1783, "nll_loss": 0.7846640348434448, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033797065261751413, "rewards/margins": 0.10654628276824951, "rewards/rejected": -0.10992599278688431, "step": 3604 }, { "epoch": 2.4930843706777317, "grad_norm": 8.131731033325195, "learning_rate": 4.1705086829568154e-05, "log_odds_chosen": 7.854827880859375, "log_odds_ratio": -0.013267126865684986, "logits/chosen": -0.8261618614196777, "logits/rejected": -0.8810702562332153, "logps/chosen": -0.004752838518470526, "logps/rejected": -1.3137333393096924, "loss": 2.0157, "nll_loss": 0.5025960206985474, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047528385766781867, "rewards/margins": 0.13089805841445923, "rewards/rejected": -0.131373330950737, "step": 3605 }, { "epoch": 2.4937759336099585, "grad_norm": 8.815180778503418, "learning_rate": 4.1701244813278014e-05, "log_odds_chosen": 7.279289245605469, "log_odds_ratio": -0.05281849950551987, "logits/chosen": -0.7257159352302551, "logits/rejected": -0.823196530342102, "logps/chosen": -0.027641449123620987, "logps/rejected": -1.5369056463241577, "loss": 3.0359, "nll_loss": 0.753699779510498, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027641449123620987, "rewards/margins": 0.150926411151886, "rewards/rejected": -0.15369057655334473, "step": 3606 }, { "epoch": 2.4944674965421854, "grad_norm": 9.75115966796875, "learning_rate": 4.169740279698786e-05, "log_odds_chosen": 6.750976085662842, "log_odds_ratio": -0.052393123507499695, "logits/chosen": -0.4319000244140625, "logits/rejected": -0.4197804629802704, "logps/chosen": -0.03174269199371338, "logps/rejected": -1.9581066370010376, "loss": 2.3748, "nll_loss": 0.5884555578231812, "rewards/accuracies": 1.0, "rewards/chosen": -0.003174269339069724, "rewards/margins": 0.1926364153623581, "rewards/rejected": -0.19581067562103271, "step": 3607 }, { "epoch": 2.495159059474412, "grad_norm": 9.596985816955566, "learning_rate": 4.169356078069771e-05, "log_odds_chosen": 4.707325458526611, "log_odds_ratio": -0.47428303956985474, "logits/chosen": -0.7324930429458618, "logits/rejected": -0.713641881942749, "logps/chosen": -0.119391530752182, "logps/rejected": -1.1009997129440308, "loss": 2.5459, "nll_loss": 0.5890485048294067, "rewards/accuracies": 0.75, "rewards/chosen": -0.0119391530752182, "rewards/margins": 0.09816082566976547, "rewards/rejected": -0.11009997874498367, "step": 3608 }, { "epoch": 2.495850622406639, "grad_norm": 4.151665210723877, "learning_rate": 4.1689718764407564e-05, "log_odds_chosen": 7.3658833503723145, "log_odds_ratio": -0.009625360369682312, "logits/chosen": -0.3350619673728943, "logits/rejected": -0.45576098561286926, "logps/chosen": -0.003950429614633322, "logps/rejected": -0.8261271715164185, "loss": 1.8249, "nll_loss": 0.4552645683288574, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003950429381802678, "rewards/margins": 0.08221767842769623, "rewards/rejected": -0.08261272311210632, "step": 3609 }, { "epoch": 2.496542185338866, "grad_norm": 6.8638997077941895, "learning_rate": 4.1685876748117417e-05, "log_odds_chosen": 7.412099838256836, "log_odds_ratio": -0.0016858414746820927, "logits/chosen": -0.4631730914115906, "logits/rejected": -0.539579451084137, "logps/chosen": -0.03612817823886871, "logps/rejected": -1.7752039432525635, "loss": 1.7417, "nll_loss": 0.4352552592754364, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036128174979239702, "rewards/margins": 0.1739075779914856, "rewards/rejected": -0.17752039432525635, "step": 3610 }, { "epoch": 2.4972337482710927, "grad_norm": 9.153830528259277, "learning_rate": 4.168203473182726e-05, "log_odds_chosen": 8.275040626525879, "log_odds_ratio": -0.0027329260483384132, "logits/chosen": -0.6889567971229553, "logits/rejected": -0.7413190007209778, "logps/chosen": -0.005626749712973833, "logps/rejected": -1.3126780986785889, "loss": 2.9899, "nll_loss": 0.7472079992294312, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005626750062219799, "rewards/margins": 0.13070513308048248, "rewards/rejected": -0.13126781582832336, "step": 3611 }, { "epoch": 2.4979253112033195, "grad_norm": 9.003024101257324, "learning_rate": 4.1678192715537115e-05, "log_odds_chosen": 8.323738098144531, "log_odds_ratio": -0.012034624814987183, "logits/chosen": -0.8074311017990112, "logits/rejected": -0.8693141937255859, "logps/chosen": -0.05786255747079849, "logps/rejected": -1.7432314157485962, "loss": 2.1424, "nll_loss": 0.5343984961509705, "rewards/accuracies": 1.0, "rewards/chosen": -0.0057862563990056515, "rewards/margins": 0.16853688657283783, "rewards/rejected": -0.17432314157485962, "step": 3612 }, { "epoch": 2.4986168741355463, "grad_norm": 8.100503921508789, "learning_rate": 4.167435069924697e-05, "log_odds_chosen": 6.446010112762451, "log_odds_ratio": -0.12480390071868896, "logits/chosen": -0.4934987425804138, "logits/rejected": -0.5425033569335938, "logps/chosen": -0.045704033225774765, "logps/rejected": -1.0807603597640991, "loss": 2.082, "nll_loss": 0.5080088973045349, "rewards/accuracies": 0.875, "rewards/chosen": -0.004570403136312962, "rewards/margins": 0.10350564122200012, "rewards/rejected": -0.10807604342699051, "step": 3613 }, { "epoch": 2.499308437067773, "grad_norm": 9.035955429077148, "learning_rate": 4.167050868295681e-05, "log_odds_chosen": 5.936398983001709, "log_odds_ratio": -0.4109426736831665, "logits/chosen": -0.48561152815818787, "logits/rejected": -0.5519176721572876, "logps/chosen": -0.058604124933481216, "logps/rejected": -1.421567678451538, "loss": 2.1311, "nll_loss": 0.49168580770492554, "rewards/accuracies": 0.875, "rewards/chosen": -0.005860412493348122, "rewards/margins": 0.1362963616847992, "rewards/rejected": -0.14215677976608276, "step": 3614 }, { "epoch": 2.5, "grad_norm": 9.259349822998047, "learning_rate": 4.166666666666667e-05, "log_odds_chosen": 4.857237815856934, "log_odds_ratio": -0.47869187593460083, "logits/chosen": -0.548782467842102, "logits/rejected": -0.5626112818717957, "logps/chosen": -0.06998100131750107, "logps/rejected": -0.7982571125030518, "loss": 2.3702, "nll_loss": 0.5446842908859253, "rewards/accuracies": 0.75, "rewards/chosen": -0.006998100318014622, "rewards/margins": 0.07282762229442596, "rewards/rejected": -0.07982571423053741, "step": 3615 }, { "epoch": 2.500691562932227, "grad_norm": 11.092930793762207, "learning_rate": 4.166282465037652e-05, "log_odds_chosen": 7.3806047439575195, "log_odds_ratio": -0.007463864050805569, "logits/chosen": -0.9096285104751587, "logits/rejected": -0.9700690507888794, "logps/chosen": -0.013421340845525265, "logps/rejected": -1.608832597732544, "loss": 2.8723, "nll_loss": 0.7173298597335815, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013421342009678483, "rewards/margins": 0.15954113006591797, "rewards/rejected": -0.16088326275348663, "step": 3616 }, { "epoch": 2.5013831258644537, "grad_norm": 6.884032726287842, "learning_rate": 4.165898263408637e-05, "log_odds_chosen": 8.47869873046875, "log_odds_ratio": -0.006416505668312311, "logits/chosen": -0.5534612536430359, "logits/rejected": -0.5722460150718689, "logps/chosen": -0.003985046874731779, "logps/rejected": -1.439831018447876, "loss": 2.0179, "nll_loss": 0.5038344860076904, "rewards/accuracies": 1.0, "rewards/chosen": -0.000398504693293944, "rewards/margins": 0.14358460903167725, "rewards/rejected": -0.14398311078548431, "step": 3617 }, { "epoch": 2.5020746887966805, "grad_norm": 6.384535789489746, "learning_rate": 4.165514061779622e-05, "log_odds_chosen": 8.993051528930664, "log_odds_ratio": -0.0003727722796611488, "logits/chosen": -0.7564731240272522, "logits/rejected": -0.7366716265678406, "logps/chosen": -0.0005397037602961063, "logps/rejected": -1.3181127309799194, "loss": 2.341, "nll_loss": 0.5852185487747192, "rewards/accuracies": 1.0, "rewards/chosen": -5.3970376029610634e-05, "rewards/margins": 0.13175730407238007, "rewards/rejected": -0.13181129097938538, "step": 3618 }, { "epoch": 2.5027662517289073, "grad_norm": 7.561714172363281, "learning_rate": 4.1651298601506075e-05, "log_odds_chosen": 7.267178058624268, "log_odds_ratio": -0.003275883849710226, "logits/chosen": -0.7403993010520935, "logits/rejected": -0.7949719429016113, "logps/chosen": -0.00556724751368165, "logps/rejected": -1.3268835544586182, "loss": 2.1509, "nll_loss": 0.5374022722244263, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005567247862927616, "rewards/margins": 0.1321316510438919, "rewards/rejected": -0.13268837332725525, "step": 3619 }, { "epoch": 2.503457814661134, "grad_norm": 7.276869297027588, "learning_rate": 4.164745658521592e-05, "log_odds_chosen": 9.333890914916992, "log_odds_ratio": -0.0008068106253631413, "logits/chosen": -0.6412614583969116, "logits/rejected": -0.6887497901916504, "logps/chosen": -0.0008464233251288533, "logps/rejected": -1.1573611497879028, "loss": 1.6581, "nll_loss": 0.41444694995880127, "rewards/accuracies": 1.0, "rewards/chosen": -8.464233542326838e-05, "rewards/margins": 0.11565147340297699, "rewards/rejected": -0.11573611199855804, "step": 3620 }, { "epoch": 2.504149377593361, "grad_norm": 6.960307598114014, "learning_rate": 4.164361456892577e-05, "log_odds_chosen": 6.703517913818359, "log_odds_ratio": -0.022511044517159462, "logits/chosen": -0.7467190623283386, "logits/rejected": -0.7689237594604492, "logps/chosen": -0.014704996719956398, "logps/rejected": -1.5688300132751465, "loss": 2.1581, "nll_loss": 0.5372787714004517, "rewards/accuracies": 1.0, "rewards/chosen": -0.001470499555580318, "rewards/margins": 0.155412495136261, "rewards/rejected": -0.15688300132751465, "step": 3621 }, { "epoch": 2.504840940525588, "grad_norm": 8.569221496582031, "learning_rate": 4.1639772552635626e-05, "log_odds_chosen": 6.749528884887695, "log_odds_ratio": -0.12193821370601654, "logits/chosen": -0.2547239065170288, "logits/rejected": -0.34872543811798096, "logps/chosen": -0.04174911230802536, "logps/rejected": -1.5677011013031006, "loss": 2.2557, "nll_loss": 0.5517250895500183, "rewards/accuracies": 1.0, "rewards/chosen": -0.004174911882728338, "rewards/margins": 0.15259522199630737, "rewards/rejected": -0.15677013993263245, "step": 3622 }, { "epoch": 2.5055325034578146, "grad_norm": 7.080369472503662, "learning_rate": 4.163593053634547e-05, "log_odds_chosen": 7.612618923187256, "log_odds_ratio": -0.0030858798418194056, "logits/chosen": -0.45897969603538513, "logits/rejected": -0.5010754466056824, "logps/chosen": -0.03506157547235489, "logps/rejected": -1.5567389726638794, "loss": 2.1821, "nll_loss": 0.5452094078063965, "rewards/accuracies": 1.0, "rewards/chosen": -0.003506158012896776, "rewards/margins": 0.15216773748397827, "rewards/rejected": -0.15567392110824585, "step": 3623 }, { "epoch": 2.5062240663900415, "grad_norm": 14.771923065185547, "learning_rate": 4.163208852005533e-05, "log_odds_chosen": 7.967820167541504, "log_odds_ratio": -0.002373651135712862, "logits/chosen": -0.7408491373062134, "logits/rejected": -0.8213186264038086, "logps/chosen": -0.005303661338984966, "logps/rejected": -1.6401475667953491, "loss": 3.355, "nll_loss": 0.8385175466537476, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005303661455400288, "rewards/margins": 0.1634843945503235, "rewards/rejected": -0.1640147566795349, "step": 3624 }, { "epoch": 2.5069156293222683, "grad_norm": 5.273161888122559, "learning_rate": 4.1628246503765176e-05, "log_odds_chosen": 6.562053680419922, "log_odds_ratio": -0.06032273545861244, "logits/chosen": -0.3231491446495056, "logits/rejected": -0.3876674771308899, "logps/chosen": -0.023305881768465042, "logps/rejected": -0.8332839012145996, "loss": 1.8313, "nll_loss": 0.45178982615470886, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023305879440158606, "rewards/margins": 0.08099780976772308, "rewards/rejected": -0.08332839608192444, "step": 3625 }, { "epoch": 2.507607192254495, "grad_norm": 9.690799713134766, "learning_rate": 4.162440448747503e-05, "log_odds_chosen": 6.092679977416992, "log_odds_ratio": -0.023633258417248726, "logits/chosen": -0.6088852882385254, "logits/rejected": -0.7097447514533997, "logps/chosen": -0.05217176675796509, "logps/rejected": -1.6721185445785522, "loss": 2.3207, "nll_loss": 0.5778143405914307, "rewards/accuracies": 1.0, "rewards/chosen": -0.005217176862061024, "rewards/margins": 0.16199468076229095, "rewards/rejected": -0.1672118604183197, "step": 3626 }, { "epoch": 2.508298755186722, "grad_norm": 9.050857543945312, "learning_rate": 4.162056247118488e-05, "log_odds_chosen": 8.486778259277344, "log_odds_ratio": -0.001319223316386342, "logits/chosen": -0.47137826681137085, "logits/rejected": -0.5116744041442871, "logps/chosen": -0.006257123313844204, "logps/rejected": -1.4125785827636719, "loss": 2.1905, "nll_loss": 0.547484278678894, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006257123895920813, "rewards/margins": 0.14063213765621185, "rewards/rejected": -0.1412578523159027, "step": 3627 }, { "epoch": 2.508990318118949, "grad_norm": 7.497647285461426, "learning_rate": 4.1616720454894733e-05, "log_odds_chosen": 6.012910842895508, "log_odds_ratio": -0.14136555790901184, "logits/chosen": -0.2557965815067291, "logits/rejected": -0.3421425521373749, "logps/chosen": -0.026772573590278625, "logps/rejected": -1.0495741367340088, "loss": 2.4656, "nll_loss": 0.6022545099258423, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026772571727633476, "rewards/margins": 0.1022801548242569, "rewards/rejected": -0.10495741665363312, "step": 3628 }, { "epoch": 2.5096818810511756, "grad_norm": 6.901090145111084, "learning_rate": 4.161287843860458e-05, "log_odds_chosen": 6.562984466552734, "log_odds_ratio": -0.045680299401283264, "logits/chosen": -0.601599931716919, "logits/rejected": -0.5920424461364746, "logps/chosen": -0.01585603505373001, "logps/rejected": -1.0223904848098755, "loss": 1.7887, "nll_loss": 0.4426080584526062, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015856034588068724, "rewards/margins": 0.10065343976020813, "rewards/rejected": -0.10223904997110367, "step": 3629 }, { "epoch": 2.5103734439834025, "grad_norm": 5.661782741546631, "learning_rate": 4.160903642231443e-05, "log_odds_chosen": 7.5030975341796875, "log_odds_ratio": -0.023007739335298538, "logits/chosen": -0.5051695704460144, "logits/rejected": -0.5527991652488708, "logps/chosen": -0.01834404654800892, "logps/rejected": -1.1878063678741455, "loss": 1.4083, "nll_loss": 0.3497661054134369, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018344044219702482, "rewards/margins": 0.11694624274969101, "rewards/rejected": -0.11878064274787903, "step": 3630 }, { "epoch": 2.5110650069156293, "grad_norm": 6.964871883392334, "learning_rate": 4.1605194406024284e-05, "log_odds_chosen": 7.265258312225342, "log_odds_ratio": -0.052114978432655334, "logits/chosen": -0.512820839881897, "logits/rejected": -0.5915680527687073, "logps/chosen": -0.03276847302913666, "logps/rejected": -1.5541868209838867, "loss": 1.5971, "nll_loss": 0.3940580487251282, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032768475357443094, "rewards/margins": 0.15214183926582336, "rewards/rejected": -0.15541867911815643, "step": 3631 }, { "epoch": 2.511756569847856, "grad_norm": 6.527342796325684, "learning_rate": 4.160135238973413e-05, "log_odds_chosen": 8.185376167297363, "log_odds_ratio": -0.0033926228061318398, "logits/chosen": -0.7429603338241577, "logits/rejected": -0.7161184549331665, "logps/chosen": -0.01603500172495842, "logps/rejected": -1.4270488023757935, "loss": 1.7159, "nll_loss": 0.4286254346370697, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016035002190619707, "rewards/margins": 0.1411013901233673, "rewards/rejected": -0.14270488917827606, "step": 3632 }, { "epoch": 2.512448132780083, "grad_norm": 10.357183456420898, "learning_rate": 4.159751037344399e-05, "log_odds_chosen": 8.630908966064453, "log_odds_ratio": -0.000405243132263422, "logits/chosen": -0.7257434725761414, "logits/rejected": -0.8590246438980103, "logps/chosen": -0.0009773027850314975, "logps/rejected": -1.3986811637878418, "loss": 2.2534, "nll_loss": 0.5633000731468201, "rewards/accuracies": 1.0, "rewards/chosen": -9.773028432391584e-05, "rewards/margins": 0.13977038860321045, "rewards/rejected": -0.1398681253194809, "step": 3633 }, { "epoch": 2.5131396957123098, "grad_norm": 12.191122055053711, "learning_rate": 4.1593668357153835e-05, "log_odds_chosen": 6.413578987121582, "log_odds_ratio": -0.006660694722086191, "logits/chosen": -0.7182711958885193, "logits/rejected": -0.7781530022621155, "logps/chosen": -0.01736205443739891, "logps/rejected": -1.2246630191802979, "loss": 3.0462, "nll_loss": 0.7608934640884399, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017362055368721485, "rewards/margins": 0.12073009461164474, "rewards/rejected": -0.12246629595756531, "step": 3634 }, { "epoch": 2.5138312586445366, "grad_norm": 6.257330417633057, "learning_rate": 4.158982634086369e-05, "log_odds_chosen": 5.112867832183838, "log_odds_ratio": -0.13929276168346405, "logits/chosen": -0.5829563140869141, "logits/rejected": -0.5793777704238892, "logps/chosen": -0.047521110624074936, "logps/rejected": -0.9819206595420837, "loss": 2.4791, "nll_loss": 0.605844259262085, "rewards/accuracies": 0.875, "rewards/chosen": -0.004752111155539751, "rewards/margins": 0.09343995153903961, "rewards/rejected": -0.09819206595420837, "step": 3635 }, { "epoch": 2.5145228215767634, "grad_norm": 19.461641311645508, "learning_rate": 4.158598432457354e-05, "log_odds_chosen": 7.858949184417725, "log_odds_ratio": -0.027700483798980713, "logits/chosen": -0.5611499547958374, "logits/rejected": -0.6735534071922302, "logps/chosen": -0.030950110405683517, "logps/rejected": -1.3868426084518433, "loss": 1.8722, "nll_loss": 0.4652819037437439, "rewards/accuracies": 1.0, "rewards/chosen": -0.003095010994002223, "rewards/margins": 0.13558925688266754, "rewards/rejected": -0.13868427276611328, "step": 3636 }, { "epoch": 2.5152143845089903, "grad_norm": 4.125360488891602, "learning_rate": 4.158214230828339e-05, "log_odds_chosen": 8.624306678771973, "log_odds_ratio": -0.0006575646111741662, "logits/chosen": -0.2748103737831116, "logits/rejected": -0.2165181040763855, "logps/chosen": -0.0003640328941401094, "logps/rejected": -0.9175252914428711, "loss": 1.2471, "nll_loss": 0.3117210865020752, "rewards/accuracies": 1.0, "rewards/chosen": -3.640328941401094e-05, "rewards/margins": 0.09171614050865173, "rewards/rejected": -0.0917525440454483, "step": 3637 }, { "epoch": 2.515905947441217, "grad_norm": 7.473677158355713, "learning_rate": 4.157830029199324e-05, "log_odds_chosen": 6.285887718200684, "log_odds_ratio": -0.08532913774251938, "logits/chosen": -0.5009844303131104, "logits/rejected": -0.5384580492973328, "logps/chosen": -0.02979310043156147, "logps/rejected": -0.9416558742523193, "loss": 2.2114, "nll_loss": 0.5443115234375, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029793099965900183, "rewards/margins": 0.0911862775683403, "rewards/rejected": -0.09416559338569641, "step": 3638 }, { "epoch": 2.516597510373444, "grad_norm": 12.419747352600098, "learning_rate": 4.157445827570309e-05, "log_odds_chosen": 8.659000396728516, "log_odds_ratio": -0.048959698528051376, "logits/chosen": -0.46966612339019775, "logits/rejected": -0.5531617403030396, "logps/chosen": -0.008436794392764568, "logps/rejected": -1.7925329208374023, "loss": 2.0447, "nll_loss": 0.5062693953514099, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008436795906163752, "rewards/margins": 0.1784096211194992, "rewards/rejected": -0.17925329506397247, "step": 3639 }, { "epoch": 2.5172890733056708, "grad_norm": 8.426016807556152, "learning_rate": 4.157061625941294e-05, "log_odds_chosen": 8.495464324951172, "log_odds_ratio": -0.03560282662510872, "logits/chosen": -0.8380284309387207, "logits/rejected": -0.9043651819229126, "logps/chosen": -0.010453960858285427, "logps/rejected": -1.4786534309387207, "loss": 1.5629, "nll_loss": 0.38715261220932007, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010453959694132209, "rewards/margins": 0.14681994915008545, "rewards/rejected": -0.14786535501480103, "step": 3640 }, { "epoch": 2.5179806362378976, "grad_norm": 5.699162483215332, "learning_rate": 4.156677424312279e-05, "log_odds_chosen": 7.089378356933594, "log_odds_ratio": -0.006637131329625845, "logits/chosen": -0.15468713641166687, "logits/rejected": -0.1885947287082672, "logps/chosen": -0.06389021128416061, "logps/rejected": -1.86360502243042, "loss": 2.1492, "nll_loss": 0.5366443991661072, "rewards/accuracies": 1.0, "rewards/chosen": -0.006389021407812834, "rewards/margins": 0.17997147142887115, "rewards/rejected": -0.18636049330234528, "step": 3641 }, { "epoch": 2.5186721991701244, "grad_norm": 8.707549095153809, "learning_rate": 4.156293222683265e-05, "log_odds_chosen": 7.686588287353516, "log_odds_ratio": -0.003062628209590912, "logits/chosen": -0.936684787273407, "logits/rejected": -1.0674530267715454, "logps/chosen": -0.007867519743740559, "logps/rejected": -1.4413552284240723, "loss": 2.7943, "nll_loss": 0.6982684135437012, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007867519743740559, "rewards/margins": 0.14334876835346222, "rewards/rejected": -0.14413553476333618, "step": 3642 }, { "epoch": 2.5193637621023512, "grad_norm": 7.398531913757324, "learning_rate": 4.155909021054249e-05, "log_odds_chosen": 6.667263507843018, "log_odds_ratio": -0.12674516439437866, "logits/chosen": -0.4337894320487976, "logits/rejected": -0.45456454157829285, "logps/chosen": -0.04140767455101013, "logps/rejected": -1.1496739387512207, "loss": 2.1738, "nll_loss": 0.5307748913764954, "rewards/accuracies": 0.875, "rewards/chosen": -0.004140767734497786, "rewards/margins": 0.11082662642002106, "rewards/rejected": -0.11496739089488983, "step": 3643 }, { "epoch": 2.520055325034578, "grad_norm": 8.9595365524292, "learning_rate": 4.1555248194252345e-05, "log_odds_chosen": 8.366384506225586, "log_odds_ratio": -0.007697094231843948, "logits/chosen": -0.702767550945282, "logits/rejected": -0.7329120635986328, "logps/chosen": -0.007878115400671959, "logps/rejected": -1.8763498067855835, "loss": 1.952, "nll_loss": 0.48721975088119507, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007878115284256637, "rewards/margins": 0.18684718012809753, "rewards/rejected": -0.18763497471809387, "step": 3644 }, { "epoch": 2.520746887966805, "grad_norm": 9.695239067077637, "learning_rate": 4.15514061779622e-05, "log_odds_chosen": 4.558919906616211, "log_odds_ratio": -0.2631775140762329, "logits/chosen": -0.6078516244888306, "logits/rejected": -0.5811923742294312, "logps/chosen": -0.047537729144096375, "logps/rejected": -0.8359494805335999, "loss": 2.5318, "nll_loss": 0.6066234111785889, "rewards/accuracies": 0.875, "rewards/chosen": -0.00475377356633544, "rewards/margins": 0.0788411796092987, "rewards/rejected": -0.08359494805335999, "step": 3645 }, { "epoch": 2.5214384508990317, "grad_norm": 8.223495483398438, "learning_rate": 4.154756416167205e-05, "log_odds_chosen": 9.086836814880371, "log_odds_ratio": -0.0041122944094240665, "logits/chosen": -0.5973942875862122, "logits/rejected": -0.6405963897705078, "logps/chosen": -0.008441880345344543, "logps/rejected": -1.617875337600708, "loss": 2.6734, "nll_loss": 0.6679355502128601, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008441880927421153, "rewards/margins": 0.16094335913658142, "rewards/rejected": -0.16178753972053528, "step": 3646 }, { "epoch": 2.5221300138312586, "grad_norm": 4.186194896697998, "learning_rate": 4.1543722145381896e-05, "log_odds_chosen": 7.946590423583984, "log_odds_ratio": -0.04126410558819771, "logits/chosen": -0.6535416841506958, "logits/rejected": -0.6662824153900146, "logps/chosen": -0.010252664797008038, "logps/rejected": -1.1775137186050415, "loss": 2.225, "nll_loss": 0.552130937576294, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010252664797008038, "rewards/margins": 0.1167261004447937, "rewards/rejected": -0.11775137484073639, "step": 3647 }, { "epoch": 2.5228215767634854, "grad_norm": 7.053506851196289, "learning_rate": 4.153988012909175e-05, "log_odds_chosen": 8.013751983642578, "log_odds_ratio": -0.0022309324704110622, "logits/chosen": -0.44436901807785034, "logits/rejected": -0.5080931186676025, "logps/chosen": -0.048563968390226364, "logps/rejected": -1.8365061283111572, "loss": 2.5409, "nll_loss": 0.6350106596946716, "rewards/accuracies": 1.0, "rewards/chosen": -0.004856396932154894, "rewards/margins": 0.1787942349910736, "rewards/rejected": -0.18365061283111572, "step": 3648 }, { "epoch": 2.5235131396957122, "grad_norm": 8.54772663116455, "learning_rate": 4.15360381128016e-05, "log_odds_chosen": 8.504034996032715, "log_odds_ratio": -0.04370781406760216, "logits/chosen": -0.526552677154541, "logits/rejected": -0.5632598400115967, "logps/chosen": -0.014592758379876614, "logps/rejected": -1.5742847919464111, "loss": 2.3825, "nll_loss": 0.5912644863128662, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014592758379876614, "rewards/margins": 0.15596920251846313, "rewards/rejected": -0.15742847323417664, "step": 3649 }, { "epoch": 2.524204702627939, "grad_norm": 7.6013593673706055, "learning_rate": 4.1532196096511447e-05, "log_odds_chosen": 7.224681854248047, "log_odds_ratio": -0.053471360355615616, "logits/chosen": -0.7720454335212708, "logits/rejected": -0.8293158411979675, "logps/chosen": -0.014203069731593132, "logps/rejected": -1.1382806301116943, "loss": 2.4617, "nll_loss": 0.6100710034370422, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014203068567439914, "rewards/margins": 0.11240774393081665, "rewards/rejected": -0.11382806301116943, "step": 3650 }, { "epoch": 2.524896265560166, "grad_norm": 5.8260955810546875, "learning_rate": 4.1528354080221306e-05, "log_odds_chosen": 4.697497367858887, "log_odds_ratio": -0.16557545959949493, "logits/chosen": -0.7771276831626892, "logits/rejected": -0.7690300941467285, "logps/chosen": -0.05061003565788269, "logps/rejected": -1.3822760581970215, "loss": 2.6081, "nll_loss": 0.6354620456695557, "rewards/accuracies": 0.875, "rewards/chosen": -0.005061003845185041, "rewards/margins": 0.1331666111946106, "rewards/rejected": -0.13822761178016663, "step": 3651 }, { "epoch": 2.5255878284923927, "grad_norm": 10.953791618347168, "learning_rate": 4.152451206393115e-05, "log_odds_chosen": 9.335792541503906, "log_odds_ratio": -0.0005506311426870525, "logits/chosen": -0.826223611831665, "logits/rejected": -0.9384573101997375, "logps/chosen": -0.000618268852122128, "logps/rejected": -1.434199571609497, "loss": 1.8864, "nll_loss": 0.4715338349342346, "rewards/accuracies": 1.0, "rewards/chosen": -6.182688230182976e-05, "rewards/margins": 0.14335814118385315, "rewards/rejected": -0.14341996610164642, "step": 3652 }, { "epoch": 2.5262793914246195, "grad_norm": 6.306936740875244, "learning_rate": 4.1520670047641004e-05, "log_odds_chosen": 6.237020969390869, "log_odds_ratio": -0.03519082069396973, "logits/chosen": -0.5907887816429138, "logits/rejected": -0.6402274966239929, "logps/chosen": -0.012569701299071312, "logps/rejected": -1.149125337600708, "loss": 2.4935, "nll_loss": 0.6198525428771973, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012569701066240668, "rewards/margins": 0.11365556716918945, "rewards/rejected": -0.11491253972053528, "step": 3653 }, { "epoch": 2.5269709543568464, "grad_norm": 9.48086929321289, "learning_rate": 4.1516828031350856e-05, "log_odds_chosen": 9.682076454162598, "log_odds_ratio": -0.00010647479211911559, "logits/chosen": -0.76712965965271, "logits/rejected": -0.744515597820282, "logps/chosen": -0.00026008131681010127, "logps/rejected": -1.4440491199493408, "loss": 1.9111, "nll_loss": 0.4777595102787018, "rewards/accuracies": 1.0, "rewards/chosen": -2.600813240860589e-05, "rewards/margins": 0.1443789005279541, "rewards/rejected": -0.14440491795539856, "step": 3654 }, { "epoch": 2.527662517289073, "grad_norm": 5.191808223724365, "learning_rate": 4.151298601506071e-05, "log_odds_chosen": 6.734824180603027, "log_odds_ratio": -0.12821802496910095, "logits/chosen": -0.4947073459625244, "logits/rejected": -0.5462866425514221, "logps/chosen": -0.060167863965034485, "logps/rejected": -1.258905291557312, "loss": 1.9003, "nll_loss": 0.4622592628002167, "rewards/accuracies": 0.875, "rewards/chosen": -0.0060167862102389336, "rewards/margins": 0.11987375468015671, "rewards/rejected": -0.12589053809642792, "step": 3655 }, { "epoch": 2.5283540802213, "grad_norm": 14.005337715148926, "learning_rate": 4.1509143998770554e-05, "log_odds_chosen": 6.469050407409668, "log_odds_ratio": -0.2855418622493744, "logits/chosen": -0.3703860938549042, "logits/rejected": -0.44764846563339233, "logps/chosen": -0.027607586234807968, "logps/rejected": -0.7636227607727051, "loss": 3.0046, "nll_loss": 0.7226047515869141, "rewards/accuracies": 0.75, "rewards/chosen": -0.002760758623480797, "rewards/margins": 0.07360151410102844, "rewards/rejected": -0.07636226713657379, "step": 3656 }, { "epoch": 2.529045643153527, "grad_norm": 10.278541564941406, "learning_rate": 4.150530198248041e-05, "log_odds_chosen": 7.559301853179932, "log_odds_ratio": -0.005141490139067173, "logits/chosen": -0.977824866771698, "logits/rejected": -1.0137873888015747, "logps/chosen": -0.044321730732917786, "logps/rejected": -1.5173851251602173, "loss": 3.0781, "nll_loss": 0.7690147757530212, "rewards/accuracies": 1.0, "rewards/chosen": -0.004432173445820808, "rewards/margins": 0.14730635285377502, "rewards/rejected": -0.1517385095357895, "step": 3657 }, { "epoch": 2.5297372060857537, "grad_norm": 7.183173656463623, "learning_rate": 4.150145996619026e-05, "log_odds_chosen": 5.152936935424805, "log_odds_ratio": -0.27972304821014404, "logits/chosen": -0.7417833209037781, "logits/rejected": -0.7697551250457764, "logps/chosen": -0.04818735271692276, "logps/rejected": -0.8900142908096313, "loss": 2.265, "nll_loss": 0.5382810831069946, "rewards/accuracies": 0.75, "rewards/chosen": -0.004818734712898731, "rewards/margins": 0.08418269455432892, "rewards/rejected": -0.08900142461061478, "step": 3658 }, { "epoch": 2.5304287690179805, "grad_norm": 6.066020965576172, "learning_rate": 4.1497617949900105e-05, "log_odds_chosen": 9.213735580444336, "log_odds_ratio": -0.0029404088854789734, "logits/chosen": -0.8174360394477844, "logits/rejected": -0.8695136308670044, "logps/chosen": -0.005035539623349905, "logps/rejected": -1.9761308431625366, "loss": 1.7436, "nll_loss": 0.4355984330177307, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005035539506934583, "rewards/margins": 0.19710955023765564, "rewards/rejected": -0.19761309027671814, "step": 3659 }, { "epoch": 2.5311203319502074, "grad_norm": 8.873180389404297, "learning_rate": 4.1493775933609964e-05, "log_odds_chosen": 7.810351371765137, "log_odds_ratio": -0.004365085158497095, "logits/chosen": -0.9499499797821045, "logits/rejected": -0.9649522304534912, "logps/chosen": -0.0038736488204449415, "logps/rejected": -1.2680219411849976, "loss": 2.5343, "nll_loss": 0.6331478357315063, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038736488204449415, "rewards/margins": 0.12641483545303345, "rewards/rejected": -0.12680219113826752, "step": 3660 }, { "epoch": 2.531811894882434, "grad_norm": 10.1052885055542, "learning_rate": 4.148993391731981e-05, "log_odds_chosen": 6.543613433837891, "log_odds_ratio": -0.043987736105918884, "logits/chosen": -0.9714975953102112, "logits/rejected": -0.9878481030464172, "logps/chosen": -0.024164706468582153, "logps/rejected": -1.5076501369476318, "loss": 2.6552, "nll_loss": 0.6593921184539795, "rewards/accuracies": 1.0, "rewards/chosen": -0.002416470320895314, "rewards/margins": 0.14834854006767273, "rewards/rejected": -0.15076500177383423, "step": 3661 }, { "epoch": 2.532503457814661, "grad_norm": 8.464655876159668, "learning_rate": 4.148609190102966e-05, "log_odds_chosen": 7.593095302581787, "log_odds_ratio": -0.012364407069981098, "logits/chosen": -0.8906121253967285, "logits/rejected": -0.9512820243835449, "logps/chosen": -0.01501818560063839, "logps/rejected": -1.4770665168762207, "loss": 2.2662, "nll_loss": 0.5653175711631775, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015018185367807746, "rewards/margins": 0.14620482921600342, "rewards/rejected": -0.14770665764808655, "step": 3662 }, { "epoch": 2.533195020746888, "grad_norm": 8.537209510803223, "learning_rate": 4.1482249884739515e-05, "log_odds_chosen": 7.7734456062316895, "log_odds_ratio": -0.010539239272475243, "logits/chosen": -0.37151771783828735, "logits/rejected": -0.40873855352401733, "logps/chosen": -0.026093240827322006, "logps/rejected": -1.6680834293365479, "loss": 2.0097, "nll_loss": 0.5013793110847473, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026093239430338144, "rewards/margins": 0.16419902443885803, "rewards/rejected": -0.1668083518743515, "step": 3663 }, { "epoch": 2.5338865836791147, "grad_norm": 9.18942928314209, "learning_rate": 4.147840786844937e-05, "log_odds_chosen": 8.71163558959961, "log_odds_ratio": -0.0007794310804456472, "logits/chosen": -0.5640786290168762, "logits/rejected": -0.6506339311599731, "logps/chosen": -0.0010314120445400476, "logps/rejected": -1.8243160247802734, "loss": 2.0071, "nll_loss": 0.5017048120498657, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010314121027477086, "rewards/margins": 0.182328462600708, "rewards/rejected": -0.18243160843849182, "step": 3664 }, { "epoch": 2.5345781466113415, "grad_norm": 6.1480584144592285, "learning_rate": 4.147456585215921e-05, "log_odds_chosen": 8.104879379272461, "log_odds_ratio": -0.002435260685160756, "logits/chosen": -0.5540738105773926, "logits/rejected": -0.6117613911628723, "logps/chosen": -0.0028951477725058794, "logps/rejected": -1.5245311260223389, "loss": 1.6881, "nll_loss": 0.42178869247436523, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002895148063544184, "rewards/margins": 0.15216359496116638, "rewards/rejected": -0.15245312452316284, "step": 3665 }, { "epoch": 2.5352697095435683, "grad_norm": 14.560870170593262, "learning_rate": 4.1470723835869065e-05, "log_odds_chosen": 6.843084812164307, "log_odds_ratio": -0.24995103478431702, "logits/chosen": -0.35342735052108765, "logits/rejected": -0.46956032514572144, "logps/chosen": -0.032915253192186356, "logps/rejected": -1.3369067907333374, "loss": 3.4494, "nll_loss": 0.8373644948005676, "rewards/accuracies": 0.875, "rewards/chosen": -0.003291525412350893, "rewards/margins": 0.13039915263652802, "rewards/rejected": -0.13369068503379822, "step": 3666 }, { "epoch": 2.535961272475795, "grad_norm": 6.680951118469238, "learning_rate": 4.146688181957892e-05, "log_odds_chosen": 7.095094680786133, "log_odds_ratio": -0.0850469321012497, "logits/chosen": -0.43845391273498535, "logits/rejected": -0.48212161660194397, "logps/chosen": -0.024386655539274216, "logps/rejected": -1.4623682498931885, "loss": 2.0037, "nll_loss": 0.4924285411834717, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024386656004935503, "rewards/margins": 0.14379817247390747, "rewards/rejected": -0.1462368369102478, "step": 3667 }, { "epoch": 2.536652835408022, "grad_norm": 9.7996187210083, "learning_rate": 4.1463039803288763e-05, "log_odds_chosen": 8.075040817260742, "log_odds_ratio": -0.004537233617156744, "logits/chosen": -0.26644569635391235, "logits/rejected": -0.3150561451911926, "logps/chosen": -0.0036520101130008698, "logps/rejected": -1.3317700624465942, "loss": 2.0921, "nll_loss": 0.5225600600242615, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003652009763754904, "rewards/margins": 0.1328117996454239, "rewards/rejected": -0.1331769973039627, "step": 3668 }, { "epoch": 2.537344398340249, "grad_norm": 8.300936698913574, "learning_rate": 4.145919778699862e-05, "log_odds_chosen": 8.642348289489746, "log_odds_ratio": -0.013115583918988705, "logits/chosen": -0.6805007457733154, "logits/rejected": -0.6964966058731079, "logps/chosen": -0.005282273981720209, "logps/rejected": -2.066359281539917, "loss": 2.5309, "nll_loss": 0.631418764591217, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005282273632474244, "rewards/margins": 0.2061077058315277, "rewards/rejected": -0.2066359519958496, "step": 3669 }, { "epoch": 2.5380359612724757, "grad_norm": 7.85097074508667, "learning_rate": 4.145535577070847e-05, "log_odds_chosen": 7.3196611404418945, "log_odds_ratio": -0.0034911674447357655, "logits/chosen": -0.6438291072845459, "logits/rejected": -0.6845942139625549, "logps/chosen": -0.025680480524897575, "logps/rejected": -1.8952102661132812, "loss": 2.6675, "nll_loss": 0.6665199995040894, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025680481921881437, "rewards/margins": 0.18695297837257385, "rewards/rejected": -0.18952102959156036, "step": 3670 }, { "epoch": 2.5387275242047025, "grad_norm": 11.16006088256836, "learning_rate": 4.145151375441832e-05, "log_odds_chosen": 6.697037696838379, "log_odds_ratio": -0.09762268513441086, "logits/chosen": -0.33623313903808594, "logits/rejected": -0.41128477454185486, "logps/chosen": -0.04758386313915253, "logps/rejected": -1.1416031122207642, "loss": 2.5198, "nll_loss": 0.6201927065849304, "rewards/accuracies": 1.0, "rewards/chosen": -0.00475838640704751, "rewards/margins": 0.10940193384885788, "rewards/rejected": -0.11416031420230865, "step": 3671 }, { "epoch": 2.5394190871369293, "grad_norm": 10.982725143432617, "learning_rate": 4.144767173812817e-05, "log_odds_chosen": 6.469160079956055, "log_odds_ratio": -0.0469195693731308, "logits/chosen": -0.312357634305954, "logits/rejected": -0.38012784719467163, "logps/chosen": -0.18392308056354523, "logps/rejected": -1.900977373123169, "loss": 2.5113, "nll_loss": 0.6231358647346497, "rewards/accuracies": 1.0, "rewards/chosen": -0.018392309546470642, "rewards/margins": 0.171705424785614, "rewards/rejected": -0.19009774923324585, "step": 3672 }, { "epoch": 2.540110650069156, "grad_norm": 8.845681190490723, "learning_rate": 4.1443829721838026e-05, "log_odds_chosen": 10.247167587280273, "log_odds_ratio": -0.00015303498366847634, "logits/chosen": -0.7295843958854675, "logits/rejected": -0.7623701095581055, "logps/chosen": -0.00021613975695800036, "logps/rejected": -1.8121672868728638, "loss": 2.4817, "nll_loss": 0.6204196214675903, "rewards/accuracies": 1.0, "rewards/chosen": -2.1613976059597917e-05, "rewards/margins": 0.18119511008262634, "rewards/rejected": -0.18121671676635742, "step": 3673 }, { "epoch": 2.540802213001383, "grad_norm": 8.529345512390137, "learning_rate": 4.143998770554787e-05, "log_odds_chosen": 7.201008319854736, "log_odds_ratio": -0.19989247620105743, "logits/chosen": -0.26101046800613403, "logits/rejected": -0.27898168563842773, "logps/chosen": -0.03462667763233185, "logps/rejected": -1.7183300256729126, "loss": 2.1496, "nll_loss": 0.5173985958099365, "rewards/accuracies": 0.875, "rewards/chosen": -0.003462668042629957, "rewards/margins": 0.1683703362941742, "rewards/rejected": -0.17183300852775574, "step": 3674 }, { "epoch": 2.54149377593361, "grad_norm": 11.021639823913574, "learning_rate": 4.1436145689257724e-05, "log_odds_chosen": 8.368658065795898, "log_odds_ratio": -0.0014843323733657598, "logits/chosen": -0.3710412383079529, "logits/rejected": -0.4252433776855469, "logps/chosen": -0.002052636817097664, "logps/rejected": -1.7162036895751953, "loss": 2.2048, "nll_loss": 0.5510503053665161, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020526369917206466, "rewards/margins": 0.17141512036323547, "rewards/rejected": -0.17162038385868073, "step": 3675 }, { "epoch": 2.5421853388658366, "grad_norm": 7.163419723510742, "learning_rate": 4.1432303672967576e-05, "log_odds_chosen": 7.673587799072266, "log_odds_ratio": -0.13042639195919037, "logits/chosen": -0.08417253941297531, "logits/rejected": -0.20888949930667877, "logps/chosen": -0.022530486807227135, "logps/rejected": -1.327731728553772, "loss": 2.3599, "nll_loss": 0.5769286155700684, "rewards/accuracies": 0.875, "rewards/chosen": -0.0022530488204210997, "rewards/margins": 0.13052012026309967, "rewards/rejected": -0.13277317583560944, "step": 3676 }, { "epoch": 2.5428769017980635, "grad_norm": 6.752707481384277, "learning_rate": 4.142846165667743e-05, "log_odds_chosen": 8.717757225036621, "log_odds_ratio": -0.0008739815093576908, "logits/chosen": -0.856940507888794, "logits/rejected": -0.9300462603569031, "logps/chosen": -0.03579828515648842, "logps/rejected": -2.1495282649993896, "loss": 1.8846, "nll_loss": 0.47106146812438965, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035798284225165844, "rewards/margins": 0.2113730013370514, "rewards/rejected": -0.21495282649993896, "step": 3677 }, { "epoch": 2.5435684647302903, "grad_norm": 6.426164627075195, "learning_rate": 4.142461964038728e-05, "log_odds_chosen": 9.750266075134277, "log_odds_ratio": -0.0001426611270289868, "logits/chosen": -0.3759447932243347, "logits/rejected": -0.5120629072189331, "logps/chosen": -0.0004961431259289384, "logps/rejected": -1.8397492170333862, "loss": 1.7415, "nll_loss": 0.4353630244731903, "rewards/accuracies": 1.0, "rewards/chosen": -4.9614311137702316e-05, "rewards/margins": 0.1839253008365631, "rewards/rejected": -0.18397492170333862, "step": 3678 }, { "epoch": 2.544260027662517, "grad_norm": 11.520225524902344, "learning_rate": 4.142077762409713e-05, "log_odds_chosen": 9.523664474487305, "log_odds_ratio": -0.00046379820560105145, "logits/chosen": -0.46259498596191406, "logits/rejected": -0.5426309108734131, "logps/chosen": -0.0007358100265264511, "logps/rejected": -1.7401684522628784, "loss": 3.2972, "nll_loss": 0.8242548108100891, "rewards/accuracies": 1.0, "rewards/chosen": -7.358100265264511e-05, "rewards/margins": 0.17394328117370605, "rewards/rejected": -0.17401686310768127, "step": 3679 }, { "epoch": 2.544951590594744, "grad_norm": 6.897475719451904, "learning_rate": 4.141693560780698e-05, "log_odds_chosen": 8.941631317138672, "log_odds_ratio": -0.0003182542568538338, "logits/chosen": -0.5127269625663757, "logits/rejected": -0.46197599172592163, "logps/chosen": -0.0016620360547676682, "logps/rejected": -1.2729542255401611, "loss": 2.1741, "nll_loss": 0.5434816479682922, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016620359383523464, "rewards/margins": 0.1271292269229889, "rewards/rejected": -0.12729541957378387, "step": 3680 }, { "epoch": 2.545643153526971, "grad_norm": 3.8709726333618164, "learning_rate": 4.141309359151683e-05, "log_odds_chosen": 8.852423667907715, "log_odds_ratio": -0.0007429651450365782, "logits/chosen": -0.33135154843330383, "logits/rejected": -0.41605496406555176, "logps/chosen": -0.006752867251634598, "logps/rejected": -2.200617551803589, "loss": 1.7138, "nll_loss": 0.4283781051635742, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006752866902388632, "rewards/margins": 0.21938645839691162, "rewards/rejected": -0.22006173431873322, "step": 3681 }, { "epoch": 2.5463347164591976, "grad_norm": 10.108016014099121, "learning_rate": 4.1409251575226684e-05, "log_odds_chosen": 7.953339576721191, "log_odds_ratio": -0.09382897615432739, "logits/chosen": -0.15474338829517365, "logits/rejected": -0.23979714512825012, "logps/chosen": -0.03481658548116684, "logps/rejected": -1.3647801876068115, "loss": 2.6875, "nll_loss": 0.6624833941459656, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034816591069102287, "rewards/margins": 0.13299638032913208, "rewards/rejected": -0.13647803664207458, "step": 3682 }, { "epoch": 2.5470262793914245, "grad_norm": 7.43285608291626, "learning_rate": 4.140540955893653e-05, "log_odds_chosen": 8.776679992675781, "log_odds_ratio": -0.0011820968938991427, "logits/chosen": -0.5200778245925903, "logits/rejected": -0.6396703720092773, "logps/chosen": -0.002887851558625698, "logps/rejected": -1.8419222831726074, "loss": 2.5257, "nll_loss": 0.6313114166259766, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002887851442210376, "rewards/margins": 0.18390345573425293, "rewards/rejected": -0.1841922402381897, "step": 3683 }, { "epoch": 2.5477178423236513, "grad_norm": 5.443092346191406, "learning_rate": 4.140156754264638e-05, "log_odds_chosen": 7.036106109619141, "log_odds_ratio": -0.06073131412267685, "logits/chosen": -0.4874728322029114, "logits/rejected": -0.4316939115524292, "logps/chosen": -0.04359099268913269, "logps/rejected": -1.3618528842926025, "loss": 1.7898, "nll_loss": 0.44136685132980347, "rewards/accuracies": 1.0, "rewards/chosen": -0.004359099082648754, "rewards/margins": 0.13182619214057922, "rewards/rejected": -0.13618530333042145, "step": 3684 }, { "epoch": 2.548409405255878, "grad_norm": 6.47907829284668, "learning_rate": 4.1397725526356235e-05, "log_odds_chosen": 6.502830505371094, "log_odds_ratio": -0.054894234985113144, "logits/chosen": -0.24924349784851074, "logits/rejected": -0.2819105386734009, "logps/chosen": -0.025829501450061798, "logps/rejected": -1.2309153079986572, "loss": 2.0147, "nll_loss": 0.49817532300949097, "rewards/accuracies": 1.0, "rewards/chosen": -0.002582950284704566, "rewards/margins": 0.1205085888504982, "rewards/rejected": -0.12309154123067856, "step": 3685 }, { "epoch": 2.549100968188105, "grad_norm": 9.771394729614258, "learning_rate": 4.139388351006609e-05, "log_odds_chosen": 8.620658874511719, "log_odds_ratio": -0.0012988585513085127, "logits/chosen": -0.5891374945640564, "logits/rejected": -0.6297314763069153, "logps/chosen": -0.00756434490904212, "logps/rejected": -1.4627835750579834, "loss": 2.5286, "nll_loss": 0.6320264339447021, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007564345141872764, "rewards/margins": 0.14552193880081177, "rewards/rejected": -0.14627835154533386, "step": 3686 }, { "epoch": 2.5497925311203318, "grad_norm": 10.010108947753906, "learning_rate": 4.139004149377593e-05, "log_odds_chosen": 8.177577018737793, "log_odds_ratio": -0.0021723266690969467, "logits/chosen": -0.5750257968902588, "logits/rejected": -0.5584626793861389, "logps/chosen": -0.0018111247336491942, "logps/rejected": -1.4841028451919556, "loss": 2.0266, "nll_loss": 0.5064324736595154, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018111246754415333, "rewards/margins": 0.14822916686534882, "rewards/rejected": -0.14841027557849884, "step": 3687 }, { "epoch": 2.5504840940525586, "grad_norm": 8.33242130279541, "learning_rate": 4.1386199477485785e-05, "log_odds_chosen": 7.380660533905029, "log_odds_ratio": -0.07418529689311981, "logits/chosen": -0.39534804224967957, "logits/rejected": -0.3996621072292328, "logps/chosen": -0.019277188926935196, "logps/rejected": -1.3679413795471191, "loss": 1.7864, "nll_loss": 0.4391922354698181, "rewards/accuracies": 1.0, "rewards/chosen": -0.001927718985825777, "rewards/margins": 0.13486641645431519, "rewards/rejected": -0.13679413497447968, "step": 3688 }, { "epoch": 2.5511756569847854, "grad_norm": 35.614662170410156, "learning_rate": 4.138235746119564e-05, "log_odds_chosen": 6.996004104614258, "log_odds_ratio": -0.24035514891147614, "logits/chosen": -0.7962977886199951, "logits/rejected": -0.8157656788825989, "logps/chosen": -0.04621091112494469, "logps/rejected": -0.9972507953643799, "loss": 2.4286, "nll_loss": 0.5831174254417419, "rewards/accuracies": 0.875, "rewards/chosen": -0.004621092230081558, "rewards/margins": 0.09510399401187897, "rewards/rejected": -0.09972508251667023, "step": 3689 }, { "epoch": 2.5518672199170123, "grad_norm": 10.523552894592285, "learning_rate": 4.137851544490548e-05, "log_odds_chosen": 8.526422500610352, "log_odds_ratio": -0.0025308893527835608, "logits/chosen": -0.40642938017845154, "logits/rejected": -0.49298715591430664, "logps/chosen": -0.05294908583164215, "logps/rejected": -1.5810902118682861, "loss": 2.9231, "nll_loss": 0.7305221557617188, "rewards/accuracies": 1.0, "rewards/chosen": -0.005294908303767443, "rewards/margins": 0.152814120054245, "rewards/rejected": -0.15810903906822205, "step": 3690 }, { "epoch": 2.552558782849239, "grad_norm": 9.078492164611816, "learning_rate": 4.137467342861534e-05, "log_odds_chosen": 7.632801055908203, "log_odds_ratio": -0.03201550990343094, "logits/chosen": -0.6973223686218262, "logits/rejected": -0.7350947856903076, "logps/chosen": -0.0043390472419559956, "logps/rejected": -1.2494264841079712, "loss": 2.6931, "nll_loss": 0.6700709462165833, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043390473001636565, "rewards/margins": 0.12450873851776123, "rewards/rejected": -0.12494263797998428, "step": 3691 }, { "epoch": 2.553250345781466, "grad_norm": 7.067032337188721, "learning_rate": 4.137083141232519e-05, "log_odds_chosen": 8.267839431762695, "log_odds_ratio": -0.002087076660245657, "logits/chosen": -0.5631198883056641, "logits/rejected": -0.5623632073402405, "logps/chosen": -0.002745155245065689, "logps/rejected": -1.3989791870117188, "loss": 1.7896, "nll_loss": 0.44719675183296204, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002745155361481011, "rewards/margins": 0.13962340354919434, "rewards/rejected": -0.1398979127407074, "step": 3692 }, { "epoch": 2.5539419087136928, "grad_norm": 5.6397576332092285, "learning_rate": 4.136698939603504e-05, "log_odds_chosen": 8.663679122924805, "log_odds_ratio": -0.0006652399315498769, "logits/chosen": -0.6536080241203308, "logits/rejected": -0.6602993607521057, "logps/chosen": -0.0023707440122962, "logps/rejected": -1.1587908267974854, "loss": 2.405, "nll_loss": 0.6011757850646973, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002370743895880878, "rewards/margins": 0.1156420111656189, "rewards/rejected": -0.11587909609079361, "step": 3693 }, { "epoch": 2.5546334716459196, "grad_norm": 8.868212699890137, "learning_rate": 4.136314737974489e-05, "log_odds_chosen": 6.981932640075684, "log_odds_ratio": -0.21796710789203644, "logits/chosen": -0.44232919812202454, "logits/rejected": -0.4955691993236542, "logps/chosen": -0.02946803905069828, "logps/rejected": -1.4040617942810059, "loss": 1.9888, "nll_loss": 0.47540146112442017, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029468040447682142, "rewards/margins": 0.13745936751365662, "rewards/rejected": -0.14040617644786835, "step": 3694 }, { "epoch": 2.5553250345781464, "grad_norm": 13.996137619018555, "learning_rate": 4.1359305363454746e-05, "log_odds_chosen": 9.852861404418945, "log_odds_ratio": -0.00029456906486302614, "logits/chosen": -0.46493977308273315, "logits/rejected": -0.5792163610458374, "logps/chosen": -0.004407463129609823, "logps/rejected": -2.5801842212677, "loss": 2.9734, "nll_loss": 0.7433305382728577, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004407463420648128, "rewards/margins": 0.25757768750190735, "rewards/rejected": -0.258018434047699, "step": 3695 }, { "epoch": 2.5560165975103732, "grad_norm": 6.141745567321777, "learning_rate": 4.135546334716459e-05, "log_odds_chosen": 8.410773277282715, "log_odds_ratio": -0.0020805567037314177, "logits/chosen": -0.5083537697792053, "logits/rejected": -0.5329819321632385, "logps/chosen": -0.011984552256762981, "logps/rejected": -1.7198209762573242, "loss": 1.2896, "nll_loss": 0.32219621539115906, "rewards/accuracies": 1.0, "rewards/chosen": -0.00119845534209162, "rewards/margins": 0.1707836538553238, "rewards/rejected": -0.17198210954666138, "step": 3696 }, { "epoch": 2.5567081604426, "grad_norm": 14.502395629882812, "learning_rate": 4.1351621330874444e-05, "log_odds_chosen": 6.762268543243408, "log_odds_ratio": -0.03570711612701416, "logits/chosen": -0.43851161003112793, "logits/rejected": -0.4272356629371643, "logps/chosen": -0.019428087398409843, "logps/rejected": -1.2996336221694946, "loss": 2.1978, "nll_loss": 0.5458870530128479, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019428087398409843, "rewards/margins": 0.12802055478096008, "rewards/rejected": -0.12996336817741394, "step": 3697 }, { "epoch": 2.557399723374827, "grad_norm": 9.247694969177246, "learning_rate": 4.1347779314584296e-05, "log_odds_chosen": 8.048039436340332, "log_odds_ratio": -0.011633592657744884, "logits/chosen": -0.6219062805175781, "logits/rejected": -0.7027066946029663, "logps/chosen": -0.006910478230565786, "logps/rejected": -1.1331398487091064, "loss": 2.5269, "nll_loss": 0.6305506229400635, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006910478696227074, "rewards/margins": 0.112622931599617, "rewards/rejected": -0.11331398785114288, "step": 3698 }, { "epoch": 2.5580912863070537, "grad_norm": 11.068485260009766, "learning_rate": 4.134393729829414e-05, "log_odds_chosen": 9.680505752563477, "log_odds_ratio": -0.00019160093506798148, "logits/chosen": -0.8199482560157776, "logits/rejected": -0.8591936230659485, "logps/chosen": -0.0002677099546417594, "logps/rejected": -1.6102635860443115, "loss": 2.397, "nll_loss": 0.5992240905761719, "rewards/accuracies": 1.0, "rewards/chosen": -2.6770994736580178e-05, "rewards/margins": 0.1609995812177658, "rewards/rejected": -0.16102635860443115, "step": 3699 }, { "epoch": 2.5587828492392806, "grad_norm": 9.24276065826416, "learning_rate": 4.1340095282004e-05, "log_odds_chosen": 9.128841400146484, "log_odds_ratio": -0.0005217056022956967, "logits/chosen": -0.8914197087287903, "logits/rejected": -0.9727503061294556, "logps/chosen": -0.001058247173205018, "logps/rejected": -1.5193347930908203, "loss": 3.1986, "nll_loss": 0.7996020317077637, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010582470713416114, "rewards/margins": 0.15182766318321228, "rewards/rejected": -0.151933491230011, "step": 3700 }, { "epoch": 2.5594744121715074, "grad_norm": 17.474658966064453, "learning_rate": 4.133625326571385e-05, "log_odds_chosen": 9.318645477294922, "log_odds_ratio": -0.0022993730381131172, "logits/chosen": -0.8668950796127319, "logits/rejected": -0.9719336628913879, "logps/chosen": -0.009635216556489468, "logps/rejected": -2.347177028656006, "loss": 2.9675, "nll_loss": 0.7416494488716125, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009635217138566077, "rewards/margins": 0.2337542176246643, "rewards/rejected": -0.2347177267074585, "step": 3701 }, { "epoch": 2.5601659751037342, "grad_norm": 9.051102638244629, "learning_rate": 4.13324112494237e-05, "log_odds_chosen": 8.766997337341309, "log_odds_ratio": -0.002480762079358101, "logits/chosen": -0.7824666500091553, "logits/rejected": -0.8516795039176941, "logps/chosen": -0.0019446569494903088, "logps/rejected": -1.526063323020935, "loss": 2.5402, "nll_loss": 0.634802520275116, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001944656833074987, "rewards/margins": 0.15241187810897827, "rewards/rejected": -0.15260633826255798, "step": 3702 }, { "epoch": 2.560857538035961, "grad_norm": 7.871819972991943, "learning_rate": 4.132856923313355e-05, "log_odds_chosen": 7.462924480438232, "log_odds_ratio": -0.014852583408355713, "logits/chosen": -0.57256019115448, "logits/rejected": -0.6143975257873535, "logps/chosen": -0.014655756764113903, "logps/rejected": -1.7680262327194214, "loss": 2.694, "nll_loss": 0.6720025539398193, "rewards/accuracies": 1.0, "rewards/chosen": -0.001465575653128326, "rewards/margins": 0.1753370612859726, "rewards/rejected": -0.1768026351928711, "step": 3703 }, { "epoch": 2.561549100968188, "grad_norm": 13.968018531799316, "learning_rate": 4.1324727216843404e-05, "log_odds_chosen": 6.530374050140381, "log_odds_ratio": -0.1512891799211502, "logits/chosen": -0.767929196357727, "logits/rejected": -0.7999763488769531, "logps/chosen": -0.20367056131362915, "logps/rejected": -2.1344516277313232, "loss": 3.0548, "nll_loss": 0.7485730648040771, "rewards/accuracies": 0.875, "rewards/chosen": -0.020367056131362915, "rewards/margins": 0.19307810068130493, "rewards/rejected": -0.21344517171382904, "step": 3704 }, { "epoch": 2.5622406639004147, "grad_norm": 5.46859073638916, "learning_rate": 4.132088520055325e-05, "log_odds_chosen": 8.344247817993164, "log_odds_ratio": -0.014011223800480366, "logits/chosen": -0.5818994045257568, "logits/rejected": -0.5327481627464294, "logps/chosen": -0.005584258586168289, "logps/rejected": -1.0000637769699097, "loss": 1.9534, "nll_loss": 0.4869387149810791, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005584259051829576, "rewards/margins": 0.09944795817136765, "rewards/rejected": -0.10000638663768768, "step": 3705 }, { "epoch": 2.5629322268326415, "grad_norm": 10.0899019241333, "learning_rate": 4.13170431842631e-05, "log_odds_chosen": 9.544801712036133, "log_odds_ratio": -0.00021236162865534425, "logits/chosen": -0.7554320096969604, "logits/rejected": -0.8102554082870483, "logps/chosen": -0.0018119094893336296, "logps/rejected": -2.1428074836730957, "loss": 2.1968, "nll_loss": 0.5491690635681152, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018119096057489514, "rewards/margins": 0.21409955620765686, "rewards/rejected": -0.21428075432777405, "step": 3706 }, { "epoch": 2.5636237897648684, "grad_norm": 10.008685111999512, "learning_rate": 4.1313201167972955e-05, "log_odds_chosen": 9.383994102478027, "log_odds_ratio": -0.00018407402967568487, "logits/chosen": -1.0799190998077393, "logits/rejected": -1.0719953775405884, "logps/chosen": -0.0004757646529469639, "logps/rejected": -1.5452126264572144, "loss": 2.2054, "nll_loss": 0.5513247847557068, "rewards/accuracies": 1.0, "rewards/chosen": -4.757646092912182e-05, "rewards/margins": 0.154473677277565, "rewards/rejected": -0.15452125668525696, "step": 3707 }, { "epoch": 2.564315352697095, "grad_norm": 8.341119766235352, "learning_rate": 4.13093591516828e-05, "log_odds_chosen": 6.45013952255249, "log_odds_ratio": -0.07698262482881546, "logits/chosen": -0.9068139791488647, "logits/rejected": -0.8944410085678101, "logps/chosen": -0.022624794393777847, "logps/rejected": -1.1139686107635498, "loss": 1.9664, "nll_loss": 0.4838896691799164, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022624791599810123, "rewards/margins": 0.10913438349962234, "rewards/rejected": -0.11139686405658722, "step": 3708 }, { "epoch": 2.565006915629322, "grad_norm": 9.152691841125488, "learning_rate": 4.130551713539266e-05, "log_odds_chosen": 8.225065231323242, "log_odds_ratio": -0.05828314274549484, "logits/chosen": -0.5669647455215454, "logits/rejected": -0.6710978150367737, "logps/chosen": -0.02455291338264942, "logps/rejected": -1.7932811975479126, "loss": 2.4232, "nll_loss": 0.5999712944030762, "rewards/accuracies": 1.0, "rewards/chosen": -0.002455291338264942, "rewards/margins": 0.1768728345632553, "rewards/rejected": -0.17932814359664917, "step": 3709 }, { "epoch": 2.565698478561549, "grad_norm": 11.202807426452637, "learning_rate": 4.1301675119102505e-05, "log_odds_chosen": 7.55279016494751, "log_odds_ratio": -0.0012247057165950537, "logits/chosen": -0.7727817893028259, "logits/rejected": -0.7303429245948792, "logps/chosen": -0.00481916731223464, "logps/rejected": -1.6028831005096436, "loss": 2.6498, "nll_loss": 0.6623245477676392, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004819167370442301, "rewards/margins": 0.15980640053749084, "rewards/rejected": -0.16028831899166107, "step": 3710 }, { "epoch": 2.5663900414937757, "grad_norm": 5.204437255859375, "learning_rate": 4.129783310281236e-05, "log_odds_chosen": 6.436047077178955, "log_odds_ratio": -0.12044772505760193, "logits/chosen": -0.5464393496513367, "logits/rejected": -0.5386316180229187, "logps/chosen": -0.10069790482521057, "logps/rejected": -1.1179600954055786, "loss": 2.2288, "nll_loss": 0.5451546907424927, "rewards/accuracies": 1.0, "rewards/chosen": -0.010069791227579117, "rewards/margins": 0.1017262265086174, "rewards/rejected": -0.11179601401090622, "step": 3711 }, { "epoch": 2.5670816044260025, "grad_norm": 7.855978012084961, "learning_rate": 4.129399108652221e-05, "log_odds_chosen": 7.216761589050293, "log_odds_ratio": -0.008108437061309814, "logits/chosen": -0.7366430163383484, "logits/rejected": -0.742645263671875, "logps/chosen": -0.033469267189502716, "logps/rejected": -1.6177469491958618, "loss": 2.6916, "nll_loss": 0.6720876693725586, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033469272311776876, "rewards/margins": 0.15842777490615845, "rewards/rejected": -0.16177469491958618, "step": 3712 }, { "epoch": 2.5677731673582294, "grad_norm": 6.685059070587158, "learning_rate": 4.129014907023206e-05, "log_odds_chosen": 5.891242980957031, "log_odds_ratio": -0.12909001111984253, "logits/chosen": -0.5541249513626099, "logits/rejected": -0.570735514163971, "logps/chosen": -0.02706187777221203, "logps/rejected": -0.9847200512886047, "loss": 2.8047, "nll_loss": 0.6882719993591309, "rewards/accuracies": 1.0, "rewards/chosen": -0.002706187777221203, "rewards/margins": 0.09576582908630371, "rewards/rejected": -0.09847201406955719, "step": 3713 }, { "epoch": 2.568464730290456, "grad_norm": 8.131349563598633, "learning_rate": 4.128630705394191e-05, "log_odds_chosen": 9.295339584350586, "log_odds_ratio": -0.0011500322725623846, "logits/chosen": -0.5707579255104065, "logits/rejected": -0.6556651592254639, "logps/chosen": -0.00318812089972198, "logps/rejected": -1.8498668670654297, "loss": 1.9003, "nll_loss": 0.4749618172645569, "rewards/accuracies": 1.0, "rewards/chosen": -0.000318812089972198, "rewards/margins": 0.18466788530349731, "rewards/rejected": -0.18498669564723969, "step": 3714 }, { "epoch": 2.569156293222683, "grad_norm": 8.574792861938477, "learning_rate": 4.128246503765176e-05, "log_odds_chosen": 7.151443958282471, "log_odds_ratio": -0.05823368579149246, "logits/chosen": -0.7019017934799194, "logits/rejected": -0.6479978561401367, "logps/chosen": -0.08117261528968811, "logps/rejected": -1.3640259504318237, "loss": 2.1487, "nll_loss": 0.5313621163368225, "rewards/accuracies": 1.0, "rewards/chosen": -0.00811726227402687, "rewards/margins": 0.12828533351421356, "rewards/rejected": -0.13640260696411133, "step": 3715 }, { "epoch": 2.56984785615491, "grad_norm": 12.357011795043945, "learning_rate": 4.127862302136161e-05, "log_odds_chosen": 7.658764839172363, "log_odds_ratio": -0.002434109104797244, "logits/chosen": -0.6412211656570435, "logits/rejected": -0.6666697859764099, "logps/chosen": -0.00591158214956522, "logps/rejected": -1.2501399517059326, "loss": 3.0724, "nll_loss": 0.7678543329238892, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005911581683903933, "rewards/margins": 0.1244228333234787, "rewards/rejected": -0.12501400709152222, "step": 3716 }, { "epoch": 2.5705394190871367, "grad_norm": 8.453063011169434, "learning_rate": 4.127478100507146e-05, "log_odds_chosen": 9.030070304870605, "log_odds_ratio": -0.001330976141616702, "logits/chosen": -0.3844653367996216, "logits/rejected": -0.46229255199432373, "logps/chosen": -0.021825632080435753, "logps/rejected": -2.265153408050537, "loss": 1.9788, "nll_loss": 0.4945632815361023, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021825633011758327, "rewards/margins": 0.2243327796459198, "rewards/rejected": -0.22651533782482147, "step": 3717 }, { "epoch": 2.5712309820193635, "grad_norm": 8.923296928405762, "learning_rate": 4.127093898878132e-05, "log_odds_chosen": 8.41724967956543, "log_odds_ratio": -0.06643003225326538, "logits/chosen": 0.05033461004495621, "logits/rejected": -0.05653882771730423, "logps/chosen": -0.013024937361478806, "logps/rejected": -1.5312068462371826, "loss": 2.0591, "nll_loss": 0.5081378817558289, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013024937361478806, "rewards/margins": 0.1518182009458542, "rewards/rejected": -0.15312069654464722, "step": 3718 }, { "epoch": 2.5719225449515903, "grad_norm": 14.89078426361084, "learning_rate": 4.1267096972491164e-05, "log_odds_chosen": 6.805048942565918, "log_odds_ratio": -0.16832545399665833, "logits/chosen": -0.3612036406993866, "logits/rejected": -0.38720014691352844, "logps/chosen": -0.05015212297439575, "logps/rejected": -1.658402681350708, "loss": 2.9003, "nll_loss": 0.7082515954971313, "rewards/accuracies": 0.875, "rewards/chosen": -0.00501521211117506, "rewards/margins": 0.16082505881786346, "rewards/rejected": -0.165840283036232, "step": 3719 }, { "epoch": 2.572614107883817, "grad_norm": 6.6490068435668945, "learning_rate": 4.1263254956201016e-05, "log_odds_chosen": 9.336997985839844, "log_odds_ratio": -0.03805683180689812, "logits/chosen": -0.35168910026550293, "logits/rejected": -0.4202730357646942, "logps/chosen": -0.02017226442694664, "logps/rejected": -2.215473175048828, "loss": 1.8718, "nll_loss": 0.46415483951568604, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020172265358269215, "rewards/margins": 0.21953007578849792, "rewards/rejected": -0.22154730558395386, "step": 3720 }, { "epoch": 2.573305670816044, "grad_norm": 5.562412738800049, "learning_rate": 4.125941293991087e-05, "log_odds_chosen": 7.149702072143555, "log_odds_ratio": -0.057801514863967896, "logits/chosen": -0.4055611789226532, "logits/rejected": -0.4318541884422302, "logps/chosen": -0.02749335952103138, "logps/rejected": -1.172360897064209, "loss": 1.7467, "nll_loss": 0.4308894872665405, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027493361849337816, "rewards/margins": 0.11448674649000168, "rewards/rejected": -0.11723607778549194, "step": 3721 }, { "epoch": 2.573997233748271, "grad_norm": 8.328250885009766, "learning_rate": 4.125557092362072e-05, "log_odds_chosen": 6.597143650054932, "log_odds_ratio": -0.031210634857416153, "logits/chosen": -0.7388463020324707, "logits/rejected": -0.7064796090126038, "logps/chosen": -0.020624712109565735, "logps/rejected": -1.1217725276947021, "loss": 2.4386, "nll_loss": 0.606519877910614, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020624713506549597, "rewards/margins": 0.11011476814746857, "rewards/rejected": -0.11217725276947021, "step": 3722 }, { "epoch": 2.5746887966804977, "grad_norm": 4.782063961029053, "learning_rate": 4.1251728907330567e-05, "log_odds_chosen": 9.41702938079834, "log_odds_ratio": -0.0007288760971277952, "logits/chosen": -0.2897818386554718, "logits/rejected": -0.30856797099113464, "logps/chosen": -0.0008728159009478986, "logps/rejected": -1.4619526863098145, "loss": 1.7117, "nll_loss": 0.4278546869754791, "rewards/accuracies": 1.0, "rewards/chosen": -8.728158718440682e-05, "rewards/margins": 0.1461080014705658, "rewards/rejected": -0.14619527757167816, "step": 3723 }, { "epoch": 2.5753803596127245, "grad_norm": 7.023058891296387, "learning_rate": 4.124788689104042e-05, "log_odds_chosen": 7.026298999786377, "log_odds_ratio": -0.4848077595233917, "logits/chosen": -0.5465977787971497, "logits/rejected": -0.5585201978683472, "logps/chosen": -0.16477973759174347, "logps/rejected": -1.1323270797729492, "loss": 2.2299, "nll_loss": 0.5089831352233887, "rewards/accuracies": 0.875, "rewards/chosen": -0.016477974131703377, "rewards/margins": 0.09675473719835281, "rewards/rejected": -0.11323270946741104, "step": 3724 }, { "epoch": 2.5760719225449513, "grad_norm": 41.20199966430664, "learning_rate": 4.124404487475027e-05, "log_odds_chosen": 4.795586585998535, "log_odds_ratio": -0.8382541537284851, "logits/chosen": -0.34821343421936035, "logits/rejected": -0.3835321068763733, "logps/chosen": -0.23601265251636505, "logps/rejected": -0.6838950514793396, "loss": 2.6539, "nll_loss": 0.5796493291854858, "rewards/accuracies": 0.625, "rewards/chosen": -0.023601265624165535, "rewards/margins": 0.044788237661123276, "rewards/rejected": -0.06838950514793396, "step": 3725 }, { "epoch": 2.576763485477178, "grad_norm": 12.310609817504883, "learning_rate": 4.124020285846012e-05, "log_odds_chosen": 7.353997230529785, "log_odds_ratio": -0.1631837785243988, "logits/chosen": -0.6637724041938782, "logits/rejected": -0.6767927408218384, "logps/chosen": -0.04751261696219444, "logps/rejected": -1.9424400329589844, "loss": 2.6097, "nll_loss": 0.6360946893692017, "rewards/accuracies": 0.875, "rewards/chosen": -0.004751261789351702, "rewards/margins": 0.18949274718761444, "rewards/rejected": -0.19424399733543396, "step": 3726 }, { "epoch": 2.577455048409405, "grad_norm": 5.297358989715576, "learning_rate": 4.1236360842169976e-05, "log_odds_chosen": 8.62894344329834, "log_odds_ratio": -0.0006866774056106806, "logits/chosen": -0.5644747018814087, "logits/rejected": -0.646756649017334, "logps/chosen": -0.0179891437292099, "logps/rejected": -2.733907699584961, "loss": 2.2156, "nll_loss": 0.5538387298583984, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017989143962040544, "rewards/margins": 0.2715918719768524, "rewards/rejected": -0.2733907699584961, "step": 3727 }, { "epoch": 2.5781466113416323, "grad_norm": 7.3834099769592285, "learning_rate": 4.123251882587982e-05, "log_odds_chosen": 7.929017066955566, "log_odds_ratio": -0.0026038330979645252, "logits/chosen": -0.5381110906600952, "logits/rejected": -0.5581732392311096, "logps/chosen": -0.018378963693976402, "logps/rejected": -1.692350149154663, "loss": 2.5575, "nll_loss": 0.6391133069992065, "rewards/accuracies": 1.0, "rewards/chosen": -0.001837896415963769, "rewards/margins": 0.16739711165428162, "rewards/rejected": -0.1692350208759308, "step": 3728 }, { "epoch": 2.578838174273859, "grad_norm": 6.349059104919434, "learning_rate": 4.1228676809589674e-05, "log_odds_chosen": 9.217550277709961, "log_odds_ratio": -0.001908198813907802, "logits/chosen": -0.7512813210487366, "logits/rejected": -0.8352804780006409, "logps/chosen": -0.0008954018703661859, "logps/rejected": -1.6340348720550537, "loss": 1.7692, "nll_loss": 0.4421083927154541, "rewards/accuracies": 1.0, "rewards/chosen": -8.954018994700164e-05, "rewards/margins": 0.16331395506858826, "rewards/rejected": -0.1634034961462021, "step": 3729 }, { "epoch": 2.579529737206086, "grad_norm": 7.565160751342773, "learning_rate": 4.122483479329953e-05, "log_odds_chosen": 8.419466018676758, "log_odds_ratio": -0.0009257863275706768, "logits/chosen": -0.25553634762763977, "logits/rejected": -0.31241804361343384, "logps/chosen": -0.005397059954702854, "logps/rejected": -1.4249114990234375, "loss": 1.9198, "nll_loss": 0.4798622727394104, "rewards/accuracies": 1.0, "rewards/chosen": -0.000539705972187221, "rewards/margins": 0.14195145666599274, "rewards/rejected": -0.1424911618232727, "step": 3730 }, { "epoch": 2.5802213001383127, "grad_norm": 11.41741943359375, "learning_rate": 4.122099277700938e-05, "log_odds_chosen": 7.808590412139893, "log_odds_ratio": -0.004272694233804941, "logits/chosen": -0.8148729801177979, "logits/rejected": -0.8728553056716919, "logps/chosen": -0.0217888280749321, "logps/rejected": -1.8935446739196777, "loss": 2.8953, "nll_loss": 0.7234096527099609, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021788827143609524, "rewards/margins": 0.18717558681964874, "rewards/rejected": -0.18935447931289673, "step": 3731 }, { "epoch": 2.5809128630705396, "grad_norm": 11.43437671661377, "learning_rate": 4.1217150760719225e-05, "log_odds_chosen": 10.363927841186523, "log_odds_ratio": -9.404075535712764e-05, "logits/chosen": -0.7213290929794312, "logits/rejected": -0.8471459746360779, "logps/chosen": -0.00039993657264858484, "logps/rejected": -2.210651397705078, "loss": 2.5843, "nll_loss": 0.6460575461387634, "rewards/accuracies": 1.0, "rewards/chosen": -3.999365435447544e-05, "rewards/margins": 0.22102515399456024, "rewards/rejected": -0.22106513381004333, "step": 3732 }, { "epoch": 2.5816044260027664, "grad_norm": 59.48982238769531, "learning_rate": 4.121330874442908e-05, "log_odds_chosen": 6.653110980987549, "log_odds_ratio": -0.5694103837013245, "logits/chosen": -0.6047723293304443, "logits/rejected": -0.5345290303230286, "logps/chosen": -0.07189460098743439, "logps/rejected": -1.7830207347869873, "loss": 2.3783, "nll_loss": 0.5376428961753845, "rewards/accuracies": 0.875, "rewards/chosen": -0.007189460098743439, "rewards/margins": 0.17111262679100037, "rewards/rejected": -0.1783020794391632, "step": 3733 }, { "epoch": 2.5822959889349932, "grad_norm": 67.61016082763672, "learning_rate": 4.120946672813893e-05, "log_odds_chosen": 7.578793048858643, "log_odds_ratio": -0.19432534277439117, "logits/chosen": -0.1921953558921814, "logits/rejected": -0.2634783387184143, "logps/chosen": -0.031715311110019684, "logps/rejected": -1.1055599451065063, "loss": 3.2861, "nll_loss": 0.8021039962768555, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031715314835309982, "rewards/margins": 0.10738445818424225, "rewards/rejected": -0.1105559840798378, "step": 3734 }, { "epoch": 2.58298755186722, "grad_norm": 7.091904640197754, "learning_rate": 4.1205624711848776e-05, "log_odds_chosen": 8.691332817077637, "log_odds_ratio": -0.001732210977934301, "logits/chosen": -0.7369958162307739, "logits/rejected": -0.7992552518844604, "logps/chosen": -0.0012605376541614532, "logps/rejected": -1.5876713991165161, "loss": 2.1241, "nll_loss": 0.5308501124382019, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012605376832652837, "rewards/margins": 0.15864109992980957, "rewards/rejected": -0.15876714885234833, "step": 3735 }, { "epoch": 2.583679114799447, "grad_norm": 6.683202743530273, "learning_rate": 4.1201782695558635e-05, "log_odds_chosen": 8.278829574584961, "log_odds_ratio": -0.005301903001964092, "logits/chosen": -0.338886022567749, "logits/rejected": -0.38707235455513, "logps/chosen": -0.01110898144543171, "logps/rejected": -1.345354676246643, "loss": 2.1922, "nll_loss": 0.547519326210022, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011108980979770422, "rewards/margins": 0.1334245651960373, "rewards/rejected": -0.13453546166419983, "step": 3736 }, { "epoch": 2.5843706777316737, "grad_norm": 6.4845685958862305, "learning_rate": 4.119794067926848e-05, "log_odds_chosen": 6.890257835388184, "log_odds_ratio": -0.016935264691710472, "logits/chosen": -0.611894428730011, "logits/rejected": -0.6529337167739868, "logps/chosen": -0.007628034334629774, "logps/rejected": -0.7589771747589111, "loss": 2.2051, "nll_loss": 0.5495746731758118, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007628033636137843, "rewards/margins": 0.07513491064310074, "rewards/rejected": -0.075897715985775, "step": 3737 }, { "epoch": 2.5850622406639006, "grad_norm": 7.8248724937438965, "learning_rate": 4.119409866297833e-05, "log_odds_chosen": 6.516547679901123, "log_odds_ratio": -0.011854683980345726, "logits/chosen": -0.7901416420936584, "logits/rejected": -0.7836036682128906, "logps/chosen": -0.011734462343156338, "logps/rejected": -1.2134819030761719, "loss": 2.6598, "nll_loss": 0.6637638807296753, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011734463041648269, "rewards/margins": 0.12017473578453064, "rewards/rejected": -0.12134818732738495, "step": 3738 }, { "epoch": 2.5857538035961274, "grad_norm": 8.53990364074707, "learning_rate": 4.1190256646688185e-05, "log_odds_chosen": 6.061685562133789, "log_odds_ratio": -0.22549201548099518, "logits/chosen": -0.6229150295257568, "logits/rejected": -0.6731955409049988, "logps/chosen": -0.036444906145334244, "logps/rejected": -1.0788745880126953, "loss": 2.3329, "nll_loss": 0.5606649518013, "rewards/accuracies": 0.75, "rewards/chosen": -0.0036444906145334244, "rewards/margins": 0.1042429655790329, "rewards/rejected": -0.10788745433092117, "step": 3739 }, { "epoch": 2.586445366528354, "grad_norm": 10.496400833129883, "learning_rate": 4.118641463039804e-05, "log_odds_chosen": 9.048690795898438, "log_odds_ratio": -0.0010932005243375897, "logits/chosen": -0.6942977905273438, "logits/rejected": -0.746668815612793, "logps/chosen": -0.003788003697991371, "logps/rejected": -1.28829026222229, "loss": 2.3722, "nll_loss": 0.5929319858551025, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003788003814406693, "rewards/margins": 0.1284502148628235, "rewards/rejected": -0.12882903218269348, "step": 3740 }, { "epoch": 2.587136929460581, "grad_norm": 8.886248588562012, "learning_rate": 4.118257261410788e-05, "log_odds_chosen": 9.778158187866211, "log_odds_ratio": -9.212135773850605e-05, "logits/chosen": -0.6989057064056396, "logits/rejected": -0.7910153865814209, "logps/chosen": -0.0002986646140925586, "logps/rejected": -1.6260040998458862, "loss": 2.3061, "nll_loss": 0.576522707939148, "rewards/accuracies": 1.0, "rewards/chosen": -2.9866463592043146e-05, "rewards/margins": 0.1625705361366272, "rewards/rejected": -0.16260039806365967, "step": 3741 }, { "epoch": 2.587828492392808, "grad_norm": 8.751694679260254, "learning_rate": 4.1178730597817736e-05, "log_odds_chosen": 7.6103715896606445, "log_odds_ratio": -0.10901050269603729, "logits/chosen": -0.5994385480880737, "logits/rejected": -0.6329340934753418, "logps/chosen": -0.027846332639455795, "logps/rejected": -1.5407474040985107, "loss": 3.1338, "nll_loss": 0.7725571990013123, "rewards/accuracies": 0.875, "rewards/chosen": -0.0027846333105117083, "rewards/margins": 0.15129011869430542, "rewards/rejected": -0.1540747433900833, "step": 3742 }, { "epoch": 2.5885200553250347, "grad_norm": 11.04948902130127, "learning_rate": 4.117488858152759e-05, "log_odds_chosen": 8.791509628295898, "log_odds_ratio": -0.0031256452202796936, "logits/chosen": -0.50660240650177, "logits/rejected": -0.6062948703765869, "logps/chosen": -0.0033155661076307297, "logps/rejected": -1.942363977432251, "loss": 2.4201, "nll_loss": 0.6047027111053467, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033155662822537124, "rewards/margins": 0.1939048320055008, "rewards/rejected": -0.1942363828420639, "step": 3743 }, { "epoch": 2.5892116182572615, "grad_norm": 9.975250244140625, "learning_rate": 4.1171046565237434e-05, "log_odds_chosen": 7.344701766967773, "log_odds_ratio": -0.2183784544467926, "logits/chosen": -0.41115838289260864, "logits/rejected": -0.4515266418457031, "logps/chosen": -0.07224129885435104, "logps/rejected": -2.3858866691589355, "loss": 2.3751, "nll_loss": 0.5719287991523743, "rewards/accuracies": 0.875, "rewards/chosen": -0.007224130444228649, "rewards/margins": 0.23136454820632935, "rewards/rejected": -0.23858866095542908, "step": 3744 }, { "epoch": 2.5899031811894884, "grad_norm": 4.491325855255127, "learning_rate": 4.116720454894729e-05, "log_odds_chosen": 9.122901916503906, "log_odds_ratio": -0.00112934282515198, "logits/chosen": -0.6243253350257874, "logits/rejected": -0.639293372631073, "logps/chosen": -0.0041105677373707294, "logps/rejected": -1.7523343563079834, "loss": 1.4115, "nll_loss": 0.3527562618255615, "rewards/accuracies": 1.0, "rewards/chosen": -0.00041105682612396777, "rewards/margins": 0.1748223900794983, "rewards/rejected": -0.17523345351219177, "step": 3745 }, { "epoch": 2.590594744121715, "grad_norm": 7.6031341552734375, "learning_rate": 4.116336253265714e-05, "log_odds_chosen": 7.601007461547852, "log_odds_ratio": -0.10867451876401901, "logits/chosen": -0.421059787273407, "logits/rejected": -0.5527886152267456, "logps/chosen": -0.15169177949428558, "logps/rejected": -1.3082287311553955, "loss": 2.2726, "nll_loss": 0.5572940111160278, "rewards/accuracies": 0.875, "rewards/chosen": -0.015169178135693073, "rewards/margins": 0.11565369367599487, "rewards/rejected": -0.13082286715507507, "step": 3746 }, { "epoch": 2.591286307053942, "grad_norm": 11.434212684631348, "learning_rate": 4.115952051636699e-05, "log_odds_chosen": 7.910490989685059, "log_odds_ratio": -0.007251637522131205, "logits/chosen": -0.2362067997455597, "logits/rejected": -0.4405251741409302, "logps/chosen": -0.04665207117795944, "logps/rejected": -1.3648693561553955, "loss": 2.7146, "nll_loss": 0.6779237985610962, "rewards/accuracies": 1.0, "rewards/chosen": -0.004665207117795944, "rewards/margins": 0.13182173669338226, "rewards/rejected": -0.1364869475364685, "step": 3747 }, { "epoch": 2.591977869986169, "grad_norm": 7.133166790008545, "learning_rate": 4.1155678500076844e-05, "log_odds_chosen": 7.42473030090332, "log_odds_ratio": -0.0022472471464425325, "logits/chosen": -0.6103567481040955, "logits/rejected": -0.6208904981613159, "logps/chosen": -0.003965867683291435, "logps/rejected": -0.9936189651489258, "loss": 1.4544, "nll_loss": 0.3633824288845062, "rewards/accuracies": 1.0, "rewards/chosen": -0.00039658680907450616, "rewards/margins": 0.09896530210971832, "rewards/rejected": -0.09936189651489258, "step": 3748 }, { "epoch": 2.5926694329183957, "grad_norm": 12.385791778564453, "learning_rate": 4.1151836483786696e-05, "log_odds_chosen": 7.634697914123535, "log_odds_ratio": -0.05306196212768555, "logits/chosen": -1.0478848218917847, "logits/rejected": -1.1195652484893799, "logps/chosen": -0.00799348670989275, "logps/rejected": -1.5046417713165283, "loss": 3.5996, "nll_loss": 0.8945903778076172, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007993485778570175, "rewards/margins": 0.14966483414173126, "rewards/rejected": -0.15046417713165283, "step": 3749 }, { "epoch": 2.5933609958506225, "grad_norm": 12.1556396484375, "learning_rate": 4.114799446749654e-05, "log_odds_chosen": 7.831677436828613, "log_odds_ratio": -0.04401255026459694, "logits/chosen": -0.6939361095428467, "logits/rejected": -0.8240491151809692, "logps/chosen": -0.022363290190696716, "logps/rejected": -1.603339433670044, "loss": 2.7254, "nll_loss": 0.6769535541534424, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022363290190696716, "rewards/margins": 0.15809760987758636, "rewards/rejected": -0.16033394634723663, "step": 3750 }, { "epoch": 2.5940525587828493, "grad_norm": 5.799905776977539, "learning_rate": 4.1144152451206394e-05, "log_odds_chosen": 8.367742538452148, "log_odds_ratio": -0.0042973789386451244, "logits/chosen": -0.5330762267112732, "logits/rejected": -0.6044706702232361, "logps/chosen": -0.0007183550042100251, "logps/rejected": -0.879762589931488, "loss": 1.5809, "nll_loss": 0.39480525255203247, "rewards/accuracies": 1.0, "rewards/chosen": -7.183550042100251e-05, "rewards/margins": 0.0879044234752655, "rewards/rejected": -0.08797626197338104, "step": 3751 }, { "epoch": 2.594744121715076, "grad_norm": 11.173681259155273, "learning_rate": 4.114031043491625e-05, "log_odds_chosen": 7.132036209106445, "log_odds_ratio": -0.06363911926746368, "logits/chosen": -0.5374847054481506, "logits/rejected": -0.5664747953414917, "logps/chosen": -0.0732634961605072, "logps/rejected": -1.957589864730835, "loss": 3.0929, "nll_loss": 0.7668578624725342, "rewards/accuracies": 1.0, "rewards/chosen": -0.00732634961605072, "rewards/margins": 0.18843263387680054, "rewards/rejected": -0.19575899839401245, "step": 3752 }, { "epoch": 2.595435684647303, "grad_norm": 8.798983573913574, "learning_rate": 4.113646841862609e-05, "log_odds_chosen": 7.174066066741943, "log_odds_ratio": -0.0576028972864151, "logits/chosen": -0.12469097971916199, "logits/rejected": -0.19542089104652405, "logps/chosen": -0.02913515642285347, "logps/rejected": -1.2168099880218506, "loss": 2.0781, "nll_loss": 0.5137559771537781, "rewards/accuracies": 1.0, "rewards/chosen": -0.002913515781983733, "rewards/margins": 0.11876747757196426, "rewards/rejected": -0.12168100476264954, "step": 3753 }, { "epoch": 2.59612724757953, "grad_norm": 9.644112586975098, "learning_rate": 4.113262640233595e-05, "log_odds_chosen": 7.350411415100098, "log_odds_ratio": -0.01159360259771347, "logits/chosen": -0.4807983636856079, "logits/rejected": -0.546984076499939, "logps/chosen": -0.038234058767557144, "logps/rejected": -1.8546245098114014, "loss": 2.8755, "nll_loss": 0.7177104949951172, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038234058301895857, "rewards/margins": 0.18163906037807465, "rewards/rejected": -0.18546245992183685, "step": 3754 }, { "epoch": 2.5968188105117567, "grad_norm": 7.758711814880371, "learning_rate": 4.11287843860458e-05, "log_odds_chosen": 7.503241062164307, "log_odds_ratio": -0.08800943195819855, "logits/chosen": -0.5578584671020508, "logits/rejected": -0.6296103596687317, "logps/chosen": -0.015839863568544388, "logps/rejected": -1.226827621459961, "loss": 1.8376, "nll_loss": 0.4506067633628845, "rewards/accuracies": 0.875, "rewards/chosen": -0.0015839864499866962, "rewards/margins": 0.12109877914190292, "rewards/rejected": -0.12268276512622833, "step": 3755 }, { "epoch": 2.5975103734439835, "grad_norm": 13.332018852233887, "learning_rate": 4.112494236975565e-05, "log_odds_chosen": 9.398469924926758, "log_odds_ratio": -0.0007393067935481668, "logits/chosen": -0.7192118167877197, "logits/rejected": -0.8135256171226501, "logps/chosen": -0.0019481182098388672, "logps/rejected": -1.9981613159179688, "loss": 2.3335, "nll_loss": 0.5832892656326294, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019481181516312063, "rewards/margins": 0.199621319770813, "rewards/rejected": -0.19981613755226135, "step": 3756 }, { "epoch": 2.5982019363762103, "grad_norm": 5.693976879119873, "learning_rate": 4.11211003534655e-05, "log_odds_chosen": 7.411104202270508, "log_odds_ratio": -0.037521954625844955, "logits/chosen": -0.6283434629440308, "logits/rejected": -0.6703078150749207, "logps/chosen": -0.02808951586484909, "logps/rejected": -1.5351510047912598, "loss": 1.8259, "nll_loss": 0.4527303874492645, "rewards/accuracies": 1.0, "rewards/chosen": -0.002808951772749424, "rewards/margins": 0.15070615708827972, "rewards/rejected": -0.15351510047912598, "step": 3757 }, { "epoch": 2.598893499308437, "grad_norm": 6.475027561187744, "learning_rate": 4.1117258337175355e-05, "log_odds_chosen": 5.8653364181518555, "log_odds_ratio": -0.1683236062526703, "logits/chosen": -0.2808341085910797, "logits/rejected": -0.25737473368644714, "logps/chosen": -0.027778543531894684, "logps/rejected": -0.9268168210983276, "loss": 1.9337, "nll_loss": 0.4665814936161041, "rewards/accuracies": 0.875, "rewards/chosen": -0.002777854213491082, "rewards/margins": 0.0899038314819336, "rewards/rejected": -0.09268169105052948, "step": 3758 }, { "epoch": 2.599585062240664, "grad_norm": 10.565065383911133, "learning_rate": 4.11134163208852e-05, "log_odds_chosen": 8.24848461151123, "log_odds_ratio": -0.0010619653621688485, "logits/chosen": -0.6984177827835083, "logits/rejected": -0.7020764350891113, "logps/chosen": -0.0028242850676178932, "logps/rejected": -1.5658907890319824, "loss": 2.4563, "nll_loss": 0.6139716506004333, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002824285184033215, "rewards/margins": 0.156306654214859, "rewards/rejected": -0.1565890908241272, "step": 3759 }, { "epoch": 2.600276625172891, "grad_norm": 11.125207901000977, "learning_rate": 4.110957430459505e-05, "log_odds_chosen": 8.153125762939453, "log_odds_ratio": -0.008074373006820679, "logits/chosen": -0.6523471474647522, "logits/rejected": -0.7233648896217346, "logps/chosen": -0.019731884822249413, "logps/rejected": -1.3840183019638062, "loss": 3.0923, "nll_loss": 0.7722706198692322, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019731884822249413, "rewards/margins": 0.13642865419387817, "rewards/rejected": -0.1384018361568451, "step": 3760 }, { "epoch": 2.6009681881051177, "grad_norm": 8.181022644042969, "learning_rate": 4.1105732288304905e-05, "log_odds_chosen": 7.941451549530029, "log_odds_ratio": -0.006261616013944149, "logits/chosen": -0.8328216075897217, "logits/rejected": -0.8856630325317383, "logps/chosen": -0.012141270563006401, "logps/rejected": -1.4298372268676758, "loss": 3.0807, "nll_loss": 0.769557774066925, "rewards/accuracies": 1.0, "rewards/chosen": -0.00121412705630064, "rewards/margins": 0.14176960289478302, "rewards/rejected": -0.14298373460769653, "step": 3761 }, { "epoch": 2.6016597510373445, "grad_norm": 8.711572647094727, "learning_rate": 4.110189027201475e-05, "log_odds_chosen": 8.191829681396484, "log_odds_ratio": -0.003800910897552967, "logits/chosen": -0.4973553717136383, "logits/rejected": -0.512773871421814, "logps/chosen": -0.015499060973525047, "logps/rejected": -1.1160831451416016, "loss": 2.1024, "nll_loss": 0.5252153873443604, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015499060973525047, "rewards/margins": 0.11005841195583344, "rewards/rejected": -0.11160831153392792, "step": 3762 }, { "epoch": 2.6023513139695713, "grad_norm": 11.474364280700684, "learning_rate": 4.109804825572461e-05, "log_odds_chosen": 9.341354370117188, "log_odds_ratio": -0.0021199476905167103, "logits/chosen": -0.6627390384674072, "logits/rejected": -0.7836405038833618, "logps/chosen": -0.015581142157316208, "logps/rejected": -1.9159284830093384, "loss": 2.3043, "nll_loss": 0.5758620500564575, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015581144252792, "rewards/margins": 0.19003473222255707, "rewards/rejected": -0.19159284234046936, "step": 3763 }, { "epoch": 2.603042876901798, "grad_norm": 12.52896785736084, "learning_rate": 4.1094206239434456e-05, "log_odds_chosen": 7.266579627990723, "log_odds_ratio": -0.11897798627614975, "logits/chosen": -0.5841730237007141, "logits/rejected": -0.6162888407707214, "logps/chosen": -0.02642166055738926, "logps/rejected": -1.2500101327896118, "loss": 2.8538, "nll_loss": 0.7015467882156372, "rewards/accuracies": 0.875, "rewards/chosen": -0.002642165869474411, "rewards/margins": 0.12235884368419647, "rewards/rejected": -0.12500101327896118, "step": 3764 }, { "epoch": 2.603734439834025, "grad_norm": 15.374418258666992, "learning_rate": 4.109036422314431e-05, "log_odds_chosen": 8.82072639465332, "log_odds_ratio": -0.12637755274772644, "logits/chosen": -0.657711923122406, "logits/rejected": -0.7071999311447144, "logps/chosen": -0.016477566212415695, "logps/rejected": -1.6140774488449097, "loss": 2.0818, "nll_loss": 0.5078055262565613, "rewards/accuracies": 0.875, "rewards/chosen": -0.0016477566678076982, "rewards/margins": 0.159759983420372, "rewards/rejected": -0.16140775382518768, "step": 3765 }, { "epoch": 2.604426002766252, "grad_norm": 11.307380676269531, "learning_rate": 4.108652220685416e-05, "log_odds_chosen": 8.271891593933105, "log_odds_ratio": -0.001959600020200014, "logits/chosen": -0.8720443844795227, "logits/rejected": -0.802766740322113, "logps/chosen": -0.008896499872207642, "logps/rejected": -1.7645208835601807, "loss": 2.4307, "nll_loss": 0.6074774265289307, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008896499057300389, "rewards/margins": 0.17556244134902954, "rewards/rejected": -0.17645208537578583, "step": 3766 }, { "epoch": 2.6051175656984786, "grad_norm": 5.210627555847168, "learning_rate": 4.108268019056401e-05, "log_odds_chosen": 8.590871810913086, "log_odds_ratio": -0.023738976567983627, "logits/chosen": -0.7618262767791748, "logits/rejected": -0.7200245261192322, "logps/chosen": -0.031090902164578438, "logps/rejected": -1.953304409980774, "loss": 2.4172, "nll_loss": 0.6019155979156494, "rewards/accuracies": 1.0, "rewards/chosen": -0.003109090495854616, "rewards/margins": 0.19222135841846466, "rewards/rejected": -0.1953304409980774, "step": 3767 }, { "epoch": 2.6058091286307055, "grad_norm": 8.155974388122559, "learning_rate": 4.107883817427386e-05, "log_odds_chosen": 7.465442657470703, "log_odds_ratio": -0.15218786895275116, "logits/chosen": -0.6641751527786255, "logits/rejected": -0.6453537344932556, "logps/chosen": -0.03820411115884781, "logps/rejected": -1.7105059623718262, "loss": 2.7485, "nll_loss": 0.6719143390655518, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038204113952815533, "rewards/margins": 0.16723018884658813, "rewards/rejected": -0.17105060815811157, "step": 3768 }, { "epoch": 2.6065006915629323, "grad_norm": 10.886385917663574, "learning_rate": 4.107499615798371e-05, "log_odds_chosen": 9.198636054992676, "log_odds_ratio": -0.0007475916645489633, "logits/chosen": -0.5266662240028381, "logits/rejected": -0.6063534617424011, "logps/chosen": -0.03145657107234001, "logps/rejected": -1.9173073768615723, "loss": 2.6858, "nll_loss": 0.6713849902153015, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031456567812711, "rewards/margins": 0.1885850876569748, "rewards/rejected": -0.19173073768615723, "step": 3769 }, { "epoch": 2.607192254495159, "grad_norm": 4.671172142028809, "learning_rate": 4.1071154141693564e-05, "log_odds_chosen": 8.649884223937988, "log_odds_ratio": -0.09504207968711853, "logits/chosen": -0.5261533260345459, "logits/rejected": -0.5755932927131653, "logps/chosen": -0.02501145377755165, "logps/rejected": -1.5033838748931885, "loss": 1.524, "nll_loss": 0.37148401141166687, "rewards/accuracies": 0.875, "rewards/chosen": -0.0025011454708874226, "rewards/margins": 0.14783723652362823, "rewards/rejected": -0.15033838152885437, "step": 3770 }, { "epoch": 2.607883817427386, "grad_norm": 12.868654251098633, "learning_rate": 4.106731212540341e-05, "log_odds_chosen": 8.409965515136719, "log_odds_ratio": -0.004177105613052845, "logits/chosen": -0.5481134653091431, "logits/rejected": -0.6200520396232605, "logps/chosen": -0.01093447208404541, "logps/rejected": -1.4257891178131104, "loss": 4.3731, "nll_loss": 1.0928475856781006, "rewards/accuracies": 1.0, "rewards/chosen": -0.001093447208404541, "rewards/margins": 0.14148546755313873, "rewards/rejected": -0.14257891476154327, "step": 3771 }, { "epoch": 2.608575380359613, "grad_norm": 6.570640563964844, "learning_rate": 4.106347010911327e-05, "log_odds_chosen": 8.466964721679688, "log_odds_ratio": -0.00046557781752198935, "logits/chosen": -0.23388677835464478, "logits/rejected": -0.23734617233276367, "logps/chosen": -0.00039772834861651063, "logps/rejected": -1.008994221687317, "loss": 2.5733, "nll_loss": 0.6432757377624512, "rewards/accuracies": 1.0, "rewards/chosen": -3.977283267886378e-05, "rewards/margins": 0.10085965692996979, "rewards/rejected": -0.10089942812919617, "step": 3772 }, { "epoch": 2.6092669432918396, "grad_norm": 10.865242958068848, "learning_rate": 4.1059628092823114e-05, "log_odds_chosen": 7.911555290222168, "log_odds_ratio": -0.0076803830452263355, "logits/chosen": -0.5928400754928589, "logits/rejected": -0.6205337643623352, "logps/chosen": -0.00917899701744318, "logps/rejected": -1.467241883277893, "loss": 3.2169, "nll_loss": 0.8034663200378418, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009178997133858502, "rewards/margins": 0.14580628275871277, "rewards/rejected": -0.14672419428825378, "step": 3773 }, { "epoch": 2.6099585062240664, "grad_norm": 7.693874359130859, "learning_rate": 4.1055786076532967e-05, "log_odds_chosen": 7.510994911193848, "log_odds_ratio": -0.011618074029684067, "logits/chosen": -0.38228869438171387, "logits/rejected": -0.44549834728240967, "logps/chosen": -0.01588156260550022, "logps/rejected": -1.182736873626709, "loss": 2.1218, "nll_loss": 0.5292880535125732, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015881562139838934, "rewards/margins": 0.11668553948402405, "rewards/rejected": -0.11827369034290314, "step": 3774 }, { "epoch": 2.6106500691562933, "grad_norm": 8.907024383544922, "learning_rate": 4.105194406024282e-05, "log_odds_chosen": 6.680961608886719, "log_odds_ratio": -0.06957157701253891, "logits/chosen": -0.8452804088592529, "logits/rejected": -0.7844629883766174, "logps/chosen": -0.07758989930152893, "logps/rejected": -1.437814474105835, "loss": 2.7614, "nll_loss": 0.6833951473236084, "rewards/accuracies": 1.0, "rewards/chosen": -0.007758989930152893, "rewards/margins": 0.13602246344089508, "rewards/rejected": -0.14378145337104797, "step": 3775 }, { "epoch": 2.61134163208852, "grad_norm": 6.217175006866455, "learning_rate": 4.104810204395267e-05, "log_odds_chosen": 6.552158832550049, "log_odds_ratio": -0.0551423579454422, "logits/chosen": -0.4047723412513733, "logits/rejected": -0.42235076427459717, "logps/chosen": -0.036186181008815765, "logps/rejected": -1.1089141368865967, "loss": 2.6563, "nll_loss": 0.6585571765899658, "rewards/accuracies": 1.0, "rewards/chosen": -0.003618618007749319, "rewards/margins": 0.10727280378341675, "rewards/rejected": -0.1108914166688919, "step": 3776 }, { "epoch": 2.612033195020747, "grad_norm": 7.649138450622559, "learning_rate": 4.104426002766252e-05, "log_odds_chosen": 7.4502434730529785, "log_odds_ratio": -0.003743886947631836, "logits/chosen": -0.5986257791519165, "logits/rejected": -0.6282727122306824, "logps/chosen": -0.007481364067643881, "logps/rejected": -1.1748636960983276, "loss": 2.1058, "nll_loss": 0.5260686874389648, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007481364300474524, "rewards/margins": 0.11673823744058609, "rewards/rejected": -0.117486372590065, "step": 3777 }, { "epoch": 2.6127247579529738, "grad_norm": 16.62424659729004, "learning_rate": 4.104041801137237e-05, "log_odds_chosen": 6.623798370361328, "log_odds_ratio": -0.41715821623802185, "logits/chosen": -0.7055549025535583, "logits/rejected": -0.7265179753303528, "logps/chosen": -0.0954117402434349, "logps/rejected": -1.7129487991333008, "loss": 2.4636, "nll_loss": 0.574172854423523, "rewards/accuracies": 0.75, "rewards/chosen": -0.009541173465549946, "rewards/margins": 0.16175371408462524, "rewards/rejected": -0.17129486799240112, "step": 3778 }, { "epoch": 2.6134163208852006, "grad_norm": 14.161336898803711, "learning_rate": 4.103657599508222e-05, "log_odds_chosen": 5.261303901672363, "log_odds_ratio": -0.38418567180633545, "logits/chosen": -0.4809882640838623, "logits/rejected": -0.5193066596984863, "logps/chosen": -0.051292117685079575, "logps/rejected": -1.037435531616211, "loss": 2.9689, "nll_loss": 0.7038084864616394, "rewards/accuracies": 0.875, "rewards/chosen": -0.005129212513566017, "rewards/margins": 0.09861434251070023, "rewards/rejected": -0.1037435531616211, "step": 3779 }, { "epoch": 2.6141078838174274, "grad_norm": 8.383172035217285, "learning_rate": 4.103273397879207e-05, "log_odds_chosen": 8.017049789428711, "log_odds_ratio": -0.00684964656829834, "logits/chosen": -0.5790307521820068, "logits/rejected": -0.5661333203315735, "logps/chosen": -0.004952224902808666, "logps/rejected": -1.5408872365951538, "loss": 2.4536, "nll_loss": 0.6127271056175232, "rewards/accuracies": 1.0, "rewards/chosen": -0.000495222513563931, "rewards/margins": 0.153593510389328, "rewards/rejected": -0.15408873558044434, "step": 3780 }, { "epoch": 2.6147994467496543, "grad_norm": 9.299695014953613, "learning_rate": 4.102889196250193e-05, "log_odds_chosen": 8.528641700744629, "log_odds_ratio": -0.0012356049846857786, "logits/chosen": -0.8160710334777832, "logits/rejected": -0.8102253079414368, "logps/chosen": -0.006617030128836632, "logps/rejected": -1.8163725137710571, "loss": 2.2087, "nll_loss": 0.5520486831665039, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006617030594497919, "rewards/margins": 0.1809755563735962, "rewards/rejected": -0.1816372573375702, "step": 3781 }, { "epoch": 2.615491009681881, "grad_norm": 8.188958168029785, "learning_rate": 4.102504994621177e-05, "log_odds_chosen": 10.0280122756958, "log_odds_ratio": -9.502626198809594e-05, "logits/chosen": -0.2838655710220337, "logits/rejected": -0.40352708101272583, "logps/chosen": -0.00035995981306768954, "logps/rejected": -1.7294433116912842, "loss": 1.5491, "nll_loss": 0.38726910948753357, "rewards/accuracies": 1.0, "rewards/chosen": -3.599598494474776e-05, "rewards/margins": 0.17290833592414856, "rewards/rejected": -0.1729443371295929, "step": 3782 }, { "epoch": 2.616182572614108, "grad_norm": 6.363637447357178, "learning_rate": 4.1021207929921625e-05, "log_odds_chosen": 7.355223655700684, "log_odds_ratio": -0.036441314965486526, "logits/chosen": -0.5759349465370178, "logits/rejected": -0.5798578262329102, "logps/chosen": -0.027550656348466873, "logps/rejected": -1.470800518989563, "loss": 2.4145, "nll_loss": 0.5999844074249268, "rewards/accuracies": 1.0, "rewards/chosen": -0.002755065681412816, "rewards/margins": 0.14432498812675476, "rewards/rejected": -0.14708006381988525, "step": 3783 }, { "epoch": 2.6168741355463347, "grad_norm": 7.999043941497803, "learning_rate": 4.101736591363148e-05, "log_odds_chosen": 7.062474727630615, "log_odds_ratio": -0.05684699863195419, "logits/chosen": -0.5237227082252502, "logits/rejected": -0.5357232093811035, "logps/chosen": -0.02586865797638893, "logps/rejected": -1.7018102407455444, "loss": 2.2973, "nll_loss": 0.5686320066452026, "rewards/accuracies": 1.0, "rewards/chosen": -0.002586865797638893, "rewards/margins": 0.16759416460990906, "rewards/rejected": -0.1701810210943222, "step": 3784 }, { "epoch": 2.6175656984785616, "grad_norm": 7.679310321807861, "learning_rate": 4.101352389734133e-05, "log_odds_chosen": 7.4297637939453125, "log_odds_ratio": -0.06839942932128906, "logits/chosen": -0.2992628812789917, "logits/rejected": -0.4127195179462433, "logps/chosen": -0.041940055787563324, "logps/rejected": -1.3728930950164795, "loss": 2.1012, "nll_loss": 0.5184489488601685, "rewards/accuracies": 1.0, "rewards/chosen": -0.004194005858153105, "rewards/margins": 0.13309532403945923, "rewards/rejected": -0.13728931546211243, "step": 3785 }, { "epoch": 2.6182572614107884, "grad_norm": 11.26512622833252, "learning_rate": 4.1009681881051176e-05, "log_odds_chosen": 5.08907413482666, "log_odds_ratio": -0.2812805473804474, "logits/chosen": -0.6196216344833374, "logits/rejected": -0.5990339517593384, "logps/chosen": -0.10777544230222702, "logps/rejected": -0.9553592801094055, "loss": 2.5721, "nll_loss": 0.6148989200592041, "rewards/accuracies": 0.75, "rewards/chosen": -0.010777544230222702, "rewards/margins": 0.08475838601589203, "rewards/rejected": -0.09553593397140503, "step": 3786 }, { "epoch": 2.6189488243430152, "grad_norm": 9.276042938232422, "learning_rate": 4.100583986476103e-05, "log_odds_chosen": 7.981724739074707, "log_odds_ratio": -0.006409569643437862, "logits/chosen": -0.7504392266273499, "logits/rejected": -0.8193677067756653, "logps/chosen": -0.011914849281311035, "logps/rejected": -1.3361014127731323, "loss": 2.7555, "nll_loss": 0.6882370114326477, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011914849746972322, "rewards/margins": 0.1324186474084854, "rewards/rejected": -0.13361014425754547, "step": 3787 }, { "epoch": 2.619640387275242, "grad_norm": 13.060140609741211, "learning_rate": 4.100199784847088e-05, "log_odds_chosen": 7.931905746459961, "log_odds_ratio": -0.000717336602974683, "logits/chosen": -0.7390726804733276, "logits/rejected": -0.8203366994857788, "logps/chosen": -0.0012997111771255732, "logps/rejected": -1.4393033981323242, "loss": 3.8084, "nll_loss": 0.9520176649093628, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012997111480217427, "rewards/margins": 0.14380037784576416, "rewards/rejected": -0.1439303457736969, "step": 3788 }, { "epoch": 2.620331950207469, "grad_norm": 9.99527645111084, "learning_rate": 4.0998155832180726e-05, "log_odds_chosen": 7.0623884201049805, "log_odds_ratio": -0.10466547310352325, "logits/chosen": -0.6097970008850098, "logits/rejected": -0.7266175746917725, "logps/chosen": -0.020670359954237938, "logps/rejected": -1.1227185726165771, "loss": 3.1378, "nll_loss": 0.7739837169647217, "rewards/accuracies": 1.0, "rewards/chosen": -0.002067035995423794, "rewards/margins": 0.11020482331514359, "rewards/rejected": -0.11227186769247055, "step": 3789 }, { "epoch": 2.6210235131396957, "grad_norm": 8.242027282714844, "learning_rate": 4.0994313815890585e-05, "log_odds_chosen": 9.106143951416016, "log_odds_ratio": -0.00046437146374955773, "logits/chosen": -0.7410778999328613, "logits/rejected": -0.6651361584663391, "logps/chosen": -0.001504677115008235, "logps/rejected": -1.3949095010757446, "loss": 2.6114, "nll_loss": 0.6528007388114929, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001504677056800574, "rewards/margins": 0.13934049010276794, "rewards/rejected": -0.13949096202850342, "step": 3790 }, { "epoch": 2.6217150760719226, "grad_norm": 7.91328239440918, "learning_rate": 4.099047179960043e-05, "log_odds_chosen": 7.157289505004883, "log_odds_ratio": -0.0055101243779063225, "logits/chosen": -0.6908423900604248, "logits/rejected": -0.6601514220237732, "logps/chosen": -0.022872548550367355, "logps/rejected": -1.2855727672576904, "loss": 1.8462, "nll_loss": 0.46100759506225586, "rewards/accuracies": 1.0, "rewards/chosen": -0.002287255134433508, "rewards/margins": 0.12627002596855164, "rewards/rejected": -0.1285572648048401, "step": 3791 }, { "epoch": 2.6224066390041494, "grad_norm": 6.737125873565674, "learning_rate": 4.0986629783310283e-05, "log_odds_chosen": 6.822482109069824, "log_odds_ratio": -0.0716143399477005, "logits/chosen": -0.4894212484359741, "logits/rejected": -0.4532252550125122, "logps/chosen": -0.04768814519047737, "logps/rejected": -1.8026762008666992, "loss": 2.5347, "nll_loss": 0.6265243291854858, "rewards/accuracies": 1.0, "rewards/chosen": -0.00476881442591548, "rewards/margins": 0.17549879848957062, "rewards/rejected": -0.18026763200759888, "step": 3792 }, { "epoch": 2.623098201936376, "grad_norm": 4.337125301361084, "learning_rate": 4.0982787767020136e-05, "log_odds_chosen": 5.33636474609375, "log_odds_ratio": -0.1461387425661087, "logits/chosen": -0.3308134078979492, "logits/rejected": -0.31176647543907166, "logps/chosen": -0.062302809208631516, "logps/rejected": -1.0903065204620361, "loss": 1.4588, "nll_loss": 0.3500952422618866, "rewards/accuracies": 1.0, "rewards/chosen": -0.006230281665921211, "rewards/margins": 0.10280036926269531, "rewards/rejected": -0.10903064906597137, "step": 3793 }, { "epoch": 2.623789764868603, "grad_norm": 6.027596950531006, "learning_rate": 4.097894575072999e-05, "log_odds_chosen": 7.09362268447876, "log_odds_ratio": -0.06785794347524643, "logits/chosen": -0.4358813464641571, "logits/rejected": -0.48490625619888306, "logps/chosen": -0.02412148006260395, "logps/rejected": -1.2474931478500366, "loss": 2.536, "nll_loss": 0.6272022724151611, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024121480528265238, "rewards/margins": 0.12233716994524002, "rewards/rejected": -0.1247493103146553, "step": 3794 }, { "epoch": 2.62448132780083, "grad_norm": 10.282882690429688, "learning_rate": 4.0975103734439834e-05, "log_odds_chosen": 9.190343856811523, "log_odds_ratio": -0.001859789015725255, "logits/chosen": -0.63014817237854, "logits/rejected": -0.6413495540618896, "logps/chosen": -0.00032331724651157856, "logps/rejected": -1.4479916095733643, "loss": 3.6494, "nll_loss": 0.9121723175048828, "rewards/accuracies": 1.0, "rewards/chosen": -3.233172537875362e-05, "rewards/margins": 0.14476682245731354, "rewards/rejected": -0.1447991579771042, "step": 3795 }, { "epoch": 2.6251728907330567, "grad_norm": 8.422372817993164, "learning_rate": 4.0971261718149686e-05, "log_odds_chosen": 7.013335227966309, "log_odds_ratio": -0.0271234679967165, "logits/chosen": -0.7074835300445557, "logits/rejected": -0.7603086829185486, "logps/chosen": -0.01090591587126255, "logps/rejected": -1.0833988189697266, "loss": 1.9539, "nll_loss": 0.4857563078403473, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010905916569754481, "rewards/margins": 0.10724928230047226, "rewards/rejected": -0.10833987593650818, "step": 3796 }, { "epoch": 2.6258644536652835, "grad_norm": 8.807729721069336, "learning_rate": 4.096741970185954e-05, "log_odds_chosen": 7.642078399658203, "log_odds_ratio": -0.00967707671225071, "logits/chosen": -0.5156012773513794, "logits/rejected": -0.5499816536903381, "logps/chosen": -0.0036300821229815483, "logps/rejected": -1.2207317352294922, "loss": 2.6392, "nll_loss": 0.6588394045829773, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036300826468504965, "rewards/margins": 0.1217101663351059, "rewards/rejected": -0.12207317352294922, "step": 3797 }, { "epoch": 2.6265560165975104, "grad_norm": 6.9938554763793945, "learning_rate": 4.0963577685569385e-05, "log_odds_chosen": 7.7642130851745605, "log_odds_ratio": -0.006069661118090153, "logits/chosen": -0.47169187664985657, "logits/rejected": -0.5067437291145325, "logps/chosen": -0.08318636566400528, "logps/rejected": -2.1593117713928223, "loss": 3.3399, "nll_loss": 0.8343735933303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.008318635635077953, "rewards/margins": 0.207612544298172, "rewards/rejected": -0.21593117713928223, "step": 3798 }, { "epoch": 2.627247579529737, "grad_norm": 8.0711030960083, "learning_rate": 4.0959735669279244e-05, "log_odds_chosen": 8.419703483581543, "log_odds_ratio": -0.0006184577941894531, "logits/chosen": -0.749565601348877, "logits/rejected": -0.7853137254714966, "logps/chosen": -0.0029448317363858223, "logps/rejected": -1.5154144763946533, "loss": 2.1765, "nll_loss": 0.5440510511398315, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002944831794593483, "rewards/margins": 0.15124696493148804, "rewards/rejected": -0.15154144167900085, "step": 3799 }, { "epoch": 2.627939142461964, "grad_norm": 3.90470552444458, "learning_rate": 4.095589365298909e-05, "log_odds_chosen": 7.269073963165283, "log_odds_ratio": -0.0662284791469574, "logits/chosen": -0.3588363826274872, "logits/rejected": -0.3765372037887573, "logps/chosen": -0.03212092071771622, "logps/rejected": -1.3606046438217163, "loss": 2.0721, "nll_loss": 0.5114089250564575, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032120919786393642, "rewards/margins": 0.13284838199615479, "rewards/rejected": -0.1360604614019394, "step": 3800 }, { "epoch": 2.628630705394191, "grad_norm": 10.0017728805542, "learning_rate": 4.095205163669894e-05, "log_odds_chosen": 8.136731147766113, "log_odds_ratio": -0.018487481400370598, "logits/chosen": -0.6621404886245728, "logits/rejected": -0.6417994499206543, "logps/chosen": -0.009381298907101154, "logps/rejected": -1.1562975645065308, "loss": 2.6296, "nll_loss": 0.6555531024932861, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009381298441439867, "rewards/margins": 0.11469162255525589, "rewards/rejected": -0.11562975496053696, "step": 3801 }, { "epoch": 2.6293222683264177, "grad_norm": 8.75160026550293, "learning_rate": 4.0948209620408794e-05, "log_odds_chosen": 8.332706451416016, "log_odds_ratio": -0.003808370791375637, "logits/chosen": -0.5499763488769531, "logits/rejected": -0.6190809607505798, "logps/chosen": -0.015007829293608665, "logps/rejected": -1.885420322418213, "loss": 2.1238, "nll_loss": 0.530563235282898, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015007827896624804, "rewards/margins": 0.1870412528514862, "rewards/rejected": -0.18854205310344696, "step": 3802 }, { "epoch": 2.6300138312586445, "grad_norm": 4.922801494598389, "learning_rate": 4.094436760411865e-05, "log_odds_chosen": 4.8717803955078125, "log_odds_ratio": -0.04627423360943794, "logits/chosen": -0.4469316005706787, "logits/rejected": -0.44363802671432495, "logps/chosen": -0.04971982538700104, "logps/rejected": -1.0474414825439453, "loss": 2.4512, "nll_loss": 0.6081790328025818, "rewards/accuracies": 1.0, "rewards/chosen": -0.004971982911229134, "rewards/margins": 0.09977217018604279, "rewards/rejected": -0.10474415123462677, "step": 3803 }, { "epoch": 2.6307053941908713, "grad_norm": 7.153709411621094, "learning_rate": 4.094052558782849e-05, "log_odds_chosen": 6.108127117156982, "log_odds_ratio": -0.015503794886171818, "logits/chosen": -0.4905102252960205, "logits/rejected": -0.48964786529541016, "logps/chosen": -0.1222764328122139, "logps/rejected": -1.8249282836914062, "loss": 1.9354, "nll_loss": 0.482305109500885, "rewards/accuracies": 1.0, "rewards/chosen": -0.01222764328122139, "rewards/margins": 0.17026519775390625, "rewards/rejected": -0.18249283730983734, "step": 3804 }, { "epoch": 2.631396957123098, "grad_norm": 8.944799423217773, "learning_rate": 4.0936683571538345e-05, "log_odds_chosen": 6.0882673263549805, "log_odds_ratio": -0.16901487112045288, "logits/chosen": -0.391895592212677, "logits/rejected": -0.4109463095664978, "logps/chosen": -0.04145354777574539, "logps/rejected": -1.382396936416626, "loss": 2.3299, "nll_loss": 0.5655611157417297, "rewards/accuracies": 0.875, "rewards/chosen": -0.004145354963839054, "rewards/margins": 0.13409434258937836, "rewards/rejected": -0.13823971152305603, "step": 3805 }, { "epoch": 2.632088520055325, "grad_norm": 13.911857604980469, "learning_rate": 4.09328415552482e-05, "log_odds_chosen": 6.242876052856445, "log_odds_ratio": -0.21624933183193207, "logits/chosen": -0.6284334063529968, "logits/rejected": -0.6858136653900146, "logps/chosen": -0.0389171838760376, "logps/rejected": -0.7917266488075256, "loss": 2.2325, "nll_loss": 0.5364995002746582, "rewards/accuracies": 0.875, "rewards/chosen": -0.003891718341037631, "rewards/margins": 0.07528094947338104, "rewards/rejected": -0.07917267084121704, "step": 3806 }, { "epoch": 2.632780082987552, "grad_norm": 5.895772457122803, "learning_rate": 4.092899953895804e-05, "log_odds_chosen": 7.286813735961914, "log_odds_ratio": -0.037271179258823395, "logits/chosen": -0.357649028301239, "logits/rejected": -0.4334043562412262, "logps/chosen": -0.02683999016880989, "logps/rejected": -1.6060643196105957, "loss": 2.1091, "nll_loss": 0.5235556960105896, "rewards/accuracies": 1.0, "rewards/chosen": -0.002683999016880989, "rewards/margins": 0.1579224318265915, "rewards/rejected": -0.16060644388198853, "step": 3807 }, { "epoch": 2.6334716459197787, "grad_norm": 6.329620838165283, "learning_rate": 4.09251575226679e-05, "log_odds_chosen": 7.340235710144043, "log_odds_ratio": -0.040890198200941086, "logits/chosen": -0.2552340030670166, "logits/rejected": -0.29688987135887146, "logps/chosen": -0.013605457730591297, "logps/rejected": -1.0799440145492554, "loss": 1.9992, "nll_loss": 0.4957117438316345, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013605458661913872, "rewards/margins": 0.10663385689258575, "rewards/rejected": -0.10799440741539001, "step": 3808 }, { "epoch": 2.6341632088520055, "grad_norm": 6.554021835327148, "learning_rate": 4.092131550637775e-05, "log_odds_chosen": 8.336524963378906, "log_odds_ratio": -0.0008704437641426921, "logits/chosen": -0.2876972556114197, "logits/rejected": -0.33872395753860474, "logps/chosen": -0.010608218610286713, "logps/rejected": -1.7114243507385254, "loss": 2.0471, "nll_loss": 0.5116779804229736, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010608219308778644, "rewards/margins": 0.17008161544799805, "rewards/rejected": -0.17114242911338806, "step": 3809 }, { "epoch": 2.6348547717842323, "grad_norm": 8.618371963500977, "learning_rate": 4.09174734900876e-05, "log_odds_chosen": 7.948423862457275, "log_odds_ratio": -0.003505430184304714, "logits/chosen": -0.5390564799308777, "logits/rejected": -0.5939716100692749, "logps/chosen": -0.0177299827337265, "logps/rejected": -1.9059690237045288, "loss": 2.2947, "nll_loss": 0.5733277201652527, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017729983665049076, "rewards/margins": 0.1888239085674286, "rewards/rejected": -0.19059689342975616, "step": 3810 }, { "epoch": 2.635546334716459, "grad_norm": 9.77852725982666, "learning_rate": 4.091363147379745e-05, "log_odds_chosen": 9.04955005645752, "log_odds_ratio": -0.007421260699629784, "logits/chosen": -0.48122864961624146, "logits/rejected": -0.5198429226875305, "logps/chosen": -0.00442839739844203, "logps/rejected": -1.4086993932724, "loss": 1.6727, "nll_loss": 0.4174409508705139, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004428397514857352, "rewards/margins": 0.1404271125793457, "rewards/rejected": -0.14086996018886566, "step": 3811 }, { "epoch": 2.636237897648686, "grad_norm": 12.669425964355469, "learning_rate": 4.0909789457507305e-05, "log_odds_chosen": 8.74130630493164, "log_odds_ratio": -0.00046664898400194943, "logits/chosen": -0.7802400588989258, "logits/rejected": -0.7810622453689575, "logps/chosen": -0.001165109919384122, "logps/rejected": -1.3760836124420166, "loss": 2.9398, "nll_loss": 0.7349057197570801, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001165109861176461, "rewards/margins": 0.1374918520450592, "rewards/rejected": -0.1376083493232727, "step": 3812 }, { "epoch": 2.636929460580913, "grad_norm": 6.967644214630127, "learning_rate": 4.090594744121715e-05, "log_odds_chosen": 9.533140182495117, "log_odds_ratio": -0.00017059470701497048, "logits/chosen": -0.41741979122161865, "logits/rejected": -0.5131128430366516, "logps/chosen": -0.0004757500428240746, "logps/rejected": -1.5622491836547852, "loss": 1.7849, "nll_loss": 0.4462040066719055, "rewards/accuracies": 1.0, "rewards/chosen": -4.7575005737598985e-05, "rewards/margins": 0.1561773270368576, "rewards/rejected": -0.15622490644454956, "step": 3813 }, { "epoch": 2.6376210235131397, "grad_norm": 5.830072402954102, "learning_rate": 4.0902105424927e-05, "log_odds_chosen": 7.749738693237305, "log_odds_ratio": -0.003948946483433247, "logits/chosen": -0.3899442255496979, "logits/rejected": -0.42192280292510986, "logps/chosen": -0.0035098264925181866, "logps/rejected": -1.289759635925293, "loss": 1.5506, "nll_loss": 0.3872567117214203, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035098264925181866, "rewards/margins": 0.12862497568130493, "rewards/rejected": -0.12897595763206482, "step": 3814 }, { "epoch": 2.6383125864453665, "grad_norm": 7.257383823394775, "learning_rate": 4.0898263408636856e-05, "log_odds_chosen": 6.416209697723389, "log_odds_ratio": -0.1501171886920929, "logits/chosen": -0.5847111344337463, "logits/rejected": -0.5577553510665894, "logps/chosen": -0.03365294635295868, "logps/rejected": -1.0939298868179321, "loss": 1.979, "nll_loss": 0.47974252700805664, "rewards/accuracies": 0.875, "rewards/chosen": -0.003365294774994254, "rewards/margins": 0.10602769255638123, "rewards/rejected": -0.10939298570156097, "step": 3815 }, { "epoch": 2.6390041493775933, "grad_norm": 8.99246597290039, "learning_rate": 4.08944213923467e-05, "log_odds_chosen": 6.8492841720581055, "log_odds_ratio": -0.004586468450725079, "logits/chosen": -0.41203147172927856, "logits/rejected": -0.46544843912124634, "logps/chosen": -0.03983590006828308, "logps/rejected": -1.4733881950378418, "loss": 2.9193, "nll_loss": 0.7293692827224731, "rewards/accuracies": 1.0, "rewards/chosen": -0.003983589820563793, "rewards/margins": 0.14335523545742035, "rewards/rejected": -0.1473388373851776, "step": 3816 }, { "epoch": 2.63969571230982, "grad_norm": 8.560349464416504, "learning_rate": 4.089057937605656e-05, "log_odds_chosen": 7.125585556030273, "log_odds_ratio": -0.010650004260241985, "logits/chosen": -0.5648536682128906, "logits/rejected": -0.6092085242271423, "logps/chosen": -0.009023960679769516, "logps/rejected": -1.2239824533462524, "loss": 2.6873, "nll_loss": 0.6707549095153809, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009023960446938872, "rewards/margins": 0.12149585783481598, "rewards/rejected": -0.122398242354393, "step": 3817 }, { "epoch": 2.640387275242047, "grad_norm": 12.124699592590332, "learning_rate": 4.0886737359766406e-05, "log_odds_chosen": 7.615258693695068, "log_odds_ratio": -0.0019200460519641638, "logits/chosen": -0.48766252398490906, "logits/rejected": -0.5352762937545776, "logps/chosen": -0.04233637452125549, "logps/rejected": -2.3085317611694336, "loss": 2.8713, "nll_loss": 0.717635989189148, "rewards/accuracies": 1.0, "rewards/chosen": -0.004233637358993292, "rewards/margins": 0.22661955654621124, "rewards/rejected": -0.23085319995880127, "step": 3818 }, { "epoch": 2.641078838174274, "grad_norm": 9.745429039001465, "learning_rate": 4.088289534347626e-05, "log_odds_chosen": 7.206563949584961, "log_odds_ratio": -0.10519903898239136, "logits/chosen": -0.33590954542160034, "logits/rejected": -0.37854519486427307, "logps/chosen": -0.19065965712070465, "logps/rejected": -1.7531626224517822, "loss": 1.8261, "nll_loss": 0.4459928572177887, "rewards/accuracies": 1.0, "rewards/chosen": -0.019065964967012405, "rewards/margins": 0.15625028312206268, "rewards/rejected": -0.1753162443637848, "step": 3819 }, { "epoch": 2.6417704011065006, "grad_norm": 10.741318702697754, "learning_rate": 4.0879053327186104e-05, "log_odds_chosen": 8.253884315490723, "log_odds_ratio": -0.050954267382621765, "logits/chosen": -0.6642457842826843, "logits/rejected": -0.6910403966903687, "logps/chosen": -0.06695730984210968, "logps/rejected": -1.5085256099700928, "loss": 3.24, "nll_loss": 0.804897665977478, "rewards/accuracies": 1.0, "rewards/chosen": -0.0066957310773432255, "rewards/margins": 0.14415684342384338, "rewards/rejected": -0.15085257589817047, "step": 3820 }, { "epoch": 2.6424619640387275, "grad_norm": 11.081913948059082, "learning_rate": 4.0875211310895964e-05, "log_odds_chosen": 6.918118476867676, "log_odds_ratio": -0.24356284737586975, "logits/chosen": -0.5838861465454102, "logits/rejected": -0.6377038955688477, "logps/chosen": -0.050251834094524384, "logps/rejected": -1.658482313156128, "loss": 2.5045, "nll_loss": 0.601761519908905, "rewards/accuracies": 0.875, "rewards/chosen": -0.005025183781981468, "rewards/margins": 0.1608230471611023, "rewards/rejected": -0.1658482402563095, "step": 3821 }, { "epoch": 2.6431535269709543, "grad_norm": 8.70766830444336, "learning_rate": 4.087136929460581e-05, "log_odds_chosen": 7.524777412414551, "log_odds_ratio": -0.002118032891303301, "logits/chosen": -0.003473300486803055, "logits/rejected": -0.07419916987419128, "logps/chosen": -0.039731841534376144, "logps/rejected": -2.60288143157959, "loss": 1.8429, "nll_loss": 0.4605070948600769, "rewards/accuracies": 1.0, "rewards/chosen": -0.0039731841534376144, "rewards/margins": 0.2563149631023407, "rewards/rejected": -0.26028814911842346, "step": 3822 }, { "epoch": 2.643845089903181, "grad_norm": 11.58268928527832, "learning_rate": 4.086752727831566e-05, "log_odds_chosen": 5.5339131355285645, "log_odds_ratio": -1.1879533529281616, "logits/chosen": -0.6753538250923157, "logits/rejected": -0.7414652705192566, "logps/chosen": -0.3033027946949005, "logps/rejected": -1.4092167615890503, "loss": 2.8279, "nll_loss": 0.5881817936897278, "rewards/accuracies": 0.875, "rewards/chosen": -0.03033027984201908, "rewards/margins": 0.11059139668941498, "rewards/rejected": -0.1409216821193695, "step": 3823 }, { "epoch": 2.644536652835408, "grad_norm": 8.396631240844727, "learning_rate": 4.0863685262025514e-05, "log_odds_chosen": 6.820697784423828, "log_odds_ratio": -0.06059110537171364, "logits/chosen": -0.8757572174072266, "logits/rejected": -0.9193231463432312, "logps/chosen": -0.05826441943645477, "logps/rejected": -1.401708960533142, "loss": 2.2095, "nll_loss": 0.546317994594574, "rewards/accuracies": 1.0, "rewards/chosen": -0.005826442502439022, "rewards/margins": 0.1343444585800171, "rewards/rejected": -0.1401709020137787, "step": 3824 }, { "epoch": 2.645228215767635, "grad_norm": 8.253293991088867, "learning_rate": 4.085984324573536e-05, "log_odds_chosen": 9.214951515197754, "log_odds_ratio": -0.00024521065643057227, "logits/chosen": -0.3974234163761139, "logits/rejected": -0.40845921635627747, "logps/chosen": -0.00035066824057139456, "logps/rejected": -1.5347728729248047, "loss": 1.7804, "nll_loss": 0.4450783133506775, "rewards/accuracies": 1.0, "rewards/chosen": -3.5066823329543695e-05, "rewards/margins": 0.15344221889972687, "rewards/rejected": -0.153477281332016, "step": 3825 }, { "epoch": 2.6459197786998616, "grad_norm": 9.070773124694824, "learning_rate": 4.085600122944521e-05, "log_odds_chosen": 8.925060272216797, "log_odds_ratio": -0.000363756698789075, "logits/chosen": -0.2657910883426666, "logits/rejected": -0.286965012550354, "logps/chosen": -0.0059166718274354935, "logps/rejected": -1.8982032537460327, "loss": 1.4624, "nll_loss": 0.3655526041984558, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005916672525927424, "rewards/margins": 0.18922865390777588, "rewards/rejected": -0.1898203343153, "step": 3826 }, { "epoch": 2.6466113416320884, "grad_norm": 7.061948299407959, "learning_rate": 4.0852159213155065e-05, "log_odds_chosen": 6.75771951675415, "log_odds_ratio": -0.022473342716693878, "logits/chosen": -0.501249372959137, "logits/rejected": -0.47875383496284485, "logps/chosen": -0.04296121746301651, "logps/rejected": -2.019442558288574, "loss": 1.5044, "nll_loss": 0.3738507330417633, "rewards/accuracies": 1.0, "rewards/chosen": -0.004296122118830681, "rewards/margins": 0.19764815270900726, "rewards/rejected": -0.2019442617893219, "step": 3827 }, { "epoch": 2.6473029045643153, "grad_norm": 8.723488807678223, "learning_rate": 4.084831719686492e-05, "log_odds_chosen": 7.7598161697387695, "log_odds_ratio": -0.08781145513057709, "logits/chosen": -0.5303641557693481, "logits/rejected": -0.5795314311981201, "logps/chosen": -0.017855996266007423, "logps/rejected": -1.4885255098342896, "loss": 2.612, "nll_loss": 0.6442256569862366, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017855996266007423, "rewards/margins": 0.14706696569919586, "rewards/rejected": -0.14885255694389343, "step": 3828 }, { "epoch": 2.647994467496542, "grad_norm": 6.054506301879883, "learning_rate": 4.084447518057476e-05, "log_odds_chosen": 5.622161865234375, "log_odds_ratio": -0.16529729962348938, "logits/chosen": -0.09599445015192032, "logits/rejected": -0.1764889806509018, "logps/chosen": -0.06396627426147461, "logps/rejected": -1.3258779048919678, "loss": 2.5946, "nll_loss": 0.6321138739585876, "rewards/accuracies": 0.875, "rewards/chosen": -0.006396627984941006, "rewards/margins": 0.1261911690235138, "rewards/rejected": -0.13258779048919678, "step": 3829 }, { "epoch": 2.648686030428769, "grad_norm": 9.629890441894531, "learning_rate": 4.084063316428462e-05, "log_odds_chosen": 7.408921718597412, "log_odds_ratio": -0.009323184378445148, "logits/chosen": -0.6157118082046509, "logits/rejected": -0.6616868376731873, "logps/chosen": -0.021634576842188835, "logps/rejected": -1.3038392066955566, "loss": 2.1013, "nll_loss": 0.5243857502937317, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021634576842188835, "rewards/margins": 0.12822046875953674, "rewards/rejected": -0.1303839236497879, "step": 3830 }, { "epoch": 2.6493775933609958, "grad_norm": 7.59913969039917, "learning_rate": 4.083679114799447e-05, "log_odds_chosen": 8.643135070800781, "log_odds_ratio": -0.002662697108462453, "logits/chosen": -0.5360656976699829, "logits/rejected": -0.5914124250411987, "logps/chosen": -0.026968974620103836, "logps/rejected": -1.923891305923462, "loss": 2.1109, "nll_loss": 0.5274561643600464, "rewards/accuracies": 1.0, "rewards/chosen": -0.00269689760170877, "rewards/margins": 0.18969221413135529, "rewards/rejected": -0.1923891305923462, "step": 3831 }, { "epoch": 2.6500691562932226, "grad_norm": 10.914976119995117, "learning_rate": 4.083294913170432e-05, "log_odds_chosen": 9.900793075561523, "log_odds_ratio": -0.00014636406558565795, "logits/chosen": -0.720471978187561, "logits/rejected": -0.8152539730072021, "logps/chosen": -0.00039817302604205906, "logps/rejected": -1.9039825201034546, "loss": 2.374, "nll_loss": 0.5934768319129944, "rewards/accuracies": 1.0, "rewards/chosen": -3.981730333180167e-05, "rewards/margins": 0.19035843014717102, "rewards/rejected": -0.19039824604988098, "step": 3832 }, { "epoch": 2.6507607192254494, "grad_norm": 10.051604270935059, "learning_rate": 4.082910711541417e-05, "log_odds_chosen": 7.259871482849121, "log_odds_ratio": -0.020360752940177917, "logits/chosen": -0.5566485524177551, "logits/rejected": -0.6278355717658997, "logps/chosen": -0.04195665195584297, "logps/rejected": -1.222951054573059, "loss": 2.6429, "nll_loss": 0.6586805582046509, "rewards/accuracies": 1.0, "rewards/chosen": -0.004195665009319782, "rewards/margins": 0.11809943616390228, "rewards/rejected": -0.12229510396718979, "step": 3833 }, { "epoch": 2.6514522821576763, "grad_norm": 22.69855499267578, "learning_rate": 4.082526509912402e-05, "log_odds_chosen": 7.608578205108643, "log_odds_ratio": -0.2585284411907196, "logits/chosen": -0.7369226813316345, "logits/rejected": -0.8471648097038269, "logps/chosen": -0.047729022800922394, "logps/rejected": -1.2069224119186401, "loss": 2.6804, "nll_loss": 0.6442494988441467, "rewards/accuracies": 0.875, "rewards/chosen": -0.004772902466356754, "rewards/margins": 0.1159193366765976, "rewards/rejected": -0.12069223821163177, "step": 3834 }, { "epoch": 2.652143845089903, "grad_norm": 16.364238739013672, "learning_rate": 4.082142308283387e-05, "log_odds_chosen": 7.655045032501221, "log_odds_ratio": -0.38344433903694153, "logits/chosen": -0.11542128026485443, "logits/rejected": -0.21868827939033508, "logps/chosen": -0.010792739689350128, "logps/rejected": -2.05288028717041, "loss": 2.0762, "nll_loss": 0.4807088077068329, "rewards/accuracies": 0.875, "rewards/chosen": -0.0010792739922180772, "rewards/margins": 0.20420874655246735, "rewards/rejected": -0.20528802275657654, "step": 3835 }, { "epoch": 2.65283540802213, "grad_norm": 12.420515060424805, "learning_rate": 4.081758106654372e-05, "log_odds_chosen": 7.426298141479492, "log_odds_ratio": -0.22978830337524414, "logits/chosen": -0.37689489126205444, "logits/rejected": -0.39640137553215027, "logps/chosen": -0.07515082508325577, "logps/rejected": -1.773220419883728, "loss": 2.6472, "nll_loss": 0.6388266086578369, "rewards/accuracies": 0.875, "rewards/chosen": -0.007515083998441696, "rewards/margins": 0.16980695724487305, "rewards/rejected": -0.17732205986976624, "step": 3836 }, { "epoch": 2.6535269709543567, "grad_norm": 6.804900169372559, "learning_rate": 4.0813739050253576e-05, "log_odds_chosen": 7.390723705291748, "log_odds_ratio": -0.01640833169221878, "logits/chosen": -0.37023866176605225, "logits/rejected": -0.41221803426742554, "logps/chosen": -0.04346206784248352, "logps/rejected": -1.1033592224121094, "loss": 1.6437, "nll_loss": 0.4092819094657898, "rewards/accuracies": 1.0, "rewards/chosen": -0.0043462058529257774, "rewards/margins": 0.1059897169470787, "rewards/rejected": -0.11033592373132706, "step": 3837 }, { "epoch": 2.6542185338865836, "grad_norm": 6.01309871673584, "learning_rate": 4.080989703396342e-05, "log_odds_chosen": 6.006360054016113, "log_odds_ratio": -0.042434729635715485, "logits/chosen": -0.6176404356956482, "logits/rejected": -0.6368340849876404, "logps/chosen": -0.025102484971284866, "logps/rejected": -1.3419160842895508, "loss": 2.0207, "nll_loss": 0.5009331107139587, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025102486833930016, "rewards/margins": 0.13168136775493622, "rewards/rejected": -0.1341916024684906, "step": 3838 }, { "epoch": 2.6549100968188104, "grad_norm": 10.797191619873047, "learning_rate": 4.080605501767328e-05, "log_odds_chosen": 7.917986869812012, "log_odds_ratio": -0.001113984500989318, "logits/chosen": -0.5247561931610107, "logits/rejected": -0.5820844173431396, "logps/chosen": -0.01609625667333603, "logps/rejected": -1.6284340620040894, "loss": 2.8802, "nll_loss": 0.7199360132217407, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016096257604658604, "rewards/margins": 0.16123376786708832, "rewards/rejected": -0.16284340620040894, "step": 3839 }, { "epoch": 2.6556016597510372, "grad_norm": 7.469875812530518, "learning_rate": 4.0802213001383126e-05, "log_odds_chosen": 6.664145469665527, "log_odds_ratio": -0.3955520987510681, "logits/chosen": -0.5398687124252319, "logits/rejected": -0.5271255970001221, "logps/chosen": -0.07110904902219772, "logps/rejected": -1.0651947259902954, "loss": 2.023, "nll_loss": 0.4662015438079834, "rewards/accuracies": 0.875, "rewards/chosen": -0.007110904902219772, "rewards/margins": 0.09940856695175171, "rewards/rejected": -0.10651947557926178, "step": 3840 }, { "epoch": 2.656293222683264, "grad_norm": 14.15065860748291, "learning_rate": 4.079837098509298e-05, "log_odds_chosen": 6.631532669067383, "log_odds_ratio": -0.2880239188671112, "logits/chosen": -0.23957356810569763, "logits/rejected": -0.41902583837509155, "logps/chosen": -0.10840874910354614, "logps/rejected": -1.2948004007339478, "loss": 2.6744, "nll_loss": 0.6398000121116638, "rewards/accuracies": 0.875, "rewards/chosen": -0.010840874165296555, "rewards/margins": 0.11863917112350464, "rewards/rejected": -0.1294800490140915, "step": 3841 }, { "epoch": 2.656984785615491, "grad_norm": 11.483959197998047, "learning_rate": 4.079452896880283e-05, "log_odds_chosen": 7.611615180969238, "log_odds_ratio": -0.0306989885866642, "logits/chosen": -0.1826712191104889, "logits/rejected": -0.19769719243049622, "logps/chosen": -0.008514686487615108, "logps/rejected": -1.3686504364013672, "loss": 2.3888, "nll_loss": 0.594137966632843, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008514686487615108, "rewards/margins": 0.13601356744766235, "rewards/rejected": -0.13686503469944, "step": 3842 }, { "epoch": 2.6576763485477177, "grad_norm": 14.097295761108398, "learning_rate": 4.079068695251268e-05, "log_odds_chosen": 5.852741718292236, "log_odds_ratio": -0.24867239594459534, "logits/chosen": -0.47088390588760376, "logits/rejected": -0.5414155125617981, "logps/chosen": -0.12828010320663452, "logps/rejected": -1.1394619941711426, "loss": 3.247, "nll_loss": 0.7868857383728027, "rewards/accuracies": 0.875, "rewards/chosen": -0.012828009203076363, "rewards/margins": 0.10111817717552185, "rewards/rejected": -0.11394619941711426, "step": 3843 }, { "epoch": 2.6583679114799446, "grad_norm": 9.225852966308594, "learning_rate": 4.078684493622253e-05, "log_odds_chosen": 7.986328125, "log_odds_ratio": -0.0008940557599999011, "logits/chosen": -0.49089115858078003, "logits/rejected": -0.519273042678833, "logps/chosen": -0.01081137452274561, "logps/rejected": -1.614487886428833, "loss": 2.1834, "nll_loss": 0.5457635521888733, "rewards/accuracies": 1.0, "rewards/chosen": -0.001081137452274561, "rewards/margins": 0.16036765277385712, "rewards/rejected": -0.16144880652427673, "step": 3844 }, { "epoch": 2.6590594744121714, "grad_norm": 6.242781639099121, "learning_rate": 4.078300291993238e-05, "log_odds_chosen": 7.671241760253906, "log_odds_ratio": -0.0021602134220302105, "logits/chosen": -0.2434796690940857, "logits/rejected": -0.3377665877342224, "logps/chosen": -0.0016284455778077245, "logps/rejected": -1.052886962890625, "loss": 2.3956, "nll_loss": 0.5986765027046204, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016284457524307072, "rewards/margins": 0.10512584447860718, "rewards/rejected": -0.10528869181871414, "step": 3845 }, { "epoch": 2.659751037344398, "grad_norm": 11.046217918395996, "learning_rate": 4.0779160903642234e-05, "log_odds_chosen": 7.890171527862549, "log_odds_ratio": -0.0014221521560102701, "logits/chosen": -0.5744228363037109, "logits/rejected": -0.625802218914032, "logps/chosen": -0.014100349508225918, "logps/rejected": -1.7941745519638062, "loss": 2.8484, "nll_loss": 0.7119573354721069, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014100349508225918, "rewards/margins": 0.17800742387771606, "rewards/rejected": -0.1794174611568451, "step": 3846 }, { "epoch": 2.660442600276625, "grad_norm": 9.494037628173828, "learning_rate": 4.077531888735208e-05, "log_odds_chosen": 6.027531147003174, "log_odds_ratio": -0.03056892566382885, "logits/chosen": -0.7430992722511292, "logits/rejected": -0.7252732515335083, "logps/chosen": -0.015541885048151016, "logps/rejected": -1.1196444034576416, "loss": 2.77, "nll_loss": 0.6894551515579224, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015541885513812304, "rewards/margins": 0.11041024327278137, "rewards/rejected": -0.11196442693471909, "step": 3847 }, { "epoch": 2.661134163208852, "grad_norm": 7.332900524139404, "learning_rate": 4.077147687106194e-05, "log_odds_chosen": 7.575974464416504, "log_odds_ratio": -0.00114710524212569, "logits/chosen": -0.6454428434371948, "logits/rejected": -0.6171896457672119, "logps/chosen": -0.013584421947598457, "logps/rejected": -1.3186981678009033, "loss": 2.2862, "nll_loss": 0.571441113948822, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013584423577412963, "rewards/margins": 0.13051137328147888, "rewards/rejected": -0.13186980783939362, "step": 3848 }, { "epoch": 2.6618257261410787, "grad_norm": 6.807016372680664, "learning_rate": 4.0767634854771785e-05, "log_odds_chosen": 6.989053726196289, "log_odds_ratio": -0.00887046754360199, "logits/chosen": -0.7269768714904785, "logits/rejected": -0.800000011920929, "logps/chosen": -0.015401525422930717, "logps/rejected": -1.5553052425384521, "loss": 2.6655, "nll_loss": 0.6654878854751587, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015401525888592005, "rewards/margins": 0.15399038791656494, "rewards/rejected": -0.15553054213523865, "step": 3849 }, { "epoch": 2.6625172890733055, "grad_norm": 10.290566444396973, "learning_rate": 4.076379283848164e-05, "log_odds_chosen": 4.950558662414551, "log_odds_ratio": -0.01827535592019558, "logits/chosen": -0.8898689150810242, "logits/rejected": -0.9134399890899658, "logps/chosen": -0.012501144781708717, "logps/rejected": -0.901066780090332, "loss": 3.7583, "nll_loss": 0.9377538561820984, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012501144083216786, "rewards/margins": 0.08885655552148819, "rewards/rejected": -0.09010668098926544, "step": 3850 }, { "epoch": 2.6632088520055324, "grad_norm": 9.970890045166016, "learning_rate": 4.075995082219149e-05, "log_odds_chosen": 6.204949378967285, "log_odds_ratio": -0.05663444474339485, "logits/chosen": -0.5326727628707886, "logits/rejected": -0.6079345941543579, "logps/chosen": -0.055087704211473465, "logps/rejected": -1.7836432456970215, "loss": 2.3693, "nll_loss": 0.5866531729698181, "rewards/accuracies": 1.0, "rewards/chosen": -0.005508770234882832, "rewards/margins": 0.17285554111003876, "rewards/rejected": -0.1783643215894699, "step": 3851 }, { "epoch": 2.663900414937759, "grad_norm": 10.323980331420898, "learning_rate": 4.0756108805901335e-05, "log_odds_chosen": 7.44677734375, "log_odds_ratio": -0.030988784506917, "logits/chosen": -0.5944615602493286, "logits/rejected": -0.6548985242843628, "logps/chosen": -0.010647930204868317, "logps/rejected": -1.1650986671447754, "loss": 2.1231, "nll_loss": 0.5276675820350647, "rewards/accuracies": 1.0, "rewards/chosen": -0.001064792973920703, "rewards/margins": 0.11544506996870041, "rewards/rejected": -0.11650986969470978, "step": 3852 }, { "epoch": 2.664591977869986, "grad_norm": 6.948586940765381, "learning_rate": 4.075226678961119e-05, "log_odds_chosen": 8.399258613586426, "log_odds_ratio": -0.007890290580689907, "logits/chosen": -0.6912119388580322, "logits/rejected": -0.798456609249115, "logps/chosen": -0.0031807105988264084, "logps/rejected": -1.0791079998016357, "loss": 1.4553, "nll_loss": 0.36302649974823, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031807104824110866, "rewards/margins": 0.10759273171424866, "rewards/rejected": -0.10791080445051193, "step": 3853 }, { "epoch": 2.665283540802213, "grad_norm": 5.564873695373535, "learning_rate": 4.074842477332104e-05, "log_odds_chosen": 6.925118446350098, "log_odds_ratio": -0.04115324467420578, "logits/chosen": -0.24638259410858154, "logits/rejected": -0.32120299339294434, "logps/chosen": -0.036915652453899384, "logps/rejected": -1.4263198375701904, "loss": 2.0515, "nll_loss": 0.5087476968765259, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036915652453899384, "rewards/margins": 0.13894042372703552, "rewards/rejected": -0.14263199269771576, "step": 3854 }, { "epoch": 2.6659751037344397, "grad_norm": 7.070037364959717, "learning_rate": 4.074458275703089e-05, "log_odds_chosen": 6.6316328048706055, "log_odds_ratio": -0.04871571436524391, "logits/chosen": -0.3488408625125885, "logits/rejected": -0.46453577280044556, "logps/chosen": -0.035265352576971054, "logps/rejected": -1.430934190750122, "loss": 2.7185, "nll_loss": 0.6747473478317261, "rewards/accuracies": 1.0, "rewards/chosen": -0.003526535350829363, "rewards/margins": 0.13956689834594727, "rewards/rejected": -0.14309342205524445, "step": 3855 }, { "epoch": 2.6666666666666665, "grad_norm": 7.606586456298828, "learning_rate": 4.074074074074074e-05, "log_odds_chosen": 8.715312957763672, "log_odds_ratio": -0.0006025927141308784, "logits/chosen": -0.5068209767341614, "logits/rejected": -0.613471508026123, "logps/chosen": -0.0014307673554867506, "logps/rejected": -1.4268276691436768, "loss": 1.8998, "nll_loss": 0.47490203380584717, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014307672972790897, "rewards/margins": 0.14253969490528107, "rewards/rejected": -0.1426827758550644, "step": 3856 }, { "epoch": 2.6673582295988933, "grad_norm": 6.091766357421875, "learning_rate": 4.07368987244506e-05, "log_odds_chosen": 6.20139217376709, "log_odds_ratio": -0.07082384079694748, "logits/chosen": -0.4589351415634155, "logits/rejected": -0.5272402763366699, "logps/chosen": -0.03111000917851925, "logps/rejected": -1.0170793533325195, "loss": 2.2244, "nll_loss": 0.549013614654541, "rewards/accuracies": 1.0, "rewards/chosen": -0.003111000871285796, "rewards/margins": 0.09859693795442581, "rewards/rejected": -0.10170793533325195, "step": 3857 }, { "epoch": 2.66804979253112, "grad_norm": 14.512560844421387, "learning_rate": 4.073305670816044e-05, "log_odds_chosen": 9.04071044921875, "log_odds_ratio": -0.00032202163129113615, "logits/chosen": -0.7915189862251282, "logits/rejected": -0.8212922215461731, "logps/chosen": -0.002225534524768591, "logps/rejected": -2.078573703765869, "loss": 2.7696, "nll_loss": 0.6923729777336121, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022255346993915737, "rewards/margins": 0.2076348066329956, "rewards/rejected": -0.20785734057426453, "step": 3858 }, { "epoch": 2.668741355463347, "grad_norm": 11.682271003723145, "learning_rate": 4.0729214691870296e-05, "log_odds_chosen": 8.06418514251709, "log_odds_ratio": -0.0015061056474223733, "logits/chosen": -0.5448694229125977, "logits/rejected": -0.5539915561676025, "logps/chosen": -0.001430652802810073, "logps/rejected": -1.1192471981048584, "loss": 2.0914, "nll_loss": 0.5227075815200806, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014306529192253947, "rewards/margins": 0.11178165674209595, "rewards/rejected": -0.11192472279071808, "step": 3859 }, { "epoch": 2.669432918395574, "grad_norm": 13.919990539550781, "learning_rate": 4.072537267558015e-05, "log_odds_chosen": 7.997103214263916, "log_odds_ratio": -0.001455902587622404, "logits/chosen": -0.8294209241867065, "logits/rejected": -0.881327748298645, "logps/chosen": -0.010614650323987007, "logps/rejected": -1.5739727020263672, "loss": 2.1879, "nll_loss": 0.5468218326568604, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010614650091156363, "rewards/margins": 0.15633580088615417, "rewards/rejected": -0.15739727020263672, "step": 3860 }, { "epoch": 2.6701244813278007, "grad_norm": 10.517012596130371, "learning_rate": 4.0721530659289994e-05, "log_odds_chosen": 6.757225513458252, "log_odds_ratio": -0.0808834433555603, "logits/chosen": -0.7147971391677856, "logits/rejected": -0.7620722651481628, "logps/chosen": -0.004747908562421799, "logps/rejected": -1.114861249923706, "loss": 2.979, "nll_loss": 0.7366524934768677, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004747908387798816, "rewards/margins": 0.1110113263130188, "rewards/rejected": -0.11148611456155777, "step": 3861 }, { "epoch": 2.6708160442600275, "grad_norm": 9.34632396697998, "learning_rate": 4.0717688642999846e-05, "log_odds_chosen": 5.147512912750244, "log_odds_ratio": -0.12234330177307129, "logits/chosen": -0.4665258228778839, "logits/rejected": -0.5213587284088135, "logps/chosen": -0.023862307891249657, "logps/rejected": -1.0018904209136963, "loss": 3.204, "nll_loss": 0.7887601256370544, "rewards/accuracies": 0.875, "rewards/chosen": -0.002386230742558837, "rewards/margins": 0.09780281782150269, "rewards/rejected": -0.10018904507160187, "step": 3862 }, { "epoch": 2.6715076071922543, "grad_norm": 6.544745445251465, "learning_rate": 4.07138466267097e-05, "log_odds_chosen": 4.824288368225098, "log_odds_ratio": -0.04939433932304382, "logits/chosen": -0.4103182554244995, "logits/rejected": -0.4253101050853729, "logps/chosen": -0.042487192898988724, "logps/rejected": -1.0595020055770874, "loss": 2.1754, "nll_loss": 0.5389158129692078, "rewards/accuracies": 1.0, "rewards/chosen": -0.004248719196766615, "rewards/margins": 0.10170148313045502, "rewards/rejected": -0.10595019906759262, "step": 3863 }, { "epoch": 2.6721991701244816, "grad_norm": 5.377713680267334, "learning_rate": 4.071000461041955e-05, "log_odds_chosen": 6.0147199630737305, "log_odds_ratio": -0.17093312740325928, "logits/chosen": -0.4358542859554291, "logits/rejected": -0.5036448836326599, "logps/chosen": -0.0451769195497036, "logps/rejected": -1.2233073711395264, "loss": 1.7589, "nll_loss": 0.4226234257221222, "rewards/accuracies": 0.875, "rewards/chosen": -0.004517692141234875, "rewards/margins": 0.11781305074691772, "rewards/rejected": -0.12233074754476547, "step": 3864 }, { "epoch": 2.6728907330567084, "grad_norm": 11.191709518432617, "learning_rate": 4.07061625941294e-05, "log_odds_chosen": 6.220905303955078, "log_odds_ratio": -0.1648993343114853, "logits/chosen": -0.7400772571563721, "logits/rejected": -0.7881424427032471, "logps/chosen": -0.0402042493224144, "logps/rejected": -1.097238540649414, "loss": 2.8797, "nll_loss": 0.7034355401992798, "rewards/accuracies": 0.875, "rewards/chosen": -0.004020425025373697, "rewards/margins": 0.1057034358382225, "rewards/rejected": -0.10972385853528976, "step": 3865 }, { "epoch": 2.6735822959889353, "grad_norm": 11.402688026428223, "learning_rate": 4.0702320577839256e-05, "log_odds_chosen": 7.402166366577148, "log_odds_ratio": -0.003541384357959032, "logits/chosen": -0.4315453767776489, "logits/rejected": -0.48258814215660095, "logps/chosen": -0.03788773715496063, "logps/rejected": -2.194901466369629, "loss": 1.6595, "nll_loss": 0.4145263433456421, "rewards/accuracies": 1.0, "rewards/chosen": -0.003788774134591222, "rewards/margins": 0.2157014012336731, "rewards/rejected": -0.2194901704788208, "step": 3866 }, { "epoch": 2.674273858921162, "grad_norm": 9.743550300598145, "learning_rate": 4.06984785615491e-05, "log_odds_chosen": 8.811948776245117, "log_odds_ratio": -0.0250435471534729, "logits/chosen": -1.0121331214904785, "logits/rejected": -1.1049795150756836, "logps/chosen": -0.006895300932228565, "logps/rejected": -1.593515396118164, "loss": 2.7048, "nll_loss": 0.6736976504325867, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006895301048643887, "rewards/margins": 0.15866202116012573, "rewards/rejected": -0.15935155749320984, "step": 3867 }, { "epoch": 2.674965421853389, "grad_norm": 14.078466415405273, "learning_rate": 4.0694636545258954e-05, "log_odds_chosen": 9.76037883758545, "log_odds_ratio": -0.00011932184861507267, "logits/chosen": -0.4967171847820282, "logits/rejected": -0.585934042930603, "logps/chosen": -0.0009728466393426061, "logps/rejected": -2.253514528274536, "loss": 3.3634, "nll_loss": 0.8408321738243103, "rewards/accuracies": 1.0, "rewards/chosen": -9.728466102387756e-05, "rewards/margins": 0.22525416314601898, "rewards/rejected": -0.2253514528274536, "step": 3868 }, { "epoch": 2.6756569847856158, "grad_norm": 7.892293930053711, "learning_rate": 4.0690794528968806e-05, "log_odds_chosen": 7.16309928894043, "log_odds_ratio": -0.0015303477412089705, "logits/chosen": -0.6888867616653442, "logits/rejected": -0.7380569577217102, "logps/chosen": -0.005860478151589632, "logps/rejected": -1.0863014459609985, "loss": 2.0397, "nll_loss": 0.5097621083259583, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005860478268004954, "rewards/margins": 0.1080440878868103, "rewards/rejected": -0.10863013565540314, "step": 3869 }, { "epoch": 2.6763485477178426, "grad_norm": 10.240550994873047, "learning_rate": 4.068695251267866e-05, "log_odds_chosen": 7.787205219268799, "log_odds_ratio": -0.0014418363571166992, "logits/chosen": -0.3999979794025421, "logits/rejected": -0.3923502564430237, "logps/chosen": -0.0008615354308858514, "logps/rejected": -1.0863168239593506, "loss": 3.3077, "nll_loss": 0.826777458190918, "rewards/accuracies": 1.0, "rewards/chosen": -8.615355181973428e-05, "rewards/margins": 0.10854553431272507, "rewards/rejected": -0.1086316853761673, "step": 3870 }, { "epoch": 2.6770401106500694, "grad_norm": 6.977838039398193, "learning_rate": 4.0683110496388504e-05, "log_odds_chosen": 6.633417129516602, "log_odds_ratio": -0.0035154526121914387, "logits/chosen": -0.522433876991272, "logits/rejected": -0.5463957190513611, "logps/chosen": -0.02177012898027897, "logps/rejected": -1.4686945676803589, "loss": 2.0591, "nll_loss": 0.5144321918487549, "rewards/accuracies": 1.0, "rewards/chosen": -0.002177012851461768, "rewards/margins": 0.14469242095947266, "rewards/rejected": -0.1468694508075714, "step": 3871 }, { "epoch": 2.6777316735822962, "grad_norm": 8.900273323059082, "learning_rate": 4.067926848009836e-05, "log_odds_chosen": 8.341584205627441, "log_odds_ratio": -0.005001600366085768, "logits/chosen": -0.33963701128959656, "logits/rejected": -0.41181546449661255, "logps/chosen": -0.08042304217815399, "logps/rejected": -2.277837038040161, "loss": 1.7396, "nll_loss": 0.43440160155296326, "rewards/accuracies": 1.0, "rewards/chosen": -0.008042304776608944, "rewards/margins": 0.21974140405654907, "rewards/rejected": -0.2277837097644806, "step": 3872 }, { "epoch": 2.678423236514523, "grad_norm": 13.76147174835205, "learning_rate": 4.067542646380821e-05, "log_odds_chosen": 7.684887886047363, "log_odds_ratio": -0.29360559582710266, "logits/chosen": -0.41378188133239746, "logits/rejected": -0.47570520639419556, "logps/chosen": -0.03724703937768936, "logps/rejected": -1.4443128108978271, "loss": 2.0423, "nll_loss": 0.48122406005859375, "rewards/accuracies": 0.875, "rewards/chosen": -0.0037247042637318373, "rewards/margins": 0.14070658385753632, "rewards/rejected": -0.14443129301071167, "step": 3873 }, { "epoch": 2.67911479944675, "grad_norm": 8.732939720153809, "learning_rate": 4.0671584447518055e-05, "log_odds_chosen": 7.691486835479736, "log_odds_ratio": -0.015395074151456356, "logits/chosen": -0.4411604404449463, "logits/rejected": -0.5540481209754944, "logps/chosen": -0.02716037817299366, "logps/rejected": -1.8648601770401, "loss": 2.5227, "nll_loss": 0.6291290521621704, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027160379104316235, "rewards/margins": 0.18376997113227844, "rewards/rejected": -0.18648600578308105, "step": 3874 }, { "epoch": 2.6798063623789767, "grad_norm": 8.468565940856934, "learning_rate": 4.0667742431227914e-05, "log_odds_chosen": 7.416378498077393, "log_odds_ratio": -0.08451016247272491, "logits/chosen": -0.8846219182014465, "logits/rejected": -0.8550897836685181, "logps/chosen": -0.010380887426435947, "logps/rejected": -1.1413564682006836, "loss": 2.8681, "nll_loss": 0.7085626721382141, "rewards/accuracies": 1.0, "rewards/chosen": -0.001038088696077466, "rewards/margins": 0.11309756338596344, "rewards/rejected": -0.11413565278053284, "step": 3875 }, { "epoch": 2.6804979253112036, "grad_norm": 14.807464599609375, "learning_rate": 4.066390041493776e-05, "log_odds_chosen": 8.475648880004883, "log_odds_ratio": -0.06202422454953194, "logits/chosen": -0.2031635195016861, "logits/rejected": -0.30704939365386963, "logps/chosen": -0.018131043761968613, "logps/rejected": -1.6106603145599365, "loss": 2.4935, "nll_loss": 0.6171661615371704, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018131043761968613, "rewards/margins": 0.15925294160842896, "rewards/rejected": -0.16106604039669037, "step": 3876 }, { "epoch": 2.6811894882434304, "grad_norm": 6.985348224639893, "learning_rate": 4.066005839864761e-05, "log_odds_chosen": 5.815200328826904, "log_odds_ratio": -0.10103943943977356, "logits/chosen": -0.5342247486114502, "logits/rejected": -0.49896302819252014, "logps/chosen": -0.024796854704618454, "logps/rejected": -0.9605525732040405, "loss": 2.5107, "nll_loss": 0.6175825595855713, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024796854704618454, "rewards/margins": 0.09357556700706482, "rewards/rejected": -0.09605525434017181, "step": 3877 }, { "epoch": 2.6818810511756572, "grad_norm": 6.360101222991943, "learning_rate": 4.0656216382357465e-05, "log_odds_chosen": 6.302757740020752, "log_odds_ratio": -0.01789461448788643, "logits/chosen": -0.4748285710811615, "logits/rejected": -0.4775317907333374, "logps/chosen": -0.02190949022769928, "logps/rejected": -0.8361377716064453, "loss": 2.0028, "nll_loss": 0.49890488386154175, "rewards/accuracies": 1.0, "rewards/chosen": -0.002190949162468314, "rewards/margins": 0.0814228355884552, "rewards/rejected": -0.0836137905716896, "step": 3878 }, { "epoch": 2.682572614107884, "grad_norm": 7.889153957366943, "learning_rate": 4.065237436606732e-05, "log_odds_chosen": 8.003373146057129, "log_odds_ratio": -0.006866448558866978, "logits/chosen": -0.47109514474868774, "logits/rejected": -0.4927683472633362, "logps/chosen": -0.024222789332270622, "logps/rejected": -1.558125376701355, "loss": 1.7338, "nll_loss": 0.4327537417411804, "rewards/accuracies": 1.0, "rewards/chosen": -0.002422278979793191, "rewards/margins": 0.15339027345180511, "rewards/rejected": -0.15581254661083221, "step": 3879 }, { "epoch": 2.683264177040111, "grad_norm": 7.911652088165283, "learning_rate": 4.064853234977716e-05, "log_odds_chosen": 5.731238842010498, "log_odds_ratio": -0.2041652798652649, "logits/chosen": -0.3681705594062805, "logits/rejected": -0.3524719476699829, "logps/chosen": -0.038544073700904846, "logps/rejected": -0.7402932047843933, "loss": 2.4387, "nll_loss": 0.5892567038536072, "rewards/accuracies": 0.75, "rewards/chosen": -0.003854407463222742, "rewards/margins": 0.0701749175786972, "rewards/rejected": -0.07402931898832321, "step": 3880 }, { "epoch": 2.6839557399723377, "grad_norm": 7.045600414276123, "learning_rate": 4.0644690333487015e-05, "log_odds_chosen": 8.163865089416504, "log_odds_ratio": -0.0022141621448099613, "logits/chosen": -0.6396666169166565, "logits/rejected": -0.6936120986938477, "logps/chosen": -0.0017530673649162054, "logps/rejected": -1.4846478700637817, "loss": 1.7957, "nll_loss": 0.4487159848213196, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017530674813315272, "rewards/margins": 0.1482894867658615, "rewards/rejected": -0.14846479892730713, "step": 3881 }, { "epoch": 2.6846473029045645, "grad_norm": 7.05449104309082, "learning_rate": 4.064084831719687e-05, "log_odds_chosen": 7.6457719802856445, "log_odds_ratio": -0.0029674817342311144, "logits/chosen": -0.5483105182647705, "logits/rejected": -0.528850793838501, "logps/chosen": -0.015136461704969406, "logps/rejected": -1.5551209449768066, "loss": 1.7261, "nll_loss": 0.4312302768230438, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015136462170630693, "rewards/margins": 0.1539984494447708, "rewards/rejected": -0.15551209449768066, "step": 3882 }, { "epoch": 2.6853388658367914, "grad_norm": 9.146442413330078, "learning_rate": 4.0637006300906713e-05, "log_odds_chosen": 6.629992961883545, "log_odds_ratio": -0.14234022796154022, "logits/chosen": -0.4649689793586731, "logits/rejected": -0.5079630613327026, "logps/chosen": -0.028579382225871086, "logps/rejected": -1.3636877536773682, "loss": 2.2741, "nll_loss": 0.5542930960655212, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028579384088516235, "rewards/margins": 0.13351084291934967, "rewards/rejected": -0.1363687813282013, "step": 3883 }, { "epoch": 2.686030428769018, "grad_norm": 8.364623069763184, "learning_rate": 4.063316428461657e-05, "log_odds_chosen": 7.477568626403809, "log_odds_ratio": -0.01047502364963293, "logits/chosen": -0.16539695858955383, "logits/rejected": -0.2058202177286148, "logps/chosen": -0.00590811762958765, "logps/rejected": -1.113823652267456, "loss": 2.0598, "nll_loss": 0.5138932466506958, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005908117746002972, "rewards/margins": 0.11079156398773193, "rewards/rejected": -0.1113823801279068, "step": 3884 }, { "epoch": 2.686721991701245, "grad_norm": 8.873336791992188, "learning_rate": 4.062932226832642e-05, "log_odds_chosen": 7.151572227478027, "log_odds_ratio": -0.010587374679744244, "logits/chosen": -0.7112501859664917, "logits/rejected": -0.7304208278656006, "logps/chosen": -0.008737252093851566, "logps/rejected": -1.2269251346588135, "loss": 1.7025, "nll_loss": 0.4245630204677582, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008737251628190279, "rewards/margins": 0.12181878834962845, "rewards/rejected": -0.12269251048564911, "step": 3885 }, { "epoch": 2.687413554633472, "grad_norm": 13.44513988494873, "learning_rate": 4.062548025203627e-05, "log_odds_chosen": 8.571016311645508, "log_odds_ratio": -0.0013945872196927667, "logits/chosen": -0.5865503549575806, "logits/rejected": -0.5623582601547241, "logps/chosen": -0.0017820007633417845, "logps/rejected": -1.2935950756072998, "loss": 2.1752, "nll_loss": 0.543658435344696, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017820007633417845, "rewards/margins": 0.12918131053447723, "rewards/rejected": -0.12935952842235565, "step": 3886 }, { "epoch": 2.6881051175656987, "grad_norm": 5.672190189361572, "learning_rate": 4.062163823574612e-05, "log_odds_chosen": 7.870635986328125, "log_odds_ratio": -0.006466720253229141, "logits/chosen": -0.4872884750366211, "logits/rejected": -0.5083516240119934, "logps/chosen": -0.01766045391559601, "logps/rejected": -1.3608150482177734, "loss": 2.0634, "nll_loss": 0.5152048468589783, "rewards/accuracies": 1.0, "rewards/chosen": -0.001766045344993472, "rewards/margins": 0.13431546092033386, "rewards/rejected": -0.1360815167427063, "step": 3887 }, { "epoch": 2.6887966804979255, "grad_norm": 9.264443397521973, "learning_rate": 4.0617796219455976e-05, "log_odds_chosen": 7.730437755584717, "log_odds_ratio": -0.0016871271654963493, "logits/chosen": -0.5565177798271179, "logits/rejected": -0.6040325164794922, "logps/chosen": -0.0025901379995048046, "logps/rejected": -1.3575786352157593, "loss": 2.4853, "nll_loss": 0.6211501955986023, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002590137883089483, "rewards/margins": 0.13549885153770447, "rewards/rejected": -0.13575786352157593, "step": 3888 }, { "epoch": 2.6894882434301524, "grad_norm": 7.969750881195068, "learning_rate": 4.061395420316582e-05, "log_odds_chosen": 9.363624572753906, "log_odds_ratio": -0.0005542628350667655, "logits/chosen": -0.7252931594848633, "logits/rejected": -0.8185261487960815, "logps/chosen": -0.016552282497286797, "logps/rejected": -2.182529926300049, "loss": 2.064, "nll_loss": 0.5159405469894409, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016552285524085164, "rewards/margins": 0.2165977656841278, "rewards/rejected": -0.2182529866695404, "step": 3889 }, { "epoch": 2.690179806362379, "grad_norm": 8.628170013427734, "learning_rate": 4.0610112186875674e-05, "log_odds_chosen": 8.0739164352417, "log_odds_ratio": -0.002225684467703104, "logits/chosen": -0.747127115726471, "logits/rejected": -0.8388808965682983, "logps/chosen": -0.003269063076004386, "logps/rejected": -1.6631715297698975, "loss": 2.0785, "nll_loss": 0.5193923115730286, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032690633088350296, "rewards/margins": 0.16599026322364807, "rewards/rejected": -0.1663171648979187, "step": 3890 }, { "epoch": 2.690871369294606, "grad_norm": 11.820703506469727, "learning_rate": 4.0606270170585526e-05, "log_odds_chosen": 7.5276336669921875, "log_odds_ratio": -0.0058481087908148766, "logits/chosen": -0.43524500727653503, "logits/rejected": -0.48104870319366455, "logps/chosen": -0.009421736001968384, "logps/rejected": -1.2175965309143066, "loss": 2.5074, "nll_loss": 0.6262714266777039, "rewards/accuracies": 1.0, "rewards/chosen": -0.000942173704970628, "rewards/margins": 0.12081749737262726, "rewards/rejected": -0.12175966054201126, "step": 3891 }, { "epoch": 2.691562932226833, "grad_norm": 10.340312957763672, "learning_rate": 4.060242815429537e-05, "log_odds_chosen": 7.890967845916748, "log_odds_ratio": -0.00823564175516367, "logits/chosen": -0.7037793397903442, "logits/rejected": -0.7014849781990051, "logps/chosen": -0.011880909092724323, "logps/rejected": -1.146535873413086, "loss": 2.7982, "nll_loss": 0.6987218260765076, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011880907695740461, "rewards/margins": 0.11346549540758133, "rewards/rejected": -0.11465359479188919, "step": 3892 }, { "epoch": 2.6922544951590597, "grad_norm": 10.122757911682129, "learning_rate": 4.059858613800523e-05, "log_odds_chosen": 5.4945068359375, "log_odds_ratio": -0.09575443714857101, "logits/chosen": -0.725883960723877, "logits/rejected": -0.7544358968734741, "logps/chosen": -0.022189276292920113, "logps/rejected": -1.1797430515289307, "loss": 2.3616, "nll_loss": 0.5808249711990356, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022189277224242687, "rewards/margins": 0.11575537174940109, "rewards/rejected": -0.11797429621219635, "step": 3893 }, { "epoch": 2.6929460580912865, "grad_norm": 5.698261737823486, "learning_rate": 4.059474412171508e-05, "log_odds_chosen": 9.365954399108887, "log_odds_ratio": -0.010592492297291756, "logits/chosen": -0.4197603464126587, "logits/rejected": -0.38374871015548706, "logps/chosen": -0.0014273038832470775, "logps/rejected": -1.4466173648834229, "loss": 1.3858, "nll_loss": 0.3453885614871979, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001427303795935586, "rewards/margins": 0.14451901614665985, "rewards/rejected": -0.14466175436973572, "step": 3894 }, { "epoch": 2.6936376210235133, "grad_norm": 15.388368606567383, "learning_rate": 4.059090210542493e-05, "log_odds_chosen": 7.5923991203308105, "log_odds_ratio": -0.1653386652469635, "logits/chosen": -0.33131247758865356, "logits/rejected": -0.41436147689819336, "logps/chosen": -0.0448850654065609, "logps/rejected": -1.642098307609558, "loss": 2.4806, "nll_loss": 0.6036100387573242, "rewards/accuracies": 0.875, "rewards/chosen": -0.004488506354391575, "rewards/margins": 0.15972132980823517, "rewards/rejected": -0.16420982778072357, "step": 3895 }, { "epoch": 2.69432918395574, "grad_norm": 9.265947341918945, "learning_rate": 4.058706008913478e-05, "log_odds_chosen": 7.897387981414795, "log_odds_ratio": -0.02778870053589344, "logits/chosen": -0.24363026022911072, "logits/rejected": -0.2546027898788452, "logps/chosen": -0.015297142788767815, "logps/rejected": -1.6907174587249756, "loss": 2.0793, "nll_loss": 0.5170523524284363, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015297143254429102, "rewards/margins": 0.16754204034805298, "rewards/rejected": -0.169071763753891, "step": 3896 }, { "epoch": 2.695020746887967, "grad_norm": 9.980796813964844, "learning_rate": 4.0583218072844634e-05, "log_odds_chosen": 9.521817207336426, "log_odds_ratio": -0.0002317545295227319, "logits/chosen": -0.4232358932495117, "logits/rejected": -0.582566499710083, "logps/chosen": -0.0009553482523187995, "logps/rejected": -1.9109771251678467, "loss": 2.051, "nll_loss": 0.5127320289611816, "rewards/accuracies": 1.0, "rewards/chosen": -9.553483687341213e-05, "rewards/margins": 0.19100217521190643, "rewards/rejected": -0.1910977065563202, "step": 3897 }, { "epoch": 2.695712309820194, "grad_norm": 4.509500503540039, "learning_rate": 4.057937605655448e-05, "log_odds_chosen": 8.891090393066406, "log_odds_ratio": -0.031227584928274155, "logits/chosen": -0.4662817120552063, "logits/rejected": -0.5478078126907349, "logps/chosen": -0.011539162136614323, "logps/rejected": -1.3029841184616089, "loss": 1.6549, "nll_loss": 0.41060373187065125, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011539161205291748, "rewards/margins": 0.12914448976516724, "rewards/rejected": -0.1302984207868576, "step": 3898 }, { "epoch": 2.6964038727524207, "grad_norm": 9.72545337677002, "learning_rate": 4.057553404026433e-05, "log_odds_chosen": 7.56840705871582, "log_odds_ratio": -0.00843381229788065, "logits/chosen": -0.6165993213653564, "logits/rejected": -0.6628961563110352, "logps/chosen": -0.008199600502848625, "logps/rejected": -1.769745111465454, "loss": 2.7607, "nll_loss": 0.6893199682235718, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008199600852094591, "rewards/margins": 0.17615455389022827, "rewards/rejected": -0.17697452008724213, "step": 3899 }, { "epoch": 2.6970954356846475, "grad_norm": 8.728021621704102, "learning_rate": 4.0571692023974185e-05, "log_odds_chosen": 6.957355976104736, "log_odds_ratio": -0.0020753592252731323, "logits/chosen": -0.6975337266921997, "logits/rejected": -0.7607104778289795, "logps/chosen": -0.024932991713285446, "logps/rejected": -1.4897279739379883, "loss": 2.086, "nll_loss": 0.521297812461853, "rewards/accuracies": 1.0, "rewards/chosen": -0.002493299311026931, "rewards/margins": 0.1464795023202896, "rewards/rejected": -0.1489727944135666, "step": 3900 }, { "epoch": 2.6977869986168743, "grad_norm": 10.900203704833984, "learning_rate": 4.056785000768403e-05, "log_odds_chosen": 6.62492561340332, "log_odds_ratio": -0.17824991047382355, "logits/chosen": -0.7206655740737915, "logits/rejected": -0.693558931350708, "logps/chosen": -0.03227349370718002, "logps/rejected": -1.4377551078796387, "loss": 2.3092, "nll_loss": 0.5594759583473206, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032273493707180023, "rewards/margins": 0.14054815471172333, "rewards/rejected": -0.14377550780773163, "step": 3901 }, { "epoch": 2.698478561549101, "grad_norm": 9.88683795928955, "learning_rate": 4.056400799139389e-05, "log_odds_chosen": 8.03813648223877, "log_odds_ratio": -0.0048516602255403996, "logits/chosen": -0.41523438692092896, "logits/rejected": -0.502999484539032, "logps/chosen": -0.013218377716839314, "logps/rejected": -1.833310604095459, "loss": 1.7961, "nll_loss": 0.44853001832962036, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013218377716839314, "rewards/margins": 0.18200922012329102, "rewards/rejected": -0.18333107233047485, "step": 3902 }, { "epoch": 2.699170124481328, "grad_norm": 9.741769790649414, "learning_rate": 4.0560165975103735e-05, "log_odds_chosen": 7.460450172424316, "log_odds_ratio": -0.0015208596596494317, "logits/chosen": -0.9277177453041077, "logits/rejected": -0.9233143329620361, "logps/chosen": -0.002522763330489397, "logps/rejected": -1.2246127128601074, "loss": 3.0391, "nll_loss": 0.7596240043640137, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025227630976587534, "rewards/margins": 0.12220901250839233, "rewards/rejected": -0.12246128171682358, "step": 3903 }, { "epoch": 2.699861687413555, "grad_norm": 8.596832275390625, "learning_rate": 4.055632395881359e-05, "log_odds_chosen": 8.594837188720703, "log_odds_ratio": -0.0006484482437372208, "logits/chosen": -0.6567248702049255, "logits/rejected": -0.7241695523262024, "logps/chosen": -0.0012694273609668016, "logps/rejected": -1.3293309211730957, "loss": 2.7539, "nll_loss": 0.6884142160415649, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012694273027591407, "rewards/margins": 0.13280615210533142, "rewards/rejected": -0.1329330950975418, "step": 3904 }, { "epoch": 2.7005532503457816, "grad_norm": 11.800738334655762, "learning_rate": 4.055248194252344e-05, "log_odds_chosen": 7.378905296325684, "log_odds_ratio": -0.009102431125938892, "logits/chosen": -0.5646547079086304, "logits/rejected": -0.5651717185974121, "logps/chosen": -0.009457389824092388, "logps/rejected": -1.2582478523254395, "loss": 2.4284, "nll_loss": 0.6061837673187256, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009457390988245606, "rewards/margins": 0.12487904727458954, "rewards/rejected": -0.12582479417324066, "step": 3905 }, { "epoch": 2.7012448132780085, "grad_norm": 10.2549467086792, "learning_rate": 4.054863992623329e-05, "log_odds_chosen": 8.616276741027832, "log_odds_ratio": -0.0003504530468489975, "logits/chosen": -0.49540776014328003, "logits/rejected": -0.5995450019836426, "logps/chosen": -0.0056231142953038216, "logps/rejected": -1.4994674921035767, "loss": 1.9879, "nll_loss": 0.4969436228275299, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005623113829642534, "rewards/margins": 0.14938445389270782, "rewards/rejected": -0.14994676411151886, "step": 3906 }, { "epoch": 2.7019363762102353, "grad_norm": 13.21338176727295, "learning_rate": 4.054479790994314e-05, "log_odds_chosen": 8.875137329101562, "log_odds_ratio": -0.0010612740879878402, "logits/chosen": -0.5491659641265869, "logits/rejected": -0.6577551364898682, "logps/chosen": -0.014460853300988674, "logps/rejected": -2.1844234466552734, "loss": 2.2323, "nll_loss": 0.5579643845558167, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014460852835327387, "rewards/margins": 0.21699626743793488, "rewards/rejected": -0.21844235062599182, "step": 3907 }, { "epoch": 2.702627939142462, "grad_norm": 12.623719215393066, "learning_rate": 4.054095589365299e-05, "log_odds_chosen": 8.661251068115234, "log_odds_ratio": -0.0006629570852965117, "logits/chosen": -0.6035693287849426, "logits/rejected": -0.632934033870697, "logps/chosen": -0.0006702827522531152, "logps/rejected": -1.1736888885498047, "loss": 2.1473, "nll_loss": 0.5367664694786072, "rewards/accuracies": 1.0, "rewards/chosen": -6.702827522531152e-05, "rewards/margins": 0.11730185151100159, "rewards/rejected": -0.11736888438463211, "step": 3908 }, { "epoch": 2.703319502074689, "grad_norm": 8.1231050491333, "learning_rate": 4.053711387736284e-05, "log_odds_chosen": 8.479167938232422, "log_odds_ratio": -0.000644492800347507, "logits/chosen": -0.8432545065879822, "logits/rejected": -0.8350276947021484, "logps/chosen": -0.0008359847124665976, "logps/rejected": -1.1647535562515259, "loss": 2.1751, "nll_loss": 0.5437168478965759, "rewards/accuracies": 1.0, "rewards/chosen": -8.359846833627671e-05, "rewards/margins": 0.11639176309108734, "rewards/rejected": -0.11647535860538483, "step": 3909 }, { "epoch": 2.704011065006916, "grad_norm": 12.870767593383789, "learning_rate": 4.053327186107269e-05, "log_odds_chosen": 8.805551528930664, "log_odds_ratio": -0.0004201167030259967, "logits/chosen": -0.4179730713367462, "logits/rejected": -0.550365149974823, "logps/chosen": -0.001364890718832612, "logps/rejected": -1.2633905410766602, "loss": 1.5198, "nll_loss": 0.3799135088920593, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001364890777040273, "rewards/margins": 0.1262025684118271, "rewards/rejected": -0.12633906304836273, "step": 3910 }, { "epoch": 2.7047026279391426, "grad_norm": 12.722868919372559, "learning_rate": 4.052942984478255e-05, "log_odds_chosen": 7.820643901824951, "log_odds_ratio": -0.007156676612794399, "logits/chosen": -0.5849895477294922, "logits/rejected": -0.6761038303375244, "logps/chosen": -0.006085277535021305, "logps/rejected": -1.2077401876449585, "loss": 3.0812, "nll_loss": 0.7695819735527039, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006085278000682592, "rewards/margins": 0.12016548216342926, "rewards/rejected": -0.12077402323484421, "step": 3911 }, { "epoch": 2.7053941908713695, "grad_norm": 5.718795299530029, "learning_rate": 4.0525587828492394e-05, "log_odds_chosen": 7.587345123291016, "log_odds_ratio": -0.0025898406747728586, "logits/chosen": -0.4436808228492737, "logits/rejected": -0.5065404176712036, "logps/chosen": -0.017170462757349014, "logps/rejected": -1.5590879917144775, "loss": 1.8046, "nll_loss": 0.4508890211582184, "rewards/accuracies": 1.0, "rewards/chosen": -0.001717046252451837, "rewards/margins": 0.1541917622089386, "rewards/rejected": -0.15590879321098328, "step": 3912 }, { "epoch": 2.7060857538035963, "grad_norm": 8.069631576538086, "learning_rate": 4.0521745812202246e-05, "log_odds_chosen": 8.478148460388184, "log_odds_ratio": -0.00025979289785027504, "logits/chosen": -0.8399416208267212, "logits/rejected": -0.9217376708984375, "logps/chosen": -0.0006875486578792334, "logps/rejected": -1.0694687366485596, "loss": 3.5088, "nll_loss": 0.8771728873252869, "rewards/accuracies": 1.0, "rewards/chosen": -6.875485996715724e-05, "rewards/margins": 0.1068781167268753, "rewards/rejected": -0.10694687068462372, "step": 3913 }, { "epoch": 2.706777316735823, "grad_norm": 10.46756362915039, "learning_rate": 4.05179037959121e-05, "log_odds_chosen": 8.568737983703613, "log_odds_ratio": -0.046277035027742386, "logits/chosen": -1.0236213207244873, "logits/rejected": -1.0970962047576904, "logps/chosen": -0.014784927479922771, "logps/rejected": -1.8517358303070068, "loss": 2.4229, "nll_loss": 0.6010984182357788, "rewards/accuracies": 1.0, "rewards/chosen": -0.001478492864407599, "rewards/margins": 0.18369510769844055, "rewards/rejected": -0.18517358601093292, "step": 3914 }, { "epoch": 2.70746887966805, "grad_norm": 5.574758529663086, "learning_rate": 4.051406177962195e-05, "log_odds_chosen": 9.207605361938477, "log_odds_ratio": -0.015368283726274967, "logits/chosen": -0.5122767686843872, "logits/rejected": -0.5474386811256409, "logps/chosen": -0.00530365202575922, "logps/rejected": -1.8214813470840454, "loss": 1.4181, "nll_loss": 0.3529995381832123, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005303652142174542, "rewards/margins": 0.18161778151988983, "rewards/rejected": -0.18214815855026245, "step": 3915 }, { "epoch": 2.7081604426002768, "grad_norm": 7.1005940437316895, "learning_rate": 4.05102197633318e-05, "log_odds_chosen": 7.656611919403076, "log_odds_ratio": -0.05239873006939888, "logits/chosen": -0.8261842131614685, "logits/rejected": -0.8624880313873291, "logps/chosen": -0.052322208881378174, "logps/rejected": -1.5566890239715576, "loss": 2.0033, "nll_loss": 0.495580792427063, "rewards/accuracies": 1.0, "rewards/chosen": -0.005232220981270075, "rewards/margins": 0.15043668448925018, "rewards/rejected": -0.15566891431808472, "step": 3916 }, { "epoch": 2.7088520055325036, "grad_norm": 9.42386531829834, "learning_rate": 4.050637774704165e-05, "log_odds_chosen": 7.031638145446777, "log_odds_ratio": -0.0677785649895668, "logits/chosen": -0.798640251159668, "logits/rejected": -0.8351523280143738, "logps/chosen": -0.04338229447603226, "logps/rejected": -1.8607432842254639, "loss": 2.8671, "nll_loss": 0.7099849581718445, "rewards/accuracies": 1.0, "rewards/chosen": -0.004338229540735483, "rewards/margins": 0.18173609673976898, "rewards/rejected": -0.18607433140277863, "step": 3917 }, { "epoch": 2.7095435684647304, "grad_norm": 6.6379008293151855, "learning_rate": 4.05025357307515e-05, "log_odds_chosen": 7.005310535430908, "log_odds_ratio": -0.09212741255760193, "logits/chosen": -0.563160240650177, "logits/rejected": -0.5820728540420532, "logps/chosen": -0.02856297791004181, "logps/rejected": -1.6477155685424805, "loss": 1.4233, "nll_loss": 0.346622496843338, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028562978841364384, "rewards/margins": 0.16191525757312775, "rewards/rejected": -0.16477157175540924, "step": 3918 }, { "epoch": 2.7102351313969573, "grad_norm": 9.909972190856934, "learning_rate": 4.049869371446135e-05, "log_odds_chosen": 7.453892707824707, "log_odds_ratio": -0.039632659405469894, "logits/chosen": -0.8415449857711792, "logits/rejected": -0.8661006689071655, "logps/chosen": -0.022686485201120377, "logps/rejected": -1.4245719909667969, "loss": 2.5347, "nll_loss": 0.629721999168396, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022686487063765526, "rewards/margins": 0.1401885449886322, "rewards/rejected": -0.14245720207691193, "step": 3919 }, { "epoch": 2.710926694329184, "grad_norm": 14.87858772277832, "learning_rate": 4.0494851698171206e-05, "log_odds_chosen": 6.864459991455078, "log_odds_ratio": -0.20086896419525146, "logits/chosen": -0.7582334280014038, "logits/rejected": -0.7744381427764893, "logps/chosen": -0.0741969421505928, "logps/rejected": -1.1814193725585938, "loss": 3.4825, "nll_loss": 0.8505353927612305, "rewards/accuracies": 0.875, "rewards/chosen": -0.00741969421505928, "rewards/margins": 0.11072224378585815, "rewards/rejected": -0.11814194172620773, "step": 3920 }, { "epoch": 2.711618257261411, "grad_norm": 7.602975845336914, "learning_rate": 4.049100968188105e-05, "log_odds_chosen": 9.197195053100586, "log_odds_ratio": -0.0003866076876875013, "logits/chosen": -0.5975632667541504, "logits/rejected": -0.6337493658065796, "logps/chosen": -0.003041280433535576, "logps/rejected": -1.9324337244033813, "loss": 1.2749, "nll_loss": 0.31867676973342896, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030412807245738804, "rewards/margins": 0.19293925166130066, "rewards/rejected": -0.1932433843612671, "step": 3921 }, { "epoch": 2.7123098201936378, "grad_norm": 9.588038444519043, "learning_rate": 4.0487167665590905e-05, "log_odds_chosen": 8.537476539611816, "log_odds_ratio": -0.22845543920993805, "logits/chosen": -0.618996262550354, "logits/rejected": -0.6214995384216309, "logps/chosen": -0.03453611209988594, "logps/rejected": -1.6125141382217407, "loss": 1.9046, "nll_loss": 0.45329976081848145, "rewards/accuracies": 0.875, "rewards/chosen": -0.003453611396253109, "rewards/margins": 0.15779779851436615, "rewards/rejected": -0.16125141084194183, "step": 3922 }, { "epoch": 2.7130013831258646, "grad_norm": 10.34792423248291, "learning_rate": 4.048332564930076e-05, "log_odds_chosen": 8.786086082458496, "log_odds_ratio": -0.035590723156929016, "logits/chosen": -0.7593101263046265, "logits/rejected": -0.7925082445144653, "logps/chosen": -0.012174851261079311, "logps/rejected": -2.1274166107177734, "loss": 2.3948, "nll_loss": 0.595142662525177, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012174851726740599, "rewards/margins": 0.21152418851852417, "rewards/rejected": -0.2127416729927063, "step": 3923 }, { "epoch": 2.7136929460580914, "grad_norm": 8.583378791809082, "learning_rate": 4.047948363301061e-05, "log_odds_chosen": 8.819097518920898, "log_odds_ratio": -0.004179012030363083, "logits/chosen": -0.5776971578598022, "logits/rejected": -0.5667839050292969, "logps/chosen": -0.003491302952170372, "logps/rejected": -1.191100835800171, "loss": 2.0645, "nll_loss": 0.5156947374343872, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003491303068585694, "rewards/margins": 0.11876094341278076, "rewards/rejected": -0.11911007761955261, "step": 3924 }, { "epoch": 2.7143845089903182, "grad_norm": 13.72464656829834, "learning_rate": 4.0475641616720455e-05, "log_odds_chosen": 7.309238910675049, "log_odds_ratio": -0.14363299310207367, "logits/chosen": -0.5639284253120422, "logits/rejected": -0.6585797667503357, "logps/chosen": -0.05896759405732155, "logps/rejected": -1.1728018522262573, "loss": 2.576, "nll_loss": 0.6296295523643494, "rewards/accuracies": 0.875, "rewards/chosen": -0.005896759685128927, "rewards/margins": 0.11138343065977097, "rewards/rejected": -0.11728018522262573, "step": 3925 }, { "epoch": 2.715076071922545, "grad_norm": 8.657648086547852, "learning_rate": 4.047179960043031e-05, "log_odds_chosen": 7.906722545623779, "log_odds_ratio": -0.03158888593316078, "logits/chosen": -0.5023703575134277, "logits/rejected": -0.5678955912590027, "logps/chosen": -0.03969509154558182, "logps/rejected": -1.6585890054702759, "loss": 2.9702, "nll_loss": 0.7393918633460999, "rewards/accuracies": 1.0, "rewards/chosen": -0.003969509620219469, "rewards/margins": 0.16188938915729523, "rewards/rejected": -0.1658589094877243, "step": 3926 }, { "epoch": 2.715767634854772, "grad_norm": 11.101727485656738, "learning_rate": 4.046795758414016e-05, "log_odds_chosen": 8.115633010864258, "log_odds_ratio": -0.01802477240562439, "logits/chosen": -0.6598634719848633, "logits/rejected": -0.6970669627189636, "logps/chosen": -0.029014674946665764, "logps/rejected": -1.402409553527832, "loss": 2.0918, "nll_loss": 0.5211363434791565, "rewards/accuracies": 1.0, "rewards/chosen": -0.00290146772749722, "rewards/margins": 0.1373395025730133, "rewards/rejected": -0.14024096727371216, "step": 3927 }, { "epoch": 2.7164591977869987, "grad_norm": 5.158077239990234, "learning_rate": 4.0464115567850006e-05, "log_odds_chosen": 8.50020980834961, "log_odds_ratio": -0.0006952831172384322, "logits/chosen": -0.3887978792190552, "logits/rejected": -0.3988155126571655, "logps/chosen": -0.00446537509560585, "logps/rejected": -1.4568560123443604, "loss": 1.2471, "nll_loss": 0.31169915199279785, "rewards/accuracies": 1.0, "rewards/chosen": -0.000446537509560585, "rewards/margins": 0.14523907005786896, "rewards/rejected": -0.1456855982542038, "step": 3928 }, { "epoch": 2.7171507607192256, "grad_norm": 10.49365234375, "learning_rate": 4.0460273551559865e-05, "log_odds_chosen": 7.3715620040893555, "log_odds_ratio": -0.10738270729780197, "logits/chosen": -0.6264104843139648, "logits/rejected": -0.6489126086235046, "logps/chosen": -0.03647862747311592, "logps/rejected": -1.6284416913986206, "loss": 2.7679, "nll_loss": 0.6812424063682556, "rewards/accuracies": 0.875, "rewards/chosen": -0.0036478627007454634, "rewards/margins": 0.15919630229473114, "rewards/rejected": -0.16284418106079102, "step": 3929 }, { "epoch": 2.7178423236514524, "grad_norm": 3.889863967895508, "learning_rate": 4.045643153526971e-05, "log_odds_chosen": 6.721612930297852, "log_odds_ratio": -0.034767501056194305, "logits/chosen": -0.447994589805603, "logits/rejected": -0.5033765435218811, "logps/chosen": -0.05682168900966644, "logps/rejected": -1.3589305877685547, "loss": 2.0974, "nll_loss": 0.5208672881126404, "rewards/accuracies": 1.0, "rewards/chosen": -0.005682168994098902, "rewards/margins": 0.13021089136600494, "rewards/rejected": -0.1358930617570877, "step": 3930 }, { "epoch": 2.7185338865836792, "grad_norm": 7.74894905090332, "learning_rate": 4.045258951897956e-05, "log_odds_chosen": 8.092951774597168, "log_odds_ratio": -0.07686490565538406, "logits/chosen": -0.49599015712738037, "logits/rejected": -0.5249794721603394, "logps/chosen": -0.01426799688488245, "logps/rejected": -1.4215922355651855, "loss": 1.975, "nll_loss": 0.486062228679657, "rewards/accuracies": 1.0, "rewards/chosen": -0.001426799688488245, "rewards/margins": 0.14073242247104645, "rewards/rejected": -0.14215922355651855, "step": 3931 }, { "epoch": 2.719225449515906, "grad_norm": 6.715245246887207, "learning_rate": 4.0448747502689415e-05, "log_odds_chosen": 7.388888359069824, "log_odds_ratio": -0.0016329422360286117, "logits/chosen": -0.5692066550254822, "logits/rejected": -0.6565683484077454, "logps/chosen": -0.003036600537598133, "logps/rejected": -1.260680079460144, "loss": 2.2788, "nll_loss": 0.569549024105072, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003036600537598133, "rewards/margins": 0.12576434016227722, "rewards/rejected": -0.12606801092624664, "step": 3932 }, { "epoch": 2.719917012448133, "grad_norm": 7.658824920654297, "learning_rate": 4.044490548639927e-05, "log_odds_chosen": 9.301992416381836, "log_odds_ratio": -0.0002013940247707069, "logits/chosen": -0.6492097973823547, "logits/rejected": -0.7609802484512329, "logps/chosen": -0.00040117939352057874, "logps/rejected": -1.10262131690979, "loss": 2.1947, "nll_loss": 0.5486506223678589, "rewards/accuracies": 1.0, "rewards/chosen": -4.011793862446211e-05, "rewards/margins": 0.11022201180458069, "rewards/rejected": -0.11026212573051453, "step": 3933 }, { "epoch": 2.7206085753803597, "grad_norm": 8.80551528930664, "learning_rate": 4.0441063470109114e-05, "log_odds_chosen": 7.008855819702148, "log_odds_ratio": -0.08213210105895996, "logits/chosen": -0.6002588868141174, "logits/rejected": -0.6500236988067627, "logps/chosen": -0.027482986450195312, "logps/rejected": -1.412733554840088, "loss": 2.0206, "nll_loss": 0.49693989753723145, "rewards/accuracies": 1.0, "rewards/chosen": -0.00274829869158566, "rewards/margins": 0.13852505385875702, "rewards/rejected": -0.1412733495235443, "step": 3934 }, { "epoch": 2.7213001383125865, "grad_norm": 10.194206237792969, "learning_rate": 4.0437221453818966e-05, "log_odds_chosen": 7.7847771644592285, "log_odds_ratio": -0.07691100239753723, "logits/chosen": -0.17872712016105652, "logits/rejected": -0.23985889554023743, "logps/chosen": -0.01729346066713333, "logps/rejected": -1.4045839309692383, "loss": 2.0997, "nll_loss": 0.5172348022460938, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017293461132794619, "rewards/margins": 0.1387290358543396, "rewards/rejected": -0.1404583901166916, "step": 3935 }, { "epoch": 2.7219917012448134, "grad_norm": 10.218342781066895, "learning_rate": 4.043337943752882e-05, "log_odds_chosen": 8.4993314743042, "log_odds_ratio": -0.002254007151350379, "logits/chosen": -0.8403540253639221, "logits/rejected": -0.8589710593223572, "logps/chosen": -0.004420984070748091, "logps/rejected": -1.4414920806884766, "loss": 2.1637, "nll_loss": 0.5407010316848755, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004420984478201717, "rewards/margins": 0.14370711147785187, "rewards/rejected": -0.14414921402931213, "step": 3936 }, { "epoch": 2.72268326417704, "grad_norm": 6.502514362335205, "learning_rate": 4.0429537421238664e-05, "log_odds_chosen": 6.624269485473633, "log_odds_ratio": -0.03526769578456879, "logits/chosen": -0.283847838640213, "logits/rejected": -0.3334774076938629, "logps/chosen": -0.017359206452965736, "logps/rejected": -1.1060389280319214, "loss": 1.9313, "nll_loss": 0.4792954921722412, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017359207849949598, "rewards/margins": 0.10886797308921814, "rewards/rejected": -0.11060389131307602, "step": 3937 }, { "epoch": 2.723374827109267, "grad_norm": 8.666592597961426, "learning_rate": 4.042569540494852e-05, "log_odds_chosen": 8.527840614318848, "log_odds_ratio": -0.06644105166196823, "logits/chosen": -0.4190026521682739, "logits/rejected": -0.44489941000938416, "logps/chosen": -0.012680643238127232, "logps/rejected": -1.3973329067230225, "loss": 2.6437, "nll_loss": 0.6542931795120239, "rewards/accuracies": 1.0, "rewards/chosen": -0.00126806425396353, "rewards/margins": 0.13846522569656372, "rewards/rejected": -0.13973329961299896, "step": 3938 }, { "epoch": 2.724066390041494, "grad_norm": 5.463160991668701, "learning_rate": 4.042185338865837e-05, "log_odds_chosen": 7.737649917602539, "log_odds_ratio": -0.08719510585069656, "logits/chosen": -0.5029735565185547, "logits/rejected": -0.5753784775733948, "logps/chosen": -0.015753207728266716, "logps/rejected": -1.1266005039215088, "loss": 1.5785, "nll_loss": 0.38590627908706665, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015753208426758647, "rewards/margins": 0.11108473688364029, "rewards/rejected": -0.11266005784273148, "step": 3939 }, { "epoch": 2.7247579529737207, "grad_norm": 12.32331657409668, "learning_rate": 4.041801137236822e-05, "log_odds_chosen": 6.248561382293701, "log_odds_ratio": -0.05965609475970268, "logits/chosen": -0.560837984085083, "logits/rejected": -0.6607064008712769, "logps/chosen": -0.04490054026246071, "logps/rejected": -1.6605818271636963, "loss": 2.3404, "nll_loss": 0.5791374444961548, "rewards/accuracies": 1.0, "rewards/chosen": -0.0044900537468492985, "rewards/margins": 0.16156813502311707, "rewards/rejected": -0.16605818271636963, "step": 3940 }, { "epoch": 2.7254495159059475, "grad_norm": 11.115545272827148, "learning_rate": 4.0414169356078074e-05, "log_odds_chosen": 7.029140949249268, "log_odds_ratio": -0.01237096730619669, "logits/chosen": -0.44518017768859863, "logits/rejected": -0.4358668327331543, "logps/chosen": -0.018776437267661095, "logps/rejected": -1.5602182149887085, "loss": 2.9906, "nll_loss": 0.7464084625244141, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018776437500491738, "rewards/margins": 0.15414418280124664, "rewards/rejected": -0.1560218185186386, "step": 3941 }, { "epoch": 2.7261410788381744, "grad_norm": 14.741984367370605, "learning_rate": 4.0410327339787926e-05, "log_odds_chosen": 9.35867691040039, "log_odds_ratio": -0.0004632086493074894, "logits/chosen": -0.8224536776542664, "logits/rejected": -0.8809283971786499, "logps/chosen": -0.0005872789770364761, "logps/rejected": -1.5677838325500488, "loss": 2.3178, "nll_loss": 0.5793916583061218, "rewards/accuracies": 1.0, "rewards/chosen": -5.872789188288152e-05, "rewards/margins": 0.1567196547985077, "rewards/rejected": -0.15677838027477264, "step": 3942 }, { "epoch": 2.726832641770401, "grad_norm": 9.652228355407715, "learning_rate": 4.040648532349777e-05, "log_odds_chosen": 7.889554500579834, "log_odds_ratio": -0.018208064138889313, "logits/chosen": -0.488161563873291, "logits/rejected": -0.5011002421379089, "logps/chosen": -0.06083019822835922, "logps/rejected": -2.197711229324341, "loss": 2.2119, "nll_loss": 0.5511464476585388, "rewards/accuracies": 1.0, "rewards/chosen": -0.006083020009100437, "rewards/margins": 0.21368810534477234, "rewards/rejected": -0.2197711169719696, "step": 3943 }, { "epoch": 2.727524204702628, "grad_norm": 10.315166473388672, "learning_rate": 4.0402643307207624e-05, "log_odds_chosen": 6.549238204956055, "log_odds_ratio": -0.12524710595607758, "logits/chosen": -0.8253248929977417, "logits/rejected": -0.8640207648277283, "logps/chosen": -0.04595312848687172, "logps/rejected": -1.4616085290908813, "loss": 2.8316, "nll_loss": 0.6953847408294678, "rewards/accuracies": 0.875, "rewards/chosen": -0.0045953127555549145, "rewards/margins": 0.14156554639339447, "rewards/rejected": -0.14616085588932037, "step": 3944 }, { "epoch": 2.728215767634855, "grad_norm": 8.87439250946045, "learning_rate": 4.039880129091748e-05, "log_odds_chosen": 7.761504173278809, "log_odds_ratio": -0.10865526646375656, "logits/chosen": -0.005162131041288376, "logits/rejected": -0.15493258833885193, "logps/chosen": -0.08741172403097153, "logps/rejected": -1.810775876045227, "loss": 1.7261, "nll_loss": 0.42064934968948364, "rewards/accuracies": 0.875, "rewards/chosen": -0.008741172961890697, "rewards/margins": 0.1723364144563675, "rewards/rejected": -0.18107758462429047, "step": 3945 }, { "epoch": 2.7289073305670817, "grad_norm": 9.416640281677246, "learning_rate": 4.039495927462732e-05, "log_odds_chosen": 6.959840297698975, "log_odds_ratio": -0.04584544152021408, "logits/chosen": -0.35366320610046387, "logits/rejected": -0.4630590081214905, "logps/chosen": -0.03433135151863098, "logps/rejected": -1.7933472394943237, "loss": 1.7769, "nll_loss": 0.4396374225616455, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034331348724663258, "rewards/margins": 0.17590157687664032, "rewards/rejected": -0.17933472990989685, "step": 3946 }, { "epoch": 2.7295988934993085, "grad_norm": 10.85071849822998, "learning_rate": 4.039111725833718e-05, "log_odds_chosen": 9.792091369628906, "log_odds_ratio": -0.0015566610964015126, "logits/chosen": -0.450967013835907, "logits/rejected": -0.5745599269866943, "logps/chosen": -0.006072147749364376, "logps/rejected": -2.566922187805176, "loss": 2.017, "nll_loss": 0.5040937066078186, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006072148098610342, "rewards/margins": 0.25608503818511963, "rewards/rejected": -0.25669223070144653, "step": 3947 }, { "epoch": 2.7302904564315353, "grad_norm": 12.596390724182129, "learning_rate": 4.038727524204703e-05, "log_odds_chosen": 9.460590362548828, "log_odds_ratio": -0.0002683461061678827, "logits/chosen": -0.580082893371582, "logits/rejected": -0.689507782459259, "logps/chosen": -0.000847513903863728, "logps/rejected": -1.7451519966125488, "loss": 2.1106, "nll_loss": 0.5276321172714233, "rewards/accuracies": 1.0, "rewards/chosen": -8.475138747598976e-05, "rewards/margins": 0.1744304597377777, "rewards/rejected": -0.17451520264148712, "step": 3948 }, { "epoch": 2.730982019363762, "grad_norm": 7.90508508682251, "learning_rate": 4.038343322575688e-05, "log_odds_chosen": 9.44764232635498, "log_odds_ratio": -0.0003510116948746145, "logits/chosen": -0.6924615502357483, "logits/rejected": -0.6415228843688965, "logps/chosen": -0.0007907212129794061, "logps/rejected": -1.8190643787384033, "loss": 2.1582, "nll_loss": 0.5395174622535706, "rewards/accuracies": 1.0, "rewards/chosen": -7.907212420832366e-05, "rewards/margins": 0.1818273663520813, "rewards/rejected": -0.18190644681453705, "step": 3949 }, { "epoch": 2.731673582295989, "grad_norm": 8.298971176147461, "learning_rate": 4.037959120946673e-05, "log_odds_chosen": 7.152010440826416, "log_odds_ratio": -0.0063316673040390015, "logits/chosen": -0.6834003925323486, "logits/rejected": -0.7061575055122375, "logps/chosen": -0.012138995341956615, "logps/rejected": -1.122275710105896, "loss": 2.9132, "nll_loss": 0.7276560664176941, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012138995807617903, "rewards/margins": 0.11101366579532623, "rewards/rejected": -0.11222756654024124, "step": 3950 }, { "epoch": 2.732365145228216, "grad_norm": 11.298041343688965, "learning_rate": 4.0375749193176585e-05, "log_odds_chosen": 7.214498519897461, "log_odds_ratio": -0.10135520249605179, "logits/chosen": -0.7198128700256348, "logits/rejected": -0.7529308199882507, "logps/chosen": -0.024711720645427704, "logps/rejected": -1.5879918336868286, "loss": 3.3971, "nll_loss": 0.8391504287719727, "rewards/accuracies": 0.875, "rewards/chosen": -0.002471171785145998, "rewards/margins": 0.156328022480011, "rewards/rejected": -0.1587992012500763, "step": 3951 }, { "epoch": 2.7330567081604427, "grad_norm": 5.922629356384277, "learning_rate": 4.037190717688643e-05, "log_odds_chosen": 8.451513290405273, "log_odds_ratio": -0.06844214349985123, "logits/chosen": -0.566558837890625, "logits/rejected": -0.593731164932251, "logps/chosen": -0.020725499838590622, "logps/rejected": -1.4322352409362793, "loss": 2.1264, "nll_loss": 0.5247609615325928, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020725501235574484, "rewards/margins": 0.14115098118782043, "rewards/rejected": -0.14322352409362793, "step": 3952 }, { "epoch": 2.7337482710926695, "grad_norm": 8.351309776306152, "learning_rate": 4.036806516059628e-05, "log_odds_chosen": 6.58920955657959, "log_odds_ratio": -0.1379947066307068, "logits/chosen": -0.47179096937179565, "logits/rejected": -0.447683185338974, "logps/chosen": -0.039678920060396194, "logps/rejected": -1.048729419708252, "loss": 2.3215, "nll_loss": 0.5665820837020874, "rewards/accuracies": 0.875, "rewards/chosen": -0.003967892378568649, "rewards/margins": 0.10090505331754684, "rewards/rejected": -0.1048729419708252, "step": 3953 }, { "epoch": 2.7344398340248963, "grad_norm": 9.427661895751953, "learning_rate": 4.0364223144306135e-05, "log_odds_chosen": 9.888446807861328, "log_odds_ratio": -7.997354987310246e-05, "logits/chosen": -0.5060456991195679, "logits/rejected": -0.583258867263794, "logps/chosen": -0.00034985889215022326, "logps/rejected": -1.884965419769287, "loss": 1.8423, "nll_loss": 0.46057385206222534, "rewards/accuracies": 1.0, "rewards/chosen": -3.498589285300113e-05, "rewards/margins": 0.1884615570306778, "rewards/rejected": -0.18849653005599976, "step": 3954 }, { "epoch": 2.735131396957123, "grad_norm": 10.099342346191406, "learning_rate": 4.036038112801598e-05, "log_odds_chosen": 5.796465873718262, "log_odds_ratio": -0.08623596280813217, "logits/chosen": -0.9473574161529541, "logits/rejected": -0.9406151175498962, "logps/chosen": -0.010863440111279488, "logps/rejected": -0.6920241117477417, "loss": 2.4653, "nll_loss": 0.6077094078063965, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010863440111279488, "rewards/margins": 0.06811606884002686, "rewards/rejected": -0.06920240819454193, "step": 3955 }, { "epoch": 2.73582295988935, "grad_norm": 9.084993362426758, "learning_rate": 4.0356539111725833e-05, "log_odds_chosen": 8.409929275512695, "log_odds_ratio": -0.05629992485046387, "logits/chosen": -0.634174644947052, "logits/rejected": -0.6410291194915771, "logps/chosen": -0.009794793091714382, "logps/rejected": -1.433899164199829, "loss": 2.3453, "nll_loss": 0.5806872248649597, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009794794023036957, "rewards/margins": 0.14241044223308563, "rewards/rejected": -0.14338991045951843, "step": 3956 }, { "epoch": 2.736514522821577, "grad_norm": 8.876943588256836, "learning_rate": 4.0352697095435686e-05, "log_odds_chosen": 8.132782936096191, "log_odds_ratio": -0.0022850818932056427, "logits/chosen": -0.9935863018035889, "logits/rejected": -0.968889057636261, "logps/chosen": -0.09043926745653152, "logps/rejected": -1.6776096820831299, "loss": 2.4984, "nll_loss": 0.6243612766265869, "rewards/accuracies": 1.0, "rewards/chosen": -0.009043926373124123, "rewards/margins": 0.15871703624725342, "rewards/rejected": -0.167760968208313, "step": 3957 }, { "epoch": 2.7372060857538036, "grad_norm": 9.663110733032227, "learning_rate": 4.034885507914554e-05, "log_odds_chosen": 9.72215747833252, "log_odds_ratio": -0.00014341410133056343, "logits/chosen": -0.6471495628356934, "logits/rejected": -0.607186496257782, "logps/chosen": -0.0002497847599443048, "logps/rejected": -1.2455099821090698, "loss": 2.172, "nll_loss": 0.5429768562316895, "rewards/accuracies": 1.0, "rewards/chosen": -2.497847526683472e-05, "rewards/margins": 0.1245260089635849, "rewards/rejected": -0.12455099821090698, "step": 3958 }, { "epoch": 2.7378976486860305, "grad_norm": 11.114601135253906, "learning_rate": 4.0345013062855384e-05, "log_odds_chosen": 9.111005783081055, "log_odds_ratio": -0.0003828184853773564, "logits/chosen": -0.6637808084487915, "logits/rejected": -0.6742137670516968, "logps/chosen": -0.01722045987844467, "logps/rejected": -1.8206391334533691, "loss": 2.439, "nll_loss": 0.6097138524055481, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017220460576936603, "rewards/margins": 0.18034186959266663, "rewards/rejected": -0.18206390738487244, "step": 3959 }, { "epoch": 2.7385892116182573, "grad_norm": 6.329898357391357, "learning_rate": 4.034117104656524e-05, "log_odds_chosen": 6.555821418762207, "log_odds_ratio": -0.016997672617435455, "logits/chosen": -0.5201515555381775, "logits/rejected": -0.6183047294616699, "logps/chosen": -0.030940266326069832, "logps/rejected": -1.0968849658966064, "loss": 2.1955, "nll_loss": 0.5471838116645813, "rewards/accuracies": 1.0, "rewards/chosen": -0.003094026818871498, "rewards/margins": 0.10659447312355042, "rewards/rejected": -0.10968849062919617, "step": 3960 }, { "epoch": 2.739280774550484, "grad_norm": 21.0051326751709, "learning_rate": 4.033732903027509e-05, "log_odds_chosen": 7.625939846038818, "log_odds_ratio": -0.05772934854030609, "logits/chosen": -0.830172598361969, "logits/rejected": -0.8871779441833496, "logps/chosen": -0.009763781912624836, "logps/rejected": -1.484619379043579, "loss": 2.1533, "nll_loss": 0.5325421094894409, "rewards/accuracies": 1.0, "rewards/chosen": -0.000976378214545548, "rewards/margins": 0.14748555421829224, "rewards/rejected": -0.1484619379043579, "step": 3961 }, { "epoch": 2.739972337482711, "grad_norm": 4.871427536010742, "learning_rate": 4.033348701398494e-05, "log_odds_chosen": 8.589265823364258, "log_odds_ratio": -0.025390522554516792, "logits/chosen": -0.6745571494102478, "logits/rejected": -0.836500883102417, "logps/chosen": -0.015709497034549713, "logps/rejected": -1.4090120792388916, "loss": 1.9426, "nll_loss": 0.48311781883239746, "rewards/accuracies": 1.0, "rewards/chosen": -0.001570949680171907, "rewards/margins": 0.13933026790618896, "rewards/rejected": -0.14090119302272797, "step": 3962 }, { "epoch": 2.740663900414938, "grad_norm": 9.784700393676758, "learning_rate": 4.0329644997694794e-05, "log_odds_chosen": 7.266717433929443, "log_odds_ratio": -0.24722522497177124, "logits/chosen": -0.6542754769325256, "logits/rejected": -0.7426164150238037, "logps/chosen": -0.050657421350479126, "logps/rejected": -0.9146636128425598, "loss": 2.0315, "nll_loss": 0.48315340280532837, "rewards/accuracies": 0.875, "rewards/chosen": -0.005065742414444685, "rewards/margins": 0.08640061318874359, "rewards/rejected": -0.09146635979413986, "step": 3963 }, { "epoch": 2.7413554633471646, "grad_norm": 6.475007057189941, "learning_rate": 4.032580298140464e-05, "log_odds_chosen": 7.788166046142578, "log_odds_ratio": -0.054864123463630676, "logits/chosen": -0.7246919870376587, "logits/rejected": -0.7207714915275574, "logps/chosen": -0.028761431574821472, "logps/rejected": -1.7532126903533936, "loss": 1.7134, "nll_loss": 0.42286068201065063, "rewards/accuracies": 1.0, "rewards/chosen": -0.002876143204048276, "rewards/margins": 0.1724451184272766, "rewards/rejected": -0.1753212809562683, "step": 3964 }, { "epoch": 2.7420470262793915, "grad_norm": 12.352106094360352, "learning_rate": 4.032196096511449e-05, "log_odds_chosen": 6.385231971740723, "log_odds_ratio": -0.1378117799758911, "logits/chosen": -0.6535122990608215, "logits/rejected": -0.6418668031692505, "logps/chosen": -0.03800104185938835, "logps/rejected": -0.8482541441917419, "loss": 2.5641, "nll_loss": 0.6272392272949219, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038001039065420628, "rewards/margins": 0.08102530986070633, "rewards/rejected": -0.08482541143894196, "step": 3965 }, { "epoch": 2.7427385892116183, "grad_norm": 6.539783954620361, "learning_rate": 4.0318118948824344e-05, "log_odds_chosen": 9.533210754394531, "log_odds_ratio": -0.0005043463315814734, "logits/chosen": -0.9464341402053833, "logits/rejected": -0.9169366955757141, "logps/chosen": -0.00027665687957778573, "logps/rejected": -1.6659789085388184, "loss": 2.2158, "nll_loss": 0.5538901090621948, "rewards/accuracies": 1.0, "rewards/chosen": -2.7665688321576454e-05, "rewards/margins": 0.16657023131847382, "rewards/rejected": -0.1665979027748108, "step": 3966 }, { "epoch": 2.743430152143845, "grad_norm": 11.467350006103516, "learning_rate": 4.03142769325342e-05, "log_odds_chosen": 6.552248477935791, "log_odds_ratio": -0.2534642815589905, "logits/chosen": -0.7549129128456116, "logits/rejected": -0.827406644821167, "logps/chosen": -0.03583712875843048, "logps/rejected": -1.2653224468231201, "loss": 2.5423, "nll_loss": 0.6102339625358582, "rewards/accuracies": 0.875, "rewards/chosen": -0.0035837129689753056, "rewards/margins": 0.1229485347867012, "rewards/rejected": -0.12653225660324097, "step": 3967 }, { "epoch": 2.744121715076072, "grad_norm": 6.8532938957214355, "learning_rate": 4.031043491624404e-05, "log_odds_chosen": 8.665301322937012, "log_odds_ratio": -0.0003807971370406449, "logits/chosen": -0.9270603656768799, "logits/rejected": -0.9942599534988403, "logps/chosen": -0.016690397635102272, "logps/rejected": -2.1862082481384277, "loss": 2.1557, "nll_loss": 0.5388835668563843, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016690397169440985, "rewards/margins": 0.21695180237293243, "rewards/rejected": -0.21862083673477173, "step": 3968 }, { "epoch": 2.7448132780082988, "grad_norm": 10.167153358459473, "learning_rate": 4.03065928999539e-05, "log_odds_chosen": 8.84040355682373, "log_odds_ratio": -0.00025179842486977577, "logits/chosen": -0.7059938907623291, "logits/rejected": -0.9238421320915222, "logps/chosen": -0.0011752690188586712, "logps/rejected": -1.904888391494751, "loss": 3.019, "nll_loss": 0.7547341585159302, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001175269135273993, "rewards/margins": 0.1903713047504425, "rewards/rejected": -0.19048884510993958, "step": 3969 }, { "epoch": 2.7455048409405256, "grad_norm": 4.787475109100342, "learning_rate": 4.030275088366375e-05, "log_odds_chosen": 8.128543853759766, "log_odds_ratio": -0.09393294155597687, "logits/chosen": -0.33512839674949646, "logits/rejected": -0.311124712228775, "logps/chosen": -0.03485126420855522, "logps/rejected": -1.4904800653457642, "loss": 1.7159, "nll_loss": 0.41957443952560425, "rewards/accuracies": 0.875, "rewards/chosen": -0.003485126420855522, "rewards/margins": 0.14556287229061127, "rewards/rejected": -0.14904801547527313, "step": 3970 }, { "epoch": 2.7461964038727524, "grad_norm": 6.264207363128662, "learning_rate": 4.02989088673736e-05, "log_odds_chosen": 8.825662612915039, "log_odds_ratio": -0.000746442936360836, "logits/chosen": -0.7133558988571167, "logits/rejected": -0.7757239937782288, "logps/chosen": -0.004503064788877964, "logps/rejected": -1.8890973329544067, "loss": 2.119, "nll_loss": 0.5296758413314819, "rewards/accuracies": 1.0, "rewards/chosen": -0.00045030651381239295, "rewards/margins": 0.18845942616462708, "rewards/rejected": -0.18890973925590515, "step": 3971 }, { "epoch": 2.7468879668049793, "grad_norm": 7.407824516296387, "learning_rate": 4.029506685108345e-05, "log_odds_chosen": 7.501564979553223, "log_odds_ratio": -0.0042665572836995125, "logits/chosen": -0.771816611289978, "logits/rejected": -0.8200007081031799, "logps/chosen": -0.002163660479709506, "logps/rejected": -1.0099034309387207, "loss": 2.718, "nll_loss": 0.6790682077407837, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021636603923980147, "rewards/margins": 0.10077398270368576, "rewards/rejected": -0.10099035501480103, "step": 3972 }, { "epoch": 2.747579529737206, "grad_norm": 7.049964904785156, "learning_rate": 4.02912248347933e-05, "log_odds_chosen": 8.63952922821045, "log_odds_ratio": -0.000538341177161783, "logits/chosen": -0.78837651014328, "logits/rejected": -0.7955034971237183, "logps/chosen": -0.000618205638602376, "logps/rejected": -1.2546896934509277, "loss": 2.262, "nll_loss": 0.5654584765434265, "rewards/accuracies": 1.0, "rewards/chosen": -6.182056677062064e-05, "rewards/margins": 0.1254071444272995, "rewards/rejected": -0.12546896934509277, "step": 3973 }, { "epoch": 2.748271092669433, "grad_norm": 10.377960205078125, "learning_rate": 4.028738281850315e-05, "log_odds_chosen": 8.370406150817871, "log_odds_ratio": -0.0035593262873589993, "logits/chosen": -0.9567809104919434, "logits/rejected": -0.9531281590461731, "logps/chosen": -0.01249743066728115, "logps/rejected": -1.7988122701644897, "loss": 2.3203, "nll_loss": 0.5797082781791687, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012497431598603725, "rewards/margins": 0.1786315143108368, "rewards/rejected": -0.1798812448978424, "step": 3974 }, { "epoch": 2.7489626556016598, "grad_norm": 8.847599029541016, "learning_rate": 4.0283540802213e-05, "log_odds_chosen": 8.98076343536377, "log_odds_ratio": -0.0664471983909607, "logits/chosen": -0.6736356616020203, "logits/rejected": -0.7417312264442444, "logps/chosen": -0.012593384832143784, "logps/rejected": -1.628709077835083, "loss": 2.18, "nll_loss": 0.538344144821167, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012593385763466358, "rewards/margins": 0.16161157190799713, "rewards/rejected": -0.16287091374397278, "step": 3975 }, { "epoch": 2.7496542185338866, "grad_norm": 8.94532299041748, "learning_rate": 4.0279698785922855e-05, "log_odds_chosen": 7.802983283996582, "log_odds_ratio": -0.002552380319684744, "logits/chosen": -0.6118870377540588, "logits/rejected": -0.6478884220123291, "logps/chosen": -0.014447561465203762, "logps/rejected": -1.78226900100708, "loss": 2.3816, "nll_loss": 0.5951405167579651, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014447560533881187, "rewards/margins": 0.17678214609622955, "rewards/rejected": -0.17822691798210144, "step": 3976 }, { "epoch": 2.7503457814661134, "grad_norm": 7.9482011795043945, "learning_rate": 4.02758567696327e-05, "log_odds_chosen": 7.3630757331848145, "log_odds_ratio": -0.029140042141079903, "logits/chosen": -0.6187721490859985, "logits/rejected": -0.6251906752586365, "logps/chosen": -0.01222775038331747, "logps/rejected": -1.3199526071548462, "loss": 2.0326, "nll_loss": 0.5052246451377869, "rewards/accuracies": 1.0, "rewards/chosen": -0.001222774968482554, "rewards/margins": 0.13077250123023987, "rewards/rejected": -0.13199526071548462, "step": 3977 }, { "epoch": 2.7510373443983402, "grad_norm": 13.724565505981445, "learning_rate": 4.027201475334256e-05, "log_odds_chosen": 7.626209259033203, "log_odds_ratio": -0.019534602761268616, "logits/chosen": -0.7065523862838745, "logits/rejected": -0.7183932065963745, "logps/chosen": -0.07203464210033417, "logps/rejected": -1.4935412406921387, "loss": 2.5822, "nll_loss": 0.6436068415641785, "rewards/accuracies": 1.0, "rewards/chosen": -0.007203464396297932, "rewards/margins": 0.1421506404876709, "rewards/rejected": -0.14935411512851715, "step": 3978 }, { "epoch": 2.751728907330567, "grad_norm": 8.385231971740723, "learning_rate": 4.0268172737052406e-05, "log_odds_chosen": 8.239843368530273, "log_odds_ratio": -0.006714486517012119, "logits/chosen": -0.6160125136375427, "logits/rejected": -0.6247880458831787, "logps/chosen": -0.03856637701392174, "logps/rejected": -1.549187421798706, "loss": 1.8715, "nll_loss": 0.46721044182777405, "rewards/accuracies": 1.0, "rewards/chosen": -0.00385663821361959, "rewards/margins": 0.15106210112571716, "rewards/rejected": -0.15491873025894165, "step": 3979 }, { "epoch": 2.752420470262794, "grad_norm": 4.186921119689941, "learning_rate": 4.026433072076226e-05, "log_odds_chosen": 7.790356636047363, "log_odds_ratio": -0.018140949308872223, "logits/chosen": -0.3370264172554016, "logits/rejected": -0.31349921226501465, "logps/chosen": -0.03148407116532326, "logps/rejected": -1.4903897047042847, "loss": 1.9644, "nll_loss": 0.48929262161254883, "rewards/accuracies": 1.0, "rewards/chosen": -0.003148407209664583, "rewards/margins": 0.14589056372642517, "rewards/rejected": -0.14903897047042847, "step": 3980 }, { "epoch": 2.7531120331950207, "grad_norm": 16.19760513305664, "learning_rate": 4.026048870447211e-05, "log_odds_chosen": 7.4634857177734375, "log_odds_ratio": -0.3400443494319916, "logits/chosen": -0.22639168798923492, "logits/rejected": -0.19484885036945343, "logps/chosen": -0.03847894072532654, "logps/rejected": -1.5501339435577393, "loss": 1.7273, "nll_loss": 0.39781975746154785, "rewards/accuracies": 0.875, "rewards/chosen": -0.003847894025966525, "rewards/margins": 0.15116551518440247, "rewards/rejected": -0.15501339733600616, "step": 3981 }, { "epoch": 2.7538035961272476, "grad_norm": 6.2771782875061035, "learning_rate": 4.0256646688181956e-05, "log_odds_chosen": 6.931901454925537, "log_odds_ratio": -0.3148242235183716, "logits/chosen": -0.6994768381118774, "logits/rejected": -0.7499065399169922, "logps/chosen": -0.06756354868412018, "logps/rejected": -1.5410547256469727, "loss": 2.0378, "nll_loss": 0.47797098755836487, "rewards/accuracies": 0.875, "rewards/chosen": -0.006756355054676533, "rewards/margins": 0.14734911918640137, "rewards/rejected": -0.15410546958446503, "step": 3982 }, { "epoch": 2.7544951590594744, "grad_norm": 5.253891468048096, "learning_rate": 4.025280467189181e-05, "log_odds_chosen": 8.897655487060547, "log_odds_ratio": -0.0008970237104222178, "logits/chosen": -0.45553719997406006, "logits/rejected": -0.48855888843536377, "logps/chosen": -0.019885070621967316, "logps/rejected": -1.8306454420089722, "loss": 1.6886, "nll_loss": 0.42205068469047546, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019885068759322166, "rewards/margins": 0.1810760498046875, "rewards/rejected": -0.1830645501613617, "step": 3983 }, { "epoch": 2.7551867219917012, "grad_norm": 16.035680770874023, "learning_rate": 4.024896265560166e-05, "log_odds_chosen": 9.053376197814941, "log_odds_ratio": -0.029256589710712433, "logits/chosen": -0.5385884642601013, "logits/rejected": -0.6471099257469177, "logps/chosen": -0.004622517619282007, "logps/rejected": -1.9592335224151611, "loss": 3.2722, "nll_loss": 0.8151220083236694, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046225180267356336, "rewards/margins": 0.19546110928058624, "rewards/rejected": -0.19592337310314178, "step": 3984 }, { "epoch": 2.755878284923928, "grad_norm": 7.4928483963012695, "learning_rate": 4.0245120639311514e-05, "log_odds_chosen": 8.065269470214844, "log_odds_ratio": -0.0013664980651810765, "logits/chosen": -0.3876135051250458, "logits/rejected": -0.4293254315853119, "logps/chosen": -0.030116790905594826, "logps/rejected": -1.8832831382751465, "loss": 1.8511, "nll_loss": 0.4626496732234955, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030116792768239975, "rewards/margins": 0.18531666696071625, "rewards/rejected": -0.1883283257484436, "step": 3985 }, { "epoch": 2.756569847856155, "grad_norm": 14.424047470092773, "learning_rate": 4.024127862302136e-05, "log_odds_chosen": 9.725471496582031, "log_odds_ratio": -0.000976808718405664, "logits/chosen": -0.5688410401344299, "logits/rejected": -0.7274327278137207, "logps/chosen": -0.0016707740724086761, "logps/rejected": -1.8488935232162476, "loss": 2.6804, "nll_loss": 0.6700056195259094, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016707740724086761, "rewards/margins": 0.18472227454185486, "rewards/rejected": -0.18488934636116028, "step": 3986 }, { "epoch": 2.7572614107883817, "grad_norm": 6.92812442779541, "learning_rate": 4.023743660673122e-05, "log_odds_chosen": 8.491493225097656, "log_odds_ratio": -0.07710529118776321, "logits/chosen": -0.5341342687606812, "logits/rejected": -0.561559796333313, "logps/chosen": -0.019994715228676796, "logps/rejected": -1.5566470623016357, "loss": 1.8402, "nll_loss": 0.45234861969947815, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019994715694338083, "rewards/margins": 0.15366524457931519, "rewards/rejected": -0.15566471219062805, "step": 3987 }, { "epoch": 2.7579529737206085, "grad_norm": 8.029306411743164, "learning_rate": 4.0233594590441064e-05, "log_odds_chosen": 8.991018295288086, "log_odds_ratio": -0.00014573686348740011, "logits/chosen": -0.5196717381477356, "logits/rejected": -0.5397700071334839, "logps/chosen": -0.001510739792138338, "logps/rejected": -1.6422498226165771, "loss": 1.666, "nll_loss": 0.41648074984550476, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015107399667613208, "rewards/margins": 0.1640739142894745, "rewards/rejected": -0.16422498226165771, "step": 3988 }, { "epoch": 2.7586445366528354, "grad_norm": 5.102666854858398, "learning_rate": 4.022975257415092e-05, "log_odds_chosen": 10.04948616027832, "log_odds_ratio": -7.772055687382817e-05, "logits/chosen": -0.6284279823303223, "logits/rejected": -0.6805120706558228, "logps/chosen": -0.0002829919976647943, "logps/rejected": -1.904032588005066, "loss": 1.6529, "nll_loss": 0.4132192134857178, "rewards/accuracies": 1.0, "rewards/chosen": -2.8299202313064598e-05, "rewards/margins": 0.1903749704360962, "rewards/rejected": -0.1904032677412033, "step": 3989 }, { "epoch": 2.759336099585062, "grad_norm": 9.161526679992676, "learning_rate": 4.022591055786077e-05, "log_odds_chosen": 7.404929161071777, "log_odds_ratio": -0.0020130304619669914, "logits/chosen": -0.5665072798728943, "logits/rejected": -0.5944963097572327, "logps/chosen": -0.012136176228523254, "logps/rejected": -1.4921623468399048, "loss": 2.8559, "nll_loss": 0.7137806415557861, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012136177392676473, "rewards/margins": 0.14800262451171875, "rewards/rejected": -0.14921623468399048, "step": 3990 }, { "epoch": 2.760027662517289, "grad_norm": 12.188777923583984, "learning_rate": 4.0222068541570615e-05, "log_odds_chosen": 9.008995056152344, "log_odds_ratio": -0.0009650088031776249, "logits/chosen": -0.7447724342346191, "logits/rejected": -0.8593225479125977, "logps/chosen": -0.0012664712266996503, "logps/rejected": -1.6476327180862427, "loss": 2.5855, "nll_loss": 0.6462736129760742, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012664712266996503, "rewards/margins": 0.16463662683963776, "rewards/rejected": -0.16476327180862427, "step": 3991 }, { "epoch": 2.760719225449516, "grad_norm": 12.439476013183594, "learning_rate": 4.021822652528047e-05, "log_odds_chosen": 8.762724876403809, "log_odds_ratio": -0.07469207048416138, "logits/chosen": -0.9009903073310852, "logits/rejected": -0.9644653797149658, "logps/chosen": -0.013878803700208664, "logps/rejected": -1.6378298997879028, "loss": 1.9406, "nll_loss": 0.47767218947410583, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013878804165869951, "rewards/margins": 0.16239511966705322, "rewards/rejected": -0.1637829840183258, "step": 3992 }, { "epoch": 2.7614107883817427, "grad_norm": 13.11072063446045, "learning_rate": 4.021438450899032e-05, "log_odds_chosen": 6.637383460998535, "log_odds_ratio": -0.4479435086250305, "logits/chosen": -0.4912869930267334, "logits/rejected": -0.5125991702079773, "logps/chosen": -0.01698119565844536, "logps/rejected": -1.0482051372528076, "loss": 2.3407, "nll_loss": 0.5403863191604614, "rewards/accuracies": 0.875, "rewards/chosen": -0.0016981197986751795, "rewards/margins": 0.10312239825725555, "rewards/rejected": -0.10482051968574524, "step": 3993 }, { "epoch": 2.7621023513139695, "grad_norm": 11.071776390075684, "learning_rate": 4.021054249270017e-05, "log_odds_chosen": 8.1903076171875, "log_odds_ratio": -0.0026236893609166145, "logits/chosen": -0.891800045967102, "logits/rejected": -0.9731010794639587, "logps/chosen": -0.01190229132771492, "logps/rejected": -2.0794668197631836, "loss": 1.7713, "nll_loss": 0.4425641894340515, "rewards/accuracies": 1.0, "rewards/chosen": -0.001190229202620685, "rewards/margins": 0.20675644278526306, "rewards/rejected": -0.20794667303562164, "step": 3994 }, { "epoch": 2.7627939142461964, "grad_norm": 21.30668067932129, "learning_rate": 4.020670047641002e-05, "log_odds_chosen": 7.261108875274658, "log_odds_ratio": -0.03292210027575493, "logits/chosen": 0.15424926578998566, "logits/rejected": 0.09129300713539124, "logps/chosen": -0.08206956088542938, "logps/rejected": -1.2352995872497559, "loss": 1.7829, "nll_loss": 0.4424290955066681, "rewards/accuracies": 1.0, "rewards/chosen": -0.008206957019865513, "rewards/margins": 0.11532299220561981, "rewards/rejected": -0.12352995574474335, "step": 3995 }, { "epoch": 2.763485477178423, "grad_norm": 6.121913909912109, "learning_rate": 4.020285846011988e-05, "log_odds_chosen": 7.284252166748047, "log_odds_ratio": -0.10654313862323761, "logits/chosen": -0.4905470609664917, "logits/rejected": -0.5098867416381836, "logps/chosen": -0.03048006258904934, "logps/rejected": -1.1575863361358643, "loss": 1.9294, "nll_loss": 0.47170132398605347, "rewards/accuracies": 0.875, "rewards/chosen": -0.003048006445169449, "rewards/margins": 0.1127106249332428, "rewards/rejected": -0.11575863510370255, "step": 3996 }, { "epoch": 2.76417704011065, "grad_norm": 9.75130558013916, "learning_rate": 4.019901644382972e-05, "log_odds_chosen": 7.484089374542236, "log_odds_ratio": -0.06234271451830864, "logits/chosen": -0.6909130215644836, "logits/rejected": -0.7404910922050476, "logps/chosen": -0.018211044371128082, "logps/rejected": -1.218412160873413, "loss": 2.6985, "nll_loss": 0.6683934330940247, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018211043206974864, "rewards/margins": 0.12002012133598328, "rewards/rejected": -0.12184122204780579, "step": 3997 }, { "epoch": 2.764868603042877, "grad_norm": 10.215263366699219, "learning_rate": 4.0195174427539575e-05, "log_odds_chosen": 6.937007904052734, "log_odds_ratio": -0.007254381664097309, "logits/chosen": -0.6279563903808594, "logits/rejected": -0.6680964827537537, "logps/chosen": -0.01847420632839203, "logps/rejected": -1.2389494180679321, "loss": 1.7004, "nll_loss": 0.4243742823600769, "rewards/accuracies": 1.0, "rewards/chosen": -0.001847420702688396, "rewards/margins": 0.1220475286245346, "rewards/rejected": -0.12389494478702545, "step": 3998 }, { "epoch": 2.7655601659751037, "grad_norm": 7.872920989990234, "learning_rate": 4.019133241124943e-05, "log_odds_chosen": 7.680271148681641, "log_odds_ratio": -0.07638738304376602, "logits/chosen": -0.5571773052215576, "logits/rejected": -0.6206885576248169, "logps/chosen": -0.038735780864953995, "logps/rejected": -1.5839513540267944, "loss": 2.0725, "nll_loss": 0.5104899406433105, "rewards/accuracies": 1.0, "rewards/chosen": -0.003873578505590558, "rewards/margins": 0.15452156960964203, "rewards/rejected": -0.15839514136314392, "step": 3999 }, { "epoch": 2.7662517289073305, "grad_norm": 9.754056930541992, "learning_rate": 4.018749039495927e-05, "log_odds_chosen": 8.085151672363281, "log_odds_ratio": -0.0051775057800114155, "logits/chosen": -0.7449995875358582, "logits/rejected": -0.8129273653030396, "logps/chosen": -0.018867207691073418, "logps/rejected": -2.167125701904297, "loss": 1.6079, "nll_loss": 0.4014506936073303, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018867208855226636, "rewards/margins": 0.2148258537054062, "rewards/rejected": -0.2167125642299652, "step": 4000 }, { "epoch": 2.7669432918395573, "grad_norm": 12.460733413696289, "learning_rate": 4.0183648378669126e-05, "log_odds_chosen": 8.746673583984375, "log_odds_ratio": -0.0002168616047129035, "logits/chosen": -0.6806103587150574, "logits/rejected": -0.6511906385421753, "logps/chosen": -0.005443001165986061, "logps/rejected": -1.9330780506134033, "loss": 2.4411, "nll_loss": 0.610245406627655, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005443001282401383, "rewards/margins": 0.19276350736618042, "rewards/rejected": -0.1933078020811081, "step": 4001 }, { "epoch": 2.767634854771784, "grad_norm": 8.446491241455078, "learning_rate": 4.017980636237898e-05, "log_odds_chosen": 7.240784645080566, "log_odds_ratio": -0.04213593900203705, "logits/chosen": -0.8190625309944153, "logits/rejected": -0.8051847219467163, "logps/chosen": -0.010527187958359718, "logps/rejected": -1.0580394268035889, "loss": 2.336, "nll_loss": 0.5797888040542603, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010527188424021006, "rewards/margins": 0.10475122928619385, "rewards/rejected": -0.105803944170475, "step": 4002 }, { "epoch": 2.768326417704011, "grad_norm": 9.982586860656738, "learning_rate": 4.017596434608883e-05, "log_odds_chosen": 8.95711612701416, "log_odds_ratio": -0.004018096718937159, "logits/chosen": -0.46991389989852905, "logits/rejected": -0.5015337467193604, "logps/chosen": -0.03133925050497055, "logps/rejected": -2.5801265239715576, "loss": 2.4328, "nll_loss": 0.6077884435653687, "rewards/accuracies": 1.0, "rewards/chosen": -0.003133924910798669, "rewards/margins": 0.2548786997795105, "rewards/rejected": -0.25801265239715576, "step": 4003 }, { "epoch": 2.769017980636238, "grad_norm": 11.527600288391113, "learning_rate": 4.0172122329798676e-05, "log_odds_chosen": 6.015590667724609, "log_odds_ratio": -0.04452924430370331, "logits/chosen": -0.6099879741668701, "logits/rejected": -0.6066796183586121, "logps/chosen": -0.12464918941259384, "logps/rejected": -2.0709896087646484, "loss": 2.5879, "nll_loss": 0.642532229423523, "rewards/accuracies": 1.0, "rewards/chosen": -0.012464918196201324, "rewards/margins": 0.19463402032852173, "rewards/rejected": -0.20709894597530365, "step": 4004 }, { "epoch": 2.7697095435684647, "grad_norm": 10.465564727783203, "learning_rate": 4.0168280313508535e-05, "log_odds_chosen": 8.651500701904297, "log_odds_ratio": -0.00045137875713407993, "logits/chosen": -0.6306678652763367, "logits/rejected": -0.7083243131637573, "logps/chosen": -0.0007956651970744133, "logps/rejected": -1.4663710594177246, "loss": 2.4454, "nll_loss": 0.6112978458404541, "rewards/accuracies": 1.0, "rewards/chosen": -7.956652552820742e-05, "rewards/margins": 0.1465575248003006, "rewards/rejected": -0.14663709700107574, "step": 4005 }, { "epoch": 2.7704011065006915, "grad_norm": 7.135653495788574, "learning_rate": 4.016443829721838e-05, "log_odds_chosen": 6.230930328369141, "log_odds_ratio": -0.06728748232126236, "logits/chosen": -0.4055030643939972, "logits/rejected": -0.4956081509590149, "logps/chosen": -0.019831674173474312, "logps/rejected": -1.0246037244796753, "loss": 2.184, "nll_loss": 0.5392595529556274, "rewards/accuracies": 1.0, "rewards/chosen": -0.001983167137950659, "rewards/margins": 0.1004772037267685, "rewards/rejected": -0.10246037691831589, "step": 4006 }, { "epoch": 2.7710926694329183, "grad_norm": 4.531956195831299, "learning_rate": 4.0160596280928233e-05, "log_odds_chosen": 8.730263710021973, "log_odds_ratio": -0.0006904865731485188, "logits/chosen": -0.45924514532089233, "logits/rejected": -0.5823002457618713, "logps/chosen": -0.002315716352313757, "logps/rejected": -1.4111111164093018, "loss": 2.0128, "nll_loss": 0.503140926361084, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023157161194831133, "rewards/margins": 0.1408795416355133, "rewards/rejected": -0.1411111056804657, "step": 4007 }, { "epoch": 2.771784232365145, "grad_norm": 9.06564998626709, "learning_rate": 4.0156754264638086e-05, "log_odds_chosen": 6.933753967285156, "log_odds_ratio": -0.1319449245929718, "logits/chosen": -0.6110938191413879, "logits/rejected": -0.6701584458351135, "logps/chosen": -0.039627041667699814, "logps/rejected": -2.0406789779663086, "loss": 2.2408, "nll_loss": 0.5470160245895386, "rewards/accuracies": 0.875, "rewards/chosen": -0.003962704446166754, "rewards/margins": 0.200105220079422, "rewards/rejected": -0.20406793057918549, "step": 4008 }, { "epoch": 2.772475795297372, "grad_norm": 5.062579154968262, "learning_rate": 4.015291224834793e-05, "log_odds_chosen": 6.016531944274902, "log_odds_ratio": -0.0551312081515789, "logits/chosen": -0.5424445867538452, "logits/rejected": -0.5113322734832764, "logps/chosen": -0.06058872863650322, "logps/rejected": -1.9856109619140625, "loss": 1.9022, "nll_loss": 0.47004297375679016, "rewards/accuracies": 1.0, "rewards/chosen": -0.006058873143047094, "rewards/margins": 0.1925022304058075, "rewards/rejected": -0.19856110215187073, "step": 4009 }, { "epoch": 2.773167358229599, "grad_norm": 6.759472846984863, "learning_rate": 4.0149070232057784e-05, "log_odds_chosen": 7.94062614440918, "log_odds_ratio": -0.00318117905408144, "logits/chosen": -0.6912134885787964, "logits/rejected": -0.7385083436965942, "logps/chosen": -0.03081917017698288, "logps/rejected": -2.4372217655181885, "loss": 2.304, "nll_loss": 0.5756765604019165, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030819172970950603, "rewards/margins": 0.24064025282859802, "rewards/rejected": -0.24372217059135437, "step": 4010 }, { "epoch": 2.7738589211618256, "grad_norm": 12.081768035888672, "learning_rate": 4.0145228215767636e-05, "log_odds_chosen": 7.577772617340088, "log_odds_ratio": -0.006525705568492413, "logits/chosen": -0.5828157067298889, "logits/rejected": -0.6487139463424683, "logps/chosen": -0.01663350872695446, "logps/rejected": -1.8727152347564697, "loss": 2.6041, "nll_loss": 0.6503660082817078, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016633509658277035, "rewards/margins": 0.1856081783771515, "rewards/rejected": -0.18727153539657593, "step": 4011 }, { "epoch": 2.7745504840940525, "grad_norm": 9.275781631469727, "learning_rate": 4.014138619947749e-05, "log_odds_chosen": 8.689956665039062, "log_odds_ratio": -0.0027832777705043554, "logits/chosen": -0.5445963144302368, "logits/rejected": -0.5595325231552124, "logps/chosen": -0.007963388226926327, "logps/rejected": -1.6978943347930908, "loss": 2.0967, "nll_loss": 0.5239031910896301, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007963387761265039, "rewards/margins": 0.16899308562278748, "rewards/rejected": -0.16978943347930908, "step": 4012 }, { "epoch": 2.7752420470262793, "grad_norm": 4.7812604904174805, "learning_rate": 4.0137544183187335e-05, "log_odds_chosen": 6.512237548828125, "log_odds_ratio": -0.04956220090389252, "logits/chosen": -0.8668148517608643, "logits/rejected": -0.8137930035591125, "logps/chosen": -0.05137316510081291, "logps/rejected": -1.837198257446289, "loss": 2.3039, "nll_loss": 0.5710086226463318, "rewards/accuracies": 1.0, "rewards/chosen": -0.005137316882610321, "rewards/margins": 0.17858250439167023, "rewards/rejected": -0.18371984362602234, "step": 4013 }, { "epoch": 2.775933609958506, "grad_norm": 46.199378967285156, "learning_rate": 4.0133702166897194e-05, "log_odds_chosen": 4.95249605178833, "log_odds_ratio": -0.3153817653656006, "logits/chosen": -0.49015292525291443, "logits/rejected": -0.5078474879264832, "logps/chosen": -0.05883823335170746, "logps/rejected": -0.6610848307609558, "loss": 2.4359, "nll_loss": 0.5774248242378235, "rewards/accuracies": 0.875, "rewards/chosen": -0.0058838240802288055, "rewards/margins": 0.060224659740924835, "rewards/rejected": -0.06610848009586334, "step": 4014 }, { "epoch": 2.776625172890733, "grad_norm": 12.034295082092285, "learning_rate": 4.012986015060704e-05, "log_odds_chosen": 8.162162780761719, "log_odds_ratio": -0.002041205298155546, "logits/chosen": -0.4860447943210602, "logits/rejected": -0.5647892951965332, "logps/chosen": -0.006864494178444147, "logps/rejected": -1.4859334230422974, "loss": 1.6817, "nll_loss": 0.42022138833999634, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006864494062028825, "rewards/margins": 0.14790688455104828, "rewards/rejected": -0.14859335124492645, "step": 4015 }, { "epoch": 2.77731673582296, "grad_norm": 11.894104957580566, "learning_rate": 4.012601813431689e-05, "log_odds_chosen": 9.395560264587402, "log_odds_ratio": -0.0009875416290014982, "logits/chosen": -1.0839825868606567, "logits/rejected": -1.0906481742858887, "logps/chosen": -0.005453157238662243, "logps/rejected": -2.035409450531006, "loss": 2.0012, "nll_loss": 0.5002046227455139, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005453157937154174, "rewards/margins": 0.2029956430196762, "rewards/rejected": -0.20354095101356506, "step": 4016 }, { "epoch": 2.7780082987551866, "grad_norm": 7.765829563140869, "learning_rate": 4.0122176118026744e-05, "log_odds_chosen": 9.723349571228027, "log_odds_ratio": -0.00022265892766881734, "logits/chosen": -0.6836073398590088, "logits/rejected": -0.8250362873077393, "logps/chosen": -0.0006030694930814207, "logps/rejected": -1.8063586950302124, "loss": 1.6202, "nll_loss": 0.4050217568874359, "rewards/accuracies": 1.0, "rewards/chosen": -6.0306949308142066e-05, "rewards/margins": 0.18057554960250854, "rewards/rejected": -0.18063588440418243, "step": 4017 }, { "epoch": 2.7786998616874135, "grad_norm": 6.4268364906311035, "learning_rate": 4.011833410173659e-05, "log_odds_chosen": 7.016240119934082, "log_odds_ratio": -0.007511706091463566, "logits/chosen": -0.5878118276596069, "logits/rejected": -0.6550705432891846, "logps/chosen": -0.02737569808959961, "logps/rejected": -1.6542975902557373, "loss": 1.9515, "nll_loss": 0.4871138334274292, "rewards/accuracies": 1.0, "rewards/chosen": -0.002737569622695446, "rewards/margins": 0.16269220411777496, "rewards/rejected": -0.16542977094650269, "step": 4018 }, { "epoch": 2.7793914246196403, "grad_norm": 31.795257568359375, "learning_rate": 4.011449208544644e-05, "log_odds_chosen": 7.1993408203125, "log_odds_ratio": -0.17486952245235443, "logits/chosen": -0.45069921016693115, "logits/rejected": -0.5299843549728394, "logps/chosen": -0.03464874252676964, "logps/rejected": -1.7125872373580933, "loss": 2.3585, "nll_loss": 0.5721323490142822, "rewards/accuracies": 0.875, "rewards/chosen": -0.003464874578639865, "rewards/margins": 0.1677938550710678, "rewards/rejected": -0.17125873267650604, "step": 4019 }, { "epoch": 2.780082987551867, "grad_norm": 5.901736736297607, "learning_rate": 4.0110650069156295e-05, "log_odds_chosen": 4.562118053436279, "log_odds_ratio": -0.3361849784851074, "logits/chosen": -0.3503913879394531, "logits/rejected": -0.3848911225795746, "logps/chosen": -0.14615672826766968, "logps/rejected": -1.2584377527236938, "loss": 2.3862, "nll_loss": 0.5629367828369141, "rewards/accuracies": 0.75, "rewards/chosen": -0.014615673571825027, "rewards/margins": 0.1112281009554863, "rewards/rejected": -0.12584376335144043, "step": 4020 }, { "epoch": 2.780774550484094, "grad_norm": 11.129192352294922, "learning_rate": 4.010680805286615e-05, "log_odds_chosen": 8.119607925415039, "log_odds_ratio": -0.031143292784690857, "logits/chosen": -0.4110638201236725, "logits/rejected": -0.42109963297843933, "logps/chosen": -0.049136094748973846, "logps/rejected": -1.557751178741455, "loss": 2.3672, "nll_loss": 0.58868008852005, "rewards/accuracies": 1.0, "rewards/chosen": -0.004913609474897385, "rewards/margins": 0.15086150169372559, "rewards/rejected": -0.15577509999275208, "step": 4021 }, { "epoch": 2.7814661134163208, "grad_norm": 8.29444694519043, "learning_rate": 4.010296603657599e-05, "log_odds_chosen": 6.8840227127075195, "log_odds_ratio": -0.18162855505943298, "logits/chosen": -0.45118263363838196, "logits/rejected": -0.5519630908966064, "logps/chosen": -0.04445667192339897, "logps/rejected": -1.5489853620529175, "loss": 1.8229, "nll_loss": 0.4375506043434143, "rewards/accuracies": 0.875, "rewards/chosen": -0.004445666912943125, "rewards/margins": 0.1504528671503067, "rewards/rejected": -0.154898539185524, "step": 4022 }, { "epoch": 2.7821576763485476, "grad_norm": 10.82302474975586, "learning_rate": 4.009912402028585e-05, "log_odds_chosen": 8.228324890136719, "log_odds_ratio": -0.0011258398881182075, "logits/chosen": -0.3915403485298157, "logits/rejected": -0.4355335235595703, "logps/chosen": -0.0018793190829455853, "logps/rejected": -1.365816354751587, "loss": 2.1308, "nll_loss": 0.5325887799263, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018793190247379243, "rewards/margins": 0.1363936960697174, "rewards/rejected": -0.1365816295146942, "step": 4023 }, { "epoch": 2.7828492392807744, "grad_norm": 5.188867568969727, "learning_rate": 4.00952820039957e-05, "log_odds_chosen": 8.392681121826172, "log_odds_ratio": -0.0071550956927239895, "logits/chosen": -0.46807339787483215, "logits/rejected": -0.46381571888923645, "logps/chosen": -0.017753636464476585, "logps/rejected": -1.238928198814392, "loss": 1.9335, "nll_loss": 0.482657790184021, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017753635765984654, "rewards/margins": 0.12211745232343674, "rewards/rejected": -0.12389282882213593, "step": 4024 }, { "epoch": 2.7835408022130013, "grad_norm": 5.690225601196289, "learning_rate": 4.009143998770555e-05, "log_odds_chosen": 5.526510715484619, "log_odds_ratio": -0.1800631582736969, "logits/chosen": -0.48184502124786377, "logits/rejected": -0.5255023241043091, "logps/chosen": -0.0705324187874794, "logps/rejected": -1.812889814376831, "loss": 1.9376, "nll_loss": 0.46639716625213623, "rewards/accuracies": 0.875, "rewards/chosen": -0.007053242065012455, "rewards/margins": 0.1742357611656189, "rewards/rejected": -0.18128898739814758, "step": 4025 }, { "epoch": 2.784232365145228, "grad_norm": 7.3954691886901855, "learning_rate": 4.00875979714154e-05, "log_odds_chosen": 7.558425426483154, "log_odds_ratio": -0.01121465303003788, "logits/chosen": -0.47046932578086853, "logits/rejected": -0.46596261858940125, "logps/chosen": -0.004000439308583736, "logps/rejected": -0.7876605987548828, "loss": 2.5458, "nll_loss": 0.6353315114974976, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004000439075753093, "rewards/margins": 0.07836601883172989, "rewards/rejected": -0.07876606285572052, "step": 4026 }, { "epoch": 2.784923928077455, "grad_norm": 7.344405651092529, "learning_rate": 4.008375595512525e-05, "log_odds_chosen": 8.260010719299316, "log_odds_ratio": -0.0010395023273304105, "logits/chosen": -0.6027242541313171, "logits/rejected": -0.5930612087249756, "logps/chosen": -0.021254943683743477, "logps/rejected": -1.3257710933685303, "loss": 2.3931, "nll_loss": 0.5981633067131042, "rewards/accuracies": 1.0, "rewards/chosen": -0.00212549464777112, "rewards/margins": 0.13045161962509155, "rewards/rejected": -0.1325771063566208, "step": 4027 }, { "epoch": 2.7856154910096818, "grad_norm": 7.669803142547607, "learning_rate": 4.00799139388351e-05, "log_odds_chosen": 7.015578746795654, "log_odds_ratio": -0.10500874370336533, "logits/chosen": -0.49662086367607117, "logits/rejected": -0.5779070854187012, "logps/chosen": -0.04561088979244232, "logps/rejected": -1.9851887226104736, "loss": 2.0359, "nll_loss": 0.49846282601356506, "rewards/accuracies": 0.875, "rewards/chosen": -0.0045610894449055195, "rewards/margins": 0.19395779073238373, "rewards/rejected": -0.19851887226104736, "step": 4028 }, { "epoch": 2.7863070539419086, "grad_norm": 10.287899017333984, "learning_rate": 4.007607192254495e-05, "log_odds_chosen": 8.59146499633789, "log_odds_ratio": -0.0028011025860905647, "logits/chosen": -0.5080961585044861, "logits/rejected": -0.5439881086349487, "logps/chosen": -0.002593546872958541, "logps/rejected": -1.545117974281311, "loss": 2.7331, "nll_loss": 0.6829999685287476, "rewards/accuracies": 1.0, "rewards/chosen": -0.000259354681475088, "rewards/margins": 0.15425243973731995, "rewards/rejected": -0.15451179444789886, "step": 4029 }, { "epoch": 2.7869986168741354, "grad_norm": 20.256053924560547, "learning_rate": 4.0072229906254806e-05, "log_odds_chosen": 6.982294082641602, "log_odds_ratio": -0.0798601508140564, "logits/chosen": -0.4688485860824585, "logits/rejected": -0.48391133546829224, "logps/chosen": -0.03412385657429695, "logps/rejected": -1.5629781484603882, "loss": 2.6672, "nll_loss": 0.6588075757026672, "rewards/accuracies": 1.0, "rewards/chosen": -0.003412386169657111, "rewards/margins": 0.15288543701171875, "rewards/rejected": -0.15629780292510986, "step": 4030 }, { "epoch": 2.7876901798063622, "grad_norm": 10.259025573730469, "learning_rate": 4.006838788996465e-05, "log_odds_chosen": 7.8801093101501465, "log_odds_ratio": -0.09860547631978989, "logits/chosen": -0.29706934094429016, "logits/rejected": -0.32118216156959534, "logps/chosen": -0.018375318497419357, "logps/rejected": -1.4736485481262207, "loss": 1.8238, "nll_loss": 0.44608259201049805, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018375319195911288, "rewards/margins": 0.14552733302116394, "rewards/rejected": -0.14736486971378326, "step": 4031 }, { "epoch": 2.788381742738589, "grad_norm": 19.465635299682617, "learning_rate": 4.006454587367451e-05, "log_odds_chosen": 9.73245620727539, "log_odds_ratio": -0.08642785251140594, "logits/chosen": -0.3006506562232971, "logits/rejected": -0.38461631536483765, "logps/chosen": -0.027534427121281624, "logps/rejected": -2.6337976455688477, "loss": 2.5214, "nll_loss": 0.6217066049575806, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027534423861652613, "rewards/margins": 0.26062634587287903, "rewards/rejected": -0.2633797824382782, "step": 4032 }, { "epoch": 2.789073305670816, "grad_norm": 9.730466842651367, "learning_rate": 4.0060703857384356e-05, "log_odds_chosen": 7.375240802764893, "log_odds_ratio": -0.042026542127132416, "logits/chosen": -0.5182772874832153, "logits/rejected": -0.549079179763794, "logps/chosen": -0.033890802413225174, "logps/rejected": -1.802767276763916, "loss": 2.4504, "nll_loss": 0.6084006428718567, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033890805207192898, "rewards/margins": 0.176887646317482, "rewards/rejected": -0.18027672171592712, "step": 4033 }, { "epoch": 2.7897648686030427, "grad_norm": 8.207728385925293, "learning_rate": 4.005686184109421e-05, "log_odds_chosen": 8.203607559204102, "log_odds_ratio": -0.01775890588760376, "logits/chosen": -0.5532872080802917, "logits/rejected": -0.5614966750144958, "logps/chosen": -0.008538950234651566, "logps/rejected": -1.3925681114196777, "loss": 1.7439, "nll_loss": 0.43420541286468506, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008538949186913669, "rewards/margins": 0.13840290904045105, "rewards/rejected": -0.1392568200826645, "step": 4034 }, { "epoch": 2.7904564315352696, "grad_norm": 5.796356201171875, "learning_rate": 4.005301982480406e-05, "log_odds_chosen": 8.953681945800781, "log_odds_ratio": -0.0008014945196919143, "logits/chosen": -0.3912316560745239, "logits/rejected": -0.37463435530662537, "logps/chosen": -0.015697212889790535, "logps/rejected": -2.049187660217285, "loss": 2.3212, "nll_loss": 0.5802172422409058, "rewards/accuracies": 1.0, "rewards/chosen": -0.001569721382111311, "rewards/margins": 0.2033490538597107, "rewards/rejected": -0.204918771982193, "step": 4035 }, { "epoch": 2.7911479944674964, "grad_norm": 7.292884826660156, "learning_rate": 4.004917780851391e-05, "log_odds_chosen": 6.377499103546143, "log_odds_ratio": -0.09592024981975555, "logits/chosen": -0.37000563740730286, "logits/rejected": -0.39865243434906006, "logps/chosen": -0.06645894795656204, "logps/rejected": -1.4568859338760376, "loss": 1.8935, "nll_loss": 0.46377626061439514, "rewards/accuracies": 1.0, "rewards/chosen": -0.006645894609391689, "rewards/margins": 0.1390427052974701, "rewards/rejected": -0.14568859338760376, "step": 4036 }, { "epoch": 2.7918395573997232, "grad_norm": 9.267068862915039, "learning_rate": 4.004533579222376e-05, "log_odds_chosen": 8.6797513961792, "log_odds_ratio": -0.00480996398255229, "logits/chosen": -0.7288060784339905, "logits/rejected": -0.6807577013969421, "logps/chosen": -0.009048324078321457, "logps/rejected": -1.9454820156097412, "loss": 1.945, "nll_loss": 0.48575958609580994, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009048324427567422, "rewards/margins": 0.19364337623119354, "rewards/rejected": -0.19454818964004517, "step": 4037 }, { "epoch": 2.79253112033195, "grad_norm": 7.5702080726623535, "learning_rate": 4.004149377593361e-05, "log_odds_chosen": 6.665687084197998, "log_odds_ratio": -0.019693978130817413, "logits/chosen": -0.9752466678619385, "logits/rejected": -0.9107370972633362, "logps/chosen": -0.012831311672925949, "logps/rejected": -1.1290169954299927, "loss": 2.1806, "nll_loss": 0.5431694984436035, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012831311905756593, "rewards/margins": 0.11161856353282928, "rewards/rejected": -0.11290168762207031, "step": 4038 }, { "epoch": 2.793222683264177, "grad_norm": 5.317671775817871, "learning_rate": 4.0037651759643464e-05, "log_odds_chosen": 8.465705871582031, "log_odds_ratio": -0.0017247693613171577, "logits/chosen": -0.5362719297409058, "logits/rejected": -0.583799421787262, "logps/chosen": -0.006024193484336138, "logps/rejected": -1.5577731132507324, "loss": 1.6803, "nll_loss": 0.4199100732803345, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006024193135090172, "rewards/margins": 0.1551748812198639, "rewards/rejected": -0.15577730536460876, "step": 4039 }, { "epoch": 2.7939142461964037, "grad_norm": 7.264804840087891, "learning_rate": 4.003380974335331e-05, "log_odds_chosen": 9.161325454711914, "log_odds_ratio": -0.0010589503217488527, "logits/chosen": -0.19057327508926392, "logits/rejected": -0.3201407194137573, "logps/chosen": -0.0007936095353215933, "logps/rejected": -1.4208149909973145, "loss": 1.5752, "nll_loss": 0.39369529485702515, "rewards/accuracies": 1.0, "rewards/chosen": -7.93609578977339e-05, "rewards/margins": 0.142002135515213, "rewards/rejected": -0.14208149909973145, "step": 4040 }, { "epoch": 2.7946058091286305, "grad_norm": 11.013221740722656, "learning_rate": 4.002996772706317e-05, "log_odds_chosen": 8.914923667907715, "log_odds_ratio": -0.0003905659541487694, "logits/chosen": -0.5774435997009277, "logits/rejected": -0.715229868888855, "logps/chosen": -0.0004203822463750839, "logps/rejected": -1.1688902378082275, "loss": 2.4236, "nll_loss": 0.6058591604232788, "rewards/accuracies": 1.0, "rewards/chosen": -4.203822390991263e-05, "rewards/margins": 0.1168469786643982, "rewards/rejected": -0.11688902229070663, "step": 4041 }, { "epoch": 2.7952973720608574, "grad_norm": 6.471141338348389, "learning_rate": 4.0026125710773015e-05, "log_odds_chosen": 5.749246597290039, "log_odds_ratio": -0.014492766000330448, "logits/chosen": -0.3788442313671112, "logits/rejected": -0.41355931758880615, "logps/chosen": -0.011785943061113358, "logps/rejected": -0.8134269714355469, "loss": 2.1996, "nll_loss": 0.5484617948532104, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011785943061113358, "rewards/margins": 0.0801641047000885, "rewards/rejected": -0.08134269714355469, "step": 4042 }, { "epoch": 2.795988934993084, "grad_norm": 9.5997314453125, "learning_rate": 4.002228369448287e-05, "log_odds_chosen": 8.17568588256836, "log_odds_ratio": -0.06700452417135239, "logits/chosen": -0.5788341164588928, "logits/rejected": -0.5489345788955688, "logps/chosen": -0.0365753248333931, "logps/rejected": -1.9677122831344604, "loss": 2.4008, "nll_loss": 0.5935037732124329, "rewards/accuracies": 1.0, "rewards/chosen": -0.003657532623037696, "rewards/margins": 0.19311368465423584, "rewards/rejected": -0.19677123427391052, "step": 4043 }, { "epoch": 2.796680497925311, "grad_norm": 9.586773872375488, "learning_rate": 4.001844167819272e-05, "log_odds_chosen": 6.191709518432617, "log_odds_ratio": -0.024510130286216736, "logits/chosen": -0.8226577043533325, "logits/rejected": -0.8689040541648865, "logps/chosen": -0.009145855903625488, "logps/rejected": -1.0411524772644043, "loss": 2.4241, "nll_loss": 0.6035729050636292, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009145855437964201, "rewards/margins": 0.10320065915584564, "rewards/rejected": -0.10411524772644043, "step": 4044 }, { "epoch": 2.797372060857538, "grad_norm": 12.250510215759277, "learning_rate": 4.0014599661902565e-05, "log_odds_chosen": 9.6131591796875, "log_odds_ratio": -0.00039203441701829433, "logits/chosen": -0.606217622756958, "logits/rejected": -0.6473501920700073, "logps/chosen": -0.0008249045349657536, "logps/rejected": -1.6661516427993774, "loss": 2.7216, "nll_loss": 0.6803591251373291, "rewards/accuracies": 1.0, "rewards/chosen": -8.24904564069584e-05, "rewards/margins": 0.1665326952934265, "rewards/rejected": -0.16661517322063446, "step": 4045 }, { "epoch": 2.7980636237897647, "grad_norm": 8.003177642822266, "learning_rate": 4.001075764561242e-05, "log_odds_chosen": 7.322998046875, "log_odds_ratio": -0.10197833180427551, "logits/chosen": -0.562049150466919, "logits/rejected": -0.5889880657196045, "logps/chosen": -0.030817590653896332, "logps/rejected": -1.0370532274246216, "loss": 2.5991, "nll_loss": 0.6395775079727173, "rewards/accuracies": 1.0, "rewards/chosen": -0.003081759437918663, "rewards/margins": 0.10062356293201447, "rewards/rejected": -0.10370532423257828, "step": 4046 }, { "epoch": 2.7987551867219915, "grad_norm": 9.056816101074219, "learning_rate": 4.000691562932227e-05, "log_odds_chosen": 8.016447067260742, "log_odds_ratio": -0.07796118408441544, "logits/chosen": -0.7534922957420349, "logits/rejected": -0.7713943719863892, "logps/chosen": -0.03694911673665047, "logps/rejected": -1.8605222702026367, "loss": 2.1996, "nll_loss": 0.5421122908592224, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036949114874005318, "rewards/margins": 0.1823573112487793, "rewards/rejected": -0.18605221807956696, "step": 4047 }, { "epoch": 2.7994467496542184, "grad_norm": 9.049676895141602, "learning_rate": 4.000307361303212e-05, "log_odds_chosen": 7.648162841796875, "log_odds_ratio": -0.03125055879354477, "logits/chosen": -0.5109508633613586, "logits/rejected": -0.5957425832748413, "logps/chosen": -0.013546126894652843, "logps/rejected": -1.659212350845337, "loss": 1.6936, "nll_loss": 0.4202747941017151, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013546126428991556, "rewards/margins": 0.16456662118434906, "rewards/rejected": -0.16592122614383698, "step": 4048 }, { "epoch": 2.800138312586445, "grad_norm": 7.598301410675049, "learning_rate": 3.999923159674197e-05, "log_odds_chosen": 8.735095977783203, "log_odds_ratio": -0.004727786872535944, "logits/chosen": -0.5558849573135376, "logits/rejected": -0.5349166989326477, "logps/chosen": -0.009516008198261261, "logps/rejected": -1.6679878234863281, "loss": 1.7339, "nll_loss": 0.433004766702652, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009516008431091905, "rewards/margins": 0.16584721207618713, "rewards/rejected": -0.16679880023002625, "step": 4049 }, { "epoch": 2.800829875518672, "grad_norm": 13.116601943969727, "learning_rate": 3.999538958045183e-05, "log_odds_chosen": 9.699845314025879, "log_odds_ratio": -0.00045129720820114017, "logits/chosen": -0.43724325299263, "logits/rejected": -0.4891416132450104, "logps/chosen": -0.0005087569006718695, "logps/rejected": -1.6439473628997803, "loss": 1.9135, "nll_loss": 0.4783242344856262, "rewards/accuracies": 1.0, "rewards/chosen": -5.087569297757e-05, "rewards/margins": 0.16434386372566223, "rewards/rejected": -0.16439473628997803, "step": 4050 }, { "epoch": 2.801521438450899, "grad_norm": 7.343100547790527, "learning_rate": 3.999154756416167e-05, "log_odds_chosen": 6.8652567863464355, "log_odds_ratio": -0.00484616169705987, "logits/chosen": -0.5420911908149719, "logits/rejected": -0.561378002166748, "logps/chosen": -0.015762126073241234, "logps/rejected": -1.9778697490692139, "loss": 2.0224, "nll_loss": 0.505126953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015762128168717027, "rewards/margins": 0.19621075689792633, "rewards/rejected": -0.19778698682785034, "step": 4051 }, { "epoch": 2.8022130013831257, "grad_norm": 9.008139610290527, "learning_rate": 3.9987705547871526e-05, "log_odds_chosen": 8.551125526428223, "log_odds_ratio": -0.03587919473648071, "logits/chosen": -0.5163147449493408, "logits/rejected": -0.601231575012207, "logps/chosen": -0.03649171441793442, "logps/rejected": -2.1623549461364746, "loss": 2.2451, "nll_loss": 0.5576812624931335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036491714417934418, "rewards/margins": 0.21258632838726044, "rewards/rejected": -0.21623550355434418, "step": 4052 }, { "epoch": 2.8029045643153525, "grad_norm": 13.98880386352539, "learning_rate": 3.998386353158138e-05, "log_odds_chosen": 7.360173225402832, "log_odds_ratio": -0.06701034307479858, "logits/chosen": -0.699334979057312, "logits/rejected": -0.7111362218856812, "logps/chosen": -0.046054355800151825, "logps/rejected": -2.1368675231933594, "loss": 2.3722, "nll_loss": 0.5863499641418457, "rewards/accuracies": 1.0, "rewards/chosen": -0.00460543530061841, "rewards/margins": 0.20908132195472717, "rewards/rejected": -0.2136867493391037, "step": 4053 }, { "epoch": 2.8035961272475793, "grad_norm": 24.911413192749023, "learning_rate": 3.998002151529123e-05, "log_odds_chosen": 5.606881141662598, "log_odds_ratio": -0.14306114614009857, "logits/chosen": -0.6254528164863586, "logits/rejected": -0.6368358135223389, "logps/chosen": -0.0452696867287159, "logps/rejected": -1.1443300247192383, "loss": 2.1549, "nll_loss": 0.5244289636611938, "rewards/accuracies": 0.875, "rewards/chosen": -0.0045269690454006195, "rewards/margins": 0.10990603268146515, "rewards/rejected": -0.11443300545215607, "step": 4054 }, { "epoch": 2.804287690179806, "grad_norm": 6.053211212158203, "learning_rate": 3.9976179499001076e-05, "log_odds_chosen": 7.3733110427856445, "log_odds_ratio": -0.002126081380993128, "logits/chosen": -0.6541106700897217, "logits/rejected": -0.6929240822792053, "logps/chosen": -0.01228273380547762, "logps/rejected": -1.2755669355392456, "loss": 2.4835, "nll_loss": 0.620650053024292, "rewards/accuracies": 1.0, "rewards/chosen": -0.001228273380547762, "rewards/margins": 0.12632840871810913, "rewards/rejected": -0.1275566965341568, "step": 4055 }, { "epoch": 2.804979253112033, "grad_norm": 11.584280967712402, "learning_rate": 3.997233748271093e-05, "log_odds_chosen": 8.602903366088867, "log_odds_ratio": -0.02909720316529274, "logits/chosen": -1.1846723556518555, "logits/rejected": -1.2454025745391846, "logps/chosen": -0.009676833637058735, "logps/rejected": -1.965323805809021, "loss": 3.4535, "nll_loss": 0.8604767322540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009676833869889379, "rewards/margins": 0.19556470215320587, "rewards/rejected": -0.19653236865997314, "step": 4056 }, { "epoch": 2.80567081604426, "grad_norm": 8.060429573059082, "learning_rate": 3.996849546642078e-05, "log_odds_chosen": 7.116885185241699, "log_odds_ratio": -0.005772262811660767, "logits/chosen": -0.7592568397521973, "logits/rejected": -0.7438384890556335, "logps/chosen": -0.028484918177127838, "logps/rejected": -1.9294735193252563, "loss": 2.7605, "nll_loss": 0.6895406246185303, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028484920039772987, "rewards/margins": 0.1900988668203354, "rewards/rejected": -0.1929473578929901, "step": 4057 }, { "epoch": 2.8063623789764867, "grad_norm": 5.724400997161865, "learning_rate": 3.996465345013063e-05, "log_odds_chosen": 8.871892929077148, "log_odds_ratio": -0.0014322178903967142, "logits/chosen": -0.28607577085494995, "logits/rejected": -0.3320361375808716, "logps/chosen": -0.017788385972380638, "logps/rejected": -1.4142343997955322, "loss": 2.0421, "nll_loss": 0.5103698968887329, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017788386903703213, "rewards/margins": 0.13964460790157318, "rewards/rejected": -0.14142344892024994, "step": 4058 }, { "epoch": 2.8070539419087135, "grad_norm": 8.724485397338867, "learning_rate": 3.9960811433840486e-05, "log_odds_chosen": 8.082185745239258, "log_odds_ratio": -0.018572242930531502, "logits/chosen": -0.7844616174697876, "logits/rejected": -0.8723915815353394, "logps/chosen": -0.010151880793273449, "logps/rejected": -1.1117634773254395, "loss": 2.5914, "nll_loss": 0.6459817290306091, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010151881724596024, "rewards/margins": 0.11016116291284561, "rewards/rejected": -0.11117635667324066, "step": 4059 }, { "epoch": 2.8077455048409403, "grad_norm": 8.289448738098145, "learning_rate": 3.995696941755033e-05, "log_odds_chosen": 7.2113566398620605, "log_odds_ratio": -0.05613557994365692, "logits/chosen": -0.7712424397468567, "logits/rejected": -0.7228599786758423, "logps/chosen": -0.010022724978625774, "logps/rejected": -0.9686402082443237, "loss": 2.3449, "nll_loss": 0.58060222864151, "rewards/accuracies": 1.0, "rewards/chosen": -0.001002272474579513, "rewards/margins": 0.09586174786090851, "rewards/rejected": -0.0968640148639679, "step": 4060 }, { "epoch": 2.808437067773167, "grad_norm": 7.372903347015381, "learning_rate": 3.9953127401260184e-05, "log_odds_chosen": 6.312814235687256, "log_odds_ratio": -0.024066109210252762, "logits/chosen": -0.23818959295749664, "logits/rejected": -0.38583752512931824, "logps/chosen": -0.014680419117212296, "logps/rejected": -1.2234750986099243, "loss": 1.8019, "nll_loss": 0.44808071851730347, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014680419117212296, "rewards/margins": 0.12087946385145187, "rewards/rejected": -0.12234750390052795, "step": 4061 }, { "epoch": 2.809128630705394, "grad_norm": 5.99877405166626, "learning_rate": 3.9949285384970037e-05, "log_odds_chosen": 7.9143571853637695, "log_odds_ratio": -0.01069034356623888, "logits/chosen": -0.4054313600063324, "logits/rejected": -0.5019809007644653, "logps/chosen": -0.011747448705136776, "logps/rejected": -1.5104658603668213, "loss": 2.6341, "nll_loss": 0.6574532389640808, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011747449170798063, "rewards/margins": 0.1498718410730362, "rewards/rejected": -0.15104657411575317, "step": 4062 }, { "epoch": 2.809820193637621, "grad_norm": 9.521771430969238, "learning_rate": 3.994544336867989e-05, "log_odds_chosen": 8.424257278442383, "log_odds_ratio": -0.0014310001861304045, "logits/chosen": -0.7866235375404358, "logits/rejected": -0.8643758296966553, "logps/chosen": -0.002819900633767247, "logps/rejected": -1.5088589191436768, "loss": 2.7465, "nll_loss": 0.686479389667511, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002819900691974908, "rewards/margins": 0.15060390532016754, "rewards/rejected": -0.1508859097957611, "step": 4063 }, { "epoch": 2.8105117565698476, "grad_norm": 7.929076194763184, "learning_rate": 3.9941601352389735e-05, "log_odds_chosen": 7.050183296203613, "log_odds_ratio": -0.054438620805740356, "logits/chosen": -0.5261591672897339, "logits/rejected": -0.5558338761329651, "logps/chosen": -0.01621352881193161, "logps/rejected": -1.1250156164169312, "loss": 2.1176, "nll_loss": 0.5239666700363159, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016213529743254185, "rewards/margins": 0.11088021099567413, "rewards/rejected": -0.11250156164169312, "step": 4064 }, { "epoch": 2.8112033195020745, "grad_norm": 8.669575691223145, "learning_rate": 3.993775933609959e-05, "log_odds_chosen": 7.901257514953613, "log_odds_ratio": -0.006656877230852842, "logits/chosen": -0.7184497117996216, "logits/rejected": -0.7672439813613892, "logps/chosen": -0.01615772396326065, "logps/rejected": -1.3511242866516113, "loss": 2.0056, "nll_loss": 0.5007306337356567, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016157726058736444, "rewards/margins": 0.13349665701389313, "rewards/rejected": -0.1351124346256256, "step": 4065 }, { "epoch": 2.8118948824343013, "grad_norm": 11.849681854248047, "learning_rate": 3.993391731980944e-05, "log_odds_chosen": 10.316424369812012, "log_odds_ratio": -0.00012056290142936632, "logits/chosen": -0.29598119854927063, "logits/rejected": -0.3441726863384247, "logps/chosen": -0.00017041430692188442, "logps/rejected": -1.8917357921600342, "loss": 2.6001, "nll_loss": 0.6500047445297241, "rewards/accuracies": 1.0, "rewards/chosen": -1.7041431419784203e-05, "rewards/margins": 0.18915654718875885, "rewards/rejected": -0.18917357921600342, "step": 4066 }, { "epoch": 2.812586445366528, "grad_norm": 22.96870231628418, "learning_rate": 3.9930075303519285e-05, "log_odds_chosen": 6.919788360595703, "log_odds_ratio": -0.9415428638458252, "logits/chosen": -0.5172785520553589, "logits/rejected": -0.5798472166061401, "logps/chosen": -0.16864734888076782, "logps/rejected": -0.9507753849029541, "loss": 2.3151, "nll_loss": 0.4846179485321045, "rewards/accuracies": 0.875, "rewards/chosen": -0.01686473749577999, "rewards/margins": 0.07821279764175415, "rewards/rejected": -0.09507754445075989, "step": 4067 }, { "epoch": 2.813278008298755, "grad_norm": 9.234999656677246, "learning_rate": 3.9926233287229144e-05, "log_odds_chosen": 9.32768440246582, "log_odds_ratio": -0.15762081742286682, "logits/chosen": -0.7216007113456726, "logits/rejected": -0.8246335983276367, "logps/chosen": -0.02265734225511551, "logps/rejected": -1.8226202726364136, "loss": 1.8922, "nll_loss": 0.4572892189025879, "rewards/accuracies": 0.875, "rewards/chosen": -0.0022657345980405807, "rewards/margins": 0.1799962967634201, "rewards/rejected": -0.18226204812526703, "step": 4068 }, { "epoch": 2.813969571230982, "grad_norm": 5.7767133712768555, "learning_rate": 3.992239127093899e-05, "log_odds_chosen": 7.759276866912842, "log_odds_ratio": -0.0045270719565451145, "logits/chosen": -0.47087520360946655, "logits/rejected": -0.40145114064216614, "logps/chosen": -0.007731554564088583, "logps/rejected": -1.359923005104065, "loss": 2.0149, "nll_loss": 0.5032612085342407, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007731555378995836, "rewards/margins": 0.13521915674209595, "rewards/rejected": -0.13599231839179993, "step": 4069 }, { "epoch": 2.8146611341632086, "grad_norm": 5.5008440017700195, "learning_rate": 3.991854925464884e-05, "log_odds_chosen": 7.026598930358887, "log_odds_ratio": -0.11519190669059753, "logits/chosen": -0.7082157731056213, "logits/rejected": -0.6972053647041321, "logps/chosen": -0.02982058934867382, "logps/rejected": -1.2710275650024414, "loss": 1.9039, "nll_loss": 0.4644562602043152, "rewards/accuracies": 0.875, "rewards/chosen": -0.002982059260830283, "rewards/margins": 0.12412068992853165, "rewards/rejected": -0.12710274755954742, "step": 4070 }, { "epoch": 2.8153526970954355, "grad_norm": 9.34103775024414, "learning_rate": 3.9914707238358695e-05, "log_odds_chosen": 8.257344245910645, "log_odds_ratio": -0.0006549620884470642, "logits/chosen": -0.46251440048217773, "logits/rejected": -0.4937194585800171, "logps/chosen": -0.0014829322462901473, "logps/rejected": -1.1529827117919922, "loss": 2.7946, "nll_loss": 0.6985812187194824, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014829322753939778, "rewards/margins": 0.11514997482299805, "rewards/rejected": -0.11529827117919922, "step": 4071 }, { "epoch": 2.8160442600276623, "grad_norm": 7.539175033569336, "learning_rate": 3.991086522206855e-05, "log_odds_chosen": 7.862712383270264, "log_odds_ratio": -0.000995173119008541, "logits/chosen": -0.8730528950691223, "logits/rejected": -0.8860530853271484, "logps/chosen": -0.0025655007921159267, "logps/rejected": -1.3373017311096191, "loss": 2.4647, "nll_loss": 0.6160710453987122, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025655009085312486, "rewards/margins": 0.13347361981868744, "rewards/rejected": -0.13373017311096191, "step": 4072 }, { "epoch": 2.816735822959889, "grad_norm": 9.269205093383789, "learning_rate": 3.990702320577839e-05, "log_odds_chosen": 8.780440330505371, "log_odds_ratio": -0.0008190101943910122, "logits/chosen": -0.5245490074157715, "logits/rejected": -0.6109641790390015, "logps/chosen": -0.004009606782346964, "logps/rejected": -1.9757808446884155, "loss": 1.9942, "nll_loss": 0.49846985936164856, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040096070733852684, "rewards/margins": 0.19717712700366974, "rewards/rejected": -0.1975780874490738, "step": 4073 }, { "epoch": 2.817427385892116, "grad_norm": 7.564273357391357, "learning_rate": 3.9903181189488246e-05, "log_odds_chosen": 8.549010276794434, "log_odds_ratio": -0.0013980288058519363, "logits/chosen": -0.347595751285553, "logits/rejected": -0.43927520513534546, "logps/chosen": -0.015520025976002216, "logps/rejected": -1.797426700592041, "loss": 1.9958, "nll_loss": 0.4988030791282654, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015520025044679642, "rewards/margins": 0.17819064855575562, "rewards/rejected": -0.17974264919757843, "step": 4074 }, { "epoch": 2.8181189488243428, "grad_norm": 32.705650329589844, "learning_rate": 3.98993391731981e-05, "log_odds_chosen": 6.796420574188232, "log_odds_ratio": -0.398301899433136, "logits/chosen": -0.7179882526397705, "logits/rejected": -0.7630666494369507, "logps/chosen": -0.051055118441581726, "logps/rejected": -1.2427799701690674, "loss": 2.342, "nll_loss": 0.5456675291061401, "rewards/accuracies": 0.875, "rewards/chosen": -0.0051055122166872025, "rewards/margins": 0.11917249858379364, "rewards/rejected": -0.1242780089378357, "step": 4075 }, { "epoch": 2.8188105117565696, "grad_norm": 8.102416038513184, "learning_rate": 3.9895497156907944e-05, "log_odds_chosen": 6.3788862228393555, "log_odds_ratio": -0.029690608382225037, "logits/chosen": -0.5226523280143738, "logits/rejected": -0.5438002347946167, "logps/chosen": -0.014367911033332348, "logps/rejected": -1.0449059009552002, "loss": 1.9453, "nll_loss": 0.4833501875400543, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014367912663146853, "rewards/margins": 0.10305380821228027, "rewards/rejected": -0.10449059307575226, "step": 4076 }, { "epoch": 2.8195020746887964, "grad_norm": 9.83740234375, "learning_rate": 3.98916551406178e-05, "log_odds_chosen": 8.079355239868164, "log_odds_ratio": -0.05913592129945755, "logits/chosen": -0.5341934561729431, "logits/rejected": -0.6323039531707764, "logps/chosen": -0.026006482541561127, "logps/rejected": -1.7434455156326294, "loss": 2.0879, "nll_loss": 0.5160654783248901, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026006484404206276, "rewards/margins": 0.17174391448497772, "rewards/rejected": -0.17434456944465637, "step": 4077 }, { "epoch": 2.8201936376210233, "grad_norm": 16.19186019897461, "learning_rate": 3.988781312432765e-05, "log_odds_chosen": 6.893074035644531, "log_odds_ratio": -0.011647537350654602, "logits/chosen": -0.8889976739883423, "logits/rejected": -0.9283890128135681, "logps/chosen": -0.01870199292898178, "logps/rejected": -1.302621603012085, "loss": 2.6, "nll_loss": 0.6488242149353027, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018701993394643068, "rewards/margins": 0.12839196622371674, "rewards/rejected": -0.13026216626167297, "step": 4078 }, { "epoch": 2.82088520055325, "grad_norm": 12.260211944580078, "learning_rate": 3.98839711080375e-05, "log_odds_chosen": 8.926525115966797, "log_odds_ratio": -0.0010325999464839697, "logits/chosen": -0.690007209777832, "logits/rejected": -0.803962230682373, "logps/chosen": -0.002110689412802458, "logps/rejected": -1.6868860721588135, "loss": 2.4598, "nll_loss": 0.6148371696472168, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002110689238179475, "rewards/margins": 0.16847753524780273, "rewards/rejected": -0.1686885952949524, "step": 4079 }, { "epoch": 2.821576763485477, "grad_norm": 11.205087661743164, "learning_rate": 3.9880129091747353e-05, "log_odds_chosen": 8.465922355651855, "log_odds_ratio": -0.004587572067975998, "logits/chosen": -0.6520918607711792, "logits/rejected": -0.7593033909797668, "logps/chosen": -0.0034907313529402018, "logps/rejected": -1.5698950290679932, "loss": 2.1028, "nll_loss": 0.5252323150634766, "rewards/accuracies": 1.0, "rewards/chosen": -0.00034907314693555236, "rewards/margins": 0.15664042532444, "rewards/rejected": -0.15698951482772827, "step": 4080 }, { "epoch": 2.8222683264177038, "grad_norm": 7.319963455200195, "learning_rate": 3.9876287075457206e-05, "log_odds_chosen": 6.9736738204956055, "log_odds_ratio": -0.1095675528049469, "logits/chosen": -0.5960395336151123, "logits/rejected": -0.6045196056365967, "logps/chosen": -0.02621668018400669, "logps/rejected": -1.096388578414917, "loss": 1.7686, "nll_loss": 0.4312053322792053, "rewards/accuracies": 1.0, "rewards/chosen": -0.002621668390929699, "rewards/margins": 0.10701719671487808, "rewards/rejected": -0.10963885486125946, "step": 4081 }, { "epoch": 2.8229598893499306, "grad_norm": 7.484302520751953, "learning_rate": 3.987244505916705e-05, "log_odds_chosen": 9.62667465209961, "log_odds_ratio": -0.00041180552216246724, "logits/chosen": -0.7579289674758911, "logits/rejected": -0.8042924404144287, "logps/chosen": -0.017796583473682404, "logps/rejected": -1.883541226387024, "loss": 1.8983, "nll_loss": 0.4745240807533264, "rewards/accuracies": 1.0, "rewards/chosen": -0.001779658254235983, "rewards/margins": 0.18657445907592773, "rewards/rejected": -0.18835411965847015, "step": 4082 }, { "epoch": 2.8236514522821574, "grad_norm": 7.532115459442139, "learning_rate": 3.9868603042876904e-05, "log_odds_chosen": 6.588289260864258, "log_odds_ratio": -0.2632429897785187, "logits/chosen": -0.4547191560268402, "logits/rejected": -0.4525451362133026, "logps/chosen": -0.10993438214063644, "logps/rejected": -1.7299728393554688, "loss": 2.3395, "nll_loss": 0.5585499405860901, "rewards/accuracies": 0.875, "rewards/chosen": -0.01099343877285719, "rewards/margins": 0.16200384497642517, "rewards/rejected": -0.17299726605415344, "step": 4083 }, { "epoch": 2.8243430152143842, "grad_norm": 9.845251083374023, "learning_rate": 3.9864761026586756e-05, "log_odds_chosen": 7.189066410064697, "log_odds_ratio": -0.004802032373845577, "logits/chosen": -0.8864326477050781, "logits/rejected": -0.8713440895080566, "logps/chosen": -0.021907519549131393, "logps/rejected": -1.5963313579559326, "loss": 2.434, "nll_loss": 0.608009397983551, "rewards/accuracies": 1.0, "rewards/chosen": -0.002190752187743783, "rewards/margins": 0.1574423909187317, "rewards/rejected": -0.15963315963745117, "step": 4084 }, { "epoch": 2.825034578146611, "grad_norm": 5.436544895172119, "learning_rate": 3.98609190102966e-05, "log_odds_chosen": 8.149063110351562, "log_odds_ratio": -0.031937647610902786, "logits/chosen": -0.3354184031486511, "logits/rejected": -0.4097675681114197, "logps/chosen": -0.023115739226341248, "logps/rejected": -1.7368464469909668, "loss": 2.3099, "nll_loss": 0.574272096157074, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023115738295018673, "rewards/margins": 0.17137306928634644, "rewards/rejected": -0.17368465662002563, "step": 4085 }, { "epoch": 2.825726141078838, "grad_norm": 10.83985710144043, "learning_rate": 3.985707699400646e-05, "log_odds_chosen": 8.234752655029297, "log_odds_ratio": -0.04173066467046738, "logits/chosen": -0.9166703820228577, "logits/rejected": -1.0253310203552246, "logps/chosen": -0.009630718268454075, "logps/rejected": -1.1056783199310303, "loss": 2.5543, "nll_loss": 0.6344038248062134, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009630717686377466, "rewards/margins": 0.10960475355386734, "rewards/rejected": -0.11056783050298691, "step": 4086 }, { "epoch": 2.8264177040110647, "grad_norm": 13.876534461975098, "learning_rate": 3.985323497771631e-05, "log_odds_chosen": 8.142451286315918, "log_odds_ratio": -0.23297399282455444, "logits/chosen": -0.8295532464981079, "logits/rejected": -0.8863059282302856, "logps/chosen": -0.02916792407631874, "logps/rejected": -1.4469013214111328, "loss": 2.3819, "nll_loss": 0.5721670389175415, "rewards/accuracies": 0.875, "rewards/chosen": -0.002916792407631874, "rewards/margins": 0.14177334308624268, "rewards/rejected": -0.14469014108181, "step": 4087 }, { "epoch": 2.8271092669432916, "grad_norm": 6.461430549621582, "learning_rate": 3.984939296142616e-05, "log_odds_chosen": 6.785519599914551, "log_odds_ratio": -0.04183311015367508, "logits/chosen": -0.7603569030761719, "logits/rejected": -0.7885888814926147, "logps/chosen": -0.0300129521638155, "logps/rejected": -1.3980698585510254, "loss": 1.657, "nll_loss": 0.4100547432899475, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030012952629476786, "rewards/margins": 0.1368056982755661, "rewards/rejected": -0.13980698585510254, "step": 4088 }, { "epoch": 2.8278008298755184, "grad_norm": 6.225308418273926, "learning_rate": 3.984555094513601e-05, "log_odds_chosen": 9.266956329345703, "log_odds_ratio": -0.002623880747705698, "logits/chosen": -0.6244158744812012, "logits/rejected": -0.597240149974823, "logps/chosen": -0.0062347580678761005, "logps/rejected": -1.559669017791748, "loss": 1.5555, "nll_loss": 0.3886123597621918, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006234758184291422, "rewards/margins": 0.1553434431552887, "rewards/rejected": -0.15596690773963928, "step": 4089 }, { "epoch": 2.8284923928077457, "grad_norm": 7.004493236541748, "learning_rate": 3.9841708928845864e-05, "log_odds_chosen": 7.017504692077637, "log_odds_ratio": -0.045389194041490555, "logits/chosen": -0.6390866041183472, "logits/rejected": -0.6546069979667664, "logps/chosen": -0.041990771889686584, "logps/rejected": -1.8552742004394531, "loss": 2.1232, "nll_loss": 0.5262652635574341, "rewards/accuracies": 1.0, "rewards/chosen": -0.004199077840894461, "rewards/margins": 0.18132832646369934, "rewards/rejected": -0.18552741408348083, "step": 4090 }, { "epoch": 2.8291839557399725, "grad_norm": 8.383001327514648, "learning_rate": 3.983786691255571e-05, "log_odds_chosen": 8.123327255249023, "log_odds_ratio": -0.013854804448783398, "logits/chosen": -0.6715909242630005, "logits/rejected": -0.6964853405952454, "logps/chosen": -0.009874495677649975, "logps/rejected": -1.4259437322616577, "loss": 2.3549, "nll_loss": 0.5873280167579651, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009874494280666113, "rewards/margins": 0.14160692691802979, "rewards/rejected": -0.1425943672657013, "step": 4091 }, { "epoch": 2.8298755186721993, "grad_norm": 8.464040756225586, "learning_rate": 3.983402489626556e-05, "log_odds_chosen": 8.556112289428711, "log_odds_ratio": -0.0016304109012708068, "logits/chosen": -0.5392479300498962, "logits/rejected": -0.6534115076065063, "logps/chosen": -0.027099961414933205, "logps/rejected": -2.275935649871826, "loss": 2.2959, "nll_loss": 0.5738140344619751, "rewards/accuracies": 1.0, "rewards/chosen": -0.002709996188059449, "rewards/margins": 0.224883571267128, "rewards/rejected": -0.2275935411453247, "step": 4092 }, { "epoch": 2.830567081604426, "grad_norm": 7.396304130554199, "learning_rate": 3.9830182879975415e-05, "log_odds_chosen": 8.257478713989258, "log_odds_ratio": -0.0026086117140948772, "logits/chosen": -0.4450608491897583, "logits/rejected": -0.46384957432746887, "logps/chosen": -0.026241516694426537, "logps/rejected": -2.154176950454712, "loss": 2.0062, "nll_loss": 0.5012954473495483, "rewards/accuracies": 1.0, "rewards/chosen": -0.002624151762574911, "rewards/margins": 0.21279355883598328, "rewards/rejected": -0.21541771292686462, "step": 4093 }, { "epoch": 2.831258644536653, "grad_norm": 10.601005554199219, "learning_rate": 3.982634086368526e-05, "log_odds_chosen": 5.183744430541992, "log_odds_ratio": -0.2201242297887802, "logits/chosen": -0.25432318449020386, "logits/rejected": -0.29870888590812683, "logps/chosen": -0.06550457328557968, "logps/rejected": -1.0095858573913574, "loss": 2.2327, "nll_loss": 0.5361602306365967, "rewards/accuracies": 0.875, "rewards/chosen": -0.006550457328557968, "rewards/margins": 0.09440812468528748, "rewards/rejected": -0.10095858573913574, "step": 4094 }, { "epoch": 2.83195020746888, "grad_norm": 11.715688705444336, "learning_rate": 3.982249884739511e-05, "log_odds_chosen": 9.65654182434082, "log_odds_ratio": -0.00367301725782454, "logits/chosen": -0.3656267821788788, "logits/rejected": -0.4155307412147522, "logps/chosen": -0.0016836244612932205, "logps/rejected": -2.357590675354004, "loss": 3.5452, "nll_loss": 0.8859277367591858, "rewards/accuracies": 1.0, "rewards/chosen": -0.000168362443218939, "rewards/margins": 0.23559071123600006, "rewards/rejected": -0.23575909435749054, "step": 4095 }, { "epoch": 2.8326417704011067, "grad_norm": 10.904032707214355, "learning_rate": 3.9818656831104965e-05, "log_odds_chosen": 8.239508628845215, "log_odds_ratio": -0.0010725038591772318, "logits/chosen": -0.660797119140625, "logits/rejected": -0.666714072227478, "logps/chosen": -0.013842078857123852, "logps/rejected": -1.586050033569336, "loss": 2.3764, "nll_loss": 0.5939972400665283, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013842078624293208, "rewards/margins": 0.15722079575061798, "rewards/rejected": -0.15860500931739807, "step": 4096 }, { "epoch": 2.8333333333333335, "grad_norm": 7.934488296508789, "learning_rate": 3.981481481481482e-05, "log_odds_chosen": 8.910290718078613, "log_odds_ratio": -0.0014349442208185792, "logits/chosen": -0.5394958853721619, "logits/rejected": -0.630646824836731, "logps/chosen": -0.0013760102447122335, "logps/rejected": -1.5402330160140991, "loss": 2.5146, "nll_loss": 0.628517746925354, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013760101865045726, "rewards/margins": 0.15388569235801697, "rewards/rejected": -0.15402328968048096, "step": 4097 }, { "epoch": 2.8340248962655603, "grad_norm": 5.750637531280518, "learning_rate": 3.9810972798524664e-05, "log_odds_chosen": 7.891044616699219, "log_odds_ratio": -0.004763246979564428, "logits/chosen": -0.578194797039032, "logits/rejected": -0.5832671523094177, "logps/chosen": -0.012611321173608303, "logps/rejected": -1.1056181192398071, "loss": 1.6103, "nll_loss": 0.40209874510765076, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012611323036253452, "rewards/margins": 0.10930068790912628, "rewards/rejected": -0.11056182533502579, "step": 4098 }, { "epoch": 2.834716459197787, "grad_norm": 7.133883953094482, "learning_rate": 3.980713078223452e-05, "log_odds_chosen": 5.702484130859375, "log_odds_ratio": -0.15084023773670197, "logits/chosen": -0.6206138134002686, "logits/rejected": -0.5826375484466553, "logps/chosen": -0.053505752235651016, "logps/rejected": -1.6038875579833984, "loss": 1.9244, "nll_loss": 0.4660155475139618, "rewards/accuracies": 0.875, "rewards/chosen": -0.005350574851036072, "rewards/margins": 0.15503817796707153, "rewards/rejected": -0.1603887677192688, "step": 4099 }, { "epoch": 2.835408022130014, "grad_norm": 8.215850830078125, "learning_rate": 3.980328876594437e-05, "log_odds_chosen": 9.187931060791016, "log_odds_ratio": -0.0014828175771981478, "logits/chosen": -0.7627009153366089, "logits/rejected": -0.7655040621757507, "logps/chosen": -0.0007910731364972889, "logps/rejected": -1.2454516887664795, "loss": 2.5632, "nll_loss": 0.6406489610671997, "rewards/accuracies": 1.0, "rewards/chosen": -7.910731801530346e-05, "rewards/margins": 0.12446606159210205, "rewards/rejected": -0.12454516440629959, "step": 4100 }, { "epoch": 2.836099585062241, "grad_norm": 7.439212799072266, "learning_rate": 3.979944674965422e-05, "log_odds_chosen": 7.659041881561279, "log_odds_ratio": -0.004087934270501137, "logits/chosen": -0.8255820870399475, "logits/rejected": -0.879666268825531, "logps/chosen": -0.006819070316851139, "logps/rejected": -1.261864423751831, "loss": 1.5073, "nll_loss": 0.376420259475708, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006819070549681783, "rewards/margins": 0.1255045384168625, "rewards/rejected": -0.12618646025657654, "step": 4101 }, { "epoch": 2.8367911479944676, "grad_norm": 10.409981727600098, "learning_rate": 3.979560473336407e-05, "log_odds_chosen": 7.2086687088012695, "log_odds_ratio": -0.09979799389839172, "logits/chosen": -0.33021828532218933, "logits/rejected": -0.4076710343360901, "logps/chosen": -0.019604945555329323, "logps/rejected": -0.9485692977905273, "loss": 1.9901, "nll_loss": 0.4875490367412567, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019604945555329323, "rewards/margins": 0.09289643168449402, "rewards/rejected": -0.09485693275928497, "step": 4102 }, { "epoch": 2.8374827109266945, "grad_norm": 11.524858474731445, "learning_rate": 3.979176271707392e-05, "log_odds_chosen": 9.426741600036621, "log_odds_ratio": -0.000592447817325592, "logits/chosen": -0.9023887515068054, "logits/rejected": -0.9688795804977417, "logps/chosen": -0.0008985823369584978, "logps/rejected": -1.7494679689407349, "loss": 2.6452, "nll_loss": 0.6612343192100525, "rewards/accuracies": 1.0, "rewards/chosen": -8.985823660623282e-05, "rewards/margins": 0.17485694587230682, "rewards/rejected": -0.17494678497314453, "step": 4103 }, { "epoch": 2.8381742738589213, "grad_norm": 7.3802008628845215, "learning_rate": 3.978792070078377e-05, "log_odds_chosen": 8.576873779296875, "log_odds_ratio": -0.09843210875988007, "logits/chosen": -0.6974600553512573, "logits/rejected": -0.7106385231018066, "logps/chosen": -0.04491880536079407, "logps/rejected": -1.5757704973220825, "loss": 1.641, "nll_loss": 0.4003986716270447, "rewards/accuracies": 0.875, "rewards/chosen": -0.004491880536079407, "rewards/margins": 0.15308517217636108, "rewards/rejected": -0.1575770378112793, "step": 4104 }, { "epoch": 2.838865836791148, "grad_norm": 15.590590476989746, "learning_rate": 3.9784078684493624e-05, "log_odds_chosen": 7.23812198638916, "log_odds_ratio": -0.07408453524112701, "logits/chosen": -0.49816223978996277, "logits/rejected": -0.5431675314903259, "logps/chosen": -0.02068902552127838, "logps/rejected": -1.185168743133545, "loss": 1.8842, "nll_loss": 0.46363386511802673, "rewards/accuracies": 1.0, "rewards/chosen": -0.002068902365863323, "rewards/margins": 0.11644796282052994, "rewards/rejected": -0.11851686239242554, "step": 4105 }, { "epoch": 2.839557399723375, "grad_norm": 6.0863165855407715, "learning_rate": 3.9780236668203476e-05, "log_odds_chosen": 7.257332801818848, "log_odds_ratio": -0.07132648676633835, "logits/chosen": -0.6692647933959961, "logits/rejected": -0.7386534214019775, "logps/chosen": -0.03094615414738655, "logps/rejected": -0.9907289743423462, "loss": 1.726, "nll_loss": 0.424371600151062, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030946151819080114, "rewards/margins": 0.095978282392025, "rewards/rejected": -0.0990729033946991, "step": 4106 }, { "epoch": 2.840248962655602, "grad_norm": 9.847002983093262, "learning_rate": 3.977639465191332e-05, "log_odds_chosen": 5.666792869567871, "log_odds_ratio": -0.20099371671676636, "logits/chosen": -0.583608090877533, "logits/rejected": -0.6237548589706421, "logps/chosen": -0.06134883314371109, "logps/rejected": -1.838850736618042, "loss": 2.6003, "nll_loss": 0.6299666166305542, "rewards/accuracies": 0.875, "rewards/chosen": -0.006134883500635624, "rewards/margins": 0.17775020003318787, "rewards/rejected": -0.18388508260250092, "step": 4107 }, { "epoch": 2.8409405255878286, "grad_norm": 12.510313987731934, "learning_rate": 3.977255263562318e-05, "log_odds_chosen": 7.918940544128418, "log_odds_ratio": -0.0010384717024862766, "logits/chosen": -0.6723248958587646, "logits/rejected": -0.7267094850540161, "logps/chosen": -0.0015254435129463673, "logps/rejected": -1.2971229553222656, "loss": 2.1719, "nll_loss": 0.5428681969642639, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015254435129463673, "rewards/margins": 0.12955975532531738, "rewards/rejected": -0.1297122985124588, "step": 4108 }, { "epoch": 2.8416320885200554, "grad_norm": 7.601841449737549, "learning_rate": 3.976871061933303e-05, "log_odds_chosen": 5.5808939933776855, "log_odds_ratio": -0.04469112306833267, "logits/chosen": -0.6102938652038574, "logits/rejected": -0.634024977684021, "logps/chosen": -0.04405169188976288, "logps/rejected": -0.8939533233642578, "loss": 2.4455, "nll_loss": 0.6069144606590271, "rewards/accuracies": 1.0, "rewards/chosen": -0.004405169747769833, "rewards/margins": 0.08499015867710114, "rewards/rejected": -0.08939532935619354, "step": 4109 }, { "epoch": 2.8423236514522823, "grad_norm": 11.374734878540039, "learning_rate": 3.976486860304288e-05, "log_odds_chosen": 9.27154541015625, "log_odds_ratio": -0.0003829200577456504, "logits/chosen": -0.6784447431564331, "logits/rejected": -0.7364102602005005, "logps/chosen": -0.0008755293092690408, "logps/rejected": -1.626358151435852, "loss": 1.8203, "nll_loss": 0.45503830909729004, "rewards/accuracies": 1.0, "rewards/chosen": -8.755293674767017e-05, "rewards/margins": 0.1625482589006424, "rewards/rejected": -0.16263581812381744, "step": 4110 }, { "epoch": 2.843015214384509, "grad_norm": 8.393563270568848, "learning_rate": 3.976102658675273e-05, "log_odds_chosen": 8.176108360290527, "log_odds_ratio": -0.0006821705610491335, "logits/chosen": -0.8334037661552429, "logits/rejected": -0.839069128036499, "logps/chosen": -0.010099105536937714, "logps/rejected": -1.0930083990097046, "loss": 2.1009, "nll_loss": 0.5251496434211731, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010099106002599, "rewards/margins": 0.10829093307256699, "rewards/rejected": -0.10930084437131882, "step": 4111 }, { "epoch": 2.843706777316736, "grad_norm": 9.47371768951416, "learning_rate": 3.975718457046258e-05, "log_odds_chosen": 6.1424174308776855, "log_odds_ratio": -0.0985918939113617, "logits/chosen": -0.5856118202209473, "logits/rejected": -0.6207878589630127, "logps/chosen": -0.04165790230035782, "logps/rejected": -1.1315250396728516, "loss": 2.456, "nll_loss": 0.6041349768638611, "rewards/accuracies": 1.0, "rewards/chosen": -0.004165790043771267, "rewards/margins": 0.10898672044277191, "rewards/rejected": -0.11315250396728516, "step": 4112 }, { "epoch": 2.8443983402489628, "grad_norm": 30.912683486938477, "learning_rate": 3.975334255417243e-05, "log_odds_chosen": 7.40262508392334, "log_odds_ratio": -0.27424168586730957, "logits/chosen": -0.7680657505989075, "logits/rejected": -0.7505401372909546, "logps/chosen": -0.07187169790267944, "logps/rejected": -1.3441351652145386, "loss": 1.8906, "nll_loss": 0.44523701071739197, "rewards/accuracies": 0.875, "rewards/chosen": -0.007187169045209885, "rewards/margins": 0.1272263377904892, "rewards/rejected": -0.13441351056098938, "step": 4113 }, { "epoch": 2.8450899031811896, "grad_norm": 7.518012046813965, "learning_rate": 3.974950053788228e-05, "log_odds_chosen": 9.113598823547363, "log_odds_ratio": -0.00020042213145643473, "logits/chosen": -0.4726852774620056, "logits/rejected": -0.49504923820495605, "logps/chosen": -0.0003604450321290642, "logps/rejected": -1.195584774017334, "loss": 2.1996, "nll_loss": 0.549877941608429, "rewards/accuracies": 1.0, "rewards/chosen": -3.604450466809794e-05, "rewards/margins": 0.11952243745326996, "rewards/rejected": -0.11955846846103668, "step": 4114 }, { "epoch": 2.8457814661134164, "grad_norm": 14.199585914611816, "learning_rate": 3.9745658521592135e-05, "log_odds_chosen": 7.985638618469238, "log_odds_ratio": -0.008731107227504253, "logits/chosen": -0.4885767698287964, "logits/rejected": -0.45740532875061035, "logps/chosen": -0.0026937490329146385, "logps/rejected": -1.0088611841201782, "loss": 2.509, "nll_loss": 0.6263870000839233, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002693749265745282, "rewards/margins": 0.10061675310134888, "rewards/rejected": -0.10088612139225006, "step": 4115 }, { "epoch": 2.8464730290456433, "grad_norm": 6.5890889167785645, "learning_rate": 3.974181650530198e-05, "log_odds_chosen": 9.449125289916992, "log_odds_ratio": -0.0001227892644237727, "logits/chosen": -0.5998516082763672, "logits/rejected": -0.6113935112953186, "logps/chosen": -0.0005787935806438327, "logps/rejected": -1.388109803199768, "loss": 1.5608, "nll_loss": 0.39019450545310974, "rewards/accuracies": 1.0, "rewards/chosen": -5.787936242995784e-05, "rewards/margins": 0.13875310122966766, "rewards/rejected": -0.13881099224090576, "step": 4116 }, { "epoch": 2.84716459197787, "grad_norm": 17.623966217041016, "learning_rate": 3.973797448901184e-05, "log_odds_chosen": 5.721372604370117, "log_odds_ratio": -0.3729022145271301, "logits/chosen": -0.5502513647079468, "logits/rejected": -0.5644844770431519, "logps/chosen": -0.06836064159870148, "logps/rejected": -1.288745641708374, "loss": 2.1235, "nll_loss": 0.49359455704689026, "rewards/accuracies": 0.75, "rewards/chosen": -0.006836064159870148, "rewards/margins": 0.12203850597143173, "rewards/rejected": -0.12887457013130188, "step": 4117 }, { "epoch": 2.847856154910097, "grad_norm": 5.089163780212402, "learning_rate": 3.9734132472721685e-05, "log_odds_chosen": 7.329201698303223, "log_odds_ratio": -0.17281562089920044, "logits/chosen": -0.6293739080429077, "logits/rejected": -0.6185722947120667, "logps/chosen": -0.04492910951375961, "logps/rejected": -1.3377739191055298, "loss": 2.0892, "nll_loss": 0.5050212740898132, "rewards/accuracies": 0.875, "rewards/chosen": -0.004492911044508219, "rewards/margins": 0.12928447127342224, "rewards/rejected": -0.13377737998962402, "step": 4118 }, { "epoch": 2.8485477178423237, "grad_norm": 7.862229347229004, "learning_rate": 3.973029045643154e-05, "log_odds_chosen": 5.797403335571289, "log_odds_ratio": -0.11605511605739594, "logits/chosen": -0.495116651058197, "logits/rejected": -0.5606188774108887, "logps/chosen": -0.03224784508347511, "logps/rejected": -0.8752045631408691, "loss": 2.3005, "nll_loss": 0.5635241270065308, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032247842755168676, "rewards/margins": 0.08429567515850067, "rewards/rejected": -0.08752045035362244, "step": 4119 }, { "epoch": 2.8492392807745506, "grad_norm": 7.012640953063965, "learning_rate": 3.972644844014139e-05, "log_odds_chosen": 7.387671947479248, "log_odds_ratio": -0.2472531795501709, "logits/chosen": -0.9369310140609741, "logits/rejected": -1.023695707321167, "logps/chosen": -0.051521994173526764, "logps/rejected": -1.3849338293075562, "loss": 2.4995, "nll_loss": 0.6001489162445068, "rewards/accuracies": 0.75, "rewards/chosen": -0.005152199417352676, "rewards/margins": 0.13334119319915771, "rewards/rejected": -0.1384933888912201, "step": 4120 }, { "epoch": 2.8499308437067774, "grad_norm": 8.53339958190918, "learning_rate": 3.9722606423851236e-05, "log_odds_chosen": 6.195735931396484, "log_odds_ratio": -0.14083018898963928, "logits/chosen": -0.48863381147384644, "logits/rejected": -0.5069273710250854, "logps/chosen": -0.041694194078445435, "logps/rejected": -1.1637004613876343, "loss": 1.8266, "nll_loss": 0.4425726532936096, "rewards/accuracies": 1.0, "rewards/chosen": -0.0041694194078445435, "rewards/margins": 0.11220061779022217, "rewards/rejected": -0.11637004464864731, "step": 4121 }, { "epoch": 2.8506224066390042, "grad_norm": 9.292778015136719, "learning_rate": 3.971876440756109e-05, "log_odds_chosen": 8.705432891845703, "log_odds_ratio": -0.0005094002117402852, "logits/chosen": -0.24346569180488586, "logits/rejected": -0.2411637306213379, "logps/chosen": -0.000389696768252179, "logps/rejected": -1.2692971229553223, "loss": 1.9885, "nll_loss": 0.49707961082458496, "rewards/accuracies": 1.0, "rewards/chosen": -3.896967973560095e-05, "rewards/margins": 0.12689074873924255, "rewards/rejected": -0.12692971527576447, "step": 4122 }, { "epoch": 2.851313969571231, "grad_norm": 8.638797760009766, "learning_rate": 3.971492239127094e-05, "log_odds_chosen": 8.19682788848877, "log_odds_ratio": -0.24544239044189453, "logits/chosen": -0.5123015642166138, "logits/rejected": -0.5535436868667603, "logps/chosen": -0.027725744992494583, "logps/rejected": -1.5624759197235107, "loss": 2.4238, "nll_loss": 0.5813997983932495, "rewards/accuracies": 0.875, "rewards/chosen": -0.002772574545815587, "rewards/margins": 0.15347503125667572, "rewards/rejected": -0.1562476009130478, "step": 4123 }, { "epoch": 2.852005532503458, "grad_norm": 7.665647506713867, "learning_rate": 3.971108037498079e-05, "log_odds_chosen": 7.87119722366333, "log_odds_ratio": -0.09006493538618088, "logits/chosen": -0.5190091133117676, "logits/rejected": -0.6639402508735657, "logps/chosen": -0.02196761779487133, "logps/rejected": -1.3968288898468018, "loss": 1.7224, "nll_loss": 0.42158395051956177, "rewards/accuracies": 0.875, "rewards/chosen": -0.002196761779487133, "rewards/margins": 0.13748612999916077, "rewards/rejected": -0.13968288898468018, "step": 4124 }, { "epoch": 2.8526970954356847, "grad_norm": 8.311286926269531, "learning_rate": 3.970723835869064e-05, "log_odds_chosen": 6.067910194396973, "log_odds_ratio": -0.029917169362306595, "logits/chosen": -0.5240775346755981, "logits/rejected": -0.519280731678009, "logps/chosen": -0.03226805478334427, "logps/rejected": -1.527235507965088, "loss": 2.5063, "nll_loss": 0.6235796213150024, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032268057111650705, "rewards/margins": 0.14949676394462585, "rewards/rejected": -0.1527235507965088, "step": 4125 }, { "epoch": 2.8533886583679116, "grad_norm": 10.94139289855957, "learning_rate": 3.97033963424005e-05, "log_odds_chosen": 8.23917007446289, "log_odds_ratio": -0.16562430560588837, "logits/chosen": -0.6252275705337524, "logits/rejected": -0.7134463787078857, "logps/chosen": -0.07940398901700974, "logps/rejected": -1.8555247783660889, "loss": 2.4194, "nll_loss": 0.5882754921913147, "rewards/accuracies": 0.875, "rewards/chosen": -0.007940399460494518, "rewards/margins": 0.1776120811700821, "rewards/rejected": -0.1855524778366089, "step": 4126 }, { "epoch": 2.8540802213001384, "grad_norm": 9.70150089263916, "learning_rate": 3.9699554326110344e-05, "log_odds_chosen": 8.478819847106934, "log_odds_ratio": -0.002084367675706744, "logits/chosen": -0.7293291687965393, "logits/rejected": -0.7733567953109741, "logps/chosen": -0.002238813554868102, "logps/rejected": -1.5626964569091797, "loss": 2.2216, "nll_loss": 0.5551963448524475, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022388134675566107, "rewards/margins": 0.15604576468467712, "rewards/rejected": -0.1562696397304535, "step": 4127 }, { "epoch": 2.854771784232365, "grad_norm": 7.927999019622803, "learning_rate": 3.9695712309820196e-05, "log_odds_chosen": 6.218683242797852, "log_odds_ratio": -0.05425438657402992, "logits/chosen": -0.599659264087677, "logits/rejected": -0.6657902002334595, "logps/chosen": -0.02499246969819069, "logps/rejected": -1.1194629669189453, "loss": 2.4865, "nll_loss": 0.6162107586860657, "rewards/accuracies": 1.0, "rewards/chosen": -0.002499246969819069, "rewards/margins": 0.10944704711437225, "rewards/rejected": -0.11194629967212677, "step": 4128 }, { "epoch": 2.855463347164592, "grad_norm": 6.431820869445801, "learning_rate": 3.969187029353005e-05, "log_odds_chosen": 4.778810501098633, "log_odds_ratio": -0.20529891550540924, "logits/chosen": -0.6679726839065552, "logits/rejected": -0.6747136116027832, "logps/chosen": -0.042183686047792435, "logps/rejected": -0.634368896484375, "loss": 2.6459, "nll_loss": 0.6409425735473633, "rewards/accuracies": 1.0, "rewards/chosen": -0.004218368325382471, "rewards/margins": 0.05921851843595505, "rewards/rejected": -0.06343688815832138, "step": 4129 }, { "epoch": 2.856154910096819, "grad_norm": 10.62767505645752, "learning_rate": 3.9688028277239894e-05, "log_odds_chosen": 8.825204849243164, "log_odds_ratio": -0.0023956261575222015, "logits/chosen": -0.37675511837005615, "logits/rejected": -0.46603935956954956, "logps/chosen": -0.023891257122159004, "logps/rejected": -1.8063663244247437, "loss": 2.1087, "nll_loss": 0.5269260406494141, "rewards/accuracies": 1.0, "rewards/chosen": -0.002389125758782029, "rewards/margins": 0.17824749648571014, "rewards/rejected": -0.18063662946224213, "step": 4130 }, { "epoch": 2.8568464730290457, "grad_norm": 9.79223918914795, "learning_rate": 3.968418626094975e-05, "log_odds_chosen": 7.960743427276611, "log_odds_ratio": -0.025452613830566406, "logits/chosen": -0.4843645691871643, "logits/rejected": -0.5887432098388672, "logps/chosen": -0.015297478064894676, "logps/rejected": -1.1496312618255615, "loss": 2.0604, "nll_loss": 0.512560248374939, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015297478530555964, "rewards/margins": 0.11343339085578918, "rewards/rejected": -0.11496314406394958, "step": 4131 }, { "epoch": 2.8575380359612725, "grad_norm": 13.040294647216797, "learning_rate": 3.96803442446596e-05, "log_odds_chosen": 7.492790222167969, "log_odds_ratio": -0.09991626441478729, "logits/chosen": -0.7050648331642151, "logits/rejected": -0.7885043621063232, "logps/chosen": -0.07697435468435287, "logps/rejected": -1.779345989227295, "loss": 1.6866, "nll_loss": 0.411651611328125, "rewards/accuracies": 1.0, "rewards/chosen": -0.007697435095906258, "rewards/margins": 0.17023716866970062, "rewards/rejected": -0.17793460190296173, "step": 4132 }, { "epoch": 2.8582295988934994, "grad_norm": 12.954906463623047, "learning_rate": 3.967650222836945e-05, "log_odds_chosen": 8.792889595031738, "log_odds_ratio": -0.0010062884539365768, "logits/chosen": -0.7896513938903809, "logits/rejected": -0.8716443181037903, "logps/chosen": -0.00804637186229229, "logps/rejected": -2.03944993019104, "loss": 3.4081, "nll_loss": 0.8519180417060852, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008046371513046324, "rewards/margins": 0.20314034819602966, "rewards/rejected": -0.20394501090049744, "step": 4133 }, { "epoch": 2.858921161825726, "grad_norm": 9.084781646728516, "learning_rate": 3.96726602120793e-05, "log_odds_chosen": 9.515411376953125, "log_odds_ratio": -0.0002063115971395746, "logits/chosen": -0.6839795112609863, "logits/rejected": -0.7928391098976135, "logps/chosen": -0.0004598861269187182, "logps/rejected": -1.465610146522522, "loss": 1.3488, "nll_loss": 0.3371778130531311, "rewards/accuracies": 1.0, "rewards/chosen": -4.598861050908454e-05, "rewards/margins": 0.14651501178741455, "rewards/rejected": -0.14656101167201996, "step": 4134 }, { "epoch": 2.859612724757953, "grad_norm": 5.373547077178955, "learning_rate": 3.9668818195789156e-05, "log_odds_chosen": 6.309614658355713, "log_odds_ratio": -0.1429893523454666, "logits/chosen": -0.3133348524570465, "logits/rejected": -0.2622450888156891, "logps/chosen": -0.03732801601290703, "logps/rejected": -1.3269708156585693, "loss": 1.9574, "nll_loss": 0.4750511348247528, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037328016478568316, "rewards/margins": 0.12896430492401123, "rewards/rejected": -0.13269709050655365, "step": 4135 }, { "epoch": 2.86030428769018, "grad_norm": 6.702598571777344, "learning_rate": 3.9664976179499e-05, "log_odds_chosen": 7.8618597984313965, "log_odds_ratio": -0.0689602792263031, "logits/chosen": -0.43502557277679443, "logits/rejected": -0.5458186268806458, "logps/chosen": -0.023434704169631004, "logps/rejected": -1.301213026046753, "loss": 1.5143, "nll_loss": 0.37167826294898987, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023434702306985855, "rewards/margins": 0.1277778297662735, "rewards/rejected": -0.13012130558490753, "step": 4136 }, { "epoch": 2.8609958506224067, "grad_norm": 6.976266860961914, "learning_rate": 3.9661134163208855e-05, "log_odds_chosen": 8.573393821716309, "log_odds_ratio": -0.002217457164078951, "logits/chosen": -0.6940039396286011, "logits/rejected": -0.6850975155830383, "logps/chosen": -0.01544831320643425, "logps/rejected": -1.618596076965332, "loss": 2.9021, "nll_loss": 0.725298285484314, "rewards/accuracies": 1.0, "rewards/chosen": -0.001544831320643425, "rewards/margins": 0.16031478345394135, "rewards/rejected": -0.16185960173606873, "step": 4137 }, { "epoch": 2.8616874135546335, "grad_norm": 10.130950927734375, "learning_rate": 3.965729214691871e-05, "log_odds_chosen": 8.983856201171875, "log_odds_ratio": -0.0004064887179993093, "logits/chosen": -0.7249870896339417, "logits/rejected": -0.8000103235244751, "logps/chosen": -0.0005151379154995084, "logps/rejected": -1.072391152381897, "loss": 3.4311, "nll_loss": 0.8577353358268738, "rewards/accuracies": 1.0, "rewards/chosen": -5.151379082235508e-05, "rewards/margins": 0.10718759894371033, "rewards/rejected": -0.10723911970853806, "step": 4138 }, { "epoch": 2.8623789764868603, "grad_norm": 11.023736000061035, "learning_rate": 3.965345013062855e-05, "log_odds_chosen": 7.778514862060547, "log_odds_ratio": -0.0062539223581552505, "logits/chosen": -0.6994754076004028, "logits/rejected": -0.727636992931366, "logps/chosen": -0.013756345957517624, "logps/rejected": -1.6888890266418457, "loss": 2.6655, "nll_loss": 0.6657446026802063, "rewards/accuracies": 1.0, "rewards/chosen": -0.001375634572468698, "rewards/margins": 0.16751326620578766, "rewards/rejected": -0.1688888967037201, "step": 4139 }, { "epoch": 2.863070539419087, "grad_norm": 10.296049118041992, "learning_rate": 3.9649608114338405e-05, "log_odds_chosen": 8.372491836547852, "log_odds_ratio": -0.00872521847486496, "logits/chosen": -0.6874377131462097, "logits/rejected": -0.6679731607437134, "logps/chosen": -0.01878192648291588, "logps/rejected": -2.1108551025390625, "loss": 1.8428, "nll_loss": 0.45982909202575684, "rewards/accuracies": 1.0, "rewards/chosen": -0.001878192531876266, "rewards/margins": 0.20920731127262115, "rewards/rejected": -0.2110855132341385, "step": 4140 }, { "epoch": 2.863762102351314, "grad_norm": 8.425458908081055, "learning_rate": 3.964576609804826e-05, "log_odds_chosen": 5.280470371246338, "log_odds_ratio": -0.17009109258651733, "logits/chosen": -0.6054092645645142, "logits/rejected": -0.6110110282897949, "logps/chosen": -0.03894772753119469, "logps/rejected": -0.6882196664810181, "loss": 2.2599, "nll_loss": 0.5479753017425537, "rewards/accuracies": 0.875, "rewards/chosen": -0.003894772846251726, "rewards/margins": 0.06492719054222107, "rewards/rejected": -0.06882195919752121, "step": 4141 }, { "epoch": 2.864453665283541, "grad_norm": 8.525484085083008, "learning_rate": 3.964192408175811e-05, "log_odds_chosen": 7.187023639678955, "log_odds_ratio": -0.005043178331106901, "logits/chosen": -0.5835676193237305, "logits/rejected": -0.6493792533874512, "logps/chosen": -0.009211929515004158, "logps/rejected": -1.476091742515564, "loss": 1.7105, "nll_loss": 0.4271281957626343, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009211929282173514, "rewards/margins": 0.14668798446655273, "rewards/rejected": -0.1476091593503952, "step": 4142 }, { "epoch": 2.8651452282157677, "grad_norm": 10.203916549682617, "learning_rate": 3.9638082065467956e-05, "log_odds_chosen": 7.878159046173096, "log_odds_ratio": -0.032731618732213974, "logits/chosen": -0.5193063020706177, "logits/rejected": -0.5699669122695923, "logps/chosen": -0.013802244327962399, "logps/rejected": -1.6927944421768188, "loss": 1.8504, "nll_loss": 0.45931798219680786, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013802244793623686, "rewards/margins": 0.1678992211818695, "rewards/rejected": -0.16927944123744965, "step": 4143 }, { "epoch": 2.8658367911479945, "grad_norm": 10.561854362487793, "learning_rate": 3.9634240049177815e-05, "log_odds_chosen": 8.101981163024902, "log_odds_ratio": -0.049096763134002686, "logits/chosen": -0.5377451777458191, "logits/rejected": -0.5618083477020264, "logps/chosen": -0.011895643547177315, "logps/rejected": -1.4573811292648315, "loss": 2.1364, "nll_loss": 0.5291892290115356, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011895645875483751, "rewards/margins": 0.14454853534698486, "rewards/rejected": -0.14573809504508972, "step": 4144 }, { "epoch": 2.8665283540802213, "grad_norm": 16.13058853149414, "learning_rate": 3.963039803288766e-05, "log_odds_chosen": 8.42239761352539, "log_odds_ratio": -0.0038425899110734463, "logits/chosen": -0.7643156051635742, "logits/rejected": -0.8458773493766785, "logps/chosen": -0.0014390680007636547, "logps/rejected": -1.2367219924926758, "loss": 2.3132, "nll_loss": 0.5779082775115967, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014390680007636547, "rewards/margins": 0.12352830171585083, "rewards/rejected": -0.12367220222949982, "step": 4145 }, { "epoch": 2.867219917012448, "grad_norm": 11.940716743469238, "learning_rate": 3.962655601659751e-05, "log_odds_chosen": 7.8669891357421875, "log_odds_ratio": -0.0037622463423758745, "logits/chosen": -0.45503783226013184, "logits/rejected": -0.5161993503570557, "logps/chosen": -0.0027905493043363094, "logps/rejected": -1.4165853261947632, "loss": 2.5674, "nll_loss": 0.6414811611175537, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002790549478959292, "rewards/margins": 0.1413794755935669, "rewards/rejected": -0.14165852963924408, "step": 4146 }, { "epoch": 2.867911479944675, "grad_norm": 11.2161865234375, "learning_rate": 3.9622714000307365e-05, "log_odds_chosen": 8.637145042419434, "log_odds_ratio": -0.0002987972693517804, "logits/chosen": -0.39274901151657104, "logits/rejected": -0.3932146430015564, "logps/chosen": -0.0006174084264785051, "logps/rejected": -1.0560269355773926, "loss": 2.1117, "nll_loss": 0.5278958082199097, "rewards/accuracies": 1.0, "rewards/chosen": -6.174084410304204e-05, "rewards/margins": 0.10554095357656479, "rewards/rejected": -0.1056026890873909, "step": 4147 }, { "epoch": 2.868603042876902, "grad_norm": 9.172666549682617, "learning_rate": 3.961887198401721e-05, "log_odds_chosen": 7.592833042144775, "log_odds_ratio": -0.12618932127952576, "logits/chosen": -0.5942907333374023, "logits/rejected": -0.507341742515564, "logps/chosen": -0.021718140691518784, "logps/rejected": -1.5091440677642822, "loss": 2.3169, "nll_loss": 0.5666061043739319, "rewards/accuracies": 0.875, "rewards/chosen": -0.0021718142088502645, "rewards/margins": 0.14874258637428284, "rewards/rejected": -0.15091440081596375, "step": 4148 }, { "epoch": 2.8692946058091287, "grad_norm": 8.65243911743164, "learning_rate": 3.9615029967727064e-05, "log_odds_chosen": 9.024596214294434, "log_odds_ratio": -0.0005055609508417547, "logits/chosen": -0.7445271015167236, "logits/rejected": -0.7984901666641235, "logps/chosen": -0.0023893089964985847, "logps/rejected": -1.6262643337249756, "loss": 2.3654, "nll_loss": 0.5912907719612122, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023893089382909238, "rewards/margins": 0.16238752007484436, "rewards/rejected": -0.1626264452934265, "step": 4149 }, { "epoch": 2.8699861687413555, "grad_norm": 6.936448574066162, "learning_rate": 3.9611187951436916e-05, "log_odds_chosen": 8.660881996154785, "log_odds_ratio": -0.003154081990942359, "logits/chosen": -0.3486626446247101, "logits/rejected": -0.5041624307632446, "logps/chosen": -0.008399656973779202, "logps/rejected": -1.586033582687378, "loss": 1.7391, "nll_loss": 0.43445295095443726, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008399657090194523, "rewards/margins": 0.15776340663433075, "rewards/rejected": -0.15860337018966675, "step": 4150 }, { "epoch": 2.8706777316735823, "grad_norm": 9.0235595703125, "learning_rate": 3.960734593514677e-05, "log_odds_chosen": 8.743791580200195, "log_odds_ratio": -0.016513537615537643, "logits/chosen": -0.32983332872390747, "logits/rejected": -0.3408893346786499, "logps/chosen": -0.007986058481037617, "logps/rejected": -1.2895114421844482, "loss": 1.7975, "nll_loss": 0.44772592186927795, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007986059063114226, "rewards/margins": 0.128152534365654, "rewards/rejected": -0.12895113229751587, "step": 4151 }, { "epoch": 2.871369294605809, "grad_norm": 7.92490291595459, "learning_rate": 3.9603503918856614e-05, "log_odds_chosen": 7.174158096313477, "log_odds_ratio": -0.10260120034217834, "logits/chosen": -0.7217217087745667, "logits/rejected": -0.7370195984840393, "logps/chosen": -0.03901343792676926, "logps/rejected": -1.1967353820800781, "loss": 2.2984, "nll_loss": 0.5643276572227478, "rewards/accuracies": 0.875, "rewards/chosen": -0.0039013437926769257, "rewards/margins": 0.11577218770980835, "rewards/rejected": -0.11967353522777557, "step": 4152 }, { "epoch": 2.872060857538036, "grad_norm": 10.109107971191406, "learning_rate": 3.959966190256647e-05, "log_odds_chosen": 8.623994827270508, "log_odds_ratio": -0.0028305151499807835, "logits/chosen": -0.376717209815979, "logits/rejected": -0.4848959743976593, "logps/chosen": -0.01420350931584835, "logps/rejected": -1.7301173210144043, "loss": 2.1019, "nll_loss": 0.5252029895782471, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014203509781509638, "rewards/margins": 0.1715914011001587, "rewards/rejected": -0.17301173508167267, "step": 4153 }, { "epoch": 2.872752420470263, "grad_norm": 24.528562545776367, "learning_rate": 3.959581988627632e-05, "log_odds_chosen": 5.912657737731934, "log_odds_ratio": -0.3544897437095642, "logits/chosen": -0.593536376953125, "logits/rejected": -0.5855855941772461, "logps/chosen": -0.03977097570896149, "logps/rejected": -1.3248205184936523, "loss": 2.729, "nll_loss": 0.6468054056167603, "rewards/accuracies": 0.875, "rewards/chosen": -0.003977097105234861, "rewards/margins": 0.1285049468278885, "rewards/rejected": -0.13248205184936523, "step": 4154 }, { "epoch": 2.8734439834024896, "grad_norm": 3.737313747406006, "learning_rate": 3.959197786998617e-05, "log_odds_chosen": 7.370685577392578, "log_odds_ratio": -0.003854503622278571, "logits/chosen": -0.1420593559741974, "logits/rejected": -0.14798593521118164, "logps/chosen": -0.029535293579101562, "logps/rejected": -1.2239516973495483, "loss": 1.6555, "nll_loss": 0.4134930968284607, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029535293579101562, "rewards/margins": 0.11944163590669632, "rewards/rejected": -0.12239515781402588, "step": 4155 }, { "epoch": 2.8741355463347165, "grad_norm": 12.146896362304688, "learning_rate": 3.9588135853696024e-05, "log_odds_chosen": 8.372788429260254, "log_odds_ratio": -0.0069664292968809605, "logits/chosen": -0.7387031316757202, "logits/rejected": -0.8111766576766968, "logps/chosen": -0.03236401453614235, "logps/rejected": -1.8445135354995728, "loss": 3.1845, "nll_loss": 0.7954254150390625, "rewards/accuracies": 1.0, "rewards/chosen": -0.003236401593312621, "rewards/margins": 0.18121495842933655, "rewards/rejected": -0.18445135653018951, "step": 4156 }, { "epoch": 2.8748271092669433, "grad_norm": 5.84153938293457, "learning_rate": 3.958429383740587e-05, "log_odds_chosen": 8.153403282165527, "log_odds_ratio": -0.006102471146732569, "logits/chosen": -0.6401076316833496, "logits/rejected": -0.6655992865562439, "logps/chosen": -0.02678937464952469, "logps/rejected": -1.4880584478378296, "loss": 1.7642, "nll_loss": 0.440429151058197, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026789375115185976, "rewards/margins": 0.1461269110441208, "rewards/rejected": -0.14880585670471191, "step": 4157 }, { "epoch": 2.87551867219917, "grad_norm": 12.111544609069824, "learning_rate": 3.958045182111572e-05, "log_odds_chosen": 8.821361541748047, "log_odds_ratio": -0.018130799755454063, "logits/chosen": -0.8116099834442139, "logits/rejected": -0.8167247772216797, "logps/chosen": -0.01426270417869091, "logps/rejected": -1.8682608604431152, "loss": 2.8337, "nll_loss": 0.7066094279289246, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014262704644352198, "rewards/margins": 0.18539981544017792, "rewards/rejected": -0.18682609498500824, "step": 4158 }, { "epoch": 2.876210235131397, "grad_norm": 10.255803108215332, "learning_rate": 3.9576609804825574e-05, "log_odds_chosen": 8.658646583557129, "log_odds_ratio": -0.0022335494868457317, "logits/chosen": -0.34910067915916443, "logits/rejected": -0.41302329301834106, "logps/chosen": -0.02276512421667576, "logps/rejected": -1.6416635513305664, "loss": 2.4113, "nll_loss": 0.6026020646095276, "rewards/accuracies": 1.0, "rewards/chosen": -0.002276512561365962, "rewards/margins": 0.16188983619213104, "rewards/rejected": -0.16416634619235992, "step": 4159 }, { "epoch": 2.876901798063624, "grad_norm": 11.113587379455566, "learning_rate": 3.957276778853543e-05, "log_odds_chosen": 7.869854927062988, "log_odds_ratio": -0.012342148460447788, "logits/chosen": -0.27500343322753906, "logits/rejected": -0.31267884373664856, "logps/chosen": -0.0049442751333117485, "logps/rejected": -0.9284360408782959, "loss": 2.7271, "nll_loss": 0.6805315017700195, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004944275133311749, "rewards/margins": 0.09234918653964996, "rewards/rejected": -0.09284360706806183, "step": 4160 }, { "epoch": 2.8775933609958506, "grad_norm": 6.368313789367676, "learning_rate": 3.956892577224527e-05, "log_odds_chosen": 8.16576099395752, "log_odds_ratio": -0.0021114160772413015, "logits/chosen": -0.646938681602478, "logits/rejected": -0.7232961654663086, "logps/chosen": -0.007677272893488407, "logps/rejected": -1.0247087478637695, "loss": 1.4459, "nll_loss": 0.36126214265823364, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007677272660657763, "rewards/margins": 0.10170315951108932, "rewards/rejected": -0.10247088968753815, "step": 4161 }, { "epoch": 2.8782849239280774, "grad_norm": 8.01225757598877, "learning_rate": 3.956508375595513e-05, "log_odds_chosen": 8.719640731811523, "log_odds_ratio": -0.0030138578731566668, "logits/chosen": -0.22106723487377167, "logits/rejected": -0.2978143095970154, "logps/chosen": -0.014302356168627739, "logps/rejected": -1.9104068279266357, "loss": 2.0553, "nll_loss": 0.5135329961776733, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014302355702966452, "rewards/margins": 0.18961046636104584, "rewards/rejected": -0.19104069471359253, "step": 4162 }, { "epoch": 2.8789764868603043, "grad_norm": 3.933363199234009, "learning_rate": 3.956124173966498e-05, "log_odds_chosen": 6.446390628814697, "log_odds_ratio": -0.0866515040397644, "logits/chosen": -0.45056915283203125, "logits/rejected": -0.44341132044792175, "logps/chosen": -0.06035517156124115, "logps/rejected": -2.150413990020752, "loss": 2.0524, "nll_loss": 0.5044370293617249, "rewards/accuracies": 1.0, "rewards/chosen": -0.0060355169698596, "rewards/margins": 0.20900589227676392, "rewards/rejected": -0.2150413990020752, "step": 4163 }, { "epoch": 2.879668049792531, "grad_norm": 4.023238658905029, "learning_rate": 3.955739972337483e-05, "log_odds_chosen": 8.455058097839355, "log_odds_ratio": -0.021985219791531563, "logits/chosen": -0.3112573027610779, "logits/rejected": -0.4217797815799713, "logps/chosen": -0.0011460219975560904, "logps/rejected": -1.2003974914550781, "loss": 1.8707, "nll_loss": 0.4654797911643982, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011460219684522599, "rewards/margins": 0.11992514133453369, "rewards/rejected": -0.12003974616527557, "step": 4164 }, { "epoch": 2.880359612724758, "grad_norm": 14.296460151672363, "learning_rate": 3.955355770708468e-05, "log_odds_chosen": 7.658043384552002, "log_odds_ratio": -0.11128035187721252, "logits/chosen": -0.44805073738098145, "logits/rejected": -0.5070619583129883, "logps/chosen": -0.021924814209342003, "logps/rejected": -1.7103160619735718, "loss": 1.6055, "nll_loss": 0.3902527093887329, "rewards/accuracies": 0.875, "rewards/chosen": -0.0021924814209342003, "rewards/margins": 0.16883914172649384, "rewards/rejected": -0.1710316240787506, "step": 4165 }, { "epoch": 2.8810511756569848, "grad_norm": 16.067855834960938, "learning_rate": 3.954971569079453e-05, "log_odds_chosen": 6.472123622894287, "log_odds_ratio": -0.07002764195203781, "logits/chosen": -0.31374144554138184, "logits/rejected": -0.3764995038509369, "logps/chosen": -0.010352091863751411, "logps/rejected": -1.2906413078308105, "loss": 2.0815, "nll_loss": 0.5133610963821411, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010352092795073986, "rewards/margins": 0.12802892923355103, "rewards/rejected": -0.12906414270401, "step": 4166 }, { "epoch": 2.8817427385892116, "grad_norm": 8.798420906066895, "learning_rate": 3.954587367450438e-05, "log_odds_chosen": 8.108315467834473, "log_odds_ratio": -0.0021794813219457865, "logits/chosen": -0.4807795584201813, "logits/rejected": -0.5782303214073181, "logps/chosen": -0.010585448704659939, "logps/rejected": -1.2855427265167236, "loss": 2.992, "nll_loss": 0.7477890849113464, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010585449635982513, "rewards/margins": 0.12749573588371277, "rewards/rejected": -0.12855426967144012, "step": 4167 }, { "epoch": 2.8824343015214384, "grad_norm": 8.277303695678711, "learning_rate": 3.954203165821423e-05, "log_odds_chosen": 8.692684173583984, "log_odds_ratio": -0.0006725117564201355, "logits/chosen": -0.350612074136734, "logits/rejected": -0.4381290674209595, "logps/chosen": -0.027638660743832588, "logps/rejected": -1.892930030822754, "loss": 2.8563, "nll_loss": 0.7140158414840698, "rewards/accuracies": 1.0, "rewards/chosen": -0.002763866214081645, "rewards/margins": 0.18652912974357605, "rewards/rejected": -0.1892929971218109, "step": 4168 }, { "epoch": 2.8831258644536653, "grad_norm": 10.069982528686523, "learning_rate": 3.9538189641924085e-05, "log_odds_chosen": 9.616327285766602, "log_odds_ratio": -0.00030772568425163627, "logits/chosen": -0.5223462581634521, "logits/rejected": -0.6427964568138123, "logps/chosen": -0.0007177912630140781, "logps/rejected": -1.6643712520599365, "loss": 2.0674, "nll_loss": 0.5168122053146362, "rewards/accuracies": 1.0, "rewards/chosen": -7.177912630140781e-05, "rewards/margins": 0.1663653403520584, "rewards/rejected": -0.16643711924552917, "step": 4169 }, { "epoch": 2.883817427385892, "grad_norm": 7.767836093902588, "learning_rate": 3.953434762563393e-05, "log_odds_chosen": 6.838160514831543, "log_odds_ratio": -0.02437964268028736, "logits/chosen": -0.39230552315711975, "logits/rejected": -0.443790078163147, "logps/chosen": -0.028187813237309456, "logps/rejected": -1.36873197555542, "loss": 2.5007, "nll_loss": 0.6227420568466187, "rewards/accuracies": 1.0, "rewards/chosen": -0.002818781416863203, "rewards/margins": 0.13405440747737885, "rewards/rejected": -0.13687318563461304, "step": 4170 }, { "epoch": 2.884508990318119, "grad_norm": 13.185744285583496, "learning_rate": 3.953050560934379e-05, "log_odds_chosen": 7.265320777893066, "log_odds_ratio": -0.028976215049624443, "logits/chosen": -0.5060882568359375, "logits/rejected": -0.49441277980804443, "logps/chosen": -0.03849921375513077, "logps/rejected": -1.089863657951355, "loss": 2.2308, "nll_loss": 0.5547950267791748, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038499212823808193, "rewards/margins": 0.105136439204216, "rewards/rejected": -0.10898637026548386, "step": 4171 }, { "epoch": 2.8852005532503457, "grad_norm": 12.196954727172852, "learning_rate": 3.9526663593053636e-05, "log_odds_chosen": 6.205423355102539, "log_odds_ratio": -0.13248874247074127, "logits/chosen": -0.4635886549949646, "logits/rejected": -0.46544769406318665, "logps/chosen": -0.026167435571551323, "logps/rejected": -1.3755590915679932, "loss": 2.1558, "nll_loss": 0.5257101655006409, "rewards/accuracies": 0.875, "rewards/chosen": -0.002616743789985776, "rewards/margins": 0.13493917882442474, "rewards/rejected": -0.13755591213703156, "step": 4172 }, { "epoch": 2.8858921161825726, "grad_norm": 8.013738632202148, "learning_rate": 3.952282157676349e-05, "log_odds_chosen": 7.696534156799316, "log_odds_ratio": -0.016941042616963387, "logits/chosen": -1.0045013427734375, "logits/rejected": -0.9985222816467285, "logps/chosen": -0.007150155026465654, "logps/rejected": -1.0973981618881226, "loss": 3.2758, "nll_loss": 0.8172488212585449, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007150155142880976, "rewards/margins": 0.10902479290962219, "rewards/rejected": -0.10973981767892838, "step": 4173 }, { "epoch": 2.8865836791147994, "grad_norm": 8.398833274841309, "learning_rate": 3.951897956047334e-05, "log_odds_chosen": 7.615683078765869, "log_odds_ratio": -0.03679567947983742, "logits/chosen": -0.08813100308179855, "logits/rejected": -0.1682872474193573, "logps/chosen": -0.015082152560353279, "logps/rejected": -1.5612318515777588, "loss": 1.8013, "nll_loss": 0.44665414094924927, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015082152094691992, "rewards/margins": 0.15461498498916626, "rewards/rejected": -0.15612319111824036, "step": 4174 }, { "epoch": 2.8872752420470262, "grad_norm": 6.936132907867432, "learning_rate": 3.9515137544183186e-05, "log_odds_chosen": 8.234755516052246, "log_odds_ratio": -0.009359374642372131, "logits/chosen": -0.6276150941848755, "logits/rejected": -0.6759732365608215, "logps/chosen": -0.01394907757639885, "logps/rejected": -1.7503783702850342, "loss": 2.0957, "nll_loss": 0.522986114025116, "rewards/accuracies": 1.0, "rewards/chosen": -0.001394907827489078, "rewards/margins": 0.17364291846752167, "rewards/rejected": -0.17503784596920013, "step": 4175 }, { "epoch": 2.887966804979253, "grad_norm": 11.726055145263672, "learning_rate": 3.951129552789304e-05, "log_odds_chosen": 9.761301040649414, "log_odds_ratio": -8.028695447137579e-05, "logits/chosen": -0.7729347348213196, "logits/rejected": -0.8374239206314087, "logps/chosen": -0.00031806406332179904, "logps/rejected": -1.7444686889648438, "loss": 1.9061, "nll_loss": 0.4765219986438751, "rewards/accuracies": 1.0, "rewards/chosen": -3.180640851496719e-05, "rewards/margins": 0.17441505193710327, "rewards/rejected": -0.17444688081741333, "step": 4176 }, { "epoch": 2.88865836791148, "grad_norm": 9.028305053710938, "learning_rate": 3.950745351160289e-05, "log_odds_chosen": 9.551950454711914, "log_odds_ratio": -0.048649102449417114, "logits/chosen": -0.5353203415870667, "logits/rejected": -0.6251725554466248, "logps/chosen": -0.011355753988027573, "logps/rejected": -1.9747955799102783, "loss": 1.852, "nll_loss": 0.45813390612602234, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011355754686519504, "rewards/margins": 0.19634398818016052, "rewards/rejected": -0.19747956097126007, "step": 4177 }, { "epoch": 2.8893499308437067, "grad_norm": 7.553938865661621, "learning_rate": 3.9503611495312744e-05, "log_odds_chosen": 8.27835750579834, "log_odds_ratio": -0.04242272675037384, "logits/chosen": -0.3490467667579651, "logits/rejected": -0.36693528294563293, "logps/chosen": -0.010712453164160252, "logps/rejected": -1.159693717956543, "loss": 1.8144, "nll_loss": 0.4493652880191803, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010712452931329608, "rewards/margins": 0.11489813029766083, "rewards/rejected": -0.11596937477588654, "step": 4178 }, { "epoch": 2.8900414937759336, "grad_norm": 10.130223274230957, "learning_rate": 3.949976947902259e-05, "log_odds_chosen": 6.871800422668457, "log_odds_ratio": -0.04268191382288933, "logits/chosen": -0.7141193151473999, "logits/rejected": -0.7577247619628906, "logps/chosen": -0.012114400044083595, "logps/rejected": -1.2614021301269531, "loss": 2.3988, "nll_loss": 0.5954397916793823, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012114399578422308, "rewards/margins": 0.124928779900074, "rewards/rejected": -0.12614022195339203, "step": 4179 }, { "epoch": 2.8907330567081604, "grad_norm": 5.669473648071289, "learning_rate": 3.949592746273245e-05, "log_odds_chosen": 7.002610206604004, "log_odds_ratio": -0.08763153851032257, "logits/chosen": -0.6747829914093018, "logits/rejected": -0.6756397485733032, "logps/chosen": -0.015487837605178356, "logps/rejected": -0.8023409247398376, "loss": 2.2792, "nll_loss": 0.5610270500183105, "rewards/accuracies": 0.875, "rewards/chosen": -0.001548783970065415, "rewards/margins": 0.07868531346321106, "rewards/rejected": -0.0802340880036354, "step": 4180 }, { "epoch": 2.891424619640387, "grad_norm": 7.439328193664551, "learning_rate": 3.9492085446442294e-05, "log_odds_chosen": 8.811647415161133, "log_odds_ratio": -0.0013991171726956964, "logits/chosen": -0.5426240563392639, "logits/rejected": -0.5518914461135864, "logps/chosen": -0.007188364397734404, "logps/rejected": -1.4096122980117798, "loss": 2.1917, "nll_loss": 0.5477972030639648, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007188364979811013, "rewards/margins": 0.14024239778518677, "rewards/rejected": -0.14096122980117798, "step": 4181 }, { "epoch": 2.892116182572614, "grad_norm": 5.265755653381348, "learning_rate": 3.948824343015215e-05, "log_odds_chosen": 7.264721870422363, "log_odds_ratio": -0.18816621601581573, "logits/chosen": -0.3916324973106384, "logits/rejected": -0.34224846959114075, "logps/chosen": -0.046060629189014435, "logps/rejected": -1.132361888885498, "loss": 1.7736, "nll_loss": 0.42457854747772217, "rewards/accuracies": 0.875, "rewards/chosen": -0.004606062546372414, "rewards/margins": 0.10863012075424194, "rewards/rejected": -0.1132361888885498, "step": 4182 }, { "epoch": 2.892807745504841, "grad_norm": 5.767202854156494, "learning_rate": 3.9484401413862e-05, "log_odds_chosen": 6.7517900466918945, "log_odds_ratio": -0.016323775053024292, "logits/chosen": -0.07235578447580338, "logits/rejected": -0.05019722133874893, "logps/chosen": -0.018838627263903618, "logps/rejected": -1.6113855838775635, "loss": 1.6765, "nll_loss": 0.4175013303756714, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018838628893718123, "rewards/margins": 0.15925469994544983, "rewards/rejected": -0.16113856434822083, "step": 4183 }, { "epoch": 2.8934993084370677, "grad_norm": 13.19206428527832, "learning_rate": 3.9480559397571845e-05, "log_odds_chosen": 8.968629837036133, "log_odds_ratio": -0.00036881750565953553, "logits/chosen": -0.6369163393974304, "logits/rejected": -0.7073555588722229, "logps/chosen": -0.0007098768837749958, "logps/rejected": -1.733012080192566, "loss": 2.5317, "nll_loss": 0.6328850388526917, "rewards/accuracies": 1.0, "rewards/chosen": -7.098769856384024e-05, "rewards/margins": 0.17323023080825806, "rewards/rejected": -0.17330121994018555, "step": 4184 }, { "epoch": 2.8941908713692945, "grad_norm": 9.749066352844238, "learning_rate": 3.94767173812817e-05, "log_odds_chosen": 9.5924072265625, "log_odds_ratio": -0.0001539234654046595, "logits/chosen": -0.7313152551651001, "logits/rejected": -0.8595709800720215, "logps/chosen": -0.0010388323571532965, "logps/rejected": -1.938551902770996, "loss": 2.7596, "nll_loss": 0.6898916959762573, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010388322698418051, "rewards/margins": 0.19375132024288177, "rewards/rejected": -0.1938551962375641, "step": 4185 }, { "epoch": 2.8948824343015214, "grad_norm": 6.4055328369140625, "learning_rate": 3.947287536499155e-05, "log_odds_chosen": 8.286406517028809, "log_odds_ratio": -0.07344295084476471, "logits/chosen": -0.599174439907074, "logits/rejected": -0.5423631072044373, "logps/chosen": -0.01735123060643673, "logps/rejected": -1.1026806831359863, "loss": 1.5879, "nll_loss": 0.38963472843170166, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017351231072098017, "rewards/margins": 0.10853295028209686, "rewards/rejected": -0.11026806384325027, "step": 4186 }, { "epoch": 2.895573997233748, "grad_norm": 9.003849983215332, "learning_rate": 3.94690333487014e-05, "log_odds_chosen": 8.759421348571777, "log_odds_ratio": -0.03563835099339485, "logits/chosen": -1.0180939435958862, "logits/rejected": -1.0876891613006592, "logps/chosen": -0.008947193622589111, "logps/rejected": -1.673654556274414, "loss": 1.7472, "nll_loss": 0.4332250952720642, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008947193855419755, "rewards/margins": 0.1664707213640213, "rewards/rejected": -0.16736546158790588, "step": 4187 }, { "epoch": 2.896265560165975, "grad_norm": 7.504535675048828, "learning_rate": 3.946519133241125e-05, "log_odds_chosen": 8.320568084716797, "log_odds_ratio": -0.05261503532528877, "logits/chosen": -0.7506527304649353, "logits/rejected": -0.7245360612869263, "logps/chosen": -0.016820482909679413, "logps/rejected": -1.4949345588684082, "loss": 1.5225, "nll_loss": 0.37536221742630005, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016820483142510056, "rewards/margins": 0.1478113979101181, "rewards/rejected": -0.14949345588684082, "step": 4188 }, { "epoch": 2.896957123098202, "grad_norm": 10.017531394958496, "learning_rate": 3.946134931612111e-05, "log_odds_chosen": 8.923404693603516, "log_odds_ratio": -0.012731466442346573, "logits/chosen": -0.9017990827560425, "logits/rejected": -0.985031008720398, "logps/chosen": -0.0057351253926754, "logps/rejected": -1.6369001865386963, "loss": 1.9505, "nll_loss": 0.48634442687034607, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005735125159844756, "rewards/margins": 0.16311649978160858, "rewards/rejected": -0.1636900156736374, "step": 4189 }, { "epoch": 2.8976486860304287, "grad_norm": 11.514911651611328, "learning_rate": 3.945750729983095e-05, "log_odds_chosen": 9.583656311035156, "log_odds_ratio": -0.000768057769164443, "logits/chosen": -0.44403791427612305, "logits/rejected": -0.5449143648147583, "logps/chosen": -0.0007324862526729703, "logps/rejected": -1.5823893547058105, "loss": 1.8875, "nll_loss": 0.4717921316623688, "rewards/accuracies": 1.0, "rewards/chosen": -7.32486296328716e-05, "rewards/margins": 0.15816569328308105, "rewards/rejected": -0.15823894739151, "step": 4190 }, { "epoch": 2.8983402489626555, "grad_norm": 9.892921447753906, "learning_rate": 3.9453665283540805e-05, "log_odds_chosen": 10.3226957321167, "log_odds_ratio": -0.0001017776012304239, "logits/chosen": -0.7956699132919312, "logits/rejected": -0.9215668439865112, "logps/chosen": -0.006516370922327042, "logps/rejected": -2.688707113265991, "loss": 2.5422, "nll_loss": 0.6355412006378174, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006516370922327042, "rewards/margins": 0.2682190537452698, "rewards/rejected": -0.2688707113265991, "step": 4191 }, { "epoch": 2.8990318118948823, "grad_norm": 3.587132215499878, "learning_rate": 3.944982326725066e-05, "log_odds_chosen": 8.61502456665039, "log_odds_ratio": -0.0015266663394868374, "logits/chosen": -0.507659375667572, "logits/rejected": -0.5381101369857788, "logps/chosen": -0.010759102180600166, "logps/rejected": -1.9087737798690796, "loss": 1.8812, "nll_loss": 0.4701571464538574, "rewards/accuracies": 1.0, "rewards/chosen": -0.001075910171493888, "rewards/margins": 0.18980145454406738, "rewards/rejected": -0.19087737798690796, "step": 4192 }, { "epoch": 2.899723374827109, "grad_norm": 9.977949142456055, "learning_rate": 3.94459812509605e-05, "log_odds_chosen": 8.210229873657227, "log_odds_ratio": -0.0020022920798510313, "logits/chosen": -0.6053897738456726, "logits/rejected": -0.593431830406189, "logps/chosen": -0.0019178414950147271, "logps/rejected": -1.0676766633987427, "loss": 1.6852, "nll_loss": 0.42109107971191406, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019178414368070662, "rewards/margins": 0.1065758764743805, "rewards/rejected": -0.10676766186952591, "step": 4193 }, { "epoch": 2.900414937759336, "grad_norm": 5.745366096496582, "learning_rate": 3.9442139234670356e-05, "log_odds_chosen": 8.749312400817871, "log_odds_ratio": -0.007004758343100548, "logits/chosen": -0.43974483013153076, "logits/rejected": -0.4170272648334503, "logps/chosen": -0.009306280873715878, "logps/rejected": -2.1502115726470947, "loss": 2.3188, "nll_loss": 0.5790024995803833, "rewards/accuracies": 1.0, "rewards/chosen": -0.000930628040805459, "rewards/margins": 0.2140905261039734, "rewards/rejected": -0.21502117812633514, "step": 4194 }, { "epoch": 2.901106500691563, "grad_norm": 9.20225715637207, "learning_rate": 3.943829721838021e-05, "log_odds_chosen": 8.916379928588867, "log_odds_ratio": -0.0017394019523635507, "logits/chosen": -0.3987049162387848, "logits/rejected": -0.4037243127822876, "logps/chosen": -0.011702263727784157, "logps/rejected": -1.6109840869903564, "loss": 2.0616, "nll_loss": 0.5152207016944885, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011702264891937375, "rewards/margins": 0.15992820262908936, "rewards/rejected": -0.1610984057188034, "step": 4195 }, { "epoch": 2.9017980636237897, "grad_norm": 8.414830207824707, "learning_rate": 3.943445520209006e-05, "log_odds_chosen": 9.312470436096191, "log_odds_ratio": -0.0006356225931085646, "logits/chosen": -0.4111187756061554, "logits/rejected": -0.49365267157554626, "logps/chosen": -0.024308985099196434, "logps/rejected": -1.9406907558441162, "loss": 2.0854, "nll_loss": 0.5212797522544861, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024308988358825445, "rewards/margins": 0.19163817167282104, "rewards/rejected": -0.19406907260417938, "step": 4196 }, { "epoch": 2.9024896265560165, "grad_norm": 6.095809459686279, "learning_rate": 3.9430613185799906e-05, "log_odds_chosen": 9.26791763305664, "log_odds_ratio": -0.00040234148036688566, "logits/chosen": -0.5236822962760925, "logits/rejected": -0.48013976216316223, "logps/chosen": -0.03143753483891487, "logps/rejected": -2.8108372688293457, "loss": 1.9503, "nll_loss": 0.4875357151031494, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031437536235898733, "rewards/margins": 0.2779400050640106, "rewards/rejected": -0.28108376264572144, "step": 4197 }, { "epoch": 2.9031811894882433, "grad_norm": 12.865019798278809, "learning_rate": 3.9426771169509766e-05, "log_odds_chosen": 7.660120964050293, "log_odds_ratio": -0.009351848624646664, "logits/chosen": -0.6887496709823608, "logits/rejected": -0.7265763282775879, "logps/chosen": -0.02220279350876808, "logps/rejected": -2.0053799152374268, "loss": 2.4326, "nll_loss": 0.6072081923484802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022202793043106794, "rewards/margins": 0.1983177214860916, "rewards/rejected": -0.2005380094051361, "step": 4198 }, { "epoch": 2.90387275242047, "grad_norm": 5.224964141845703, "learning_rate": 3.942292915321961e-05, "log_odds_chosen": 6.3237409591674805, "log_odds_ratio": -0.14260509610176086, "logits/chosen": -0.5718114376068115, "logits/rejected": -0.5917366743087769, "logps/chosen": -0.03757050260901451, "logps/rejected": -1.6181234121322632, "loss": 2.0782, "nll_loss": 0.5052976608276367, "rewards/accuracies": 0.875, "rewards/chosen": -0.003757050260901451, "rewards/margins": 0.15805530548095703, "rewards/rejected": -0.16181235015392303, "step": 4199 }, { "epoch": 2.904564315352697, "grad_norm": 7.983197212219238, "learning_rate": 3.9419087136929464e-05, "log_odds_chosen": 8.89438247680664, "log_odds_ratio": -0.0003911318490281701, "logits/chosen": -0.39353638887405396, "logits/rejected": -0.46018946170806885, "logps/chosen": -0.0006360848783515394, "logps/rejected": -1.3578107357025146, "loss": 2.3677, "nll_loss": 0.5918948650360107, "rewards/accuracies": 1.0, "rewards/chosen": -6.360848783515394e-05, "rewards/margins": 0.1357174664735794, "rewards/rejected": -0.13578107953071594, "step": 4200 }, { "epoch": 2.905255878284924, "grad_norm": 5.677807331085205, "learning_rate": 3.9415245120639316e-05, "log_odds_chosen": 7.875686168670654, "log_odds_ratio": -0.05565962940454483, "logits/chosen": -0.5313123464584351, "logits/rejected": -0.5201388597488403, "logps/chosen": -0.03696315363049507, "logps/rejected": -1.6507844924926758, "loss": 1.4112, "nll_loss": 0.3472402095794678, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036963154561817646, "rewards/margins": 0.16138213872909546, "rewards/rejected": -0.16507846117019653, "step": 4201 }, { "epoch": 2.9059474412171507, "grad_norm": 4.537448406219482, "learning_rate": 3.941140310434916e-05, "log_odds_chosen": 8.374617576599121, "log_odds_ratio": -0.0009444555034860969, "logits/chosen": -0.8532025218009949, "logits/rejected": -0.8434891104698181, "logps/chosen": -0.006630830001085997, "logps/rejected": -2.0214223861694336, "loss": 1.8496, "nll_loss": 0.46230918169021606, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006630829884670675, "rewards/margins": 0.20147916674613953, "rewards/rejected": -0.20214225351810455, "step": 4202 }, { "epoch": 2.9066390041493775, "grad_norm": 7.785838603973389, "learning_rate": 3.9407561088059014e-05, "log_odds_chosen": 8.70776081085205, "log_odds_ratio": -0.0008020623936317861, "logits/chosen": -0.733709454536438, "logits/rejected": -0.7174305319786072, "logps/chosen": -0.016540687531232834, "logps/rejected": -2.144221305847168, "loss": 2.8436, "nll_loss": 0.710813581943512, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016540689393877983, "rewards/margins": 0.2127680480480194, "rewards/rejected": -0.21442212164402008, "step": 4203 }, { "epoch": 2.9073305670816043, "grad_norm": 56.542301177978516, "learning_rate": 3.940371907176887e-05, "log_odds_chosen": 4.422608375549316, "log_odds_ratio": -0.39145660400390625, "logits/chosen": -0.3432076871395111, "logits/rejected": -0.4004635810852051, "logps/chosen": -0.07698570191860199, "logps/rejected": -1.0786305665969849, "loss": 2.1216, "nll_loss": 0.49124303460121155, "rewards/accuracies": 0.75, "rewards/chosen": -0.007698570378124714, "rewards/margins": 0.10016448050737381, "rewards/rejected": -0.10786305367946625, "step": 4204 }, { "epoch": 2.908022130013831, "grad_norm": 10.854572296142578, "learning_rate": 3.939987705547872e-05, "log_odds_chosen": 7.742916584014893, "log_odds_ratio": -0.07084621489048004, "logits/chosen": -0.7000867128372192, "logits/rejected": -0.7924137711524963, "logps/chosen": -0.009045793674886227, "logps/rejected": -1.1441210508346558, "loss": 2.219, "nll_loss": 0.5476707816123962, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009045794140547514, "rewards/margins": 0.11350752413272858, "rewards/rejected": -0.1144120991230011, "step": 4205 }, { "epoch": 2.908713692946058, "grad_norm": 14.714045524597168, "learning_rate": 3.9396035039188565e-05, "log_odds_chosen": 7.317990779876709, "log_odds_ratio": -0.3476130962371826, "logits/chosen": -0.6655561923980713, "logits/rejected": -0.6966454982757568, "logps/chosen": -0.021608801558613777, "logps/rejected": -0.984076976776123, "loss": 2.5814, "nll_loss": 0.6105821132659912, "rewards/accuracies": 0.875, "rewards/chosen": -0.0021608800161629915, "rewards/margins": 0.09624682366847992, "rewards/rejected": -0.09840770065784454, "step": 4206 }, { "epoch": 2.909405255878285, "grad_norm": 10.995607376098633, "learning_rate": 3.9392193022898424e-05, "log_odds_chosen": 7.899350166320801, "log_odds_ratio": -0.21427175402641296, "logits/chosen": -0.6187517642974854, "logits/rejected": -0.6937867403030396, "logps/chosen": -0.10036478191614151, "logps/rejected": -1.7736245393753052, "loss": 1.8907, "nll_loss": 0.4512489140033722, "rewards/accuracies": 0.875, "rewards/chosen": -0.010036477819085121, "rewards/margins": 0.1673259735107422, "rewards/rejected": -0.17736247181892395, "step": 4207 }, { "epoch": 2.9100968188105116, "grad_norm": 7.072151184082031, "learning_rate": 3.938835100660827e-05, "log_odds_chosen": 9.21097469329834, "log_odds_ratio": -0.053593918681144714, "logits/chosen": -0.14670710265636444, "logits/rejected": -0.2045065462589264, "logps/chosen": -0.029685556888580322, "logps/rejected": -1.4088356494903564, "loss": 1.424, "nll_loss": 0.3506321609020233, "rewards/accuracies": 1.0, "rewards/chosen": -0.002968555549159646, "rewards/margins": 0.1379150152206421, "rewards/rejected": -0.14088356494903564, "step": 4208 }, { "epoch": 2.9107883817427385, "grad_norm": 6.414186000823975, "learning_rate": 3.938450899031812e-05, "log_odds_chosen": 9.366171836853027, "log_odds_ratio": -0.00038891323492862284, "logits/chosen": -0.3959054946899414, "logits/rejected": -0.4083250164985657, "logps/chosen": -0.009610005654394627, "logps/rejected": -1.7705631256103516, "loss": 1.6591, "nll_loss": 0.41473865509033203, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009610005654394627, "rewards/margins": 0.17609530687332153, "rewards/rejected": -0.17705631256103516, "step": 4209 }, { "epoch": 2.9114799446749653, "grad_norm": 10.280040740966797, "learning_rate": 3.9380666974027975e-05, "log_odds_chosen": 9.308536529541016, "log_odds_ratio": -0.00011768620606744662, "logits/chosen": -0.6421458721160889, "logits/rejected": -0.665635347366333, "logps/chosen": -0.0003707177529577166, "logps/rejected": -1.6169333457946777, "loss": 2.0478, "nll_loss": 0.5119322538375854, "rewards/accuracies": 1.0, "rewards/chosen": -3.707177529577166e-05, "rewards/margins": 0.16165626049041748, "rewards/rejected": -0.16169333457946777, "step": 4210 }, { "epoch": 2.912171507607192, "grad_norm": 12.741182327270508, "learning_rate": 3.937682495773782e-05, "log_odds_chosen": 8.938257217407227, "log_odds_ratio": -0.41519448161125183, "logits/chosen": -0.4574255347251892, "logits/rejected": -0.5206787586212158, "logps/chosen": -0.06497032195329666, "logps/rejected": -2.0512773990631104, "loss": 2.2847, "nll_loss": 0.529647707939148, "rewards/accuracies": 0.875, "rewards/chosen": -0.006497031543403864, "rewards/margins": 0.19863072037696838, "rewards/rejected": -0.2051277458667755, "step": 4211 }, { "epoch": 2.912863070539419, "grad_norm": 11.842411994934082, "learning_rate": 3.937298294144767e-05, "log_odds_chosen": 8.423245429992676, "log_odds_ratio": -0.0006916861748322845, "logits/chosen": -0.7953372001647949, "logits/rejected": -0.8773461580276489, "logps/chosen": -0.001747145433910191, "logps/rejected": -1.8659417629241943, "loss": 3.0268, "nll_loss": 0.7566385269165039, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001747145433910191, "rewards/margins": 0.18641947209835052, "rewards/rejected": -0.1865941882133484, "step": 4212 }, { "epoch": 2.913554633471646, "grad_norm": 8.623528480529785, "learning_rate": 3.9369140925157525e-05, "log_odds_chosen": 9.111515045166016, "log_odds_ratio": -0.00039377735811285675, "logits/chosen": -0.8491336703300476, "logits/rejected": -0.8755612969398499, "logps/chosen": -0.0004639826947823167, "logps/rejected": -1.3792146444320679, "loss": 2.2468, "nll_loss": 0.5616547465324402, "rewards/accuracies": 1.0, "rewards/chosen": -4.63982651126571e-05, "rewards/margins": 0.13787506520748138, "rewards/rejected": -0.13792146742343903, "step": 4213 }, { "epoch": 2.9142461964038726, "grad_norm": 9.771961212158203, "learning_rate": 3.936529890886738e-05, "log_odds_chosen": 9.823007583618164, "log_odds_ratio": -0.0001347611687378958, "logits/chosen": -0.7041987776756287, "logits/rejected": -0.6953439712524414, "logps/chosen": -0.000278160790912807, "logps/rejected": -1.5819087028503418, "loss": 2.2263, "nll_loss": 0.5565525889396667, "rewards/accuracies": 1.0, "rewards/chosen": -2.7816076908493415e-05, "rewards/margins": 0.15816305577754974, "rewards/rejected": -0.15819087624549866, "step": 4214 }, { "epoch": 2.9149377593360994, "grad_norm": 18.160442352294922, "learning_rate": 3.936145689257722e-05, "log_odds_chosen": 7.970919609069824, "log_odds_ratio": -0.017684318125247955, "logits/chosen": -0.5528253316879272, "logits/rejected": -0.6049160361289978, "logps/chosen": -0.02685156650841236, "logps/rejected": -1.7387304306030273, "loss": 1.8739, "nll_loss": 0.46671339869499207, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026851568836718798, "rewards/margins": 0.1711878776550293, "rewards/rejected": -0.17387305200099945, "step": 4215 }, { "epoch": 2.9156293222683263, "grad_norm": 10.328432083129883, "learning_rate": 3.935761487628708e-05, "log_odds_chosen": 9.84450912475586, "log_odds_ratio": -8.842186798574403e-05, "logits/chosen": -0.6626081466674805, "logits/rejected": -0.7288064956665039, "logps/chosen": -0.0009883481543511152, "logps/rejected": -1.6530663967132568, "loss": 1.9919, "nll_loss": 0.4979715347290039, "rewards/accuracies": 1.0, "rewards/chosen": -9.883481834549457e-05, "rewards/margins": 0.16520781815052032, "rewards/rejected": -0.16530665755271912, "step": 4216 }, { "epoch": 2.916320885200553, "grad_norm": 4.938757419586182, "learning_rate": 3.935377285999693e-05, "log_odds_chosen": 8.558516502380371, "log_odds_ratio": -0.002902967156842351, "logits/chosen": -0.6540226936340332, "logits/rejected": -0.5421640276908875, "logps/chosen": -0.031563468277454376, "logps/rejected": -1.5742692947387695, "loss": 1.5972, "nll_loss": 0.3990045189857483, "rewards/accuracies": 1.0, "rewards/chosen": -0.003156346967443824, "rewards/margins": 0.15427058935165405, "rewards/rejected": -0.15742693841457367, "step": 4217 }, { "epoch": 2.91701244813278, "grad_norm": 8.419347763061523, "learning_rate": 3.934993084370678e-05, "log_odds_chosen": 7.854592800140381, "log_odds_ratio": -0.006661005783826113, "logits/chosen": -0.7401362061500549, "logits/rejected": -0.7170186638832092, "logps/chosen": -0.06734812259674072, "logps/rejected": -2.068657159805298, "loss": 2.8914, "nll_loss": 0.7221934199333191, "rewards/accuracies": 1.0, "rewards/chosen": -0.006734812632203102, "rewards/margins": 0.200130894780159, "rewards/rejected": -0.20686571300029755, "step": 4218 }, { "epoch": 2.9177040110650068, "grad_norm": 16.346050262451172, "learning_rate": 3.934608882741663e-05, "log_odds_chosen": 5.416713714599609, "log_odds_ratio": -0.6428422331809998, "logits/chosen": -0.48766276240348816, "logits/rejected": -0.5208728313446045, "logps/chosen": -0.110658660531044, "logps/rejected": -1.206841230392456, "loss": 2.2163, "nll_loss": 0.489782452583313, "rewards/accuracies": 0.875, "rewards/chosen": -0.01106586679816246, "rewards/margins": 0.10961826145648956, "rewards/rejected": -0.12068411707878113, "step": 4219 }, { "epoch": 2.9183955739972336, "grad_norm": 5.745017051696777, "learning_rate": 3.934224681112648e-05, "log_odds_chosen": 6.683662414550781, "log_odds_ratio": -0.08422426879405975, "logits/chosen": -0.37878748774528503, "logits/rejected": -0.42978018522262573, "logps/chosen": -0.018797200173139572, "logps/rejected": -1.3064601421356201, "loss": 1.8086, "nll_loss": 0.44373905658721924, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018797202501446009, "rewards/margins": 0.12876629829406738, "rewards/rejected": -0.1306460201740265, "step": 4220 }, { "epoch": 2.9190871369294604, "grad_norm": 10.25680160522461, "learning_rate": 3.933840479483633e-05, "log_odds_chosen": 9.171581268310547, "log_odds_ratio": -0.0021570641547441483, "logits/chosen": -0.6555256843566895, "logits/rejected": -0.6951804161071777, "logps/chosen": -0.003925275523215532, "logps/rejected": -1.6789007186889648, "loss": 1.3798, "nll_loss": 0.34473517537117004, "rewards/accuracies": 1.0, "rewards/chosen": -0.00039252755232155323, "rewards/margins": 0.16749754548072815, "rewards/rejected": -0.16789008677005768, "step": 4221 }, { "epoch": 2.9197786998616873, "grad_norm": 15.780588150024414, "learning_rate": 3.9334562778546184e-05, "log_odds_chosen": 5.6302595138549805, "log_odds_ratio": -0.6053066253662109, "logits/chosen": -0.7769954800605774, "logits/rejected": -0.816230058670044, "logps/chosen": -0.09104986488819122, "logps/rejected": -1.325893759727478, "loss": 2.6182, "nll_loss": 0.5940166115760803, "rewards/accuracies": 0.875, "rewards/chosen": -0.009104986675083637, "rewards/margins": 0.12348438799381256, "rewards/rejected": -0.13258937001228333, "step": 4222 }, { "epoch": 2.920470262793914, "grad_norm": 11.950429916381836, "learning_rate": 3.9330720762256036e-05, "log_odds_chosen": 9.111445426940918, "log_odds_ratio": -0.007683016825467348, "logits/chosen": -0.725569486618042, "logits/rejected": -0.8772428035736084, "logps/chosen": -0.007877436466515064, "logps/rejected": -1.8429392576217651, "loss": 1.9335, "nll_loss": 0.4826072156429291, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007877436000853777, "rewards/margins": 0.18350617587566376, "rewards/rejected": -0.1842939257621765, "step": 4223 }, { "epoch": 2.921161825726141, "grad_norm": 9.985198020935059, "learning_rate": 3.932687874596588e-05, "log_odds_chosen": 9.653959274291992, "log_odds_ratio": -0.00018180804909206927, "logits/chosen": -0.528473973274231, "logits/rejected": -0.6489172577857971, "logps/chosen": -0.0006757283117622137, "logps/rejected": -2.1277120113372803, "loss": 1.8746, "nll_loss": 0.46863406896591187, "rewards/accuracies": 1.0, "rewards/chosen": -6.757282972102985e-05, "rewards/margins": 0.2127036303281784, "rewards/rejected": -0.2127711921930313, "step": 4224 }, { "epoch": 2.9218533886583677, "grad_norm": 5.75289249420166, "learning_rate": 3.932303672967574e-05, "log_odds_chosen": 8.337453842163086, "log_odds_ratio": -0.002024096203967929, "logits/chosen": -0.5319167375564575, "logits/rejected": -0.6447017192840576, "logps/chosen": -0.0009236917831003666, "logps/rejected": -1.2165443897247314, "loss": 1.9135, "nll_loss": 0.47816094756126404, "rewards/accuracies": 1.0, "rewards/chosen": -9.236918413080275e-05, "rewards/margins": 0.12156207114458084, "rewards/rejected": -0.1216544359922409, "step": 4225 }, { "epoch": 2.922544951590595, "grad_norm": 11.359525680541992, "learning_rate": 3.9319194713385587e-05, "log_odds_chosen": 6.562146186828613, "log_odds_ratio": -0.07711490243673325, "logits/chosen": -0.5829511880874634, "logits/rejected": -0.6149609088897705, "logps/chosen": -0.05082641541957855, "logps/rejected": -1.6802895069122314, "loss": 2.6736, "nll_loss": 0.660689651966095, "rewards/accuracies": 1.0, "rewards/chosen": -0.005082641262561083, "rewards/margins": 0.16294631361961365, "rewards/rejected": -0.16802895069122314, "step": 4226 }, { "epoch": 2.923236514522822, "grad_norm": 7.414312839508057, "learning_rate": 3.931535269709544e-05, "log_odds_chosen": 8.072091102600098, "log_odds_ratio": -0.0029985117726027966, "logits/chosen": -0.7848948240280151, "logits/rejected": -0.8188717365264893, "logps/chosen": -0.03268032893538475, "logps/rejected": -1.8890024423599243, "loss": 2.1037, "nll_loss": 0.5256178975105286, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032680328004062176, "rewards/margins": 0.18563222885131836, "rewards/rejected": -0.18890026211738586, "step": 4227 }, { "epoch": 2.9239280774550487, "grad_norm": 6.1554059982299805, "learning_rate": 3.9311510680805285e-05, "log_odds_chosen": 8.72746753692627, "log_odds_ratio": -0.00024264020612463355, "logits/chosen": -0.5359256267547607, "logits/rejected": -0.5526854991912842, "logps/chosen": -0.0005240375176072121, "logps/rejected": -1.1332951784133911, "loss": 2.1296, "nll_loss": 0.532380223274231, "rewards/accuracies": 1.0, "rewards/chosen": -5.240375321591273e-05, "rewards/margins": 0.1132771223783493, "rewards/rejected": -0.11332952231168747, "step": 4228 }, { "epoch": 2.9246196403872755, "grad_norm": 10.648176193237305, "learning_rate": 3.930766866451514e-05, "log_odds_chosen": 7.886308670043945, "log_odds_ratio": -0.014126875437796116, "logits/chosen": -0.7142013311386108, "logits/rejected": -0.8259632587432861, "logps/chosen": -0.005320434924215078, "logps/rejected": -1.1804341077804565, "loss": 2.7332, "nll_loss": 0.6818897724151611, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005320435157045722, "rewards/margins": 0.11751136183738708, "rewards/rejected": -0.11804340034723282, "step": 4229 }, { "epoch": 2.9253112033195023, "grad_norm": 5.809198379516602, "learning_rate": 3.930382664822499e-05, "log_odds_chosen": 8.172040939331055, "log_odds_ratio": -0.21329273283481598, "logits/chosen": -0.49062544107437134, "logits/rejected": -0.5070676207542419, "logps/chosen": -0.029187794774770737, "logps/rejected": -1.2543081045150757, "loss": 2.1047, "nll_loss": 0.5048477649688721, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029187791515141726, "rewards/margins": 0.12251203507184982, "rewards/rejected": -0.12543080747127533, "step": 4230 }, { "epoch": 2.926002766251729, "grad_norm": 9.787761688232422, "learning_rate": 3.929998463193484e-05, "log_odds_chosen": 6.455556392669678, "log_odds_ratio": -0.03728388249874115, "logits/chosen": -0.3578833341598511, "logits/rejected": -0.33278632164001465, "logps/chosen": -0.00679417559877038, "logps/rejected": -0.7100874185562134, "loss": 2.3575, "nll_loss": 0.5856543779373169, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006794175715185702, "rewards/margins": 0.07032933086156845, "rewards/rejected": -0.07100874185562134, "step": 4231 }, { "epoch": 2.926694329183956, "grad_norm": 11.004345893859863, "learning_rate": 3.9296142615644694e-05, "log_odds_chosen": 9.477989196777344, "log_odds_ratio": -0.00019522027287166566, "logits/chosen": -0.5522055625915527, "logits/rejected": -0.5933316946029663, "logps/chosen": -0.0004704441817011684, "logps/rejected": -1.6733304262161255, "loss": 2.1303, "nll_loss": 0.5325589179992676, "rewards/accuracies": 1.0, "rewards/chosen": -4.704441744252108e-05, "rewards/margins": 0.1672860085964203, "rewards/rejected": -0.16733305156230927, "step": 4232 }, { "epoch": 2.927385892116183, "grad_norm": 9.407308578491211, "learning_rate": 3.929230059935454e-05, "log_odds_chosen": 8.922008514404297, "log_odds_ratio": -0.00025383057072758675, "logits/chosen": -0.364921510219574, "logits/rejected": -0.3875230848789215, "logps/chosen": -0.0007518371567130089, "logps/rejected": -1.1318978071212769, "loss": 1.9326, "nll_loss": 0.4831249713897705, "rewards/accuracies": 1.0, "rewards/chosen": -7.518372149206698e-05, "rewards/margins": 0.1131146103143692, "rewards/rejected": -0.11318978667259216, "step": 4233 }, { "epoch": 2.9280774550484097, "grad_norm": 9.522400856018066, "learning_rate": 3.928845858306439e-05, "log_odds_chosen": 6.760089874267578, "log_odds_ratio": -0.32851678133010864, "logits/chosen": -0.5342674851417542, "logits/rejected": -0.6126154661178589, "logps/chosen": -0.12489331513643265, "logps/rejected": -1.1863129138946533, "loss": 1.918, "nll_loss": 0.44663700461387634, "rewards/accuracies": 0.875, "rewards/chosen": -0.01248933281749487, "rewards/margins": 0.10614196956157684, "rewards/rejected": -0.11863130331039429, "step": 4234 }, { "epoch": 2.9287690179806365, "grad_norm": 8.68045711517334, "learning_rate": 3.9284616566774245e-05, "log_odds_chosen": 8.119098663330078, "log_odds_ratio": -0.0052395109087228775, "logits/chosen": -0.5035631656646729, "logits/rejected": -0.5046873092651367, "logps/chosen": -0.02135084755718708, "logps/rejected": -1.6839799880981445, "loss": 2.2956, "nll_loss": 0.5733876824378967, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021350847091525793, "rewards/margins": 0.1662629246711731, "rewards/rejected": -0.16839802265167236, "step": 4235 }, { "epoch": 2.9294605809128633, "grad_norm": 9.004694938659668, "learning_rate": 3.92807745504841e-05, "log_odds_chosen": 6.27830696105957, "log_odds_ratio": -0.1884261518716812, "logits/chosen": -0.676655113697052, "logits/rejected": -0.7348206043243408, "logps/chosen": -0.08192940056324005, "logps/rejected": -1.172129511833191, "loss": 1.7475, "nll_loss": 0.41804251074790955, "rewards/accuracies": 0.875, "rewards/chosen": -0.00819294061511755, "rewards/margins": 0.10902000963687897, "rewards/rejected": -0.11721295118331909, "step": 4236 }, { "epoch": 2.93015214384509, "grad_norm": 10.645638465881348, "learning_rate": 3.927693253419394e-05, "log_odds_chosen": 7.660663604736328, "log_odds_ratio": -0.0013477486791089177, "logits/chosen": -0.351400762796402, "logits/rejected": -0.4446882903575897, "logps/chosen": -0.0024033382069319487, "logps/rejected": -1.6316173076629639, "loss": 2.9281, "nll_loss": 0.7318964004516602, "rewards/accuracies": 1.0, "rewards/chosen": -0.000240333829424344, "rewards/margins": 0.16292139887809753, "rewards/rejected": -0.1631617248058319, "step": 4237 }, { "epoch": 2.930843706777317, "grad_norm": 6.696867942810059, "learning_rate": 3.92730905179038e-05, "log_odds_chosen": 8.292326927185059, "log_odds_ratio": -0.0022583678364753723, "logits/chosen": -0.19574078917503357, "logits/rejected": -0.21028833091259003, "logps/chosen": -0.0025987233966588974, "logps/rejected": -1.272185206413269, "loss": 2.0609, "nll_loss": 0.5149998068809509, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002598723513074219, "rewards/margins": 0.12695865333080292, "rewards/rejected": -0.12721852958202362, "step": 4238 }, { "epoch": 2.931535269709544, "grad_norm": 7.561919212341309, "learning_rate": 3.926924850161365e-05, "log_odds_chosen": 8.425704002380371, "log_odds_ratio": -0.0013171505415812135, "logits/chosen": -0.6048797369003296, "logits/rejected": -0.7474537491798401, "logps/chosen": -0.0018197052413597703, "logps/rejected": -1.1950289011001587, "loss": 2.2343, "nll_loss": 0.5584410429000854, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018197050667367876, "rewards/margins": 0.11932092905044556, "rewards/rejected": -0.11950289458036423, "step": 4239 }, { "epoch": 2.9322268326417706, "grad_norm": 7.599555015563965, "learning_rate": 3.92654064853235e-05, "log_odds_chosen": 9.207232475280762, "log_odds_ratio": -0.0002494049840606749, "logits/chosen": -0.1796451061964035, "logits/rejected": -0.23438525199890137, "logps/chosen": -0.0007484787493012846, "logps/rejected": -1.6299362182617188, "loss": 1.4964, "nll_loss": 0.37406790256500244, "rewards/accuracies": 1.0, "rewards/chosen": -7.48478778405115e-05, "rewards/margins": 0.16291877627372742, "rewards/rejected": -0.1629936248064041, "step": 4240 }, { "epoch": 2.9329183955739975, "grad_norm": 11.072807312011719, "learning_rate": 3.926156446903335e-05, "log_odds_chosen": 8.597249984741211, "log_odds_ratio": -0.011972310021519661, "logits/chosen": -0.3940218985080719, "logits/rejected": -0.4087577760219574, "logps/chosen": -0.00673884991556406, "logps/rejected": -1.875365972518921, "loss": 2.1652, "nll_loss": 0.5400927662849426, "rewards/accuracies": 1.0, "rewards/chosen": -0.000673884991556406, "rewards/margins": 0.1868627369403839, "rewards/rejected": -0.18753661215305328, "step": 4241 }, { "epoch": 2.9336099585062243, "grad_norm": 6.58119535446167, "learning_rate": 3.92577224527432e-05, "log_odds_chosen": 8.13068962097168, "log_odds_ratio": -0.005946990102529526, "logits/chosen": -0.3203129768371582, "logits/rejected": -0.4184744358062744, "logps/chosen": -0.03520968556404114, "logps/rejected": -2.0817081928253174, "loss": 1.4605, "nll_loss": 0.36452704668045044, "rewards/accuracies": 1.0, "rewards/chosen": -0.00352096906863153, "rewards/margins": 0.20464983582496643, "rewards/rejected": -0.2081708163022995, "step": 4242 }, { "epoch": 2.934301521438451, "grad_norm": 12.412485122680664, "learning_rate": 3.925388043645305e-05, "log_odds_chosen": 7.7870073318481445, "log_odds_ratio": -0.0017178517300635576, "logits/chosen": -0.6953123211860657, "logits/rejected": -0.7932695150375366, "logps/chosen": -0.002230787882581353, "logps/rejected": -1.3735847473144531, "loss": 2.1767, "nll_loss": 0.5439935922622681, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022307877952698618, "rewards/margins": 0.13713541626930237, "rewards/rejected": -0.13735848665237427, "step": 4243 }, { "epoch": 2.934993084370678, "grad_norm": 7.041884422302246, "learning_rate": 3.9250038420162903e-05, "log_odds_chosen": 5.54819393157959, "log_odds_ratio": -0.09255164861679077, "logits/chosen": -0.25514480471611023, "logits/rejected": -0.3109058141708374, "logps/chosen": -0.0339653380215168, "logps/rejected": -0.9779253005981445, "loss": 2.4905, "nll_loss": 0.6133647561073303, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033965338952839375, "rewards/margins": 0.09439600259065628, "rewards/rejected": -0.09779253602027893, "step": 4244 }, { "epoch": 2.935684647302905, "grad_norm": 14.048311233520508, "learning_rate": 3.9246196403872756e-05, "log_odds_chosen": 8.181252479553223, "log_odds_ratio": -0.033230848610401154, "logits/chosen": -0.5824018716812134, "logits/rejected": -0.6378239989280701, "logps/chosen": -0.014942415058612823, "logps/rejected": -2.086918354034424, "loss": 3.1938, "nll_loss": 0.7951152920722961, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014942414127290249, "rewards/margins": 0.20719760656356812, "rewards/rejected": -0.20869183540344238, "step": 4245 }, { "epoch": 2.9363762102351316, "grad_norm": 8.99216079711914, "learning_rate": 3.92423543875826e-05, "log_odds_chosen": 8.586301803588867, "log_odds_ratio": -0.004110632464289665, "logits/chosen": -0.8050059080123901, "logits/rejected": -0.8754081130027771, "logps/chosen": -0.0025968970730900764, "logps/rejected": -1.4557313919067383, "loss": 2.9999, "nll_loss": 0.7495602965354919, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002596897247713059, "rewards/margins": 0.14531344175338745, "rewards/rejected": -0.14557313919067383, "step": 4246 }, { "epoch": 2.9370677731673585, "grad_norm": 11.729406356811523, "learning_rate": 3.923851237129246e-05, "log_odds_chosen": 9.105123519897461, "log_odds_ratio": -0.012979497201740742, "logits/chosen": -0.5769229531288147, "logits/rejected": -0.6147894859313965, "logps/chosen": -0.07719717919826508, "logps/rejected": -2.1323089599609375, "loss": 1.7843, "nll_loss": 0.44478702545166016, "rewards/accuracies": 1.0, "rewards/chosen": -0.007719717919826508, "rewards/margins": 0.2055111825466156, "rewards/rejected": -0.2132309079170227, "step": 4247 }, { "epoch": 2.9377593360995853, "grad_norm": 9.502190589904785, "learning_rate": 3.9234670355002306e-05, "log_odds_chosen": 7.549291133880615, "log_odds_ratio": -0.019823361188173294, "logits/chosen": -1.219071388244629, "logits/rejected": -1.203476071357727, "logps/chosen": -0.023129645735025406, "logps/rejected": -1.278996467590332, "loss": 1.776, "nll_loss": 0.4420260190963745, "rewards/accuracies": 1.0, "rewards/chosen": -0.002312964526936412, "rewards/margins": 0.12558668851852417, "rewards/rejected": -0.1278996467590332, "step": 4248 }, { "epoch": 2.938450899031812, "grad_norm": 8.954014778137207, "learning_rate": 3.923082833871216e-05, "log_odds_chosen": 6.703267574310303, "log_odds_ratio": -0.28621187806129456, "logits/chosen": -0.2853702902793884, "logits/rejected": -0.29533183574676514, "logps/chosen": -0.051028452813625336, "logps/rejected": -1.3374086618423462, "loss": 2.1933, "nll_loss": 0.5197107791900635, "rewards/accuracies": 0.875, "rewards/chosen": -0.005102845374494791, "rewards/margins": 0.12863802909851074, "rewards/rejected": -0.1337408721446991, "step": 4249 }, { "epoch": 2.939142461964039, "grad_norm": 9.448728561401367, "learning_rate": 3.922698632242201e-05, "log_odds_chosen": 8.249773979187012, "log_odds_ratio": -0.00031678471714258194, "logits/chosen": -0.5632359981536865, "logits/rejected": -0.698321521282196, "logps/chosen": -0.0008963820873759687, "logps/rejected": -1.3571325540542603, "loss": 2.2072, "nll_loss": 0.5517725944519043, "rewards/accuracies": 1.0, "rewards/chosen": -8.963821164797992e-05, "rewards/margins": 0.13562361896038055, "rewards/rejected": -0.13571324944496155, "step": 4250 }, { "epoch": 2.9398340248962658, "grad_norm": 10.276646614074707, "learning_rate": 3.922314430613186e-05, "log_odds_chosen": 7.62478494644165, "log_odds_ratio": -0.047770481556653976, "logits/chosen": -0.7422584891319275, "logits/rejected": -0.7952272295951843, "logps/chosen": -0.011581145226955414, "logps/rejected": -1.3504177331924438, "loss": 2.5862, "nll_loss": 0.6417734622955322, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011581146391108632, "rewards/margins": 0.13388365507125854, "rewards/rejected": -0.13504177331924438, "step": 4251 }, { "epoch": 2.9405255878284926, "grad_norm": 6.6713128089904785, "learning_rate": 3.921930228984171e-05, "log_odds_chosen": 7.338813781738281, "log_odds_ratio": -0.011613673530519009, "logits/chosen": -0.5105392932891846, "logits/rejected": -0.5866726040840149, "logps/chosen": -0.004626925103366375, "logps/rejected": -0.7748826146125793, "loss": 1.8144, "nll_loss": 0.4524500072002411, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004626925219781697, "rewards/margins": 0.07702556252479553, "rewards/rejected": -0.07748826593160629, "step": 4252 }, { "epoch": 2.9412171507607194, "grad_norm": 7.037342548370361, "learning_rate": 3.921546027355156e-05, "log_odds_chosen": 9.136719703674316, "log_odds_ratio": -0.0018027378246188164, "logits/chosen": -0.3973497152328491, "logits/rejected": -0.4088752865791321, "logps/chosen": -0.0014848411083221436, "logps/rejected": -1.3421297073364258, "loss": 1.5211, "nll_loss": 0.3800997734069824, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014848413411527872, "rewards/margins": 0.1340644806623459, "rewards/rejected": -0.13421295583248138, "step": 4253 }, { "epoch": 2.9419087136929463, "grad_norm": 6.471978664398193, "learning_rate": 3.9211618257261414e-05, "log_odds_chosen": 7.913397789001465, "log_odds_ratio": -0.019799327477812767, "logits/chosen": -0.7454057931900024, "logits/rejected": -0.8406031131744385, "logps/chosen": -0.013360895216464996, "logps/rejected": -0.9780092239379883, "loss": 2.2511, "nll_loss": 0.5607913732528687, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013360894517973065, "rewards/margins": 0.09646482765674591, "rewards/rejected": -0.09780092537403107, "step": 4254 }, { "epoch": 2.942600276625173, "grad_norm": 9.354866027832031, "learning_rate": 3.920777624097126e-05, "log_odds_chosen": 9.310257911682129, "log_odds_ratio": -0.0006552515551447868, "logits/chosen": 0.03181115537881851, "logits/rejected": -0.08694909512996674, "logps/chosen": -0.0003843040904030204, "logps/rejected": -1.5662592649459839, "loss": 2.1056, "nll_loss": 0.5263240337371826, "rewards/accuracies": 1.0, "rewards/chosen": -3.8430407585110515e-05, "rewards/margins": 0.15658749639987946, "rewards/rejected": -0.1566259264945984, "step": 4255 }, { "epoch": 2.9432918395574, "grad_norm": 4.746151447296143, "learning_rate": 3.920393422468112e-05, "log_odds_chosen": 8.743995666503906, "log_odds_ratio": -0.0007172015612013638, "logits/chosen": -0.5727619528770447, "logits/rejected": -0.6019116044044495, "logps/chosen": -0.0029322528280317783, "logps/rejected": -1.502450704574585, "loss": 2.0104, "nll_loss": 0.5025299787521362, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002932252536993474, "rewards/margins": 0.14995186030864716, "rewards/rejected": -0.1502450704574585, "step": 4256 }, { "epoch": 2.9439834024896268, "grad_norm": 12.371516227722168, "learning_rate": 3.9200092208390965e-05, "log_odds_chosen": 7.0362443923950195, "log_odds_ratio": -0.06542985886335373, "logits/chosen": -0.6845235228538513, "logits/rejected": -0.7309256792068481, "logps/chosen": -0.01841142028570175, "logps/rejected": -1.020219326019287, "loss": 1.8128, "nll_loss": 0.4466596841812134, "rewards/accuracies": 1.0, "rewards/chosen": -0.001841141958720982, "rewards/margins": 0.10018078982830048, "rewards/rejected": -0.10202193260192871, "step": 4257 }, { "epoch": 2.9446749654218536, "grad_norm": 9.63621997833252, "learning_rate": 3.919625019210082e-05, "log_odds_chosen": 8.842986106872559, "log_odds_ratio": -0.0012780596734955907, "logits/chosen": -0.31890368461608887, "logits/rejected": -0.3919992446899414, "logps/chosen": -0.01795879378914833, "logps/rejected": -2.36039400100708, "loss": 2.2404, "nll_loss": 0.5599759221076965, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017958792159333825, "rewards/margins": 0.2342435121536255, "rewards/rejected": -0.23603938519954681, "step": 4258 }, { "epoch": 2.9453665283540804, "grad_norm": 7.4359307289123535, "learning_rate": 3.919240817581067e-05, "log_odds_chosen": 7.937699317932129, "log_odds_ratio": -0.0024727436248213053, "logits/chosen": -0.06838397681713104, "logits/rejected": -0.0431574210524559, "logps/chosen": -0.012063509784638882, "logps/rejected": -1.3039665222167969, "loss": 1.6813, "nll_loss": 0.42007312178611755, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012063512112945318, "rewards/margins": 0.1291903257369995, "rewards/rejected": -0.13039666414260864, "step": 4259 }, { "epoch": 2.9460580912863072, "grad_norm": 9.317797660827637, "learning_rate": 3.9188566159520515e-05, "log_odds_chosen": 7.148035049438477, "log_odds_ratio": -0.00553960120305419, "logits/chosen": -0.5761981010437012, "logits/rejected": -0.5446697473526001, "logps/chosen": -0.013994252309203148, "logps/rejected": -1.1831421852111816, "loss": 2.3347, "nll_loss": 0.5831324458122253, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013994253240525723, "rewards/margins": 0.1169147938489914, "rewards/rejected": -0.1183142215013504, "step": 4260 }, { "epoch": 2.946749654218534, "grad_norm": 7.1314849853515625, "learning_rate": 3.918472414323037e-05, "log_odds_chosen": 6.1319427490234375, "log_odds_ratio": -0.08988655358552933, "logits/chosen": -0.23304779827594757, "logits/rejected": -0.24430322647094727, "logps/chosen": -0.026172567158937454, "logps/rejected": -0.8957435488700867, "loss": 2.2714, "nll_loss": 0.5588510632514954, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026172564830631018, "rewards/margins": 0.08695709705352783, "rewards/rejected": -0.08957435190677643, "step": 4261 }, { "epoch": 2.947441217150761, "grad_norm": 6.733025550842285, "learning_rate": 3.918088212694022e-05, "log_odds_chosen": 5.93974494934082, "log_odds_ratio": -0.17785590887069702, "logits/chosen": -0.21797175705432892, "logits/rejected": -0.22403068840503693, "logps/chosen": -0.05725084990262985, "logps/rejected": -1.5029304027557373, "loss": 2.5309, "nll_loss": 0.6149465441703796, "rewards/accuracies": 0.875, "rewards/chosen": -0.005725085269659758, "rewards/margins": 0.14456796646118164, "rewards/rejected": -0.15029305219650269, "step": 4262 }, { "epoch": 2.9481327800829877, "grad_norm": 20.814090728759766, "learning_rate": 3.917704011065007e-05, "log_odds_chosen": 7.192148208618164, "log_odds_ratio": -0.20047271251678467, "logits/chosen": -0.5887202620506287, "logits/rejected": -0.6112239956855774, "logps/chosen": -0.041321538388729095, "logps/rejected": -1.0154424905776978, "loss": 2.7193, "nll_loss": 0.6597743034362793, "rewards/accuracies": 0.875, "rewards/chosen": -0.004132153932005167, "rewards/margins": 0.0974120944738388, "rewards/rejected": -0.10154424607753754, "step": 4263 }, { "epoch": 2.9488243430152146, "grad_norm": 10.13122272491455, "learning_rate": 3.917319809435992e-05, "log_odds_chosen": 9.080028533935547, "log_odds_ratio": -0.0002214660053141415, "logits/chosen": -0.5770301818847656, "logits/rejected": -0.6393797993659973, "logps/chosen": -0.0003423684975132346, "logps/rejected": -1.1868422031402588, "loss": 2.4343, "nll_loss": 0.6085643768310547, "rewards/accuracies": 1.0, "rewards/chosen": -3.4236851206514984e-05, "rewards/margins": 0.11864998936653137, "rewards/rejected": -0.11868421733379364, "step": 4264 }, { "epoch": 2.9495159059474414, "grad_norm": 10.2073974609375, "learning_rate": 3.916935607806978e-05, "log_odds_chosen": 7.860172271728516, "log_odds_ratio": -0.0023849881254136562, "logits/chosen": -0.5341753959655762, "logits/rejected": -0.579387366771698, "logps/chosen": -0.010475466959178448, "logps/rejected": -1.4733842611312866, "loss": 1.7168, "nll_loss": 0.42895734310150146, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010475468588992953, "rewards/margins": 0.1462908834218979, "rewards/rejected": -0.14733843505382538, "step": 4265 }, { "epoch": 2.9502074688796682, "grad_norm": 13.511934280395508, "learning_rate": 3.916551406177962e-05, "log_odds_chosen": 8.346586227416992, "log_odds_ratio": -0.0005293315043672919, "logits/chosen": -1.025830626487732, "logits/rejected": -1.034759521484375, "logps/chosen": -0.0005322285578586161, "logps/rejected": -1.1304508447647095, "loss": 2.7532, "nll_loss": 0.6882580518722534, "rewards/accuracies": 1.0, "rewards/chosen": -5.322285505826585e-05, "rewards/margins": 0.11299186199903488, "rewards/rejected": -0.11304508149623871, "step": 4266 }, { "epoch": 2.950899031811895, "grad_norm": 10.670827865600586, "learning_rate": 3.9161672045489476e-05, "log_odds_chosen": 10.011075973510742, "log_odds_ratio": -8.805980905890465e-05, "logits/chosen": -0.17297831177711487, "logits/rejected": -0.18511781096458435, "logps/chosen": -0.000354648131178692, "logps/rejected": -1.5110416412353516, "loss": 2.2342, "nll_loss": 0.5585365891456604, "rewards/accuracies": 1.0, "rewards/chosen": -3.546481457306072e-05, "rewards/margins": 0.15106868743896484, "rewards/rejected": -0.1511041522026062, "step": 4267 }, { "epoch": 2.951590594744122, "grad_norm": 9.187833786010742, "learning_rate": 3.915783002919933e-05, "log_odds_chosen": 7.468688011169434, "log_odds_ratio": -0.0015352519694715738, "logits/chosen": -0.5218496322631836, "logits/rejected": -0.5341426134109497, "logps/chosen": -0.0017257456202059984, "logps/rejected": -0.9419411420822144, "loss": 1.9649, "nll_loss": 0.4910805821418762, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017257456784136593, "rewards/margins": 0.09402154386043549, "rewards/rejected": -0.09419411420822144, "step": 4268 }, { "epoch": 2.9522821576763487, "grad_norm": 7.416023254394531, "learning_rate": 3.9153988012909174e-05, "log_odds_chosen": 6.75653076171875, "log_odds_ratio": -0.06857343018054962, "logits/chosen": -0.5883265137672424, "logits/rejected": -0.5947442650794983, "logps/chosen": -0.01682782731950283, "logps/rejected": -1.1577966213226318, "loss": 1.7577, "nll_loss": 0.4325792193412781, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016827830113470554, "rewards/margins": 0.11409687250852585, "rewards/rejected": -0.11577965319156647, "step": 4269 }, { "epoch": 2.9529737206085755, "grad_norm": 11.112635612487793, "learning_rate": 3.9150145996619026e-05, "log_odds_chosen": 9.163501739501953, "log_odds_ratio": -0.00044503927347250283, "logits/chosen": -0.5382460355758667, "logits/rejected": -0.6539362668991089, "logps/chosen": -0.0007444759830832481, "logps/rejected": -1.6698039770126343, "loss": 2.0158, "nll_loss": 0.5039148926734924, "rewards/accuracies": 1.0, "rewards/chosen": -7.44475910323672e-05, "rewards/margins": 0.1669059544801712, "rewards/rejected": -0.16698040068149567, "step": 4270 }, { "epoch": 2.9536652835408024, "grad_norm": 10.982603073120117, "learning_rate": 3.914630398032888e-05, "log_odds_chosen": 8.879176139831543, "log_odds_ratio": -0.0016005634097382426, "logits/chosen": -0.184329092502594, "logits/rejected": -0.24073684215545654, "logps/chosen": -0.0036374146584421396, "logps/rejected": -1.4348986148834229, "loss": 2.0214, "nll_loss": 0.5051819086074829, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003637414483819157, "rewards/margins": 0.14312610030174255, "rewards/rejected": -0.14348986744880676, "step": 4271 }, { "epoch": 2.954356846473029, "grad_norm": 10.150177955627441, "learning_rate": 3.914246196403873e-05, "log_odds_chosen": 9.1649808883667, "log_odds_ratio": -0.001695746323093772, "logits/chosen": -0.9434870481491089, "logits/rejected": -1.068698525428772, "logps/chosen": -0.0010683319997042418, "logps/rejected": -1.5475596189498901, "loss": 2.3266, "nll_loss": 0.5814720392227173, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010683320579119027, "rewards/margins": 0.15464913845062256, "rewards/rejected": -0.15475594997406006, "step": 4272 }, { "epoch": 2.955048409405256, "grad_norm": 10.010522842407227, "learning_rate": 3.913861994774858e-05, "log_odds_chosen": 6.911557197570801, "log_odds_ratio": -0.00604627002030611, "logits/chosen": -0.9683492183685303, "logits/rejected": -1.006256341934204, "logps/chosen": -0.019437741488218307, "logps/rejected": -1.368293285369873, "loss": 2.4068, "nll_loss": 0.6010944843292236, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019437741721048951, "rewards/margins": 0.13488556444644928, "rewards/rejected": -0.13682934641838074, "step": 4273 }, { "epoch": 2.955739972337483, "grad_norm": 6.434133529663086, "learning_rate": 3.9134777931458436e-05, "log_odds_chosen": 8.657649040222168, "log_odds_ratio": -0.000983987469226122, "logits/chosen": -0.45698267221450806, "logits/rejected": -0.5368836522102356, "logps/chosen": -0.0029307485092431307, "logps/rejected": -1.5611507892608643, "loss": 1.236, "nll_loss": 0.30890151858329773, "rewards/accuracies": 1.0, "rewards/chosen": -0.000293074845103547, "rewards/margins": 0.1558220088481903, "rewards/rejected": -0.1561150848865509, "step": 4274 }, { "epoch": 2.9564315352697097, "grad_norm": 5.325925827026367, "learning_rate": 3.913093591516828e-05, "log_odds_chosen": 8.406046867370605, "log_odds_ratio": -0.019645029678940773, "logits/chosen": -0.8153131008148193, "logits/rejected": -0.8119903802871704, "logps/chosen": -0.02138805016875267, "logps/rejected": -1.3045752048492432, "loss": 1.2154, "nll_loss": 0.3018897473812103, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021388051100075245, "rewards/margins": 0.12831872701644897, "rewards/rejected": -0.1304575353860855, "step": 4275 }, { "epoch": 2.9571230982019365, "grad_norm": 8.21432876586914, "learning_rate": 3.9127093898878134e-05, "log_odds_chosen": 8.348808288574219, "log_odds_ratio": -0.003002789104357362, "logits/chosen": -0.29157984256744385, "logits/rejected": -0.32698357105255127, "logps/chosen": -0.0036668144166469574, "logps/rejected": -0.9981551170349121, "loss": 1.9019, "nll_loss": 0.4751623272895813, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036668143002316356, "rewards/margins": 0.09944883733987808, "rewards/rejected": -0.09981551021337509, "step": 4276 }, { "epoch": 2.9578146611341634, "grad_norm": 7.75137996673584, "learning_rate": 3.9123251882587987e-05, "log_odds_chosen": 8.320832252502441, "log_odds_ratio": -0.007186429109424353, "logits/chosen": -0.2537834346294403, "logits/rejected": -0.2808937132358551, "logps/chosen": -0.03648059815168381, "logps/rejected": -2.1640422344207764, "loss": 1.7763, "nll_loss": 0.443354070186615, "rewards/accuracies": 1.0, "rewards/chosen": -0.003648059908300638, "rewards/margins": 0.21275615692138672, "rewards/rejected": -0.21640421450138092, "step": 4277 }, { "epoch": 2.95850622406639, "grad_norm": 12.242390632629395, "learning_rate": 3.911940986629783e-05, "log_odds_chosen": 5.879909515380859, "log_odds_ratio": -0.027574969455599785, "logits/chosen": -0.39817455410957336, "logits/rejected": -0.41713497042655945, "logps/chosen": -0.021489018574357033, "logps/rejected": -0.9022034406661987, "loss": 1.5127, "nll_loss": 0.3754188120365143, "rewards/accuracies": 1.0, "rewards/chosen": -0.002148902043700218, "rewards/margins": 0.08807145059108734, "rewards/rejected": -0.09022034704685211, "step": 4278 }, { "epoch": 2.959197786998617, "grad_norm": 6.00631046295166, "learning_rate": 3.9115567850007685e-05, "log_odds_chosen": 7.773863792419434, "log_odds_ratio": -0.10315965116024017, "logits/chosen": -0.10753624141216278, "logits/rejected": -0.18743924796581268, "logps/chosen": -0.039872244000434875, "logps/rejected": -1.489803671836853, "loss": 1.8879, "nll_loss": 0.4616524577140808, "rewards/accuracies": 0.875, "rewards/chosen": -0.003987224772572517, "rewards/margins": 0.1449931412935257, "rewards/rejected": -0.14898037910461426, "step": 4279 }, { "epoch": 2.959889349930844, "grad_norm": 7.796035289764404, "learning_rate": 3.911172583371754e-05, "log_odds_chosen": 9.047157287597656, "log_odds_ratio": -0.000789603334851563, "logits/chosen": -0.5538119673728943, "logits/rejected": -0.5578813552856445, "logps/chosen": -0.008371025323867798, "logps/rejected": -1.8051315546035767, "loss": 1.6288, "nll_loss": 0.4071248471736908, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008371025323867798, "rewards/margins": 0.17967605590820312, "rewards/rejected": -0.1805131584405899, "step": 4280 }, { "epoch": 2.9605809128630707, "grad_norm": 9.822040557861328, "learning_rate": 3.910788381742739e-05, "log_odds_chosen": 8.555595397949219, "log_odds_ratio": -0.04050131142139435, "logits/chosen": -0.5271527767181396, "logits/rejected": -0.5641564726829529, "logps/chosen": -0.009184690192341805, "logps/rejected": -1.4864246845245361, "loss": 2.6192, "nll_loss": 0.6507552266120911, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009184691007249057, "rewards/margins": 0.1477240025997162, "rewards/rejected": -0.14864246547222137, "step": 4281 }, { "epoch": 2.9612724757952975, "grad_norm": 8.706021308898926, "learning_rate": 3.9104041801137235e-05, "log_odds_chosen": 8.526544570922852, "log_odds_ratio": -0.0027415938675403595, "logits/chosen": -0.2767260670661926, "logits/rejected": -0.3521498739719391, "logps/chosen": -0.03149921074509621, "logps/rejected": -2.332549810409546, "loss": 2.7675, "nll_loss": 0.6915987133979797, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031499210745096207, "rewards/margins": 0.23010505735874176, "rewards/rejected": -0.23325496912002563, "step": 4282 }, { "epoch": 2.9619640387275243, "grad_norm": 8.435463905334473, "learning_rate": 3.9100199784847094e-05, "log_odds_chosen": 8.319658279418945, "log_odds_ratio": -0.0016154496697708964, "logits/chosen": -0.6601822972297668, "logits/rejected": -0.6941728591918945, "logps/chosen": -0.007337766233831644, "logps/rejected": -1.6939918994903564, "loss": 2.7152, "nll_loss": 0.6786311268806458, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007337766001001, "rewards/margins": 0.16866540908813477, "rewards/rejected": -0.1693992018699646, "step": 4283 }, { "epoch": 2.962655601659751, "grad_norm": 10.894098281860352, "learning_rate": 3.909635776855694e-05, "log_odds_chosen": 8.50836181640625, "log_odds_ratio": -0.0030262740328907967, "logits/chosen": -0.6033471822738647, "logits/rejected": -0.6101424098014832, "logps/chosen": -0.004452358465641737, "logps/rejected": -1.6144495010375977, "loss": 2.132, "nll_loss": 0.5326870083808899, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044523581163957715, "rewards/margins": 0.16099971532821655, "rewards/rejected": -0.16144494712352753, "step": 4284 }, { "epoch": 2.963347164591978, "grad_norm": 8.354084968566895, "learning_rate": 3.909251575226679e-05, "log_odds_chosen": 9.289058685302734, "log_odds_ratio": -0.0005091601051390171, "logits/chosen": -0.4666883051395416, "logits/rejected": -0.4653850793838501, "logps/chosen": -0.0041956775821745396, "logps/rejected": -1.7832475900650024, "loss": 1.9979, "nll_loss": 0.49942725896835327, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004195678047835827, "rewards/margins": 0.17790518701076508, "rewards/rejected": -0.17832475900650024, "step": 4285 }, { "epoch": 2.964038727524205, "grad_norm": 8.920733451843262, "learning_rate": 3.9088673735976645e-05, "log_odds_chosen": 8.48482894897461, "log_odds_ratio": -0.31773439049720764, "logits/chosen": -0.47151845693588257, "logits/rejected": -0.5274024605751038, "logps/chosen": -0.04550067335367203, "logps/rejected": -1.6561365127563477, "loss": 2.1528, "nll_loss": 0.5064210891723633, "rewards/accuracies": 0.875, "rewards/chosen": -0.004550067242234945, "rewards/margins": 0.16106358170509338, "rewards/rejected": -0.16561365127563477, "step": 4286 }, { "epoch": 2.9647302904564317, "grad_norm": 10.381241798400879, "learning_rate": 3.908483171968649e-05, "log_odds_chosen": 8.353084564208984, "log_odds_ratio": -0.000998140312731266, "logits/chosen": -0.4602486491203308, "logits/rejected": -0.5085919499397278, "logps/chosen": -0.010730762965977192, "logps/rejected": -1.7254202365875244, "loss": 1.8188, "nll_loss": 0.45460206270217896, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010730763897299767, "rewards/margins": 0.17146895825862885, "rewards/rejected": -0.1725420355796814, "step": 4287 }, { "epoch": 2.9654218533886585, "grad_norm": 8.11890983581543, "learning_rate": 3.908098970339634e-05, "log_odds_chosen": 9.263433456420898, "log_odds_ratio": -0.000751931220293045, "logits/chosen": -0.4940032958984375, "logits/rejected": -0.5015082955360413, "logps/chosen": -0.0017792684957385063, "logps/rejected": -1.5851678848266602, "loss": 2.8127, "nll_loss": 0.7030935883522034, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017792684957385063, "rewards/margins": 0.15833887457847595, "rewards/rejected": -0.1585167944431305, "step": 4288 }, { "epoch": 2.9661134163208853, "grad_norm": 7.235042572021484, "learning_rate": 3.9077147687106196e-05, "log_odds_chosen": 9.509930610656738, "log_odds_ratio": -0.00032048820867203176, "logits/chosen": -0.4654198884963989, "logits/rejected": -0.5719193816184998, "logps/chosen": -0.0006036916165612638, "logps/rejected": -1.7015082836151123, "loss": 1.8108, "nll_loss": 0.4526631832122803, "rewards/accuracies": 1.0, "rewards/chosen": -6.0369158745743334e-05, "rewards/margins": 0.1700904667377472, "rewards/rejected": -0.17015081644058228, "step": 4289 }, { "epoch": 2.966804979253112, "grad_norm": 10.309484481811523, "learning_rate": 3.907330567081605e-05, "log_odds_chosen": 7.775326728820801, "log_odds_ratio": -0.002041358035057783, "logits/chosen": -0.80727618932724, "logits/rejected": -0.8146430253982544, "logps/chosen": -0.009806342422962189, "logps/rejected": -1.9370990991592407, "loss": 2.6617, "nll_loss": 0.6652133464813232, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009806342422962189, "rewards/margins": 0.19272929430007935, "rewards/rejected": -0.19370993971824646, "step": 4290 }, { "epoch": 2.967496542185339, "grad_norm": 6.211965560913086, "learning_rate": 3.9069463654525894e-05, "log_odds_chosen": 7.471760272979736, "log_odds_ratio": -0.003108600154519081, "logits/chosen": -0.6506915092468262, "logits/rejected": -0.6464847326278687, "logps/chosen": -0.013406043872237206, "logps/rejected": -1.191932201385498, "loss": 2.562, "nll_loss": 0.64018714427948, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013406045036390424, "rewards/margins": 0.11785262078046799, "rewards/rejected": -0.11919323354959488, "step": 4291 }, { "epoch": 2.968188105117566, "grad_norm": 8.040523529052734, "learning_rate": 3.906562163823575e-05, "log_odds_chosen": 8.697381973266602, "log_odds_ratio": -0.04064595699310303, "logits/chosen": -0.8994853496551514, "logits/rejected": -0.9400737285614014, "logps/chosen": -0.009031183086335659, "logps/rejected": -1.172762393951416, "loss": 1.6371, "nll_loss": 0.40520185232162476, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009031182853505015, "rewards/margins": 0.11637313663959503, "rewards/rejected": -0.11727625131607056, "step": 4292 }, { "epoch": 2.9688796680497926, "grad_norm": 8.956297874450684, "learning_rate": 3.90617796219456e-05, "log_odds_chosen": 7.714384078979492, "log_odds_ratio": -0.022408613935112953, "logits/chosen": -1.1202744245529175, "logits/rejected": -1.164198875427246, "logps/chosen": -0.03206299990415573, "logps/rejected": -1.8425147533416748, "loss": 2.6058, "nll_loss": 0.6492141485214233, "rewards/accuracies": 1.0, "rewards/chosen": -0.003206299850717187, "rewards/margins": 0.18104518949985504, "rewards/rejected": -0.18425148725509644, "step": 4293 }, { "epoch": 2.9695712309820195, "grad_norm": 9.355306625366211, "learning_rate": 3.905793760565545e-05, "log_odds_chosen": 9.887144088745117, "log_odds_ratio": -0.00020667076751124114, "logits/chosen": -1.2133868932724, "logits/rejected": -1.2913806438446045, "logps/chosen": -0.0007871249108575284, "logps/rejected": -2.1001222133636475, "loss": 3.3309, "nll_loss": 0.832693338394165, "rewards/accuracies": 1.0, "rewards/chosen": -7.871249545132741e-05, "rewards/margins": 0.20993351936340332, "rewards/rejected": -0.21001222729682922, "step": 4294 }, { "epoch": 2.9702627939142463, "grad_norm": 11.772032737731934, "learning_rate": 3.9054095589365303e-05, "log_odds_chosen": 8.659290313720703, "log_odds_ratio": -0.0025949098635464907, "logits/chosen": -0.4823494553565979, "logits/rejected": -0.6542070508003235, "logps/chosen": -0.0011625216575339437, "logps/rejected": -1.6143386363983154, "loss": 2.0923, "nll_loss": 0.5228087902069092, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011625216575339437, "rewards/margins": 0.1613176167011261, "rewards/rejected": -0.1614338606595993, "step": 4295 }, { "epoch": 2.970954356846473, "grad_norm": 8.367432594299316, "learning_rate": 3.905025357307515e-05, "log_odds_chosen": 8.646538734436035, "log_odds_ratio": -0.002555843908339739, "logits/chosen": -0.8320046663284302, "logits/rejected": -0.8604940176010132, "logps/chosen": -0.01020450796931982, "logps/rejected": -1.5223532915115356, "loss": 2.0883, "nll_loss": 0.5218141674995422, "rewards/accuracies": 1.0, "rewards/chosen": -0.001020450727082789, "rewards/margins": 0.15121488273143768, "rewards/rejected": -0.15223534405231476, "step": 4296 }, { "epoch": 2.9716459197787, "grad_norm": 10.044321060180664, "learning_rate": 3.9046411556785e-05, "log_odds_chosen": 8.615673065185547, "log_odds_ratio": -0.0004599147359840572, "logits/chosen": -0.566417932510376, "logits/rejected": -0.5312891602516174, "logps/chosen": -0.0008795886533334851, "logps/rejected": -1.526329517364502, "loss": 1.9875, "nll_loss": 0.49684005975723267, "rewards/accuracies": 1.0, "rewards/chosen": -8.795886242296547e-05, "rewards/margins": 0.1525450050830841, "rewards/rejected": -0.1526329517364502, "step": 4297 }, { "epoch": 2.972337482710927, "grad_norm": 7.15255880355835, "learning_rate": 3.9042569540494854e-05, "log_odds_chosen": 6.27223539352417, "log_odds_ratio": -0.08419568091630936, "logits/chosen": -0.48312613368034363, "logits/rejected": -0.5573301315307617, "logps/chosen": -0.014529145322740078, "logps/rejected": -0.7528274059295654, "loss": 2.0046, "nll_loss": 0.49272122979164124, "rewards/accuracies": 1.0, "rewards/chosen": -0.001452914671972394, "rewards/margins": 0.07382982224225998, "rewards/rejected": -0.0752827376127243, "step": 4298 }, { "epoch": 2.9730290456431536, "grad_norm": 9.718936920166016, "learning_rate": 3.9038727524204706e-05, "log_odds_chosen": 10.059136390686035, "log_odds_ratio": -9.451636287849396e-05, "logits/chosen": -0.7800579071044922, "logits/rejected": -0.7673681974411011, "logps/chosen": -0.0003319536044728011, "logps/rejected": -1.5992937088012695, "loss": 1.5703, "nll_loss": 0.39255863428115845, "rewards/accuracies": 1.0, "rewards/chosen": -3.319535971968435e-05, "rewards/margins": 0.15989619493484497, "rewards/rejected": -0.15992936491966248, "step": 4299 }, { "epoch": 2.9737206085753805, "grad_norm": 8.345458984375, "learning_rate": 3.903488550791455e-05, "log_odds_chosen": 8.29789924621582, "log_odds_ratio": -0.25329098105430603, "logits/chosen": -0.9458674192428589, "logits/rejected": -0.9740471839904785, "logps/chosen": -0.03172118216753006, "logps/rejected": -1.9004459381103516, "loss": 1.5651, "nll_loss": 0.36595281958580017, "rewards/accuracies": 0.875, "rewards/chosen": -0.003172118216753006, "rewards/margins": 0.1868724822998047, "rewards/rejected": -0.1900446116924286, "step": 4300 }, { "epoch": 2.9744121715076073, "grad_norm": 9.374674797058105, "learning_rate": 3.903104349162441e-05, "log_odds_chosen": 7.801394939422607, "log_odds_ratio": -0.002809441415593028, "logits/chosen": -0.9712445735931396, "logits/rejected": -1.0163609981536865, "logps/chosen": -0.02158650942146778, "logps/rejected": -2.050865411758423, "loss": 3.2508, "nll_loss": 0.8124136924743652, "rewards/accuracies": 1.0, "rewards/chosen": -0.002158651128411293, "rewards/margins": 0.20292788743972778, "rewards/rejected": -0.20508654415607452, "step": 4301 }, { "epoch": 2.975103734439834, "grad_norm": 9.962836265563965, "learning_rate": 3.902720147533426e-05, "log_odds_chosen": 8.387914657592773, "log_odds_ratio": -0.0036154557019472122, "logits/chosen": -0.7854832410812378, "logits/rejected": -0.8578284978866577, "logps/chosen": -0.015231077559292316, "logps/rejected": -1.6216599941253662, "loss": 1.7954, "nll_loss": 0.4484889507293701, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015231078723445535, "rewards/margins": 0.16064289212226868, "rewards/rejected": -0.16216599941253662, "step": 4302 }, { "epoch": 2.975795297372061, "grad_norm": 70.63771057128906, "learning_rate": 3.902335945904411e-05, "log_odds_chosen": 8.389386177062988, "log_odds_ratio": -0.09924168884754181, "logits/chosen": -0.97005295753479, "logits/rejected": -0.9767628908157349, "logps/chosen": -0.019456295296549797, "logps/rejected": -1.2770992517471313, "loss": 2.3251, "nll_loss": 0.5713623762130737, "rewards/accuracies": 0.875, "rewards/chosen": -0.001945629483088851, "rewards/margins": 0.12576431035995483, "rewards/rejected": -0.12770992517471313, "step": 4303 }, { "epoch": 2.9764868603042878, "grad_norm": 11.85477066040039, "learning_rate": 3.901951744275396e-05, "log_odds_chosen": 7.693626880645752, "log_odds_ratio": -0.008109038695693016, "logits/chosen": -1.0131137371063232, "logits/rejected": -1.0055873394012451, "logps/chosen": -0.003135553328320384, "logps/rejected": -1.1383662223815918, "loss": 3.107, "nll_loss": 0.7759391069412231, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031355535611510277, "rewards/margins": 0.11352306604385376, "rewards/rejected": -0.1138366311788559, "step": 4304 }, { "epoch": 2.9771784232365146, "grad_norm": 8.77263355255127, "learning_rate": 3.901567542646381e-05, "log_odds_chosen": 6.874538898468018, "log_odds_ratio": -0.012623955495655537, "logits/chosen": -0.9305988550186157, "logits/rejected": -0.9772317409515381, "logps/chosen": -0.04155722260475159, "logps/rejected": -1.3131730556488037, "loss": 2.2006, "nll_loss": 0.5488851070404053, "rewards/accuracies": 1.0, "rewards/chosen": -0.004155721981078386, "rewards/margins": 0.12716159224510193, "rewards/rejected": -0.13131731748580933, "step": 4305 }, { "epoch": 2.9778699861687414, "grad_norm": 15.39285659790039, "learning_rate": 3.901183341017366e-05, "log_odds_chosen": 8.982562065124512, "log_odds_ratio": -0.0003474602708593011, "logits/chosen": -0.8192355632781982, "logits/rejected": -0.9334505796432495, "logps/chosen": -0.0009468475473113358, "logps/rejected": -1.6504664421081543, "loss": 2.0991, "nll_loss": 0.5247402191162109, "rewards/accuracies": 1.0, "rewards/chosen": -9.468475764151663e-05, "rewards/margins": 0.16495195031166077, "rewards/rejected": -0.16504666209220886, "step": 4306 }, { "epoch": 2.9785615491009683, "grad_norm": 14.8624906539917, "learning_rate": 3.900799139388351e-05, "log_odds_chosen": 9.051704406738281, "log_odds_ratio": -0.0017430292209610343, "logits/chosen": -0.6860344409942627, "logits/rejected": -0.8370791673660278, "logps/chosen": -0.0037408650387078524, "logps/rejected": -1.9641859531402588, "loss": 2.7644, "nll_loss": 0.6909268498420715, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003740865213330835, "rewards/margins": 0.19604453444480896, "rewards/rejected": -0.1964186131954193, "step": 4307 }, { "epoch": 2.979253112033195, "grad_norm": 8.236113548278809, "learning_rate": 3.9004149377593365e-05, "log_odds_chosen": 7.400773048400879, "log_odds_ratio": -0.09109840542078018, "logits/chosen": -0.5979514122009277, "logits/rejected": -0.5859559774398804, "logps/chosen": -0.022600244730710983, "logps/rejected": -1.639123558998108, "loss": 2.1339, "nll_loss": 0.5243626236915588, "rewards/accuracies": 0.875, "rewards/chosen": -0.0022600244265049696, "rewards/margins": 0.1616523414850235, "rewards/rejected": -0.1639123558998108, "step": 4308 }, { "epoch": 2.979944674965422, "grad_norm": 4.792588233947754, "learning_rate": 3.900030736130321e-05, "log_odds_chosen": 5.436771392822266, "log_odds_ratio": -0.09919846802949905, "logits/chosen": -0.6263391971588135, "logits/rejected": -0.7491417527198792, "logps/chosen": -0.03652811795473099, "logps/rejected": -1.2123457193374634, "loss": 2.3405, "nll_loss": 0.5751992464065552, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036528119817376137, "rewards/margins": 0.11758175492286682, "rewards/rejected": -0.12123456597328186, "step": 4309 }, { "epoch": 2.9806362378976488, "grad_norm": 8.58399486541748, "learning_rate": 3.899646534501307e-05, "log_odds_chosen": 10.13625717163086, "log_odds_ratio": -0.00011886454740306363, "logits/chosen": -0.5826963782310486, "logits/rejected": -0.7255100011825562, "logps/chosen": -0.00013461017806548625, "logps/rejected": -1.4675657749176025, "loss": 2.1245, "nll_loss": 0.5311151742935181, "rewards/accuracies": 1.0, "rewards/chosen": -1.3461018170346506e-05, "rewards/margins": 0.14674311876296997, "rewards/rejected": -0.1467565894126892, "step": 4310 }, { "epoch": 2.9813278008298756, "grad_norm": 10.050031661987305, "learning_rate": 3.8992623328722915e-05, "log_odds_chosen": 8.63019847869873, "log_odds_ratio": -0.0012419001432135701, "logits/chosen": -0.5965954065322876, "logits/rejected": -0.6558192372322083, "logps/chosen": -0.004061999265104532, "logps/rejected": -1.7488057613372803, "loss": 1.8868, "nll_loss": 0.4715661108493805, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040619992068968713, "rewards/margins": 0.17447435855865479, "rewards/rejected": -0.17488057911396027, "step": 4311 }, { "epoch": 2.9820193637621024, "grad_norm": 9.843374252319336, "learning_rate": 3.898878131243277e-05, "log_odds_chosen": 9.409902572631836, "log_odds_ratio": -0.00021617556922137737, "logits/chosen": -1.0234929323196411, "logits/rejected": -1.1329476833343506, "logps/chosen": -0.0003393127117305994, "logps/rejected": -1.5909518003463745, "loss": 1.8993, "nll_loss": 0.4748102128505707, "rewards/accuracies": 1.0, "rewards/chosen": -3.3931268262676895e-05, "rewards/margins": 0.1590612381696701, "rewards/rejected": -0.1590951681137085, "step": 4312 }, { "epoch": 2.9827109266943292, "grad_norm": 9.166037559509277, "learning_rate": 3.898493929614262e-05, "log_odds_chosen": 7.901092052459717, "log_odds_ratio": -0.054978758096694946, "logits/chosen": -0.7726214528083801, "logits/rejected": -0.8527544140815735, "logps/chosen": -0.028022143989801407, "logps/rejected": -1.464059829711914, "loss": 1.8119, "nll_loss": 0.44747546315193176, "rewards/accuracies": 1.0, "rewards/chosen": -0.002802214352414012, "rewards/margins": 0.14360378682613373, "rewards/rejected": -0.14640599489212036, "step": 4313 }, { "epoch": 2.983402489626556, "grad_norm": 7.450784206390381, "learning_rate": 3.8981097279852466e-05, "log_odds_chosen": 7.23293399810791, "log_odds_ratio": -0.05062123015522957, "logits/chosen": -0.5278096199035645, "logits/rejected": -0.62985759973526, "logps/chosen": -0.026304200291633606, "logps/rejected": -1.278918981552124, "loss": 1.8208, "nll_loss": 0.45012617111206055, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026304200291633606, "rewards/margins": 0.12526148557662964, "rewards/rejected": -0.1278918981552124, "step": 4314 }, { "epoch": 2.984094052558783, "grad_norm": 7.188449382781982, "learning_rate": 3.897725526356232e-05, "log_odds_chosen": 7.602090358734131, "log_odds_ratio": -0.07067767530679703, "logits/chosen": -0.8626900911331177, "logits/rejected": -0.8943912982940674, "logps/chosen": -0.0472387969493866, "logps/rejected": -1.7587538957595825, "loss": 1.9876, "nll_loss": 0.48984020948410034, "rewards/accuracies": 1.0, "rewards/chosen": -0.004723879974335432, "rewards/margins": 0.17115150392055511, "rewards/rejected": -0.17587539553642273, "step": 4315 }, { "epoch": 2.9847856154910097, "grad_norm": 7.509032726287842, "learning_rate": 3.897341324727217e-05, "log_odds_chosen": 8.086267471313477, "log_odds_ratio": -0.052891407161951065, "logits/chosen": -0.4344555139541626, "logits/rejected": -0.4079325199127197, "logps/chosen": -0.015601033344864845, "logps/rejected": -1.3612074851989746, "loss": 1.4352, "nll_loss": 0.3534983992576599, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015601032646372914, "rewards/margins": 0.13456064462661743, "rewards/rejected": -0.1361207515001297, "step": 4316 }, { "epoch": 2.9854771784232366, "grad_norm": 7.007813453674316, "learning_rate": 3.896957123098202e-05, "log_odds_chosen": 8.999627113342285, "log_odds_ratio": -0.0010090176947414875, "logits/chosen": -0.666915237903595, "logits/rejected": -0.7530708909034729, "logps/chosen": -0.01949833706021309, "logps/rejected": -2.4200823307037354, "loss": 1.8857, "nll_loss": 0.47133368253707886, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019498338224366307, "rewards/margins": 0.24005839228630066, "rewards/rejected": -0.242008239030838, "step": 4317 }, { "epoch": 2.9861687413554634, "grad_norm": 11.183320045471191, "learning_rate": 3.896572921469187e-05, "log_odds_chosen": 6.617632865905762, "log_odds_ratio": -0.04021994769573212, "logits/chosen": -0.7787746787071228, "logits/rejected": -0.8017224073410034, "logps/chosen": -0.041280489414930344, "logps/rejected": -1.8801112174987793, "loss": 2.2311, "nll_loss": 0.5537528991699219, "rewards/accuracies": 1.0, "rewards/chosen": -0.004128048662096262, "rewards/margins": 0.18388308584690094, "rewards/rejected": -0.18801113963127136, "step": 4318 }, { "epoch": 2.9868603042876902, "grad_norm": 9.392274856567383, "learning_rate": 3.896188719840173e-05, "log_odds_chosen": 8.517684936523438, "log_odds_ratio": -0.010786582715809345, "logits/chosen": -0.641266942024231, "logits/rejected": -0.6726396679878235, "logps/chosen": -0.042483001947402954, "logps/rejected": -1.9543349742889404, "loss": 2.701, "nll_loss": 0.6741783022880554, "rewards/accuracies": 1.0, "rewards/chosen": -0.004248300567269325, "rewards/margins": 0.19118520617485046, "rewards/rejected": -0.19543349742889404, "step": 4319 }, { "epoch": 2.987551867219917, "grad_norm": 8.228974342346191, "learning_rate": 3.8958045182111574e-05, "log_odds_chosen": 5.370292663574219, "log_odds_ratio": -0.3013046681880951, "logits/chosen": -0.2348720133304596, "logits/rejected": -0.37705564498901367, "logps/chosen": -0.04984167218208313, "logps/rejected": -0.9333829283714294, "loss": 2.2342, "nll_loss": 0.5284290313720703, "rewards/accuracies": 0.875, "rewards/chosen": -0.004984167404472828, "rewards/margins": 0.08835412561893463, "rewards/rejected": -0.09333829581737518, "step": 4320 }, { "epoch": 2.988243430152144, "grad_norm": 50.50739288330078, "learning_rate": 3.8954203165821426e-05, "log_odds_chosen": 8.352073669433594, "log_odds_ratio": -0.0004624387656804174, "logits/chosen": -0.5019068717956543, "logits/rejected": -0.5019280910491943, "logps/chosen": -0.011270806193351746, "logps/rejected": -1.633318543434143, "loss": 3.0913, "nll_loss": 0.7727884650230408, "rewards/accuracies": 1.0, "rewards/chosen": -0.001127080642618239, "rewards/margins": 0.162204772233963, "rewards/rejected": -0.16333186626434326, "step": 4321 }, { "epoch": 2.9889349930843707, "grad_norm": 8.486451148986816, "learning_rate": 3.895036114953128e-05, "log_odds_chosen": 8.059738159179688, "log_odds_ratio": -0.00266972160898149, "logits/chosen": -0.7248245477676392, "logits/rejected": -0.78750079870224, "logps/chosen": -0.047702398151159286, "logps/rejected": -2.0713438987731934, "loss": 1.8981, "nll_loss": 0.474260151386261, "rewards/accuracies": 1.0, "rewards/chosen": -0.004770240746438503, "rewards/margins": 0.20236416161060333, "rewards/rejected": -0.2071343958377838, "step": 4322 }, { "epoch": 2.9896265560165975, "grad_norm": 9.462930679321289, "learning_rate": 3.8946519133241124e-05, "log_odds_chosen": 8.55579948425293, "log_odds_ratio": -0.02762962505221367, "logits/chosen": -0.5688346028327942, "logits/rejected": -0.5741695165634155, "logps/chosen": -0.011251446790993214, "logps/rejected": -1.7209012508392334, "loss": 1.7582, "nll_loss": 0.43678048253059387, "rewards/accuracies": 1.0, "rewards/chosen": -0.001125144655816257, "rewards/margins": 0.17096498608589172, "rewards/rejected": -0.17209011316299438, "step": 4323 }, { "epoch": 2.9903181189488244, "grad_norm": 5.706713676452637, "learning_rate": 3.894267711695098e-05, "log_odds_chosen": 7.146937370300293, "log_odds_ratio": -0.009990318678319454, "logits/chosen": -0.022167712450027466, "logits/rejected": -0.007036931812763214, "logps/chosen": -0.019013497978448868, "logps/rejected": -1.0692731142044067, "loss": 1.9051, "nll_loss": 0.4752686023712158, "rewards/accuracies": 1.0, "rewards/chosen": -0.001901349751278758, "rewards/margins": 0.10502596944570541, "rewards/rejected": -0.10692732036113739, "step": 4324 }, { "epoch": 2.991009681881051, "grad_norm": 12.431560516357422, "learning_rate": 3.893883510066083e-05, "log_odds_chosen": 8.107927322387695, "log_odds_ratio": -0.06721797585487366, "logits/chosen": -0.8215648531913757, "logits/rejected": -0.911736786365509, "logps/chosen": -0.024141529574990273, "logps/rejected": -1.440712332725525, "loss": 2.7399, "nll_loss": 0.6782621741294861, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024141529574990273, "rewards/margins": 0.14165708422660828, "rewards/rejected": -0.14407123625278473, "step": 4325 }, { "epoch": 2.991701244813278, "grad_norm": 11.858086585998535, "learning_rate": 3.893499308437068e-05, "log_odds_chosen": 7.023995876312256, "log_odds_ratio": -0.17745637893676758, "logits/chosen": -0.5713260769844055, "logits/rejected": -0.6480407118797302, "logps/chosen": -0.03282373398542404, "logps/rejected": -1.1977462768554688, "loss": 2.0188, "nll_loss": 0.48696666955947876, "rewards/accuracies": 0.875, "rewards/chosen": -0.003282373771071434, "rewards/margins": 0.11649225652217865, "rewards/rejected": -0.11977462470531464, "step": 4326 }, { "epoch": 2.992392807745505, "grad_norm": 11.904878616333008, "learning_rate": 3.893115106808053e-05, "log_odds_chosen": 10.03628921508789, "log_odds_ratio": -7.053057925077155e-05, "logits/chosen": -0.4557734429836273, "logits/rejected": -0.5793010592460632, "logps/chosen": -0.0002729504485614598, "logps/rejected": -1.6975574493408203, "loss": 2.4038, "nll_loss": 0.6009531021118164, "rewards/accuracies": 1.0, "rewards/chosen": -2.7295047402731143e-05, "rewards/margins": 0.16972845792770386, "rewards/rejected": -0.169755756855011, "step": 4327 }, { "epoch": 2.9930843706777317, "grad_norm": 9.12769603729248, "learning_rate": 3.892730905179039e-05, "log_odds_chosen": 7.800318717956543, "log_odds_ratio": -0.0034801724832504988, "logits/chosen": -0.60991370677948, "logits/rejected": -0.7001553177833557, "logps/chosen": -0.03430306911468506, "logps/rejected": -1.6319289207458496, "loss": 2.4333, "nll_loss": 0.6079657077789307, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034303071442991495, "rewards/margins": 0.15976257622241974, "rewards/rejected": -0.16319288313388824, "step": 4328 }, { "epoch": 2.9937759336099585, "grad_norm": 13.82374095916748, "learning_rate": 3.892346703550023e-05, "log_odds_chosen": 8.682519912719727, "log_odds_ratio": -0.010508873499929905, "logits/chosen": -0.4079381227493286, "logits/rejected": -0.5559213161468506, "logps/chosen": -0.005879267118871212, "logps/rejected": -2.0450315475463867, "loss": 2.4957, "nll_loss": 0.622867226600647, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005879267118871212, "rewards/margins": 0.20391523838043213, "rewards/rejected": -0.20450317859649658, "step": 4329 }, { "epoch": 2.9944674965421854, "grad_norm": 9.895845413208008, "learning_rate": 3.8919625019210085e-05, "log_odds_chosen": 8.463839530944824, "log_odds_ratio": -0.0006095452117733657, "logits/chosen": -0.5000820159912109, "logits/rejected": -0.5394564867019653, "logps/chosen": -0.0017691230168566108, "logps/rejected": -1.2518408298492432, "loss": 1.7294, "nll_loss": 0.43227720260620117, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017691230459604412, "rewards/margins": 0.12500718235969543, "rewards/rejected": -0.12518410384655, "step": 4330 }, { "epoch": 2.995159059474412, "grad_norm": 6.428073406219482, "learning_rate": 3.891578300291994e-05, "log_odds_chosen": 7.467696666717529, "log_odds_ratio": -0.1370055079460144, "logits/chosen": -0.2736653983592987, "logits/rejected": -0.2604514956474304, "logps/chosen": -0.03479510545730591, "logps/rejected": -1.2272887229919434, "loss": 1.6824, "nll_loss": 0.40690019726753235, "rewards/accuracies": 0.875, "rewards/chosen": -0.003479510312899947, "rewards/margins": 0.1192493662238121, "rewards/rejected": -0.1227288767695427, "step": 4331 }, { "epoch": 2.995850622406639, "grad_norm": 16.991193771362305, "learning_rate": 3.891194098662978e-05, "log_odds_chosen": 10.145772933959961, "log_odds_ratio": -0.00011397979687899351, "logits/chosen": -1.0705267190933228, "logits/rejected": -1.188539981842041, "logps/chosen": -0.00029643025482073426, "logps/rejected": -1.6331541538238525, "loss": 2.6387, "nll_loss": 0.659654438495636, "rewards/accuracies": 1.0, "rewards/chosen": -2.9643026209669188e-05, "rewards/margins": 0.16328579187393188, "rewards/rejected": -0.16331541538238525, "step": 4332 }, { "epoch": 2.996542185338866, "grad_norm": 5.449528694152832, "learning_rate": 3.8908098970339635e-05, "log_odds_chosen": 7.263501167297363, "log_odds_ratio": -0.13936229050159454, "logits/chosen": -0.24265936017036438, "logits/rejected": -0.30016687512397766, "logps/chosen": -0.028652330860495567, "logps/rejected": -0.9009796380996704, "loss": 1.887, "nll_loss": 0.4578217566013336, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028652329929172993, "rewards/margins": 0.08723273873329163, "rewards/rejected": -0.09009796380996704, "step": 4333 }, { "epoch": 2.9972337482710927, "grad_norm": 13.071614265441895, "learning_rate": 3.890425695404949e-05, "log_odds_chosen": 9.893881797790527, "log_odds_ratio": -0.00010834841668838635, "logits/chosen": -0.7541863918304443, "logits/rejected": -0.902802586555481, "logps/chosen": -0.0004265864845365286, "logps/rejected": -1.915844440460205, "loss": 2.3551, "nll_loss": 0.5887622833251953, "rewards/accuracies": 1.0, "rewards/chosen": -4.265864481567405e-05, "rewards/margins": 0.19154179096221924, "rewards/rejected": -0.19158445298671722, "step": 4334 }, { "epoch": 2.9979253112033195, "grad_norm": 10.656232833862305, "learning_rate": 3.890041493775934e-05, "log_odds_chosen": 8.736654281616211, "log_odds_ratio": -0.0017928852466866374, "logits/chosen": -0.9096781611442566, "logits/rejected": -0.9119482040405273, "logps/chosen": -0.0038549380842596292, "logps/rejected": -1.348965048789978, "loss": 1.8054, "nll_loss": 0.4511691629886627, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003854937676806003, "rewards/margins": 0.13451100885868073, "rewards/rejected": -0.13489650189876556, "step": 4335 }, { "epoch": 2.9986168741355463, "grad_norm": 15.79922866821289, "learning_rate": 3.8896572921469186e-05, "log_odds_chosen": 7.798433303833008, "log_odds_ratio": -0.15630565583705902, "logits/chosen": -0.7631257772445679, "logits/rejected": -0.7486634254455566, "logps/chosen": -0.011932741850614548, "logps/rejected": -1.5030312538146973, "loss": 1.8688, "nll_loss": 0.45157331228256226, "rewards/accuracies": 0.875, "rewards/chosen": -0.0011932742781937122, "rewards/margins": 0.1491098701953888, "rewards/rejected": -0.15030314028263092, "step": 4336 }, { "epoch": 2.999308437067773, "grad_norm": 13.872130393981934, "learning_rate": 3.8892730905179045e-05, "log_odds_chosen": 8.419084548950195, "log_odds_ratio": -0.0037066680379211903, "logits/chosen": -0.9183025360107422, "logits/rejected": -1.061065435409546, "logps/chosen": -0.0020110062323510647, "logps/rejected": -1.3740841150283813, "loss": 3.0195, "nll_loss": 0.7545135021209717, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002011006436077878, "rewards/margins": 0.13720732927322388, "rewards/rejected": -0.13740840554237366, "step": 4337 }, { "epoch": 3.0, "grad_norm": 5.617066383361816, "learning_rate": 3.888888888888889e-05, "log_odds_chosen": 8.242136001586914, "log_odds_ratio": -0.003207864472642541, "logits/chosen": -0.576624870300293, "logits/rejected": -0.5586822032928467, "logps/chosen": -0.011922507546842098, "logps/rejected": -1.576462984085083, "loss": 1.8424, "nll_loss": 0.4602872133255005, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011922508710995317, "rewards/margins": 0.15645405650138855, "rewards/rejected": -0.1576462984085083, "step": 4338 }, { "epoch": 3.000691562932227, "grad_norm": 7.336403846740723, "learning_rate": 3.888504687259874e-05, "log_odds_chosen": 8.288568496704102, "log_odds_ratio": -0.002005348913371563, "logits/chosen": -0.22258299589157104, "logits/rejected": -0.23540785908699036, "logps/chosen": -0.0008239853195846081, "logps/rejected": -1.2276651859283447, "loss": 1.6028, "nll_loss": 0.4004961848258972, "rewards/accuracies": 1.0, "rewards/chosen": -8.239853195846081e-05, "rewards/margins": 0.12268412858247757, "rewards/rejected": -0.12276651710271835, "step": 4339 }, { "epoch": 3.0013831258644537, "grad_norm": 8.08159351348877, "learning_rate": 3.8881204856308596e-05, "log_odds_chosen": 8.504263877868652, "log_odds_ratio": -0.0014581052819266915, "logits/chosen": -0.4644441604614258, "logits/rejected": -0.6355771422386169, "logps/chosen": -0.001975719118490815, "logps/rejected": -1.3220500946044922, "loss": 1.7005, "nll_loss": 0.4249787926673889, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019757190602831542, "rewards/margins": 0.1320074498653412, "rewards/rejected": -0.13220500946044922, "step": 4340 }, { "epoch": 3.0020746887966805, "grad_norm": 9.502801895141602, "learning_rate": 3.887736284001844e-05, "log_odds_chosen": 8.841012954711914, "log_odds_ratio": -0.0030967092607170343, "logits/chosen": -0.8835254907608032, "logits/rejected": -0.9606581926345825, "logps/chosen": -0.01663133129477501, "logps/rejected": -1.7187840938568115, "loss": 1.8045, "nll_loss": 0.45080313086509705, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016631331527605653, "rewards/margins": 0.17021527886390686, "rewards/rejected": -0.17187842726707458, "step": 4341 }, { "epoch": 3.0027662517289073, "grad_norm": 7.053144454956055, "learning_rate": 3.8873520823728294e-05, "log_odds_chosen": 6.129289627075195, "log_odds_ratio": -0.018381556496024132, "logits/chosen": -0.45580482482910156, "logits/rejected": -0.43981999158859253, "logps/chosen": -0.02091745100915432, "logps/rejected": -1.376305341720581, "loss": 2.0763, "nll_loss": 0.5172290802001953, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020917453803122044, "rewards/margins": 0.13553880155086517, "rewards/rejected": -0.13763055205345154, "step": 4342 }, { "epoch": 3.003457814661134, "grad_norm": 6.612039089202881, "learning_rate": 3.8869678807438146e-05, "log_odds_chosen": 6.951261520385742, "log_odds_ratio": -0.010566813871264458, "logits/chosen": -0.4701387286186218, "logits/rejected": -0.5493282675743103, "logps/chosen": -0.008238430134952068, "logps/rejected": -1.044950246810913, "loss": 2.1988, "nll_loss": 0.548647403717041, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008238430600613356, "rewards/margins": 0.10367118567228317, "rewards/rejected": -0.10449503362178802, "step": 4343 }, { "epoch": 3.004149377593361, "grad_norm": 6.696752548217773, "learning_rate": 3.8865836791148e-05, "log_odds_chosen": 8.452653884887695, "log_odds_ratio": -0.001230748021043837, "logits/chosen": -0.7052797079086304, "logits/rejected": -0.7413879632949829, "logps/chosen": -0.0010859024478122592, "logps/rejected": -1.1285423040390015, "loss": 1.9587, "nll_loss": 0.48956286907196045, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010859025496756658, "rewards/margins": 0.11274563521146774, "rewards/rejected": -0.1128542348742485, "step": 4344 }, { "epoch": 3.004840940525588, "grad_norm": 8.277460098266602, "learning_rate": 3.8861994774857844e-05, "log_odds_chosen": 7.86977481842041, "log_odds_ratio": -0.08035603165626526, "logits/chosen": -0.5653095245361328, "logits/rejected": -0.6719416975975037, "logps/chosen": -0.02020619437098503, "logps/rejected": -1.7089112997055054, "loss": 1.8993, "nll_loss": 0.46680140495300293, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020206195767968893, "rewards/margins": 0.16887050867080688, "rewards/rejected": -0.17089113593101501, "step": 4345 }, { "epoch": 3.0055325034578146, "grad_norm": 17.090787887573242, "learning_rate": 3.8858152758567704e-05, "log_odds_chosen": 9.048724174499512, "log_odds_ratio": -0.0008302384521812201, "logits/chosen": -0.6417528986930847, "logits/rejected": -0.7200732231140137, "logps/chosen": -0.0013055673334747553, "logps/rejected": -1.8107833862304688, "loss": 2.562, "nll_loss": 0.6404181122779846, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013055672752670944, "rewards/margins": 0.18094778060913086, "rewards/rejected": -0.18107834458351135, "step": 4346 }, { "epoch": 3.0062240663900415, "grad_norm": 10.707474708557129, "learning_rate": 3.885431074227755e-05, "log_odds_chosen": 9.08885669708252, "log_odds_ratio": -0.0011596616823226213, "logits/chosen": -0.686644971370697, "logits/rejected": -0.7940715551376343, "logps/chosen": -0.009002113714814186, "logps/rejected": -2.2833571434020996, "loss": 1.6825, "nll_loss": 0.42050090432167053, "rewards/accuracies": 1.0, "rewards/chosen": -0.000900211394764483, "rewards/margins": 0.22743549942970276, "rewards/rejected": -0.22833570837974548, "step": 4347 }, { "epoch": 3.0069156293222683, "grad_norm": 13.651171684265137, "learning_rate": 3.88504687259874e-05, "log_odds_chosen": 9.122451782226562, "log_odds_ratio": -0.00026590804918669164, "logits/chosen": -0.4620441794395447, "logits/rejected": -0.5854654312133789, "logps/chosen": -0.0007560051744803786, "logps/rejected": -1.7641313076019287, "loss": 1.5304, "nll_loss": 0.3825651705265045, "rewards/accuracies": 1.0, "rewards/chosen": -7.560051744803786e-05, "rewards/margins": 0.17633754014968872, "rewards/rejected": -0.1764131486415863, "step": 4348 }, { "epoch": 3.007607192254495, "grad_norm": 4.420802593231201, "learning_rate": 3.8846626709697254e-05, "log_odds_chosen": 8.020853042602539, "log_odds_ratio": -0.008768648840487003, "logits/chosen": -0.2722416818141937, "logits/rejected": -0.3450675904750824, "logps/chosen": -0.024905625730752945, "logps/rejected": -1.2258330583572388, "loss": 2.3368, "nll_loss": 0.5833240747451782, "rewards/accuracies": 1.0, "rewards/chosen": -0.002490562153980136, "rewards/margins": 0.12009275704622269, "rewards/rejected": -0.1225833147764206, "step": 4349 }, { "epoch": 3.008298755186722, "grad_norm": 8.147367477416992, "learning_rate": 3.88427846934071e-05, "log_odds_chosen": 8.38794994354248, "log_odds_ratio": -0.0030633790884166956, "logits/chosen": -0.9003145694732666, "logits/rejected": -0.939216673374176, "logps/chosen": -0.0036230748519301414, "logps/rejected": -1.565699577331543, "loss": 1.3701, "nll_loss": 0.3422118127346039, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003623075026553124, "rewards/margins": 0.15620765089988708, "rewards/rejected": -0.1565699726343155, "step": 4350 }, { "epoch": 3.008990318118949, "grad_norm": 7.668023109436035, "learning_rate": 3.883894267711695e-05, "log_odds_chosen": 8.312056541442871, "log_odds_ratio": -0.0012021416332572699, "logits/chosen": -0.7460612058639526, "logits/rejected": -0.786266565322876, "logps/chosen": -0.015112178400158882, "logps/rejected": -1.3443372249603271, "loss": 2.188, "nll_loss": 0.5468809008598328, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015112179098650813, "rewards/margins": 0.13292251527309418, "rewards/rejected": -0.13443374633789062, "step": 4351 }, { "epoch": 3.0096818810511756, "grad_norm": 12.11293888092041, "learning_rate": 3.8835100660826805e-05, "log_odds_chosen": 9.41283893585205, "log_odds_ratio": -0.00010628172458382323, "logits/chosen": -0.8179978132247925, "logits/rejected": -0.8387307524681091, "logps/chosen": -0.00039945554453879595, "logps/rejected": -1.5601049661636353, "loss": 1.4393, "nll_loss": 0.3598126471042633, "rewards/accuracies": 1.0, "rewards/chosen": -3.9945560274645686e-05, "rewards/margins": 0.15597054362297058, "rewards/rejected": -0.1560104936361313, "step": 4352 }, { "epoch": 3.0103734439834025, "grad_norm": 6.373059272766113, "learning_rate": 3.883125864453666e-05, "log_odds_chosen": 8.087642669677734, "log_odds_ratio": -0.003798246616497636, "logits/chosen": -0.5732825994491577, "logits/rejected": -0.6371693015098572, "logps/chosen": -0.006854540202766657, "logps/rejected": -1.7817339897155762, "loss": 1.4208, "nll_loss": 0.35482344031333923, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006854540552012622, "rewards/margins": 0.17748793959617615, "rewards/rejected": -0.17817339301109314, "step": 4353 }, { "epoch": 3.0110650069156293, "grad_norm": 6.119757652282715, "learning_rate": 3.88274166282465e-05, "log_odds_chosen": 10.328543663024902, "log_odds_ratio": -6.733743794029579e-05, "logits/chosen": -0.8365834355354309, "logits/rejected": -0.8045994639396667, "logps/chosen": -0.00036057105171494186, "logps/rejected": -2.1830642223358154, "loss": 1.7374, "nll_loss": 0.43434974551200867, "rewards/accuracies": 1.0, "rewards/chosen": -3.605710662668571e-05, "rewards/margins": 0.21827037632465363, "rewards/rejected": -0.21830643713474274, "step": 4354 }, { "epoch": 3.011756569847856, "grad_norm": 7.220427513122559, "learning_rate": 3.882357461195636e-05, "log_odds_chosen": 8.390373229980469, "log_odds_ratio": -0.010137408040463924, "logits/chosen": -0.5291712880134583, "logits/rejected": -0.6182175278663635, "logps/chosen": -0.021362818777561188, "logps/rejected": -1.3063123226165771, "loss": 1.7929, "nll_loss": 0.4471994936466217, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021362819243222475, "rewards/margins": 0.12849494814872742, "rewards/rejected": -0.1306312382221222, "step": 4355 }, { "epoch": 3.012448132780083, "grad_norm": 10.587180137634277, "learning_rate": 3.881973259566621e-05, "log_odds_chosen": 9.672530174255371, "log_odds_ratio": -0.00017483210831414908, "logits/chosen": -0.6423888802528381, "logits/rejected": -0.6911444664001465, "logps/chosen": -0.0017972304485738277, "logps/rejected": -2.239515781402588, "loss": 1.8757, "nll_loss": 0.4689146876335144, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017972305067814887, "rewards/margins": 0.2237718403339386, "rewards/rejected": -0.2239515781402588, "step": 4356 }, { "epoch": 3.0131396957123098, "grad_norm": 10.566658973693848, "learning_rate": 3.881589057937606e-05, "log_odds_chosen": 9.284326553344727, "log_odds_ratio": -0.0003429109347052872, "logits/chosen": -0.8109344244003296, "logits/rejected": -0.9047459363937378, "logps/chosen": -0.0017176901455968618, "logps/rejected": -1.703169584274292, "loss": 2.3812, "nll_loss": 0.5952752232551575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017176903202198446, "rewards/margins": 0.170145183801651, "rewards/rejected": -0.17031696438789368, "step": 4357 }, { "epoch": 3.0138312586445366, "grad_norm": 17.43169593811035, "learning_rate": 3.881204856308591e-05, "log_odds_chosen": 8.845995903015137, "log_odds_ratio": -0.004415709525346756, "logits/chosen": -0.5009146332740784, "logits/rejected": -0.5920040607452393, "logps/chosen": -0.014210812747478485, "logps/rejected": -2.076862335205078, "loss": 2.4603, "nll_loss": 0.6146366596221924, "rewards/accuracies": 1.0, "rewards/chosen": -0.001421081367880106, "rewards/margins": 0.2062651515007019, "rewards/rejected": -0.20768624544143677, "step": 4358 }, { "epoch": 3.0145228215767634, "grad_norm": 12.506994247436523, "learning_rate": 3.880820654679576e-05, "log_odds_chosen": 7.278022766113281, "log_odds_ratio": -0.028671320527791977, "logits/chosen": -0.5169048309326172, "logits/rejected": -0.5569590330123901, "logps/chosen": -0.008861766196787357, "logps/rejected": -1.2220641374588013, "loss": 2.141, "nll_loss": 0.5323811769485474, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008861765963956714, "rewards/margins": 0.12132023274898529, "rewards/rejected": -0.12220640480518341, "step": 4359 }, { "epoch": 3.0152143845089903, "grad_norm": 9.292475700378418, "learning_rate": 3.880436453050561e-05, "log_odds_chosen": 8.517985343933105, "log_odds_ratio": -0.0006469730869866908, "logits/chosen": -0.5386276245117188, "logits/rejected": -0.565827488899231, "logps/chosen": -0.002869624411687255, "logps/rejected": -1.2151039838790894, "loss": 1.6565, "nll_loss": 0.41406431794166565, "rewards/accuracies": 1.0, "rewards/chosen": -0.00028696245863102376, "rewards/margins": 0.12122343480587006, "rewards/rejected": -0.12151038646697998, "step": 4360 }, { "epoch": 3.015905947441217, "grad_norm": 9.588244438171387, "learning_rate": 3.8800522514215456e-05, "log_odds_chosen": 9.962486267089844, "log_odds_ratio": -0.000979436095803976, "logits/chosen": -0.4506533741950989, "logits/rejected": -0.4827750325202942, "logps/chosen": -0.03247072920203209, "logps/rejected": -2.2011234760284424, "loss": 1.5661, "nll_loss": 0.3914303481578827, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032470731530338526, "rewards/margins": 0.21686527132987976, "rewards/rejected": -0.22011235356330872, "step": 4361 }, { "epoch": 3.016597510373444, "grad_norm": 8.233197212219238, "learning_rate": 3.8796680497925316e-05, "log_odds_chosen": 7.316631317138672, "log_odds_ratio": -0.021387575194239616, "logits/chosen": -0.6702169179916382, "logits/rejected": -0.7465333938598633, "logps/chosen": -0.02479386515915394, "logps/rejected": -1.9944210052490234, "loss": 1.6044, "nll_loss": 0.39896106719970703, "rewards/accuracies": 1.0, "rewards/chosen": -0.00247938628308475, "rewards/margins": 0.19696269929409027, "rewards/rejected": -0.1994420886039734, "step": 4362 }, { "epoch": 3.0172890733056708, "grad_norm": 7.379128456115723, "learning_rate": 3.879283848163516e-05, "log_odds_chosen": 8.85990047454834, "log_odds_ratio": -0.003634555032476783, "logits/chosen": -0.5511636734008789, "logits/rejected": -0.5116143226623535, "logps/chosen": -0.016325172036886215, "logps/rejected": -1.544255256652832, "loss": 1.1823, "nll_loss": 0.295213907957077, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016325172036886215, "rewards/margins": 0.1527930200099945, "rewards/rejected": -0.15442554652690887, "step": 4363 }, { "epoch": 3.0179806362378976, "grad_norm": 5.7828803062438965, "learning_rate": 3.8788996465345014e-05, "log_odds_chosen": 6.473647117614746, "log_odds_ratio": -0.01633561961352825, "logits/chosen": -0.6352428197860718, "logits/rejected": -0.698454737663269, "logps/chosen": -0.008926715701818466, "logps/rejected": -1.0296143293380737, "loss": 1.1327, "nll_loss": 0.2815358638763428, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008926716400310397, "rewards/margins": 0.10206875950098038, "rewards/rejected": -0.10296143591403961, "step": 4364 }, { "epoch": 3.0186721991701244, "grad_norm": 6.8958659172058105, "learning_rate": 3.8785154449054866e-05, "log_odds_chosen": 6.793898105621338, "log_odds_ratio": -0.013819929212331772, "logits/chosen": -0.07012942433357239, "logits/rejected": -0.07956613600254059, "logps/chosen": -0.00711780646815896, "logps/rejected": -0.6190686225891113, "loss": 1.1591, "nll_loss": 0.2883892059326172, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007117806235328317, "rewards/margins": 0.06119508296251297, "rewards/rejected": -0.061906859278678894, "step": 4365 }, { "epoch": 3.0193637621023512, "grad_norm": 6.910161972045898, "learning_rate": 3.878131243276472e-05, "log_odds_chosen": 7.337653160095215, "log_odds_ratio": -0.14091874659061432, "logits/chosen": -0.6717323064804077, "logits/rejected": -0.6785844564437866, "logps/chosen": -0.034647684544324875, "logps/rejected": -1.0071440935134888, "loss": 1.4795, "nll_loss": 0.35579484701156616, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034647686406970024, "rewards/margins": 0.09724964201450348, "rewards/rejected": -0.10071440786123276, "step": 4366 }, { "epoch": 3.020055325034578, "grad_norm": 8.759042739868164, "learning_rate": 3.8777470416474564e-05, "log_odds_chosen": 7.364710807800293, "log_odds_ratio": -0.22978220880031586, "logits/chosen": -0.8016922473907471, "logits/rejected": -0.8354951739311218, "logps/chosen": -0.035079024732112885, "logps/rejected": -1.2885403633117676, "loss": 2.256, "nll_loss": 0.541018545627594, "rewards/accuracies": 0.875, "rewards/chosen": -0.003507902380079031, "rewards/margins": 0.12534615397453308, "rewards/rejected": -0.12885405123233795, "step": 4367 }, { "epoch": 3.020746887966805, "grad_norm": 7.147649765014648, "learning_rate": 3.877362840018442e-05, "log_odds_chosen": 8.82288932800293, "log_odds_ratio": -0.0035906489938497543, "logits/chosen": -0.41625678539276123, "logits/rejected": -0.5004568099975586, "logps/chosen": -0.008884168229997158, "logps/rejected": -2.025074005126953, "loss": 1.6076, "nll_loss": 0.4015321135520935, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008884167764335871, "rewards/margins": 0.2016189694404602, "rewards/rejected": -0.2025073915719986, "step": 4368 }, { "epoch": 3.0214384508990317, "grad_norm": 11.277046203613281, "learning_rate": 3.876978638389427e-05, "log_odds_chosen": 9.12707805633545, "log_odds_ratio": -0.004158929456025362, "logits/chosen": -0.5607892274856567, "logits/rejected": -0.578012228012085, "logps/chosen": -0.0050364332273602486, "logps/rejected": -1.462164282798767, "loss": 1.7723, "nll_loss": 0.44266659021377563, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005036432994529605, "rewards/margins": 0.14571279287338257, "rewards/rejected": -0.14621643722057343, "step": 4369 }, { "epoch": 3.0221300138312586, "grad_norm": 10.114571571350098, "learning_rate": 3.8765944367604115e-05, "log_odds_chosen": 7.488341808319092, "log_odds_ratio": -0.2522977590560913, "logits/chosen": -0.7943065762519836, "logits/rejected": -0.812098503112793, "logps/chosen": -0.03343523293733597, "logps/rejected": -1.2078806161880493, "loss": 2.4229, "nll_loss": 0.5804873704910278, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033435234799981117, "rewards/margins": 0.11744453012943268, "rewards/rejected": -0.12078805267810822, "step": 4370 }, { "epoch": 3.0228215767634854, "grad_norm": 8.294888496398926, "learning_rate": 3.8762102351313974e-05, "log_odds_chosen": 8.21873664855957, "log_odds_ratio": -0.0010850150138139725, "logits/chosen": -0.38183748722076416, "logits/rejected": -0.4458293914794922, "logps/chosen": -0.0054486412554979324, "logps/rejected": -1.4044756889343262, "loss": 1.8935, "nll_loss": 0.4732619524002075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005448641604743898, "rewards/margins": 0.13990271091461182, "rewards/rejected": -0.14044757187366486, "step": 4371 }, { "epoch": 3.0235131396957122, "grad_norm": 9.032965660095215, "learning_rate": 3.875826033502382e-05, "log_odds_chosen": 9.557016372680664, "log_odds_ratio": -0.00015933552640490234, "logits/chosen": -0.6070985198020935, "logits/rejected": -0.653249204158783, "logps/chosen": -0.0006065353518351912, "logps/rejected": -2.034036159515381, "loss": 1.3604, "nll_loss": 0.3400716185569763, "rewards/accuracies": 1.0, "rewards/chosen": -6.06535431870725e-05, "rewards/margins": 0.2033429592847824, "rewards/rejected": -0.20340359210968018, "step": 4372 }, { "epoch": 3.024204702627939, "grad_norm": 9.028538703918457, "learning_rate": 3.875441831873367e-05, "log_odds_chosen": 9.122668266296387, "log_odds_ratio": -0.013028179295361042, "logits/chosen": -0.01854856312274933, "logits/rejected": -0.10948525369167328, "logps/chosen": -0.014808842912316322, "logps/rejected": -1.9937236309051514, "loss": 1.8602, "nll_loss": 0.46375733613967896, "rewards/accuracies": 1.0, "rewards/chosen": -0.001480884151533246, "rewards/margins": 0.1978914886713028, "rewards/rejected": -0.19937236607074738, "step": 4373 }, { "epoch": 3.024896265560166, "grad_norm": 8.094748497009277, "learning_rate": 3.8750576302443524e-05, "log_odds_chosen": 8.073196411132812, "log_odds_ratio": -0.07655221968889236, "logits/chosen": -0.5587270855903625, "logits/rejected": -0.5245088338851929, "logps/chosen": -0.028404507786035538, "logps/rejected": -1.8108826875686646, "loss": 1.9002, "nll_loss": 0.4673946499824524, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028404509648680687, "rewards/margins": 0.17824780941009521, "rewards/rejected": -0.18108826875686646, "step": 4374 }, { "epoch": 3.0255878284923927, "grad_norm": 5.171634674072266, "learning_rate": 3.874673428615338e-05, "log_odds_chosen": 7.644787311553955, "log_odds_ratio": -0.06222674623131752, "logits/chosen": -0.5618513822555542, "logits/rejected": -0.6038928031921387, "logps/chosen": -0.03759719431400299, "logps/rejected": -1.9660907983779907, "loss": 1.4441, "nll_loss": 0.35479259490966797, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037597191985696554, "rewards/margins": 0.19284936785697937, "rewards/rejected": -0.19660907983779907, "step": 4375 }, { "epoch": 3.0262793914246195, "grad_norm": 7.60352087020874, "learning_rate": 3.874289226986322e-05, "log_odds_chosen": 7.690865993499756, "log_odds_ratio": -0.005768823437392712, "logits/chosen": -0.6503806710243225, "logits/rejected": -0.5802209973335266, "logps/chosen": -0.012286531738936901, "logps/rejected": -1.5931379795074463, "loss": 1.3203, "nll_loss": 0.3294871747493744, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012286532437428832, "rewards/margins": 0.15808513760566711, "rewards/rejected": -0.15931379795074463, "step": 4376 }, { "epoch": 3.0269709543568464, "grad_norm": 9.345633506774902, "learning_rate": 3.8739050253573075e-05, "log_odds_chosen": 8.948345184326172, "log_odds_ratio": -0.0007345854537561536, "logits/chosen": -0.46991199254989624, "logits/rejected": -0.5386595129966736, "logps/chosen": -0.018476814031600952, "logps/rejected": -2.32340407371521, "loss": 1.6296, "nll_loss": 0.4073340892791748, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018476813565939665, "rewards/margins": 0.2304927259683609, "rewards/rejected": -0.23234039545059204, "step": 4377 }, { "epoch": 3.027662517289073, "grad_norm": 6.002345561981201, "learning_rate": 3.873520823728293e-05, "log_odds_chosen": 9.55575942993164, "log_odds_ratio": -0.0006717491778545082, "logits/chosen": -0.5274007320404053, "logits/rejected": -0.5790569186210632, "logps/chosen": -0.020701147615909576, "logps/rejected": -2.3770816326141357, "loss": 1.5081, "nll_loss": 0.37695783376693726, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020701151806861162, "rewards/margins": 0.23563805222511292, "rewards/rejected": -0.237708181142807, "step": 4378 }, { "epoch": 3.0283540802213, "grad_norm": 8.811324119567871, "learning_rate": 3.873136622099277e-05, "log_odds_chosen": 9.221470832824707, "log_odds_ratio": -0.010890920646488667, "logits/chosen": -0.4534289836883545, "logits/rejected": -0.5714493989944458, "logps/chosen": -0.01702927052974701, "logps/rejected": -2.3649582862854004, "loss": 1.7555, "nll_loss": 0.4377870261669159, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017029274022206664, "rewards/margins": 0.23479288816452026, "rewards/rejected": -0.23649582266807556, "step": 4379 }, { "epoch": 3.029045643153527, "grad_norm": 8.119233131408691, "learning_rate": 3.872752420470263e-05, "log_odds_chosen": 8.403648376464844, "log_odds_ratio": -0.0031433827243745327, "logits/chosen": -0.6514449119567871, "logits/rejected": -0.6899955868721008, "logps/chosen": -0.06951490044593811, "logps/rejected": -2.216085433959961, "loss": 1.6456, "nll_loss": 0.41109544038772583, "rewards/accuracies": 1.0, "rewards/chosen": -0.006951490417122841, "rewards/margins": 0.2146570384502411, "rewards/rejected": -0.22160851955413818, "step": 4380 }, { "epoch": 3.0297372060857537, "grad_norm": 4.929358959197998, "learning_rate": 3.872368218841248e-05, "log_odds_chosen": 8.164472579956055, "log_odds_ratio": -0.0010731443762779236, "logits/chosen": -0.4625300168991089, "logits/rejected": -0.4479852318763733, "logps/chosen": -0.02008689194917679, "logps/rejected": -1.1178135871887207, "loss": 1.9998, "nll_loss": 0.49984341859817505, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020086888689547777, "rewards/margins": 0.10977266728878021, "rewards/rejected": -0.11178135126829147, "step": 4381 }, { "epoch": 3.0304287690179805, "grad_norm": 15.727904319763184, "learning_rate": 3.871984017212233e-05, "log_odds_chosen": 8.262201309204102, "log_odds_ratio": -0.046532727777957916, "logits/chosen": -0.38712355494499207, "logits/rejected": -0.37175098061561584, "logps/chosen": -0.01136075146496296, "logps/rejected": -1.7719151973724365, "loss": 2.4988, "nll_loss": 0.6200357675552368, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011360751232132316, "rewards/margins": 0.176055446267128, "rewards/rejected": -0.17719152569770813, "step": 4382 }, { "epoch": 3.0311203319502074, "grad_norm": 7.210832595825195, "learning_rate": 3.871599815583218e-05, "log_odds_chosen": 8.842565536499023, "log_odds_ratio": -0.024006277322769165, "logits/chosen": -0.667304277420044, "logits/rejected": -0.7376624941825867, "logps/chosen": -0.009047461673617363, "logps/rejected": -1.5637233257293701, "loss": 1.5166, "nll_loss": 0.37674498558044434, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009047460625879467, "rewards/margins": 0.15546758472919464, "rewards/rejected": -0.1563723385334015, "step": 4383 }, { "epoch": 3.031811894882434, "grad_norm": 15.053751945495605, "learning_rate": 3.8712156139542035e-05, "log_odds_chosen": 9.234689712524414, "log_odds_ratio": -0.00016180895909201354, "logits/chosen": -0.6031422019004822, "logits/rejected": -0.6600396037101746, "logps/chosen": -0.0003273483016528189, "logps/rejected": -1.3932242393493652, "loss": 2.0128, "nll_loss": 0.5031747221946716, "rewards/accuracies": 1.0, "rewards/chosen": -3.2734831620473415e-05, "rewards/margins": 0.13928969204425812, "rewards/rejected": -0.139322429895401, "step": 4384 }, { "epoch": 3.032503457814661, "grad_norm": 8.344961166381836, "learning_rate": 3.870831412325188e-05, "log_odds_chosen": 8.851398468017578, "log_odds_ratio": -0.027447307482361794, "logits/chosen": -0.4169967770576477, "logits/rejected": -0.455474317073822, "logps/chosen": -0.030304603278636932, "logps/rejected": -1.9393913745880127, "loss": 1.1909, "nll_loss": 0.29497459530830383, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030304603278636932, "rewards/margins": 0.19090867042541504, "rewards/rejected": -0.19393913447856903, "step": 4385 }, { "epoch": 3.033195020746888, "grad_norm": 10.690800666809082, "learning_rate": 3.8704472106961733e-05, "log_odds_chosen": 7.3425164222717285, "log_odds_ratio": -0.03778545558452606, "logits/chosen": -0.6645103693008423, "logits/rejected": -0.7565867900848389, "logps/chosen": -0.028321033343672752, "logps/rejected": -1.0251092910766602, "loss": 2.4422, "nll_loss": 0.6067838072776794, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028321032878011465, "rewards/margins": 0.09967882931232452, "rewards/rejected": -0.10251092910766602, "step": 4386 }, { "epoch": 3.0338865836791147, "grad_norm": 12.634204864501953, "learning_rate": 3.8700630090671586e-05, "log_odds_chosen": 8.964178085327148, "log_odds_ratio": -0.0009216453763656318, "logits/chosen": -1.0691150426864624, "logits/rejected": -1.1286835670471191, "logps/chosen": -0.0009471324156038463, "logps/rejected": -1.5427871942520142, "loss": 2.3683, "nll_loss": 0.5919942259788513, "rewards/accuracies": 1.0, "rewards/chosen": -9.471323573961854e-05, "rewards/margins": 0.1541840136051178, "rewards/rejected": -0.1542787253856659, "step": 4387 }, { "epoch": 3.0345781466113415, "grad_norm": 7.034060955047607, "learning_rate": 3.869678807438143e-05, "log_odds_chosen": 8.673445701599121, "log_odds_ratio": -0.03487813100218773, "logits/chosen": -0.5235074758529663, "logits/rejected": -0.5503664612770081, "logps/chosen": -0.013425452634692192, "logps/rejected": -1.4154572486877441, "loss": 1.669, "nll_loss": 0.41377225518226624, "rewards/accuracies": 1.0, "rewards/chosen": -0.001342545379884541, "rewards/margins": 0.14020317792892456, "rewards/rejected": -0.14154571294784546, "step": 4388 }, { "epoch": 3.0352697095435683, "grad_norm": 11.320638656616211, "learning_rate": 3.869294605809129e-05, "log_odds_chosen": 7.365285873413086, "log_odds_ratio": -0.1156717836856842, "logits/chosen": -0.9415791034698486, "logits/rejected": -0.9529377222061157, "logps/chosen": -0.031220735982060432, "logps/rejected": -1.4222886562347412, "loss": 3.6534, "nll_loss": 0.9017861485481262, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031220735982060432, "rewards/margins": 0.13910678029060364, "rewards/rejected": -0.1422288715839386, "step": 4389 }, { "epoch": 3.035961272475795, "grad_norm": 11.250693321228027, "learning_rate": 3.8689104041801136e-05, "log_odds_chosen": 10.075836181640625, "log_odds_ratio": -0.00014684397319797426, "logits/chosen": -0.6689065098762512, "logits/rejected": -0.7600905895233154, "logps/chosen": -0.0005376600893214345, "logps/rejected": -2.088433265686035, "loss": 1.5687, "nll_loss": 0.39217111468315125, "rewards/accuracies": 1.0, "rewards/chosen": -5.376600893214345e-05, "rewards/margins": 0.20878957211971283, "rewards/rejected": -0.20884335041046143, "step": 4390 }, { "epoch": 3.036652835408022, "grad_norm": 6.093531608581543, "learning_rate": 3.868526202551099e-05, "log_odds_chosen": 7.18631649017334, "log_odds_ratio": -0.03731034696102142, "logits/chosen": -0.5766352415084839, "logits/rejected": -0.6117807626724243, "logps/chosen": -0.022443819791078568, "logps/rejected": -1.7518061399459839, "loss": 2.0163, "nll_loss": 0.5003336668014526, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022443821653723717, "rewards/margins": 0.17293623089790344, "rewards/rejected": -0.1751806139945984, "step": 4391 }, { "epoch": 3.037344398340249, "grad_norm": 8.917433738708496, "learning_rate": 3.868142000922084e-05, "log_odds_chosen": 9.447513580322266, "log_odds_ratio": -0.0014931621262803674, "logits/chosen": -0.3905342221260071, "logits/rejected": -0.39794474840164185, "logps/chosen": -0.015030551701784134, "logps/rejected": -1.5086308717727661, "loss": 1.3014, "nll_loss": 0.32520678639411926, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015030552167445421, "rewards/margins": 0.1493600308895111, "rewards/rejected": -0.15086308121681213, "step": 4392 }, { "epoch": 3.0380359612724757, "grad_norm": 7.268914222717285, "learning_rate": 3.8677577992930694e-05, "log_odds_chosen": 7.071560859680176, "log_odds_ratio": -0.10249079018831253, "logits/chosen": -0.7327741384506226, "logits/rejected": -0.772663414478302, "logps/chosen": -0.01829216629266739, "logps/rejected": -1.123624563217163, "loss": 1.1595, "nll_loss": 0.2796328663825989, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018292168388143182, "rewards/margins": 0.11053324490785599, "rewards/rejected": -0.11236246675252914, "step": 4393 }, { "epoch": 3.0387275242047025, "grad_norm": 9.540959358215332, "learning_rate": 3.867373597664054e-05, "log_odds_chosen": 7.865304946899414, "log_odds_ratio": -0.01287818793207407, "logits/chosen": -0.20857594907283783, "logits/rejected": -0.26517733931541443, "logps/chosen": -0.005776575766503811, "logps/rejected": -1.1927257776260376, "loss": 1.668, "nll_loss": 0.41570839285850525, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005776575417257845, "rewards/margins": 0.11869492381811142, "rewards/rejected": -0.11927258223295212, "step": 4394 }, { "epoch": 3.0394190871369293, "grad_norm": 11.04859447479248, "learning_rate": 3.866989396035039e-05, "log_odds_chosen": 8.835795402526855, "log_odds_ratio": -0.0005848797736689448, "logits/chosen": -0.9367823004722595, "logits/rejected": -0.9795846939086914, "logps/chosen": -0.0011461263056844473, "logps/rejected": -1.7380290031433105, "loss": 1.6836, "nll_loss": 0.4208444654941559, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011461263056844473, "rewards/margins": 0.17368827760219574, "rewards/rejected": -0.17380289733409882, "step": 4395 }, { "epoch": 3.040110650069156, "grad_norm": 8.23969554901123, "learning_rate": 3.8666051944060244e-05, "log_odds_chosen": 8.328208923339844, "log_odds_ratio": -0.09206652641296387, "logits/chosen": -0.5694941282272339, "logits/rejected": -0.6927324533462524, "logps/chosen": -0.019737619906663895, "logps/rejected": -1.4263737201690674, "loss": 1.2315, "nll_loss": 0.298658162355423, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019737619441002607, "rewards/margins": 0.14066360890865326, "rewards/rejected": -0.14263737201690674, "step": 4396 }, { "epoch": 3.040802213001383, "grad_norm": 7.260254383087158, "learning_rate": 3.866220992777009e-05, "log_odds_chosen": 4.9062933921813965, "log_odds_ratio": -0.148776113986969, "logits/chosen": 0.028328221291303635, "logits/rejected": -0.10123750567436218, "logps/chosen": -0.11511752009391785, "logps/rejected": -1.450113296508789, "loss": 1.7417, "nll_loss": 0.42053550481796265, "rewards/accuracies": 1.0, "rewards/chosen": -0.011511752381920815, "rewards/margins": 0.13349959254264832, "rewards/rejected": -0.14501133561134338, "step": 4397 }, { "epoch": 3.04149377593361, "grad_norm": 9.752484321594238, "learning_rate": 3.865836791147995e-05, "log_odds_chosen": 9.574945449829102, "log_odds_ratio": -0.00010855550499400124, "logits/chosen": -0.7012908458709717, "logits/rejected": -0.826108992099762, "logps/chosen": -0.00035735705750994384, "logps/rejected": -1.6022120714187622, "loss": 2.4012, "nll_loss": 0.6002871990203857, "rewards/accuracies": 1.0, "rewards/chosen": -3.5735705750994384e-05, "rewards/margins": 0.16018547117710114, "rewards/rejected": -0.16022121906280518, "step": 4398 }, { "epoch": 3.0421853388658366, "grad_norm": 9.122933387756348, "learning_rate": 3.8654525895189795e-05, "log_odds_chosen": 9.139860153198242, "log_odds_ratio": -0.0013590446906164289, "logits/chosen": -0.5273385643959045, "logits/rejected": -0.6435430645942688, "logps/chosen": -0.0033525507897138596, "logps/rejected": -2.115792751312256, "loss": 1.7969, "nll_loss": 0.44908446073532104, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033525508479215205, "rewards/margins": 0.21124403178691864, "rewards/rejected": -0.21157927811145782, "step": 4399 }, { "epoch": 3.0428769017980635, "grad_norm": 9.778103828430176, "learning_rate": 3.865068387889965e-05, "log_odds_chosen": 9.495455741882324, "log_odds_ratio": -0.0002024644345510751, "logits/chosen": -0.9759871959686279, "logits/rejected": -0.9766150712966919, "logps/chosen": -0.0011281885672360659, "logps/rejected": -1.9828314781188965, "loss": 1.5857, "nll_loss": 0.39639735221862793, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011281885235803202, "rewards/margins": 0.19817033410072327, "rewards/rejected": -0.1982831358909607, "step": 4400 }, { "epoch": 3.0435684647302903, "grad_norm": 6.08988618850708, "learning_rate": 3.86468418626095e-05, "log_odds_chosen": 7.714538097381592, "log_odds_ratio": -0.015934668481349945, "logits/chosen": -0.5180322527885437, "logits/rejected": -0.47100329399108887, "logps/chosen": -0.005107459146529436, "logps/rejected": -0.8941366672515869, "loss": 1.3783, "nll_loss": 0.34297019243240356, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005107459728606045, "rewards/margins": 0.08890292048454285, "rewards/rejected": -0.08941367268562317, "step": 4401 }, { "epoch": 3.044260027662517, "grad_norm": 8.789294242858887, "learning_rate": 3.864299984631935e-05, "log_odds_chosen": 9.35297966003418, "log_odds_ratio": -0.00039945071330294013, "logits/chosen": -0.43555620312690735, "logits/rejected": -0.5225505828857422, "logps/chosen": -0.008832286112010479, "logps/rejected": -2.5036051273345947, "loss": 1.5038, "nll_loss": 0.3759007751941681, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008832286112010479, "rewards/margins": 0.2494772970676422, "rewards/rejected": -0.25036054849624634, "step": 4402 }, { "epoch": 3.044951590594744, "grad_norm": 8.463292121887207, "learning_rate": 3.86391578300292e-05, "log_odds_chosen": 9.686382293701172, "log_odds_ratio": -0.00024558964651077986, "logits/chosen": -0.31426572799682617, "logits/rejected": -0.4136694073677063, "logps/chosen": -0.017539246007800102, "logps/rejected": -2.283996105194092, "loss": 2.083, "nll_loss": 0.5207360982894897, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017539247637614608, "rewards/margins": 0.22664567828178406, "rewards/rejected": -0.2283996194601059, "step": 4403 }, { "epoch": 3.045643153526971, "grad_norm": 7.6976318359375, "learning_rate": 3.863531581373905e-05, "log_odds_chosen": 9.104455947875977, "log_odds_ratio": -0.0006909758085384965, "logits/chosen": -0.6387878656387329, "logits/rejected": -0.6774571537971497, "logps/chosen": -0.006374065764248371, "logps/rejected": -2.3529205322265625, "loss": 1.3831, "nll_loss": 0.34571653604507446, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006374065997079015, "rewards/margins": 0.23465465009212494, "rewards/rejected": -0.23529204726219177, "step": 4404 }, { "epoch": 3.0463347164591976, "grad_norm": 8.895613670349121, "learning_rate": 3.86314737974489e-05, "log_odds_chosen": 9.99482536315918, "log_odds_ratio": -0.00017433454922866076, "logits/chosen": -0.6106169819831848, "logits/rejected": -0.6616973876953125, "logps/chosen": -0.008926170878112316, "logps/rejected": -2.503861427307129, "loss": 1.3169, "nll_loss": 0.32919979095458984, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008926171576604247, "rewards/margins": 0.24949350953102112, "rewards/rejected": -0.25038614869117737, "step": 4405 }, { "epoch": 3.0470262793914245, "grad_norm": 13.473219871520996, "learning_rate": 3.862763178115875e-05, "log_odds_chosen": 5.710055351257324, "log_odds_ratio": -0.2638789713382721, "logits/chosen": -0.42658939957618713, "logits/rejected": -0.46941259503364563, "logps/chosen": -0.05069947615265846, "logps/rejected": -1.32853364944458, "loss": 2.2273, "nll_loss": 0.5304248332977295, "rewards/accuracies": 0.875, "rewards/chosen": -0.005069947801530361, "rewards/margins": 0.1277834177017212, "rewards/rejected": -0.13285337388515472, "step": 4406 }, { "epoch": 3.0477178423236513, "grad_norm": 11.018197059631348, "learning_rate": 3.862378976486861e-05, "log_odds_chosen": 9.014132499694824, "log_odds_ratio": -0.0004934677272103727, "logits/chosen": -0.33915597200393677, "logits/rejected": -0.4539306163787842, "logps/chosen": -0.0013354574330151081, "logps/rejected": -1.8095229864120483, "loss": 1.4394, "nll_loss": 0.35978925228118896, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001335457491222769, "rewards/margins": 0.18081875145435333, "rewards/rejected": -0.1809522956609726, "step": 4407 }, { "epoch": 3.048409405255878, "grad_norm": 8.081323623657227, "learning_rate": 3.861994774857845e-05, "log_odds_chosen": 9.20877742767334, "log_odds_ratio": -0.00035140541149303317, "logits/chosen": -0.569820761680603, "logits/rejected": -0.685656726360321, "logps/chosen": -0.001261628814972937, "logps/rejected": -1.6668808460235596, "loss": 1.6334, "nll_loss": 0.4083223342895508, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012616287858691067, "rewards/margins": 0.16656193137168884, "rewards/rejected": -0.16668808460235596, "step": 4408 }, { "epoch": 3.049100968188105, "grad_norm": 13.364263534545898, "learning_rate": 3.8616105732288306e-05, "log_odds_chosen": 9.914350509643555, "log_odds_ratio": -0.0038212628569453955, "logits/chosen": -0.30686965584754944, "logits/rejected": -0.433027446269989, "logps/chosen": -0.007338955998420715, "logps/rejected": -2.2491300106048584, "loss": 1.7598, "nll_loss": 0.43957746028900146, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007338955765590072, "rewards/margins": 0.22417908906936646, "rewards/rejected": -0.22491300106048584, "step": 4409 }, { "epoch": 3.0497925311203318, "grad_norm": 6.702042102813721, "learning_rate": 3.861226371599816e-05, "log_odds_chosen": 7.66822624206543, "log_odds_ratio": -0.03534835949540138, "logits/chosen": -0.4316217303276062, "logits/rejected": -0.48696446418762207, "logps/chosen": -0.04215482994914055, "logps/rejected": -1.7758280038833618, "loss": 1.3272, "nll_loss": 0.3282645344734192, "rewards/accuracies": 1.0, "rewards/chosen": -0.004215483088046312, "rewards/margins": 0.17336732149124146, "rewards/rejected": -0.17758280038833618, "step": 4410 }, { "epoch": 3.0504840940525586, "grad_norm": 7.669111251831055, "learning_rate": 3.860842169970801e-05, "log_odds_chosen": 8.917407989501953, "log_odds_ratio": -0.00028047675732523203, "logits/chosen": -0.3156546354293823, "logits/rejected": -0.37852245569229126, "logps/chosen": -0.0016239421674981713, "logps/rejected": -1.795964241027832, "loss": 1.3518, "nll_loss": 0.3379298448562622, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016239422257058322, "rewards/margins": 0.17943403124809265, "rewards/rejected": -0.1795964241027832, "step": 4411 }, { "epoch": 3.0511756569847854, "grad_norm": 12.863265991210938, "learning_rate": 3.8604579683417856e-05, "log_odds_chosen": 8.214728355407715, "log_odds_ratio": -0.001312942593358457, "logits/chosen": -0.463234543800354, "logits/rejected": -0.5421000719070435, "logps/chosen": -0.0032501136884093285, "logps/rejected": -1.4576647281646729, "loss": 1.573, "nll_loss": 0.3931177854537964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032501138048246503, "rewards/margins": 0.145441472530365, "rewards/rejected": -0.1457664966583252, "step": 4412 }, { "epoch": 3.0518672199170123, "grad_norm": 12.542684555053711, "learning_rate": 3.860073766712771e-05, "log_odds_chosen": 9.891372680664062, "log_odds_ratio": -0.0006059879087843001, "logits/chosen": -0.6966993808746338, "logits/rejected": -0.69878089427948, "logps/chosen": -0.0019379917066544294, "logps/rejected": -2.054600238800049, "loss": 1.5864, "nll_loss": 0.39654040336608887, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019379917648620903, "rewards/margins": 0.20526623725891113, "rewards/rejected": -0.20546004176139832, "step": 4413 }, { "epoch": 3.052558782849239, "grad_norm": 14.597355842590332, "learning_rate": 3.859689565083756e-05, "log_odds_chosen": 9.579411506652832, "log_odds_ratio": -0.0002695823786780238, "logits/chosen": -0.38167479634284973, "logits/rejected": -0.4428882598876953, "logps/chosen": -0.0002935394586529583, "logps/rejected": -1.5735745429992676, "loss": 1.9326, "nll_loss": 0.4831179082393646, "rewards/accuracies": 1.0, "rewards/chosen": -2.935394695668947e-05, "rewards/margins": 0.15732811391353607, "rewards/rejected": -0.15735746920108795, "step": 4414 }, { "epoch": 3.053250345781466, "grad_norm": 7.0043559074401855, "learning_rate": 3.8593053634547414e-05, "log_odds_chosen": 9.005176544189453, "log_odds_ratio": -0.04333890229463577, "logits/chosen": -0.7164819836616516, "logits/rejected": -0.7455224394798279, "logps/chosen": -0.01237468235194683, "logps/rejected": -2.378635883331299, "loss": 1.8281, "nll_loss": 0.45269614458084106, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012374682119116187, "rewards/margins": 0.23662608861923218, "rewards/rejected": -0.23786357045173645, "step": 4415 }, { "epoch": 3.0539419087136928, "grad_norm": 6.635776996612549, "learning_rate": 3.8589211618257266e-05, "log_odds_chosen": 9.555427551269531, "log_odds_ratio": -0.0004285011673346162, "logits/chosen": -0.5694887042045593, "logits/rejected": -0.6285260915756226, "logps/chosen": -0.0004087548586539924, "logps/rejected": -1.590221643447876, "loss": 1.5202, "nll_loss": 0.3800092339515686, "rewards/accuracies": 1.0, "rewards/chosen": -4.087548586539924e-05, "rewards/margins": 0.1589812934398651, "rewards/rejected": -0.15902216732501984, "step": 4416 }, { "epoch": 3.0546334716459196, "grad_norm": 9.27367877960205, "learning_rate": 3.858536960196711e-05, "log_odds_chosen": 7.5078935623168945, "log_odds_ratio": -0.0073926495388150215, "logits/chosen": -0.6996981501579285, "logits/rejected": -0.7687663435935974, "logps/chosen": -0.00571818882599473, "logps/rejected": -1.2903776168823242, "loss": 2.3679, "nll_loss": 0.5912336111068726, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005718189058825374, "rewards/margins": 0.128465935587883, "rewards/rejected": -0.1290377676486969, "step": 4417 }, { "epoch": 3.0553250345781464, "grad_norm": 9.297929763793945, "learning_rate": 3.8581527585676964e-05, "log_odds_chosen": 8.047447204589844, "log_odds_ratio": -0.012478945776820183, "logits/chosen": -0.6040946841239929, "logits/rejected": -0.671326756477356, "logps/chosen": -0.014846572652459145, "logps/rejected": -1.8723747730255127, "loss": 1.5809, "nll_loss": 0.3939892053604126, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014846572885289788, "rewards/margins": 0.18575282394886017, "rewards/rejected": -0.18723748624324799, "step": 4418 }, { "epoch": 3.0560165975103732, "grad_norm": 8.1918306350708, "learning_rate": 3.857768556938682e-05, "log_odds_chosen": 9.591246604919434, "log_odds_ratio": -0.00013377962750382721, "logits/chosen": -0.23663687705993652, "logits/rejected": -0.28745290637016296, "logps/chosen": -0.0005562568549066782, "logps/rejected": -1.7879509925842285, "loss": 2.0461, "nll_loss": 0.5115119218826294, "rewards/accuracies": 1.0, "rewards/chosen": -5.562568549066782e-05, "rewards/margins": 0.17873947322368622, "rewards/rejected": -0.17879509925842285, "step": 4419 }, { "epoch": 3.0567081604426, "grad_norm": 4.319944858551025, "learning_rate": 3.857384355309667e-05, "log_odds_chosen": 8.3831787109375, "log_odds_ratio": -0.004961658269166946, "logits/chosen": -0.5148369073867798, "logits/rejected": -0.5669583678245544, "logps/chosen": -0.006720840930938721, "logps/rejected": -1.406997561454773, "loss": 1.7634, "nll_loss": 0.4403529167175293, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006720841629430652, "rewards/margins": 0.14002767205238342, "rewards/rejected": -0.14069975912570953, "step": 4420 }, { "epoch": 3.057399723374827, "grad_norm": 9.564262390136719, "learning_rate": 3.8570001536806515e-05, "log_odds_chosen": 9.466020584106445, "log_odds_ratio": -0.012601032853126526, "logits/chosen": -1.022118330001831, "logits/rejected": -1.0248609781265259, "logps/chosen": -0.004576122388243675, "logps/rejected": -1.540669322013855, "loss": 1.5513, "nll_loss": 0.3865562677383423, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004576121864374727, "rewards/margins": 0.15360932052135468, "rewards/rejected": -0.15406693518161774, "step": 4421 }, { "epoch": 3.0580912863070537, "grad_norm": 11.14309310913086, "learning_rate": 3.856615952051637e-05, "log_odds_chosen": 8.333141326904297, "log_odds_ratio": -0.029943065717816353, "logits/chosen": -0.7349153161048889, "logits/rejected": -0.7577260732650757, "logps/chosen": -0.009285441599786282, "logps/rejected": -1.486148715019226, "loss": 1.6955, "nll_loss": 0.4208747148513794, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009285442065447569, "rewards/margins": 0.14768633246421814, "rewards/rejected": -0.14861486852169037, "step": 4422 }, { "epoch": 3.0587828492392806, "grad_norm": 9.059833526611328, "learning_rate": 3.856231750422622e-05, "log_odds_chosen": 9.1171875, "log_odds_ratio": -0.010607258416712284, "logits/chosen": -0.42079970240592957, "logits/rejected": -0.5635207295417786, "logps/chosen": -0.004433733876794577, "logps/rejected": -1.4279680252075195, "loss": 1.6389, "nll_loss": 0.40866702795028687, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044337339932098985, "rewards/margins": 0.14235343039035797, "rewards/rejected": -0.14279679954051971, "step": 4423 }, { "epoch": 3.0594744121715074, "grad_norm": 8.973243713378906, "learning_rate": 3.855847548793607e-05, "log_odds_chosen": 7.946057319641113, "log_odds_ratio": -0.0021891689393669367, "logits/chosen": -1.1421451568603516, "logits/rejected": -1.0701775550842285, "logps/chosen": -0.015628967434167862, "logps/rejected": -1.5090515613555908, "loss": 1.647, "nll_loss": 0.41152113676071167, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015628967666998506, "rewards/margins": 0.14934225380420685, "rewards/rejected": -0.15090514719486237, "step": 4424 }, { "epoch": 3.0601659751037342, "grad_norm": 11.055354118347168, "learning_rate": 3.8554633471645925e-05, "log_odds_chosen": 6.188984394073486, "log_odds_ratio": -0.10389276593923569, "logits/chosen": -0.5800085067749023, "logits/rejected": -0.6050304174423218, "logps/chosen": -0.0289864894002676, "logps/rejected": -0.9998306035995483, "loss": 1.4891, "nll_loss": 0.3618795573711395, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028986490797251463, "rewards/margins": 0.0970844104886055, "rewards/rejected": -0.09998306632041931, "step": 4425 }, { "epoch": 3.060857538035961, "grad_norm": 26.787431716918945, "learning_rate": 3.855079145535577e-05, "log_odds_chosen": 9.614953994750977, "log_odds_ratio": -0.0001302398304687813, "logits/chosen": -0.6753515601158142, "logits/rejected": -0.6884777545928955, "logps/chosen": -0.0004761649470310658, "logps/rejected": -1.7124427556991577, "loss": 2.7306, "nll_loss": 0.6826401948928833, "rewards/accuracies": 1.0, "rewards/chosen": -4.761649324791506e-05, "rewards/margins": 0.17119666934013367, "rewards/rejected": -0.171244278550148, "step": 4426 }, { "epoch": 3.061549100968188, "grad_norm": 9.503124237060547, "learning_rate": 3.854694943906562e-05, "log_odds_chosen": 6.231769561767578, "log_odds_ratio": -0.08688283711671829, "logits/chosen": -0.7942008972167969, "logits/rejected": -0.7996950149536133, "logps/chosen": -0.14525654911994934, "logps/rejected": -1.7236475944519043, "loss": 2.1466, "nll_loss": 0.5279530882835388, "rewards/accuracies": 1.0, "rewards/chosen": -0.01452565286308527, "rewards/margins": 0.1578390896320343, "rewards/rejected": -0.172364741563797, "step": 4427 }, { "epoch": 3.0622406639004147, "grad_norm": 4.853358268737793, "learning_rate": 3.8543107422775475e-05, "log_odds_chosen": 8.784786224365234, "log_odds_ratio": -0.000326198001857847, "logits/chosen": -0.4402710795402527, "logits/rejected": -0.499617338180542, "logps/chosen": -0.006046361289918423, "logps/rejected": -1.3748338222503662, "loss": 1.959, "nll_loss": 0.4897170066833496, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006046361522749066, "rewards/margins": 0.13687875866889954, "rewards/rejected": -0.1374833881855011, "step": 4428 }, { "epoch": 3.0629322268326415, "grad_norm": 5.846126079559326, "learning_rate": 3.853926540648533e-05, "log_odds_chosen": 8.772059440612793, "log_odds_ratio": -0.0014638709835708141, "logits/chosen": -0.651498556137085, "logits/rejected": -0.7316693663597107, "logps/chosen": -0.01148604042828083, "logps/rejected": -1.9184293746948242, "loss": 1.7238, "nll_loss": 0.43079331517219543, "rewards/accuracies": 1.0, "rewards/chosen": -0.00114860397297889, "rewards/margins": 0.19069434702396393, "rewards/rejected": -0.1918429434299469, "step": 4429 }, { "epoch": 3.0636237897648684, "grad_norm": 7.701274871826172, "learning_rate": 3.853542339019517e-05, "log_odds_chosen": 9.660760879516602, "log_odds_ratio": -0.0004473314620554447, "logits/chosen": -0.6717379093170166, "logits/rejected": -0.7057700157165527, "logps/chosen": -0.0006988372188061476, "logps/rejected": -1.5927550792694092, "loss": 1.4822, "nll_loss": 0.37049365043640137, "rewards/accuracies": 1.0, "rewards/chosen": -6.98837247909978e-05, "rewards/margins": 0.1592056155204773, "rewards/rejected": -0.15927551686763763, "step": 4430 }, { "epoch": 3.064315352697095, "grad_norm": 10.977331161499023, "learning_rate": 3.853158137390503e-05, "log_odds_chosen": 9.710474014282227, "log_odds_ratio": -0.0019115728791803122, "logits/chosen": -0.3936481773853302, "logits/rejected": -0.5065484046936035, "logps/chosen": -0.0009990218095481396, "logps/rejected": -1.7076680660247803, "loss": 2.1401, "nll_loss": 0.5348359942436218, "rewards/accuracies": 1.0, "rewards/chosen": -9.990217949962243e-05, "rewards/margins": 0.17066690325737, "rewards/rejected": -0.17076681554317474, "step": 4431 }, { "epoch": 3.0650069156293225, "grad_norm": 7.184093475341797, "learning_rate": 3.852773935761488e-05, "log_odds_chosen": 8.461292266845703, "log_odds_ratio": -0.015264814719557762, "logits/chosen": -0.4620281457901001, "logits/rejected": -0.5182569026947021, "logps/chosen": -0.01789667457342148, "logps/rejected": -1.422250747680664, "loss": 1.4042, "nll_loss": 0.3495308458805084, "rewards/accuracies": 1.0, "rewards/chosen": -0.001789667527191341, "rewards/margins": 0.14043541252613068, "rewards/rejected": -0.14222508668899536, "step": 4432 }, { "epoch": 3.0656984785615493, "grad_norm": 14.419909477233887, "learning_rate": 3.852389734132473e-05, "log_odds_chosen": 7.307796478271484, "log_odds_ratio": -0.04180353879928589, "logits/chosen": -0.6438498497009277, "logits/rejected": -0.7512152194976807, "logps/chosen": -0.013369385153055191, "logps/rejected": -1.3575177192687988, "loss": 1.801, "nll_loss": 0.44607308506965637, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013369384687393904, "rewards/margins": 0.13441482186317444, "rewards/rejected": -0.13575176894664764, "step": 4433 }, { "epoch": 3.066390041493776, "grad_norm": 17.058692932128906, "learning_rate": 3.852005532503458e-05, "log_odds_chosen": 7.7271904945373535, "log_odds_ratio": -0.015360197052359581, "logits/chosen": -0.5677446722984314, "logits/rejected": -0.7343844771385193, "logps/chosen": -0.021658003330230713, "logps/rejected": -1.5284223556518555, "loss": 2.6219, "nll_loss": 0.6539467573165894, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021658004261553288, "rewards/margins": 0.1506764441728592, "rewards/rejected": -0.15284225344657898, "step": 4434 }, { "epoch": 3.067081604426003, "grad_norm": 7.667229175567627, "learning_rate": 3.851621330874443e-05, "log_odds_chosen": 8.72133731842041, "log_odds_ratio": -0.0008878376102074981, "logits/chosen": -0.579259991645813, "logits/rejected": -0.5156400799751282, "logps/chosen": -0.01773679256439209, "logps/rejected": -1.6515758037567139, "loss": 1.635, "nll_loss": 0.40866297483444214, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017736791633069515, "rewards/margins": 0.16338390111923218, "rewards/rejected": -0.16515758633613586, "step": 4435 }, { "epoch": 3.06777316735823, "grad_norm": 10.078861236572266, "learning_rate": 3.851237129245428e-05, "log_odds_chosen": 9.7711763381958, "log_odds_ratio": -0.0007375985151156783, "logits/chosen": -0.965905487537384, "logits/rejected": -1.0674183368682861, "logps/chosen": -0.0014153417432680726, "logps/rejected": -2.040693759918213, "loss": 2.9026, "nll_loss": 0.7255856990814209, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001415341830579564, "rewards/margins": 0.20392782986164093, "rewards/rejected": -0.2040693610906601, "step": 4436 }, { "epoch": 3.0684647302904566, "grad_norm": 8.490081787109375, "learning_rate": 3.8508529276164134e-05, "log_odds_chosen": 9.745711326599121, "log_odds_ratio": -0.0002596020931378007, "logits/chosen": -0.6246272921562195, "logits/rejected": -0.7017021179199219, "logps/chosen": -0.01125405728816986, "logps/rejected": -2.0048890113830566, "loss": 1.1157, "nll_loss": 0.2789047956466675, "rewards/accuracies": 1.0, "rewards/chosen": -0.001125405658967793, "rewards/margins": 0.19936349987983704, "rewards/rejected": -0.20048891007900238, "step": 4437 }, { "epoch": 3.0691562932226835, "grad_norm": 10.923334121704102, "learning_rate": 3.8504687259873986e-05, "log_odds_chosen": 9.16975212097168, "log_odds_ratio": -0.00021780317183583975, "logits/chosen": -0.8082156777381897, "logits/rejected": -0.8127976059913635, "logps/chosen": -0.0005995776737108827, "logps/rejected": -1.327314853668213, "loss": 1.7019, "nll_loss": 0.4254598319530487, "rewards/accuracies": 1.0, "rewards/chosen": -5.995776882627979e-05, "rewards/margins": 0.132671520113945, "rewards/rejected": -0.13273146748542786, "step": 4438 }, { "epoch": 3.0698478561549103, "grad_norm": 7.313838958740234, "learning_rate": 3.850084524358383e-05, "log_odds_chosen": 8.235380172729492, "log_odds_ratio": -0.005429758690297604, "logits/chosen": -0.5990132093429565, "logits/rejected": -0.6255252361297607, "logps/chosen": -0.004577214829623699, "logps/rejected": -1.3805031776428223, "loss": 1.5167, "nll_loss": 0.3786201477050781, "rewards/accuracies": 1.0, "rewards/chosen": -0.00045772147132083774, "rewards/margins": 0.1375925987958908, "rewards/rejected": -0.13805033266544342, "step": 4439 }, { "epoch": 3.070539419087137, "grad_norm": 14.503705024719238, "learning_rate": 3.849700322729369e-05, "log_odds_chosen": 9.288135528564453, "log_odds_ratio": -0.0004684936720877886, "logits/chosen": -0.7211368680000305, "logits/rejected": -0.7905447483062744, "logps/chosen": -0.001473725656978786, "logps/rejected": -1.8873465061187744, "loss": 1.8446, "nll_loss": 0.4611014127731323, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014737255696672946, "rewards/margins": 0.1885872781276703, "rewards/rejected": -0.18873465061187744, "step": 4440 }, { "epoch": 3.071230982019364, "grad_norm": 9.53628921508789, "learning_rate": 3.8493161211003537e-05, "log_odds_chosen": 9.065003395080566, "log_odds_ratio": -0.022357532754540443, "logits/chosen": -0.8508001565933228, "logits/rejected": -0.8630539178848267, "logps/chosen": -0.005625884048640728, "logps/rejected": -1.6689910888671875, "loss": 2.0876, "nll_loss": 0.5196753144264221, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005625883350148797, "rewards/margins": 0.1663365215063095, "rewards/rejected": -0.16689911484718323, "step": 4441 }, { "epoch": 3.071922544951591, "grad_norm": 8.680511474609375, "learning_rate": 3.848931919471339e-05, "log_odds_chosen": 8.122732162475586, "log_odds_ratio": -0.0008864381816238165, "logits/chosen": -0.891105055809021, "logits/rejected": -0.9033305048942566, "logps/chosen": -0.005982173141092062, "logps/rejected": -0.9022830724716187, "loss": 1.7817, "nll_loss": 0.44534575939178467, "rewards/accuracies": 1.0, "rewards/chosen": -0.000598217302467674, "rewards/margins": 0.08963009715080261, "rewards/rejected": -0.09022831171751022, "step": 4442 }, { "epoch": 3.0726141078838176, "grad_norm": 10.046014785766602, "learning_rate": 3.848547717842324e-05, "log_odds_chosen": 8.835466384887695, "log_odds_ratio": -0.0003820597776211798, "logits/chosen": -0.8449460864067078, "logits/rejected": -0.8626073598861694, "logps/chosen": -0.0031728330068290234, "logps/rejected": -1.5975279808044434, "loss": 1.8497, "nll_loss": 0.46238836646080017, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003172833239659667, "rewards/margins": 0.15943552553653717, "rewards/rejected": -0.15975281596183777, "step": 4443 }, { "epoch": 3.0733056708160444, "grad_norm": 12.780777931213379, "learning_rate": 3.848163516213309e-05, "log_odds_chosen": 8.433425903320312, "log_odds_ratio": -0.009119795635342598, "logits/chosen": -1.046372890472412, "logits/rejected": -1.0667132139205933, "logps/chosen": -0.008828721009194851, "logps/rejected": -1.5524736642837524, "loss": 2.306, "nll_loss": 0.575589120388031, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008828719728626311, "rewards/margins": 0.15436449646949768, "rewards/rejected": -0.15524739027023315, "step": 4444 }, { "epoch": 3.0739972337482713, "grad_norm": 9.021060943603516, "learning_rate": 3.847779314584294e-05, "log_odds_chosen": 9.643104553222656, "log_odds_ratio": -0.0003110415127594024, "logits/chosen": -0.7924270033836365, "logits/rejected": -0.8385024070739746, "logps/chosen": -0.010611020028591156, "logps/rejected": -2.142685651779175, "loss": 2.2366, "nll_loss": 0.5591127872467041, "rewards/accuracies": 1.0, "rewards/chosen": -0.00106110202614218, "rewards/margins": 0.2132074534893036, "rewards/rejected": -0.21426856517791748, "step": 4445 }, { "epoch": 3.074688796680498, "grad_norm": 13.978005409240723, "learning_rate": 3.847395112955279e-05, "log_odds_chosen": 9.722139358520508, "log_odds_ratio": -0.00034077069722115993, "logits/chosen": -0.8204346895217896, "logits/rejected": -0.9325641393661499, "logps/chosen": -0.0008741967030800879, "logps/rejected": -1.7474991083145142, "loss": 1.6533, "nll_loss": 0.41330283880233765, "rewards/accuracies": 1.0, "rewards/chosen": -8.741967030800879e-05, "rewards/margins": 0.1746625006198883, "rewards/rejected": -0.1747499257326126, "step": 4446 }, { "epoch": 3.075380359612725, "grad_norm": 10.52839469909668, "learning_rate": 3.8470109113262644e-05, "log_odds_chosen": 9.051468849182129, "log_odds_ratio": -0.0009590685949660838, "logits/chosen": -0.9662353992462158, "logits/rejected": -1.0331647396087646, "logps/chosen": -0.0024615302681922913, "logps/rejected": -1.8895854949951172, "loss": 1.9556, "nll_loss": 0.4887927770614624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002461530384607613, "rewards/margins": 0.18871241807937622, "rewards/rejected": -0.1889585554599762, "step": 4447 }, { "epoch": 3.0760719225449518, "grad_norm": 10.936558723449707, "learning_rate": 3.846626709697249e-05, "log_odds_chosen": 8.980976104736328, "log_odds_ratio": -0.00037278165109455585, "logits/chosen": -0.9659571647644043, "logits/rejected": -1.1706774234771729, "logps/chosen": -0.0006645218818448484, "logps/rejected": -1.483097791671753, "loss": 1.7788, "nll_loss": 0.4446701407432556, "rewards/accuracies": 1.0, "rewards/chosen": -6.645219400525093e-05, "rewards/margins": 0.14824333786964417, "rewards/rejected": -0.14830978214740753, "step": 4448 }, { "epoch": 3.0767634854771786, "grad_norm": 11.437039375305176, "learning_rate": 3.846242508068235e-05, "log_odds_chosen": 8.314053535461426, "log_odds_ratio": -0.13158449530601501, "logits/chosen": -0.728661298751831, "logits/rejected": -0.7856017351150513, "logps/chosen": -0.03292816877365112, "logps/rejected": -1.8274672031402588, "loss": 2.5398, "nll_loss": 0.6218024492263794, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032928166911005974, "rewards/margins": 0.17945389449596405, "rewards/rejected": -0.18274672329425812, "step": 4449 }, { "epoch": 3.0774550484094054, "grad_norm": 9.860296249389648, "learning_rate": 3.8458583064392195e-05, "log_odds_chosen": 8.01667594909668, "log_odds_ratio": -0.07519376277923584, "logits/chosen": -0.6754899621009827, "logits/rejected": -0.6415287256240845, "logps/chosen": -0.014091627672314644, "logps/rejected": -1.4123716354370117, "loss": 1.9168, "nll_loss": 0.4716867208480835, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014091627672314644, "rewards/margins": 0.13982799649238586, "rewards/rejected": -0.14123715460300446, "step": 4450 }, { "epoch": 3.0781466113416323, "grad_norm": 7.970553398132324, "learning_rate": 3.845474104810205e-05, "log_odds_chosen": 8.878427505493164, "log_odds_ratio": -0.0015269446885213256, "logits/chosen": -0.44634032249450684, "logits/rejected": -0.49079009890556335, "logps/chosen": -0.004734280984848738, "logps/rejected": -1.7657830715179443, "loss": 1.4751, "nll_loss": 0.36861249804496765, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004734280810225755, "rewards/margins": 0.17610487341880798, "rewards/rejected": -0.17657829821109772, "step": 4451 }, { "epoch": 3.078838174273859, "grad_norm": 5.924354076385498, "learning_rate": 3.84508990318119e-05, "log_odds_chosen": 8.421602249145508, "log_odds_ratio": -0.06661777198314667, "logits/chosen": -0.6268646717071533, "logits/rejected": -0.7258235812187195, "logps/chosen": -0.012585322372615337, "logps/rejected": -1.2588582038879395, "loss": 2.1821, "nll_loss": 0.5388583540916443, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012585322838276625, "rewards/margins": 0.12462729215621948, "rewards/rejected": -0.12588582932949066, "step": 4452 }, { "epoch": 3.079529737206086, "grad_norm": 9.645779609680176, "learning_rate": 3.8447057015521746e-05, "log_odds_chosen": 9.39462661743164, "log_odds_ratio": -0.03679275140166283, "logits/chosen": -0.5939630270004272, "logits/rejected": -0.644051194190979, "logps/chosen": -0.017827268689870834, "logps/rejected": -2.204650402069092, "loss": 1.4338, "nll_loss": 0.3547765016555786, "rewards/accuracies": 1.0, "rewards/chosen": -0.001782726845704019, "rewards/margins": 0.21868231892585754, "rewards/rejected": -0.2204650640487671, "step": 4453 }, { "epoch": 3.0802213001383127, "grad_norm": 13.677511215209961, "learning_rate": 3.84432149992316e-05, "log_odds_chosen": 9.449896812438965, "log_odds_ratio": -0.0015739205991849303, "logits/chosen": -0.8569554090499878, "logits/rejected": -0.8320474624633789, "logps/chosen": -0.0067398822866380215, "logps/rejected": -2.1724767684936523, "loss": 1.5862, "nll_loss": 0.3963874578475952, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006739882519468665, "rewards/margins": 0.21657368540763855, "rewards/rejected": -0.21724767982959747, "step": 4454 }, { "epoch": 3.0809128630705396, "grad_norm": 11.459304809570312, "learning_rate": 3.843937298294145e-05, "log_odds_chosen": 9.47178840637207, "log_odds_ratio": -0.0006335485959425569, "logits/chosen": -0.6219982504844666, "logits/rejected": -0.6629889011383057, "logps/chosen": -0.0020509539172053337, "logps/rejected": -1.8684937953948975, "loss": 1.8082, "nll_loss": 0.45199406147003174, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002050954062724486, "rewards/margins": 0.1866442710161209, "rewards/rejected": -0.18684938549995422, "step": 4455 }, { "epoch": 3.0816044260027664, "grad_norm": 7.764420986175537, "learning_rate": 3.84355309666513e-05, "log_odds_chosen": 8.673635482788086, "log_odds_ratio": -0.0005076941451989114, "logits/chosen": -0.6043013334274292, "logits/rejected": -0.6264380216598511, "logps/chosen": -0.0006218141061253846, "logps/rejected": -1.4252135753631592, "loss": 2.0007, "nll_loss": 0.5001353025436401, "rewards/accuracies": 1.0, "rewards/chosen": -6.218141061253846e-05, "rewards/margins": 0.1424591839313507, "rewards/rejected": -0.14252136647701263, "step": 4456 }, { "epoch": 3.0822959889349932, "grad_norm": 7.5119733810424805, "learning_rate": 3.843168895036115e-05, "log_odds_chosen": 8.390885353088379, "log_odds_ratio": -0.025730164721608162, "logits/chosen": -0.5082446336746216, "logits/rejected": -0.567543625831604, "logps/chosen": -0.01098723616451025, "logps/rejected": -1.2334426641464233, "loss": 1.687, "nll_loss": 0.4191865026950836, "rewards/accuracies": 1.0, "rewards/chosen": -0.001098723616451025, "rewards/margins": 0.12224555015563965, "rewards/rejected": -0.12334427237510681, "step": 4457 }, { "epoch": 3.08298755186722, "grad_norm": 21.876222610473633, "learning_rate": 3.842784693407101e-05, "log_odds_chosen": 8.640466690063477, "log_odds_ratio": -0.03930068388581276, "logits/chosen": -0.8005126714706421, "logits/rejected": -0.8496750593185425, "logps/chosen": -0.009835846722126007, "logps/rejected": -2.1062915325164795, "loss": 1.8627, "nll_loss": 0.4617469310760498, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009835846722126007, "rewards/margins": 0.2096455693244934, "rewards/rejected": -0.2106291502714157, "step": 4458 }, { "epoch": 3.083679114799447, "grad_norm": 10.259416580200195, "learning_rate": 3.8424004917780853e-05, "log_odds_chosen": 9.032968521118164, "log_odds_ratio": -0.007468566298484802, "logits/chosen": -0.7418262958526611, "logits/rejected": -0.7376289367675781, "logps/chosen": -0.003130620112642646, "logps/rejected": -1.7070213556289673, "loss": 1.2162, "nll_loss": 0.303314745426178, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003130620170850307, "rewards/margins": 0.1703890711069107, "rewards/rejected": -0.17070214450359344, "step": 4459 }, { "epoch": 3.0843706777316737, "grad_norm": 10.962529182434082, "learning_rate": 3.8420162901490706e-05, "log_odds_chosen": 8.482834815979004, "log_odds_ratio": -0.21217072010040283, "logits/chosen": -0.8609898090362549, "logits/rejected": -0.8481860160827637, "logps/chosen": -0.07208161801099777, "logps/rejected": -1.3337786197662354, "loss": 1.6501, "nll_loss": 0.39131245017051697, "rewards/accuracies": 0.875, "rewards/chosen": -0.007208161521703005, "rewards/margins": 0.12616971135139465, "rewards/rejected": -0.13337786495685577, "step": 4460 }, { "epoch": 3.0850622406639006, "grad_norm": 5.700719833374023, "learning_rate": 3.841632088520056e-05, "log_odds_chosen": 9.250066757202148, "log_odds_ratio": -0.00032611002097837627, "logits/chosen": -0.6338516473770142, "logits/rejected": -0.6218562126159668, "logps/chosen": -0.0013254042714834213, "logps/rejected": -1.582740068435669, "loss": 1.8656, "nll_loss": 0.46636563539505005, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013254041550680995, "rewards/margins": 0.15814147889614105, "rewards/rejected": -0.15827400982379913, "step": 4461 }, { "epoch": 3.0857538035961274, "grad_norm": 8.559025764465332, "learning_rate": 3.8412478868910404e-05, "log_odds_chosen": 10.36841106414795, "log_odds_ratio": -0.00013494495942723006, "logits/chosen": -0.857991099357605, "logits/rejected": -0.963939368724823, "logps/chosen": -0.003404158866032958, "logps/rejected": -2.077117681503296, "loss": 1.3211, "nll_loss": 0.3302675485610962, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003404158924240619, "rewards/margins": 0.20737135410308838, "rewards/rejected": -0.20771175622940063, "step": 4462 }, { "epoch": 3.086445366528354, "grad_norm": 8.417356491088867, "learning_rate": 3.8408636852620256e-05, "log_odds_chosen": 9.118749618530273, "log_odds_ratio": -0.012486644089221954, "logits/chosen": -0.5907131433486938, "logits/rejected": -0.62277752161026, "logps/chosen": -0.06458212435245514, "logps/rejected": -2.072675943374634, "loss": 1.0546, "nll_loss": 0.26241254806518555, "rewards/accuracies": 1.0, "rewards/chosen": -0.006458211690187454, "rewards/margins": 0.20080935955047607, "rewards/rejected": -0.2072676122188568, "step": 4463 }, { "epoch": 3.087136929460581, "grad_norm": 12.34536075592041, "learning_rate": 3.840479483633011e-05, "log_odds_chosen": 8.383492469787598, "log_odds_ratio": -0.0008394765318371356, "logits/chosen": -0.6303070783615112, "logits/rejected": -0.6680947542190552, "logps/chosen": -0.0022870246320962906, "logps/rejected": -1.5010545253753662, "loss": 2.1234, "nll_loss": 0.5307590961456299, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022870246903039515, "rewards/margins": 0.14987675845623016, "rewards/rejected": -0.15010544657707214, "step": 4464 }, { "epoch": 3.087828492392808, "grad_norm": 10.20311164855957, "learning_rate": 3.840095282003996e-05, "log_odds_chosen": 9.271467208862305, "log_odds_ratio": -0.0012613222934305668, "logits/chosen": -0.6812804937362671, "logits/rejected": -0.7113140225410461, "logps/chosen": -0.03402145206928253, "logps/rejected": -2.317378282546997, "loss": 2.2311, "nll_loss": 0.5576537251472473, "rewards/accuracies": 1.0, "rewards/chosen": -0.003402145579457283, "rewards/margins": 0.2283356785774231, "rewards/rejected": -0.23173782229423523, "step": 4465 }, { "epoch": 3.0885200553250347, "grad_norm": 7.677236557006836, "learning_rate": 3.839711080374981e-05, "log_odds_chosen": 9.496734619140625, "log_odds_ratio": -0.0010136470664292574, "logits/chosen": -0.6936898827552795, "logits/rejected": -0.7297221422195435, "logps/chosen": -0.0011777316685765982, "logps/rejected": -2.1823971271514893, "loss": 1.4355, "nll_loss": 0.3587798476219177, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011777316831285134, "rewards/margins": 0.21812193095684052, "rewards/rejected": -0.2182397097349167, "step": 4466 }, { "epoch": 3.0892116182572615, "grad_norm": 15.080026626586914, "learning_rate": 3.8393268787459666e-05, "log_odds_chosen": 9.812665939331055, "log_odds_ratio": -0.00014757076860405505, "logits/chosen": -0.3832654654979706, "logits/rejected": -0.47605186700820923, "logps/chosen": -0.0010359041625633836, "logps/rejected": -1.9479248523712158, "loss": 2.1878, "nll_loss": 0.5469351410865784, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010359041334595531, "rewards/margins": 0.19468891620635986, "rewards/rejected": -0.1947924941778183, "step": 4467 }, { "epoch": 3.0899031811894884, "grad_norm": 12.628984451293945, "learning_rate": 3.838942677116951e-05, "log_odds_chosen": 8.88953971862793, "log_odds_ratio": -0.012125710025429726, "logits/chosen": -0.4032934308052063, "logits/rejected": -0.45221227407455444, "logps/chosen": -0.005299792625010014, "logps/rejected": -1.8057115077972412, "loss": 1.4878, "nll_loss": 0.37074652314186096, "rewards/accuracies": 1.0, "rewards/chosen": -0.000529979239217937, "rewards/margins": 0.18004117906093597, "rewards/rejected": -0.18057113885879517, "step": 4468 }, { "epoch": 3.090594744121715, "grad_norm": 9.369607925415039, "learning_rate": 3.8385584754879364e-05, "log_odds_chosen": 9.427680969238281, "log_odds_ratio": -0.0027859038673341274, "logits/chosen": -0.3410671353340149, "logits/rejected": -0.4170883595943451, "logps/chosen": -0.009003382176160812, "logps/rejected": -1.8481632471084595, "loss": 1.8318, "nll_loss": 0.4576743543148041, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009003382292576134, "rewards/margins": 0.18391598761081696, "rewards/rejected": -0.18481633067131042, "step": 4469 }, { "epoch": 3.091286307053942, "grad_norm": 8.817591667175293, "learning_rate": 3.838174273858922e-05, "log_odds_chosen": 8.1690034866333, "log_odds_ratio": -0.008122103288769722, "logits/chosen": -0.32725226879119873, "logits/rejected": -0.3610275983810425, "logps/chosen": -0.010240322910249233, "logps/rejected": -1.8948895931243896, "loss": 1.558, "nll_loss": 0.3886779844760895, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010240323608741164, "rewards/margins": 0.1884649097919464, "rewards/rejected": -0.18948894739151, "step": 4470 }, { "epoch": 3.091977869986169, "grad_norm": 6.53830623626709, "learning_rate": 3.837790072229906e-05, "log_odds_chosen": 8.212554931640625, "log_odds_ratio": -0.18837156891822815, "logits/chosen": -0.4135130047798157, "logits/rejected": -0.4412657916545868, "logps/chosen": -0.04065573215484619, "logps/rejected": -1.0980712175369263, "loss": 1.8802, "nll_loss": 0.45120689272880554, "rewards/accuracies": 0.875, "rewards/chosen": -0.0040655736811459064, "rewards/margins": 0.10574156045913696, "rewards/rejected": -0.10980713367462158, "step": 4471 }, { "epoch": 3.0926694329183957, "grad_norm": 9.242819786071777, "learning_rate": 3.8374058706008915e-05, "log_odds_chosen": 9.362022399902344, "log_odds_ratio": -0.00011695074499584734, "logits/chosen": -0.4138777554035187, "logits/rejected": -0.43331849575042725, "logps/chosen": -0.00035065674455836415, "logps/rejected": -1.322685956954956, "loss": 2.1748, "nll_loss": 0.5436833500862122, "rewards/accuracies": 1.0, "rewards/chosen": -3.506567736621946e-05, "rewards/margins": 0.13223353028297424, "rewards/rejected": -0.13226859271526337, "step": 4472 }, { "epoch": 3.0933609958506225, "grad_norm": 10.725008964538574, "learning_rate": 3.837021668971877e-05, "log_odds_chosen": 9.351563453674316, "log_odds_ratio": -0.0004978245706297457, "logits/chosen": -0.5398173928260803, "logits/rejected": -0.5418287515640259, "logps/chosen": -0.005135357845574617, "logps/rejected": -2.0309886932373047, "loss": 1.4824, "nll_loss": 0.3705606460571289, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005135358078405261, "rewards/margins": 0.2025853395462036, "rewards/rejected": -0.203098863363266, "step": 4473 }, { "epoch": 3.0940525587828493, "grad_norm": 8.506511688232422, "learning_rate": 3.836637467342862e-05, "log_odds_chosen": 6.215714931488037, "log_odds_ratio": -0.2552088499069214, "logits/chosen": -0.8886780738830566, "logits/rejected": -0.8105942010879517, "logps/chosen": -0.043494973331689835, "logps/rejected": -0.7536239624023438, "loss": 2.0884, "nll_loss": 0.4965880513191223, "rewards/accuracies": 0.875, "rewards/chosen": -0.0043494971469044685, "rewards/margins": 0.07101289927959442, "rewards/rejected": -0.07536239176988602, "step": 4474 }, { "epoch": 3.094744121715076, "grad_norm": 9.47619342803955, "learning_rate": 3.8362532657138465e-05, "log_odds_chosen": 8.74432373046875, "log_odds_ratio": -0.00039555650437250733, "logits/chosen": -0.6446212530136108, "logits/rejected": -0.6923754215240479, "logps/chosen": -0.002961081452667713, "logps/rejected": -1.4609273672103882, "loss": 1.93, "nll_loss": 0.4824484586715698, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029610813362523913, "rewards/margins": 0.14579662680625916, "rewards/rejected": -0.1460927426815033, "step": 4475 }, { "epoch": 3.095435684647303, "grad_norm": 13.779592514038086, "learning_rate": 3.8358690640848325e-05, "log_odds_chosen": 9.060077667236328, "log_odds_ratio": -0.00019415131828282028, "logits/chosen": -0.7600535154342651, "logits/rejected": -0.7289277911186218, "logps/chosen": -0.0007039851043373346, "logps/rejected": -1.6375209093093872, "loss": 1.7422, "nll_loss": 0.43554073572158813, "rewards/accuracies": 1.0, "rewards/chosen": -7.039851334411651e-05, "rewards/margins": 0.16368168592453003, "rewards/rejected": -0.16375207901000977, "step": 4476 }, { "epoch": 3.09612724757953, "grad_norm": 10.712538719177246, "learning_rate": 3.835484862455817e-05, "log_odds_chosen": 9.2240571975708, "log_odds_ratio": -0.0012207168620079756, "logits/chosen": -0.7368443012237549, "logits/rejected": -0.7674077153205872, "logps/chosen": -0.007611352019011974, "logps/rejected": -2.200847864151001, "loss": 2.2683, "nll_loss": 0.5669484734535217, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007611351902596653, "rewards/margins": 0.21932365000247955, "rewards/rejected": -0.2200847864151001, "step": 4477 }, { "epoch": 3.0968188105117567, "grad_norm": 6.989372730255127, "learning_rate": 3.835100660826802e-05, "log_odds_chosen": 8.106101989746094, "log_odds_ratio": -0.0028811958618462086, "logits/chosen": -0.8484435677528381, "logits/rejected": -0.8258570432662964, "logps/chosen": -0.038005031645298004, "logps/rejected": -1.863260269165039, "loss": 1.3121, "nll_loss": 0.32773715257644653, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038005029782652855, "rewards/margins": 0.18252553045749664, "rewards/rejected": -0.1863260418176651, "step": 4478 }, { "epoch": 3.0975103734439835, "grad_norm": 12.75827693939209, "learning_rate": 3.8347164591977875e-05, "log_odds_chosen": 8.949575424194336, "log_odds_ratio": -0.0010895613813772798, "logits/chosen": -0.6992684602737427, "logits/rejected": -0.7569445967674255, "logps/chosen": -0.0008068022434599698, "logps/rejected": -1.7668452262878418, "loss": 1.9399, "nll_loss": 0.484869122505188, "rewards/accuracies": 1.0, "rewards/chosen": -8.068021270446479e-05, "rewards/margins": 0.17660385370254517, "rewards/rejected": -0.17668454349040985, "step": 4479 }, { "epoch": 3.0982019363762103, "grad_norm": 11.199858665466309, "learning_rate": 3.834332257568772e-05, "log_odds_chosen": 8.661763191223145, "log_odds_ratio": -0.016961198300123215, "logits/chosen": -0.7467653751373291, "logits/rejected": -0.78775954246521, "logps/chosen": -0.014684153720736504, "logps/rejected": -1.940002679824829, "loss": 2.0541, "nll_loss": 0.511833131313324, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014684153720736504, "rewards/margins": 0.19253185391426086, "rewards/rejected": -0.1940002590417862, "step": 4480 }, { "epoch": 3.098893499308437, "grad_norm": 4.9811110496521, "learning_rate": 3.833948055939757e-05, "log_odds_chosen": 7.296429634094238, "log_odds_ratio": -0.016438350081443787, "logits/chosen": -0.735593318939209, "logits/rejected": -0.7207514643669128, "logps/chosen": -0.011228415183722973, "logps/rejected": -1.2033060789108276, "loss": 2.4165, "nll_loss": 0.6024818420410156, "rewards/accuracies": 1.0, "rewards/chosen": -0.001122841495089233, "rewards/margins": 0.11920776963233948, "rewards/rejected": -0.12033060938119888, "step": 4481 }, { "epoch": 3.099585062240664, "grad_norm": 13.971213340759277, "learning_rate": 3.8335638543107426e-05, "log_odds_chosen": 9.258010864257812, "log_odds_ratio": -0.00527906185016036, "logits/chosen": -0.8993180990219116, "logits/rejected": -0.9418272376060486, "logps/chosen": -0.0038291211239993572, "logps/rejected": -1.8594636917114258, "loss": 2.4635, "nll_loss": 0.6153481602668762, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003829121415037662, "rewards/margins": 0.18556344509124756, "rewards/rejected": -0.18594636023044586, "step": 4482 }, { "epoch": 3.100276625172891, "grad_norm": 8.516523361206055, "learning_rate": 3.833179652681728e-05, "log_odds_chosen": 10.226236343383789, "log_odds_ratio": -0.00016320720897056162, "logits/chosen": -0.9161720275878906, "logits/rejected": -0.917118489742279, "logps/chosen": -0.0006441689911298454, "logps/rejected": -2.2198100090026855, "loss": 1.2774, "nll_loss": 0.31933191418647766, "rewards/accuracies": 1.0, "rewards/chosen": -6.441689765779302e-05, "rewards/margins": 0.22191661596298218, "rewards/rejected": -0.221981018781662, "step": 4483 }, { "epoch": 3.1009681881051177, "grad_norm": 11.72066879272461, "learning_rate": 3.8327954510527124e-05, "log_odds_chosen": 10.348617553710938, "log_odds_ratio": -0.0001743563188938424, "logits/chosen": -0.4661424160003662, "logits/rejected": -0.530230700969696, "logps/chosen": -0.0005677434965036809, "logps/rejected": -2.4082119464874268, "loss": 1.6134, "nll_loss": 0.403322696685791, "rewards/accuracies": 1.0, "rewards/chosen": -5.6774355471134186e-05, "rewards/margins": 0.24076443910598755, "rewards/rejected": -0.2408212125301361, "step": 4484 }, { "epoch": 3.1016597510373445, "grad_norm": 6.819164752960205, "learning_rate": 3.832411249423698e-05, "log_odds_chosen": 8.427994728088379, "log_odds_ratio": -0.0013439118629321456, "logits/chosen": -0.6012922525405884, "logits/rejected": -0.6957547664642334, "logps/chosen": -0.00487975450232625, "logps/rejected": -1.8782953023910522, "loss": 1.712, "nll_loss": 0.42785927653312683, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004879754560533911, "rewards/margins": 0.18734155595302582, "rewards/rejected": -0.18782952427864075, "step": 4485 }, { "epoch": 3.1023513139695713, "grad_norm": 14.561470985412598, "learning_rate": 3.832027047794683e-05, "log_odds_chosen": 10.100393295288086, "log_odds_ratio": -0.00017121568089351058, "logits/chosen": -0.9278507232666016, "logits/rejected": -1.0120364427566528, "logps/chosen": -0.0006230022408999503, "logps/rejected": -2.0958151817321777, "loss": 1.978, "nll_loss": 0.4944764971733093, "rewards/accuracies": 1.0, "rewards/chosen": -6.230021972442046e-05, "rewards/margins": 0.2095191925764084, "rewards/rejected": -0.20958150923252106, "step": 4486 }, { "epoch": 3.103042876901798, "grad_norm": 7.111811637878418, "learning_rate": 3.831642846165668e-05, "log_odds_chosen": 7.821290969848633, "log_odds_ratio": -0.15456566214561462, "logits/chosen": -0.44528767466545105, "logits/rejected": -0.4628318250179291, "logps/chosen": -0.02520749345421791, "logps/rejected": -1.2928862571716309, "loss": 1.7394, "nll_loss": 0.4193989038467407, "rewards/accuracies": 0.875, "rewards/chosen": -0.0025207491125911474, "rewards/margins": 0.12676787376403809, "rewards/rejected": -0.12928862869739532, "step": 4487 }, { "epoch": 3.103734439834025, "grad_norm": 9.723301887512207, "learning_rate": 3.8312586445366534e-05, "log_odds_chosen": 6.991793155670166, "log_odds_ratio": -0.03994767740368843, "logits/chosen": -0.3234378695487976, "logits/rejected": -0.3832213878631592, "logps/chosen": -0.012255754321813583, "logps/rejected": -1.1929444074630737, "loss": 1.8702, "nll_loss": 0.46356457471847534, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012255755718797445, "rewards/margins": 0.1180688664317131, "rewards/rejected": -0.11929444223642349, "step": 4488 }, { "epoch": 3.104426002766252, "grad_norm": 10.895448684692383, "learning_rate": 3.830874442907638e-05, "log_odds_chosen": 10.129297256469727, "log_odds_ratio": -5.873259578947909e-05, "logits/chosen": -0.6546196341514587, "logits/rejected": -0.7104349136352539, "logps/chosen": -0.0002052735653705895, "logps/rejected": -1.7905243635177612, "loss": 1.3839, "nll_loss": 0.34596529603004456, "rewards/accuracies": 1.0, "rewards/chosen": -2.052735726465471e-05, "rewards/margins": 0.17903190851211548, "rewards/rejected": -0.1790524423122406, "step": 4489 }, { "epoch": 3.1051175656984786, "grad_norm": 6.577591896057129, "learning_rate": 3.830490241278623e-05, "log_odds_chosen": 8.690784454345703, "log_odds_ratio": -0.00047942117089405656, "logits/chosen": -0.844971776008606, "logits/rejected": -0.897977888584137, "logps/chosen": -0.0027525126934051514, "logps/rejected": -1.9402804374694824, "loss": 1.9784, "nll_loss": 0.4945591688156128, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027525125187821686, "rewards/margins": 0.19375279545783997, "rewards/rejected": -0.19402804970741272, "step": 4490 }, { "epoch": 3.1058091286307055, "grad_norm": 13.503525733947754, "learning_rate": 3.8301060396496084e-05, "log_odds_chosen": 9.159711837768555, "log_odds_ratio": -0.0036358933430165052, "logits/chosen": -0.22813749313354492, "logits/rejected": -0.34502047300338745, "logps/chosen": -0.0027327819261699915, "logps/rejected": -1.9411729574203491, "loss": 1.6807, "nll_loss": 0.41981786489486694, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027327818679623306, "rewards/margins": 0.1938440203666687, "rewards/rejected": -0.19411730766296387, "step": 4491 }, { "epoch": 3.1065006915629323, "grad_norm": 8.305024147033691, "learning_rate": 3.829721838020594e-05, "log_odds_chosen": 8.52673053741455, "log_odds_ratio": -0.002459357026964426, "logits/chosen": -0.7672133445739746, "logits/rejected": -0.7956629395484924, "logps/chosen": -0.001488923910073936, "logps/rejected": -1.4330894947052002, "loss": 1.3449, "nll_loss": 0.33596938848495483, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014889237354509532, "rewards/margins": 0.14316006004810333, "rewards/rejected": -0.14330896735191345, "step": 4492 }, { "epoch": 3.107192254495159, "grad_norm": 5.7519850730896, "learning_rate": 3.829337636391578e-05, "log_odds_chosen": 9.599166870117188, "log_odds_ratio": -0.00018097971042152494, "logits/chosen": -0.658562183380127, "logits/rejected": -0.6959885358810425, "logps/chosen": -0.0007571708410978317, "logps/rejected": -1.9449284076690674, "loss": 2.0916, "nll_loss": 0.5228797197341919, "rewards/accuracies": 1.0, "rewards/chosen": -7.57170855649747e-05, "rewards/margins": 0.19441711902618408, "rewards/rejected": -0.19449283182621002, "step": 4493 }, { "epoch": 3.107883817427386, "grad_norm": 8.070000648498535, "learning_rate": 3.828953434762564e-05, "log_odds_chosen": 7.616555690765381, "log_odds_ratio": -0.029507823288440704, "logits/chosen": -0.8429253101348877, "logits/rejected": -0.7952168583869934, "logps/chosen": -0.00954954419285059, "logps/rejected": -1.3259698152542114, "loss": 1.6679, "nll_loss": 0.41403070092201233, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009549543610773981, "rewards/margins": 0.13164202868938446, "rewards/rejected": -0.13259698450565338, "step": 4494 }, { "epoch": 3.108575380359613, "grad_norm": 9.600811004638672, "learning_rate": 3.828569233133549e-05, "log_odds_chosen": 6.310328483581543, "log_odds_ratio": -0.22290322184562683, "logits/chosen": -0.32344478368759155, "logits/rejected": -0.44014453887939453, "logps/chosen": -0.04343204200267792, "logps/rejected": -1.4269005060195923, "loss": 1.9721, "nll_loss": 0.47072654962539673, "rewards/accuracies": 0.875, "rewards/chosen": -0.004343204665929079, "rewards/margins": 0.1383468508720398, "rewards/rejected": -0.14269006252288818, "step": 4495 }, { "epoch": 3.1092669432918396, "grad_norm": 8.956048011779785, "learning_rate": 3.828185031504534e-05, "log_odds_chosen": 8.941858291625977, "log_odds_ratio": -0.0007558095967397094, "logits/chosen": -0.4833359122276306, "logits/rejected": -0.5360693335533142, "logps/chosen": -0.0011183847673237324, "logps/rejected": -1.6190630197525024, "loss": 2.241, "nll_loss": 0.5601741075515747, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011183848255313933, "rewards/margins": 0.16179445385932922, "rewards/rejected": -0.16190630197525024, "step": 4496 }, { "epoch": 3.1099585062240664, "grad_norm": 10.014410972595215, "learning_rate": 3.8278008298755185e-05, "log_odds_chosen": 8.638592720031738, "log_odds_ratio": -0.013766951858997345, "logits/chosen": -0.6217601895332336, "logits/rejected": -0.6732291579246521, "logps/chosen": -0.06834837794303894, "logps/rejected": -1.6543635129928589, "loss": 1.7909, "nll_loss": 0.4463382065296173, "rewards/accuracies": 1.0, "rewards/chosen": -0.006834837608039379, "rewards/margins": 0.1586015224456787, "rewards/rejected": -0.16543635725975037, "step": 4497 }, { "epoch": 3.1106500691562933, "grad_norm": 10.994170188903809, "learning_rate": 3.827416628246504e-05, "log_odds_chosen": 8.087845802307129, "log_odds_ratio": -0.020464470610022545, "logits/chosen": -0.4601096510887146, "logits/rejected": -0.5213154554367065, "logps/chosen": -0.00844386126846075, "logps/rejected": -1.5365309715270996, "loss": 1.4808, "nll_loss": 0.3681448698043823, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008443861734122038, "rewards/margins": 0.15280871093273163, "rewards/rejected": -0.1536531001329422, "step": 4498 }, { "epoch": 3.11134163208852, "grad_norm": 11.073749542236328, "learning_rate": 3.827032426617489e-05, "log_odds_chosen": 9.555994033813477, "log_odds_ratio": -0.00013254325313027948, "logits/chosen": -0.42572540044784546, "logits/rejected": -0.4704053997993469, "logps/chosen": -0.0004056716861668974, "logps/rejected": -1.6349362134933472, "loss": 2.0059, "nll_loss": 0.5014705061912537, "rewards/accuracies": 1.0, "rewards/chosen": -4.056716716149822e-05, "rewards/margins": 0.16345307230949402, "rewards/rejected": -0.16349363327026367, "step": 4499 }, { "epoch": 3.112033195020747, "grad_norm": 9.384265899658203, "learning_rate": 3.8266482249884736e-05, "log_odds_chosen": 8.335820198059082, "log_odds_ratio": -0.0012116122525185347, "logits/chosen": -0.46154022216796875, "logits/rejected": -0.5181608200073242, "logps/chosen": -0.0015864280285313725, "logps/rejected": -1.568237066268921, "loss": 1.5026, "nll_loss": 0.3755166828632355, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015864279703237116, "rewards/margins": 0.15666505694389343, "rewards/rejected": -0.15682370960712433, "step": 4500 }, { "epoch": 3.1127247579529738, "grad_norm": 12.012398719787598, "learning_rate": 3.8262640233594595e-05, "log_odds_chosen": 10.375574111938477, "log_odds_ratio": -0.004499649163335562, "logits/chosen": -0.9000028371810913, "logits/rejected": -0.9860438108444214, "logps/chosen": -0.002155824564397335, "logps/rejected": -2.352019786834717, "loss": 2.0993, "nll_loss": 0.5243684649467468, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002155824622604996, "rewards/margins": 0.23498639464378357, "rewards/rejected": -0.23520199954509735, "step": 4501 }, { "epoch": 3.1134163208852006, "grad_norm": 12.486892700195312, "learning_rate": 3.825879821730444e-05, "log_odds_chosen": 8.377776145935059, "log_odds_ratio": -0.0027169152162969112, "logits/chosen": -0.4547843337059021, "logits/rejected": -0.5709913372993469, "logps/chosen": -0.001894856453873217, "logps/rejected": -1.635668396949768, "loss": 1.9407, "nll_loss": 0.4848959445953369, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018948563956655562, "rewards/margins": 0.16337734460830688, "rewards/rejected": -0.16356684267520905, "step": 4502 }, { "epoch": 3.1141078838174274, "grad_norm": 12.442771911621094, "learning_rate": 3.825495620101429e-05, "log_odds_chosen": 8.797966003417969, "log_odds_ratio": -0.0005118194967508316, "logits/chosen": -0.7598543167114258, "logits/rejected": -0.8394927382469177, "logps/chosen": -0.0020327758975327015, "logps/rejected": -1.7451748847961426, "loss": 1.5557, "nll_loss": 0.388874888420105, "rewards/accuracies": 1.0, "rewards/chosen": -0.000203277581022121, "rewards/margins": 0.1743142157793045, "rewards/rejected": -0.17451749742031097, "step": 4503 }, { "epoch": 3.1147994467496543, "grad_norm": 10.25559139251709, "learning_rate": 3.8251114184724146e-05, "log_odds_chosen": 9.953062057495117, "log_odds_ratio": -0.00027120395679958165, "logits/chosen": -0.7184591889381409, "logits/rejected": -0.8281209468841553, "logps/chosen": -0.000444697099737823, "logps/rejected": -1.6925357580184937, "loss": 1.6686, "nll_loss": 0.4171278774738312, "rewards/accuracies": 1.0, "rewards/chosen": -4.446971433935687e-05, "rewards/margins": 0.16920912265777588, "rewards/rejected": -0.16925358772277832, "step": 4504 }, { "epoch": 3.115491009681881, "grad_norm": 17.230388641357422, "learning_rate": 3.8247272168434e-05, "log_odds_chosen": 9.531290054321289, "log_odds_ratio": -0.00029410183196887374, "logits/chosen": -0.9135196208953857, "logits/rejected": -1.0209636688232422, "logps/chosen": -0.0008348989649675786, "logps/rejected": -1.95163893699646, "loss": 1.6661, "nll_loss": 0.4165038764476776, "rewards/accuracies": 1.0, "rewards/chosen": -8.34899110486731e-05, "rewards/margins": 0.19508041441440582, "rewards/rejected": -0.19516390562057495, "step": 4505 }, { "epoch": 3.116182572614108, "grad_norm": 11.494372367858887, "learning_rate": 3.8243430152143844e-05, "log_odds_chosen": 7.4096455574035645, "log_odds_ratio": -0.1444232016801834, "logits/chosen": -0.5988174676895142, "logits/rejected": -0.6123107671737671, "logps/chosen": -0.039420872926712036, "logps/rejected": -1.9260753393173218, "loss": 1.6472, "nll_loss": 0.3973577618598938, "rewards/accuracies": 0.875, "rewards/chosen": -0.003942087292671204, "rewards/margins": 0.1886654496192932, "rewards/rejected": -0.19260753691196442, "step": 4506 }, { "epoch": 3.1168741355463347, "grad_norm": 4.810232639312744, "learning_rate": 3.8239588135853696e-05, "log_odds_chosen": 7.191157341003418, "log_odds_ratio": -0.08262602239847183, "logits/chosen": -0.46612024307250977, "logits/rejected": -0.5304436683654785, "logps/chosen": -0.041873492300510406, "logps/rejected": -1.7846962213516235, "loss": 1.7705, "nll_loss": 0.43437063694000244, "rewards/accuracies": 1.0, "rewards/chosen": -0.004187349695712328, "rewards/margins": 0.1742822825908661, "rewards/rejected": -0.17846962809562683, "step": 4507 }, { "epoch": 3.1175656984785616, "grad_norm": 7.528070449829102, "learning_rate": 3.823574611956355e-05, "log_odds_chosen": 7.74015998840332, "log_odds_ratio": -0.1015208289027214, "logits/chosen": -0.9653025269508362, "logits/rejected": -1.017502784729004, "logps/chosen": -0.017939701676368713, "logps/rejected": -1.6744894981384277, "loss": 1.7263, "nll_loss": 0.42141109704971313, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017939701210707426, "rewards/margins": 0.1656549721956253, "rewards/rejected": -0.16744893789291382, "step": 4508 }, { "epoch": 3.1182572614107884, "grad_norm": 8.28232192993164, "learning_rate": 3.8231904103273394e-05, "log_odds_chosen": 9.37982177734375, "log_odds_ratio": -0.0001628204045118764, "logits/chosen": -0.7197470664978027, "logits/rejected": -0.7083673477172852, "logps/chosen": -0.0006745536811649799, "logps/rejected": -1.5198719501495361, "loss": 1.8457, "nll_loss": 0.46140754222869873, "rewards/accuracies": 1.0, "rewards/chosen": -6.745537393726408e-05, "rewards/margins": 0.15191973745822906, "rewards/rejected": -0.15198718011379242, "step": 4509 }, { "epoch": 3.1189488243430152, "grad_norm": 10.852813720703125, "learning_rate": 3.8228062086983253e-05, "log_odds_chosen": 9.736101150512695, "log_odds_ratio": -0.00016522295481991023, "logits/chosen": -0.613042414188385, "logits/rejected": -0.5795801877975464, "logps/chosen": -0.010511064901947975, "logps/rejected": -2.089296817779541, "loss": 1.7505, "nll_loss": 0.43759751319885254, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010511064901947975, "rewards/margins": 0.20787858963012695, "rewards/rejected": -0.20892968773841858, "step": 4510 }, { "epoch": 3.119640387275242, "grad_norm": 9.711675643920898, "learning_rate": 3.82242200706931e-05, "log_odds_chosen": 10.058385848999023, "log_odds_ratio": -7.867505337344483e-05, "logits/chosen": -0.710852324962616, "logits/rejected": -0.7429115176200867, "logps/chosen": -0.00019204053387511522, "logps/rejected": -1.4844186305999756, "loss": 1.7442, "nll_loss": 0.43605199456214905, "rewards/accuracies": 1.0, "rewards/chosen": -1.920405338751152e-05, "rewards/margins": 0.14842267334461212, "rewards/rejected": -0.1484418660402298, "step": 4511 }, { "epoch": 3.120331950207469, "grad_norm": 7.414134502410889, "learning_rate": 3.822037805440295e-05, "log_odds_chosen": 9.799873352050781, "log_odds_ratio": -0.00024242886865977198, "logits/chosen": -0.7370198369026184, "logits/rejected": -0.7367205023765564, "logps/chosen": -0.00038399602635763586, "logps/rejected": -1.648240566253662, "loss": 1.9762, "nll_loss": 0.49401822686195374, "rewards/accuracies": 1.0, "rewards/chosen": -3.839960481855087e-05, "rewards/margins": 0.16478565335273743, "rewards/rejected": -0.16482405364513397, "step": 4512 }, { "epoch": 3.1210235131396957, "grad_norm": 8.614060401916504, "learning_rate": 3.8216536038112804e-05, "log_odds_chosen": 7.210538387298584, "log_odds_ratio": -0.010765092447400093, "logits/chosen": -0.8183335661888123, "logits/rejected": -0.8469040989875793, "logps/chosen": -0.013249891810119152, "logps/rejected": -1.218770146369934, "loss": 2.2016, "nll_loss": 0.5493332147598267, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013249891344457865, "rewards/margins": 0.12055202573537827, "rewards/rejected": -0.12187701463699341, "step": 4513 }, { "epoch": 3.1217150760719226, "grad_norm": 14.239781379699707, "learning_rate": 3.8212694021822656e-05, "log_odds_chosen": 8.896451950073242, "log_odds_ratio": -0.015659527853131294, "logits/chosen": -0.49167075753211975, "logits/rejected": -0.549437403678894, "logps/chosen": -0.005587196443229914, "logps/rejected": -1.6201717853546143, "loss": 1.9809, "nll_loss": 0.49366411566734314, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005587196792475879, "rewards/margins": 0.16145846247673035, "rewards/rejected": -0.16201716661453247, "step": 4514 }, { "epoch": 3.1224066390041494, "grad_norm": 8.278278350830078, "learning_rate": 3.82088520055325e-05, "log_odds_chosen": 8.167107582092285, "log_odds_ratio": -0.0011531723430380225, "logits/chosen": -0.5432494282722473, "logits/rejected": -0.5762361884117126, "logps/chosen": -0.005155651364475489, "logps/rejected": -1.4995512962341309, "loss": 2.03, "nll_loss": 0.5073837637901306, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005155651015229523, "rewards/margins": 0.14943957328796387, "rewards/rejected": -0.1499551236629486, "step": 4515 }, { "epoch": 3.123098201936376, "grad_norm": 9.371342658996582, "learning_rate": 3.8205009989242355e-05, "log_odds_chosen": 9.502702713012695, "log_odds_ratio": -0.0005149688804522157, "logits/chosen": -0.6440606117248535, "logits/rejected": -0.8232892155647278, "logps/chosen": -0.004486470948904753, "logps/rejected": -1.9728906154632568, "loss": 2.2974, "nll_loss": 0.5743060111999512, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004486471298150718, "rewards/margins": 0.19684040546417236, "rewards/rejected": -0.19728906452655792, "step": 4516 }, { "epoch": 3.123789764868603, "grad_norm": 19.65491485595703, "learning_rate": 3.820116797295221e-05, "log_odds_chosen": 8.875723838806152, "log_odds_ratio": -0.009844149462878704, "logits/chosen": -0.3485298156738281, "logits/rejected": -0.44744396209716797, "logps/chosen": -0.034259259700775146, "logps/rejected": -1.8825244903564453, "loss": 1.6779, "nll_loss": 0.4184797406196594, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034259259700775146, "rewards/margins": 0.18482650816440582, "rewards/rejected": -0.18825244903564453, "step": 4517 }, { "epoch": 3.12448132780083, "grad_norm": 13.161083221435547, "learning_rate": 3.819732595666205e-05, "log_odds_chosen": 8.252479553222656, "log_odds_ratio": -0.004612181335687637, "logits/chosen": -0.711460292339325, "logits/rejected": -0.7563948631286621, "logps/chosen": -0.006149608641862869, "logps/rejected": -1.7720332145690918, "loss": 2.0579, "nll_loss": 0.514003574848175, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006149609107524157, "rewards/margins": 0.17658837139606476, "rewards/rejected": -0.17720332741737366, "step": 4518 }, { "epoch": 3.1251728907330567, "grad_norm": 9.831012725830078, "learning_rate": 3.819348394037191e-05, "log_odds_chosen": 8.261101722717285, "log_odds_ratio": -0.1013021394610405, "logits/chosen": -0.40049105882644653, "logits/rejected": -0.4902288317680359, "logps/chosen": -0.016606254503130913, "logps/rejected": -1.3624508380889893, "loss": 1.502, "nll_loss": 0.3653719425201416, "rewards/accuracies": 0.875, "rewards/chosen": -0.0016606254503130913, "rewards/margins": 0.1345844566822052, "rewards/rejected": -0.13624508678913116, "step": 4519 }, { "epoch": 3.1258644536652835, "grad_norm": 7.515540599822998, "learning_rate": 3.818964192408176e-05, "log_odds_chosen": 8.36376667022705, "log_odds_ratio": -0.004170221742242575, "logits/chosen": -0.2855423390865326, "logits/rejected": -0.3653988838195801, "logps/chosen": -0.027606811374425888, "logps/rejected": -2.2836270332336426, "loss": 1.6224, "nll_loss": 0.40518805384635925, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027606813237071037, "rewards/margins": 0.22560201585292816, "rewards/rejected": -0.22836267948150635, "step": 4520 }, { "epoch": 3.1265560165975104, "grad_norm": 6.657315731048584, "learning_rate": 3.818579990779161e-05, "log_odds_chosen": 7.705994606018066, "log_odds_ratio": -0.022902924567461014, "logits/chosen": -0.4771485924720764, "logits/rejected": -0.49368542432785034, "logps/chosen": -0.007439715787768364, "logps/rejected": -1.3107768297195435, "loss": 1.8003, "nll_loss": 0.4477929472923279, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007439716137014329, "rewards/margins": 0.13033372163772583, "rewards/rejected": -0.13107769191265106, "step": 4521 }, { "epoch": 3.127247579529737, "grad_norm": 11.238456726074219, "learning_rate": 3.818195789150146e-05, "log_odds_chosen": 9.492000579833984, "log_odds_ratio": -0.001921428251080215, "logits/chosen": -0.7598745822906494, "logits/rejected": -0.7640295028686523, "logps/chosen": -0.013556775636970997, "logps/rejected": -2.340526819229126, "loss": 2.0358, "nll_loss": 0.5087577104568481, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013556774938479066, "rewards/margins": 0.2326970100402832, "rewards/rejected": -0.23405268788337708, "step": 4522 }, { "epoch": 3.127939142461964, "grad_norm": 10.358023643493652, "learning_rate": 3.8178115875211315e-05, "log_odds_chosen": 9.809257507324219, "log_odds_ratio": -8.809195423964411e-05, "logits/chosen": -0.3360787034034729, "logits/rejected": -0.3992619216442108, "logps/chosen": -0.00785091519355774, "logps/rejected": -2.415611743927002, "loss": 1.505, "nll_loss": 0.3762507736682892, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007850916008464992, "rewards/margins": 0.24077607691287994, "rewards/rejected": -0.241561159491539, "step": 4523 }, { "epoch": 3.128630705394191, "grad_norm": 7.600725173950195, "learning_rate": 3.817427385892116e-05, "log_odds_chosen": 8.749622344970703, "log_odds_ratio": -0.027113988995552063, "logits/chosen": -0.5375210642814636, "logits/rejected": -0.5235632658004761, "logps/chosen": -0.007372237276285887, "logps/rejected": -1.1214462518692017, "loss": 1.6342, "nll_loss": 0.40584635734558105, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007372237741947174, "rewards/margins": 0.11140740662813187, "rewards/rejected": -0.11214463412761688, "step": 4524 }, { "epoch": 3.1293222683264177, "grad_norm": 8.687854766845703, "learning_rate": 3.817043184263101e-05, "log_odds_chosen": 8.751347541809082, "log_odds_ratio": -0.001098288339562714, "logits/chosen": -0.6035765409469604, "logits/rejected": -0.6965278387069702, "logps/chosen": -0.1082809567451477, "logps/rejected": -2.047489881515503, "loss": 1.6846, "nll_loss": 0.42103543877601624, "rewards/accuracies": 1.0, "rewards/chosen": -0.010828095488250256, "rewards/margins": 0.19392091035842896, "rewards/rejected": -0.2047489881515503, "step": 4525 }, { "epoch": 3.1300138312586445, "grad_norm": 11.338460922241211, "learning_rate": 3.8166589826340865e-05, "log_odds_chosen": 9.703506469726562, "log_odds_ratio": -0.0005722501664422452, "logits/chosen": -0.792091429233551, "logits/rejected": -0.8154028654098511, "logps/chosen": -0.0005980893620289862, "logps/rejected": -1.6763802766799927, "loss": 1.62, "nll_loss": 0.40493831038475037, "rewards/accuracies": 1.0, "rewards/chosen": -5.980893911328167e-05, "rewards/margins": 0.16757820546627045, "rewards/rejected": -0.16763801872730255, "step": 4526 }, { "epoch": 3.1307053941908713, "grad_norm": 11.097390174865723, "learning_rate": 3.816274781005071e-05, "log_odds_chosen": 10.066173553466797, "log_odds_ratio": -0.00018428280600346625, "logits/chosen": -1.0647556781768799, "logits/rejected": -1.1590054035186768, "logps/chosen": -0.000478035188280046, "logps/rejected": -2.538705825805664, "loss": 1.7628, "nll_loss": 0.44069257378578186, "rewards/accuracies": 1.0, "rewards/chosen": -4.780351810040884e-05, "rewards/margins": 0.2538227438926697, "rewards/rejected": -0.25387057662010193, "step": 4527 }, { "epoch": 3.131396957123098, "grad_norm": 9.307280540466309, "learning_rate": 3.815890579376057e-05, "log_odds_chosen": 9.103038787841797, "log_odds_ratio": -0.001420386484824121, "logits/chosen": -0.7872853875160217, "logits/rejected": -0.8193323612213135, "logps/chosen": -0.0031216240022331476, "logps/rejected": -2.2407970428466797, "loss": 2.1735, "nll_loss": 0.5432285070419312, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031216241768561304, "rewards/margins": 0.22376754879951477, "rewards/rejected": -0.2240796983242035, "step": 4528 }, { "epoch": 3.132088520055325, "grad_norm": 7.521166801452637, "learning_rate": 3.8155063777470416e-05, "log_odds_chosen": 9.6439208984375, "log_odds_ratio": -0.00010717587429098785, "logits/chosen": -0.4613168239593506, "logits/rejected": -0.5104846954345703, "logps/chosen": -0.0003044250188395381, "logps/rejected": -1.3650177717208862, "loss": 1.3345, "nll_loss": 0.3336198329925537, "rewards/accuracies": 1.0, "rewards/chosen": -3.044250115635805e-05, "rewards/margins": 0.13647134602069855, "rewards/rejected": -0.13650178909301758, "step": 4529 }, { "epoch": 3.132780082987552, "grad_norm": 8.576324462890625, "learning_rate": 3.815122176118027e-05, "log_odds_chosen": 9.355891227722168, "log_odds_ratio": -0.02535889483988285, "logits/chosen": -0.7813079953193665, "logits/rejected": -0.7906760573387146, "logps/chosen": -0.007265019230544567, "logps/rejected": -1.7882845401763916, "loss": 1.091, "nll_loss": 0.27020618319511414, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007265019230544567, "rewards/margins": 0.17810194194316864, "rewards/rejected": -0.17882846295833588, "step": 4530 }, { "epoch": 3.1334716459197787, "grad_norm": 9.892020225524902, "learning_rate": 3.814737974489012e-05, "log_odds_chosen": 10.09764289855957, "log_odds_ratio": -0.00012242019874975085, "logits/chosen": -0.5264326333999634, "logits/rejected": -0.5695961117744446, "logps/chosen": -0.00032916830969043076, "logps/rejected": -1.9653332233428955, "loss": 1.6326, "nll_loss": 0.4081262946128845, "rewards/accuracies": 1.0, "rewards/chosen": -3.29168324242346e-05, "rewards/margins": 0.19650039076805115, "rewards/rejected": -0.19653332233428955, "step": 4531 }, { "epoch": 3.1341632088520055, "grad_norm": 14.981584548950195, "learning_rate": 3.814353772859997e-05, "log_odds_chosen": 10.171943664550781, "log_odds_ratio": -4.946034459862858e-05, "logits/chosen": -0.9239128828048706, "logits/rejected": -0.9973942637443542, "logps/chosen": -0.0003368504694662988, "logps/rejected": -1.8945417404174805, "loss": 2.7217, "nll_loss": 0.6804137229919434, "rewards/accuracies": 1.0, "rewards/chosen": -3.368504621903412e-05, "rewards/margins": 0.18942049145698547, "rewards/rejected": -0.18945418298244476, "step": 4532 }, { "epoch": 3.1348547717842323, "grad_norm": 10.596940994262695, "learning_rate": 3.813969571230982e-05, "log_odds_chosen": 10.182984352111816, "log_odds_ratio": -9.355310612590984e-05, "logits/chosen": -0.8709641695022583, "logits/rejected": -0.9588356018066406, "logps/chosen": -0.000607682392001152, "logps/rejected": -2.1859307289123535, "loss": 1.4606, "nll_loss": 0.3651290535926819, "rewards/accuracies": 1.0, "rewards/chosen": -6.076823774492368e-05, "rewards/margins": 0.21853229403495789, "rewards/rejected": -0.2185930758714676, "step": 4533 }, { "epoch": 3.135546334716459, "grad_norm": 10.857816696166992, "learning_rate": 3.813585369601967e-05, "log_odds_chosen": 7.239170074462891, "log_odds_ratio": -0.014962945133447647, "logits/chosen": -0.5337001085281372, "logits/rejected": -0.5952611565589905, "logps/chosen": -0.02933443710207939, "logps/rejected": -1.7761834859848022, "loss": 1.7248, "nll_loss": 0.42969420552253723, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029334432911127806, "rewards/margins": 0.17468491196632385, "rewards/rejected": -0.1776183545589447, "step": 4534 }, { "epoch": 3.136237897648686, "grad_norm": 4.286896705627441, "learning_rate": 3.8132011679729524e-05, "log_odds_chosen": 7.423393249511719, "log_odds_ratio": -0.02035851590335369, "logits/chosen": -0.6046768426895142, "logits/rejected": -0.5994390845298767, "logps/chosen": -0.034431342035532, "logps/rejected": -1.7557482719421387, "loss": 1.6542, "nll_loss": 0.4115162491798401, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034431342501193285, "rewards/margins": 0.17213168740272522, "rewards/rejected": -0.17557483911514282, "step": 4535 }, { "epoch": 3.136929460580913, "grad_norm": 10.18018627166748, "learning_rate": 3.812816966343937e-05, "log_odds_chosen": 8.911344528198242, "log_odds_ratio": -0.00033108796924352646, "logits/chosen": -0.779600977897644, "logits/rejected": -0.7951276898384094, "logps/chosen": -0.0005056941881775856, "logps/rejected": -1.4306249618530273, "loss": 2.17, "nll_loss": 0.5424723625183105, "rewards/accuracies": 1.0, "rewards/chosen": -5.056941881775856e-05, "rewards/margins": 0.1430119276046753, "rewards/rejected": -0.1430625021457672, "step": 4536 }, { "epoch": 3.1376210235131397, "grad_norm": 12.990944862365723, "learning_rate": 3.812432764714923e-05, "log_odds_chosen": 10.054588317871094, "log_odds_ratio": -7.011681009316817e-05, "logits/chosen": -0.7560017108917236, "logits/rejected": -0.8109539747238159, "logps/chosen": -0.0004920088686048985, "logps/rejected": -1.7367125749588013, "loss": 1.9389, "nll_loss": 0.48472166061401367, "rewards/accuracies": 1.0, "rewards/chosen": -4.9200891226064414e-05, "rewards/margins": 0.17362205684185028, "rewards/rejected": -0.17367127537727356, "step": 4537 }, { "epoch": 3.1383125864453665, "grad_norm": 6.141697406768799, "learning_rate": 3.8120485630859074e-05, "log_odds_chosen": 10.290353775024414, "log_odds_ratio": -6.922272586962208e-05, "logits/chosen": -0.2762095630168915, "logits/rejected": -0.3313080370426178, "logps/chosen": -0.000490238016936928, "logps/rejected": -2.147183895111084, "loss": 2.0987, "nll_loss": 0.5246639251708984, "rewards/accuracies": 1.0, "rewards/chosen": -4.902379441773519e-05, "rewards/margins": 0.2146693915128708, "rewards/rejected": -0.21471840143203735, "step": 4538 }, { "epoch": 3.1390041493775933, "grad_norm": 10.24838638305664, "learning_rate": 3.811664361456893e-05, "log_odds_chosen": 7.072497844696045, "log_odds_ratio": -0.2984233796596527, "logits/chosen": -0.2776835262775421, "logits/rejected": -0.3410561680793762, "logps/chosen": -0.04763030633330345, "logps/rejected": -1.6126518249511719, "loss": 1.9583, "nll_loss": 0.45972296595573425, "rewards/accuracies": 0.875, "rewards/chosen": -0.00476303044706583, "rewards/margins": 0.1565021276473999, "rewards/rejected": -0.16126517951488495, "step": 4539 }, { "epoch": 3.13969571230982, "grad_norm": 16.48712158203125, "learning_rate": 3.811280159827878e-05, "log_odds_chosen": 8.227566719055176, "log_odds_ratio": -0.04556776210665703, "logits/chosen": -0.9189306497573853, "logits/rejected": -0.9514791965484619, "logps/chosen": -0.03799891471862793, "logps/rejected": -1.8318504095077515, "loss": 2.0979, "nll_loss": 0.5199169516563416, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037998920306563377, "rewards/margins": 0.17938515543937683, "rewards/rejected": -0.18318504095077515, "step": 4540 }, { "epoch": 3.140387275242047, "grad_norm": 9.07288646697998, "learning_rate": 3.810895958198863e-05, "log_odds_chosen": 9.004685401916504, "log_odds_ratio": -0.06647829711437225, "logits/chosen": -0.6648062467575073, "logits/rejected": -0.6944547891616821, "logps/chosen": -0.012586474418640137, "logps/rejected": -1.3501288890838623, "loss": 2.3062, "nll_loss": 0.5699008107185364, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012586475349962711, "rewards/margins": 0.13375423848628998, "rewards/rejected": -0.1350128948688507, "step": 4541 }, { "epoch": 3.141078838174274, "grad_norm": 7.928465366363525, "learning_rate": 3.810511756569848e-05, "log_odds_chosen": 10.70061206817627, "log_odds_ratio": -3.524612111505121e-05, "logits/chosen": -0.664079487323761, "logits/rejected": -0.7402001619338989, "logps/chosen": -0.00035893419408239424, "logps/rejected": -2.4550933837890625, "loss": 1.3285, "nll_loss": 0.3321237564086914, "rewards/accuracies": 1.0, "rewards/chosen": -3.5893419408239424e-05, "rewards/margins": 0.2454734593629837, "rewards/rejected": -0.24550935626029968, "step": 4542 }, { "epoch": 3.1417704011065006, "grad_norm": 7.757707595825195, "learning_rate": 3.810127554940833e-05, "log_odds_chosen": 9.735649108886719, "log_odds_ratio": -0.0007004727958701551, "logits/chosen": -0.6714752316474915, "logits/rejected": -0.6968262195587158, "logps/chosen": -0.00038532583857886493, "logps/rejected": -1.7883989810943604, "loss": 1.6233, "nll_loss": 0.4057468771934509, "rewards/accuracies": 1.0, "rewards/chosen": -3.853258385788649e-05, "rewards/margins": 0.17880135774612427, "rewards/rejected": -0.17883989214897156, "step": 4543 }, { "epoch": 3.1424619640387275, "grad_norm": 4.612785816192627, "learning_rate": 3.809743353311818e-05, "log_odds_chosen": 8.947471618652344, "log_odds_ratio": -0.0007860027835704386, "logits/chosen": -0.5513883233070374, "logits/rejected": -0.5391987562179565, "logps/chosen": -0.007197847589850426, "logps/rejected": -1.6272315979003906, "loss": 1.3186, "nll_loss": 0.32956576347351074, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007197847589850426, "rewards/margins": 0.16200336813926697, "rewards/rejected": -0.16272316873073578, "step": 4544 }, { "epoch": 3.1431535269709543, "grad_norm": 10.903369903564453, "learning_rate": 3.809359151682803e-05, "log_odds_chosen": 8.987730026245117, "log_odds_ratio": -0.0010401320178061724, "logits/chosen": -0.6096397638320923, "logits/rejected": -0.6015598177909851, "logps/chosen": -0.001135033555328846, "logps/rejected": -1.9247807264328003, "loss": 1.033, "nll_loss": 0.2581413686275482, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011350335262250155, "rewards/margins": 0.19236457347869873, "rewards/rejected": -0.19247806072235107, "step": 4545 }, { "epoch": 3.143845089903181, "grad_norm": 12.1816987991333, "learning_rate": 3.808974950053789e-05, "log_odds_chosen": 9.709785461425781, "log_odds_ratio": -0.0002501948911231011, "logits/chosen": -0.3249852955341339, "logits/rejected": -0.35515284538269043, "logps/chosen": -0.0010478305630385876, "logps/rejected": -1.9457213878631592, "loss": 1.5735, "nll_loss": 0.39334553480148315, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010478307376615703, "rewards/margins": 0.19446735084056854, "rewards/rejected": -0.19457213580608368, "step": 4546 }, { "epoch": 3.144536652835408, "grad_norm": 8.440278053283691, "learning_rate": 3.808590748424773e-05, "log_odds_chosen": 10.5477876663208, "log_odds_ratio": -5.409811274148524e-05, "logits/chosen": -0.5726054906845093, "logits/rejected": -0.5959814786911011, "logps/chosen": -0.0015591013943776488, "logps/rejected": -2.7095227241516113, "loss": 1.3806, "nll_loss": 0.3451417088508606, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015591015107929707, "rewards/margins": 0.27079635858535767, "rewards/rejected": -0.2709522545337677, "step": 4547 }, { "epoch": 3.145228215767635, "grad_norm": 5.923503398895264, "learning_rate": 3.8082065467957585e-05, "log_odds_chosen": 9.889942169189453, "log_odds_ratio": -0.0002645776839926839, "logits/chosen": -0.6835029125213623, "logits/rejected": -0.6722432374954224, "logps/chosen": -0.0024868096224963665, "logps/rejected": -2.269965171813965, "loss": 2.0377, "nll_loss": 0.509391725063324, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024868096807040274, "rewards/margins": 0.22674782574176788, "rewards/rejected": -0.226996511220932, "step": 4548 }, { "epoch": 3.1459197786998616, "grad_norm": 10.639673233032227, "learning_rate": 3.807822345166744e-05, "log_odds_chosen": 8.301789283752441, "log_odds_ratio": -0.021940121427178383, "logits/chosen": -0.6153140664100647, "logits/rejected": -0.7271953821182251, "logps/chosen": -0.013397076167166233, "logps/rejected": -2.071251392364502, "loss": 1.7852, "nll_loss": 0.44411715865135193, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013397075235843658, "rewards/margins": 0.20578543841838837, "rewards/rejected": -0.20712514221668243, "step": 4549 }, { "epoch": 3.1466113416320884, "grad_norm": 15.749545097351074, "learning_rate": 3.807438143537729e-05, "log_odds_chosen": 7.985233306884766, "log_odds_ratio": -0.12385857850313187, "logits/chosen": -0.4573472738265991, "logits/rejected": -0.39598169922828674, "logps/chosen": -0.02607070654630661, "logps/rejected": -1.364487886428833, "loss": 2.2428, "nll_loss": 0.5483059883117676, "rewards/accuracies": 0.875, "rewards/chosen": -0.002607070840895176, "rewards/margins": 0.13384172320365906, "rewards/rejected": -0.13644880056381226, "step": 4550 }, { "epoch": 3.1473029045643153, "grad_norm": 8.415826797485352, "learning_rate": 3.8070539419087136e-05, "log_odds_chosen": 8.762563705444336, "log_odds_ratio": -0.010597055777907372, "logits/chosen": -0.6360316276550293, "logits/rejected": -0.7695156335830688, "logps/chosen": -0.004911277908831835, "logps/rejected": -1.581146478652954, "loss": 1.7155, "nll_loss": 0.427810400724411, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004911277792416513, "rewards/margins": 0.1576235145330429, "rewards/rejected": -0.15811465680599213, "step": 4551 }, { "epoch": 3.147994467496542, "grad_norm": 12.998376846313477, "learning_rate": 3.806669740279699e-05, "log_odds_chosen": 8.921663284301758, "log_odds_ratio": -0.04195103421807289, "logits/chosen": -0.6693733334541321, "logits/rejected": -0.7234500646591187, "logps/chosen": -0.01005852036178112, "logps/rejected": -1.833262324333191, "loss": 1.5829, "nll_loss": 0.3915401101112366, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010058521293103695, "rewards/margins": 0.18232038617134094, "rewards/rejected": -0.18332622945308685, "step": 4552 }, { "epoch": 3.148686030428769, "grad_norm": 5.58607816696167, "learning_rate": 3.806285538650684e-05, "log_odds_chosen": 8.226150512695312, "log_odds_ratio": -0.004057474434375763, "logits/chosen": -0.2192223072052002, "logits/rejected": -0.24665291607379913, "logps/chosen": -0.01471928134560585, "logps/rejected": -1.6626795530319214, "loss": 1.2105, "nll_loss": 0.3022083342075348, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014719280879944563, "rewards/margins": 0.16479603946208954, "rewards/rejected": -0.16626796126365662, "step": 4553 }, { "epoch": 3.1493775933609958, "grad_norm": 8.711701393127441, "learning_rate": 3.8059013370216686e-05, "log_odds_chosen": 9.90752124786377, "log_odds_ratio": -0.0003670519217848778, "logits/chosen": -0.6568311452865601, "logits/rejected": -0.7144882678985596, "logps/chosen": -0.035695165395736694, "logps/rejected": -2.3609328269958496, "loss": 1.7224, "nll_loss": 0.4305512309074402, "rewards/accuracies": 1.0, "rewards/chosen": -0.003569516586139798, "rewards/margins": 0.23252378404140472, "rewards/rejected": -0.23609329760074615, "step": 4554 }, { "epoch": 3.1500691562932226, "grad_norm": 6.859455108642578, "learning_rate": 3.8055171353926546e-05, "log_odds_chosen": 10.337671279907227, "log_odds_ratio": -0.00044965840061195195, "logits/chosen": -0.5700564980506897, "logits/rejected": -0.6095435619354248, "logps/chosen": -0.004616261925548315, "logps/rejected": -1.8322263956069946, "loss": 2.594, "nll_loss": 0.6484533548355103, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046162621583789587, "rewards/margins": 0.18276099860668182, "rewards/rejected": -0.18322263658046722, "step": 4555 }, { "epoch": 3.1507607192254494, "grad_norm": 14.39889144897461, "learning_rate": 3.805132933763639e-05, "log_odds_chosen": 10.14631462097168, "log_odds_ratio": -0.0001559885567985475, "logits/chosen": -0.24603904783725739, "logits/rejected": -0.26028525829315186, "logps/chosen": -0.0026844381354749203, "logps/rejected": -2.728746175765991, "loss": 1.5621, "nll_loss": 0.3905075490474701, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002684438368305564, "rewards/margins": 0.27260616421699524, "rewards/rejected": -0.2728746235370636, "step": 4556 }, { "epoch": 3.1514522821576763, "grad_norm": 8.706809043884277, "learning_rate": 3.8047487321346244e-05, "log_odds_chosen": 8.413780212402344, "log_odds_ratio": -0.020199157297611237, "logits/chosen": -0.43870848417282104, "logits/rejected": -0.503291666507721, "logps/chosen": -0.0241429153829813, "logps/rejected": -1.6240514516830444, "loss": 2.3396, "nll_loss": 0.5828862190246582, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024142912589013577, "rewards/margins": 0.15999086201190948, "rewards/rejected": -0.16240514814853668, "step": 4557 }, { "epoch": 3.152143845089903, "grad_norm": 10.746550559997559, "learning_rate": 3.8043645305056096e-05, "log_odds_chosen": 8.803289413452148, "log_odds_ratio": -0.006239832378923893, "logits/chosen": -0.8961911201477051, "logits/rejected": -0.9288344979286194, "logps/chosen": -0.004250252153724432, "logps/rejected": -1.7004587650299072, "loss": 2.3146, "nll_loss": 0.5780288577079773, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042502518044784665, "rewards/margins": 0.16962085664272308, "rewards/rejected": -0.1700458824634552, "step": 4558 }, { "epoch": 3.15283540802213, "grad_norm": 6.900879383087158, "learning_rate": 3.803980328876595e-05, "log_odds_chosen": 9.192712783813477, "log_odds_ratio": -0.0011526880552992225, "logits/chosen": -0.31786349415779114, "logits/rejected": -0.3973749876022339, "logps/chosen": -0.0021980530582368374, "logps/rejected": -2.287642478942871, "loss": 1.7842, "nll_loss": 0.44592586159706116, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021980531164444983, "rewards/margins": 0.2285444587469101, "rewards/rejected": -0.22876425087451935, "step": 4559 }, { "epoch": 3.1535269709543567, "grad_norm": 11.182805061340332, "learning_rate": 3.8035961272475794e-05, "log_odds_chosen": 9.342815399169922, "log_odds_ratio": -0.0012750012101605535, "logits/chosen": -0.6669700145721436, "logits/rejected": -0.7313302755355835, "logps/chosen": -0.001783718471415341, "logps/rejected": -1.8242666721343994, "loss": 2.2457, "nll_loss": 0.5613073706626892, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001783718471415341, "rewards/margins": 0.1822482794523239, "rewards/rejected": -0.18242666125297546, "step": 4560 }, { "epoch": 3.1542185338865836, "grad_norm": 6.648580074310303, "learning_rate": 3.803211925618565e-05, "log_odds_chosen": 9.386903762817383, "log_odds_ratio": -0.020936183631420135, "logits/chosen": -0.4923725426197052, "logits/rejected": -0.5274564027786255, "logps/chosen": -0.005909002851694822, "logps/rejected": -1.635195016860962, "loss": 1.4973, "nll_loss": 0.3722338080406189, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005909003666602075, "rewards/margins": 0.16292861104011536, "rewards/rejected": -0.1635195016860962, "step": 4561 }, { "epoch": 3.1549100968188104, "grad_norm": 7.211044788360596, "learning_rate": 3.80282772398955e-05, "log_odds_chosen": 10.988120079040527, "log_odds_ratio": -2.3240507289301604e-05, "logits/chosen": -0.544777512550354, "logits/rejected": -0.581328809261322, "logps/chosen": -0.00042092709918506444, "logps/rejected": -2.3012421131134033, "loss": 1.4785, "nll_loss": 0.3696138560771942, "rewards/accuracies": 1.0, "rewards/chosen": -4.2092706280527636e-05, "rewards/margins": 0.23008212447166443, "rewards/rejected": -0.23012422025203705, "step": 4562 }, { "epoch": 3.1556016597510372, "grad_norm": 9.094809532165527, "learning_rate": 3.8024435223605345e-05, "log_odds_chosen": 9.047502517700195, "log_odds_ratio": -0.009910568594932556, "logits/chosen": -0.8417686223983765, "logits/rejected": -0.9273465275764465, "logps/chosen": -0.005829110741615295, "logps/rejected": -1.8627815246582031, "loss": 1.3684, "nll_loss": 0.3411003649234772, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005829111323691905, "rewards/margins": 0.18569524586200714, "rewards/rejected": -0.18627816438674927, "step": 4563 }, { "epoch": 3.156293222683264, "grad_norm": 11.577505111694336, "learning_rate": 3.8020593207315204e-05, "log_odds_chosen": 9.485040664672852, "log_odds_ratio": -0.00032070037559606135, "logits/chosen": -0.59970623254776, "logits/rejected": -0.6934555768966675, "logps/chosen": -0.0005038708914071321, "logps/rejected": -1.4899804592132568, "loss": 1.8164, "nll_loss": 0.4540640711784363, "rewards/accuracies": 1.0, "rewards/chosen": -5.038708695792593e-05, "rewards/margins": 0.14894765615463257, "rewards/rejected": -0.14899805188179016, "step": 4564 }, { "epoch": 3.156984785615491, "grad_norm": 9.965083122253418, "learning_rate": 3.801675119102505e-05, "log_odds_chosen": 7.520229339599609, "log_odds_ratio": -0.08336061239242554, "logits/chosen": -0.6140985488891602, "logits/rejected": -0.5827906727790833, "logps/chosen": -0.05686326324939728, "logps/rejected": -1.5699549913406372, "loss": 2.1781, "nll_loss": 0.53618323802948, "rewards/accuracies": 1.0, "rewards/chosen": -0.005686326418071985, "rewards/margins": 0.15130917727947235, "rewards/rejected": -0.1569955050945282, "step": 4565 }, { "epoch": 3.1576763485477177, "grad_norm": 6.392039775848389, "learning_rate": 3.80129091747349e-05, "log_odds_chosen": 7.001106262207031, "log_odds_ratio": -0.07764378935098648, "logits/chosen": -0.8396638631820679, "logits/rejected": -0.8487118482589722, "logps/chosen": -0.0225069560110569, "logps/rejected": -1.720860242843628, "loss": 0.8638, "nll_loss": 0.20819467306137085, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022506958339363337, "rewards/margins": 0.16983532905578613, "rewards/rejected": -0.17208603024482727, "step": 4566 }, { "epoch": 3.1583679114799446, "grad_norm": 10.21845531463623, "learning_rate": 3.8009067158444755e-05, "log_odds_chosen": 9.769824028015137, "log_odds_ratio": -0.001093681319616735, "logits/chosen": -0.9067624807357788, "logits/rejected": -1.0266830921173096, "logps/chosen": -0.018000207841396332, "logps/rejected": -2.444042444229126, "loss": 2.1081, "nll_loss": 0.5269204378128052, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018000205745920539, "rewards/margins": 0.24260422587394714, "rewards/rejected": -0.24440424144268036, "step": 4567 }, { "epoch": 3.1590594744121714, "grad_norm": 9.175846099853516, "learning_rate": 3.800522514215461e-05, "log_odds_chosen": 7.827970027923584, "log_odds_ratio": -0.13693012297153473, "logits/chosen": -0.70560622215271, "logits/rejected": -0.7182061076164246, "logps/chosen": -0.03513843193650246, "logps/rejected": -1.7183489799499512, "loss": 1.957, "nll_loss": 0.4755551517009735, "rewards/accuracies": 0.875, "rewards/chosen": -0.0035138430539518595, "rewards/margins": 0.16832104325294495, "rewards/rejected": -0.17183488607406616, "step": 4568 }, { "epoch": 3.159751037344398, "grad_norm": 12.983506202697754, "learning_rate": 3.800138312586445e-05, "log_odds_chosen": 10.426617622375488, "log_odds_ratio": -4.319160507293418e-05, "logits/chosen": -0.6623454093933105, "logits/rejected": -0.677485466003418, "logps/chosen": -0.0005207035574130714, "logps/rejected": -2.6388802528381348, "loss": 1.8196, "nll_loss": 0.45490360260009766, "rewards/accuracies": 1.0, "rewards/chosen": -5.2070354286115617e-05, "rewards/margins": 0.26383593678474426, "rewards/rejected": -0.26388800144195557, "step": 4569 }, { "epoch": 3.160442600276625, "grad_norm": 9.496644020080566, "learning_rate": 3.7997541109574305e-05, "log_odds_chosen": 10.419952392578125, "log_odds_ratio": -4.813496343558654e-05, "logits/chosen": -0.4542783796787262, "logits/rejected": -0.4638960361480713, "logps/chosen": -0.0003169108822476119, "logps/rejected": -2.302415370941162, "loss": 1.5223, "nll_loss": 0.3805696666240692, "rewards/accuracies": 1.0, "rewards/chosen": -3.169108822476119e-05, "rewards/margins": 0.2302098423242569, "rewards/rejected": -0.23024152219295502, "step": 4570 }, { "epoch": 3.161134163208852, "grad_norm": 7.444447994232178, "learning_rate": 3.799369909328416e-05, "log_odds_chosen": 8.599638938903809, "log_odds_ratio": -0.007685279473662376, "logits/chosen": -0.6876360774040222, "logits/rejected": -0.7898210287094116, "logps/chosen": -0.009488348849117756, "logps/rejected": -1.3206067085266113, "loss": 1.4894, "nll_loss": 0.3715746998786926, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009488348732702434, "rewards/margins": 0.13111184537410736, "rewards/rejected": -0.1320606917142868, "step": 4571 }, { "epoch": 3.1618257261410787, "grad_norm": 7.070675373077393, "learning_rate": 3.7989857076994e-05, "log_odds_chosen": 7.803130149841309, "log_odds_ratio": -0.07015282660722733, "logits/chosen": -0.4623362720012665, "logits/rejected": -0.44682449102401733, "logps/chosen": -0.033260829746723175, "logps/rejected": -1.460196852684021, "loss": 2.0823, "nll_loss": 0.5135682225227356, "rewards/accuracies": 1.0, "rewards/chosen": -0.003326083067804575, "rewards/margins": 0.14269360899925232, "rewards/rejected": -0.14601969718933105, "step": 4572 }, { "epoch": 3.1625172890733055, "grad_norm": 13.822750091552734, "learning_rate": 3.798601506070386e-05, "log_odds_chosen": 6.678684234619141, "log_odds_ratio": -0.26905539631843567, "logits/chosen": -0.6584138870239258, "logits/rejected": -0.7105019092559814, "logps/chosen": -0.052003588527441025, "logps/rejected": -1.2119102478027344, "loss": 2.0026, "nll_loss": 0.4737449884414673, "rewards/accuracies": 0.875, "rewards/chosen": -0.005200359039008617, "rewards/margins": 0.11599066853523254, "rewards/rejected": -0.12119103968143463, "step": 4573 }, { "epoch": 3.1632088520055324, "grad_norm": 8.823368072509766, "learning_rate": 3.798217304441371e-05, "log_odds_chosen": 7.944136142730713, "log_odds_ratio": -0.015342186205089092, "logits/chosen": -0.36007851362228394, "logits/rejected": -0.37841683626174927, "logps/chosen": -0.035427793860435486, "logps/rejected": -1.5605015754699707, "loss": 1.9745, "nll_loss": 0.49209773540496826, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035427792463451624, "rewards/margins": 0.15250737965106964, "rewards/rejected": -0.1560501605272293, "step": 4574 }, { "epoch": 3.163900414937759, "grad_norm": 6.48723840713501, "learning_rate": 3.797833102812356e-05, "log_odds_chosen": 9.472557067871094, "log_odds_ratio": -0.0004954534815624356, "logits/chosen": -0.7768731117248535, "logits/rejected": -0.7985833287239075, "logps/chosen": -0.04230440780520439, "logps/rejected": -2.2706217765808105, "loss": 2.1138, "nll_loss": 0.5283978581428528, "rewards/accuracies": 1.0, "rewards/chosen": -0.0042304410599172115, "rewards/margins": 0.22283174097537994, "rewards/rejected": -0.2270621806383133, "step": 4575 }, { "epoch": 3.164591977869986, "grad_norm": 12.95726490020752, "learning_rate": 3.797448901183341e-05, "log_odds_chosen": 10.314776420593262, "log_odds_ratio": -7.154385093599558e-05, "logits/chosen": -0.6321621537208557, "logits/rejected": -0.7744560837745667, "logps/chosen": -0.0019077484030276537, "logps/rejected": -2.5642080307006836, "loss": 2.4073, "nll_loss": 0.6018108129501343, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019077486649621278, "rewards/margins": 0.25623005628585815, "rewards/rejected": -0.2564208209514618, "step": 4576 }, { "epoch": 3.165283540802213, "grad_norm": 6.923228740692139, "learning_rate": 3.7970646995543266e-05, "log_odds_chosen": 10.274030685424805, "log_odds_ratio": -0.00012094212434021756, "logits/chosen": -0.4876629114151001, "logits/rejected": -0.4081573486328125, "logps/chosen": -0.01138163823634386, "logps/rejected": -2.783386468887329, "loss": 1.3585, "nll_loss": 0.3396143317222595, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011381638469174504, "rewards/margins": 0.27720052003860474, "rewards/rejected": -0.2783386707305908, "step": 4577 }, { "epoch": 3.1659751037344397, "grad_norm": 6.5532450675964355, "learning_rate": 3.796680497925311e-05, "log_odds_chosen": 8.911333084106445, "log_odds_ratio": -0.005457364488393068, "logits/chosen": -0.43831944465637207, "logits/rejected": -0.43045973777770996, "logps/chosen": -0.02725759893655777, "logps/rejected": -1.86832594871521, "loss": 1.2491, "nll_loss": 0.3117315173149109, "rewards/accuracies": 1.0, "rewards/chosen": -0.002725760219618678, "rewards/margins": 0.18410682678222656, "rewards/rejected": -0.18683260679244995, "step": 4578 }, { "epoch": 3.1666666666666665, "grad_norm": 4.984968662261963, "learning_rate": 3.7962962962962964e-05, "log_odds_chosen": 8.321807861328125, "log_odds_ratio": -0.0017942956183105707, "logits/chosen": -0.6507587432861328, "logits/rejected": -0.7082344889640808, "logps/chosen": -0.019144365563988686, "logps/rejected": -1.0949219465255737, "loss": 1.0557, "nll_loss": 0.26374536752700806, "rewards/accuracies": 1.0, "rewards/chosen": -0.001914436463266611, "rewards/margins": 0.10757777094841003, "rewards/rejected": -0.10949219763278961, "step": 4579 }, { "epoch": 3.1673582295988933, "grad_norm": 9.328558921813965, "learning_rate": 3.7959120946672816e-05, "log_odds_chosen": 8.825399398803711, "log_odds_ratio": -0.0010717068798840046, "logits/chosen": -0.4912228584289551, "logits/rejected": -0.6055634617805481, "logps/chosen": -0.0007479118648916483, "logps/rejected": -1.322448492050171, "loss": 1.5699, "nll_loss": 0.39237523078918457, "rewards/accuracies": 1.0, "rewards/chosen": -7.47911908547394e-05, "rewards/margins": 0.13217005133628845, "rewards/rejected": -0.13224485516548157, "step": 4580 }, { "epoch": 3.16804979253112, "grad_norm": 6.22381067276001, "learning_rate": 3.795527893038266e-05, "log_odds_chosen": 10.746294021606445, "log_odds_ratio": -0.00043368813931010664, "logits/chosen": -0.6561489701271057, "logits/rejected": -0.6594254970550537, "logps/chosen": -0.0003508516529109329, "logps/rejected": -2.45505428314209, "loss": 1.1298, "nll_loss": 0.28241264820098877, "rewards/accuracies": 1.0, "rewards/chosen": -3.508516601868905e-05, "rewards/margins": 0.24547035992145538, "rewards/rejected": -0.2455054521560669, "step": 4581 }, { "epoch": 3.168741355463347, "grad_norm": 18.001495361328125, "learning_rate": 3.795143691409252e-05, "log_odds_chosen": 9.882756233215332, "log_odds_ratio": -9.903610043693334e-05, "logits/chosen": -0.8339722156524658, "logits/rejected": -0.8579087257385254, "logps/chosen": -0.008268280886113644, "logps/rejected": -2.6852235794067383, "loss": 2.2308, "nll_loss": 0.5577019453048706, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008268280653283, "rewards/margins": 0.2676955461502075, "rewards/rejected": -0.26852235198020935, "step": 4582 }, { "epoch": 3.169432918395574, "grad_norm": 9.087573051452637, "learning_rate": 3.794759489780237e-05, "log_odds_chosen": 6.8832855224609375, "log_odds_ratio": -0.0470358170568943, "logits/chosen": -0.7121328711509705, "logits/rejected": -0.7107651233673096, "logps/chosen": -0.022450348362326622, "logps/rejected": -1.4015861749649048, "loss": 1.4622, "nll_loss": 0.36085256934165955, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022450347896665335, "rewards/margins": 0.13791358470916748, "rewards/rejected": -0.14015862345695496, "step": 4583 }, { "epoch": 3.1701244813278007, "grad_norm": 7.808243274688721, "learning_rate": 3.794375288151222e-05, "log_odds_chosen": 9.859292984008789, "log_odds_ratio": -0.0006968683446757495, "logits/chosen": -0.9944567084312439, "logits/rejected": -1.0274537801742554, "logps/chosen": -0.004056369420140982, "logps/rejected": -2.3057827949523926, "loss": 1.354, "nll_loss": 0.33843401074409485, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040563696529716253, "rewards/margins": 0.23017263412475586, "rewards/rejected": -0.23057827353477478, "step": 4584 }, { "epoch": 3.1708160442600275, "grad_norm": 12.067472457885742, "learning_rate": 3.793991086522207e-05, "log_odds_chosen": 10.771336555480957, "log_odds_ratio": -7.997899956535548e-05, "logits/chosen": -0.7247877717018127, "logits/rejected": -0.7443068027496338, "logps/chosen": -0.011935079470276833, "logps/rejected": -2.6619701385498047, "loss": 2.0333, "nll_loss": 0.5083144307136536, "rewards/accuracies": 1.0, "rewards/chosen": -0.001193507923744619, "rewards/margins": 0.2650035321712494, "rewards/rejected": -0.2661970257759094, "step": 4585 }, { "epoch": 3.1715076071922543, "grad_norm": 8.026774406433105, "learning_rate": 3.7936068848931924e-05, "log_odds_chosen": 9.327430725097656, "log_odds_ratio": -0.0010386076755821705, "logits/chosen": -0.7047549486160278, "logits/rejected": -0.7773745656013489, "logps/chosen": -0.012416575103998184, "logps/rejected": -2.148869752883911, "loss": 1.9018, "nll_loss": 0.4753361642360687, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012416575336828828, "rewards/margins": 0.213645339012146, "rewards/rejected": -0.21488699316978455, "step": 4586 }, { "epoch": 3.172199170124481, "grad_norm": 9.1338472366333, "learning_rate": 3.793222683264177e-05, "log_odds_chosen": 9.807607650756836, "log_odds_ratio": -0.00013375926937442273, "logits/chosen": -0.5474683046340942, "logits/rejected": -0.6458426117897034, "logps/chosen": -0.00019876201986335218, "logps/rejected": -1.5424573421478271, "loss": 2.3267, "nll_loss": 0.5816740393638611, "rewards/accuracies": 1.0, "rewards/chosen": -1.987620271393098e-05, "rewards/margins": 0.15422585606575012, "rewards/rejected": -0.15424573421478271, "step": 4587 }, { "epoch": 3.172890733056708, "grad_norm": 10.483997344970703, "learning_rate": 3.792838481635162e-05, "log_odds_chosen": 9.483867645263672, "log_odds_ratio": -0.0002670464455150068, "logits/chosen": -0.7305455207824707, "logits/rejected": -0.7739405632019043, "logps/chosen": -0.00530233234167099, "logps/rejected": -1.971374273300171, "loss": 1.3878, "nll_loss": 0.34692639112472534, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005302332574501634, "rewards/margins": 0.19660718739032745, "rewards/rejected": -0.19713741540908813, "step": 4588 }, { "epoch": 3.173582295988935, "grad_norm": 11.079160690307617, "learning_rate": 3.7924542800061475e-05, "log_odds_chosen": 7.841558456420898, "log_odds_ratio": -0.4125269949436188, "logits/chosen": -0.4979378581047058, "logits/rejected": -0.581967830657959, "logps/chosen": -0.024180810898542404, "logps/rejected": -1.787544846534729, "loss": 2.3551, "nll_loss": 0.5475137233734131, "rewards/accuracies": 0.875, "rewards/chosen": -0.002418080810457468, "rewards/margins": 0.176336407661438, "rewards/rejected": -0.17875447869300842, "step": 4589 }, { "epoch": 3.1742738589211617, "grad_norm": 5.296082496643066, "learning_rate": 3.792070078377132e-05, "log_odds_chosen": 9.685503005981445, "log_odds_ratio": -0.00016764621250331402, "logits/chosen": -0.4837448000907898, "logits/rejected": -0.56712806224823, "logps/chosen": -0.0006031938828527927, "logps/rejected": -1.852403998374939, "loss": 3.0507, "nll_loss": 0.7626625299453735, "rewards/accuracies": 1.0, "rewards/chosen": -6.031939119566232e-05, "rewards/margins": 0.18518008291721344, "rewards/rejected": -0.18524041771888733, "step": 4590 }, { "epoch": 3.1749654218533885, "grad_norm": 7.954952239990234, "learning_rate": 3.791685876748118e-05, "log_odds_chosen": 9.898430824279785, "log_odds_ratio": -0.00040304564754478633, "logits/chosen": -0.5487443208694458, "logits/rejected": -0.7202839851379395, "logps/chosen": -0.00048723159125074744, "logps/rejected": -1.7513352632522583, "loss": 1.1091, "nll_loss": 0.27722257375717163, "rewards/accuracies": 1.0, "rewards/chosen": -4.8723159125074744e-05, "rewards/margins": 0.17508479952812195, "rewards/rejected": -0.17513352632522583, "step": 4591 }, { "epoch": 3.1756569847856153, "grad_norm": 10.471096992492676, "learning_rate": 3.7913016751191025e-05, "log_odds_chosen": 9.12653923034668, "log_odds_ratio": -0.001498258556239307, "logits/chosen": -0.8056678771972656, "logits/rejected": -0.7711480259895325, "logps/chosen": -0.009058143012225628, "logps/rejected": -1.626591682434082, "loss": 1.6736, "nll_loss": 0.4182409346103668, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009058142895810306, "rewards/margins": 0.1617533564567566, "rewards/rejected": -0.1626591831445694, "step": 4592 }, { "epoch": 3.176348547717842, "grad_norm": 7.665005683898926, "learning_rate": 3.790917473490088e-05, "log_odds_chosen": 9.794302940368652, "log_odds_ratio": -9.794899233384058e-05, "logits/chosen": -0.4575244188308716, "logits/rejected": -0.5109673738479614, "logps/chosen": -0.0003009043575730175, "logps/rejected": -1.6932740211486816, "loss": 1.7761, "nll_loss": 0.44400835037231445, "rewards/accuracies": 1.0, "rewards/chosen": -3.0090435757301748e-05, "rewards/margins": 0.16929732263088226, "rewards/rejected": -0.16932742297649384, "step": 4593 }, { "epoch": 3.177040110650069, "grad_norm": 12.323850631713867, "learning_rate": 3.790533271861073e-05, "log_odds_chosen": 8.642744064331055, "log_odds_ratio": -0.00040836347034201026, "logits/chosen": -0.7057383060455322, "logits/rejected": -0.7906292080879211, "logps/chosen": -0.0008442990947514772, "logps/rejected": -1.459619402885437, "loss": 1.6049, "nll_loss": 0.40117207169532776, "rewards/accuracies": 1.0, "rewards/chosen": -8.442990656476468e-05, "rewards/margins": 0.14587751030921936, "rewards/rejected": -0.1459619402885437, "step": 4594 }, { "epoch": 3.177731673582296, "grad_norm": 10.345414161682129, "learning_rate": 3.790149070232058e-05, "log_odds_chosen": 8.904953002929688, "log_odds_ratio": -0.0004517165943980217, "logits/chosen": -0.9660544395446777, "logits/rejected": -0.9590066075325012, "logps/chosen": -0.006094220094382763, "logps/rejected": -1.2050225734710693, "loss": 2.3718, "nll_loss": 0.5929133892059326, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006094219861552119, "rewards/margins": 0.11989283561706543, "rewards/rejected": -0.12050226330757141, "step": 4595 }, { "epoch": 3.1784232365145226, "grad_norm": 10.865817070007324, "learning_rate": 3.789764868603043e-05, "log_odds_chosen": 7.841320991516113, "log_odds_ratio": -0.1730465143918991, "logits/chosen": -0.350616455078125, "logits/rejected": -0.4073426425457001, "logps/chosen": -0.033529266715049744, "logps/rejected": -0.9049347043037415, "loss": 1.5156, "nll_loss": 0.36158961057662964, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033529268112033606, "rewards/margins": 0.08714054524898529, "rewards/rejected": -0.09049347043037415, "step": 4596 }, { "epoch": 3.1791147994467495, "grad_norm": 8.323912620544434, "learning_rate": 3.789380666974028e-05, "log_odds_chosen": 9.956376075744629, "log_odds_ratio": -0.0005759909981861711, "logits/chosen": -0.5365810990333557, "logits/rejected": -0.617435872554779, "logps/chosen": -0.0005928762257099152, "logps/rejected": -2.054478406906128, "loss": 1.5833, "nll_loss": 0.3957583010196686, "rewards/accuracies": 1.0, "rewards/chosen": -5.928762402618304e-05, "rewards/margins": 0.20538857579231262, "rewards/rejected": -0.20544785261154175, "step": 4597 }, { "epoch": 3.1798063623789763, "grad_norm": 22.66376304626465, "learning_rate": 3.788996465345013e-05, "log_odds_chosen": 7.783935546875, "log_odds_ratio": -0.05122813954949379, "logits/chosen": -0.7455435991287231, "logits/rejected": -0.7452982664108276, "logps/chosen": -0.21089009940624237, "logps/rejected": -1.9292497634887695, "loss": 1.9919, "nll_loss": 0.492841899394989, "rewards/accuracies": 1.0, "rewards/chosen": -0.021089009940624237, "rewards/margins": 0.1718359738588333, "rewards/rejected": -0.19292497634887695, "step": 4598 }, { "epoch": 3.180497925311203, "grad_norm": 10.29366397857666, "learning_rate": 3.788612263715998e-05, "log_odds_chosen": 7.870556354522705, "log_odds_ratio": -0.030002042651176453, "logits/chosen": -0.7490079998970032, "logits/rejected": -0.7733038067817688, "logps/chosen": -0.008063086308538914, "logps/rejected": -1.3171665668487549, "loss": 2.0297, "nll_loss": 0.504428505897522, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008063087007030845, "rewards/margins": 0.13091033697128296, "rewards/rejected": -0.13171665370464325, "step": 4599 }, { "epoch": 3.18118948824343, "grad_norm": 8.865200996398926, "learning_rate": 3.788228062086984e-05, "log_odds_chosen": 9.468286514282227, "log_odds_ratio": -0.004047780763357878, "logits/chosen": -0.5546740889549255, "logits/rejected": -0.6222197413444519, "logps/chosen": -0.0008241356699727476, "logps/rejected": -1.7708520889282227, "loss": 2.0809, "nll_loss": 0.5198326110839844, "rewards/accuracies": 1.0, "rewards/chosen": -8.241356408689171e-05, "rewards/margins": 0.17700281739234924, "rewards/rejected": -0.17708522081375122, "step": 4600 }, { "epoch": 3.181881051175657, "grad_norm": 6.551109790802002, "learning_rate": 3.7878438604579684e-05, "log_odds_chosen": 8.998653411865234, "log_odds_ratio": -0.00079822022235021, "logits/chosen": -0.2313028872013092, "logits/rejected": -0.26333147287368774, "logps/chosen": -0.0006675302283838391, "logps/rejected": -1.1907142400741577, "loss": 1.2586, "nll_loss": 0.3145686388015747, "rewards/accuracies": 1.0, "rewards/chosen": -6.675302574876696e-05, "rewards/margins": 0.11900466680526733, "rewards/rejected": -0.11907142400741577, "step": 4601 }, { "epoch": 3.1825726141078836, "grad_norm": 8.488152503967285, "learning_rate": 3.7874596588289536e-05, "log_odds_chosen": 8.680622100830078, "log_odds_ratio": -0.001602665986865759, "logits/chosen": -0.5562158226966858, "logits/rejected": -0.596197247505188, "logps/chosen": -0.0017571898642927408, "logps/rejected": -1.3305821418762207, "loss": 2.0244, "nll_loss": 0.5059409141540527, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017571900389157236, "rewards/margins": 0.1328824907541275, "rewards/rejected": -0.13305820524692535, "step": 4602 }, { "epoch": 3.1832641770401104, "grad_norm": 8.795045852661133, "learning_rate": 3.787075457199939e-05, "log_odds_chosen": 9.601675987243652, "log_odds_ratio": -0.0003586334642022848, "logits/chosen": -0.5978565812110901, "logits/rejected": -0.6616254448890686, "logps/chosen": -0.0011712521081790328, "logps/rejected": -1.742924451828003, "loss": 2.3246, "nll_loss": 0.5811123251914978, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011712520790752023, "rewards/margins": 0.17417532205581665, "rewards/rejected": -0.1742924451828003, "step": 4603 }, { "epoch": 3.1839557399723377, "grad_norm": 11.541089057922363, "learning_rate": 3.786691255570924e-05, "log_odds_chosen": 9.450611114501953, "log_odds_ratio": -0.0001150656898971647, "logits/chosen": -0.6971194744110107, "logits/rejected": -0.7279193997383118, "logps/chosen": -0.0005755338934250176, "logps/rejected": -1.7603645324707031, "loss": 2.0298, "nll_loss": 0.5074312686920166, "rewards/accuracies": 1.0, "rewards/chosen": -5.7553388614906e-05, "rewards/margins": 0.1759788990020752, "rewards/rejected": -0.17603644728660583, "step": 4604 }, { "epoch": 3.1846473029045645, "grad_norm": 6.5050482749938965, "learning_rate": 3.7863070539419087e-05, "log_odds_chosen": 8.913361549377441, "log_odds_ratio": -0.0013621591497212648, "logits/chosen": -0.23251497745513916, "logits/rejected": -0.23843058943748474, "logps/chosen": -0.034153182059526443, "logps/rejected": -2.1316874027252197, "loss": 1.797, "nll_loss": 0.4491085112094879, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034153189044445753, "rewards/margins": 0.2097533941268921, "rewards/rejected": -0.21316871047019958, "step": 4605 }, { "epoch": 3.1853388658367914, "grad_norm": 11.801334381103516, "learning_rate": 3.785922852312894e-05, "log_odds_chosen": 7.46860408782959, "log_odds_ratio": -0.024415817111730576, "logits/chosen": -0.6700199246406555, "logits/rejected": -0.7383530735969543, "logps/chosen": -0.010652041994035244, "logps/rejected": -1.7683783769607544, "loss": 2.0786, "nll_loss": 0.5172020196914673, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010652042692527175, "rewards/margins": 0.17577266693115234, "rewards/rejected": -0.17683786153793335, "step": 4606 }, { "epoch": 3.186030428769018, "grad_norm": 7.657423496246338, "learning_rate": 3.785538650683879e-05, "log_odds_chosen": 8.825857162475586, "log_odds_ratio": -0.0038504679687321186, "logits/chosen": -0.6110005378723145, "logits/rejected": -0.6402556300163269, "logps/chosen": -0.002229629550129175, "logps/rejected": -1.0497758388519287, "loss": 1.3781, "nll_loss": 0.34415027499198914, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002229629608336836, "rewards/margins": 0.10475462675094604, "rewards/rejected": -0.10497759282588959, "step": 4607 }, { "epoch": 3.186721991701245, "grad_norm": 4.710501194000244, "learning_rate": 3.7851544490548644e-05, "log_odds_chosen": 8.452669143676758, "log_odds_ratio": -0.0828661322593689, "logits/chosen": -0.49123167991638184, "logits/rejected": -0.48999887704849243, "logps/chosen": -0.034764111042022705, "logps/rejected": -2.062809944152832, "loss": 0.9485, "nll_loss": 0.22884541749954224, "rewards/accuracies": 1.0, "rewards/chosen": -0.003476410871371627, "rewards/margins": 0.2028045952320099, "rewards/rejected": -0.20628100633621216, "step": 4608 }, { "epoch": 3.187413554633472, "grad_norm": 9.057634353637695, "learning_rate": 3.7847702474258496e-05, "log_odds_chosen": 7.493239402770996, "log_odds_ratio": -0.0032867516856640577, "logits/chosen": -0.0850645899772644, "logits/rejected": -0.13177891075611115, "logps/chosen": -0.01688811369240284, "logps/rejected": -1.6319787502288818, "loss": 1.76, "nll_loss": 0.43966561555862427, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016888114623725414, "rewards/margins": 0.16150905191898346, "rewards/rejected": -0.16319787502288818, "step": 4609 }, { "epoch": 3.1881051175656987, "grad_norm": 7.127264499664307, "learning_rate": 3.784386045796834e-05, "log_odds_chosen": 9.543813705444336, "log_odds_ratio": -0.00027629200485534966, "logits/chosen": -0.680914044380188, "logits/rejected": -0.7802472710609436, "logps/chosen": -0.0005112183280289173, "logps/rejected": -1.5584700107574463, "loss": 1.5544, "nll_loss": 0.3885806202888489, "rewards/accuracies": 1.0, "rewards/chosen": -5.1121834985679016e-05, "rewards/margins": 0.1557958722114563, "rewards/rejected": -0.1558469831943512, "step": 4610 }, { "epoch": 3.1887966804979255, "grad_norm": 10.605690956115723, "learning_rate": 3.7840018441678194e-05, "log_odds_chosen": 10.069377899169922, "log_odds_ratio": -8.173806418199092e-05, "logits/chosen": -0.7247356176376343, "logits/rejected": -0.7631049752235413, "logps/chosen": -0.0006380442646332085, "logps/rejected": -2.2513394355773926, "loss": 1.7809, "nll_loss": 0.44522354006767273, "rewards/accuracies": 1.0, "rewards/chosen": -6.380442209774628e-05, "rewards/margins": 0.22507014870643616, "rewards/rejected": -0.22513394057750702, "step": 4611 }, { "epoch": 3.1894882434301524, "grad_norm": 13.950614929199219, "learning_rate": 3.783617642538805e-05, "log_odds_chosen": 8.581480026245117, "log_odds_ratio": -0.001877216505818069, "logits/chosen": -0.7479287385940552, "logits/rejected": -0.8167770504951477, "logps/chosen": -0.0037166469264775515, "logps/rejected": -2.0587105751037598, "loss": 1.4035, "nll_loss": 0.3506844639778137, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003716647333931178, "rewards/margins": 0.20549941062927246, "rewards/rejected": -0.2058710753917694, "step": 4612 }, { "epoch": 3.190179806362379, "grad_norm": 8.323124885559082, "learning_rate": 3.78323344090979e-05, "log_odds_chosen": 9.50680160522461, "log_odds_ratio": -0.00012267159763723612, "logits/chosen": -0.42881646752357483, "logits/rejected": -0.4316210150718689, "logps/chosen": -0.00014182465383782983, "logps/rejected": -1.0632882118225098, "loss": 1.4954, "nll_loss": 0.3738468587398529, "rewards/accuracies": 1.0, "rewards/chosen": -1.4182465747580864e-05, "rewards/margins": 0.10631464421749115, "rewards/rejected": -0.1063288226723671, "step": 4613 }, { "epoch": 3.190871369294606, "grad_norm": 7.727511405944824, "learning_rate": 3.7828492392807745e-05, "log_odds_chosen": 6.572525501251221, "log_odds_ratio": -0.040850505232810974, "logits/chosen": -0.5981727242469788, "logits/rejected": -0.7035253047943115, "logps/chosen": -0.012225775048136711, "logps/rejected": -0.6517828106880188, "loss": 1.4826, "nll_loss": 0.3665538430213928, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012225775280967355, "rewards/margins": 0.06395570933818817, "rewards/rejected": -0.0651782900094986, "step": 4614 }, { "epoch": 3.191562932226833, "grad_norm": 6.5258660316467285, "learning_rate": 3.7824650376517604e-05, "log_odds_chosen": 9.075474739074707, "log_odds_ratio": -0.00019371425150893629, "logits/chosen": -0.7500657439231873, "logits/rejected": -0.8560510277748108, "logps/chosen": -0.009036424569785595, "logps/rejected": -2.4027295112609863, "loss": 2.0085, "nll_loss": 0.5021045207977295, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009036423871293664, "rewards/margins": 0.23936930298805237, "rewards/rejected": -0.24027293920516968, "step": 4615 }, { "epoch": 3.1922544951590597, "grad_norm": 7.792971611022949, "learning_rate": 3.782080836022745e-05, "log_odds_chosen": 9.568330764770508, "log_odds_ratio": -0.0003454481775406748, "logits/chosen": -0.705230712890625, "logits/rejected": -0.7632952332496643, "logps/chosen": -0.00021943078900221735, "logps/rejected": -1.222402811050415, "loss": 1.8451, "nll_loss": 0.4612407982349396, "rewards/accuracies": 1.0, "rewards/chosen": -2.1943080355413258e-05, "rewards/margins": 0.12221834063529968, "rewards/rejected": -0.12224028259515762, "step": 4616 }, { "epoch": 3.1929460580912865, "grad_norm": 8.6287260055542, "learning_rate": 3.78169663439373e-05, "log_odds_chosen": 8.9024076461792, "log_odds_ratio": -0.029390254989266396, "logits/chosen": -0.546991765499115, "logits/rejected": -0.5764098167419434, "logps/chosen": -0.13760261237621307, "logps/rejected": -2.3649721145629883, "loss": 1.7657, "nll_loss": 0.43848732113838196, "rewards/accuracies": 1.0, "rewards/chosen": -0.013760262168943882, "rewards/margins": 0.22273695468902588, "rewards/rejected": -0.23649722337722778, "step": 4617 }, { "epoch": 3.1936376210235133, "grad_norm": 8.359898567199707, "learning_rate": 3.7813124327647155e-05, "log_odds_chosen": 7.723917484283447, "log_odds_ratio": -0.30947861075401306, "logits/chosen": -0.6778655648231506, "logits/rejected": -0.6863540410995483, "logps/chosen": -0.04144514724612236, "logps/rejected": -1.8415319919586182, "loss": 1.5114, "nll_loss": 0.3469085991382599, "rewards/accuracies": 0.875, "rewards/chosen": -0.004144514445215464, "rewards/margins": 0.1800086796283722, "rewards/rejected": -0.18415319919586182, "step": 4618 }, { "epoch": 3.19432918395574, "grad_norm": 6.584410667419434, "learning_rate": 3.7809282311357e-05, "log_odds_chosen": 9.380361557006836, "log_odds_ratio": -0.00014648567594122142, "logits/chosen": -0.2656836211681366, "logits/rejected": -0.38636136054992676, "logps/chosen": -0.000417731876950711, "logps/rejected": -1.453555703163147, "loss": 1.4629, "nll_loss": 0.3657173216342926, "rewards/accuracies": 1.0, "rewards/chosen": -4.177319351583719e-05, "rewards/margins": 0.1453137993812561, "rewards/rejected": -0.14535556733608246, "step": 4619 }, { "epoch": 3.195020746887967, "grad_norm": 9.390706062316895, "learning_rate": 3.780544029506685e-05, "log_odds_chosen": 7.611919403076172, "log_odds_ratio": -0.09439986199140549, "logits/chosen": -0.7868224382400513, "logits/rejected": -0.7948030829429626, "logps/chosen": -0.02480524592101574, "logps/rejected": -1.5412051677703857, "loss": 2.0887, "nll_loss": 0.5127406120300293, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024805248249322176, "rewards/margins": 0.15163999795913696, "rewards/rejected": -0.1541205197572708, "step": 4620 }, { "epoch": 3.195712309820194, "grad_norm": 7.162692070007324, "learning_rate": 3.7801598278776705e-05, "log_odds_chosen": 8.350488662719727, "log_odds_ratio": -0.0006202237564139068, "logits/chosen": -0.5837564468383789, "logits/rejected": -0.596234917640686, "logps/chosen": -0.0023574563674628735, "logps/rejected": -1.5767695903778076, "loss": 1.3595, "nll_loss": 0.33981087803840637, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023574565420858562, "rewards/margins": 0.15744122862815857, "rewards/rejected": -0.15767696499824524, "step": 4621 }, { "epoch": 3.1964038727524207, "grad_norm": 9.734930992126465, "learning_rate": 3.779775626248656e-05, "log_odds_chosen": 8.387091636657715, "log_odds_ratio": -0.07301833480596542, "logits/chosen": -0.7368804216384888, "logits/rejected": -0.8132337331771851, "logps/chosen": -0.028411580249667168, "logps/rejected": -1.6419252157211304, "loss": 2.2744, "nll_loss": 0.5613013505935669, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028411580715328455, "rewards/margins": 0.16135136783123016, "rewards/rejected": -0.16419252753257751, "step": 4622 }, { "epoch": 3.1970954356846475, "grad_norm": 10.370285987854004, "learning_rate": 3.7793914246196403e-05, "log_odds_chosen": 10.116138458251953, "log_odds_ratio": -6.656107871094719e-05, "logits/chosen": -0.6933034062385559, "logits/rejected": -0.7805424928665161, "logps/chosen": -0.00021460730931721628, "logps/rejected": -1.8299367427825928, "loss": 2.3349, "nll_loss": 0.5837261080741882, "rewards/accuracies": 1.0, "rewards/chosen": -2.1460733478306793e-05, "rewards/margins": 0.18297219276428223, "rewards/rejected": -0.18299366533756256, "step": 4623 }, { "epoch": 3.1977869986168743, "grad_norm": 7.127041339874268, "learning_rate": 3.779007222990626e-05, "log_odds_chosen": 9.852951049804688, "log_odds_ratio": -0.00026556866941973567, "logits/chosen": -0.4125402271747589, "logits/rejected": -0.44134292006492615, "logps/chosen": -0.004293524660170078, "logps/rejected": -2.5027565956115723, "loss": 1.6023, "nll_loss": 0.4005424678325653, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004293524834793061, "rewards/margins": 0.24984632432460785, "rewards/rejected": -0.2502756714820862, "step": 4624 }, { "epoch": 3.198478561549101, "grad_norm": 8.07728099822998, "learning_rate": 3.778623021361611e-05, "log_odds_chosen": 8.688798904418945, "log_odds_ratio": -0.0003031464875675738, "logits/chosen": -0.6191985011100769, "logits/rejected": -0.6176548004150391, "logps/chosen": -0.0016528278356418014, "logps/rejected": -1.2181322574615479, "loss": 1.6133, "nll_loss": 0.40328437089920044, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016528279229532927, "rewards/margins": 0.12164793908596039, "rewards/rejected": -0.12181322276592255, "step": 4625 }, { "epoch": 3.199170124481328, "grad_norm": 9.304462432861328, "learning_rate": 3.778238819732596e-05, "log_odds_chosen": 8.981358528137207, "log_odds_ratio": -0.0006013654638081789, "logits/chosen": -0.578431248664856, "logits/rejected": -0.6131760478019714, "logps/chosen": -0.008758382871747017, "logps/rejected": -2.5303421020507812, "loss": 1.6803, "nll_loss": 0.42000359296798706, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008758382173255086, "rewards/margins": 0.25215837359428406, "rewards/rejected": -0.25303423404693604, "step": 4626 }, { "epoch": 3.199861687413555, "grad_norm": 11.000825881958008, "learning_rate": 3.777854618103581e-05, "log_odds_chosen": 8.437768936157227, "log_odds_ratio": -0.0017144496086984873, "logits/chosen": -0.2553323805332184, "logits/rejected": -0.2956623435020447, "logps/chosen": -0.0262621957808733, "logps/rejected": -1.871435523033142, "loss": 1.5366, "nll_loss": 0.38397151231765747, "rewards/accuracies": 1.0, "rewards/chosen": -0.002626219531521201, "rewards/margins": 0.18451733887195587, "rewards/rejected": -0.18714354932308197, "step": 4627 }, { "epoch": 3.2005532503457816, "grad_norm": 8.009211540222168, "learning_rate": 3.777470416474566e-05, "log_odds_chosen": 8.235366821289062, "log_odds_ratio": -0.0006904486217536032, "logits/chosen": -0.5074923038482666, "logits/rejected": -0.5455666184425354, "logps/chosen": -0.015887683257460594, "logps/rejected": -1.4512051343917847, "loss": 1.4743, "nll_loss": 0.3685183823108673, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015887684421613812, "rewards/margins": 0.14353173971176147, "rewards/rejected": -0.1451205164194107, "step": 4628 }, { "epoch": 3.2012448132780085, "grad_norm": 10.77706241607666, "learning_rate": 3.777086214845551e-05, "log_odds_chosen": 8.013542175292969, "log_odds_ratio": -0.001794246258214116, "logits/chosen": -0.5844020843505859, "logits/rejected": -0.6350750923156738, "logps/chosen": -0.008887016214430332, "logps/rejected": -1.4342986345291138, "loss": 1.9349, "nll_loss": 0.4835505485534668, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008887016447260976, "rewards/margins": 0.14254117012023926, "rewards/rejected": -0.14342986047267914, "step": 4629 }, { "epoch": 3.2019363762102353, "grad_norm": 9.606144905090332, "learning_rate": 3.7767020132165364e-05, "log_odds_chosen": 8.354914665222168, "log_odds_ratio": -0.005320781376212835, "logits/chosen": -0.3048575818538666, "logits/rejected": -0.3707965612411499, "logps/chosen": -0.03981057181954384, "logps/rejected": -1.787867784500122, "loss": 1.398, "nll_loss": 0.3489583134651184, "rewards/accuracies": 1.0, "rewards/chosen": -0.003981057554483414, "rewards/margins": 0.17480574548244476, "rewards/rejected": -0.17878679931163788, "step": 4630 }, { "epoch": 3.202627939142462, "grad_norm": 9.291351318359375, "learning_rate": 3.7763178115875216e-05, "log_odds_chosen": 10.094281196594238, "log_odds_ratio": -7.057151378830895e-05, "logits/chosen": -0.5524444580078125, "logits/rejected": -0.6190738081932068, "logps/chosen": -0.00044838193571195006, "logps/rejected": -2.0691215991973877, "loss": 2.091, "nll_loss": 0.5227524042129517, "rewards/accuracies": 1.0, "rewards/chosen": -4.483819066081196e-05, "rewards/margins": 0.20686733722686768, "rewards/rejected": -0.20691215991973877, "step": 4631 }, { "epoch": 3.203319502074689, "grad_norm": 7.177628040313721, "learning_rate": 3.775933609958506e-05, "log_odds_chosen": 6.744282245635986, "log_odds_ratio": -0.07658500224351883, "logits/chosen": -0.2799968123435974, "logits/rejected": -0.276175320148468, "logps/chosen": -0.06268740445375443, "logps/rejected": -1.8641242980957031, "loss": 1.4837, "nll_loss": 0.3632669448852539, "rewards/accuracies": 1.0, "rewards/chosen": -0.006268740631639957, "rewards/margins": 0.18014369904994965, "rewards/rejected": -0.18641243875026703, "step": 4632 }, { "epoch": 3.204011065006916, "grad_norm": 10.223982810974121, "learning_rate": 3.7755494083294914e-05, "log_odds_chosen": 5.96131706237793, "log_odds_ratio": -0.437809556722641, "logits/chosen": -0.4530424475669861, "logits/rejected": -0.4831022024154663, "logps/chosen": -0.05227883160114288, "logps/rejected": -0.825284481048584, "loss": 2.3553, "nll_loss": 0.545049250125885, "rewards/accuracies": 0.75, "rewards/chosen": -0.005227882880717516, "rewards/margins": 0.07730056345462799, "rewards/rejected": -0.08252844959497452, "step": 4633 }, { "epoch": 3.2047026279391426, "grad_norm": 7.440293788909912, "learning_rate": 3.775165206700477e-05, "log_odds_chosen": 7.756006717681885, "log_odds_ratio": -0.1270664632320404, "logits/chosen": -0.7057449221611023, "logits/rejected": -0.7199209332466125, "logps/chosen": -0.04400571063160896, "logps/rejected": -1.755561113357544, "loss": 1.4132, "nll_loss": 0.3405888080596924, "rewards/accuracies": 1.0, "rewards/chosen": -0.004400571342557669, "rewards/margins": 0.17115554213523865, "rewards/rejected": -0.17555610835552216, "step": 4634 }, { "epoch": 3.2053941908713695, "grad_norm": 121.2735366821289, "learning_rate": 3.774781005071462e-05, "log_odds_chosen": 7.710501670837402, "log_odds_ratio": -0.44360071420669556, "logits/chosen": -0.39047908782958984, "logits/rejected": -0.46261516213417053, "logps/chosen": -0.06811662018299103, "logps/rejected": -1.5281715393066406, "loss": 1.5479, "nll_loss": 0.342612624168396, "rewards/accuracies": 0.875, "rewards/chosen": -0.0068116625770926476, "rewards/margins": 0.1460055112838745, "rewards/rejected": -0.15281715989112854, "step": 4635 }, { "epoch": 3.2060857538035963, "grad_norm": 7.653830528259277, "learning_rate": 3.7743968034424465e-05, "log_odds_chosen": 9.797605514526367, "log_odds_ratio": -0.0001808924862416461, "logits/chosen": -0.7511324286460876, "logits/rejected": -0.790381908416748, "logps/chosen": -0.0045016733929514885, "logps/rejected": -2.013901948928833, "loss": 1.6972, "nll_loss": 0.42428848147392273, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004501673683989793, "rewards/margins": 0.20094002783298492, "rewards/rejected": -0.20139019191265106, "step": 4636 }, { "epoch": 3.206777316735823, "grad_norm": 6.133581161499023, "learning_rate": 3.774012601813432e-05, "log_odds_chosen": 7.885909080505371, "log_odds_ratio": -0.005818231031298637, "logits/chosen": -0.3401247560977936, "logits/rejected": -0.3749280273914337, "logps/chosen": -0.01006716676056385, "logps/rejected": -1.574007272720337, "loss": 1.4664, "nll_loss": 0.3660276234149933, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010067166294902563, "rewards/margins": 0.15639400482177734, "rewards/rejected": -0.1574007272720337, "step": 4637 }, { "epoch": 3.20746887966805, "grad_norm": 7.458352565765381, "learning_rate": 3.773628400184417e-05, "log_odds_chosen": 7.854743003845215, "log_odds_ratio": -0.07570353895425797, "logits/chosen": -0.5678203105926514, "logits/rejected": -0.5638433694839478, "logps/chosen": -0.0164833664894104, "logps/rejected": -1.0614879131317139, "loss": 1.7779, "nll_loss": 0.43691444396972656, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016483367653563619, "rewards/margins": 0.10450047254562378, "rewards/rejected": -0.10614880919456482, "step": 4638 }, { "epoch": 3.2081604426002768, "grad_norm": 6.762449264526367, "learning_rate": 3.7732441985554015e-05, "log_odds_chosen": 9.169034957885742, "log_odds_ratio": -0.00012380752013996243, "logits/chosen": -0.2560634911060333, "logits/rejected": -0.2874855101108551, "logps/chosen": -0.00047212644130922854, "logps/rejected": -1.5481019020080566, "loss": 1.2975, "nll_loss": 0.32436853647232056, "rewards/accuracies": 1.0, "rewards/chosen": -4.721264849649742e-05, "rewards/margins": 0.15476298332214355, "rewards/rejected": -0.15481020510196686, "step": 4639 }, { "epoch": 3.2088520055325036, "grad_norm": 6.330355644226074, "learning_rate": 3.7728599969263875e-05, "log_odds_chosen": 9.285317420959473, "log_odds_ratio": -0.0011506613809615374, "logits/chosen": -0.3541565537452698, "logits/rejected": -0.34927690029144287, "logps/chosen": -0.006774414796382189, "logps/rejected": -2.2020339965820312, "loss": 1.4599, "nll_loss": 0.3648587465286255, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006774414796382189, "rewards/margins": 0.21952593326568604, "rewards/rejected": -0.22020339965820312, "step": 4640 }, { "epoch": 3.2095435684647304, "grad_norm": 12.923638343811035, "learning_rate": 3.772475795297372e-05, "log_odds_chosen": 9.016387939453125, "log_odds_ratio": -0.0076195537112653255, "logits/chosen": -0.5307125449180603, "logits/rejected": -0.6310874819755554, "logps/chosen": -0.004158839583396912, "logps/rejected": -1.8606791496276855, "loss": 1.7386, "nll_loss": 0.4339001178741455, "rewards/accuracies": 1.0, "rewards/chosen": -0.00041588395833969116, "rewards/margins": 0.18565204739570618, "rewards/rejected": -0.18606792390346527, "step": 4641 }, { "epoch": 3.2102351313969573, "grad_norm": 6.46682071685791, "learning_rate": 3.772091593668357e-05, "log_odds_chosen": 8.791299819946289, "log_odds_ratio": -0.0005798639031127095, "logits/chosen": -0.37945863604545593, "logits/rejected": -0.31576138734817505, "logps/chosen": -0.00813287403434515, "logps/rejected": -1.8918553590774536, "loss": 1.5963, "nll_loss": 0.3990292549133301, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008132873917929828, "rewards/margins": 0.18837225437164307, "rewards/rejected": -0.1891855150461197, "step": 4642 }, { "epoch": 3.210926694329184, "grad_norm": 10.52108097076416, "learning_rate": 3.7717073920393425e-05, "log_odds_chosen": 8.242277145385742, "log_odds_ratio": -0.009203329682350159, "logits/chosen": -0.21045435965061188, "logits/rejected": -0.2219468653202057, "logps/chosen": -0.015710486099123955, "logps/rejected": -1.6792577505111694, "loss": 1.8406, "nll_loss": 0.45923006534576416, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015710486331954598, "rewards/margins": 0.1663547158241272, "rewards/rejected": -0.16792577505111694, "step": 4643 }, { "epoch": 3.211618257261411, "grad_norm": 9.322639465332031, "learning_rate": 3.771323190410328e-05, "log_odds_chosen": 8.99551010131836, "log_odds_ratio": -0.0010692543582990766, "logits/chosen": -0.7445977926254272, "logits/rejected": -0.706108808517456, "logps/chosen": -0.0009857756085693836, "logps/rejected": -1.4609174728393555, "loss": 1.4885, "nll_loss": 0.37202388048171997, "rewards/accuracies": 1.0, "rewards/chosen": -9.85775695880875e-05, "rewards/margins": 0.145993173122406, "rewards/rejected": -0.1460917592048645, "step": 4644 }, { "epoch": 3.2123098201936378, "grad_norm": 7.412604808807373, "learning_rate": 3.770938988781312e-05, "log_odds_chosen": 8.64638900756836, "log_odds_ratio": -0.08681802451610565, "logits/chosen": -0.6938418745994568, "logits/rejected": -0.6647101044654846, "logps/chosen": -0.01793830655515194, "logps/rejected": -1.8186700344085693, "loss": 0.9929, "nll_loss": 0.23953798413276672, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017938308883458376, "rewards/margins": 0.18007317185401917, "rewards/rejected": -0.18186700344085693, "step": 4645 }, { "epoch": 3.2130013831258646, "grad_norm": 6.7649149894714355, "learning_rate": 3.7705547871522976e-05, "log_odds_chosen": 8.722943305969238, "log_odds_ratio": -0.0013751662336289883, "logits/chosen": -0.6181378364562988, "logits/rejected": -0.5605261325836182, "logps/chosen": -0.017265386879444122, "logps/rejected": -2.7568469047546387, "loss": 1.6982, "nll_loss": 0.4244130849838257, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017265386413782835, "rewards/margins": 0.27395814657211304, "rewards/rejected": -0.2756847143173218, "step": 4646 }, { "epoch": 3.2136929460580914, "grad_norm": 9.90661907196045, "learning_rate": 3.770170585523283e-05, "log_odds_chosen": 8.376338005065918, "log_odds_ratio": -0.0018616068409755826, "logits/chosen": -0.4833725094795227, "logits/rejected": -0.5298304557800293, "logps/chosen": -0.016209768131375313, "logps/rejected": -1.9687637090682983, "loss": 1.4897, "nll_loss": 0.3722422420978546, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016209769528359175, "rewards/margins": 0.19525539875030518, "rewards/rejected": -0.1968763917684555, "step": 4647 }, { "epoch": 3.2143845089903182, "grad_norm": 9.517404556274414, "learning_rate": 3.7697863838942674e-05, "log_odds_chosen": 9.411402702331543, "log_odds_ratio": -0.00038138747913762927, "logits/chosen": -0.8192919492721558, "logits/rejected": -0.7597867846488953, "logps/chosen": -0.02547089383006096, "logps/rejected": -2.002095937728882, "loss": 1.7269, "nll_loss": 0.431691974401474, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025470894761383533, "rewards/margins": 0.19766250252723694, "rewards/rejected": -0.2002095878124237, "step": 4648 }, { "epoch": 3.215076071922545, "grad_norm": 4.255143165588379, "learning_rate": 3.769402182265253e-05, "log_odds_chosen": 8.469841003417969, "log_odds_ratio": -0.0006997321615926921, "logits/chosen": -0.44030725955963135, "logits/rejected": -0.4799768924713135, "logps/chosen": -0.01644117198884487, "logps/rejected": -1.5012691020965576, "loss": 1.9054, "nll_loss": 0.4762773811817169, "rewards/accuracies": 1.0, "rewards/chosen": -0.001644117059186101, "rewards/margins": 0.1484827995300293, "rewards/rejected": -0.15012691915035248, "step": 4649 }, { "epoch": 3.215767634854772, "grad_norm": 11.852595329284668, "learning_rate": 3.769017980636238e-05, "log_odds_chosen": 8.309383392333984, "log_odds_ratio": -0.04895387962460518, "logits/chosen": -0.5808581113815308, "logits/rejected": -0.6516112089157104, "logps/chosen": -0.010794704779982567, "logps/rejected": -1.5447827577590942, "loss": 1.8383, "nll_loss": 0.4546731412410736, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010794706176966429, "rewards/margins": 0.1533988118171692, "rewards/rejected": -0.1544782817363739, "step": 4650 }, { "epoch": 3.2164591977869987, "grad_norm": 16.106447219848633, "learning_rate": 3.768633779007223e-05, "log_odds_chosen": 9.898414611816406, "log_odds_ratio": -8.67611524881795e-05, "logits/chosen": -0.503020703792572, "logits/rejected": -0.518354058265686, "logps/chosen": -0.000286134920315817, "logps/rejected": -1.6814404726028442, "loss": 1.6288, "nll_loss": 0.40719372034072876, "rewards/accuracies": 1.0, "rewards/chosen": -2.8613490940188058e-05, "rewards/margins": 0.16811543703079224, "rewards/rejected": -0.16814404726028442, "step": 4651 }, { "epoch": 3.2171507607192256, "grad_norm": 13.375833511352539, "learning_rate": 3.7682495773782084e-05, "log_odds_chosen": 8.774674415588379, "log_odds_ratio": -0.0004774326807819307, "logits/chosen": -0.5813500881195068, "logits/rejected": -0.640762448310852, "logps/chosen": -0.016906345263123512, "logps/rejected": -2.210822105407715, "loss": 2.4823, "nll_loss": 0.6205355525016785, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016906345263123512, "rewards/margins": 0.2193915843963623, "rewards/rejected": -0.22108224034309387, "step": 4652 }, { "epoch": 3.2178423236514524, "grad_norm": 12.245010375976562, "learning_rate": 3.7678653757491936e-05, "log_odds_chosen": 7.981349945068359, "log_odds_ratio": -0.042398203164339066, "logits/chosen": -0.6212899684906006, "logits/rejected": -0.6401181221008301, "logps/chosen": -0.0034417440183460712, "logps/rejected": -1.4640731811523438, "loss": 1.0523, "nll_loss": 0.2588362991809845, "rewards/accuracies": 1.0, "rewards/chosen": -0.00034417444840073586, "rewards/margins": 0.14606314897537231, "rewards/rejected": -0.1464073210954666, "step": 4653 }, { "epoch": 3.2185338865836792, "grad_norm": 9.426918983459473, "learning_rate": 3.767481174120178e-05, "log_odds_chosen": 9.180564880371094, "log_odds_ratio": -0.004941493272781372, "logits/chosen": -0.8051036596298218, "logits/rejected": -0.8713425993919373, "logps/chosen": -0.006668214686214924, "logps/rejected": -2.0180726051330566, "loss": 2.0154, "nll_loss": 0.5033589601516724, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006668214919045568, "rewards/margins": 0.20114043354988098, "rewards/rejected": -0.20180726051330566, "step": 4654 }, { "epoch": 3.219225449515906, "grad_norm": 10.532155990600586, "learning_rate": 3.7670969724911634e-05, "log_odds_chosen": 8.6746826171875, "log_odds_ratio": -0.0008860914967954159, "logits/chosen": -1.1926677227020264, "logits/rejected": -1.298346996307373, "logps/chosen": -0.009572312235832214, "logps/rejected": -1.7488539218902588, "loss": 1.5501, "nll_loss": 0.38743335008621216, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009572312119416893, "rewards/margins": 0.1739281713962555, "rewards/rejected": -0.17488539218902588, "step": 4655 }, { "epoch": 3.219917012448133, "grad_norm": 7.555779457092285, "learning_rate": 3.7667127708621487e-05, "log_odds_chosen": 8.387954711914062, "log_odds_ratio": -0.0016320085851475596, "logits/chosen": -0.6773730516433716, "logits/rejected": -0.6358497738838196, "logps/chosen": -0.0013973293825984001, "logps/rejected": -1.4357331991195679, "loss": 1.094, "nll_loss": 0.2733432948589325, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013973294699098915, "rewards/margins": 0.1434335708618164, "rewards/rejected": -0.1435733139514923, "step": 4656 }, { "epoch": 3.2206085753803597, "grad_norm": 9.341814041137695, "learning_rate": 3.766328569233133e-05, "log_odds_chosen": 7.2777605056762695, "log_odds_ratio": -0.05912681296467781, "logits/chosen": -0.7827059626579285, "logits/rejected": -0.7716456651687622, "logps/chosen": -0.02723333239555359, "logps/rejected": -1.409245252609253, "loss": 2.0224, "nll_loss": 0.4996985197067261, "rewards/accuracies": 1.0, "rewards/chosen": -0.002723333425819874, "rewards/margins": 0.13820120692253113, "rewards/rejected": -0.14092452824115753, "step": 4657 }, { "epoch": 3.2213001383125865, "grad_norm": 9.240910530090332, "learning_rate": 3.765944367604119e-05, "log_odds_chosen": 8.91317367553711, "log_odds_ratio": -0.00021192299027461559, "logits/chosen": -0.8595993518829346, "logits/rejected": -0.8856823444366455, "logps/chosen": -0.00036952694063074887, "logps/rejected": -0.9628509283065796, "loss": 2.6645, "nll_loss": 0.6660939455032349, "rewards/accuracies": 1.0, "rewards/chosen": -3.695269697345793e-05, "rewards/margins": 0.09624814242124557, "rewards/rejected": -0.09628509730100632, "step": 4658 }, { "epoch": 3.2219917012448134, "grad_norm": 9.384906768798828, "learning_rate": 3.765560165975104e-05, "log_odds_chosen": 9.198957443237305, "log_odds_ratio": -0.0007888744585216045, "logits/chosen": -0.6961106061935425, "logits/rejected": -0.809903621673584, "logps/chosen": -0.002150989603251219, "logps/rejected": -2.140223503112793, "loss": 2.2783, "nll_loss": 0.569500744342804, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002150989748770371, "rewards/margins": 0.21380724012851715, "rewards/rejected": -0.21402233839035034, "step": 4659 }, { "epoch": 3.22268326417704, "grad_norm": 5.048548221588135, "learning_rate": 3.765175964346089e-05, "log_odds_chosen": 8.297301292419434, "log_odds_ratio": -0.002085586078464985, "logits/chosen": -0.19204308092594147, "logits/rejected": -0.17156429588794708, "logps/chosen": -0.0024641165509819984, "logps/rejected": -1.0932461023330688, "loss": 1.2116, "nll_loss": 0.3026840090751648, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024641165509819984, "rewards/margins": 0.10907819867134094, "rewards/rejected": -0.10932460427284241, "step": 4660 }, { "epoch": 3.223374827109267, "grad_norm": 8.581104278564453, "learning_rate": 3.764791762717074e-05, "log_odds_chosen": 8.011213302612305, "log_odds_ratio": -0.015043283812701702, "logits/chosen": -0.7729774713516235, "logits/rejected": -0.7363721132278442, "logps/chosen": -0.004782415926456451, "logps/rejected": -0.9363967180252075, "loss": 1.5382, "nll_loss": 0.3830445110797882, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004782416217494756, "rewards/margins": 0.0931614339351654, "rewards/rejected": -0.09363967180252075, "step": 4661 }, { "epoch": 3.224066390041494, "grad_norm": 9.607839584350586, "learning_rate": 3.7644075610880594e-05, "log_odds_chosen": 9.721281051635742, "log_odds_ratio": -0.00016697979299351573, "logits/chosen": -0.5198001861572266, "logits/rejected": -0.6026105284690857, "logps/chosen": -0.0010092060547322035, "logps/rejected": -1.845146894454956, "loss": 1.4448, "nll_loss": 0.3611833155155182, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010092060983879492, "rewards/margins": 0.18441376090049744, "rewards/rejected": -0.18451470136642456, "step": 4662 }, { "epoch": 3.2247579529737207, "grad_norm": 9.854679107666016, "learning_rate": 3.764023359459044e-05, "log_odds_chosen": 8.785548210144043, "log_odds_ratio": -0.0028546079993247986, "logits/chosen": -0.47076669335365295, "logits/rejected": -0.5453388094902039, "logps/chosen": -0.002330200746655464, "logps/rejected": -1.5070253610610962, "loss": 1.6038, "nll_loss": 0.40066125988960266, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023302006593439728, "rewards/margins": 0.15046951174736023, "rewards/rejected": -0.15070253610610962, "step": 4663 }, { "epoch": 3.2254495159059475, "grad_norm": 13.590742111206055, "learning_rate": 3.763639157830029e-05, "log_odds_chosen": 7.963131904602051, "log_odds_ratio": -0.0036431909538805485, "logits/chosen": -0.0865345448255539, "logits/rejected": -0.1931847631931305, "logps/chosen": -0.021115001291036606, "logps/rejected": -1.798100471496582, "loss": 1.7865, "nll_loss": 0.4462681710720062, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021115001291036606, "rewards/margins": 0.17769855260849, "rewards/rejected": -0.1798100471496582, "step": 4664 }, { "epoch": 3.2261410788381744, "grad_norm": 5.58372163772583, "learning_rate": 3.7632549562010145e-05, "log_odds_chosen": 9.053085327148438, "log_odds_ratio": -0.0012438575504347682, "logits/chosen": -0.5867530107498169, "logits/rejected": -0.7836533784866333, "logps/chosen": -0.01469984371215105, "logps/rejected": -1.4102587699890137, "loss": 1.4402, "nll_loss": 0.3599216938018799, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014699844177812338, "rewards/margins": 0.1395559012889862, "rewards/rejected": -0.14102588593959808, "step": 4665 }, { "epoch": 3.226832641770401, "grad_norm": 12.05872631072998, "learning_rate": 3.762870754571999e-05, "log_odds_chosen": 7.115346431732178, "log_odds_ratio": -0.04925874248147011, "logits/chosen": -0.7009913921356201, "logits/rejected": -0.6906958222389221, "logps/chosen": -0.032538898289203644, "logps/rejected": -1.2821375131607056, "loss": 2.1572, "nll_loss": 0.5343620777130127, "rewards/accuracies": 1.0, "rewards/chosen": -0.003253889735788107, "rewards/margins": 0.12495986372232437, "rewards/rejected": -0.12821374833583832, "step": 4666 }, { "epoch": 3.227524204702628, "grad_norm": 10.255760192871094, "learning_rate": 3.762486552942985e-05, "log_odds_chosen": 9.634766578674316, "log_odds_ratio": -0.007561844773590565, "logits/chosen": -0.41329842805862427, "logits/rejected": -0.42982420325279236, "logps/chosen": -0.0032992465421557426, "logps/rejected": -2.2600021362304688, "loss": 1.6797, "nll_loss": 0.4191625416278839, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032992466003634036, "rewards/margins": 0.22567029297351837, "rewards/rejected": -0.22600021958351135, "step": 4667 }, { "epoch": 3.228215767634855, "grad_norm": 8.777718544006348, "learning_rate": 3.7621023513139696e-05, "log_odds_chosen": 8.005030632019043, "log_odds_ratio": -0.001454401994124055, "logits/chosen": -0.6022816300392151, "logits/rejected": -0.6292502880096436, "logps/chosen": -0.029774591326713562, "logps/rejected": -2.4813647270202637, "loss": 1.5148, "nll_loss": 0.37855100631713867, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029774592258036137, "rewards/margins": 0.24515900015830994, "rewards/rejected": -0.2481364607810974, "step": 4668 }, { "epoch": 3.2289073305670817, "grad_norm": 6.01991605758667, "learning_rate": 3.761718149684955e-05, "log_odds_chosen": 7.310305595397949, "log_odds_ratio": -0.23577818274497986, "logits/chosen": -0.6367329955101013, "logits/rejected": -0.6445616483688354, "logps/chosen": -0.03664974868297577, "logps/rejected": -1.2936198711395264, "loss": 1.756, "nll_loss": 0.4154262840747833, "rewards/accuracies": 0.875, "rewards/chosen": -0.0036649745889008045, "rewards/margins": 0.12569700181484222, "rewards/rejected": -0.12936197221279144, "step": 4669 }, { "epoch": 3.2295988934993085, "grad_norm": 11.206335067749023, "learning_rate": 3.76133394805594e-05, "log_odds_chosen": 8.69202709197998, "log_odds_ratio": -0.003309912048280239, "logits/chosen": -0.06795523315668106, "logits/rejected": -0.1211337149143219, "logps/chosen": -0.0026320209726691246, "logps/rejected": -1.8271875381469727, "loss": 1.6936, "nll_loss": 0.42306768894195557, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026320209144614637, "rewards/margins": 0.18245553970336914, "rewards/rejected": -0.18271872401237488, "step": 4670 }, { "epoch": 3.2302904564315353, "grad_norm": 8.706618309020996, "learning_rate": 3.760949746426925e-05, "log_odds_chosen": 8.893049240112305, "log_odds_ratio": -0.0005472367629408836, "logits/chosen": -0.8280361890792847, "logits/rejected": -0.839066207408905, "logps/chosen": -0.019564703106880188, "logps/rejected": -2.181659698486328, "loss": 2.0402, "nll_loss": 0.510004997253418, "rewards/accuracies": 1.0, "rewards/chosen": -0.001956470310688019, "rewards/margins": 0.2162095010280609, "rewards/rejected": -0.21816599369049072, "step": 4671 }, { "epoch": 3.230982019363762, "grad_norm": 7.39252233505249, "learning_rate": 3.76056554479791e-05, "log_odds_chosen": 9.779670715332031, "log_odds_ratio": -0.0002500510308891535, "logits/chosen": -0.37103089690208435, "logits/rejected": -0.3734338879585266, "logps/chosen": -0.004838225431740284, "logps/rejected": -1.9937759637832642, "loss": 1.261, "nll_loss": 0.31522834300994873, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004838225431740284, "rewards/margins": 0.19889378547668457, "rewards/rejected": -0.1993776112794876, "step": 4672 }, { "epoch": 3.231673582295989, "grad_norm": 7.205052852630615, "learning_rate": 3.760181343168895e-05, "log_odds_chosen": 8.463676452636719, "log_odds_ratio": -0.0005333481822162867, "logits/chosen": -0.3758777678012848, "logits/rejected": -0.38880136609077454, "logps/chosen": -0.0009052710374817252, "logps/rejected": -1.089587688446045, "loss": 1.5308, "nll_loss": 0.3826429843902588, "rewards/accuracies": 1.0, "rewards/chosen": -9.052710811374709e-05, "rewards/margins": 0.10886824131011963, "rewards/rejected": -0.10895876586437225, "step": 4673 }, { "epoch": 3.232365145228216, "grad_norm": 7.246854305267334, "learning_rate": 3.7597971415398803e-05, "log_odds_chosen": 8.98906135559082, "log_odds_ratio": -0.00046163221122696996, "logits/chosen": -0.6586583256721497, "logits/rejected": -0.6791250705718994, "logps/chosen": -0.0069456384517252445, "logps/rejected": -2.028202772140503, "loss": 1.3642, "nll_loss": 0.3409973084926605, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006945638451725245, "rewards/margins": 0.20212571322917938, "rewards/rejected": -0.20282027125358582, "step": 4674 }, { "epoch": 3.2330567081604427, "grad_norm": 14.615860939025879, "learning_rate": 3.759412939910865e-05, "log_odds_chosen": 9.896066665649414, "log_odds_ratio": -5.868840526090935e-05, "logits/chosen": -0.7222040891647339, "logits/rejected": -0.8251558542251587, "logps/chosen": -0.00048558454727753997, "logps/rejected": -1.741550326347351, "loss": 3.3383, "nll_loss": 0.8345783352851868, "rewards/accuracies": 1.0, "rewards/chosen": -4.8558453272562474e-05, "rewards/margins": 0.17410646378993988, "rewards/rejected": -0.17415504157543182, "step": 4675 }, { "epoch": 3.2337482710926695, "grad_norm": 5.720992088317871, "learning_rate": 3.759028738281851e-05, "log_odds_chosen": 9.12053394317627, "log_odds_ratio": -0.01311265118420124, "logits/chosen": -0.35370558500289917, "logits/rejected": -0.44249600172042847, "logps/chosen": -0.01543546374887228, "logps/rejected": -2.2226688861846924, "loss": 1.205, "nll_loss": 0.2999301552772522, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015435463283210993, "rewards/margins": 0.22072333097457886, "rewards/rejected": -0.22226688265800476, "step": 4676 }, { "epoch": 3.2344398340248963, "grad_norm": 8.618167877197266, "learning_rate": 3.7586445366528354e-05, "log_odds_chosen": 9.140830993652344, "log_odds_ratio": -0.00018256741168443114, "logits/chosen": -0.7402679920196533, "logits/rejected": -0.766697347164154, "logps/chosen": -0.009820668958127499, "logps/rejected": -1.8408517837524414, "loss": 2.3509, "nll_loss": 0.587706446647644, "rewards/accuracies": 1.0, "rewards/chosen": -0.000982066965661943, "rewards/margins": 0.18310311436653137, "rewards/rejected": -0.1840851902961731, "step": 4677 }, { "epoch": 3.235131396957123, "grad_norm": 10.047858238220215, "learning_rate": 3.7582603350238206e-05, "log_odds_chosen": 9.806346893310547, "log_odds_ratio": -7.717790140304714e-05, "logits/chosen": -0.7226029634475708, "logits/rejected": -0.7571486234664917, "logps/chosen": -0.0005480307736434042, "logps/rejected": -1.879559874534607, "loss": 1.7701, "nll_loss": 0.4425126910209656, "rewards/accuracies": 1.0, "rewards/chosen": -5.480307663674466e-05, "rewards/margins": 0.18790119886398315, "rewards/rejected": -0.18795599043369293, "step": 4678 }, { "epoch": 3.23582295988935, "grad_norm": 6.668112277984619, "learning_rate": 3.757876133394806e-05, "log_odds_chosen": 7.855587005615234, "log_odds_ratio": -0.0021889405325055122, "logits/chosen": -0.7883478403091431, "logits/rejected": -0.8201271891593933, "logps/chosen": -0.0025607063435018063, "logps/rejected": -1.2306946516036987, "loss": 1.9619, "nll_loss": 0.49024853110313416, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002560706343501806, "rewards/margins": 0.1228134036064148, "rewards/rejected": -0.12306946516036987, "step": 4679 }, { "epoch": 3.236514522821577, "grad_norm": 12.489912986755371, "learning_rate": 3.757491931765791e-05, "log_odds_chosen": 7.957180023193359, "log_odds_ratio": -0.06421130150556564, "logits/chosen": -0.35384392738342285, "logits/rejected": -0.4528539180755615, "logps/chosen": -0.012504791840910912, "logps/rejected": -1.523071050643921, "loss": 1.6993, "nll_loss": 0.4184127449989319, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012504790211096406, "rewards/margins": 0.15105663239955902, "rewards/rejected": -0.15230710804462433, "step": 4680 }, { "epoch": 3.2372060857538036, "grad_norm": 5.42405366897583, "learning_rate": 3.757107730136776e-05, "log_odds_chosen": 7.128323554992676, "log_odds_ratio": -0.10372748225927353, "logits/chosen": -0.41413596272468567, "logits/rejected": -0.3195302486419678, "logps/chosen": -0.03096316009759903, "logps/rejected": -1.648804783821106, "loss": 1.1236, "nll_loss": 0.2705293595790863, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030963162425905466, "rewards/margins": 0.16178417205810547, "rewards/rejected": -0.16488048434257507, "step": 4681 }, { "epoch": 3.2378976486860305, "grad_norm": 14.078836441040039, "learning_rate": 3.756723528507761e-05, "log_odds_chosen": 8.796689987182617, "log_odds_ratio": -0.0006093117990531027, "logits/chosen": -0.5976904034614563, "logits/rejected": -0.6900879144668579, "logps/chosen": -0.0045931520871818066, "logps/rejected": -1.6781189441680908, "loss": 1.8045, "nll_loss": 0.45107364654541016, "rewards/accuracies": 1.0, "rewards/chosen": -0.00045931522618047893, "rewards/margins": 0.1673526018857956, "rewards/rejected": -0.16781191527843475, "step": 4682 }, { "epoch": 3.2385892116182573, "grad_norm": 14.267437934875488, "learning_rate": 3.756339326878746e-05, "log_odds_chosen": 9.33151626586914, "log_odds_ratio": -0.008812183514237404, "logits/chosen": -0.42314353585243225, "logits/rejected": -0.6270802021026611, "logps/chosen": -0.008810743689537048, "logps/rejected": -2.262091875076294, "loss": 2.0369, "nll_loss": 0.5083341002464294, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008810744038783014, "rewards/margins": 0.22532811760902405, "rewards/rejected": -0.22620920836925507, "step": 4683 }, { "epoch": 3.239280774550484, "grad_norm": 6.2066779136657715, "learning_rate": 3.755955125249731e-05, "log_odds_chosen": 9.290958404541016, "log_odds_ratio": -0.0004162929253652692, "logits/chosen": -0.5276498794555664, "logits/rejected": -0.6197217702865601, "logps/chosen": -0.002184888580814004, "logps/rejected": -1.5688152313232422, "loss": 1.398, "nll_loss": 0.34945833683013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021848888718523085, "rewards/margins": 0.15666301548480988, "rewards/rejected": -0.15688151121139526, "step": 4684 }, { "epoch": 3.239972337482711, "grad_norm": 14.069849014282227, "learning_rate": 3.755570923620717e-05, "log_odds_chosen": 7.24415397644043, "log_odds_ratio": -0.2386513352394104, "logits/chosen": -0.6899997591972351, "logits/rejected": -0.7094993591308594, "logps/chosen": -0.034362297505140305, "logps/rejected": -1.2999882698059082, "loss": 1.9539, "nll_loss": 0.4645982086658478, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034362301230430603, "rewards/margins": 0.12656259536743164, "rewards/rejected": -0.1299988329410553, "step": 4685 }, { "epoch": 3.240663900414938, "grad_norm": 9.512862205505371, "learning_rate": 3.755186721991701e-05, "log_odds_chosen": 7.003537178039551, "log_odds_ratio": -0.06596534699201584, "logits/chosen": -0.7419699430465698, "logits/rejected": -0.7511963248252869, "logps/chosen": -0.024283548817038536, "logps/rejected": -1.1892660856246948, "loss": 2.5285, "nll_loss": 0.6255288124084473, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024283546954393387, "rewards/margins": 0.11649825423955917, "rewards/rejected": -0.11892661452293396, "step": 4686 }, { "epoch": 3.2413554633471646, "grad_norm": 11.929668426513672, "learning_rate": 3.7548025203626865e-05, "log_odds_chosen": 9.594942092895508, "log_odds_ratio": -0.00027311124722473323, "logits/chosen": -0.5834130048751831, "logits/rejected": -0.660904049873352, "logps/chosen": -0.007404697127640247, "logps/rejected": -2.3006372451782227, "loss": 1.9043, "nll_loss": 0.47605520486831665, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007404697826132178, "rewards/margins": 0.22932323813438416, "rewards/rejected": -0.23006370663642883, "step": 4687 }, { "epoch": 3.2420470262793915, "grad_norm": 7.606420516967773, "learning_rate": 3.754418318733672e-05, "log_odds_chosen": 8.018352508544922, "log_odds_ratio": -0.033401452004909515, "logits/chosen": -0.433298796415329, "logits/rejected": -0.4908546209335327, "logps/chosen": -0.019605904817581177, "logps/rejected": -1.8726167678833008, "loss": 1.1228, "nll_loss": 0.2773599624633789, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019605904817581177, "rewards/margins": 0.18530109524726868, "rewards/rejected": -0.1872616857290268, "step": 4688 }, { "epoch": 3.2427385892116183, "grad_norm": 7.604212760925293, "learning_rate": 3.754034117104657e-05, "log_odds_chosen": 7.7977447509765625, "log_odds_ratio": -0.02325718104839325, "logits/chosen": -0.2695230543613434, "logits/rejected": -0.33405792713165283, "logps/chosen": -0.009040276519954205, "logps/rejected": -1.8517611026763916, "loss": 1.3656, "nll_loss": 0.3390858471393585, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009040277218446136, "rewards/margins": 0.18427208065986633, "rewards/rejected": -0.18517610430717468, "step": 4689 }, { "epoch": 3.243430152143845, "grad_norm": 7.930447578430176, "learning_rate": 3.7536499154756415e-05, "log_odds_chosen": 8.696924209594727, "log_odds_ratio": -0.0006968708476051688, "logits/chosen": -0.5834269523620605, "logits/rejected": -0.6710518598556519, "logps/chosen": -0.010049977339804173, "logps/rejected": -1.799387812614441, "loss": 1.7001, "nll_loss": 0.4249535799026489, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010049976408481598, "rewards/margins": 0.1789337694644928, "rewards/rejected": -0.17993877828121185, "step": 4690 }, { "epoch": 3.244121715076072, "grad_norm": 10.962292671203613, "learning_rate": 3.753265713846627e-05, "log_odds_chosen": 9.12042236328125, "log_odds_ratio": -0.0011845820117741823, "logits/chosen": -0.7881402969360352, "logits/rejected": -0.8471523523330688, "logps/chosen": -0.001060610287822783, "logps/rejected": -1.4474725723266602, "loss": 1.4429, "nll_loss": 0.36061328649520874, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010606103023746982, "rewards/margins": 0.1446411907672882, "rewards/rejected": -0.1447472721338272, "step": 4691 }, { "epoch": 3.2448132780082988, "grad_norm": 10.399835586547852, "learning_rate": 3.752881512217612e-05, "log_odds_chosen": 8.767321586608887, "log_odds_ratio": -0.0005905175348743796, "logits/chosen": -0.6651334762573242, "logits/rejected": -0.6290321350097656, "logps/chosen": -0.0007113451138138771, "logps/rejected": -1.4993962049484253, "loss": 1.7853, "nll_loss": 0.44627705216407776, "rewards/accuracies": 1.0, "rewards/chosen": -7.113451283657923e-05, "rewards/margins": 0.14986848831176758, "rewards/rejected": -0.1499396115541458, "step": 4692 }, { "epoch": 3.2455048409405256, "grad_norm": 8.132790565490723, "learning_rate": 3.7524973105885966e-05, "log_odds_chosen": 8.834731101989746, "log_odds_ratio": -0.0014325689990073442, "logits/chosen": -0.7984225749969482, "logits/rejected": -0.8978487253189087, "logps/chosen": -0.034150779247283936, "logps/rejected": -1.837937831878662, "loss": 1.5841, "nll_loss": 0.39587467908859253, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034150779247283936, "rewards/margins": 0.180378720164299, "rewards/rejected": -0.1837937980890274, "step": 4693 }, { "epoch": 3.2461964038727524, "grad_norm": 11.265973091125488, "learning_rate": 3.7521131089595825e-05, "log_odds_chosen": 7.646029472351074, "log_odds_ratio": -0.02673262730240822, "logits/chosen": -0.5934436321258545, "logits/rejected": -0.5885952711105347, "logps/chosen": -0.008165750652551651, "logps/rejected": -1.3897361755371094, "loss": 1.3938, "nll_loss": 0.3457859456539154, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008165750768966973, "rewards/margins": 0.13815705478191376, "rewards/rejected": -0.13897360861301422, "step": 4694 }, { "epoch": 3.2468879668049793, "grad_norm": 6.324791431427002, "learning_rate": 3.751728907330567e-05, "log_odds_chosen": 6.769179344177246, "log_odds_ratio": -0.11484857648611069, "logits/chosen": -0.6039650440216064, "logits/rejected": -0.5800857543945312, "logps/chosen": -0.028059110045433044, "logps/rejected": -1.295393466949463, "loss": 1.1339, "nll_loss": 0.271982342004776, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028059110045433044, "rewards/margins": 0.1267334371805191, "rewards/rejected": -0.1295393407344818, "step": 4695 }, { "epoch": 3.247579529737206, "grad_norm": 20.774612426757812, "learning_rate": 3.751344705701552e-05, "log_odds_chosen": 9.281335830688477, "log_odds_ratio": -0.0002557536936365068, "logits/chosen": -0.4461503028869629, "logits/rejected": -0.5599817037582397, "logps/chosen": -0.0008539292612113059, "logps/rejected": -1.8166067600250244, "loss": 2.0747, "nll_loss": 0.5186419486999512, "rewards/accuracies": 1.0, "rewards/chosen": -8.539292321074754e-05, "rewards/margins": 0.18157526850700378, "rewards/rejected": -0.18166068196296692, "step": 4696 }, { "epoch": 3.248271092669433, "grad_norm": 10.650715827941895, "learning_rate": 3.7509605040725376e-05, "log_odds_chosen": 10.62962532043457, "log_odds_ratio": -5.8132434787694365e-05, "logits/chosen": -0.6330527663230896, "logits/rejected": -0.7648962140083313, "logps/chosen": -0.00021957623539492488, "logps/rejected": -2.031571865081787, "loss": 1.2311, "nll_loss": 0.3077716529369354, "rewards/accuracies": 1.0, "rewards/chosen": -2.195762499468401e-05, "rewards/margins": 0.20313522219657898, "rewards/rejected": -0.2031571865081787, "step": 4697 }, { "epoch": 3.2489626556016598, "grad_norm": 9.276825904846191, "learning_rate": 3.750576302443523e-05, "log_odds_chosen": 9.023599624633789, "log_odds_ratio": -0.00047183758579194546, "logits/chosen": -0.6361697912216187, "logits/rejected": -0.6914676427841187, "logps/chosen": -0.007565617561340332, "logps/rejected": -1.8860620260238647, "loss": 1.7635, "nll_loss": 0.44083526730537415, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007565617561340332, "rewards/margins": 0.18784965574741364, "rewards/rejected": -0.18860623240470886, "step": 4698 }, { "epoch": 3.2496542185338866, "grad_norm": 11.17447280883789, "learning_rate": 3.7501921008145074e-05, "log_odds_chosen": 9.626748085021973, "log_odds_ratio": -0.00036149457446299493, "logits/chosen": -0.8828724026679993, "logits/rejected": -0.9713633060455322, "logps/chosen": -0.0006358891841955483, "logps/rejected": -2.0440754890441895, "loss": 2.0904, "nll_loss": 0.5225660800933838, "rewards/accuracies": 1.0, "rewards/chosen": -6.358891550917178e-05, "rewards/margins": 0.2043439894914627, "rewards/rejected": -0.20440757274627686, "step": 4699 }, { "epoch": 3.2503457814661134, "grad_norm": 12.056692123413086, "learning_rate": 3.7498078991854926e-05, "log_odds_chosen": 8.626978874206543, "log_odds_ratio": -0.00200482876971364, "logits/chosen": -0.8026310801506042, "logits/rejected": -0.8775153756141663, "logps/chosen": -0.0016090385615825653, "logps/rejected": -1.2534581422805786, "loss": 1.9658, "nll_loss": 0.49123844504356384, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016090385906863958, "rewards/margins": 0.12518492341041565, "rewards/rejected": -0.12534582614898682, "step": 4700 }, { "epoch": 3.2510373443983402, "grad_norm": 6.449684143066406, "learning_rate": 3.749423697556478e-05, "log_odds_chosen": 8.831612586975098, "log_odds_ratio": -0.0013644417049363256, "logits/chosen": -0.6984375715255737, "logits/rejected": -0.7231895923614502, "logps/chosen": -0.001228701206855476, "logps/rejected": -1.282002568244934, "loss": 1.0217, "nll_loss": 0.2552833557128906, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012287012941669673, "rewards/margins": 0.12807738780975342, "rewards/rejected": -0.1282002478837967, "step": 4701 }, { "epoch": 3.251728907330567, "grad_norm": 9.102448463439941, "learning_rate": 3.7490394959274624e-05, "log_odds_chosen": 8.337583541870117, "log_odds_ratio": -0.038717515766620636, "logits/chosen": -0.4444388747215271, "logits/rejected": -0.47403082251548767, "logps/chosen": -0.010870045982301235, "logps/rejected": -1.121246337890625, "loss": 1.3042, "nll_loss": 0.3221665024757385, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010870046680793166, "rewards/margins": 0.11103762686252594, "rewards/rejected": -0.11212463676929474, "step": 4702 }, { "epoch": 3.252420470262794, "grad_norm": 9.043835639953613, "learning_rate": 3.7486552942984484e-05, "log_odds_chosen": 8.033955574035645, "log_odds_ratio": -0.004091629758477211, "logits/chosen": -0.4190574288368225, "logits/rejected": -0.4952911138534546, "logps/chosen": -0.012978767044842243, "logps/rejected": -1.4955980777740479, "loss": 1.331, "nll_loss": 0.3323467969894409, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012978767044842243, "rewards/margins": 0.1482619345188141, "rewards/rejected": -0.14955979585647583, "step": 4703 }, { "epoch": 3.2531120331950207, "grad_norm": 9.657242774963379, "learning_rate": 3.748271092669433e-05, "log_odds_chosen": 8.201099395751953, "log_odds_ratio": -0.040349896997213364, "logits/chosen": -0.22873516380786896, "logits/rejected": -0.3231565058231354, "logps/chosen": -0.022254683077335358, "logps/rejected": -1.6321990489959717, "loss": 1.5839, "nll_loss": 0.3919522762298584, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022254684008657932, "rewards/margins": 0.16099445521831512, "rewards/rejected": -0.16321992874145508, "step": 4704 }, { "epoch": 3.2538035961272476, "grad_norm": 9.094130516052246, "learning_rate": 3.747886891040418e-05, "log_odds_chosen": 9.257597923278809, "log_odds_ratio": -0.0005117820110172033, "logits/chosen": -0.713983416557312, "logits/rejected": -0.7741215229034424, "logps/chosen": -0.0029167216271162033, "logps/rejected": -1.9832541942596436, "loss": 1.2118, "nll_loss": 0.3028981387615204, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002916721859946847, "rewards/margins": 0.19803375005722046, "rewards/rejected": -0.19832541048526764, "step": 4705 }, { "epoch": 3.2544951590594744, "grad_norm": 7.859956741333008, "learning_rate": 3.7475026894114034e-05, "log_odds_chosen": 9.877527236938477, "log_odds_ratio": -9.827398753259331e-05, "logits/chosen": -0.8897565007209778, "logits/rejected": -0.9196685552597046, "logps/chosen": -0.0003954821149818599, "logps/rejected": -1.7171218395233154, "loss": 1.2827, "nll_loss": 0.3206574618816376, "rewards/accuracies": 1.0, "rewards/chosen": -3.954820931539871e-05, "rewards/margins": 0.17167265713214874, "rewards/rejected": -0.1717122197151184, "step": 4706 }, { "epoch": 3.2551867219917012, "grad_norm": 5.342949867248535, "learning_rate": 3.747118487782389e-05, "log_odds_chosen": 10.59014892578125, "log_odds_ratio": -4.1027629777090624e-05, "logits/chosen": -0.4254288673400879, "logits/rejected": -0.4047529697418213, "logps/chosen": -0.00017517567903269082, "logps/rejected": -1.7674227952957153, "loss": 1.1699, "nll_loss": 0.2924777865409851, "rewards/accuracies": 1.0, "rewards/chosen": -1.7517568267066963e-05, "rewards/margins": 0.17672476172447205, "rewards/rejected": -0.176742285490036, "step": 4707 }, { "epoch": 3.255878284923928, "grad_norm": 7.126961708068848, "learning_rate": 3.746734286153373e-05, "log_odds_chosen": 8.468984603881836, "log_odds_ratio": -0.0038509315345436335, "logits/chosen": -0.4620926082134247, "logits/rejected": -0.42402681708335876, "logps/chosen": -0.0030720517970621586, "logps/rejected": -1.2905032634735107, "loss": 1.0187, "nll_loss": 0.25430044531822205, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003072051622439176, "rewards/margins": 0.12874311208724976, "rewards/rejected": -0.1290503293275833, "step": 4708 }, { "epoch": 3.256569847856155, "grad_norm": 7.006777286529541, "learning_rate": 3.7463500845243585e-05, "log_odds_chosen": 9.9539213180542, "log_odds_ratio": -0.0007148530567064881, "logits/chosen": -0.14592701196670532, "logits/rejected": -0.2852640151977539, "logps/chosen": -0.000978219322860241, "logps/rejected": -2.2065348625183105, "loss": 1.3983, "nll_loss": 0.34950917959213257, "rewards/accuracies": 1.0, "rewards/chosen": -9.78219322860241e-05, "rewards/margins": 0.2205556333065033, "rewards/rejected": -0.2206534594297409, "step": 4709 }, { "epoch": 3.2572614107883817, "grad_norm": 7.470785617828369, "learning_rate": 3.745965882895344e-05, "log_odds_chosen": 9.500198364257812, "log_odds_ratio": -0.08528933674097061, "logits/chosen": -0.06970225274562836, "logits/rejected": -0.07198717445135117, "logps/chosen": -0.014830323867499828, "logps/rejected": -2.3052525520324707, "loss": 1.4311, "nll_loss": 0.34924232959747314, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014830323634669185, "rewards/margins": 0.2290422022342682, "rewards/rejected": -0.23052525520324707, "step": 4710 }, { "epoch": 3.2579529737206085, "grad_norm": 9.470431327819824, "learning_rate": 3.745581681266328e-05, "log_odds_chosen": 8.710151672363281, "log_odds_ratio": -0.006062633823603392, "logits/chosen": -0.39688247442245483, "logits/rejected": -0.48148801922798157, "logps/chosen": -0.006541873794049025, "logps/rejected": -1.8868510723114014, "loss": 1.1379, "nll_loss": 0.2838761806488037, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006541873444803059, "rewards/margins": 0.1880309134721756, "rewards/rejected": -0.1886851042509079, "step": 4711 }, { "epoch": 3.2586445366528354, "grad_norm": 10.436737060546875, "learning_rate": 3.745197479637314e-05, "log_odds_chosen": 8.634658813476562, "log_odds_ratio": -0.00040024961344897747, "logits/chosen": -0.3570972681045532, "logits/rejected": -0.5034259557723999, "logps/chosen": -0.005407108925282955, "logps/rejected": -2.238614082336426, "loss": 2.0126, "nll_loss": 0.5031040906906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005407108692452312, "rewards/margins": 0.22332070767879486, "rewards/rejected": -0.223861426115036, "step": 4712 }, { "epoch": 3.259336099585062, "grad_norm": 8.839387893676758, "learning_rate": 3.744813278008299e-05, "log_odds_chosen": 8.221335411071777, "log_odds_ratio": -0.037947457283735275, "logits/chosen": -0.43112167716026306, "logits/rejected": -0.5641734004020691, "logps/chosen": -0.028066959232091904, "logps/rejected": -2.2719764709472656, "loss": 1.7187, "nll_loss": 0.42588597536087036, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028066958766430616, "rewards/margins": 0.2243909388780594, "rewards/rejected": -0.22719764709472656, "step": 4713 }, { "epoch": 3.260027662517289, "grad_norm": 11.743000984191895, "learning_rate": 3.744429076379284e-05, "log_odds_chosen": 8.314166069030762, "log_odds_ratio": -0.012927965261042118, "logits/chosen": -0.32482391595840454, "logits/rejected": -0.29587215185165405, "logps/chosen": -0.015798617154359818, "logps/rejected": -1.9533848762512207, "loss": 1.5372, "nll_loss": 0.3830021917819977, "rewards/accuracies": 1.0, "rewards/chosen": -0.001579861855134368, "rewards/margins": 0.19375863671302795, "rewards/rejected": -0.19533848762512207, "step": 4714 }, { "epoch": 3.260719225449516, "grad_norm": 8.914383888244629, "learning_rate": 3.744044874750269e-05, "log_odds_chosen": 8.001260757446289, "log_odds_ratio": -0.0005701860645785928, "logits/chosen": -0.16800744831562042, "logits/rejected": -0.20939353108406067, "logps/chosen": -0.012287750840187073, "logps/rejected": -1.8615694046020508, "loss": 1.8311, "nll_loss": 0.4577205777168274, "rewards/accuracies": 1.0, "rewards/chosen": -0.001228775130584836, "rewards/margins": 0.18492814898490906, "rewards/rejected": -0.18615692853927612, "step": 4715 }, { "epoch": 3.2614107883817427, "grad_norm": 6.675778388977051, "learning_rate": 3.7436606731212545e-05, "log_odds_chosen": 10.189067840576172, "log_odds_ratio": -0.00026544820866547525, "logits/chosen": -0.6262035369873047, "logits/rejected": -0.6417590975761414, "logps/chosen": -0.0010462929494678974, "logps/rejected": -2.1971559524536133, "loss": 1.6888, "nll_loss": 0.42217081785202026, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010462929640198126, "rewards/margins": 0.21961098909378052, "rewards/rejected": -0.21971561014652252, "step": 4716 }, { "epoch": 3.2621023513139695, "grad_norm": 6.2069573402404785, "learning_rate": 3.743276471492239e-05, "log_odds_chosen": 6.975490570068359, "log_odds_ratio": -0.04731149226427078, "logits/chosen": -0.4405580163002014, "logits/rejected": -0.40422195196151733, "logps/chosen": -0.10175307095050812, "logps/rejected": -1.5015373229980469, "loss": 1.356, "nll_loss": 0.33427995443344116, "rewards/accuracies": 1.0, "rewards/chosen": -0.010175305418670177, "rewards/margins": 0.13997843861579895, "rewards/rejected": -0.1501537412405014, "step": 4717 }, { "epoch": 3.2627939142461964, "grad_norm": 10.46078872680664, "learning_rate": 3.742892269863224e-05, "log_odds_chosen": 8.430739402770996, "log_odds_ratio": -0.01749058999121189, "logits/chosen": -0.4526011645793915, "logits/rejected": -0.4871124029159546, "logps/chosen": -0.03805088624358177, "logps/rejected": -2.7476189136505127, "loss": 2.6868, "nll_loss": 0.669959545135498, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038050890434533358, "rewards/margins": 0.270956814289093, "rewards/rejected": -0.2747619152069092, "step": 4718 }, { "epoch": 3.263485477178423, "grad_norm": 12.18230152130127, "learning_rate": 3.7425080682342096e-05, "log_odds_chosen": 9.675816535949707, "log_odds_ratio": -0.00019988187705166638, "logits/chosen": -0.4546333849430084, "logits/rejected": -0.5552395582199097, "logps/chosen": -0.0006059492588974535, "logps/rejected": -1.817103624343872, "loss": 2.1331, "nll_loss": 0.5332649946212769, "rewards/accuracies": 1.0, "rewards/chosen": -6.059492443455383e-05, "rewards/margins": 0.18164978921413422, "rewards/rejected": -0.1817103624343872, "step": 4719 }, { "epoch": 3.26417704011065, "grad_norm": 7.717270374298096, "learning_rate": 3.742123866605194e-05, "log_odds_chosen": 8.973867416381836, "log_odds_ratio": -0.0006007368210703135, "logits/chosen": -0.3857366442680359, "logits/rejected": -0.43481382727622986, "logps/chosen": -0.000942138722166419, "logps/rejected": -1.4649817943572998, "loss": 1.9242, "nll_loss": 0.480987548828125, "rewards/accuracies": 1.0, "rewards/chosen": -9.421388676855713e-05, "rewards/margins": 0.146403968334198, "rewards/rejected": -0.1464981883764267, "step": 4720 }, { "epoch": 3.264868603042877, "grad_norm": 29.994112014770508, "learning_rate": 3.74173966497618e-05, "log_odds_chosen": 7.361050605773926, "log_odds_ratio": -0.41764599084854126, "logits/chosen": -0.2790865898132324, "logits/rejected": -0.3517614006996155, "logps/chosen": -0.04281236231327057, "logps/rejected": -1.6136527061462402, "loss": 1.7113, "nll_loss": 0.3860637843608856, "rewards/accuracies": 0.875, "rewards/chosen": -0.004281235858798027, "rewards/margins": 0.15708401799201965, "rewards/rejected": -0.16136527061462402, "step": 4721 }, { "epoch": 3.2655601659751037, "grad_norm": 10.153568267822266, "learning_rate": 3.7413554633471646e-05, "log_odds_chosen": 9.24374008178711, "log_odds_ratio": -0.0005770561983808875, "logits/chosen": -0.6040127277374268, "logits/rejected": -0.6861763000488281, "logps/chosen": -0.0012250742875039577, "logps/rejected": -1.8835153579711914, "loss": 1.9434, "nll_loss": 0.48578059673309326, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012250742292962968, "rewards/margins": 0.1882290244102478, "rewards/rejected": -0.18835154175758362, "step": 4722 }, { "epoch": 3.2662517289073305, "grad_norm": 6.868340492248535, "learning_rate": 3.74097126171815e-05, "log_odds_chosen": 8.424043655395508, "log_odds_ratio": -0.030705248937010765, "logits/chosen": -0.4594751000404358, "logits/rejected": -0.5086755752563477, "logps/chosen": -0.00897553376853466, "logps/rejected": -1.7296792268753052, "loss": 2.3846, "nll_loss": 0.5930869579315186, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008975533419288695, "rewards/margins": 0.17207039892673492, "rewards/rejected": -0.17296794056892395, "step": 4723 }, { "epoch": 3.2669432918395573, "grad_norm": 12.509700775146484, "learning_rate": 3.740587060089135e-05, "log_odds_chosen": 9.228704452514648, "log_odds_ratio": -0.0015716326888650656, "logits/chosen": -0.19610503315925598, "logits/rejected": -0.2479446977376938, "logps/chosen": -0.001635462511330843, "logps/rejected": -1.4660680294036865, "loss": 1.6548, "nll_loss": 0.4135492444038391, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016354625404346734, "rewards/margins": 0.14644327759742737, "rewards/rejected": -0.14660681784152985, "step": 4724 }, { "epoch": 3.267634854771784, "grad_norm": 22.524150848388672, "learning_rate": 3.7402028584601204e-05, "log_odds_chosen": 8.740550994873047, "log_odds_ratio": -0.031293027102947235, "logits/chosen": -0.2052878588438034, "logits/rejected": -0.2539139986038208, "logps/chosen": -0.02158481813967228, "logps/rejected": -2.054436206817627, "loss": 1.9949, "nll_loss": 0.49560779333114624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021584820933640003, "rewards/margins": 0.20328515768051147, "rewards/rejected": -0.2054436206817627, "step": 4725 }, { "epoch": 3.268326417704011, "grad_norm": 10.84430980682373, "learning_rate": 3.739818656831105e-05, "log_odds_chosen": 9.799678802490234, "log_odds_ratio": -0.0003848494670819491, "logits/chosen": -0.34542927145957947, "logits/rejected": -0.43698975443840027, "logps/chosen": -0.0053330291993916035, "logps/rejected": -2.442246437072754, "loss": 1.2302, "nll_loss": 0.30752086639404297, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005333030130714178, "rewards/margins": 0.2436913400888443, "rewards/rejected": -0.24422462284564972, "step": 4726 }, { "epoch": 3.269017980636238, "grad_norm": 16.114307403564453, "learning_rate": 3.73943445520209e-05, "log_odds_chosen": 9.769522666931152, "log_odds_ratio": -0.0005406438722275198, "logits/chosen": -0.8938454985618591, "logits/rejected": -1.0274933576583862, "logps/chosen": -0.0006901758024469018, "logps/rejected": -1.437190055847168, "loss": 2.3199, "nll_loss": 0.579930305480957, "rewards/accuracies": 1.0, "rewards/chosen": -6.901758024469018e-05, "rewards/margins": 0.14364999532699585, "rewards/rejected": -0.14371900260448456, "step": 4727 }, { "epoch": 3.2697095435684647, "grad_norm": 8.889013290405273, "learning_rate": 3.7390502535730754e-05, "log_odds_chosen": 8.69186019897461, "log_odds_ratio": -0.017956608906388283, "logits/chosen": -0.2601277828216553, "logits/rejected": -0.3766717314720154, "logps/chosen": -0.021709920838475227, "logps/rejected": -2.236056327819824, "loss": 1.2342, "nll_loss": 0.30675405263900757, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021709920838475227, "rewards/margins": 0.22143465280532837, "rewards/rejected": -0.22360563278198242, "step": 4728 }, { "epoch": 3.2704011065006915, "grad_norm": 8.644242286682129, "learning_rate": 3.73866605194406e-05, "log_odds_chosen": 9.557779312133789, "log_odds_ratio": -0.0012040914734825492, "logits/chosen": -0.4545590877532959, "logits/rejected": -0.4834221601486206, "logps/chosen": -0.001696384628303349, "logps/rejected": -2.1973724365234375, "loss": 1.6645, "nll_loss": 0.4160057306289673, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001696384570095688, "rewards/margins": 0.21956762671470642, "rewards/rejected": -0.21973726153373718, "step": 4729 }, { "epoch": 3.2710926694329183, "grad_norm": 12.513564109802246, "learning_rate": 3.738281850315046e-05, "log_odds_chosen": 9.51201057434082, "log_odds_ratio": -0.26938343048095703, "logits/chosen": -0.7239038944244385, "logits/rejected": -0.8134328722953796, "logps/chosen": -0.0924244299530983, "logps/rejected": -2.5132055282592773, "loss": 1.6323, "nll_loss": 0.38113951683044434, "rewards/accuracies": 0.875, "rewards/chosen": -0.009242444299161434, "rewards/margins": 0.24207809567451477, "rewards/rejected": -0.2513205409049988, "step": 4730 }, { "epoch": 3.271784232365145, "grad_norm": 5.1850152015686035, "learning_rate": 3.7378976486860305e-05, "log_odds_chosen": 8.564149856567383, "log_odds_ratio": -0.005627782549709082, "logits/chosen": -0.4772839844226837, "logits/rejected": -0.6249641180038452, "logps/chosen": -0.022416256368160248, "logps/rejected": -1.7249755859375, "loss": 1.7617, "nll_loss": 0.4398678243160248, "rewards/accuracies": 1.0, "rewards/chosen": -0.002241625916212797, "rewards/margins": 0.17025592923164368, "rewards/rejected": -0.17249755561351776, "step": 4731 }, { "epoch": 3.272475795297372, "grad_norm": 14.917389869689941, "learning_rate": 3.737513447057016e-05, "log_odds_chosen": 8.748239517211914, "log_odds_ratio": -0.0023502488620579243, "logits/chosen": -0.5880697965621948, "logits/rejected": -0.6493375897407532, "logps/chosen": -0.037263672798871994, "logps/rejected": -2.3619611263275146, "loss": 2.0115, "nll_loss": 0.5026419162750244, "rewards/accuracies": 1.0, "rewards/chosen": -0.003726367373019457, "rewards/margins": 0.23246973752975464, "rewards/rejected": -0.2361961156129837, "step": 4732 }, { "epoch": 3.273167358229599, "grad_norm": 8.856287002563477, "learning_rate": 3.737129245428001e-05, "log_odds_chosen": 10.241095542907715, "log_odds_ratio": -6.276419298956171e-05, "logits/chosen": -0.5032787322998047, "logits/rejected": -0.5542346239089966, "logps/chosen": -0.00041386799421161413, "logps/rejected": -2.267441749572754, "loss": 1.573, "nll_loss": 0.39324095845222473, "rewards/accuracies": 1.0, "rewards/chosen": -4.1386800148757175e-05, "rewards/margins": 0.22670279443264008, "rewards/rejected": -0.2267441749572754, "step": 4733 }, { "epoch": 3.2738589211618256, "grad_norm": 4.8699774742126465, "learning_rate": 3.736745043798986e-05, "log_odds_chosen": 7.746283531188965, "log_odds_ratio": -0.013506181538105011, "logits/chosen": -0.5837900638580322, "logits/rejected": -0.6591947078704834, "logps/chosen": -0.005485900677740574, "logps/rejected": -1.2493566274642944, "loss": 1.6014, "nll_loss": 0.3990109860897064, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005485900910571218, "rewards/margins": 0.12438708543777466, "rewards/rejected": -0.12493567168712616, "step": 4734 }, { "epoch": 3.2745504840940525, "grad_norm": 11.48668384552002, "learning_rate": 3.736360842169971e-05, "log_odds_chosen": 9.136337280273438, "log_odds_ratio": -0.0008283422794193029, "logits/chosen": -0.8731837272644043, "logits/rejected": -1.00054132938385, "logps/chosen": -0.0013460691552609205, "logps/rejected": -1.7370753288269043, "loss": 2.0574, "nll_loss": 0.514275848865509, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001346069184364751, "rewards/margins": 0.17357292771339417, "rewards/rejected": -0.17370754480361938, "step": 4735 }, { "epoch": 3.2752420470262793, "grad_norm": 4.959877014160156, "learning_rate": 3.735976640540956e-05, "log_odds_chosen": 8.550050735473633, "log_odds_ratio": -0.0060114869847893715, "logits/chosen": -0.5094909071922302, "logits/rejected": -0.528753936290741, "logps/chosen": -0.004416503012180328, "logps/rejected": -1.5144058465957642, "loss": 2.0407, "nll_loss": 0.5095845460891724, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044165030703879893, "rewards/margins": 0.15099893510341644, "rewards/rejected": -0.1514405906200409, "step": 4736 }, { "epoch": 3.275933609958506, "grad_norm": 8.902658462524414, "learning_rate": 3.735592438911941e-05, "log_odds_chosen": 9.97501277923584, "log_odds_ratio": -0.0002525137388147414, "logits/chosen": -0.5565110445022583, "logits/rejected": -0.5615907311439514, "logps/chosen": -0.0011530027259141207, "logps/rejected": -2.144369602203369, "loss": 1.6975, "nll_loss": 0.42434871196746826, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001153002813225612, "rewards/margins": 0.21432164311408997, "rewards/rejected": -0.21443697810173035, "step": 4737 }, { "epoch": 3.276625172890733, "grad_norm": 7.4417829513549805, "learning_rate": 3.735208237282926e-05, "log_odds_chosen": 9.793359756469727, "log_odds_ratio": -0.00034874703851528466, "logits/chosen": -0.7387258410453796, "logits/rejected": -0.7721572518348694, "logps/chosen": -0.0049118902534246445, "logps/rejected": -1.93038010597229, "loss": 1.2391, "nll_loss": 0.309741735458374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004911890137009323, "rewards/margins": 0.1925468146800995, "rewards/rejected": -0.19303801655769348, "step": 4738 }, { "epoch": 3.27731673582296, "grad_norm": 6.360231399536133, "learning_rate": 3.734824035653912e-05, "log_odds_chosen": 9.175899505615234, "log_odds_ratio": -0.0006352405180223286, "logits/chosen": -0.5413529872894287, "logits/rejected": -0.6137167811393738, "logps/chosen": -0.04030369967222214, "logps/rejected": -1.6991791725158691, "loss": 1.1838, "nll_loss": 0.29588454961776733, "rewards/accuracies": 1.0, "rewards/chosen": -0.004030370619148016, "rewards/margins": 0.16588754951953888, "rewards/rejected": -0.16991791129112244, "step": 4739 }, { "epoch": 3.2780082987551866, "grad_norm": 5.473787307739258, "learning_rate": 3.734439834024896e-05, "log_odds_chosen": 8.764920234680176, "log_odds_ratio": -0.002741128671914339, "logits/chosen": -0.7053573131561279, "logits/rejected": -0.6922200322151184, "logps/chosen": -0.0052336049266159534, "logps/rejected": -2.0399675369262695, "loss": 1.3755, "nll_loss": 0.34360355138778687, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005233605043031275, "rewards/margins": 0.20347338914871216, "rewards/rejected": -0.20399674773216248, "step": 4740 }, { "epoch": 3.2786998616874135, "grad_norm": 8.485190391540527, "learning_rate": 3.7340556323958816e-05, "log_odds_chosen": 7.325547218322754, "log_odds_ratio": -0.06429079920053482, "logits/chosen": -0.5360948443412781, "logits/rejected": -0.577506422996521, "logps/chosen": -0.037310246378183365, "logps/rejected": -1.4450794458389282, "loss": 1.5523, "nll_loss": 0.3816385567188263, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037310244515538216, "rewards/margins": 0.14077691733837128, "rewards/rejected": -0.14450794458389282, "step": 4741 }, { "epoch": 3.2793914246196403, "grad_norm": 9.204703330993652, "learning_rate": 3.733671430766867e-05, "log_odds_chosen": 9.312698364257812, "log_odds_ratio": -0.013727872632443905, "logits/chosen": -0.41289976239204407, "logits/rejected": -0.5273554921150208, "logps/chosen": -0.008033557794988155, "logps/rejected": -1.684893012046814, "loss": 2.0903, "nll_loss": 0.5211901664733887, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008033557678572834, "rewards/margins": 0.16768595576286316, "rewards/rejected": -0.16848930716514587, "step": 4742 }, { "epoch": 3.280082987551867, "grad_norm": 7.151228427886963, "learning_rate": 3.733287229137852e-05, "log_odds_chosen": 9.259330749511719, "log_odds_ratio": -0.00040504755452275276, "logits/chosen": -0.268002986907959, "logits/rejected": -0.2747061848640442, "logps/chosen": -0.007128972094506025, "logps/rejected": -1.4844189882278442, "loss": 1.7262, "nll_loss": 0.431497186422348, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007128972210921347, "rewards/margins": 0.14772900938987732, "rewards/rejected": -0.14844189584255219, "step": 4743 }, { "epoch": 3.280774550484094, "grad_norm": 10.045866966247559, "learning_rate": 3.7329030275088366e-05, "log_odds_chosen": 9.405765533447266, "log_odds_ratio": -0.00025165293482132256, "logits/chosen": -0.642254114151001, "logits/rejected": -0.7636048793792725, "logps/chosen": -0.0007097673369571567, "logps/rejected": -1.7180622816085815, "loss": 1.6269, "nll_loss": 0.40670347213745117, "rewards/accuracies": 1.0, "rewards/chosen": -7.097673369571567e-05, "rewards/margins": 0.1717352718114853, "rewards/rejected": -0.1718062460422516, "step": 4744 }, { "epoch": 3.2814661134163208, "grad_norm": 7.029824256896973, "learning_rate": 3.732518825879822e-05, "log_odds_chosen": 9.815935134887695, "log_odds_ratio": -0.0002565347240306437, "logits/chosen": -0.47242191433906555, "logits/rejected": -0.5548115968704224, "logps/chosen": -0.025179818272590637, "logps/rejected": -2.6731204986572266, "loss": 1.1865, "nll_loss": 0.29660704731941223, "rewards/accuracies": 1.0, "rewards/chosen": -0.002517981920391321, "rewards/margins": 0.26479408144950867, "rewards/rejected": -0.26731204986572266, "step": 4745 }, { "epoch": 3.2821576763485476, "grad_norm": 7.584310054779053, "learning_rate": 3.732134624250807e-05, "log_odds_chosen": 9.373991966247559, "log_odds_ratio": -0.00020613643573597074, "logits/chosen": -0.8278031349182129, "logits/rejected": -0.8617972135543823, "logps/chosen": -0.0006570966215804219, "logps/rejected": -1.5188932418823242, "loss": 1.2428, "nll_loss": 0.3106880187988281, "rewards/accuracies": 1.0, "rewards/chosen": -6.570966797880828e-05, "rewards/margins": 0.15182361006736755, "rewards/rejected": -0.15188932418823242, "step": 4746 }, { "epoch": 3.2828492392807744, "grad_norm": 5.923523902893066, "learning_rate": 3.731750422621792e-05, "log_odds_chosen": 8.727062225341797, "log_odds_ratio": -0.0840362086892128, "logits/chosen": -0.7970731258392334, "logits/rejected": -0.8237400054931641, "logps/chosen": -0.017797797918319702, "logps/rejected": -1.4784642457962036, "loss": 1.4738, "nll_loss": 0.36003950238227844, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017797796754166484, "rewards/margins": 0.14606666564941406, "rewards/rejected": -0.14784643054008484, "step": 4747 }, { "epoch": 3.2835408022130013, "grad_norm": 8.834217071533203, "learning_rate": 3.7313662209927776e-05, "log_odds_chosen": 7.273342609405518, "log_odds_ratio": -0.022097529843449593, "logits/chosen": -0.3649141490459442, "logits/rejected": -0.4236929714679718, "logps/chosen": -0.027750710025429726, "logps/rejected": -2.403799533843994, "loss": 1.8193, "nll_loss": 0.4526097774505615, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027750711888074875, "rewards/margins": 0.23760490119457245, "rewards/rejected": -0.24037997424602509, "step": 4748 }, { "epoch": 3.284232365145228, "grad_norm": 8.271860122680664, "learning_rate": 3.730982019363762e-05, "log_odds_chosen": 8.717733383178711, "log_odds_ratio": -0.02827119268476963, "logits/chosen": -0.6745389103889465, "logits/rejected": -0.646474301815033, "logps/chosen": -0.011464545503258705, "logps/rejected": -2.0525588989257812, "loss": 1.6412, "nll_loss": 0.4074724316596985, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011464543640613556, "rewards/margins": 0.20410946011543274, "rewards/rejected": -0.2052558958530426, "step": 4749 }, { "epoch": 3.284923928077455, "grad_norm": 11.006572723388672, "learning_rate": 3.7305978177347474e-05, "log_odds_chosen": 8.783592224121094, "log_odds_ratio": -0.012629851698875427, "logits/chosen": -0.5243411064147949, "logits/rejected": -0.5682640075683594, "logps/chosen": -0.002311853226274252, "logps/rejected": -1.406437635421753, "loss": 1.7829, "nll_loss": 0.44445228576660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023118533135857433, "rewards/margins": 0.1404125690460205, "rewards/rejected": -0.14064376056194305, "step": 4750 }, { "epoch": 3.2856154910096818, "grad_norm": 9.709880828857422, "learning_rate": 3.7302136161057326e-05, "log_odds_chosen": 8.833198547363281, "log_odds_ratio": -0.00226973881945014, "logits/chosen": -0.6147016882896423, "logits/rejected": -0.6394162178039551, "logps/chosen": -0.0030848130118101835, "logps/rejected": -1.5361934900283813, "loss": 2.1818, "nll_loss": 0.5452192425727844, "rewards/accuracies": 1.0, "rewards/chosen": -0.000308481277897954, "rewards/margins": 0.1533108651638031, "rewards/rejected": -0.15361934900283813, "step": 4751 }, { "epoch": 3.2863070539419086, "grad_norm": 9.029719352722168, "learning_rate": 3.729829414476718e-05, "log_odds_chosen": 9.643852233886719, "log_odds_ratio": -0.001490089576691389, "logits/chosen": -0.41162461042404175, "logits/rejected": -0.5935603380203247, "logps/chosen": -0.001466776942834258, "logps/rejected": -1.9239782094955444, "loss": 1.3499, "nll_loss": 0.33732154965400696, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014667770301457494, "rewards/margins": 0.19225114583969116, "rewards/rejected": -0.1923978179693222, "step": 4752 }, { "epoch": 3.2869986168741354, "grad_norm": 12.33303165435791, "learning_rate": 3.7294452128477025e-05, "log_odds_chosen": 9.344205856323242, "log_odds_ratio": -0.0002524256706237793, "logits/chosen": -0.6274995803833008, "logits/rejected": -0.7359373569488525, "logps/chosen": -0.0009275332558900118, "logps/rejected": -1.629117727279663, "loss": 1.4462, "nll_loss": 0.3615281581878662, "rewards/accuracies": 1.0, "rewards/chosen": -9.275333286495879e-05, "rewards/margins": 0.162819042801857, "rewards/rejected": -0.1629117727279663, "step": 4753 }, { "epoch": 3.2876901798063622, "grad_norm": 17.345125198364258, "learning_rate": 3.729061011218688e-05, "log_odds_chosen": 8.758502006530762, "log_odds_ratio": -0.013476484455168247, "logits/chosen": -0.06280569732189178, "logits/rejected": -0.1538834273815155, "logps/chosen": -0.02584027126431465, "logps/rejected": -1.9973444938659668, "loss": 2.1901, "nll_loss": 0.5461861491203308, "rewards/accuracies": 1.0, "rewards/chosen": -0.002584027126431465, "rewards/margins": 0.19715043902397156, "rewards/rejected": -0.19973447918891907, "step": 4754 }, { "epoch": 3.288381742738589, "grad_norm": 7.788273811340332, "learning_rate": 3.728676809589673e-05, "log_odds_chosen": 8.044576644897461, "log_odds_ratio": -0.17449286580085754, "logits/chosen": -0.3410576283931732, "logits/rejected": -0.4063025414943695, "logps/chosen": -0.026682965457439423, "logps/rejected": -1.7172755002975464, "loss": 1.4453, "nll_loss": 0.34386640787124634, "rewards/accuracies": 0.875, "rewards/chosen": -0.002668296452611685, "rewards/margins": 0.16905924677848816, "rewards/rejected": -0.17172753810882568, "step": 4755 }, { "epoch": 3.289073305670816, "grad_norm": 13.536659240722656, "learning_rate": 3.7282926079606575e-05, "log_odds_chosen": 9.390349388122559, "log_odds_ratio": -0.00024700278299860656, "logits/chosen": -0.7557802200317383, "logits/rejected": -0.7990991473197937, "logps/chosen": -0.002822623588144779, "logps/rejected": -1.7095152139663696, "loss": 2.6826, "nll_loss": 0.6706240773200989, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002822624228429049, "rewards/margins": 0.17066925764083862, "rewards/rejected": -0.17095153033733368, "step": 4756 }, { "epoch": 3.2897648686030427, "grad_norm": 16.100502014160156, "learning_rate": 3.7279084063316434e-05, "log_odds_chosen": 9.26073169708252, "log_odds_ratio": -0.002371899550780654, "logits/chosen": -0.45070087909698486, "logits/rejected": -0.5162512063980103, "logps/chosen": -0.0017145187593996525, "logps/rejected": -1.4648433923721313, "loss": 1.8425, "nll_loss": 0.4603844881057739, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001714518730295822, "rewards/margins": 0.1463128924369812, "rewards/rejected": -0.1464843451976776, "step": 4757 }, { "epoch": 3.2904564315352696, "grad_norm": 6.50117826461792, "learning_rate": 3.727524204702628e-05, "log_odds_chosen": 9.957597732543945, "log_odds_ratio": -9.806110756471753e-05, "logits/chosen": -0.35002419352531433, "logits/rejected": -0.4274992346763611, "logps/chosen": -0.00034100955235771835, "logps/rejected": -1.987114667892456, "loss": 1.2194, "nll_loss": 0.30484938621520996, "rewards/accuracies": 1.0, "rewards/chosen": -3.4100954508176073e-05, "rewards/margins": 0.19867737591266632, "rewards/rejected": -0.19871146976947784, "step": 4758 }, { "epoch": 3.2911479944674964, "grad_norm": 11.918055534362793, "learning_rate": 3.727140003073613e-05, "log_odds_chosen": 10.449407577514648, "log_odds_ratio": -0.0009086823556572199, "logits/chosen": -0.7754091024398804, "logits/rejected": -0.7539989948272705, "logps/chosen": -0.0005091758212074637, "logps/rejected": -1.9469212293624878, "loss": 1.4888, "nll_loss": 0.3721088767051697, "rewards/accuracies": 1.0, "rewards/chosen": -5.0917587941512465e-05, "rewards/margins": 0.19464120268821716, "rewards/rejected": -0.19469213485717773, "step": 4759 }, { "epoch": 3.2918395573997232, "grad_norm": 9.799060821533203, "learning_rate": 3.7267558014445985e-05, "log_odds_chosen": 10.056310653686523, "log_odds_ratio": -0.0005241757608018816, "logits/chosen": -0.19608081877231598, "logits/rejected": -0.23960356414318085, "logps/chosen": -0.0006183648947626352, "logps/rejected": -2.189321756362915, "loss": 1.4989, "nll_loss": 0.37467676401138306, "rewards/accuracies": 1.0, "rewards/chosen": -6.183648656588048e-05, "rewards/margins": 0.2188703566789627, "rewards/rejected": -0.21893219649791718, "step": 4760 }, { "epoch": 3.29253112033195, "grad_norm": 10.568760871887207, "learning_rate": 3.726371599815584e-05, "log_odds_chosen": 8.719406127929688, "log_odds_ratio": -0.0002006332069868222, "logits/chosen": -0.05832742527127266, "logits/rejected": -0.1293889284133911, "logps/chosen": -0.004344870802015066, "logps/rejected": -2.0650267601013184, "loss": 1.9617, "nll_loss": 0.4904080033302307, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043448706855997443, "rewards/margins": 0.20606820285320282, "rewards/rejected": -0.20650267601013184, "step": 4761 }, { "epoch": 3.293222683264177, "grad_norm": 6.398842811584473, "learning_rate": 3.725987398186568e-05, "log_odds_chosen": 10.638826370239258, "log_odds_ratio": -8.899492968339473e-05, "logits/chosen": -0.46759265661239624, "logits/rejected": -0.5266586542129517, "logps/chosen": -0.0001857294118963182, "logps/rejected": -2.1859965324401855, "loss": 1.4842, "nll_loss": 0.3710480034351349, "rewards/accuracies": 1.0, "rewards/chosen": -1.8572942281025462e-05, "rewards/margins": 0.21858109533786774, "rewards/rejected": -0.21859967708587646, "step": 4762 }, { "epoch": 3.2939142461964037, "grad_norm": 14.124534606933594, "learning_rate": 3.7256031965575535e-05, "log_odds_chosen": 7.912993431091309, "log_odds_ratio": -0.026029715314507484, "logits/chosen": -0.35933125019073486, "logits/rejected": -0.3802638351917267, "logps/chosen": -0.007536513265222311, "logps/rejected": -1.4723609685897827, "loss": 1.7175, "nll_loss": 0.42677438259124756, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007536513148806989, "rewards/margins": 0.1464824378490448, "rewards/rejected": -0.14723609387874603, "step": 4763 }, { "epoch": 3.2946058091286305, "grad_norm": 8.384628295898438, "learning_rate": 3.725218994928539e-05, "log_odds_chosen": 9.867562294006348, "log_odds_ratio": -0.00011213291145395488, "logits/chosen": -0.3775690793991089, "logits/rejected": -0.5076505541801453, "logps/chosen": -0.013233819045126438, "logps/rejected": -2.4109346866607666, "loss": 1.8189, "nll_loss": 0.45471256971359253, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013233819045126438, "rewards/margins": 0.23977011442184448, "rewards/rejected": -0.2410934865474701, "step": 4764 }, { "epoch": 3.2952973720608574, "grad_norm": 10.633298873901367, "learning_rate": 3.7248347932995233e-05, "log_odds_chosen": 10.024510383605957, "log_odds_ratio": -0.00019864975183736533, "logits/chosen": -0.5552939176559448, "logits/rejected": -0.6305736303329468, "logps/chosen": -0.00040514758438803256, "logps/rejected": -1.8062925338745117, "loss": 1.5388, "nll_loss": 0.3846917748451233, "rewards/accuracies": 1.0, "rewards/chosen": -4.0514758438803256e-05, "rewards/margins": 0.1805887222290039, "rewards/rejected": -0.18062923848628998, "step": 4765 }, { "epoch": 3.295988934993084, "grad_norm": 8.180196762084961, "learning_rate": 3.724450591670509e-05, "log_odds_chosen": 6.821606636047363, "log_odds_ratio": -0.08719392120838165, "logits/chosen": -0.6663360595703125, "logits/rejected": -0.601808488368988, "logps/chosen": -0.04260578006505966, "logps/rejected": -1.3567452430725098, "loss": 2.4537, "nll_loss": 0.6047061085700989, "rewards/accuracies": 1.0, "rewards/chosen": -0.004260578192770481, "rewards/margins": 0.13141396641731262, "rewards/rejected": -0.13567453622817993, "step": 4766 }, { "epoch": 3.296680497925311, "grad_norm": 9.587262153625488, "learning_rate": 3.724066390041494e-05, "log_odds_chosen": 7.748490333557129, "log_odds_ratio": -0.20099495351314545, "logits/chosen": -0.5211882591247559, "logits/rejected": -0.5478272438049316, "logps/chosen": -0.029580960050225258, "logps/rejected": -1.0142784118652344, "loss": 1.6294, "nll_loss": 0.38724058866500854, "rewards/accuracies": 0.875, "rewards/chosen": -0.002958096330985427, "rewards/margins": 0.09846975654363632, "rewards/rejected": -0.1014278456568718, "step": 4767 }, { "epoch": 3.297372060857538, "grad_norm": 10.243107795715332, "learning_rate": 3.723682188412479e-05, "log_odds_chosen": 9.759126663208008, "log_odds_ratio": -0.00040256179636344314, "logits/chosen": -0.7398092746734619, "logits/rejected": -0.7639995813369751, "logps/chosen": -0.0003068627556785941, "logps/rejected": -1.6395602226257324, "loss": 1.6066, "nll_loss": 0.4016038477420807, "rewards/accuracies": 1.0, "rewards/chosen": -3.068627484026365e-05, "rewards/margins": 0.16392534971237183, "rewards/rejected": -0.16395603120326996, "step": 4768 }, { "epoch": 3.2980636237897647, "grad_norm": 11.689058303833008, "learning_rate": 3.7232979867834636e-05, "log_odds_chosen": 8.956748008728027, "log_odds_ratio": -0.0011930334148928523, "logits/chosen": -0.5303239226341248, "logits/rejected": -0.5913784503936768, "logps/chosen": -0.016812235116958618, "logps/rejected": -1.9123353958129883, "loss": 2.2169, "nll_loss": 0.5541026592254639, "rewards/accuracies": 1.0, "rewards/chosen": -0.00168122339528054, "rewards/margins": 0.18955230712890625, "rewards/rejected": -0.1912335455417633, "step": 4769 }, { "epoch": 3.2987551867219915, "grad_norm": 9.376022338867188, "learning_rate": 3.7229137851544496e-05, "log_odds_chosen": 9.66012954711914, "log_odds_ratio": -0.0038533341139554977, "logits/chosen": -0.5144587755203247, "logits/rejected": -0.5820307731628418, "logps/chosen": -0.0020845159888267517, "logps/rejected": -2.0297763347625732, "loss": 1.5981, "nll_loss": 0.3991428315639496, "rewards/accuracies": 1.0, "rewards/chosen": -0.000208451587241143, "rewards/margins": 0.20276916027069092, "rewards/rejected": -0.20297762751579285, "step": 4770 }, { "epoch": 3.2994467496542184, "grad_norm": 12.997052192687988, "learning_rate": 3.722529583525434e-05, "log_odds_chosen": 10.184316635131836, "log_odds_ratio": -0.0001452596188755706, "logits/chosen": -0.6555896997451782, "logits/rejected": -0.7356351017951965, "logps/chosen": -0.004165071528404951, "logps/rejected": -2.575788974761963, "loss": 1.886, "nll_loss": 0.4714895784854889, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004165071586612612, "rewards/margins": 0.2571623921394348, "rewards/rejected": -0.25757887959480286, "step": 4771 }, { "epoch": 3.300138312586445, "grad_norm": 13.00661849975586, "learning_rate": 3.7221453818964194e-05, "log_odds_chosen": 9.663476943969727, "log_odds_ratio": -0.0003805930900853127, "logits/chosen": -0.6680766344070435, "logits/rejected": -0.7372719049453735, "logps/chosen": -0.0004427245585247874, "logps/rejected": -1.4613854885101318, "loss": 1.5827, "nll_loss": 0.3956254720687866, "rewards/accuracies": 1.0, "rewards/chosen": -4.427245585247874e-05, "rewards/margins": 0.14609427750110626, "rewards/rejected": -0.14613854885101318, "step": 4772 }, { "epoch": 3.300829875518672, "grad_norm": 13.011005401611328, "learning_rate": 3.7217611802674046e-05, "log_odds_chosen": 11.087613105773926, "log_odds_ratio": -4.3162217480130494e-05, "logits/chosen": -0.5127568244934082, "logits/rejected": -0.6861209869384766, "logps/chosen": -0.00031132507137954235, "logps/rejected": -2.7826836109161377, "loss": 1.3939, "nll_loss": 0.3484596014022827, "rewards/accuracies": 1.0, "rewards/chosen": -3.113250932074152e-05, "rewards/margins": 0.2782372236251831, "rewards/rejected": -0.27826836705207825, "step": 4773 }, { "epoch": 3.301521438450899, "grad_norm": 9.222591400146484, "learning_rate": 3.721376978638389e-05, "log_odds_chosen": 8.618949890136719, "log_odds_ratio": -0.0009218844352290034, "logits/chosen": -0.47571730613708496, "logits/rejected": -0.4952141344547272, "logps/chosen": -0.006290379445999861, "logps/rejected": -1.6514685153961182, "loss": 1.4282, "nll_loss": 0.3569517135620117, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006290380260907114, "rewards/margins": 0.1645178198814392, "rewards/rejected": -0.1651468724012375, "step": 4774 }, { "epoch": 3.3022130013831257, "grad_norm": 5.612703323364258, "learning_rate": 3.7209927770093744e-05, "log_odds_chosen": 7.623640060424805, "log_odds_ratio": -0.06587830185890198, "logits/chosen": -0.44902193546295166, "logits/rejected": -0.4639015793800354, "logps/chosen": -0.0315217524766922, "logps/rejected": -1.3852890729904175, "loss": 1.7845, "nll_loss": 0.43953758478164673, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031521753408014774, "rewards/margins": 0.1353767365217209, "rewards/rejected": -0.13852891325950623, "step": 4775 }, { "epoch": 3.3029045643153525, "grad_norm": 9.524703979492188, "learning_rate": 3.72060857538036e-05, "log_odds_chosen": 7.134241580963135, "log_odds_ratio": -0.06777474284172058, "logits/chosen": -0.585055947303772, "logits/rejected": -0.6165054440498352, "logps/chosen": -0.04166354238986969, "logps/rejected": -1.9927994012832642, "loss": 1.7974, "nll_loss": 0.44256073236465454, "rewards/accuracies": 1.0, "rewards/chosen": -0.004166354890912771, "rewards/margins": 0.19511358439922333, "rewards/rejected": -0.19927993416786194, "step": 4776 }, { "epoch": 3.3035961272475793, "grad_norm": 8.323135375976562, "learning_rate": 3.720224373751345e-05, "log_odds_chosen": 10.659330368041992, "log_odds_ratio": -0.00018401745182927698, "logits/chosen": -0.5943928360939026, "logits/rejected": -0.6496483087539673, "logps/chosen": -0.0003157538012601435, "logps/rejected": -2.077713966369629, "loss": 1.7976, "nll_loss": 0.449379563331604, "rewards/accuracies": 1.0, "rewards/chosen": -3.157538594678044e-05, "rewards/margins": 0.20773981511592865, "rewards/rejected": -0.2077714055776596, "step": 4777 }, { "epoch": 3.304287690179806, "grad_norm": 6.543783187866211, "learning_rate": 3.7198401721223295e-05, "log_odds_chosen": 9.414234161376953, "log_odds_ratio": -0.0005971124628558755, "logits/chosen": -0.26304084062576294, "logits/rejected": -0.33669501543045044, "logps/chosen": -0.0007507125264964998, "logps/rejected": -1.7340823411941528, "loss": 1.6501, "nll_loss": 0.4124714136123657, "rewards/accuracies": 1.0, "rewards/chosen": -7.507124973926693e-05, "rewards/margins": 0.17333316802978516, "rewards/rejected": -0.17340824007987976, "step": 4778 }, { "epoch": 3.304979253112033, "grad_norm": 11.716812133789062, "learning_rate": 3.7194559704933154e-05, "log_odds_chosen": 7.778428077697754, "log_odds_ratio": -0.06828571856021881, "logits/chosen": -0.36957937479019165, "logits/rejected": -0.3652217388153076, "logps/chosen": -0.015425732359290123, "logps/rejected": -1.1986706256866455, "loss": 2.1979, "nll_loss": 0.5426568388938904, "rewards/accuracies": 1.0, "rewards/chosen": -0.001542573212645948, "rewards/margins": 0.11832448840141296, "rewards/rejected": -0.11986706405878067, "step": 4779 }, { "epoch": 3.30567081604426, "grad_norm": 10.31844711303711, "learning_rate": 3.7190717688643e-05, "log_odds_chosen": 8.872358322143555, "log_odds_ratio": -0.006060821935534477, "logits/chosen": -0.010356791317462921, "logits/rejected": -0.13408158719539642, "logps/chosen": -0.009832553565502167, "logps/rejected": -1.800208330154419, "loss": 1.3841, "nll_loss": 0.3454144597053528, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009832553332671523, "rewards/margins": 0.17903758585453033, "rewards/rejected": -0.18002083897590637, "step": 4780 }, { "epoch": 3.3063623789764867, "grad_norm": 9.09090518951416, "learning_rate": 3.718687567235285e-05, "log_odds_chosen": 9.495561599731445, "log_odds_ratio": -0.0005472712800838053, "logits/chosen": -0.47284039855003357, "logits/rejected": -0.5695121884346008, "logps/chosen": -0.01995399221777916, "logps/rejected": -2.2598190307617188, "loss": 1.5845, "nll_loss": 0.3960671126842499, "rewards/accuracies": 1.0, "rewards/chosen": -0.001995399361476302, "rewards/margins": 0.22398647665977478, "rewards/rejected": -0.22598187625408173, "step": 4781 }, { "epoch": 3.3070539419087135, "grad_norm": 16.406513214111328, "learning_rate": 3.7183033656062705e-05, "log_odds_chosen": 11.0305814743042, "log_odds_ratio": -2.177390160795767e-05, "logits/chosen": -0.5238522887229919, "logits/rejected": -0.5845245122909546, "logps/chosen": -0.00019303163571748883, "logps/rejected": -2.37955379486084, "loss": 2.8331, "nll_loss": 0.7082697153091431, "rewards/accuracies": 1.0, "rewards/chosen": -1.930316284415312e-05, "rewards/margins": 0.23793606460094452, "rewards/rejected": -0.23795536160469055, "step": 4782 }, { "epoch": 3.3077455048409403, "grad_norm": 11.432744026184082, "learning_rate": 3.717919163977255e-05, "log_odds_chosen": 9.310041427612305, "log_odds_ratio": -0.00014564645243808627, "logits/chosen": -0.7327515482902527, "logits/rejected": -0.7921730279922485, "logps/chosen": -0.0005434756749309599, "logps/rejected": -1.7320445775985718, "loss": 1.7618, "nll_loss": 0.4404332637786865, "rewards/accuracies": 1.0, "rewards/chosen": -5.434756894828752e-05, "rewards/margins": 0.17315012216567993, "rewards/rejected": -0.1732044517993927, "step": 4783 }, { "epoch": 3.308437067773167, "grad_norm": 10.639967918395996, "learning_rate": 3.71753496234824e-05, "log_odds_chosen": 9.80274772644043, "log_odds_ratio": -7.620429096277803e-05, "logits/chosen": -0.7275650501251221, "logits/rejected": -0.756881833076477, "logps/chosen": -0.0004280672874301672, "logps/rejected": -1.7424432039260864, "loss": 1.9727, "nll_loss": 0.49316996335983276, "rewards/accuracies": 1.0, "rewards/chosen": -4.280672874301672e-05, "rewards/margins": 0.17420151829719543, "rewards/rejected": -0.17424434423446655, "step": 4784 }, { "epoch": 3.309128630705394, "grad_norm": 7.955739974975586, "learning_rate": 3.7171507607192255e-05, "log_odds_chosen": 10.062747955322266, "log_odds_ratio": -7.456566527253017e-05, "logits/chosen": -0.1481330692768097, "logits/rejected": -0.26083531975746155, "logps/chosen": -0.0004507679841481149, "logps/rejected": -2.0520520210266113, "loss": 1.0819, "nll_loss": 0.27046719193458557, "rewards/accuracies": 1.0, "rewards/chosen": -4.50768020527903e-05, "rewards/margins": 0.20516012609004974, "rewards/rejected": -0.20520521700382233, "step": 4785 }, { "epoch": 3.309820193637621, "grad_norm": 7.6142401695251465, "learning_rate": 3.716766559090211e-05, "log_odds_chosen": 9.340099334716797, "log_odds_ratio": -0.0005384586984291673, "logits/chosen": -0.25933513045310974, "logits/rejected": -0.32356947660446167, "logps/chosen": -0.031100928783416748, "logps/rejected": -2.5838711261749268, "loss": 1.492, "nll_loss": 0.37295711040496826, "rewards/accuracies": 1.0, "rewards/chosen": -0.003110092831775546, "rewards/margins": 0.25527700781822205, "rewards/rejected": -0.25838708877563477, "step": 4786 }, { "epoch": 3.3105117565698476, "grad_norm": 10.63508415222168, "learning_rate": 3.716382357461195e-05, "log_odds_chosen": 10.072786331176758, "log_odds_ratio": -0.00014562705473508686, "logits/chosen": -0.4115908443927765, "logits/rejected": -0.5597538948059082, "logps/chosen": -0.00044055056059733033, "logps/rejected": -1.7020169496536255, "loss": 1.7935, "nll_loss": 0.4483675956726074, "rewards/accuracies": 1.0, "rewards/chosen": -4.4055057514924556e-05, "rewards/margins": 0.17015764117240906, "rewards/rejected": -0.17020170390605927, "step": 4787 }, { "epoch": 3.3112033195020745, "grad_norm": 6.960546970367432, "learning_rate": 3.715998155832181e-05, "log_odds_chosen": 9.475131034851074, "log_odds_ratio": -0.0017900835955515504, "logits/chosen": -0.4856712818145752, "logits/rejected": -0.5267800092697144, "logps/chosen": -0.00179449247661978, "logps/rejected": -1.439497947692871, "loss": 2.1871, "nll_loss": 0.5465947985649109, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017944925639312714, "rewards/margins": 0.14377035200595856, "rewards/rejected": -0.14394979178905487, "step": 4788 }, { "epoch": 3.3118948824343013, "grad_norm": 14.887598991394043, "learning_rate": 3.715613954203166e-05, "log_odds_chosen": 7.631422996520996, "log_odds_ratio": -0.0026702506002038717, "logits/chosen": -0.4037625193595886, "logits/rejected": -0.5052109956741333, "logps/chosen": -0.00553013663738966, "logps/rejected": -1.4539145231246948, "loss": 1.6762, "nll_loss": 0.4187846779823303, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005530136404559016, "rewards/margins": 0.14483843743801117, "rewards/rejected": -0.14539144933223724, "step": 4789 }, { "epoch": 3.312586445366528, "grad_norm": 6.999749183654785, "learning_rate": 3.715229752574151e-05, "log_odds_chosen": 7.64384651184082, "log_odds_ratio": -0.005185406655073166, "logits/chosen": -0.34098395705223083, "logits/rejected": -0.3821839690208435, "logps/chosen": -0.11446300894021988, "logps/rejected": -1.7795770168304443, "loss": 1.458, "nll_loss": 0.3639754354953766, "rewards/accuracies": 1.0, "rewards/chosen": -0.011446301825344563, "rewards/margins": 0.1665114015340805, "rewards/rejected": -0.1779577136039734, "step": 4790 }, { "epoch": 3.313278008298755, "grad_norm": 5.284163475036621, "learning_rate": 3.714845550945136e-05, "log_odds_chosen": 8.501035690307617, "log_odds_ratio": -0.001044795848429203, "logits/chosen": -0.4037015736103058, "logits/rejected": -0.3955928087234497, "logps/chosen": -0.0008926805458031595, "logps/rejected": -1.4123013019561768, "loss": 1.7369, "nll_loss": 0.43411070108413696, "rewards/accuracies": 1.0, "rewards/chosen": -8.926806185627356e-05, "rewards/margins": 0.1411408632993698, "rewards/rejected": -0.14123013615608215, "step": 4791 }, { "epoch": 3.313969571230982, "grad_norm": 8.896031379699707, "learning_rate": 3.7144613493161216e-05, "log_odds_chosen": 10.451597213745117, "log_odds_ratio": -4.365799395600334e-05, "logits/chosen": -0.40877458453178406, "logits/rejected": -0.44063982367515564, "logps/chosen": -0.0005057338275946677, "logps/rejected": -2.0362067222595215, "loss": 1.5345, "nll_loss": 0.3836180865764618, "rewards/accuracies": 1.0, "rewards/chosen": -5.05733878526371e-05, "rewards/margins": 0.20357009768486023, "rewards/rejected": -0.20362067222595215, "step": 4792 }, { "epoch": 3.3146611341632086, "grad_norm": 6.940845012664795, "learning_rate": 3.714077147687106e-05, "log_odds_chosen": 8.740618705749512, "log_odds_ratio": -0.00044958863873034716, "logits/chosen": -0.5630000829696655, "logits/rejected": -0.6577044129371643, "logps/chosen": -0.0014754270669072866, "logps/rejected": -1.6538732051849365, "loss": 1.2009, "nll_loss": 0.3001739978790283, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014754271251149476, "rewards/margins": 0.16523978114128113, "rewards/rejected": -0.1653873324394226, "step": 4793 }, { "epoch": 3.3153526970954355, "grad_norm": 6.365971565246582, "learning_rate": 3.7136929460580914e-05, "log_odds_chosen": 8.731281280517578, "log_odds_ratio": -0.0011150891659781337, "logits/chosen": -0.5653225779533386, "logits/rejected": -0.5691289901733398, "logps/chosen": -0.0014174432726576924, "logps/rejected": -1.182433843612671, "loss": 1.8279, "nll_loss": 0.4568542242050171, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014174434181768447, "rewards/margins": 0.11810164898633957, "rewards/rejected": -0.11824339628219604, "step": 4794 }, { "epoch": 3.3160442600276623, "grad_norm": 12.208864212036133, "learning_rate": 3.7133087444290766e-05, "log_odds_chosen": 7.481908798217773, "log_odds_ratio": -0.05939479172229767, "logits/chosen": -0.37778154015541077, "logits/rejected": -0.3924955725669861, "logps/chosen": -0.025003833696246147, "logps/rejected": -2.280137062072754, "loss": 1.7202, "nll_loss": 0.42409858107566833, "rewards/accuracies": 1.0, "rewards/chosen": -0.002500383649021387, "rewards/margins": 0.2255133092403412, "rewards/rejected": -0.22801369428634644, "step": 4795 }, { "epoch": 3.316735822959889, "grad_norm": 10.980515480041504, "learning_rate": 3.712924542800061e-05, "log_odds_chosen": 9.156580924987793, "log_odds_ratio": -0.005245847627520561, "logits/chosen": -0.6666525602340698, "logits/rejected": -0.7071576118469238, "logps/chosen": -0.008825276978313923, "logps/rejected": -1.9418580532073975, "loss": 2.2029, "nll_loss": 0.5501908659934998, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008825276745483279, "rewards/margins": 0.19330328702926636, "rewards/rejected": -0.1941857933998108, "step": 4796 }, { "epoch": 3.317427385892116, "grad_norm": 8.450002670288086, "learning_rate": 3.712540341171047e-05, "log_odds_chosen": 10.313995361328125, "log_odds_ratio": -5.322957440512255e-05, "logits/chosen": -0.7361082434654236, "logits/rejected": -0.7873325347900391, "logps/chosen": -0.00023771397536620498, "logps/rejected": -1.855948567390442, "loss": 2.2595, "nll_loss": 0.5648688077926636, "rewards/accuracies": 1.0, "rewards/chosen": -2.3771397536620498e-05, "rewards/margins": 0.18557108938694, "rewards/rejected": -0.1855948567390442, "step": 4797 }, { "epoch": 3.3181189488243428, "grad_norm": 10.989029884338379, "learning_rate": 3.712156139542032e-05, "log_odds_chosen": 7.254358291625977, "log_odds_ratio": -0.11698737740516663, "logits/chosen": -0.6756665706634521, "logits/rejected": -0.6980599164962769, "logps/chosen": -0.028153615072369576, "logps/rejected": -1.4995083808898926, "loss": 1.6483, "nll_loss": 0.40037593245506287, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028153618331998587, "rewards/margins": 0.14713549613952637, "rewards/rejected": -0.14995084702968597, "step": 4798 }, { "epoch": 3.3188105117565696, "grad_norm": 5.330296039581299, "learning_rate": 3.711771937913017e-05, "log_odds_chosen": 9.479177474975586, "log_odds_ratio": -0.00011878873192472383, "logits/chosen": -0.49675679206848145, "logits/rejected": -0.4976978600025177, "logps/chosen": -0.0007499220664612949, "logps/rejected": -1.5317151546478271, "loss": 1.6152, "nll_loss": 0.40379104018211365, "rewards/accuracies": 1.0, "rewards/chosen": -7.499221101170406e-05, "rewards/margins": 0.1530965268611908, "rewards/rejected": -0.15317150950431824, "step": 4799 }, { "epoch": 3.3195020746887964, "grad_norm": 9.043655395507812, "learning_rate": 3.711387736284002e-05, "log_odds_chosen": 8.289007186889648, "log_odds_ratio": -0.14393651485443115, "logits/chosen": -0.4536038339138031, "logits/rejected": -0.46336764097213745, "logps/chosen": -0.0383828841149807, "logps/rejected": -1.167110562324524, "loss": 1.2512, "nll_loss": 0.2983952462673187, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038382881321012974, "rewards/margins": 0.11287276446819305, "rewards/rejected": -0.11671105772256851, "step": 4800 }, { "epoch": 3.3201936376210233, "grad_norm": 6.758234024047852, "learning_rate": 3.7110035346549874e-05, "log_odds_chosen": 5.697368621826172, "log_odds_ratio": -0.11512251198291779, "logits/chosen": -0.019853461533784866, "logits/rejected": -0.1009041965007782, "logps/chosen": -0.0331367626786232, "logps/rejected": -0.8319405913352966, "loss": 1.7912, "nll_loss": 0.436276912689209, "rewards/accuracies": 1.0, "rewards/chosen": -0.003313676454126835, "rewards/margins": 0.07988037914037704, "rewards/rejected": -0.0831940546631813, "step": 4801 }, { "epoch": 3.3208852005532505, "grad_norm": 10.116432189941406, "learning_rate": 3.710619333025972e-05, "log_odds_chosen": 8.380141258239746, "log_odds_ratio": -0.005037578754127026, "logits/chosen": -0.3611530661582947, "logits/rejected": -0.36258572340011597, "logps/chosen": -0.012789360247552395, "logps/rejected": -1.5567326545715332, "loss": 2.2324, "nll_loss": 0.557590663433075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012789360480383039, "rewards/margins": 0.15439432859420776, "rewards/rejected": -0.15567326545715332, "step": 4802 }, { "epoch": 3.3215767634854774, "grad_norm": 17.863174438476562, "learning_rate": 3.710235131396957e-05, "log_odds_chosen": 8.179805755615234, "log_odds_ratio": -0.006676828023046255, "logits/chosen": -0.6714662909507751, "logits/rejected": -0.7768672704696655, "logps/chosen": -0.08153266459703445, "logps/rejected": -1.7913302183151245, "loss": 1.9364, "nll_loss": 0.4834328889846802, "rewards/accuracies": 1.0, "rewards/chosen": -0.008153267204761505, "rewards/margins": 0.17097975313663483, "rewards/rejected": -0.17913301289081573, "step": 4803 }, { "epoch": 3.322268326417704, "grad_norm": 6.330430030822754, "learning_rate": 3.7098509297679425e-05, "log_odds_chosen": 8.275510787963867, "log_odds_ratio": -0.006861433852463961, "logits/chosen": -0.5030953288078308, "logits/rejected": -0.6301823258399963, "logps/chosen": -0.003684797789901495, "logps/rejected": -1.0211801528930664, "loss": 2.1073, "nll_loss": 0.5261452794075012, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036847975570708513, "rewards/margins": 0.10174953937530518, "rewards/rejected": -0.10211801528930664, "step": 4804 }, { "epoch": 3.322959889349931, "grad_norm": 5.604506015777588, "learning_rate": 3.709466728138927e-05, "log_odds_chosen": 9.087284088134766, "log_odds_ratio": -0.000316342047881335, "logits/chosen": -0.32346826791763306, "logits/rejected": -0.3070969581604004, "logps/chosen": -0.00042608988587744534, "logps/rejected": -1.4158622026443481, "loss": 1.1869, "nll_loss": 0.2966977059841156, "rewards/accuracies": 1.0, "rewards/chosen": -4.260899004293606e-05, "rewards/margins": 0.14154361188411713, "rewards/rejected": -0.14158621430397034, "step": 4805 }, { "epoch": 3.323651452282158, "grad_norm": 7.937886714935303, "learning_rate": 3.709082526509913e-05, "log_odds_chosen": 9.678321838378906, "log_odds_ratio": -0.00020336979650892317, "logits/chosen": -0.5113595724105835, "logits/rejected": -0.5489328503608704, "logps/chosen": -0.00028491675038821995, "logps/rejected": -1.596942663192749, "loss": 1.0067, "nll_loss": 0.2516666650772095, "rewards/accuracies": 1.0, "rewards/chosen": -2.8491673219832592e-05, "rewards/margins": 0.1596657633781433, "rewards/rejected": -0.15969425439834595, "step": 4806 }, { "epoch": 3.3243430152143847, "grad_norm": 11.258094787597656, "learning_rate": 3.7086983248808975e-05, "log_odds_chosen": 9.129833221435547, "log_odds_ratio": -0.0031539711635559797, "logits/chosen": -0.3189602792263031, "logits/rejected": -0.4230721890926361, "logps/chosen": -0.03089648112654686, "logps/rejected": -1.9423742294311523, "loss": 1.3906, "nll_loss": 0.3473414182662964, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030896479729562998, "rewards/margins": 0.19114777445793152, "rewards/rejected": -0.19423742592334747, "step": 4807 }, { "epoch": 3.3250345781466115, "grad_norm": 10.421914100646973, "learning_rate": 3.708314123251883e-05, "log_odds_chosen": 9.675724029541016, "log_odds_ratio": -0.0007099607028067112, "logits/chosen": -0.479727566242218, "logits/rejected": -0.49939435720443726, "logps/chosen": -0.018236767500638962, "logps/rejected": -2.0636579990386963, "loss": 1.6704, "nll_loss": 0.4175257384777069, "rewards/accuracies": 1.0, "rewards/chosen": -0.001823676866479218, "rewards/margins": 0.2045421302318573, "rewards/rejected": -0.20636579394340515, "step": 4808 }, { "epoch": 3.3257261410788383, "grad_norm": 7.315793037414551, "learning_rate": 3.707929921622868e-05, "log_odds_chosen": 8.784111976623535, "log_odds_ratio": -0.03864699602127075, "logits/chosen": -0.43619537353515625, "logits/rejected": -0.4987362325191498, "logps/chosen": -0.012032095342874527, "logps/rejected": -1.2803680896759033, "loss": 1.5409, "nll_loss": 0.38137125968933105, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012032096274197102, "rewards/margins": 0.12683358788490295, "rewards/rejected": -0.12803681194782257, "step": 4809 }, { "epoch": 3.326417704011065, "grad_norm": 9.22579574584961, "learning_rate": 3.707545719993853e-05, "log_odds_chosen": 7.742648124694824, "log_odds_ratio": -0.022376982495188713, "logits/chosen": -0.3023233413696289, "logits/rejected": -0.4116421937942505, "logps/chosen": -0.008770937100052834, "logps/rejected": -1.4326858520507812, "loss": 1.9755, "nll_loss": 0.491626501083374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008770937565714121, "rewards/margins": 0.14239150285720825, "rewards/rejected": -0.14326860010623932, "step": 4810 }, { "epoch": 3.327109266943292, "grad_norm": 15.761456489562988, "learning_rate": 3.707161518364838e-05, "log_odds_chosen": 11.231200218200684, "log_odds_ratio": -2.7991562092211097e-05, "logits/chosen": -0.5510903596878052, "logits/rejected": -0.5870881080627441, "logps/chosen": -0.00022866626386530697, "logps/rejected": -2.7161855697631836, "loss": 1.3399, "nll_loss": 0.3349756598472595, "rewards/accuracies": 1.0, "rewards/chosen": -2.2866624931339175e-05, "rewards/margins": 0.27159571647644043, "rewards/rejected": -0.2716185748577118, "step": 4811 }, { "epoch": 3.327800829875519, "grad_norm": 10.318330764770508, "learning_rate": 3.706777316735823e-05, "log_odds_chosen": 7.601865768432617, "log_odds_ratio": -0.08645662665367126, "logits/chosen": -0.3268486559391022, "logits/rejected": -0.3267649710178375, "logps/chosen": -0.019028767943382263, "logps/rejected": -1.6411206722259521, "loss": 1.4555, "nll_loss": 0.3552231788635254, "rewards/accuracies": 1.0, "rewards/chosen": -0.001902876771055162, "rewards/margins": 0.16220919787883759, "rewards/rejected": -0.16411206126213074, "step": 4812 }, { "epoch": 3.3284923928077457, "grad_norm": 7.725826740264893, "learning_rate": 3.706393115106808e-05, "log_odds_chosen": 7.646389484405518, "log_odds_ratio": -0.010575213469564915, "logits/chosen": -0.27327248454093933, "logits/rejected": -0.29643142223358154, "logps/chosen": -0.023020073771476746, "logps/rejected": -1.444989562034607, "loss": 1.4151, "nll_loss": 0.35272642970085144, "rewards/accuracies": 1.0, "rewards/chosen": -0.002302007284015417, "rewards/margins": 0.14219695329666138, "rewards/rejected": -0.14449895918369293, "step": 4813 }, { "epoch": 3.3291839557399725, "grad_norm": 7.744783401489258, "learning_rate": 3.706008913477793e-05, "log_odds_chosen": 8.471439361572266, "log_odds_ratio": -0.08351105451583862, "logits/chosen": -0.23322007060050964, "logits/rejected": -0.14264808595180511, "logps/chosen": -0.01474690344184637, "logps/rejected": -1.1287660598754883, "loss": 1.9275, "nll_loss": 0.47353053092956543, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014746903907507658, "rewards/margins": 0.11140191555023193, "rewards/rejected": -0.11287661641836166, "step": 4814 }, { "epoch": 3.3298755186721993, "grad_norm": 5.945075035095215, "learning_rate": 3.705624711848779e-05, "log_odds_chosen": 7.3363752365112305, "log_odds_ratio": -0.008688355796039104, "logits/chosen": -0.5283100008964539, "logits/rejected": -0.5767071843147278, "logps/chosen": -0.039233021438121796, "logps/rejected": -1.4986639022827148, "loss": 1.1494, "nll_loss": 0.28649193048477173, "rewards/accuracies": 1.0, "rewards/chosen": -0.003923302050679922, "rewards/margins": 0.14594310522079468, "rewards/rejected": -0.14986640214920044, "step": 4815 }, { "epoch": 3.330567081604426, "grad_norm": 7.487738609313965, "learning_rate": 3.7052405102197634e-05, "log_odds_chosen": 7.99285888671875, "log_odds_ratio": -0.182649627327919, "logits/chosen": -0.5866954326629639, "logits/rejected": -0.5401970148086548, "logps/chosen": -0.02980240248143673, "logps/rejected": -1.5133633613586426, "loss": 1.9412, "nll_loss": 0.46703076362609863, "rewards/accuracies": 0.875, "rewards/chosen": -0.0029802401550114155, "rewards/margins": 0.148356094956398, "rewards/rejected": -0.15133635699748993, "step": 4816 }, { "epoch": 3.331258644536653, "grad_norm": 12.524292945861816, "learning_rate": 3.7048563085907486e-05, "log_odds_chosen": 7.655625343322754, "log_odds_ratio": -0.0921371728181839, "logits/chosen": -0.5067025423049927, "logits/rejected": -0.49522316455841064, "logps/chosen": -0.06759315729141235, "logps/rejected": -1.3731836080551147, "loss": 1.9436, "nll_loss": 0.4766749143600464, "rewards/accuracies": 1.0, "rewards/chosen": -0.006759315729141235, "rewards/margins": 0.130559042096138, "rewards/rejected": -0.13731835782527924, "step": 4817 }, { "epoch": 3.33195020746888, "grad_norm": 6.2454938888549805, "learning_rate": 3.704472106961734e-05, "log_odds_chosen": 7.497071743011475, "log_odds_ratio": -0.12156727910041809, "logits/chosen": -0.2898300588130951, "logits/rejected": -0.28401628136634827, "logps/chosen": -0.028395526111125946, "logps/rejected": -1.1893532276153564, "loss": 1.7249, "nll_loss": 0.4190668761730194, "rewards/accuracies": 0.875, "rewards/chosen": -0.002839552704244852, "rewards/margins": 0.11609578132629395, "rewards/rejected": -0.11893533170223236, "step": 4818 }, { "epoch": 3.3326417704011067, "grad_norm": 9.945439338684082, "learning_rate": 3.704087905332719e-05, "log_odds_chosen": 7.445404052734375, "log_odds_ratio": -0.027865968644618988, "logits/chosen": -0.3710874319076538, "logits/rejected": -0.4082677662372589, "logps/chosen": -0.015139044262468815, "logps/rejected": -1.3844302892684937, "loss": 1.3629, "nll_loss": 0.3379259407520294, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015139044262468815, "rewards/margins": 0.13692912459373474, "rewards/rejected": -0.13844303786754608, "step": 4819 }, { "epoch": 3.3333333333333335, "grad_norm": 6.541971206665039, "learning_rate": 3.7037037037037037e-05, "log_odds_chosen": 9.330412864685059, "log_odds_ratio": -0.0032438477501273155, "logits/chosen": -0.21376971900463104, "logits/rejected": -0.23448964953422546, "logps/chosen": -0.010493476875126362, "logps/rejected": -1.3022187948226929, "loss": 1.7931, "nll_loss": 0.4479522705078125, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010493475710973144, "rewards/margins": 0.12917253375053406, "rewards/rejected": -0.1302218735218048, "step": 4820 }, { "epoch": 3.3340248962655603, "grad_norm": 5.288464069366455, "learning_rate": 3.703319502074689e-05, "log_odds_chosen": 9.160746574401855, "log_odds_ratio": -0.0006003558519296348, "logits/chosen": -0.060743916779756546, "logits/rejected": -0.1093602329492569, "logps/chosen": -0.009671274572610855, "logps/rejected": -1.690582513809204, "loss": 1.7897, "nll_loss": 0.4473611116409302, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009671273874118924, "rewards/margins": 0.16809113323688507, "rewards/rejected": -0.16905826330184937, "step": 4821 }, { "epoch": 3.334716459197787, "grad_norm": 10.332783699035645, "learning_rate": 3.702935300445674e-05, "log_odds_chosen": 10.676567077636719, "log_odds_ratio": -0.00016885343939065933, "logits/chosen": -0.6289178133010864, "logits/rejected": -0.711728036403656, "logps/chosen": -0.006669633090496063, "logps/rejected": -2.646531105041504, "loss": 1.7676, "nll_loss": 0.44187238812446594, "rewards/accuracies": 1.0, "rewards/chosen": -0.000666963285766542, "rewards/margins": 0.26398617029190063, "rewards/rejected": -0.26465311646461487, "step": 4822 }, { "epoch": 3.335408022130014, "grad_norm": 8.359293937683105, "learning_rate": 3.702551098816659e-05, "log_odds_chosen": 9.531179428100586, "log_odds_ratio": -0.0008255833527073264, "logits/chosen": -0.38351601362228394, "logits/rejected": -0.4077991545200348, "logps/chosen": -0.012399173341691494, "logps/rejected": -2.359907388687134, "loss": 1.2938, "nll_loss": 0.32336685061454773, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012399173574522138, "rewards/margins": 0.23475082218647003, "rewards/rejected": -0.2359907627105713, "step": 4823 }, { "epoch": 3.336099585062241, "grad_norm": 11.444721221923828, "learning_rate": 3.7021668971876446e-05, "log_odds_chosen": 8.420994758605957, "log_odds_ratio": -0.003699307329952717, "logits/chosen": -0.10867477208375931, "logits/rejected": -0.1305103600025177, "logps/chosen": -0.01947128400206566, "logps/rejected": -3.052764654159546, "loss": 1.8229, "nll_loss": 0.45536285638809204, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019471283303573728, "rewards/margins": 0.30332931876182556, "rewards/rejected": -0.30527645349502563, "step": 4824 }, { "epoch": 3.3367911479944676, "grad_norm": 8.161203384399414, "learning_rate": 3.701782695558629e-05, "log_odds_chosen": 7.156606197357178, "log_odds_ratio": -0.05996629595756531, "logits/chosen": -0.3300841152667999, "logits/rejected": -0.3203493356704712, "logps/chosen": -0.018597949296236038, "logps/rejected": -1.0929030179977417, "loss": 1.774, "nll_loss": 0.4375021159648895, "rewards/accuracies": 1.0, "rewards/chosen": -0.00185979506932199, "rewards/margins": 0.10743050277233124, "rewards/rejected": -0.10929030179977417, "step": 4825 }, { "epoch": 3.3374827109266945, "grad_norm": 7.885430812835693, "learning_rate": 3.7013984939296144e-05, "log_odds_chosen": 9.564796447753906, "log_odds_ratio": -0.0009235774632543325, "logits/chosen": -0.48841261863708496, "logits/rejected": -0.5725415945053101, "logps/chosen": -0.013065568171441555, "logps/rejected": -2.3977560997009277, "loss": 1.3252, "nll_loss": 0.3312044143676758, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013065567472949624, "rewards/margins": 0.23846904933452606, "rewards/rejected": -0.239775612950325, "step": 4826 }, { "epoch": 3.3381742738589213, "grad_norm": 16.353010177612305, "learning_rate": 3.7010142923006e-05, "log_odds_chosen": 9.778549194335938, "log_odds_ratio": -0.0011052426416426897, "logits/chosen": -0.46861732006073, "logits/rejected": -0.5008033514022827, "logps/chosen": -0.007358612027019262, "logps/rejected": -2.2930526733398438, "loss": 2.2518, "nll_loss": 0.562834620475769, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007358611328527331, "rewards/margins": 0.2285693883895874, "rewards/rejected": -0.22930525243282318, "step": 4827 }, { "epoch": 3.338865836791148, "grad_norm": 6.472213268280029, "learning_rate": 3.700630090671585e-05, "log_odds_chosen": 9.00424575805664, "log_odds_ratio": -0.0024379806127399206, "logits/chosen": -0.08923661708831787, "logits/rejected": -0.1967790573835373, "logps/chosen": -0.0013902625069022179, "logps/rejected": -1.270837664604187, "loss": 1.1089, "nll_loss": 0.2769761085510254, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013902624777983874, "rewards/margins": 0.12694475054740906, "rewards/rejected": -0.12708376348018646, "step": 4828 }, { "epoch": 3.339557399723375, "grad_norm": 10.397558212280273, "learning_rate": 3.7002458890425695e-05, "log_odds_chosen": 9.968124389648438, "log_odds_ratio": -0.00025020475732162595, "logits/chosen": -0.13119478523731232, "logits/rejected": -0.2607486844062805, "logps/chosen": -0.013811449520289898, "logps/rejected": -2.4385852813720703, "loss": 1.4208, "nll_loss": 0.35518527030944824, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013811449753120542, "rewards/margins": 0.2424774020910263, "rewards/rejected": -0.24385854601860046, "step": 4829 }, { "epoch": 3.340248962655602, "grad_norm": 5.243896007537842, "learning_rate": 3.699861687413555e-05, "log_odds_chosen": 9.199673652648926, "log_odds_ratio": -0.0005805004620924592, "logits/chosen": -0.0678810179233551, "logits/rejected": -0.017454490065574646, "logps/chosen": -0.004220792558044195, "logps/rejected": -1.9428520202636719, "loss": 1.6269, "nll_loss": 0.40665626525878906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004220792616251856, "rewards/margins": 0.1938631236553192, "rewards/rejected": -0.19428519904613495, "step": 4830 }, { "epoch": 3.3409405255878286, "grad_norm": 9.78297233581543, "learning_rate": 3.69947748578454e-05, "log_odds_chosen": 9.642803192138672, "log_odds_ratio": -0.00012088124640285969, "logits/chosen": -0.5920174717903137, "logits/rejected": -0.6880452036857605, "logps/chosen": -0.0005044254357926548, "logps/rejected": -1.7257821559906006, "loss": 1.2987, "nll_loss": 0.3246620297431946, "rewards/accuracies": 1.0, "rewards/chosen": -5.044254066888243e-05, "rewards/margins": 0.17252777516841888, "rewards/rejected": -0.17257821559906006, "step": 4831 }, { "epoch": 3.3416320885200554, "grad_norm": 17.485815048217773, "learning_rate": 3.6990932841555246e-05, "log_odds_chosen": 8.459027290344238, "log_odds_ratio": -0.009617390111088753, "logits/chosen": -0.39295753836631775, "logits/rejected": -0.48550981283187866, "logps/chosen": -0.03097320720553398, "logps/rejected": -1.9759016036987305, "loss": 1.6381, "nll_loss": 0.4085724651813507, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030973206739872694, "rewards/margins": 0.19449284672737122, "rewards/rejected": -0.197590172290802, "step": 4832 }, { "epoch": 3.3423236514522823, "grad_norm": 8.457892417907715, "learning_rate": 3.6987090825265105e-05, "log_odds_chosen": 10.580424308776855, "log_odds_ratio": -5.697977030649781e-05, "logits/chosen": -0.4374619722366333, "logits/rejected": -0.5243746042251587, "logps/chosen": -0.00043220724910497665, "logps/rejected": -2.430696964263916, "loss": 1.639, "nll_loss": 0.40974873304367065, "rewards/accuracies": 1.0, "rewards/chosen": -4.322072709328495e-05, "rewards/margins": 0.243026465177536, "rewards/rejected": -0.24306970834732056, "step": 4833 }, { "epoch": 3.343015214384509, "grad_norm": 10.142221450805664, "learning_rate": 3.698324880897495e-05, "log_odds_chosen": 8.102699279785156, "log_odds_ratio": -0.005135298706591129, "logits/chosen": -0.7511797547340393, "logits/rejected": -0.7273877859115601, "logps/chosen": -0.006073735188692808, "logps/rejected": -1.4573071002960205, "loss": 2.3506, "nll_loss": 0.5871455073356628, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006073735421523452, "rewards/margins": 0.14512333273887634, "rewards/rejected": -0.14573070406913757, "step": 4834 }, { "epoch": 3.343706777316736, "grad_norm": 8.492439270019531, "learning_rate": 3.69794067926848e-05, "log_odds_chosen": 10.246946334838867, "log_odds_ratio": -0.0009441052097827196, "logits/chosen": -0.5589309930801392, "logits/rejected": -0.5878577828407288, "logps/chosen": -0.016292212530970573, "logps/rejected": -2.1811840534210205, "loss": 1.4918, "nll_loss": 0.3728483021259308, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016292212530970573, "rewards/margins": 0.21648918092250824, "rewards/rejected": -0.21811839938163757, "step": 4835 }, { "epoch": 3.3443983402489628, "grad_norm": 10.190574645996094, "learning_rate": 3.6975564776394655e-05, "log_odds_chosen": 8.5763521194458, "log_odds_ratio": -0.00470086932182312, "logits/chosen": -0.5186557173728943, "logits/rejected": -0.6129405498504639, "logps/chosen": -0.03856277093291283, "logps/rejected": -1.8780362606048584, "loss": 1.6883, "nll_loss": 0.42161381244659424, "rewards/accuracies": 1.0, "rewards/chosen": -0.0038562770932912827, "rewards/margins": 0.18394732475280762, "rewards/rejected": -0.18780359625816345, "step": 4836 }, { "epoch": 3.3450899031811896, "grad_norm": 7.443319797515869, "learning_rate": 3.697172276010451e-05, "log_odds_chosen": 9.213088989257812, "log_odds_ratio": -0.0003929064841940999, "logits/chosen": -0.303056925535202, "logits/rejected": -0.3446924388408661, "logps/chosen": -0.000988618703559041, "logps/rejected": -1.5611990690231323, "loss": 1.193, "nll_loss": 0.29819926619529724, "rewards/accuracies": 1.0, "rewards/chosen": -9.88618703559041e-05, "rewards/margins": 0.15602104365825653, "rewards/rejected": -0.1561199128627777, "step": 4837 }, { "epoch": 3.3457814661134164, "grad_norm": 6.408515453338623, "learning_rate": 3.6967880743814353e-05, "log_odds_chosen": 8.536367416381836, "log_odds_ratio": -0.0014657180290669203, "logits/chosen": -0.2517034113407135, "logits/rejected": -0.2618112862110138, "logps/chosen": -0.024171721190214157, "logps/rejected": -2.235044479370117, "loss": 1.3209, "nll_loss": 0.3300686180591583, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024171723052859306, "rewards/margins": 0.2210872769355774, "rewards/rejected": -0.2235044538974762, "step": 4838 }, { "epoch": 3.3464730290456433, "grad_norm": 4.3202805519104, "learning_rate": 3.6964038727524206e-05, "log_odds_chosen": 9.348167419433594, "log_odds_ratio": -0.00027577090077102184, "logits/chosen": -0.2292935848236084, "logits/rejected": -0.26607123017311096, "logps/chosen": -0.00010278346599079669, "logps/rejected": -1.0340757369995117, "loss": 1.307, "nll_loss": 0.32673293352127075, "rewards/accuracies": 1.0, "rewards/chosen": -1.027834696287755e-05, "rewards/margins": 0.10339729487895966, "rewards/rejected": -0.10340756922960281, "step": 4839 }, { "epoch": 3.34716459197787, "grad_norm": 8.864399909973145, "learning_rate": 3.696019671123406e-05, "log_odds_chosen": 6.516083240509033, "log_odds_ratio": -0.061890941113233566, "logits/chosen": -0.3812077045440674, "logits/rejected": -0.3444467782974243, "logps/chosen": -0.09658174216747284, "logps/rejected": -1.4395780563354492, "loss": 1.7208, "nll_loss": 0.4240223467350006, "rewards/accuracies": 1.0, "rewards/chosen": -0.009658175520598888, "rewards/margins": 0.1342996507883072, "rewards/rejected": -0.14395782351493835, "step": 4840 }, { "epoch": 3.347856154910097, "grad_norm": 7.218293190002441, "learning_rate": 3.6956354694943904e-05, "log_odds_chosen": 7.828502655029297, "log_odds_ratio": -0.00611227797344327, "logits/chosen": -0.01647898741066456, "logits/rejected": -0.04726487398147583, "logps/chosen": -0.035098958760499954, "logps/rejected": -1.889201045036316, "loss": 1.7379, "nll_loss": 0.4338761568069458, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035098965745419264, "rewards/margins": 0.18541020154953003, "rewards/rejected": -0.18892011046409607, "step": 4841 }, { "epoch": 3.3485477178423237, "grad_norm": 7.2084245681762695, "learning_rate": 3.695251267865376e-05, "log_odds_chosen": 8.534614562988281, "log_odds_ratio": -0.0014289494138211012, "logits/chosen": -0.6113095283508301, "logits/rejected": -0.6475774645805359, "logps/chosen": -0.0049095191061496735, "logps/rejected": -1.720369815826416, "loss": 1.427, "nll_loss": 0.356608510017395, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004909519338980317, "rewards/margins": 0.17154602706432343, "rewards/rejected": -0.17203697562217712, "step": 4842 }, { "epoch": 3.3492392807745506, "grad_norm": 8.724457740783691, "learning_rate": 3.694867066236361e-05, "log_odds_chosen": 10.108199119567871, "log_odds_ratio": -8.961541607277468e-05, "logits/chosen": -0.22915613651275635, "logits/rejected": -0.20698942244052887, "logps/chosen": -0.00045968289487063885, "logps/rejected": -1.7437655925750732, "loss": 1.8042, "nll_loss": 0.45104360580444336, "rewards/accuracies": 1.0, "rewards/chosen": -4.596828875946812e-05, "rewards/margins": 0.17433059215545654, "rewards/rejected": -0.17437656223773956, "step": 4843 }, { "epoch": 3.3499308437067774, "grad_norm": 7.414542198181152, "learning_rate": 3.694482864607346e-05, "log_odds_chosen": 9.796012878417969, "log_odds_ratio": -0.0003306611906737089, "logits/chosen": -0.46491843461990356, "logits/rejected": -0.5566118955612183, "logps/chosen": -0.0025635913480073214, "logps/rejected": -1.987081527709961, "loss": 1.5142, "nll_loss": 0.378519743680954, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002563591697253287, "rewards/margins": 0.19845178723335266, "rewards/rejected": -0.19870814681053162, "step": 4844 }, { "epoch": 3.3506224066390042, "grad_norm": 9.725508689880371, "learning_rate": 3.6940986629783314e-05, "log_odds_chosen": 10.11623764038086, "log_odds_ratio": -0.0001505285908933729, "logits/chosen": -0.8159855604171753, "logits/rejected": -0.8474183082580566, "logps/chosen": -0.0009152303100563586, "logps/rejected": -2.013854742050171, "loss": 1.4857, "nll_loss": 0.3714017868041992, "rewards/accuracies": 1.0, "rewards/chosen": -9.152302664006129e-05, "rewards/margins": 0.2012939602136612, "rewards/rejected": -0.2013854682445526, "step": 4845 }, { "epoch": 3.351313969571231, "grad_norm": 10.209623336791992, "learning_rate": 3.6937144613493166e-05, "log_odds_chosen": 7.748871326446533, "log_odds_ratio": -0.204082652926445, "logits/chosen": -0.4716426134109497, "logits/rejected": -0.5088870525360107, "logps/chosen": -0.0646195188164711, "logps/rejected": -1.4852840900421143, "loss": 1.7175, "nll_loss": 0.4089680314064026, "rewards/accuracies": 0.875, "rewards/chosen": -0.006461952812969685, "rewards/margins": 0.14206644892692566, "rewards/rejected": -0.14852841198444366, "step": 4846 }, { "epoch": 3.352005532503458, "grad_norm": 40.184871673583984, "learning_rate": 3.693330259720301e-05, "log_odds_chosen": 8.97463607788086, "log_odds_ratio": -0.5296390056610107, "logits/chosen": -0.5999283194541931, "logits/rejected": -0.6021513342857361, "logps/chosen": -0.07349149882793427, "logps/rejected": -2.196320056915283, "loss": 2.5445, "nll_loss": 0.5831631422042847, "rewards/accuracies": 0.875, "rewards/chosen": -0.007349150255322456, "rewards/margins": 0.21228285133838654, "rewards/rejected": -0.21963201463222504, "step": 4847 }, { "epoch": 3.3526970954356847, "grad_norm": 8.293537139892578, "learning_rate": 3.6929460580912864e-05, "log_odds_chosen": 9.890531539916992, "log_odds_ratio": -0.00047966151032596827, "logits/chosen": -0.671005129814148, "logits/rejected": -0.8335851430892944, "logps/chosen": -0.013375879265367985, "logps/rejected": -2.5320022106170654, "loss": 1.8409, "nll_loss": 0.46017351746559143, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013375879498198628, "rewards/margins": 0.25186264514923096, "rewards/rejected": -0.2532002329826355, "step": 4848 }, { "epoch": 3.3533886583679116, "grad_norm": 11.15304946899414, "learning_rate": 3.692561856462272e-05, "log_odds_chosen": 8.975371360778809, "log_odds_ratio": -0.0040519824251532555, "logits/chosen": -0.7027145624160767, "logits/rejected": -0.708022952079773, "logps/chosen": -0.008829191327095032, "logps/rejected": -1.2773983478546143, "loss": 2.2991, "nll_loss": 0.5743774175643921, "rewards/accuracies": 1.0, "rewards/chosen": -0.000882919121067971, "rewards/margins": 0.12685692310333252, "rewards/rejected": -0.12773984670639038, "step": 4849 }, { "epoch": 3.3540802213001384, "grad_norm": 7.476949214935303, "learning_rate": 3.692177654833256e-05, "log_odds_chosen": 9.539048194885254, "log_odds_ratio": -0.0002563097223173827, "logits/chosen": -0.20793417096138, "logits/rejected": -0.25936827063560486, "logps/chosen": -0.0006166063249111176, "logps/rejected": -1.699591875076294, "loss": 2.0961, "nll_loss": 0.5239871740341187, "rewards/accuracies": 1.0, "rewards/chosen": -6.166063394630328e-05, "rewards/margins": 0.16989752650260925, "rewards/rejected": -0.1699592024087906, "step": 4850 }, { "epoch": 3.354771784232365, "grad_norm": 11.8095064163208, "learning_rate": 3.691793453204242e-05, "log_odds_chosen": 9.624149322509766, "log_odds_ratio": -0.0007174991187639534, "logits/chosen": -0.7241888046264648, "logits/rejected": -0.7914281487464905, "logps/chosen": -0.0007417750312015414, "logps/rejected": -1.8069579601287842, "loss": 1.6439, "nll_loss": 0.41089504957199097, "rewards/accuracies": 1.0, "rewards/chosen": -7.41775002097711e-05, "rewards/margins": 0.18062162399291992, "rewards/rejected": -0.1806957870721817, "step": 4851 }, { "epoch": 3.355463347164592, "grad_norm": 9.437474250793457, "learning_rate": 3.691409251575227e-05, "log_odds_chosen": 10.25767707824707, "log_odds_ratio": -7.604131678817794e-05, "logits/chosen": -0.5359310507774353, "logits/rejected": -0.5045655965805054, "logps/chosen": -0.00021611034753732383, "logps/rejected": -1.710390329360962, "loss": 1.5811, "nll_loss": 0.39527881145477295, "rewards/accuracies": 1.0, "rewards/chosen": -2.1611034753732383e-05, "rewards/margins": 0.17101740837097168, "rewards/rejected": -0.17103902995586395, "step": 4852 }, { "epoch": 3.356154910096819, "grad_norm": 7.6020894050598145, "learning_rate": 3.691025049946212e-05, "log_odds_chosen": 7.330630302429199, "log_odds_ratio": -0.1588706076145172, "logits/chosen": -0.27602532505989075, "logits/rejected": -0.3280216157436371, "logps/chosen": -0.044778451323509216, "logps/rejected": -1.4952619075775146, "loss": 1.8527, "nll_loss": 0.4472947120666504, "rewards/accuracies": 0.875, "rewards/chosen": -0.0044778455048799515, "rewards/margins": 0.1450483500957489, "rewards/rejected": -0.1495261937379837, "step": 4853 }, { "epoch": 3.3568464730290457, "grad_norm": 12.778995513916016, "learning_rate": 3.690640848317197e-05, "log_odds_chosen": 8.721086502075195, "log_odds_ratio": -0.12304294109344482, "logits/chosen": -0.6769004464149475, "logits/rejected": -0.7507296800613403, "logps/chosen": -0.03270779550075531, "logps/rejected": -1.7994873523712158, "loss": 1.5283, "nll_loss": 0.36977386474609375, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032707795035094023, "rewards/margins": 0.17667795717716217, "rewards/rejected": -0.17994874715805054, "step": 4854 }, { "epoch": 3.3575380359612725, "grad_norm": 12.528250694274902, "learning_rate": 3.6902566466881825e-05, "log_odds_chosen": 10.009822845458984, "log_odds_ratio": -0.0009388787439092994, "logits/chosen": -0.7347992062568665, "logits/rejected": -0.8108212351799011, "logps/chosen": -0.0009514364064671099, "logps/rejected": -1.9790453910827637, "loss": 1.9298, "nll_loss": 0.4823543131351471, "rewards/accuracies": 1.0, "rewards/chosen": -9.514363773632795e-05, "rewards/margins": 0.1978093981742859, "rewards/rejected": -0.1979045569896698, "step": 4855 }, { "epoch": 3.3582295988934994, "grad_norm": 4.950071811676025, "learning_rate": 3.689872445059167e-05, "log_odds_chosen": 9.48250961303711, "log_odds_ratio": -0.0001974797050934285, "logits/chosen": -0.8554219603538513, "logits/rejected": -0.9148414731025696, "logps/chosen": -0.0004030237323604524, "logps/rejected": -1.7633424997329712, "loss": 1.3353, "nll_loss": 0.3338037133216858, "rewards/accuracies": 1.0, "rewards/chosen": -4.030237687402405e-05, "rewards/margins": 0.1762939691543579, "rewards/rejected": -0.17633426189422607, "step": 4856 }, { "epoch": 3.358921161825726, "grad_norm": 10.450413703918457, "learning_rate": 3.689488243430152e-05, "log_odds_chosen": 9.980003356933594, "log_odds_ratio": -0.00019624002743512392, "logits/chosen": -0.467332661151886, "logits/rejected": -0.5671140551567078, "logps/chosen": -0.0003920574963558465, "logps/rejected": -1.765358328819275, "loss": 1.4584, "nll_loss": 0.3645763397216797, "rewards/accuracies": 1.0, "rewards/chosen": -3.920574818039313e-05, "rewards/margins": 0.17649662494659424, "rewards/rejected": -0.17653582990169525, "step": 4857 }, { "epoch": 3.359612724757953, "grad_norm": 8.591510772705078, "learning_rate": 3.6891040418011375e-05, "log_odds_chosen": 8.480205535888672, "log_odds_ratio": -0.0006487661739811301, "logits/chosen": -0.47236570715904236, "logits/rejected": -0.5941859483718872, "logps/chosen": -0.0014029113808646798, "logps/rejected": -1.2931849956512451, "loss": 1.4132, "nll_loss": 0.35322386026382446, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014029112935531884, "rewards/margins": 0.1291782110929489, "rewards/rejected": -0.129318505525589, "step": 4858 }, { "epoch": 3.36030428769018, "grad_norm": 9.483941078186035, "learning_rate": 3.688719840172122e-05, "log_odds_chosen": 8.947179794311523, "log_odds_ratio": -0.009097904898226261, "logits/chosen": -0.6214572191238403, "logits/rejected": -0.6888337135314941, "logps/chosen": -0.007219300139695406, "logps/rejected": -1.9767446517944336, "loss": 1.6643, "nll_loss": 0.4151747226715088, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007219300023280084, "rewards/margins": 0.19695252180099487, "rewards/rejected": -0.1976744532585144, "step": 4859 }, { "epoch": 3.3609958506224067, "grad_norm": 5.145646572113037, "learning_rate": 3.688335638543108e-05, "log_odds_chosen": 10.351024627685547, "log_odds_ratio": -0.00010055641178041697, "logits/chosen": -0.37642523646354675, "logits/rejected": -0.3933575749397278, "logps/chosen": -0.00014752685092389584, "logps/rejected": -1.5574370622634888, "loss": 1.2852, "nll_loss": 0.32127830386161804, "rewards/accuracies": 1.0, "rewards/chosen": -1.4752684364793822e-05, "rewards/margins": 0.15572895109653473, "rewards/rejected": -0.15574368834495544, "step": 4860 }, { "epoch": 3.3616874135546335, "grad_norm": 7.796165943145752, "learning_rate": 3.6879514369140926e-05, "log_odds_chosen": 9.481037139892578, "log_odds_ratio": -0.0020998227410018444, "logits/chosen": -0.8685269355773926, "logits/rejected": -0.9142651557922363, "logps/chosen": -0.0064911977387964725, "logps/rejected": -1.3805325031280518, "loss": 1.9348, "nll_loss": 0.48348888754844666, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006491197855211794, "rewards/margins": 0.13740414381027222, "rewards/rejected": -0.13805325329303741, "step": 4861 }, { "epoch": 3.3623789764868603, "grad_norm": 12.897394180297852, "learning_rate": 3.687567235285078e-05, "log_odds_chosen": 9.886244773864746, "log_odds_ratio": -0.0001866334059741348, "logits/chosen": -0.4743501543998718, "logits/rejected": -0.5830299258232117, "logps/chosen": -0.003318520961329341, "logps/rejected": -1.95000159740448, "loss": 2.0509, "nll_loss": 0.5126988887786865, "rewards/accuracies": 1.0, "rewards/chosen": -0.000331852090312168, "rewards/margins": 0.19466832280158997, "rewards/rejected": -0.19500017166137695, "step": 4862 }, { "epoch": 3.363070539419087, "grad_norm": 11.533024787902832, "learning_rate": 3.687183033656063e-05, "log_odds_chosen": 10.184024810791016, "log_odds_ratio": -0.00016964529640972614, "logits/chosen": -0.6249281167984009, "logits/rejected": -0.6705152988433838, "logps/chosen": -0.0009269227739423513, "logps/rejected": -2.1890594959259033, "loss": 1.7934, "nll_loss": 0.44832783937454224, "rewards/accuracies": 1.0, "rewards/chosen": -9.269227302866057e-05, "rewards/margins": 0.21881325542926788, "rewards/rejected": -0.218905970454216, "step": 4863 }, { "epoch": 3.363762102351314, "grad_norm": 9.165794372558594, "learning_rate": 3.686798832027048e-05, "log_odds_chosen": 9.309822082519531, "log_odds_ratio": -0.0005651208339259028, "logits/chosen": -0.6249001622200012, "logits/rejected": -0.6750528216362, "logps/chosen": -0.0012620101915672421, "logps/rejected": -1.5672866106033325, "loss": 1.7412, "nll_loss": 0.4352557063102722, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012620101915672421, "rewards/margins": 0.15660247206687927, "rewards/rejected": -0.15672868490219116, "step": 4864 }, { "epoch": 3.364453665283541, "grad_norm": 11.619812965393066, "learning_rate": 3.686414630398033e-05, "log_odds_chosen": 9.562946319580078, "log_odds_ratio": -8.201718446798623e-05, "logits/chosen": -0.4055235981941223, "logits/rejected": -0.4862651228904724, "logps/chosen": -0.0003127045347355306, "logps/rejected": -1.5968135595321655, "loss": 1.5052, "nll_loss": 0.37629374861717224, "rewards/accuracies": 1.0, "rewards/chosen": -3.1270450563170016e-05, "rewards/margins": 0.15965008735656738, "rewards/rejected": -0.15968134999275208, "step": 4865 }, { "epoch": 3.3651452282157677, "grad_norm": 14.077691078186035, "learning_rate": 3.686030428769018e-05, "log_odds_chosen": 9.250917434692383, "log_odds_ratio": -0.005956509616225958, "logits/chosen": -0.8243609070777893, "logits/rejected": -0.8653722405433655, "logps/chosen": -0.008840306662023067, "logps/rejected": -2.3019497394561768, "loss": 2.1826, "nll_loss": 0.5450628995895386, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008840306545607746, "rewards/margins": 0.22931094467639923, "rewards/rejected": -0.23019498586654663, "step": 4866 }, { "epoch": 3.3658367911479945, "grad_norm": 6.497355937957764, "learning_rate": 3.6856462271400034e-05, "log_odds_chosen": 6.605334281921387, "log_odds_ratio": -0.17641203105449677, "logits/chosen": -0.40605348348617554, "logits/rejected": -0.38486599922180176, "logps/chosen": -0.04633874073624611, "logps/rejected": -1.4925726652145386, "loss": 1.6609, "nll_loss": 0.39758217334747314, "rewards/accuracies": 0.875, "rewards/chosen": -0.004633874632418156, "rewards/margins": 0.14462338387966156, "rewards/rejected": -0.14925727248191833, "step": 4867 }, { "epoch": 3.3665283540802213, "grad_norm": 11.494439125061035, "learning_rate": 3.685262025510988e-05, "log_odds_chosen": 8.713448524475098, "log_odds_ratio": -0.08428078889846802, "logits/chosen": -0.39406687021255493, "logits/rejected": -0.4824514389038086, "logps/chosen": -0.014646649360656738, "logps/rejected": -1.1748372316360474, "loss": 2.0042, "nll_loss": 0.4926164150238037, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014646650524809957, "rewards/margins": 0.11601905524730682, "rewards/rejected": -0.1174837201833725, "step": 4868 }, { "epoch": 3.367219917012448, "grad_norm": 8.511127471923828, "learning_rate": 3.684877823881974e-05, "log_odds_chosen": 10.085894584655762, "log_odds_ratio": -0.00014831179578322917, "logits/chosen": -0.6100676655769348, "logits/rejected": -0.6811908483505249, "logps/chosen": -0.00021410381305031478, "logps/rejected": -1.6063182353973389, "loss": 1.3799, "nll_loss": 0.3449620306491852, "rewards/accuracies": 1.0, "rewards/chosen": -2.141038203262724e-05, "rewards/margins": 0.1606104075908661, "rewards/rejected": -0.16063182055950165, "step": 4869 }, { "epoch": 3.367911479944675, "grad_norm": 8.44887638092041, "learning_rate": 3.6844936222529584e-05, "log_odds_chosen": 9.12254524230957, "log_odds_ratio": -0.0006699742516502738, "logits/chosen": -0.6472569704055786, "logits/rejected": -0.8005533814430237, "logps/chosen": -0.006916233338415623, "logps/rejected": -1.9187191724777222, "loss": 1.4938, "nll_loss": 0.37338870763778687, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006916233105584979, "rewards/margins": 0.1911802887916565, "rewards/rejected": -0.19187191128730774, "step": 4870 }, { "epoch": 3.368603042876902, "grad_norm": 14.379709243774414, "learning_rate": 3.684109420623944e-05, "log_odds_chosen": 9.948789596557617, "log_odds_ratio": -8.950324263423681e-05, "logits/chosen": -0.6824163794517517, "logits/rejected": -0.6648140549659729, "logps/chosen": -0.0002694717841222882, "logps/rejected": -1.6847224235534668, "loss": 1.8646, "nll_loss": 0.4661399722099304, "rewards/accuracies": 1.0, "rewards/chosen": -2.694718205020763e-05, "rewards/margins": 0.16844528913497925, "rewards/rejected": -0.16847223043441772, "step": 4871 }, { "epoch": 3.3692946058091287, "grad_norm": 6.499044895172119, "learning_rate": 3.683725218994929e-05, "log_odds_chosen": 7.629302978515625, "log_odds_ratio": -0.002777328947558999, "logits/chosen": -0.327689528465271, "logits/rejected": -0.2956370711326599, "logps/chosen": -0.036704547703266144, "logps/rejected": -2.166947841644287, "loss": 1.5244, "nll_loss": 0.38081902265548706, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036704547237604856, "rewards/margins": 0.2130243182182312, "rewards/rejected": -0.21669480204582214, "step": 4872 }, { "epoch": 3.3699861687413555, "grad_norm": 8.781851768493652, "learning_rate": 3.683341017365914e-05, "log_odds_chosen": 9.797788619995117, "log_odds_ratio": -0.00033861229894682765, "logits/chosen": -0.7021996974945068, "logits/rejected": -0.8226416110992432, "logps/chosen": -0.0005934900254942477, "logps/rejected": -1.7794638872146606, "loss": 1.8316, "nll_loss": 0.45786577463150024, "rewards/accuracies": 1.0, "rewards/chosen": -5.934900400461629e-05, "rewards/margins": 0.17788705229759216, "rewards/rejected": -0.17794638872146606, "step": 4873 }, { "epoch": 3.3706777316735823, "grad_norm": 8.821776390075684, "learning_rate": 3.682956815736899e-05, "log_odds_chosen": 8.472004890441895, "log_odds_ratio": -0.000865703565068543, "logits/chosen": -0.5522797703742981, "logits/rejected": -0.5616713166236877, "logps/chosen": -0.0003104619972873479, "logps/rejected": -0.9349074363708496, "loss": 2.4649, "nll_loss": 0.6161264181137085, "rewards/accuracies": 1.0, "rewards/chosen": -3.104619827354327e-05, "rewards/margins": 0.09345970302820206, "rewards/rejected": -0.09349074214696884, "step": 4874 }, { "epoch": 3.371369294605809, "grad_norm": 12.452609062194824, "learning_rate": 3.682572614107884e-05, "log_odds_chosen": 8.774795532226562, "log_odds_ratio": -0.0033878334797918797, "logits/chosen": -0.46289071440696716, "logits/rejected": -0.45690450072288513, "logps/chosen": -0.019403763115406036, "logps/rejected": -1.4844517707824707, "loss": 1.9597, "nll_loss": 0.4895821809768677, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019403762416914105, "rewards/margins": 0.14650480449199677, "rewards/rejected": -0.14844517409801483, "step": 4875 }, { "epoch": 3.372060857538036, "grad_norm": 14.034537315368652, "learning_rate": 3.682188412478869e-05, "log_odds_chosen": 9.291325569152832, "log_odds_ratio": -0.00020458844664972275, "logits/chosen": -0.5656931400299072, "logits/rejected": -0.6621861457824707, "logps/chosen": -0.0002466610458213836, "logps/rejected": -1.292998194694519, "loss": 2.6441, "nll_loss": 0.6610121726989746, "rewards/accuracies": 1.0, "rewards/chosen": -2.4666105673532e-05, "rewards/margins": 0.1292751580476761, "rewards/rejected": -0.1292998194694519, "step": 4876 }, { "epoch": 3.372752420470263, "grad_norm": 10.457889556884766, "learning_rate": 3.681804210849854e-05, "log_odds_chosen": 9.468912124633789, "log_odds_ratio": -0.009811985306441784, "logits/chosen": -0.4400988817214966, "logits/rejected": -0.516470193862915, "logps/chosen": -0.0058551025576889515, "logps/rejected": -1.7885127067565918, "loss": 1.9934, "nll_loss": 0.49736106395721436, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005855102790519595, "rewards/margins": 0.1782657653093338, "rewards/rejected": -0.17885126173496246, "step": 4877 }, { "epoch": 3.3734439834024896, "grad_norm": 5.182315349578857, "learning_rate": 3.68142000922084e-05, "log_odds_chosen": 9.558467864990234, "log_odds_ratio": -0.00024224047956522554, "logits/chosen": -0.507156252861023, "logits/rejected": -0.5910125970840454, "logps/chosen": -0.006765010766685009, "logps/rejected": -2.19610595703125, "loss": 1.122, "nll_loss": 0.2804679572582245, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006765010766685009, "rewards/margins": 0.2189340889453888, "rewards/rejected": -0.21961060166358948, "step": 4878 }, { "epoch": 3.3741355463347165, "grad_norm": 8.035346984863281, "learning_rate": 3.681035807591824e-05, "log_odds_chosen": 8.364526748657227, "log_odds_ratio": -0.0009362439159303904, "logits/chosen": -0.5520797967910767, "logits/rejected": -0.5805567502975464, "logps/chosen": -0.006305334623903036, "logps/rejected": -1.6327786445617676, "loss": 1.2901, "nll_loss": 0.32244062423706055, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006305334973149002, "rewards/margins": 0.1626473367214203, "rewards/rejected": -0.16327786445617676, "step": 4879 }, { "epoch": 3.3748271092669433, "grad_norm": 9.362500190734863, "learning_rate": 3.6806516059628095e-05, "log_odds_chosen": 6.6161298751831055, "log_odds_ratio": -0.12561531364917755, "logits/chosen": -0.332377552986145, "logits/rejected": -0.4098805785179138, "logps/chosen": -0.02750096283853054, "logps/rejected": -1.0053969621658325, "loss": 1.5629, "nll_loss": 0.37816768884658813, "rewards/accuracies": 0.875, "rewards/chosen": -0.0027500963769853115, "rewards/margins": 0.09778958559036255, "rewards/rejected": -0.1005396842956543, "step": 4880 }, { "epoch": 3.37551867219917, "grad_norm": 12.455388069152832, "learning_rate": 3.680267404333795e-05, "log_odds_chosen": 8.346939086914062, "log_odds_ratio": -0.01503918319940567, "logits/chosen": -0.5830432176589966, "logits/rejected": -0.6676121950149536, "logps/chosen": -0.005626752506941557, "logps/rejected": -1.3593151569366455, "loss": 1.3495, "nll_loss": 0.3358650207519531, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005626753554679453, "rewards/margins": 0.13536885380744934, "rewards/rejected": -0.13593152165412903, "step": 4881 }, { "epoch": 3.376210235131397, "grad_norm": 12.881577491760254, "learning_rate": 3.67988320270478e-05, "log_odds_chosen": 8.353086471557617, "log_odds_ratio": -0.213613823056221, "logits/chosen": -0.46307358145713806, "logits/rejected": -0.4926253855228424, "logps/chosen": -0.03921017795801163, "logps/rejected": -1.454315185546875, "loss": 1.2457, "nll_loss": 0.2900546193122864, "rewards/accuracies": 0.875, "rewards/chosen": -0.00392101751640439, "rewards/margins": 0.1415105164051056, "rewards/rejected": -0.1454315334558487, "step": 4882 }, { "epoch": 3.376901798063624, "grad_norm": 7.863828182220459, "learning_rate": 3.6794990010757646e-05, "log_odds_chosen": 8.951114654541016, "log_odds_ratio": -0.025473404675722122, "logits/chosen": -0.45563632249832153, "logits/rejected": -0.5035054683685303, "logps/chosen": -0.012901807203888893, "logps/rejected": -1.6659057140350342, "loss": 1.5606, "nll_loss": 0.3876059353351593, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012901807203888893, "rewards/margins": 0.1653003990650177, "rewards/rejected": -0.16659057140350342, "step": 4883 }, { "epoch": 3.3775933609958506, "grad_norm": 4.709164619445801, "learning_rate": 3.67911479944675e-05, "log_odds_chosen": 8.327884674072266, "log_odds_ratio": -0.0027023141738027334, "logits/chosen": -0.37330159544944763, "logits/rejected": -0.4148619771003723, "logps/chosen": -0.0011940773110836744, "logps/rejected": -1.006005048751831, "loss": 1.4531, "nll_loss": 0.36301249265670776, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001194077412947081, "rewards/margins": 0.10048110783100128, "rewards/rejected": -0.10060051828622818, "step": 4884 }, { "epoch": 3.3782849239280774, "grad_norm": 16.095605850219727, "learning_rate": 3.678730597817735e-05, "log_odds_chosen": 9.359745025634766, "log_odds_ratio": -0.002318818122148514, "logits/chosen": -0.1334265023469925, "logits/rejected": -0.3042029142379761, "logps/chosen": -0.009565573185682297, "logps/rejected": -1.9956917762756348, "loss": 1.9727, "nll_loss": 0.49293580651283264, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009565572836436331, "rewards/margins": 0.19861261546611786, "rewards/rejected": -0.1995691955089569, "step": 4885 }, { "epoch": 3.3789764868603043, "grad_norm": 5.004652500152588, "learning_rate": 3.6783463961887196e-05, "log_odds_chosen": 8.115644454956055, "log_odds_ratio": -0.01304934173822403, "logits/chosen": -0.4006246328353882, "logits/rejected": -0.467983216047287, "logps/chosen": -0.018195848912000656, "logps/rejected": -1.8651471138000488, "loss": 1.3653, "nll_loss": 0.34003227949142456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018195848679170012, "rewards/margins": 0.18469512462615967, "rewards/rejected": -0.1865147203207016, "step": 4886 }, { "epoch": 3.379668049792531, "grad_norm": 8.98936939239502, "learning_rate": 3.6779621945597055e-05, "log_odds_chosen": 9.405214309692383, "log_odds_ratio": -0.0025051278062164783, "logits/chosen": -0.5249983072280884, "logits/rejected": -0.5861697793006897, "logps/chosen": -0.016129275783896446, "logps/rejected": -1.8816158771514893, "loss": 1.2716, "nll_loss": 0.3176405429840088, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016129277646541595, "rewards/margins": 0.18654866516590118, "rewards/rejected": -0.18816159665584564, "step": 4887 }, { "epoch": 3.380359612724758, "grad_norm": 17.38018798828125, "learning_rate": 3.67757799293069e-05, "log_odds_chosen": 9.830801010131836, "log_odds_ratio": -0.00029084287234582007, "logits/chosen": -0.48474544286727905, "logits/rejected": -0.553375244140625, "logps/chosen": -0.0007540949736721814, "logps/rejected": -2.2699027061462402, "loss": 1.4132, "nll_loss": 0.3532586395740509, "rewards/accuracies": 1.0, "rewards/chosen": -7.54095017327927e-05, "rewards/margins": 0.22691485285758972, "rewards/rejected": -0.2269902527332306, "step": 4888 }, { "epoch": 3.3810511756569848, "grad_norm": 10.87319564819336, "learning_rate": 3.6771937913016753e-05, "log_odds_chosen": 8.066292762756348, "log_odds_ratio": -0.0014551844215020537, "logits/chosen": -0.37830474972724915, "logits/rejected": -0.42029958963394165, "logps/chosen": -0.023234685882925987, "logps/rejected": -1.609140396118164, "loss": 1.9007, "nll_loss": 0.4750228524208069, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023234684485942125, "rewards/margins": 0.15859057009220123, "rewards/rejected": -0.16091403365135193, "step": 4889 }, { "epoch": 3.3817427385892116, "grad_norm": 7.3637309074401855, "learning_rate": 3.6768095896726606e-05, "log_odds_chosen": 9.156384468078613, "log_odds_ratio": -0.00031103662331588566, "logits/chosen": -0.6396990418434143, "logits/rejected": -0.6863707304000854, "logps/chosen": -0.0024397203233093023, "logps/rejected": -1.5034765005111694, "loss": 1.6916, "nll_loss": 0.4228590726852417, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024397202651016414, "rewards/margins": 0.15010368824005127, "rewards/rejected": -0.15034765005111694, "step": 4890 }, { "epoch": 3.3824343015214384, "grad_norm": 19.589651107788086, "learning_rate": 3.676425388043646e-05, "log_odds_chosen": 11.059402465820312, "log_odds_ratio": -2.0674237021012232e-05, "logits/chosen": -0.5618711709976196, "logits/rejected": -0.6643784046173096, "logps/chosen": -0.00018295198970008641, "logps/rejected": -2.2173166275024414, "loss": 2.0076, "nll_loss": 0.5019065141677856, "rewards/accuracies": 1.0, "rewards/chosen": -1.829519897000864e-05, "rewards/margins": 0.2217133641242981, "rewards/rejected": -0.22173166275024414, "step": 4891 }, { "epoch": 3.3831258644536653, "grad_norm": 6.292947769165039, "learning_rate": 3.6760411864146304e-05, "log_odds_chosen": 9.908514022827148, "log_odds_ratio": -0.00021772683248855174, "logits/chosen": -0.38789576292037964, "logits/rejected": -0.48067206144332886, "logps/chosen": -0.002419173950329423, "logps/rejected": -1.8695409297943115, "loss": 1.1321, "nll_loss": 0.2830020487308502, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024191739794332534, "rewards/margins": 0.18671217560768127, "rewards/rejected": -0.1869540959596634, "step": 4892 }, { "epoch": 3.383817427385892, "grad_norm": 7.79653263092041, "learning_rate": 3.6756569847856156e-05, "log_odds_chosen": 8.922075271606445, "log_odds_ratio": -0.0006197122274897993, "logits/chosen": -0.6272656321525574, "logits/rejected": -0.6524254679679871, "logps/chosen": -0.0009733512415550649, "logps/rejected": -1.533919095993042, "loss": 1.1412, "nll_loss": 0.2852276563644409, "rewards/accuracies": 1.0, "rewards/chosen": -9.733513434184715e-05, "rewards/margins": 0.15329457819461823, "rewards/rejected": -0.15339191257953644, "step": 4893 }, { "epoch": 3.384508990318119, "grad_norm": 8.590242385864258, "learning_rate": 3.675272783156601e-05, "log_odds_chosen": 7.531237602233887, "log_odds_ratio": -0.30172964930534363, "logits/chosen": -0.11642412841320038, "logits/rejected": -0.13467153906822205, "logps/chosen": -0.03982772305607796, "logps/rejected": -1.277442216873169, "loss": 1.7269, "nll_loss": 0.4015564024448395, "rewards/accuracies": 0.875, "rewards/chosen": -0.003982772585004568, "rewards/margins": 0.12376146018505096, "rewards/rejected": -0.12774422764778137, "step": 4894 }, { "epoch": 3.3852005532503457, "grad_norm": 14.8712797164917, "learning_rate": 3.6748885815275855e-05, "log_odds_chosen": 10.017523765563965, "log_odds_ratio": -0.003008556319400668, "logits/chosen": -0.7771372199058533, "logits/rejected": -0.872925877571106, "logps/chosen": -0.0047239093109965324, "logps/rejected": -2.0477962493896484, "loss": 2.0123, "nll_loss": 0.5027673244476318, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004723909660242498, "rewards/margins": 0.20430722832679749, "rewards/rejected": -0.20477962493896484, "step": 4895 }, { "epoch": 3.3858921161825726, "grad_norm": 7.275940418243408, "learning_rate": 3.6745043798985714e-05, "log_odds_chosen": 8.62596607208252, "log_odds_ratio": -0.0013201197143644094, "logits/chosen": -0.3239147365093231, "logits/rejected": -0.4286247789859772, "logps/chosen": -0.00924981851130724, "logps/rejected": -1.9703105688095093, "loss": 1.6702, "nll_loss": 0.41741546988487244, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009249818976968527, "rewards/margins": 0.19610606133937836, "rewards/rejected": -0.19703106582164764, "step": 4896 }, { "epoch": 3.3865836791147994, "grad_norm": 13.720173835754395, "learning_rate": 3.674120178269556e-05, "log_odds_chosen": 9.530720710754395, "log_odds_ratio": -0.0002733979490585625, "logits/chosen": -0.6544563174247742, "logits/rejected": -0.7413820028305054, "logps/chosen": -0.0007776152924634516, "logps/rejected": -1.820650339126587, "loss": 1.4968, "nll_loss": 0.3741660714149475, "rewards/accuracies": 1.0, "rewards/chosen": -7.776152779115364e-05, "rewards/margins": 0.18198728561401367, "rewards/rejected": -0.18206505477428436, "step": 4897 }, { "epoch": 3.3872752420470262, "grad_norm": 8.170251846313477, "learning_rate": 3.673735976640541e-05, "log_odds_chosen": 8.625896453857422, "log_odds_ratio": -0.023443307727575302, "logits/chosen": -0.5422019958496094, "logits/rejected": -0.5712448954582214, "logps/chosen": -0.006744992453604937, "logps/rejected": -1.371507167816162, "loss": 1.9066, "nll_loss": 0.4743000864982605, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006744992570020258, "rewards/margins": 0.13647620379924774, "rewards/rejected": -0.13715071976184845, "step": 4898 }, { "epoch": 3.387966804979253, "grad_norm": 5.375954627990723, "learning_rate": 3.6733517750115264e-05, "log_odds_chosen": 9.618953704833984, "log_odds_ratio": -0.0001843458303483203, "logits/chosen": -0.6380875706672668, "logits/rejected": -0.701280951499939, "logps/chosen": -0.018095921725034714, "logps/rejected": -2.783842086791992, "loss": 1.483, "nll_loss": 0.3707388639450073, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018095922423526645, "rewards/margins": 0.27657461166381836, "rewards/rejected": -0.27838417887687683, "step": 4899 }, { "epoch": 3.38865836791148, "grad_norm": 13.91885757446289, "learning_rate": 3.672967573382512e-05, "log_odds_chosen": 10.686237335205078, "log_odds_ratio": -3.720568201970309e-05, "logits/chosen": -0.6631019115447998, "logits/rejected": -0.6991225481033325, "logps/chosen": -0.0006842121947556734, "logps/rejected": -2.6362667083740234, "loss": 1.8289, "nll_loss": 0.4572334587574005, "rewards/accuracies": 1.0, "rewards/chosen": -6.842121365480125e-05, "rewards/margins": 0.2635582685470581, "rewards/rejected": -0.26362669467926025, "step": 4900 }, { "epoch": 3.3893499308437067, "grad_norm": 10.791473388671875, "learning_rate": 3.672583371753496e-05, "log_odds_chosen": 8.14004898071289, "log_odds_ratio": -0.17836718261241913, "logits/chosen": -0.5453934669494629, "logits/rejected": -0.643043041229248, "logps/chosen": -0.02400169149041176, "logps/rejected": -1.5562868118286133, "loss": 1.136, "nll_loss": 0.2661687433719635, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024001693818718195, "rewards/margins": 0.1532285213470459, "rewards/rejected": -0.15562868118286133, "step": 4901 }, { "epoch": 3.3900414937759336, "grad_norm": 14.164995193481445, "learning_rate": 3.6721991701244815e-05, "log_odds_chosen": 9.055171966552734, "log_odds_ratio": -0.0006158703472465277, "logits/chosen": -0.559080183506012, "logits/rejected": -0.6751291155815125, "logps/chosen": -0.0006700906669721007, "logps/rejected": -1.8620007038116455, "loss": 1.955, "nll_loss": 0.48869603872299194, "rewards/accuracies": 1.0, "rewards/chosen": -6.700906669721007e-05, "rewards/margins": 0.18613307178020477, "rewards/rejected": -0.1862000823020935, "step": 4902 }, { "epoch": 3.3907330567081604, "grad_norm": 9.520732879638672, "learning_rate": 3.671814968495467e-05, "log_odds_chosen": 8.1787691116333, "log_odds_ratio": -0.030842209234833717, "logits/chosen": -0.5617755055427551, "logits/rejected": -0.623063325881958, "logps/chosen": -0.008788838982582092, "logps/rejected": -1.0116199254989624, "loss": 1.6207, "nll_loss": 0.4020839333534241, "rewards/accuracies": 1.0, "rewards/chosen": -0.000878883816767484, "rewards/margins": 0.10028310865163803, "rewards/rejected": -0.10116199404001236, "step": 4903 }, { "epoch": 3.391424619640387, "grad_norm": 11.027510643005371, "learning_rate": 3.671430766866451e-05, "log_odds_chosen": 8.988664627075195, "log_odds_ratio": -0.0020216992124915123, "logits/chosen": -0.5873269438743591, "logits/rejected": -0.681334912776947, "logps/chosen": -0.005092856008559465, "logps/rejected": -1.7773408889770508, "loss": 1.6894, "nll_loss": 0.42215490341186523, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005092856008559465, "rewards/margins": 0.177224799990654, "rewards/rejected": -0.1777341067790985, "step": 4904 }, { "epoch": 3.392116182572614, "grad_norm": 7.16702127456665, "learning_rate": 3.6710465652374365e-05, "log_odds_chosen": 8.271673202514648, "log_odds_ratio": -0.03412136435508728, "logits/chosen": -0.6266486048698425, "logits/rejected": -0.6206366419792175, "logps/chosen": -0.021237920969724655, "logps/rejected": -1.6427576541900635, "loss": 1.4746, "nll_loss": 0.3652297556400299, "rewards/accuracies": 1.0, "rewards/chosen": -0.002123792190104723, "rewards/margins": 0.1621519923210144, "rewards/rejected": -0.16427578032016754, "step": 4905 }, { "epoch": 3.392807745504841, "grad_norm": 10.318666458129883, "learning_rate": 3.670662363608422e-05, "log_odds_chosen": 9.693625450134277, "log_odds_ratio": -0.00013604509877040982, "logits/chosen": -0.9521337151527405, "logits/rejected": -1.0116444826126099, "logps/chosen": -0.0003407415933907032, "logps/rejected": -1.826693058013916, "loss": 2.1946, "nll_loss": 0.54863440990448, "rewards/accuracies": 1.0, "rewards/chosen": -3.407416079426184e-05, "rewards/margins": 0.18263523280620575, "rewards/rejected": -0.18266932666301727, "step": 4906 }, { "epoch": 3.3934993084370677, "grad_norm": 7.364135265350342, "learning_rate": 3.670278161979407e-05, "log_odds_chosen": 9.088154792785645, "log_odds_ratio": -0.0007545155822299421, "logits/chosen": -0.5334247350692749, "logits/rejected": -0.5444377064704895, "logps/chosen": -0.001719134277664125, "logps/rejected": -1.2499489784240723, "loss": 1.8055, "nll_loss": 0.451288104057312, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017191344522871077, "rewards/margins": 0.12482300400733948, "rewards/rejected": -0.1249949038028717, "step": 4907 }, { "epoch": 3.3941908713692945, "grad_norm": 10.528717041015625, "learning_rate": 3.6698939603503916e-05, "log_odds_chosen": 10.229093551635742, "log_odds_ratio": -7.416566222673282e-05, "logits/chosen": -1.0112329721450806, "logits/rejected": -1.0509593486785889, "logps/chosen": -0.0007192917400971055, "logps/rejected": -2.0609207153320312, "loss": 2.2368, "nll_loss": 0.5591920614242554, "rewards/accuracies": 1.0, "rewards/chosen": -7.19291710993275e-05, "rewards/margins": 0.20602013170719147, "rewards/rejected": -0.20609205961227417, "step": 4908 }, { "epoch": 3.3948824343015214, "grad_norm": 7.505864143371582, "learning_rate": 3.6695097587213775e-05, "log_odds_chosen": 8.713752746582031, "log_odds_ratio": -0.024289878085255623, "logits/chosen": -0.6097840666770935, "logits/rejected": -0.5826661586761475, "logps/chosen": -0.19899815320968628, "logps/rejected": -1.7063466310501099, "loss": 1.9188, "nll_loss": 0.4772747755050659, "rewards/accuracies": 1.0, "rewards/chosen": -0.019899815320968628, "rewards/margins": 0.15073484182357788, "rewards/rejected": -0.1706346720457077, "step": 4909 }, { "epoch": 3.395573997233748, "grad_norm": 7.943901062011719, "learning_rate": 3.669125557092362e-05, "log_odds_chosen": 9.464241027832031, "log_odds_ratio": -0.0004103784740436822, "logits/chosen": -0.8146414756774902, "logits/rejected": -0.8581461906433105, "logps/chosen": -0.001535814255475998, "logps/rejected": -1.7533268928527832, "loss": 1.5418, "nll_loss": 0.3854040801525116, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015358140808530152, "rewards/margins": 0.17517909407615662, "rewards/rejected": -0.1753326952457428, "step": 4910 }, { "epoch": 3.396265560165975, "grad_norm": 6.460813999176025, "learning_rate": 3.668741355463347e-05, "log_odds_chosen": 8.076436996459961, "log_odds_ratio": -0.053030095994472504, "logits/chosen": -0.4412263035774231, "logits/rejected": -0.4743978679180145, "logps/chosen": -0.019328976050019264, "logps/rejected": -1.1220247745513916, "loss": 1.2356, "nll_loss": 0.3035872280597687, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019328977214172482, "rewards/margins": 0.11026957631111145, "rewards/rejected": -0.1122024729847908, "step": 4911 }, { "epoch": 3.396957123098202, "grad_norm": 10.06266975402832, "learning_rate": 3.6683571538343326e-05, "log_odds_chosen": 9.370516777038574, "log_odds_ratio": -0.002869781805202365, "logits/chosen": -0.32210591435432434, "logits/rejected": -0.4086999297142029, "logps/chosen": -0.0046303002163767815, "logps/rejected": -1.9719274044036865, "loss": 1.2744, "nll_loss": 0.3183148503303528, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046303000999614596, "rewards/margins": 0.19672970473766327, "rewards/rejected": -0.1971927285194397, "step": 4912 }, { "epoch": 3.3976486860304287, "grad_norm": 19.497352600097656, "learning_rate": 3.667972952205317e-05, "log_odds_chosen": 11.04720687866211, "log_odds_ratio": -7.522387750213966e-05, "logits/chosen": -1.0588092803955078, "logits/rejected": -1.1463772058486938, "logps/chosen": -0.0001778826117515564, "logps/rejected": -2.447268009185791, "loss": 1.5913, "nll_loss": 0.39780738949775696, "rewards/accuracies": 1.0, "rewards/chosen": -1.778826117515564e-05, "rewards/margins": 0.24470902979373932, "rewards/rejected": -0.24472680687904358, "step": 4913 }, { "epoch": 3.3983402489626555, "grad_norm": 7.700289726257324, "learning_rate": 3.6675887505763024e-05, "log_odds_chosen": 9.542242050170898, "log_odds_ratio": -0.00026488027651794255, "logits/chosen": -0.5816613435745239, "logits/rejected": -0.5703340172767639, "logps/chosen": -0.0014484458370134234, "logps/rejected": -1.913750410079956, "loss": 1.6671, "nll_loss": 0.41675370931625366, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001448446128051728, "rewards/margins": 0.19123020768165588, "rewards/rejected": -0.19137504696846008, "step": 4914 }, { "epoch": 3.3990318118948823, "grad_norm": 7.914902687072754, "learning_rate": 3.6672045489472876e-05, "log_odds_chosen": 8.947216987609863, "log_odds_ratio": -0.000677384901791811, "logits/chosen": -0.49002915620803833, "logits/rejected": -0.4953732490539551, "logps/chosen": -0.0006747872103005648, "logps/rejected": -1.2044634819030762, "loss": 1.1815, "nll_loss": 0.29530438780784607, "rewards/accuracies": 1.0, "rewards/chosen": -6.747871520929039e-05, "rewards/margins": 0.12037887424230576, "rewards/rejected": -0.1204463541507721, "step": 4915 }, { "epoch": 3.399723374827109, "grad_norm": 7.413805961608887, "learning_rate": 3.666820347318273e-05, "log_odds_chosen": 8.656720161437988, "log_odds_ratio": -0.014592528343200684, "logits/chosen": -0.6957269906997681, "logits/rejected": -0.6722280383110046, "logps/chosen": -0.007290617562830448, "logps/rejected": -1.567755937576294, "loss": 1.222, "nll_loss": 0.3040284514427185, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007290617795661092, "rewards/margins": 0.156046524643898, "rewards/rejected": -0.1567755788564682, "step": 4916 }, { "epoch": 3.400414937759336, "grad_norm": 7.193758964538574, "learning_rate": 3.6664361456892574e-05, "log_odds_chosen": 8.801361083984375, "log_odds_ratio": -0.00035949796438217163, "logits/chosen": -0.5192161202430725, "logits/rejected": -0.5951474905014038, "logps/chosen": -0.019580980762839317, "logps/rejected": -1.7406668663024902, "loss": 1.5407, "nll_loss": 0.3851466178894043, "rewards/accuracies": 1.0, "rewards/chosen": -0.001958098029717803, "rewards/margins": 0.17210859060287476, "rewards/rejected": -0.1740666925907135, "step": 4917 }, { "epoch": 3.401106500691563, "grad_norm": 8.03855037689209, "learning_rate": 3.6660519440602434e-05, "log_odds_chosen": 8.034245491027832, "log_odds_ratio": -0.03657183051109314, "logits/chosen": -0.6469733715057373, "logits/rejected": -0.7322840690612793, "logps/chosen": -0.06051166355609894, "logps/rejected": -2.151496648788452, "loss": 1.4062, "nll_loss": 0.3478994369506836, "rewards/accuracies": 1.0, "rewards/chosen": -0.006051166914403439, "rewards/margins": 0.20909848809242249, "rewards/rejected": -0.2151496559381485, "step": 4918 }, { "epoch": 3.4017980636237897, "grad_norm": 6.80123233795166, "learning_rate": 3.665667742431228e-05, "log_odds_chosen": 8.935651779174805, "log_odds_ratio": -0.0021722454112023115, "logits/chosen": -0.4752444624900818, "logits/rejected": -0.5332791805267334, "logps/chosen": -0.015160152688622475, "logps/rejected": -1.858079195022583, "loss": 1.183, "nll_loss": 0.2955396771430969, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015160152688622475, "rewards/margins": 0.18429191410541534, "rewards/rejected": -0.1858079433441162, "step": 4919 }, { "epoch": 3.4024896265560165, "grad_norm": 8.983878135681152, "learning_rate": 3.665283540802213e-05, "log_odds_chosen": 8.991377830505371, "log_odds_ratio": -0.0006017258856445551, "logits/chosen": -0.47858548164367676, "logits/rejected": -0.4981576204299927, "logps/chosen": -0.002347870497033, "logps/rejected": -1.3704099655151367, "loss": 1.2898, "nll_loss": 0.322380930185318, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023478706134483218, "rewards/margins": 0.13680621981620789, "rewards/rejected": -0.13704100251197815, "step": 4920 }, { "epoch": 3.4031811894882433, "grad_norm": 9.267348289489746, "learning_rate": 3.6648993391731984e-05, "log_odds_chosen": 9.750808715820312, "log_odds_ratio": -0.06280557066202164, "logits/chosen": -0.7340126037597656, "logits/rejected": -0.7577941417694092, "logps/chosen": -0.049555785953998566, "logps/rejected": -1.7277791500091553, "loss": 3.0292, "nll_loss": 0.7510218620300293, "rewards/accuracies": 1.0, "rewards/chosen": -0.004955578595399857, "rewards/margins": 0.16782233119010925, "rewards/rejected": -0.17277792096138, "step": 4921 }, { "epoch": 3.40387275242047, "grad_norm": 8.101119041442871, "learning_rate": 3.664515137544183e-05, "log_odds_chosen": 10.515046119689941, "log_odds_ratio": -0.00010872560960706323, "logits/chosen": -0.6100264191627502, "logits/rejected": -0.6120408773422241, "logps/chosen": -0.00031758565455675125, "logps/rejected": -2.23378324508667, "loss": 1.607, "nll_loss": 0.401741623878479, "rewards/accuracies": 1.0, "rewards/chosen": -3.1758565455675125e-05, "rewards/margins": 0.22334657609462738, "rewards/rejected": -0.22337834537029266, "step": 4922 }, { "epoch": 3.404564315352697, "grad_norm": 14.783284187316895, "learning_rate": 3.664130935915168e-05, "log_odds_chosen": 8.665579795837402, "log_odds_ratio": -0.05588802322745323, "logits/chosen": -1.0751800537109375, "logits/rejected": -1.0729825496673584, "logps/chosen": -0.023833846673369408, "logps/rejected": -1.7382769584655762, "loss": 1.8384, "nll_loss": 0.4540029764175415, "rewards/accuracies": 1.0, "rewards/chosen": -0.002383384620770812, "rewards/margins": 0.17144431173801422, "rewards/rejected": -0.17382769286632538, "step": 4923 }, { "epoch": 3.405255878284924, "grad_norm": 7.320456504821777, "learning_rate": 3.6637467342861535e-05, "log_odds_chosen": 8.874919891357422, "log_odds_ratio": -0.07711444050073624, "logits/chosen": -0.5605841279029846, "logits/rejected": -0.6308436989784241, "logps/chosen": -0.0178984422236681, "logps/rejected": -1.7320085763931274, "loss": 1.2618, "nll_loss": 0.3077423572540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017898440128192306, "rewards/margins": 0.17141102254390717, "rewards/rejected": -0.17320087552070618, "step": 4924 }, { "epoch": 3.4059474412171507, "grad_norm": 13.005224227905273, "learning_rate": 3.663362532657139e-05, "log_odds_chosen": 9.509387969970703, "log_odds_ratio": -0.0002891596523113549, "logits/chosen": -0.9162966012954712, "logits/rejected": -0.9428678154945374, "logps/chosen": -0.0006765555590391159, "logps/rejected": -1.5674023628234863, "loss": 1.4824, "nll_loss": 0.37056732177734375, "rewards/accuracies": 1.0, "rewards/chosen": -6.765555735910311e-05, "rewards/margins": 0.15667259693145752, "rewards/rejected": -0.1567402482032776, "step": 4925 }, { "epoch": 3.4066390041493775, "grad_norm": 7.998945713043213, "learning_rate": 3.662978331028123e-05, "log_odds_chosen": 9.285704612731934, "log_odds_ratio": -0.0003611869178712368, "logits/chosen": -0.6982068419456482, "logits/rejected": -0.7881613373756409, "logps/chosen": -0.005835440009832382, "logps/rejected": -1.9285674095153809, "loss": 2.1622, "nll_loss": 0.5405056476593018, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005835440242663026, "rewards/margins": 0.19227319955825806, "rewards/rejected": -0.19285675883293152, "step": 4926 }, { "epoch": 3.4073305670816043, "grad_norm": 5.620597839355469, "learning_rate": 3.662594129399109e-05, "log_odds_chosen": 8.169038772583008, "log_odds_ratio": -0.0014991657808423042, "logits/chosen": -0.5135716199874878, "logits/rejected": -0.6402672529220581, "logps/chosen": -0.0455855056643486, "logps/rejected": -2.884558916091919, "loss": 1.7886, "nll_loss": 0.44701242446899414, "rewards/accuracies": 1.0, "rewards/chosen": -0.004558550659567118, "rewards/margins": 0.283897340297699, "rewards/rejected": -0.28845590353012085, "step": 4927 }, { "epoch": 3.408022130013831, "grad_norm": 13.760601997375488, "learning_rate": 3.662209927770094e-05, "log_odds_chosen": 10.625243186950684, "log_odds_ratio": -6.922941975062713e-05, "logits/chosen": -0.3769231140613556, "logits/rejected": -0.5266758799552917, "logps/chosen": -0.0001821487967390567, "logps/rejected": -2.1213905811309814, "loss": 1.661, "nll_loss": 0.41524389386177063, "rewards/accuracies": 1.0, "rewards/chosen": -1.821487967390567e-05, "rewards/margins": 0.21212083101272583, "rewards/rejected": -0.2121390402317047, "step": 4928 }, { "epoch": 3.408713692946058, "grad_norm": 10.003327369689941, "learning_rate": 3.661825726141079e-05, "log_odds_chosen": 7.59705114364624, "log_odds_ratio": -0.018656501546502113, "logits/chosen": -0.515770435333252, "logits/rejected": -0.6440622806549072, "logps/chosen": -0.01772279106080532, "logps/rejected": -1.9347506761550903, "loss": 2.0547, "nll_loss": 0.5118147134780884, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017722791526466608, "rewards/margins": 0.19170278310775757, "rewards/rejected": -0.19347506761550903, "step": 4929 }, { "epoch": 3.409405255878285, "grad_norm": 6.344710350036621, "learning_rate": 3.661441524512064e-05, "log_odds_chosen": 8.29608154296875, "log_odds_ratio": -0.0021369177848100662, "logits/chosen": -0.5516109466552734, "logits/rejected": -0.5961017608642578, "logps/chosen": -0.016617944464087486, "logps/rejected": -1.9796082973480225, "loss": 1.2063, "nll_loss": 0.30136656761169434, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016617946093901992, "rewards/margins": 0.19629904627799988, "rewards/rejected": -0.19796085357666016, "step": 4930 }, { "epoch": 3.4100968188105116, "grad_norm": 7.435446739196777, "learning_rate": 3.661057322883049e-05, "log_odds_chosen": 8.216718673706055, "log_odds_ratio": -0.0028341393917798996, "logits/chosen": -0.7698273062705994, "logits/rejected": -0.7450141906738281, "logps/chosen": -0.012603234499692917, "logps/rejected": -1.575285792350769, "loss": 1.4467, "nll_loss": 0.361391544342041, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012603236827999353, "rewards/margins": 0.15626825392246246, "rewards/rejected": -0.1575285792350769, "step": 4931 }, { "epoch": 3.4107883817427385, "grad_norm": 10.248863220214844, "learning_rate": 3.660673121254034e-05, "log_odds_chosen": 10.4366455078125, "log_odds_ratio": -0.00032715650741010904, "logits/chosen": -0.40946805477142334, "logits/rejected": -0.49355852603912354, "logps/chosen": -0.0004456047317944467, "logps/rejected": -1.8036550283432007, "loss": 1.2218, "nll_loss": 0.3054129183292389, "rewards/accuracies": 1.0, "rewards/chosen": -4.4560470996657386e-05, "rewards/margins": 0.18032094836235046, "rewards/rejected": -0.18036550283432007, "step": 4932 }, { "epoch": 3.4114799446749653, "grad_norm": 9.72933292388916, "learning_rate": 3.660288919625019e-05, "log_odds_chosen": 9.800952911376953, "log_odds_ratio": -0.0003480328305158764, "logits/chosen": -0.5476824641227722, "logits/rejected": -0.6130825877189636, "logps/chosen": -0.00100328354164958, "logps/rejected": -2.4654011726379395, "loss": 1.7633, "nll_loss": 0.4407961666584015, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010032836871687323, "rewards/margins": 0.24643978476524353, "rewards/rejected": -0.2465401142835617, "step": 4933 }, { "epoch": 3.412171507607192, "grad_norm": 10.899541854858398, "learning_rate": 3.6599047179960046e-05, "log_odds_chosen": 7.08076286315918, "log_odds_ratio": -0.05577649176120758, "logits/chosen": -0.4027561843395233, "logits/rejected": -0.43499526381492615, "logps/chosen": -0.023526165634393692, "logps/rejected": -1.6664758920669556, "loss": 1.477, "nll_loss": 0.36366114020347595, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023526165168732405, "rewards/margins": 0.16429497301578522, "rewards/rejected": -0.16664758324623108, "step": 4934 }, { "epoch": 3.412863070539419, "grad_norm": 12.654091835021973, "learning_rate": 3.659520516366989e-05, "log_odds_chosen": 10.54169750213623, "log_odds_ratio": -5.913171116844751e-05, "logits/chosen": -0.4336695373058319, "logits/rejected": -0.5459045767784119, "logps/chosen": -0.00021590096002910286, "logps/rejected": -2.1453845500946045, "loss": 1.4929, "nll_loss": 0.3732162117958069, "rewards/accuracies": 1.0, "rewards/chosen": -2.1590094547718763e-05, "rewards/margins": 0.21451690793037415, "rewards/rejected": -0.21453848481178284, "step": 4935 }, { "epoch": 3.413554633471646, "grad_norm": 10.615986824035645, "learning_rate": 3.659136314737975e-05, "log_odds_chosen": 10.210262298583984, "log_odds_ratio": -0.0001049021229846403, "logits/chosen": -0.45864614844322205, "logits/rejected": -0.5385798215866089, "logps/chosen": -0.006646535359323025, "logps/rejected": -2.5832815170288086, "loss": 1.3402, "nll_loss": 0.3350418210029602, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006646536057814956, "rewards/margins": 0.2576634883880615, "rewards/rejected": -0.2583281397819519, "step": 4936 }, { "epoch": 3.4142461964038726, "grad_norm": 13.86459732055664, "learning_rate": 3.6587521131089596e-05, "log_odds_chosen": 10.201940536499023, "log_odds_ratio": -0.00038717055576853454, "logits/chosen": -0.4907549023628235, "logits/rejected": -0.6592799425125122, "logps/chosen": -0.0009741213289089501, "logps/rejected": -2.4662675857543945, "loss": 1.7158, "nll_loss": 0.42890262603759766, "rewards/accuracies": 1.0, "rewards/chosen": -9.741213580127805e-05, "rewards/margins": 0.24652934074401855, "rewards/rejected": -0.24662676453590393, "step": 4937 }, { "epoch": 3.4149377593360994, "grad_norm": 12.028914451599121, "learning_rate": 3.658367911479945e-05, "log_odds_chosen": 9.5020751953125, "log_odds_ratio": -0.0001335785782430321, "logits/chosen": -0.7141174077987671, "logits/rejected": -0.8637199401855469, "logps/chosen": -0.0005194094264879823, "logps/rejected": -1.3897552490234375, "loss": 1.5818, "nll_loss": 0.3954324424266815, "rewards/accuracies": 1.0, "rewards/chosen": -5.194094046601094e-05, "rewards/margins": 0.13892358541488647, "rewards/rejected": -0.13897553086280823, "step": 4938 }, { "epoch": 3.4156293222683263, "grad_norm": 7.543969631195068, "learning_rate": 3.65798370985093e-05, "log_odds_chosen": 9.680569648742676, "log_odds_ratio": -0.0002025132707785815, "logits/chosen": -0.7034863233566284, "logits/rejected": -0.6991147994995117, "logps/chosen": -0.006458500865846872, "logps/rejected": -2.526951313018799, "loss": 1.3906, "nll_loss": 0.3476356267929077, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006458500865846872, "rewards/margins": 0.2520492970943451, "rewards/rejected": -0.25269514322280884, "step": 4939 }, { "epoch": 3.416320885200553, "grad_norm": 13.711503982543945, "learning_rate": 3.657599508221915e-05, "log_odds_chosen": 10.692815780639648, "log_odds_ratio": -0.00013191672042012215, "logits/chosen": -0.6346333622932434, "logits/rejected": -0.7078859210014343, "logps/chosen": -0.00046483371988870203, "logps/rejected": -2.3791491985321045, "loss": 1.7774, "nll_loss": 0.44433537125587463, "rewards/accuracies": 1.0, "rewards/chosen": -4.6483371988870203e-05, "rewards/margins": 0.23786845803260803, "rewards/rejected": -0.23791491985321045, "step": 4940 }, { "epoch": 3.41701244813278, "grad_norm": 9.96644401550293, "learning_rate": 3.6572153065929e-05, "log_odds_chosen": 8.98678207397461, "log_odds_ratio": -0.0009053864632733166, "logits/chosen": -0.7697640061378479, "logits/rejected": -0.8465602397918701, "logps/chosen": -0.004368194378912449, "logps/rejected": -1.7595570087432861, "loss": 1.4312, "nll_loss": 0.3577001988887787, "rewards/accuracies": 1.0, "rewards/chosen": -0.000436819507740438, "rewards/margins": 0.17551888525485992, "rewards/rejected": -0.17595569789409637, "step": 4941 }, { "epoch": 3.4177040110650068, "grad_norm": 16.0850830078125, "learning_rate": 3.656831104963885e-05, "log_odds_chosen": 9.48902702331543, "log_odds_ratio": -0.17636118829250336, "logits/chosen": -0.7930417060852051, "logits/rejected": -0.8710612654685974, "logps/chosen": -0.020490722730755806, "logps/rejected": -2.0081634521484375, "loss": 1.847, "nll_loss": 0.4441096782684326, "rewards/accuracies": 0.875, "rewards/chosen": -0.0020490719471126795, "rewards/margins": 0.1987672746181488, "rewards/rejected": -0.2008163332939148, "step": 4942 }, { "epoch": 3.4183955739972336, "grad_norm": 17.637163162231445, "learning_rate": 3.6564469033348704e-05, "log_odds_chosen": 8.011113166809082, "log_odds_ratio": -0.011292091570794582, "logits/chosen": -0.32651209831237793, "logits/rejected": -0.34382864832878113, "logps/chosen": -0.013978070579469204, "logps/rejected": -1.6209546327590942, "loss": 1.6851, "nll_loss": 0.4201478660106659, "rewards/accuracies": 1.0, "rewards/chosen": -0.001397807034663856, "rewards/margins": 0.16069765388965607, "rewards/rejected": -0.16209547221660614, "step": 4943 }, { "epoch": 3.4190871369294604, "grad_norm": 8.398159980773926, "learning_rate": 3.656062701705855e-05, "log_odds_chosen": 9.178977012634277, "log_odds_ratio": -0.03616366535425186, "logits/chosen": -0.5493403077125549, "logits/rejected": -0.5453891754150391, "logps/chosen": -0.061218440532684326, "logps/rejected": -1.525048017501831, "loss": 1.8663, "nll_loss": 0.46294963359832764, "rewards/accuracies": 1.0, "rewards/chosen": -0.006121844053268433, "rewards/margins": 0.14638295769691467, "rewards/rejected": -0.1525048017501831, "step": 4944 }, { "epoch": 3.4197786998616873, "grad_norm": 10.590446472167969, "learning_rate": 3.655678500076841e-05, "log_odds_chosen": 9.760478973388672, "log_odds_ratio": -0.0004351499956101179, "logits/chosen": -1.0720016956329346, "logits/rejected": -1.1331433057785034, "logps/chosen": -0.004765262361615896, "logps/rejected": -2.271724224090576, "loss": 1.7589, "nll_loss": 0.43967100977897644, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004765262419823557, "rewards/margins": 0.22669591009616852, "rewards/rejected": -0.22717243432998657, "step": 4945 }, { "epoch": 3.420470262793914, "grad_norm": 7.860476493835449, "learning_rate": 3.6552942984478255e-05, "log_odds_chosen": 10.309771537780762, "log_odds_ratio": -9.26225766306743e-05, "logits/chosen": -0.49840879440307617, "logits/rejected": -0.6406351923942566, "logps/chosen": -0.0018750398885458708, "logps/rejected": -2.5959949493408203, "loss": 1.2851, "nll_loss": 0.32125723361968994, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018750397430267185, "rewards/margins": 0.25941202044487, "rewards/rejected": -0.259599506855011, "step": 4946 }, { "epoch": 3.421161825726141, "grad_norm": 9.410032272338867, "learning_rate": 3.654910096818811e-05, "log_odds_chosen": 9.610300064086914, "log_odds_ratio": -0.0013579919468611479, "logits/chosen": -0.7040968537330627, "logits/rejected": -0.7856951951980591, "logps/chosen": -0.010495437309145927, "logps/rejected": -2.737610340118408, "loss": 2.2543, "nll_loss": 0.5634455680847168, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010495438473299146, "rewards/margins": 0.27271148562431335, "rewards/rejected": -0.2737610340118408, "step": 4947 }, { "epoch": 3.4218533886583677, "grad_norm": 8.460103034973145, "learning_rate": 3.654525895189796e-05, "log_odds_chosen": 8.626173973083496, "log_odds_ratio": -0.01648002117872238, "logits/chosen": -0.5038471221923828, "logits/rejected": -0.48866933584213257, "logps/chosen": -0.05864041671156883, "logps/rejected": -2.3046083450317383, "loss": 1.3673, "nll_loss": 0.3401760458946228, "rewards/accuracies": 1.0, "rewards/chosen": -0.005864041391760111, "rewards/margins": 0.2245967835187912, "rewards/rejected": -0.23046083748340607, "step": 4948 }, { "epoch": 3.4225449515905946, "grad_norm": 12.7888765335083, "learning_rate": 3.6541416935607805e-05, "log_odds_chosen": 9.462170600891113, "log_odds_ratio": -0.012112999334931374, "logits/chosen": -0.8550928831100464, "logits/rejected": -0.8987554311752319, "logps/chosen": -0.011790499091148376, "logps/rejected": -2.268587112426758, "loss": 1.8554, "nll_loss": 0.46263372898101807, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011790499556809664, "rewards/margins": 0.2256796658039093, "rewards/rejected": -0.2268587350845337, "step": 4949 }, { "epoch": 3.4232365145228214, "grad_norm": 11.547723770141602, "learning_rate": 3.653757491931766e-05, "log_odds_chosen": 10.40541934967041, "log_odds_ratio": -0.0001380514440825209, "logits/chosen": -0.3020840585231781, "logits/rejected": -0.3681895136833191, "logps/chosen": -0.0003071234095841646, "logps/rejected": -1.6997017860412598, "loss": 1.57, "nll_loss": 0.39249423146247864, "rewards/accuracies": 1.0, "rewards/chosen": -3.071234095841646e-05, "rewards/margins": 0.16993945837020874, "rewards/rejected": -0.16997016966342926, "step": 4950 }, { "epoch": 3.4239280774550482, "grad_norm": 4.667510509490967, "learning_rate": 3.653373290302751e-05, "log_odds_chosen": 10.270744323730469, "log_odds_ratio": -0.00021994294365867972, "logits/chosen": -0.597452700138092, "logits/rejected": -0.6206096410751343, "logps/chosen": -0.010234748013317585, "logps/rejected": -2.91064715385437, "loss": 1.2375, "nll_loss": 0.3093594014644623, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010234748478978872, "rewards/margins": 0.2900412678718567, "rewards/rejected": -0.2910647392272949, "step": 4951 }, { "epoch": 3.424619640387275, "grad_norm": 5.313857555389404, "learning_rate": 3.652989088673736e-05, "log_odds_chosen": 9.118802070617676, "log_odds_ratio": -0.0009384253062307835, "logits/chosen": -0.5868411064147949, "logits/rejected": -0.6619110703468323, "logps/chosen": -0.0023102620616555214, "logps/rejected": -1.7506974935531616, "loss": 1.526, "nll_loss": 0.3814122676849365, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002310262352693826, "rewards/margins": 0.17483872175216675, "rewards/rejected": -0.17506974935531616, "step": 4952 }, { "epoch": 3.425311203319502, "grad_norm": 6.661731719970703, "learning_rate": 3.652604887044721e-05, "log_odds_chosen": 9.3414945602417, "log_odds_ratio": -0.029213862493634224, "logits/chosen": -0.67668217420578, "logits/rejected": -0.7876390218734741, "logps/chosen": -0.007439591456204653, "logps/rejected": -1.9399693012237549, "loss": 1.4387, "nll_loss": 0.3567417562007904, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007439591572619975, "rewards/margins": 0.19325298070907593, "rewards/rejected": -0.19399693608283997, "step": 4953 }, { "epoch": 3.4260027662517287, "grad_norm": 9.601874351501465, "learning_rate": 3.652220685415707e-05, "log_odds_chosen": 8.631904602050781, "log_odds_ratio": -0.012579535134136677, "logits/chosen": -0.97607421875, "logits/rejected": -0.9456802606582642, "logps/chosen": -0.027140891179442406, "logps/rejected": -1.6035633087158203, "loss": 1.3115, "nll_loss": 0.32662340998649597, "rewards/accuracies": 1.0, "rewards/chosen": -0.002714089583605528, "rewards/margins": 0.15764223039150238, "rewards/rejected": -0.1603563129901886, "step": 4954 }, { "epoch": 3.4266943291839556, "grad_norm": 14.586040496826172, "learning_rate": 3.651836483786691e-05, "log_odds_chosen": 7.647185325622559, "log_odds_ratio": -0.16055667400360107, "logits/chosen": -0.6028072834014893, "logits/rejected": -0.6363714933395386, "logps/chosen": -0.031981196254491806, "logps/rejected": -1.623141884803772, "loss": 2.2166, "nll_loss": 0.5380910038948059, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031981198117136955, "rewards/margins": 0.15911605954170227, "rewards/rejected": -0.16231419146060944, "step": 4955 }, { "epoch": 3.4273858921161824, "grad_norm": 12.094792366027832, "learning_rate": 3.6514522821576766e-05, "log_odds_chosen": 8.334588050842285, "log_odds_ratio": -0.038848213851451874, "logits/chosen": -0.3661819398403168, "logits/rejected": -0.4191260039806366, "logps/chosen": -0.01614089496433735, "logps/rejected": -1.3177118301391602, "loss": 0.9914, "nll_loss": 0.24396342039108276, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016140895895659924, "rewards/margins": 0.13015709817409515, "rewards/rejected": -0.13177119195461273, "step": 4956 }, { "epoch": 3.428077455048409, "grad_norm": 11.956432342529297, "learning_rate": 3.651068080528662e-05, "log_odds_chosen": 9.768950462341309, "log_odds_ratio": -0.00014484582061413676, "logits/chosen": -0.4603797197341919, "logits/rejected": -0.589290976524353, "logps/chosen": -0.0023116571828722954, "logps/rejected": -2.05747389793396, "loss": 1.0762, "nll_loss": 0.2690298855304718, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023116568627301604, "rewards/margins": 0.20551621913909912, "rewards/rejected": -0.20574738085269928, "step": 4957 }, { "epoch": 3.428769017980636, "grad_norm": 12.626391410827637, "learning_rate": 3.6506838788996464e-05, "log_odds_chosen": 8.608522415161133, "log_odds_ratio": -0.3388509154319763, "logits/chosen": -0.6620832085609436, "logits/rejected": -0.6999070644378662, "logps/chosen": -0.04924309626221657, "logps/rejected": -1.7659050226211548, "loss": 1.7579, "nll_loss": 0.40559306740760803, "rewards/accuracies": 0.875, "rewards/chosen": -0.004924309439957142, "rewards/margins": 0.1716661900281906, "rewards/rejected": -0.17659050226211548, "step": 4958 }, { "epoch": 3.429460580912863, "grad_norm": 10.061079025268555, "learning_rate": 3.6502996772706316e-05, "log_odds_chosen": 9.71721076965332, "log_odds_ratio": -0.0002621083986014128, "logits/chosen": -0.60477215051651, "logits/rejected": -0.7572970390319824, "logps/chosen": -0.0005827401182614267, "logps/rejected": -1.649122953414917, "loss": 1.0116, "nll_loss": 0.25288063287734985, "rewards/accuracies": 1.0, "rewards/chosen": -5.827401764690876e-05, "rewards/margins": 0.164854034781456, "rewards/rejected": -0.16491231322288513, "step": 4959 }, { "epoch": 3.43015214384509, "grad_norm": 5.07777214050293, "learning_rate": 3.649915475641617e-05, "log_odds_chosen": 9.303333282470703, "log_odds_ratio": -0.0015425317687913775, "logits/chosen": -0.3931879699230194, "logits/rejected": -0.3826301097869873, "logps/chosen": -0.014467225410044193, "logps/rejected": -2.122796058654785, "loss": 1.1946, "nll_loss": 0.2985040545463562, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014467225410044193, "rewards/margins": 0.21083290874958038, "rewards/rejected": -0.21227963268756866, "step": 4960 }, { "epoch": 3.430843706777317, "grad_norm": 7.497944355010986, "learning_rate": 3.649531274012602e-05, "log_odds_chosen": 6.6718292236328125, "log_odds_ratio": -0.1012139543890953, "logits/chosen": -0.4591187536716461, "logits/rejected": -0.3664749562740326, "logps/chosen": -0.0395648330450058, "logps/rejected": -1.074185848236084, "loss": 1.6025, "nll_loss": 0.39051151275634766, "rewards/accuracies": 1.0, "rewards/chosen": -0.003956483211368322, "rewards/margins": 0.1034621000289917, "rewards/rejected": -0.10741858184337616, "step": 4961 }, { "epoch": 3.431535269709544, "grad_norm": 9.274129867553711, "learning_rate": 3.649147072383587e-05, "log_odds_chosen": 9.80703353881836, "log_odds_ratio": -0.0002453567576594651, "logits/chosen": -0.8744444847106934, "logits/rejected": -0.8338868618011475, "logps/chosen": -0.0007531539304181933, "logps/rejected": -1.9264980554580688, "loss": 1.8758, "nll_loss": 0.4689198136329651, "rewards/accuracies": 1.0, "rewards/chosen": -7.531539449701086e-05, "rewards/margins": 0.19257448613643646, "rewards/rejected": -0.19264981150627136, "step": 4962 }, { "epoch": 3.4322268326417706, "grad_norm": 12.359687805175781, "learning_rate": 3.6487628707545726e-05, "log_odds_chosen": 9.083701133728027, "log_odds_ratio": -0.0003218199999537319, "logits/chosen": -0.4226207137107849, "logits/rejected": -0.5028167366981506, "logps/chosen": -0.0009388489997945726, "logps/rejected": -1.8529709577560425, "loss": 1.4797, "nll_loss": 0.3699025511741638, "rewards/accuracies": 1.0, "rewards/chosen": -9.388489706907421e-05, "rewards/margins": 0.1852032095193863, "rewards/rejected": -0.18529711663722992, "step": 4963 }, { "epoch": 3.4329183955739975, "grad_norm": 14.259088516235352, "learning_rate": 3.648378669125557e-05, "log_odds_chosen": 8.883432388305664, "log_odds_ratio": -0.18358714878559113, "logits/chosen": -0.47711271047592163, "logits/rejected": -0.5507173538208008, "logps/chosen": -0.04038437455892563, "logps/rejected": -1.7237071990966797, "loss": 1.9157, "nll_loss": 0.4605787396430969, "rewards/accuracies": 0.875, "rewards/chosen": -0.004038437269628048, "rewards/margins": 0.1683322787284851, "rewards/rejected": -0.17237071692943573, "step": 4964 }, { "epoch": 3.4336099585062243, "grad_norm": 9.234458923339844, "learning_rate": 3.6479944674965424e-05, "log_odds_chosen": 9.029831886291504, "log_odds_ratio": -0.004684413317590952, "logits/chosen": -0.6124022603034973, "logits/rejected": -0.7258630990982056, "logps/chosen": -0.0036762619856745005, "logps/rejected": -1.8446922302246094, "loss": 1.7796, "nll_loss": 0.44442451000213623, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003676262276712805, "rewards/margins": 0.18410161137580872, "rewards/rejected": -0.18446923792362213, "step": 4965 }, { "epoch": 3.434301521438451, "grad_norm": 8.6228609085083, "learning_rate": 3.6476102658675276e-05, "log_odds_chosen": 8.592721939086914, "log_odds_ratio": -0.05192786827683449, "logits/chosen": -0.7884331345558167, "logits/rejected": -0.7422505617141724, "logps/chosen": -0.019336678087711334, "logps/rejected": -1.6305170059204102, "loss": 2.0833, "nll_loss": 0.5156409740447998, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019336676923558116, "rewards/margins": 0.1611180305480957, "rewards/rejected": -0.16305169463157654, "step": 4966 }, { "epoch": 3.434993084370678, "grad_norm": 7.429285049438477, "learning_rate": 3.647226064238512e-05, "log_odds_chosen": 10.410229682922363, "log_odds_ratio": -6.975359428906813e-05, "logits/chosen": -0.31530484557151794, "logits/rejected": -0.38272637128829956, "logps/chosen": -0.00015621320926584303, "logps/rejected": -1.8115465641021729, "loss": 0.9233, "nll_loss": 0.23082692921161652, "rewards/accuracies": 1.0, "rewards/chosen": -1.5621320926584303e-05, "rewards/margins": 0.1811390519142151, "rewards/rejected": -0.18115466833114624, "step": 4967 }, { "epoch": 3.435684647302905, "grad_norm": 16.53629493713379, "learning_rate": 3.6468418626094975e-05, "log_odds_chosen": 11.616415023803711, "log_odds_ratio": -1.644290932745207e-05, "logits/chosen": -0.6845104694366455, "logits/rejected": -0.8092417120933533, "logps/chosen": -0.0001430445263395086, "logps/rejected": -2.61985445022583, "loss": 2.3828, "nll_loss": 0.5956913828849792, "rewards/accuracies": 1.0, "rewards/chosen": -1.430445263395086e-05, "rewards/margins": 0.2619711458683014, "rewards/rejected": -0.2619854509830475, "step": 4968 }, { "epoch": 3.4363762102351316, "grad_norm": 13.607427597045898, "learning_rate": 3.646457660980483e-05, "log_odds_chosen": 10.069181442260742, "log_odds_ratio": -7.897378964116797e-05, "logits/chosen": -0.7286061644554138, "logits/rejected": -0.8447732329368591, "logps/chosen": -0.0006242129602469504, "logps/rejected": -2.1223394870758057, "loss": 1.6063, "nll_loss": 0.4015622138977051, "rewards/accuracies": 1.0, "rewards/chosen": -6.242129165912047e-05, "rewards/margins": 0.2121715545654297, "rewards/rejected": -0.21223396062850952, "step": 4969 }, { "epoch": 3.4370677731673585, "grad_norm": 6.911189556121826, "learning_rate": 3.646073459351468e-05, "log_odds_chosen": 10.088189125061035, "log_odds_ratio": -0.00012798573879990727, "logits/chosen": -0.6314866542816162, "logits/rejected": -0.6669266223907471, "logps/chosen": -0.004457239992916584, "logps/rejected": -2.2552828788757324, "loss": 1.7214, "nll_loss": 0.43033266067504883, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044572402839548886, "rewards/margins": 0.22508256137371063, "rewards/rejected": -0.225528284907341, "step": 4970 }, { "epoch": 3.4377593360995853, "grad_norm": 8.085153579711914, "learning_rate": 3.6456892577224525e-05, "log_odds_chosen": 8.362859725952148, "log_odds_ratio": -0.0029876772314310074, "logits/chosen": -0.6107293367385864, "logits/rejected": -0.6765274405479431, "logps/chosen": -0.005427872762084007, "logps/rejected": -1.735783576965332, "loss": 1.2286, "nll_loss": 0.30684149265289307, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005427872529253364, "rewards/margins": 0.17303556203842163, "rewards/rejected": -0.17357836663722992, "step": 4971 }, { "epoch": 3.438450899031812, "grad_norm": 14.384394645690918, "learning_rate": 3.6453050560934384e-05, "log_odds_chosen": 9.993062019348145, "log_odds_ratio": -0.0009481186280027032, "logits/chosen": -0.519564151763916, "logits/rejected": -0.5848604440689087, "logps/chosen": -0.027457591146230698, "logps/rejected": -1.879459023475647, "loss": 2.0429, "nll_loss": 0.5106297731399536, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027457589749246836, "rewards/margins": 0.18520015478134155, "rewards/rejected": -0.18794593214988708, "step": 4972 }, { "epoch": 3.439142461964039, "grad_norm": 9.224777221679688, "learning_rate": 3.644920854464423e-05, "log_odds_chosen": 8.9891939163208, "log_odds_ratio": -0.0007203746354207397, "logits/chosen": -0.49874958395957947, "logits/rejected": -0.45370471477508545, "logps/chosen": -0.016186760738492012, "logps/rejected": -2.4834234714508057, "loss": 1.4326, "nll_loss": 0.35807323455810547, "rewards/accuracies": 1.0, "rewards/chosen": -0.001618676004000008, "rewards/margins": 0.24672365188598633, "rewards/rejected": -0.2483423352241516, "step": 4973 }, { "epoch": 3.4398340248962658, "grad_norm": 11.134475708007812, "learning_rate": 3.644536652835408e-05, "log_odds_chosen": 9.265724182128906, "log_odds_ratio": -0.00047064805403351784, "logits/chosen": -0.7496140003204346, "logits/rejected": -0.7866104245185852, "logps/chosen": -0.0027650538831949234, "logps/rejected": -2.056285858154297, "loss": 1.5796, "nll_loss": 0.39485177397727966, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027650539414025843, "rewards/margins": 0.2053520828485489, "rewards/rejected": -0.20562858879566193, "step": 4974 }, { "epoch": 3.4405255878284926, "grad_norm": 8.373491287231445, "learning_rate": 3.6441524512063935e-05, "log_odds_chosen": 8.949793815612793, "log_odds_ratio": -0.0005880310200154781, "logits/chosen": -0.42748188972473145, "logits/rejected": -0.49028706550598145, "logps/chosen": -0.001193431206047535, "logps/rejected": -1.608788251876831, "loss": 1.1484, "nll_loss": 0.28704604506492615, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011934312351513654, "rewards/margins": 0.16075949370861053, "rewards/rejected": -0.16087885200977325, "step": 4975 }, { "epoch": 3.4412171507607194, "grad_norm": 9.026708602905273, "learning_rate": 3.643768249577378e-05, "log_odds_chosen": 10.085662841796875, "log_odds_ratio": -0.0005494834040291607, "logits/chosen": -0.7614070773124695, "logits/rejected": -0.7914379835128784, "logps/chosen": -0.02739373780786991, "logps/rejected": -2.0095713138580322, "loss": 1.431, "nll_loss": 0.35769060254096985, "rewards/accuracies": 1.0, "rewards/chosen": -0.002739373594522476, "rewards/margins": 0.19821777939796448, "rewards/rejected": -0.20095713436603546, "step": 4976 }, { "epoch": 3.4419087136929463, "grad_norm": 7.560689449310303, "learning_rate": 3.643384047948363e-05, "log_odds_chosen": 8.658706665039062, "log_odds_ratio": -0.12949354946613312, "logits/chosen": -0.7013901472091675, "logits/rejected": -0.7911181449890137, "logps/chosen": -0.019102217629551888, "logps/rejected": -1.1386125087738037, "loss": 1.9893, "nll_loss": 0.48437100648880005, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019102217629551888, "rewards/margins": 0.11195103079080582, "rewards/rejected": -0.11386125534772873, "step": 4977 }, { "epoch": 3.442600276625173, "grad_norm": 12.517111778259277, "learning_rate": 3.6429998463193485e-05, "log_odds_chosen": 8.388811111450195, "log_odds_ratio": -0.001296606264077127, "logits/chosen": -0.48516207933425903, "logits/rejected": -0.5658431053161621, "logps/chosen": -0.002016248879954219, "logps/rejected": -1.7087645530700684, "loss": 1.8797, "nll_loss": 0.4697989821434021, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002016248763538897, "rewards/margins": 0.17067483067512512, "rewards/rejected": -0.17087645828723907, "step": 4978 }, { "epoch": 3.4432918395574, "grad_norm": 5.525219917297363, "learning_rate": 3.642615644690334e-05, "log_odds_chosen": 8.90900707244873, "log_odds_ratio": -0.005349991377443075, "logits/chosen": -0.8758846521377563, "logits/rejected": -0.8599585294723511, "logps/chosen": -0.0006094533018767834, "logps/rejected": -1.6298961639404297, "loss": 1.3454, "nll_loss": 0.3358096480369568, "rewards/accuracies": 1.0, "rewards/chosen": -6.0945334553252906e-05, "rewards/margins": 0.16292867064476013, "rewards/rejected": -0.16298961639404297, "step": 4979 }, { "epoch": 3.4439834024896268, "grad_norm": 11.920831680297852, "learning_rate": 3.6422314430613184e-05, "log_odds_chosen": 7.914310455322266, "log_odds_ratio": -0.11740975826978683, "logits/chosen": -0.8697179555892944, "logits/rejected": -0.9304721355438232, "logps/chosen": -0.01838577538728714, "logps/rejected": -1.3346633911132812, "loss": 1.9385, "nll_loss": 0.4728940725326538, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018385774455964565, "rewards/margins": 0.13162776827812195, "rewards/rejected": -0.13346634805202484, "step": 4980 }, { "epoch": 3.4446749654218536, "grad_norm": 15.954282760620117, "learning_rate": 3.641847241432304e-05, "log_odds_chosen": 10.502399444580078, "log_odds_ratio": -5.634710396407172e-05, "logits/chosen": -0.8933683633804321, "logits/rejected": -0.8928529620170593, "logps/chosen": -0.00024164578644558787, "logps/rejected": -1.8561794757843018, "loss": 2.2232, "nll_loss": 0.5558005571365356, "rewards/accuracies": 1.0, "rewards/chosen": -2.416457937215455e-05, "rewards/margins": 0.18559378385543823, "rewards/rejected": -0.18561795353889465, "step": 4981 }, { "epoch": 3.4453665283540804, "grad_norm": 9.633877754211426, "learning_rate": 3.641463039803289e-05, "log_odds_chosen": 10.315112113952637, "log_odds_ratio": -0.00015177467139437795, "logits/chosen": -0.7014366984367371, "logits/rejected": -0.7228372097015381, "logps/chosen": -0.0009540664032101631, "logps/rejected": -2.119901180267334, "loss": 1.6241, "nll_loss": 0.40599820017814636, "rewards/accuracies": 1.0, "rewards/chosen": -9.540664177620783e-05, "rewards/margins": 0.21189472079277039, "rewards/rejected": -0.2119901180267334, "step": 4982 }, { "epoch": 3.4460580912863072, "grad_norm": 5.039992809295654, "learning_rate": 3.641078838174274e-05, "log_odds_chosen": 9.058915138244629, "log_odds_ratio": -0.0014726583613082767, "logits/chosen": -0.7530060410499573, "logits/rejected": -0.8648630380630493, "logps/chosen": -0.0016102747758850455, "logps/rejected": -1.217201828956604, "loss": 1.2089, "nll_loss": 0.3020736277103424, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016102749214041978, "rewards/margins": 0.12155915796756744, "rewards/rejected": -0.12172017991542816, "step": 4983 }, { "epoch": 3.446749654218534, "grad_norm": 9.161917686462402, "learning_rate": 3.640694636545259e-05, "log_odds_chosen": 10.83303165435791, "log_odds_ratio": -3.968351666117087e-05, "logits/chosen": -0.6702843904495239, "logits/rejected": -0.701372504234314, "logps/chosen": -0.0001426434755558148, "logps/rejected": -2.0338659286499023, "loss": 1.288, "nll_loss": 0.32198911905288696, "rewards/accuracies": 1.0, "rewards/chosen": -1.4264348465076182e-05, "rewards/margins": 0.20337235927581787, "rewards/rejected": -0.2033866047859192, "step": 4984 }, { "epoch": 3.447441217150761, "grad_norm": 10.88602066040039, "learning_rate": 3.6403104349162446e-05, "log_odds_chosen": 7.93879508972168, "log_odds_ratio": -0.0036302765365689993, "logits/chosen": -0.8644218444824219, "logits/rejected": -0.9524936676025391, "logps/chosen": -0.00544738257303834, "logps/rejected": -1.3466427326202393, "loss": 1.7153, "nll_loss": 0.4284606873989105, "rewards/accuracies": 1.0, "rewards/chosen": -0.000544738257303834, "rewards/margins": 0.13411954045295715, "rewards/rejected": -0.13466428220272064, "step": 4985 }, { "epoch": 3.4481327800829877, "grad_norm": 15.391132354736328, "learning_rate": 3.639926233287229e-05, "log_odds_chosen": 8.355426788330078, "log_odds_ratio": -0.025042179971933365, "logits/chosen": -0.7084068059921265, "logits/rejected": -0.7590049505233765, "logps/chosen": -0.007605067919939756, "logps/rejected": -1.9548802375793457, "loss": 1.5422, "nll_loss": 0.38303864002227783, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007605067803524435, "rewards/margins": 0.19472752511501312, "rewards/rejected": -0.19548803567886353, "step": 4986 }, { "epoch": 3.4488243430152146, "grad_norm": 6.186641693115234, "learning_rate": 3.6395420316582144e-05, "log_odds_chosen": 8.858957290649414, "log_odds_ratio": -0.0007753237732686102, "logits/chosen": -0.4936913847923279, "logits/rejected": -0.5381306409835815, "logps/chosen": -0.000559426611289382, "logps/rejected": -1.241579294204712, "loss": 1.7351, "nll_loss": 0.43369585275650024, "rewards/accuracies": 1.0, "rewards/chosen": -5.5942658946150914e-05, "rewards/margins": 0.12410198152065277, "rewards/rejected": -0.12415792047977448, "step": 4987 }, { "epoch": 3.4495159059474414, "grad_norm": 6.701933860778809, "learning_rate": 3.6391578300291996e-05, "log_odds_chosen": 8.373390197753906, "log_odds_ratio": -0.010810820385813713, "logits/chosen": -0.554107129573822, "logits/rejected": -0.5754260420799255, "logps/chosen": -0.08616117388010025, "logps/rejected": -1.98459792137146, "loss": 1.7258, "nll_loss": 0.4303753972053528, "rewards/accuracies": 1.0, "rewards/chosen": -0.008616117760539055, "rewards/margins": 0.18984368443489075, "rewards/rejected": -0.19845978915691376, "step": 4988 }, { "epoch": 3.4502074688796682, "grad_norm": 9.205174446105957, "learning_rate": 3.638773628400184e-05, "log_odds_chosen": 9.567306518554688, "log_odds_ratio": -0.00013541642692871392, "logits/chosen": -1.1078649759292603, "logits/rejected": -1.0323755741119385, "logps/chosen": -0.0006693107425235212, "logps/rejected": -1.786118507385254, "loss": 2.0256, "nll_loss": 0.5063755512237549, "rewards/accuracies": 1.0, "rewards/chosen": -6.693107570754364e-05, "rewards/margins": 0.17854492366313934, "rewards/rejected": -0.1786118596792221, "step": 4989 }, { "epoch": 3.450899031811895, "grad_norm": 8.76229190826416, "learning_rate": 3.63838942677117e-05, "log_odds_chosen": 9.120561599731445, "log_odds_ratio": -0.0009699111105874181, "logits/chosen": -0.98872971534729, "logits/rejected": -1.0143227577209473, "logps/chosen": -0.010068300180137157, "logps/rejected": -1.867543339729309, "loss": 1.7556, "nll_loss": 0.43879449367523193, "rewards/accuracies": 1.0, "rewards/chosen": -0.001006829901598394, "rewards/margins": 0.18574751913547516, "rewards/rejected": -0.18675434589385986, "step": 4990 }, { "epoch": 3.451590594744122, "grad_norm": 8.950358390808105, "learning_rate": 3.638005225142155e-05, "log_odds_chosen": 7.5963826179504395, "log_odds_ratio": -0.04105527698993683, "logits/chosen": -0.9477307796478271, "logits/rejected": -1.041830062866211, "logps/chosen": -0.053072813898324966, "logps/rejected": -1.931291937828064, "loss": 1.6146, "nll_loss": 0.3995518088340759, "rewards/accuracies": 1.0, "rewards/chosen": -0.005307281389832497, "rewards/margins": 0.1878219097852707, "rewards/rejected": -0.19312918186187744, "step": 4991 }, { "epoch": 3.4522821576763487, "grad_norm": 4.866816997528076, "learning_rate": 3.63762102351314e-05, "log_odds_chosen": 9.050535202026367, "log_odds_ratio": -0.0018034178065136075, "logits/chosen": -1.0131746530532837, "logits/rejected": -0.9534136056900024, "logps/chosen": -0.0012092224787920713, "logps/rejected": -1.3308520317077637, "loss": 1.4361, "nll_loss": 0.3588336110115051, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012092224642401561, "rewards/margins": 0.13296428322792053, "rewards/rejected": -0.1330852061510086, "step": 4992 }, { "epoch": 3.4529737206085755, "grad_norm": 15.728497505187988, "learning_rate": 3.637236821884125e-05, "log_odds_chosen": 10.239688873291016, "log_odds_ratio": -0.00016703848086763173, "logits/chosen": -0.6586402654647827, "logits/rejected": -0.7343249320983887, "logps/chosen": -0.0003682894166558981, "logps/rejected": -2.2000532150268555, "loss": 1.8151, "nll_loss": 0.4537507891654968, "rewards/accuracies": 1.0, "rewards/chosen": -3.6828940210398287e-05, "rewards/margins": 0.2199684977531433, "rewards/rejected": -0.2200053334236145, "step": 4993 }, { "epoch": 3.4536652835408024, "grad_norm": 9.613495826721191, "learning_rate": 3.6368526202551104e-05, "log_odds_chosen": 9.965752601623535, "log_odds_ratio": -0.0001287447230424732, "logits/chosen": -0.8265679478645325, "logits/rejected": -0.9368499517440796, "logps/chosen": -0.0005226809298619628, "logps/rejected": -2.1379518508911133, "loss": 1.3859, "nll_loss": 0.34647077322006226, "rewards/accuracies": 1.0, "rewards/chosen": -5.226809298619628e-05, "rewards/margins": 0.2137429416179657, "rewards/rejected": -0.21379519999027252, "step": 4994 }, { "epoch": 3.454356846473029, "grad_norm": 11.395675659179688, "learning_rate": 3.636468418626095e-05, "log_odds_chosen": 8.483444213867188, "log_odds_ratio": -0.0013658700045198202, "logits/chosen": -0.8391974568367004, "logits/rejected": -0.759861171245575, "logps/chosen": -0.004849501885473728, "logps/rejected": -1.6757423877716064, "loss": 1.8679, "nll_loss": 0.46684902906417847, "rewards/accuracies": 1.0, "rewards/chosen": -0.000484950200188905, "rewards/margins": 0.1670892834663391, "rewards/rejected": -0.1675742268562317, "step": 4995 }, { "epoch": 3.455048409405256, "grad_norm": 14.174217224121094, "learning_rate": 3.63608421699708e-05, "log_odds_chosen": 9.07601547241211, "log_odds_ratio": -0.0004858938045799732, "logits/chosen": -0.6792766451835632, "logits/rejected": -0.7782905697822571, "logps/chosen": -0.0033511659130454063, "logps/rejected": -1.3728395700454712, "loss": 1.9694, "nll_loss": 0.4922906756401062, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033511657966300845, "rewards/margins": 0.1369488388299942, "rewards/rejected": -0.13728396594524384, "step": 4996 }, { "epoch": 3.455739972337483, "grad_norm": 9.669139862060547, "learning_rate": 3.6357000153680655e-05, "log_odds_chosen": 8.411234855651855, "log_odds_ratio": -0.025437351316213608, "logits/chosen": -0.3525383770465851, "logits/rejected": -0.43491697311401367, "logps/chosen": -0.01492525078356266, "logps/rejected": -1.4280292987823486, "loss": 1.2865, "nll_loss": 0.31906917691230774, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014925252180546522, "rewards/margins": 0.1413104087114334, "rewards/rejected": -0.14280293881893158, "step": 4997 }, { "epoch": 3.4564315352697097, "grad_norm": 6.708120346069336, "learning_rate": 3.63531581373905e-05, "log_odds_chosen": 8.262350082397461, "log_odds_ratio": -0.009439961053431034, "logits/chosen": -0.7109168767929077, "logits/rejected": -0.6905184984207153, "logps/chosen": -0.045669618993997574, "logps/rejected": -2.6598198413848877, "loss": 1.6848, "nll_loss": 0.420247346162796, "rewards/accuracies": 1.0, "rewards/chosen": -0.004566962365061045, "rewards/margins": 0.2614150047302246, "rewards/rejected": -0.2659819722175598, "step": 4998 }, { "epoch": 3.4571230982019365, "grad_norm": 11.655810356140137, "learning_rate": 3.634931612110036e-05, "log_odds_chosen": 9.850397109985352, "log_odds_ratio": -0.0011923499405384064, "logits/chosen": -0.559964656829834, "logits/rejected": -0.5518143177032471, "logps/chosen": -0.016001557931303978, "logps/rejected": -1.9830666780471802, "loss": 1.8046, "nll_loss": 0.45102426409721375, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016001559561118484, "rewards/margins": 0.19670650362968445, "rewards/rejected": -0.19830666482448578, "step": 4999 }, { "epoch": 3.4578146611341634, "grad_norm": 7.783631324768066, "learning_rate": 3.6345474104810205e-05, "log_odds_chosen": 6.684453010559082, "log_odds_ratio": -0.12163373082876205, "logits/chosen": -0.8146560192108154, "logits/rejected": -0.7799224257469177, "logps/chosen": -0.020185653120279312, "logps/rejected": -0.8523962497711182, "loss": 1.86, "nll_loss": 0.45282793045043945, "rewards/accuracies": 0.875, "rewards/chosen": -0.0020185650791972876, "rewards/margins": 0.08322106301784515, "rewards/rejected": -0.08523963391780853, "step": 5000 }, { "epoch": 3.45850622406639, "grad_norm": 7.987496376037598, "learning_rate": 3.634163208852006e-05, "log_odds_chosen": 6.474782943725586, "log_odds_ratio": -0.0698920488357544, "logits/chosen": -0.5039654970169067, "logits/rejected": -0.4827998876571655, "logps/chosen": -0.025293994694948196, "logps/rejected": -1.415076732635498, "loss": 1.9146, "nll_loss": 0.47165244817733765, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025293994694948196, "rewards/margins": 0.1389782726764679, "rewards/rejected": -0.14150768518447876, "step": 5001 }, { "epoch": 3.459197786998617, "grad_norm": 5.635584831237793, "learning_rate": 3.633779007222991e-05, "log_odds_chosen": 9.930513381958008, "log_odds_ratio": -0.012761876918375492, "logits/chosen": -0.47629499435424805, "logits/rejected": -0.5821311473846436, "logps/chosen": -0.012218811549246311, "logps/rejected": -1.8402409553527832, "loss": 1.2687, "nll_loss": 0.3158940076828003, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012218811316415668, "rewards/margins": 0.182802215218544, "rewards/rejected": -0.18402409553527832, "step": 5002 }, { "epoch": 3.459889349930844, "grad_norm": 13.069116592407227, "learning_rate": 3.633394805593976e-05, "log_odds_chosen": 10.705204010009766, "log_odds_ratio": -6.245496479095891e-05, "logits/chosen": -0.7386019229888916, "logits/rejected": -0.7776150107383728, "logps/chosen": -0.0003734386991709471, "logps/rejected": -2.469666004180908, "loss": 1.2612, "nll_loss": 0.31529700756073, "rewards/accuracies": 1.0, "rewards/chosen": -3.734386700671166e-05, "rewards/margins": 0.24692925810813904, "rewards/rejected": -0.24696658551692963, "step": 5003 }, { "epoch": 3.4605809128630707, "grad_norm": 9.947142601013184, "learning_rate": 3.633010603964961e-05, "log_odds_chosen": 9.268041610717773, "log_odds_ratio": -0.0005995544488541782, "logits/chosen": -0.5643569231033325, "logits/rejected": -0.5897268056869507, "logps/chosen": -0.0008558162953704596, "logps/rejected": -1.5376017093658447, "loss": 2.4439, "nll_loss": 0.6109213829040527, "rewards/accuracies": 1.0, "rewards/chosen": -8.558163972338662e-05, "rewards/margins": 0.15367458760738373, "rewards/rejected": -0.1537601798772812, "step": 5004 }, { "epoch": 3.4612724757952975, "grad_norm": 11.088629722595215, "learning_rate": 3.632626402335946e-05, "log_odds_chosen": 8.69260025024414, "log_odds_ratio": -0.0019872181583195925, "logits/chosen": -0.3774298429489136, "logits/rejected": -0.4351969361305237, "logps/chosen": -0.044242698699235916, "logps/rejected": -2.4456701278686523, "loss": 1.7641, "nll_loss": 0.44082629680633545, "rewards/accuracies": 1.0, "rewards/chosen": -0.004424269776791334, "rewards/margins": 0.24014276266098022, "rewards/rejected": -0.24456703662872314, "step": 5005 }, { "epoch": 3.4619640387275243, "grad_norm": 11.110259056091309, "learning_rate": 3.632242200706931e-05, "log_odds_chosen": 10.339826583862305, "log_odds_ratio": -5.08381963300053e-05, "logits/chosen": -0.8093332648277283, "logits/rejected": -0.8982728719711304, "logps/chosen": -0.00030366991995833814, "logps/rejected": -1.7938203811645508, "loss": 1.2675, "nll_loss": 0.31685981154441833, "rewards/accuracies": 1.0, "rewards/chosen": -3.0366991268238053e-05, "rewards/margins": 0.17935167253017426, "rewards/rejected": -0.17938204109668732, "step": 5006 }, { "epoch": 3.462655601659751, "grad_norm": 9.65864372253418, "learning_rate": 3.631857999077916e-05, "log_odds_chosen": 10.515408515930176, "log_odds_ratio": -6.123816274339333e-05, "logits/chosen": -0.5729230046272278, "logits/rejected": -0.6748566627502441, "logps/chosen": -0.00019804044859483838, "logps/rejected": -1.9237462282180786, "loss": 1.4539, "nll_loss": 0.3634702265262604, "rewards/accuracies": 1.0, "rewards/chosen": -1.980404522328172e-05, "rewards/margins": 0.19235482811927795, "rewards/rejected": -0.19237461686134338, "step": 5007 }, { "epoch": 3.463347164591978, "grad_norm": 9.06814193725586, "learning_rate": 3.631473797448902e-05, "log_odds_chosen": 10.330401420593262, "log_odds_ratio": -7.388419908238575e-05, "logits/chosen": -0.6489390134811401, "logits/rejected": -0.7348330020904541, "logps/chosen": -0.00034219425288029015, "logps/rejected": -1.8815146684646606, "loss": 1.2441, "nll_loss": 0.3110177516937256, "rewards/accuracies": 1.0, "rewards/chosen": -3.4219425288029015e-05, "rewards/margins": 0.18811725080013275, "rewards/rejected": -0.18815146386623383, "step": 5008 }, { "epoch": 3.464038727524205, "grad_norm": 10.859692573547363, "learning_rate": 3.6310895958198864e-05, "log_odds_chosen": 10.254586219787598, "log_odds_ratio": -7.5828458648175e-05, "logits/chosen": -0.46878278255462646, "logits/rejected": -0.6421003937721252, "logps/chosen": -0.0002131734072463587, "logps/rejected": -1.9959816932678223, "loss": 1.721, "nll_loss": 0.43024158477783203, "rewards/accuracies": 1.0, "rewards/chosen": -2.131734072463587e-05, "rewards/margins": 0.19957688450813293, "rewards/rejected": -0.19959819316864014, "step": 5009 }, { "epoch": 3.4647302904564317, "grad_norm": 14.180079460144043, "learning_rate": 3.6307053941908716e-05, "log_odds_chosen": 10.092310905456543, "log_odds_ratio": -8.331875142175704e-05, "logits/chosen": -0.5899174213409424, "logits/rejected": -0.6186763644218445, "logps/chosen": -0.0002855797647498548, "logps/rejected": -1.7965035438537598, "loss": 1.944, "nll_loss": 0.48598384857177734, "rewards/accuracies": 1.0, "rewards/chosen": -2.8557977202581242e-05, "rewards/margins": 0.17962178587913513, "rewards/rejected": -0.17965035140514374, "step": 5010 }, { "epoch": 3.4654218533886585, "grad_norm": 8.489675521850586, "learning_rate": 3.630321192561857e-05, "log_odds_chosen": 9.157320022583008, "log_odds_ratio": -0.17574098706245422, "logits/chosen": -0.7258381843566895, "logits/rejected": -0.7407118082046509, "logps/chosen": -0.020532608032226562, "logps/rejected": -1.8599942922592163, "loss": 1.3642, "nll_loss": 0.32347384095191956, "rewards/accuracies": 0.875, "rewards/chosen": -0.0020532610360533, "rewards/margins": 0.1839461624622345, "rewards/rejected": -0.18599943816661835, "step": 5011 }, { "epoch": 3.4661134163208853, "grad_norm": 6.52730131149292, "learning_rate": 3.629936990932842e-05, "log_odds_chosen": 8.526227951049805, "log_odds_ratio": -0.03111344762146473, "logits/chosen": -0.6624473333358765, "logits/rejected": -0.687995433807373, "logps/chosen": -0.007035402115434408, "logps/rejected": -1.5009098052978516, "loss": 1.4651, "nll_loss": 0.3631598949432373, "rewards/accuracies": 1.0, "rewards/chosen": -0.000703540223184973, "rewards/margins": 0.1493874490261078, "rewards/rejected": -0.15009097754955292, "step": 5012 }, { "epoch": 3.466804979253112, "grad_norm": 14.020427703857422, "learning_rate": 3.629552789303827e-05, "log_odds_chosen": 9.0694580078125, "log_odds_ratio": -0.0027533157262951136, "logits/chosen": -0.18365775048732758, "logits/rejected": -0.2790081799030304, "logps/chosen": -0.017389468848705292, "logps/rejected": -1.5542614459991455, "loss": 1.4566, "nll_loss": 0.3638818562030792, "rewards/accuracies": 1.0, "rewards/chosen": -0.001738947001285851, "rewards/margins": 0.15368719398975372, "rewards/rejected": -0.15542612969875336, "step": 5013 }, { "epoch": 3.467496542185339, "grad_norm": 9.826322555541992, "learning_rate": 3.629168587674812e-05, "log_odds_chosen": 7.112824440002441, "log_odds_ratio": -0.06963668763637543, "logits/chosen": -0.8219603300094604, "logits/rejected": -0.8262119293212891, "logps/chosen": -0.014785894192755222, "logps/rejected": -1.1341627836227417, "loss": 1.4779, "nll_loss": 0.36250919103622437, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014785894891247153, "rewards/margins": 0.11193770170211792, "rewards/rejected": -0.11341628432273865, "step": 5014 }, { "epoch": 3.468188105117566, "grad_norm": 10.303740501403809, "learning_rate": 3.628784386045797e-05, "log_odds_chosen": 8.587909698486328, "log_odds_ratio": -0.023652495816349983, "logits/chosen": -0.5451184511184692, "logits/rejected": -0.6260979771614075, "logps/chosen": -0.006036615930497646, "logps/rejected": -1.5167649984359741, "loss": 1.7524, "nll_loss": 0.43574270606040955, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006036615814082325, "rewards/margins": 0.15107285976409912, "rewards/rejected": -0.1516765058040619, "step": 5015 }, { "epoch": 3.4688796680497926, "grad_norm": 10.99515151977539, "learning_rate": 3.628400184416782e-05, "log_odds_chosen": 7.448937892913818, "log_odds_ratio": -0.17324112355709076, "logits/chosen": -0.9495927095413208, "logits/rejected": -1.026933193206787, "logps/chosen": -0.04410245269536972, "logps/rejected": -1.1555567979812622, "loss": 1.9708, "nll_loss": 0.4753641188144684, "rewards/accuracies": 0.875, "rewards/chosen": -0.004410245455801487, "rewards/margins": 0.11114543676376343, "rewards/rejected": -0.11555567383766174, "step": 5016 }, { "epoch": 3.4695712309820195, "grad_norm": 14.707378387451172, "learning_rate": 3.6280159827877676e-05, "log_odds_chosen": 9.303018569946289, "log_odds_ratio": -0.0004590075695887208, "logits/chosen": -0.9421852231025696, "logits/rejected": -1.010554552078247, "logps/chosen": -0.0009109095553867519, "logps/rejected": -1.3994932174682617, "loss": 1.7846, "nll_loss": 0.4461010694503784, "rewards/accuracies": 1.0, "rewards/chosen": -9.109095844905823e-05, "rewards/margins": 0.13985824584960938, "rewards/rejected": -0.13994933664798737, "step": 5017 }, { "epoch": 3.4702627939142463, "grad_norm": 7.436912536621094, "learning_rate": 3.627631781158752e-05, "log_odds_chosen": 9.307181358337402, "log_odds_ratio": -0.004700258374214172, "logits/chosen": -0.5239299535751343, "logits/rejected": -0.5426942706108093, "logps/chosen": -0.0029026533011347055, "logps/rejected": -1.3353371620178223, "loss": 1.2961, "nll_loss": 0.3235432505607605, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002902653650380671, "rewards/margins": 0.13324347138404846, "rewards/rejected": -0.13353373110294342, "step": 5018 }, { "epoch": 3.470954356846473, "grad_norm": 9.484705924987793, "learning_rate": 3.6272475795297375e-05, "log_odds_chosen": 9.229263305664062, "log_odds_ratio": -0.0014138160040602088, "logits/chosen": -0.693687915802002, "logits/rejected": -0.7113863229751587, "logps/chosen": -0.0005861036479473114, "logps/rejected": -1.4140403270721436, "loss": 1.8349, "nll_loss": 0.4585755467414856, "rewards/accuracies": 1.0, "rewards/chosen": -5.861037061549723e-05, "rewards/margins": 0.14134542644023895, "rewards/rejected": -0.14140403270721436, "step": 5019 }, { "epoch": 3.4716459197787, "grad_norm": 10.944015502929688, "learning_rate": 3.626863377900723e-05, "log_odds_chosen": 8.77437973022461, "log_odds_ratio": -0.0020061221439391375, "logits/chosen": -0.45963379740715027, "logits/rejected": -0.552274227142334, "logps/chosen": -0.0023264577612280846, "logps/rejected": -1.2882215976715088, "loss": 1.8215, "nll_loss": 0.45518240332603455, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023264579067472368, "rewards/margins": 0.12858951091766357, "rewards/rejected": -0.12882214784622192, "step": 5020 }, { "epoch": 3.472337482710927, "grad_norm": 8.10033893585205, "learning_rate": 3.626479176271708e-05, "log_odds_chosen": 7.250068664550781, "log_odds_ratio": -0.11843190342187881, "logits/chosen": -0.21467533707618713, "logits/rejected": -0.2849116623401642, "logps/chosen": -0.0268861036747694, "logps/rejected": -1.3084977865219116, "loss": 1.2667, "nll_loss": 0.30483385920524597, "rewards/accuracies": 0.875, "rewards/chosen": -0.0026886104606091976, "rewards/margins": 0.12816117703914642, "rewards/rejected": -0.13084977865219116, "step": 5021 }, { "epoch": 3.4730290456431536, "grad_norm": 6.548411846160889, "learning_rate": 3.6260949746426925e-05, "log_odds_chosen": 8.930583953857422, "log_odds_ratio": -0.0018793250201269984, "logits/chosen": -0.38366758823394775, "logits/rejected": -0.4618600010871887, "logps/chosen": -0.009512092918157578, "logps/rejected": -2.154390811920166, "loss": 1.6325, "nll_loss": 0.4079264998435974, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009512093383818865, "rewards/margins": 0.21448788046836853, "rewards/rejected": -0.2154390960931778, "step": 5022 }, { "epoch": 3.4737206085753805, "grad_norm": 5.993508815765381, "learning_rate": 3.625710773013678e-05, "log_odds_chosen": 9.173480987548828, "log_odds_ratio": -0.00028242330881766975, "logits/chosen": -0.5220339298248291, "logits/rejected": -0.5704896450042725, "logps/chosen": -0.000999884563498199, "logps/rejected": -1.7175759077072144, "loss": 1.5205, "nll_loss": 0.3800984025001526, "rewards/accuracies": 1.0, "rewards/chosen": -9.998845780501142e-05, "rewards/margins": 0.17165759205818176, "rewards/rejected": -0.17175757884979248, "step": 5023 }, { "epoch": 3.4744121715076073, "grad_norm": 11.428689002990723, "learning_rate": 3.625326571384663e-05, "log_odds_chosen": 9.072298049926758, "log_odds_ratio": -0.01014068815857172, "logits/chosen": -0.5121976137161255, "logits/rejected": -0.4744713008403778, "logps/chosen": -0.012900039553642273, "logps/rejected": -2.3478598594665527, "loss": 1.9418, "nll_loss": 0.4844461679458618, "rewards/accuracies": 1.0, "rewards/chosen": -0.001290004001930356, "rewards/margins": 0.23349598050117493, "rewards/rejected": -0.2347859889268875, "step": 5024 }, { "epoch": 3.475103734439834, "grad_norm": 8.699267387390137, "learning_rate": 3.6249423697556476e-05, "log_odds_chosen": 10.239505767822266, "log_odds_ratio": -4.9830130592454225e-05, "logits/chosen": -0.5903966426849365, "logits/rejected": -0.599553108215332, "logps/chosen": -0.0001406385563313961, "logps/rejected": -1.375978946685791, "loss": 1.2361, "nll_loss": 0.30903035402297974, "rewards/accuracies": 1.0, "rewards/chosen": -1.4063856724533252e-05, "rewards/margins": 0.13758382201194763, "rewards/rejected": -0.13759788870811462, "step": 5025 }, { "epoch": 3.475795297372061, "grad_norm": 8.938702583312988, "learning_rate": 3.6245581681266335e-05, "log_odds_chosen": 9.332566261291504, "log_odds_ratio": -0.00019775879627559334, "logits/chosen": -0.08711080998182297, "logits/rejected": -0.18843892216682434, "logps/chosen": -0.000356603559339419, "logps/rejected": -1.3457491397857666, "loss": 1.4083, "nll_loss": 0.35204318165779114, "rewards/accuracies": 1.0, "rewards/chosen": -3.56603559339419e-05, "rewards/margins": 0.13453926146030426, "rewards/rejected": -0.13457490503787994, "step": 5026 }, { "epoch": 3.4764868603042878, "grad_norm": 29.00394058227539, "learning_rate": 3.624173966497618e-05, "log_odds_chosen": 7.198078155517578, "log_odds_ratio": -0.40212199091911316, "logits/chosen": -0.7504348158836365, "logits/rejected": -0.7492233514785767, "logps/chosen": -0.04937904328107834, "logps/rejected": -1.4801826477050781, "loss": 2.1109, "nll_loss": 0.487504780292511, "rewards/accuracies": 0.875, "rewards/chosen": -0.004937904886901379, "rewards/margins": 0.14308036863803864, "rewards/rejected": -0.1480182707309723, "step": 5027 }, { "epoch": 3.4771784232365146, "grad_norm": 10.620253562927246, "learning_rate": 3.623789764868603e-05, "log_odds_chosen": 8.709571838378906, "log_odds_ratio": -0.0006758072413504124, "logits/chosen": -0.32940155267715454, "logits/rejected": -0.3942253589630127, "logps/chosen": -0.005983125418424606, "logps/rejected": -2.287093162536621, "loss": 1.5783, "nll_loss": 0.3945064842700958, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005983125302009284, "rewards/margins": 0.22811101377010345, "rewards/rejected": -0.22870934009552002, "step": 5028 }, { "epoch": 3.4778699861687414, "grad_norm": 10.422560691833496, "learning_rate": 3.6234055632395885e-05, "log_odds_chosen": 9.66650676727295, "log_odds_ratio": -0.0008411741000600159, "logits/chosen": -0.36648574471473694, "logits/rejected": -0.43066418170928955, "logps/chosen": -0.0009089798550121486, "logps/rejected": -1.8880401849746704, "loss": 2.4622, "nll_loss": 0.6154661178588867, "rewards/accuracies": 1.0, "rewards/chosen": -9.089798550121486e-05, "rewards/margins": 0.18871311843395233, "rewards/rejected": -0.188804030418396, "step": 5029 }, { "epoch": 3.4785615491009683, "grad_norm": 6.423739433288574, "learning_rate": 3.623021361610574e-05, "log_odds_chosen": 8.680051803588867, "log_odds_ratio": -0.000523662893101573, "logits/chosen": -0.35798779129981995, "logits/rejected": -0.41463935375213623, "logps/chosen": -0.03764787316322327, "logps/rejected": -2.4031128883361816, "loss": 1.343, "nll_loss": 0.33569198846817017, "rewards/accuracies": 1.0, "rewards/chosen": -0.003764787456020713, "rewards/margins": 0.23654648661613464, "rewards/rejected": -0.24031127989292145, "step": 5030 }, { "epoch": 3.479253112033195, "grad_norm": 11.363663673400879, "learning_rate": 3.6226371599815584e-05, "log_odds_chosen": 8.720566749572754, "log_odds_ratio": -0.000603137887082994, "logits/chosen": -0.5339272022247314, "logits/rejected": -0.5698242783546448, "logps/chosen": -0.017068665474653244, "logps/rejected": -1.91888427734375, "loss": 1.6891, "nll_loss": 0.42220330238342285, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017068665474653244, "rewards/margins": 0.19018155336380005, "rewards/rejected": -0.19188842177391052, "step": 5031 }, { "epoch": 3.479944674965422, "grad_norm": 16.539823532104492, "learning_rate": 3.6222529583525436e-05, "log_odds_chosen": 8.81527042388916, "log_odds_ratio": -0.0015905527397990227, "logits/chosen": -0.4191141724586487, "logits/rejected": -0.5238168239593506, "logps/chosen": -0.021409403532743454, "logps/rejected": -2.097461700439453, "loss": 1.6056, "nll_loss": 0.40124809741973877, "rewards/accuracies": 1.0, "rewards/chosen": -0.002140940399840474, "rewards/margins": 0.207605242729187, "rewards/rejected": -0.20974619686603546, "step": 5032 }, { "epoch": 3.4806362378976488, "grad_norm": 8.92994213104248, "learning_rate": 3.621868756723529e-05, "log_odds_chosen": 8.732751846313477, "log_odds_ratio": -0.03898253291845322, "logits/chosen": -0.44541874527931213, "logits/rejected": -0.4249739646911621, "logps/chosen": -0.011088935658335686, "logps/rejected": -1.4665523767471313, "loss": 1.2592, "nll_loss": 0.3109119236469269, "rewards/accuracies": 1.0, "rewards/chosen": -0.001108893658965826, "rewards/margins": 0.1455463469028473, "rewards/rejected": -0.14665524661540985, "step": 5033 }, { "epoch": 3.4813278008298756, "grad_norm": 9.916893005371094, "learning_rate": 3.6214845550945134e-05, "log_odds_chosen": 9.63333511352539, "log_odds_ratio": -0.00036752651794813573, "logits/chosen": -0.7815406322479248, "logits/rejected": -0.8568220138549805, "logps/chosen": -0.004876892548054457, "logps/rejected": -2.339784622192383, "loss": 1.4612, "nll_loss": 0.36527514457702637, "rewards/accuracies": 1.0, "rewards/chosen": -0.00048768927808851004, "rewards/margins": 0.23349076509475708, "rewards/rejected": -0.23397845029830933, "step": 5034 }, { "epoch": 3.4820193637621024, "grad_norm": 8.489008903503418, "learning_rate": 3.621100353465499e-05, "log_odds_chosen": 7.9217400550842285, "log_odds_ratio": -0.02504688873887062, "logits/chosen": -0.4744040369987488, "logits/rejected": -0.45413997769355774, "logps/chosen": -0.02560979500412941, "logps/rejected": -1.4098031520843506, "loss": 1.9231, "nll_loss": 0.47827842831611633, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025609794538468122, "rewards/margins": 0.13841934502124786, "rewards/rejected": -0.1409803181886673, "step": 5035 }, { "epoch": 3.4827109266943292, "grad_norm": 7.887087345123291, "learning_rate": 3.620716151836484e-05, "log_odds_chosen": 9.41520881652832, "log_odds_ratio": -0.0003263282706029713, "logits/chosen": -0.6199313402175903, "logits/rejected": -0.6613143682479858, "logps/chosen": -0.00028231722535565495, "logps/rejected": -1.521788477897644, "loss": 1.5075, "nll_loss": 0.37684372067451477, "rewards/accuracies": 1.0, "rewards/chosen": -2.8231723263161257e-05, "rewards/margins": 0.15215063095092773, "rewards/rejected": -0.15217885375022888, "step": 5036 }, { "epoch": 3.483402489626556, "grad_norm": 10.450078964233398, "learning_rate": 3.620331950207469e-05, "log_odds_chosen": 7.924431800842285, "log_odds_ratio": -0.0865463986992836, "logits/chosen": -0.33945807814598083, "logits/rejected": -0.33382901549339294, "logps/chosen": -0.01411413960158825, "logps/rejected": -1.5017454624176025, "loss": 1.5724, "nll_loss": 0.3844349980354309, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014114138903096318, "rewards/margins": 0.14876313507556915, "rewards/rejected": -0.1501745581626892, "step": 5037 }, { "epoch": 3.484094052558783, "grad_norm": 8.861961364746094, "learning_rate": 3.6199477485784544e-05, "log_odds_chosen": 9.79195785522461, "log_odds_ratio": -0.00022242713021114469, "logits/chosen": -0.6150322556495667, "logits/rejected": -0.7293128967285156, "logps/chosen": -0.05526260286569595, "logps/rejected": -2.4982879161834717, "loss": 2.0242, "nll_loss": 0.5060203075408936, "rewards/accuracies": 1.0, "rewards/chosen": -0.00552626047283411, "rewards/margins": 0.24430254101753235, "rewards/rejected": -0.2498287856578827, "step": 5038 }, { "epoch": 3.4847856154910097, "grad_norm": 28.145069122314453, "learning_rate": 3.6195635469494396e-05, "log_odds_chosen": 5.657858848571777, "log_odds_ratio": -0.2621900737285614, "logits/chosen": -0.7421402335166931, "logits/rejected": -0.7373098134994507, "logps/chosen": -0.06877894699573517, "logps/rejected": -1.5929591655731201, "loss": 1.9334, "nll_loss": 0.45713573694229126, "rewards/accuracies": 0.75, "rewards/chosen": -0.006877894978970289, "rewards/margins": 0.15241803228855133, "rewards/rejected": -0.159295916557312, "step": 5039 }, { "epoch": 3.4854771784232366, "grad_norm": 9.978339195251465, "learning_rate": 3.619179345320424e-05, "log_odds_chosen": 7.554687976837158, "log_odds_ratio": -0.20541711151599884, "logits/chosen": -0.3608725666999817, "logits/rejected": -0.3803059458732605, "logps/chosen": -0.040510617196559906, "logps/rejected": -1.93406343460083, "loss": 2.0015, "nll_loss": 0.47983455657958984, "rewards/accuracies": 0.875, "rewards/chosen": -0.004051061812788248, "rewards/margins": 0.1893552988767624, "rewards/rejected": -0.1934063583612442, "step": 5040 }, { "epoch": 3.4861687413554634, "grad_norm": 9.79824161529541, "learning_rate": 3.6187951436914094e-05, "log_odds_chosen": 8.895830154418945, "log_odds_ratio": -0.00149711431004107, "logits/chosen": -0.5272182822227478, "logits/rejected": -0.5619787573814392, "logps/chosen": -0.0031608245335519314, "logps/rejected": -1.6382068395614624, "loss": 1.9738, "nll_loss": 0.4933049976825714, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031608244171366096, "rewards/margins": 0.16350463032722473, "rewards/rejected": -0.16382069885730743, "step": 5041 }, { "epoch": 3.4868603042876902, "grad_norm": 13.927655220031738, "learning_rate": 3.618410942062395e-05, "log_odds_chosen": 7.837332725524902, "log_odds_ratio": -0.03233502060174942, "logits/chosen": -0.29475536942481995, "logits/rejected": -0.33633238077163696, "logps/chosen": -0.00651334086433053, "logps/rejected": -1.2358462810516357, "loss": 1.4919, "nll_loss": 0.36975133419036865, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006513340631499887, "rewards/margins": 0.1229332983493805, "rewards/rejected": -0.12358463555574417, "step": 5042 }, { "epoch": 3.487551867219917, "grad_norm": 9.157980918884277, "learning_rate": 3.618026740433379e-05, "log_odds_chosen": 8.778959274291992, "log_odds_ratio": -0.0015918298158794641, "logits/chosen": -0.3938351273536682, "logits/rejected": -0.48897871375083923, "logps/chosen": -0.006256352178752422, "logps/rejected": -1.4195303916931152, "loss": 1.1331, "nll_loss": 0.2831065058708191, "rewards/accuracies": 1.0, "rewards/chosen": -0.000625635264441371, "rewards/margins": 0.1413274109363556, "rewards/rejected": -0.14195305109024048, "step": 5043 }, { "epoch": 3.488243430152144, "grad_norm": 9.305924415588379, "learning_rate": 3.6176425388043645e-05, "log_odds_chosen": 9.255959510803223, "log_odds_ratio": -0.002576855244114995, "logits/chosen": -0.7664635181427002, "logits/rejected": -0.8265019059181213, "logps/chosen": -0.024486836045980453, "logps/rejected": -1.4735629558563232, "loss": 1.6929, "nll_loss": 0.42297691106796265, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024486836045980453, "rewards/margins": 0.1449076235294342, "rewards/rejected": -0.1473563015460968, "step": 5044 }, { "epoch": 3.4889349930843707, "grad_norm": 7.845460414886475, "learning_rate": 3.61725833717535e-05, "log_odds_chosen": 8.247480392456055, "log_odds_ratio": -0.06533218920230865, "logits/chosen": -0.4811922013759613, "logits/rejected": -0.5305379033088684, "logps/chosen": -0.02111988700926304, "logps/rejected": -1.544506549835205, "loss": 2.2076, "nll_loss": 0.5453552603721619, "rewards/accuracies": 1.0, "rewards/chosen": -0.00211198884062469, "rewards/margins": 0.1523386687040329, "rewards/rejected": -0.1544506549835205, "step": 5045 }, { "epoch": 3.4896265560165975, "grad_norm": 5.572291374206543, "learning_rate": 3.616874135546335e-05, "log_odds_chosen": 8.172150611877441, "log_odds_ratio": -0.0033650745172053576, "logits/chosen": -0.2974160313606262, "logits/rejected": -0.2888872027397156, "logps/chosen": -0.020543230697512627, "logps/rejected": -1.5316355228424072, "loss": 1.5213, "nll_loss": 0.3799995481967926, "rewards/accuracies": 1.0, "rewards/chosen": -0.002054323209449649, "rewards/margins": 0.1511092185974121, "rewards/rejected": -0.15316355228424072, "step": 5046 }, { "epoch": 3.4903181189488244, "grad_norm": 10.916781425476074, "learning_rate": 3.6164899339173196e-05, "log_odds_chosen": 7.001707077026367, "log_odds_ratio": -0.10261018574237823, "logits/chosen": -0.5108945369720459, "logits/rejected": -0.5677796006202698, "logps/chosen": -0.05275255814194679, "logps/rejected": -1.37205171585083, "loss": 1.2155, "nll_loss": 0.29360371828079224, "rewards/accuracies": 0.875, "rewards/chosen": -0.0052752564661204815, "rewards/margins": 0.1319299042224884, "rewards/rejected": -0.13720516860485077, "step": 5047 }, { "epoch": 3.491009681881051, "grad_norm": 5.465517520904541, "learning_rate": 3.6161057322883055e-05, "log_odds_chosen": 8.64756965637207, "log_odds_ratio": -0.0032608138862997293, "logits/chosen": -0.3673758804798126, "logits/rejected": -0.3721608519554138, "logps/chosen": -0.010069094598293304, "logps/rejected": -1.5483287572860718, "loss": 1.4544, "nll_loss": 0.36326465010643005, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010069095296785235, "rewards/margins": 0.15382596850395203, "rewards/rejected": -0.1548328697681427, "step": 5048 }, { "epoch": 3.491701244813278, "grad_norm": 11.032556533813477, "learning_rate": 3.61572153065929e-05, "log_odds_chosen": 7.748650074005127, "log_odds_ratio": -0.06495750695466995, "logits/chosen": -0.6558927297592163, "logits/rejected": -0.7183180451393127, "logps/chosen": -0.02538420259952545, "logps/rejected": -1.5755445957183838, "loss": 1.4078, "nll_loss": 0.34544721245765686, "rewards/accuracies": 1.0, "rewards/chosen": -0.002538420259952545, "rewards/margins": 0.15501603484153748, "rewards/rejected": -0.15755446255207062, "step": 5049 }, { "epoch": 3.492392807745505, "grad_norm": 12.697063446044922, "learning_rate": 3.615337329030275e-05, "log_odds_chosen": 9.086719512939453, "log_odds_ratio": -0.004768200218677521, "logits/chosen": -0.7487730979919434, "logits/rejected": -0.8184801936149597, "logps/chosen": -0.010872675105929375, "logps/rejected": -2.005723237991333, "loss": 1.7075, "nll_loss": 0.4263884127140045, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010872675338760018, "rewards/margins": 0.19948504865169525, "rewards/rejected": -0.20057231187820435, "step": 5050 }, { "epoch": 3.4930843706777317, "grad_norm": 13.886307716369629, "learning_rate": 3.6149531274012605e-05, "log_odds_chosen": 10.670909881591797, "log_odds_ratio": -4.9107984523288906e-05, "logits/chosen": -0.9256460666656494, "logits/rejected": -0.9379943609237671, "logps/chosen": -0.00014166987966746092, "logps/rejected": -1.8907963037490845, "loss": 2.3547, "nll_loss": 0.5886602401733398, "rewards/accuracies": 1.0, "rewards/chosen": -1.416698705725139e-05, "rewards/margins": 0.18906547129154205, "rewards/rejected": -0.1890796422958374, "step": 5051 }, { "epoch": 3.4937759336099585, "grad_norm": 8.242508888244629, "learning_rate": 3.614568925772245e-05, "log_odds_chosen": 7.944234848022461, "log_odds_ratio": -0.006905496120452881, "logits/chosen": -0.48522865772247314, "logits/rejected": -0.5774377584457397, "logps/chosen": -0.03180186077952385, "logps/rejected": -1.4319233894348145, "loss": 1.3, "nll_loss": 0.3243020176887512, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031801860313862562, "rewards/margins": 0.14001215994358063, "rewards/rejected": -0.1431923359632492, "step": 5052 }, { "epoch": 3.4944674965421854, "grad_norm": 8.319031715393066, "learning_rate": 3.6141847241432303e-05, "log_odds_chosen": 7.558518886566162, "log_odds_ratio": -0.14716488122940063, "logits/chosen": -0.22034114599227905, "logits/rejected": -0.32531481981277466, "logps/chosen": -0.03135522082448006, "logps/rejected": -1.3388361930847168, "loss": 1.4963, "nll_loss": 0.3593598008155823, "rewards/accuracies": 0.875, "rewards/chosen": -0.003135522361844778, "rewards/margins": 0.13074809312820435, "rewards/rejected": -0.13388362526893616, "step": 5053 }, { "epoch": 3.495159059474412, "grad_norm": 11.776897430419922, "learning_rate": 3.6138005225142156e-05, "log_odds_chosen": 9.902790069580078, "log_odds_ratio": -0.00023262518516276032, "logits/chosen": -0.03856794908642769, "logits/rejected": -0.1859092116355896, "logps/chosen": -0.0006960300961509347, "logps/rejected": -2.378844738006592, "loss": 1.459, "nll_loss": 0.36472997069358826, "rewards/accuracies": 1.0, "rewards/chosen": -6.960301107028499e-05, "rewards/margins": 0.23781487345695496, "rewards/rejected": -0.23788444697856903, "step": 5054 }, { "epoch": 3.495850622406639, "grad_norm": 10.10391902923584, "learning_rate": 3.613416320885201e-05, "log_odds_chosen": 10.151603698730469, "log_odds_ratio": -0.00014976883539929986, "logits/chosen": -0.6190488934516907, "logits/rejected": -0.7602345943450928, "logps/chosen": -0.0018250253051519394, "logps/rejected": -2.936875343322754, "loss": 1.7966, "nll_loss": 0.4491300582885742, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018250252469442785, "rewards/margins": 0.2935050427913666, "rewards/rejected": -0.2936875522136688, "step": 5055 }, { "epoch": 3.496542185338866, "grad_norm": 9.116644859313965, "learning_rate": 3.6130321192561854e-05, "log_odds_chosen": 6.269155025482178, "log_odds_ratio": -0.41318410634994507, "logits/chosen": -0.355681836605072, "logits/rejected": -0.41957610845565796, "logps/chosen": -0.05598800256848335, "logps/rejected": -1.3569520711898804, "loss": 1.8405, "nll_loss": 0.41881901025772095, "rewards/accuracies": 0.875, "rewards/chosen": -0.005598800256848335, "rewards/margins": 0.1300964057445526, "rewards/rejected": -0.135695219039917, "step": 5056 }, { "epoch": 3.4972337482710927, "grad_norm": 13.085668563842773, "learning_rate": 3.612647917627171e-05, "log_odds_chosen": 8.548544883728027, "log_odds_ratio": -0.0006819585105404258, "logits/chosen": -0.4421701431274414, "logits/rejected": -0.5115413665771484, "logps/chosen": -0.0004435776500031352, "logps/rejected": -1.0637874603271484, "loss": 1.9339, "nll_loss": 0.48339563608169556, "rewards/accuracies": 1.0, "rewards/chosen": -4.435776645550504e-05, "rewards/margins": 0.106334388256073, "rewards/rejected": -0.10637873411178589, "step": 5057 }, { "epoch": 3.4979253112033195, "grad_norm": 6.2441020011901855, "learning_rate": 3.612263715998156e-05, "log_odds_chosen": 8.581676483154297, "log_odds_ratio": -0.03747273609042168, "logits/chosen": -0.6855819225311279, "logits/rejected": -0.6639347076416016, "logps/chosen": -0.011732269078493118, "logps/rejected": -1.8874115943908691, "loss": 1.4293, "nll_loss": 0.35356974601745605, "rewards/accuracies": 1.0, "rewards/chosen": -0.001173226861283183, "rewards/margins": 0.18756791949272156, "rewards/rejected": -0.18874114751815796, "step": 5058 }, { "epoch": 3.4986168741355463, "grad_norm": 4.798537254333496, "learning_rate": 3.611879514369141e-05, "log_odds_chosen": 9.492433547973633, "log_odds_ratio": -0.00040241493843495846, "logits/chosen": -0.48869088292121887, "logits/rejected": -0.48516204953193665, "logps/chosen": -0.00044442637590691447, "logps/rejected": -1.894187569618225, "loss": 1.2766, "nll_loss": 0.3191039264202118, "rewards/accuracies": 1.0, "rewards/chosen": -4.444263322511688e-05, "rewards/margins": 0.18937431275844574, "rewards/rejected": -0.189418762922287, "step": 5059 }, { "epoch": 3.499308437067773, "grad_norm": 9.459883689880371, "learning_rate": 3.6114953127401264e-05, "log_odds_chosen": 9.808856010437012, "log_odds_ratio": -0.001135217142291367, "logits/chosen": -0.6091340184211731, "logits/rejected": -0.6108609437942505, "logps/chosen": -0.001535170478746295, "logps/rejected": -2.7543370723724365, "loss": 1.3907, "nll_loss": 0.34757012128829956, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015351705951616168, "rewards/margins": 0.2752802073955536, "rewards/rejected": -0.2754337191581726, "step": 5060 }, { "epoch": 3.5, "grad_norm": 13.043842315673828, "learning_rate": 3.611111111111111e-05, "log_odds_chosen": 8.608272552490234, "log_odds_ratio": -0.0255854744464159, "logits/chosen": -0.4813705086708069, "logits/rejected": -0.500043511390686, "logps/chosen": -0.006744784768670797, "logps/rejected": -1.9021813869476318, "loss": 2.3054, "nll_loss": 0.5737854838371277, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006744785932824016, "rewards/margins": 0.18954366445541382, "rewards/rejected": -0.19021813571453094, "step": 5061 }, { "epoch": 3.500691562932227, "grad_norm": 9.071768760681152, "learning_rate": 3.610726909482096e-05, "log_odds_chosen": 10.202449798583984, "log_odds_ratio": -6.68539505568333e-05, "logits/chosen": -0.6950543522834778, "logits/rejected": -0.7431255578994751, "logps/chosen": -0.00206986372359097, "logps/rejected": -2.522648334503174, "loss": 1.1268, "nll_loss": 0.2816920876502991, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020698636944871396, "rewards/margins": 0.25205785036087036, "rewards/rejected": -0.2522648274898529, "step": 5062 }, { "epoch": 3.5013831258644537, "grad_norm": 8.379225730895996, "learning_rate": 3.6103427078530814e-05, "log_odds_chosen": 9.258936882019043, "log_odds_ratio": -0.0001494312018621713, "logits/chosen": -0.5267353057861328, "logits/rejected": -0.5575824975967407, "logps/chosen": -0.00039477666723541915, "logps/rejected": -1.283223032951355, "loss": 1.7336, "nll_loss": 0.4333917498588562, "rewards/accuracies": 1.0, "rewards/chosen": -3.94776689063292e-05, "rewards/margins": 0.128282830119133, "rewards/rejected": -0.1283223032951355, "step": 5063 }, { "epoch": 3.5020746887966805, "grad_norm": 10.303986549377441, "learning_rate": 3.609958506224067e-05, "log_odds_chosen": 9.066301345825195, "log_odds_ratio": -0.15924587845802307, "logits/chosen": -0.5196304321289062, "logits/rejected": -0.5205535292625427, "logps/chosen": -0.03134698420763016, "logps/rejected": -2.229219675064087, "loss": 1.5492, "nll_loss": 0.3713781237602234, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031346981413662434, "rewards/margins": 0.21978726983070374, "rewards/rejected": -0.2229219675064087, "step": 5064 }, { "epoch": 3.5027662517289073, "grad_norm": 10.489588737487793, "learning_rate": 3.609574304595051e-05, "log_odds_chosen": 9.330596923828125, "log_odds_ratio": -0.0003697268257383257, "logits/chosen": -0.8499776124954224, "logits/rejected": -0.892784595489502, "logps/chosen": -0.001054689404554665, "logps/rejected": -1.539988398551941, "loss": 1.5798, "nll_loss": 0.39492517709732056, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010546894918661565, "rewards/margins": 0.1538933664560318, "rewards/rejected": -0.15399885177612305, "step": 5065 }, { "epoch": 3.503457814661134, "grad_norm": 9.035969734191895, "learning_rate": 3.609190102966037e-05, "log_odds_chosen": 10.0315580368042, "log_odds_ratio": -0.00021950459631625563, "logits/chosen": -0.24112743139266968, "logits/rejected": -0.27213215827941895, "logps/chosen": -0.012225059792399406, "logps/rejected": -3.4997692108154297, "loss": 1.3641, "nll_loss": 0.34100615978240967, "rewards/accuracies": 1.0, "rewards/chosen": -0.001222505932673812, "rewards/margins": 0.3487544059753418, "rewards/rejected": -0.34997692704200745, "step": 5066 }, { "epoch": 3.504149377593361, "grad_norm": 13.972981452941895, "learning_rate": 3.608805901337022e-05, "log_odds_chosen": 10.162301063537598, "log_odds_ratio": -0.00026165239978581667, "logits/chosen": -0.3325657844543457, "logits/rejected": -0.4123249053955078, "logps/chosen": -0.006227482575923204, "logps/rejected": -2.271134853363037, "loss": 1.3434, "nll_loss": 0.3358166217803955, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006227482808753848, "rewards/margins": 0.22649073600769043, "rewards/rejected": -0.2271134853363037, "step": 5067 }, { "epoch": 3.504840940525588, "grad_norm": 6.947229385375977, "learning_rate": 3.608421699708007e-05, "log_odds_chosen": 8.882793426513672, "log_odds_ratio": -0.0007761258166283369, "logits/chosen": -0.7312330007553101, "logits/rejected": -0.7411985397338867, "logps/chosen": -0.017531974241137505, "logps/rejected": -2.05059814453125, "loss": 1.9768, "nll_loss": 0.4941311478614807, "rewards/accuracies": 1.0, "rewards/chosen": -0.001753197400830686, "rewards/margins": 0.20330661535263062, "rewards/rejected": -0.20505981147289276, "step": 5068 }, { "epoch": 3.5055325034578146, "grad_norm": 12.753296852111816, "learning_rate": 3.608037498078992e-05, "log_odds_chosen": 9.524681091308594, "log_odds_ratio": -0.00022812785755377263, "logits/chosen": -0.8692194819450378, "logits/rejected": -0.8909227252006531, "logps/chosen": -0.006189709063619375, "logps/rejected": -2.2828361988067627, "loss": 1.7645, "nll_loss": 0.44110339879989624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006189708947204053, "rewards/margins": 0.22766464948654175, "rewards/rejected": -0.22828364372253418, "step": 5069 }, { "epoch": 3.5062240663900415, "grad_norm": 10.580520629882812, "learning_rate": 3.607653296449977e-05, "log_odds_chosen": 9.021589279174805, "log_odds_ratio": -0.008893512189388275, "logits/chosen": -0.41797518730163574, "logits/rejected": -0.5197737812995911, "logps/chosen": -0.009041551500558853, "logps/rejected": -2.1127045154571533, "loss": 1.8537, "nll_loss": 0.46254584193229675, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009041551384143531, "rewards/margins": 0.21036630868911743, "rewards/rejected": -0.21127045154571533, "step": 5070 }, { "epoch": 3.5069156293222683, "grad_norm": 14.19862174987793, "learning_rate": 3.607269094820962e-05, "log_odds_chosen": 9.665410041809082, "log_odds_ratio": -0.0405154712498188, "logits/chosen": -0.4829082190990448, "logits/rejected": -0.492472767829895, "logps/chosen": -0.010864950716495514, "logps/rejected": -1.8430681228637695, "loss": 1.4125, "nll_loss": 0.3490619659423828, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010864951182156801, "rewards/margins": 0.18322032690048218, "rewards/rejected": -0.184306800365448, "step": 5071 }, { "epoch": 3.507607192254495, "grad_norm": 8.601861000061035, "learning_rate": 3.606884893191947e-05, "log_odds_chosen": 8.293171882629395, "log_odds_ratio": -0.04796692728996277, "logits/chosen": -0.47136473655700684, "logits/rejected": -0.5330482721328735, "logps/chosen": -0.01917835883796215, "logps/rejected": -2.065481662750244, "loss": 1.2136, "nll_loss": 0.29859915375709534, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019178359070792794, "rewards/margins": 0.20463034510612488, "rewards/rejected": -0.20654819905757904, "step": 5072 }, { "epoch": 3.508298755186722, "grad_norm": 6.478180408477783, "learning_rate": 3.6065006915629325e-05, "log_odds_chosen": 10.741048812866211, "log_odds_ratio": -5.074698128737509e-05, "logits/chosen": -0.8846883177757263, "logits/rejected": -0.967951774597168, "logps/chosen": -0.00030177010921761394, "logps/rejected": -2.429570436477661, "loss": 1.9351, "nll_loss": 0.48377639055252075, "rewards/accuracies": 1.0, "rewards/chosen": -3.017700873897411e-05, "rewards/margins": 0.24292686581611633, "rewards/rejected": -0.24295704066753387, "step": 5073 }, { "epoch": 3.508990318118949, "grad_norm": 12.065322875976562, "learning_rate": 3.606116489933917e-05, "log_odds_chosen": 9.034112930297852, "log_odds_ratio": -0.0008123770821839571, "logits/chosen": -0.7525606155395508, "logits/rejected": -0.7461023330688477, "logps/chosen": -0.0008287005475722253, "logps/rejected": -1.683483600616455, "loss": 1.9323, "nll_loss": 0.4829895496368408, "rewards/accuracies": 1.0, "rewards/chosen": -8.287005039164796e-05, "rewards/margins": 0.16826549172401428, "rewards/rejected": -0.16834837198257446, "step": 5074 }, { "epoch": 3.5096818810511756, "grad_norm": 14.989665031433105, "learning_rate": 3.605732288304903e-05, "log_odds_chosen": 8.394176483154297, "log_odds_ratio": -0.0017240258166566491, "logits/chosen": -0.3496403694152832, "logits/rejected": -0.4731343984603882, "logps/chosen": -0.023600636050105095, "logps/rejected": -2.197500467300415, "loss": 2.4449, "nll_loss": 0.6110531091690063, "rewards/accuracies": 1.0, "rewards/chosen": -0.002360063372179866, "rewards/margins": 0.21738998591899872, "rewards/rejected": -0.2197500467300415, "step": 5075 }, { "epoch": 3.5103734439834025, "grad_norm": 11.799775123596191, "learning_rate": 3.6053480866758876e-05, "log_odds_chosen": 8.778727531433105, "log_odds_ratio": -0.0009676261688582599, "logits/chosen": -0.5945629477500916, "logits/rejected": -0.652061939239502, "logps/chosen": -0.01023485790938139, "logps/rejected": -1.929757833480835, "loss": 1.6064, "nll_loss": 0.40149986743927, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010234859073534608, "rewards/margins": 0.19195228815078735, "rewards/rejected": -0.19297577440738678, "step": 5076 }, { "epoch": 3.5110650069156293, "grad_norm": 10.793294906616211, "learning_rate": 3.604963885046873e-05, "log_odds_chosen": 9.653995513916016, "log_odds_ratio": -0.00015553759294562042, "logits/chosen": -0.5547876954078674, "logits/rejected": -0.6828795075416565, "logps/chosen": -0.0003929367521777749, "logps/rejected": -1.7646691799163818, "loss": 1.3604, "nll_loss": 0.34009164571762085, "rewards/accuracies": 1.0, "rewards/chosen": -3.9293678128160536e-05, "rewards/margins": 0.17642761766910553, "rewards/rejected": -0.1764669120311737, "step": 5077 }, { "epoch": 3.511756569847856, "grad_norm": 11.926183700561523, "learning_rate": 3.604579683417858e-05, "log_odds_chosen": 10.17160415649414, "log_odds_ratio": -8.280223846668378e-05, "logits/chosen": -0.5874757766723633, "logits/rejected": -0.6495798826217651, "logps/chosen": -0.0005028080195188522, "logps/rejected": -2.0479609966278076, "loss": 0.914, "nll_loss": 0.2284971922636032, "rewards/accuracies": 1.0, "rewards/chosen": -5.028080704505555e-05, "rewards/margins": 0.2047458291053772, "rewards/rejected": -0.20479610562324524, "step": 5078 }, { "epoch": 3.512448132780083, "grad_norm": 4.949942588806152, "learning_rate": 3.6041954817888426e-05, "log_odds_chosen": 7.430364608764648, "log_odds_ratio": -0.14765413105487823, "logits/chosen": -0.4910086989402771, "logits/rejected": -0.535973072052002, "logps/chosen": -0.07196828722953796, "logps/rejected": -2.810218334197998, "loss": 1.5335, "nll_loss": 0.36859917640686035, "rewards/accuracies": 0.875, "rewards/chosen": -0.007196827791631222, "rewards/margins": 0.2738250195980072, "rewards/rejected": -0.2810218632221222, "step": 5079 }, { "epoch": 3.5131396957123098, "grad_norm": 11.681784629821777, "learning_rate": 3.603811280159828e-05, "log_odds_chosen": 10.228755950927734, "log_odds_ratio": -0.00011042998812627047, "logits/chosen": -0.5658866763114929, "logits/rejected": -0.6545494198799133, "logps/chosen": -0.012667344883084297, "logps/rejected": -3.262669086456299, "loss": 1.9467, "nll_loss": 0.48666873574256897, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012667345581576228, "rewards/margins": 0.32500016689300537, "rewards/rejected": -0.326266884803772, "step": 5080 }, { "epoch": 3.5138312586445366, "grad_norm": 9.769969940185547, "learning_rate": 3.603427078530813e-05, "log_odds_chosen": 9.764698028564453, "log_odds_ratio": -0.000422182260081172, "logits/chosen": -0.966056227684021, "logits/rejected": -1.006844162940979, "logps/chosen": -0.0004481312062125653, "logps/rejected": -1.9890567064285278, "loss": 2.1247, "nll_loss": 0.5311307311058044, "rewards/accuracies": 1.0, "rewards/chosen": -4.48131249868311e-05, "rewards/margins": 0.19886085391044617, "rewards/rejected": -0.19890566170215607, "step": 5081 }, { "epoch": 3.5145228215767634, "grad_norm": 9.379589080810547, "learning_rate": 3.6030428769017984e-05, "log_odds_chosen": 9.625581741333008, "log_odds_ratio": -0.0015480243600904942, "logits/chosen": -0.6199181079864502, "logits/rejected": -0.7238527536392212, "logps/chosen": -0.0013244760921224952, "logps/rejected": -1.7693266868591309, "loss": 1.1488, "nll_loss": 0.287042498588562, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013244761794339865, "rewards/margins": 0.1768002212047577, "rewards/rejected": -0.1769326776266098, "step": 5082 }, { "epoch": 3.5152143845089903, "grad_norm": 13.813467979431152, "learning_rate": 3.602658675272783e-05, "log_odds_chosen": 9.168367385864258, "log_odds_ratio": -0.0006410049390979111, "logits/chosen": -0.7326682806015015, "logits/rejected": -0.7909855842590332, "logps/chosen": -0.00048034184146672487, "logps/rejected": -1.5736289024353027, "loss": 1.9079, "nll_loss": 0.476909339427948, "rewards/accuracies": 1.0, "rewards/chosen": -4.803418414667249e-05, "rewards/margins": 0.15731483697891235, "rewards/rejected": -0.15736287832260132, "step": 5083 }, { "epoch": 3.515905947441217, "grad_norm": 9.20576000213623, "learning_rate": 3.602274473643769e-05, "log_odds_chosen": 7.716304302215576, "log_odds_ratio": -0.02225544862449169, "logits/chosen": -0.3978271484375, "logits/rejected": -0.4570864737033844, "logps/chosen": -0.01456506922841072, "logps/rejected": -1.4820979833602905, "loss": 1.7357, "nll_loss": 0.4317033290863037, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014565070159733295, "rewards/margins": 0.14675329625606537, "rewards/rejected": -0.148209810256958, "step": 5084 }, { "epoch": 3.516597510373444, "grad_norm": 10.250093460083008, "learning_rate": 3.6018902720147534e-05, "log_odds_chosen": 9.908461570739746, "log_odds_ratio": -0.00018903396266978234, "logits/chosen": -0.9073819518089294, "logits/rejected": -1.0101597309112549, "logps/chosen": -0.0003365837619639933, "logps/rejected": -1.800266981124878, "loss": 1.4972, "nll_loss": 0.37427830696105957, "rewards/accuracies": 1.0, "rewards/chosen": -3.365837619639933e-05, "rewards/margins": 0.17999303340911865, "rewards/rejected": -0.18002669513225555, "step": 5085 }, { "epoch": 3.5172890733056708, "grad_norm": 9.994810104370117, "learning_rate": 3.601506070385739e-05, "log_odds_chosen": 9.212892532348633, "log_odds_ratio": -0.00037254547351039946, "logits/chosen": -0.42903658747673035, "logits/rejected": -0.5099354982376099, "logps/chosen": -0.0033264392986893654, "logps/rejected": -1.9749102592468262, "loss": 1.2616, "nll_loss": 0.31536275148391724, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033264391822740436, "rewards/margins": 0.19715839624404907, "rewards/rejected": -0.19749103486537933, "step": 5086 }, { "epoch": 3.5179806362378976, "grad_norm": 10.837930679321289, "learning_rate": 3.601121868756724e-05, "log_odds_chosen": 8.183601379394531, "log_odds_ratio": -0.20187465846538544, "logits/chosen": -0.7843718528747559, "logits/rejected": -0.78766268491745, "logps/chosen": -0.032734133303165436, "logps/rejected": -1.520382046699524, "loss": 1.5588, "nll_loss": 0.36951279640197754, "rewards/accuracies": 0.875, "rewards/chosen": -0.003273413283750415, "rewards/margins": 0.14876478910446167, "rewards/rejected": -0.15203820168972015, "step": 5087 }, { "epoch": 3.5186721991701244, "grad_norm": 11.267701148986816, "learning_rate": 3.6007376671277085e-05, "log_odds_chosen": 8.13360595703125, "log_odds_ratio": -0.0014181515434756875, "logits/chosen": -0.8068364858627319, "logits/rejected": -0.7554510831832886, "logps/chosen": -0.033745840191841125, "logps/rejected": -2.1890408992767334, "loss": 2.5046, "nll_loss": 0.6260114908218384, "rewards/accuracies": 1.0, "rewards/chosen": -0.003374584252014756, "rewards/margins": 0.21552950143814087, "rewards/rejected": -0.21890407800674438, "step": 5088 }, { "epoch": 3.5193637621023512, "grad_norm": 7.490444660186768, "learning_rate": 3.600353465498694e-05, "log_odds_chosen": 9.055780410766602, "log_odds_ratio": -0.00031747232424095273, "logits/chosen": -0.4991127848625183, "logits/rejected": -0.6640005707740784, "logps/chosen": -0.0036325249820947647, "logps/rejected": -1.712687373161316, "loss": 1.0071, "nll_loss": 0.251755028963089, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036325250403024256, "rewards/margins": 0.1709054708480835, "rewards/rejected": -0.17126873135566711, "step": 5089 }, { "epoch": 3.520055325034578, "grad_norm": 10.06659984588623, "learning_rate": 3.599969263869679e-05, "log_odds_chosen": 7.548827648162842, "log_odds_ratio": -0.00912503618746996, "logits/chosen": -0.41075634956359863, "logits/rejected": -0.44909027218818665, "logps/chosen": -0.01701589860022068, "logps/rejected": -1.6517646312713623, "loss": 1.4971, "nll_loss": 0.37337014079093933, "rewards/accuracies": 1.0, "rewards/chosen": -0.001701589790172875, "rewards/margins": 0.1634748876094818, "rewards/rejected": -0.16517646610736847, "step": 5090 }, { "epoch": 3.520746887966805, "grad_norm": 10.759828567504883, "learning_rate": 3.599585062240664e-05, "log_odds_chosen": 9.490163803100586, "log_odds_ratio": -0.00014201641897670925, "logits/chosen": -0.6013484001159668, "logits/rejected": -0.7783204913139343, "logps/chosen": -0.000322447856888175, "logps/rejected": -1.555269479751587, "loss": 1.3042, "nll_loss": 0.3260456323623657, "rewards/accuracies": 1.0, "rewards/chosen": -3.224478496122174e-05, "rewards/margins": 0.15549471974372864, "rewards/rejected": -0.15552696585655212, "step": 5091 }, { "epoch": 3.5214384508990317, "grad_norm": 13.803914070129395, "learning_rate": 3.599200860611649e-05, "log_odds_chosen": 9.127652168273926, "log_odds_ratio": -0.00017640476289670914, "logits/chosen": -0.8106200098991394, "logits/rejected": -0.8595431447029114, "logps/chosen": -0.0002514416119083762, "logps/rejected": -1.1357730627059937, "loss": 2.3148, "nll_loss": 0.5786784887313843, "rewards/accuracies": 1.0, "rewards/chosen": -2.5144163373624906e-05, "rewards/margins": 0.11355216801166534, "rewards/rejected": -0.11357730627059937, "step": 5092 }, { "epoch": 3.5221300138312586, "grad_norm": 14.12282657623291, "learning_rate": 3.598816658982635e-05, "log_odds_chosen": 8.557235717773438, "log_odds_ratio": -0.002839646302163601, "logits/chosen": -0.5079107880592346, "logits/rejected": -0.6302869319915771, "logps/chosen": -0.020289452746510506, "logps/rejected": -1.8531651496887207, "loss": 2.7005, "nll_loss": 0.6748350262641907, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020289451349526644, "rewards/margins": 0.18328757584095, "rewards/rejected": -0.1853165179491043, "step": 5093 }, { "epoch": 3.5228215767634854, "grad_norm": 6.537510395050049, "learning_rate": 3.598432457353619e-05, "log_odds_chosen": 8.796355247497559, "log_odds_ratio": -0.00046882365131750703, "logits/chosen": -0.4269121289253235, "logits/rejected": -0.4743691682815552, "logps/chosen": -0.014062023721635342, "logps/rejected": -2.1716971397399902, "loss": 1.4093, "nll_loss": 0.35227110981941223, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014062024420127273, "rewards/margins": 0.21576350927352905, "rewards/rejected": -0.21716971695423126, "step": 5094 }, { "epoch": 3.5235131396957122, "grad_norm": 12.389871597290039, "learning_rate": 3.5980482557246045e-05, "log_odds_chosen": 10.240442276000977, "log_odds_ratio": -7.592760084662586e-05, "logits/chosen": -0.3653874397277832, "logits/rejected": -0.4159523844718933, "logps/chosen": -0.0002882831613533199, "logps/rejected": -1.8680007457733154, "loss": 1.3335, "nll_loss": 0.3333684802055359, "rewards/accuracies": 1.0, "rewards/chosen": -2.882831722672563e-05, "rewards/margins": 0.1867712438106537, "rewards/rejected": -0.18680007755756378, "step": 5095 }, { "epoch": 3.524204702627939, "grad_norm": 12.912257194519043, "learning_rate": 3.59766405409559e-05, "log_odds_chosen": 8.562950134277344, "log_odds_ratio": -0.05261914059519768, "logits/chosen": -0.639365017414093, "logits/rejected": -0.649884819984436, "logps/chosen": -0.012849587015807629, "logps/rejected": -1.956565260887146, "loss": 2.0593, "nll_loss": 0.509568452835083, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012849586782976985, "rewards/margins": 0.19437158107757568, "rewards/rejected": -0.19565653800964355, "step": 5096 }, { "epoch": 3.524896265560166, "grad_norm": 7.283536911010742, "learning_rate": 3.597279852466574e-05, "log_odds_chosen": 6.307476997375488, "log_odds_ratio": -0.10233741253614426, "logits/chosen": -0.6674084663391113, "logits/rejected": -0.6511844396591187, "logps/chosen": -0.019914401695132256, "logps/rejected": -0.8803103566169739, "loss": 1.5538, "nll_loss": 0.3782210946083069, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019914403092116117, "rewards/margins": 0.08603960275650024, "rewards/rejected": -0.08803103119134903, "step": 5097 }, { "epoch": 3.5255878284923927, "grad_norm": 7.762702941894531, "learning_rate": 3.5968956508375596e-05, "log_odds_chosen": 6.637197971343994, "log_odds_ratio": -0.07845356315374374, "logits/chosen": -0.1810343861579895, "logits/rejected": -0.2177562117576599, "logps/chosen": -0.02767745964229107, "logps/rejected": -1.5530402660369873, "loss": 1.2597, "nll_loss": 0.307077020406723, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027677458710968494, "rewards/margins": 0.1525362730026245, "rewards/rejected": -0.15530402958393097, "step": 5098 }, { "epoch": 3.5262793914246195, "grad_norm": 15.101137161254883, "learning_rate": 3.596511449208545e-05, "log_odds_chosen": 9.25841999053955, "log_odds_ratio": -0.030140476301312447, "logits/chosen": -0.21983087062835693, "logits/rejected": -0.3168938457965851, "logps/chosen": -0.019699474796652794, "logps/rejected": -1.9712846279144287, "loss": 1.3046, "nll_loss": 0.3231399655342102, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019699474796652794, "rewards/margins": 0.19515851140022278, "rewards/rejected": -0.19712845981121063, "step": 5099 }, { "epoch": 3.5269709543568464, "grad_norm": 9.824456214904785, "learning_rate": 3.59612724757953e-05, "log_odds_chosen": 8.969016075134277, "log_odds_ratio": -0.00030478276312351227, "logits/chosen": -0.588179886341095, "logits/rejected": -0.6493480205535889, "logps/chosen": -0.009981921873986721, "logps/rejected": -1.593450665473938, "loss": 1.727, "nll_loss": 0.4317193627357483, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009981922339648008, "rewards/margins": 0.15834686160087585, "rewards/rejected": -0.15934506058692932, "step": 5100 }, { "epoch": 3.527662517289073, "grad_norm": 6.7342963218688965, "learning_rate": 3.5957430459505146e-05, "log_odds_chosen": 9.023002624511719, "log_odds_ratio": -0.0004063228552695364, "logits/chosen": -0.6179463863372803, "logits/rejected": -0.6786239147186279, "logps/chosen": -0.0010950213763862848, "logps/rejected": -1.3784172534942627, "loss": 1.5397, "nll_loss": 0.3848962187767029, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010950213618343696, "rewards/margins": 0.13773223757743835, "rewards/rejected": -0.13784173130989075, "step": 5101 }, { "epoch": 3.5283540802213, "grad_norm": 13.047355651855469, "learning_rate": 3.5953588443215005e-05, "log_odds_chosen": 8.957096099853516, "log_odds_ratio": -0.0010938331251963973, "logits/chosen": -0.5095317363739014, "logits/rejected": -0.5065636038780212, "logps/chosen": -0.0016800828743726015, "logps/rejected": -1.350773811340332, "loss": 1.7472, "nll_loss": 0.4366909861564636, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016800829325802624, "rewards/margins": 0.13490936160087585, "rewards/rejected": -0.13507738709449768, "step": 5102 }, { "epoch": 3.529045643153527, "grad_norm": 11.464844703674316, "learning_rate": 3.594974642692485e-05, "log_odds_chosen": 10.505243301391602, "log_odds_ratio": -5.506931120180525e-05, "logits/chosen": -0.4430898427963257, "logits/rejected": -0.49872028827667236, "logps/chosen": -0.00032901056692935526, "logps/rejected": -1.9516682624816895, "loss": 1.4478, "nll_loss": 0.3619363605976105, "rewards/accuracies": 1.0, "rewards/chosen": -3.290105451014824e-05, "rewards/margins": 0.19513392448425293, "rewards/rejected": -0.19516682624816895, "step": 5103 }, { "epoch": 3.5297372060857537, "grad_norm": 12.956223487854004, "learning_rate": 3.5945904410634704e-05, "log_odds_chosen": 9.188024520874023, "log_odds_ratio": -0.004907457623630762, "logits/chosen": -0.9995465874671936, "logits/rejected": -1.0715291500091553, "logps/chosen": -0.0015922407619655132, "logps/rejected": -1.4828119277954102, "loss": 1.6385, "nll_loss": 0.40912729501724243, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015922407328616828, "rewards/margins": 0.14812196791172028, "rewards/rejected": -0.14828118681907654, "step": 5104 }, { "epoch": 3.5304287690179805, "grad_norm": 7.3393025398254395, "learning_rate": 3.5942062394344556e-05, "log_odds_chosen": 7.815474033355713, "log_odds_ratio": -0.009405778720974922, "logits/chosen": -0.6533910036087036, "logits/rejected": -0.6671908497810364, "logps/chosen": -0.011968421749770641, "logps/rejected": -2.025322437286377, "loss": 1.7284, "nll_loss": 0.43116769194602966, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011968421749770641, "rewards/margins": 0.20133540034294128, "rewards/rejected": -0.20253226161003113, "step": 5105 }, { "epoch": 3.5311203319502074, "grad_norm": 11.78327465057373, "learning_rate": 3.59382203780544e-05, "log_odds_chosen": 10.24453353881836, "log_odds_ratio": -8.006079588085413e-05, "logits/chosen": -0.7616961002349854, "logits/rejected": -0.8472107648849487, "logps/chosen": -0.0006323190173134208, "logps/rejected": -2.2396769523620605, "loss": 1.3703, "nll_loss": 0.3425724506378174, "rewards/accuracies": 1.0, "rewards/chosen": -6.323190609691665e-05, "rewards/margins": 0.22390446066856384, "rewards/rejected": -0.22396771609783173, "step": 5106 }, { "epoch": 3.531811894882434, "grad_norm": 10.847970962524414, "learning_rate": 3.5934378361764254e-05, "log_odds_chosen": 9.180673599243164, "log_odds_ratio": -0.043109308928251266, "logits/chosen": -1.0109539031982422, "logits/rejected": -1.0703439712524414, "logps/chosen": -0.016480334103107452, "logps/rejected": -1.9816501140594482, "loss": 1.8167, "nll_loss": 0.44986581802368164, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016480335034430027, "rewards/margins": 0.1965169608592987, "rewards/rejected": -0.19816499948501587, "step": 5107 }, { "epoch": 3.532503457814661, "grad_norm": 10.240374565124512, "learning_rate": 3.5930536345474107e-05, "log_odds_chosen": 8.178666114807129, "log_odds_ratio": -0.006253361236304045, "logits/chosen": -0.5043379664421082, "logits/rejected": -0.5600912570953369, "logps/chosen": -0.01136441994458437, "logps/rejected": -1.7080068588256836, "loss": 1.5304, "nll_loss": 0.38196229934692383, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011364419478923082, "rewards/margins": 0.16966424882411957, "rewards/rejected": -0.17080068588256836, "step": 5108 }, { "epoch": 3.533195020746888, "grad_norm": 8.874667167663574, "learning_rate": 3.592669432918396e-05, "log_odds_chosen": 9.3858642578125, "log_odds_ratio": -0.00019358650024514645, "logits/chosen": -0.4227873682975769, "logits/rejected": -0.4874667823314667, "logps/chosen": -0.001081955386325717, "logps/rejected": -2.3752799034118652, "loss": 1.5544, "nll_loss": 0.388569176197052, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010819554154295474, "rewards/margins": 0.23741981387138367, "rewards/rejected": -0.2375280112028122, "step": 5109 }, { "epoch": 3.5338865836791147, "grad_norm": 14.556818962097168, "learning_rate": 3.5922852312893805e-05, "log_odds_chosen": 10.08336067199707, "log_odds_ratio": -8.728246029932052e-05, "logits/chosen": -0.7301170229911804, "logits/rejected": -0.7837525010108948, "logps/chosen": -0.0014646199997514486, "logps/rejected": -2.3359930515289307, "loss": 1.7196, "nll_loss": 0.429879754781723, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001464620145270601, "rewards/margins": 0.23345284163951874, "rewards/rejected": -0.23359929025173187, "step": 5110 }, { "epoch": 3.5345781466113415, "grad_norm": 8.479273796081543, "learning_rate": 3.5919010296603664e-05, "log_odds_chosen": 8.859445571899414, "log_odds_ratio": -0.000794129678979516, "logits/chosen": -0.5082720518112183, "logits/rejected": -0.5971063375473022, "logps/chosen": -0.0007075598696246743, "logps/rejected": -1.2506650686264038, "loss": 1.7882, "nll_loss": 0.4469757676124573, "rewards/accuracies": 1.0, "rewards/chosen": -7.075598841765895e-05, "rewards/margins": 0.12499573826789856, "rewards/rejected": -0.12506650388240814, "step": 5111 }, { "epoch": 3.5352697095435683, "grad_norm": 9.707127571105957, "learning_rate": 3.591516828031351e-05, "log_odds_chosen": 9.019890785217285, "log_odds_ratio": -0.0011449077865108848, "logits/chosen": -0.4791383147239685, "logits/rejected": -0.4929508566856384, "logps/chosen": -0.02153146266937256, "logps/rejected": -1.8436179161071777, "loss": 1.6368, "nll_loss": 0.4090908467769623, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021531463135033846, "rewards/margins": 0.18220864236354828, "rewards/rejected": -0.1843617856502533, "step": 5112 }, { "epoch": 3.535961272475795, "grad_norm": 7.732481002807617, "learning_rate": 3.591132626402336e-05, "log_odds_chosen": 6.761359691619873, "log_odds_ratio": -0.133843794465065, "logits/chosen": -0.1438131034374237, "logits/rejected": -0.19129794836044312, "logps/chosen": -0.02227877639234066, "logps/rejected": -0.9083170294761658, "loss": 2.0761, "nll_loss": 0.505638599395752, "rewards/accuracies": 0.875, "rewards/chosen": -0.00222787749953568, "rewards/margins": 0.08860382437705994, "rewards/rejected": -0.0908316969871521, "step": 5113 }, { "epoch": 3.536652835408022, "grad_norm": 11.014677047729492, "learning_rate": 3.5907484247733214e-05, "log_odds_chosen": 9.456984519958496, "log_odds_ratio": -0.0004772770043928176, "logits/chosen": -0.256796658039093, "logits/rejected": -0.3246535062789917, "logps/chosen": -0.006421164143830538, "logps/rejected": -2.324803352355957, "loss": 1.4841, "nll_loss": 0.37098228931427, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006421164725907147, "rewards/margins": 0.23183822631835938, "rewards/rejected": -0.23248033225536346, "step": 5114 }, { "epoch": 3.537344398340249, "grad_norm": 8.51115894317627, "learning_rate": 3.590364223144306e-05, "log_odds_chosen": 9.182951927185059, "log_odds_ratio": -0.0006045700865797698, "logits/chosen": -0.43073856830596924, "logits/rejected": -0.5255957245826721, "logps/chosen": -0.016624998301267624, "logps/rejected": -2.1651108264923096, "loss": 1.2989, "nll_loss": 0.32465484738349915, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016624998534098268, "rewards/margins": 0.2148485779762268, "rewards/rejected": -0.2165111005306244, "step": 5115 }, { "epoch": 3.5380359612724757, "grad_norm": 14.479390144348145, "learning_rate": 3.589980021515291e-05, "log_odds_chosen": 8.885518074035645, "log_odds_ratio": -0.006611211225390434, "logits/chosen": -0.7234517335891724, "logits/rejected": -0.751315712928772, "logps/chosen": -0.050526347011327744, "logps/rejected": -1.8813791275024414, "loss": 1.4642, "nll_loss": 0.3653944432735443, "rewards/accuracies": 1.0, "rewards/chosen": -0.005052634514868259, "rewards/margins": 0.18308529257774353, "rewards/rejected": -0.1881379336118698, "step": 5116 }, { "epoch": 3.5387275242047025, "grad_norm": 12.264790534973145, "learning_rate": 3.5895958198862765e-05, "log_odds_chosen": 8.67015266418457, "log_odds_ratio": -0.0009165530791506171, "logits/chosen": -0.7622514367103577, "logits/rejected": -0.8170218467712402, "logps/chosen": -0.01521426159888506, "logps/rejected": -1.7560782432556152, "loss": 2.9703, "nll_loss": 0.7424764633178711, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015214262530207634, "rewards/margins": 0.17408640682697296, "rewards/rejected": -0.175607830286026, "step": 5117 }, { "epoch": 3.5394190871369293, "grad_norm": 4.114010810852051, "learning_rate": 3.589211618257262e-05, "log_odds_chosen": 7.7912750244140625, "log_odds_ratio": -0.0045229410752654076, "logits/chosen": -0.5681171417236328, "logits/rejected": -0.5650732517242432, "logps/chosen": -0.011351736262440681, "logps/rejected": -1.4451227188110352, "loss": 1.6012, "nll_loss": 0.39984703063964844, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011351736029610038, "rewards/margins": 0.14337711036205292, "rewards/rejected": -0.14451228082180023, "step": 5118 }, { "epoch": 3.540110650069156, "grad_norm": 10.561309814453125, "learning_rate": 3.588827416628246e-05, "log_odds_chosen": 8.318758010864258, "log_odds_ratio": -0.23881548643112183, "logits/chosen": -0.27856093645095825, "logits/rejected": -0.3536270558834076, "logps/chosen": -0.04434996470808983, "logps/rejected": -1.96640944480896, "loss": 1.41, "nll_loss": 0.32863014936447144, "rewards/accuracies": 0.875, "rewards/chosen": -0.004434996284544468, "rewards/margins": 0.19220595061779022, "rewards/rejected": -0.1966409534215927, "step": 5119 }, { "epoch": 3.540802213001383, "grad_norm": 10.838645935058594, "learning_rate": 3.588443214999232e-05, "log_odds_chosen": 10.16311264038086, "log_odds_ratio": -0.00012633662845473737, "logits/chosen": -0.5232207775115967, "logits/rejected": -0.5633898973464966, "logps/chosen": -0.00033453942160122097, "logps/rejected": -2.079082489013672, "loss": 1.371, "nll_loss": 0.34272632002830505, "rewards/accuracies": 1.0, "rewards/chosen": -3.345394361531362e-05, "rewards/margins": 0.20787480473518372, "rewards/rejected": -0.2079082578420639, "step": 5120 }, { "epoch": 3.54149377593361, "grad_norm": 6.905520915985107, "learning_rate": 3.588059013370217e-05, "log_odds_chosen": 8.714499473571777, "log_odds_ratio": -0.0032024341635406017, "logits/chosen": -0.12589719891548157, "logits/rejected": -0.18861685693264008, "logps/chosen": -0.0030687712132930756, "logps/rejected": -1.5923564434051514, "loss": 1.1677, "nll_loss": 0.29160743951797485, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030687713297083974, "rewards/margins": 0.158928781747818, "rewards/rejected": -0.1592356562614441, "step": 5121 }, { "epoch": 3.5421853388658366, "grad_norm": 8.405205726623535, "learning_rate": 3.587674811741202e-05, "log_odds_chosen": 8.597480773925781, "log_odds_ratio": -0.0008526691817678511, "logits/chosen": -0.4619186818599701, "logits/rejected": -0.5468069314956665, "logps/chosen": -0.0013205332215875387, "logps/rejected": -1.2612684965133667, "loss": 1.6883, "nll_loss": 0.42197930812835693, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013205331924837083, "rewards/margins": 0.12599480152130127, "rewards/rejected": -0.12612685561180115, "step": 5122 }, { "epoch": 3.5428769017980635, "grad_norm": 9.318670272827148, "learning_rate": 3.587290610112187e-05, "log_odds_chosen": 6.977090358734131, "log_odds_ratio": -0.194001704454422, "logits/chosen": -0.2855735123157501, "logits/rejected": -0.33359235525131226, "logps/chosen": -0.06837824732065201, "logps/rejected": -1.256178855895996, "loss": 1.4145, "nll_loss": 0.33423250913619995, "rewards/accuracies": 0.875, "rewards/chosen": -0.006837825290858746, "rewards/margins": 0.11878006160259247, "rewards/rejected": -0.1256178915500641, "step": 5123 }, { "epoch": 3.5435684647302903, "grad_norm": 8.318540573120117, "learning_rate": 3.586906408483172e-05, "log_odds_chosen": 9.18307113647461, "log_odds_ratio": -0.0005332131404429674, "logits/chosen": 0.021685736253857613, "logits/rejected": -0.05196130648255348, "logps/chosen": -0.010211940854787827, "logps/rejected": -2.656543731689453, "loss": 1.3642, "nll_loss": 0.34099307656288147, "rewards/accuracies": 1.0, "rewards/chosen": -0.001021194038912654, "rewards/margins": 0.2646331787109375, "rewards/rejected": -0.26565438508987427, "step": 5124 }, { "epoch": 3.544260027662517, "grad_norm": 16.354997634887695, "learning_rate": 3.586522206854157e-05, "log_odds_chosen": 8.942000389099121, "log_odds_ratio": -0.015276739373803139, "logits/chosen": -0.3055412173271179, "logits/rejected": -0.3241763412952423, "logps/chosen": -0.011312441900372505, "logps/rejected": -2.309622287750244, "loss": 2.0613, "nll_loss": 0.5137892961502075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011312442366033792, "rewards/margins": 0.22983098030090332, "rewards/rejected": -0.23096221685409546, "step": 5125 }, { "epoch": 3.544951590594744, "grad_norm": 8.887134552001953, "learning_rate": 3.5861380052251423e-05, "log_odds_chosen": 10.164998054504395, "log_odds_ratio": -0.00015315644850488752, "logits/chosen": -0.24806389212608337, "logits/rejected": -0.3355085253715515, "logps/chosen": -0.0041093104518949986, "logps/rejected": -2.6971685886383057, "loss": 1.6534, "nll_loss": 0.41333335638046265, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004109310102649033, "rewards/margins": 0.269305944442749, "rewards/rejected": -0.26971685886383057, "step": 5126 }, { "epoch": 3.545643153526971, "grad_norm": 7.772130489349365, "learning_rate": 3.5857538035961276e-05, "log_odds_chosen": 10.5562744140625, "log_odds_ratio": -7.292352529475465e-05, "logits/chosen": -0.6387104988098145, "logits/rejected": -0.6942251324653625, "logps/chosen": -0.0001461450883653015, "logps/rejected": -1.6912575960159302, "loss": 1.3432, "nll_loss": 0.33579444885253906, "rewards/accuracies": 1.0, "rewards/chosen": -1.4614510291721672e-05, "rewards/margins": 0.16911114752292633, "rewards/rejected": -0.1691257655620575, "step": 5127 }, { "epoch": 3.5463347164591976, "grad_norm": 11.181296348571777, "learning_rate": 3.585369601967112e-05, "log_odds_chosen": 9.056486129760742, "log_odds_ratio": -0.017171263694763184, "logits/chosen": -0.06890146434307098, "logits/rejected": -0.08440100401639938, "logps/chosen": -0.01895216852426529, "logps/rejected": -2.131861448287964, "loss": 2.0641, "nll_loss": 0.5143003463745117, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018952169921249151, "rewards/margins": 0.21129092574119568, "rewards/rejected": -0.2131861448287964, "step": 5128 }, { "epoch": 3.5470262793914245, "grad_norm": 14.617752075195312, "learning_rate": 3.584985400338098e-05, "log_odds_chosen": 9.516202926635742, "log_odds_ratio": -0.0002230665850220248, "logits/chosen": -0.39011046290397644, "logits/rejected": -0.5370066165924072, "logps/chosen": -0.0019659469835460186, "logps/rejected": -2.265226364135742, "loss": 1.3383, "nll_loss": 0.3345586061477661, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001965947012649849, "rewards/margins": 0.22632606327533722, "rewards/rejected": -0.22652265429496765, "step": 5129 }, { "epoch": 3.5477178423236513, "grad_norm": 8.726752281188965, "learning_rate": 3.5846011987090826e-05, "log_odds_chosen": 10.083745002746582, "log_odds_ratio": -9.456132829654962e-05, "logits/chosen": -0.736955463886261, "logits/rejected": -0.8158849477767944, "logps/chosen": -0.0003738811647053808, "logps/rejected": -1.9998457431793213, "loss": 1.202, "nll_loss": 0.30050134658813477, "rewards/accuracies": 1.0, "rewards/chosen": -3.7388119380921125e-05, "rewards/margins": 0.19994717836380005, "rewards/rejected": -0.1999845653772354, "step": 5130 }, { "epoch": 3.548409405255878, "grad_norm": 14.666512489318848, "learning_rate": 3.584216997080068e-05, "log_odds_chosen": 9.23165512084961, "log_odds_ratio": -0.002063015243038535, "logits/chosen": -0.5247082114219666, "logits/rejected": -0.5676276683807373, "logps/chosen": -0.020228393375873566, "logps/rejected": -2.463697671890259, "loss": 1.8779, "nll_loss": 0.4692714214324951, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020228393841534853, "rewards/margins": 0.24434691667556763, "rewards/rejected": -0.24636974930763245, "step": 5131 }, { "epoch": 3.549100968188105, "grad_norm": 7.39258337020874, "learning_rate": 3.583832795451053e-05, "log_odds_chosen": 8.629104614257812, "log_odds_ratio": -0.005373574793338776, "logits/chosen": -0.44675952196121216, "logits/rejected": -0.4921689033508301, "logps/chosen": -0.008275295607745647, "logps/rejected": -2.400135040283203, "loss": 1.2359, "nll_loss": 0.3084494471549988, "rewards/accuracies": 1.0, "rewards/chosen": -0.00082752964226529, "rewards/margins": 0.23918597400188446, "rewards/rejected": -0.2400135099887848, "step": 5132 }, { "epoch": 3.5497925311203318, "grad_norm": 10.347201347351074, "learning_rate": 3.583448593822038e-05, "log_odds_chosen": 9.661993980407715, "log_odds_ratio": -0.00028375154943205416, "logits/chosen": -0.5061460733413696, "logits/rejected": -0.5724334716796875, "logps/chosen": -0.0007418065215460956, "logps/rejected": -1.877603530883789, "loss": 1.4939, "nll_loss": 0.373445987701416, "rewards/accuracies": 1.0, "rewards/chosen": -7.418065797537565e-05, "rewards/margins": 0.18768617510795593, "rewards/rejected": -0.1877603530883789, "step": 5133 }, { "epoch": 3.5504840940525586, "grad_norm": 7.307807445526123, "learning_rate": 3.583064392193023e-05, "log_odds_chosen": 9.501052856445312, "log_odds_ratio": -0.017502374947071075, "logits/chosen": -0.662177324295044, "logits/rejected": -0.6623290181159973, "logps/chosen": -0.00805945135653019, "logps/rejected": -1.526928186416626, "loss": 1.5985, "nll_loss": 0.3978814482688904, "rewards/accuracies": 1.0, "rewards/chosen": -0.000805945077445358, "rewards/margins": 0.15188688039779663, "rewards/rejected": -0.15269280970096588, "step": 5134 }, { "epoch": 3.5511756569847854, "grad_norm": 8.619414329528809, "learning_rate": 3.582680190564008e-05, "log_odds_chosen": 5.690260887145996, "log_odds_ratio": -0.12367193400859833, "logits/chosen": -0.5978754758834839, "logits/rejected": -0.6153057217597961, "logps/chosen": -0.041250962764024734, "logps/rejected": -1.0321168899536133, "loss": 2.0047, "nll_loss": 0.48880013823509216, "rewards/accuracies": 1.0, "rewards/chosen": -0.004125096369534731, "rewards/margins": 0.09908659756183624, "rewards/rejected": -0.10321169346570969, "step": 5135 }, { "epoch": 3.5518672199170123, "grad_norm": 11.130823135375977, "learning_rate": 3.5822959889349934e-05, "log_odds_chosen": 8.389826774597168, "log_odds_ratio": -0.11120603233575821, "logits/chosen": -0.821804940700531, "logits/rejected": -0.8347532749176025, "logps/chosen": -0.018633171916007996, "logps/rejected": -1.5349326133728027, "loss": 1.6964, "nll_loss": 0.41298729181289673, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018633169820532203, "rewards/margins": 0.1516299545764923, "rewards/rejected": -0.1534932553768158, "step": 5136 }, { "epoch": 3.552558782849239, "grad_norm": 9.259598731994629, "learning_rate": 3.581911787305978e-05, "log_odds_chosen": 9.88913345336914, "log_odds_ratio": -0.006824263371527195, "logits/chosen": -0.684761106967926, "logits/rejected": -0.6901488900184631, "logps/chosen": -0.005095028318464756, "logps/rejected": -2.158032178878784, "loss": 1.3891, "nll_loss": 0.3465915620326996, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005095028318464756, "rewards/margins": 0.21529372036457062, "rewards/rejected": -0.21580322086811066, "step": 5137 }, { "epoch": 3.553250345781466, "grad_norm": 14.307619094848633, "learning_rate": 3.581527585676964e-05, "log_odds_chosen": 8.201597213745117, "log_odds_ratio": -0.05911998078227043, "logits/chosen": -0.6577879190444946, "logits/rejected": -0.756862461566925, "logps/chosen": -0.012355667538940907, "logps/rejected": -1.847043514251709, "loss": 1.7893, "nll_loss": 0.44140928983688354, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012355668004602194, "rewards/margins": 0.1834687888622284, "rewards/rejected": -0.18470436334609985, "step": 5138 }, { "epoch": 3.5539419087136928, "grad_norm": 8.32098388671875, "learning_rate": 3.5811433840479485e-05, "log_odds_chosen": 8.830156326293945, "log_odds_ratio": -0.036551717668771744, "logits/chosen": -0.6407181620597839, "logits/rejected": -0.6718611717224121, "logps/chosen": -0.016496429219841957, "logps/rejected": -2.304805040359497, "loss": 1.4094, "nll_loss": 0.348699152469635, "rewards/accuracies": 1.0, "rewards/chosen": -0.001649643061682582, "rewards/margins": 0.22883085906505585, "rewards/rejected": -0.23048050701618195, "step": 5139 }, { "epoch": 3.5546334716459196, "grad_norm": 26.237749099731445, "learning_rate": 3.580759182418934e-05, "log_odds_chosen": 9.193836212158203, "log_odds_ratio": -0.00029868149431422353, "logits/chosen": -0.5012434720993042, "logits/rejected": -0.5276903510093689, "logps/chosen": -0.008629154413938522, "logps/rejected": -2.401827573776245, "loss": 1.7928, "nll_loss": 0.44816479086875916, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008629155345261097, "rewards/margins": 0.2393198311328888, "rewards/rejected": -0.24018274247646332, "step": 5140 }, { "epoch": 3.5553250345781464, "grad_norm": 10.040083885192871, "learning_rate": 3.580374980789919e-05, "log_odds_chosen": 8.394232749938965, "log_odds_ratio": -0.004813689272850752, "logits/chosen": -0.5562333464622498, "logits/rejected": -0.683615505695343, "logps/chosen": -0.0046744076535105705, "logps/rejected": -1.7374382019042969, "loss": 1.2966, "nll_loss": 0.323678582906723, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004674407828133553, "rewards/margins": 0.1732763797044754, "rewards/rejected": -0.1737438142299652, "step": 5141 }, { "epoch": 3.5560165975103732, "grad_norm": 11.459824562072754, "learning_rate": 3.5799907791609035e-05, "log_odds_chosen": 8.987030982971191, "log_odds_ratio": -0.0007204932626336813, "logits/chosen": -0.7068724036216736, "logits/rejected": -0.6982411742210388, "logps/chosen": -0.0034352538641542196, "logps/rejected": -1.6314517259597778, "loss": 2.0082, "nll_loss": 0.5019901990890503, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003435253456700593, "rewards/margins": 0.16280165314674377, "rewards/rejected": -0.16314518451690674, "step": 5142 }, { "epoch": 3.5567081604426, "grad_norm": 13.1761474609375, "learning_rate": 3.579606577531889e-05, "log_odds_chosen": 7.134213447570801, "log_odds_ratio": -0.07483165711164474, "logits/chosen": -0.6867763996124268, "logits/rejected": -0.5705421566963196, "logps/chosen": -0.08776207268238068, "logps/rejected": -1.3543072938919067, "loss": 1.5354, "nll_loss": 0.3763653039932251, "rewards/accuracies": 1.0, "rewards/chosen": -0.008776207454502583, "rewards/margins": 0.1266545206308365, "rewards/rejected": -0.1354307234287262, "step": 5143 }, { "epoch": 3.557399723374827, "grad_norm": 8.90782356262207, "learning_rate": 3.579222375902874e-05, "log_odds_chosen": 9.829313278198242, "log_odds_ratio": -0.00010671962081687525, "logits/chosen": -0.4369320273399353, "logits/rejected": -0.5152650475502014, "logps/chosen": -0.0003849728964269161, "logps/rejected": -1.4018714427947998, "loss": 2.5479, "nll_loss": 0.6369690299034119, "rewards/accuracies": 1.0, "rewards/chosen": -3.8497295463457704e-05, "rewards/margins": 0.14014865458011627, "rewards/rejected": -0.14018715918064117, "step": 5144 }, { "epoch": 3.5580912863070537, "grad_norm": 13.46090316772461, "learning_rate": 3.578838174273859e-05, "log_odds_chosen": 7.671215534210205, "log_odds_ratio": -0.21245524287223816, "logits/chosen": -0.6361114978790283, "logits/rejected": -0.6454745531082153, "logps/chosen": -0.029578909277915955, "logps/rejected": -1.829190969467163, "loss": 1.7448, "nll_loss": 0.4149635434150696, "rewards/accuracies": 0.875, "rewards/chosen": -0.002957891207188368, "rewards/margins": 0.1799612194299698, "rewards/rejected": -0.18291908502578735, "step": 5145 }, { "epoch": 3.5587828492392806, "grad_norm": 8.752176284790039, "learning_rate": 3.578453972644844e-05, "log_odds_chosen": 7.500901699066162, "log_odds_ratio": -0.014364222064614296, "logits/chosen": -0.6340488791465759, "logits/rejected": -0.5995294451713562, "logps/chosen": -0.027720659971237183, "logps/rejected": -1.7301421165466309, "loss": 1.8421, "nll_loss": 0.45909082889556885, "rewards/accuracies": 1.0, "rewards/chosen": -0.002772066043689847, "rewards/margins": 0.17024214565753937, "rewards/rejected": -0.17301422357559204, "step": 5146 }, { "epoch": 3.5594744121715074, "grad_norm": 12.609339714050293, "learning_rate": 3.57806977101583e-05, "log_odds_chosen": 9.670016288757324, "log_odds_ratio": -0.03675759211182594, "logits/chosen": -0.4056617319583893, "logits/rejected": -0.433749794960022, "logps/chosen": -0.009829879738390446, "logps/rejected": -2.218677520751953, "loss": 1.6728, "nll_loss": 0.41451871395111084, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009829880436882377, "rewards/margins": 0.2208847850561142, "rewards/rejected": -0.22186775505542755, "step": 5147 }, { "epoch": 3.5601659751037342, "grad_norm": 11.2356595993042, "learning_rate": 3.577685569386814e-05, "log_odds_chosen": 9.592318534851074, "log_odds_ratio": -0.0001454985758755356, "logits/chosen": -0.5702686309814453, "logits/rejected": -0.7230437397956848, "logps/chosen": -0.0006389496265910566, "logps/rejected": -1.6784694194793701, "loss": 1.3064, "nll_loss": 0.326596200466156, "rewards/accuracies": 1.0, "rewards/chosen": -6.389496411429718e-05, "rewards/margins": 0.16778305172920227, "rewards/rejected": -0.1678469479084015, "step": 5148 }, { "epoch": 3.560857538035961, "grad_norm": 7.388978004455566, "learning_rate": 3.5773013677577996e-05, "log_odds_chosen": 8.739555358886719, "log_odds_ratio": -0.00031445466447621584, "logits/chosen": -0.7255094051361084, "logits/rejected": -0.6713707447052002, "logps/chosen": -0.0004937859484925866, "logps/rejected": -1.1414120197296143, "loss": 1.2413, "nll_loss": 0.3102880120277405, "rewards/accuracies": 1.0, "rewards/chosen": -4.937859557685442e-05, "rewards/margins": 0.11409182101488113, "rewards/rejected": -0.11414120346307755, "step": 5149 }, { "epoch": 3.561549100968188, "grad_norm": 9.674239158630371, "learning_rate": 3.576917166128785e-05, "log_odds_chosen": 9.749626159667969, "log_odds_ratio": -0.003634081920608878, "logits/chosen": -0.3100474774837494, "logits/rejected": -0.35088616609573364, "logps/chosen": -0.006417962722480297, "logps/rejected": -1.842191457748413, "loss": 1.4193, "nll_loss": 0.354466050863266, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006417962722480297, "rewards/margins": 0.18357735872268677, "rewards/rejected": -0.1842191517353058, "step": 5150 }, { "epoch": 3.5622406639004147, "grad_norm": 13.221283912658691, "learning_rate": 3.5765329644997694e-05, "log_odds_chosen": 7.550267219543457, "log_odds_ratio": -0.1355026811361313, "logits/chosen": -0.592994749546051, "logits/rejected": -0.7096748352050781, "logps/chosen": -0.03942836821079254, "logps/rejected": -1.6778159141540527, "loss": 1.7986, "nll_loss": 0.43610402941703796, "rewards/accuracies": 0.875, "rewards/chosen": -0.003942836541682482, "rewards/margins": 0.16383875906467438, "rewards/rejected": -0.16778159141540527, "step": 5151 }, { "epoch": 3.5629322268326415, "grad_norm": 6.515930652618408, "learning_rate": 3.5761487628707546e-05, "log_odds_chosen": 8.004266738891602, "log_odds_ratio": -0.002393821021541953, "logits/chosen": -0.533973217010498, "logits/rejected": -0.6153483390808105, "logps/chosen": -0.02263014018535614, "logps/rejected": -1.9999516010284424, "loss": 1.2658, "nll_loss": 0.31619948148727417, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022630139719694853, "rewards/margins": 0.1977321356534958, "rewards/rejected": -0.19999516010284424, "step": 5152 }, { "epoch": 3.5636237897648684, "grad_norm": 10.917157173156738, "learning_rate": 3.57576456124174e-05, "log_odds_chosen": 10.014253616333008, "log_odds_ratio": -7.547252607764676e-05, "logits/chosen": -0.6091383695602417, "logits/rejected": -0.6481549143791199, "logps/chosen": -0.0008129151538014412, "logps/rejected": -2.1516170501708984, "loss": 1.9984, "nll_loss": 0.49959611892700195, "rewards/accuracies": 1.0, "rewards/chosen": -8.129151683533564e-05, "rewards/margins": 0.21508042514324188, "rewards/rejected": -0.21516172587871552, "step": 5153 }, { "epoch": 3.564315352697095, "grad_norm": 5.88906192779541, "learning_rate": 3.575380359612725e-05, "log_odds_chosen": 8.342607498168945, "log_odds_ratio": -0.00917066354304552, "logits/chosen": -0.7415870428085327, "logits/rejected": -0.7444044947624207, "logps/chosen": -0.020944029092788696, "logps/rejected": -2.0400819778442383, "loss": 1.2578, "nll_loss": 0.31353580951690674, "rewards/accuracies": 1.0, "rewards/chosen": -0.002094402676448226, "rewards/margins": 0.2019137740135193, "rewards/rejected": -0.20400819182395935, "step": 5154 }, { "epoch": 3.565006915629322, "grad_norm": 9.904231071472168, "learning_rate": 3.57499615798371e-05, "log_odds_chosen": 8.931983947753906, "log_odds_ratio": -0.0025517181493341923, "logits/chosen": -0.21259820461273193, "logits/rejected": -0.3430328369140625, "logps/chosen": -0.01386320125311613, "logps/rejected": -2.474242687225342, "loss": 2.273, "nll_loss": 0.5680056810379028, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013863201020285487, "rewards/margins": 0.24603793025016785, "rewards/rejected": -0.24742424488067627, "step": 5155 }, { "epoch": 3.565698478561549, "grad_norm": 8.710456848144531, "learning_rate": 3.5746119563546956e-05, "log_odds_chosen": 8.032377243041992, "log_odds_ratio": -0.027574822306632996, "logits/chosen": -0.6033883094787598, "logits/rejected": -0.6674955487251282, "logps/chosen": -0.018795570358633995, "logps/rejected": -2.009221315383911, "loss": 1.6532, "nll_loss": 0.41054731607437134, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018795570358633995, "rewards/margins": 0.19904258847236633, "rewards/rejected": -0.2009221315383911, "step": 5156 }, { "epoch": 3.5663900414937757, "grad_norm": 11.037312507629395, "learning_rate": 3.57422775472568e-05, "log_odds_chosen": 7.575757026672363, "log_odds_ratio": -0.04499872028827667, "logits/chosen": -0.4731323719024658, "logits/rejected": -0.5606644153594971, "logps/chosen": -0.024044960737228394, "logps/rejected": -1.340688943862915, "loss": 2.1447, "nll_loss": 0.5316750407218933, "rewards/accuracies": 1.0, "rewards/chosen": -0.002404496306553483, "rewards/margins": 0.13166441023349762, "rewards/rejected": -0.13406890630722046, "step": 5157 }, { "epoch": 3.5670816044260025, "grad_norm": 6.969189167022705, "learning_rate": 3.5738435530966654e-05, "log_odds_chosen": 8.482478141784668, "log_odds_ratio": -0.04103895649313927, "logits/chosen": -0.3462011218070984, "logits/rejected": -0.3915109932422638, "logps/chosen": -0.00928918831050396, "logps/rejected": -1.1896979808807373, "loss": 1.4382, "nll_loss": 0.35545119643211365, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009289190056733787, "rewards/margins": 0.11804087460041046, "rewards/rejected": -0.11896979808807373, "step": 5158 }, { "epoch": 3.5677731673582294, "grad_norm": 12.088088035583496, "learning_rate": 3.5734593514676507e-05, "log_odds_chosen": 9.807308197021484, "log_odds_ratio": -0.002077706390991807, "logits/chosen": -0.31680384278297424, "logits/rejected": -0.40869811177253723, "logps/chosen": -0.0016392001416534185, "logps/rejected": -2.416490077972412, "loss": 1.5806, "nll_loss": 0.3949546813964844, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016392001998610795, "rewards/margins": 0.2414850890636444, "rewards/rejected": -0.24164903163909912, "step": 5159 }, { "epoch": 3.568464730290456, "grad_norm": 6.884808540344238, "learning_rate": 3.573075149838635e-05, "log_odds_chosen": 9.909006118774414, "log_odds_ratio": -0.0002003078261623159, "logits/chosen": -0.350724995136261, "logits/rejected": -0.4701218605041504, "logps/chosen": -0.0006381743587553501, "logps/rejected": -1.6747055053710938, "loss": 1.8905, "nll_loss": 0.4726088047027588, "rewards/accuracies": 1.0, "rewards/chosen": -6.381743878591806e-05, "rewards/margins": 0.16740673780441284, "rewards/rejected": -0.1674705445766449, "step": 5160 }, { "epoch": 3.569156293222683, "grad_norm": 9.014147758483887, "learning_rate": 3.5726909482096205e-05, "log_odds_chosen": 8.882246971130371, "log_odds_ratio": -0.0020518959499895573, "logits/chosen": -0.4286271333694458, "logits/rejected": -0.40721285343170166, "logps/chosen": -0.0014548527542501688, "logps/rejected": -1.1933650970458984, "loss": 1.1218, "nll_loss": 0.28023436665534973, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014548527542501688, "rewards/margins": 0.11919102817773819, "rewards/rejected": -0.11933650076389313, "step": 5161 }, { "epoch": 3.56984785615491, "grad_norm": 7.56203031539917, "learning_rate": 3.572306746580606e-05, "log_odds_chosen": 9.380385398864746, "log_odds_ratio": -0.0003798382531385869, "logits/chosen": -0.7221666574478149, "logits/rejected": -0.7379388809204102, "logps/chosen": -0.004587444942444563, "logps/rejected": -1.950721263885498, "loss": 2.0756, "nll_loss": 0.5188639163970947, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004587445582728833, "rewards/margins": 0.19461336731910706, "rewards/rejected": -0.19507211446762085, "step": 5162 }, { "epoch": 3.5705394190871367, "grad_norm": 7.279213905334473, "learning_rate": 3.571922544951591e-05, "log_odds_chosen": 9.02218246459961, "log_odds_ratio": -0.013631554320454597, "logits/chosen": -0.7051505446434021, "logits/rejected": -0.748414158821106, "logps/chosen": -0.011888135224580765, "logps/rejected": -1.3814345598220825, "loss": 1.6526, "nll_loss": 0.4117904007434845, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011888135923072696, "rewards/margins": 0.1369546353816986, "rewards/rejected": -0.13814346492290497, "step": 5163 }, { "epoch": 3.5712309820193635, "grad_norm": 4.712618827819824, "learning_rate": 3.5715383433225755e-05, "log_odds_chosen": 7.798961162567139, "log_odds_ratio": -0.025595782324671745, "logits/chosen": -0.30597984790802, "logits/rejected": -0.3332618772983551, "logps/chosen": -0.027721570804715157, "logps/rejected": -1.0935512781143188, "loss": 2.3449, "nll_loss": 0.583656907081604, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027721570804715157, "rewards/margins": 0.10658296942710876, "rewards/rejected": -0.10935512185096741, "step": 5164 }, { "epoch": 3.5719225449515903, "grad_norm": 12.295555114746094, "learning_rate": 3.5711541416935614e-05, "log_odds_chosen": 11.032272338867188, "log_odds_ratio": -3.914807894034311e-05, "logits/chosen": -0.5761994123458862, "logits/rejected": -0.5990482568740845, "logps/chosen": -0.00012401210551615804, "logps/rejected": -2.0997300148010254, "loss": 1.7092, "nll_loss": 0.4272838532924652, "rewards/accuracies": 1.0, "rewards/chosen": -1.2401211279211566e-05, "rewards/margins": 0.20996060967445374, "rewards/rejected": -0.20997300744056702, "step": 5165 }, { "epoch": 3.572614107883817, "grad_norm": 8.280593872070312, "learning_rate": 3.570769940064546e-05, "log_odds_chosen": 8.952764511108398, "log_odds_ratio": -0.0012806118465960026, "logits/chosen": -0.7560865879058838, "logits/rejected": -0.7571930885314941, "logps/chosen": -0.008908161893486977, "logps/rejected": -1.8132458925247192, "loss": 1.1701, "nll_loss": 0.2924080193042755, "rewards/accuracies": 1.0, "rewards/chosen": -0.000890816212631762, "rewards/margins": 0.18043377995491028, "rewards/rejected": -0.18132458627223969, "step": 5166 }, { "epoch": 3.573305670816044, "grad_norm": 7.02388334274292, "learning_rate": 3.570385738435531e-05, "log_odds_chosen": 9.885597229003906, "log_odds_ratio": -0.00014873781765345484, "logits/chosen": -0.8601112961769104, "logits/rejected": -0.8761740922927856, "logps/chosen": -0.011144283227622509, "logps/rejected": -2.309807300567627, "loss": 1.6112, "nll_loss": 0.4027857184410095, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011144281597808003, "rewards/margins": 0.22986629605293274, "rewards/rejected": -0.2309807538986206, "step": 5167 }, { "epoch": 3.573997233748271, "grad_norm": 12.574030876159668, "learning_rate": 3.5700015368065165e-05, "log_odds_chosen": 7.35061502456665, "log_odds_ratio": -0.28171849250793457, "logits/chosen": -0.5025765299797058, "logits/rejected": -0.6474568843841553, "logps/chosen": -0.25245729088783264, "logps/rejected": -1.6816558837890625, "loss": 1.0523, "nll_loss": 0.23489251732826233, "rewards/accuracies": 0.875, "rewards/chosen": -0.025245727971196175, "rewards/margins": 0.1429198682308197, "rewards/rejected": -0.16816559433937073, "step": 5168 }, { "epoch": 3.5746887966804977, "grad_norm": 18.708730697631836, "learning_rate": 3.569617335177502e-05, "log_odds_chosen": 8.130278587341309, "log_odds_ratio": -0.2269182801246643, "logits/chosen": -0.7245622873306274, "logits/rejected": -0.7673302292823792, "logps/chosen": -0.040166158229112625, "logps/rejected": -1.931434154510498, "loss": 1.6429, "nll_loss": 0.38803112506866455, "rewards/accuracies": 0.875, "rewards/chosen": -0.00401661591604352, "rewards/margins": 0.18912678956985474, "rewards/rejected": -0.19314341247081757, "step": 5169 }, { "epoch": 3.5753803596127245, "grad_norm": 8.475849151611328, "learning_rate": 3.569233133548486e-05, "log_odds_chosen": 8.871143341064453, "log_odds_ratio": -0.0027840326074510813, "logits/chosen": -0.6744045615196228, "logits/rejected": -0.7654039263725281, "logps/chosen": -0.012963301502168179, "logps/rejected": -1.5083293914794922, "loss": 1.1291, "nll_loss": 0.2819991111755371, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012963303597643971, "rewards/margins": 0.1495366096496582, "rewards/rejected": -0.15083295106887817, "step": 5170 }, { "epoch": 3.5760719225449513, "grad_norm": 8.500786781311035, "learning_rate": 3.5688489319194716e-05, "log_odds_chosen": 10.066465377807617, "log_odds_ratio": -0.00014326379459816962, "logits/chosen": -0.7963685989379883, "logits/rejected": -0.7845640182495117, "logps/chosen": -0.000356621399987489, "logps/rejected": -1.7721641063690186, "loss": 1.2365, "nll_loss": 0.30910173058509827, "rewards/accuracies": 1.0, "rewards/chosen": -3.5662138543557376e-05, "rewards/margins": 0.17718075215816498, "rewards/rejected": -0.17721642553806305, "step": 5171 }, { "epoch": 3.576763485477178, "grad_norm": 9.517539978027344, "learning_rate": 3.568464730290457e-05, "log_odds_chosen": 6.560201168060303, "log_odds_ratio": -0.23065565526485443, "logits/chosen": -0.6330443620681763, "logits/rejected": -0.6648932695388794, "logps/chosen": -0.07982344925403595, "logps/rejected": -1.2751703262329102, "loss": 1.9859, "nll_loss": 0.4733983278274536, "rewards/accuracies": 0.875, "rewards/chosen": -0.007982345297932625, "rewards/margins": 0.1195346862077713, "rewards/rejected": -0.12751702964305878, "step": 5172 }, { "epoch": 3.577455048409405, "grad_norm": 4.817649841308594, "learning_rate": 3.5680805286614414e-05, "log_odds_chosen": 7.474740982055664, "log_odds_ratio": -0.027025185525417328, "logits/chosen": -0.3730385899543762, "logits/rejected": -0.4131702780723572, "logps/chosen": -0.028401460498571396, "logps/rejected": -1.7878029346466064, "loss": 1.2991, "nll_loss": 0.3220648169517517, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028401461895555258, "rewards/margins": 0.1759401559829712, "rewards/rejected": -0.17878031730651855, "step": 5173 }, { "epoch": 3.5781466113416323, "grad_norm": 11.787805557250977, "learning_rate": 3.5676963270324266e-05, "log_odds_chosen": 10.492147445678711, "log_odds_ratio": -0.0001426354137947783, "logits/chosen": -0.674610435962677, "logits/rejected": -0.6919329762458801, "logps/chosen": -0.021003739908337593, "logps/rejected": -2.5441362857818604, "loss": 1.5442, "nll_loss": 0.3860280215740204, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021003740839660168, "rewards/margins": 0.2523132562637329, "rewards/rejected": -0.2544136345386505, "step": 5174 }, { "epoch": 3.578838174273859, "grad_norm": 8.707939147949219, "learning_rate": 3.567312125403412e-05, "log_odds_chosen": 9.505112648010254, "log_odds_ratio": -0.003918660804629326, "logits/chosen": -0.8012299537658691, "logits/rejected": -0.8023931980133057, "logps/chosen": -0.002562303561717272, "logps/rejected": -1.9537736177444458, "loss": 2.02, "nll_loss": 0.5045973062515259, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002562303270678967, "rewards/margins": 0.19512110948562622, "rewards/rejected": -0.19537734985351562, "step": 5175 }, { "epoch": 3.579529737206086, "grad_norm": 13.434113502502441, "learning_rate": 3.566927923774397e-05, "log_odds_chosen": 8.59106159210205, "log_odds_ratio": -0.0021850857883691788, "logits/chosen": -1.0124863386154175, "logits/rejected": -1.021253228187561, "logps/chosen": -0.01776060089468956, "logps/rejected": -2.7843875885009766, "loss": 3.2201, "nll_loss": 0.8048138618469238, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017760602058842778, "rewards/margins": 0.2766627073287964, "rewards/rejected": -0.2784387767314911, "step": 5176 }, { "epoch": 3.5802213001383127, "grad_norm": 8.495587348937988, "learning_rate": 3.566543722145382e-05, "log_odds_chosen": 10.054089546203613, "log_odds_ratio": -0.0004434631555341184, "logits/chosen": -0.5013400316238403, "logits/rejected": -0.5111294388771057, "logps/chosen": -0.00820872001349926, "logps/rejected": -2.961402416229248, "loss": 1.8074, "nll_loss": 0.45180925726890564, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008208720246329904, "rewards/margins": 0.2953193783760071, "rewards/rejected": -0.29614022374153137, "step": 5177 }, { "epoch": 3.5809128630705396, "grad_norm": 9.217135429382324, "learning_rate": 3.5661595205163676e-05, "log_odds_chosen": 7.587440490722656, "log_odds_ratio": -0.014449968934059143, "logits/chosen": -0.6275132894515991, "logits/rejected": -0.712116003036499, "logps/chosen": -0.02014012075960636, "logps/rejected": -1.2985014915466309, "loss": 1.5584, "nll_loss": 0.38814911246299744, "rewards/accuracies": 1.0, "rewards/chosen": -0.002014012075960636, "rewards/margins": 0.12783613801002502, "rewards/rejected": -0.12985014915466309, "step": 5178 }, { "epoch": 3.5816044260027664, "grad_norm": 15.308435440063477, "learning_rate": 3.565775318887352e-05, "log_odds_chosen": 9.134074211120605, "log_odds_ratio": -0.005743531044572592, "logits/chosen": -0.60552978515625, "logits/rejected": -0.6348068118095398, "logps/chosen": -0.015729809179902077, "logps/rejected": -1.2693355083465576, "loss": 0.9502, "nll_loss": 0.23696690797805786, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015729808947071433, "rewards/margins": 0.12536057829856873, "rewards/rejected": -0.12693354487419128, "step": 5179 }, { "epoch": 3.5822959889349932, "grad_norm": 8.912403106689453, "learning_rate": 3.5653911172583374e-05, "log_odds_chosen": 8.609804153442383, "log_odds_ratio": -0.11055989563465118, "logits/chosen": -0.7532698512077332, "logits/rejected": -0.8019864559173584, "logps/chosen": -0.018508533015847206, "logps/rejected": -1.8376288414001465, "loss": 1.2348, "nll_loss": 0.297635555267334, "rewards/accuracies": 0.875, "rewards/chosen": -0.001850853324867785, "rewards/margins": 0.18191203474998474, "rewards/rejected": -0.18376289308071136, "step": 5180 }, { "epoch": 3.58298755186722, "grad_norm": 8.784750938415527, "learning_rate": 3.5650069156293226e-05, "log_odds_chosen": 8.036988258361816, "log_odds_ratio": -0.029761577025055885, "logits/chosen": -0.4563424587249756, "logits/rejected": -0.49063539505004883, "logps/chosen": -0.02047664113342762, "logps/rejected": -2.1021976470947266, "loss": 1.8393, "nll_loss": 0.4568403959274292, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020476640202105045, "rewards/margins": 0.20817212760448456, "rewards/rejected": -0.21021978557109833, "step": 5181 }, { "epoch": 3.583679114799447, "grad_norm": 9.938516616821289, "learning_rate": 3.564622714000307e-05, "log_odds_chosen": 9.53658390045166, "log_odds_ratio": -0.0001912098377943039, "logits/chosen": -0.029588311910629272, "logits/rejected": -0.11934809386730194, "logps/chosen": -0.0002886332804337144, "logps/rejected": -1.574800968170166, "loss": 1.7211, "nll_loss": 0.4302535057067871, "rewards/accuracies": 1.0, "rewards/chosen": -2.8863330953754485e-05, "rewards/margins": 0.15745124220848083, "rewards/rejected": -0.15748010575771332, "step": 5182 }, { "epoch": 3.5843706777316737, "grad_norm": 7.632718563079834, "learning_rate": 3.5642385123712925e-05, "log_odds_chosen": 8.880706787109375, "log_odds_ratio": -0.00037580274511128664, "logits/chosen": -0.3277152180671692, "logits/rejected": -0.35892271995544434, "logps/chosen": -0.0005073597421869636, "logps/rejected": -1.1650047302246094, "loss": 1.1231, "nll_loss": 0.2807457745075226, "rewards/accuracies": 1.0, "rewards/chosen": -5.073597276350483e-05, "rewards/margins": 0.116449736058712, "rewards/rejected": -0.11650047451257706, "step": 5183 }, { "epoch": 3.5850622406639006, "grad_norm": 5.33242130279541, "learning_rate": 3.563854310742278e-05, "log_odds_chosen": 8.373812675476074, "log_odds_ratio": -0.0011708845850080252, "logits/chosen": -0.35607287287712097, "logits/rejected": -0.37027066946029663, "logps/chosen": -0.012119665741920471, "logps/rejected": -1.9545071125030518, "loss": 1.3272, "nll_loss": 0.3316921591758728, "rewards/accuracies": 1.0, "rewards/chosen": -0.001211966504342854, "rewards/margins": 0.19423875212669373, "rewards/rejected": -0.19545072317123413, "step": 5184 }, { "epoch": 3.5857538035961274, "grad_norm": 8.848631858825684, "learning_rate": 3.563470109113263e-05, "log_odds_chosen": 9.168559074401855, "log_odds_ratio": -0.0005192652461118996, "logits/chosen": -0.4697961211204529, "logits/rejected": -0.5052551031112671, "logps/chosen": -0.014308737590909004, "logps/rejected": -2.15948486328125, "loss": 1.676, "nll_loss": 0.41894909739494324, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014308736426755786, "rewards/margins": 0.21451762318611145, "rewards/rejected": -0.21594849228858948, "step": 5185 }, { "epoch": 3.586445366528354, "grad_norm": 11.535073280334473, "learning_rate": 3.5630859074842475e-05, "log_odds_chosen": 10.305524826049805, "log_odds_ratio": -0.00019394996343180537, "logits/chosen": -0.6018300652503967, "logits/rejected": -0.6703628301620483, "logps/chosen": -0.009994926862418652, "logps/rejected": -2.5256729125976562, "loss": 1.1935, "nll_loss": 0.29836463928222656, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009994925931096077, "rewards/margins": 0.2515678107738495, "rewards/rejected": -0.252567321062088, "step": 5186 }, { "epoch": 3.587136929460581, "grad_norm": 10.138055801391602, "learning_rate": 3.5627017058552334e-05, "log_odds_chosen": 8.14724063873291, "log_odds_ratio": -0.0025975287426263094, "logits/chosen": -0.4650860130786896, "logits/rejected": -0.5495781898498535, "logps/chosen": -0.0034916566219180822, "logps/rejected": -1.3577914237976074, "loss": 2.3802, "nll_loss": 0.5947921276092529, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003491656680125743, "rewards/margins": 0.1354299783706665, "rewards/rejected": -0.13577914237976074, "step": 5187 }, { "epoch": 3.587828492392808, "grad_norm": 16.81104278564453, "learning_rate": 3.562317504226218e-05, "log_odds_chosen": 10.003152847290039, "log_odds_ratio": -0.00014228149666450918, "logits/chosen": -0.4464438855648041, "logits/rejected": -0.4425196945667267, "logps/chosen": -0.00030187988886609674, "logps/rejected": -1.8005365133285522, "loss": 1.6632, "nll_loss": 0.4157760739326477, "rewards/accuracies": 1.0, "rewards/chosen": -3.0187989978003316e-05, "rewards/margins": 0.18002347648143768, "rewards/rejected": -0.18005365133285522, "step": 5188 }, { "epoch": 3.5885200553250347, "grad_norm": 10.235124588012695, "learning_rate": 3.561933302597203e-05, "log_odds_chosen": 7.02274751663208, "log_odds_ratio": -0.07414565235376358, "logits/chosen": -0.27639496326446533, "logits/rejected": -0.4222687780857086, "logps/chosen": -0.0243497546762228, "logps/rejected": -1.6426656246185303, "loss": 1.6987, "nll_loss": 0.41725045442581177, "rewards/accuracies": 1.0, "rewards/chosen": -0.00243497546762228, "rewards/margins": 0.1618315875530243, "rewards/rejected": -0.16426655650138855, "step": 5189 }, { "epoch": 3.5892116182572615, "grad_norm": 6.700727939605713, "learning_rate": 3.5615491009681885e-05, "log_odds_chosen": 9.764245986938477, "log_odds_ratio": -0.0013793071266263723, "logits/chosen": -0.613777220249176, "logits/rejected": -0.665103018283844, "logps/chosen": -0.00799685250967741, "logps/rejected": -2.3952579498291016, "loss": 0.9901, "nll_loss": 0.24738426506519318, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007996853091754019, "rewards/margins": 0.23872610926628113, "rewards/rejected": -0.23952579498291016, "step": 5190 }, { "epoch": 3.5899031811894884, "grad_norm": 11.159008979797363, "learning_rate": 3.561164899339173e-05, "log_odds_chosen": 10.08697509765625, "log_odds_ratio": -0.0002770907594822347, "logits/chosen": -0.3040257692337036, "logits/rejected": -0.41321447491645813, "logps/chosen": -0.0003381900314707309, "logps/rejected": -2.081603765487671, "loss": 1.8172, "nll_loss": 0.4542771279811859, "rewards/accuracies": 1.0, "rewards/chosen": -3.381900387466885e-05, "rewards/margins": 0.20812655985355377, "rewards/rejected": -0.2081603854894638, "step": 5191 }, { "epoch": 3.590594744121715, "grad_norm": 78.92797088623047, "learning_rate": 3.560780697710158e-05, "log_odds_chosen": 7.735320091247559, "log_odds_ratio": -0.07346854358911514, "logits/chosen": -0.5990978479385376, "logits/rejected": -0.6643476486206055, "logps/chosen": -0.024174688383936882, "logps/rejected": -1.3395992517471313, "loss": 1.4912, "nll_loss": 0.3654648959636688, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024174691643565893, "rewards/margins": 0.13154245913028717, "rewards/rejected": -0.13395991921424866, "step": 5192 }, { "epoch": 3.591286307053942, "grad_norm": 7.0599894523620605, "learning_rate": 3.5603964960811435e-05, "log_odds_chosen": 9.842082977294922, "log_odds_ratio": -0.00015221555077005178, "logits/chosen": -0.6480206847190857, "logits/rejected": -0.6932371854782104, "logps/chosen": -0.00041077513014897704, "logps/rejected": -1.7274799346923828, "loss": 1.8491, "nll_loss": 0.46225303411483765, "rewards/accuracies": 1.0, "rewards/chosen": -4.1077513742493466e-05, "rewards/margins": 0.17270690202713013, "rewards/rejected": -0.17274799942970276, "step": 5193 }, { "epoch": 3.591977869986169, "grad_norm": 15.730473518371582, "learning_rate": 3.560012294452129e-05, "log_odds_chosen": 9.617203712463379, "log_odds_ratio": -0.00021409033797681332, "logits/chosen": -0.3583557605743408, "logits/rejected": -0.3977838158607483, "logps/chosen": -0.016692141070961952, "logps/rejected": -2.3281161785125732, "loss": 1.7002, "nll_loss": 0.42503270506858826, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016692141070961952, "rewards/margins": 0.23114240169525146, "rewards/rejected": -0.23281162977218628, "step": 5194 }, { "epoch": 3.5926694329183957, "grad_norm": 9.38475513458252, "learning_rate": 3.5596280928231134e-05, "log_odds_chosen": 9.190800666809082, "log_odds_ratio": -0.00075269874650985, "logits/chosen": -0.16665798425674438, "logits/rejected": -0.24333110451698303, "logps/chosen": -0.0010805196361616254, "logps/rejected": -1.378718614578247, "loss": 1.3852, "nll_loss": 0.34622547030448914, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010805197234731168, "rewards/margins": 0.1377638280391693, "rewards/rejected": -0.1378718763589859, "step": 5195 }, { "epoch": 3.5933609958506225, "grad_norm": 13.01176643371582, "learning_rate": 3.559243891194099e-05, "log_odds_chosen": 9.629497528076172, "log_odds_ratio": -0.00012551844702102244, "logits/chosen": -0.9238946437835693, "logits/rejected": -0.981550931930542, "logps/chosen": -0.00043221822124905884, "logps/rejected": -1.55594801902771, "loss": 1.9092, "nll_loss": 0.47729602456092834, "rewards/accuracies": 1.0, "rewards/chosen": -4.322181848692708e-05, "rewards/margins": 0.15555158257484436, "rewards/rejected": -0.1555948108434677, "step": 5196 }, { "epoch": 3.5940525587828493, "grad_norm": 7.340988636016846, "learning_rate": 3.558859689565084e-05, "log_odds_chosen": 10.411932945251465, "log_odds_ratio": -5.617938222712837e-05, "logits/chosen": -0.36234623193740845, "logits/rejected": -0.5597435235977173, "logps/chosen": -0.0002136369002982974, "logps/rejected": -1.9466426372528076, "loss": 1.1771, "nll_loss": 0.29427260160446167, "rewards/accuracies": 1.0, "rewards/chosen": -2.1363688574638218e-05, "rewards/margins": 0.19464290142059326, "rewards/rejected": -0.19466425478458405, "step": 5197 }, { "epoch": 3.594744121715076, "grad_norm": 7.5101165771484375, "learning_rate": 3.558475487936069e-05, "log_odds_chosen": 7.9836931228637695, "log_odds_ratio": -0.057652927935123444, "logits/chosen": -0.5086445808410645, "logits/rejected": -0.5536125898361206, "logps/chosen": -0.01708623394370079, "logps/rejected": -1.378343939781189, "loss": 1.357, "nll_loss": 0.3334919214248657, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017086233710870147, "rewards/margins": 0.13612577319145203, "rewards/rejected": -0.13783439993858337, "step": 5198 }, { "epoch": 3.595435684647303, "grad_norm": 4.695312023162842, "learning_rate": 3.558091286307054e-05, "log_odds_chosen": 7.252886772155762, "log_odds_ratio": -0.1333327293395996, "logits/chosen": -0.6493362188339233, "logits/rejected": -0.6358364224433899, "logps/chosen": -0.04882507771253586, "logps/rejected": -1.5421638488769531, "loss": 2.5656, "nll_loss": 0.6280553340911865, "rewards/accuracies": 0.875, "rewards/chosen": -0.004882507957518101, "rewards/margins": 0.1493338793516159, "rewards/rejected": -0.15421637892723083, "step": 5199 }, { "epoch": 3.59612724757953, "grad_norm": 5.692283630371094, "learning_rate": 3.557707084678039e-05, "log_odds_chosen": 9.078126907348633, "log_odds_ratio": -0.0004336358979344368, "logits/chosen": -0.31813618540763855, "logits/rejected": -0.34279268980026245, "logps/chosen": -0.014234584756195545, "logps/rejected": -2.1005630493164062, "loss": 1.0926, "nll_loss": 0.2731185555458069, "rewards/accuracies": 1.0, "rewards/chosen": -0.001423458568751812, "rewards/margins": 0.20863284170627594, "rewards/rejected": -0.21005630493164062, "step": 5200 }, { "epoch": 3.5968188105117567, "grad_norm": 7.402063846588135, "learning_rate": 3.557322883049024e-05, "log_odds_chosen": 8.764384269714355, "log_odds_ratio": -0.00031936116283759475, "logits/chosen": -0.19075855612754822, "logits/rejected": -0.2277907282114029, "logps/chosen": -0.026667606085538864, "logps/rejected": -1.5372223854064941, "loss": 1.4802, "nll_loss": 0.37002283334732056, "rewards/accuracies": 1.0, "rewards/chosen": -0.002666760701686144, "rewards/margins": 0.1510554850101471, "rewards/rejected": -0.15372224152088165, "step": 5201 }, { "epoch": 3.5975103734439835, "grad_norm": 9.261101722717285, "learning_rate": 3.5569386814200094e-05, "log_odds_chosen": 8.320323944091797, "log_odds_ratio": -0.05530842766165733, "logits/chosen": -0.4870779514312744, "logits/rejected": -0.517996609210968, "logps/chosen": -0.013912579044699669, "logps/rejected": -1.4187254905700684, "loss": 2.2164, "nll_loss": 0.5485726594924927, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013912580907344818, "rewards/margins": 0.14048129320144653, "rewards/rejected": -0.14187254011631012, "step": 5202 }, { "epoch": 3.5982019363762103, "grad_norm": 7.648223876953125, "learning_rate": 3.5565544797909946e-05, "log_odds_chosen": 8.643014907836914, "log_odds_ratio": -0.0318199060857296, "logits/chosen": -0.31153562664985657, "logits/rejected": -0.3480609953403473, "logps/chosen": -0.03326771408319473, "logps/rejected": -1.7915986776351929, "loss": 1.8682, "nll_loss": 0.46386802196502686, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033267715480178595, "rewards/margins": 0.1758331060409546, "rewards/rejected": -0.17915986478328705, "step": 5203 }, { "epoch": 3.598893499308437, "grad_norm": 9.878567695617676, "learning_rate": 3.556170278161979e-05, "log_odds_chosen": 9.812541961669922, "log_odds_ratio": -0.0006717155338265002, "logits/chosen": -0.5299800038337708, "logits/rejected": -0.5992415547370911, "logps/chosen": -0.0010979081271216273, "logps/rejected": -1.979628562927246, "loss": 1.4765, "nll_loss": 0.36904868483543396, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010979082435369492, "rewards/margins": 0.19785307347774506, "rewards/rejected": -0.19796285033226013, "step": 5204 }, { "epoch": 3.599585062240664, "grad_norm": 9.53571891784668, "learning_rate": 3.555786076532965e-05, "log_odds_chosen": 8.218473434448242, "log_odds_ratio": -0.0811346173286438, "logits/chosen": -0.4749361276626587, "logits/rejected": -0.4611550271511078, "logps/chosen": -0.020573316141963005, "logps/rejected": -1.3474071025848389, "loss": 1.7931, "nll_loss": 0.44016969203948975, "rewards/accuracies": 1.0, "rewards/chosen": -0.002057331847026944, "rewards/margins": 0.1326833963394165, "rewards/rejected": -0.13474072515964508, "step": 5205 }, { "epoch": 3.600276625172891, "grad_norm": 6.642927169799805, "learning_rate": 3.55540187490395e-05, "log_odds_chosen": 9.684572219848633, "log_odds_ratio": -0.00022037234157323837, "logits/chosen": -0.1952618807554245, "logits/rejected": -0.258468896150589, "logps/chosen": -0.0021308918949216604, "logps/rejected": -1.52579665184021, "loss": 1.2332, "nll_loss": 0.30827754735946655, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021308919531293213, "rewards/margins": 0.15236657857894897, "rewards/rejected": -0.152579665184021, "step": 5206 }, { "epoch": 3.6009681881051177, "grad_norm": 7.0721354484558105, "learning_rate": 3.555017673274935e-05, "log_odds_chosen": 7.419760704040527, "log_odds_ratio": -0.055805519223213196, "logits/chosen": -0.44695115089416504, "logits/rejected": -0.4739919900894165, "logps/chosen": -0.016496581956744194, "logps/rejected": -1.4811910390853882, "loss": 2.2693, "nll_loss": 0.5617491006851196, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016496581956744194, "rewards/margins": 0.14646944403648376, "rewards/rejected": -0.14811909198760986, "step": 5207 }, { "epoch": 3.6016597510373445, "grad_norm": 8.998069763183594, "learning_rate": 3.55463347164592e-05, "log_odds_chosen": 8.890933990478516, "log_odds_ratio": -0.0005221288884058595, "logits/chosen": -0.19365093111991882, "logits/rejected": -0.2810370922088623, "logps/chosen": -0.005299925804138184, "logps/rejected": -2.2653985023498535, "loss": 1.4267, "nll_loss": 0.3566116690635681, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005299926269799471, "rewards/margins": 0.22600987553596497, "rewards/rejected": -0.22653988003730774, "step": 5208 }, { "epoch": 3.6023513139695713, "grad_norm": 7.603548526763916, "learning_rate": 3.554249270016905e-05, "log_odds_chosen": 9.299885749816895, "log_odds_ratio": -0.0002216776047134772, "logits/chosen": -0.6699154376983643, "logits/rejected": -0.6954845190048218, "logps/chosen": -0.005512827541679144, "logps/rejected": -1.7632887363433838, "loss": 1.0745, "nll_loss": 0.2686063051223755, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005512827192433178, "rewards/margins": 0.1757775992155075, "rewards/rejected": -0.1763288676738739, "step": 5209 }, { "epoch": 3.603042876901798, "grad_norm": 6.092649459838867, "learning_rate": 3.55386506838789e-05, "log_odds_chosen": 9.46326732635498, "log_odds_ratio": -0.000469283084385097, "logits/chosen": -0.451174795627594, "logits/rejected": -0.49339890480041504, "logps/chosen": -0.01303679309785366, "logps/rejected": -2.3563642501831055, "loss": 1.1468, "nll_loss": 0.2866421937942505, "rewards/accuracies": 1.0, "rewards/chosen": -0.001303679309785366, "rewards/margins": 0.23433274030685425, "rewards/rejected": -0.23563642799854279, "step": 5210 }, { "epoch": 3.603734439834025, "grad_norm": 9.824409484863281, "learning_rate": 3.553480866758875e-05, "log_odds_chosen": 8.063066482543945, "log_odds_ratio": -0.20818378031253815, "logits/chosen": -0.4809816777706146, "logits/rejected": -0.559431791305542, "logps/chosen": -0.03620007634162903, "logps/rejected": -1.4488892555236816, "loss": 1.9775, "nll_loss": 0.47356724739074707, "rewards/accuracies": 0.875, "rewards/chosen": -0.003620007773861289, "rewards/margins": 0.14126893877983093, "rewards/rejected": -0.14488893747329712, "step": 5211 }, { "epoch": 3.604426002766252, "grad_norm": 12.211295127868652, "learning_rate": 3.5530966651298605e-05, "log_odds_chosen": 10.524577140808105, "log_odds_ratio": -0.00020419417705852538, "logits/chosen": -0.7545992732048035, "logits/rejected": -0.7831495404243469, "logps/chosen": -0.0003767163143493235, "logps/rejected": -2.2208974361419678, "loss": 1.0508, "nll_loss": 0.26268112659454346, "rewards/accuracies": 1.0, "rewards/chosen": -3.76716343453154e-05, "rewards/margins": 0.22205206751823425, "rewards/rejected": -0.2220897376537323, "step": 5212 }, { "epoch": 3.6051175656984786, "grad_norm": 11.500664710998535, "learning_rate": 3.552712463500845e-05, "log_odds_chosen": 10.186233520507812, "log_odds_ratio": -0.00036904169246554375, "logits/chosen": -0.3501139283180237, "logits/rejected": -0.40883195400238037, "logps/chosen": -0.0018149593379348516, "logps/rejected": -2.5266244411468506, "loss": 1.5078, "nll_loss": 0.3769240379333496, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018149595416616648, "rewards/margins": 0.25248095393180847, "rewards/rejected": -0.25266245007514954, "step": 5213 }, { "epoch": 3.6058091286307055, "grad_norm": 10.881603240966797, "learning_rate": 3.552328261871831e-05, "log_odds_chosen": 8.526883125305176, "log_odds_ratio": -0.09655605256557465, "logits/chosen": -0.8187621235847473, "logits/rejected": -0.8889845609664917, "logps/chosen": -0.04067990556359291, "logps/rejected": -2.1676435470581055, "loss": 2.1095, "nll_loss": 0.5177196860313416, "rewards/accuracies": 1.0, "rewards/chosen": -0.004067990463227034, "rewards/margins": 0.2126963585615158, "rewards/rejected": -0.21676437556743622, "step": 5214 }, { "epoch": 3.6065006915629323, "grad_norm": 8.708210945129395, "learning_rate": 3.5519440602428155e-05, "log_odds_chosen": 8.359586715698242, "log_odds_ratio": -0.07479312270879745, "logits/chosen": -0.8053072690963745, "logits/rejected": -0.8438823223114014, "logps/chosen": -0.016664860770106316, "logps/rejected": -1.6616318225860596, "loss": 1.4285, "nll_loss": 0.3496565520763397, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016664863796904683, "rewards/margins": 0.16449669003486633, "rewards/rejected": -0.16616317629814148, "step": 5215 }, { "epoch": 3.607192254495159, "grad_norm": 9.633548736572266, "learning_rate": 3.551559858613801e-05, "log_odds_chosen": 9.497386932373047, "log_odds_ratio": -0.0003051602398045361, "logits/chosen": -0.6601248979568481, "logits/rejected": -0.6196205615997314, "logps/chosen": -0.0005067433230578899, "logps/rejected": -1.4025245904922485, "loss": 1.5811, "nll_loss": 0.3952542543411255, "rewards/accuracies": 1.0, "rewards/chosen": -5.067433085059747e-05, "rewards/margins": 0.14020179212093353, "rewards/rejected": -0.1402524709701538, "step": 5216 }, { "epoch": 3.607883817427386, "grad_norm": 7.4141716957092285, "learning_rate": 3.551175656984786e-05, "log_odds_chosen": 8.719673156738281, "log_odds_ratio": -0.0004461368080228567, "logits/chosen": -0.41223183274269104, "logits/rejected": -0.47403812408447266, "logps/chosen": -0.014796635136008263, "logps/rejected": -2.066044330596924, "loss": 1.7922, "nll_loss": 0.44800156354904175, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014796635368838906, "rewards/margins": 0.20512478053569794, "rewards/rejected": -0.20660445094108582, "step": 5217 }, { "epoch": 3.608575380359613, "grad_norm": 10.772750854492188, "learning_rate": 3.5507914553557706e-05, "log_odds_chosen": 9.217242240905762, "log_odds_ratio": -0.0053271041251719, "logits/chosen": -0.23242275416851044, "logits/rejected": -0.3318224549293518, "logps/chosen": -0.0030850740149617195, "logps/rejected": -2.3515217304229736, "loss": 1.584, "nll_loss": 0.39547258615493774, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030850741313770413, "rewards/margins": 0.2348436862230301, "rewards/rejected": -0.23515218496322632, "step": 5218 }, { "epoch": 3.6092669432918396, "grad_norm": 9.194649696350098, "learning_rate": 3.550407253726756e-05, "log_odds_chosen": 8.674663543701172, "log_odds_ratio": -0.04212072864174843, "logits/chosen": -0.5076473951339722, "logits/rejected": -0.5080469846725464, "logps/chosen": -0.009754427708685398, "logps/rejected": -1.1838669776916504, "loss": 1.5066, "nll_loss": 0.37244829535484314, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009754427592270076, "rewards/margins": 0.11741125583648682, "rewards/rejected": -0.11838670819997787, "step": 5219 }, { "epoch": 3.6099585062240664, "grad_norm": 12.188793182373047, "learning_rate": 3.550023052097741e-05, "log_odds_chosen": 9.73184585571289, "log_odds_ratio": -0.00016213285562116653, "logits/chosen": -0.8976345062255859, "logits/rejected": -0.9340048432350159, "logps/chosen": -0.0012246439000591636, "logps/rejected": -2.3440892696380615, "loss": 1.3086, "nll_loss": 0.32713812589645386, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001224643929162994, "rewards/margins": 0.23428647220134735, "rewards/rejected": -0.2344089150428772, "step": 5220 }, { "epoch": 3.6106500691562933, "grad_norm": 11.67767333984375, "learning_rate": 3.549638850468726e-05, "log_odds_chosen": 8.549223899841309, "log_odds_ratio": -0.0044251237995922565, "logits/chosen": -0.6429064869880676, "logits/rejected": -0.7014204263687134, "logps/chosen": -0.017330633476376534, "logps/rejected": -1.9335042238235474, "loss": 1.4883, "nll_loss": 0.37162601947784424, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017330633709207177, "rewards/margins": 0.19161736965179443, "rewards/rejected": -0.1933504343032837, "step": 5221 }, { "epoch": 3.61134163208852, "grad_norm": 15.276002883911133, "learning_rate": 3.549254648839711e-05, "log_odds_chosen": 9.483627319335938, "log_odds_ratio": -0.0002317847975064069, "logits/chosen": -0.7881807088851929, "logits/rejected": -0.8484748601913452, "logps/chosen": -0.0012060196604579687, "logps/rejected": -1.873993992805481, "loss": 1.9021, "nll_loss": 0.47550061345100403, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012060196604579687, "rewards/margins": 0.18727880716323853, "rewards/rejected": -0.18739941716194153, "step": 5222 }, { "epoch": 3.612033195020747, "grad_norm": 14.034067153930664, "learning_rate": 3.548870447210697e-05, "log_odds_chosen": 9.489564895629883, "log_odds_ratio": -0.032893918454647064, "logits/chosen": -0.5653495192527771, "logits/rejected": -0.628448486328125, "logps/chosen": -0.015954216942191124, "logps/rejected": -2.7742607593536377, "loss": 1.327, "nll_loss": 0.3284568786621094, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015954216942191124, "rewards/margins": 0.2758306562900543, "rewards/rejected": -0.2774260938167572, "step": 5223 }, { "epoch": 3.6127247579529738, "grad_norm": 8.176433563232422, "learning_rate": 3.5484862455816814e-05, "log_odds_chosen": 9.06335163116455, "log_odds_ratio": -0.0018982174806296825, "logits/chosen": -0.772789478302002, "logits/rejected": -0.796208918094635, "logps/chosen": -0.00784214586019516, "logps/rejected": -2.7062885761260986, "loss": 1.3409, "nll_loss": 0.3350370228290558, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007842145278118551, "rewards/margins": 0.269844651222229, "rewards/rejected": -0.27062883973121643, "step": 5224 }, { "epoch": 3.6134163208852006, "grad_norm": 9.80726432800293, "learning_rate": 3.5481020439526666e-05, "log_odds_chosen": 9.160164833068848, "log_odds_ratio": -0.009143702685832977, "logits/chosen": -0.5025213360786438, "logits/rejected": -0.5475513339042664, "logps/chosen": -0.005097491666674614, "logps/rejected": -2.0189099311828613, "loss": 1.7398, "nll_loss": 0.434039831161499, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005097491666674614, "rewards/margins": 0.20138123631477356, "rewards/rejected": -0.2018910050392151, "step": 5225 }, { "epoch": 3.6141078838174274, "grad_norm": 6.498498439788818, "learning_rate": 3.547717842323652e-05, "log_odds_chosen": 7.767198085784912, "log_odds_ratio": -0.1166754812002182, "logits/chosen": -0.8081409931182861, "logits/rejected": -0.8334828019142151, "logps/chosen": -0.06212611868977547, "logps/rejected": -2.2269105911254883, "loss": 1.6677, "nll_loss": 0.40525153279304504, "rewards/accuracies": 0.875, "rewards/chosen": -0.006212612148374319, "rewards/margins": 0.21647846698760986, "rewards/rejected": -0.22269108891487122, "step": 5226 }, { "epoch": 3.6147994467496543, "grad_norm": 14.081502914428711, "learning_rate": 3.5473336406946364e-05, "log_odds_chosen": 7.013472557067871, "log_odds_ratio": -0.17359904944896698, "logits/chosen": -0.37669873237609863, "logits/rejected": -0.42986077070236206, "logps/chosen": -0.042637161910533905, "logps/rejected": -1.6250157356262207, "loss": 1.7087, "nll_loss": 0.40981000661849976, "rewards/accuracies": 0.875, "rewards/chosen": -0.004263716284185648, "rewards/margins": 0.15823784470558167, "rewards/rejected": -0.16250157356262207, "step": 5227 }, { "epoch": 3.615491009681881, "grad_norm": 4.868067741394043, "learning_rate": 3.546949439065622e-05, "log_odds_chosen": 7.783168315887451, "log_odds_ratio": -0.004499551374465227, "logits/chosen": -0.5527982115745544, "logits/rejected": -0.4671393632888794, "logps/chosen": -0.0024018839467316866, "logps/rejected": -1.0713505744934082, "loss": 1.163, "nll_loss": 0.2903061509132385, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002401883975835517, "rewards/margins": 0.10689487308263779, "rewards/rejected": -0.10713505744934082, "step": 5228 }, { "epoch": 3.616182572614108, "grad_norm": 11.57120132446289, "learning_rate": 3.546565237436607e-05, "log_odds_chosen": 10.204926490783691, "log_odds_ratio": -0.00031510682310909033, "logits/chosen": -0.5665751695632935, "logits/rejected": -0.5830574631690979, "logps/chosen": -0.0030491678044199944, "logps/rejected": -2.6864333152770996, "loss": 1.0716, "nll_loss": 0.2678784132003784, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030491675715893507, "rewards/margins": 0.2683384418487549, "rewards/rejected": -0.2686433494091034, "step": 5229 }, { "epoch": 3.6168741355463347, "grad_norm": 8.560798645019531, "learning_rate": 3.546181035807592e-05, "log_odds_chosen": 10.29986572265625, "log_odds_ratio": -0.00015330745372921228, "logits/chosen": -0.4275868833065033, "logits/rejected": -0.5136326551437378, "logps/chosen": -0.0002663989725988358, "logps/rejected": -1.7132883071899414, "loss": 1.699, "nll_loss": 0.4247225522994995, "rewards/accuracies": 1.0, "rewards/chosen": -2.663989653228782e-05, "rewards/margins": 0.1713021993637085, "rewards/rejected": -0.1713288277387619, "step": 5230 }, { "epoch": 3.6175656984785616, "grad_norm": 10.029023170471191, "learning_rate": 3.545796834178577e-05, "log_odds_chosen": 10.008489608764648, "log_odds_ratio": -6.896184640936553e-05, "logits/chosen": -0.5981601476669312, "logits/rejected": -0.662560224533081, "logps/chosen": -0.0005962676950730383, "logps/rejected": -1.9099280834197998, "loss": 1.2771, "nll_loss": 0.3192793130874634, "rewards/accuracies": 1.0, "rewards/chosen": -5.962677096249536e-05, "rewards/margins": 0.19093316793441772, "rewards/rejected": -0.1909928023815155, "step": 5231 }, { "epoch": 3.6182572614107884, "grad_norm": 6.729194164276123, "learning_rate": 3.5454126325495627e-05, "log_odds_chosen": 10.444304466247559, "log_odds_ratio": -7.478394400095567e-05, "logits/chosen": -0.43716686964035034, "logits/rejected": -0.47095102071762085, "logps/chosen": -0.00014576371177099645, "logps/rejected": -1.8478639125823975, "loss": 1.0408, "nll_loss": 0.2601904571056366, "rewards/accuracies": 1.0, "rewards/chosen": -1.4576370631402824e-05, "rewards/margins": 0.1847718209028244, "rewards/rejected": -0.18478639423847198, "step": 5232 }, { "epoch": 3.6189488243430152, "grad_norm": 11.273608207702637, "learning_rate": 3.545028430920547e-05, "log_odds_chosen": 9.06591510772705, "log_odds_ratio": -0.0006158847245387733, "logits/chosen": -0.8914425373077393, "logits/rejected": -1.0343084335327148, "logps/chosen": -0.0025831114035099745, "logps/rejected": -1.8664512634277344, "loss": 2.1863, "nll_loss": 0.5465248227119446, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002583111636340618, "rewards/margins": 0.1863868236541748, "rewards/rejected": -0.18664513528347015, "step": 5233 }, { "epoch": 3.619640387275242, "grad_norm": 8.003049850463867, "learning_rate": 3.5446442292915325e-05, "log_odds_chosen": 10.12773609161377, "log_odds_ratio": -5.926351514062844e-05, "logits/chosen": -0.47422224283218384, "logits/rejected": -0.4392775595188141, "logps/chosen": -0.0071992347948253155, "logps/rejected": -2.150129556655884, "loss": 1.1627, "nll_loss": 0.290669322013855, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007199235260486603, "rewards/margins": 0.21429303288459778, "rewards/rejected": -0.21501296758651733, "step": 5234 }, { "epoch": 3.620331950207469, "grad_norm": 10.644028663635254, "learning_rate": 3.544260027662518e-05, "log_odds_chosen": 11.33029556274414, "log_odds_ratio": -1.8505686966818757e-05, "logits/chosen": -0.7966725826263428, "logits/rejected": -0.8608373403549194, "logps/chosen": -0.00024159994791261852, "logps/rejected": -2.4138331413269043, "loss": 1.5136, "nll_loss": 0.3784021735191345, "rewards/accuracies": 1.0, "rewards/chosen": -2.4159995518857613e-05, "rewards/margins": 0.241359144449234, "rewards/rejected": -0.24138331413269043, "step": 5235 }, { "epoch": 3.6210235131396957, "grad_norm": 16.928823471069336, "learning_rate": 3.543875826033502e-05, "log_odds_chosen": 8.554088592529297, "log_odds_ratio": -0.027092065662145615, "logits/chosen": -0.6856877207756042, "logits/rejected": -0.7653172612190247, "logps/chosen": -0.000812489481177181, "logps/rejected": -1.5144953727722168, "loss": 1.3546, "nll_loss": 0.33592864871025085, "rewards/accuracies": 1.0, "rewards/chosen": -8.124895248329267e-05, "rewards/margins": 0.15136829018592834, "rewards/rejected": -0.1514495313167572, "step": 5236 }, { "epoch": 3.6217150760719226, "grad_norm": 16.311784744262695, "learning_rate": 3.5434916244044875e-05, "log_odds_chosen": 9.777567863464355, "log_odds_ratio": -0.0001523627433925867, "logits/chosen": -0.7406394481658936, "logits/rejected": -0.7905520796775818, "logps/chosen": -0.0004889132105745375, "logps/rejected": -1.948346734046936, "loss": 2.7937, "nll_loss": 0.6984192132949829, "rewards/accuracies": 1.0, "rewards/chosen": -4.889132105745375e-05, "rewards/margins": 0.19478577375411987, "rewards/rejected": -0.19483467936515808, "step": 5237 }, { "epoch": 3.6224066390041494, "grad_norm": 12.5172119140625, "learning_rate": 3.543107422775473e-05, "log_odds_chosen": 9.594234466552734, "log_odds_ratio": -9.091549145523459e-05, "logits/chosen": -0.7145823240280151, "logits/rejected": -0.7423162460327148, "logps/chosen": -0.00032870128052309155, "logps/rejected": -1.639130711555481, "loss": 1.4622, "nll_loss": 0.365535706281662, "rewards/accuracies": 1.0, "rewards/chosen": -3.2870128052309155e-05, "rewards/margins": 0.16388019919395447, "rewards/rejected": -0.1639130711555481, "step": 5238 }, { "epoch": 3.623098201936376, "grad_norm": 9.18506145477295, "learning_rate": 3.542723221146458e-05, "log_odds_chosen": 9.633953094482422, "log_odds_ratio": -0.00023023865651339293, "logits/chosen": -0.5641602277755737, "logits/rejected": -0.6728044152259827, "logps/chosen": -0.0016441429033875465, "logps/rejected": -1.8026257753372192, "loss": 1.459, "nll_loss": 0.3647145628929138, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016441429033875465, "rewards/margins": 0.18009814620018005, "rewards/rejected": -0.18026258051395416, "step": 5239 }, { "epoch": 3.623789764868603, "grad_norm": 8.023653030395508, "learning_rate": 3.5423390195174426e-05, "log_odds_chosen": 10.032447814941406, "log_odds_ratio": -0.00021187691891100258, "logits/chosen": -0.5241720676422119, "logits/rejected": -0.5224738121032715, "logps/chosen": -0.0022757581900805235, "logps/rejected": -2.386683225631714, "loss": 1.8749, "nll_loss": 0.4687134027481079, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022757584520149976, "rewards/margins": 0.23844075202941895, "rewards/rejected": -0.2386683225631714, "step": 5240 }, { "epoch": 3.62448132780083, "grad_norm": 10.000246047973633, "learning_rate": 3.5419548178884285e-05, "log_odds_chosen": 10.252166748046875, "log_odds_ratio": -0.00010917196050286293, "logits/chosen": -0.5360872149467468, "logits/rejected": -0.5863335132598877, "logps/chosen": -0.00992940180003643, "logps/rejected": -2.6332054138183594, "loss": 2.0352, "nll_loss": 0.5087817907333374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009929401567205787, "rewards/margins": 0.2623276114463806, "rewards/rejected": -0.26332053542137146, "step": 5241 }, { "epoch": 3.6251728907330567, "grad_norm": 118.457763671875, "learning_rate": 3.541570616259413e-05, "log_odds_chosen": 8.625368118286133, "log_odds_ratio": -0.1461210548877716, "logits/chosen": -0.7309638857841492, "logits/rejected": -0.7860568165779114, "logps/chosen": -0.004215087275952101, "logps/rejected": -1.4883962869644165, "loss": 1.5265, "nll_loss": 0.3670060336589813, "rewards/accuracies": 0.875, "rewards/chosen": -0.00042150873923674226, "rewards/margins": 0.148418128490448, "rewards/rejected": -0.14883963763713837, "step": 5242 }, { "epoch": 3.6258644536652835, "grad_norm": 17.151451110839844, "learning_rate": 3.541186414630398e-05, "log_odds_chosen": 9.89073657989502, "log_odds_ratio": -0.0001892504806164652, "logits/chosen": -0.41966360807418823, "logits/rejected": -0.434902161359787, "logps/chosen": -0.0042398408986628056, "logps/rejected": -2.0369925498962402, "loss": 1.3297, "nll_loss": 0.3324141800403595, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042398410732857883, "rewards/margins": 0.20327524840831757, "rewards/rejected": -0.2036992460489273, "step": 5243 }, { "epoch": 3.6265560165975104, "grad_norm": 8.913837432861328, "learning_rate": 3.5408022130013836e-05, "log_odds_chosen": 10.53475570678711, "log_odds_ratio": -7.836183067411184e-05, "logits/chosen": -0.4833153486251831, "logits/rejected": -0.5408565998077393, "logps/chosen": -0.0027675952296704054, "logps/rejected": -2.565074920654297, "loss": 1.3758, "nll_loss": 0.34394752979278564, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027675952878780663, "rewards/margins": 0.25623074173927307, "rewards/rejected": -0.2565074861049652, "step": 5244 }, { "epoch": 3.627247579529737, "grad_norm": 9.715182304382324, "learning_rate": 3.540418011372368e-05, "log_odds_chosen": 10.130058288574219, "log_odds_ratio": -7.551023736596107e-05, "logits/chosen": -0.8627445697784424, "logits/rejected": -0.9407069087028503, "logps/chosen": -0.00020197762933094054, "logps/rejected": -1.7689321041107178, "loss": 1.3774, "nll_loss": 0.3443450331687927, "rewards/accuracies": 1.0, "rewards/chosen": -2.0197761841700412e-05, "rewards/margins": 0.17687302827835083, "rewards/rejected": -0.1768932342529297, "step": 5245 }, { "epoch": 3.627939142461964, "grad_norm": 9.587575912475586, "learning_rate": 3.5400338097433534e-05, "log_odds_chosen": 8.340509414672852, "log_odds_ratio": -0.11384254693984985, "logits/chosen": -0.6631177663803101, "logits/rejected": -0.7274695634841919, "logps/chosen": -0.026078490540385246, "logps/rejected": -1.0123172998428345, "loss": 1.9941, "nll_loss": 0.4871327877044678, "rewards/accuracies": 0.875, "rewards/chosen": -0.002607849193736911, "rewards/margins": 0.09862387180328369, "rewards/rejected": -0.10123172402381897, "step": 5246 }, { "epoch": 3.628630705394191, "grad_norm": 11.814026832580566, "learning_rate": 3.5396496081143386e-05, "log_odds_chosen": 8.81454849243164, "log_odds_ratio": -0.00029346495284698904, "logits/chosen": -0.5719044804573059, "logits/rejected": -0.6320205330848694, "logps/chosen": -0.0002783064846880734, "logps/rejected": -0.9145975112915039, "loss": 2.0516, "nll_loss": 0.5128742456436157, "rewards/accuracies": 1.0, "rewards/chosen": -2.783064883260522e-05, "rewards/margins": 0.09143192321062088, "rewards/rejected": -0.0914597436785698, "step": 5247 }, { "epoch": 3.6293222683264177, "grad_norm": 9.833765983581543, "learning_rate": 3.539265406485324e-05, "log_odds_chosen": 9.416544914245605, "log_odds_ratio": -0.00028793158708140254, "logits/chosen": -0.49396389722824097, "logits/rejected": -0.5152664184570312, "logps/chosen": -0.00047146857832558453, "logps/rejected": -1.8146495819091797, "loss": 1.5623, "nll_loss": 0.39054417610168457, "rewards/accuracies": 1.0, "rewards/chosen": -4.714686292572878e-05, "rewards/margins": 0.1814178228378296, "rewards/rejected": -0.18146497011184692, "step": 5248 }, { "epoch": 3.6300138312586445, "grad_norm": 10.226951599121094, "learning_rate": 3.5388812048563084e-05, "log_odds_chosen": 8.603385925292969, "log_odds_ratio": -0.0006840950809419155, "logits/chosen": -0.2722872793674469, "logits/rejected": -0.33161452412605286, "logps/chosen": -0.0016118658240884542, "logps/rejected": -1.2224658727645874, "loss": 1.727, "nll_loss": 0.43167710304260254, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016118655912578106, "rewards/margins": 0.12208539992570877, "rewards/rejected": -0.12224658578634262, "step": 5249 }, { "epoch": 3.6307053941908713, "grad_norm": 8.28382396697998, "learning_rate": 3.5384970032272943e-05, "log_odds_chosen": 9.789243698120117, "log_odds_ratio": -0.0009390619234181941, "logits/chosen": -0.7724297046661377, "logits/rejected": -0.782783031463623, "logps/chosen": -0.0007653178181499243, "logps/rejected": -1.5597801208496094, "loss": 1.3569, "nll_loss": 0.33912599086761475, "rewards/accuracies": 1.0, "rewards/chosen": -7.653178181499243e-05, "rewards/margins": 0.1559014618396759, "rewards/rejected": -0.1559779942035675, "step": 5250 }, { "epoch": 3.631396957123098, "grad_norm": 6.966404914855957, "learning_rate": 3.538112801598279e-05, "log_odds_chosen": 10.608278274536133, "log_odds_ratio": -8.463065751129761e-05, "logits/chosen": -0.28429800271987915, "logits/rejected": -0.3238435387611389, "logps/chosen": -0.00016617128858342767, "logps/rejected": -2.1215450763702393, "loss": 1.1423, "nll_loss": 0.2855673134326935, "rewards/accuracies": 1.0, "rewards/chosen": -1.661713031353429e-05, "rewards/margins": 0.21213790774345398, "rewards/rejected": -0.21215450763702393, "step": 5251 }, { "epoch": 3.632088520055325, "grad_norm": 7.324312210083008, "learning_rate": 3.537728599969264e-05, "log_odds_chosen": 10.257405281066895, "log_odds_ratio": -7.262609869940206e-05, "logits/chosen": -0.4230521321296692, "logits/rejected": -0.45840829610824585, "logps/chosen": -0.009859025478363037, "logps/rejected": -2.127150535583496, "loss": 1.0872, "nll_loss": 0.27178269624710083, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009859026176854968, "rewards/margins": 0.21172913908958435, "rewards/rejected": -0.2127150595188141, "step": 5252 }, { "epoch": 3.632780082987552, "grad_norm": 7.544483184814453, "learning_rate": 3.5373443983402494e-05, "log_odds_chosen": 9.489943504333496, "log_odds_ratio": -0.0001858835166785866, "logits/chosen": -0.4171960949897766, "logits/rejected": -0.4828304052352905, "logps/chosen": -0.0004456111346371472, "logps/rejected": -1.377410888671875, "loss": 1.0404, "nll_loss": 0.26008322834968567, "rewards/accuracies": 1.0, "rewards/chosen": -4.456111491890624e-05, "rewards/margins": 0.1376965343952179, "rewards/rejected": -0.1377410888671875, "step": 5253 }, { "epoch": 3.6334716459197787, "grad_norm": 12.763961791992188, "learning_rate": 3.536960196711234e-05, "log_odds_chosen": 10.788370132446289, "log_odds_ratio": -3.103794006165117e-05, "logits/chosen": -0.693131148815155, "logits/rejected": -0.8062441945075989, "logps/chosen": -0.0004897600738331676, "logps/rejected": -2.7318973541259766, "loss": 1.6658, "nll_loss": 0.41644513607025146, "rewards/accuracies": 1.0, "rewards/chosen": -4.897600592812523e-05, "rewards/margins": 0.2731407880783081, "rewards/rejected": -0.2731897532939911, "step": 5254 }, { "epoch": 3.6341632088520055, "grad_norm": 6.80309534072876, "learning_rate": 3.536575995082219e-05, "log_odds_chosen": 10.249557495117188, "log_odds_ratio": -0.00013425902579911053, "logits/chosen": -0.42770153284072876, "logits/rejected": -0.4392857551574707, "logps/chosen": -0.00031385323381982744, "logps/rejected": -1.693392038345337, "loss": 1.2865, "nll_loss": 0.32160404324531555, "rewards/accuracies": 1.0, "rewards/chosen": -3.138531974400394e-05, "rewards/margins": 0.16930781304836273, "rewards/rejected": -0.16933920979499817, "step": 5255 }, { "epoch": 3.6348547717842323, "grad_norm": 19.48051643371582, "learning_rate": 3.5361917934532045e-05, "log_odds_chosen": 9.017301559448242, "log_odds_ratio": -0.013373545370995998, "logits/chosen": -0.5854345560073853, "logits/rejected": -0.5446378588676453, "logps/chosen": -0.04659513384103775, "logps/rejected": -2.5437159538269043, "loss": 2.0333, "nll_loss": 0.5069827437400818, "rewards/accuracies": 1.0, "rewards/chosen": -0.00465951394289732, "rewards/margins": 0.24971207976341248, "rewards/rejected": -0.25437161326408386, "step": 5256 }, { "epoch": 3.635546334716459, "grad_norm": 15.161421775817871, "learning_rate": 3.53580759182419e-05, "log_odds_chosen": 8.954034805297852, "log_odds_ratio": -0.00230272114276886, "logits/chosen": -0.5699727535247803, "logits/rejected": -0.5900068283081055, "logps/chosen": -0.00815976969897747, "logps/rejected": -1.8102946281433105, "loss": 2.0104, "nll_loss": 0.5023807287216187, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008159770513884723, "rewards/margins": 0.18021351099014282, "rewards/rejected": -0.18102948367595673, "step": 5257 }, { "epoch": 3.636237897648686, "grad_norm": 14.364946365356445, "learning_rate": 3.535423390195174e-05, "log_odds_chosen": 8.934064865112305, "log_odds_ratio": -0.00522011611610651, "logits/chosen": -0.47142425179481506, "logits/rejected": -0.5534060597419739, "logps/chosen": -0.01788080856204033, "logps/rejected": -2.318324089050293, "loss": 1.8643, "nll_loss": 0.46554601192474365, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017880809027701616, "rewards/margins": 0.23004432022571564, "rewards/rejected": -0.23183239996433258, "step": 5258 }, { "epoch": 3.636929460580913, "grad_norm": 4.957000255584717, "learning_rate": 3.53503918856616e-05, "log_odds_chosen": 9.00587272644043, "log_odds_ratio": -0.03742096573114395, "logits/chosen": -0.2998158037662506, "logits/rejected": -0.34346118569374084, "logps/chosen": -0.01705634780228138, "logps/rejected": -1.776115894317627, "loss": 1.3161, "nll_loss": 0.3252926766872406, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017056346405297518, "rewards/margins": 0.17590594291687012, "rewards/rejected": -0.1776115894317627, "step": 5259 }, { "epoch": 3.6376210235131397, "grad_norm": 7.9335808753967285, "learning_rate": 3.534654986937145e-05, "log_odds_chosen": 9.278167724609375, "log_odds_ratio": -0.00018974630802404135, "logits/chosen": -0.33290767669677734, "logits/rejected": -0.3825133144855499, "logps/chosen": -0.005465247668325901, "logps/rejected": -1.8166536092758179, "loss": 1.3369, "nll_loss": 0.33420246839523315, "rewards/accuracies": 1.0, "rewards/chosen": -0.000546524825040251, "rewards/margins": 0.18111884593963623, "rewards/rejected": -0.1816653609275818, "step": 5260 }, { "epoch": 3.6383125864453665, "grad_norm": 5.281066417694092, "learning_rate": 3.53427078530813e-05, "log_odds_chosen": 10.223286628723145, "log_odds_ratio": -8.183442696463317e-05, "logits/chosen": -0.5100609660148621, "logits/rejected": -0.5995121002197266, "logps/chosen": -0.0002764645905699581, "logps/rejected": -1.9925496578216553, "loss": 1.8219, "nll_loss": 0.45547908544540405, "rewards/accuracies": 1.0, "rewards/chosen": -2.764645978459157e-05, "rewards/margins": 0.19922731816768646, "rewards/rejected": -0.19925497472286224, "step": 5261 }, { "epoch": 3.6390041493775933, "grad_norm": 12.383803367614746, "learning_rate": 3.533886583679115e-05, "log_odds_chosen": 11.8496732711792, "log_odds_ratio": -1.212448114529252e-05, "logits/chosen": -0.6695747971534729, "logits/rejected": -0.7234416007995605, "logps/chosen": -0.0002846869465429336, "logps/rejected": -3.2199788093566895, "loss": 1.571, "nll_loss": 0.3927599787712097, "rewards/accuracies": 1.0, "rewards/chosen": -2.8468695745687e-05, "rewards/margins": 0.3219693899154663, "rewards/rejected": -0.32199788093566895, "step": 5262 }, { "epoch": 3.63969571230982, "grad_norm": 8.492502212524414, "learning_rate": 3.5335023820501e-05, "log_odds_chosen": 8.009116172790527, "log_odds_ratio": -0.0038440132047981024, "logits/chosen": -0.8346610069274902, "logits/rejected": -0.8341476321220398, "logps/chosen": -0.0035278652794659138, "logps/rejected": -1.413739800453186, "loss": 1.9024, "nll_loss": 0.47520437836647034, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035278653376735747, "rewards/margins": 0.14102119207382202, "rewards/rejected": -0.14137396216392517, "step": 5263 }, { "epoch": 3.640387275242047, "grad_norm": 7.315945148468018, "learning_rate": 3.533118180421085e-05, "log_odds_chosen": 10.510017395019531, "log_odds_ratio": -4.9991445848718286e-05, "logits/chosen": -0.5463863611221313, "logits/rejected": -0.6001462936401367, "logps/chosen": -0.0001689201162662357, "logps/rejected": -1.7600739002227783, "loss": 1.3571, "nll_loss": 0.3392818868160248, "rewards/accuracies": 1.0, "rewards/chosen": -1.6892012354219332e-05, "rewards/margins": 0.1759905070066452, "rewards/rejected": -0.17600739002227783, "step": 5264 }, { "epoch": 3.641078838174274, "grad_norm": 13.64138126373291, "learning_rate": 3.53273397879207e-05, "log_odds_chosen": 9.468687057495117, "log_odds_ratio": -0.00028739694971591234, "logits/chosen": -0.36536845564842224, "logits/rejected": -0.46932682394981384, "logps/chosen": -0.009552651084959507, "logps/rejected": -2.0713751316070557, "loss": 1.5895, "nll_loss": 0.39733579754829407, "rewards/accuracies": 1.0, "rewards/chosen": -0.000955265189986676, "rewards/margins": 0.20618225634098053, "rewards/rejected": -0.20713752508163452, "step": 5265 }, { "epoch": 3.6417704011065006, "grad_norm": 6.013581275939941, "learning_rate": 3.5323497771630555e-05, "log_odds_chosen": 10.099183082580566, "log_odds_ratio": -0.00015212551807053387, "logits/chosen": -0.6626241207122803, "logits/rejected": -0.7370970249176025, "logps/chosen": -0.0008881157846190035, "logps/rejected": -2.455702781677246, "loss": 1.099, "nll_loss": 0.2747448682785034, "rewards/accuracies": 1.0, "rewards/chosen": -8.881157555151731e-05, "rewards/margins": 0.2454814463853836, "rewards/rejected": -0.24557027220726013, "step": 5266 }, { "epoch": 3.6424619640387275, "grad_norm": 17.372495651245117, "learning_rate": 3.53196557553404e-05, "log_odds_chosen": 11.318941116333008, "log_odds_ratio": -2.0537449017865583e-05, "logits/chosen": -0.6090898513793945, "logits/rejected": -0.7666717171669006, "logps/chosen": -0.0004227885219734162, "logps/rejected": -3.2152180671691895, "loss": 1.64, "nll_loss": 0.4099968671798706, "rewards/accuracies": 1.0, "rewards/chosen": -4.22788507421501e-05, "rewards/margins": 0.32147955894470215, "rewards/rejected": -0.3215217888355255, "step": 5267 }, { "epoch": 3.6431535269709543, "grad_norm": 7.159913063049316, "learning_rate": 3.531581373905026e-05, "log_odds_chosen": 7.538683891296387, "log_odds_ratio": -0.08233918249607086, "logits/chosen": -0.4028078615665436, "logits/rejected": -0.41910237073898315, "logps/chosen": -0.02370680682361126, "logps/rejected": -1.4719009399414062, "loss": 0.9625, "nll_loss": 0.23239757120609283, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023706809151917696, "rewards/margins": 0.14481940865516663, "rewards/rejected": -0.14719009399414062, "step": 5268 }, { "epoch": 3.643845089903181, "grad_norm": 11.902508735656738, "learning_rate": 3.5311971722760106e-05, "log_odds_chosen": 10.27293872833252, "log_odds_ratio": -0.005822064820677042, "logits/chosen": -0.9338794946670532, "logits/rejected": -1.0018417835235596, "logps/chosen": -0.028985779732465744, "logps/rejected": -2.964543342590332, "loss": 1.3907, "nll_loss": 0.3471011221408844, "rewards/accuracies": 1.0, "rewards/chosen": -0.002898578066378832, "rewards/margins": 0.29355576634407043, "rewards/rejected": -0.2964543402194977, "step": 5269 }, { "epoch": 3.644536652835408, "grad_norm": 9.203103065490723, "learning_rate": 3.530812970646996e-05, "log_odds_chosen": 9.439620971679688, "log_odds_ratio": -0.0022923145443201065, "logits/chosen": -0.7063536047935486, "logits/rejected": -0.8596088290214539, "logps/chosen": -0.03340257331728935, "logps/rejected": -2.259183645248413, "loss": 1.7079, "nll_loss": 0.4267502725124359, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033402573317289352, "rewards/margins": 0.22257809340953827, "rewards/rejected": -0.22591835260391235, "step": 5270 }, { "epoch": 3.645228215767635, "grad_norm": 9.794646263122559, "learning_rate": 3.530428769017981e-05, "log_odds_chosen": 9.5487060546875, "log_odds_ratio": -0.0002214064879808575, "logits/chosen": -0.6062708497047424, "logits/rejected": -0.6592881679534912, "logps/chosen": -0.0003873534733429551, "logps/rejected": -1.6813409328460693, "loss": 1.4108, "nll_loss": 0.3526845872402191, "rewards/accuracies": 1.0, "rewards/chosen": -3.873534660669975e-05, "rewards/margins": 0.16809535026550293, "rewards/rejected": -0.16813409328460693, "step": 5271 }, { "epoch": 3.6459197786998616, "grad_norm": 6.463134765625, "learning_rate": 3.5300445673889656e-05, "log_odds_chosen": 8.663908004760742, "log_odds_ratio": -0.008357677608728409, "logits/chosen": -0.45225560665130615, "logits/rejected": -0.5104571580886841, "logps/chosen": -0.007889281958341599, "logps/rejected": -2.049224853515625, "loss": 1.1875, "nll_loss": 0.296050488948822, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007889281841926277, "rewards/margins": 0.20413357019424438, "rewards/rejected": -0.20492246747016907, "step": 5272 }, { "epoch": 3.6466113416320884, "grad_norm": 11.775018692016602, "learning_rate": 3.529660365759951e-05, "log_odds_chosen": 10.624938011169434, "log_odds_ratio": -7.90832273196429e-05, "logits/chosen": -0.932185173034668, "logits/rejected": -0.9303665161132812, "logps/chosen": -0.0003103798080701381, "logps/rejected": -2.3480958938598633, "loss": 1.7896, "nll_loss": 0.44740378856658936, "rewards/accuracies": 1.0, "rewards/chosen": -3.1037983717396855e-05, "rewards/margins": 0.23477855324745178, "rewards/rejected": -0.23480960726737976, "step": 5273 }, { "epoch": 3.6473029045643153, "grad_norm": 10.549944877624512, "learning_rate": 3.529276164130936e-05, "log_odds_chosen": 10.02077865600586, "log_odds_ratio": -0.00011639117292361334, "logits/chosen": -0.9140655398368835, "logits/rejected": -0.9599613547325134, "logps/chosen": -0.00046777399256825447, "logps/rejected": -2.150455951690674, "loss": 1.932, "nll_loss": 0.48298266530036926, "rewards/accuracies": 1.0, "rewards/chosen": -4.677740071201697e-05, "rewards/margins": 0.21499884128570557, "rewards/rejected": -0.21504560112953186, "step": 5274 }, { "epoch": 3.647994467496542, "grad_norm": 9.046510696411133, "learning_rate": 3.5288919625019214e-05, "log_odds_chosen": 9.285402297973633, "log_odds_ratio": -0.0009156799060292542, "logits/chosen": -0.4625971019268036, "logits/rejected": -0.5015397071838379, "logps/chosen": -0.0060931481420993805, "logps/rejected": -2.2665536403656006, "loss": 1.5615, "nll_loss": 0.3902891278266907, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006093148258514702, "rewards/margins": 0.22604604065418243, "rewards/rejected": -0.22665534913539886, "step": 5275 }, { "epoch": 3.648686030428769, "grad_norm": 11.310772895812988, "learning_rate": 3.528507760872906e-05, "log_odds_chosen": 10.605499267578125, "log_odds_ratio": -4.1127186705125496e-05, "logits/chosen": -0.566016674041748, "logits/rejected": -0.6915289163589478, "logps/chosen": -0.00027832394698634744, "logps/rejected": -2.2766027450561523, "loss": 1.3251, "nll_loss": 0.3312658369541168, "rewards/accuracies": 1.0, "rewards/chosen": -2.7832395062432624e-05, "rewards/margins": 0.22763243317604065, "rewards/rejected": -0.22766026854515076, "step": 5276 }, { "epoch": 3.6493775933609958, "grad_norm": 4.7569260597229, "learning_rate": 3.528123559243892e-05, "log_odds_chosen": 9.075827598571777, "log_odds_ratio": -0.0006618571933358908, "logits/chosen": -0.5907098054885864, "logits/rejected": -0.6073621511459351, "logps/chosen": -0.0007612211629748344, "logps/rejected": -1.488851547241211, "loss": 1.2438, "nll_loss": 0.3108810484409332, "rewards/accuracies": 1.0, "rewards/chosen": -7.612211629748344e-05, "rewards/margins": 0.1488090455532074, "rewards/rejected": -0.14888517558574677, "step": 5277 }, { "epoch": 3.6500691562932226, "grad_norm": 8.418963432312012, "learning_rate": 3.5277393576148764e-05, "log_odds_chosen": 9.35635757446289, "log_odds_ratio": -0.00023570825578644872, "logits/chosen": -0.6955604553222656, "logits/rejected": -0.7115451097488403, "logps/chosen": -0.008286512456834316, "logps/rejected": -2.2223093509674072, "loss": 2.2737, "nll_loss": 0.5684065818786621, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008286512456834316, "rewards/margins": 0.22140227258205414, "rewards/rejected": -0.222230926156044, "step": 5278 }, { "epoch": 3.6507607192254494, "grad_norm": 8.238372802734375, "learning_rate": 3.527355155985862e-05, "log_odds_chosen": 9.846039772033691, "log_odds_ratio": -0.00010984414257109165, "logits/chosen": -0.6819452047348022, "logits/rejected": -0.5744008421897888, "logps/chosen": -0.003770021256059408, "logps/rejected": -2.1398353576660156, "loss": 1.162, "nll_loss": 0.29048237204551697, "rewards/accuracies": 1.0, "rewards/chosen": -0.00037700211396440864, "rewards/margins": 0.2136065512895584, "rewards/rejected": -0.21398356556892395, "step": 5279 }, { "epoch": 3.6514522821576763, "grad_norm": 8.050226211547852, "learning_rate": 3.526970954356847e-05, "log_odds_chosen": 9.431568145751953, "log_odds_ratio": -0.0002600555890239775, "logits/chosen": -0.6938271522521973, "logits/rejected": -0.7513946890830994, "logps/chosen": -0.0014918470988050103, "logps/rejected": -2.1459484100341797, "loss": 1.3419, "nll_loss": 0.33545613288879395, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014918472152203321, "rewards/margins": 0.21444565057754517, "rewards/rejected": -0.21459482610225677, "step": 5280 }, { "epoch": 3.652143845089903, "grad_norm": 9.627875328063965, "learning_rate": 3.5265867527278315e-05, "log_odds_chosen": 8.359371185302734, "log_odds_ratio": -0.010650178417563438, "logits/chosen": -0.6847175359725952, "logits/rejected": -0.8081383109092712, "logps/chosen": -0.00730957230553031, "logps/rejected": -1.5649361610412598, "loss": 0.9909, "nll_loss": 0.24665333330631256, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007309572538360953, "rewards/margins": 0.1557626575231552, "rewards/rejected": -0.15649361908435822, "step": 5281 }, { "epoch": 3.65283540802213, "grad_norm": 9.804635047912598, "learning_rate": 3.526202551098817e-05, "log_odds_chosen": 9.294599533081055, "log_odds_ratio": -0.00021372217452153563, "logits/chosen": -0.5657236576080322, "logits/rejected": -0.7095679044723511, "logps/chosen": -0.003741663182154298, "logps/rejected": -2.19268798828125, "loss": 1.2202, "nll_loss": 0.30502238869667053, "rewards/accuracies": 1.0, "rewards/chosen": -0.00037416635314002633, "rewards/margins": 0.21889464557170868, "rewards/rejected": -0.219268798828125, "step": 5282 }, { "epoch": 3.6535269709543567, "grad_norm": 8.655688285827637, "learning_rate": 3.525818349469802e-05, "log_odds_chosen": 10.956435203552246, "log_odds_ratio": -2.724559817579575e-05, "logits/chosen": -0.6136027574539185, "logits/rejected": -0.6853924989700317, "logps/chosen": -0.000356669828761369, "logps/rejected": -2.442622184753418, "loss": 1.5616, "nll_loss": 0.39040645956993103, "rewards/accuracies": 1.0, "rewards/chosen": -3.5666980693349615e-05, "rewards/margins": 0.24422653019428253, "rewards/rejected": -0.2442622035741806, "step": 5283 }, { "epoch": 3.6542185338865836, "grad_norm": 6.767275810241699, "learning_rate": 3.525434147840787e-05, "log_odds_chosen": 10.967052459716797, "log_odds_ratio": -3.499734521028586e-05, "logits/chosen": -0.2755697965621948, "logits/rejected": -0.314625084400177, "logps/chosen": -0.003400439629331231, "logps/rejected": -2.71468186378479, "loss": 1.586, "nll_loss": 0.3965045213699341, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003400439745746553, "rewards/margins": 0.2711281478404999, "rewards/rejected": -0.2714681923389435, "step": 5284 }, { "epoch": 3.6549100968188104, "grad_norm": 18.51266098022461, "learning_rate": 3.525049946211772e-05, "log_odds_chosen": 7.9354658126831055, "log_odds_ratio": -0.3440208435058594, "logits/chosen": -0.4980732798576355, "logits/rejected": -0.5682406425476074, "logps/chosen": -0.09362926334142685, "logps/rejected": -2.1777169704437256, "loss": 1.949, "nll_loss": 0.4528387486934662, "rewards/accuracies": 0.875, "rewards/chosen": -0.00936292577534914, "rewards/margins": 0.20840878784656525, "rewards/rejected": -0.2177717089653015, "step": 5285 }, { "epoch": 3.6556016597510372, "grad_norm": 10.901885032653809, "learning_rate": 3.524665744582758e-05, "log_odds_chosen": 10.048656463623047, "log_odds_ratio": -9.540050814393908e-05, "logits/chosen": -0.42753034830093384, "logits/rejected": -0.4391542077064514, "logps/chosen": -0.0019503405783325434, "logps/rejected": -2.1295058727264404, "loss": 1.5479, "nll_loss": 0.3869664669036865, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019503405201248825, "rewards/margins": 0.21275556087493896, "rewards/rejected": -0.21295058727264404, "step": 5286 }, { "epoch": 3.656293222683264, "grad_norm": 12.719574928283691, "learning_rate": 3.524281542953742e-05, "log_odds_chosen": 10.545530319213867, "log_odds_ratio": -0.00034519375185482204, "logits/chosen": -0.4994061589241028, "logits/rejected": -0.5819936394691467, "logps/chosen": -0.00015317212091758847, "logps/rejected": -2.056387186050415, "loss": 1.2105, "nll_loss": 0.3026004135608673, "rewards/accuracies": 1.0, "rewards/chosen": -1.531721318315249e-05, "rewards/margins": 0.20562341809272766, "rewards/rejected": -0.20563873648643494, "step": 5287 }, { "epoch": 3.656984785615491, "grad_norm": 14.119704246520996, "learning_rate": 3.5238973413247275e-05, "log_odds_chosen": 10.87884521484375, "log_odds_ratio": -5.273066199151799e-05, "logits/chosen": -0.45117008686065674, "logits/rejected": -0.5436999797821045, "logps/chosen": -0.00025493474095128477, "logps/rejected": -2.254000186920166, "loss": 1.4113, "nll_loss": 0.3528318703174591, "rewards/accuracies": 1.0, "rewards/chosen": -2.5493474822724238e-05, "rewards/margins": 0.22537453472614288, "rewards/rejected": -0.22540001571178436, "step": 5288 }, { "epoch": 3.6576763485477177, "grad_norm": 5.6090474128723145, "learning_rate": 3.523513139695713e-05, "log_odds_chosen": 8.451699256896973, "log_odds_ratio": -0.0005231672548688948, "logits/chosen": -0.4306209087371826, "logits/rejected": -0.47424978017807007, "logps/chosen": -0.010371813550591469, "logps/rejected": -2.286710023880005, "loss": 1.3906, "nll_loss": 0.3476030230522156, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010371813550591469, "rewards/margins": 0.22763381898403168, "rewards/rejected": -0.22867099940776825, "step": 5289 }, { "epoch": 3.6583679114799446, "grad_norm": 9.015220642089844, "learning_rate": 3.523128938066697e-05, "log_odds_chosen": 10.912769317626953, "log_odds_ratio": -2.5071371055673808e-05, "logits/chosen": -0.4544152617454529, "logits/rejected": -0.5538532733917236, "logps/chosen": -0.00016627827426418662, "logps/rejected": -2.1886746883392334, "loss": 1.3752, "nll_loss": 0.34378570318222046, "rewards/accuracies": 1.0, "rewards/chosen": -1.662782597122714e-05, "rewards/margins": 0.21885083615779877, "rewards/rejected": -0.2188674807548523, "step": 5290 }, { "epoch": 3.6590594744121714, "grad_norm": 8.339679718017578, "learning_rate": 3.5227447364376826e-05, "log_odds_chosen": 10.106023788452148, "log_odds_ratio": -0.0002883929992094636, "logits/chosen": -0.5477585792541504, "logits/rejected": -0.7189350724220276, "logps/chosen": -0.0016047836979851127, "logps/rejected": -2.743410348892212, "loss": 1.4152, "nll_loss": 0.3537675440311432, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016047836106736213, "rewards/margins": 0.2741805911064148, "rewards/rejected": -0.27434107661247253, "step": 5291 }, { "epoch": 3.659751037344398, "grad_norm": 7.859814643859863, "learning_rate": 3.522360534808668e-05, "log_odds_chosen": 9.92156982421875, "log_odds_ratio": -0.0001442175853298977, "logits/chosen": -0.4476546347141266, "logits/rejected": -0.5333471894264221, "logps/chosen": -0.00014952296623960137, "logps/rejected": -1.2835972309112549, "loss": 1.2927, "nll_loss": 0.3231571912765503, "rewards/accuracies": 1.0, "rewards/chosen": -1.4952296623960137e-05, "rewards/margins": 0.12834477424621582, "rewards/rejected": -0.12835972011089325, "step": 5292 }, { "epoch": 3.660442600276625, "grad_norm": 11.889843940734863, "learning_rate": 3.521976333179653e-05, "log_odds_chosen": 9.024433135986328, "log_odds_ratio": -0.12432266771793365, "logits/chosen": -0.3937477171421051, "logits/rejected": -0.31476932764053345, "logps/chosen": -0.03802483528852463, "logps/rejected": -1.9400608539581299, "loss": 0.8401, "nll_loss": 0.19758589565753937, "rewards/accuracies": 0.875, "rewards/chosen": -0.0038024834357202053, "rewards/margins": 0.19020359218120575, "rewards/rejected": -0.1940060704946518, "step": 5293 }, { "epoch": 3.661134163208852, "grad_norm": 9.2792387008667, "learning_rate": 3.5215921315506376e-05, "log_odds_chosen": 10.101093292236328, "log_odds_ratio": -8.72776290634647e-05, "logits/chosen": -0.8199343085289001, "logits/rejected": -0.8904087543487549, "logps/chosen": -0.0005541003774851561, "logps/rejected": -2.0077261924743652, "loss": 1.9961, "nll_loss": 0.49901145696640015, "rewards/accuracies": 1.0, "rewards/chosen": -5.541003338294104e-05, "rewards/margins": 0.20071722567081451, "rewards/rejected": -0.20077264308929443, "step": 5294 }, { "epoch": 3.6618257261410787, "grad_norm": 4.678475379943848, "learning_rate": 3.5212079299216236e-05, "log_odds_chosen": 9.025158882141113, "log_odds_ratio": -0.00037848821375519037, "logits/chosen": -0.26122820377349854, "logits/rejected": -0.22307443618774414, "logps/chosen": -0.013361023738980293, "logps/rejected": -1.7858524322509766, "loss": 1.0168, "nll_loss": 0.2541574537754059, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013361023738980293, "rewards/margins": 0.17724913358688354, "rewards/rejected": -0.1785852462053299, "step": 5295 }, { "epoch": 3.6625172890733055, "grad_norm": 8.4315185546875, "learning_rate": 3.520823728292608e-05, "log_odds_chosen": 8.354340553283691, "log_odds_ratio": -0.018137505277991295, "logits/chosen": -0.343766450881958, "logits/rejected": -0.3963284492492676, "logps/chosen": -0.006356228142976761, "logps/rejected": -1.510011911392212, "loss": 1.3385, "nll_loss": 0.33280879259109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006356228841468692, "rewards/margins": 0.15036556124687195, "rewards/rejected": -0.1510011851787567, "step": 5296 }, { "epoch": 3.6632088520055324, "grad_norm": 14.743172645568848, "learning_rate": 3.5204395266635934e-05, "log_odds_chosen": 9.12619400024414, "log_odds_ratio": -0.006320980843156576, "logits/chosen": -0.48610758781433105, "logits/rejected": -0.46463316679000854, "logps/chosen": -0.002753177424892783, "logps/rejected": -1.590240716934204, "loss": 1.2201, "nll_loss": 0.3043842315673828, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002753177541308105, "rewards/margins": 0.1587487757205963, "rewards/rejected": -0.15902407467365265, "step": 5297 }, { "epoch": 3.663900414937759, "grad_norm": 8.207070350646973, "learning_rate": 3.5200553250345786e-05, "log_odds_chosen": 9.57107925415039, "log_odds_ratio": -0.00014903413830325007, "logits/chosen": -0.44907844066619873, "logits/rejected": -0.5032300353050232, "logps/chosen": -0.00021599276806227863, "logps/rejected": -0.9402081370353699, "loss": 1.4194, "nll_loss": 0.3548300266265869, "rewards/accuracies": 1.0, "rewards/chosen": -2.1599278625217266e-05, "rewards/margins": 0.0939992144703865, "rewards/rejected": -0.09402081370353699, "step": 5298 }, { "epoch": 3.664591977869986, "grad_norm": 5.6133856773376465, "learning_rate": 3.519671123405563e-05, "log_odds_chosen": 6.615942478179932, "log_odds_ratio": -0.07902704179286957, "logits/chosen": -0.703774094581604, "logits/rejected": -0.6925557255744934, "logps/chosen": -0.033892419189214706, "logps/rejected": -1.7209391593933105, "loss": 1.6216, "nll_loss": 0.3975079655647278, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033892421051859856, "rewards/margins": 0.16870468854904175, "rewards/rejected": -0.17209392786026, "step": 5299 }, { "epoch": 3.665283540802213, "grad_norm": 15.447953224182129, "learning_rate": 3.5192869217765484e-05, "log_odds_chosen": 10.037712097167969, "log_odds_ratio": -0.0002351927396375686, "logits/chosen": -0.7943803071975708, "logits/rejected": -0.8371597528457642, "logps/chosen": -0.0004952938761562109, "logps/rejected": -1.7647054195404053, "loss": 2.1934, "nll_loss": 0.5483275651931763, "rewards/accuracies": 1.0, "rewards/chosen": -4.9529389798408374e-05, "rewards/margins": 0.17642101645469666, "rewards/rejected": -0.1764705330133438, "step": 5300 }, { "epoch": 3.6659751037344397, "grad_norm": 7.23792028427124, "learning_rate": 3.518902720147534e-05, "log_odds_chosen": 10.529439926147461, "log_odds_ratio": -6.0410486184991896e-05, "logits/chosen": -0.5384195446968079, "logits/rejected": -0.5411019921302795, "logps/chosen": -0.00020644822507165372, "logps/rejected": -2.137907028198242, "loss": 1.4403, "nll_loss": 0.36006906628608704, "rewards/accuracies": 1.0, "rewards/chosen": -2.0644820324378088e-05, "rewards/margins": 0.2137700617313385, "rewards/rejected": -0.21379071474075317, "step": 5301 }, { "epoch": 3.6666666666666665, "grad_norm": 9.008337020874023, "learning_rate": 3.518518518518519e-05, "log_odds_chosen": 9.11552619934082, "log_odds_ratio": -0.00044547885772772133, "logits/chosen": -0.8450495600700378, "logits/rejected": -0.882699191570282, "logps/chosen": -0.003004885744303465, "logps/rejected": -1.6603273153305054, "loss": 2.0394, "nll_loss": 0.5097946524620056, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030048860935494304, "rewards/margins": 0.1657322496175766, "rewards/rejected": -0.16603274643421173, "step": 5302 }, { "epoch": 3.6673582295988933, "grad_norm": 12.148446083068848, "learning_rate": 3.5181343168895035e-05, "log_odds_chosen": 8.527297973632812, "log_odds_ratio": -0.006359103135764599, "logits/chosen": -0.7570721507072449, "logits/rejected": -0.8277689218521118, "logps/chosen": -0.004634576383978128, "logps/rejected": -1.966159462928772, "loss": 2.2884, "nll_loss": 0.5714757442474365, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046345763257704675, "rewards/margins": 0.1961524933576584, "rewards/rejected": -0.19661596417427063, "step": 5303 }, { "epoch": 3.66804979253112, "grad_norm": 13.32793140411377, "learning_rate": 3.5177501152604894e-05, "log_odds_chosen": 8.763154983520508, "log_odds_ratio": -0.0016999999061226845, "logits/chosen": -0.773772120475769, "logits/rejected": -0.8412440419197083, "logps/chosen": -0.024526400491595268, "logps/rejected": -1.7753115892410278, "loss": 1.2816, "nll_loss": 0.32022562623023987, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024526400957256556, "rewards/margins": 0.17507854104042053, "rewards/rejected": -0.1775311529636383, "step": 5304 }, { "epoch": 3.668741355463347, "grad_norm": 9.776931762695312, "learning_rate": 3.517365913631474e-05, "log_odds_chosen": 7.944180488586426, "log_odds_ratio": -0.04010344296693802, "logits/chosen": -0.43044549226760864, "logits/rejected": -0.4351937770843506, "logps/chosen": -0.014615101739764214, "logps/rejected": -1.7616087198257446, "loss": 1.9609, "nll_loss": 0.4862039387226105, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014615101972594857, "rewards/margins": 0.17469936609268188, "rewards/rejected": -0.17616087198257446, "step": 5305 }, { "epoch": 3.669432918395574, "grad_norm": 12.19973373413086, "learning_rate": 3.516981712002459e-05, "log_odds_chosen": 9.850683212280273, "log_odds_ratio": -0.0001669059129199013, "logits/chosen": -0.9862415790557861, "logits/rejected": -0.9785559177398682, "logps/chosen": -0.0006409522611647844, "logps/rejected": -1.5707862377166748, "loss": 1.9491, "nll_loss": 0.4872695207595825, "rewards/accuracies": 1.0, "rewards/chosen": -6.409522029571235e-05, "rewards/margins": 0.15701454877853394, "rewards/rejected": -0.15707863867282867, "step": 5306 }, { "epoch": 3.6701244813278007, "grad_norm": 8.544509887695312, "learning_rate": 3.5165975103734445e-05, "log_odds_chosen": 9.832954406738281, "log_odds_ratio": -0.00016655519721098244, "logits/chosen": -0.9121188521385193, "logits/rejected": -0.9448140263557434, "logps/chosen": -0.0003426902985665947, "logps/rejected": -1.5457923412322998, "loss": 0.9375, "nll_loss": 0.23435595631599426, "rewards/accuracies": 1.0, "rewards/chosen": -3.426902912906371e-05, "rewards/margins": 0.15454496443271637, "rewards/rejected": -0.15457923710346222, "step": 5307 }, { "epoch": 3.6708160442600275, "grad_norm": 19.36949920654297, "learning_rate": 3.516213308744429e-05, "log_odds_chosen": 9.872499465942383, "log_odds_ratio": -0.001567936153151095, "logits/chosen": -0.6543141007423401, "logits/rejected": -0.7318291068077087, "logps/chosen": -0.007575335446745157, "logps/rejected": -2.5318126678466797, "loss": 2.4277, "nll_loss": 0.6067792177200317, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007575335330329835, "rewards/margins": 0.2524237334728241, "rewards/rejected": -0.25318124890327454, "step": 5308 }, { "epoch": 3.6715076071922543, "grad_norm": 13.251935958862305, "learning_rate": 3.515829107115414e-05, "log_odds_chosen": 10.378414154052734, "log_odds_ratio": -3.7448902730830014e-05, "logits/chosen": -0.716993510723114, "logits/rejected": -0.7578259110450745, "logps/chosen": -0.00018267772975377738, "logps/rejected": -1.6985294818878174, "loss": 1.3589, "nll_loss": 0.33973079919815063, "rewards/accuracies": 1.0, "rewards/chosen": -1.8267772247781977e-05, "rewards/margins": 0.1698346734046936, "rewards/rejected": -0.16985295712947845, "step": 5309 }, { "epoch": 3.6721991701244816, "grad_norm": 5.7452569007873535, "learning_rate": 3.515444905486399e-05, "log_odds_chosen": 8.658774375915527, "log_odds_ratio": -0.000490661128424108, "logits/chosen": -0.2783205509185791, "logits/rejected": -0.3526964783668518, "logps/chosen": -0.0036929536145180464, "logps/rejected": -1.6045854091644287, "loss": 2.5169, "nll_loss": 0.6291677355766296, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003692953905556351, "rewards/margins": 0.16008925437927246, "rewards/rejected": -0.1604585349559784, "step": 5310 }, { "epoch": 3.6728907330567084, "grad_norm": 8.994384765625, "learning_rate": 3.515060703857385e-05, "log_odds_chosen": 9.59211540222168, "log_odds_ratio": -0.002721622120589018, "logits/chosen": -0.9550088047981262, "logits/rejected": -0.925793468952179, "logps/chosen": -0.0007333287503570318, "logps/rejected": -1.986412763595581, "loss": 1.1019, "nll_loss": 0.2752057909965515, "rewards/accuracies": 1.0, "rewards/chosen": -7.333287794608623e-05, "rewards/margins": 0.1985679566860199, "rewards/rejected": -0.19864128530025482, "step": 5311 }, { "epoch": 3.6735822959889353, "grad_norm": 9.866668701171875, "learning_rate": 3.514676502228369e-05, "log_odds_chosen": 9.764283180236816, "log_odds_ratio": -0.0001480157079640776, "logits/chosen": -0.6280882954597473, "logits/rejected": -0.7089745998382568, "logps/chosen": -0.007530787028372288, "logps/rejected": -2.570647716522217, "loss": 1.5201, "nll_loss": 0.3800201714038849, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007530787261202931, "rewards/margins": 0.25631168484687805, "rewards/rejected": -0.2570647597312927, "step": 5312 }, { "epoch": 3.674273858921162, "grad_norm": 10.664302825927734, "learning_rate": 3.5142923005993546e-05, "log_odds_chosen": 9.287500381469727, "log_odds_ratio": -0.0013609788147732615, "logits/chosen": -0.6665362119674683, "logits/rejected": -0.7844340205192566, "logps/chosen": -0.008134718984365463, "logps/rejected": -1.9219590425491333, "loss": 1.5939, "nll_loss": 0.3983459770679474, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008134719682857394, "rewards/margins": 0.19138242304325104, "rewards/rejected": -0.19219589233398438, "step": 5313 }, { "epoch": 3.674965421853389, "grad_norm": 7.119990348815918, "learning_rate": 3.51390809897034e-05, "log_odds_chosen": 10.26752758026123, "log_odds_ratio": -4.403849015943706e-05, "logits/chosen": -0.7129647135734558, "logits/rejected": -0.726944088935852, "logps/chosen": -0.000326203036820516, "logps/rejected": -2.0272581577301025, "loss": 1.9439, "nll_loss": 0.48595935106277466, "rewards/accuracies": 1.0, "rewards/chosen": -3.262030077166855e-05, "rewards/margins": 0.20269319415092468, "rewards/rejected": -0.20272579789161682, "step": 5314 }, { "epoch": 3.6756569847856158, "grad_norm": 7.7854390144348145, "learning_rate": 3.513523897341325e-05, "log_odds_chosen": 8.846216201782227, "log_odds_ratio": -0.03087105229496956, "logits/chosen": -0.7045666575431824, "logits/rejected": -0.7305500507354736, "logps/chosen": -0.008291316218674183, "logps/rejected": -1.5737732648849487, "loss": 1.449, "nll_loss": 0.35915958881378174, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008291316335089505, "rewards/margins": 0.15654820203781128, "rewards/rejected": -0.15737733244895935, "step": 5315 }, { "epoch": 3.6763485477178426, "grad_norm": 9.7339448928833, "learning_rate": 3.5131396957123096e-05, "log_odds_chosen": 7.857017517089844, "log_odds_ratio": -0.03638071566820145, "logits/chosen": -0.9640352129936218, "logits/rejected": -0.9572303295135498, "logps/chosen": -0.03163313865661621, "logps/rejected": -2.537631034851074, "loss": 2.0547, "nll_loss": 0.5100435018539429, "rewards/accuracies": 1.0, "rewards/chosen": -0.003163314191624522, "rewards/margins": 0.25059980154037476, "rewards/rejected": -0.2537631094455719, "step": 5316 }, { "epoch": 3.6770401106500694, "grad_norm": 8.134242057800293, "learning_rate": 3.512755494083295e-05, "log_odds_chosen": 7.235983848571777, "log_odds_ratio": -0.025236472487449646, "logits/chosen": -0.3153616487979889, "logits/rejected": -0.29091522097587585, "logps/chosen": -0.01593020185828209, "logps/rejected": -1.111178994178772, "loss": 1.7571, "nll_loss": 0.4367576837539673, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015930201625451446, "rewards/margins": 0.10952487587928772, "rewards/rejected": -0.1111178994178772, "step": 5317 }, { "epoch": 3.6777316735822962, "grad_norm": 9.490833282470703, "learning_rate": 3.51237129245428e-05, "log_odds_chosen": 8.130983352661133, "log_odds_ratio": -0.17193548381328583, "logits/chosen": -0.7003574371337891, "logits/rejected": -0.7082849144935608, "logps/chosen": -0.0677189901471138, "logps/rejected": -1.5908578634262085, "loss": 1.9123, "nll_loss": 0.4608832001686096, "rewards/accuracies": 0.875, "rewards/chosen": -0.006771899294108152, "rewards/margins": 0.15231388807296753, "rewards/rejected": -0.15908578038215637, "step": 5318 }, { "epoch": 3.678423236514523, "grad_norm": 7.186295509338379, "learning_rate": 3.511987090825265e-05, "log_odds_chosen": 10.44674301147461, "log_odds_ratio": -4.972759779775515e-05, "logits/chosen": -0.6706852316856384, "logits/rejected": -0.7189573049545288, "logps/chosen": -0.0033246877137571573, "logps/rejected": -2.9027318954467773, "loss": 1.89, "nll_loss": 0.4724842607975006, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033246877137571573, "rewards/margins": 0.2899407148361206, "rewards/rejected": -0.29027318954467773, "step": 5319 }, { "epoch": 3.67911479944675, "grad_norm": 11.441291809082031, "learning_rate": 3.5116028891962506e-05, "log_odds_chosen": 9.759711265563965, "log_odds_ratio": -0.0005217420402914286, "logits/chosen": -0.547791063785553, "logits/rejected": -0.6304538249969482, "logps/chosen": -0.000919260666705668, "logps/rejected": -1.9203557968139648, "loss": 1.4694, "nll_loss": 0.3673018217086792, "rewards/accuracies": 1.0, "rewards/chosen": -9.192607103614137e-05, "rewards/margins": 0.19194364547729492, "rewards/rejected": -0.19203555583953857, "step": 5320 }, { "epoch": 3.6798063623789767, "grad_norm": 9.163915634155273, "learning_rate": 3.511218687567235e-05, "log_odds_chosen": 9.769948959350586, "log_odds_ratio": -0.0007256059325300157, "logits/chosen": -0.5265632271766663, "logits/rejected": -0.6233397126197815, "logps/chosen": -0.007863366976380348, "logps/rejected": -2.6629951000213623, "loss": 1.5086, "nll_loss": 0.3770705461502075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007863366045057774, "rewards/margins": 0.26551318168640137, "rewards/rejected": -0.2662995457649231, "step": 5321 }, { "epoch": 3.6804979253112036, "grad_norm": 12.884857177734375, "learning_rate": 3.5108344859382204e-05, "log_odds_chosen": 7.08580207824707, "log_odds_ratio": -0.4825950860977173, "logits/chosen": -0.9943395853042603, "logits/rejected": -0.9396799802780151, "logps/chosen": -0.3394123613834381, "logps/rejected": -2.2305188179016113, "loss": 1.8811, "nll_loss": 0.4220207631587982, "rewards/accuracies": 0.75, "rewards/chosen": -0.03394123539328575, "rewards/margins": 0.1891106367111206, "rewards/rejected": -0.22305189073085785, "step": 5322 }, { "epoch": 3.6811894882434304, "grad_norm": 7.227943420410156, "learning_rate": 3.5104502843092057e-05, "log_odds_chosen": 8.04112434387207, "log_odds_ratio": -0.015502391383051872, "logits/chosen": -0.7972258925437927, "logits/rejected": -0.7825276851654053, "logps/chosen": -0.005595149472355843, "logps/rejected": -1.288925051689148, "loss": 1.648, "nll_loss": 0.4104374945163727, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005595149705186486, "rewards/margins": 0.1283329874277115, "rewards/rejected": -0.12889249622821808, "step": 5323 }, { "epoch": 3.6818810511756572, "grad_norm": 9.782866477966309, "learning_rate": 3.510066082680191e-05, "log_odds_chosen": 10.535016059875488, "log_odds_ratio": -4.5212880650069565e-05, "logits/chosen": -1.0320000648498535, "logits/rejected": -1.047119140625, "logps/chosen": -0.00029143691062927246, "logps/rejected": -2.076714515686035, "loss": 1.3569, "nll_loss": 0.33922791481018066, "rewards/accuracies": 1.0, "rewards/chosen": -2.9143691790523008e-05, "rewards/margins": 0.2076423019170761, "rewards/rejected": -0.20767146348953247, "step": 5324 }, { "epoch": 3.682572614107884, "grad_norm": 4.98908805847168, "learning_rate": 3.5096818810511755e-05, "log_odds_chosen": 8.161107063293457, "log_odds_ratio": -0.004896479658782482, "logits/chosen": -0.6709215044975281, "logits/rejected": -0.6395055055618286, "logps/chosen": -0.015280601568520069, "logps/rejected": -1.730186939239502, "loss": 1.1412, "nll_loss": 0.2848084568977356, "rewards/accuracies": 1.0, "rewards/chosen": -0.001528060296550393, "rewards/margins": 0.1714906245470047, "rewards/rejected": -0.1730186939239502, "step": 5325 }, { "epoch": 3.683264177040111, "grad_norm": 9.627045631408691, "learning_rate": 3.509297679422161e-05, "log_odds_chosen": 9.907687187194824, "log_odds_ratio": -0.0009907097555696964, "logits/chosen": -0.6797293424606323, "logits/rejected": -0.7065413594245911, "logps/chosen": -0.0013398650335147977, "logps/rejected": -1.8299527168273926, "loss": 1.2516, "nll_loss": 0.31279319524765015, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013398649753071368, "rewards/margins": 0.18286126852035522, "rewards/rejected": -0.1829952597618103, "step": 5326 }, { "epoch": 3.6839557399723377, "grad_norm": 10.69526195526123, "learning_rate": 3.508913477793146e-05, "log_odds_chosen": 10.115638732910156, "log_odds_ratio": -0.0002958014083560556, "logits/chosen": -0.7780247926712036, "logits/rejected": -0.8033609390258789, "logps/chosen": -0.0003876305709127337, "logps/rejected": -1.706833004951477, "loss": 1.9479, "nll_loss": 0.48695021867752075, "rewards/accuracies": 1.0, "rewards/chosen": -3.8763060729252174e-05, "rewards/margins": 0.17064453661441803, "rewards/rejected": -0.17068329453468323, "step": 5327 }, { "epoch": 3.6846473029045645, "grad_norm": 14.76516056060791, "learning_rate": 3.5085292761641305e-05, "log_odds_chosen": 11.188180923461914, "log_odds_ratio": -2.028615926974453e-05, "logits/chosen": -1.0329959392547607, "logits/rejected": -1.0130128860473633, "logps/chosen": -0.00010292732622474432, "logps/rejected": -1.8463349342346191, "loss": 1.7812, "nll_loss": 0.4453083574771881, "rewards/accuracies": 1.0, "rewards/chosen": -1.0292733350070193e-05, "rewards/margins": 0.18462321162223816, "rewards/rejected": -0.18463349342346191, "step": 5328 }, { "epoch": 3.6853388658367914, "grad_norm": 9.564327239990234, "learning_rate": 3.5081450745351164e-05, "log_odds_chosen": 9.434001922607422, "log_odds_ratio": -0.0002744604425970465, "logits/chosen": -0.5842224359512329, "logits/rejected": -0.6542115211486816, "logps/chosen": -0.0029525586869567633, "logps/rejected": -1.8732268810272217, "loss": 1.1858, "nll_loss": 0.2964109182357788, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029525585705414414, "rewards/margins": 0.18702742457389832, "rewards/rejected": -0.1873226761817932, "step": 5329 }, { "epoch": 3.686030428769018, "grad_norm": 12.752079010009766, "learning_rate": 3.507760872906101e-05, "log_odds_chosen": 9.772260665893555, "log_odds_ratio": -8.484002319164574e-05, "logits/chosen": -0.6685524582862854, "logits/rejected": -0.7067282199859619, "logps/chosen": -0.0003044075274374336, "logps/rejected": -1.6522518396377563, "loss": 1.5685, "nll_loss": 0.3921244442462921, "rewards/accuracies": 1.0, "rewards/chosen": -3.044075310754124e-05, "rewards/margins": 0.16519474983215332, "rewards/rejected": -0.16522517800331116, "step": 5330 }, { "epoch": 3.686721991701245, "grad_norm": 6.9672393798828125, "learning_rate": 3.507376671277086e-05, "log_odds_chosen": 8.436867713928223, "log_odds_ratio": -0.0006808569887652993, "logits/chosen": -0.5947157144546509, "logits/rejected": -0.6862506866455078, "logps/chosen": -0.0007226442685350776, "logps/rejected": -1.2544167041778564, "loss": 1.2813, "nll_loss": 0.3202512860298157, "rewards/accuracies": 1.0, "rewards/chosen": -7.226442539831623e-05, "rewards/margins": 0.12536939978599548, "rewards/rejected": -0.12544165551662445, "step": 5331 }, { "epoch": 3.687413554633472, "grad_norm": 9.374478340148926, "learning_rate": 3.5069924696480715e-05, "log_odds_chosen": 9.65780258178711, "log_odds_ratio": -0.004418676253408194, "logits/chosen": -0.5301334857940674, "logits/rejected": -0.588121235370636, "logps/chosen": -0.09868014603853226, "logps/rejected": -2.407459259033203, "loss": 1.3604, "nll_loss": 0.33965355157852173, "rewards/accuracies": 1.0, "rewards/chosen": -0.009868014603853226, "rewards/margins": 0.23087790608406067, "rewards/rejected": -0.2407459318637848, "step": 5332 }, { "epoch": 3.6881051175656987, "grad_norm": 7.010161399841309, "learning_rate": 3.506608268019057e-05, "log_odds_chosen": 8.303940773010254, "log_odds_ratio": -0.005083529744297266, "logits/chosen": -0.7475804090499878, "logits/rejected": -0.760705828666687, "logps/chosen": -0.009333855472505093, "logps/rejected": -1.4144235849380493, "loss": 1.6738, "nll_loss": 0.41793039441108704, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009333856287412345, "rewards/margins": 0.1405089795589447, "rewards/rejected": -0.14144235849380493, "step": 5333 }, { "epoch": 3.6887966804979255, "grad_norm": 11.381121635437012, "learning_rate": 3.506224066390041e-05, "log_odds_chosen": 9.073360443115234, "log_odds_ratio": -0.00032709480728954077, "logits/chosen": -0.8671411275863647, "logits/rejected": -0.9612331986427307, "logps/chosen": -0.00035379567998461425, "logps/rejected": -1.3636505603790283, "loss": 1.458, "nll_loss": 0.36446449160575867, "rewards/accuracies": 1.0, "rewards/chosen": -3.537956945365295e-05, "rewards/margins": 0.13632968068122864, "rewards/rejected": -0.13636507093906403, "step": 5334 }, { "epoch": 3.6894882434301524, "grad_norm": 10.14391803741455, "learning_rate": 3.5058398647610266e-05, "log_odds_chosen": 8.98252010345459, "log_odds_ratio": -0.15078553557395935, "logits/chosen": -0.7508845329284668, "logits/rejected": -0.7607273459434509, "logps/chosen": -0.019194740802049637, "logps/rejected": -1.7505712509155273, "loss": 1.5958, "nll_loss": 0.3838688135147095, "rewards/accuracies": 0.875, "rewards/chosen": -0.001919474103488028, "rewards/margins": 0.17313764989376068, "rewards/rejected": -0.17505714297294617, "step": 5335 }, { "epoch": 3.690179806362379, "grad_norm": 11.787603378295898, "learning_rate": 3.505455663132012e-05, "log_odds_chosen": 9.963918685913086, "log_odds_ratio": -0.0004981214297004044, "logits/chosen": -1.0922781229019165, "logits/rejected": -1.1762382984161377, "logps/chosen": -0.001630566199310124, "logps/rejected": -2.3342196941375732, "loss": 1.3523, "nll_loss": 0.3380275368690491, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016305662575177848, "rewards/margins": 0.233258917927742, "rewards/rejected": -0.2334219515323639, "step": 5336 }, { "epoch": 3.690871369294606, "grad_norm": 11.271586418151855, "learning_rate": 3.5050714615029964e-05, "log_odds_chosen": 10.058823585510254, "log_odds_ratio": -6.0240603488637134e-05, "logits/chosen": -0.9258029460906982, "logits/rejected": -1.0561065673828125, "logps/chosen": -0.0003865394101012498, "logps/rejected": -1.8324633836746216, "loss": 1.7126, "nll_loss": 0.4281406104564667, "rewards/accuracies": 1.0, "rewards/chosen": -3.8653943192912266e-05, "rewards/margins": 0.1832076907157898, "rewards/rejected": -0.18324634432792664, "step": 5337 }, { "epoch": 3.691562932226833, "grad_norm": 9.628790855407715, "learning_rate": 3.504687259873982e-05, "log_odds_chosen": 9.142765045166016, "log_odds_ratio": -0.0002933957439381629, "logits/chosen": -0.6513609290122986, "logits/rejected": -0.7071292400360107, "logps/chosen": -0.0006643411470577121, "logps/rejected": -1.5814200639724731, "loss": 1.9233, "nll_loss": 0.4807976484298706, "rewards/accuracies": 1.0, "rewards/chosen": -6.643411325057968e-05, "rewards/margins": 0.15807557106018066, "rewards/rejected": -0.15814201533794403, "step": 5338 }, { "epoch": 3.6922544951590597, "grad_norm": 9.518936157226562, "learning_rate": 3.504303058244967e-05, "log_odds_chosen": 9.003790855407715, "log_odds_ratio": -0.001304905628785491, "logits/chosen": -0.618826150894165, "logits/rejected": -0.7828741073608398, "logps/chosen": -0.0028582715895026922, "logps/rejected": -1.3129936456680298, "loss": 1.4019, "nll_loss": 0.35033372044563293, "rewards/accuracies": 1.0, "rewards/chosen": -0.00028582714730873704, "rewards/margins": 0.13101352751255035, "rewards/rejected": -0.13129936158657074, "step": 5339 }, { "epoch": 3.6929460580912865, "grad_norm": 8.120966911315918, "learning_rate": 3.503918856615952e-05, "log_odds_chosen": 8.923418998718262, "log_odds_ratio": -0.0002836494822986424, "logits/chosen": -0.7749639749526978, "logits/rejected": -0.8973751068115234, "logps/chosen": -0.003522332990542054, "logps/rejected": -2.081045389175415, "loss": 1.8535, "nll_loss": 0.46334409713745117, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003522332990542054, "rewards/margins": 0.2077522873878479, "rewards/rejected": -0.20810453593730927, "step": 5340 }, { "epoch": 3.6936376210235133, "grad_norm": 7.801592826843262, "learning_rate": 3.5035346549869373e-05, "log_odds_chosen": 9.398853302001953, "log_odds_ratio": -0.0006219418719410896, "logits/chosen": -0.431623637676239, "logits/rejected": -0.42418909072875977, "logps/chosen": -0.000501930364407599, "logps/rejected": -1.5170776844024658, "loss": 1.2517, "nll_loss": 0.31287214159965515, "rewards/accuracies": 1.0, "rewards/chosen": -5.019303716835566e-05, "rewards/margins": 0.1516575962305069, "rewards/rejected": -0.15170776844024658, "step": 5341 }, { "epoch": 3.69432918395574, "grad_norm": 10.358515739440918, "learning_rate": 3.5031504533579226e-05, "log_odds_chosen": 9.936261177062988, "log_odds_ratio": -0.00020719818712677807, "logits/chosen": -0.7464555501937866, "logits/rejected": -0.8372625708580017, "logps/chosen": -0.0009494010009802878, "logps/rejected": -2.646921396255493, "loss": 1.5907, "nll_loss": 0.3976495862007141, "rewards/accuracies": 1.0, "rewards/chosen": -9.494010737398639e-05, "rewards/margins": 0.26459717750549316, "rewards/rejected": -0.26469212770462036, "step": 5342 }, { "epoch": 3.695020746887967, "grad_norm": 15.795845985412598, "learning_rate": 3.502766251728907e-05, "log_odds_chosen": 10.614376068115234, "log_odds_ratio": -0.00011300211190246046, "logits/chosen": -0.9104953408241272, "logits/rejected": -1.0255991220474243, "logps/chosen": -0.00013850632240064442, "logps/rejected": -2.071258306503296, "loss": 2.1981, "nll_loss": 0.5495221018791199, "rewards/accuracies": 1.0, "rewards/chosen": -1.3850632058165502e-05, "rewards/margins": 0.20711196959018707, "rewards/rejected": -0.20712582767009735, "step": 5343 }, { "epoch": 3.695712309820194, "grad_norm": 10.562488555908203, "learning_rate": 3.5023820500998924e-05, "log_odds_chosen": 9.300116539001465, "log_odds_ratio": -0.01639566384255886, "logits/chosen": -0.6258934736251831, "logits/rejected": -0.6757691502571106, "logps/chosen": -0.025663437321782112, "logps/rejected": -2.3334107398986816, "loss": 2.4101, "nll_loss": 0.6008975505828857, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025663438718765974, "rewards/margins": 0.23077471554279327, "rewards/rejected": -0.2333410680294037, "step": 5344 }, { "epoch": 3.6964038727524207, "grad_norm": 9.328142166137695, "learning_rate": 3.5019978484708776e-05, "log_odds_chosen": 7.483302593231201, "log_odds_ratio": -0.036034103482961655, "logits/chosen": -0.8576046824455261, "logits/rejected": -0.8339859843254089, "logps/chosen": -0.01491495966911316, "logps/rejected": -1.15474271774292, "loss": 1.957, "nll_loss": 0.48564010858535767, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014914961066097021, "rewards/margins": 0.11398278176784515, "rewards/rejected": -0.11547426879405975, "step": 5345 }, { "epoch": 3.6970954356846475, "grad_norm": 8.036946296691895, "learning_rate": 3.501613646841863e-05, "log_odds_chosen": 8.698389053344727, "log_odds_ratio": -0.0007000649347901344, "logits/chosen": -0.6593747138977051, "logits/rejected": -0.7601386904716492, "logps/chosen": -0.00904887355864048, "logps/rejected": -1.7365727424621582, "loss": 1.9866, "nll_loss": 0.49658799171447754, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009048873907886446, "rewards/margins": 0.17275238037109375, "rewards/rejected": -0.17365726828575134, "step": 5346 }, { "epoch": 3.6977869986168743, "grad_norm": 5.567675590515137, "learning_rate": 3.501229445212848e-05, "log_odds_chosen": 8.867687225341797, "log_odds_ratio": -0.018153710290789604, "logits/chosen": -0.6134251356124878, "logits/rejected": -0.6851860284805298, "logps/chosen": -0.005485460627824068, "logps/rejected": -1.1283323764801025, "loss": 1.0239, "nll_loss": 0.2541605532169342, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005485460278578103, "rewards/margins": 0.11228469014167786, "rewards/rejected": -0.11283324658870697, "step": 5347 }, { "epoch": 3.698478561549101, "grad_norm": 12.602523803710938, "learning_rate": 3.500845243583833e-05, "log_odds_chosen": 7.065537452697754, "log_odds_ratio": -0.46093103289604187, "logits/chosen": -0.28088217973709106, "logits/rejected": -0.3088432252407074, "logps/chosen": -0.12832608819007874, "logps/rejected": -1.7279107570648193, "loss": 1.9474, "nll_loss": 0.44075435400009155, "rewards/accuracies": 0.75, "rewards/chosen": -0.012832608073949814, "rewards/margins": 0.15995845198631287, "rewards/rejected": -0.17279106378555298, "step": 5348 }, { "epoch": 3.699170124481328, "grad_norm": 10.695401191711426, "learning_rate": 3.500461041954818e-05, "log_odds_chosen": 9.166481018066406, "log_odds_ratio": -0.0002610564115457237, "logits/chosen": -0.5230810642242432, "logits/rejected": -0.5313901901245117, "logps/chosen": -0.0008728259126655757, "logps/rejected": -1.547613263130188, "loss": 1.3375, "nll_loss": 0.3343556523323059, "rewards/accuracies": 1.0, "rewards/chosen": -8.728259854251519e-05, "rewards/margins": 0.15467403829097748, "rewards/rejected": -0.15476132929325104, "step": 5349 }, { "epoch": 3.699861687413555, "grad_norm": 5.585293769836426, "learning_rate": 3.500076840325803e-05, "log_odds_chosen": 8.954833030700684, "log_odds_ratio": -0.013094688765704632, "logits/chosen": -0.5380806922912598, "logits/rejected": -0.6527281403541565, "logps/chosen": -0.004866013769060373, "logps/rejected": -1.382778286933899, "loss": 1.0659, "nll_loss": 0.2651692032814026, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004866014060098678, "rewards/margins": 0.1377912312746048, "rewards/rejected": -0.1382778286933899, "step": 5350 }, { "epoch": 3.7005532503457816, "grad_norm": 11.971247673034668, "learning_rate": 3.4996926386967884e-05, "log_odds_chosen": 9.954809188842773, "log_odds_ratio": -0.006246030330657959, "logits/chosen": -0.5458557605743408, "logits/rejected": -0.5307517647743225, "logps/chosen": -0.002717132680118084, "logps/rejected": -1.9100207090377808, "loss": 1.3756, "nll_loss": 0.34327322244644165, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027171324472874403, "rewards/margins": 0.19073036313056946, "rewards/rejected": -0.19100208580493927, "step": 5351 }, { "epoch": 3.7012448132780085, "grad_norm": 11.080292701721191, "learning_rate": 3.499308437067773e-05, "log_odds_chosen": 6.902338981628418, "log_odds_ratio": -0.15426041185855865, "logits/chosen": -0.5036279559135437, "logits/rejected": -0.5924159288406372, "logps/chosen": -0.051904480904340744, "logps/rejected": -1.4585072994232178, "loss": 2.1008, "nll_loss": 0.5097784399986267, "rewards/accuracies": 0.875, "rewards/chosen": -0.0051904479041695595, "rewards/margins": 0.14066028594970703, "rewards/rejected": -0.14585073292255402, "step": 5352 }, { "epoch": 3.7019363762102353, "grad_norm": 9.899896621704102, "learning_rate": 3.498924235438758e-05, "log_odds_chosen": 9.366655349731445, "log_odds_ratio": -0.00028243596898391843, "logits/chosen": -0.5405033826828003, "logits/rejected": -0.5533077716827393, "logps/chosen": -0.00033929411438293755, "logps/rejected": -1.1259949207305908, "loss": 1.29, "nll_loss": 0.32247021794319153, "rewards/accuracies": 1.0, "rewards/chosen": -3.392941289348528e-05, "rewards/margins": 0.11256556957960129, "rewards/rejected": -0.11259949207305908, "step": 5353 }, { "epoch": 3.702627939142462, "grad_norm": 8.126338005065918, "learning_rate": 3.4985400338097435e-05, "log_odds_chosen": 9.396369934082031, "log_odds_ratio": -0.0006253690226003528, "logits/chosen": -0.4254477024078369, "logits/rejected": -0.3807203769683838, "logps/chosen": -0.0022061774507164955, "logps/rejected": -1.7364078760147095, "loss": 1.6621, "nll_loss": 0.41545745730400085, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022061774507164955, "rewards/margins": 0.17342017590999603, "rewards/rejected": -0.17364078760147095, "step": 5354 }, { "epoch": 3.703319502074689, "grad_norm": 11.008198738098145, "learning_rate": 3.498155832180729e-05, "log_odds_chosen": 8.941949844360352, "log_odds_ratio": -0.014349130913615227, "logits/chosen": -0.3872235119342804, "logits/rejected": -0.5136542320251465, "logps/chosen": -0.015712972730398178, "logps/rejected": -2.3724722862243652, "loss": 1.7264, "nll_loss": 0.43016284704208374, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015712971799075603, "rewards/margins": 0.23567596077919006, "rewards/rejected": -0.2372472584247589, "step": 5355 }, { "epoch": 3.704011065006916, "grad_norm": 7.446681976318359, "learning_rate": 3.497771630551714e-05, "log_odds_chosen": 9.698359489440918, "log_odds_ratio": -0.0001687395852059126, "logits/chosen": -0.5469300746917725, "logits/rejected": -0.6297659873962402, "logps/chosen": -0.0029810178093612194, "logps/rejected": -1.8616513013839722, "loss": 0.9211, "nll_loss": 0.23024982213974, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029810177511535585, "rewards/margins": 0.185867041349411, "rewards/rejected": -0.18616515398025513, "step": 5356 }, { "epoch": 3.7047026279391426, "grad_norm": 9.188549995422363, "learning_rate": 3.4973874289226985e-05, "log_odds_chosen": 9.125092506408691, "log_odds_ratio": -0.00030094856629148126, "logits/chosen": -0.8525630235671997, "logits/rejected": -0.9092421531677246, "logps/chosen": -0.0007442575879395008, "logps/rejected": -1.5756902694702148, "loss": 1.4686, "nll_loss": 0.3671131432056427, "rewards/accuracies": 1.0, "rewards/chosen": -7.442575588356704e-05, "rewards/margins": 0.15749460458755493, "rewards/rejected": -0.1575690358877182, "step": 5357 }, { "epoch": 3.7053941908713695, "grad_norm": 7.4708051681518555, "learning_rate": 3.497003227293684e-05, "log_odds_chosen": 10.437641143798828, "log_odds_ratio": -0.00015635325689800084, "logits/chosen": -0.5352045297622681, "logits/rejected": -0.5188069939613342, "logps/chosen": -0.0006159612676128745, "logps/rejected": -2.369760274887085, "loss": 1.3847, "nll_loss": 0.34616681933403015, "rewards/accuracies": 1.0, "rewards/chosen": -6.159612530609593e-05, "rewards/margins": 0.23691445589065552, "rewards/rejected": -0.2369760274887085, "step": 5358 }, { "epoch": 3.7060857538035963, "grad_norm": 12.068976402282715, "learning_rate": 3.496619025664669e-05, "log_odds_chosen": 10.090932846069336, "log_odds_ratio": -0.00021271216974128038, "logits/chosen": -0.7529656887054443, "logits/rejected": -0.7240673303604126, "logps/chosen": -0.000268957024673, "logps/rejected": -1.9103636741638184, "loss": 1.8827, "nll_loss": 0.4706517457962036, "rewards/accuracies": 1.0, "rewards/chosen": -2.6895704650087282e-05, "rewards/margins": 0.19100944697856903, "rewards/rejected": -0.19103635847568512, "step": 5359 }, { "epoch": 3.706777316735823, "grad_norm": 6.470218658447266, "learning_rate": 3.496234824035654e-05, "log_odds_chosen": 8.61813735961914, "log_odds_ratio": -0.09870389848947525, "logits/chosen": -0.313279390335083, "logits/rejected": -0.2931751608848572, "logps/chosen": -0.027196036651730537, "logps/rejected": -1.6684269905090332, "loss": 2.0358, "nll_loss": 0.4990912973880768, "rewards/accuracies": 0.875, "rewards/chosen": -0.002719603944569826, "rewards/margins": 0.16412308812141418, "rewards/rejected": -0.16684269905090332, "step": 5360 }, { "epoch": 3.70746887966805, "grad_norm": 6.061364650726318, "learning_rate": 3.495850622406639e-05, "log_odds_chosen": 7.981203079223633, "log_odds_ratio": -0.01701802760362625, "logits/chosen": -0.5911697149276733, "logits/rejected": -0.6583446264266968, "logps/chosen": -0.011462513357400894, "logps/rejected": -1.4020397663116455, "loss": 2.1631, "nll_loss": 0.5390677452087402, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011462513357400894, "rewards/margins": 0.1390577107667923, "rewards/rejected": -0.14020396769046783, "step": 5361 }, { "epoch": 3.7081604426002768, "grad_norm": 6.6160888671875, "learning_rate": 3.495466420777625e-05, "log_odds_chosen": 9.713098526000977, "log_odds_ratio": -0.0001639363035792485, "logits/chosen": -0.7189328670501709, "logits/rejected": -0.7098298072814941, "logps/chosen": -0.004341424442827702, "logps/rejected": -2.150063991546631, "loss": 1.6854, "nll_loss": 0.42134472727775574, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043414239189587533, "rewards/margins": 0.21457228064537048, "rewards/rejected": -0.21500641107559204, "step": 5362 }, { "epoch": 3.7088520055325036, "grad_norm": 7.108826160430908, "learning_rate": 3.495082219148609e-05, "log_odds_chosen": 9.363826751708984, "log_odds_ratio": -0.0003894947003573179, "logits/chosen": -0.708694338798523, "logits/rejected": -0.7024210095405579, "logps/chosen": -0.0006412908551283181, "logps/rejected": -1.5811138153076172, "loss": 1.0492, "nll_loss": 0.2622511088848114, "rewards/accuracies": 1.0, "rewards/chosen": -6.412908987840638e-05, "rewards/margins": 0.15804724395275116, "rewards/rejected": -0.15811137855052948, "step": 5363 }, { "epoch": 3.7095435684647304, "grad_norm": 9.981987953186035, "learning_rate": 3.4946980175195946e-05, "log_odds_chosen": 8.762392044067383, "log_odds_ratio": -0.11463475972414017, "logits/chosen": -0.645453929901123, "logits/rejected": -0.708638608455658, "logps/chosen": -0.017611129209399223, "logps/rejected": -1.5133848190307617, "loss": 1.9759, "nll_loss": 0.4825035035610199, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017611129442229867, "rewards/margins": 0.14957737922668457, "rewards/rejected": -0.15133848786354065, "step": 5364 }, { "epoch": 3.7102351313969573, "grad_norm": 6.762959957122803, "learning_rate": 3.49431381589058e-05, "log_odds_chosen": 8.865896224975586, "log_odds_ratio": -0.0009606637177057564, "logits/chosen": -0.43857601284980774, "logits/rejected": -0.5364105105400085, "logps/chosen": -0.043151963502168655, "logps/rejected": -2.8622305393218994, "loss": 1.1842, "nll_loss": 0.2959616184234619, "rewards/accuracies": 1.0, "rewards/chosen": -0.004315196070820093, "rewards/margins": 0.281907856464386, "rewards/rejected": -0.28622305393218994, "step": 5365 }, { "epoch": 3.710926694329184, "grad_norm": 12.004629135131836, "learning_rate": 3.4939296142615644e-05, "log_odds_chosen": 9.092486381530762, "log_odds_ratio": -0.00038122880505397916, "logits/chosen": -0.6763482093811035, "logits/rejected": -0.7454387545585632, "logps/chosen": -0.016808513551950455, "logps/rejected": -2.034806966781616, "loss": 1.3493, "nll_loss": 0.33729538321495056, "rewards/accuracies": 1.0, "rewards/chosen": -0.001680851331911981, "rewards/margins": 0.20179985463619232, "rewards/rejected": -0.20348069071769714, "step": 5366 }, { "epoch": 3.711618257261411, "grad_norm": 9.979867935180664, "learning_rate": 3.4935454126325496e-05, "log_odds_chosen": 10.370789527893066, "log_odds_ratio": -9.933464752975851e-05, "logits/chosen": -0.6684778332710266, "logits/rejected": -0.6606014966964722, "logps/chosen": -0.00016815456910990179, "logps/rejected": -1.7887946367263794, "loss": 1.6075, "nll_loss": 0.4018716514110565, "rewards/accuracies": 1.0, "rewards/chosen": -1.6815456547192298e-05, "rewards/margins": 0.17886263132095337, "rewards/rejected": -0.17887946963310242, "step": 5367 }, { "epoch": 3.7123098201936378, "grad_norm": 8.801868438720703, "learning_rate": 3.493161211003535e-05, "log_odds_chosen": 9.470043182373047, "log_odds_ratio": -0.0001726085611153394, "logits/chosen": -0.465656578540802, "logits/rejected": -0.5587571263313293, "logps/chosen": -0.000835128128528595, "logps/rejected": -1.672318458557129, "loss": 1.0162, "nll_loss": 0.2540230453014374, "rewards/accuracies": 1.0, "rewards/chosen": -8.351281576324254e-05, "rewards/margins": 0.16714833676815033, "rewards/rejected": -0.16723184287548065, "step": 5368 }, { "epoch": 3.7130013831258646, "grad_norm": 12.182748794555664, "learning_rate": 3.49277700937452e-05, "log_odds_chosen": 8.469917297363281, "log_odds_ratio": -0.06619244813919067, "logits/chosen": -0.7275235056877136, "logits/rejected": -0.7132308483123779, "logps/chosen": -0.03503218665719032, "logps/rejected": -2.0422232151031494, "loss": 1.1944, "nll_loss": 0.29196980595588684, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035032187588512897, "rewards/margins": 0.20071911811828613, "rewards/rejected": -0.20422232151031494, "step": 5369 }, { "epoch": 3.7136929460580914, "grad_norm": 9.088739395141602, "learning_rate": 3.492392807745505e-05, "log_odds_chosen": 9.97968864440918, "log_odds_ratio": -0.00012102635810151696, "logits/chosen": -0.5951199531555176, "logits/rejected": -0.6818069219589233, "logps/chosen": -0.00044668448390439153, "logps/rejected": -1.7563897371292114, "loss": 1.0097, "nll_loss": 0.2524169087409973, "rewards/accuracies": 1.0, "rewards/chosen": -4.466845348360948e-05, "rewards/margins": 0.175594300031662, "rewards/rejected": -0.17563897371292114, "step": 5370 }, { "epoch": 3.7143845089903182, "grad_norm": 15.796573638916016, "learning_rate": 3.4920086061164906e-05, "log_odds_chosen": 9.849954605102539, "log_odds_ratio": -0.00020008234423585236, "logits/chosen": -0.6006171703338623, "logits/rejected": -0.6448779106140137, "logps/chosen": -0.0005239631282165647, "logps/rejected": -1.7191863059997559, "loss": 1.7053, "nll_loss": 0.42631492018699646, "rewards/accuracies": 1.0, "rewards/chosen": -5.239631354925223e-05, "rewards/margins": 0.17186623811721802, "rewards/rejected": -0.17191863059997559, "step": 5371 }, { "epoch": 3.715076071922545, "grad_norm": 11.92275333404541, "learning_rate": 3.491624404487475e-05, "log_odds_chosen": 7.447054862976074, "log_odds_ratio": -0.10066209733486176, "logits/chosen": -0.38511592149734497, "logits/rejected": -0.43172594904899597, "logps/chosen": -0.02251449227333069, "logps/rejected": -1.2307636737823486, "loss": 1.9626, "nll_loss": 0.4805947542190552, "rewards/accuracies": 0.875, "rewards/chosen": -0.0022514492738991976, "rewards/margins": 0.1208249107003212, "rewards/rejected": -0.12307636439800262, "step": 5372 }, { "epoch": 3.715767634854772, "grad_norm": 10.29927921295166, "learning_rate": 3.4912402028584604e-05, "log_odds_chosen": 10.454160690307617, "log_odds_ratio": -6.0720885812770575e-05, "logits/chosen": -0.5991085171699524, "logits/rejected": -0.6778004169464111, "logps/chosen": -0.00020221697923261672, "logps/rejected": -1.793766975402832, "loss": 1.504, "nll_loss": 0.37600409984588623, "rewards/accuracies": 1.0, "rewards/chosen": -2.0221699742251076e-05, "rewards/margins": 0.17935647070407867, "rewards/rejected": -0.17937669157981873, "step": 5373 }, { "epoch": 3.7164591977869987, "grad_norm": 7.523771286010742, "learning_rate": 3.490856001229446e-05, "log_odds_chosen": 10.136287689208984, "log_odds_ratio": -7.101793016772717e-05, "logits/chosen": -0.5835416316986084, "logits/rejected": -0.5638810396194458, "logps/chosen": -0.00041338251321576536, "logps/rejected": -1.9321492910385132, "loss": 2.1528, "nll_loss": 0.5381991267204285, "rewards/accuracies": 1.0, "rewards/chosen": -4.133825495955534e-05, "rewards/margins": 0.1931736171245575, "rewards/rejected": -0.19321493804454803, "step": 5374 }, { "epoch": 3.7171507607192256, "grad_norm": 12.451519966125488, "learning_rate": 3.49047179960043e-05, "log_odds_chosen": 9.121818542480469, "log_odds_ratio": -0.0004346870118752122, "logits/chosen": -0.8031559586524963, "logits/rejected": -0.790696382522583, "logps/chosen": -0.0010023590875789523, "logps/rejected": -1.4638499021530151, "loss": 2.2833, "nll_loss": 0.5707757472991943, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010023592039942741, "rewards/margins": 0.14628475904464722, "rewards/rejected": -0.14638498425483704, "step": 5375 }, { "epoch": 3.7178423236514524, "grad_norm": 20.69891357421875, "learning_rate": 3.4900875979714155e-05, "log_odds_chosen": 9.785394668579102, "log_odds_ratio": -0.09108485281467438, "logits/chosen": -0.42490309476852417, "logits/rejected": -0.6075316667556763, "logps/chosen": -0.015684589743614197, "logps/rejected": -2.362088203430176, "loss": 1.922, "nll_loss": 0.4714014232158661, "rewards/accuracies": 0.875, "rewards/chosen": -0.0015684588579460979, "rewards/margins": 0.23464035987854004, "rewards/rejected": -0.23620882630348206, "step": 5376 }, { "epoch": 3.7185338865836792, "grad_norm": 17.057636260986328, "learning_rate": 3.489703396342401e-05, "log_odds_chosen": 10.41865348815918, "log_odds_ratio": -0.00016036239685490727, "logits/chosen": -0.5188636779785156, "logits/rejected": -0.4948766529560089, "logps/chosen": -0.000365030748071149, "logps/rejected": -2.019139528274536, "loss": 1.8005, "nll_loss": 0.45011138916015625, "rewards/accuracies": 1.0, "rewards/chosen": -3.650307917268947e-05, "rewards/margins": 0.2018774449825287, "rewards/rejected": -0.2019139528274536, "step": 5377 }, { "epoch": 3.719225449515906, "grad_norm": 12.16074275970459, "learning_rate": 3.489319194713386e-05, "log_odds_chosen": 9.693315505981445, "log_odds_ratio": -0.0017601572908461094, "logits/chosen": -0.9097875356674194, "logits/rejected": -0.998991847038269, "logps/chosen": -0.001546016545034945, "logps/rejected": -1.6606801748275757, "loss": 1.3411, "nll_loss": 0.3350999057292938, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015460167196579278, "rewards/margins": 0.1659134328365326, "rewards/rejected": -0.16606801748275757, "step": 5378 }, { "epoch": 3.719917012448133, "grad_norm": 9.151723861694336, "learning_rate": 3.4889349930843705e-05, "log_odds_chosen": 10.32984733581543, "log_odds_ratio": -6.051865784684196e-05, "logits/chosen": -0.2843622863292694, "logits/rejected": -0.37206345796585083, "logps/chosen": -0.00016166864952538162, "logps/rejected": -1.6887463331222534, "loss": 1.1799, "nll_loss": 0.29497507214546204, "rewards/accuracies": 1.0, "rewards/chosen": -1.6166864952538162e-05, "rewards/margins": 0.16885846853256226, "rewards/rejected": -0.1688746213912964, "step": 5379 }, { "epoch": 3.7206085753803597, "grad_norm": 9.702625274658203, "learning_rate": 3.4885507914553565e-05, "log_odds_chosen": 9.072803497314453, "log_odds_ratio": -0.0002321783103980124, "logits/chosen": -0.5725424289703369, "logits/rejected": -0.48636266589164734, "logps/chosen": -0.001193308038637042, "logps/rejected": -1.3661433458328247, "loss": 1.5208, "nll_loss": 0.38018402457237244, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011933079804293811, "rewards/margins": 0.13649500906467438, "rewards/rejected": -0.1366143524646759, "step": 5380 }, { "epoch": 3.7213001383125865, "grad_norm": 15.534612655639648, "learning_rate": 3.488166589826341e-05, "log_odds_chosen": 9.534454345703125, "log_odds_ratio": -0.032325610518455505, "logits/chosen": -0.07890317589044571, "logits/rejected": -0.15461499989032745, "logps/chosen": -0.008514742366969585, "logps/rejected": -2.4597206115722656, "loss": 1.7176, "nll_loss": 0.4261553883552551, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008514742366969585, "rewards/margins": 0.2451205849647522, "rewards/rejected": -0.24597206711769104, "step": 5381 }, { "epoch": 3.7219917012448134, "grad_norm": 11.658743858337402, "learning_rate": 3.487782388197326e-05, "log_odds_chosen": 9.717710494995117, "log_odds_ratio": -0.00044204670120961964, "logits/chosen": -0.6480785012245178, "logits/rejected": -0.7348360419273376, "logps/chosen": -0.00075390818528831, "logps/rejected": -1.9051095247268677, "loss": 1.1266, "nll_loss": 0.2816102206707001, "rewards/accuracies": 1.0, "rewards/chosen": -7.53908243495971e-05, "rewards/margins": 0.19043557345867157, "rewards/rejected": -0.19051097333431244, "step": 5382 }, { "epoch": 3.72268326417704, "grad_norm": 7.192076683044434, "learning_rate": 3.4873981865683115e-05, "log_odds_chosen": 9.726299285888672, "log_odds_ratio": -0.0001063284435076639, "logits/chosen": -0.8140656352043152, "logits/rejected": -0.7850347757339478, "logps/chosen": -0.00866577960550785, "logps/rejected": -1.912880301475525, "loss": 1.1967, "nll_loss": 0.2991747260093689, "rewards/accuracies": 1.0, "rewards/chosen": -0.000866578018758446, "rewards/margins": 0.1904214471578598, "rewards/rejected": -0.191288024187088, "step": 5383 }, { "epoch": 3.723374827109267, "grad_norm": 6.207744121551514, "learning_rate": 3.487013984939296e-05, "log_odds_chosen": 10.239447593688965, "log_odds_ratio": -6.98567891959101e-05, "logits/chosen": -0.6612863540649414, "logits/rejected": -0.700016438961029, "logps/chosen": -0.0005870179738849401, "logps/rejected": -1.7446274757385254, "loss": 0.8839, "nll_loss": 0.22095894813537598, "rewards/accuracies": 1.0, "rewards/chosen": -5.870179666089825e-05, "rewards/margins": 0.1744040548801422, "rewards/rejected": -0.17446276545524597, "step": 5384 }, { "epoch": 3.724066390041494, "grad_norm": 9.887097358703613, "learning_rate": 3.486629783310281e-05, "log_odds_chosen": 9.54232406616211, "log_odds_ratio": -0.00017396220937371254, "logits/chosen": -0.9332183599472046, "logits/rejected": -1.0139302015304565, "logps/chosen": -0.0003737725201062858, "logps/rejected": -1.457379698753357, "loss": 1.9432, "nll_loss": 0.4857736825942993, "rewards/accuracies": 1.0, "rewards/chosen": -3.7377249100245535e-05, "rewards/margins": 0.145700603723526, "rewards/rejected": -0.14573797583580017, "step": 5385 }, { "epoch": 3.7247579529737207, "grad_norm": 7.268083095550537, "learning_rate": 3.4862455816812666e-05, "log_odds_chosen": 8.003231048583984, "log_odds_ratio": -0.13872238993644714, "logits/chosen": -0.6145071387290955, "logits/rejected": -0.6586422920227051, "logps/chosen": -0.0258407574146986, "logps/rejected": -2.195486307144165, "loss": 1.6876, "nll_loss": 0.40801793336868286, "rewards/accuracies": 0.875, "rewards/chosen": -0.0025840753223747015, "rewards/margins": 0.21696454286575317, "rewards/rejected": -0.21954862773418427, "step": 5386 }, { "epoch": 3.7254495159059475, "grad_norm": 8.43387508392334, "learning_rate": 3.485861380052252e-05, "log_odds_chosen": 10.035348892211914, "log_odds_ratio": -5.451100514619611e-05, "logits/chosen": -0.5829866528511047, "logits/rejected": -0.5891355276107788, "logps/chosen": -0.0002491176419425756, "logps/rejected": -1.592644214630127, "loss": 1.5007, "nll_loss": 0.37516021728515625, "rewards/accuracies": 1.0, "rewards/chosen": -2.491176564944908e-05, "rewards/margins": 0.1592395156621933, "rewards/rejected": -0.1592644304037094, "step": 5387 }, { "epoch": 3.7261410788381744, "grad_norm": 9.96875, "learning_rate": 3.4854771784232364e-05, "log_odds_chosen": 9.071399688720703, "log_odds_ratio": -0.00036526485928334296, "logits/chosen": -0.4201893210411072, "logits/rejected": -0.47043901681900024, "logps/chosen": -0.0005718155298382044, "logps/rejected": -1.7792240381240845, "loss": 1.3203, "nll_loss": 0.3300449550151825, "rewards/accuracies": 1.0, "rewards/chosen": -5.7181550801033154e-05, "rewards/margins": 0.17786523699760437, "rewards/rejected": -0.17792241275310516, "step": 5388 }, { "epoch": 3.726832641770401, "grad_norm": 12.416715621948242, "learning_rate": 3.485092976794222e-05, "log_odds_chosen": 10.267997741699219, "log_odds_ratio": -0.00012193172005936503, "logits/chosen": -0.7236143946647644, "logits/rejected": -0.8546754717826843, "logps/chosen": -0.0007985993870534003, "logps/rejected": -2.2986698150634766, "loss": 1.4546, "nll_loss": 0.3636472523212433, "rewards/accuracies": 1.0, "rewards/chosen": -7.985993579495698e-05, "rewards/margins": 0.22978714108467102, "rewards/rejected": -0.22986699640750885, "step": 5389 }, { "epoch": 3.727524204702628, "grad_norm": 17.100522994995117, "learning_rate": 3.484708775165207e-05, "log_odds_chosen": 8.397350311279297, "log_odds_ratio": -0.01968861185014248, "logits/chosen": -0.8118228316307068, "logits/rejected": -0.7890980243682861, "logps/chosen": -0.012317357584834099, "logps/rejected": -2.130256175994873, "loss": 1.9555, "nll_loss": 0.4868970811367035, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012317356886342168, "rewards/margins": 0.21179388463497162, "rewards/rejected": -0.21302561461925507, "step": 5390 }, { "epoch": 3.728215767634855, "grad_norm": 9.5698881149292, "learning_rate": 3.484324573536192e-05, "log_odds_chosen": 8.837024688720703, "log_odds_ratio": -0.010207761079072952, "logits/chosen": -0.5338362455368042, "logits/rejected": -0.6706526279449463, "logps/chosen": -0.010720196180045605, "logps/rejected": -1.4015872478485107, "loss": 1.78, "nll_loss": 0.44397395849227905, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010720195714384317, "rewards/margins": 0.13908669352531433, "rewards/rejected": -0.1401587277650833, "step": 5391 }, { "epoch": 3.7289073305670817, "grad_norm": 10.274774551391602, "learning_rate": 3.4839403719071773e-05, "log_odds_chosen": 9.285009384155273, "log_odds_ratio": -0.0002976319519802928, "logits/chosen": -0.7594529986381531, "logits/rejected": -0.7697383761405945, "logps/chosen": -0.002318364568054676, "logps/rejected": -2.2340967655181885, "loss": 1.7638, "nll_loss": 0.4409220814704895, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002318364568054676, "rewards/margins": 0.22317782044410706, "rewards/rejected": -0.22340966761112213, "step": 5392 }, { "epoch": 3.7295988934993085, "grad_norm": 8.453655242919922, "learning_rate": 3.483556170278162e-05, "log_odds_chosen": 11.143664360046387, "log_odds_ratio": -1.918661655508913e-05, "logits/chosen": -0.7070326805114746, "logits/rejected": -0.7929023504257202, "logps/chosen": -0.0002504032163415104, "logps/rejected": -2.2312471866607666, "loss": 1.3668, "nll_loss": 0.3416998088359833, "rewards/accuracies": 1.0, "rewards/chosen": -2.5040324544534087e-05, "rewards/margins": 0.22309967875480652, "rewards/rejected": -0.22312471270561218, "step": 5393 }, { "epoch": 3.7302904564315353, "grad_norm": 12.340773582458496, "learning_rate": 3.483171968649147e-05, "log_odds_chosen": 8.7693510055542, "log_odds_ratio": -0.0003056778514292091, "logits/chosen": -0.8789324760437012, "logits/rejected": -0.9095242023468018, "logps/chosen": -0.0030350149609148502, "logps/rejected": -1.8134474754333496, "loss": 1.3986, "nll_loss": 0.3496093153953552, "rewards/accuracies": 1.0, "rewards/chosen": -0.000303501496091485, "rewards/margins": 0.18104124069213867, "rewards/rejected": -0.18134474754333496, "step": 5394 }, { "epoch": 3.730982019363762, "grad_norm": 6.907254219055176, "learning_rate": 3.4827877670201324e-05, "log_odds_chosen": 8.385276794433594, "log_odds_ratio": -0.0015755126951262355, "logits/chosen": -0.6956421136856079, "logits/rejected": -0.7536182403564453, "logps/chosen": -0.00522532919421792, "logps/rejected": -1.7120745182037354, "loss": 1.9993, "nll_loss": 0.49965721368789673, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005225329659879208, "rewards/margins": 0.17068493366241455, "rewards/rejected": -0.1712074726819992, "step": 5395 }, { "epoch": 3.731673582295989, "grad_norm": 9.952130317687988, "learning_rate": 3.4824035653911176e-05, "log_odds_chosen": 8.023850440979004, "log_odds_ratio": -0.014953254722058773, "logits/chosen": -0.6111462116241455, "logits/rejected": -0.6585705280303955, "logps/chosen": -0.02512936294078827, "logps/rejected": -1.6922228336334229, "loss": 1.7928, "nll_loss": 0.4467039108276367, "rewards/accuracies": 1.0, "rewards/chosen": -0.002512936247512698, "rewards/margins": 0.16670936346054077, "rewards/rejected": -0.16922229528427124, "step": 5396 }, { "epoch": 3.732365145228216, "grad_norm": 13.846830368041992, "learning_rate": 3.482019363762102e-05, "log_odds_chosen": 9.7283935546875, "log_odds_ratio": -0.00023480159870814532, "logits/chosen": 0.01851162314414978, "logits/rejected": -0.06452546268701553, "logps/chosen": -0.0014925599098205566, "logps/rejected": -2.2084391117095947, "loss": 1.1774, "nll_loss": 0.29433268308639526, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014925599680282176, "rewards/margins": 0.2206946760416031, "rewards/rejected": -0.22084392607212067, "step": 5397 }, { "epoch": 3.7330567081604427, "grad_norm": 7.317983627319336, "learning_rate": 3.481635162133088e-05, "log_odds_chosen": 7.966273784637451, "log_odds_ratio": -0.06842464208602905, "logits/chosen": -0.4417526125907898, "logits/rejected": -0.4552074670791626, "logps/chosen": -0.022284694015979767, "logps/rejected": -1.8117376565933228, "loss": 1.4527, "nll_loss": 0.3563276529312134, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022284695878624916, "rewards/margins": 0.17894530296325684, "rewards/rejected": -0.18117377161979675, "step": 5398 }, { "epoch": 3.7337482710926695, "grad_norm": 36.7801513671875, "learning_rate": 3.481250960504073e-05, "log_odds_chosen": 7.152010440826416, "log_odds_ratio": -0.19985008239746094, "logits/chosen": -0.4221605360507965, "logits/rejected": -0.4445953071117401, "logps/chosen": -0.035872478038072586, "logps/rejected": -1.4452604055404663, "loss": 2.1037, "nll_loss": 0.5059496760368347, "rewards/accuracies": 0.875, "rewards/chosen": -0.003587248269468546, "rewards/margins": 0.14093877375125885, "rewards/rejected": -0.14452604949474335, "step": 5399 }, { "epoch": 3.7344398340248963, "grad_norm": 99.5403823852539, "learning_rate": 3.480866758875058e-05, "log_odds_chosen": 7.870386600494385, "log_odds_ratio": -0.4392143189907074, "logits/chosen": -0.729932427406311, "logits/rejected": -0.7658141255378723, "logps/chosen": -0.21164977550506592, "logps/rejected": -1.6368392705917358, "loss": 3.0325, "nll_loss": 0.7142078280448914, "rewards/accuracies": 0.875, "rewards/chosen": -0.02116497792303562, "rewards/margins": 0.14251896739006042, "rewards/rejected": -0.1636839210987091, "step": 5400 }, { "epoch": 3.735131396957123, "grad_norm": 9.862106323242188, "learning_rate": 3.480482557246043e-05, "log_odds_chosen": 8.045354843139648, "log_odds_ratio": -0.003918115980923176, "logits/chosen": -0.2770964503288269, "logits/rejected": -0.3260282874107361, "logps/chosen": -0.017516067251563072, "logps/rejected": -1.9411693811416626, "loss": 1.4297, "nll_loss": 0.35704267024993896, "rewards/accuracies": 1.0, "rewards/chosen": -0.001751606585457921, "rewards/margins": 0.1923653483390808, "rewards/rejected": -0.19411695003509521, "step": 5401 }, { "epoch": 3.73582295988935, "grad_norm": 10.115386962890625, "learning_rate": 3.480098355617028e-05, "log_odds_chosen": 8.4605712890625, "log_odds_ratio": -0.003765811212360859, "logits/chosen": -0.3988216519355774, "logits/rejected": -0.5181287527084351, "logps/chosen": -0.03485646843910217, "logps/rejected": -1.7947022914886475, "loss": 1.8983, "nll_loss": 0.47420254349708557, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034856467973440886, "rewards/margins": 0.17598459124565125, "rewards/rejected": -0.1794702410697937, "step": 5402 }, { "epoch": 3.736514522821577, "grad_norm": 13.618650436401367, "learning_rate": 3.479714153988013e-05, "log_odds_chosen": 9.20181655883789, "log_odds_ratio": -0.0016848170198500156, "logits/chosen": -0.4004411995410919, "logits/rejected": -0.3985813856124878, "logps/chosen": -0.002053108997642994, "logps/rejected": -1.9189532995224, "loss": 1.612, "nll_loss": 0.40284085273742676, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020531090558506548, "rewards/margins": 0.19169002771377563, "rewards/rejected": -0.19189533591270447, "step": 5403 }, { "epoch": 3.7372060857538036, "grad_norm": 7.179841995239258, "learning_rate": 3.479329952358998e-05, "log_odds_chosen": 10.005006790161133, "log_odds_ratio": -0.00012902371236123145, "logits/chosen": -0.29995405673980713, "logits/rejected": -0.4108564257621765, "logps/chosen": -0.008333981037139893, "logps/rejected": -2.2930901050567627, "loss": 1.4825, "nll_loss": 0.3706183135509491, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008333979640156031, "rewards/margins": 0.2284756302833557, "rewards/rejected": -0.22930902242660522, "step": 5404 }, { "epoch": 3.7378976486860305, "grad_norm": 13.740462303161621, "learning_rate": 3.4789457507299835e-05, "log_odds_chosen": 8.431564331054688, "log_odds_ratio": -0.007452045567333698, "logits/chosen": -0.6105407476425171, "logits/rejected": -0.6141482591629028, "logps/chosen": -0.05373113974928856, "logps/rejected": -2.1418652534484863, "loss": 1.5127, "nll_loss": 0.3774263858795166, "rewards/accuracies": 1.0, "rewards/chosen": -0.005373113788664341, "rewards/margins": 0.20881341397762299, "rewards/rejected": -0.21418653428554535, "step": 5405 }, { "epoch": 3.7385892116182573, "grad_norm": 7.859427452087402, "learning_rate": 3.478561549100968e-05, "log_odds_chosen": 9.265692710876465, "log_odds_ratio": -0.0010991651797667146, "logits/chosen": -0.33521929383277893, "logits/rejected": -0.29185032844543457, "logps/chosen": -0.004679600242525339, "logps/rejected": -2.2458150386810303, "loss": 1.8115, "nll_loss": 0.4527547061443329, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004679600824601948, "rewards/margins": 0.22411353886127472, "rewards/rejected": -0.2245815098285675, "step": 5406 }, { "epoch": 3.739280774550484, "grad_norm": 8.53417682647705, "learning_rate": 3.478177347471954e-05, "log_odds_chosen": 8.067197799682617, "log_odds_ratio": -0.033554911613464355, "logits/chosen": -0.1194586306810379, "logits/rejected": -0.1688614785671234, "logps/chosen": -0.011145330965518951, "logps/rejected": -1.4050594568252563, "loss": 1.7933, "nll_loss": 0.4449673295021057, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011145330499857664, "rewards/margins": 0.13939140737056732, "rewards/rejected": -0.14050595462322235, "step": 5407 }, { "epoch": 3.739972337482711, "grad_norm": 8.623878479003906, "learning_rate": 3.4777931458429385e-05, "log_odds_chosen": 9.07345199584961, "log_odds_ratio": -0.0004437947063706815, "logits/chosen": -0.42635229229927063, "logits/rejected": -0.4366256892681122, "logps/chosen": -0.00036963215097784996, "logps/rejected": -1.2568538188934326, "loss": 1.5811, "nll_loss": 0.39522841572761536, "rewards/accuracies": 1.0, "rewards/chosen": -3.696321800816804e-05, "rewards/margins": 0.1256484091281891, "rewards/rejected": -0.12568537890911102, "step": 5408 }, { "epoch": 3.740663900414938, "grad_norm": 8.635771751403809, "learning_rate": 3.477408944213924e-05, "log_odds_chosen": 11.017953872680664, "log_odds_ratio": -8.565557072870433e-05, "logits/chosen": -0.06105683743953705, "logits/rejected": -0.16102895140647888, "logps/chosen": -0.00024903842131607234, "logps/rejected": -2.231354236602783, "loss": 1.3598, "nll_loss": 0.3399292826652527, "rewards/accuracies": 1.0, "rewards/chosen": -2.4903842131607234e-05, "rewards/margins": 0.22311052680015564, "rewards/rejected": -0.22313544154167175, "step": 5409 }, { "epoch": 3.7413554633471646, "grad_norm": 5.872206687927246, "learning_rate": 3.477024742584909e-05, "log_odds_chosen": 8.316761016845703, "log_odds_ratio": -0.001023442717269063, "logits/chosen": -0.6279017925262451, "logits/rejected": -0.646528959274292, "logps/chosen": -0.013748231343925, "logps/rejected": -1.9299241304397583, "loss": 2.7098, "nll_loss": 0.6773370504379272, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013748230412602425, "rewards/margins": 0.19161759316921234, "rewards/rejected": -0.1929924190044403, "step": 5410 }, { "epoch": 3.7420470262793915, "grad_norm": 7.386795520782471, "learning_rate": 3.4766405409558936e-05, "log_odds_chosen": 8.211042404174805, "log_odds_ratio": -0.03969957306981087, "logits/chosen": -0.6178238391876221, "logits/rejected": -0.627917468547821, "logps/chosen": -0.01131765078753233, "logps/rejected": -1.3942575454711914, "loss": 1.5187, "nll_loss": 0.37571465969085693, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011317649623379111, "rewards/margins": 0.13829399645328522, "rewards/rejected": -0.13942575454711914, "step": 5411 }, { "epoch": 3.7427385892116183, "grad_norm": 6.971358776092529, "learning_rate": 3.476256339326879e-05, "log_odds_chosen": 8.650684356689453, "log_odds_ratio": -0.0025012255646288395, "logits/chosen": -0.31345134973526, "logits/rejected": -0.42115840315818787, "logps/chosen": -0.01780068688094616, "logps/rejected": -2.3259847164154053, "loss": 1.1501, "nll_loss": 0.2872798442840576, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017800686182454228, "rewards/margins": 0.23081842064857483, "rewards/rejected": -0.23259848356246948, "step": 5412 }, { "epoch": 3.743430152143845, "grad_norm": 13.72053337097168, "learning_rate": 3.475872137697864e-05, "log_odds_chosen": 10.826781272888184, "log_odds_ratio": -3.6744382668985054e-05, "logits/chosen": -0.48712050914764404, "logits/rejected": -0.5958147644996643, "logps/chosen": -0.00024669343838468194, "logps/rejected": -2.1021265983581543, "loss": 1.0914, "nll_loss": 0.2728390097618103, "rewards/accuracies": 1.0, "rewards/chosen": -2.466934165568091e-05, "rewards/margins": 0.21018798649311066, "rewards/rejected": -0.21021266281604767, "step": 5413 }, { "epoch": 3.744121715076072, "grad_norm": 14.538620948791504, "learning_rate": 3.475487936068849e-05, "log_odds_chosen": 11.273270606994629, "log_odds_ratio": -1.5496178093599156e-05, "logits/chosen": -0.9595328569412231, "logits/rejected": -0.9811475276947021, "logps/chosen": -0.0001429565018042922, "logps/rejected": -2.308250904083252, "loss": 1.767, "nll_loss": 0.4417545795440674, "rewards/accuracies": 1.0, "rewards/chosen": -1.4295650544227101e-05, "rewards/margins": 0.23081077635288239, "rewards/rejected": -0.23082508146762848, "step": 5414 }, { "epoch": 3.7448132780082988, "grad_norm": 10.922853469848633, "learning_rate": 3.475103734439834e-05, "log_odds_chosen": 10.66468620300293, "log_odds_ratio": -8.033808262553066e-05, "logits/chosen": -0.4718412756919861, "logits/rejected": -0.49102020263671875, "logps/chosen": -0.0015652257716283202, "logps/rejected": -2.2271728515625, "loss": 1.4328, "nll_loss": 0.3581867218017578, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001565225829835981, "rewards/margins": 0.22256073355674744, "rewards/rejected": -0.2227172553539276, "step": 5415 }, { "epoch": 3.7455048409405256, "grad_norm": 8.63065242767334, "learning_rate": 3.47471953281082e-05, "log_odds_chosen": 9.848794937133789, "log_odds_ratio": -0.00011888021253980696, "logits/chosen": -0.44257479906082153, "logits/rejected": -0.5804580450057983, "logps/chosen": -0.0003653892199508846, "logps/rejected": -1.6945195198059082, "loss": 1.2216, "nll_loss": 0.30539870262145996, "rewards/accuracies": 1.0, "rewards/chosen": -3.653892417787574e-05, "rewards/margins": 0.1694154143333435, "rewards/rejected": -0.16945196688175201, "step": 5416 }, { "epoch": 3.7461964038727524, "grad_norm": 12.192042350769043, "learning_rate": 3.4743353311818044e-05, "log_odds_chosen": 8.56835651397705, "log_odds_ratio": -0.14475424587726593, "logits/chosen": -0.682823657989502, "logits/rejected": -0.7653946876525879, "logps/chosen": -0.021642275154590607, "logps/rejected": -1.8503457307815552, "loss": 1.6474, "nll_loss": 0.39738529920578003, "rewards/accuracies": 0.875, "rewards/chosen": -0.002164227655157447, "rewards/margins": 0.18287035822868347, "rewards/rejected": -0.1850345879793167, "step": 5417 }, { "epoch": 3.7468879668049793, "grad_norm": 6.8051371574401855, "learning_rate": 3.4739511295527896e-05, "log_odds_chosen": 9.913517951965332, "log_odds_ratio": -6.164831575006247e-05, "logits/chosen": -0.4106307625770569, "logits/rejected": -0.4225896894931793, "logps/chosen": -0.0002594468533061445, "logps/rejected": -1.63685941696167, "loss": 1.0339, "nll_loss": 0.25846049189567566, "rewards/accuracies": 1.0, "rewards/chosen": -2.594468605821021e-05, "rewards/margins": 0.1636599898338318, "rewards/rejected": -0.16368593275547028, "step": 5418 }, { "epoch": 3.747579529737206, "grad_norm": 11.767422676086426, "learning_rate": 3.473566927923775e-05, "log_odds_chosen": 8.98084831237793, "log_odds_ratio": -0.0005303403595462441, "logits/chosen": -0.43547505140304565, "logits/rejected": -0.4607890546321869, "logps/chosen": -0.010340893641114235, "logps/rejected": -2.9152090549468994, "loss": 1.7148, "nll_loss": 0.42864322662353516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010340893641114235, "rewards/margins": 0.29048681259155273, "rewards/rejected": -0.291520893573761, "step": 5419 }, { "epoch": 3.748271092669433, "grad_norm": 8.567863464355469, "learning_rate": 3.4731827262947594e-05, "log_odds_chosen": 8.407416343688965, "log_odds_ratio": -0.03393007814884186, "logits/chosen": -0.49492907524108887, "logits/rejected": -0.519908607006073, "logps/chosen": -0.017692282795906067, "logps/rejected": -1.5308305025100708, "loss": 1.185, "nll_loss": 0.29286208748817444, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017692282563075423, "rewards/margins": 0.15131384134292603, "rewards/rejected": -0.15308305621147156, "step": 5420 }, { "epoch": 3.7489626556016598, "grad_norm": 11.476261138916016, "learning_rate": 3.472798524665745e-05, "log_odds_chosen": 7.869997978210449, "log_odds_ratio": -0.012655356898903847, "logits/chosen": -0.370304673910141, "logits/rejected": -0.3986484408378601, "logps/chosen": -0.009240656159818172, "logps/rejected": -1.540095567703247, "loss": 1.1696, "nll_loss": 0.29112428426742554, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009240655926987529, "rewards/margins": 0.15308550000190735, "rewards/rejected": -0.15400955080986023, "step": 5421 }, { "epoch": 3.7496542185338866, "grad_norm": 14.321914672851562, "learning_rate": 3.47241432303673e-05, "log_odds_chosen": 9.759228706359863, "log_odds_ratio": -0.009780412539839745, "logits/chosen": -0.6264888048171997, "logits/rejected": -0.6955153942108154, "logps/chosen": -0.051967553794384, "logps/rejected": -2.3276937007904053, "loss": 1.6218, "nll_loss": 0.4044795036315918, "rewards/accuracies": 1.0, "rewards/chosen": -0.005196755286306143, "rewards/margins": 0.22757261991500854, "rewards/rejected": -0.23276937007904053, "step": 5422 }, { "epoch": 3.7503457814661134, "grad_norm": 7.869983196258545, "learning_rate": 3.472030121407715e-05, "log_odds_chosen": 9.434892654418945, "log_odds_ratio": -0.0005775390309281647, "logits/chosen": -0.6662434339523315, "logits/rejected": -0.6797504425048828, "logps/chosen": -0.02035946026444435, "logps/rejected": -2.3536477088928223, "loss": 1.3647, "nll_loss": 0.3411150574684143, "rewards/accuracies": 1.0, "rewards/chosen": -0.002035945886746049, "rewards/margins": 0.23332881927490234, "rewards/rejected": -0.23536476492881775, "step": 5423 }, { "epoch": 3.7510373443983402, "grad_norm": 8.915014266967773, "learning_rate": 3.4716459197787e-05, "log_odds_chosen": 9.686923027038574, "log_odds_ratio": -0.00028954161098226905, "logits/chosen": -0.5250073671340942, "logits/rejected": -0.5078399181365967, "logps/chosen": -0.005831631366163492, "logps/rejected": -1.9717814922332764, "loss": 1.784, "nll_loss": 0.4459819495677948, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005831630551256239, "rewards/margins": 0.1965949833393097, "rewards/rejected": -0.19717815518379211, "step": 5424 }, { "epoch": 3.751728907330567, "grad_norm": 10.456550598144531, "learning_rate": 3.471261718149686e-05, "log_odds_chosen": 8.61825942993164, "log_odds_ratio": -0.000583041284698993, "logits/chosen": -0.5446162819862366, "logits/rejected": -0.5490441918373108, "logps/chosen": -0.0019385780906304717, "logps/rejected": -1.8861501216888428, "loss": 1.7346, "nll_loss": 0.4335922598838806, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019385780615266412, "rewards/margins": 0.18842115998268127, "rewards/rejected": -0.18861502408981323, "step": 5425 }, { "epoch": 3.752420470262794, "grad_norm": 7.999762535095215, "learning_rate": 3.47087751652067e-05, "log_odds_chosen": 9.54379653930664, "log_odds_ratio": -0.0001644420699449256, "logits/chosen": -0.29200607538223267, "logits/rejected": -0.3707253336906433, "logps/chosen": -0.0003151461132802069, "logps/rejected": -1.3649253845214844, "loss": 1.1779, "nll_loss": 0.29446306824684143, "rewards/accuracies": 1.0, "rewards/chosen": -3.151460987282917e-05, "rewards/margins": 0.1364610344171524, "rewards/rejected": -0.1364925503730774, "step": 5426 }, { "epoch": 3.7531120331950207, "grad_norm": 10.632295608520508, "learning_rate": 3.4704933148916555e-05, "log_odds_chosen": 8.853446006774902, "log_odds_ratio": -0.018114643171429634, "logits/chosen": -0.7446925640106201, "logits/rejected": -0.7750409841537476, "logps/chosen": -0.01745034009218216, "logps/rejected": -2.154756784439087, "loss": 2.0903, "nll_loss": 0.5207608342170715, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017450341256335378, "rewards/margins": 0.21373064815998077, "rewards/rejected": -0.2154756784439087, "step": 5427 }, { "epoch": 3.7538035961272476, "grad_norm": 17.957927703857422, "learning_rate": 3.470109113262641e-05, "log_odds_chosen": 10.859111785888672, "log_odds_ratio": -5.457565202959813e-05, "logits/chosen": -0.37744563817977905, "logits/rejected": -0.5010693073272705, "logps/chosen": -0.00040545733645558357, "logps/rejected": -2.7265172004699707, "loss": 1.4821, "nll_loss": 0.37053102254867554, "rewards/accuracies": 1.0, "rewards/chosen": -4.054573582834564e-05, "rewards/margins": 0.27261117100715637, "rewards/rejected": -0.27265170216560364, "step": 5428 }, { "epoch": 3.7544951590594744, "grad_norm": 14.420669555664062, "learning_rate": 3.469724911633625e-05, "log_odds_chosen": 9.603649139404297, "log_odds_ratio": -0.0013310567010194063, "logits/chosen": -0.5402454733848572, "logits/rejected": -0.6616002321243286, "logps/chosen": -0.002875036559998989, "logps/rejected": -1.7499427795410156, "loss": 1.7188, "nll_loss": 0.429568886756897, "rewards/accuracies": 1.0, "rewards/chosen": -0.00028750361525453627, "rewards/margins": 0.17470680177211761, "rewards/rejected": -0.17499428987503052, "step": 5429 }, { "epoch": 3.7551867219917012, "grad_norm": 5.218062400817871, "learning_rate": 3.4693407100046105e-05, "log_odds_chosen": 7.883607387542725, "log_odds_ratio": -0.005045240744948387, "logits/chosen": -0.5111541748046875, "logits/rejected": -0.5074277520179749, "logps/chosen": -0.008123712614178658, "logps/rejected": -1.525122880935669, "loss": 1.946, "nll_loss": 0.4859926700592041, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008123713196255267, "rewards/margins": 0.15169993042945862, "rewards/rejected": -0.1525123119354248, "step": 5430 }, { "epoch": 3.755878284923928, "grad_norm": 22.380603790283203, "learning_rate": 3.468956508375596e-05, "log_odds_chosen": 8.503776550292969, "log_odds_ratio": -0.004393002949655056, "logits/chosen": -0.6649891138076782, "logits/rejected": -0.6843799948692322, "logps/chosen": -0.024574745446443558, "logps/rejected": -1.8706204891204834, "loss": 1.8433, "nll_loss": 0.46039655804634094, "rewards/accuracies": 1.0, "rewards/chosen": -0.002457474824041128, "rewards/margins": 0.18460458517074585, "rewards/rejected": -0.18706205487251282, "step": 5431 }, { "epoch": 3.756569847856155, "grad_norm": 14.127603530883789, "learning_rate": 3.468572306746581e-05, "log_odds_chosen": 10.379104614257812, "log_odds_ratio": -6.107140507083386e-05, "logits/chosen": -0.7364578247070312, "logits/rejected": -0.7871562838554382, "logps/chosen": -0.00017190205107908696, "logps/rejected": -1.7768229246139526, "loss": 1.4118, "nll_loss": 0.3529343605041504, "rewards/accuracies": 1.0, "rewards/chosen": -1.719020656310022e-05, "rewards/margins": 0.1776650995016098, "rewards/rejected": -0.1776822805404663, "step": 5432 }, { "epoch": 3.7572614107883817, "grad_norm": 9.674400329589844, "learning_rate": 3.4681881051175656e-05, "log_odds_chosen": 8.356182098388672, "log_odds_ratio": -0.029486514627933502, "logits/chosen": -0.4162288010120392, "logits/rejected": -0.4873002767562866, "logps/chosen": -0.007492970675230026, "logps/rejected": -1.1829661130905151, "loss": 1.1816, "nll_loss": 0.29244640469551086, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007492971490137279, "rewards/margins": 0.11754731833934784, "rewards/rejected": -0.11829661577939987, "step": 5433 }, { "epoch": 3.7579529737206085, "grad_norm": 9.0473051071167, "learning_rate": 3.4678039034885515e-05, "log_odds_chosen": 8.62293815612793, "log_odds_ratio": -0.0006495437119156122, "logits/chosen": -0.8198421597480774, "logits/rejected": -0.8905788660049438, "logps/chosen": -0.004521318711340427, "logps/rejected": -1.5219128131866455, "loss": 1.6793, "nll_loss": 0.419758677482605, "rewards/accuracies": 1.0, "rewards/chosen": -0.00045213187695480883, "rewards/margins": 0.15173915028572083, "rewards/rejected": -0.15219128131866455, "step": 5434 }, { "epoch": 3.7586445366528354, "grad_norm": 7.616481781005859, "learning_rate": 3.467419701859536e-05, "log_odds_chosen": 9.509088516235352, "log_odds_ratio": -0.000644803571049124, "logits/chosen": -0.39439302682876587, "logits/rejected": -0.45953693985939026, "logps/chosen": -0.0015192057471722364, "logps/rejected": -2.0177860260009766, "loss": 1.2328, "nll_loss": 0.3081299960613251, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015192058344837278, "rewards/margins": 0.20162668824195862, "rewards/rejected": -0.2017786204814911, "step": 5435 }, { "epoch": 3.759336099585062, "grad_norm": 9.757256507873535, "learning_rate": 3.467035500230521e-05, "log_odds_chosen": 8.616477966308594, "log_odds_ratio": -0.0004357987781986594, "logits/chosen": -0.0953388512134552, "logits/rejected": -0.15484541654586792, "logps/chosen": -0.0006469248910434544, "logps/rejected": -1.1625604629516602, "loss": 1.7917, "nll_loss": 0.4478727877140045, "rewards/accuracies": 1.0, "rewards/chosen": -6.469249638030306e-05, "rewards/margins": 0.11619135737419128, "rewards/rejected": -0.11625605821609497, "step": 5436 }, { "epoch": 3.760027662517289, "grad_norm": 8.751032829284668, "learning_rate": 3.4666512986015066e-05, "log_odds_chosen": 9.165270805358887, "log_odds_ratio": -0.00036068703047931194, "logits/chosen": -0.7682297229766846, "logits/rejected": -0.8347011804580688, "logps/chosen": -0.008912745863199234, "logps/rejected": -2.608675479888916, "loss": 2.0795, "nll_loss": 0.5198372006416321, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008912745979614556, "rewards/margins": 0.25997626781463623, "rewards/rejected": -0.26086753606796265, "step": 5437 }, { "epoch": 3.760719225449516, "grad_norm": 9.613621711730957, "learning_rate": 3.466267096972491e-05, "log_odds_chosen": 8.684236526489258, "log_odds_ratio": -0.08695728331804276, "logits/chosen": -0.3187865614891052, "logits/rejected": -0.3907451629638672, "logps/chosen": -0.019023144617676735, "logps/rejected": -1.8880045413970947, "loss": 1.1915, "nll_loss": 0.2891791760921478, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019023144850507379, "rewards/margins": 0.1868981420993805, "rewards/rejected": -0.18880045413970947, "step": 5438 }, { "epoch": 3.7614107883817427, "grad_norm": 10.468111991882324, "learning_rate": 3.4658828953434764e-05, "log_odds_chosen": 7.793971538543701, "log_odds_ratio": -0.04423436149954796, "logits/chosen": -0.4522883892059326, "logits/rejected": -0.37912601232528687, "logps/chosen": -0.013493603095412254, "logps/rejected": -2.09865140914917, "loss": 1.5326, "nll_loss": 0.3787163496017456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013493604492396116, "rewards/margins": 0.20851579308509827, "rewards/rejected": -0.20986516773700714, "step": 5439 }, { "epoch": 3.7621023513139695, "grad_norm": 6.3440141677856445, "learning_rate": 3.4654986937144616e-05, "log_odds_chosen": 9.558298110961914, "log_odds_ratio": -0.0001284776080865413, "logits/chosen": -0.2267589271068573, "logits/rejected": -0.3366238474845886, "logps/chosen": -0.00021380206453613937, "logps/rejected": -1.3035376071929932, "loss": 1.1763, "nll_loss": 0.2940501868724823, "rewards/accuracies": 1.0, "rewards/chosen": -2.1380204998422414e-05, "rewards/margins": 0.13033238053321838, "rewards/rejected": -0.13035376369953156, "step": 5440 }, { "epoch": 3.7627939142461964, "grad_norm": 7.013885974884033, "learning_rate": 3.465114492085447e-05, "log_odds_chosen": 9.993585586547852, "log_odds_ratio": -0.00019201546092517674, "logits/chosen": -0.19097547233104706, "logits/rejected": -0.18724490702152252, "logps/chosen": -0.0007030193228274584, "logps/rejected": -2.3049535751342773, "loss": 1.4744, "nll_loss": 0.36857688426971436, "rewards/accuracies": 1.0, "rewards/chosen": -7.030193955870345e-05, "rewards/margins": 0.23042505979537964, "rewards/rejected": -0.23049534857273102, "step": 5441 }, { "epoch": 3.763485477178423, "grad_norm": 9.381342887878418, "learning_rate": 3.4647302904564314e-05, "log_odds_chosen": 10.290952682495117, "log_odds_ratio": -0.0002553090744186193, "logits/chosen": -0.35363638401031494, "logits/rejected": -0.38287413120269775, "logps/chosen": -0.0024143143091350794, "logps/rejected": -1.7505064010620117, "loss": 1.1547, "nll_loss": 0.2886606752872467, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024143143673427403, "rewards/margins": 0.17480921745300293, "rewards/rejected": -0.17505064606666565, "step": 5442 }, { "epoch": 3.76417704011065, "grad_norm": 8.822539329528809, "learning_rate": 3.4643460888274174e-05, "log_odds_chosen": 9.295948028564453, "log_odds_ratio": -0.001861661090515554, "logits/chosen": -0.46477746963500977, "logits/rejected": -0.4023154377937317, "logps/chosen": -0.0046881274320185184, "logps/rejected": -1.7715680599212646, "loss": 1.3101, "nll_loss": 0.32735079526901245, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046881273738108575, "rewards/margins": 0.17668798565864563, "rewards/rejected": -0.17715679109096527, "step": 5443 }, { "epoch": 3.764868603042877, "grad_norm": 11.759678840637207, "learning_rate": 3.463961887198402e-05, "log_odds_chosen": 7.539575099945068, "log_odds_ratio": -0.03255218267440796, "logits/chosen": -0.3159242570400238, "logits/rejected": -0.3614596426486969, "logps/chosen": -0.013484388589859009, "logps/rejected": -1.5943424701690674, "loss": 2.2776, "nll_loss": 0.5661398768424988, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013484389055520296, "rewards/margins": 0.15808580815792084, "rewards/rejected": -0.1594342440366745, "step": 5444 }, { "epoch": 3.7655601659751037, "grad_norm": 10.029745101928711, "learning_rate": 3.463577685569387e-05, "log_odds_chosen": 8.731345176696777, "log_odds_ratio": -0.0019150073640048504, "logits/chosen": -0.4098225235939026, "logits/rejected": -0.4700325131416321, "logps/chosen": -0.00333023676648736, "logps/rejected": -1.3874382972717285, "loss": 1.688, "nll_loss": 0.4218185544013977, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003330236650072038, "rewards/margins": 0.13841082155704498, "rewards/rejected": -0.13874384760856628, "step": 5445 }, { "epoch": 3.7662517289073305, "grad_norm": 4.650477886199951, "learning_rate": 3.463193483940372e-05, "log_odds_chosen": 8.619306564331055, "log_odds_ratio": -0.010178805328905582, "logits/chosen": -0.28032341599464417, "logits/rejected": -0.30539238452911377, "logps/chosen": -0.012560890056192875, "logps/rejected": -1.3080579042434692, "loss": 1.2625, "nll_loss": 0.3146149814128876, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012560889590531588, "rewards/margins": 0.12954971194267273, "rewards/rejected": -0.13080579042434692, "step": 5446 }, { "epoch": 3.7669432918395573, "grad_norm": 10.749146461486816, "learning_rate": 3.462809282311357e-05, "log_odds_chosen": 10.158448219299316, "log_odds_ratio": -8.47989649628289e-05, "logits/chosen": -0.5772304534912109, "logits/rejected": -0.7337908744812012, "logps/chosen": -0.0004312293021939695, "logps/rejected": -1.7817490100860596, "loss": 1.0402, "nll_loss": 0.26004502177238464, "rewards/accuracies": 1.0, "rewards/chosen": -4.312293458497152e-05, "rewards/margins": 0.17813177406787872, "rewards/rejected": -0.1781749129295349, "step": 5447 }, { "epoch": 3.767634854771784, "grad_norm": 10.537328720092773, "learning_rate": 3.462425080682342e-05, "log_odds_chosen": 10.657795906066895, "log_odds_ratio": -3.852910958812572e-05, "logits/chosen": -0.30140772461891174, "logits/rejected": -0.2932639718055725, "logps/chosen": -0.00013195013161748648, "logps/rejected": -1.654346227645874, "loss": 1.249, "nll_loss": 0.3122471272945404, "rewards/accuracies": 1.0, "rewards/chosen": -1.3195011888456065e-05, "rewards/margins": 0.16542142629623413, "rewards/rejected": -0.16543462872505188, "step": 5448 }, { "epoch": 3.768326417704011, "grad_norm": 7.2749714851379395, "learning_rate": 3.462040879053327e-05, "log_odds_chosen": 9.7667236328125, "log_odds_ratio": -0.0003579896583687514, "logits/chosen": -0.7348923087120056, "logits/rejected": -0.7449120879173279, "logps/chosen": -0.0003365372831467539, "logps/rejected": -1.845354437828064, "loss": 1.2263, "nll_loss": 0.30654260516166687, "rewards/accuracies": 1.0, "rewards/chosen": -3.3653730497462675e-05, "rewards/margins": 0.1845017671585083, "rewards/rejected": -0.1845354437828064, "step": 5449 }, { "epoch": 3.769017980636238, "grad_norm": 8.713778495788574, "learning_rate": 3.461656677424313e-05, "log_odds_chosen": 9.448784828186035, "log_odds_ratio": -0.0003078113659285009, "logits/chosen": -0.4085184633731842, "logits/rejected": -0.46104303002357483, "logps/chosen": -0.0005590122891589999, "logps/rejected": -1.4110808372497559, "loss": 1.4083, "nll_loss": 0.35204440355300903, "rewards/accuracies": 1.0, "rewards/chosen": -5.590123328147456e-05, "rewards/margins": 0.14105218648910522, "rewards/rejected": -0.14110808074474335, "step": 5450 }, { "epoch": 3.7697095435684647, "grad_norm": 12.606882095336914, "learning_rate": 3.461272475795297e-05, "log_odds_chosen": 10.149349212646484, "log_odds_ratio": -0.0002808647695928812, "logits/chosen": -0.7192884683609009, "logits/rejected": -0.7955207228660583, "logps/chosen": -0.00038690725341439247, "logps/rejected": -2.0651695728302, "loss": 1.6302, "nll_loss": 0.40752604603767395, "rewards/accuracies": 1.0, "rewards/chosen": -3.869072315865196e-05, "rewards/margins": 0.2064782679080963, "rewards/rejected": -0.20651696622371674, "step": 5451 }, { "epoch": 3.7704011065006915, "grad_norm": 10.577102661132812, "learning_rate": 3.4608882741662825e-05, "log_odds_chosen": 9.80659008026123, "log_odds_ratio": -0.00022310206259135157, "logits/chosen": -0.4899018704891205, "logits/rejected": -0.5291027426719666, "logps/chosen": -0.010257050395011902, "logps/rejected": -2.334555149078369, "loss": 1.7324, "nll_loss": 0.4330710172653198, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010257052490487695, "rewards/margins": 0.23242978751659393, "rewards/rejected": -0.23345550894737244, "step": 5452 }, { "epoch": 3.7710926694329183, "grad_norm": 7.982278823852539, "learning_rate": 3.460504072537268e-05, "log_odds_chosen": 9.754339218139648, "log_odds_ratio": -0.00030538038117811084, "logits/chosen": -0.3912992775440216, "logits/rejected": -0.44544699788093567, "logps/chosen": -0.0001893314765766263, "logps/rejected": -1.4891002178192139, "loss": 1.9684, "nll_loss": 0.4920760691165924, "rewards/accuracies": 1.0, "rewards/chosen": -1.893314765766263e-05, "rewards/margins": 0.14889109134674072, "rewards/rejected": -0.1489100158214569, "step": 5453 }, { "epoch": 3.771784232365145, "grad_norm": 16.61277198791504, "learning_rate": 3.460119870908253e-05, "log_odds_chosen": 9.0419921875, "log_odds_ratio": -0.19790831208229065, "logits/chosen": -0.3988495171070099, "logits/rejected": -0.5029575824737549, "logps/chosen": -0.031020207330584526, "logps/rejected": -1.7238812446594238, "loss": 1.7822, "nll_loss": 0.42575719952583313, "rewards/accuracies": 0.875, "rewards/chosen": -0.00310202082619071, "rewards/margins": 0.1692861169576645, "rewards/rejected": -0.17238813638687134, "step": 5454 }, { "epoch": 3.772475795297372, "grad_norm": 7.851132869720459, "learning_rate": 3.4597356692792376e-05, "log_odds_chosen": 9.307454109191895, "log_odds_ratio": -0.0012680424842983484, "logits/chosen": -0.6060723662376404, "logits/rejected": -0.6443691253662109, "logps/chosen": -0.001814560848288238, "logps/rejected": -1.7838623523712158, "loss": 2.055, "nll_loss": 0.5136182904243469, "rewards/accuracies": 1.0, "rewards/chosen": -0.000181456096470356, "rewards/margins": 0.17820480465888977, "rewards/rejected": -0.17838624119758606, "step": 5455 }, { "epoch": 3.773167358229599, "grad_norm": 10.791706085205078, "learning_rate": 3.459351467650223e-05, "log_odds_chosen": 9.634557723999023, "log_odds_ratio": -0.0003079564485233277, "logits/chosen": -0.46386775374412537, "logits/rejected": -0.6445642709732056, "logps/chosen": -0.001995598431676626, "logps/rejected": -2.0451226234436035, "loss": 1.7906, "nll_loss": 0.44761550426483154, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019955984316766262, "rewards/margins": 0.20431271195411682, "rewards/rejected": -0.20451226830482483, "step": 5456 }, { "epoch": 3.7738589211618256, "grad_norm": 13.1845121383667, "learning_rate": 3.458967266021208e-05, "log_odds_chosen": 10.19453239440918, "log_odds_ratio": -0.0001586043363204226, "logits/chosen": -0.5500466823577881, "logits/rejected": -0.6049355268478394, "logps/chosen": -0.00206363620236516, "logps/rejected": -2.23915958404541, "loss": 2.229, "nll_loss": 0.5572376251220703, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020636359113268554, "rewards/margins": 0.2237095981836319, "rewards/rejected": -0.2239159643650055, "step": 5457 }, { "epoch": 3.7745504840940525, "grad_norm": 8.474950790405273, "learning_rate": 3.4585830643921926e-05, "log_odds_chosen": 9.389838218688965, "log_odds_ratio": -0.012782618403434753, "logits/chosen": -0.5932435393333435, "logits/rejected": -0.5870939493179321, "logps/chosen": -0.004997893236577511, "logps/rejected": -1.9567699432373047, "loss": 0.9998, "nll_loss": 0.2486783266067505, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004997893120162189, "rewards/margins": 0.19517722725868225, "rewards/rejected": -0.1956770122051239, "step": 5458 }, { "epoch": 3.7752420470262793, "grad_norm": 14.28724193572998, "learning_rate": 3.4581988627631786e-05, "log_odds_chosen": 11.236981391906738, "log_odds_ratio": -1.6953132217167877e-05, "logits/chosen": -0.4602336287498474, "logits/rejected": -0.5732054114341736, "logps/chosen": -0.00024608871899545193, "logps/rejected": -2.546398401260376, "loss": 1.8179, "nll_loss": 0.4544837176799774, "rewards/accuracies": 1.0, "rewards/chosen": -2.460887117194943e-05, "rewards/margins": 0.25461524724960327, "rewards/rejected": -0.2546398639678955, "step": 5459 }, { "epoch": 3.775933609958506, "grad_norm": 11.284528732299805, "learning_rate": 3.457814661134163e-05, "log_odds_chosen": 9.693879127502441, "log_odds_ratio": -0.015536747872829437, "logits/chosen": -0.32484331727027893, "logits/rejected": -0.3892527222633362, "logps/chosen": -0.006452981382608414, "logps/rejected": -1.7182650566101074, "loss": 2.2155, "nll_loss": 0.5523088574409485, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006452981033362448, "rewards/margins": 0.17118120193481445, "rewards/rejected": -0.17182651162147522, "step": 5460 }, { "epoch": 3.776625172890733, "grad_norm": 7.1791911125183105, "learning_rate": 3.4574304595051484e-05, "log_odds_chosen": 8.559253692626953, "log_odds_ratio": -0.0024300473742187023, "logits/chosen": -0.2576027512550354, "logits/rejected": -0.33050549030303955, "logps/chosen": -0.005318128038197756, "logps/rejected": -1.4191077947616577, "loss": 1.0917, "nll_loss": 0.2726795971393585, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005318127805367112, "rewards/margins": 0.1413789689540863, "rewards/rejected": -0.14191077649593353, "step": 5461 }, { "epoch": 3.77731673582296, "grad_norm": 11.800247192382812, "learning_rate": 3.4570462578761336e-05, "log_odds_chosen": 9.39711856842041, "log_odds_ratio": -0.0012419902486726642, "logits/chosen": -0.5844036936759949, "logits/rejected": -0.6802327632904053, "logps/chosen": -0.0009554505231790245, "logps/rejected": -1.7297091484069824, "loss": 1.656, "nll_loss": 0.41388368606567383, "rewards/accuracies": 1.0, "rewards/chosen": -9.554505231790245e-05, "rewards/margins": 0.17287535965442657, "rewards/rejected": -0.17297090590000153, "step": 5462 }, { "epoch": 3.7780082987551866, "grad_norm": 8.81169605255127, "learning_rate": 3.456662056247119e-05, "log_odds_chosen": 10.17690658569336, "log_odds_ratio": -5.8187048125546426e-05, "logits/chosen": -0.39503562450408936, "logits/rejected": -0.4741494059562683, "logps/chosen": -0.00017182572628371418, "logps/rejected": -1.5729782581329346, "loss": 1.3494, "nll_loss": 0.3373528718948364, "rewards/accuracies": 1.0, "rewards/chosen": -1.718257408356294e-05, "rewards/margins": 0.15728065371513367, "rewards/rejected": -0.15729783475399017, "step": 5463 }, { "epoch": 3.7786998616874135, "grad_norm": 7.847090721130371, "learning_rate": 3.4562778546181034e-05, "log_odds_chosen": 10.096391677856445, "log_odds_ratio": -7.392516999971122e-05, "logits/chosen": -0.7516282796859741, "logits/rejected": -0.7118873596191406, "logps/chosen": -0.00014651704987045377, "logps/rejected": -1.446256160736084, "loss": 1.2984, "nll_loss": 0.3245847523212433, "rewards/accuracies": 1.0, "rewards/chosen": -1.465170589654008e-05, "rewards/margins": 0.14461097121238708, "rewards/rejected": -0.14462561905384064, "step": 5464 }, { "epoch": 3.7793914246196403, "grad_norm": 9.622819900512695, "learning_rate": 3.455893652989089e-05, "log_odds_chosen": 7.836572647094727, "log_odds_ratio": -0.14803458750247955, "logits/chosen": -0.47771313786506653, "logits/rejected": -0.5944380760192871, "logps/chosen": -0.033774301409721375, "logps/rejected": -1.84254789352417, "loss": 1.4532, "nll_loss": 0.348498672246933, "rewards/accuracies": 0.875, "rewards/chosen": -0.003377429908141494, "rewards/margins": 0.18087737262248993, "rewards/rejected": -0.18425479531288147, "step": 5465 }, { "epoch": 3.780082987551867, "grad_norm": 13.050857543945312, "learning_rate": 3.455509451360074e-05, "log_odds_chosen": 10.594888687133789, "log_odds_ratio": -6.776869122404605e-05, "logits/chosen": -0.5613893866539001, "logits/rejected": -0.6028444170951843, "logps/chosen": -0.0006440202705562115, "logps/rejected": -2.637077569961548, "loss": 1.6949, "nll_loss": 0.42372721433639526, "rewards/accuracies": 1.0, "rewards/chosen": -6.440203287638724e-05, "rewards/margins": 0.263643354177475, "rewards/rejected": -0.2637077569961548, "step": 5466 }, { "epoch": 3.780774550484094, "grad_norm": 12.926630020141602, "learning_rate": 3.4551252497310585e-05, "log_odds_chosen": 10.582441329956055, "log_odds_ratio": -3.3281525247730315e-05, "logits/chosen": -0.5533230900764465, "logits/rejected": -0.5581379532814026, "logps/chosen": -0.00013502439833246171, "logps/rejected": -1.6435058116912842, "loss": 1.3104, "nll_loss": 0.3276028633117676, "rewards/accuracies": 1.0, "rewards/chosen": -1.3502440197044052e-05, "rewards/margins": 0.16433709859848022, "rewards/rejected": -0.16435059905052185, "step": 5467 }, { "epoch": 3.7814661134163208, "grad_norm": 12.386612892150879, "learning_rate": 3.4547410481020444e-05, "log_odds_chosen": 7.505845546722412, "log_odds_ratio": -0.022535445168614388, "logits/chosen": -0.46632689237594604, "logits/rejected": -0.4704776108264923, "logps/chosen": -0.02573973685503006, "logps/rejected": -1.221304178237915, "loss": 1.2671, "nll_loss": 0.31453269720077515, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025739737320691347, "rewards/margins": 0.11955644190311432, "rewards/rejected": -0.12213042378425598, "step": 5468 }, { "epoch": 3.7821576763485476, "grad_norm": 9.335777282714844, "learning_rate": 3.454356846473029e-05, "log_odds_chosen": 9.750051498413086, "log_odds_ratio": -0.00015797732339706272, "logits/chosen": -0.3987863063812256, "logits/rejected": -0.390306293964386, "logps/chosen": -0.00020262066391296685, "logps/rejected": -1.3902456760406494, "loss": 2.2072, "nll_loss": 0.5517906546592712, "rewards/accuracies": 1.0, "rewards/chosen": -2.0262066755094565e-05, "rewards/margins": 0.13900430500507355, "rewards/rejected": -0.13902458548545837, "step": 5469 }, { "epoch": 3.7828492392807744, "grad_norm": 10.659040451049805, "learning_rate": 3.453972644844014e-05, "log_odds_chosen": 10.151968002319336, "log_odds_ratio": -0.00011392178566893563, "logits/chosen": -0.5698919296264648, "logits/rejected": -0.6047060489654541, "logps/chosen": -0.001036653877235949, "logps/rejected": -1.7720322608947754, "loss": 1.4491, "nll_loss": 0.36225903034210205, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010366539208916947, "rewards/margins": 0.1770995706319809, "rewards/rejected": -0.1772032380104065, "step": 5470 }, { "epoch": 3.7835408022130013, "grad_norm": 8.225078582763672, "learning_rate": 3.4535884432149995e-05, "log_odds_chosen": 8.153675079345703, "log_odds_ratio": -0.00462321937084198, "logits/chosen": -0.5210813283920288, "logits/rejected": -0.5549103021621704, "logps/chosen": -0.010970650240778923, "logps/rejected": -1.762834906578064, "loss": 1.6095, "nll_loss": 0.4019153416156769, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010970650473609567, "rewards/margins": 0.175186425447464, "rewards/rejected": -0.17628349363803864, "step": 5471 }, { "epoch": 3.784232365145228, "grad_norm": 10.515767097473145, "learning_rate": 3.453204241585985e-05, "log_odds_chosen": 8.277547836303711, "log_odds_ratio": -0.0133607042953372, "logits/chosen": -0.6946769952774048, "logits/rejected": -0.8080295324325562, "logps/chosen": -0.028481382876634598, "logps/rejected": -1.5522637367248535, "loss": 1.68, "nll_loss": 0.4186699390411377, "rewards/accuracies": 1.0, "rewards/chosen": -0.002848138101398945, "rewards/margins": 0.15237824618816376, "rewards/rejected": -0.15522637963294983, "step": 5472 }, { "epoch": 3.784923928077455, "grad_norm": 9.187445640563965, "learning_rate": 3.452820039956969e-05, "log_odds_chosen": 8.612812995910645, "log_odds_ratio": -0.0021816876251250505, "logits/chosen": -0.6883058547973633, "logits/rejected": -0.765465259552002, "logps/chosen": -0.010355999693274498, "logps/rejected": -1.5710110664367676, "loss": 1.5977, "nll_loss": 0.3991968333721161, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010355999693274498, "rewards/margins": 0.15606552362442017, "rewards/rejected": -0.157101109623909, "step": 5473 }, { "epoch": 3.7856154910096818, "grad_norm": 7.578182220458984, "learning_rate": 3.4524358383279545e-05, "log_odds_chosen": 9.225852966308594, "log_odds_ratio": -0.13350188732147217, "logits/chosen": -0.33595073223114014, "logits/rejected": -0.38651329278945923, "logps/chosen": -0.037560053169727325, "logps/rejected": -2.838648557662964, "loss": 1.7924, "nll_loss": 0.4347480535507202, "rewards/accuracies": 0.875, "rewards/chosen": -0.003756005549803376, "rewards/margins": 0.28010886907577515, "rewards/rejected": -0.2838648557662964, "step": 5474 }, { "epoch": 3.7863070539419086, "grad_norm": 32.91754913330078, "learning_rate": 3.45205163669894e-05, "log_odds_chosen": 5.964416980743408, "log_odds_ratio": -0.6886153221130371, "logits/chosen": -0.5881133079528809, "logits/rejected": -0.6527892351150513, "logps/chosen": -0.09204834699630737, "logps/rejected": -1.7693171501159668, "loss": 1.8929, "nll_loss": 0.40435975790023804, "rewards/accuracies": 0.75, "rewards/chosen": -0.009204834699630737, "rewards/margins": 0.16772687435150146, "rewards/rejected": -0.1769317090511322, "step": 5475 }, { "epoch": 3.7869986168741354, "grad_norm": 13.364775657653809, "learning_rate": 3.451667435069924e-05, "log_odds_chosen": 8.272403717041016, "log_odds_ratio": -0.027833154425024986, "logits/chosen": -0.6419265866279602, "logits/rejected": -0.6762416362762451, "logps/chosen": -0.007033531554043293, "logps/rejected": -1.4588756561279297, "loss": 1.3591, "nll_loss": 0.33700141310691833, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007033531437627971, "rewards/margins": 0.1451842188835144, "rewards/rejected": -0.1458875685930252, "step": 5476 }, { "epoch": 3.7876901798063622, "grad_norm": 11.957737922668457, "learning_rate": 3.45128323344091e-05, "log_odds_chosen": 10.761993408203125, "log_odds_ratio": -2.629458685987629e-05, "logits/chosen": -0.8296110033988953, "logits/rejected": -0.8898372054100037, "logps/chosen": -0.00018794195784721524, "logps/rejected": -2.174938201904297, "loss": 1.4994, "nll_loss": 0.3748519718647003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8794196876115166e-05, "rewards/margins": 0.21747499704360962, "rewards/rejected": -0.21749380230903625, "step": 5477 }, { "epoch": 3.788381742738589, "grad_norm": 4.69172477722168, "learning_rate": 3.450899031811895e-05, "log_odds_chosen": 8.808714866638184, "log_odds_ratio": -0.006193371489644051, "logits/chosen": -0.48362404108047485, "logits/rejected": -0.5160248875617981, "logps/chosen": -0.00858994759619236, "logps/rejected": -2.0021519660949707, "loss": 1.4342, "nll_loss": 0.35793858766555786, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008589947246946394, "rewards/margins": 0.19935621321201324, "rewards/rejected": -0.2002152055501938, "step": 5478 }, { "epoch": 3.789073305670816, "grad_norm": 7.904622554779053, "learning_rate": 3.45051483018288e-05, "log_odds_chosen": 8.666353225708008, "log_odds_ratio": -0.11096343398094177, "logits/chosen": -0.42745456099510193, "logits/rejected": -0.4892045259475708, "logps/chosen": -0.028296923264861107, "logps/rejected": -1.572574257850647, "loss": 1.3645, "nll_loss": 0.3300341069698334, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028296923264861107, "rewards/margins": 0.15442773699760437, "rewards/rejected": -0.15725743770599365, "step": 5479 }, { "epoch": 3.7897648686030427, "grad_norm": 12.057756423950195, "learning_rate": 3.450130628553865e-05, "log_odds_chosen": 10.21658706665039, "log_odds_ratio": -0.00010321993613615632, "logits/chosen": -0.7294517159461975, "logits/rejected": -0.748309314250946, "logps/chosen": -0.00037140565109439194, "logps/rejected": -1.8634538650512695, "loss": 1.8853, "nll_loss": 0.471309095621109, "rewards/accuracies": 1.0, "rewards/chosen": -3.714056219905615e-05, "rewards/margins": 0.18630823493003845, "rewards/rejected": -0.18634536862373352, "step": 5480 }, { "epoch": 3.7904564315352696, "grad_norm": 8.47179126739502, "learning_rate": 3.4497464269248505e-05, "log_odds_chosen": 8.91946029663086, "log_odds_ratio": -0.0006514595006592572, "logits/chosen": -0.3386085331439972, "logits/rejected": -0.4115927219390869, "logps/chosen": -0.0004757773713208735, "logps/rejected": -1.090081810951233, "loss": 1.3782, "nll_loss": 0.34447595477104187, "rewards/accuracies": 1.0, "rewards/chosen": -4.757773785968311e-05, "rewards/margins": 0.1089605987071991, "rewards/rejected": -0.10900817811489105, "step": 5481 }, { "epoch": 3.7911479944674964, "grad_norm": 11.323335647583008, "learning_rate": 3.449362225295835e-05, "log_odds_chosen": 9.457518577575684, "log_odds_ratio": -0.0002437549119349569, "logits/chosen": -0.793126106262207, "logits/rejected": -0.8394900560379028, "logps/chosen": -0.0006705039413645864, "logps/rejected": -1.7430596351623535, "loss": 1.8774, "nll_loss": 0.4693189561367035, "rewards/accuracies": 1.0, "rewards/chosen": -6.705040141241625e-05, "rewards/margins": 0.174238920211792, "rewards/rejected": -0.1743059754371643, "step": 5482 }, { "epoch": 3.7918395573997232, "grad_norm": 9.364946365356445, "learning_rate": 3.4489780236668204e-05, "log_odds_chosen": 9.896523475646973, "log_odds_ratio": -0.0010984577238559723, "logits/chosen": -0.49681320786476135, "logits/rejected": -0.40957653522491455, "logps/chosen": -0.02884014882147312, "logps/rejected": -1.6628726720809937, "loss": 1.2071, "nll_loss": 0.301662802696228, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028840149752795696, "rewards/margins": 0.16340325772762299, "rewards/rejected": -0.16628727316856384, "step": 5483 }, { "epoch": 3.79253112033195, "grad_norm": 11.672866821289062, "learning_rate": 3.4485938220378056e-05, "log_odds_chosen": 8.283211708068848, "log_odds_ratio": -0.031149858608841896, "logits/chosen": -0.12222898006439209, "logits/rejected": -0.1867779642343521, "logps/chosen": -0.011603178456425667, "logps/rejected": -2.018061637878418, "loss": 1.6232, "nll_loss": 0.4026898145675659, "rewards/accuracies": 1.0, "rewards/chosen": -0.001160317799076438, "rewards/margins": 0.20064584910869598, "rewards/rejected": -0.20180615782737732, "step": 5484 }, { "epoch": 3.793222683264177, "grad_norm": 16.668764114379883, "learning_rate": 3.44820962040879e-05, "log_odds_chosen": 8.102465629577637, "log_odds_ratio": -0.21552255749702454, "logits/chosen": -0.5265755653381348, "logits/rejected": -0.5696606040000916, "logps/chosen": -0.033316683024168015, "logps/rejected": -1.558127522468567, "loss": 1.6997, "nll_loss": 0.40336960554122925, "rewards/accuracies": 0.875, "rewards/chosen": -0.003331668209284544, "rewards/margins": 0.1524810940027237, "rewards/rejected": -0.15581277012825012, "step": 5485 }, { "epoch": 3.7939142461964037, "grad_norm": 9.900116920471191, "learning_rate": 3.447825418779776e-05, "log_odds_chosen": 10.101983070373535, "log_odds_ratio": -0.00040225035627372563, "logits/chosen": -0.45735013484954834, "logits/rejected": -0.5378046035766602, "logps/chosen": -0.0009811477502807975, "logps/rejected": -1.8887546062469482, "loss": 1.1332, "nll_loss": 0.2832505702972412, "rewards/accuracies": 1.0, "rewards/chosen": -9.811478230403736e-05, "rewards/margins": 0.188777357339859, "rewards/rejected": -0.1888754665851593, "step": 5486 }, { "epoch": 3.7946058091286305, "grad_norm": 8.449058532714844, "learning_rate": 3.4474412171507607e-05, "log_odds_chosen": 10.239374160766602, "log_odds_ratio": -5.797262565465644e-05, "logits/chosen": -0.9100509881973267, "logits/rejected": -0.9818972945213318, "logps/chosen": -0.0004340135201346129, "logps/rejected": -1.7591198682785034, "loss": 1.1093, "nll_loss": 0.2773301601409912, "rewards/accuracies": 1.0, "rewards/chosen": -4.340135274105705e-05, "rewards/margins": 0.17586857080459595, "rewards/rejected": -0.17591197788715363, "step": 5487 }, { "epoch": 3.7952973720608574, "grad_norm": 8.676505088806152, "learning_rate": 3.447057015521746e-05, "log_odds_chosen": 8.678366661071777, "log_odds_ratio": -0.0011480473913252354, "logits/chosen": -0.771061360836029, "logits/rejected": -0.8371139168739319, "logps/chosen": -0.0025784396566450596, "logps/rejected": -1.4748327732086182, "loss": 2.7718, "nll_loss": 0.6928279995918274, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025784395984373987, "rewards/margins": 0.14722543954849243, "rewards/rejected": -0.14748328924179077, "step": 5488 }, { "epoch": 3.795988934993084, "grad_norm": 5.443553924560547, "learning_rate": 3.446672813892731e-05, "log_odds_chosen": 9.067232131958008, "log_odds_ratio": -0.004053841345012188, "logits/chosen": -0.5806595087051392, "logits/rejected": -0.620657205581665, "logps/chosen": -0.002897555474191904, "logps/rejected": -1.4690403938293457, "loss": 1.4835, "nll_loss": 0.37048161029815674, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002897555532399565, "rewards/margins": 0.14661429822444916, "rewards/rejected": -0.14690403640270233, "step": 5489 }, { "epoch": 3.796680497925311, "grad_norm": 9.907852172851562, "learning_rate": 3.4462886122637164e-05, "log_odds_chosen": 9.625545501708984, "log_odds_ratio": -0.025490716099739075, "logits/chosen": -0.4287310838699341, "logits/rejected": -0.47044485807418823, "logps/chosen": -0.07580657303333282, "logps/rejected": -2.3400864601135254, "loss": 1.4314, "nll_loss": 0.35530906915664673, "rewards/accuracies": 1.0, "rewards/chosen": -0.00758065702393651, "rewards/margins": 0.22642800211906433, "rewards/rejected": -0.23400865495204926, "step": 5490 }, { "epoch": 3.797372060857538, "grad_norm": 9.823498725891113, "learning_rate": 3.445904410634701e-05, "log_odds_chosen": 10.158860206604004, "log_odds_ratio": -7.385762000922114e-05, "logits/chosen": -0.7536473274230957, "logits/rejected": -0.8191118836402893, "logps/chosen": -0.00031744063016958535, "logps/rejected": -1.594215989112854, "loss": 0.8827, "nll_loss": 0.2206687480211258, "rewards/accuracies": 1.0, "rewards/chosen": -3.174406447215006e-05, "rewards/margins": 0.15938985347747803, "rewards/rejected": -0.15942159295082092, "step": 5491 }, { "epoch": 3.7980636237897647, "grad_norm": 10.448057174682617, "learning_rate": 3.445520209005686e-05, "log_odds_chosen": 9.904424667358398, "log_odds_ratio": -7.187219307525083e-05, "logits/chosen": -0.7379270195960999, "logits/rejected": -0.8148084878921509, "logps/chosen": -0.00037250755121931434, "logps/rejected": -1.759903073310852, "loss": 1.5139, "nll_loss": 0.3784638047218323, "rewards/accuracies": 1.0, "rewards/chosen": -3.7250756577122957e-05, "rewards/margins": 0.17595306038856506, "rewards/rejected": -0.17599031329154968, "step": 5492 }, { "epoch": 3.7987551867219915, "grad_norm": 13.461642265319824, "learning_rate": 3.4451360073766714e-05, "log_odds_chosen": 9.684606552124023, "log_odds_ratio": -0.00027112613315694034, "logits/chosen": -1.044844150543213, "logits/rejected": -1.0704052448272705, "logps/chosen": -0.0007236794335767627, "logps/rejected": -1.8184692859649658, "loss": 1.7831, "nll_loss": 0.4457409381866455, "rewards/accuracies": 1.0, "rewards/chosen": -7.236794044729322e-05, "rewards/margins": 0.1817745566368103, "rewards/rejected": -0.18184691667556763, "step": 5493 }, { "epoch": 3.7994467496542184, "grad_norm": 7.109790325164795, "learning_rate": 3.444751805747656e-05, "log_odds_chosen": 8.760310173034668, "log_odds_ratio": -0.002181628718972206, "logits/chosen": -0.719219446182251, "logits/rejected": -0.7013097405433655, "logps/chosen": -0.005476102232933044, "logps/rejected": -1.2716690301895142, "loss": 1.5912, "nll_loss": 0.39759424328804016, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005476102232933044, "rewards/margins": 0.12661930918693542, "rewards/rejected": -0.12716691195964813, "step": 5494 }, { "epoch": 3.800138312586445, "grad_norm": 6.834258556365967, "learning_rate": 3.444367604118642e-05, "log_odds_chosen": 10.347545623779297, "log_odds_ratio": -0.00019219264504499733, "logits/chosen": -0.3056133985519409, "logits/rejected": -0.33867448568344116, "logps/chosen": -0.00030523439636453986, "logps/rejected": -1.6999119520187378, "loss": 1.4002, "nll_loss": 0.35004106163978577, "rewards/accuracies": 1.0, "rewards/chosen": -3.052344254683703e-05, "rewards/margins": 0.16996067762374878, "rewards/rejected": -0.16999119520187378, "step": 5495 }, { "epoch": 3.800829875518672, "grad_norm": 8.571849822998047, "learning_rate": 3.4439834024896265e-05, "log_odds_chosen": 9.261777877807617, "log_odds_ratio": -0.0028540126513689756, "logits/chosen": -0.7312589287757874, "logits/rejected": -0.7759085893630981, "logps/chosen": -0.0023634701501578093, "logps/rejected": -1.8596577644348145, "loss": 1.9622, "nll_loss": 0.49027568101882935, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023634699755348265, "rewards/margins": 0.18572944402694702, "rewards/rejected": -0.18596579134464264, "step": 5496 }, { "epoch": 3.801521438450899, "grad_norm": 7.503764629364014, "learning_rate": 3.443599200860612e-05, "log_odds_chosen": 7.809076309204102, "log_odds_ratio": -0.0015637363540008664, "logits/chosen": -0.4097944498062134, "logits/rejected": -0.5135847330093384, "logps/chosen": -0.0023143496364355087, "logps/rejected": -1.2307206392288208, "loss": 1.6174, "nll_loss": 0.40419769287109375, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023143496946431696, "rewards/margins": 0.12284062802791595, "rewards/rejected": -0.1230720579624176, "step": 5497 }, { "epoch": 3.8022130013831257, "grad_norm": 13.338193893432617, "learning_rate": 3.443214999231597e-05, "log_odds_chosen": 8.965517044067383, "log_odds_ratio": -0.29515203833580017, "logits/chosen": -0.05749291926622391, "logits/rejected": -0.11087454110383987, "logps/chosen": -0.03462938219308853, "logps/rejected": -2.2734875679016113, "loss": 1.6813, "nll_loss": 0.39079830050468445, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034629383590072393, "rewards/margins": 0.22388583421707153, "rewards/rejected": -0.22734877467155457, "step": 5498 }, { "epoch": 3.8029045643153525, "grad_norm": 6.508269309997559, "learning_rate": 3.442830797602582e-05, "log_odds_chosen": 9.821160316467285, "log_odds_ratio": -0.00017450877930969, "logits/chosen": -0.846017062664032, "logits/rejected": -0.8550729751586914, "logps/chosen": -0.005477503407746553, "logps/rejected": -2.649533987045288, "loss": 1.6649, "nll_loss": 0.41621753573417664, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005477503291331232, "rewards/margins": 0.2644056975841522, "rewards/rejected": -0.2649534344673157, "step": 5499 }, { "epoch": 3.8035961272475793, "grad_norm": 7.401920795440674, "learning_rate": 3.442446595973567e-05, "log_odds_chosen": 7.918020248413086, "log_odds_ratio": -0.007441325578838587, "logits/chosen": -0.7410718202590942, "logits/rejected": -0.829316258430481, "logps/chosen": -0.03185814246535301, "logps/rejected": -2.061920404434204, "loss": 1.6512, "nll_loss": 0.412044495344162, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031858140137046576, "rewards/margins": 0.20300622284412384, "rewards/rejected": -0.2061920464038849, "step": 5500 }, { "epoch": 3.804287690179806, "grad_norm": 12.682905197143555, "learning_rate": 3.442062394344552e-05, "log_odds_chosen": 9.693799018859863, "log_odds_ratio": -0.0005106168682686985, "logits/chosen": -0.44629478454589844, "logits/rejected": -0.5728074312210083, "logps/chosen": -0.0007117882487364113, "logps/rejected": -1.6500269174575806, "loss": 1.3445, "nll_loss": 0.3360791802406311, "rewards/accuracies": 1.0, "rewards/chosen": -7.117882341844961e-05, "rewards/margins": 0.164931520819664, "rewards/rejected": -0.16500268876552582, "step": 5501 }, { "epoch": 3.804979253112033, "grad_norm": 7.961850643157959, "learning_rate": 3.441678192715537e-05, "log_odds_chosen": 9.812564849853516, "log_odds_ratio": -0.0006582457572221756, "logits/chosen": -0.24773460626602173, "logits/rejected": -0.3943350911140442, "logps/chosen": -0.0027490374632179737, "logps/rejected": -1.855540156364441, "loss": 1.139, "nll_loss": 0.2846890687942505, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027490375214256346, "rewards/margins": 0.18527910113334656, "rewards/rejected": -0.18555399775505066, "step": 5502 }, { "epoch": 3.80567081604426, "grad_norm": 11.752182960510254, "learning_rate": 3.441293991086522e-05, "log_odds_chosen": 9.51585578918457, "log_odds_ratio": -0.028415260836482048, "logits/chosen": -0.7475553154945374, "logits/rejected": -0.8064841628074646, "logps/chosen": -0.009086564183235168, "logps/rejected": -2.0079166889190674, "loss": 1.7488, "nll_loss": 0.4343549609184265, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009086563950404525, "rewards/margins": 0.19988302886486053, "rewards/rejected": -0.20079168677330017, "step": 5503 }, { "epoch": 3.8063623789764867, "grad_norm": 12.0503568649292, "learning_rate": 3.440909789457508e-05, "log_odds_chosen": 8.72144889831543, "log_odds_ratio": -0.017577024176716805, "logits/chosen": -0.5443108081817627, "logits/rejected": -0.6571115255355835, "logps/chosen": -0.005644082557410002, "logps/rejected": -1.5024874210357666, "loss": 1.7932, "nll_loss": 0.4465530812740326, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005644082557410002, "rewards/margins": 0.149684339761734, "rewards/rejected": -0.15024875104427338, "step": 5504 }, { "epoch": 3.8070539419087135, "grad_norm": 7.695371627807617, "learning_rate": 3.4405255878284923e-05, "log_odds_chosen": 10.452595710754395, "log_odds_ratio": -3.497970465105027e-05, "logits/chosen": -0.7729029059410095, "logits/rejected": -0.8375166654586792, "logps/chosen": -0.00011977435497101396, "logps/rejected": -1.5107817649841309, "loss": 1.2068, "nll_loss": 0.3017030954360962, "rewards/accuracies": 1.0, "rewards/chosen": -1.1977434951404575e-05, "rewards/margins": 0.15106619894504547, "rewards/rejected": -0.15107816457748413, "step": 5505 }, { "epoch": 3.8077455048409403, "grad_norm": 11.576738357543945, "learning_rate": 3.4401413861994776e-05, "log_odds_chosen": 9.514175415039062, "log_odds_ratio": -0.00028477475279942155, "logits/chosen": -0.4824485778808594, "logits/rejected": -0.5558584928512573, "logps/chosen": -0.000295641046250239, "logps/rejected": -1.5355809926986694, "loss": 1.2252, "nll_loss": 0.3062796890735626, "rewards/accuracies": 1.0, "rewards/chosen": -2.9564107535406947e-05, "rewards/margins": 0.15352854132652283, "rewards/rejected": -0.15355810523033142, "step": 5506 }, { "epoch": 3.808437067773167, "grad_norm": 5.991958141326904, "learning_rate": 3.439757184570463e-05, "log_odds_chosen": 8.088354110717773, "log_odds_ratio": -0.020768703892827034, "logits/chosen": -0.6435887217521667, "logits/rejected": -0.6004131436347961, "logps/chosen": -0.024852951988577843, "logps/rejected": -2.28918194770813, "loss": 1.4803, "nll_loss": 0.3679874539375305, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024852952919900417, "rewards/margins": 0.2264329046010971, "rewards/rejected": -0.228918194770813, "step": 5507 }, { "epoch": 3.809128630705394, "grad_norm": 12.097492218017578, "learning_rate": 3.439372982941448e-05, "log_odds_chosen": 9.174922943115234, "log_odds_ratio": -0.0025260585825890303, "logits/chosen": -0.769309401512146, "logits/rejected": -0.8545984029769897, "logps/chosen": -0.0010624685091897845, "logps/rejected": -1.3888287544250488, "loss": 1.4034, "nll_loss": 0.35060185194015503, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010624684364302084, "rewards/margins": 0.13877663016319275, "rewards/rejected": -0.13888287544250488, "step": 5508 }, { "epoch": 3.809820193637621, "grad_norm": 11.435340881347656, "learning_rate": 3.4389887813124326e-05, "log_odds_chosen": 10.789989471435547, "log_odds_ratio": -3.569827458704822e-05, "logits/chosen": -0.4533803164958954, "logits/rejected": -0.44226211309432983, "logps/chosen": -0.0005586327752098441, "logps/rejected": -2.573535203933716, "loss": 1.5026, "nll_loss": 0.37563616037368774, "rewards/accuracies": 1.0, "rewards/chosen": -5.586327824858017e-05, "rewards/margins": 0.25729766488075256, "rewards/rejected": -0.2573535442352295, "step": 5509 }, { "epoch": 3.8105117565698476, "grad_norm": 7.45504903793335, "learning_rate": 3.438604579683418e-05, "log_odds_chosen": 8.48158073425293, "log_odds_ratio": -0.001207137480378151, "logits/chosen": -0.28822168707847595, "logits/rejected": -0.40649378299713135, "logps/chosen": -0.0009628716506995261, "logps/rejected": -1.1258182525634766, "loss": 1.6578, "nll_loss": 0.4143317937850952, "rewards/accuracies": 1.0, "rewards/chosen": -9.628715633880347e-05, "rewards/margins": 0.11248555779457092, "rewards/rejected": -0.11258183419704437, "step": 5510 }, { "epoch": 3.8112033195020745, "grad_norm": 11.299424171447754, "learning_rate": 3.438220378054403e-05, "log_odds_chosen": 8.825550079345703, "log_odds_ratio": -0.0006356332451105118, "logits/chosen": -0.2884122133255005, "logits/rejected": -0.33634936809539795, "logps/chosen": -0.0017131754430010915, "logps/rejected": -1.3680305480957031, "loss": 2.3697, "nll_loss": 0.5923528075218201, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017131752974819392, "rewards/margins": 0.13663172721862793, "rewards/rejected": -0.1368030607700348, "step": 5511 }, { "epoch": 3.8118948824343013, "grad_norm": 10.111493110656738, "learning_rate": 3.437836176425388e-05, "log_odds_chosen": 8.188075065612793, "log_odds_ratio": -0.003308902494609356, "logits/chosen": -0.4384719133377075, "logits/rejected": -0.4258015751838684, "logps/chosen": -0.01320966612547636, "logps/rejected": -1.672622799873352, "loss": 1.484, "nll_loss": 0.37067684531211853, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013209665194153786, "rewards/margins": 0.16594132781028748, "rewards/rejected": -0.16726228594779968, "step": 5512 }, { "epoch": 3.812586445366528, "grad_norm": 9.90038013458252, "learning_rate": 3.4374519747963736e-05, "log_odds_chosen": 10.393072128295898, "log_odds_ratio": -6.332092016236857e-05, "logits/chosen": -0.3121805191040039, "logits/rejected": -0.4258049428462982, "logps/chosen": -0.00022715130762662739, "logps/rejected": -2.1377182006835938, "loss": 1.2688, "nll_loss": 0.31718710064888, "rewards/accuracies": 1.0, "rewards/chosen": -2.271513221785426e-05, "rewards/margins": 0.21374912559986115, "rewards/rejected": -0.21377183496952057, "step": 5513 }, { "epoch": 3.813278008298755, "grad_norm": 9.171831130981445, "learning_rate": 3.437067773167358e-05, "log_odds_chosen": 9.057912826538086, "log_odds_ratio": -0.0004997519426979125, "logits/chosen": -0.696790874004364, "logits/rejected": -0.6328009366989136, "logps/chosen": -0.001956242835149169, "logps/rejected": -1.4612929821014404, "loss": 1.5019, "nll_loss": 0.37541741132736206, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019562429224606603, "rewards/margins": 0.14593368768692017, "rewards/rejected": -0.146129310131073, "step": 5514 }, { "epoch": 3.813969571230982, "grad_norm": 6.400440216064453, "learning_rate": 3.4366835715383434e-05, "log_odds_chosen": 10.220077514648438, "log_odds_ratio": -6.414575182134286e-05, "logits/chosen": -0.5167442560195923, "logits/rejected": -0.5549056529998779, "logps/chosen": -0.0003075231797993183, "logps/rejected": -1.7159326076507568, "loss": 0.9626, "nll_loss": 0.24063529074192047, "rewards/accuracies": 1.0, "rewards/chosen": -3.075231870752759e-05, "rewards/margins": 0.17156250774860382, "rewards/rejected": -0.17159326374530792, "step": 5515 }, { "epoch": 3.8146611341632086, "grad_norm": 13.907997131347656, "learning_rate": 3.436299369909329e-05, "log_odds_chosen": 9.476810455322266, "log_odds_ratio": -0.0004220995760988444, "logits/chosen": -0.3859747052192688, "logits/rejected": -0.40230223536491394, "logps/chosen": -0.0011472878977656364, "logps/rejected": -2.2569823265075684, "loss": 1.2103, "nll_loss": 0.302531361579895, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011472879850771278, "rewards/margins": 0.2255834937095642, "rewards/rejected": -0.22569823265075684, "step": 5516 }, { "epoch": 3.8153526970954355, "grad_norm": 7.599922180175781, "learning_rate": 3.435915168280314e-05, "log_odds_chosen": 10.299822807312012, "log_odds_ratio": -0.0001517213531769812, "logits/chosen": -0.5298794507980347, "logits/rejected": -0.5793193578720093, "logps/chosen": -0.0002915368531830609, "logps/rejected": -2.1193902492523193, "loss": 0.8582, "nll_loss": 0.21453142166137695, "rewards/accuracies": 1.0, "rewards/chosen": -2.915368531830609e-05, "rewards/margins": 0.21190986037254333, "rewards/rejected": -0.2119390070438385, "step": 5517 }, { "epoch": 3.8160442600276623, "grad_norm": 8.166438102722168, "learning_rate": 3.4355309666512985e-05, "log_odds_chosen": 9.342557907104492, "log_odds_ratio": -0.020774465054273605, "logits/chosen": -0.38634446263313293, "logits/rejected": -0.43904387950897217, "logps/chosen": -0.026264827698469162, "logps/rejected": -1.5071449279785156, "loss": 1.6013, "nll_loss": 0.39825937151908875, "rewards/accuracies": 1.0, "rewards/chosen": -0.00262648263014853, "rewards/margins": 0.1480880081653595, "rewards/rejected": -0.15071450173854828, "step": 5518 }, { "epoch": 3.816735822959889, "grad_norm": 10.335421562194824, "learning_rate": 3.435146765022284e-05, "log_odds_chosen": 9.873821258544922, "log_odds_ratio": -0.0008224531775340438, "logits/chosen": -0.5271599292755127, "logits/rejected": -0.6046086549758911, "logps/chosen": -0.0232427716255188, "logps/rejected": -2.3042354583740234, "loss": 1.9371, "nll_loss": 0.48419103026390076, "rewards/accuracies": 1.0, "rewards/chosen": -0.002324277302250266, "rewards/margins": 0.2280992716550827, "rewards/rejected": -0.23042356967926025, "step": 5519 }, { "epoch": 3.817427385892116, "grad_norm": 10.293402671813965, "learning_rate": 3.434762563393269e-05, "log_odds_chosen": 8.963640213012695, "log_odds_ratio": -0.0009295929921790957, "logits/chosen": -0.3651862144470215, "logits/rejected": -0.4004555344581604, "logps/chosen": -0.006498910952359438, "logps/rejected": -1.286954402923584, "loss": 1.6556, "nll_loss": 0.413795530796051, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006498911534436047, "rewards/margins": 0.12804555892944336, "rewards/rejected": -0.12869544327259064, "step": 5520 }, { "epoch": 3.8181189488243428, "grad_norm": 9.090123176574707, "learning_rate": 3.4343783617642535e-05, "log_odds_chosen": 9.645210266113281, "log_odds_ratio": -0.0015530278906226158, "logits/chosen": -0.335877388715744, "logits/rejected": -0.3623276948928833, "logps/chosen": -0.02916303090751171, "logps/rejected": -2.003610610961914, "loss": 1.7366, "nll_loss": 0.4339974820613861, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029163029976189137, "rewards/margins": 0.19744475185871124, "rewards/rejected": -0.20036104321479797, "step": 5521 }, { "epoch": 3.8188105117565696, "grad_norm": 8.732023239135742, "learning_rate": 3.4339941601352395e-05, "log_odds_chosen": 10.926740646362305, "log_odds_ratio": -2.6167797841480933e-05, "logits/chosen": -0.33205878734588623, "logits/rejected": -0.413408488035202, "logps/chosen": -0.0003121374174952507, "logps/rejected": -2.4376025199890137, "loss": 1.6157, "nll_loss": 0.4039216637611389, "rewards/accuracies": 1.0, "rewards/chosen": -3.121374174952507e-05, "rewards/margins": 0.24372904002666473, "rewards/rejected": -0.24376025795936584, "step": 5522 }, { "epoch": 3.8195020746887964, "grad_norm": 9.307251930236816, "learning_rate": 3.433609958506224e-05, "log_odds_chosen": 9.603739738464355, "log_odds_ratio": -0.0003026507911272347, "logits/chosen": -0.46003973484039307, "logits/rejected": -0.49017009139060974, "logps/chosen": -0.0006151901325210929, "logps/rejected": -1.4377801418304443, "loss": 1.3264, "nll_loss": 0.3315601050853729, "rewards/accuracies": 1.0, "rewards/chosen": -6.151902198325843e-05, "rewards/margins": 0.143716499209404, "rewards/rejected": -0.1437780261039734, "step": 5523 }, { "epoch": 3.8201936376210233, "grad_norm": 13.252114295959473, "learning_rate": 3.433225756877209e-05, "log_odds_chosen": 7.5962018966674805, "log_odds_ratio": -0.2521457076072693, "logits/chosen": -0.305279016494751, "logits/rejected": -0.1939001977443695, "logps/chosen": -0.03892003744840622, "logps/rejected": -1.0653748512268066, "loss": 1.4177, "nll_loss": 0.32921168208122253, "rewards/accuracies": 0.875, "rewards/chosen": -0.003892003558576107, "rewards/margins": 0.10264548659324646, "rewards/rejected": -0.10653748363256454, "step": 5524 }, { "epoch": 3.82088520055325, "grad_norm": 8.918256759643555, "learning_rate": 3.4328415552481945e-05, "log_odds_chosen": 10.014975547790527, "log_odds_ratio": -0.0006707090069539845, "logits/chosen": -0.7125083208084106, "logits/rejected": -0.7475904822349548, "logps/chosen": -0.0003842590085696429, "logps/rejected": -1.7505316734313965, "loss": 1.1334, "nll_loss": 0.28329500555992126, "rewards/accuracies": 1.0, "rewards/chosen": -3.842590012936853e-05, "rewards/margins": 0.17501473426818848, "rewards/rejected": -0.1750531792640686, "step": 5525 }, { "epoch": 3.821576763485477, "grad_norm": 11.013578414916992, "learning_rate": 3.43245735361918e-05, "log_odds_chosen": 9.857749938964844, "log_odds_ratio": -0.006169512402266264, "logits/chosen": -0.8864176869392395, "logits/rejected": -0.9290403127670288, "logps/chosen": -0.0030358880758285522, "logps/rejected": -1.798154354095459, "loss": 1.2621, "nll_loss": 0.31491541862487793, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003035888366866857, "rewards/margins": 0.17951184511184692, "rewards/rejected": -0.17981544137001038, "step": 5526 }, { "epoch": 3.8222683264177038, "grad_norm": 14.562955856323242, "learning_rate": 3.432073151990164e-05, "log_odds_chosen": 10.920267105102539, "log_odds_ratio": -1.9419023374211974e-05, "logits/chosen": -0.7472702264785767, "logits/rejected": -0.7232099771499634, "logps/chosen": -0.00019400370365474373, "logps/rejected": -2.222071409225464, "loss": 1.3396, "nll_loss": 0.334891140460968, "rewards/accuracies": 1.0, "rewards/chosen": -1.9400371456868015e-05, "rewards/margins": 0.22218775749206543, "rewards/rejected": -0.22220715880393982, "step": 5527 }, { "epoch": 3.8229598893499306, "grad_norm": 9.685209274291992, "learning_rate": 3.4316889503611496e-05, "log_odds_chosen": 9.99222469329834, "log_odds_ratio": -0.00041643757140263915, "logits/chosen": -0.8748489618301392, "logits/rejected": -1.0220654010772705, "logps/chosen": -0.0005274987197481096, "logps/rejected": -1.963135004043579, "loss": 1.2651, "nll_loss": 0.3162212371826172, "rewards/accuracies": 1.0, "rewards/chosen": -5.2749870519619435e-05, "rewards/margins": 0.1962607502937317, "rewards/rejected": -0.1963135153055191, "step": 5528 }, { "epoch": 3.8236514522821574, "grad_norm": 4.451651096343994, "learning_rate": 3.431304748732135e-05, "log_odds_chosen": 9.383699417114258, "log_odds_ratio": -0.00013729330385103822, "logits/chosen": -0.04785768687725067, "logits/rejected": -0.14522284269332886, "logps/chosen": -0.004471118096262217, "logps/rejected": -1.6100064516067505, "loss": 1.5907, "nll_loss": 0.39765363931655884, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044711181544698775, "rewards/margins": 0.16055352985858917, "rewards/rejected": -0.16100063920021057, "step": 5529 }, { "epoch": 3.8243430152143842, "grad_norm": 6.9637017250061035, "learning_rate": 3.4309205471031194e-05, "log_odds_chosen": 8.777310371398926, "log_odds_ratio": -0.0008659964660182595, "logits/chosen": -0.8735266923904419, "logits/rejected": -0.9311203956604004, "logps/chosen": -0.002938113873824477, "logps/rejected": -1.6509557962417603, "loss": 1.131, "nll_loss": 0.2826663851737976, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029381137574091554, "rewards/margins": 0.16480176150798798, "rewards/rejected": -0.16509558260440826, "step": 5530 }, { "epoch": 3.825034578146611, "grad_norm": 11.936813354492188, "learning_rate": 3.430536345474105e-05, "log_odds_chosen": 9.860944747924805, "log_odds_ratio": -0.0001037710826494731, "logits/chosen": -0.7240860462188721, "logits/rejected": -0.8060693740844727, "logps/chosen": -0.000538259744644165, "logps/rejected": -1.8071279525756836, "loss": 1.8437, "nll_loss": 0.46090230345726013, "rewards/accuracies": 1.0, "rewards/chosen": -5.382597737479955e-05, "rewards/margins": 0.1806589663028717, "rewards/rejected": -0.18071278929710388, "step": 5531 }, { "epoch": 3.825726141078838, "grad_norm": 7.769991874694824, "learning_rate": 3.43015214384509e-05, "log_odds_chosen": 9.484786987304688, "log_odds_ratio": -0.0006122213671915233, "logits/chosen": -0.7496232390403748, "logits/rejected": -0.8503137826919556, "logps/chosen": -0.005784421693533659, "logps/rejected": -1.4626810550689697, "loss": 1.5371, "nll_loss": 0.38420918583869934, "rewards/accuracies": 1.0, "rewards/chosen": -0.000578442239202559, "rewards/margins": 0.14568966627120972, "rewards/rejected": -0.1462681144475937, "step": 5532 }, { "epoch": 3.8264177040110647, "grad_norm": 10.754548072814941, "learning_rate": 3.429767942216075e-05, "log_odds_chosen": 8.897015571594238, "log_odds_ratio": -0.0016687134047970176, "logits/chosen": -0.49642038345336914, "logits/rejected": -0.5355390310287476, "logps/chosen": -0.004793742671608925, "logps/rejected": -1.7796322107315063, "loss": 1.6755, "nll_loss": 0.41870787739753723, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047937428462319076, "rewards/margins": 0.17748384177684784, "rewards/rejected": -0.17796321213245392, "step": 5533 }, { "epoch": 3.8271092669432916, "grad_norm": 11.34075927734375, "learning_rate": 3.4293837405870604e-05, "log_odds_chosen": 8.991256713867188, "log_odds_ratio": -0.0016179453814402223, "logits/chosen": -0.7598484754562378, "logits/rejected": -0.7825421094894409, "logps/chosen": -0.01104398537427187, "logps/rejected": -2.0152955055236816, "loss": 1.9777, "nll_loss": 0.49426019191741943, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011043986305594444, "rewards/margins": 0.2004251331090927, "rewards/rejected": -0.20152954757213593, "step": 5534 }, { "epoch": 3.8278008298755184, "grad_norm": 7.396518230438232, "learning_rate": 3.4289995389580456e-05, "log_odds_chosen": 8.842217445373535, "log_odds_ratio": -0.08921416848897934, "logits/chosen": -0.5817644596099854, "logits/rejected": -0.6932407021522522, "logps/chosen": -0.01519980188459158, "logps/rejected": -1.32765793800354, "loss": 1.3348, "nll_loss": 0.32476648688316345, "rewards/accuracies": 0.875, "rewards/chosen": -0.0015199801418930292, "rewards/margins": 0.13124582171440125, "rewards/rejected": -0.13276579976081848, "step": 5535 }, { "epoch": 3.8284923928077457, "grad_norm": 8.323075294494629, "learning_rate": 3.42861533732903e-05, "log_odds_chosen": 10.417610168457031, "log_odds_ratio": -6.538983143400401e-05, "logits/chosen": -0.38365769386291504, "logits/rejected": -0.4411577880382538, "logps/chosen": -0.00029318296583369374, "logps/rejected": -1.8801240921020508, "loss": 0.8762, "nll_loss": 0.2190488874912262, "rewards/accuracies": 1.0, "rewards/chosen": -2.9318298402358778e-05, "rewards/margins": 0.18798309564590454, "rewards/rejected": -0.18801242113113403, "step": 5536 }, { "epoch": 3.8291839557399725, "grad_norm": 11.362656593322754, "learning_rate": 3.4282311357000154e-05, "log_odds_chosen": 7.42609977722168, "log_odds_ratio": -0.021518532186746597, "logits/chosen": -0.4237701892852783, "logits/rejected": -0.5051606893539429, "logps/chosen": -0.006600437685847282, "logps/rejected": -1.3271539211273193, "loss": 2.0503, "nll_loss": 0.5104222297668457, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006600437918677926, "rewards/margins": 0.1320553719997406, "rewards/rejected": -0.1327154040336609, "step": 5537 }, { "epoch": 3.8298755186721993, "grad_norm": 7.826541423797607, "learning_rate": 3.4278469340710007e-05, "log_odds_chosen": 9.108176231384277, "log_odds_ratio": -0.0018098466098308563, "logits/chosen": -0.8225448131561279, "logits/rejected": -0.869577944278717, "logps/chosen": -0.014577067457139492, "logps/rejected": -2.727315902709961, "loss": 1.5117, "nll_loss": 0.37774190306663513, "rewards/accuracies": 1.0, "rewards/chosen": -0.001457706792280078, "rewards/margins": 0.2712738513946533, "rewards/rejected": -0.2727315425872803, "step": 5538 }, { "epoch": 3.830567081604426, "grad_norm": 14.817159652709961, "learning_rate": 3.427462732441986e-05, "log_odds_chosen": 10.341073989868164, "log_odds_ratio": -0.0002269662218168378, "logits/chosen": -0.894839882850647, "logits/rejected": -0.9590165019035339, "logps/chosen": -0.007867439649999142, "logps/rejected": -2.476846694946289, "loss": 1.4406, "nll_loss": 0.3601387143135071, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007867439999245107, "rewards/margins": 0.24689790606498718, "rewards/rejected": -0.24768467247486115, "step": 5539 }, { "epoch": 3.831258644536653, "grad_norm": 13.953787803649902, "learning_rate": 3.427078530812971e-05, "log_odds_chosen": 9.776470184326172, "log_odds_ratio": -0.0002314805460628122, "logits/chosen": -0.9279817342758179, "logits/rejected": -0.975051760673523, "logps/chosen": -0.00032981581171043217, "logps/rejected": -1.6627427339553833, "loss": 2.7871, "nll_loss": 0.6967459917068481, "rewards/accuracies": 1.0, "rewards/chosen": -3.298158117104322e-05, "rewards/margins": 0.16624130308628082, "rewards/rejected": -0.1662742793560028, "step": 5540 }, { "epoch": 3.83195020746888, "grad_norm": 5.432536602020264, "learning_rate": 3.426694329183956e-05, "log_odds_chosen": 9.824362754821777, "log_odds_ratio": -0.0005517660174518824, "logits/chosen": -0.6685186624526978, "logits/rejected": -0.7586067914962769, "logps/chosen": -0.008896322920918465, "logps/rejected": -2.1879920959472656, "loss": 1.1259, "nll_loss": 0.28141963481903076, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008896323852241039, "rewards/margins": 0.2179095596075058, "rewards/rejected": -0.21879920363426208, "step": 5541 }, { "epoch": 3.8326417704011067, "grad_norm": 7.405891418457031, "learning_rate": 3.426310127554941e-05, "log_odds_chosen": 10.008780479431152, "log_odds_ratio": -0.0002761481737252325, "logits/chosen": -0.5124903917312622, "logits/rejected": -0.6155083179473877, "logps/chosen": -0.0004465877136681229, "logps/rejected": -1.7490488290786743, "loss": 1.3365, "nll_loss": 0.3340889811515808, "rewards/accuracies": 1.0, "rewards/chosen": -4.465877282200381e-05, "rewards/margins": 0.17486022412776947, "rewards/rejected": -0.17490488290786743, "step": 5542 }, { "epoch": 3.8333333333333335, "grad_norm": 11.694721221923828, "learning_rate": 3.425925925925926e-05, "log_odds_chosen": 8.600055694580078, "log_odds_ratio": -0.009092436172068119, "logits/chosen": -0.422126829624176, "logits/rejected": -0.4881291091442108, "logps/chosen": -0.004327110014855862, "logps/rejected": -1.978559970855713, "loss": 2.0677, "nll_loss": 0.5160139203071594, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043271100730635226, "rewards/margins": 0.1974232941865921, "rewards/rejected": -0.19785597920417786, "step": 5543 }, { "epoch": 3.8340248962655603, "grad_norm": 7.458912372589111, "learning_rate": 3.4255417242969114e-05, "log_odds_chosen": 8.836742401123047, "log_odds_ratio": -0.0018061978043988347, "logits/chosen": -0.2798689305782318, "logits/rejected": -0.31368425488471985, "logps/chosen": -0.001996720675379038, "logps/rejected": -1.3222445249557495, "loss": 1.629, "nll_loss": 0.4070799946784973, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019967206753790379, "rewards/margins": 0.13202476501464844, "rewards/rejected": -0.132224440574646, "step": 5544 }, { "epoch": 3.834716459197787, "grad_norm": 12.886466026306152, "learning_rate": 3.425157522667896e-05, "log_odds_chosen": 11.046795845031738, "log_odds_ratio": -3.086154174525291e-05, "logits/chosen": -0.08762979507446289, "logits/rejected": -0.2459934800863266, "logps/chosen": -0.00024006012245081365, "logps/rejected": -2.759465217590332, "loss": 1.4513, "nll_loss": 0.36282655596733093, "rewards/accuracies": 1.0, "rewards/chosen": -2.4006010789889842e-05, "rewards/margins": 0.2759225368499756, "rewards/rejected": -0.2759465277194977, "step": 5545 }, { "epoch": 3.835408022130014, "grad_norm": 9.756755828857422, "learning_rate": 3.424773321038882e-05, "log_odds_chosen": 9.83508586883545, "log_odds_ratio": -0.000582660548388958, "logits/chosen": -0.35356366634368896, "logits/rejected": -0.360950767993927, "logps/chosen": -0.004178952891379595, "logps/rejected": -2.676914930343628, "loss": 1.2778, "nll_loss": 0.3193887770175934, "rewards/accuracies": 1.0, "rewards/chosen": -0.00041789532406255603, "rewards/margins": 0.26727357506752014, "rewards/rejected": -0.2676914930343628, "step": 5546 }, { "epoch": 3.836099585062241, "grad_norm": 7.360530376434326, "learning_rate": 3.4243891194098665e-05, "log_odds_chosen": 8.683747291564941, "log_odds_ratio": -0.0014638010179623961, "logits/chosen": -0.3876939117908478, "logits/rejected": -0.4048527777194977, "logps/chosen": -0.001358279143460095, "logps/rejected": -1.3281148672103882, "loss": 0.9462, "nll_loss": 0.2364034652709961, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013582792598754168, "rewards/margins": 0.1326756477355957, "rewards/rejected": -0.13281148672103882, "step": 5547 }, { "epoch": 3.8367911479944676, "grad_norm": 15.831966400146484, "learning_rate": 3.424004917780852e-05, "log_odds_chosen": 9.272870063781738, "log_odds_ratio": -0.003424287075176835, "logits/chosen": -0.40673232078552246, "logits/rejected": -0.5001996159553528, "logps/chosen": -0.012605813331902027, "logps/rejected": -2.0801045894622803, "loss": 1.2381, "nll_loss": 0.30918464064598083, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012605813099071383, "rewards/margins": 0.20674987137317657, "rewards/rejected": -0.2080104500055313, "step": 5548 }, { "epoch": 3.8374827109266945, "grad_norm": 9.429594039916992, "learning_rate": 3.423620716151837e-05, "log_odds_chosen": 10.037227630615234, "log_odds_ratio": -0.000798444205429405, "logits/chosen": -0.7082902193069458, "logits/rejected": -0.716722846031189, "logps/chosen": -0.006209598854184151, "logps/rejected": -1.9420156478881836, "loss": 1.5434, "nll_loss": 0.38577979803085327, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006209598504938185, "rewards/margins": 0.19358061254024506, "rewards/rejected": -0.19420158863067627, "step": 5549 }, { "epoch": 3.8381742738589213, "grad_norm": 8.317140579223633, "learning_rate": 3.4232365145228216e-05, "log_odds_chosen": 8.800092697143555, "log_odds_ratio": -0.001625056378543377, "logits/chosen": -0.37635883688926697, "logits/rejected": -0.4500593841075897, "logps/chosen": -0.005539075471460819, "logps/rejected": -1.682387113571167, "loss": 1.5154, "nll_loss": 0.37868595123291016, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005539075937122107, "rewards/margins": 0.16768479347229004, "rewards/rejected": -0.16823871433734894, "step": 5550 }, { "epoch": 3.838865836791148, "grad_norm": 6.375285625457764, "learning_rate": 3.422852312893807e-05, "log_odds_chosen": 8.28230094909668, "log_odds_ratio": -0.11828190088272095, "logits/chosen": -0.5913441181182861, "logits/rejected": -0.6703944802284241, "logps/chosen": -0.017180640250444412, "logps/rejected": -1.775395393371582, "loss": 1.4604, "nll_loss": 0.3532821536064148, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017180641880258918, "rewards/margins": 0.1758214831352234, "rewards/rejected": -0.17753954231739044, "step": 5551 }, { "epoch": 3.839557399723375, "grad_norm": 12.262701988220215, "learning_rate": 3.422468111264792e-05, "log_odds_chosen": 10.857995986938477, "log_odds_ratio": -9.511434473097324e-05, "logits/chosen": -0.4358949661254883, "logits/rejected": -0.4269968271255493, "logps/chosen": -0.0002777725167106837, "logps/rejected": -2.5557219982147217, "loss": 2.4963, "nll_loss": 0.6240707635879517, "rewards/accuracies": 1.0, "rewards/chosen": -2.7777254217653535e-05, "rewards/margins": 0.25554442405700684, "rewards/rejected": -0.25557222962379456, "step": 5552 }, { "epoch": 3.840248962655602, "grad_norm": 10.897068977355957, "learning_rate": 3.422083909635777e-05, "log_odds_chosen": 10.079792022705078, "log_odds_ratio": -8.238962618634105e-05, "logits/chosen": -0.39618369936943054, "logits/rejected": -0.4675847291946411, "logps/chosen": -0.00033035362139344215, "logps/rejected": -1.5016779899597168, "loss": 1.3218, "nll_loss": 0.33044198155403137, "rewards/accuracies": 1.0, "rewards/chosen": -3.303536504972726e-05, "rewards/margins": 0.15013474225997925, "rewards/rejected": -0.1501677930355072, "step": 5553 }, { "epoch": 3.8409405255878286, "grad_norm": 9.962440490722656, "learning_rate": 3.421699708006762e-05, "log_odds_chosen": 9.99749755859375, "log_odds_ratio": -0.00010504462261451408, "logits/chosen": -0.4203840494155884, "logits/rejected": -0.45524218678474426, "logps/chosen": -0.0003157463506795466, "logps/rejected": -1.7843303680419922, "loss": 2.0937, "nll_loss": 0.5234262347221375, "rewards/accuracies": 1.0, "rewards/chosen": -3.157463652314618e-05, "rewards/margins": 0.17840145528316498, "rewards/rejected": -0.17843303084373474, "step": 5554 }, { "epoch": 3.8416320885200554, "grad_norm": 7.3351664543151855, "learning_rate": 3.421315506377748e-05, "log_odds_chosen": 8.06844425201416, "log_odds_ratio": -0.07652094960212708, "logits/chosen": -0.44341135025024414, "logits/rejected": -0.4799209237098694, "logps/chosen": -0.017725473269820213, "logps/rejected": -1.821255087852478, "loss": 1.7225, "nll_loss": 0.4229816198348999, "rewards/accuracies": 1.0, "rewards/chosen": -0.00177254737354815, "rewards/margins": 0.18035295605659485, "rewards/rejected": -0.182125523686409, "step": 5555 }, { "epoch": 3.8423236514522823, "grad_norm": 12.786683082580566, "learning_rate": 3.4209313047487323e-05, "log_odds_chosen": 10.323480606079102, "log_odds_ratio": -0.00020878612122032791, "logits/chosen": -0.7404874563217163, "logits/rejected": -0.8786669373512268, "logps/chosen": -0.0006357074016705155, "logps/rejected": -2.5420761108398438, "loss": 3.0706, "nll_loss": 0.7676250338554382, "rewards/accuracies": 1.0, "rewards/chosen": -6.357074016705155e-05, "rewards/margins": 0.2541440427303314, "rewards/rejected": -0.2542076110839844, "step": 5556 }, { "epoch": 3.843015214384509, "grad_norm": 16.721275329589844, "learning_rate": 3.4205471031197176e-05, "log_odds_chosen": 7.947993278503418, "log_odds_ratio": -0.2099430114030838, "logits/chosen": -0.6919919848442078, "logits/rejected": -0.7184282541275024, "logps/chosen": -0.035298943519592285, "logps/rejected": -1.3522957563400269, "loss": 1.3653, "nll_loss": 0.3203205168247223, "rewards/accuracies": 0.875, "rewards/chosen": -0.0035298941656947136, "rewards/margins": 0.13169968128204346, "rewards/rejected": -0.13522957265377045, "step": 5557 }, { "epoch": 3.843706777316736, "grad_norm": 7.51786994934082, "learning_rate": 3.420162901490703e-05, "log_odds_chosen": 10.105172157287598, "log_odds_ratio": -0.00017221916641574353, "logits/chosen": -0.8171368837356567, "logits/rejected": -0.8305201530456543, "logps/chosen": -0.0010632172925397754, "logps/rejected": -2.030632734298706, "loss": 1.0309, "nll_loss": 0.2577052712440491, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010632173507474363, "rewards/margins": 0.2029569447040558, "rewards/rejected": -0.2030632644891739, "step": 5558 }, { "epoch": 3.8443983402489628, "grad_norm": 11.065739631652832, "learning_rate": 3.4197786998616874e-05, "log_odds_chosen": 10.4656982421875, "log_odds_ratio": -0.00015175581211224198, "logits/chosen": -0.4404027462005615, "logits/rejected": -0.5089275240898132, "logps/chosen": -0.00016348894860129803, "logps/rejected": -1.9598641395568848, "loss": 1.3562, "nll_loss": 0.33904480934143066, "rewards/accuracies": 1.0, "rewards/chosen": -1.6348896679119207e-05, "rewards/margins": 0.1959700733423233, "rewards/rejected": -0.19598641991615295, "step": 5559 }, { "epoch": 3.8450899031811896, "grad_norm": 6.907554626464844, "learning_rate": 3.4193944982326726e-05, "log_odds_chosen": 7.720027923583984, "log_odds_ratio": -0.03721699118614197, "logits/chosen": -0.6247544884681702, "logits/rejected": -0.6959853172302246, "logps/chosen": -0.016281504184007645, "logps/rejected": -1.582554578781128, "loss": 1.461, "nll_loss": 0.3615221381187439, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016281504649668932, "rewards/margins": 0.15662731230258942, "rewards/rejected": -0.1582554578781128, "step": 5560 }, { "epoch": 3.8457814661134164, "grad_norm": 7.4110188484191895, "learning_rate": 3.419010296603658e-05, "log_odds_chosen": 8.912864685058594, "log_odds_ratio": -0.0009941515745595098, "logits/chosen": -0.4958620071411133, "logits/rejected": -0.5392465591430664, "logps/chosen": -0.0017727524973452091, "logps/rejected": -1.8045464754104614, "loss": 1.4059, "nll_loss": 0.3513681888580322, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017727524391375482, "rewards/margins": 0.18027737736701965, "rewards/rejected": -0.18045464158058167, "step": 5561 }, { "epoch": 3.8464730290456433, "grad_norm": 8.460882186889648, "learning_rate": 3.418626094974643e-05, "log_odds_chosen": 10.592855453491211, "log_odds_ratio": -3.915593333658762e-05, "logits/chosen": -0.7202191352844238, "logits/rejected": -0.7390909194946289, "logps/chosen": -0.00015226914547383785, "logps/rejected": -1.7440904378890991, "loss": 1.3169, "nll_loss": 0.32921481132507324, "rewards/accuracies": 1.0, "rewards/chosen": -1.5226914911181666e-05, "rewards/margins": 0.17439383268356323, "rewards/rejected": -0.17440906167030334, "step": 5562 }, { "epoch": 3.84716459197787, "grad_norm": 12.310235977172852, "learning_rate": 3.418241893345628e-05, "log_odds_chosen": 9.920722007751465, "log_odds_ratio": -0.0001332084502791986, "logits/chosen": -0.7831141948699951, "logits/rejected": -0.802308201789856, "logps/chosen": -0.0005803712992928922, "logps/rejected": -1.7757395505905151, "loss": 0.9645, "nll_loss": 0.24109968543052673, "rewards/accuracies": 1.0, "rewards/chosen": -5.803713429486379e-05, "rewards/margins": 0.1775159239768982, "rewards/rejected": -0.17757394909858704, "step": 5563 }, { "epoch": 3.847856154910097, "grad_norm": 7.889085292816162, "learning_rate": 3.4178576917166136e-05, "log_odds_chosen": 8.473007202148438, "log_odds_ratio": -0.0009104659548029304, "logits/chosen": -0.37859463691711426, "logits/rejected": -0.4623199701309204, "logps/chosen": -0.008291005156934261, "logps/rejected": -2.0283498764038086, "loss": 1.9784, "nll_loss": 0.4944990277290344, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008291004924103618, "rewards/margins": 0.20200589299201965, "rewards/rejected": -0.20283500850200653, "step": 5564 }, { "epoch": 3.8485477178423237, "grad_norm": 9.304460525512695, "learning_rate": 3.417473490087598e-05, "log_odds_chosen": 9.714195251464844, "log_odds_ratio": -0.0001703681773506105, "logits/chosen": -0.6381903290748596, "logits/rejected": -0.7350766062736511, "logps/chosen": -0.0015592292184010148, "logps/rejected": -1.4405004978179932, "loss": 1.2451, "nll_loss": 0.31124961376190186, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015592292766086757, "rewards/margins": 0.14389413595199585, "rewards/rejected": -0.14405004680156708, "step": 5565 }, { "epoch": 3.8492392807745506, "grad_norm": 14.469979286193848, "learning_rate": 3.4170892884585834e-05, "log_odds_chosen": 8.90275764465332, "log_odds_ratio": -0.0023119053803384304, "logits/chosen": -0.5259624719619751, "logits/rejected": -0.5423348546028137, "logps/chosen": -0.00645932462066412, "logps/rejected": -1.7382960319519043, "loss": 1.5171, "nll_loss": 0.3790552020072937, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006459323922172189, "rewards/margins": 0.17318369448184967, "rewards/rejected": -0.17382961511611938, "step": 5566 }, { "epoch": 3.8499308437067774, "grad_norm": 8.519709587097168, "learning_rate": 3.416705086829569e-05, "log_odds_chosen": 10.836234092712402, "log_odds_ratio": -2.3567565222037956e-05, "logits/chosen": -0.6077121496200562, "logits/rejected": -0.635270357131958, "logps/chosen": -0.00024252112780231982, "logps/rejected": -2.135042667388916, "loss": 1.0484, "nll_loss": 0.2620912194252014, "rewards/accuracies": 1.0, "rewards/chosen": -2.4252112780231982e-05, "rewards/margins": 0.21348001062870026, "rewards/rejected": -0.21350425481796265, "step": 5567 }, { "epoch": 3.8506224066390042, "grad_norm": 9.673677444458008, "learning_rate": 3.416320885200553e-05, "log_odds_chosen": 9.430411338806152, "log_odds_ratio": -0.15776720643043518, "logits/chosen": -0.637581467628479, "logits/rejected": -0.6428850293159485, "logps/chosen": -0.02269704081118107, "logps/rejected": -1.8823751211166382, "loss": 1.6441, "nll_loss": 0.3952556252479553, "rewards/accuracies": 0.875, "rewards/chosen": -0.0022697041276842356, "rewards/margins": 0.1859678030014038, "rewards/rejected": -0.1882375180721283, "step": 5568 }, { "epoch": 3.851313969571231, "grad_norm": 12.571064949035645, "learning_rate": 3.4159366835715385e-05, "log_odds_chosen": 10.705487251281738, "log_odds_ratio": -6.690446753054857e-05, "logits/chosen": -0.6356642246246338, "logits/rejected": -0.6576147675514221, "logps/chosen": -0.00015859422273933887, "logps/rejected": -2.0107100009918213, "loss": 1.3405, "nll_loss": 0.33511292934417725, "rewards/accuracies": 1.0, "rewards/chosen": -1.585942300152965e-05, "rewards/margins": 0.20105516910552979, "rewards/rejected": -0.20107102394104004, "step": 5569 }, { "epoch": 3.852005532503458, "grad_norm": 11.851554870605469, "learning_rate": 3.415552481942524e-05, "log_odds_chosen": 8.76864242553711, "log_odds_ratio": -0.0007422867347486317, "logits/chosen": -0.26603615283966064, "logits/rejected": -0.30356648564338684, "logps/chosen": -0.004614387173205614, "logps/rejected": -1.554021954536438, "loss": 1.7769, "nll_loss": 0.444142609834671, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046143875806592405, "rewards/margins": 0.15494075417518616, "rewards/rejected": -0.15540219843387604, "step": 5570 }, { "epoch": 3.8526970954356847, "grad_norm": 13.143882751464844, "learning_rate": 3.415168280313509e-05, "log_odds_chosen": 9.488032341003418, "log_odds_ratio": -0.024420535191893578, "logits/chosen": -0.8110833764076233, "logits/rejected": -0.8133986592292786, "logps/chosen": -0.01876739226281643, "logps/rejected": -1.945871114730835, "loss": 1.5925, "nll_loss": 0.3956940472126007, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018767392029985785, "rewards/margins": 0.19271036982536316, "rewards/rejected": -0.1945871114730835, "step": 5571 }, { "epoch": 3.8533886583679116, "grad_norm": 8.051220893859863, "learning_rate": 3.4147840786844935e-05, "log_odds_chosen": 8.133522987365723, "log_odds_ratio": -0.02157263271510601, "logits/chosen": -0.2942221760749817, "logits/rejected": -0.3327701985836029, "logps/chosen": -0.007230726070702076, "logps/rejected": -1.4394934177398682, "loss": 1.8201, "nll_loss": 0.4528588056564331, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007230726187117398, "rewards/margins": 0.1432262659072876, "rewards/rejected": -0.14394932985305786, "step": 5572 }, { "epoch": 3.8540802213001384, "grad_norm": 12.141948699951172, "learning_rate": 3.4143998770554795e-05, "log_odds_chosen": 8.340699195861816, "log_odds_ratio": -0.05566050112247467, "logits/chosen": -0.5719602704048157, "logits/rejected": -0.7078933119773865, "logps/chosen": -0.017384812235832214, "logps/rejected": -2.164154529571533, "loss": 1.6659, "nll_loss": 0.41090866923332214, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017384812235832214, "rewards/margins": 0.21467696130275726, "rewards/rejected": -0.2164154350757599, "step": 5573 }, { "epoch": 3.854771784232365, "grad_norm": 7.900241851806641, "learning_rate": 3.414015675426464e-05, "log_odds_chosen": 7.441177845001221, "log_odds_ratio": -0.1711406111717224, "logits/chosen": -0.2587983012199402, "logits/rejected": -0.3203867971897125, "logps/chosen": -0.04765608161687851, "logps/rejected": -1.2207387685775757, "loss": 1.4017, "nll_loss": 0.3333078920841217, "rewards/accuracies": 0.875, "rewards/chosen": -0.004765608347952366, "rewards/margins": 0.11730826646089554, "rewards/rejected": -0.12207387387752533, "step": 5574 }, { "epoch": 3.855463347164592, "grad_norm": 6.0263824462890625, "learning_rate": 3.413631473797449e-05, "log_odds_chosen": 8.33633041381836, "log_odds_ratio": -0.0022357190027832985, "logits/chosen": -0.20025485754013062, "logits/rejected": -0.2713879346847534, "logps/chosen": -0.024495285004377365, "logps/rejected": -2.157153367996216, "loss": 1.7926, "nll_loss": 0.4479362964630127, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024495285470038652, "rewards/margins": 0.2132658064365387, "rewards/rejected": -0.21571531891822815, "step": 5575 }, { "epoch": 3.856154910096819, "grad_norm": 11.386039733886719, "learning_rate": 3.4132472721684345e-05, "log_odds_chosen": 8.842057228088379, "log_odds_ratio": -0.1126728430390358, "logits/chosen": -0.7715977430343628, "logits/rejected": -0.8066084980964661, "logps/chosen": -0.018056262284517288, "logps/rejected": -2.1228909492492676, "loss": 1.3804, "nll_loss": 0.3338434398174286, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018056264379993081, "rewards/margins": 0.21048344671726227, "rewards/rejected": -0.21228908002376556, "step": 5576 }, { "epoch": 3.8568464730290457, "grad_norm": 21.511932373046875, "learning_rate": 3.412863070539419e-05, "log_odds_chosen": 10.852688789367676, "log_odds_ratio": -0.00026287042419426143, "logits/chosen": -0.5599167943000793, "logits/rejected": -0.6122843027114868, "logps/chosen": -0.003043019911274314, "logps/rejected": -3.0699446201324463, "loss": 1.537, "nll_loss": 0.3842347264289856, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003043019969481975, "rewards/margins": 0.30669015645980835, "rewards/rejected": -0.3069944679737091, "step": 5577 }, { "epoch": 3.8575380359612725, "grad_norm": 7.178199768066406, "learning_rate": 3.412478868910404e-05, "log_odds_chosen": 7.949138641357422, "log_odds_ratio": -0.014978266321122646, "logits/chosen": -0.7569225430488586, "logits/rejected": -0.7971171140670776, "logps/chosen": -0.009100032970309258, "logps/rejected": -1.8713810443878174, "loss": 1.3506, "nll_loss": 0.3361407518386841, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009100032621063292, "rewards/margins": 0.18622809648513794, "rewards/rejected": -0.18713811039924622, "step": 5578 }, { "epoch": 3.8582295988934994, "grad_norm": 5.463674545288086, "learning_rate": 3.4120946672813896e-05, "log_odds_chosen": 10.04675006866455, "log_odds_ratio": -0.00010655250662239268, "logits/chosen": -0.5956853628158569, "logits/rejected": -0.5343964099884033, "logps/chosen": -0.004939241334795952, "logps/rejected": -2.332029342651367, "loss": 1.4955, "nll_loss": 0.37386226654052734, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004939241334795952, "rewards/margins": 0.23270902037620544, "rewards/rejected": -0.23320293426513672, "step": 5579 }, { "epoch": 3.858921161825726, "grad_norm": 11.263189315795898, "learning_rate": 3.411710465652375e-05, "log_odds_chosen": 9.413596153259277, "log_odds_ratio": -0.0013633102644234896, "logits/chosen": -0.2171023190021515, "logits/rejected": -0.3067041039466858, "logps/chosen": -0.006868002470582724, "logps/rejected": -2.0416359901428223, "loss": 2.1914, "nll_loss": 0.5477181077003479, "rewards/accuracies": 1.0, "rewards/chosen": -0.000686800223775208, "rewards/margins": 0.20347681641578674, "rewards/rejected": -0.20416361093521118, "step": 5580 }, { "epoch": 3.859612724757953, "grad_norm": 7.4217848777771, "learning_rate": 3.4113262640233594e-05, "log_odds_chosen": 10.248290061950684, "log_odds_ratio": -0.00022609463485423476, "logits/chosen": -0.7831434607505798, "logits/rejected": -0.7565454244613647, "logps/chosen": -0.0007353498367592692, "logps/rejected": -2.2359209060668945, "loss": 1.3132, "nll_loss": 0.3282652497291565, "rewards/accuracies": 1.0, "rewards/chosen": -7.35349822207354e-05, "rewards/margins": 0.22351858019828796, "rewards/rejected": -0.2235921174287796, "step": 5581 }, { "epoch": 3.86030428769018, "grad_norm": 8.249073028564453, "learning_rate": 3.4109420623943446e-05, "log_odds_chosen": 9.593080520629883, "log_odds_ratio": -0.001667341566644609, "logits/chosen": -0.7265720367431641, "logits/rejected": -0.7452245354652405, "logps/chosen": -0.030651377514004707, "logps/rejected": -1.9601030349731445, "loss": 1.2229, "nll_loss": 0.30555465817451477, "rewards/accuracies": 1.0, "rewards/chosen": -0.003065137891098857, "rewards/margins": 0.19294516742229462, "rewards/rejected": -0.19601032137870789, "step": 5582 }, { "epoch": 3.8609958506224067, "grad_norm": 4.888029098510742, "learning_rate": 3.41055786076533e-05, "log_odds_chosen": 8.515039443969727, "log_odds_ratio": -0.02231896109879017, "logits/chosen": -0.2870052456855774, "logits/rejected": -0.369390606880188, "logps/chosen": -0.007543592248111963, "logps/rejected": -1.8465120792388916, "loss": 0.8638, "nll_loss": 0.21371084451675415, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007543592946603894, "rewards/margins": 0.18389683961868286, "rewards/rejected": -0.1846512258052826, "step": 5583 }, { "epoch": 3.8616874135546335, "grad_norm": 10.852818489074707, "learning_rate": 3.410173659136315e-05, "log_odds_chosen": 8.885574340820312, "log_odds_ratio": -0.0029431653674691916, "logits/chosen": -0.230947345495224, "logits/rejected": -0.3188367187976837, "logps/chosen": -0.0016783340834081173, "logps/rejected": -1.059872031211853, "loss": 1.6579, "nll_loss": 0.4141872227191925, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016783342289272696, "rewards/margins": 0.10581937432289124, "rewards/rejected": -0.10598720610141754, "step": 5584 }, { "epoch": 3.8623789764868603, "grad_norm": 8.725906372070312, "learning_rate": 3.4097894575073e-05, "log_odds_chosen": 8.555717468261719, "log_odds_ratio": -0.001971770077943802, "logits/chosen": -0.4405558109283447, "logits/rejected": -0.3410573899745941, "logps/chosen": -0.0018309359438717365, "logps/rejected": -1.2396399974822998, "loss": 1.725, "nll_loss": 0.4310452938079834, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018309360893908888, "rewards/margins": 0.12378091365098953, "rewards/rejected": -0.12396401166915894, "step": 5585 }, { "epoch": 3.863070539419087, "grad_norm": 9.858050346374512, "learning_rate": 3.409405255878285e-05, "log_odds_chosen": 10.435060501098633, "log_odds_ratio": -0.0001268967316718772, "logits/chosen": -0.6413505673408508, "logits/rejected": -0.7253145575523376, "logps/chosen": -0.00029369723051786423, "logps/rejected": -1.9069111347198486, "loss": 1.2896, "nll_loss": 0.3223815858364105, "rewards/accuracies": 1.0, "rewards/chosen": -2.9369724870775826e-05, "rewards/margins": 0.1906617283821106, "rewards/rejected": -0.19069111347198486, "step": 5586 }, { "epoch": 3.863762102351314, "grad_norm": 8.920366287231445, "learning_rate": 3.40902105424927e-05, "log_odds_chosen": 9.892365455627441, "log_odds_ratio": -0.0001298495044466108, "logits/chosen": -0.20717889070510864, "logits/rejected": -0.2652691602706909, "logps/chosen": -0.00026627822080627084, "logps/rejected": -1.3087078332901, "loss": 1.7546, "nll_loss": 0.43863433599472046, "rewards/accuracies": 1.0, "rewards/chosen": -2.6627822080627084e-05, "rewards/margins": 0.1308441460132599, "rewards/rejected": -0.1308707892894745, "step": 5587 }, { "epoch": 3.864453665283541, "grad_norm": 11.133824348449707, "learning_rate": 3.408636852620255e-05, "log_odds_chosen": 9.764641761779785, "log_odds_ratio": -0.0001106760319089517, "logits/chosen": -0.45230603218078613, "logits/rejected": -0.5868815183639526, "logps/chosen": -0.0006661764928139746, "logps/rejected": -2.2513959407806396, "loss": 1.8702, "nll_loss": 0.46753376722335815, "rewards/accuracies": 1.0, "rewards/chosen": -6.661764928139746e-05, "rewards/margins": 0.22507299482822418, "rewards/rejected": -0.22513961791992188, "step": 5588 }, { "epoch": 3.8651452282157677, "grad_norm": 10.524619102478027, "learning_rate": 3.408252650991241e-05, "log_odds_chosen": 9.430778503417969, "log_odds_ratio": -0.0005737305618822575, "logits/chosen": -0.42517948150634766, "logits/rejected": -0.5130538940429688, "logps/chosen": -0.0003639504429884255, "logps/rejected": -1.6744965314865112, "loss": 1.5258, "nll_loss": 0.38138049840927124, "rewards/accuracies": 1.0, "rewards/chosen": -3.6395042116055265e-05, "rewards/margins": 0.16741326451301575, "rewards/rejected": -0.16744965314865112, "step": 5589 }, { "epoch": 3.8658367911479945, "grad_norm": 14.197625160217285, "learning_rate": 3.407868449362225e-05, "log_odds_chosen": 9.485942840576172, "log_odds_ratio": -0.004377941135317087, "logits/chosen": -0.32564330101013184, "logits/rejected": -0.4865632951259613, "logps/chosen": -0.0027153741102665663, "logps/rejected": -2.0737974643707275, "loss": 1.7531, "nll_loss": 0.437848836183548, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002715374284889549, "rewards/margins": 0.20710822939872742, "rewards/rejected": -0.2073797583580017, "step": 5590 }, { "epoch": 3.8665283540802213, "grad_norm": 11.992483139038086, "learning_rate": 3.4074842477332105e-05, "log_odds_chosen": 10.378591537475586, "log_odds_ratio": -8.182358578778803e-05, "logits/chosen": -0.544563889503479, "logits/rejected": -0.5972481966018677, "logps/chosen": -0.00017176619439851493, "logps/rejected": -1.5616424083709717, "loss": 1.4838, "nll_loss": 0.3709515631198883, "rewards/accuracies": 1.0, "rewards/chosen": -1.7176620531245135e-05, "rewards/margins": 0.1561470627784729, "rewards/rejected": -0.1561642289161682, "step": 5591 }, { "epoch": 3.867219917012448, "grad_norm": 7.904953479766846, "learning_rate": 3.407100046104196e-05, "log_odds_chosen": 10.234663009643555, "log_odds_ratio": -0.0001564006961416453, "logits/chosen": -0.7005908489227295, "logits/rejected": -0.7648482322692871, "logps/chosen": -0.0009522702312096953, "logps/rejected": -2.065946102142334, "loss": 0.8907, "nll_loss": 0.22265967726707458, "rewards/accuracies": 1.0, "rewards/chosen": -9.522702021058649e-05, "rewards/margins": 0.2064993679523468, "rewards/rejected": -0.20659461617469788, "step": 5592 }, { "epoch": 3.867911479944675, "grad_norm": 8.900137901306152, "learning_rate": 3.406715844475181e-05, "log_odds_chosen": 8.831513404846191, "log_odds_ratio": -0.010963935405015945, "logits/chosen": -0.5970246195793152, "logits/rejected": -0.7142374515533447, "logps/chosen": -0.020394207909703255, "logps/rejected": -1.9975173473358154, "loss": 1.9492, "nll_loss": 0.48620539903640747, "rewards/accuracies": 1.0, "rewards/chosen": -0.002039420884102583, "rewards/margins": 0.19771233201026917, "rewards/rejected": -0.19975173473358154, "step": 5593 }, { "epoch": 3.868603042876902, "grad_norm": 10.622045516967773, "learning_rate": 3.4063316428461655e-05, "log_odds_chosen": 9.73248291015625, "log_odds_ratio": -0.0004534787149168551, "logits/chosen": -0.5400159358978271, "logits/rejected": -0.6580114364624023, "logps/chosen": -0.0007937573827803135, "logps/rejected": -2.3123836517333984, "loss": 2.226, "nll_loss": 0.556454062461853, "rewards/accuracies": 1.0, "rewards/chosen": -7.937574264360592e-05, "rewards/margins": 0.23115897178649902, "rewards/rejected": -0.23123835027217865, "step": 5594 }, { "epoch": 3.8692946058091287, "grad_norm": 9.243762016296387, "learning_rate": 3.405947441217151e-05, "log_odds_chosen": 8.463890075683594, "log_odds_ratio": -0.002155021531507373, "logits/chosen": -0.5418992638587952, "logits/rejected": -0.5896996259689331, "logps/chosen": -0.011017933487892151, "logps/rejected": -2.1293439865112305, "loss": 1.3508, "nll_loss": 0.3374752402305603, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011017934884876013, "rewards/margins": 0.21183261275291443, "rewards/rejected": -0.21293440461158752, "step": 5595 }, { "epoch": 3.8699861687413555, "grad_norm": 21.918975830078125, "learning_rate": 3.405563239588136e-05, "log_odds_chosen": 7.5077643394470215, "log_odds_ratio": -0.1952972561120987, "logits/chosen": -0.5486396551132202, "logits/rejected": -0.5754981637001038, "logps/chosen": -0.025723986327648163, "logps/rejected": -1.0680969953536987, "loss": 2.4363, "nll_loss": 0.5895346999168396, "rewards/accuracies": 0.875, "rewards/chosen": -0.0025723983999341726, "rewards/margins": 0.10423729568719864, "rewards/rejected": -0.10680970549583435, "step": 5596 }, { "epoch": 3.8706777316735823, "grad_norm": 10.214221000671387, "learning_rate": 3.4051790379591206e-05, "log_odds_chosen": 8.93708324432373, "log_odds_ratio": -0.011676867492496967, "logits/chosen": -0.7973469495773315, "logits/rejected": -0.8488380312919617, "logps/chosen": -0.037191543728113174, "logps/rejected": -1.7417047023773193, "loss": 1.0208, "nll_loss": 0.25402408838272095, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037191545125097036, "rewards/margins": 0.1704513281583786, "rewards/rejected": -0.17417047917842865, "step": 5597 }, { "epoch": 3.871369294605809, "grad_norm": 8.746747016906738, "learning_rate": 3.4047948363301065e-05, "log_odds_chosen": 10.5672607421875, "log_odds_ratio": -7.664141594432294e-05, "logits/chosen": -0.3067360818386078, "logits/rejected": -0.42486873269081116, "logps/chosen": -0.0032729327213019133, "logps/rejected": -2.47634220123291, "loss": 1.5306, "nll_loss": 0.3826458156108856, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032729326630942523, "rewards/margins": 0.2473069131374359, "rewards/rejected": -0.24763420224189758, "step": 5598 }, { "epoch": 3.872060857538036, "grad_norm": 13.36917781829834, "learning_rate": 3.404410634701091e-05, "log_odds_chosen": 9.875089645385742, "log_odds_ratio": -0.02879687026143074, "logits/chosen": -0.45609021186828613, "logits/rejected": -0.5271731615066528, "logps/chosen": -0.06482497602701187, "logps/rejected": -1.9527268409729004, "loss": 1.3291, "nll_loss": 0.329405277967453, "rewards/accuracies": 1.0, "rewards/chosen": -0.0064824968576431274, "rewards/margins": 0.1887901872396469, "rewards/rejected": -0.19527268409729004, "step": 5599 }, { "epoch": 3.872752420470263, "grad_norm": 8.476516723632812, "learning_rate": 3.404026433072076e-05, "log_odds_chosen": 9.027252197265625, "log_odds_ratio": -0.0002830424637068063, "logits/chosen": -0.8720872402191162, "logits/rejected": -0.9834346771240234, "logps/chosen": -0.010724175721406937, "logps/rejected": -1.8943570852279663, "loss": 1.5209, "nll_loss": 0.3802030384540558, "rewards/accuracies": 1.0, "rewards/chosen": -0.001072417595423758, "rewards/margins": 0.18836328387260437, "rewards/rejected": -0.1894357055425644, "step": 5600 }, { "epoch": 3.8734439834024896, "grad_norm": 8.783854484558105, "learning_rate": 3.4036422314430616e-05, "log_odds_chosen": 9.651050567626953, "log_odds_ratio": -0.0009334798669442534, "logits/chosen": -0.331780344247818, "logits/rejected": -0.4134882092475891, "logps/chosen": -0.0129983089864254, "logps/rejected": -2.7373316287994385, "loss": 1.3279, "nll_loss": 0.3318936228752136, "rewards/accuracies": 1.0, "rewards/chosen": -0.001299830968491733, "rewards/margins": 0.272433340549469, "rewards/rejected": -0.2737331986427307, "step": 5601 }, { "epoch": 3.8741355463347165, "grad_norm": 4.210104942321777, "learning_rate": 3.403258029814047e-05, "log_odds_chosen": 9.92902660369873, "log_odds_ratio": -0.000435270689195022, "logits/chosen": -0.4320386052131653, "logits/rejected": -0.5295803546905518, "logps/chosen": -0.0008208720246329904, "logps/rejected": -1.9246351718902588, "loss": 0.8369, "nll_loss": 0.20917457342147827, "rewards/accuracies": 1.0, "rewards/chosen": -8.208720828406513e-05, "rewards/margins": 0.19238142669200897, "rewards/rejected": -0.19246351718902588, "step": 5602 }, { "epoch": 3.8748271092669433, "grad_norm": 6.597437381744385, "learning_rate": 3.4028738281850314e-05, "log_odds_chosen": 8.948493957519531, "log_odds_ratio": -0.0007399824680760503, "logits/chosen": -0.055549319833517075, "logits/rejected": -0.141621395945549, "logps/chosen": -0.0026224683970212936, "logps/rejected": -1.7882872819900513, "loss": 0.897, "nll_loss": 0.2241741567850113, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002622468746267259, "rewards/margins": 0.17856647074222565, "rewards/rejected": -0.17882871627807617, "step": 5603 }, { "epoch": 3.87551867219917, "grad_norm": 11.046610832214355, "learning_rate": 3.4024896265560166e-05, "log_odds_chosen": 9.968111038208008, "log_odds_ratio": -0.00023064689594320953, "logits/chosen": -0.9279760122299194, "logits/rejected": -0.9948770403862, "logps/chosen": -0.00037911301478743553, "logps/rejected": -1.96274995803833, "loss": 1.0898, "nll_loss": 0.2724343538284302, "rewards/accuracies": 1.0, "rewards/chosen": -3.79113043891266e-05, "rewards/margins": 0.19623705744743347, "rewards/rejected": -0.196274995803833, "step": 5604 }, { "epoch": 3.876210235131397, "grad_norm": 6.500091075897217, "learning_rate": 3.402105424927002e-05, "log_odds_chosen": 8.721122741699219, "log_odds_ratio": -0.0005944301374256611, "logits/chosen": -0.755002498626709, "logits/rejected": -0.8262910842895508, "logps/chosen": -0.001076082931831479, "logps/rejected": -1.5059388875961304, "loss": 1.3978, "nll_loss": 0.34940218925476074, "rewards/accuracies": 1.0, "rewards/chosen": -0.000107608299003914, "rewards/margins": 0.1504862755537033, "rewards/rejected": -0.15059387683868408, "step": 5605 }, { "epoch": 3.876901798063624, "grad_norm": 5.433056831359863, "learning_rate": 3.4017212232979864e-05, "log_odds_chosen": 8.765649795532227, "log_odds_ratio": -0.0014494097558781505, "logits/chosen": -0.501061737537384, "logits/rejected": -0.5340345501899719, "logps/chosen": -0.001163999317213893, "logps/rejected": -1.2552547454833984, "loss": 1.1483, "nll_loss": 0.28693222999572754, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001163999259006232, "rewards/margins": 0.1254090666770935, "rewards/rejected": -0.12552547454833984, "step": 5606 }, { "epoch": 3.8775933609958506, "grad_norm": 8.457118034362793, "learning_rate": 3.4013370216689724e-05, "log_odds_chosen": 10.066720008850098, "log_odds_ratio": -0.0003545557556208223, "logits/chosen": -0.19247552752494812, "logits/rejected": -0.2578640580177307, "logps/chosen": -0.0016848837258294225, "logps/rejected": -2.502847194671631, "loss": 1.6179, "nll_loss": 0.40442922711372375, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016848836094141006, "rewards/margins": 0.250116229057312, "rewards/rejected": -0.25028473138809204, "step": 5607 }, { "epoch": 3.8782849239280774, "grad_norm": 8.580554962158203, "learning_rate": 3.400952820039957e-05, "log_odds_chosen": 11.60051441192627, "log_odds_ratio": -1.2054061699018348e-05, "logits/chosen": -0.5695351362228394, "logits/rejected": -0.6658599972724915, "logps/chosen": -0.00024254321760963649, "logps/rejected": -2.7273976802825928, "loss": 0.9558, "nll_loss": 0.23895087838172913, "rewards/accuracies": 1.0, "rewards/chosen": -2.4254321033367887e-05, "rewards/margins": 0.2727155387401581, "rewards/rejected": -0.27273979783058167, "step": 5608 }, { "epoch": 3.8789764868603043, "grad_norm": 17.818029403686523, "learning_rate": 3.400568618410942e-05, "log_odds_chosen": 10.083698272705078, "log_odds_ratio": -0.00014503306010738015, "logits/chosen": -0.4443364143371582, "logits/rejected": -0.5567833185195923, "logps/chosen": -0.000489625264890492, "logps/rejected": -2.1325652599334717, "loss": 1.8269, "nll_loss": 0.45671117305755615, "rewards/accuracies": 1.0, "rewards/chosen": -4.896252721664496e-05, "rewards/margins": 0.21320757269859314, "rewards/rejected": -0.21325653791427612, "step": 5609 }, { "epoch": 3.879668049792531, "grad_norm": 15.383901596069336, "learning_rate": 3.4001844167819274e-05, "log_odds_chosen": 9.334062576293945, "log_odds_ratio": -0.0005784723325632513, "logits/chosen": -0.7563439607620239, "logits/rejected": -0.778277575969696, "logps/chosen": -0.0020408525597304106, "logps/rejected": -2.2630202770233154, "loss": 1.3577, "nll_loss": 0.3393765985965729, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020408528507687151, "rewards/margins": 0.226097971200943, "rewards/rejected": -0.22630205750465393, "step": 5610 }, { "epoch": 3.880359612724758, "grad_norm": 13.376522064208984, "learning_rate": 3.3998002151529127e-05, "log_odds_chosen": 9.306199073791504, "log_odds_ratio": -0.0012351719196885824, "logits/chosen": -0.57845139503479, "logits/rejected": -0.6579585075378418, "logps/chosen": -0.002754670102149248, "logps/rejected": -1.6692554950714111, "loss": 1.4071, "nll_loss": 0.3516432046890259, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027546699857339263, "rewards/margins": 0.16665008664131165, "rewards/rejected": -0.1669255495071411, "step": 5611 }, { "epoch": 3.8810511756569848, "grad_norm": 10.505668640136719, "learning_rate": 3.399416013523897e-05, "log_odds_chosen": 9.230743408203125, "log_odds_ratio": -0.0024349126033484936, "logits/chosen": -0.3715485632419586, "logits/rejected": -0.4621528387069702, "logps/chosen": -0.014623850584030151, "logps/rejected": -1.5341432094573975, "loss": 1.193, "nll_loss": 0.29800331592559814, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014623850584030151, "rewards/margins": 0.15195192396640778, "rewards/rejected": -0.1534143090248108, "step": 5612 }, { "epoch": 3.8817427385892116, "grad_norm": 6.59618616104126, "learning_rate": 3.3990318118948825e-05, "log_odds_chosen": 10.229150772094727, "log_odds_ratio": -4.0353632357437164e-05, "logits/chosen": -0.8435558080673218, "logits/rejected": -0.8600744605064392, "logps/chosen": -9.384715667692944e-05, "logps/rejected": -1.167233943939209, "loss": 0.885, "nll_loss": 0.2212393581867218, "rewards/accuracies": 1.0, "rewards/chosen": -9.384715667692944e-06, "rewards/margins": 0.11671401560306549, "rewards/rejected": -0.11672340333461761, "step": 5613 }, { "epoch": 3.8824343015214384, "grad_norm": 8.425680160522461, "learning_rate": 3.398647610265868e-05, "log_odds_chosen": 9.704595565795898, "log_odds_ratio": -0.0006063667242415249, "logits/chosen": -0.3385249972343445, "logits/rejected": -0.4208109974861145, "logps/chosen": -0.0007622221601195633, "logps/rejected": -1.711651086807251, "loss": 1.4288, "nll_loss": 0.35713446140289307, "rewards/accuracies": 1.0, "rewards/chosen": -7.622221164638177e-05, "rewards/margins": 0.17108888924121857, "rewards/rejected": -0.1711651086807251, "step": 5614 }, { "epoch": 3.8831258644536653, "grad_norm": 7.727184295654297, "learning_rate": 3.398263408636852e-05, "log_odds_chosen": 8.956552505493164, "log_odds_ratio": -0.020546168088912964, "logits/chosen": -0.7198243737220764, "logits/rejected": -0.7512015700340271, "logps/chosen": -0.008983590640127659, "logps/rejected": -2.044522762298584, "loss": 2.3386, "nll_loss": 0.5826031565666199, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008983589941635728, "rewards/margins": 0.2035539150238037, "rewards/rejected": -0.2044522762298584, "step": 5615 }, { "epoch": 3.883817427385892, "grad_norm": 7.387112140655518, "learning_rate": 3.397879207007838e-05, "log_odds_chosen": 10.073363304138184, "log_odds_ratio": -5.7628094509709626e-05, "logits/chosen": -0.7603466510772705, "logits/rejected": -0.8240086436271667, "logps/chosen": -0.00038087720167823136, "logps/rejected": -1.7687740325927734, "loss": 1.1982, "nll_loss": 0.2995460629463196, "rewards/accuracies": 1.0, "rewards/chosen": -3.80877208954189e-05, "rewards/margins": 0.17683932185173035, "rewards/rejected": -0.17687739431858063, "step": 5616 }, { "epoch": 3.884508990318119, "grad_norm": 7.941006183624268, "learning_rate": 3.397495005378823e-05, "log_odds_chosen": 11.146267890930176, "log_odds_ratio": -2.293909346917644e-05, "logits/chosen": -0.4033728837966919, "logits/rejected": -0.5747873783111572, "logps/chosen": -0.00014363299123942852, "logps/rejected": -2.323676109313965, "loss": 1.2775, "nll_loss": 0.3193705677986145, "rewards/accuracies": 1.0, "rewards/chosen": -1.4363298760144971e-05, "rewards/margins": 0.23235327005386353, "rewards/rejected": -0.2323676347732544, "step": 5617 }, { "epoch": 3.8852005532503457, "grad_norm": 16.74596405029297, "learning_rate": 3.397110803749808e-05, "log_odds_chosen": 6.846477508544922, "log_odds_ratio": -0.2526886761188507, "logits/chosen": -0.7779616117477417, "logits/rejected": -0.7846757769584656, "logps/chosen": -0.03462180495262146, "logps/rejected": -1.091729760169983, "loss": 1.6382, "nll_loss": 0.3842869997024536, "rewards/accuracies": 0.875, "rewards/chosen": -0.0034621807280927896, "rewards/margins": 0.10571078956127167, "rewards/rejected": -0.10917297005653381, "step": 5618 }, { "epoch": 3.8858921161825726, "grad_norm": 12.336228370666504, "learning_rate": 3.396726602120793e-05, "log_odds_chosen": 10.408349990844727, "log_odds_ratio": -5.093478102935478e-05, "logits/chosen": -0.46157726645469666, "logits/rejected": -0.5192270278930664, "logps/chosen": -0.0016178932273760438, "logps/rejected": -1.9334490299224854, "loss": 1.9929, "nll_loss": 0.49821946024894714, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001617893431102857, "rewards/margins": 0.19318309426307678, "rewards/rejected": -0.19334489107131958, "step": 5619 }, { "epoch": 3.8865836791147994, "grad_norm": 10.457387924194336, "learning_rate": 3.3963424004917785e-05, "log_odds_chosen": 8.117521286010742, "log_odds_ratio": -0.010431942529976368, "logits/chosen": -0.5782761573791504, "logits/rejected": -0.6354106664657593, "logps/chosen": -0.13770128786563873, "logps/rejected": -1.486854910850525, "loss": 1.3899, "nll_loss": 0.3464207351207733, "rewards/accuracies": 1.0, "rewards/chosen": -0.013770130462944508, "rewards/margins": 0.13491535186767578, "rewards/rejected": -0.148685485124588, "step": 5620 }, { "epoch": 3.8872752420470262, "grad_norm": 6.53814172744751, "learning_rate": 3.395958198862763e-05, "log_odds_chosen": 10.368768692016602, "log_odds_ratio": -6.338141247397289e-05, "logits/chosen": -0.7327615022659302, "logits/rejected": -0.753582775592804, "logps/chosen": -0.00012228148989379406, "logps/rejected": -1.6724853515625, "loss": 1.0725, "nll_loss": 0.26810744404792786, "rewards/accuracies": 1.0, "rewards/chosen": -1.2228148079884704e-05, "rewards/margins": 0.1672362983226776, "rewards/rejected": -0.16724853217601776, "step": 5621 }, { "epoch": 3.887966804979253, "grad_norm": 15.1021728515625, "learning_rate": 3.395573997233748e-05, "log_odds_chosen": 7.218578338623047, "log_odds_ratio": -0.2036381959915161, "logits/chosen": -0.5184769034385681, "logits/rejected": -0.6829452514648438, "logps/chosen": -0.0356486439704895, "logps/rejected": -1.2603774070739746, "loss": 1.8508, "nll_loss": 0.44233378767967224, "rewards/accuracies": 0.875, "rewards/chosen": -0.003564863931387663, "rewards/margins": 0.12247288227081299, "rewards/rejected": -0.12603774666786194, "step": 5622 }, { "epoch": 3.88865836791148, "grad_norm": 8.256840705871582, "learning_rate": 3.3951897956047336e-05, "log_odds_chosen": 9.119070053100586, "log_odds_ratio": -0.0005543202278204262, "logits/chosen": -0.4936408996582031, "logits/rejected": -0.5425743460655212, "logps/chosen": -0.02127469703555107, "logps/rejected": -2.053802967071533, "loss": 1.9283, "nll_loss": 0.48202627897262573, "rewards/accuracies": 1.0, "rewards/chosen": -0.002127469517290592, "rewards/margins": 0.20325282216072083, "rewards/rejected": -0.20538030564785004, "step": 5623 }, { "epoch": 3.8893499308437067, "grad_norm": 7.375672817230225, "learning_rate": 3.394805593975718e-05, "log_odds_chosen": 10.586771011352539, "log_odds_ratio": -4.97624023410026e-05, "logits/chosen": -0.4665629267692566, "logits/rejected": -0.4793822765350342, "logps/chosen": -0.0001592121843714267, "logps/rejected": -1.8775938749313354, "loss": 1.4208, "nll_loss": 0.3551906645298004, "rewards/accuracies": 1.0, "rewards/chosen": -1.592121770954691e-05, "rewards/margins": 0.18774347007274628, "rewards/rejected": -0.1877593845129013, "step": 5624 }, { "epoch": 3.8900414937759336, "grad_norm": 8.654998779296875, "learning_rate": 3.394421392346704e-05, "log_odds_chosen": 10.283309936523438, "log_odds_ratio": -0.0001582879776833579, "logits/chosen": -0.37717336416244507, "logits/rejected": -0.5226336717605591, "logps/chosen": -0.000453198270406574, "logps/rejected": -2.113046646118164, "loss": 1.0426, "nll_loss": 0.2606269121170044, "rewards/accuracies": 1.0, "rewards/chosen": -4.531982995104045e-05, "rewards/margins": 0.21125935018062592, "rewards/rejected": -0.2113046795129776, "step": 5625 }, { "epoch": 3.8907330567081604, "grad_norm": 8.630017280578613, "learning_rate": 3.3940371907176886e-05, "log_odds_chosen": 8.697437286376953, "log_odds_ratio": -0.0015453390078619123, "logits/chosen": -0.9022117853164673, "logits/rejected": -0.905013918876648, "logps/chosen": -0.011609883978962898, "logps/rejected": -1.6822335720062256, "loss": 1.2948, "nll_loss": 0.32355785369873047, "rewards/accuracies": 1.0, "rewards/chosen": -0.001160988351330161, "rewards/margins": 0.16706237196922302, "rewards/rejected": -0.16822335124015808, "step": 5626 }, { "epoch": 3.891424619640387, "grad_norm": 7.9558424949646, "learning_rate": 3.393652989088674e-05, "log_odds_chosen": 8.38878345489502, "log_odds_ratio": -0.04084280505776405, "logits/chosen": -0.7905771732330322, "logits/rejected": -0.8350014090538025, "logps/chosen": -0.04026196897029877, "logps/rejected": -1.4470926523208618, "loss": 1.4188, "nll_loss": 0.35061413049697876, "rewards/accuracies": 1.0, "rewards/chosen": -0.004026196897029877, "rewards/margins": 0.14068305492401123, "rewards/rejected": -0.1447092592716217, "step": 5627 }, { "epoch": 3.892116182572614, "grad_norm": 11.995192527770996, "learning_rate": 3.393268787459659e-05, "log_odds_chosen": 9.498333930969238, "log_odds_ratio": -0.000704533071257174, "logits/chosen": -0.6381096839904785, "logits/rejected": -0.6799237132072449, "logps/chosen": -0.0012884100433439016, "logps/rejected": -1.744314432144165, "loss": 2.1094, "nll_loss": 0.5272815823554993, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001288410130655393, "rewards/margins": 0.1743026077747345, "rewards/rejected": -0.1744314581155777, "step": 5628 }, { "epoch": 3.892807745504841, "grad_norm": 9.74692153930664, "learning_rate": 3.3928845858306443e-05, "log_odds_chosen": 9.394281387329102, "log_odds_ratio": -0.007654293440282345, "logits/chosen": -0.483235627412796, "logits/rejected": -0.5125336050987244, "logps/chosen": -0.004396006464958191, "logps/rejected": -1.7465304136276245, "loss": 1.774, "nll_loss": 0.4427341818809509, "rewards/accuracies": 1.0, "rewards/chosen": -0.000439600640675053, "rewards/margins": 0.1742134392261505, "rewards/rejected": -0.1746530383825302, "step": 5629 }, { "epoch": 3.8934993084370677, "grad_norm": 12.278393745422363, "learning_rate": 3.392500384201629e-05, "log_odds_chosen": 10.144163131713867, "log_odds_ratio": -0.0001901520590763539, "logits/chosen": -0.6058459281921387, "logits/rejected": -0.6597455143928528, "logps/chosen": -0.00046932417899370193, "logps/rejected": -1.7287942171096802, "loss": 1.0825, "nll_loss": 0.2706070840358734, "rewards/accuracies": 1.0, "rewards/chosen": -4.693241498898715e-05, "rewards/margins": 0.17283248901367188, "rewards/rejected": -0.1728794276714325, "step": 5630 }, { "epoch": 3.8941908713692945, "grad_norm": 5.5625081062316895, "learning_rate": 3.392116182572614e-05, "log_odds_chosen": 8.977718353271484, "log_odds_ratio": -0.04539618268609047, "logits/chosen": -0.13042470812797546, "logits/rejected": -0.2869882583618164, "logps/chosen": -0.01703779771924019, "logps/rejected": -1.7658898830413818, "loss": 0.8727, "nll_loss": 0.21363988518714905, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017037795623764396, "rewards/margins": 0.17488521337509155, "rewards/rejected": -0.1765889972448349, "step": 5631 }, { "epoch": 3.8948824343015214, "grad_norm": 14.509231567382812, "learning_rate": 3.3917319809435994e-05, "log_odds_chosen": 8.715024948120117, "log_odds_ratio": -0.005328967701643705, "logits/chosen": -0.4364680051803589, "logits/rejected": -0.4962000846862793, "logps/chosen": -0.03114049881696701, "logps/rejected": -1.6229883432388306, "loss": 2.0157, "nll_loss": 0.5033845901489258, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031140500213950872, "rewards/margins": 0.1591847836971283, "rewards/rejected": -0.16229882836341858, "step": 5632 }, { "epoch": 3.895573997233748, "grad_norm": 4.437415599822998, "learning_rate": 3.391347779314584e-05, "log_odds_chosen": 9.241409301757812, "log_odds_ratio": -0.0003560621989890933, "logits/chosen": -0.6067017316818237, "logits/rejected": -0.6117865443229675, "logps/chosen": -0.020522449165582657, "logps/rejected": -2.357513904571533, "loss": 1.061, "nll_loss": 0.2652171552181244, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020522449631243944, "rewards/margins": 0.23369914293289185, "rewards/rejected": -0.23575140535831451, "step": 5633 }, { "epoch": 3.896265560165975, "grad_norm": 96.32704162597656, "learning_rate": 3.39096357768557e-05, "log_odds_chosen": 8.560503959655762, "log_odds_ratio": -0.544462263584137, "logits/chosen": -0.6925035119056702, "logits/rejected": -0.7223390936851501, "logps/chosen": -0.07953737676143646, "logps/rejected": -1.662315011024475, "loss": 2.0527, "nll_loss": 0.45873352885246277, "rewards/accuracies": 0.875, "rewards/chosen": -0.007953736931085587, "rewards/margins": 0.1582777500152588, "rewards/rejected": -0.16623149812221527, "step": 5634 }, { "epoch": 3.896957123098202, "grad_norm": 4.607226848602295, "learning_rate": 3.3905793760565545e-05, "log_odds_chosen": 9.335061073303223, "log_odds_ratio": -0.0002637306461110711, "logits/chosen": -0.36337152123451233, "logits/rejected": -0.4686540365219116, "logps/chosen": -0.008282117545604706, "logps/rejected": -1.8453588485717773, "loss": 1.0499, "nll_loss": 0.2624465525150299, "rewards/accuracies": 1.0, "rewards/chosen": -0.000828211719635874, "rewards/margins": 0.18370766937732697, "rewards/rejected": -0.18453587591648102, "step": 5635 }, { "epoch": 3.8976486860304287, "grad_norm": 10.918571472167969, "learning_rate": 3.39019517442754e-05, "log_odds_chosen": 7.754406929016113, "log_odds_ratio": -0.03191447630524635, "logits/chosen": -0.6927816271781921, "logits/rejected": -0.7175300121307373, "logps/chosen": -0.01036337111145258, "logps/rejected": -1.0414409637451172, "loss": 1.9009, "nll_loss": 0.47203582525253296, "rewards/accuracies": 1.0, "rewards/chosen": -0.001036337111145258, "rewards/margins": 0.103107750415802, "rewards/rejected": -0.10414409637451172, "step": 5636 }, { "epoch": 3.8983402489626555, "grad_norm": 10.202701568603516, "learning_rate": 3.389810972798525e-05, "log_odds_chosen": 10.172609329223633, "log_odds_ratio": -0.0006169604021124542, "logits/chosen": -0.8397464156150818, "logits/rejected": -0.9218321442604065, "logps/chosen": -0.0015594592550769448, "logps/rejected": -2.2077858448028564, "loss": 1.175, "nll_loss": 0.293697714805603, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001559459196869284, "rewards/margins": 0.22062262892723083, "rewards/rejected": -0.22077858448028564, "step": 5637 }, { "epoch": 3.8990318118948823, "grad_norm": 69.57799530029297, "learning_rate": 3.38942677116951e-05, "log_odds_chosen": 7.8932390213012695, "log_odds_ratio": -0.1768307238817215, "logits/chosen": -0.665978729724884, "logits/rejected": -0.6855136752128601, "logps/chosen": -0.03082280419766903, "logps/rejected": -2.3422088623046875, "loss": 1.8585, "nll_loss": 0.4469505846500397, "rewards/accuracies": 0.875, "rewards/chosen": -0.003082280745729804, "rewards/margins": 0.23113863170146942, "rewards/rejected": -0.23422090709209442, "step": 5638 }, { "epoch": 3.899723374827109, "grad_norm": 10.527148246765137, "learning_rate": 3.389042569540495e-05, "log_odds_chosen": 10.70540714263916, "log_odds_ratio": -9.131423576036468e-05, "logits/chosen": -0.9421408176422119, "logits/rejected": -0.9778171181678772, "logps/chosen": -0.00026267359498888254, "logps/rejected": -2.4190683364868164, "loss": 1.4453, "nll_loss": 0.3613080680370331, "rewards/accuracies": 1.0, "rewards/chosen": -2.6267360226484016e-05, "rewards/margins": 0.24188058078289032, "rewards/rejected": -0.24190685153007507, "step": 5639 }, { "epoch": 3.900414937759336, "grad_norm": 6.1640825271606445, "learning_rate": 3.38865836791148e-05, "log_odds_chosen": 8.700206756591797, "log_odds_ratio": -0.06421246379613876, "logits/chosen": -0.17634619772434235, "logits/rejected": -0.28321805596351624, "logps/chosen": -0.010652851313352585, "logps/rejected": -1.4425766468048096, "loss": 1.4418, "nll_loss": 0.3540385365486145, "rewards/accuracies": 1.0, "rewards/chosen": -0.001065285294316709, "rewards/margins": 0.1431923657655716, "rewards/rejected": -0.14425766468048096, "step": 5640 }, { "epoch": 3.901106500691563, "grad_norm": 7.589430332183838, "learning_rate": 3.388274166282465e-05, "log_odds_chosen": 9.661093711853027, "log_odds_ratio": -0.0003108852542936802, "logits/chosen": -0.6649161577224731, "logits/rejected": -0.685472309589386, "logps/chosen": -0.000709467101842165, "logps/rejected": -1.601885437965393, "loss": 0.9837, "nll_loss": 0.2459021657705307, "rewards/accuracies": 1.0, "rewards/chosen": -7.094671309459955e-05, "rewards/margins": 0.16011759638786316, "rewards/rejected": -0.16018852591514587, "step": 5641 }, { "epoch": 3.9017980636237897, "grad_norm": 10.761141777038574, "learning_rate": 3.38788996465345e-05, "log_odds_chosen": 10.6596097946167, "log_odds_ratio": -5.332134969648905e-05, "logits/chosen": -0.5889623165130615, "logits/rejected": -0.6932306885719299, "logps/chosen": -0.00045756419422104955, "logps/rejected": -2.1941545009613037, "loss": 1.7087, "nll_loss": 0.4271653890609741, "rewards/accuracies": 1.0, "rewards/chosen": -4.575642378767952e-05, "rewards/margins": 0.21936970949172974, "rewards/rejected": -0.21941545605659485, "step": 5642 }, { "epoch": 3.9024896265560165, "grad_norm": 9.148547172546387, "learning_rate": 3.387505763024436e-05, "log_odds_chosen": 9.726945877075195, "log_odds_ratio": -0.048618048429489136, "logits/chosen": -0.5311049222946167, "logits/rejected": -0.5555057525634766, "logps/chosen": -0.008680197410285473, "logps/rejected": -1.5641781091690063, "loss": 1.1858, "nll_loss": 0.29159092903137207, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008680198225192726, "rewards/margins": 0.1555497944355011, "rewards/rejected": -0.1564178168773651, "step": 5643 }, { "epoch": 3.9031811894882433, "grad_norm": 10.668489456176758, "learning_rate": 3.38712156139542e-05, "log_odds_chosen": 8.888282775878906, "log_odds_ratio": -0.014365678653120995, "logits/chosen": -0.7868924736976624, "logits/rejected": -0.8093586564064026, "logps/chosen": -0.006576868239790201, "logps/rejected": -1.955217719078064, "loss": 1.3799, "nll_loss": 0.34352636337280273, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006576868472620845, "rewards/margins": 0.19486409425735474, "rewards/rejected": -0.1955217719078064, "step": 5644 }, { "epoch": 3.90387275242047, "grad_norm": 9.662392616271973, "learning_rate": 3.3867373597664055e-05, "log_odds_chosen": 9.729461669921875, "log_odds_ratio": -0.004915539175271988, "logits/chosen": -0.8054319620132446, "logits/rejected": -0.8861163854598999, "logps/chosen": -0.0031532247085124254, "logps/rejected": -1.9161059856414795, "loss": 1.1258, "nll_loss": 0.2809663712978363, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031532245338894427, "rewards/margins": 0.19129526615142822, "rewards/rejected": -0.19161058962345123, "step": 5645 }, { "epoch": 3.904564315352697, "grad_norm": 12.379378318786621, "learning_rate": 3.386353158137391e-05, "log_odds_chosen": 8.922910690307617, "log_odds_ratio": -0.15809865295886993, "logits/chosen": -0.28489434719085693, "logits/rejected": -0.35203734040260315, "logps/chosen": -0.02762320078909397, "logps/rejected": -2.4533705711364746, "loss": 1.3379, "nll_loss": 0.3186749815940857, "rewards/accuracies": 0.875, "rewards/chosen": -0.0027623199857771397, "rewards/margins": 0.2425747513771057, "rewards/rejected": -0.24533706903457642, "step": 5646 }, { "epoch": 3.905255878284924, "grad_norm": 12.518477439880371, "learning_rate": 3.385968956508376e-05, "log_odds_chosen": 9.158768653869629, "log_odds_ratio": -0.0021483541931957006, "logits/chosen": -0.23381444811820984, "logits/rejected": -0.4085695147514343, "logps/chosen": -0.021755579859018326, "logps/rejected": -2.0978522300720215, "loss": 1.7394, "nll_loss": 0.4346234202384949, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021755581256002188, "rewards/margins": 0.20760969817638397, "rewards/rejected": -0.20978525280952454, "step": 5647 }, { "epoch": 3.9059474412171507, "grad_norm": 8.521187782287598, "learning_rate": 3.3855847548793606e-05, "log_odds_chosen": 10.767740249633789, "log_odds_ratio": -8.845600677886978e-05, "logits/chosen": -0.3243058919906616, "logits/rejected": -0.4415555000305176, "logps/chosen": -0.0006823982112109661, "logps/rejected": -2.7912864685058594, "loss": 1.1311, "nll_loss": 0.2827630639076233, "rewards/accuracies": 1.0, "rewards/chosen": -6.823982403147966e-05, "rewards/margins": 0.279060423374176, "rewards/rejected": -0.27912867069244385, "step": 5648 }, { "epoch": 3.9066390041493775, "grad_norm": 7.364728927612305, "learning_rate": 3.385200553250346e-05, "log_odds_chosen": 9.6636962890625, "log_odds_ratio": -0.00014321855269372463, "logits/chosen": -0.31962475180625916, "logits/rejected": -0.39147666096687317, "logps/chosen": -0.01126229576766491, "logps/rejected": -2.1666345596313477, "loss": 1.6725, "nll_loss": 0.4181104600429535, "rewards/accuracies": 1.0, "rewards/chosen": -0.001126229646615684, "rewards/margins": 0.21553722023963928, "rewards/rejected": -0.2166634500026703, "step": 5649 }, { "epoch": 3.9073305670816043, "grad_norm": 9.632866859436035, "learning_rate": 3.384816351621331e-05, "log_odds_chosen": 6.390057563781738, "log_odds_ratio": -0.09337200969457626, "logits/chosen": -0.6340508460998535, "logits/rejected": -0.7051093578338623, "logps/chosen": -0.03799796849489212, "logps/rejected": -1.526399850845337, "loss": 1.7607, "nll_loss": 0.4308300018310547, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037997972685843706, "rewards/margins": 0.14884020388126373, "rewards/rejected": -0.1526399850845337, "step": 5650 }, { "epoch": 3.908022130013831, "grad_norm": 12.593082427978516, "learning_rate": 3.3844321499923156e-05, "log_odds_chosen": 11.50406265258789, "log_odds_ratio": -1.4786578503844794e-05, "logits/chosen": -0.3148428499698639, "logits/rejected": -0.37387746572494507, "logps/chosen": -9.004796447698027e-05, "logps/rejected": -2.1949687004089355, "loss": 1.3158, "nll_loss": 0.32895559072494507, "rewards/accuracies": 1.0, "rewards/chosen": -9.004796083900146e-06, "rewards/margins": 0.21948787569999695, "rewards/rejected": -0.21949687600135803, "step": 5651 }, { "epoch": 3.908713692946058, "grad_norm": 12.804461479187012, "learning_rate": 3.3840479483633016e-05, "log_odds_chosen": 10.317901611328125, "log_odds_ratio": -0.00017403802485205233, "logits/chosen": -0.42475345730781555, "logits/rejected": -0.5792452096939087, "logps/chosen": -0.0003738144878298044, "logps/rejected": -1.867620587348938, "loss": 1.8099, "nll_loss": 0.45245620608329773, "rewards/accuracies": 1.0, "rewards/chosen": -3.7381450965767726e-05, "rewards/margins": 0.1867246776819229, "rewards/rejected": -0.18676206469535828, "step": 5652 }, { "epoch": 3.909405255878285, "grad_norm": 5.101190090179443, "learning_rate": 3.383663746734286e-05, "log_odds_chosen": 8.503926277160645, "log_odds_ratio": -0.0007011541747488081, "logits/chosen": -0.4765605926513672, "logits/rejected": -0.5443054437637329, "logps/chosen": -0.02928958460688591, "logps/rejected": -1.743721842765808, "loss": 1.9917, "nll_loss": 0.4978662133216858, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029289585072547197, "rewards/margins": 0.17144320905208588, "rewards/rejected": -0.17437216639518738, "step": 5653 }, { "epoch": 3.9100968188105116, "grad_norm": 8.05077075958252, "learning_rate": 3.3832795451052714e-05, "log_odds_chosen": 9.06122875213623, "log_odds_ratio": -0.0043915510177612305, "logits/chosen": -0.7249891757965088, "logits/rejected": -0.7603964805603027, "logps/chosen": -0.0036959440913051367, "logps/rejected": -1.5966659784317017, "loss": 1.4972, "nll_loss": 0.3738667070865631, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003695944033097476, "rewards/margins": 0.15929700434207916, "rewards/rejected": -0.15966659784317017, "step": 5654 }, { "epoch": 3.9107883817427385, "grad_norm": 14.369111061096191, "learning_rate": 3.3828953434762566e-05, "log_odds_chosen": 8.608853340148926, "log_odds_ratio": -0.20911157131195068, "logits/chosen": -0.7546738386154175, "logits/rejected": -0.8016392588615417, "logps/chosen": -0.02957131341099739, "logps/rejected": -1.8817180395126343, "loss": 1.7217, "nll_loss": 0.40951788425445557, "rewards/accuracies": 0.875, "rewards/chosen": -0.002957131713628769, "rewards/margins": 0.18521468341350555, "rewards/rejected": -0.18817181885242462, "step": 5655 }, { "epoch": 3.9114799446749653, "grad_norm": 7.642223358154297, "learning_rate": 3.382511141847242e-05, "log_odds_chosen": 7.869269847869873, "log_odds_ratio": -0.05908142402768135, "logits/chosen": -0.5965639352798462, "logits/rejected": -0.6018823981285095, "logps/chosen": -0.016461383551359177, "logps/rejected": -1.225979208946228, "loss": 1.16, "nll_loss": 0.2840805649757385, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016461381455883384, "rewards/margins": 0.12095178663730621, "rewards/rejected": -0.12259792536497116, "step": 5656 }, { "epoch": 3.912171507607192, "grad_norm": 8.684945106506348, "learning_rate": 3.3821269402182264e-05, "log_odds_chosen": 9.467185974121094, "log_odds_ratio": -0.0003374728839844465, "logits/chosen": -0.34399327635765076, "logits/rejected": -0.42578768730163574, "logps/chosen": -0.0021155558060854673, "logps/rejected": -2.233706474304199, "loss": 1.6564, "nll_loss": 0.4140542149543762, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021155556896701455, "rewards/margins": 0.2231590747833252, "rewards/rejected": -0.22337064146995544, "step": 5657 }, { "epoch": 3.912863070539419, "grad_norm": 8.879938125610352, "learning_rate": 3.381742738589212e-05, "log_odds_chosen": 9.210071563720703, "log_odds_ratio": -0.0010100032668560743, "logits/chosen": -0.4090440571308136, "logits/rejected": -0.537046492099762, "logps/chosen": -0.006548542529344559, "logps/rejected": -1.4390618801116943, "loss": 1.4456, "nll_loss": 0.36130374670028687, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006548542296513915, "rewards/margins": 0.14325134456157684, "rewards/rejected": -0.14390620589256287, "step": 5658 }, { "epoch": 3.913554633471646, "grad_norm": 15.874176025390625, "learning_rate": 3.381358536960197e-05, "log_odds_chosen": 9.65210247039795, "log_odds_ratio": -0.00018606259254738688, "logits/chosen": -0.16119661927223206, "logits/rejected": -0.2798956632614136, "logps/chosen": -0.0009072792017832398, "logps/rejected": -1.4413468837738037, "loss": 1.7459, "nll_loss": 0.4364451766014099, "rewards/accuracies": 1.0, "rewards/chosen": -9.072791726794094e-05, "rewards/margins": 0.1440439522266388, "rewards/rejected": -0.14413470029830933, "step": 5659 }, { "epoch": 3.9142461964038726, "grad_norm": 8.610029220581055, "learning_rate": 3.3809743353311815e-05, "log_odds_chosen": 9.745826721191406, "log_odds_ratio": -0.0004060302453581244, "logits/chosen": -0.7324758172035217, "logits/rejected": -0.7614333629608154, "logps/chosen": -0.010122316889464855, "logps/rejected": -1.9247747659683228, "loss": 1.4778, "nll_loss": 0.36940690875053406, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010122316889464855, "rewards/margins": 0.19146525859832764, "rewards/rejected": -0.1924774944782257, "step": 5660 }, { "epoch": 3.9149377593360994, "grad_norm": 10.20022201538086, "learning_rate": 3.3805901337021674e-05, "log_odds_chosen": 10.281898498535156, "log_odds_ratio": -6.861681322334334e-05, "logits/chosen": -0.3770533800125122, "logits/rejected": -0.43447551131248474, "logps/chosen": -0.00025260145775973797, "logps/rejected": -2.160262107849121, "loss": 1.2095, "nll_loss": 0.3023737967014313, "rewards/accuracies": 1.0, "rewards/chosen": -2.5260145775973797e-05, "rewards/margins": 0.21600095927715302, "rewards/rejected": -0.2160262167453766, "step": 5661 }, { "epoch": 3.9156293222683263, "grad_norm": 14.370477676391602, "learning_rate": 3.380205932073152e-05, "log_odds_chosen": 9.886101722717285, "log_odds_ratio": -6.754438072675839e-05, "logits/chosen": -0.8756242990493774, "logits/rejected": -0.867035984992981, "logps/chosen": -0.0002980373101308942, "logps/rejected": -1.5816593170166016, "loss": 3.0271, "nll_loss": 0.756769061088562, "rewards/accuracies": 1.0, "rewards/chosen": -2.9803730285493657e-05, "rewards/margins": 0.15813612937927246, "rewards/rejected": -0.15816593170166016, "step": 5662 }, { "epoch": 3.916320885200553, "grad_norm": 16.824203491210938, "learning_rate": 3.379821730444137e-05, "log_odds_chosen": 10.197331428527832, "log_odds_ratio": -6.708221189910546e-05, "logits/chosen": -0.6757776737213135, "logits/rejected": -0.7043853998184204, "logps/chosen": -0.0004643636057153344, "logps/rejected": -2.069650411605835, "loss": 1.6953, "nll_loss": 0.4238058924674988, "rewards/accuracies": 1.0, "rewards/chosen": -4.6436362026724964e-05, "rewards/margins": 0.2069185972213745, "rewards/rejected": -0.20696504414081573, "step": 5663 }, { "epoch": 3.91701244813278, "grad_norm": 9.39612102508545, "learning_rate": 3.3794375288151225e-05, "log_odds_chosen": 9.340601921081543, "log_odds_ratio": -0.04161971062421799, "logits/chosen": -0.47248268127441406, "logits/rejected": -0.46865522861480713, "logps/chosen": -0.009520137682557106, "logps/rejected": -1.5743775367736816, "loss": 1.4817, "nll_loss": 0.366254061460495, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009520137682557106, "rewards/margins": 0.15648573637008667, "rewards/rejected": -0.1574377417564392, "step": 5664 }, { "epoch": 3.9177040110650068, "grad_norm": 12.282084465026855, "learning_rate": 3.379053327186108e-05, "log_odds_chosen": 9.280634880065918, "log_odds_ratio": -0.0007975812768563628, "logits/chosen": -1.0122318267822266, "logits/rejected": -1.0686604976654053, "logps/chosen": -0.004065337125211954, "logps/rejected": -2.2676191329956055, "loss": 1.3949, "nll_loss": 0.3486459255218506, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040653368341736495, "rewards/margins": 0.2263554036617279, "rewards/rejected": -0.22676193714141846, "step": 5665 }, { "epoch": 3.9183955739972336, "grad_norm": 16.394123077392578, "learning_rate": 3.378669125557092e-05, "log_odds_chosen": 9.665855407714844, "log_odds_ratio": -0.01791047677397728, "logits/chosen": -0.394491970539093, "logits/rejected": -0.51012122631073, "logps/chosen": -0.10202533006668091, "logps/rejected": -1.9771158695220947, "loss": 1.2788, "nll_loss": 0.31791606545448303, "rewards/accuracies": 1.0, "rewards/chosen": -0.010202532634139061, "rewards/margins": 0.18750904500484467, "rewards/rejected": -0.19771158695220947, "step": 5666 }, { "epoch": 3.9190871369294604, "grad_norm": 6.717284202575684, "learning_rate": 3.3782849239280775e-05, "log_odds_chosen": 9.576358795166016, "log_odds_ratio": -0.0009835807140916586, "logits/chosen": -0.6450543403625488, "logits/rejected": -0.7207726836204529, "logps/chosen": -0.0023794856388121843, "logps/rejected": -1.3716387748718262, "loss": 1.2093, "nll_loss": 0.3022391200065613, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023794855223968625, "rewards/margins": 0.13692592084407806, "rewards/rejected": -0.13716387748718262, "step": 5667 }, { "epoch": 3.9197786998616873, "grad_norm": 7.360776901245117, "learning_rate": 3.377900722299063e-05, "log_odds_chosen": 9.744741439819336, "log_odds_ratio": -0.0002790922881104052, "logits/chosen": -0.5799486637115479, "logits/rejected": -0.5917710661888123, "logps/chosen": -0.0005377319175750017, "logps/rejected": -1.6921937465667725, "loss": 1.4989, "nll_loss": 0.37468501925468445, "rewards/accuracies": 1.0, "rewards/chosen": -5.3773193940287456e-05, "rewards/margins": 0.16916561126708984, "rewards/rejected": -0.16921937465667725, "step": 5668 }, { "epoch": 3.920470262793914, "grad_norm": 5.970398902893066, "learning_rate": 3.377516520670047e-05, "log_odds_chosen": 8.449975967407227, "log_odds_ratio": -0.008219233714044094, "logits/chosen": -0.6749115586280823, "logits/rejected": -0.7539016008377075, "logps/chosen": -0.004821880254894495, "logps/rejected": -1.5712368488311768, "loss": 1.6453, "nll_loss": 0.41051411628723145, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004821880429517478, "rewards/margins": 0.15664149820804596, "rewards/rejected": -0.15712368488311768, "step": 5669 }, { "epoch": 3.921161825726141, "grad_norm": 11.404149055480957, "learning_rate": 3.377132319041033e-05, "log_odds_chosen": 8.849227905273438, "log_odds_ratio": -0.0011710242833942175, "logits/chosen": -0.6489973664283752, "logits/rejected": -0.6897670030593872, "logps/chosen": -0.028854500502347946, "logps/rejected": -2.122948169708252, "loss": 1.3919, "nll_loss": 0.34785789251327515, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028854499105364084, "rewards/margins": 0.20940934121608734, "rewards/rejected": -0.212294802069664, "step": 5670 }, { "epoch": 3.9218533886583677, "grad_norm": 12.35291862487793, "learning_rate": 3.376748117412018e-05, "log_odds_chosen": 10.676984786987305, "log_odds_ratio": -0.0003543601487763226, "logits/chosen": -0.8129048347473145, "logits/rejected": -0.8609099984169006, "logps/chosen": -0.01920601725578308, "logps/rejected": -3.159191131591797, "loss": 1.425, "nll_loss": 0.3562049865722656, "rewards/accuracies": 1.0, "rewards/chosen": -0.001920601585879922, "rewards/margins": 0.3139985203742981, "rewards/rejected": -0.3159191310405731, "step": 5671 }, { "epoch": 3.922544951590595, "grad_norm": 13.723042488098145, "learning_rate": 3.376363915783003e-05, "log_odds_chosen": 11.369161605834961, "log_odds_ratio": -1.8434815501677804e-05, "logits/chosen": -0.6580374240875244, "logits/rejected": -0.7136290073394775, "logps/chosen": -0.00016635411884635687, "logps/rejected": -2.5891010761260986, "loss": 1.3817, "nll_loss": 0.3454234302043915, "rewards/accuracies": 1.0, "rewards/chosen": -1.6635411157039925e-05, "rewards/margins": 0.2588934600353241, "rewards/rejected": -0.2589101195335388, "step": 5672 }, { "epoch": 3.923236514522822, "grad_norm": 12.602025032043457, "learning_rate": 3.375979714153988e-05, "log_odds_chosen": 10.25037956237793, "log_odds_ratio": -7.309335342142731e-05, "logits/chosen": -0.41974616050720215, "logits/rejected": -0.5605471134185791, "logps/chosen": -0.0003384738811291754, "logps/rejected": -1.9320080280303955, "loss": 1.2772, "nll_loss": 0.31928902864456177, "rewards/accuracies": 1.0, "rewards/chosen": -3.384738738532178e-05, "rewards/margins": 0.19316695630550385, "rewards/rejected": -0.19320081174373627, "step": 5673 }, { "epoch": 3.9239280774550487, "grad_norm": 10.467412948608398, "learning_rate": 3.3755955125249736e-05, "log_odds_chosen": 10.826410293579102, "log_odds_ratio": -2.9135328077245504e-05, "logits/chosen": -1.025895118713379, "logits/rejected": -1.1272532939910889, "logps/chosen": -0.00017180161376018077, "logps/rejected": -2.1393187046051025, "loss": 1.2129, "nll_loss": 0.3032238185405731, "rewards/accuracies": 1.0, "rewards/chosen": -1.718016210361384e-05, "rewards/margins": 0.2139146775007248, "rewards/rejected": -0.2139318585395813, "step": 5674 }, { "epoch": 3.9246196403872755, "grad_norm": 9.592738151550293, "learning_rate": 3.375211310895958e-05, "log_odds_chosen": 10.144782066345215, "log_odds_ratio": -0.0037850146181881428, "logits/chosen": -0.4527406096458435, "logits/rejected": -0.5927005410194397, "logps/chosen": -0.003005788428708911, "logps/rejected": -2.244025230407715, "loss": 1.4077, "nll_loss": 0.35155072808265686, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003005788312293589, "rewards/margins": 0.2241019308567047, "rewards/rejected": -0.2244025319814682, "step": 5675 }, { "epoch": 3.9253112033195023, "grad_norm": 9.165721893310547, "learning_rate": 3.3748271092669434e-05, "log_odds_chosen": 8.65027141571045, "log_odds_ratio": -0.006758753210306168, "logits/chosen": -0.6444883942604065, "logits/rejected": -0.7739410400390625, "logps/chosen": -0.006450993940234184, "logps/rejected": -1.4249351024627686, "loss": 1.9798, "nll_loss": 0.4942636489868164, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006450994405895472, "rewards/margins": 0.1418484002351761, "rewards/rejected": -0.14249351620674133, "step": 5676 }, { "epoch": 3.926002766251729, "grad_norm": 8.881304740905762, "learning_rate": 3.3744429076379286e-05, "log_odds_chosen": 8.597208023071289, "log_odds_ratio": -0.002491341670975089, "logits/chosen": -0.9394902586936951, "logits/rejected": -0.9371085166931152, "logps/chosen": -0.002545249182730913, "logps/rejected": -1.9115054607391357, "loss": 1.7249, "nll_loss": 0.4309871792793274, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002545249299146235, "rewards/margins": 0.19089603424072266, "rewards/rejected": -0.19115056097507477, "step": 5677 }, { "epoch": 3.926694329183956, "grad_norm": 6.6902689933776855, "learning_rate": 3.374058706008913e-05, "log_odds_chosen": 9.036705017089844, "log_odds_ratio": -0.05169745907187462, "logits/chosen": -0.7906173467636108, "logits/rejected": -0.8020066022872925, "logps/chosen": -0.013565192930400372, "logps/rejected": -1.5790175199508667, "loss": 1.0445, "nll_loss": 0.25596269965171814, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013565192930400372, "rewards/margins": 0.1565452218055725, "rewards/rejected": -0.15790174901485443, "step": 5678 }, { "epoch": 3.927385892116183, "grad_norm": 8.133184432983398, "learning_rate": 3.373674504379899e-05, "log_odds_chosen": 9.908307075500488, "log_odds_ratio": -0.0002685143263079226, "logits/chosen": -0.7017204761505127, "logits/rejected": -0.8481262922286987, "logps/chosen": -0.0005481558619067073, "logps/rejected": -2.0266849994659424, "loss": 1.3127, "nll_loss": 0.3281383216381073, "rewards/accuracies": 1.0, "rewards/chosen": -5.481558764586225e-05, "rewards/margins": 0.2026136815547943, "rewards/rejected": -0.20266850292682648, "step": 5679 }, { "epoch": 3.9280774550484097, "grad_norm": 13.340405464172363, "learning_rate": 3.373290302750884e-05, "log_odds_chosen": 9.175450325012207, "log_odds_ratio": -0.0004610381438396871, "logits/chosen": -0.667832612991333, "logits/rejected": -0.7125696539878845, "logps/chosen": -0.0016391351819038391, "logps/rejected": -2.527400255203247, "loss": 2.3522, "nll_loss": 0.5880076885223389, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016391351528000087, "rewards/margins": 0.25257614254951477, "rewards/rejected": -0.2527400255203247, "step": 5680 }, { "epoch": 3.9287690179806365, "grad_norm": 9.222174644470215, "learning_rate": 3.372906101121869e-05, "log_odds_chosen": 7.210216522216797, "log_odds_ratio": -0.17598360776901245, "logits/chosen": -0.9044046998023987, "logits/rejected": -0.9510660171508789, "logps/chosen": -0.02575918287038803, "logps/rejected": -1.4387961626052856, "loss": 1.1473, "nll_loss": 0.2692229449748993, "rewards/accuracies": 0.875, "rewards/chosen": -0.002575918333604932, "rewards/margins": 0.14130370318889618, "rewards/rejected": -0.14387962222099304, "step": 5681 }, { "epoch": 3.9294605809128633, "grad_norm": 11.76744270324707, "learning_rate": 3.372521899492854e-05, "log_odds_chosen": 9.367456436157227, "log_odds_ratio": -0.0007709745550528169, "logits/chosen": -0.6113981008529663, "logits/rejected": -0.7086760997772217, "logps/chosen": -0.010728825815021992, "logps/rejected": -2.1775169372558594, "loss": 1.6858, "nll_loss": 0.4213826060295105, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010728825582191348, "rewards/margins": 0.21667879819869995, "rewards/rejected": -0.21775168180465698, "step": 5682 }, { "epoch": 3.93015214384509, "grad_norm": 7.538707256317139, "learning_rate": 3.3721376978638394e-05, "log_odds_chosen": 8.055183410644531, "log_odds_ratio": -0.006664213724434376, "logits/chosen": -0.5837987065315247, "logits/rejected": -0.6343657970428467, "logps/chosen": -0.003828394692391157, "logps/rejected": -1.2160768508911133, "loss": 1.213, "nll_loss": 0.3025856018066406, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003828394692391157, "rewards/margins": 0.12122484296560287, "rewards/rejected": -0.12160768359899521, "step": 5683 }, { "epoch": 3.930843706777317, "grad_norm": 7.9338297843933105, "learning_rate": 3.371753496234824e-05, "log_odds_chosen": 10.308126449584961, "log_odds_ratio": -6.612496508751065e-05, "logits/chosen": -0.1366608887910843, "logits/rejected": -0.29656341671943665, "logps/chosen": -0.00024257070617750287, "logps/rejected": -1.475003957748413, "loss": 1.5041, "nll_loss": 0.37602758407592773, "rewards/accuracies": 1.0, "rewards/chosen": -2.425707134534605e-05, "rewards/margins": 0.14747613668441772, "rewards/rejected": -0.1475003957748413, "step": 5684 }, { "epoch": 3.931535269709544, "grad_norm": 12.777952194213867, "learning_rate": 3.371369294605809e-05, "log_odds_chosen": 8.208662986755371, "log_odds_ratio": -0.013890150934457779, "logits/chosen": -0.7391720414161682, "logits/rejected": -0.7633452415466309, "logps/chosen": -0.029466290026903152, "logps/rejected": -2.0348939895629883, "loss": 1.6546, "nll_loss": 0.4122610092163086, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029466289561241865, "rewards/margins": 0.20054274797439575, "rewards/rejected": -0.20348937809467316, "step": 5685 }, { "epoch": 3.9322268326417706, "grad_norm": 5.8640031814575195, "learning_rate": 3.3709850929767945e-05, "log_odds_chosen": 8.976058006286621, "log_odds_ratio": -0.0009105091448873281, "logits/chosen": -0.17427073419094086, "logits/rejected": -0.16047167778015137, "logps/chosen": -0.00046488974476233125, "logps/rejected": -1.3876928091049194, "loss": 1.0374, "nll_loss": 0.2592521011829376, "rewards/accuracies": 1.0, "rewards/chosen": -4.6488974476233125e-05, "rewards/margins": 0.13872277736663818, "rewards/rejected": -0.138769268989563, "step": 5686 }, { "epoch": 3.9329183955739975, "grad_norm": 10.039111137390137, "learning_rate": 3.370600891347779e-05, "log_odds_chosen": 8.653139114379883, "log_odds_ratio": -0.006638075225055218, "logits/chosen": -0.770908772945404, "logits/rejected": -0.7579331398010254, "logps/chosen": -0.027387000620365143, "logps/rejected": -1.4432952404022217, "loss": 1.4031, "nll_loss": 0.3501099646091461, "rewards/accuracies": 1.0, "rewards/chosen": -0.002738700248301029, "rewards/margins": 0.14159083366394043, "rewards/rejected": -0.1443295180797577, "step": 5687 }, { "epoch": 3.9336099585062243, "grad_norm": 12.49831771850586, "learning_rate": 3.370216689718765e-05, "log_odds_chosen": 9.67283821105957, "log_odds_ratio": -0.011118143796920776, "logits/chosen": -0.4001826047897339, "logits/rejected": -0.5056847333908081, "logps/chosen": -0.01285476516932249, "logps/rejected": -2.169782876968384, "loss": 1.5136, "nll_loss": 0.3772900700569153, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012854763772338629, "rewards/margins": 0.21569281816482544, "rewards/rejected": -0.2169782668352127, "step": 5688 }, { "epoch": 3.934301521438451, "grad_norm": 8.358305931091309, "learning_rate": 3.3698324880897495e-05, "log_odds_chosen": 9.098575592041016, "log_odds_ratio": -0.010579775087535381, "logits/chosen": 0.1532040536403656, "logits/rejected": 0.04398436099290848, "logps/chosen": -0.016014760360121727, "logps/rejected": -2.351327657699585, "loss": 1.2948, "nll_loss": 0.32264819741249084, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016014762222766876, "rewards/margins": 0.23353126645088196, "rewards/rejected": -0.23513275384902954, "step": 5689 }, { "epoch": 3.934993084370678, "grad_norm": 11.339001655578613, "learning_rate": 3.369448286460735e-05, "log_odds_chosen": 9.736185073852539, "log_odds_ratio": -0.0001969828736037016, "logits/chosen": -0.41910022497177124, "logits/rejected": -0.44641178846359253, "logps/chosen": -0.0006493827095255256, "logps/rejected": -1.912126064300537, "loss": 1.242, "nll_loss": 0.3104857802391052, "rewards/accuracies": 1.0, "rewards/chosen": -6.493827095255256e-05, "rewards/margins": 0.19114765524864197, "rewards/rejected": -0.19121260941028595, "step": 5690 }, { "epoch": 3.935684647302905, "grad_norm": 7.187035083770752, "learning_rate": 3.36906408483172e-05, "log_odds_chosen": 9.698208808898926, "log_odds_ratio": -0.00012226369290146977, "logits/chosen": -0.15801896154880524, "logits/rejected": -0.17950023710727692, "logps/chosen": -0.0003952296101488173, "logps/rejected": -1.661260962486267, "loss": 1.518, "nll_loss": 0.37948352098464966, "rewards/accuracies": 1.0, "rewards/chosen": -3.95229653804563e-05, "rewards/margins": 0.1660865694284439, "rewards/rejected": -0.16612611711025238, "step": 5691 }, { "epoch": 3.9363762102351316, "grad_norm": 10.54863452911377, "learning_rate": 3.368679883202705e-05, "log_odds_chosen": 10.954380989074707, "log_odds_ratio": -4.8866688302950934e-05, "logits/chosen": -0.13272906839847565, "logits/rejected": -0.2529900372028351, "logps/chosen": -0.0001786511711543426, "logps/rejected": -2.4072556495666504, "loss": 1.3262, "nll_loss": 0.33153992891311646, "rewards/accuracies": 1.0, "rewards/chosen": -1.786511711543426e-05, "rewards/margins": 0.24070771038532257, "rewards/rejected": -0.240725576877594, "step": 5692 }, { "epoch": 3.9370677731673585, "grad_norm": 10.217341423034668, "learning_rate": 3.36829568157369e-05, "log_odds_chosen": 10.294401168823242, "log_odds_ratio": -0.0006365490262396634, "logits/chosen": -0.5235608220100403, "logits/rejected": -0.6177823543548584, "logps/chosen": -0.00029386149253696203, "logps/rejected": -2.145314931869507, "loss": 1.2532, "nll_loss": 0.31324440240859985, "rewards/accuracies": 1.0, "rewards/chosen": -2.9386150345089845e-05, "rewards/margins": 0.21450212597846985, "rewards/rejected": -0.21453151106834412, "step": 5693 }, { "epoch": 3.9377593360995853, "grad_norm": 9.897907257080078, "learning_rate": 3.367911479944675e-05, "log_odds_chosen": 7.953948020935059, "log_odds_ratio": -0.2536156177520752, "logits/chosen": -0.17607209086418152, "logits/rejected": -0.23834289610385895, "logps/chosen": -0.03188952058553696, "logps/rejected": -1.685053825378418, "loss": 1.5987, "nll_loss": 0.37431544065475464, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031889521051198244, "rewards/margins": 0.16531643271446228, "rewards/rejected": -0.16850540041923523, "step": 5694 }, { "epoch": 3.938450899031812, "grad_norm": 11.081581115722656, "learning_rate": 3.36752727831566e-05, "log_odds_chosen": 9.44058895111084, "log_odds_ratio": -0.0004364719206932932, "logits/chosen": -0.5609422326087952, "logits/rejected": -0.5870753526687622, "logps/chosen": -0.03841325640678406, "logps/rejected": -2.3334743976593018, "loss": 1.6473, "nll_loss": 0.41177529096603394, "rewards/accuracies": 1.0, "rewards/chosen": -0.003841325407847762, "rewards/margins": 0.22950612008571625, "rewards/rejected": -0.23334744572639465, "step": 5695 }, { "epoch": 3.939142461964039, "grad_norm": 7.237489700317383, "learning_rate": 3.367143076686645e-05, "log_odds_chosen": 9.057950973510742, "log_odds_ratio": -0.0006810713675804436, "logits/chosen": -0.6686182022094727, "logits/rejected": -0.7706915736198425, "logps/chosen": -0.0011979506816715002, "logps/rejected": -1.3306667804718018, "loss": 1.5321, "nll_loss": 0.38295263051986694, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011979506234638393, "rewards/margins": 0.1329468935728073, "rewards/rejected": -0.13306669890880585, "step": 5696 }, { "epoch": 3.9398340248962658, "grad_norm": 11.648604393005371, "learning_rate": 3.366758875057631e-05, "log_odds_chosen": 10.94399642944336, "log_odds_ratio": -3.988520984421484e-05, "logits/chosen": -0.6112430691719055, "logits/rejected": -0.6546251773834229, "logps/chosen": -0.0002443194971419871, "logps/rejected": -2.443819284439087, "loss": 1.1959, "nll_loss": 0.2989806830883026, "rewards/accuracies": 1.0, "rewards/chosen": -2.4431948986602947e-05, "rewards/margins": 0.24435749650001526, "rewards/rejected": -0.24438193440437317, "step": 5697 }, { "epoch": 3.9405255878284926, "grad_norm": 6.354919910430908, "learning_rate": 3.3663746734286154e-05, "log_odds_chosen": 10.356807708740234, "log_odds_ratio": -0.00037150000571273267, "logits/chosen": -0.25689586997032166, "logits/rejected": -0.3123002052307129, "logps/chosen": -0.0004208147875033319, "logps/rejected": -1.9435677528381348, "loss": 1.0913, "nll_loss": 0.27279186248779297, "rewards/accuracies": 1.0, "rewards/chosen": -4.2081475839950144e-05, "rewards/margins": 0.19431471824645996, "rewards/rejected": -0.1943567991256714, "step": 5698 }, { "epoch": 3.9412171507607194, "grad_norm": 8.171834945678711, "learning_rate": 3.3659904717996006e-05, "log_odds_chosen": 10.111719131469727, "log_odds_ratio": -7.267138425959274e-05, "logits/chosen": -0.6796890497207642, "logits/rejected": -0.6865609884262085, "logps/chosen": -0.00021459744311869144, "logps/rejected": -1.6100369691848755, "loss": 0.8555, "nll_loss": 0.2138572633266449, "rewards/accuracies": 1.0, "rewards/chosen": -2.1459745767060667e-05, "rewards/margins": 0.16098225116729736, "rewards/rejected": -0.1610036939382553, "step": 5699 }, { "epoch": 3.9419087136929463, "grad_norm": 5.196160316467285, "learning_rate": 3.365606270170586e-05, "log_odds_chosen": 9.61539077758789, "log_odds_ratio": -0.00044482407975010574, "logits/chosen": -0.8210750818252563, "logits/rejected": -0.8469647765159607, "logps/chosen": -0.001331267412751913, "logps/rejected": -2.3655765056610107, "loss": 0.9967, "nll_loss": 0.2491181343793869, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001331267412751913, "rewards/margins": 0.2364245355129242, "rewards/rejected": -0.23655766248703003, "step": 5700 }, { "epoch": 3.942600276625173, "grad_norm": 13.016851425170898, "learning_rate": 3.365222068541571e-05, "log_odds_chosen": 7.50762939453125, "log_odds_ratio": -0.07106940448284149, "logits/chosen": -0.6622728109359741, "logits/rejected": -0.6977077722549438, "logps/chosen": -0.01938733085989952, "logps/rejected": -1.0778499841690063, "loss": 1.5959, "nll_loss": 0.391880065202713, "rewards/accuracies": 1.0, "rewards/chosen": -0.001938733272254467, "rewards/margins": 0.10584627091884613, "rewards/rejected": -0.10778500139713287, "step": 5701 }, { "epoch": 3.9432918395574, "grad_norm": 9.43426513671875, "learning_rate": 3.3648378669125557e-05, "log_odds_chosen": 10.202520370483398, "log_odds_ratio": -0.0006938951555639505, "logits/chosen": -0.6339164972305298, "logits/rejected": -0.6359383463859558, "logps/chosen": -0.0009514871053397655, "logps/rejected": -2.173431396484375, "loss": 1.6117, "nll_loss": 0.40285149216651917, "rewards/accuracies": 1.0, "rewards/chosen": -9.514870907878503e-05, "rewards/margins": 0.21724799275398254, "rewards/rejected": -0.21734313666820526, "step": 5702 }, { "epoch": 3.9439834024896268, "grad_norm": 8.37575626373291, "learning_rate": 3.364453665283541e-05, "log_odds_chosen": 9.026674270629883, "log_odds_ratio": -0.0002642723557073623, "logits/chosen": -0.6051280498504639, "logits/rejected": -0.6261605620384216, "logps/chosen": -0.001482822117395699, "logps/rejected": -1.4893754720687866, "loss": 1.0054, "nll_loss": 0.25131821632385254, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001482822117395699, "rewards/margins": 0.14878925681114197, "rewards/rejected": -0.14893755316734314, "step": 5703 }, { "epoch": 3.9446749654218536, "grad_norm": 4.9485955238342285, "learning_rate": 3.364069463654526e-05, "log_odds_chosen": 9.150157928466797, "log_odds_ratio": -0.0018625075463205576, "logits/chosen": -0.32784304022789, "logits/rejected": -0.42211171984672546, "logps/chosen": -0.012415789999067783, "logps/rejected": -1.776462197303772, "loss": 1.2726, "nll_loss": 0.3179532289505005, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012415789533406496, "rewards/margins": 0.17640462517738342, "rewards/rejected": -0.1776462197303772, "step": 5704 }, { "epoch": 3.9453665283540804, "grad_norm": 9.812108039855957, "learning_rate": 3.363685262025511e-05, "log_odds_chosen": 10.123137474060059, "log_odds_ratio": -6.920234591234475e-05, "logits/chosen": -0.7869176864624023, "logits/rejected": -0.8563541769981384, "logps/chosen": -0.0007024909136816859, "logps/rejected": -2.032478094100952, "loss": 1.3503, "nll_loss": 0.3375677466392517, "rewards/accuracies": 1.0, "rewards/chosen": -7.024908700259402e-05, "rewards/margins": 0.2031775563955307, "rewards/rejected": -0.2032478153705597, "step": 5705 }, { "epoch": 3.9460580912863072, "grad_norm": 7.877689838409424, "learning_rate": 3.3633010603964966e-05, "log_odds_chosen": 9.2701416015625, "log_odds_ratio": -0.0006889035576023161, "logits/chosen": -0.43943047523498535, "logits/rejected": -0.4963938593864441, "logps/chosen": -0.01611155830323696, "logps/rejected": -1.872589349746704, "loss": 1.4708, "nll_loss": 0.3676352798938751, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016111559234559536, "rewards/margins": 0.18564778566360474, "rewards/rejected": -0.18725895881652832, "step": 5706 }, { "epoch": 3.946749654218534, "grad_norm": 7.677238464355469, "learning_rate": 3.362916858767481e-05, "log_odds_chosen": 10.964299201965332, "log_odds_ratio": -3.7906993384240195e-05, "logits/chosen": -0.7371675968170166, "logits/rejected": -0.6771216988563538, "logps/chosen": -0.00017539318650960922, "logps/rejected": -2.1775312423706055, "loss": 1.119, "nll_loss": 0.2797532379627228, "rewards/accuracies": 1.0, "rewards/chosen": -1.7539319742354564e-05, "rewards/margins": 0.21773558855056763, "rewards/rejected": -0.21775312721729279, "step": 5707 }, { "epoch": 3.947441217150761, "grad_norm": 8.230490684509277, "learning_rate": 3.3625326571384664e-05, "log_odds_chosen": 10.742960929870605, "log_odds_ratio": -4.162584446021356e-05, "logits/chosen": -0.32022398710250854, "logits/rejected": -0.3552589416503906, "logps/chosen": -8.393789175897837e-05, "logps/rejected": -1.5506086349487305, "loss": 1.3166, "nll_loss": 0.32913970947265625, "rewards/accuracies": 1.0, "rewards/chosen": -8.393788448302075e-06, "rewards/margins": 0.15505248308181763, "rewards/rejected": -0.15506088733673096, "step": 5708 }, { "epoch": 3.9481327800829877, "grad_norm": 8.795544624328613, "learning_rate": 3.362148455509452e-05, "log_odds_chosen": 10.026924133300781, "log_odds_ratio": -0.000733112683519721, "logits/chosen": -0.48883938789367676, "logits/rejected": -0.4959717392921448, "logps/chosen": -0.004272299353033304, "logps/rejected": -2.4745564460754395, "loss": 1.4151, "nll_loss": 0.3537033796310425, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042722991202026606, "rewards/margins": 0.2470284104347229, "rewards/rejected": -0.2474556416273117, "step": 5709 }, { "epoch": 3.9488243430152146, "grad_norm": 7.566889762878418, "learning_rate": 3.361764253880437e-05, "log_odds_chosen": 9.145713806152344, "log_odds_ratio": -0.00047047666157595813, "logits/chosen": -0.5946757197380066, "logits/rejected": -0.6835692524909973, "logps/chosen": -0.0008808871498331428, "logps/rejected": -1.5034514665603638, "loss": 1.1975, "nll_loss": 0.2993380129337311, "rewards/accuracies": 1.0, "rewards/chosen": -8.808870916254818e-05, "rewards/margins": 0.15025705099105835, "rewards/rejected": -0.15034514665603638, "step": 5710 }, { "epoch": 3.9495159059474414, "grad_norm": 10.708879470825195, "learning_rate": 3.3613800522514215e-05, "log_odds_chosen": 10.223733901977539, "log_odds_ratio": -0.00016319968563038856, "logits/chosen": -0.5908886194229126, "logits/rejected": -0.5645523071289062, "logps/chosen": -0.00048043689457699656, "logps/rejected": -1.9205724000930786, "loss": 1.5375, "nll_loss": 0.38436540961265564, "rewards/accuracies": 1.0, "rewards/chosen": -4.8043693823274225e-05, "rewards/margins": 0.19200919568538666, "rewards/rejected": -0.19205725193023682, "step": 5711 }, { "epoch": 3.9502074688796682, "grad_norm": 8.03261661529541, "learning_rate": 3.360995850622407e-05, "log_odds_chosen": 8.706552505493164, "log_odds_ratio": -0.0011576716788113117, "logits/chosen": -0.5434591770172119, "logits/rejected": -0.5303152203559875, "logps/chosen": -0.0019687768071889877, "logps/rejected": -1.3109357357025146, "loss": 1.622, "nll_loss": 0.4053952991962433, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019687767780851573, "rewards/margins": 0.1308967024087906, "rewards/rejected": -0.1310935765504837, "step": 5712 }, { "epoch": 3.950899031811895, "grad_norm": 19.672039031982422, "learning_rate": 3.360611648993392e-05, "log_odds_chosen": 8.991646766662598, "log_odds_ratio": -0.19604425132274628, "logits/chosen": -0.7051516771316528, "logits/rejected": -0.6763105392456055, "logps/chosen": -0.02533097378909588, "logps/rejected": -1.375767469406128, "loss": 1.6721, "nll_loss": 0.3984111547470093, "rewards/accuracies": 0.875, "rewards/chosen": -0.002533097518607974, "rewards/margins": 0.1350436508655548, "rewards/rejected": -0.13757675886154175, "step": 5713 }, { "epoch": 3.951590594744122, "grad_norm": 8.680757522583008, "learning_rate": 3.3602274473643766e-05, "log_odds_chosen": 9.2004976272583, "log_odds_ratio": -0.005787692964076996, "logits/chosen": -0.2553695738315582, "logits/rejected": -0.3262069821357727, "logps/chosen": -0.003667776472866535, "logps/rejected": -1.7895536422729492, "loss": 1.3477, "nll_loss": 0.3363400101661682, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003667776472866535, "rewards/margins": 0.17858858406543732, "rewards/rejected": -0.17895537614822388, "step": 5714 }, { "epoch": 3.9522821576763487, "grad_norm": 10.9926118850708, "learning_rate": 3.3598432457353625e-05, "log_odds_chosen": 9.518470764160156, "log_odds_ratio": -0.0008939065737649798, "logits/chosen": -0.47945836186408997, "logits/rejected": -0.5430272221565247, "logps/chosen": -0.0026755905710160732, "logps/rejected": -1.9028359651565552, "loss": 1.4604, "nll_loss": 0.3650098443031311, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002675590803846717, "rewards/margins": 0.1900160312652588, "rewards/rejected": -0.19028359651565552, "step": 5715 }, { "epoch": 3.9529737206085755, "grad_norm": 12.968902587890625, "learning_rate": 3.359459044106347e-05, "log_odds_chosen": 9.561538696289062, "log_odds_ratio": -0.000785676937084645, "logits/chosen": -0.7379172444343567, "logits/rejected": -0.7460415959358215, "logps/chosen": -0.0024614909198135138, "logps/rejected": -1.8133800029754639, "loss": 1.9004, "nll_loss": 0.47502267360687256, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002461490803398192, "rewards/margins": 0.18109184503555298, "rewards/rejected": -0.18133798241615295, "step": 5716 }, { "epoch": 3.9536652835408024, "grad_norm": 13.025182723999023, "learning_rate": 3.359074842477332e-05, "log_odds_chosen": 8.811513900756836, "log_odds_ratio": -0.0005601159064099193, "logits/chosen": -0.08626483380794525, "logits/rejected": -0.18160587549209595, "logps/chosen": -0.0009426804026588798, "logps/rejected": -1.5935416221618652, "loss": 1.5662, "nll_loss": 0.3914954960346222, "rewards/accuracies": 1.0, "rewards/chosen": -9.426804899703711e-05, "rewards/margins": 0.15925991535186768, "rewards/rejected": -0.15935416519641876, "step": 5717 }, { "epoch": 3.954356846473029, "grad_norm": 9.333796501159668, "learning_rate": 3.358690640848317e-05, "log_odds_chosen": 10.182829856872559, "log_odds_ratio": -0.00021176054724492133, "logits/chosen": -0.239266037940979, "logits/rejected": -0.3665693998336792, "logps/chosen": -0.0002250690886285156, "logps/rejected": -1.7776684761047363, "loss": 1.1581, "nll_loss": 0.2895086407661438, "rewards/accuracies": 1.0, "rewards/chosen": -2.2506910681840964e-05, "rewards/margins": 0.1777443289756775, "rewards/rejected": -0.1777668446302414, "step": 5718 }, { "epoch": 3.955048409405256, "grad_norm": 9.824963569641113, "learning_rate": 3.358306439219303e-05, "log_odds_chosen": 8.434147834777832, "log_odds_ratio": -0.11634311825037003, "logits/chosen": -0.603499710559845, "logits/rejected": -0.6114636659622192, "logps/chosen": -0.025828877463936806, "logps/rejected": -1.4695003032684326, "loss": 1.4495, "nll_loss": 0.3507324457168579, "rewards/accuracies": 0.875, "rewards/chosen": -0.002582887653261423, "rewards/margins": 0.14436712861061096, "rewards/rejected": -0.14695002138614655, "step": 5719 }, { "epoch": 3.955739972337483, "grad_norm": 7.860080242156982, "learning_rate": 3.3579222375902873e-05, "log_odds_chosen": 8.629996299743652, "log_odds_ratio": -0.004712705500423908, "logits/chosen": -0.3507615327835083, "logits/rejected": -0.43837425112724304, "logps/chosen": -0.004645884968340397, "logps/rejected": -1.5997428894042969, "loss": 1.5647, "nll_loss": 0.39069458842277527, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046458852011710405, "rewards/margins": 0.15950970351696014, "rewards/rejected": -0.15997430682182312, "step": 5720 }, { "epoch": 3.9564315352697097, "grad_norm": 7.354146480560303, "learning_rate": 3.3575380359612726e-05, "log_odds_chosen": 9.55754566192627, "log_odds_ratio": -0.0001672496582614258, "logits/chosen": -0.5126395225524902, "logits/rejected": -0.5443432331085205, "logps/chosen": -0.0005185157060623169, "logps/rejected": -1.7254047393798828, "loss": 1.0127, "nll_loss": 0.25316306948661804, "rewards/accuracies": 1.0, "rewards/chosen": -5.1851573516614735e-05, "rewards/margins": 0.17248864471912384, "rewards/rejected": -0.17254048585891724, "step": 5721 }, { "epoch": 3.9571230982019365, "grad_norm": 10.185460090637207, "learning_rate": 3.357153834332258e-05, "log_odds_chosen": 9.248023986816406, "log_odds_ratio": -0.0015630690613761544, "logits/chosen": -0.35558661818504333, "logits/rejected": -0.47364428639411926, "logps/chosen": -0.008763710036873817, "logps/rejected": -2.00473690032959, "loss": 1.3867, "nll_loss": 0.34651124477386475, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008763709920458496, "rewards/margins": 0.1995973289012909, "rewards/rejected": -0.20047369599342346, "step": 5722 }, { "epoch": 3.9578146611341634, "grad_norm": 8.607810020446777, "learning_rate": 3.356769632703243e-05, "log_odds_chosen": 9.193594932556152, "log_odds_ratio": -0.0009846854954957962, "logits/chosen": -0.5531086325645447, "logits/rejected": -0.6834915280342102, "logps/chosen": -0.0011036059586331248, "logps/rejected": -1.6625056266784668, "loss": 1.0703, "nll_loss": 0.26746901869773865, "rewards/accuracies": 1.0, "rewards/chosen": -0.000110360597318504, "rewards/margins": 0.16614019870758057, "rewards/rejected": -0.1662505567073822, "step": 5723 }, { "epoch": 3.95850622406639, "grad_norm": 11.07143497467041, "learning_rate": 3.3563854310742276e-05, "log_odds_chosen": 8.199675559997559, "log_odds_ratio": -0.015890540555119514, "logits/chosen": -0.3973831832408905, "logits/rejected": -0.4261188507080078, "logps/chosen": -0.19782760739326477, "logps/rejected": -1.8517847061157227, "loss": 2.1722, "nll_loss": 0.5414702892303467, "rewards/accuracies": 1.0, "rewards/chosen": -0.019782761111855507, "rewards/margins": 0.16539573669433594, "rewards/rejected": -0.1851784884929657, "step": 5724 }, { "epoch": 3.959197786998617, "grad_norm": 8.371910095214844, "learning_rate": 3.356001229445213e-05, "log_odds_chosen": 8.336164474487305, "log_odds_ratio": -0.07454461604356766, "logits/chosen": -0.5055246353149414, "logits/rejected": -0.5621579885482788, "logps/chosen": -0.014081336557865143, "logps/rejected": -1.6171096563339233, "loss": 1.7341, "nll_loss": 0.42607590556144714, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014081336557865143, "rewards/margins": 0.16030283272266388, "rewards/rejected": -0.1617109626531601, "step": 5725 }, { "epoch": 3.959889349930844, "grad_norm": 7.506570816040039, "learning_rate": 3.355617027816198e-05, "log_odds_chosen": 9.852703094482422, "log_odds_ratio": -0.00010472921712789685, "logits/chosen": -0.14278888702392578, "logits/rejected": -0.22829201817512512, "logps/chosen": -0.006612797733396292, "logps/rejected": -2.2164673805236816, "loss": 1.1552, "nll_loss": 0.28878581523895264, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006612797733396292, "rewards/margins": 0.22098545730113983, "rewards/rejected": -0.2216467261314392, "step": 5726 }, { "epoch": 3.9605809128630707, "grad_norm": 19.204822540283203, "learning_rate": 3.355232826187183e-05, "log_odds_chosen": 8.902791976928711, "log_odds_ratio": -0.004878794774413109, "logits/chosen": -0.07866685092449188, "logits/rejected": -0.14008517563343048, "logps/chosen": -0.05354490503668785, "logps/rejected": -1.7077854871749878, "loss": 1.5674, "nll_loss": 0.39136484265327454, "rewards/accuracies": 1.0, "rewards/chosen": -0.005354490131139755, "rewards/margins": 0.16542406380176544, "rewards/rejected": -0.1707785427570343, "step": 5727 }, { "epoch": 3.9612724757952975, "grad_norm": 11.002792358398438, "learning_rate": 3.3548486245581686e-05, "log_odds_chosen": 10.120767593383789, "log_odds_ratio": -0.00026730989338830113, "logits/chosen": -0.6612759828567505, "logits/rejected": -0.6622802019119263, "logps/chosen": -0.00566504243761301, "logps/rejected": -2.387166976928711, "loss": 1.8902, "nll_loss": 0.4725325107574463, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005665042554028332, "rewards/margins": 0.2381501942873001, "rewards/rejected": -0.2387167066335678, "step": 5728 }, { "epoch": 3.9619640387275243, "grad_norm": 10.607829093933105, "learning_rate": 3.354464422929153e-05, "log_odds_chosen": 10.475570678710938, "log_odds_ratio": -0.0001675660751061514, "logits/chosen": -0.7578421831130981, "logits/rejected": -0.7980519533157349, "logps/chosen": -0.0006828827317804098, "logps/rejected": -2.6656992435455322, "loss": 1.8156, "nll_loss": 0.4538804888725281, "rewards/accuracies": 1.0, "rewards/chosen": -6.82882746332325e-05, "rewards/margins": 0.26650163531303406, "rewards/rejected": -0.26656991243362427, "step": 5729 }, { "epoch": 3.962655601659751, "grad_norm": 9.261698722839355, "learning_rate": 3.3540802213001384e-05, "log_odds_chosen": 8.758027076721191, "log_odds_ratio": -0.0015625649830326438, "logits/chosen": -0.7957751154899597, "logits/rejected": -0.8652870655059814, "logps/chosen": -0.005631886888295412, "logps/rejected": -1.8644614219665527, "loss": 1.8505, "nll_loss": 0.46247023344039917, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005631886888295412, "rewards/margins": 0.18588295578956604, "rewards/rejected": -0.1864461451768875, "step": 5730 }, { "epoch": 3.963347164591978, "grad_norm": 14.256701469421387, "learning_rate": 3.353696019671124e-05, "log_odds_chosen": 9.452154159545898, "log_odds_ratio": -0.00022387137869372964, "logits/chosen": -0.6790257692337036, "logits/rejected": -0.7358689308166504, "logps/chosen": -0.00040158629417419434, "logps/rejected": -1.465136170387268, "loss": 1.4173, "nll_loss": 0.3543011546134949, "rewards/accuracies": 1.0, "rewards/chosen": -4.0158629417419434e-05, "rewards/margins": 0.1464734822511673, "rewards/rejected": -0.14651362597942352, "step": 5731 }, { "epoch": 3.964038727524205, "grad_norm": 16.448766708374023, "learning_rate": 3.353311818042109e-05, "log_odds_chosen": 10.256701469421387, "log_odds_ratio": -8.063411951297894e-05, "logits/chosen": -0.6722682118415833, "logits/rejected": -0.681743860244751, "logps/chosen": -0.008627118542790413, "logps/rejected": -2.3765411376953125, "loss": 1.8795, "nll_loss": 0.46986570954322815, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008627119823358953, "rewards/margins": 0.23679141700267792, "rewards/rejected": -0.23765411972999573, "step": 5732 }, { "epoch": 3.9647302904564317, "grad_norm": 18.156047821044922, "learning_rate": 3.3529276164130935e-05, "log_odds_chosen": 9.920660972595215, "log_odds_ratio": -0.00036892094067297876, "logits/chosen": -0.5115818977355957, "logits/rejected": -0.5205098390579224, "logps/chosen": -0.001742643304169178, "logps/rejected": -1.9661567211151123, "loss": 1.2746, "nll_loss": 0.3186228275299072, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001742643362376839, "rewards/margins": 0.1964414119720459, "rewards/rejected": -0.19661568105220795, "step": 5733 }, { "epoch": 3.9654218533886585, "grad_norm": 6.548203468322754, "learning_rate": 3.352543414784079e-05, "log_odds_chosen": 9.376199722290039, "log_odds_ratio": -0.00019775544933509082, "logits/chosen": -0.7781115174293518, "logits/rejected": -0.8361927270889282, "logps/chosen": -0.00040810625068843365, "logps/rejected": -1.6837494373321533, "loss": 1.2971, "nll_loss": 0.3242448568344116, "rewards/accuracies": 1.0, "rewards/chosen": -4.08106243412476e-05, "rewards/margins": 0.16833411157131195, "rewards/rejected": -0.1683749407529831, "step": 5734 }, { "epoch": 3.9661134163208853, "grad_norm": 12.527767181396484, "learning_rate": 3.352159213155064e-05, "log_odds_chosen": 9.324653625488281, "log_odds_ratio": -0.0006891811499372125, "logits/chosen": -0.6570389270782471, "logits/rejected": -0.6967424154281616, "logps/chosen": -0.005305243190377951, "logps/rejected": -2.3198208808898926, "loss": 2.0938, "nll_loss": 0.5233712196350098, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005305242957547307, "rewards/margins": 0.23145155608654022, "rewards/rejected": -0.23198208212852478, "step": 5735 }, { "epoch": 3.966804979253112, "grad_norm": 10.369845390319824, "learning_rate": 3.3517750115260485e-05, "log_odds_chosen": 9.477441787719727, "log_odds_ratio": -0.0003035986446775496, "logits/chosen": -0.8958337903022766, "logits/rejected": -0.9264642000198364, "logps/chosen": -0.0002738877374213189, "logps/rejected": -1.3348115682601929, "loss": 1.3162, "nll_loss": 0.32901012897491455, "rewards/accuracies": 1.0, "rewards/chosen": -2.738877446972765e-05, "rewards/margins": 0.13345377147197723, "rewards/rejected": -0.13348117470741272, "step": 5736 }, { "epoch": 3.967496542185339, "grad_norm": 9.416356086730957, "learning_rate": 3.3513908098970345e-05, "log_odds_chosen": 8.403035163879395, "log_odds_ratio": -0.02166915126144886, "logits/chosen": -0.4370851516723633, "logits/rejected": -0.5026123523712158, "logps/chosen": -0.06585156917572021, "logps/rejected": -1.6196904182434082, "loss": 1.4708, "nll_loss": 0.3655407428741455, "rewards/accuracies": 1.0, "rewards/chosen": -0.006585157476365566, "rewards/margins": 0.15538389980793, "rewards/rejected": -0.16196905076503754, "step": 5737 }, { "epoch": 3.968188105117566, "grad_norm": 9.437515258789062, "learning_rate": 3.351006608268019e-05, "log_odds_chosen": 8.181971549987793, "log_odds_ratio": -0.011595248244702816, "logits/chosen": -0.29133880138397217, "logits/rejected": -0.3592085540294647, "logps/chosen": -0.007398087531328201, "logps/rejected": -1.042150855064392, "loss": 1.8839, "nll_loss": 0.4698105454444885, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007398087182082236, "rewards/margins": 0.10347528010606766, "rewards/rejected": -0.10421508550643921, "step": 5738 }, { "epoch": 3.9688796680497926, "grad_norm": 12.901277542114258, "learning_rate": 3.350622406639004e-05, "log_odds_chosen": 9.17611312866211, "log_odds_ratio": -0.048881880939006805, "logits/chosen": -0.6290692687034607, "logits/rejected": -0.6658339500427246, "logps/chosen": -0.01046671625226736, "logps/rejected": -1.2178230285644531, "loss": 1.7287, "nll_loss": 0.42728084325790405, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010466716485098004, "rewards/margins": 0.12073563039302826, "rewards/rejected": -0.12178229540586472, "step": 5739 }, { "epoch": 3.9695712309820195, "grad_norm": 18.288244247436523, "learning_rate": 3.3502382050099895e-05, "log_odds_chosen": 9.014623641967773, "log_odds_ratio": -0.05034981295466423, "logits/chosen": -0.651118278503418, "logits/rejected": -0.7484937310218811, "logps/chosen": -0.005496119614690542, "logps/rejected": -1.8427544832229614, "loss": 1.4967, "nll_loss": 0.3691369593143463, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005496119847521186, "rewards/margins": 0.18372584879398346, "rewards/rejected": -0.18427544832229614, "step": 5740 }, { "epoch": 3.9702627939142463, "grad_norm": 7.95030403137207, "learning_rate": 3.349854003380975e-05, "log_odds_chosen": 9.588155746459961, "log_odds_ratio": -0.0029957296792417765, "logits/chosen": 0.09066282957792282, "logits/rejected": 0.057482749223709106, "logps/chosen": -0.0023572836071252823, "logps/rejected": -2.472792387008667, "loss": 1.5405, "nll_loss": 0.38481464982032776, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023572838108520955, "rewards/margins": 0.24704353511333466, "rewards/rejected": -0.24727925658226013, "step": 5741 }, { "epoch": 3.970954356846473, "grad_norm": 8.950033187866211, "learning_rate": 3.349469801751959e-05, "log_odds_chosen": 9.638641357421875, "log_odds_ratio": -0.00031053705606609583, "logits/chosen": -0.699450671672821, "logits/rejected": -0.7877517938613892, "logps/chosen": -0.00048190244706347585, "logps/rejected": -2.0894455909729004, "loss": 1.1709, "nll_loss": 0.29270097613334656, "rewards/accuracies": 1.0, "rewards/chosen": -4.819024616153911e-05, "rewards/margins": 0.20889636874198914, "rewards/rejected": -0.20894454419612885, "step": 5742 }, { "epoch": 3.9716459197787, "grad_norm": 7.144364833831787, "learning_rate": 3.3490856001229446e-05, "log_odds_chosen": 9.663379669189453, "log_odds_ratio": -0.007051974069327116, "logits/chosen": -0.503765881061554, "logits/rejected": -0.5336912870407104, "logps/chosen": -0.007468795869499445, "logps/rejected": -2.4491522312164307, "loss": 2.5254, "nll_loss": 0.6306518316268921, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007468796102330089, "rewards/margins": 0.24416834115982056, "rewards/rejected": -0.2449152171611786, "step": 5743 }, { "epoch": 3.972337482710927, "grad_norm": 7.711954116821289, "learning_rate": 3.34870139849393e-05, "log_odds_chosen": 10.248712539672852, "log_odds_ratio": -0.0002956142125185579, "logits/chosen": -0.7737203240394592, "logits/rejected": -0.8155643939971924, "logps/chosen": -0.0010178536176681519, "logps/rejected": -2.6119794845581055, "loss": 1.5636, "nll_loss": 0.390875905752182, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010178536467719823, "rewards/margins": 0.26109617948532104, "rewards/rejected": -0.26119792461395264, "step": 5744 }, { "epoch": 3.9730290456431536, "grad_norm": 9.639405250549316, "learning_rate": 3.3483171968649144e-05, "log_odds_chosen": 10.47792911529541, "log_odds_ratio": -4.7168614401016384e-05, "logits/chosen": -0.5498504042625427, "logits/rejected": -0.5651625394821167, "logps/chosen": -0.00014222090248949826, "logps/rejected": -1.7951178550720215, "loss": 2.0419, "nll_loss": 0.5104638934135437, "rewards/accuracies": 1.0, "rewards/chosen": -1.4222088793758303e-05, "rewards/margins": 0.17949756979942322, "rewards/rejected": -0.17951178550720215, "step": 5745 }, { "epoch": 3.9737206085753805, "grad_norm": 11.414177894592285, "learning_rate": 3.3479329952359e-05, "log_odds_chosen": 11.441179275512695, "log_odds_ratio": -1.4189552530297078e-05, "logits/chosen": -0.11674871295690536, "logits/rejected": -0.2236751914024353, "logps/chosen": -0.00010913712321780622, "logps/rejected": -2.2911489009857178, "loss": 1.3601, "nll_loss": 0.3400111794471741, "rewards/accuracies": 1.0, "rewards/chosen": -1.0913712685578503e-05, "rewards/margins": 0.22910398244857788, "rewards/rejected": -0.22911489009857178, "step": 5746 }, { "epoch": 3.9744121715076073, "grad_norm": 7.950533866882324, "learning_rate": 3.347548793606885e-05, "log_odds_chosen": 9.580647468566895, "log_odds_ratio": -0.0008307815878652036, "logits/chosen": -0.15887480974197388, "logits/rejected": -0.2429768145084381, "logps/chosen": -0.0018538651056587696, "logps/rejected": -1.919063925743103, "loss": 0.8787, "nll_loss": 0.21959391236305237, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018538651056587696, "rewards/margins": 0.19172099232673645, "rewards/rejected": -0.1919063925743103, "step": 5747 }, { "epoch": 3.975103734439834, "grad_norm": 14.538896560668945, "learning_rate": 3.34716459197787e-05, "log_odds_chosen": 9.95122241973877, "log_odds_ratio": -9.063062316272408e-05, "logits/chosen": -0.2590225338935852, "logits/rejected": -0.36000490188598633, "logps/chosen": -0.0005485797300934792, "logps/rejected": -2.2005417346954346, "loss": 1.9276, "nll_loss": 0.4818934202194214, "rewards/accuracies": 1.0, "rewards/chosen": -5.4857977374922484e-05, "rewards/margins": 0.21999932825565338, "rewards/rejected": -0.22005417943000793, "step": 5748 }, { "epoch": 3.975795297372061, "grad_norm": 7.641196250915527, "learning_rate": 3.3467803903488554e-05, "log_odds_chosen": 10.689547538757324, "log_odds_ratio": -3.348453901708126e-05, "logits/chosen": -0.6348323225975037, "logits/rejected": -0.5878068208694458, "logps/chosen": -0.00016802028403617442, "logps/rejected": -1.9155058860778809, "loss": 1.0619, "nll_loss": 0.2654733657836914, "rewards/accuracies": 1.0, "rewards/chosen": -1.6802028767415322e-05, "rewards/margins": 0.19153380393981934, "rewards/rejected": -0.191550612449646, "step": 5749 }, { "epoch": 3.9764868603042878, "grad_norm": 12.81608772277832, "learning_rate": 3.3463961887198406e-05, "log_odds_chosen": 9.441143035888672, "log_odds_ratio": -0.007615984883159399, "logits/chosen": -0.3101378083229065, "logits/rejected": -0.30015456676483154, "logps/chosen": -0.03704637289047241, "logps/rejected": -2.1652421951293945, "loss": 1.1186, "nll_loss": 0.2788885831832886, "rewards/accuracies": 1.0, "rewards/chosen": -0.003704637521877885, "rewards/margins": 0.21281959116458893, "rewards/rejected": -0.21652421355247498, "step": 5750 }, { "epoch": 3.9771784232365146, "grad_norm": 7.47943115234375, "learning_rate": 3.346011987090825e-05, "log_odds_chosen": 10.377918243408203, "log_odds_ratio": -0.00010015325096901506, "logits/chosen": -0.5926077961921692, "logits/rejected": -0.5092741250991821, "logps/chosen": -0.0001969319419004023, "logps/rejected": -1.7772661447525024, "loss": 1.2954, "nll_loss": 0.32384994626045227, "rewards/accuracies": 1.0, "rewards/chosen": -1.969319419004023e-05, "rewards/margins": 0.17770692706108093, "rewards/rejected": -0.1777266263961792, "step": 5751 }, { "epoch": 3.9778699861687414, "grad_norm": 9.103669166564941, "learning_rate": 3.3456277854618104e-05, "log_odds_chosen": 10.58312702178955, "log_odds_ratio": -5.431749377748929e-05, "logits/chosen": -0.5314761996269226, "logits/rejected": -0.5825152397155762, "logps/chosen": -0.003828480839729309, "logps/rejected": -2.600499153137207, "loss": 1.9518, "nll_loss": 0.4879487156867981, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003828480839729309, "rewards/margins": 0.2596670985221863, "rewards/rejected": -0.2600499391555786, "step": 5752 }, { "epoch": 3.9785615491009683, "grad_norm": 7.952563285827637, "learning_rate": 3.345243583832796e-05, "log_odds_chosen": 8.267032623291016, "log_odds_ratio": -0.001137011917307973, "logits/chosen": -0.31987977027893066, "logits/rejected": -0.3656128942966461, "logps/chosen": -0.007970752194523811, "logps/rejected": -1.5903152227401733, "loss": 1.3859, "nll_loss": 0.3463681936264038, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007970751030370593, "rewards/margins": 0.15823444724082947, "rewards/rejected": -0.15903152525424957, "step": 5753 }, { "epoch": 3.979253112033195, "grad_norm": 15.227071762084961, "learning_rate": 3.34485938220378e-05, "log_odds_chosen": 9.630619049072266, "log_odds_ratio": -0.00011478106898721308, "logits/chosen": -0.6500551104545593, "logits/rejected": -0.6647905707359314, "logps/chosen": -0.0004875340382568538, "logps/rejected": -1.898605465888977, "loss": 1.7484, "nll_loss": 0.43709835410118103, "rewards/accuracies": 1.0, "rewards/chosen": -4.8753405280876905e-05, "rewards/margins": 0.1898117959499359, "rewards/rejected": -0.18986055254936218, "step": 5754 }, { "epoch": 3.979944674965422, "grad_norm": 10.086196899414062, "learning_rate": 3.344475180574766e-05, "log_odds_chosen": 10.998220443725586, "log_odds_ratio": -2.8250318791833706e-05, "logits/chosen": -0.5312788486480713, "logits/rejected": -0.5879403352737427, "logps/chosen": -9.712098108138889e-05, "logps/rejected": -1.8667224645614624, "loss": 1.5129, "nll_loss": 0.37821367383003235, "rewards/accuracies": 1.0, "rewards/chosen": -9.712097380543128e-06, "rewards/margins": 0.18666253983974457, "rewards/rejected": -0.18667224049568176, "step": 5755 }, { "epoch": 3.9806362378976488, "grad_norm": 11.90705394744873, "learning_rate": 3.344090978945751e-05, "log_odds_chosen": 10.894936561584473, "log_odds_ratio": -5.3509866120293736e-05, "logits/chosen": -0.4861408770084381, "logits/rejected": -0.5648641586303711, "logps/chosen": -0.00018957615247927606, "logps/rejected": -1.8090975284576416, "loss": 1.1082, "nll_loss": 0.27704793214797974, "rewards/accuracies": 1.0, "rewards/chosen": -1.895761670311913e-05, "rewards/margins": 0.18089079856872559, "rewards/rejected": -0.18090975284576416, "step": 5756 }, { "epoch": 3.9813278008298756, "grad_norm": 5.89326286315918, "learning_rate": 3.343706777316736e-05, "log_odds_chosen": 10.03038215637207, "log_odds_ratio": -0.0003101792826782912, "logits/chosen": -0.4241410791873932, "logits/rejected": -0.48782768845558167, "logps/chosen": -0.0013601405080407858, "logps/rejected": -2.202002763748169, "loss": 1.3956, "nll_loss": 0.34887629747390747, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013601405953522772, "rewards/margins": 0.22006428241729736, "rewards/rejected": -0.2202003002166748, "step": 5757 }, { "epoch": 3.9820193637621024, "grad_norm": 9.086535453796387, "learning_rate": 3.343322575687721e-05, "log_odds_chosen": 11.076787948608398, "log_odds_ratio": -4.0664192056283355e-05, "logits/chosen": -0.7246346473693848, "logits/rejected": -0.7437798976898193, "logps/chosen": -0.00011383210949134082, "logps/rejected": -1.774545431137085, "loss": 1.4516, "nll_loss": 0.3628990650177002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1383212040527724e-05, "rewards/margins": 0.17744316160678864, "rewards/rejected": -0.17745453119277954, "step": 5758 }, { "epoch": 3.9827109266943292, "grad_norm": 10.311131477355957, "learning_rate": 3.3429383740587065e-05, "log_odds_chosen": 9.026152610778809, "log_odds_ratio": -0.00034249460441060364, "logits/chosen": -0.6311862468719482, "logits/rejected": -0.58100426197052, "logps/chosen": -0.0006225037504918873, "logps/rejected": -1.2492806911468506, "loss": 1.3607, "nll_loss": 0.34014812111854553, "rewards/accuracies": 1.0, "rewards/chosen": -6.225037213880569e-05, "rewards/margins": 0.12486580014228821, "rewards/rejected": -0.1249280646443367, "step": 5759 }, { "epoch": 3.983402489626556, "grad_norm": 9.150677680969238, "learning_rate": 3.342554172429691e-05, "log_odds_chosen": 7.137037754058838, "log_odds_ratio": -0.1796838790178299, "logits/chosen": -0.38474273681640625, "logits/rejected": -0.26010262966156006, "logps/chosen": -0.03558574989438057, "logps/rejected": -1.1326240301132202, "loss": 1.5331, "nll_loss": 0.36530226469039917, "rewards/accuracies": 0.875, "rewards/chosen": -0.0035585747100412846, "rewards/margins": 0.10970382392406464, "rewards/rejected": -0.11326240003108978, "step": 5760 }, { "epoch": 3.984094052558783, "grad_norm": 7.6114630699157715, "learning_rate": 3.342169970800676e-05, "log_odds_chosen": 9.663246154785156, "log_odds_ratio": -0.00026955429348163307, "logits/chosen": -0.3328566551208496, "logits/rejected": -0.32750049233436584, "logps/chosen": -0.0006487221107818186, "logps/rejected": -1.4827356338500977, "loss": 1.2445, "nll_loss": 0.3111001253128052, "rewards/accuracies": 1.0, "rewards/chosen": -6.487221253337339e-05, "rewards/margins": 0.14820870757102966, "rewards/rejected": -0.14827357232570648, "step": 5761 }, { "epoch": 3.9847856154910097, "grad_norm": 9.025335311889648, "learning_rate": 3.3417857691716615e-05, "log_odds_chosen": 9.366215705871582, "log_odds_ratio": -0.00027741739177145064, "logits/chosen": -0.6465136408805847, "logits/rejected": -0.5712451934814453, "logps/chosen": -0.0003560830373317003, "logps/rejected": -1.2528438568115234, "loss": 1.9347, "nll_loss": 0.4836418032646179, "rewards/accuracies": 1.0, "rewards/chosen": -3.560830373317003e-05, "rewards/margins": 0.12524878978729248, "rewards/rejected": -0.12528440356254578, "step": 5762 }, { "epoch": 3.9854771784232366, "grad_norm": 6.303955554962158, "learning_rate": 3.341401567542646e-05, "log_odds_chosen": 10.434503555297852, "log_odds_ratio": -0.000102290025097318, "logits/chosen": -0.562961757183075, "logits/rejected": -0.673736572265625, "logps/chosen": -0.0002883031265810132, "logps/rejected": -2.2828972339630127, "loss": 0.9964, "nll_loss": 0.24909597635269165, "rewards/accuracies": 1.0, "rewards/chosen": -2.883031265810132e-05, "rewards/margins": 0.22826090455055237, "rewards/rejected": -0.22828972339630127, "step": 5763 }, { "epoch": 3.9861687413554634, "grad_norm": 12.010201454162598, "learning_rate": 3.341017365913632e-05, "log_odds_chosen": 7.408069610595703, "log_odds_ratio": -0.025262746959924698, "logits/chosen": -0.7419548034667969, "logits/rejected": -0.7320268154144287, "logps/chosen": -0.00811043381690979, "logps/rejected": -1.610243797302246, "loss": 1.5759, "nll_loss": 0.3914604187011719, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008110433700494468, "rewards/margins": 0.16021332144737244, "rewards/rejected": -0.16102437674999237, "step": 5764 }, { "epoch": 3.9868603042876902, "grad_norm": 6.950577259063721, "learning_rate": 3.3406331642846166e-05, "log_odds_chosen": 8.224448204040527, "log_odds_ratio": -0.007369033992290497, "logits/chosen": -0.808485209941864, "logits/rejected": -0.788963258266449, "logps/chosen": -0.02338700369000435, "logps/rejected": -2.2966105937957764, "loss": 1.6337, "nll_loss": 0.4076803922653198, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023387002293020487, "rewards/margins": 0.22732235491275787, "rewards/rejected": -0.22966104745864868, "step": 5765 }, { "epoch": 3.987551867219917, "grad_norm": 7.650085926055908, "learning_rate": 3.340248962655602e-05, "log_odds_chosen": 10.956058502197266, "log_odds_ratio": -4.377203731564805e-05, "logits/chosen": -0.4419000744819641, "logits/rejected": -0.5032147169113159, "logps/chosen": -0.00018789824389386922, "logps/rejected": -2.5352671146392822, "loss": 0.8885, "nll_loss": 0.2221318781375885, "rewards/accuracies": 1.0, "rewards/chosen": -1.878982402558904e-05, "rewards/margins": 0.25350794196128845, "rewards/rejected": -0.2535267472267151, "step": 5766 }, { "epoch": 3.988243430152144, "grad_norm": 8.995047569274902, "learning_rate": 3.339864761026587e-05, "log_odds_chosen": 9.685747146606445, "log_odds_ratio": -0.00021190382540225983, "logits/chosen": -0.6376558542251587, "logits/rejected": -0.6977887749671936, "logps/chosen": -0.000918483070563525, "logps/rejected": -1.987686038017273, "loss": 1.1773, "nll_loss": 0.2943156957626343, "rewards/accuracies": 1.0, "rewards/chosen": -9.184830560116097e-05, "rewards/margins": 0.19867676496505737, "rewards/rejected": -0.19876858592033386, "step": 5767 }, { "epoch": 3.9889349930843707, "grad_norm": 9.66429328918457, "learning_rate": 3.339480559397572e-05, "log_odds_chosen": 9.874263763427734, "log_odds_ratio": -0.03149972856044769, "logits/chosen": -0.5896898508071899, "logits/rejected": -0.698781430721283, "logps/chosen": -0.014882597140967846, "logps/rejected": -2.3216664791107178, "loss": 2.0424, "nll_loss": 0.5074531435966492, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014882597606629133, "rewards/margins": 0.23067839443683624, "rewards/rejected": -0.23216666281223297, "step": 5768 }, { "epoch": 3.9896265560165975, "grad_norm": 10.384910583496094, "learning_rate": 3.339096357768557e-05, "log_odds_chosen": 10.813526153564453, "log_odds_ratio": -2.9791222914354876e-05, "logits/chosen": -0.48536092042922974, "logits/rejected": -0.4883587062358856, "logps/chosen": -0.0001726519549265504, "logps/rejected": -2.2120351791381836, "loss": 1.0716, "nll_loss": 0.26789313554763794, "rewards/accuracies": 1.0, "rewards/chosen": -1.72651962202508e-05, "rewards/margins": 0.22118626534938812, "rewards/rejected": -0.2212035059928894, "step": 5769 }, { "epoch": 3.9903181189488244, "grad_norm": 224.6512908935547, "learning_rate": 3.338712156139542e-05, "log_odds_chosen": 6.486282825469971, "log_odds_ratio": -1.4710427522659302, "logits/chosen": -0.4123340845108032, "logits/rejected": -0.39311158657073975, "logps/chosen": -0.2972959578037262, "logps/rejected": -1.7887805700302124, "loss": 2.2346, "nll_loss": 0.411540150642395, "rewards/accuracies": 0.75, "rewards/chosen": -0.02972959727048874, "rewards/margins": 0.14914844930171967, "rewards/rejected": -0.1788780689239502, "step": 5770 }, { "epoch": 3.991009681881051, "grad_norm": 7.9631876945495605, "learning_rate": 3.3383279545105273e-05, "log_odds_chosen": 10.068863868713379, "log_odds_ratio": -8.86126363184303e-05, "logits/chosen": -0.6509562134742737, "logits/rejected": -0.7054494023323059, "logps/chosen": -0.0013785153860226274, "logps/rejected": -1.69233238697052, "loss": 0.8035, "nll_loss": 0.2008584439754486, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013785154442302883, "rewards/margins": 0.16909539699554443, "rewards/rejected": -0.16923324763774872, "step": 5771 }, { "epoch": 3.991701244813278, "grad_norm": 16.560514450073242, "learning_rate": 3.337943752881512e-05, "log_odds_chosen": 9.463789939880371, "log_odds_ratio": -0.005806229077279568, "logits/chosen": -0.5243285298347473, "logits/rejected": -0.5927683711051941, "logps/chosen": -0.0042679328471422195, "logps/rejected": -2.5110771656036377, "loss": 2.4263, "nll_loss": 0.6059852242469788, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042679329635575414, "rewards/margins": 0.25068092346191406, "rewards/rejected": -0.25110769271850586, "step": 5772 }, { "epoch": 3.992392807745505, "grad_norm": 14.416969299316406, "learning_rate": 3.337559551252498e-05, "log_odds_chosen": 10.008670806884766, "log_odds_ratio": -0.00036832113983109593, "logits/chosen": -0.652914822101593, "logits/rejected": -0.7095764875411987, "logps/chosen": -0.0006406122702173889, "logps/rejected": -1.8933131694793701, "loss": 1.6654, "nll_loss": 0.4163016974925995, "rewards/accuracies": 1.0, "rewards/chosen": -6.406122702173889e-05, "rewards/margins": 0.18926726281642914, "rewards/rejected": -0.1893313229084015, "step": 5773 }, { "epoch": 3.9930843706777317, "grad_norm": 11.007437705993652, "learning_rate": 3.3371753496234824e-05, "log_odds_chosen": 9.9483642578125, "log_odds_ratio": -0.055682096630334854, "logits/chosen": -0.6526095867156982, "logits/rejected": -0.6657837629318237, "logps/chosen": -0.013267319649457932, "logps/rejected": -2.245244026184082, "loss": 1.2503, "nll_loss": 0.30701395869255066, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013267318718135357, "rewards/margins": 0.22319768369197845, "rewards/rejected": -0.22452440857887268, "step": 5774 }, { "epoch": 3.9937759336099585, "grad_norm": 11.40454387664795, "learning_rate": 3.3367911479944676e-05, "log_odds_chosen": 9.399166107177734, "log_odds_ratio": -0.0002439269155729562, "logits/chosen": -0.3749449849128723, "logits/rejected": -0.46598711609840393, "logps/chosen": -0.0005690623656846583, "logps/rejected": -1.5011813640594482, "loss": 0.9484, "nll_loss": 0.23707221448421478, "rewards/accuracies": 1.0, "rewards/chosen": -5.6906239478848875e-05, "rewards/margins": 0.15006123483181, "rewards/rejected": -0.1501181423664093, "step": 5775 }, { "epoch": 3.9944674965421854, "grad_norm": 11.258934020996094, "learning_rate": 3.336406946365453e-05, "log_odds_chosen": 9.372721672058105, "log_odds_ratio": -0.00011832044401671737, "logits/chosen": -0.5779632925987244, "logits/rejected": -0.5768287181854248, "logps/chosen": -0.00047448737313970923, "logps/rejected": -1.6736748218536377, "loss": 1.5988, "nll_loss": 0.39969536662101746, "rewards/accuracies": 1.0, "rewards/chosen": -4.7448738769162446e-05, "rewards/margins": 0.16732004284858704, "rewards/rejected": -0.16736748814582825, "step": 5776 }, { "epoch": 3.995159059474412, "grad_norm": 13.851411819458008, "learning_rate": 3.336022744736438e-05, "log_odds_chosen": 8.354120254516602, "log_odds_ratio": -0.3387634754180908, "logits/chosen": -0.58873051404953, "logits/rejected": -0.6785441637039185, "logps/chosen": -0.0636986568570137, "logps/rejected": -1.902686595916748, "loss": 1.4361, "nll_loss": 0.3251374363899231, "rewards/accuracies": 0.875, "rewards/chosen": -0.00636986643075943, "rewards/margins": 0.18389879167079926, "rewards/rejected": -0.19026866555213928, "step": 5777 }, { "epoch": 3.995850622406639, "grad_norm": 8.498478889465332, "learning_rate": 3.335638543107423e-05, "log_odds_chosen": 8.823246002197266, "log_odds_ratio": -0.04847763851284981, "logits/chosen": -0.5457412004470825, "logits/rejected": -0.6254887580871582, "logps/chosen": -0.009564734995365143, "logps/rejected": -1.7078287601470947, "loss": 1.5128, "nll_loss": 0.3733523190021515, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009564735228195786, "rewards/margins": 0.1698264181613922, "rewards/rejected": -0.1707828789949417, "step": 5778 }, { "epoch": 3.996542185338866, "grad_norm": 10.335137367248535, "learning_rate": 3.335254341478408e-05, "log_odds_chosen": 10.140752792358398, "log_odds_ratio": -0.00010038846812676638, "logits/chosen": -0.5030477046966553, "logits/rejected": -0.5023469924926758, "logps/chosen": -0.0018512359820306301, "logps/rejected": -2.288264274597168, "loss": 1.9676, "nll_loss": 0.4918965995311737, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018512360111344606, "rewards/margins": 0.2286413162946701, "rewards/rejected": -0.22882644832134247, "step": 5779 }, { "epoch": 3.9972337482710927, "grad_norm": 6.054286003112793, "learning_rate": 3.334870139849393e-05, "log_odds_chosen": 7.87191915512085, "log_odds_ratio": -0.12952713668346405, "logits/chosen": -0.4336947500705719, "logits/rejected": -0.5151492953300476, "logps/chosen": -0.01956653967499733, "logps/rejected": -1.6524310111999512, "loss": 0.9637, "nll_loss": 0.2279704511165619, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019566540140658617, "rewards/margins": 0.16328644752502441, "rewards/rejected": -0.16524310410022736, "step": 5780 }, { "epoch": 3.9979253112033195, "grad_norm": 12.384289741516113, "learning_rate": 3.334485938220378e-05, "log_odds_chosen": 9.222118377685547, "log_odds_ratio": -0.0014353328151628375, "logits/chosen": -0.7566649317741394, "logits/rejected": -0.8445166349411011, "logps/chosen": -0.023272883147001266, "logps/rejected": -2.233126163482666, "loss": 1.6794, "nll_loss": 0.41969895362854004, "rewards/accuracies": 1.0, "rewards/chosen": -0.002327288268133998, "rewards/margins": 0.2209853231906891, "rewards/rejected": -0.2233126163482666, "step": 5781 }, { "epoch": 3.9986168741355463, "grad_norm": 11.377348899841309, "learning_rate": 3.334101736591364e-05, "log_odds_chosen": 10.25976848602295, "log_odds_ratio": -0.00017066083091776818, "logits/chosen": -0.3861597180366516, "logits/rejected": -0.5417386293411255, "logps/chosen": -0.0005861219833604991, "logps/rejected": -2.005835771560669, "loss": 1.5664, "nll_loss": 0.3915861248970032, "rewards/accuracies": 1.0, "rewards/chosen": -5.861219688085839e-05, "rewards/margins": 0.2005249559879303, "rewards/rejected": -0.2005835920572281, "step": 5782 }, { "epoch": 3.999308437067773, "grad_norm": 18.3858585357666, "learning_rate": 3.333717534962348e-05, "log_odds_chosen": 10.059211730957031, "log_odds_ratio": -0.0001356978464173153, "logits/chosen": -0.836675763130188, "logits/rejected": -0.828482985496521, "logps/chosen": -0.00039792529423721135, "logps/rejected": -2.2047948837280273, "loss": 1.9703, "nll_loss": 0.49256181716918945, "rewards/accuracies": 1.0, "rewards/chosen": -3.979253233410418e-05, "rewards/margins": 0.22043968737125397, "rewards/rejected": -0.22047948837280273, "step": 5783 }, { "epoch": 4.0, "grad_norm": 19.170635223388672, "learning_rate": 3.3333333333333335e-05, "log_odds_chosen": 8.822633743286133, "log_odds_ratio": -0.1293601244688034, "logits/chosen": -0.4684547185897827, "logits/rejected": -0.5892400741577148, "logps/chosen": -0.019132127985358238, "logps/rejected": -1.3569003343582153, "loss": 2.0622, "nll_loss": 0.502617597579956, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019132127054035664, "rewards/margins": 0.13377681374549866, "rewards/rejected": -0.13569003343582153, "step": 5784 }, { "epoch": 4.000691562932227, "grad_norm": 7.128642559051514, "learning_rate": 3.332949131704319e-05, "log_odds_chosen": 9.820199966430664, "log_odds_ratio": -0.000142527642310597, "logits/chosen": -0.2510155141353607, "logits/rejected": -0.3180846571922302, "logps/chosen": -0.00030701240757480264, "logps/rejected": -1.6696772575378418, "loss": 0.9549, "nll_loss": 0.23871463537216187, "rewards/accuracies": 1.0, "rewards/chosen": -3.0701241485076025e-05, "rewards/margins": 0.16693702340126038, "rewards/rejected": -0.1669677346944809, "step": 5785 }, { "epoch": 4.001383125864454, "grad_norm": 6.740342617034912, "learning_rate": 3.332564930075304e-05, "log_odds_chosen": 10.16427993774414, "log_odds_ratio": -0.0003346512676216662, "logits/chosen": -0.3441002666950226, "logits/rejected": -0.3391599655151367, "logps/chosen": -0.000762086478061974, "logps/rejected": -2.062274217605591, "loss": 1.1732, "nll_loss": 0.29325664043426514, "rewards/accuracies": 1.0, "rewards/chosen": -7.620864198543131e-05, "rewards/margins": 0.20615121722221375, "rewards/rejected": -0.20622742176055908, "step": 5786 }, { "epoch": 4.0020746887966805, "grad_norm": 9.397114753723145, "learning_rate": 3.3321807284462885e-05, "log_odds_chosen": 9.37049674987793, "log_odds_ratio": -0.0008974755764938891, "logits/chosen": -0.21732686460018158, "logits/rejected": -0.30178898572921753, "logps/chosen": -0.003378002205863595, "logps/rejected": -2.089550018310547, "loss": 1.0666, "nll_loss": 0.2665643095970154, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003378002729732543, "rewards/margins": 0.2086172103881836, "rewards/rejected": -0.20895501971244812, "step": 5787 }, { "epoch": 4.002766251728907, "grad_norm": 6.804192066192627, "learning_rate": 3.331796526817274e-05, "log_odds_chosen": 8.74463176727295, "log_odds_ratio": -0.008925949223339558, "logits/chosen": 0.05974145978689194, "logits/rejected": -0.018502473831176758, "logps/chosen": -0.0030243650544434786, "logps/rejected": -1.566483974456787, "loss": 1.4798, "nll_loss": 0.36905479431152344, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030243649962358177, "rewards/margins": 0.15634596347808838, "rewards/rejected": -0.1566484123468399, "step": 5788 }, { "epoch": 4.003457814661134, "grad_norm": 7.862604141235352, "learning_rate": 3.331412325188259e-05, "log_odds_chosen": 10.536462783813477, "log_odds_ratio": -8.758976764511317e-05, "logits/chosen": -0.43617433309555054, "logits/rejected": -0.4724277853965759, "logps/chosen": -0.00041452451841905713, "logps/rejected": -2.274538040161133, "loss": 1.2811, "nll_loss": 0.3202652633190155, "rewards/accuracies": 1.0, "rewards/chosen": -4.1452447476331145e-05, "rewards/margins": 0.2274123728275299, "rewards/rejected": -0.2274537980556488, "step": 5789 }, { "epoch": 4.004149377593361, "grad_norm": 6.825366973876953, "learning_rate": 3.3310281235592436e-05, "log_odds_chosen": 9.179971694946289, "log_odds_ratio": -0.02155376970767975, "logits/chosen": -0.5471166968345642, "logits/rejected": -0.5032497644424438, "logps/chosen": -0.012416576966643333, "logps/rejected": -1.6312053203582764, "loss": 1.114, "nll_loss": 0.276351660490036, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012416577665135264, "rewards/margins": 0.16187886893749237, "rewards/rejected": -0.16312053799629211, "step": 5790 }, { "epoch": 4.004840940525588, "grad_norm": 7.924466609954834, "learning_rate": 3.3306439219302295e-05, "log_odds_chosen": 10.494912147521973, "log_odds_ratio": -8.59973588376306e-05, "logits/chosen": -0.2867485582828522, "logits/rejected": -0.3789327144622803, "logps/chosen": -0.00026885856641456485, "logps/rejected": -1.7884368896484375, "loss": 1.0137, "nll_loss": 0.2534157931804657, "rewards/accuracies": 1.0, "rewards/chosen": -2.6885860279435292e-05, "rewards/margins": 0.1788167953491211, "rewards/rejected": -0.178843691945076, "step": 5791 }, { "epoch": 4.005532503457815, "grad_norm": 5.553402900695801, "learning_rate": 3.330259720301214e-05, "log_odds_chosen": 10.350786209106445, "log_odds_ratio": -6.509172089863569e-05, "logits/chosen": -0.7506746649742126, "logits/rejected": -0.7882779836654663, "logps/chosen": -0.0001626495795790106, "logps/rejected": -1.4804134368896484, "loss": 1.3166, "nll_loss": 0.32914409041404724, "rewards/accuracies": 1.0, "rewards/chosen": -1.6264959413092583e-05, "rewards/margins": 0.14802506566047668, "rewards/rejected": -0.14804133772850037, "step": 5792 }, { "epoch": 4.0062240663900415, "grad_norm": 7.096441745758057, "learning_rate": 3.329875518672199e-05, "log_odds_chosen": 10.433094024658203, "log_odds_ratio": -4.2833940824493766e-05, "logits/chosen": -0.7019106149673462, "logits/rejected": -0.7552410364151001, "logps/chosen": -0.0001473083975724876, "logps/rejected": -1.739577293395996, "loss": 1.2298, "nll_loss": 0.30744898319244385, "rewards/accuracies": 1.0, "rewards/chosen": -1.4730839211551938e-05, "rewards/margins": 0.17394298315048218, "rewards/rejected": -0.1739577353000641, "step": 5793 }, { "epoch": 4.006915629322268, "grad_norm": 11.629804611206055, "learning_rate": 3.3294913170431846e-05, "log_odds_chosen": 9.714581489562988, "log_odds_ratio": -0.004458730109035969, "logits/chosen": -0.5414714813232422, "logits/rejected": -0.5641164183616638, "logps/chosen": -0.0026589545886963606, "logps/rejected": -1.81493079662323, "loss": 1.8955, "nll_loss": 0.47342973947525024, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026589547633193433, "rewards/margins": 0.1812271922826767, "rewards/rejected": -0.18149308860301971, "step": 5794 }, { "epoch": 4.007607192254495, "grad_norm": 9.95302963256836, "learning_rate": 3.32910711541417e-05, "log_odds_chosen": 9.88841438293457, "log_odds_ratio": -0.0001660238776821643, "logits/chosen": -0.5515083074569702, "logits/rejected": -0.6294294595718384, "logps/chosen": -0.0006042959867045283, "logps/rejected": -1.9443670511245728, "loss": 1.36, "nll_loss": 0.33998462557792664, "rewards/accuracies": 1.0, "rewards/chosen": -6.042960376362316e-05, "rewards/margins": 0.19437627494335175, "rewards/rejected": -0.1944366991519928, "step": 5795 }, { "epoch": 4.008298755186722, "grad_norm": 5.845525741577148, "learning_rate": 3.3287229137851544e-05, "log_odds_chosen": 9.629524230957031, "log_odds_ratio": -0.0003176818136125803, "logits/chosen": -0.6585010290145874, "logits/rejected": -0.6973461508750916, "logps/chosen": -0.005589592270553112, "logps/rejected": -2.166471242904663, "loss": 0.9621, "nll_loss": 0.2404988408088684, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005589592037722468, "rewards/margins": 0.2160881757736206, "rewards/rejected": -0.21664713323116302, "step": 5796 }, { "epoch": 4.008990318118949, "grad_norm": 5.421998500823975, "learning_rate": 3.3283387121561396e-05, "log_odds_chosen": 9.164302825927734, "log_odds_ratio": -0.0008697113371454179, "logits/chosen": -0.6519193649291992, "logits/rejected": -0.6003968119621277, "logps/chosen": -0.01484967116266489, "logps/rejected": -2.3859739303588867, "loss": 2.3477, "nll_loss": 0.5868465304374695, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014849671861156821, "rewards/margins": 0.23711243271827698, "rewards/rejected": -0.23859739303588867, "step": 5797 }, { "epoch": 4.009681881051176, "grad_norm": 9.98675537109375, "learning_rate": 3.327954510527125e-05, "log_odds_chosen": 10.505779266357422, "log_odds_ratio": -5.718199099646881e-05, "logits/chosen": -0.9864379167556763, "logits/rejected": -1.007617473602295, "logps/chosen": -0.0009500356391072273, "logps/rejected": -2.0847883224487305, "loss": 1.2023, "nll_loss": 0.3005639314651489, "rewards/accuracies": 1.0, "rewards/chosen": -9.500356100033969e-05, "rewards/margins": 0.20838382840156555, "rewards/rejected": -0.20847883820533752, "step": 5798 }, { "epoch": 4.0103734439834025, "grad_norm": 8.891336441040039, "learning_rate": 3.3275703088981094e-05, "log_odds_chosen": 9.008024215698242, "log_odds_ratio": -0.0032351433765143156, "logits/chosen": -0.703883171081543, "logits/rejected": -0.689159631729126, "logps/chosen": -0.0022129842545837164, "logps/rejected": -1.6951110363006592, "loss": 0.9387, "nll_loss": 0.23434747755527496, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022129842545837164, "rewards/margins": 0.16928979754447937, "rewards/rejected": -0.1695111095905304, "step": 5799 }, { "epoch": 4.011065006915629, "grad_norm": 6.8816142082214355, "learning_rate": 3.3271861072690954e-05, "log_odds_chosen": 9.651518821716309, "log_odds_ratio": -0.0003201818326488137, "logits/chosen": -0.559990644454956, "logits/rejected": -0.6841223239898682, "logps/chosen": -0.002246101386845112, "logps/rejected": -1.9939563274383545, "loss": 1.3129, "nll_loss": 0.32818740606307983, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022461013577412814, "rewards/margins": 0.1991710364818573, "rewards/rejected": -0.19939564168453217, "step": 5800 }, { "epoch": 4.011756569847856, "grad_norm": 6.750901222229004, "learning_rate": 3.32680190564008e-05, "log_odds_chosen": 8.320624351501465, "log_odds_ratio": -0.0023899758234620094, "logits/chosen": -0.9084970355033875, "logits/rejected": -0.9268815517425537, "logps/chosen": -0.0070159039460122585, "logps/rejected": -1.546889066696167, "loss": 1.1501, "nll_loss": 0.2872742712497711, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007015903829596937, "rewards/margins": 0.153987318277359, "rewards/rejected": -0.15468890964984894, "step": 5801 }, { "epoch": 4.012448132780083, "grad_norm": 13.624855995178223, "learning_rate": 3.326417704011065e-05, "log_odds_chosen": 10.405290603637695, "log_odds_ratio": -4.9031819798983634e-05, "logits/chosen": -0.9404025077819824, "logits/rejected": -1.0046405792236328, "logps/chosen": -0.00026822343352250755, "logps/rejected": -1.8819442987442017, "loss": 1.0397, "nll_loss": 0.25991424918174744, "rewards/accuracies": 1.0, "rewards/chosen": -2.6822342988452874e-05, "rewards/margins": 0.18816760182380676, "rewards/rejected": -0.1881944239139557, "step": 5802 }, { "epoch": 4.01313969571231, "grad_norm": 7.276597499847412, "learning_rate": 3.3260335023820504e-05, "log_odds_chosen": 9.873104095458984, "log_odds_ratio": -9.630187560105696e-05, "logits/chosen": -0.5927930474281311, "logits/rejected": -0.6485029458999634, "logps/chosen": -0.0012197827454656363, "logps/rejected": -2.201451063156128, "loss": 1.2459, "nll_loss": 0.3114704191684723, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001219782789121382, "rewards/margins": 0.22002311050891876, "rewards/rejected": -0.2201451063156128, "step": 5803 }, { "epoch": 4.013831258644537, "grad_norm": 8.02657413482666, "learning_rate": 3.325649300753036e-05, "log_odds_chosen": 8.938360214233398, "log_odds_ratio": -0.0008153109229169786, "logits/chosen": -0.7673556804656982, "logits/rejected": -0.8766248226165771, "logps/chosen": -0.00693178316578269, "logps/rejected": -2.1472220420837402, "loss": 1.2568, "nll_loss": 0.3141286075115204, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006931783864274621, "rewards/margins": 0.21402904391288757, "rewards/rejected": -0.21472221612930298, "step": 5804 }, { "epoch": 4.014522821576763, "grad_norm": 7.6537041664123535, "learning_rate": 3.32526509912402e-05, "log_odds_chosen": 7.112116813659668, "log_odds_ratio": -0.15588873624801636, "logits/chosen": -0.3076091408729553, "logits/rejected": -0.3353484272956848, "logps/chosen": -0.03844211995601654, "logps/rejected": -1.5237889289855957, "loss": 1.308, "nll_loss": 0.3114077150821686, "rewards/accuracies": 0.875, "rewards/chosen": -0.003844211809337139, "rewards/margins": 0.14853468537330627, "rewards/rejected": -0.1523789018392563, "step": 5805 }, { "epoch": 4.01521438450899, "grad_norm": 7.396170616149902, "learning_rate": 3.3248808974950055e-05, "log_odds_chosen": 8.05859375, "log_odds_ratio": -0.04836432635784149, "logits/chosen": -0.5385514497756958, "logits/rejected": -0.6036756634712219, "logps/chosen": -0.01102085318416357, "logps/rejected": -0.7705286145210266, "loss": 1.4619, "nll_loss": 0.36063531041145325, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011020853416994214, "rewards/margins": 0.07595077902078629, "rewards/rejected": -0.07705286890268326, "step": 5806 }, { "epoch": 4.015905947441217, "grad_norm": 13.535194396972656, "learning_rate": 3.324496695865991e-05, "log_odds_chosen": 10.356252670288086, "log_odds_ratio": -8.396595512749627e-05, "logits/chosen": -0.6381219029426575, "logits/rejected": -0.7172715067863464, "logps/chosen": -0.00027148399385623634, "logps/rejected": -1.8285762071609497, "loss": 0.8793, "nll_loss": 0.2198178768157959, "rewards/accuracies": 1.0, "rewards/chosen": -2.7148398658027872e-05, "rewards/margins": 0.18283048272132874, "rewards/rejected": -0.18285763263702393, "step": 5807 }, { "epoch": 4.016597510373444, "grad_norm": 12.470870971679688, "learning_rate": 3.324112494236975e-05, "log_odds_chosen": 9.309225082397461, "log_odds_ratio": -0.004393730312585831, "logits/chosen": -0.423007607460022, "logits/rejected": -0.48332124948501587, "logps/chosen": -0.0026422408409416676, "logps/rejected": -1.7035892009735107, "loss": 1.0427, "nll_loss": 0.26024723052978516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002642240724526346, "rewards/margins": 0.17009469866752625, "rewards/rejected": -0.17035892605781555, "step": 5808 }, { "epoch": 4.017289073305671, "grad_norm": 8.032957077026367, "learning_rate": 3.323728292607961e-05, "log_odds_chosen": 8.703680038452148, "log_odds_ratio": -0.08522697538137436, "logits/chosen": -0.41288989782333374, "logits/rejected": -0.4818640947341919, "logps/chosen": -0.014834209345281124, "logps/rejected": -1.533931016921997, "loss": 1.2859, "nll_loss": 0.31296151876449585, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014834211906418204, "rewards/margins": 0.15190967917442322, "rewards/rejected": -0.15339310467243195, "step": 5809 }, { "epoch": 4.017980636237898, "grad_norm": 5.716402053833008, "learning_rate": 3.323344090978946e-05, "log_odds_chosen": 8.23971176147461, "log_odds_ratio": -0.036382004618644714, "logits/chosen": 0.07608754932880402, "logits/rejected": 0.08648036420345306, "logps/chosen": -0.011090649291872978, "logps/rejected": -1.2002122402191162, "loss": 1.1303, "nll_loss": 0.27892497181892395, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011090649059042335, "rewards/margins": 0.11891216039657593, "rewards/rejected": -0.12002123147249222, "step": 5810 }, { "epoch": 4.018672199170124, "grad_norm": 6.547496795654297, "learning_rate": 3.322959889349931e-05, "log_odds_chosen": 8.85584831237793, "log_odds_ratio": -0.08818444609642029, "logits/chosen": -0.5728781819343567, "logits/rejected": -0.6478579044342041, "logps/chosen": -0.015129598788917065, "logps/rejected": -1.615843415260315, "loss": 1.0228, "nll_loss": 0.2468700110912323, "rewards/accuracies": 0.875, "rewards/chosen": -0.0015129598323255777, "rewards/margins": 0.16007138788700104, "rewards/rejected": -0.16158434748649597, "step": 5811 }, { "epoch": 4.019363762102351, "grad_norm": 5.890070915222168, "learning_rate": 3.322575687720916e-05, "log_odds_chosen": 8.961607933044434, "log_odds_ratio": -0.0023988212924450636, "logits/chosen": -0.500460684299469, "logits/rejected": -0.5649879574775696, "logps/chosen": -0.0030070669017732143, "logps/rejected": -1.4147664308547974, "loss": 0.6675, "nll_loss": 0.16662628948688507, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030070668435655534, "rewards/margins": 0.14117594063282013, "rewards/rejected": -0.14147664606571198, "step": 5812 }, { "epoch": 4.020055325034578, "grad_norm": 8.608002662658691, "learning_rate": 3.3221914860919015e-05, "log_odds_chosen": 10.015937805175781, "log_odds_ratio": -0.00011480056855361909, "logits/chosen": -0.38741636276245117, "logits/rejected": -0.47290775179862976, "logps/chosen": -0.000604795990511775, "logps/rejected": -2.022552013397217, "loss": 0.9456, "nll_loss": 0.23638921976089478, "rewards/accuracies": 1.0, "rewards/chosen": -6.047959686839022e-05, "rewards/margins": 0.2021946907043457, "rewards/rejected": -0.20225518941879272, "step": 5813 }, { "epoch": 4.020746887966805, "grad_norm": 8.891815185546875, "learning_rate": 3.321807284462886e-05, "log_odds_chosen": 10.402395248413086, "log_odds_ratio": -6.092392504797317e-05, "logits/chosen": -0.6218123435974121, "logits/rejected": -0.6601628065109253, "logps/chosen": -0.00022192316828295588, "logps/rejected": -1.955885648727417, "loss": 1.2569, "nll_loss": 0.31421294808387756, "rewards/accuracies": 1.0, "rewards/chosen": -2.219231828348711e-05, "rewards/margins": 0.19556638598442078, "rewards/rejected": -0.19558857381343842, "step": 5814 }, { "epoch": 4.021438450899032, "grad_norm": 6.3259100914001465, "learning_rate": 3.321423082833871e-05, "log_odds_chosen": 9.721456527709961, "log_odds_ratio": -0.0003540450125001371, "logits/chosen": -0.442880779504776, "logits/rejected": -0.5284356474876404, "logps/chosen": -0.0006333962082862854, "logps/rejected": -1.8140935897827148, "loss": 1.0699, "nll_loss": 0.2674439549446106, "rewards/accuracies": 1.0, "rewards/chosen": -6.333962664939463e-05, "rewards/margins": 0.18134601414203644, "rewards/rejected": -0.18140935897827148, "step": 5815 }, { "epoch": 4.022130013831259, "grad_norm": 10.469477653503418, "learning_rate": 3.3210388812048566e-05, "log_odds_chosen": 9.853558540344238, "log_odds_ratio": -0.00032846731483004987, "logits/chosen": -0.5319367051124573, "logits/rejected": -0.5036752223968506, "logps/chosen": -0.011169369332492352, "logps/rejected": -2.269057273864746, "loss": 0.9128, "nll_loss": 0.22815820574760437, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011169369099661708, "rewards/margins": 0.22578881680965424, "rewards/rejected": -0.22690574824810028, "step": 5816 }, { "epoch": 4.022821576763485, "grad_norm": 12.238142013549805, "learning_rate": 3.320654679575841e-05, "log_odds_chosen": 10.812685012817383, "log_odds_ratio": -3.920509334420785e-05, "logits/chosen": -0.5341860055923462, "logits/rejected": -0.5553966760635376, "logps/chosen": -0.000269267096882686, "logps/rejected": -2.453526020050049, "loss": 1.8054, "nll_loss": 0.45134228467941284, "rewards/accuracies": 1.0, "rewards/chosen": -2.6926711143460125e-05, "rewards/margins": 0.24532568454742432, "rewards/rejected": -0.2453525960445404, "step": 5817 }, { "epoch": 4.023513139695712, "grad_norm": 9.472021102905273, "learning_rate": 3.320270477946827e-05, "log_odds_chosen": 10.345442771911621, "log_odds_ratio": -0.00016254739603027701, "logits/chosen": -0.6953545808792114, "logits/rejected": -0.7270474433898926, "logps/chosen": -0.002389824017882347, "logps/rejected": -2.3488526344299316, "loss": 1.2988, "nll_loss": 0.3246712386608124, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002389824076090008, "rewards/margins": 0.2346462905406952, "rewards/rejected": -0.23488527536392212, "step": 5818 }, { "epoch": 4.024204702627939, "grad_norm": 9.914827346801758, "learning_rate": 3.3198862763178116e-05, "log_odds_chosen": 9.170671463012695, "log_odds_ratio": -0.0005998075939714909, "logits/chosen": -0.6479396820068359, "logits/rejected": -0.6544699668884277, "logps/chosen": -0.016290009021759033, "logps/rejected": -1.6676925420761108, "loss": 1.4749, "nll_loss": 0.3686674237251282, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016290009953081608, "rewards/margins": 0.16514025628566742, "rewards/rejected": -0.16676926612854004, "step": 5819 }, { "epoch": 4.024896265560166, "grad_norm": 16.734146118164062, "learning_rate": 3.319502074688797e-05, "log_odds_chosen": 10.265254020690918, "log_odds_ratio": -4.8487512685824186e-05, "logits/chosen": -0.18163836002349854, "logits/rejected": -0.30264967679977417, "logps/chosen": -0.0008192628156393766, "logps/rejected": -2.293407917022705, "loss": 1.3015, "nll_loss": 0.3253812789916992, "rewards/accuracies": 1.0, "rewards/chosen": -8.192627865355462e-05, "rewards/margins": 0.22925885021686554, "rewards/rejected": -0.22934077680110931, "step": 5820 }, { "epoch": 4.025587828492393, "grad_norm": 8.877284049987793, "learning_rate": 3.319117873059782e-05, "log_odds_chosen": 10.271307945251465, "log_odds_ratio": -0.00017034000484272838, "logits/chosen": -0.3948308825492859, "logits/rejected": -0.4352070987224579, "logps/chosen": -0.00041239382699131966, "logps/rejected": -1.8108956813812256, "loss": 0.9203, "nll_loss": 0.23004809021949768, "rewards/accuracies": 1.0, "rewards/chosen": -4.1239381971536204e-05, "rewards/margins": 0.18104831874370575, "rewards/rejected": -0.18108955025672913, "step": 5821 }, { "epoch": 4.0262793914246195, "grad_norm": 9.338384628295898, "learning_rate": 3.3187336714307674e-05, "log_odds_chosen": 9.377090454101562, "log_odds_ratio": -0.0036353226751089096, "logits/chosen": -0.27123838663101196, "logits/rejected": -0.35256731510162354, "logps/chosen": -0.002619482111185789, "logps/rejected": -1.7505947351455688, "loss": 0.8744, "nll_loss": 0.21822936832904816, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026194824022240937, "rewards/margins": 0.17479753494262695, "rewards/rejected": -0.1750594824552536, "step": 5822 }, { "epoch": 4.026970954356846, "grad_norm": 6.480832099914551, "learning_rate": 3.318349469801752e-05, "log_odds_chosen": 10.178642272949219, "log_odds_ratio": -9.12517643882893e-05, "logits/chosen": -0.6648082733154297, "logits/rejected": -0.702701210975647, "logps/chosen": -0.00019598891958594322, "logps/rejected": -1.6632938385009766, "loss": 1.1788, "nll_loss": 0.2946855127811432, "rewards/accuracies": 1.0, "rewards/chosen": -1.9598892322392203e-05, "rewards/margins": 0.16630978882312775, "rewards/rejected": -0.16632938385009766, "step": 5823 }, { "epoch": 4.027662517289073, "grad_norm": 7.462141513824463, "learning_rate": 3.317965268172737e-05, "log_odds_chosen": 8.131479263305664, "log_odds_ratio": -0.0034697859082370996, "logits/chosen": -0.7409053444862366, "logits/rejected": -0.7156566381454468, "logps/chosen": -0.0027547297067940235, "logps/rejected": -1.4278616905212402, "loss": 1.5324, "nll_loss": 0.38275811076164246, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002754729939624667, "rewards/margins": 0.14251069724559784, "rewards/rejected": -0.1427861750125885, "step": 5824 }, { "epoch": 4.0283540802213, "grad_norm": 8.907909393310547, "learning_rate": 3.3175810665437224e-05, "log_odds_chosen": 9.18642807006836, "log_odds_ratio": -0.0003177436883561313, "logits/chosen": -0.6876662969589233, "logits/rejected": -0.6967380046844482, "logps/chosen": -0.0016029981197789311, "logps/rejected": -1.6288145780563354, "loss": 1.5022, "nll_loss": 0.37551790475845337, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001602998236194253, "rewards/margins": 0.1627211570739746, "rewards/rejected": -0.16288146376609802, "step": 5825 }, { "epoch": 4.029045643153527, "grad_norm": 9.733176231384277, "learning_rate": 3.317196864914707e-05, "log_odds_chosen": 10.043136596679688, "log_odds_ratio": -0.00010084448149427772, "logits/chosen": -0.6539413928985596, "logits/rejected": -0.7563472986221313, "logps/chosen": -0.0002622072061058134, "logps/rejected": -1.8707082271575928, "loss": 1.3907, "nll_loss": 0.3476561903953552, "rewards/accuracies": 1.0, "rewards/chosen": -2.62207213381771e-05, "rewards/margins": 0.18704460561275482, "rewards/rejected": -0.187070831656456, "step": 5826 }, { "epoch": 4.029737206085754, "grad_norm": 5.350778102874756, "learning_rate": 3.316812663285693e-05, "log_odds_chosen": 9.771782875061035, "log_odds_ratio": -0.00010835661669261754, "logits/chosen": -0.5402613282203674, "logits/rejected": -0.6298444271087646, "logps/chosen": -0.00028526870300993323, "logps/rejected": -1.5779385566711426, "loss": 1.8248, "nll_loss": 0.45618361234664917, "rewards/accuracies": 1.0, "rewards/chosen": -2.8526872483780608e-05, "rewards/margins": 0.15776532888412476, "rewards/rejected": -0.15779386460781097, "step": 5827 }, { "epoch": 4.0304287690179805, "grad_norm": 6.235321998596191, "learning_rate": 3.3164284616566775e-05, "log_odds_chosen": 10.532633781433105, "log_odds_ratio": -3.9244259824045e-05, "logits/chosen": -0.2530810236930847, "logits/rejected": -0.2999076843261719, "logps/chosen": -0.0009787610033527017, "logps/rejected": -2.2996246814727783, "loss": 1.0636, "nll_loss": 0.26590120792388916, "rewards/accuracies": 1.0, "rewards/chosen": -9.78761090664193e-05, "rewards/margins": 0.22986459732055664, "rewards/rejected": -0.22996249794960022, "step": 5828 }, { "epoch": 4.031120331950207, "grad_norm": 12.791536331176758, "learning_rate": 3.316044260027663e-05, "log_odds_chosen": 10.55655288696289, "log_odds_ratio": -6.204926467034966e-05, "logits/chosen": -0.5250865817070007, "logits/rejected": -0.6933255791664124, "logps/chosen": -0.0002122572623193264, "logps/rejected": -1.961160659790039, "loss": 1.3719, "nll_loss": 0.34295907616615295, "rewards/accuracies": 1.0, "rewards/chosen": -2.122572550433688e-05, "rewards/margins": 0.1960948407649994, "rewards/rejected": -0.19611608982086182, "step": 5829 }, { "epoch": 4.031811894882434, "grad_norm": 75.29803466796875, "learning_rate": 3.315660058398648e-05, "log_odds_chosen": 9.58288288116455, "log_odds_ratio": -0.014156394638121128, "logits/chosen": -0.46651431918144226, "logits/rejected": -0.5524017214775085, "logps/chosen": -0.2065291553735733, "logps/rejected": -2.4342660903930664, "loss": 1.4847, "nll_loss": 0.36975815892219543, "rewards/accuracies": 1.0, "rewards/chosen": -0.02065291441977024, "rewards/margins": 0.2227737009525299, "rewards/rejected": -0.2434266209602356, "step": 5830 }, { "epoch": 4.032503457814661, "grad_norm": 10.947818756103516, "learning_rate": 3.315275856769633e-05, "log_odds_chosen": 10.092506408691406, "log_odds_ratio": -7.060384814394638e-05, "logits/chosen": -0.801364541053772, "logits/rejected": -0.8676729202270508, "logps/chosen": -0.00020597776165232062, "logps/rejected": -1.7199057340621948, "loss": 1.7281, "nll_loss": 0.4320223927497864, "rewards/accuracies": 1.0, "rewards/chosen": -2.059777580143418e-05, "rewards/margins": 0.17196998000144958, "rewards/rejected": -0.17199058830738068, "step": 5831 }, { "epoch": 4.033195020746888, "grad_norm": 5.659107685089111, "learning_rate": 3.314891655140618e-05, "log_odds_chosen": 9.809650421142578, "log_odds_ratio": -0.00014637268031947315, "logits/chosen": -0.37774717807769775, "logits/rejected": -0.39840513467788696, "logps/chosen": -0.0003810464695561677, "logps/rejected": -1.6779835224151611, "loss": 1.6287, "nll_loss": 0.40714868903160095, "rewards/accuracies": 1.0, "rewards/chosen": -3.810464477282949e-05, "rewards/margins": 0.16776025295257568, "rewards/rejected": -0.16779834032058716, "step": 5832 }, { "epoch": 4.033886583679115, "grad_norm": 5.58001708984375, "learning_rate": 3.314507453511603e-05, "log_odds_chosen": 9.410863876342773, "log_odds_ratio": -0.0003080420719925314, "logits/chosen": -0.025425251573324203, "logits/rejected": -0.09473808109760284, "logps/chosen": -0.00043381942668929696, "logps/rejected": -1.3813542127609253, "loss": 0.7886, "nll_loss": 0.19711832702159882, "rewards/accuracies": 1.0, "rewards/chosen": -4.338194048614241e-05, "rewards/margins": 0.1380920559167862, "rewards/rejected": -0.13813543319702148, "step": 5833 }, { "epoch": 4.0345781466113415, "grad_norm": 7.627755165100098, "learning_rate": 3.314123251882588e-05, "log_odds_chosen": 10.607280731201172, "log_odds_ratio": -0.0001868123799795285, "logits/chosen": -0.47707587480545044, "logits/rejected": -0.5821393728256226, "logps/chosen": -0.00018466573965270072, "logps/rejected": -2.233959674835205, "loss": 1.2688, "nll_loss": 0.3171829581260681, "rewards/accuracies": 1.0, "rewards/chosen": -1.8466576875653118e-05, "rewards/margins": 0.223377525806427, "rewards/rejected": -0.22339597344398499, "step": 5834 }, { "epoch": 4.035269709543568, "grad_norm": 10.557724952697754, "learning_rate": 3.313739050253573e-05, "log_odds_chosen": 10.566575050354004, "log_odds_ratio": -0.00015645605162717402, "logits/chosen": -0.575480580329895, "logits/rejected": -0.6633766293525696, "logps/chosen": -0.0013164678821340203, "logps/rejected": -1.9831597805023193, "loss": 1.5592, "nll_loss": 0.3897833228111267, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013164679694455117, "rewards/margins": 0.1981843262910843, "rewards/rejected": -0.19831597805023193, "step": 5835 }, { "epoch": 4.035961272475795, "grad_norm": 11.694565773010254, "learning_rate": 3.313354848624559e-05, "log_odds_chosen": 10.140708923339844, "log_odds_ratio": -0.00014054967323318124, "logits/chosen": -1.0209980010986328, "logits/rejected": -1.130377173423767, "logps/chosen": -0.0005266097723506391, "logps/rejected": -1.9271914958953857, "loss": 1.2674, "nll_loss": 0.31684738397598267, "rewards/accuracies": 1.0, "rewards/chosen": -5.2660980145446956e-05, "rewards/margins": 0.19266650080680847, "rewards/rejected": -0.19271916151046753, "step": 5836 }, { "epoch": 4.036652835408022, "grad_norm": 10.414692878723145, "learning_rate": 3.312970646995543e-05, "log_odds_chosen": 9.67306900024414, "log_odds_ratio": -0.0002543226000852883, "logits/chosen": -0.4915074110031128, "logits/rejected": -0.49034228920936584, "logps/chosen": -0.006264523137360811, "logps/rejected": -2.669769287109375, "loss": 0.9577, "nll_loss": 0.23939737677574158, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006264523253776133, "rewards/margins": 0.26635047793388367, "rewards/rejected": -0.266976922750473, "step": 5837 }, { "epoch": 4.037344398340249, "grad_norm": 8.162940979003906, "learning_rate": 3.3125864453665286e-05, "log_odds_chosen": 8.675409317016602, "log_odds_ratio": -0.0008966091554611921, "logits/chosen": -0.8568264245986938, "logits/rejected": -0.8570026755332947, "logps/chosen": -0.0007508368580602109, "logps/rejected": -1.2639950513839722, "loss": 1.5968, "nll_loss": 0.39911770820617676, "rewards/accuracies": 1.0, "rewards/chosen": -7.508369162678719e-05, "rewards/margins": 0.12632441520690918, "rewards/rejected": -0.12639950215816498, "step": 5838 }, { "epoch": 4.038035961272476, "grad_norm": 20.370845794677734, "learning_rate": 3.312202243737514e-05, "log_odds_chosen": 10.564475059509277, "log_odds_ratio": -4.5465276343747973e-05, "logits/chosen": -0.6202433109283447, "logits/rejected": -0.6588761210441589, "logps/chosen": -0.00035935192136093974, "logps/rejected": -2.3072285652160645, "loss": 1.3037, "nll_loss": 0.325916588306427, "rewards/accuracies": 1.0, "rewards/chosen": -3.593519068090245e-05, "rewards/margins": 0.23068693280220032, "rewards/rejected": -0.2307228446006775, "step": 5839 }, { "epoch": 4.0387275242047025, "grad_norm": 9.245139122009277, "learning_rate": 3.311818042108499e-05, "log_odds_chosen": 10.530648231506348, "log_odds_ratio": -4.213380452711135e-05, "logits/chosen": -0.6391951441764832, "logits/rejected": -0.6505733728408813, "logps/chosen": -0.00027310033328831196, "logps/rejected": -2.1863694190979004, "loss": 0.7774, "nll_loss": 0.19433549046516418, "rewards/accuracies": 1.0, "rewards/chosen": -2.7310034056426957e-05, "rewards/margins": 0.21860966086387634, "rewards/rejected": -0.21863695979118347, "step": 5840 }, { "epoch": 4.039419087136929, "grad_norm": 8.130341529846191, "learning_rate": 3.3114338404794836e-05, "log_odds_chosen": 10.152312278747559, "log_odds_ratio": -0.0006681890808977187, "logits/chosen": -0.6517927050590515, "logits/rejected": -0.7043352127075195, "logps/chosen": -0.0008484551799483597, "logps/rejected": -1.9009888172149658, "loss": 0.9638, "nll_loss": 0.240884929895401, "rewards/accuracies": 1.0, "rewards/chosen": -8.484552381560206e-05, "rewards/margins": 0.1900140345096588, "rewards/rejected": -0.19009888172149658, "step": 5841 }, { "epoch": 4.040110650069156, "grad_norm": 5.752309799194336, "learning_rate": 3.311049638850469e-05, "log_odds_chosen": 7.149251937866211, "log_odds_ratio": -0.01412150077521801, "logits/chosen": -0.5170456171035767, "logits/rejected": -0.5047367215156555, "logps/chosen": -0.00545818917453289, "logps/rejected": -1.265357494354248, "loss": 1.0664, "nll_loss": 0.26519855856895447, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005458188825286925, "rewards/margins": 0.12598992884159088, "rewards/rejected": -0.12653574347496033, "step": 5842 }, { "epoch": 4.040802213001383, "grad_norm": 6.732954978942871, "learning_rate": 3.310665437221454e-05, "log_odds_chosen": 7.924806118011475, "log_odds_ratio": -0.004892845172435045, "logits/chosen": -0.7184340357780457, "logits/rejected": -0.6960312128067017, "logps/chosen": -0.00998085830360651, "logps/rejected": -1.0627918243408203, "loss": 1.4859, "nll_loss": 0.37099653482437134, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009980859467759728, "rewards/margins": 0.10528109222650528, "rewards/rejected": -0.10627917945384979, "step": 5843 }, { "epoch": 4.04149377593361, "grad_norm": 9.003263473510742, "learning_rate": 3.310281235592439e-05, "log_odds_chosen": 8.91492748260498, "log_odds_ratio": -0.004036875907331705, "logits/chosen": -0.5508736371994019, "logits/rejected": -0.5939282178878784, "logps/chosen": -0.004110867623239756, "logps/rejected": -1.9761884212493896, "loss": 1.5571, "nll_loss": 0.38886570930480957, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004110867448616773, "rewards/margins": 0.1972077488899231, "rewards/rejected": -0.19761884212493896, "step": 5844 }, { "epoch": 4.042185338865837, "grad_norm": 8.566186904907227, "learning_rate": 3.3098970339634246e-05, "log_odds_chosen": 9.330158233642578, "log_odds_ratio": -0.00024974337429739535, "logits/chosen": -0.7203190326690674, "logits/rejected": -0.6908995509147644, "logps/chosen": -0.0022361972369253635, "logps/rejected": -1.909590721130371, "loss": 1.2201, "nll_loss": 0.30500558018684387, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002236197324236855, "rewards/margins": 0.19073545932769775, "rewards/rejected": -0.19095909595489502, "step": 5845 }, { "epoch": 4.0428769017980635, "grad_norm": 9.80972671508789, "learning_rate": 3.309512832334409e-05, "log_odds_chosen": 9.790373802185059, "log_odds_ratio": -0.0003783097490668297, "logits/chosen": -0.6303945183753967, "logits/rejected": -0.698731541633606, "logps/chosen": -0.000483025040011853, "logps/rejected": -1.6333590745925903, "loss": 1.0825, "nll_loss": 0.27057844400405884, "rewards/accuracies": 1.0, "rewards/chosen": -4.8302506911568344e-05, "rewards/margins": 0.16328760981559753, "rewards/rejected": -0.1633358895778656, "step": 5846 }, { "epoch": 4.04356846473029, "grad_norm": 11.211024284362793, "learning_rate": 3.3091286307053944e-05, "log_odds_chosen": 10.19981861114502, "log_odds_ratio": -0.0003373716026544571, "logits/chosen": -0.548815131187439, "logits/rejected": -0.6491366624832153, "logps/chosen": -0.0005771011346951127, "logps/rejected": -1.7962886095046997, "loss": 1.2562, "nll_loss": 0.3140169382095337, "rewards/accuracies": 1.0, "rewards/chosen": -5.77101091039367e-05, "rewards/margins": 0.17957115173339844, "rewards/rejected": -0.17962884902954102, "step": 5847 }, { "epoch": 4.044260027662517, "grad_norm": 6.113912105560303, "learning_rate": 3.3087444290763796e-05, "log_odds_chosen": 9.33714485168457, "log_odds_ratio": -0.0007019840413704515, "logits/chosen": -0.4767472743988037, "logits/rejected": -0.5687282681465149, "logps/chosen": -0.0005971384234726429, "logps/rejected": -1.553364872932434, "loss": 1.2813, "nll_loss": 0.3202533423900604, "rewards/accuracies": 1.0, "rewards/chosen": -5.971384234726429e-05, "rewards/margins": 0.15527677536010742, "rewards/rejected": -0.15533646941184998, "step": 5848 }, { "epoch": 4.044951590594744, "grad_norm": 13.186535835266113, "learning_rate": 3.308360227447365e-05, "log_odds_chosen": 9.782808303833008, "log_odds_ratio": -0.0001403852365911007, "logits/chosen": -0.42614397406578064, "logits/rejected": -0.4612559676170349, "logps/chosen": -0.0003595305315684527, "logps/rejected": -1.6214947700500488, "loss": 1.3976, "nll_loss": 0.3493949770927429, "rewards/accuracies": 1.0, "rewards/chosen": -3.595305315684527e-05, "rewards/margins": 0.16211353242397308, "rewards/rejected": -0.16214948892593384, "step": 5849 }, { "epoch": 4.045643153526971, "grad_norm": 8.229986190795898, "learning_rate": 3.3079760258183495e-05, "log_odds_chosen": 9.515350341796875, "log_odds_ratio": -0.000164119090186432, "logits/chosen": -0.2830381393432617, "logits/rejected": -0.32949697971343994, "logps/chosen": -0.013506095856428146, "logps/rejected": -2.190258026123047, "loss": 0.9495, "nll_loss": 0.23736843466758728, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013506095856428146, "rewards/margins": 0.21767520904541016, "rewards/rejected": -0.21902580559253693, "step": 5850 }, { "epoch": 4.046334716459198, "grad_norm": 10.434491157531738, "learning_rate": 3.307591824189335e-05, "log_odds_chosen": 9.21742057800293, "log_odds_ratio": -0.013323817402124405, "logits/chosen": -0.29416847229003906, "logits/rejected": -0.32375025749206543, "logps/chosen": -0.004796840250492096, "logps/rejected": -1.5443867444992065, "loss": 1.422, "nll_loss": 0.3541748523712158, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004796840075869113, "rewards/margins": 0.1539589762687683, "rewards/rejected": -0.15443867444992065, "step": 5851 }, { "epoch": 4.0470262793914245, "grad_norm": 11.886966705322266, "learning_rate": 3.30720762256032e-05, "log_odds_chosen": 9.648627281188965, "log_odds_ratio": -0.00014911373727954924, "logits/chosen": -0.6481659412384033, "logits/rejected": -0.7246516346931458, "logps/chosen": -0.009942540898919106, "logps/rejected": -1.962467908859253, "loss": 1.2039, "nll_loss": 0.3009604513645172, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009942540200427175, "rewards/margins": 0.19525253772735596, "rewards/rejected": -0.19624680280685425, "step": 5852 }, { "epoch": 4.047717842323651, "grad_norm": 8.506421089172363, "learning_rate": 3.3068234209313045e-05, "log_odds_chosen": 10.214273452758789, "log_odds_ratio": -9.400276758242399e-05, "logits/chosen": -0.1977856159210205, "logits/rejected": -0.24321135878562927, "logps/chosen": -0.00024938437854871154, "logps/rejected": -1.7478985786437988, "loss": 0.8723, "nll_loss": 0.21805506944656372, "rewards/accuracies": 1.0, "rewards/chosen": -2.493843567208387e-05, "rewards/margins": 0.17476493120193481, "rewards/rejected": -0.17478986084461212, "step": 5853 }, { "epoch": 4.048409405255878, "grad_norm": 13.32843017578125, "learning_rate": 3.30643921930229e-05, "log_odds_chosen": 10.405963897705078, "log_odds_ratio": -0.0002303352957824245, "logits/chosen": -0.41388577222824097, "logits/rejected": -0.4686071276664734, "logps/chosen": -0.00018964394985232502, "logps/rejected": -2.0773470401763916, "loss": 1.3127, "nll_loss": 0.32814282178878784, "rewards/accuracies": 1.0, "rewards/chosen": -1.896439425763674e-05, "rewards/margins": 0.20771574974060059, "rewards/rejected": -0.20773470401763916, "step": 5854 }, { "epoch": 4.049100968188105, "grad_norm": 13.760919570922852, "learning_rate": 3.306055017673275e-05, "log_odds_chosen": 9.816307067871094, "log_odds_ratio": -0.0015375094953924417, "logits/chosen": -0.34091717004776, "logits/rejected": -0.41033634543418884, "logps/chosen": -0.0015424349112436175, "logps/rejected": -1.463100790977478, "loss": 1.694, "nll_loss": 0.42334362864494324, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015424350567627698, "rewards/margins": 0.14615583419799805, "rewards/rejected": -0.14631007611751556, "step": 5855 }, { "epoch": 4.049792531120332, "grad_norm": 5.521707057952881, "learning_rate": 3.30567081604426e-05, "log_odds_chosen": 8.408859252929688, "log_odds_ratio": -0.0010520406067371368, "logits/chosen": -0.24230000376701355, "logits/rejected": -0.22912943363189697, "logps/chosen": -0.0007799810264259577, "logps/rejected": -1.02225661277771, "loss": 1.505, "nll_loss": 0.37614238262176514, "rewards/accuracies": 1.0, "rewards/chosen": -7.799810555297881e-05, "rewards/margins": 0.102147676050663, "rewards/rejected": -0.10222566872835159, "step": 5856 }, { "epoch": 4.050484094052559, "grad_norm": 8.019282341003418, "learning_rate": 3.305286614415245e-05, "log_odds_chosen": 9.622739791870117, "log_odds_ratio": -0.0003253653703723103, "logits/chosen": -0.6645296216011047, "logits/rejected": -0.6519110798835754, "logps/chosen": -0.0003871311782859266, "logps/rejected": -2.032543182373047, "loss": 0.873, "nll_loss": 0.21822920441627502, "rewards/accuracies": 1.0, "rewards/chosen": -3.8713122194167227e-05, "rewards/margins": 0.20321562886238098, "rewards/rejected": -0.2032543271780014, "step": 5857 }, { "epoch": 4.051175656984785, "grad_norm": 7.612847328186035, "learning_rate": 3.304902412786231e-05, "log_odds_chosen": 9.509521484375, "log_odds_ratio": -0.0007174806669354439, "logits/chosen": -0.18259556591510773, "logits/rejected": -0.18433810770511627, "logps/chosen": -0.0008035643259063363, "logps/rejected": -1.6824941635131836, "loss": 1.0933, "nll_loss": 0.2732608914375305, "rewards/accuracies": 1.0, "rewards/chosen": -8.035643259063363e-05, "rewards/margins": 0.1681690663099289, "rewards/rejected": -0.16824942827224731, "step": 5858 }, { "epoch": 4.051867219917012, "grad_norm": 10.871746063232422, "learning_rate": 3.304518211157215e-05, "log_odds_chosen": 9.347953796386719, "log_odds_ratio": -0.0002416951465420425, "logits/chosen": -0.5143193006515503, "logits/rejected": -0.6148710250854492, "logps/chosen": -0.0004571893368847668, "logps/rejected": -1.458450198173523, "loss": 1.7587, "nll_loss": 0.43964099884033203, "rewards/accuracies": 1.0, "rewards/chosen": -4.5718930778093636e-05, "rewards/margins": 0.14579930901527405, "rewards/rejected": -0.14584502577781677, "step": 5859 }, { "epoch": 4.052558782849239, "grad_norm": 9.969888687133789, "learning_rate": 3.3041340095282005e-05, "log_odds_chosen": 10.789196968078613, "log_odds_ratio": -7.73576321080327e-05, "logits/chosen": -0.6059550642967224, "logits/rejected": -0.656543493270874, "logps/chosen": -0.0004085530526936054, "logps/rejected": -2.321756601333618, "loss": 1.2811, "nll_loss": 0.32025599479675293, "rewards/accuracies": 1.0, "rewards/chosen": -4.085530963493511e-05, "rewards/margins": 0.23213481903076172, "rewards/rejected": -0.23217566311359406, "step": 5860 }, { "epoch": 4.053250345781466, "grad_norm": 6.3312530517578125, "learning_rate": 3.303749807899186e-05, "log_odds_chosen": 10.216769218444824, "log_odds_ratio": -0.0002273907302878797, "logits/chosen": -0.5803669691085815, "logits/rejected": -0.7455939650535583, "logps/chosen": -0.00029386900132521987, "logps/rejected": -2.0365443229675293, "loss": 1.4209, "nll_loss": 0.35520580410957336, "rewards/accuracies": 1.0, "rewards/chosen": -2.9386899768724106e-05, "rewards/margins": 0.20362505316734314, "rewards/rejected": -0.2036544531583786, "step": 5861 }, { "epoch": 4.053941908713693, "grad_norm": 9.015466690063477, "learning_rate": 3.3033656062701704e-05, "log_odds_chosen": 10.769645690917969, "log_odds_ratio": -3.3408618037356064e-05, "logits/chosen": -0.477365106344223, "logits/rejected": -0.5517987608909607, "logps/chosen": -0.00012404685548972338, "logps/rejected": -1.8976612091064453, "loss": 0.952, "nll_loss": 0.2380056381225586, "rewards/accuracies": 1.0, "rewards/chosen": -1.2404685548972338e-05, "rewards/margins": 0.1897537112236023, "rewards/rejected": -0.18976612389087677, "step": 5862 }, { "epoch": 4.05463347164592, "grad_norm": 7.8228912353515625, "learning_rate": 3.3029814046411556e-05, "log_odds_chosen": 10.78010368347168, "log_odds_ratio": -2.2395632186089642e-05, "logits/chosen": -0.5051406025886536, "logits/rejected": -0.52671217918396, "logps/chosen": -0.00011659066512947902, "logps/rejected": -1.7614760398864746, "loss": 0.78, "nll_loss": 0.1949884295463562, "rewards/accuracies": 1.0, "rewards/chosen": -1.165906542155426e-05, "rewards/margins": 0.17613595724105835, "rewards/rejected": -0.17614760994911194, "step": 5863 }, { "epoch": 4.055325034578146, "grad_norm": 7.346597671508789, "learning_rate": 3.302597203012141e-05, "log_odds_chosen": 10.719219207763672, "log_odds_ratio": -8.285381773021072e-05, "logits/chosen": -0.5942940711975098, "logits/rejected": -0.6187726259231567, "logps/chosen": -0.0016607241705060005, "logps/rejected": -2.183518886566162, "loss": 1.2496, "nll_loss": 0.3124021887779236, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016607240831945091, "rewards/margins": 0.21818581223487854, "rewards/rejected": -0.21835188567638397, "step": 5864 }, { "epoch": 4.056016597510373, "grad_norm": 20.964130401611328, "learning_rate": 3.302213001383126e-05, "log_odds_chosen": 7.885706901550293, "log_odds_ratio": -0.8424516916275024, "logits/chosen": -0.69463050365448, "logits/rejected": -0.7300902009010315, "logps/chosen": -0.08962702751159668, "logps/rejected": -1.2243645191192627, "loss": 1.5402, "nll_loss": 0.300813227891922, "rewards/accuracies": 0.875, "rewards/chosen": -0.008962703868746758, "rewards/margins": 0.11347375810146332, "rewards/rejected": -0.12243645638227463, "step": 5865 }, { "epoch": 4.0567081604426, "grad_norm": 10.524365425109863, "learning_rate": 3.3018287997541107e-05, "log_odds_chosen": 10.204052925109863, "log_odds_ratio": -0.0017390053253620863, "logits/chosen": -0.6175505518913269, "logits/rejected": -0.7030659317970276, "logps/chosen": -0.010915473103523254, "logps/rejected": -2.2788500785827637, "loss": 1.283, "nll_loss": 0.3205869197845459, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010915474267676473, "rewards/margins": 0.22679348289966583, "rewards/rejected": -0.22788502275943756, "step": 5866 }, { "epoch": 4.057399723374827, "grad_norm": 7.630101680755615, "learning_rate": 3.3014445981250966e-05, "log_odds_chosen": 10.327079772949219, "log_odds_ratio": -4.9537731683813035e-05, "logits/chosen": -0.6859206557273865, "logits/rejected": -0.7285531163215637, "logps/chosen": -0.00025954050943255424, "logps/rejected": -1.7925751209259033, "loss": 0.9484, "nll_loss": 0.2370968908071518, "rewards/accuracies": 1.0, "rewards/chosen": -2.595405385363847e-05, "rewards/margins": 0.17923158407211304, "rewards/rejected": -0.17925751209259033, "step": 5867 }, { "epoch": 4.058091286307054, "grad_norm": 7.37345027923584, "learning_rate": 3.301060396496081e-05, "log_odds_chosen": 8.724294662475586, "log_odds_ratio": -0.0013263337314128876, "logits/chosen": -0.7585796117782593, "logits/rejected": -0.7691759467124939, "logps/chosen": -0.012353342026472092, "logps/rejected": -1.8685768842697144, "loss": 1.0109, "nll_loss": 0.25259870290756226, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012353342026472092, "rewards/margins": 0.18562236428260803, "rewards/rejected": -0.1868577003479004, "step": 5868 }, { "epoch": 4.058782849239281, "grad_norm": 8.354995727539062, "learning_rate": 3.3006761948670664e-05, "log_odds_chosen": 8.951905250549316, "log_odds_ratio": -0.005012372508645058, "logits/chosen": -0.4756959080696106, "logits/rejected": -0.6075941324234009, "logps/chosen": -0.01927120052278042, "logps/rejected": -1.5122439861297607, "loss": 1.3131, "nll_loss": 0.32777372002601624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019271199125796556, "rewards/margins": 0.14929728209972382, "rewards/rejected": -0.15122440457344055, "step": 5869 }, { "epoch": 4.059474412171507, "grad_norm": 7.3892107009887695, "learning_rate": 3.3002919932380516e-05, "log_odds_chosen": 8.95905590057373, "log_odds_ratio": -0.000225025272811763, "logits/chosen": -0.27506279945373535, "logits/rejected": -0.28999996185302734, "logps/chosen": -0.0003333079512231052, "logps/rejected": -1.255396842956543, "loss": 1.0496, "nll_loss": 0.26238468289375305, "rewards/accuracies": 1.0, "rewards/chosen": -3.333079075673595e-05, "rewards/margins": 0.12550634145736694, "rewards/rejected": -0.12553967535495758, "step": 5870 }, { "epoch": 4.060165975103734, "grad_norm": 28.17425537109375, "learning_rate": 3.299907791609036e-05, "log_odds_chosen": 9.628087997436523, "log_odds_ratio": -0.00012001794675597921, "logits/chosen": -0.43030136823654175, "logits/rejected": -0.4921196699142456, "logps/chosen": -0.0012534643756225705, "logps/rejected": -2.3265280723571777, "loss": 1.735, "nll_loss": 0.433744877576828, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012534644338302314, "rewards/margins": 0.2325274646282196, "rewards/rejected": -0.23265281319618225, "step": 5871 }, { "epoch": 4.060857538035961, "grad_norm": 9.745065689086914, "learning_rate": 3.2995235899800214e-05, "log_odds_chosen": 10.053963661193848, "log_odds_ratio": -0.0008054873323999345, "logits/chosen": -0.353712797164917, "logits/rejected": -0.3067231774330139, "logps/chosen": -0.006351853255182505, "logps/rejected": -2.8184359073638916, "loss": 1.1014, "nll_loss": 0.27525702118873596, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006351853371597826, "rewards/margins": 0.2812083959579468, "rewards/rejected": -0.2818436026573181, "step": 5872 }, { "epoch": 4.061549100968188, "grad_norm": 9.960372924804688, "learning_rate": 3.299139388351007e-05, "log_odds_chosen": 9.378376007080078, "log_odds_ratio": -0.000319282291457057, "logits/chosen": -0.5158789157867432, "logits/rejected": -0.47944214940071106, "logps/chosen": -0.003047320758923888, "logps/rejected": -1.903006911277771, "loss": 0.9215, "nll_loss": 0.23033401370048523, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003047320933546871, "rewards/margins": 0.18999595940113068, "rewards/rejected": -0.19030068814754486, "step": 5873 }, { "epoch": 4.062240663900415, "grad_norm": 8.335897445678711, "learning_rate": 3.298755186721992e-05, "log_odds_chosen": 9.377771377563477, "log_odds_ratio": -0.0021932560484856367, "logits/chosen": -0.4992346465587616, "logits/rejected": -0.5463720560073853, "logps/chosen": -0.007180570159107447, "logps/rejected": -1.7047597169876099, "loss": 0.8746, "nll_loss": 0.21843618154525757, "rewards/accuracies": 1.0, "rewards/chosen": -0.000718057039193809, "rewards/margins": 0.1697579324245453, "rewards/rejected": -0.17047598958015442, "step": 5874 }, { "epoch": 4.0629322268326415, "grad_norm": 12.052624702453613, "learning_rate": 3.2983709850929765e-05, "log_odds_chosen": 9.854325294494629, "log_odds_ratio": -0.000265687849605456, "logits/chosen": -0.3645329773426056, "logits/rejected": -0.40780341625213623, "logps/chosen": -0.0005900555406697094, "logps/rejected": -1.7319858074188232, "loss": 1.2671, "nll_loss": 0.3167441487312317, "rewards/accuracies": 1.0, "rewards/chosen": -5.900555333937518e-05, "rewards/margins": 0.17313960194587708, "rewards/rejected": -0.1731986105442047, "step": 5875 }, { "epoch": 4.063623789764868, "grad_norm": 7.595922470092773, "learning_rate": 3.2979867834639624e-05, "log_odds_chosen": 10.499154090881348, "log_odds_ratio": -3.721401299117133e-05, "logits/chosen": -0.5955951809883118, "logits/rejected": -0.5733447074890137, "logps/chosen": -0.00019262160640209913, "logps/rejected": -1.7049528360366821, "loss": 0.8514, "nll_loss": 0.21284781396389008, "rewards/accuracies": 1.0, "rewards/chosen": -1.9262162822997198e-05, "rewards/margins": 0.1704760193824768, "rewards/rejected": -0.17049528658390045, "step": 5876 }, { "epoch": 4.064315352697095, "grad_norm": 7.582157611846924, "learning_rate": 3.297602581834947e-05, "log_odds_chosen": 9.53189468383789, "log_odds_ratio": -0.0004492271691560745, "logits/chosen": -0.46577298641204834, "logits/rejected": -0.4716201722621918, "logps/chosen": -0.0042579807341098785, "logps/rejected": -2.3687758445739746, "loss": 1.5047, "nll_loss": 0.3761416971683502, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042579806176945567, "rewards/margins": 0.23645178973674774, "rewards/rejected": -0.23687759041786194, "step": 5877 }, { "epoch": 4.065006915629322, "grad_norm": 11.61339282989502, "learning_rate": 3.297218380205932e-05, "log_odds_chosen": 8.871149063110352, "log_odds_ratio": -0.003240432823076844, "logits/chosen": -0.18867714703083038, "logits/rejected": -0.22036635875701904, "logps/chosen": -0.0015413928776979446, "logps/rejected": -1.6997482776641846, "loss": 1.182, "nll_loss": 0.2951643466949463, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015413928485941142, "rewards/margins": 0.16982069611549377, "rewards/rejected": -0.16997483372688293, "step": 5878 }, { "epoch": 4.065698478561549, "grad_norm": 8.902300834655762, "learning_rate": 3.2968341785769175e-05, "log_odds_chosen": 9.484688758850098, "log_odds_ratio": -0.0074333013035357, "logits/chosen": -0.5499727725982666, "logits/rejected": -0.6351827383041382, "logps/chosen": -0.003113050712272525, "logps/rejected": -1.3787400722503662, "loss": 0.7673, "nll_loss": 0.1910793036222458, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031130510615184903, "rewards/margins": 0.13756272196769714, "rewards/rejected": -0.13787400722503662, "step": 5879 }, { "epoch": 4.066390041493776, "grad_norm": 8.004378318786621, "learning_rate": 3.296449976947902e-05, "log_odds_chosen": 10.061744689941406, "log_odds_ratio": -8.924882422434166e-05, "logits/chosen": -0.5520544052124023, "logits/rejected": -0.6138145327568054, "logps/chosen": -0.00017678551375865936, "logps/rejected": -1.2566429376602173, "loss": 1.1891, "nll_loss": 0.29726946353912354, "rewards/accuracies": 1.0, "rewards/chosen": -1.767855246725958e-05, "rewards/margins": 0.12564662098884583, "rewards/rejected": -0.12566430866718292, "step": 5880 }, { "epoch": 4.0670816044260025, "grad_norm": 5.754605770111084, "learning_rate": 3.296065775318887e-05, "log_odds_chosen": 9.56634521484375, "log_odds_ratio": -0.00027094371034763753, "logits/chosen": -0.3173554241657257, "logits/rejected": -0.4609263837337494, "logps/chosen": -0.00039389560697600245, "logps/rejected": -1.7693027257919312, "loss": 0.8986, "nll_loss": 0.22461257874965668, "rewards/accuracies": 1.0, "rewards/chosen": -3.9389560697600245e-05, "rewards/margins": 0.17689087986946106, "rewards/rejected": -0.1769302785396576, "step": 5881 }, { "epoch": 4.067773167358229, "grad_norm": 11.797194480895996, "learning_rate": 3.2956815736898725e-05, "log_odds_chosen": 10.15972900390625, "log_odds_ratio": -7.304631435545161e-05, "logits/chosen": -0.23659618198871613, "logits/rejected": -0.38847583532333374, "logps/chosen": -0.00039499488775618374, "logps/rejected": -2.1488943099975586, "loss": 1.0133, "nll_loss": 0.2533252239227295, "rewards/accuracies": 1.0, "rewards/chosen": -3.949948586523533e-05, "rewards/margins": 0.21484996378421783, "rewards/rejected": -0.21488946676254272, "step": 5882 }, { "epoch": 4.068464730290456, "grad_norm": 13.173077583312988, "learning_rate": 3.295297372060858e-05, "log_odds_chosen": 9.202802658081055, "log_odds_ratio": -0.0028482081834226847, "logits/chosen": -0.4829055368900299, "logits/rejected": -0.6257842779159546, "logps/chosen": -0.02677040360867977, "logps/rejected": -1.9920315742492676, "loss": 1.2244, "nll_loss": 0.30581286549568176, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026770401746034622, "rewards/margins": 0.19652612507343292, "rewards/rejected": -0.19920317828655243, "step": 5883 }, { "epoch": 4.069156293222683, "grad_norm": 6.314594268798828, "learning_rate": 3.2949131704318423e-05, "log_odds_chosen": 10.545787811279297, "log_odds_ratio": -0.00012237382179591805, "logits/chosen": -0.338174968957901, "logits/rejected": -0.5499405860900879, "logps/chosen": -0.0005069324979558587, "logps/rejected": -2.1837666034698486, "loss": 0.7223, "nll_loss": 0.1805533468723297, "rewards/accuracies": 1.0, "rewards/chosen": -5.069325561635196e-05, "rewards/margins": 0.21832597255706787, "rewards/rejected": -0.21837666630744934, "step": 5884 }, { "epoch": 4.06984785615491, "grad_norm": 8.39501667022705, "learning_rate": 3.294528968802828e-05, "log_odds_chosen": 8.74677848815918, "log_odds_ratio": -0.07410023361444473, "logits/chosen": -0.29483741521835327, "logits/rejected": -0.35221293568611145, "logps/chosen": -0.013385063037276268, "logps/rejected": -1.4697821140289307, "loss": 1.2343, "nll_loss": 0.3011668920516968, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013385062338784337, "rewards/margins": 0.14563970267772675, "rewards/rejected": -0.1469782143831253, "step": 5885 }, { "epoch": 4.070539419087137, "grad_norm": 9.42487907409668, "learning_rate": 3.294144767173813e-05, "log_odds_chosen": 10.418317794799805, "log_odds_ratio": -0.00018019750132225454, "logits/chosen": -0.2804778814315796, "logits/rejected": -0.26137036085128784, "logps/chosen": -0.003630939172580838, "logps/rejected": -2.4958736896514893, "loss": 0.998, "nll_loss": 0.24948766827583313, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003630939172580838, "rewards/margins": 0.24922429025173187, "rewards/rejected": -0.24958738684654236, "step": 5886 }, { "epoch": 4.0712309820193635, "grad_norm": 7.741333484649658, "learning_rate": 3.293760565544798e-05, "log_odds_chosen": 8.39094352722168, "log_odds_ratio": -0.04762697592377663, "logits/chosen": -0.5252317786216736, "logits/rejected": -0.586992621421814, "logps/chosen": -0.037948936223983765, "logps/rejected": -1.6404378414154053, "loss": 0.9747, "nll_loss": 0.2389063537120819, "rewards/accuracies": 1.0, "rewards/chosen": -0.00379489385522902, "rewards/margins": 0.16024890542030334, "rewards/rejected": -0.16404379904270172, "step": 5887 }, { "epoch": 4.07192254495159, "grad_norm": 9.15391731262207, "learning_rate": 3.293376363915783e-05, "log_odds_chosen": 9.346750259399414, "log_odds_ratio": -0.06653932482004166, "logits/chosen": -0.4693371057510376, "logits/rejected": -0.535291314125061, "logps/chosen": -0.014121782034635544, "logps/rejected": -1.8337665796279907, "loss": 1.1923, "nll_loss": 0.29142358899116516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014121782733127475, "rewards/margins": 0.1819644719362259, "rewards/rejected": -0.18337665498256683, "step": 5888 }, { "epoch": 4.072614107883817, "grad_norm": 7.018202304840088, "learning_rate": 3.292992162286768e-05, "log_odds_chosen": 9.713285446166992, "log_odds_ratio": -0.00047497553168796003, "logits/chosen": -0.6435208916664124, "logits/rejected": -0.6868486404418945, "logps/chosen": -0.0004327491042204201, "logps/rejected": -1.6850796937942505, "loss": 0.8882, "nll_loss": 0.22199746966362, "rewards/accuracies": 1.0, "rewards/chosen": -4.327491842559539e-05, "rewards/margins": 0.16846470534801483, "rewards/rejected": -0.16850797832012177, "step": 5889 }, { "epoch": 4.073305670816044, "grad_norm": 12.950318336486816, "learning_rate": 3.292607960657753e-05, "log_odds_chosen": 8.975205421447754, "log_odds_ratio": -0.0029498322401195765, "logits/chosen": -0.47128579020500183, "logits/rejected": -0.5512241721153259, "logps/chosen": -0.0190330371260643, "logps/rejected": -1.9600040912628174, "loss": 1.4559, "nll_loss": 0.363680362701416, "rewards/accuracies": 1.0, "rewards/chosen": -0.001903303898870945, "rewards/margins": 0.1940971165895462, "rewards/rejected": -0.19600041210651398, "step": 5890 }, { "epoch": 4.073997233748271, "grad_norm": 10.255099296569824, "learning_rate": 3.2922237590287384e-05, "log_odds_chosen": 10.904985427856445, "log_odds_ratio": -2.5688645109767094e-05, "logits/chosen": -0.6291865706443787, "logits/rejected": -0.7621033787727356, "logps/chosen": -0.0003484275075607002, "logps/rejected": -2.4972190856933594, "loss": 1.2966, "nll_loss": 0.3241395652294159, "rewards/accuracies": 1.0, "rewards/chosen": -3.48427529388573e-05, "rewards/margins": 0.2496870756149292, "rewards/rejected": -0.24972191452980042, "step": 5891 }, { "epoch": 4.074688796680498, "grad_norm": 14.77944564819336, "learning_rate": 3.2918395573997236e-05, "log_odds_chosen": 10.145472526550293, "log_odds_ratio": -0.00012422248255461454, "logits/chosen": -0.5283649563789368, "logits/rejected": -0.6732466816902161, "logps/chosen": -0.0004888318944722414, "logps/rejected": -2.26007080078125, "loss": 1.7568, "nll_loss": 0.439181387424469, "rewards/accuracies": 1.0, "rewards/chosen": -4.88831901748199e-05, "rewards/margins": 0.2259582281112671, "rewards/rejected": -0.2260071039199829, "step": 5892 }, { "epoch": 4.0753803596127245, "grad_norm": 9.560647010803223, "learning_rate": 3.291455355770708e-05, "log_odds_chosen": 10.0001220703125, "log_odds_ratio": -0.0001762525353115052, "logits/chosen": -0.5513603687286377, "logits/rejected": -0.5678353309631348, "logps/chosen": -0.0002527603064663708, "logps/rejected": -1.3682398796081543, "loss": 0.715, "nll_loss": 0.17874178290367126, "rewards/accuracies": 1.0, "rewards/chosen": -2.527602919144556e-05, "rewards/margins": 0.1367987096309662, "rewards/rejected": -0.13682398200035095, "step": 5893 }, { "epoch": 4.076071922544951, "grad_norm": 6.863475322723389, "learning_rate": 3.291071154141694e-05, "log_odds_chosen": 9.999677658081055, "log_odds_ratio": -0.0005250984104350209, "logits/chosen": -0.38750988245010376, "logits/rejected": -0.481981486082077, "logps/chosen": -0.0008838768699206412, "logps/rejected": -1.9657940864562988, "loss": 0.7248, "nll_loss": 0.18114393949508667, "rewards/accuracies": 1.0, "rewards/chosen": -8.83876855368726e-05, "rewards/margins": 0.1964910328388214, "rewards/rejected": -0.19657942652702332, "step": 5894 }, { "epoch": 4.076763485477178, "grad_norm": 9.547638893127441, "learning_rate": 3.290686952512679e-05, "log_odds_chosen": 10.852001190185547, "log_odds_ratio": -5.883872654521838e-05, "logits/chosen": -0.6101143956184387, "logits/rejected": -0.6539497971534729, "logps/chosen": -0.00033553678076714277, "logps/rejected": -2.486168384552002, "loss": 1.0673, "nll_loss": 0.2668067514896393, "rewards/accuracies": 1.0, "rewards/chosen": -3.355367880431004e-05, "rewards/margins": 0.24858328700065613, "rewards/rejected": -0.24861682951450348, "step": 5895 }, { "epoch": 4.077455048409405, "grad_norm": 12.845402717590332, "learning_rate": 3.290302750883664e-05, "log_odds_chosen": 10.063456535339355, "log_odds_ratio": -0.001193615491501987, "logits/chosen": -0.5881873369216919, "logits/rejected": -0.6536589860916138, "logps/chosen": -0.0010188799351453781, "logps/rejected": -1.858687162399292, "loss": 1.304, "nll_loss": 0.32588329911231995, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010188797750743106, "rewards/margins": 0.18576683104038239, "rewards/rejected": -0.1858687400817871, "step": 5896 }, { "epoch": 4.078146611341632, "grad_norm": 8.264073371887207, "learning_rate": 3.289918549254649e-05, "log_odds_chosen": 11.341009140014648, "log_odds_ratio": -3.128061871393584e-05, "logits/chosen": -0.8096005916595459, "logits/rejected": -0.9112792015075684, "logps/chosen": -0.0003100544272456318, "logps/rejected": -2.4255685806274414, "loss": 1.4221, "nll_loss": 0.3555248975753784, "rewards/accuracies": 1.0, "rewards/chosen": -3.100544199696742e-05, "rewards/margins": 0.2425258457660675, "rewards/rejected": -0.2425568550825119, "step": 5897 }, { "epoch": 4.078838174273859, "grad_norm": 11.904677391052246, "learning_rate": 3.289534347625634e-05, "log_odds_chosen": 9.696477890014648, "log_odds_ratio": -0.00014181065489538014, "logits/chosen": -0.41866156458854675, "logits/rejected": -0.4474356174468994, "logps/chosen": -0.0012857395922765136, "logps/rejected": -1.9312913417816162, "loss": 1.4583, "nll_loss": 0.3645554780960083, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012857397086918354, "rewards/margins": 0.19300055503845215, "rewards/rejected": -0.19312912225723267, "step": 5898 }, { "epoch": 4.0795297372060855, "grad_norm": 12.756428718566895, "learning_rate": 3.289150145996619e-05, "log_odds_chosen": 10.162872314453125, "log_odds_ratio": -0.0003494315897114575, "logits/chosen": -0.5441153049468994, "logits/rejected": -0.6091464161872864, "logps/chosen": -0.0005706208175979555, "logps/rejected": -1.493101954460144, "loss": 0.9581, "nll_loss": 0.23949317634105682, "rewards/accuracies": 1.0, "rewards/chosen": -5.706208321498707e-05, "rewards/margins": 0.14925314486026764, "rewards/rejected": -0.14931020140647888, "step": 5899 }, { "epoch": 4.080221300138312, "grad_norm": 8.728287696838379, "learning_rate": 3.288765944367604e-05, "log_odds_chosen": 9.576757431030273, "log_odds_ratio": -0.000568702700547874, "logits/chosen": -0.9261612296104431, "logits/rejected": -0.887792706489563, "logps/chosen": -0.00765953678637743, "logps/rejected": -2.2173871994018555, "loss": 1.3768, "nll_loss": 0.3441358804702759, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007659537368454039, "rewards/margins": 0.22097276151180267, "rewards/rejected": -0.22173872590065002, "step": 5900 }, { "epoch": 4.080912863070539, "grad_norm": 12.955114364624023, "learning_rate": 3.2883817427385895e-05, "log_odds_chosen": 10.813081741333008, "log_odds_ratio": -6.771107291569933e-05, "logits/chosen": -0.6529577374458313, "logits/rejected": -0.6758928298950195, "logps/chosen": -0.0003291813191026449, "logps/rejected": -2.374316453933716, "loss": 1.3182, "nll_loss": 0.32954463362693787, "rewards/accuracies": 1.0, "rewards/chosen": -3.291813118266873e-05, "rewards/margins": 0.23739872872829437, "rewards/rejected": -0.23743166029453278, "step": 5901 }, { "epoch": 4.081604426002766, "grad_norm": 12.583176612854004, "learning_rate": 3.287997541109574e-05, "log_odds_chosen": 9.861529350280762, "log_odds_ratio": -0.00019614600751083344, "logits/chosen": -0.6209136247634888, "logits/rejected": -0.6244215369224548, "logps/chosen": -0.0006503669428639114, "logps/rejected": -2.1978304386138916, "loss": 1.3384, "nll_loss": 0.33457767963409424, "rewards/accuracies": 1.0, "rewards/chosen": -6.503669283119962e-05, "rewards/margins": 0.21971800923347473, "rewards/rejected": -0.21978303790092468, "step": 5902 }, { "epoch": 4.082295988934993, "grad_norm": 7.011631011962891, "learning_rate": 3.28761333948056e-05, "log_odds_chosen": 10.310678482055664, "log_odds_ratio": -5.392678576754406e-05, "logits/chosen": -0.10984927415847778, "logits/rejected": -0.21680112183094025, "logps/chosen": -0.0005354200839065015, "logps/rejected": -2.361393928527832, "loss": 0.8099, "nll_loss": 0.20247748494148254, "rewards/accuracies": 1.0, "rewards/chosen": -5.354200766305439e-05, "rewards/margins": 0.23608583211898804, "rewards/rejected": -0.23613938689231873, "step": 5903 }, { "epoch": 4.08298755186722, "grad_norm": 6.9156718254089355, "learning_rate": 3.2872291378515445e-05, "log_odds_chosen": 9.817750930786133, "log_odds_ratio": -0.0004971225862391293, "logits/chosen": -0.2331920862197876, "logits/rejected": -0.341392457485199, "logps/chosen": -0.0009805553127080202, "logps/rejected": -2.3136556148529053, "loss": 1.1062, "nll_loss": 0.2765083611011505, "rewards/accuracies": 1.0, "rewards/chosen": -9.805553418118507e-05, "rewards/margins": 0.231267511844635, "rewards/rejected": -0.23136556148529053, "step": 5904 }, { "epoch": 4.0836791147994465, "grad_norm": 8.643861770629883, "learning_rate": 3.28684493622253e-05, "log_odds_chosen": 10.287995338439941, "log_odds_ratio": -0.00011583154264371842, "logits/chosen": -0.13520589470863342, "logits/rejected": -0.305339515209198, "logps/chosen": -0.0003736851504072547, "logps/rejected": -2.2782511711120605, "loss": 0.934, "nll_loss": 0.2334843873977661, "rewards/accuracies": 1.0, "rewards/chosen": -3.736852158908732e-05, "rewards/margins": 0.22778773307800293, "rewards/rejected": -0.2278251051902771, "step": 5905 }, { "epoch": 4.084370677731673, "grad_norm": 6.616100788116455, "learning_rate": 3.286460734593515e-05, "log_odds_chosen": 9.227767944335938, "log_odds_ratio": -0.00036859133979305625, "logits/chosen": -0.5632534623146057, "logits/rejected": -0.5830227136611938, "logps/chosen": -0.0007286292966455221, "logps/rejected": -1.7913846969604492, "loss": 1.7213, "nll_loss": 0.43027839064598083, "rewards/accuracies": 1.0, "rewards/chosen": -7.286293111974373e-05, "rewards/margins": 0.17906561493873596, "rewards/rejected": -0.17913848161697388, "step": 5906 }, { "epoch": 4.0850622406639, "grad_norm": 9.714203834533691, "learning_rate": 3.2860765329644996e-05, "log_odds_chosen": 10.036661148071289, "log_odds_ratio": -0.00015963260375428945, "logits/chosen": -0.545129656791687, "logits/rejected": -0.6559819579124451, "logps/chosen": -0.00045840261736884713, "logps/rejected": -2.1178178787231445, "loss": 1.0919, "nll_loss": 0.272967666387558, "rewards/accuracies": 1.0, "rewards/chosen": -4.584026100928895e-05, "rewards/margins": 0.21173596382141113, "rewards/rejected": -0.2117818146944046, "step": 5907 }, { "epoch": 4.085753803596127, "grad_norm": 21.723194122314453, "learning_rate": 3.285692331335485e-05, "log_odds_chosen": 10.542654037475586, "log_odds_ratio": -0.00011348607222316787, "logits/chosen": -0.46963703632354736, "logits/rejected": -0.531532347202301, "logps/chosen": -0.0001715036341920495, "logps/rejected": -1.9184401035308838, "loss": 0.9849, "nll_loss": 0.24622534215450287, "rewards/accuracies": 1.0, "rewards/chosen": -1.715036341920495e-05, "rewards/margins": 0.19182685017585754, "rewards/rejected": -0.19184401631355286, "step": 5908 }, { "epoch": 4.086445366528354, "grad_norm": 8.491382598876953, "learning_rate": 3.28530812970647e-05, "log_odds_chosen": 10.362955093383789, "log_odds_ratio": -8.813407475827262e-05, "logits/chosen": -0.45842936635017395, "logits/rejected": -0.5122732520103455, "logps/chosen": -0.0004604756541084498, "logps/rejected": -2.116807460784912, "loss": 0.7397, "nll_loss": 0.18491125106811523, "rewards/accuracies": 1.0, "rewards/chosen": -4.6047567593632266e-05, "rewards/margins": 0.21163472533226013, "rewards/rejected": -0.21168076992034912, "step": 5909 }, { "epoch": 4.087136929460581, "grad_norm": 11.866226196289062, "learning_rate": 3.284923928077455e-05, "log_odds_chosen": 9.371916770935059, "log_odds_ratio": -0.0015452952357009053, "logits/chosen": -0.6193146705627441, "logits/rejected": -0.5984625220298767, "logps/chosen": -0.0016424513887614012, "logps/rejected": -1.958803415298462, "loss": 1.8707, "nll_loss": 0.46751517057418823, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016424513887614012, "rewards/margins": 0.19571609795093536, "rewards/rejected": -0.19588035345077515, "step": 5910 }, { "epoch": 4.087828492392807, "grad_norm": 16.31894302368164, "learning_rate": 3.28453972644844e-05, "log_odds_chosen": 9.167207717895508, "log_odds_ratio": -0.04448318853974342, "logits/chosen": -0.7948864698410034, "logits/rejected": -0.8742651343345642, "logps/chosen": -0.07551628351211548, "logps/rejected": -1.8025906085968018, "loss": 1.9373, "nll_loss": 0.47987082600593567, "rewards/accuracies": 1.0, "rewards/chosen": -0.0075516290962696075, "rewards/margins": 0.1727074384689331, "rewards/rejected": -0.1802590787410736, "step": 5911 }, { "epoch": 4.088520055325034, "grad_norm": 8.925230026245117, "learning_rate": 3.284155524819426e-05, "log_odds_chosen": 9.596630096435547, "log_odds_ratio": -0.0008474764181300998, "logits/chosen": -0.5018314123153687, "logits/rejected": -0.5587818026542664, "logps/chosen": -0.014918200671672821, "logps/rejected": -1.6959295272827148, "loss": 0.7926, "nll_loss": 0.19807027280330658, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014918200904503465, "rewards/margins": 0.16810114681720734, "rewards/rejected": -0.1695929616689682, "step": 5912 }, { "epoch": 4.089211618257261, "grad_norm": 21.428508758544922, "learning_rate": 3.2837713231904104e-05, "log_odds_chosen": 8.879332542419434, "log_odds_ratio": -0.0006559221656061709, "logits/chosen": -0.3005560338497162, "logits/rejected": -0.2967386841773987, "logps/chosen": -0.0012168160174041986, "logps/rejected": -1.986401081085205, "loss": 1.3465, "nll_loss": 0.33656826615333557, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012168160901637748, "rewards/margins": 0.19851842522621155, "rewards/rejected": -0.1986401081085205, "step": 5913 }, { "epoch": 4.089903181189488, "grad_norm": 11.837137222290039, "learning_rate": 3.2833871215613956e-05, "log_odds_chosen": 8.946540832519531, "log_odds_ratio": -0.0020058578811585903, "logits/chosen": -0.2927224636077881, "logits/rejected": -0.3152255117893219, "logps/chosen": -0.011459745466709137, "logps/rejected": -1.322188377380371, "loss": 1.6239, "nll_loss": 0.4057755768299103, "rewards/accuracies": 1.0, "rewards/chosen": -0.001145974500104785, "rewards/margins": 0.13107284903526306, "rewards/rejected": -0.1322188377380371, "step": 5914 }, { "epoch": 4.090594744121715, "grad_norm": 6.9727559089660645, "learning_rate": 3.283002919932381e-05, "log_odds_chosen": 8.786075592041016, "log_odds_ratio": -0.000865088019054383, "logits/chosen": -0.30533480644226074, "logits/rejected": -0.37163785099983215, "logps/chosen": -0.0008568483171984553, "logps/rejected": -1.5327558517456055, "loss": 1.0, "nll_loss": 0.24990104138851166, "rewards/accuracies": 1.0, "rewards/chosen": -8.568483463022858e-05, "rewards/margins": 0.15318989753723145, "rewards/rejected": -0.15327557921409607, "step": 5915 }, { "epoch": 4.091286307053942, "grad_norm": 12.23672866821289, "learning_rate": 3.282618718303366e-05, "log_odds_chosen": 10.677852630615234, "log_odds_ratio": -0.00016602696268819273, "logits/chosen": -0.635785698890686, "logits/rejected": -0.6920471787452698, "logps/chosen": -0.0003736467915587127, "logps/rejected": -2.306975841522217, "loss": 1.0924, "nll_loss": 0.273092120885849, "rewards/accuracies": 1.0, "rewards/chosen": -3.7364679883467034e-05, "rewards/margins": 0.23066022992134094, "rewards/rejected": -0.2306976020336151, "step": 5916 }, { "epoch": 4.091977869986168, "grad_norm": 8.536312103271484, "learning_rate": 3.2822345166743507e-05, "log_odds_chosen": 8.663702011108398, "log_odds_ratio": -0.00042895443039014935, "logits/chosen": -0.5982036590576172, "logits/rejected": -0.6158012747764587, "logps/chosen": -0.0025946851819753647, "logps/rejected": -1.8932663202285767, "loss": 1.1592, "nll_loss": 0.2897559702396393, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002594685065560043, "rewards/margins": 0.18906715512275696, "rewards/rejected": -0.18932661414146423, "step": 5917 }, { "epoch": 4.092669432918395, "grad_norm": 5.972506046295166, "learning_rate": 3.281850315045336e-05, "log_odds_chosen": 10.543180465698242, "log_odds_ratio": -6.71078814775683e-05, "logits/chosen": -0.05855588614940643, "logits/rejected": -0.2059129923582077, "logps/chosen": -0.0003192424192093313, "logps/rejected": -2.2500996589660645, "loss": 1.1449, "nll_loss": 0.2862182855606079, "rewards/accuracies": 1.0, "rewards/chosen": -3.192424264852889e-05, "rewards/margins": 0.22497805953025818, "rewards/rejected": -0.2250099778175354, "step": 5918 }, { "epoch": 4.093360995850622, "grad_norm": 14.27978515625, "learning_rate": 3.281466113416321e-05, "log_odds_chosen": 10.131693840026855, "log_odds_ratio": -0.00028685503639280796, "logits/chosen": -0.45370054244995117, "logits/rejected": -0.4926850497722626, "logps/chosen": -0.00024568854132667184, "logps/rejected": -1.7987271547317505, "loss": 1.5017, "nll_loss": 0.3754034638404846, "rewards/accuracies": 1.0, "rewards/chosen": -2.456885704305023e-05, "rewards/margins": 0.17984813451766968, "rewards/rejected": -0.17987270653247833, "step": 5919 }, { "epoch": 4.094052558782849, "grad_norm": 9.181960105895996, "learning_rate": 3.281081911787306e-05, "log_odds_chosen": 10.21621322631836, "log_odds_ratio": -0.0005674352869391441, "logits/chosen": -0.40525904297828674, "logits/rejected": -0.4408324360847473, "logps/chosen": -0.0017286634538322687, "logps/rejected": -2.215144634246826, "loss": 0.9703, "nll_loss": 0.24252784252166748, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017286634829360992, "rewards/margins": 0.22134160995483398, "rewards/rejected": -0.22151446342468262, "step": 5920 }, { "epoch": 4.094744121715076, "grad_norm": 8.669586181640625, "learning_rate": 3.2806977101582916e-05, "log_odds_chosen": 8.907610893249512, "log_odds_ratio": -0.00045416090870276093, "logits/chosen": -0.30150651931762695, "logits/rejected": -0.3959370255470276, "logps/chosen": -0.0006600356427952647, "logps/rejected": -1.5230951309204102, "loss": 1.2002, "nll_loss": 0.3000123202800751, "rewards/accuracies": 1.0, "rewards/chosen": -6.6003565734718e-05, "rewards/margins": 0.15224352478981018, "rewards/rejected": -0.15230952203273773, "step": 5921 }, { "epoch": 4.095435684647303, "grad_norm": 8.433770179748535, "learning_rate": 3.280313508529276e-05, "log_odds_chosen": 8.238189697265625, "log_odds_ratio": -0.09835493564605713, "logits/chosen": -0.3608969748020172, "logits/rejected": -0.45487987995147705, "logps/chosen": -0.014181341975927353, "logps/rejected": -1.7871878147125244, "loss": 1.357, "nll_loss": 0.32940763235092163, "rewards/accuracies": 0.875, "rewards/chosen": -0.0014181341975927353, "rewards/margins": 0.17730064690113068, "rewards/rejected": -0.17871877551078796, "step": 5922 }, { "epoch": 4.096127247579529, "grad_norm": 10.358454704284668, "learning_rate": 3.2799293069002614e-05, "log_odds_chosen": 10.86201286315918, "log_odds_ratio": -6.974166899453849e-05, "logits/chosen": -0.3564392924308777, "logits/rejected": -0.44083625078201294, "logps/chosen": -0.0003634452586993575, "logps/rejected": -2.568542957305908, "loss": 1.2318, "nll_loss": 0.3079310953617096, "rewards/accuracies": 1.0, "rewards/chosen": -3.63445287803188e-05, "rewards/margins": 0.2568179666996002, "rewards/rejected": -0.2568542957305908, "step": 5923 }, { "epoch": 4.096818810511756, "grad_norm": 7.985740661621094, "learning_rate": 3.279545105271247e-05, "log_odds_chosen": 9.483537673950195, "log_odds_ratio": -0.0002802881645038724, "logits/chosen": -0.5317890644073486, "logits/rejected": -0.5612384676933289, "logps/chosen": -0.0015085403574630618, "logps/rejected": -1.8522597551345825, "loss": 1.7163, "nll_loss": 0.4290497601032257, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001508540299255401, "rewards/margins": 0.18507513403892517, "rewards/rejected": -0.1852259635925293, "step": 5924 }, { "epoch": 4.097510373443983, "grad_norm": 34.76710891723633, "learning_rate": 3.279160903642232e-05, "log_odds_chosen": 7.234254360198975, "log_odds_ratio": -0.29028746485710144, "logits/chosen": -0.2311570942401886, "logits/rejected": -0.24773958325386047, "logps/chosen": -0.03140858933329582, "logps/rejected": -1.6474460363388062, "loss": 1.4262, "nll_loss": 0.3275095820426941, "rewards/accuracies": 0.875, "rewards/chosen": -0.0031408590730279684, "rewards/margins": 0.16160376369953156, "rewards/rejected": -0.16474461555480957, "step": 5925 }, { "epoch": 4.09820193637621, "grad_norm": 8.7240571975708, "learning_rate": 3.2787767020132165e-05, "log_odds_chosen": 9.578914642333984, "log_odds_ratio": -0.000270306714810431, "logits/chosen": -0.7370679974555969, "logits/rejected": -0.8224969506263733, "logps/chosen": -0.002277072286233306, "logps/rejected": -1.888828992843628, "loss": 1.3327, "nll_loss": 0.33315569162368774, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022770720534026623, "rewards/margins": 0.18865519762039185, "rewards/rejected": -0.18888290226459503, "step": 5926 }, { "epoch": 4.098893499308437, "grad_norm": 11.401165008544922, "learning_rate": 3.278392500384202e-05, "log_odds_chosen": 10.467416763305664, "log_odds_ratio": -9.527485963189974e-05, "logits/chosen": -0.3944201171398163, "logits/rejected": -0.48259276151657104, "logps/chosen": -0.0002736057504080236, "logps/rejected": -2.027223587036133, "loss": 1.4882, "nll_loss": 0.37202945351600647, "rewards/accuracies": 1.0, "rewards/chosen": -2.736057467700448e-05, "rewards/margins": 0.20269499719142914, "rewards/rejected": -0.20272235572338104, "step": 5927 }, { "epoch": 4.0995850622406635, "grad_norm": 10.183175086975098, "learning_rate": 3.278008298755187e-05, "log_odds_chosen": 9.941876411437988, "log_odds_ratio": -0.019247492775321007, "logits/chosen": -0.6036807298660278, "logits/rejected": -0.6116605997085571, "logps/chosen": -0.00579418009147048, "logps/rejected": -1.7697887420654297, "loss": 0.8289, "nll_loss": 0.20528829097747803, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005794180324301124, "rewards/margins": 0.17639945447444916, "rewards/rejected": -0.17697887122631073, "step": 5928 }, { "epoch": 4.10027662517289, "grad_norm": 13.983654975891113, "learning_rate": 3.2776240971261716e-05, "log_odds_chosen": 10.866411209106445, "log_odds_ratio": -4.261564026819542e-05, "logits/chosen": -0.7931185960769653, "logits/rejected": -0.857774555683136, "logps/chosen": -0.0002357493358431384, "logps/rejected": -2.452587127685547, "loss": 1.4298, "nll_loss": 0.35744524002075195, "rewards/accuracies": 1.0, "rewards/chosen": -2.357493394811172e-05, "rewards/margins": 0.2452351450920105, "rewards/rejected": -0.24525870382785797, "step": 5929 }, { "epoch": 4.100968188105117, "grad_norm": 12.813995361328125, "learning_rate": 3.2772398954971575e-05, "log_odds_chosen": 9.736326217651367, "log_odds_ratio": -0.002923061139881611, "logits/chosen": -0.6696832180023193, "logits/rejected": -0.7407538890838623, "logps/chosen": -0.01896968111395836, "logps/rejected": -2.72799015045166, "loss": 1.2213, "nll_loss": 0.3050318658351898, "rewards/accuracies": 1.0, "rewards/chosen": -0.001896967994980514, "rewards/margins": 0.27090203762054443, "rewards/rejected": -0.272799015045166, "step": 5930 }, { "epoch": 4.101659751037344, "grad_norm": 8.693153381347656, "learning_rate": 3.276855693868142e-05, "log_odds_chosen": 10.148609161376953, "log_odds_ratio": -0.00025045208167284727, "logits/chosen": -0.9423700571060181, "logits/rejected": -0.9496661424636841, "logps/chosen": -0.00045394038897939026, "logps/rejected": -1.9736984968185425, "loss": 1.135, "nll_loss": 0.28372570872306824, "rewards/accuracies": 1.0, "rewards/chosen": -4.539404108072631e-05, "rewards/margins": 0.1973244547843933, "rewards/rejected": -0.19736984372138977, "step": 5931 }, { "epoch": 4.102351313969571, "grad_norm": 9.80392837524414, "learning_rate": 3.276471492239127e-05, "log_odds_chosen": 9.37426471710205, "log_odds_ratio": -0.0005990730132907629, "logits/chosen": -0.48925837874412537, "logits/rejected": -0.5344066023826599, "logps/chosen": -0.009337247349321842, "logps/rejected": -1.8132996559143066, "loss": 1.218, "nll_loss": 0.30442941188812256, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009337248047813773, "rewards/margins": 0.18039622902870178, "rewards/rejected": -0.18132996559143066, "step": 5932 }, { "epoch": 4.103042876901798, "grad_norm": 9.872029304504395, "learning_rate": 3.2760872906101125e-05, "log_odds_chosen": 9.1129150390625, "log_odds_ratio": -0.0005607136990875006, "logits/chosen": -0.6379801630973816, "logits/rejected": -0.7192505598068237, "logps/chosen": -0.002543968614190817, "logps/rejected": -1.6058814525604248, "loss": 1.0869, "nll_loss": 0.27167174220085144, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002543968439567834, "rewards/margins": 0.1603337526321411, "rewards/rejected": -0.16058816015720367, "step": 5933 }, { "epoch": 4.1037344398340245, "grad_norm": 12.728527069091797, "learning_rate": 3.275703088981098e-05, "log_odds_chosen": 9.624202728271484, "log_odds_ratio": -0.001561567303724587, "logits/chosen": -0.6482115983963013, "logits/rejected": -0.6900525093078613, "logps/chosen": -0.004731070715934038, "logps/rejected": -2.31296443939209, "loss": 1.2704, "nll_loss": 0.3174405097961426, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004731070948764682, "rewards/margins": 0.2308233380317688, "rewards/rejected": -0.23129644989967346, "step": 5934 }, { "epoch": 4.104426002766251, "grad_norm": 9.520516395568848, "learning_rate": 3.2753188873520823e-05, "log_odds_chosen": 8.726845741271973, "log_odds_ratio": -0.0048818690702319145, "logits/chosen": -0.7764834761619568, "logits/rejected": -0.813412070274353, "logps/chosen": -0.009942354634404182, "logps/rejected": -1.928078293800354, "loss": 1.0051, "nll_loss": 0.25079038739204407, "rewards/accuracies": 1.0, "rewards/chosen": -0.000994235510006547, "rewards/margins": 0.19181358814239502, "rewards/rejected": -0.19280782341957092, "step": 5935 }, { "epoch": 4.105117565698478, "grad_norm": 11.137310981750488, "learning_rate": 3.2749346857230676e-05, "log_odds_chosen": 9.733144760131836, "log_odds_ratio": -0.0005869278684258461, "logits/chosen": -0.8129700422286987, "logits/rejected": -0.8991367816925049, "logps/chosen": -0.0008950646151788533, "logps/rejected": -2.1298701763153076, "loss": 1.359, "nll_loss": 0.339703768491745, "rewards/accuracies": 1.0, "rewards/chosen": -8.950645860750228e-05, "rewards/margins": 0.21289752423763275, "rewards/rejected": -0.2129870355129242, "step": 5936 }, { "epoch": 4.105809128630705, "grad_norm": 9.725931167602539, "learning_rate": 3.274550484094053e-05, "log_odds_chosen": 9.57811164855957, "log_odds_ratio": -0.0005339644267223775, "logits/chosen": -0.5569449663162231, "logits/rejected": -0.5920923948287964, "logps/chosen": -0.0014632527017965913, "logps/rejected": -1.5597307682037354, "loss": 0.9906, "nll_loss": 0.24760675430297852, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014632527017965913, "rewards/margins": 0.15582674741744995, "rewards/rejected": -0.15597307682037354, "step": 5937 }, { "epoch": 4.106500691562932, "grad_norm": 11.63660717010498, "learning_rate": 3.2741662824650374e-05, "log_odds_chosen": 8.343074798583984, "log_odds_ratio": -0.06201335787773132, "logits/chosen": -0.5703570246696472, "logits/rejected": -0.5562098026275635, "logps/chosen": -0.023394770920276642, "logps/rejected": -1.5619940757751465, "loss": 1.1968, "nll_loss": 0.2929861843585968, "rewards/accuracies": 1.0, "rewards/chosen": -0.002339476952329278, "rewards/margins": 0.1538599133491516, "rewards/rejected": -0.1561993956565857, "step": 5938 }, { "epoch": 4.107192254495159, "grad_norm": 6.6649956703186035, "learning_rate": 3.273782080836023e-05, "log_odds_chosen": 8.792590141296387, "log_odds_ratio": -0.002866230206564069, "logits/chosen": -0.7098940014839172, "logits/rejected": -0.7736493349075317, "logps/chosen": -0.0035133520141243935, "logps/rejected": -1.5739152431488037, "loss": 1.0678, "nll_loss": 0.2666718661785126, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035133521305397153, "rewards/margins": 0.15704019367694855, "rewards/rejected": -0.1573915332555771, "step": 5939 }, { "epoch": 4.1078838174273855, "grad_norm": 10.560246467590332, "learning_rate": 3.273397879207008e-05, "log_odds_chosen": 8.729288101196289, "log_odds_ratio": -0.016969040036201477, "logits/chosen": -0.3396521210670471, "logits/rejected": -0.41665220260620117, "logps/chosen": -0.05379095673561096, "logps/rejected": -2.278257369995117, "loss": 0.9527, "nll_loss": 0.23648518323898315, "rewards/accuracies": 1.0, "rewards/chosen": -0.005379095207899809, "rewards/margins": 0.22244666516780853, "rewards/rejected": -0.22782574594020844, "step": 5940 }, { "epoch": 4.108575380359612, "grad_norm": 13.353851318359375, "learning_rate": 3.273013677577993e-05, "log_odds_chosen": 8.815929412841797, "log_odds_ratio": -0.0002879203821066767, "logits/chosen": -0.40576013922691345, "logits/rejected": -0.5046470165252686, "logps/chosen": -0.0004096684278920293, "logps/rejected": -1.1543430089950562, "loss": 1.2258, "nll_loss": 0.30642008781433105, "rewards/accuracies": 1.0, "rewards/chosen": -4.096684278920293e-05, "rewards/margins": 0.11539334058761597, "rewards/rejected": -0.11543430387973785, "step": 5941 }, { "epoch": 4.109266943291839, "grad_norm": 8.844964027404785, "learning_rate": 3.2726294759489784e-05, "log_odds_chosen": 9.867124557495117, "log_odds_ratio": -0.00035422889050096273, "logits/chosen": -0.3862367868423462, "logits/rejected": -0.43661126494407654, "logps/chosen": -0.0004143910191487521, "logps/rejected": -1.955275535583496, "loss": 1.4467, "nll_loss": 0.36163824796676636, "rewards/accuracies": 1.0, "rewards/chosen": -4.143910337006673e-05, "rewards/margins": 0.1954861283302307, "rewards/rejected": -0.1955275535583496, "step": 5942 }, { "epoch": 4.109958506224066, "grad_norm": 9.529248237609863, "learning_rate": 3.2722452743199636e-05, "log_odds_chosen": 8.813203811645508, "log_odds_ratio": -0.004686347208917141, "logits/chosen": -0.21849806606769562, "logits/rejected": -0.3129451274871826, "logps/chosen": -0.003993101883679628, "logps/rejected": -2.194714307785034, "loss": 1.1136, "nll_loss": 0.2779342830181122, "rewards/accuracies": 1.0, "rewards/chosen": -0.00039931017090566456, "rewards/margins": 0.2190721482038498, "rewards/rejected": -0.21947146952152252, "step": 5943 }, { "epoch": 4.110650069156293, "grad_norm": 7.093349456787109, "learning_rate": 3.271861072690948e-05, "log_odds_chosen": 8.87593936920166, "log_odds_ratio": -0.0005173565004952252, "logits/chosen": -0.6296029686927795, "logits/rejected": -0.6486787796020508, "logps/chosen": -0.0005817078636027873, "logps/rejected": -1.1000468730926514, "loss": 1.2732, "nll_loss": 0.31825223565101624, "rewards/accuracies": 1.0, "rewards/chosen": -5.817079363623634e-05, "rewards/margins": 0.10994651913642883, "rewards/rejected": -0.11000467836856842, "step": 5944 }, { "epoch": 4.11134163208852, "grad_norm": 12.155810356140137, "learning_rate": 3.2714768710619334e-05, "log_odds_chosen": 9.416703224182129, "log_odds_ratio": -0.1775832176208496, "logits/chosen": -0.5456818342208862, "logits/rejected": -0.7242247462272644, "logps/chosen": -0.028735145926475525, "logps/rejected": -2.180703639984131, "loss": 1.2973, "nll_loss": 0.3065558075904846, "rewards/accuracies": 0.875, "rewards/chosen": -0.0028735145460814238, "rewards/margins": 0.2151968777179718, "rewards/rejected": -0.2180703729391098, "step": 5945 }, { "epoch": 4.1120331950207465, "grad_norm": 6.187151908874512, "learning_rate": 3.271092669432919e-05, "log_odds_chosen": 9.89011287689209, "log_odds_ratio": -0.00014637774438597262, "logits/chosen": -0.4840518832206726, "logits/rejected": -0.5275214910507202, "logps/chosen": -0.0006061262683942914, "logps/rejected": -2.1634793281555176, "loss": 0.7992, "nll_loss": 0.19979476928710938, "rewards/accuracies": 1.0, "rewards/chosen": -6.061262683942914e-05, "rewards/margins": 0.21628734469413757, "rewards/rejected": -0.21634796261787415, "step": 5946 }, { "epoch": 4.112724757952973, "grad_norm": 9.527305603027344, "learning_rate": 3.270708467803903e-05, "log_odds_chosen": 10.183591842651367, "log_odds_ratio": -0.00022850897221360356, "logits/chosen": -0.7199068665504456, "logits/rejected": -0.7739315629005432, "logps/chosen": -0.010009121149778366, "logps/rejected": -2.580620288848877, "loss": 1.4392, "nll_loss": 0.35976576805114746, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010009119287133217, "rewards/margins": 0.25706109404563904, "rewards/rejected": -0.2580620050430298, "step": 5947 }, { "epoch": 4.1134163208852, "grad_norm": 16.418354034423828, "learning_rate": 3.270324266174889e-05, "log_odds_chosen": 9.409860610961914, "log_odds_ratio": -0.0008784055826254189, "logits/chosen": -0.1743004024028778, "logits/rejected": -0.29258230328559875, "logps/chosen": -0.0021755893249064684, "logps/rejected": -1.563657522201538, "loss": 1.2913, "nll_loss": 0.32274794578552246, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021755897614639252, "rewards/margins": 0.15614819526672363, "rewards/rejected": -0.1563657522201538, "step": 5948 }, { "epoch": 4.114107883817427, "grad_norm": 6.429084777832031, "learning_rate": 3.269940064545874e-05, "log_odds_chosen": 7.539295196533203, "log_odds_ratio": -0.026218703016638756, "logits/chosen": -0.5419434309005737, "logits/rejected": -0.573211133480072, "logps/chosen": -0.018203264102339745, "logps/rejected": -1.1337330341339111, "loss": 0.9326, "nll_loss": 0.2305281162261963, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018203264335170388, "rewards/margins": 0.11155297607183456, "rewards/rejected": -0.113373301923275, "step": 5949 }, { "epoch": 4.114799446749654, "grad_norm": 7.231356143951416, "learning_rate": 3.269555862916859e-05, "log_odds_chosen": 9.106095314025879, "log_odds_ratio": -0.03225746005773544, "logits/chosen": -0.3878445327281952, "logits/rejected": -0.4633275866508484, "logps/chosen": -0.008714303374290466, "logps/rejected": -1.9162147045135498, "loss": 1.1961, "nll_loss": 0.29580968618392944, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008714303257875144, "rewards/margins": 0.19075004756450653, "rewards/rejected": -0.19162148237228394, "step": 5950 }, { "epoch": 4.115491009681881, "grad_norm": 9.443856239318848, "learning_rate": 3.269171661287844e-05, "log_odds_chosen": 7.6959004402160645, "log_odds_ratio": -0.04945721477270126, "logits/chosen": -0.49826863408088684, "logits/rejected": -0.5104885101318359, "logps/chosen": -0.013496211729943752, "logps/rejected": -2.165086507797241, "loss": 1.1163, "nll_loss": 0.27412450313568115, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013496209867298603, "rewards/margins": 0.21515902876853943, "rewards/rejected": -0.2165086567401886, "step": 5951 }, { "epoch": 4.1161825726141075, "grad_norm": 9.486602783203125, "learning_rate": 3.2687874596588295e-05, "log_odds_chosen": 9.971977233886719, "log_odds_ratio": -0.007441829890012741, "logits/chosen": -0.46394485235214233, "logits/rejected": -0.5294165015220642, "logps/chosen": -0.0038535038474947214, "logps/rejected": -2.2446508407592773, "loss": 1.1127, "nll_loss": 0.27743101119995117, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003853504022117704, "rewards/margins": 0.22407975792884827, "rewards/rejected": -0.22446510195732117, "step": 5952 }, { "epoch": 4.116874135546334, "grad_norm": 7.0303635597229, "learning_rate": 3.268403258029814e-05, "log_odds_chosen": 9.403999328613281, "log_odds_ratio": -0.000792883918620646, "logits/chosen": -0.4152238368988037, "logits/rejected": -0.4461762607097626, "logps/chosen": -0.0014086526352912188, "logps/rejected": -1.9261103868484497, "loss": 1.0918, "nll_loss": 0.27288228273391724, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014086527517065406, "rewards/margins": 0.19247017800807953, "rewards/rejected": -0.19261103868484497, "step": 5953 }, { "epoch": 4.117565698478561, "grad_norm": 9.143287658691406, "learning_rate": 3.268019056400799e-05, "log_odds_chosen": 8.954416275024414, "log_odds_ratio": -0.0022243014536798, "logits/chosen": -0.4166088104248047, "logits/rejected": -0.5308199524879456, "logps/chosen": -0.0026472746394574642, "logps/rejected": -1.9135946035385132, "loss": 1.0367, "nll_loss": 0.25895246863365173, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002647274814080447, "rewards/margins": 0.19109472632408142, "rewards/rejected": -0.19135946035385132, "step": 5954 }, { "epoch": 4.118257261410788, "grad_norm": 12.52299976348877, "learning_rate": 3.2676348547717845e-05, "log_odds_chosen": 10.522491455078125, "log_odds_ratio": -5.735613376600668e-05, "logits/chosen": -0.7633590698242188, "logits/rejected": -0.8200657367706299, "logps/chosen": -0.0006438453565351665, "logps/rejected": -2.771487236022949, "loss": 1.3546, "nll_loss": 0.3386405110359192, "rewards/accuracies": 1.0, "rewards/chosen": -6.438454147428274e-05, "rewards/margins": 0.2770843505859375, "rewards/rejected": -0.2771487236022949, "step": 5955 }, { "epoch": 4.118948824343015, "grad_norm": 9.641544342041016, "learning_rate": 3.267250653142769e-05, "log_odds_chosen": 9.222244262695312, "log_odds_ratio": -0.0002797696506604552, "logits/chosen": -0.4564547836780548, "logits/rejected": -0.47546225786209106, "logps/chosen": -0.0338638573884964, "logps/rejected": -2.6891396045684814, "loss": 1.0424, "nll_loss": 0.26056718826293945, "rewards/accuracies": 1.0, "rewards/chosen": -0.00338638573884964, "rewards/margins": 0.2655275762081146, "rewards/rejected": -0.26891398429870605, "step": 5956 }, { "epoch": 4.119640387275242, "grad_norm": 8.429539680480957, "learning_rate": 3.266866451513755e-05, "log_odds_chosen": 9.318424224853516, "log_odds_ratio": -0.0015697049675509334, "logits/chosen": -0.891433835029602, "logits/rejected": -0.9093539714813232, "logps/chosen": -0.023992005735635757, "logps/rejected": -2.0142667293548584, "loss": 1.6457, "nll_loss": 0.41125792264938354, "rewards/accuracies": 1.0, "rewards/chosen": -0.002399200340732932, "rewards/margins": 0.19902749359607697, "rewards/rejected": -0.2014266848564148, "step": 5957 }, { "epoch": 4.1203319502074685, "grad_norm": 7.106624126434326, "learning_rate": 3.2664822498847396e-05, "log_odds_chosen": 10.787242889404297, "log_odds_ratio": -3.572547575458884e-05, "logits/chosen": -0.33771446347236633, "logits/rejected": -0.4578153192996979, "logps/chosen": -0.00013065329403616488, "logps/rejected": -1.9293015003204346, "loss": 1.1003, "nll_loss": 0.275082528591156, "rewards/accuracies": 1.0, "rewards/chosen": -1.306533067690907e-05, "rewards/margins": 0.1929170936346054, "rewards/rejected": -0.1929301619529724, "step": 5958 }, { "epoch": 4.121023513139695, "grad_norm": 10.316193580627441, "learning_rate": 3.266098048255725e-05, "log_odds_chosen": 9.866926193237305, "log_odds_ratio": -0.00028171919984743, "logits/chosen": -0.833706259727478, "logits/rejected": -0.9109463691711426, "logps/chosen": -0.0019655900541692972, "logps/rejected": -2.0055766105651855, "loss": 1.1965, "nll_loss": 0.299089640378952, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019655900541692972, "rewards/margins": 0.20036110281944275, "rewards/rejected": -0.2005576640367508, "step": 5959 }, { "epoch": 4.121715076071922, "grad_norm": 13.061858177185059, "learning_rate": 3.26571384662671e-05, "log_odds_chosen": 8.520166397094727, "log_odds_ratio": -0.007341983262449503, "logits/chosen": -0.25020256638526917, "logits/rejected": -0.357946515083313, "logps/chosen": -0.017194848507642746, "logps/rejected": -1.7626746892929077, "loss": 1.01, "nll_loss": 0.25176340341567993, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017194848041981459, "rewards/margins": 0.1745479702949524, "rewards/rejected": -0.17626747488975525, "step": 5960 }, { "epoch": 4.122406639004149, "grad_norm": 12.465418815612793, "learning_rate": 3.265329644997695e-05, "log_odds_chosen": 9.764495849609375, "log_odds_ratio": -0.00030658257310278714, "logits/chosen": -0.5246774554252625, "logits/rejected": -0.5476590991020203, "logps/chosen": -0.0007004796643741429, "logps/rejected": -1.784714698791504, "loss": 1.1105, "nll_loss": 0.2775907516479492, "rewards/accuracies": 1.0, "rewards/chosen": -7.004797225818038e-05, "rewards/margins": 0.1784014254808426, "rewards/rejected": -0.17847149074077606, "step": 5961 }, { "epoch": 4.123098201936376, "grad_norm": 6.7162556648254395, "learning_rate": 3.26494544336868e-05, "log_odds_chosen": 9.411355018615723, "log_odds_ratio": -0.008453583344817162, "logits/chosen": -0.456272155046463, "logits/rejected": -0.41793495416641235, "logps/chosen": -0.0042070625349879265, "logps/rejected": -1.43600594997406, "loss": 1.748, "nll_loss": 0.4361457824707031, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042070625931955874, "rewards/margins": 0.14317987859249115, "rewards/rejected": -0.14360059797763824, "step": 5962 }, { "epoch": 4.123789764868603, "grad_norm": 13.096224784851074, "learning_rate": 3.264561241739665e-05, "log_odds_chosen": 8.157176971435547, "log_odds_ratio": -0.2519817650318146, "logits/chosen": -0.47622549533843994, "logits/rejected": -0.492038756608963, "logps/chosen": -0.04018167033791542, "logps/rejected": -1.1400073766708374, "loss": 1.2235, "nll_loss": 0.2806675136089325, "rewards/accuracies": 0.875, "rewards/chosen": -0.004018167033791542, "rewards/margins": 0.10998257249593735, "rewards/rejected": -0.11400073766708374, "step": 5963 }, { "epoch": 4.124481327800829, "grad_norm": 12.945378303527832, "learning_rate": 3.2641770401106504e-05, "log_odds_chosen": 8.599679946899414, "log_odds_ratio": -0.26467394828796387, "logits/chosen": -0.2246679961681366, "logits/rejected": -0.31600359082221985, "logps/chosen": -0.0332237184047699, "logps/rejected": -2.024475574493408, "loss": 0.8778, "nll_loss": 0.1929716169834137, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033223717473447323, "rewards/margins": 0.19912518560886383, "rewards/rejected": -0.2024475634098053, "step": 5964 }, { "epoch": 4.125172890733056, "grad_norm": 5.979857444763184, "learning_rate": 3.263792838481635e-05, "log_odds_chosen": 10.021464347839355, "log_odds_ratio": -0.002926250221207738, "logits/chosen": -0.4512186050415039, "logits/rejected": -0.5685581564903259, "logps/chosen": -0.0035476628690958023, "logps/rejected": -1.887199878692627, "loss": 0.9868, "nll_loss": 0.2464103400707245, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035476626362651587, "rewards/margins": 0.18836522102355957, "rewards/rejected": -0.1887200027704239, "step": 5965 }, { "epoch": 4.125864453665283, "grad_norm": 7.337460994720459, "learning_rate": 3.263408636852621e-05, "log_odds_chosen": 9.19233512878418, "log_odds_ratio": -0.0003316085785627365, "logits/chosen": -0.7004691958427429, "logits/rejected": -0.7161996960639954, "logps/chosen": -0.0005070206825621426, "logps/rejected": -1.1151583194732666, "loss": 1.3041, "nll_loss": 0.32599785923957825, "rewards/accuracies": 1.0, "rewards/chosen": -5.070206680102274e-05, "rewards/margins": 0.11146514117717743, "rewards/rejected": -0.11151584982872009, "step": 5966 }, { "epoch": 4.12655601659751, "grad_norm": 9.277113914489746, "learning_rate": 3.2630244352236054e-05, "log_odds_chosen": 9.497467041015625, "log_odds_ratio": -0.00043352588545531034, "logits/chosen": -0.5457701683044434, "logits/rejected": -0.6004650592803955, "logps/chosen": -0.001261774217709899, "logps/rejected": -1.639230489730835, "loss": 1.5775, "nll_loss": 0.3943275809288025, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001261774159502238, "rewards/margins": 0.16379685699939728, "rewards/rejected": -0.16392304003238678, "step": 5967 }, { "epoch": 4.127247579529737, "grad_norm": 8.806751251220703, "learning_rate": 3.262640233594591e-05, "log_odds_chosen": 9.815351486206055, "log_odds_ratio": -0.00011921973782591522, "logits/chosen": -0.5170784592628479, "logits/rejected": -0.5860993266105652, "logps/chosen": -0.00905107706785202, "logps/rejected": -3.105226993560791, "loss": 1.6434, "nll_loss": 0.41084566712379456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009051076485775411, "rewards/margins": 0.3096176087856293, "rewards/rejected": -0.31052273511886597, "step": 5968 }, { "epoch": 4.127939142461964, "grad_norm": 10.547689437866211, "learning_rate": 3.262256031965576e-05, "log_odds_chosen": 10.503625869750977, "log_odds_ratio": -4.8341156798414886e-05, "logits/chosen": -0.7468377947807312, "logits/rejected": -0.8255564570426941, "logps/chosen": -0.0004737896961160004, "logps/rejected": -1.9888479709625244, "loss": 1.3947, "nll_loss": 0.34866786003112793, "rewards/accuracies": 1.0, "rewards/chosen": -4.737896961160004e-05, "rewards/margins": 0.19883739948272705, "rewards/rejected": -0.1988847851753235, "step": 5969 }, { "epoch": 4.12863070539419, "grad_norm": 8.894291877746582, "learning_rate": 3.261871830336561e-05, "log_odds_chosen": 8.193218231201172, "log_odds_ratio": -0.039914391934871674, "logits/chosen": -0.44322913885116577, "logits/rejected": -0.5547171831130981, "logps/chosen": -0.02041376568377018, "logps/rejected": -1.4175244569778442, "loss": 1.4973, "nll_loss": 0.3703390657901764, "rewards/accuracies": 1.0, "rewards/chosen": -0.002041376894339919, "rewards/margins": 0.13971105217933655, "rewards/rejected": -0.1417524367570877, "step": 5970 }, { "epoch": 4.129322268326418, "grad_norm": 8.237187385559082, "learning_rate": 3.261487628707546e-05, "log_odds_chosen": 11.154354095458984, "log_odds_ratio": -4.79549344163388e-05, "logits/chosen": -0.7316851019859314, "logits/rejected": -0.7923774123191833, "logps/chosen": -0.00024489694624207914, "logps/rejected": -2.307917594909668, "loss": 0.8898, "nll_loss": 0.2224467545747757, "rewards/accuracies": 1.0, "rewards/chosen": -2.4489694624207914e-05, "rewards/margins": 0.23076725006103516, "rewards/rejected": -0.23079174757003784, "step": 5971 }, { "epoch": 4.130013831258645, "grad_norm": 7.732690334320068, "learning_rate": 3.261103427078531e-05, "log_odds_chosen": 9.728826522827148, "log_odds_ratio": -0.00044432198046706617, "logits/chosen": -1.0977262258529663, "logits/rejected": -1.1038000583648682, "logps/chosen": -0.008239268325269222, "logps/rejected": -2.0336647033691406, "loss": 1.7029, "nll_loss": 0.425682008266449, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008239267626777291, "rewards/margins": 0.2025425285100937, "rewards/rejected": -0.2033664733171463, "step": 5972 }, { "epoch": 4.130705394190872, "grad_norm": 11.485577583312988, "learning_rate": 3.260719225449516e-05, "log_odds_chosen": 8.480979919433594, "log_odds_ratio": -0.021923229098320007, "logits/chosen": -0.4793844223022461, "logits/rejected": -0.4833126366138458, "logps/chosen": -0.006414573173969984, "logps/rejected": -1.7797949314117432, "loss": 1.2513, "nll_loss": 0.310627281665802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006414573872461915, "rewards/margins": 0.17733803391456604, "rewards/rejected": -0.1779794991016388, "step": 5973 }, { "epoch": 4.131396957123099, "grad_norm": 10.626336097717285, "learning_rate": 3.260335023820501e-05, "log_odds_chosen": 9.561973571777344, "log_odds_ratio": -0.010487427935004234, "logits/chosen": -0.9466665983200073, "logits/rejected": -0.9845972061157227, "logps/chosen": -0.0037271876353770494, "logps/rejected": -1.9134477376937866, "loss": 1.0319, "nll_loss": 0.25691381096839905, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003727187868207693, "rewards/margins": 0.19097205996513367, "rewards/rejected": -0.19134476780891418, "step": 5974 }, { "epoch": 4.1320885200553255, "grad_norm": 7.434225082397461, "learning_rate": 3.259950822191487e-05, "log_odds_chosen": 10.406455993652344, "log_odds_ratio": -0.0002542786533012986, "logits/chosen": -0.5065848231315613, "logits/rejected": -0.5438984632492065, "logps/chosen": -0.0002799753565341234, "logps/rejected": -2.0893630981445312, "loss": 1.3304, "nll_loss": 0.3325809836387634, "rewards/accuracies": 1.0, "rewards/chosen": -2.799753929139115e-05, "rewards/margins": 0.2089083343744278, "rewards/rejected": -0.20893631875514984, "step": 5975 }, { "epoch": 4.132780082987552, "grad_norm": 43.933433532714844, "learning_rate": 3.259566620562471e-05, "log_odds_chosen": 8.503948211669922, "log_odds_ratio": -0.059916265308856964, "logits/chosen": -0.5901418328285217, "logits/rejected": -0.6609665155410767, "logps/chosen": -0.14128419756889343, "logps/rejected": -2.0661349296569824, "loss": 1.0805, "nll_loss": 0.2641289234161377, "rewards/accuracies": 1.0, "rewards/chosen": -0.014128419570624828, "rewards/margins": 0.19248507916927338, "rewards/rejected": -0.20661349594593048, "step": 5976 }, { "epoch": 4.133471645919779, "grad_norm": 14.006975173950195, "learning_rate": 3.2591824189334565e-05, "log_odds_chosen": 9.184225082397461, "log_odds_ratio": -0.007922169752418995, "logits/chosen": -0.7215189337730408, "logits/rejected": -0.7678462266921997, "logps/chosen": -0.004983365070074797, "logps/rejected": -1.9281487464904785, "loss": 1.1481, "nll_loss": 0.2862243950366974, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004983365070074797, "rewards/margins": 0.19231653213500977, "rewards/rejected": -0.1928148865699768, "step": 5977 }, { "epoch": 4.134163208852006, "grad_norm": 7.512627124786377, "learning_rate": 3.258798217304442e-05, "log_odds_chosen": 8.804177284240723, "log_odds_ratio": -0.0018154741264879704, "logits/chosen": -0.4251983165740967, "logits/rejected": -0.4081365466117859, "logps/chosen": -0.0031050737015902996, "logps/rejected": -1.4643402099609375, "loss": 1.2657, "nll_loss": 0.31624752283096313, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003105073992628604, "rewards/margins": 0.1461235135793686, "rewards/rejected": -0.1464340090751648, "step": 5978 }, { "epoch": 4.134854771784233, "grad_norm": 8.988731384277344, "learning_rate": 3.258414015675427e-05, "log_odds_chosen": 8.137323379516602, "log_odds_ratio": -0.02129383198916912, "logits/chosen": -0.7253506779670715, "logits/rejected": -0.7172971367835999, "logps/chosen": -0.009386150166392326, "logps/rejected": -1.9563298225402832, "loss": 1.0706, "nll_loss": 0.26553285121917725, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009386150049977005, "rewards/margins": 0.1946943700313568, "rewards/rejected": -0.19563297927379608, "step": 5979 }, { "epoch": 4.13554633471646, "grad_norm": 10.024322509765625, "learning_rate": 3.2580298140464116e-05, "log_odds_chosen": 10.15527057647705, "log_odds_ratio": -9.017070988193154e-05, "logits/chosen": -0.5249866843223572, "logits/rejected": -0.6138657927513123, "logps/chosen": -0.00031795757240615785, "logps/rejected": -1.9348928928375244, "loss": 1.443, "nll_loss": 0.3607480525970459, "rewards/accuracies": 1.0, "rewards/chosen": -3.179576015099883e-05, "rewards/margins": 0.1934574991464615, "rewards/rejected": -0.19348928332328796, "step": 5980 }, { "epoch": 4.136237897648686, "grad_norm": 9.696584701538086, "learning_rate": 3.257645612417397e-05, "log_odds_chosen": 8.965103149414062, "log_odds_ratio": -0.010557899251580238, "logits/chosen": -0.49012356996536255, "logits/rejected": -0.5335181951522827, "logps/chosen": -0.009183174930512905, "logps/rejected": -2.1040782928466797, "loss": 1.2164, "nll_loss": 0.30304914712905884, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009183174697682261, "rewards/margins": 0.20948949456214905, "rewards/rejected": -0.2104078084230423, "step": 5981 }, { "epoch": 4.136929460580913, "grad_norm": 9.6136474609375, "learning_rate": 3.257261410788382e-05, "log_odds_chosen": 10.920928955078125, "log_odds_ratio": -4.304420144762844e-05, "logits/chosen": -0.7003531455993652, "logits/rejected": -0.7433496713638306, "logps/chosen": -0.00014741663471795619, "logps/rejected": -2.029831647872925, "loss": 0.8727, "nll_loss": 0.2181766927242279, "rewards/accuracies": 1.0, "rewards/chosen": -1.4741663107997738e-05, "rewards/margins": 0.20296841859817505, "rewards/rejected": -0.20298317074775696, "step": 5982 }, { "epoch": 4.13762102351314, "grad_norm": 6.659314155578613, "learning_rate": 3.2568772091593666e-05, "log_odds_chosen": 8.73141098022461, "log_odds_ratio": -0.0003280085220467299, "logits/chosen": -0.25254154205322266, "logits/rejected": -0.2507188320159912, "logps/chosen": -0.0036556655541062355, "logps/rejected": -1.4496879577636719, "loss": 1.2142, "nll_loss": 0.3035261034965515, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036556654958985746, "rewards/margins": 0.14460323750972748, "rewards/rejected": -0.14496880769729614, "step": 5983 }, { "epoch": 4.138312586445367, "grad_norm": 9.571125984191895, "learning_rate": 3.2564930075303525e-05, "log_odds_chosen": 9.067570686340332, "log_odds_ratio": -0.0011896053329110146, "logits/chosen": -0.6112010478973389, "logits/rejected": -0.6654389500617981, "logps/chosen": -0.009722664020955563, "logps/rejected": -2.00520920753479, "loss": 1.3412, "nll_loss": 0.33517637848854065, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009722664253786206, "rewards/margins": 0.1995486617088318, "rewards/rejected": -0.20052093267440796, "step": 5984 }, { "epoch": 4.139004149377594, "grad_norm": 5.55220365524292, "learning_rate": 3.256108805901337e-05, "log_odds_chosen": 8.475414276123047, "log_odds_ratio": -0.026588575914502144, "logits/chosen": -0.3219301104545593, "logits/rejected": -0.3104025721549988, "logps/chosen": -0.008884113281965256, "logps/rejected": -1.4837077856063843, "loss": 0.7646, "nll_loss": 0.18848538398742676, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008884113049134612, "rewards/margins": 0.14748236536979675, "rewards/rejected": -0.14837077260017395, "step": 5985 }, { "epoch": 4.139695712309821, "grad_norm": 10.565631866455078, "learning_rate": 3.2557246042723224e-05, "log_odds_chosen": 10.836447715759277, "log_odds_ratio": -2.8640048185479827e-05, "logits/chosen": -0.3206827640533447, "logits/rejected": -0.4384026825428009, "logps/chosen": -0.00023758437600918114, "logps/rejected": -2.334228038787842, "loss": 1.1158, "nll_loss": 0.27893880009651184, "rewards/accuracies": 1.0, "rewards/chosen": -2.3758439056109637e-05, "rewards/margins": 0.23339903354644775, "rewards/rejected": -0.23342278599739075, "step": 5986 }, { "epoch": 4.140387275242047, "grad_norm": 6.649505138397217, "learning_rate": 3.2553404026433076e-05, "log_odds_chosen": 8.939205169677734, "log_odds_ratio": -0.0010217225644737482, "logits/chosen": -0.3970273435115814, "logits/rejected": -0.47130677103996277, "logps/chosen": -0.0017841707449406385, "logps/rejected": -1.7476348876953125, "loss": 1.4555, "nll_loss": 0.3637797236442566, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017841707449406385, "rewards/margins": 0.17458505928516388, "rewards/rejected": -0.17476347088813782, "step": 5987 }, { "epoch": 4.141078838174274, "grad_norm": 7.781863689422607, "learning_rate": 3.254956201014293e-05, "log_odds_chosen": 9.778545379638672, "log_odds_ratio": -0.00030032815993763506, "logits/chosen": -0.5041961669921875, "logits/rejected": -0.49716609716415405, "logps/chosen": -0.00019634825002867728, "logps/rejected": -1.313286304473877, "loss": 1.7611, "nll_loss": 0.4402513802051544, "rewards/accuracies": 1.0, "rewards/chosen": -1.963482645805925e-05, "rewards/margins": 0.13130898773670197, "rewards/rejected": -0.13132862746715546, "step": 5988 }, { "epoch": 4.141770401106501, "grad_norm": 7.763805389404297, "learning_rate": 3.2545719993852774e-05, "log_odds_chosen": 11.122587203979492, "log_odds_ratio": -2.156953269150108e-05, "logits/chosen": -0.4305421710014343, "logits/rejected": -0.48841428756713867, "logps/chosen": -0.00044233925291337073, "logps/rejected": -2.886995315551758, "loss": 0.9509, "nll_loss": 0.23773378133773804, "rewards/accuracies": 1.0, "rewards/chosen": -4.4233926018932834e-05, "rewards/margins": 0.2886553108692169, "rewards/rejected": -0.28869953751564026, "step": 5989 }, { "epoch": 4.142461964038728, "grad_norm": 9.990039825439453, "learning_rate": 3.2541877977562627e-05, "log_odds_chosen": 9.962656021118164, "log_odds_ratio": -0.00012990219693165272, "logits/chosen": -0.8702992796897888, "logits/rejected": -0.8638290166854858, "logps/chosen": -0.00043330626795068383, "logps/rejected": -1.8089728355407715, "loss": 1.1995, "nll_loss": 0.2998722791671753, "rewards/accuracies": 1.0, "rewards/chosen": -4.333062679506838e-05, "rewards/margins": 0.1808539628982544, "rewards/rejected": -0.1808972954750061, "step": 5990 }, { "epoch": 4.143153526970955, "grad_norm": 8.116752624511719, "learning_rate": 3.253803596127248e-05, "log_odds_chosen": 8.72532844543457, "log_odds_ratio": -0.025739800184965134, "logits/chosen": -0.43284478783607483, "logits/rejected": -0.4666554927825928, "logps/chosen": -0.008339660242199898, "logps/rejected": -1.546666145324707, "loss": 0.9343, "nll_loss": 0.23101186752319336, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008339660707861185, "rewards/margins": 0.15383264422416687, "rewards/rejected": -0.15466661751270294, "step": 5991 }, { "epoch": 4.143845089903182, "grad_norm": 9.78846263885498, "learning_rate": 3.2534193944982325e-05, "log_odds_chosen": 6.673300743103027, "log_odds_ratio": -0.09282121807336807, "logits/chosen": -0.6692243814468384, "logits/rejected": -0.7034744024276733, "logps/chosen": -0.033201564103364944, "logps/rejected": -1.3767527341842651, "loss": 1.9898, "nll_loss": 0.4881555140018463, "rewards/accuracies": 0.875, "rewards/chosen": -0.0033201563637703657, "rewards/margins": 0.13435512781143188, "rewards/rejected": -0.13767528533935547, "step": 5992 }, { "epoch": 4.144536652835408, "grad_norm": 13.220309257507324, "learning_rate": 3.253035192869218e-05, "log_odds_chosen": 10.948552131652832, "log_odds_ratio": -3.2378186006098986e-05, "logits/chosen": -0.5019373893737793, "logits/rejected": -0.564258337020874, "logps/chosen": -0.00020370143465697765, "logps/rejected": -2.3918418884277344, "loss": 1.1606, "nll_loss": 0.2901498079299927, "rewards/accuracies": 1.0, "rewards/chosen": -2.0370142010506243e-05, "rewards/margins": 0.2391638308763504, "rewards/rejected": -0.2391842007637024, "step": 5993 }, { "epoch": 4.145228215767635, "grad_norm": 13.55427360534668, "learning_rate": 3.252650991240203e-05, "log_odds_chosen": 8.909460067749023, "log_odds_ratio": -0.0011914423666894436, "logits/chosen": -0.8773664832115173, "logits/rejected": -0.8870227336883545, "logps/chosen": -0.005369079299271107, "logps/rejected": -1.924501657485962, "loss": 1.248, "nll_loss": 0.3118761479854584, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005369078717194498, "rewards/margins": 0.19191326200962067, "rewards/rejected": -0.1924501657485962, "step": 5994 }, { "epoch": 4.145919778699862, "grad_norm": 12.313704490661621, "learning_rate": 3.252266789611188e-05, "log_odds_chosen": 10.855825424194336, "log_odds_ratio": -3.5704721085494384e-05, "logits/chosen": -0.8360105156898499, "logits/rejected": -0.8496856689453125, "logps/chosen": -0.0002516081731300801, "logps/rejected": -2.481499671936035, "loss": 1.0471, "nll_loss": 0.2617621421813965, "rewards/accuracies": 1.0, "rewards/chosen": -2.5160818040603772e-05, "rewards/margins": 0.24812480807304382, "rewards/rejected": -0.24814999103546143, "step": 5995 }, { "epoch": 4.146611341632089, "grad_norm": 8.56289291381836, "learning_rate": 3.251882587982173e-05, "log_odds_chosen": 9.165348052978516, "log_odds_ratio": -0.001278581446968019, "logits/chosen": -0.679673433303833, "logits/rejected": -0.7639566659927368, "logps/chosen": -0.006403455510735512, "logps/rejected": -1.2529271841049194, "loss": 1.136, "nll_loss": 0.28387773036956787, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006403456209227443, "rewards/margins": 0.12465237081050873, "rewards/rejected": -0.12529271841049194, "step": 5996 }, { "epoch": 4.147302904564316, "grad_norm": 7.371829032897949, "learning_rate": 3.251498386353159e-05, "log_odds_chosen": 9.111982345581055, "log_odds_ratio": -0.0002587471390143037, "logits/chosen": -0.8423949480056763, "logits/rejected": -0.976094663143158, "logps/chosen": -0.01260296069085598, "logps/rejected": -2.5366899967193604, "loss": 1.7986, "nll_loss": 0.4496348798274994, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012602962087839842, "rewards/margins": 0.2524087131023407, "rewards/rejected": -0.25366899371147156, "step": 5997 }, { "epoch": 4.1479944674965425, "grad_norm": 7.737312316894531, "learning_rate": 3.251114184724143e-05, "log_odds_chosen": 10.224498748779297, "log_odds_ratio": -4.952462040819228e-05, "logits/chosen": -0.49761393666267395, "logits/rejected": -0.5714750289916992, "logps/chosen": -0.000272135715931654, "logps/rejected": -1.5678317546844482, "loss": 1.0023, "nll_loss": 0.2505626082420349, "rewards/accuracies": 1.0, "rewards/chosen": -2.721357304835692e-05, "rewards/margins": 0.1567559540271759, "rewards/rejected": -0.15678316354751587, "step": 5998 }, { "epoch": 4.148686030428769, "grad_norm": 6.962085723876953, "learning_rate": 3.2507299830951285e-05, "log_odds_chosen": 8.856086730957031, "log_odds_ratio": -0.003185291076079011, "logits/chosen": -0.8304173946380615, "logits/rejected": -0.8658832311630249, "logps/chosen": -0.013688081875443459, "logps/rejected": -1.5797288417816162, "loss": 1.4232, "nll_loss": 0.35549092292785645, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013688082108274102, "rewards/margins": 0.15660406649112701, "rewards/rejected": -0.15797287225723267, "step": 5999 }, { "epoch": 4.149377593360996, "grad_norm": 8.609755516052246, "learning_rate": 3.250345781466114e-05, "log_odds_chosen": 8.917791366577148, "log_odds_ratio": -0.003855043789371848, "logits/chosen": -0.7871185541152954, "logits/rejected": -0.8323012590408325, "logps/chosen": -0.005348569247871637, "logps/rejected": -1.6735996007919312, "loss": 1.0782, "nll_loss": 0.26915350556373596, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005348569247871637, "rewards/margins": 0.1668251007795334, "rewards/rejected": -0.16735996305942535, "step": 6000 }, { "epoch": 4.150069156293223, "grad_norm": 9.403129577636719, "learning_rate": 3.249961579837098e-05, "log_odds_chosen": 10.51172161102295, "log_odds_ratio": -5.671913459082134e-05, "logits/chosen": -0.5075657367706299, "logits/rejected": -0.5350244045257568, "logps/chosen": -0.0002172726672142744, "logps/rejected": -2.0582528114318848, "loss": 0.8376, "nll_loss": 0.20939427614212036, "rewards/accuracies": 1.0, "rewards/chosen": -2.1727268176618963e-05, "rewards/margins": 0.20580355823040009, "rewards/rejected": -0.20582528412342072, "step": 6001 }, { "epoch": 4.15076071922545, "grad_norm": 17.984451293945312, "learning_rate": 3.2495773782080836e-05, "log_odds_chosen": 8.329596519470215, "log_odds_ratio": -0.19380486011505127, "logits/chosen": -0.691654622554779, "logits/rejected": -0.7160911560058594, "logps/chosen": -0.026909837499260902, "logps/rejected": -1.85858154296875, "loss": 1.4193, "nll_loss": 0.33545446395874023, "rewards/accuracies": 0.875, "rewards/chosen": -0.0026909837033599615, "rewards/margins": 0.18316717445850372, "rewards/rejected": -0.18585816025733948, "step": 6002 }, { "epoch": 4.151452282157677, "grad_norm": 13.927128791809082, "learning_rate": 3.249193176579069e-05, "log_odds_chosen": 9.670804023742676, "log_odds_ratio": -0.0007266352185979486, "logits/chosen": -0.5136786699295044, "logits/rejected": -0.5342156291007996, "logps/chosen": -0.0009479423752054572, "logps/rejected": -2.0707037448883057, "loss": 1.089, "nll_loss": 0.2721821069717407, "rewards/accuracies": 1.0, "rewards/chosen": -9.479424625169486e-05, "rewards/margins": 0.20697560906410217, "rewards/rejected": -0.20707038044929504, "step": 6003 }, { "epoch": 4.1521438450899035, "grad_norm": 10.575544357299805, "learning_rate": 3.248808974950054e-05, "log_odds_chosen": 10.414182662963867, "log_odds_ratio": -0.00011947475286433473, "logits/chosen": -0.3136584162712097, "logits/rejected": -0.35366952419281006, "logps/chosen": -0.004044681787490845, "logps/rejected": -2.415982961654663, "loss": 1.3192, "nll_loss": 0.3297957181930542, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040446818456985056, "rewards/margins": 0.24119384586811066, "rewards/rejected": -0.24159829318523407, "step": 6004 }, { "epoch": 4.15283540802213, "grad_norm": 8.583888053894043, "learning_rate": 3.2484247733210386e-05, "log_odds_chosen": 10.239829063415527, "log_odds_ratio": -0.0005477681988850236, "logits/chosen": -0.6634764671325684, "logits/rejected": -0.7279617786407471, "logps/chosen": -0.008838672190904617, "logps/rejected": -2.911771297454834, "loss": 0.7103, "nll_loss": 0.17753028869628906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008838672074489295, "rewards/margins": 0.29029324650764465, "rewards/rejected": -0.2911771535873413, "step": 6005 }, { "epoch": 4.153526970954357, "grad_norm": 14.97877311706543, "learning_rate": 3.2480405716920245e-05, "log_odds_chosen": 10.376019477844238, "log_odds_ratio": -0.002244369825348258, "logits/chosen": -0.4436874985694885, "logits/rejected": -0.4845767021179199, "logps/chosen": -0.0003641161310952157, "logps/rejected": -2.0352377891540527, "loss": 1.0946, "nll_loss": 0.27343636751174927, "rewards/accuracies": 1.0, "rewards/chosen": -3.641161310952157e-05, "rewards/margins": 0.203487366437912, "rewards/rejected": -0.20352376997470856, "step": 6006 }, { "epoch": 4.154218533886584, "grad_norm": 7.991260051727295, "learning_rate": 3.247656370063009e-05, "log_odds_chosen": 10.304679870605469, "log_odds_ratio": -0.00010443619976285845, "logits/chosen": -0.498263418674469, "logits/rejected": -0.5627723336219788, "logps/chosen": -0.0007239045226015151, "logps/rejected": -1.9947481155395508, "loss": 0.8668, "nll_loss": 0.21668842434883118, "rewards/accuracies": 1.0, "rewards/chosen": -7.23904522601515e-05, "rewards/margins": 0.19940242171287537, "rewards/rejected": -0.19947482645511627, "step": 6007 }, { "epoch": 4.154910096818811, "grad_norm": 11.769681930541992, "learning_rate": 3.2472721684339943e-05, "log_odds_chosen": 9.573690414428711, "log_odds_ratio": -0.0007894306909292936, "logits/chosen": -0.8522692322731018, "logits/rejected": -0.971172034740448, "logps/chosen": -0.002234040992334485, "logps/rejected": -1.9229815006256104, "loss": 2.2819, "nll_loss": 0.5703853964805603, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022340410214383155, "rewards/margins": 0.1920747309923172, "rewards/rejected": -0.19229814410209656, "step": 6008 }, { "epoch": 4.155601659751038, "grad_norm": 7.188262939453125, "learning_rate": 3.2468879668049796e-05, "log_odds_chosen": 8.566203117370605, "log_odds_ratio": -0.0005539363482967019, "logits/chosen": -0.6005150675773621, "logits/rejected": -0.7028006315231323, "logps/chosen": -0.0007507450645789504, "logps/rejected": -1.3089596033096313, "loss": 0.7489, "nll_loss": 0.18715739250183105, "rewards/accuracies": 1.0, "rewards/chosen": -7.507450936827809e-05, "rewards/margins": 0.1308208703994751, "rewards/rejected": -0.1308959573507309, "step": 6009 }, { "epoch": 4.1562932226832645, "grad_norm": 10.46820068359375, "learning_rate": 3.246503765175964e-05, "log_odds_chosen": 9.431066513061523, "log_odds_ratio": -0.0006439237622544169, "logits/chosen": -0.2439686357975006, "logits/rejected": -0.3215882182121277, "logps/chosen": -0.0037022149190306664, "logps/rejected": -1.7961797714233398, "loss": 1.076, "nll_loss": 0.26894134283065796, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003702215035445988, "rewards/margins": 0.17924776673316956, "rewards/rejected": -0.1796179711818695, "step": 6010 }, { "epoch": 4.156984785615491, "grad_norm": 8.864563941955566, "learning_rate": 3.2461195635469494e-05, "log_odds_chosen": 9.370769500732422, "log_odds_ratio": -0.0011887021828442812, "logits/chosen": -0.9394341707229614, "logits/rejected": -1.0180696249008179, "logps/chosen": -0.0038085738196969032, "logps/rejected": -2.3024024963378906, "loss": 1.4105, "nll_loss": 0.35250598192214966, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003808573819696903, "rewards/margins": 0.22985941171646118, "rewards/rejected": -0.23024027049541473, "step": 6011 }, { "epoch": 4.157676348547718, "grad_norm": 6.366844177246094, "learning_rate": 3.2457353619179346e-05, "log_odds_chosen": 9.5271635055542, "log_odds_ratio": -0.0001840710174292326, "logits/chosen": -0.537406325340271, "logits/rejected": -0.5141395330429077, "logps/chosen": -0.0017021159874275327, "logps/rejected": -2.4246888160705566, "loss": 0.9852, "nll_loss": 0.24628528952598572, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001702116132946685, "rewards/margins": 0.2422986775636673, "rewards/rejected": -0.24246887862682343, "step": 6012 }, { "epoch": 4.158367911479945, "grad_norm": 16.682363510131836, "learning_rate": 3.24535116028892e-05, "log_odds_chosen": 10.383573532104492, "log_odds_ratio": -8.243302727350965e-05, "logits/chosen": -0.6444257497787476, "logits/rejected": -0.6103654503822327, "logps/chosen": -0.0012489922810345888, "logps/rejected": -2.256441593170166, "loss": 0.7474, "nll_loss": 0.18683573603630066, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012489923392422497, "rewards/margins": 0.22551925480365753, "rewards/rejected": -0.22564415633678436, "step": 6013 }, { "epoch": 4.159059474412172, "grad_norm": 11.687685012817383, "learning_rate": 3.2449669586599045e-05, "log_odds_chosen": 10.128734588623047, "log_odds_ratio": -0.0001414440048392862, "logits/chosen": -0.9767643809318542, "logits/rejected": -1.0264098644256592, "logps/chosen": -0.0003028717765118927, "logps/rejected": -1.7673587799072266, "loss": 1.4607, "nll_loss": 0.3651718199253082, "rewards/accuracies": 1.0, "rewards/chosen": -3.0287175832199864e-05, "rewards/margins": 0.17670559883117676, "rewards/rejected": -0.17673589289188385, "step": 6014 }, { "epoch": 4.159751037344399, "grad_norm": 8.603378295898438, "learning_rate": 3.2445827570308904e-05, "log_odds_chosen": 10.377304077148438, "log_odds_ratio": -8.300002082251012e-05, "logits/chosen": -0.7923702001571655, "logits/rejected": -0.8099848031997681, "logps/chosen": -0.0005034460918977857, "logps/rejected": -2.1220457553863525, "loss": 0.7945, "nll_loss": 0.19862057268619537, "rewards/accuracies": 1.0, "rewards/chosen": -5.0344613555353135e-05, "rewards/margins": 0.21215423941612244, "rewards/rejected": -0.21220457553863525, "step": 6015 }, { "epoch": 4.1604426002766255, "grad_norm": 8.598065376281738, "learning_rate": 3.244198555401875e-05, "log_odds_chosen": 10.105928421020508, "log_odds_ratio": -8.371302101295441e-05, "logits/chosen": -0.9319807887077332, "logits/rejected": -0.9607030749320984, "logps/chosen": -0.0002785869291983545, "logps/rejected": -1.7898892164230347, "loss": 1.0916, "nll_loss": 0.2728910744190216, "rewards/accuracies": 1.0, "rewards/chosen": -2.7858695830218494e-05, "rewards/margins": 0.17896106839179993, "rewards/rejected": -0.17898890376091003, "step": 6016 }, { "epoch": 4.161134163208852, "grad_norm": 8.079885482788086, "learning_rate": 3.24381435377286e-05, "log_odds_chosen": 10.28216552734375, "log_odds_ratio": -0.00011916200310224667, "logits/chosen": -0.5735573768615723, "logits/rejected": -0.6151795983314514, "logps/chosen": -0.00511480076238513, "logps/rejected": -2.6346943378448486, "loss": 1.3447, "nll_loss": 0.33617204427719116, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005114800878800452, "rewards/margins": 0.26295799016952515, "rewards/rejected": -0.2634694576263428, "step": 6017 }, { "epoch": 4.161825726141079, "grad_norm": 6.784422874450684, "learning_rate": 3.2434301521438454e-05, "log_odds_chosen": 9.672140121459961, "log_odds_ratio": -0.12526734173297882, "logits/chosen": -0.12474574148654938, "logits/rejected": -0.20955440402030945, "logps/chosen": -0.019273709505796432, "logps/rejected": -1.6844278573989868, "loss": 1.4583, "nll_loss": 0.35204043984413147, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019273711368441582, "rewards/margins": 0.16651540994644165, "rewards/rejected": -0.16844278573989868, "step": 6018 }, { "epoch": 4.162517289073306, "grad_norm": 7.922595500946045, "learning_rate": 3.24304595051483e-05, "log_odds_chosen": 7.988283157348633, "log_odds_ratio": -0.019988220185041428, "logits/chosen": -0.6160153150558472, "logits/rejected": -0.6896790862083435, "logps/chosen": -0.014414435252547264, "logps/rejected": -1.2084660530090332, "loss": 1.2632, "nll_loss": 0.31379175186157227, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014414435718208551, "rewards/margins": 0.11940516531467438, "rewards/rejected": -0.12084661424160004, "step": 6019 }, { "epoch": 4.163208852005533, "grad_norm": 6.944870948791504, "learning_rate": 3.242661748885815e-05, "log_odds_chosen": 9.153783798217773, "log_odds_ratio": -0.005235993769019842, "logits/chosen": -0.6062872409820557, "logits/rejected": -0.6818249821662903, "logps/chosen": -0.00888950563967228, "logps/rejected": -1.3861886262893677, "loss": 1.1206, "nll_loss": 0.27963629364967346, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008889504824765027, "rewards/margins": 0.13772991299629211, "rewards/rejected": -0.1386188566684723, "step": 6020 }, { "epoch": 4.16390041493776, "grad_norm": 8.618224143981934, "learning_rate": 3.2422775472568005e-05, "log_odds_chosen": 8.96036148071289, "log_odds_ratio": -0.002292029093950987, "logits/chosen": -0.6807083487510681, "logits/rejected": -0.7891625165939331, "logps/chosen": -0.035568371415138245, "logps/rejected": -2.684354066848755, "loss": 0.9246, "nll_loss": 0.23092928528785706, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035568373277783394, "rewards/margins": 0.26487860083580017, "rewards/rejected": -0.26843541860580444, "step": 6021 }, { "epoch": 4.1645919778699865, "grad_norm": 8.75467300415039, "learning_rate": 3.241893345627786e-05, "log_odds_chosen": 10.150123596191406, "log_odds_ratio": -0.00011984707089141011, "logits/chosen": -0.8578680157661438, "logits/rejected": -0.9064348936080933, "logps/chosen": -0.00032188548357225955, "logps/rejected": -1.9927043914794922, "loss": 1.2899, "nll_loss": 0.32247376441955566, "rewards/accuracies": 1.0, "rewards/chosen": -3.2188549084821716e-05, "rewards/margins": 0.19923825562000275, "rewards/rejected": -0.19927042722702026, "step": 6022 }, { "epoch": 4.165283540802213, "grad_norm": 8.196616172790527, "learning_rate": 3.24150914399877e-05, "log_odds_chosen": 9.288247108459473, "log_odds_ratio": -0.00015818187966942787, "logits/chosen": -0.5348951816558838, "logits/rejected": -0.49097248911857605, "logps/chosen": -0.0005929345497861505, "logps/rejected": -1.3997464179992676, "loss": 1.2029, "nll_loss": 0.3007069528102875, "rewards/accuracies": 1.0, "rewards/chosen": -5.9293452068232e-05, "rewards/margins": 0.1399153620004654, "rewards/rejected": -0.1399746537208557, "step": 6023 }, { "epoch": 4.16597510373444, "grad_norm": 11.910942077636719, "learning_rate": 3.241124942369756e-05, "log_odds_chosen": 10.133342742919922, "log_odds_ratio": -0.00024989733356051147, "logits/chosen": -0.22623278200626373, "logits/rejected": -0.28069326281547546, "logps/chosen": -0.0007148812874220312, "logps/rejected": -1.95946204662323, "loss": 1.1296, "nll_loss": 0.28237199783325195, "rewards/accuracies": 1.0, "rewards/chosen": -7.148813165258616e-05, "rewards/margins": 0.19587473571300507, "rewards/rejected": -0.19594621658325195, "step": 6024 }, { "epoch": 4.166666666666667, "grad_norm": 9.333582878112793, "learning_rate": 3.240740740740741e-05, "log_odds_chosen": 9.219890594482422, "log_odds_ratio": -0.0006107841618359089, "logits/chosen": -0.5428364872932434, "logits/rejected": -0.5398375988006592, "logps/chosen": -0.001833610818721354, "logps/rejected": -1.6555602550506592, "loss": 1.1237, "nll_loss": 0.28086787462234497, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018336107314098626, "rewards/margins": 0.16537266969680786, "rewards/rejected": -0.16555601358413696, "step": 6025 }, { "epoch": 4.167358229598894, "grad_norm": 7.693150997161865, "learning_rate": 3.240356539111726e-05, "log_odds_chosen": 10.226139068603516, "log_odds_ratio": -9.946373756974936e-05, "logits/chosen": -0.5391544699668884, "logits/rejected": -0.47298184037208557, "logps/chosen": -0.00015591408009640872, "logps/rejected": -1.5290985107421875, "loss": 1.9471, "nll_loss": 0.486769437789917, "rewards/accuracies": 1.0, "rewards/chosen": -1.5591409464832395e-05, "rewards/margins": 0.1528942734003067, "rewards/rejected": -0.15290986001491547, "step": 6026 }, { "epoch": 4.168049792531121, "grad_norm": 9.954532623291016, "learning_rate": 3.239972337482711e-05, "log_odds_chosen": 9.627957344055176, "log_odds_ratio": -0.0001280440337723121, "logits/chosen": -0.47433945536613464, "logits/rejected": -0.519112229347229, "logps/chosen": -0.0002949607733171433, "logps/rejected": -1.4962272644042969, "loss": 0.9205, "nll_loss": 0.23010030388832092, "rewards/accuracies": 1.0, "rewards/chosen": -2.9496077331714332e-05, "rewards/margins": 0.14959323406219482, "rewards/rejected": -0.14962273836135864, "step": 6027 }, { "epoch": 4.1687413554633475, "grad_norm": 7.624865531921387, "learning_rate": 3.239588135853696e-05, "log_odds_chosen": 9.83966064453125, "log_odds_ratio": -0.0001383407216053456, "logits/chosen": -0.6526281237602234, "logits/rejected": -0.7059519290924072, "logps/chosen": -0.0007061379146762192, "logps/rejected": -1.6809070110321045, "loss": 1.1578, "nll_loss": 0.2894425392150879, "rewards/accuracies": 1.0, "rewards/chosen": -7.061379437800497e-05, "rewards/margins": 0.168020099401474, "rewards/rejected": -0.16809071600437164, "step": 6028 }, { "epoch": 4.169432918395574, "grad_norm": 12.61556339263916, "learning_rate": 3.239203934224681e-05, "log_odds_chosen": 8.602940559387207, "log_odds_ratio": -0.0024029456544667482, "logits/chosen": -0.789209246635437, "logits/rejected": -0.879492461681366, "logps/chosen": -0.04383961856365204, "logps/rejected": -1.9742767810821533, "loss": 1.3313, "nll_loss": 0.3325907588005066, "rewards/accuracies": 1.0, "rewards/chosen": -0.004383962135761976, "rewards/margins": 0.19304370880126953, "rewards/rejected": -0.1974276900291443, "step": 6029 }, { "epoch": 4.170124481327801, "grad_norm": 11.348913192749023, "learning_rate": 3.238819732595666e-05, "log_odds_chosen": 10.986472129821777, "log_odds_ratio": -0.00034344103187322617, "logits/chosen": -0.9471420049667358, "logits/rejected": -0.9723103046417236, "logps/chosen": -0.012529644183814526, "logps/rejected": -2.8672900199890137, "loss": 1.6722, "nll_loss": 0.41800376772880554, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012529646046459675, "rewards/margins": 0.28547605872154236, "rewards/rejected": -0.28672903776168823, "step": 6030 }, { "epoch": 4.170816044260028, "grad_norm": 11.74988079071045, "learning_rate": 3.2384355309666516e-05, "log_odds_chosen": 10.14430046081543, "log_odds_ratio": -6.812495121266693e-05, "logits/chosen": -0.7544984817504883, "logits/rejected": -0.7648979425430298, "logps/chosen": -0.0002719055919442326, "logps/rejected": -1.9476408958435059, "loss": 1.1991, "nll_loss": 0.29976290464401245, "rewards/accuracies": 1.0, "rewards/chosen": -2.7190561013412662e-05, "rewards/margins": 0.19473689794540405, "rewards/rejected": -0.19476407766342163, "step": 6031 }, { "epoch": 4.171507607192255, "grad_norm": 7.633362770080566, "learning_rate": 3.238051329337636e-05, "log_odds_chosen": 10.356497764587402, "log_odds_ratio": -0.0013323475141078234, "logits/chosen": -0.45199599862098694, "logits/rejected": -0.5082833766937256, "logps/chosen": -0.0002783353556878865, "logps/rejected": -1.9102857112884521, "loss": 1.0295, "nll_loss": 0.25723761320114136, "rewards/accuracies": 1.0, "rewards/chosen": -2.7833535568788648e-05, "rewards/margins": 0.19100074470043182, "rewards/rejected": -0.19102856516838074, "step": 6032 }, { "epoch": 4.172199170124482, "grad_norm": 19.848237991333008, "learning_rate": 3.237667127708622e-05, "log_odds_chosen": 9.494232177734375, "log_odds_ratio": -0.0001813523704186082, "logits/chosen": -0.8049036264419556, "logits/rejected": -0.9353877902030945, "logps/chosen": -0.0007863644859753549, "logps/rejected": -1.993652105331421, "loss": 1.5769, "nll_loss": 0.3941996991634369, "rewards/accuracies": 1.0, "rewards/chosen": -7.863643986638635e-05, "rewards/margins": 0.1992865949869156, "rewards/rejected": -0.19936522841453552, "step": 6033 }, { "epoch": 4.172890733056708, "grad_norm": 11.86107349395752, "learning_rate": 3.2372829260796066e-05, "log_odds_chosen": 10.998078346252441, "log_odds_ratio": -2.3432841771864332e-05, "logits/chosen": -0.5509462952613831, "logits/rejected": -0.6317671537399292, "logps/chosen": -0.00016442319611087441, "logps/rejected": -2.290289878845215, "loss": 1.177, "nll_loss": 0.29424944519996643, "rewards/accuracies": 1.0, "rewards/chosen": -1.6442319974885322e-05, "rewards/margins": 0.22901256382465363, "rewards/rejected": -0.22902899980545044, "step": 6034 }, { "epoch": 4.173582295988935, "grad_norm": 7.371344089508057, "learning_rate": 3.236898724450592e-05, "log_odds_chosen": 10.621955871582031, "log_odds_ratio": -0.0003117120068054646, "logits/chosen": -0.45636072754859924, "logits/rejected": -0.5469639897346497, "logps/chosen": -0.00021510719670914114, "logps/rejected": -2.3043928146362305, "loss": 1.0254, "nll_loss": 0.25631406903266907, "rewards/accuracies": 1.0, "rewards/chosen": -2.1510721126105636e-05, "rewards/margins": 0.23041778802871704, "rewards/rejected": -0.23043929040431976, "step": 6035 }, { "epoch": 4.174273858921162, "grad_norm": 6.2735419273376465, "learning_rate": 3.236514522821577e-05, "log_odds_chosen": 9.205724716186523, "log_odds_ratio": -0.004912311211228371, "logits/chosen": -0.6135444641113281, "logits/rejected": -0.5729051828384399, "logps/chosen": -0.027633341029286385, "logps/rejected": -1.7607975006103516, "loss": 0.9219, "nll_loss": 0.22998502850532532, "rewards/accuracies": 1.0, "rewards/chosen": -0.002763334196060896, "rewards/margins": 0.17331641912460327, "rewards/rejected": -0.17607976496219635, "step": 6036 }, { "epoch": 4.174965421853389, "grad_norm": 6.859205722808838, "learning_rate": 3.236130321192562e-05, "log_odds_chosen": 7.7524919509887695, "log_odds_ratio": -0.002604874549433589, "logits/chosen": -0.7248696684837341, "logits/rejected": -0.8061745762825012, "logps/chosen": -0.0017519703833386302, "logps/rejected": -1.1978111267089844, "loss": 1.1287, "nll_loss": 0.28192347288131714, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017519704124424607, "rewards/margins": 0.1196059063076973, "rewards/rejected": -0.11978110671043396, "step": 6037 }, { "epoch": 4.175656984785616, "grad_norm": 8.528666496276855, "learning_rate": 3.235746119563547e-05, "log_odds_chosen": 9.001119613647461, "log_odds_ratio": -0.005455498117953539, "logits/chosen": -0.4776851534843445, "logits/rejected": -0.5432164669036865, "logps/chosen": -0.005602252669632435, "logps/rejected": -1.5672550201416016, "loss": 1.2448, "nll_loss": 0.3106645941734314, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005602252203971148, "rewards/margins": 0.15616527199745178, "rewards/rejected": -0.15672549605369568, "step": 6038 }, { "epoch": 4.176348547717843, "grad_norm": 8.460310935974121, "learning_rate": 3.235361917934532e-05, "log_odds_chosen": 10.947973251342773, "log_odds_ratio": -2.6163972506765276e-05, "logits/chosen": -0.5510140061378479, "logits/rejected": -0.45879802107810974, "logps/chosen": -0.002526005730032921, "logps/rejected": -2.913262128829956, "loss": 1.3312, "nll_loss": 0.33279237151145935, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025260058464482427, "rewards/margins": 0.29107362031936646, "rewards/rejected": -0.29132622480392456, "step": 6039 }, { "epoch": 4.177040110650069, "grad_norm": 6.418223857879639, "learning_rate": 3.2349777163055174e-05, "log_odds_chosen": 8.813223838806152, "log_odds_ratio": -0.0008950755000114441, "logits/chosen": -0.4318494498729706, "logits/rejected": -0.4654315114021301, "logps/chosen": -0.009978784248232841, "logps/rejected": -2.0951032638549805, "loss": 1.0086, "nll_loss": 0.252058744430542, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009978783782571554, "rewards/margins": 0.20851242542266846, "rewards/rejected": -0.20951034128665924, "step": 6040 }, { "epoch": 4.177731673582296, "grad_norm": 13.69494342803955, "learning_rate": 3.234593514676502e-05, "log_odds_chosen": 8.5537748336792, "log_odds_ratio": -0.04404618591070175, "logits/chosen": -0.49824628233909607, "logits/rejected": -0.5926130414009094, "logps/chosen": -0.011127600446343422, "logps/rejected": -1.970362901687622, "loss": 1.3995, "nll_loss": 0.3454715609550476, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011127600446343422, "rewards/margins": 0.19592353701591492, "rewards/rejected": -0.19703629612922668, "step": 6041 }, { "epoch": 4.178423236514523, "grad_norm": 14.875024795532227, "learning_rate": 3.234209313047488e-05, "log_odds_chosen": 9.159958839416504, "log_odds_ratio": -0.011396014131605625, "logits/chosen": -0.36483901739120483, "logits/rejected": -0.4657962918281555, "logps/chosen": -0.012494352646172047, "logps/rejected": -2.1094937324523926, "loss": 1.254, "nll_loss": 0.312368243932724, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012494352413341403, "rewards/margins": 0.20969991385936737, "rewards/rejected": -0.2109493613243103, "step": 6042 }, { "epoch": 4.17911479944675, "grad_norm": 10.145478248596191, "learning_rate": 3.2338251114184725e-05, "log_odds_chosen": 10.829462051391602, "log_odds_ratio": -4.55024819530081e-05, "logits/chosen": -0.6555365324020386, "logits/rejected": -0.7552803754806519, "logps/chosen": -0.0007520442013628781, "logps/rejected": -2.7278504371643066, "loss": 1.8365, "nll_loss": 0.4591206908226013, "rewards/accuracies": 1.0, "rewards/chosen": -7.520441431552172e-05, "rewards/margins": 0.27270984649658203, "rewards/rejected": -0.2727850377559662, "step": 6043 }, { "epoch": 4.179806362378977, "grad_norm": 6.937947750091553, "learning_rate": 3.233440909789458e-05, "log_odds_chosen": 10.597908020019531, "log_odds_ratio": -6.170808774186298e-05, "logits/chosen": -0.6114860773086548, "logits/rejected": -0.5894403457641602, "logps/chosen": -0.004680998623371124, "logps/rejected": -2.3617563247680664, "loss": 1.0449, "nll_loss": 0.2612136900424957, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046809983905404806, "rewards/margins": 0.23570753633975983, "rewards/rejected": -0.23617564141750336, "step": 6044 }, { "epoch": 4.180497925311204, "grad_norm": 7.551707744598389, "learning_rate": 3.233056708160443e-05, "log_odds_chosen": 10.2476806640625, "log_odds_ratio": -0.0013617995427921414, "logits/chosen": -0.7141506671905518, "logits/rejected": -0.7077866196632385, "logps/chosen": -0.0011035851202905178, "logps/rejected": -2.494922637939453, "loss": 1.4364, "nll_loss": 0.358967125415802, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011035851639462635, "rewards/margins": 0.2493819147348404, "rewards/rejected": -0.24949225783348083, "step": 6045 }, { "epoch": 4.18118948824343, "grad_norm": 9.954728126525879, "learning_rate": 3.2326725065314275e-05, "log_odds_chosen": 9.69566535949707, "log_odds_ratio": -0.008394381031394005, "logits/chosen": -0.4188861846923828, "logits/rejected": -0.47607100009918213, "logps/chosen": -0.007114926818758249, "logps/rejected": -2.03810453414917, "loss": 1.0229, "nll_loss": 0.25488823652267456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007114927866496146, "rewards/margins": 0.20309896767139435, "rewards/rejected": -0.203810453414917, "step": 6046 }, { "epoch": 4.181881051175657, "grad_norm": 10.269842147827148, "learning_rate": 3.232288304902413e-05, "log_odds_chosen": 11.128933906555176, "log_odds_ratio": -3.3597352739889175e-05, "logits/chosen": -0.438433974981308, "logits/rejected": -0.5809503793716431, "logps/chosen": -0.0003488063521217555, "logps/rejected": -2.880452871322632, "loss": 1.9092, "nll_loss": 0.4772918224334717, "rewards/accuracies": 1.0, "rewards/chosen": -3.488063521217555e-05, "rewards/margins": 0.2880104184150696, "rewards/rejected": -0.2880452871322632, "step": 6047 }, { "epoch": 4.182572614107884, "grad_norm": 7.514199256896973, "learning_rate": 3.231904103273398e-05, "log_odds_chosen": 9.966431617736816, "log_odds_ratio": -0.00013272061187308282, "logits/chosen": -0.5462458729743958, "logits/rejected": -0.5374727845191956, "logps/chosen": -0.00018501865270081908, "logps/rejected": -1.1819729804992676, "loss": 0.9363, "nll_loss": 0.23406431078910828, "rewards/accuracies": 1.0, "rewards/chosen": -1.8501865270081908e-05, "rewards/margins": 0.11817879974842072, "rewards/rejected": -0.11819729954004288, "step": 6048 }, { "epoch": 4.183264177040111, "grad_norm": 8.622661590576172, "learning_rate": 3.231519901644383e-05, "log_odds_chosen": 10.133877754211426, "log_odds_ratio": -0.00022927882673684508, "logits/chosen": -0.3894578218460083, "logits/rejected": -0.4564048945903778, "logps/chosen": -0.0002046562294708565, "logps/rejected": -1.751450777053833, "loss": 0.9345, "nll_loss": 0.2336038500070572, "rewards/accuracies": 1.0, "rewards/chosen": -2.0465622583287768e-05, "rewards/margins": 0.17512460052967072, "rewards/rejected": -0.17514505982398987, "step": 6049 }, { "epoch": 4.183955739972338, "grad_norm": 8.824128150939941, "learning_rate": 3.231135700015368e-05, "log_odds_chosen": 10.109630584716797, "log_odds_ratio": -9.241971565643325e-05, "logits/chosen": -0.7996799945831299, "logits/rejected": -0.868754506111145, "logps/chosen": -0.0006303410045802593, "logps/rejected": -1.9893128871917725, "loss": 1.4641, "nll_loss": 0.36602720618247986, "rewards/accuracies": 1.0, "rewards/chosen": -6.303410191321746e-05, "rewards/margins": 0.1988682597875595, "rewards/rejected": -0.19893130660057068, "step": 6050 }, { "epoch": 4.1846473029045645, "grad_norm": 7.818981170654297, "learning_rate": 3.230751498386354e-05, "log_odds_chosen": 8.395126342773438, "log_odds_ratio": -0.0010139414807781577, "logits/chosen": -0.7074143290519714, "logits/rejected": -0.5708560943603516, "logps/chosen": -0.006374886259436607, "logps/rejected": -1.904820203781128, "loss": 1.7666, "nll_loss": 0.44153669476509094, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006374885560944676, "rewards/margins": 0.18984454870224, "rewards/rejected": -0.1904820203781128, "step": 6051 }, { "epoch": 4.185338865836791, "grad_norm": 9.535639762878418, "learning_rate": 3.230367296757338e-05, "log_odds_chosen": 9.560710906982422, "log_odds_ratio": -0.0009351727785542607, "logits/chosen": -0.7795642018318176, "logits/rejected": -0.8280699253082275, "logps/chosen": -0.009284489788115025, "logps/rejected": -2.2715892791748047, "loss": 1.5569, "nll_loss": 0.38913029432296753, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009284489788115025, "rewards/margins": 0.22623050212860107, "rewards/rejected": -0.22715893387794495, "step": 6052 }, { "epoch": 4.186030428769018, "grad_norm": 10.58439826965332, "learning_rate": 3.2299830951283236e-05, "log_odds_chosen": 10.500917434692383, "log_odds_ratio": -5.0223650760017335e-05, "logits/chosen": -0.6961247324943542, "logits/rejected": -0.7314223051071167, "logps/chosen": -0.000207190663786605, "logps/rejected": -1.9150999784469604, "loss": 0.9968, "nll_loss": 0.2492009401321411, "rewards/accuracies": 1.0, "rewards/chosen": -2.071906601486262e-05, "rewards/margins": 0.1914893090724945, "rewards/rejected": -0.19151002168655396, "step": 6053 }, { "epoch": 4.186721991701245, "grad_norm": 7.235872745513916, "learning_rate": 3.229598893499309e-05, "log_odds_chosen": 8.103301048278809, "log_odds_ratio": -0.026408672332763672, "logits/chosen": -0.683377742767334, "logits/rejected": -0.6405139565467834, "logps/chosen": -0.007550099398940802, "logps/rejected": -1.4345066547393799, "loss": 1.1267, "nll_loss": 0.279028058052063, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007550099398940802, "rewards/margins": 0.14269566535949707, "rewards/rejected": -0.14345067739486694, "step": 6054 }, { "epoch": 4.187413554633472, "grad_norm": 7.28474760055542, "learning_rate": 3.2292146918702934e-05, "log_odds_chosen": 9.414963722229004, "log_odds_ratio": -0.000253773556323722, "logits/chosen": -0.6031832098960876, "logits/rejected": -0.6954954266548157, "logps/chosen": -0.00045355164911597967, "logps/rejected": -1.8198481798171997, "loss": 1.0316, "nll_loss": 0.2578781247138977, "rewards/accuracies": 1.0, "rewards/chosen": -4.535516200121492e-05, "rewards/margins": 0.18193946778774261, "rewards/rejected": -0.1819848120212555, "step": 6055 }, { "epoch": 4.188105117565699, "grad_norm": 7.456570148468018, "learning_rate": 3.2288304902412786e-05, "log_odds_chosen": 9.448654174804688, "log_odds_ratio": -0.010886706411838531, "logits/chosen": -0.6217695474624634, "logits/rejected": -0.5620329976081848, "logps/chosen": -0.08244097232818604, "logps/rejected": -1.5050716400146484, "loss": 0.8834, "nll_loss": 0.21975077688694, "rewards/accuracies": 1.0, "rewards/chosen": -0.008244097232818604, "rewards/margins": 0.14226306974887848, "rewards/rejected": -0.1505071520805359, "step": 6056 }, { "epoch": 4.1887966804979255, "grad_norm": 13.942699432373047, "learning_rate": 3.228446288612264e-05, "log_odds_chosen": 10.055791854858398, "log_odds_ratio": -0.0003354833461344242, "logits/chosen": -0.6835440397262573, "logits/rejected": -0.7492095232009888, "logps/chosen": -0.0004275651299394667, "logps/rejected": -1.910922646522522, "loss": 1.3045, "nll_loss": 0.3260917067527771, "rewards/accuracies": 1.0, "rewards/chosen": -4.2756510083563626e-05, "rewards/margins": 0.1910495012998581, "rewards/rejected": -0.19109223783016205, "step": 6057 }, { "epoch": 4.189488243430152, "grad_norm": 6.805962562561035, "learning_rate": 3.228062086983249e-05, "log_odds_chosen": 9.221537590026855, "log_odds_ratio": -0.0026556141674518585, "logits/chosen": -0.34238967299461365, "logits/rejected": -0.40595924854278564, "logps/chosen": -0.011685644276440144, "logps/rejected": -2.2613413333892822, "loss": 1.145, "nll_loss": 0.28597864508628845, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011685644276440144, "rewards/margins": 0.22496557235717773, "rewards/rejected": -0.22613412141799927, "step": 6058 }, { "epoch": 4.190179806362379, "grad_norm": 9.257378578186035, "learning_rate": 3.227677885354234e-05, "log_odds_chosen": 10.462129592895508, "log_odds_ratio": -0.00010772267705760896, "logits/chosen": -0.955758810043335, "logits/rejected": -0.9561267495155334, "logps/chosen": -0.0004893806180916727, "logps/rejected": -2.2030954360961914, "loss": 1.0503, "nll_loss": 0.26257139444351196, "rewards/accuracies": 1.0, "rewards/chosen": -4.8938061809167266e-05, "rewards/margins": 0.2202606201171875, "rewards/rejected": -0.2203095555305481, "step": 6059 }, { "epoch": 4.190871369294606, "grad_norm": 8.771308898925781, "learning_rate": 3.2272936837252196e-05, "log_odds_chosen": 8.560272216796875, "log_odds_ratio": -0.024763397872447968, "logits/chosen": -0.5189086198806763, "logits/rejected": -0.5635733008384705, "logps/chosen": -0.0065836599096655846, "logps/rejected": -1.3034086227416992, "loss": 0.9276, "nll_loss": 0.22943153977394104, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006583660142496228, "rewards/margins": 0.1296824961900711, "rewards/rejected": -0.13034087419509888, "step": 6060 }, { "epoch": 4.191562932226833, "grad_norm": 7.0978546142578125, "learning_rate": 3.226909482096204e-05, "log_odds_chosen": 8.697175979614258, "log_odds_ratio": -0.00700350059196353, "logits/chosen": -0.8847252130508423, "logits/rejected": -0.8667929172515869, "logps/chosen": -0.00935453362762928, "logps/rejected": -2.0996615886688232, "loss": 0.8984, "nll_loss": 0.22390399873256683, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009354533394798636, "rewards/margins": 0.20903068780899048, "rewards/rejected": -0.20996615290641785, "step": 6061 }, { "epoch": 4.19225449515906, "grad_norm": 7.424405097961426, "learning_rate": 3.2265252804671894e-05, "log_odds_chosen": 8.875204086303711, "log_odds_ratio": -0.001359203364700079, "logits/chosen": -0.4519699811935425, "logits/rejected": -0.39701735973358154, "logps/chosen": -0.02748076431453228, "logps/rejected": -2.3076295852661133, "loss": 1.6804, "nll_loss": 0.4199597239494324, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027480763383209705, "rewards/margins": 0.22801488637924194, "rewards/rejected": -0.23076295852661133, "step": 6062 }, { "epoch": 4.1929460580912865, "grad_norm": 11.73322582244873, "learning_rate": 3.2261410788381746e-05, "log_odds_chosen": 7.903585910797119, "log_odds_ratio": -0.0016246134182438254, "logits/chosen": -0.2822548449039459, "logits/rejected": -0.40047261118888855, "logps/chosen": -0.0024944001343101263, "logps/rejected": -1.2181968688964844, "loss": 1.5106, "nll_loss": 0.3774777948856354, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002494400250725448, "rewards/margins": 0.12157024443149567, "rewards/rejected": -0.12181967496871948, "step": 6063 }, { "epoch": 4.193637621023513, "grad_norm": 13.860588073730469, "learning_rate": 3.225756877209159e-05, "log_odds_chosen": 11.175796508789062, "log_odds_ratio": -2.1426389139378443e-05, "logits/chosen": -0.8253310918807983, "logits/rejected": -0.8749798536300659, "logps/chosen": -0.0001329722290392965, "logps/rejected": -2.177316665649414, "loss": 1.5405, "nll_loss": 0.3851134181022644, "rewards/accuracies": 1.0, "rewards/chosen": -1.3297223631525412e-05, "rewards/margins": 0.21771836280822754, "rewards/rejected": -0.21773165464401245, "step": 6064 }, { "epoch": 4.19432918395574, "grad_norm": 6.953963279724121, "learning_rate": 3.2253726755801445e-05, "log_odds_chosen": 9.380655288696289, "log_odds_ratio": -0.0009703417308628559, "logits/chosen": -0.09669845551252365, "logits/rejected": -0.08982955664396286, "logps/chosen": -0.0030628573149442673, "logps/rejected": -1.5637767314910889, "loss": 1.3549, "nll_loss": 0.33862051367759705, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003062857431359589, "rewards/margins": 0.15607139468193054, "rewards/rejected": -0.1563776731491089, "step": 6065 }, { "epoch": 4.195020746887967, "grad_norm": 8.322160720825195, "learning_rate": 3.22498847395113e-05, "log_odds_chosen": 10.143966674804688, "log_odds_ratio": -0.0001574133784743026, "logits/chosen": -0.4795479476451874, "logits/rejected": -0.6363297700881958, "logps/chosen": -0.0005211975076235831, "logps/rejected": -2.026139497756958, "loss": 0.9717, "nll_loss": 0.24290986359119415, "rewards/accuracies": 1.0, "rewards/chosen": -5.21197471243795e-05, "rewards/margins": 0.20256184041500092, "rewards/rejected": -0.2026139497756958, "step": 6066 }, { "epoch": 4.195712309820194, "grad_norm": 6.1845269203186035, "learning_rate": 3.224604272322115e-05, "log_odds_chosen": 9.356364250183105, "log_odds_ratio": -0.00019514214363880455, "logits/chosen": -0.3387003540992737, "logits/rejected": -0.29374903440475464, "logps/chosen": -0.0002730107225943357, "logps/rejected": -1.181304693222046, "loss": 0.6521, "nll_loss": 0.16300448775291443, "rewards/accuracies": 1.0, "rewards/chosen": -2.7301071895635687e-05, "rewards/margins": 0.11810317635536194, "rewards/rejected": -0.11813047528266907, "step": 6067 }, { "epoch": 4.196403872752421, "grad_norm": 13.75908088684082, "learning_rate": 3.2242200706930995e-05, "log_odds_chosen": 8.38686752319336, "log_odds_ratio": -0.00156727759167552, "logits/chosen": -0.7334190607070923, "logits/rejected": -0.7994363307952881, "logps/chosen": -0.009411602281033993, "logps/rejected": -1.4189223051071167, "loss": 1.4641, "nll_loss": 0.3658694624900818, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009411601931788027, "rewards/margins": 0.14095106720924377, "rewards/rejected": -0.1418922245502472, "step": 6068 }, { "epoch": 4.1970954356846475, "grad_norm": 9.981428146362305, "learning_rate": 3.2238358690640854e-05, "log_odds_chosen": 10.802669525146484, "log_odds_ratio": -4.3216430640313774e-05, "logits/chosen": -0.30952149629592896, "logits/rejected": -0.4717317223548889, "logps/chosen": -0.0001675213425187394, "logps/rejected": -2.0709903240203857, "loss": 1.3458, "nll_loss": 0.33644914627075195, "rewards/accuracies": 1.0, "rewards/chosen": -1.675213388807606e-05, "rewards/margins": 0.20708227157592773, "rewards/rejected": -0.20709902048110962, "step": 6069 }, { "epoch": 4.197786998616874, "grad_norm": 6.36790657043457, "learning_rate": 3.22345166743507e-05, "log_odds_chosen": 10.169801712036133, "log_odds_ratio": -6.394273805199191e-05, "logits/chosen": -0.6206622123718262, "logits/rejected": -0.671150803565979, "logps/chosen": -0.00029006582917645574, "logps/rejected": -1.9227371215820312, "loss": 1.2688, "nll_loss": 0.31719785928726196, "rewards/accuracies": 1.0, "rewards/chosen": -2.9006583645241335e-05, "rewards/margins": 0.19224470853805542, "rewards/rejected": -0.19227372109889984, "step": 6070 }, { "epoch": 4.198478561549101, "grad_norm": 13.407061576843262, "learning_rate": 3.223067465806055e-05, "log_odds_chosen": 10.443319320678711, "log_odds_ratio": -7.405476935673505e-05, "logits/chosen": -0.9477553367614746, "logits/rejected": -0.9680919051170349, "logps/chosen": -0.0002495343505870551, "logps/rejected": -1.7604784965515137, "loss": 1.0552, "nll_loss": 0.26378288865089417, "rewards/accuracies": 1.0, "rewards/chosen": -2.4953433239716105e-05, "rewards/margins": 0.17602290213108063, "rewards/rejected": -0.17604784667491913, "step": 6071 }, { "epoch": 4.199170124481328, "grad_norm": 6.7353034019470215, "learning_rate": 3.2226832641770405e-05, "log_odds_chosen": 9.591629028320312, "log_odds_ratio": -0.0017070891335606575, "logits/chosen": -0.2949681282043457, "logits/rejected": -0.3625943064689636, "logps/chosen": -0.0026625811588019133, "logps/rejected": -2.1500141620635986, "loss": 1.0277, "nll_loss": 0.2567523121833801, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026625811005942523, "rewards/margins": 0.21473515033721924, "rewards/rejected": -0.2150014191865921, "step": 6072 }, { "epoch": 4.199861687413555, "grad_norm": 8.42748737335205, "learning_rate": 3.222299062548025e-05, "log_odds_chosen": 9.877130508422852, "log_odds_ratio": -0.00023209548089653254, "logits/chosen": -0.34057578444480896, "logits/rejected": -0.370511531829834, "logps/chosen": -0.0006250985898077488, "logps/rejected": -1.5764187574386597, "loss": 1.2644, "nll_loss": 0.3160645365715027, "rewards/accuracies": 1.0, "rewards/chosen": -6.250986189115793e-05, "rewards/margins": 0.15757934749126434, "rewards/rejected": -0.15764187276363373, "step": 6073 }, { "epoch": 4.200553250345782, "grad_norm": 10.6553316116333, "learning_rate": 3.22191486091901e-05, "log_odds_chosen": 9.872819900512695, "log_odds_ratio": -0.0001839471369748935, "logits/chosen": -0.517216682434082, "logits/rejected": -0.6004024147987366, "logps/chosen": -0.0005334233283065259, "logps/rejected": -1.601365089416504, "loss": 1.3588, "nll_loss": 0.339683473110199, "rewards/accuracies": 1.0, "rewards/chosen": -5.3342333558248356e-05, "rewards/margins": 0.16008317470550537, "rewards/rejected": -0.16013650596141815, "step": 6074 }, { "epoch": 4.2012448132780085, "grad_norm": 9.332054138183594, "learning_rate": 3.2215306592899955e-05, "log_odds_chosen": 9.281045913696289, "log_odds_ratio": -0.00027333354228176177, "logits/chosen": -0.6094177961349487, "logits/rejected": -0.8387130498886108, "logps/chosen": -0.013858338817954063, "logps/rejected": -1.8963396549224854, "loss": 1.2064, "nll_loss": 0.30156952142715454, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013858338352292776, "rewards/margins": 0.18824812769889832, "rewards/rejected": -0.18963396549224854, "step": 6075 }, { "epoch": 4.201936376210235, "grad_norm": 11.993613243103027, "learning_rate": 3.221146457660981e-05, "log_odds_chosen": 10.28180980682373, "log_odds_ratio": -0.0001393863931298256, "logits/chosen": -0.34310775995254517, "logits/rejected": -0.4047355055809021, "logps/chosen": -0.0015538227744400501, "logps/rejected": -2.500300407409668, "loss": 0.732, "nll_loss": 0.18299169838428497, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015538226580247283, "rewards/margins": 0.24987468123435974, "rewards/rejected": -0.2500300407409668, "step": 6076 }, { "epoch": 4.202627939142462, "grad_norm": 9.744317054748535, "learning_rate": 3.2207622560319654e-05, "log_odds_chosen": 9.809011459350586, "log_odds_ratio": -0.00011489679309306666, "logits/chosen": -1.0774192810058594, "logits/rejected": -1.0722367763519287, "logps/chosen": -0.001422966131940484, "logps/rejected": -2.1753878593444824, "loss": 1.4331, "nll_loss": 0.35827144980430603, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014229661610443145, "rewards/margins": 0.21739648282527924, "rewards/rejected": -0.2175387740135193, "step": 6077 }, { "epoch": 4.203319502074689, "grad_norm": 10.738659858703613, "learning_rate": 3.220378054402951e-05, "log_odds_chosen": 10.447037696838379, "log_odds_ratio": -7.902842480689287e-05, "logits/chosen": -0.37279537320137024, "logits/rejected": -0.4409979283809662, "logps/chosen": -0.00048452045302838087, "logps/rejected": -2.268338918685913, "loss": 1.3094, "nll_loss": 0.3273436725139618, "rewards/accuracies": 1.0, "rewards/chosen": -4.8452049668412656e-05, "rewards/margins": 0.22678545117378235, "rewards/rejected": -0.22683387994766235, "step": 6078 }, { "epoch": 4.204011065006916, "grad_norm": 8.255573272705078, "learning_rate": 3.219993852773936e-05, "log_odds_chosen": 9.974161148071289, "log_odds_ratio": -7.075396570144221e-05, "logits/chosen": -0.8444625735282898, "logits/rejected": -0.7926241159439087, "logps/chosen": -0.0003179244522470981, "logps/rejected": -1.792923927307129, "loss": 1.1419, "nll_loss": 0.2854565382003784, "rewards/accuracies": 1.0, "rewards/chosen": -3.179244595230557e-05, "rewards/margins": 0.17926061153411865, "rewards/rejected": -0.17929241061210632, "step": 6079 }, { "epoch": 4.204702627939143, "grad_norm": 11.725409507751465, "learning_rate": 3.219609651144921e-05, "log_odds_chosen": 8.379392623901367, "log_odds_ratio": -0.002833909820765257, "logits/chosen": -0.6038598418235779, "logits/rejected": -0.5764943361282349, "logps/chosen": -0.0019874493591487408, "logps/rejected": -1.4380276203155518, "loss": 1.4758, "nll_loss": 0.3686673045158386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019874492136295885, "rewards/margins": 0.14360404014587402, "rewards/rejected": -0.14380277693271637, "step": 6080 }, { "epoch": 4.2053941908713695, "grad_norm": 9.011848449707031, "learning_rate": 3.219225449515906e-05, "log_odds_chosen": 10.082165718078613, "log_odds_ratio": -5.680105823557824e-05, "logits/chosen": -0.429607093334198, "logits/rejected": -0.4168073534965515, "logps/chosen": -0.0007915243622846901, "logps/rejected": -2.1282901763916016, "loss": 1.1148, "nll_loss": 0.27870285511016846, "rewards/accuracies": 1.0, "rewards/chosen": -7.915243622846901e-05, "rewards/margins": 0.21274986863136292, "rewards/rejected": -0.21282902359962463, "step": 6081 }, { "epoch": 4.206085753803596, "grad_norm": 7.932432174682617, "learning_rate": 3.218841247886891e-05, "log_odds_chosen": 9.787649154663086, "log_odds_ratio": -0.00029163056751713157, "logits/chosen": -0.6433913707733154, "logits/rejected": -0.7329627871513367, "logps/chosen": -0.00033288367558270693, "logps/rejected": -1.5147624015808105, "loss": 1.0933, "nll_loss": 0.27328595519065857, "rewards/accuracies": 1.0, "rewards/chosen": -3.3288368285866454e-05, "rewards/margins": 0.15144294500350952, "rewards/rejected": -0.15147623419761658, "step": 6082 }, { "epoch": 4.206777316735823, "grad_norm": 5.991988182067871, "learning_rate": 3.218457046257876e-05, "log_odds_chosen": 10.22545051574707, "log_odds_ratio": -5.5978794989641756e-05, "logits/chosen": -0.2843632102012634, "logits/rejected": -0.3155117630958557, "logps/chosen": -0.00017813252634368837, "logps/rejected": -1.5342832803726196, "loss": 1.5026, "nll_loss": 0.37563425302505493, "rewards/accuracies": 1.0, "rewards/chosen": -1.781325408956036e-05, "rewards/margins": 0.15341052412986755, "rewards/rejected": -0.1534283459186554, "step": 6083 }, { "epoch": 4.20746887966805, "grad_norm": 9.368627548217773, "learning_rate": 3.2180728446288614e-05, "log_odds_chosen": 10.176862716674805, "log_odds_ratio": -6.35625547147356e-05, "logits/chosen": -0.2407834827899933, "logits/rejected": -0.3474578559398651, "logps/chosen": -0.00033351860474795103, "logps/rejected": -2.1866698265075684, "loss": 1.2583, "nll_loss": 0.3145698308944702, "rewards/accuracies": 1.0, "rewards/chosen": -3.3351861929986626e-05, "rewards/margins": 0.21863365173339844, "rewards/rejected": -0.21866700053215027, "step": 6084 }, { "epoch": 4.208160442600277, "grad_norm": 8.605694770812988, "learning_rate": 3.2176886429998466e-05, "log_odds_chosen": 8.470521926879883, "log_odds_ratio": -0.03702199459075928, "logits/chosen": -0.5160700082778931, "logits/rejected": -0.5076208114624023, "logps/chosen": -0.03687213361263275, "logps/rejected": -2.100747585296631, "loss": 0.8199, "nll_loss": 0.20127034187316895, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036872131749987602, "rewards/margins": 0.20638757944107056, "rewards/rejected": -0.210074782371521, "step": 6085 }, { "epoch": 4.208852005532504, "grad_norm": 11.147346496582031, "learning_rate": 3.217304441370831e-05, "log_odds_chosen": 9.931164741516113, "log_odds_ratio": -0.0015833813231438398, "logits/chosen": -0.9578123688697815, "logits/rejected": -0.9660002589225769, "logps/chosen": -0.0014081323752179742, "logps/rejected": -1.8071503639221191, "loss": 1.2738, "nll_loss": 0.31828776001930237, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014081323752179742, "rewards/margins": 0.1805742084980011, "rewards/rejected": -0.18071502447128296, "step": 6086 }, { "epoch": 4.20954356846473, "grad_norm": 5.810502052307129, "learning_rate": 3.216920239741817e-05, "log_odds_chosen": 8.74907398223877, "log_odds_ratio": -0.0006889136275276542, "logits/chosen": -0.3961918354034424, "logits/rejected": -0.32647937536239624, "logps/chosen": -0.000981375458650291, "logps/rejected": -1.149702787399292, "loss": 1.1302, "nll_loss": 0.2824803292751312, "rewards/accuracies": 1.0, "rewards/chosen": -9.813754877541214e-05, "rewards/margins": 0.11487214267253876, "rewards/rejected": -0.11497028172016144, "step": 6087 }, { "epoch": 4.210235131396957, "grad_norm": 8.0988187789917, "learning_rate": 3.216536038112802e-05, "log_odds_chosen": 7.66169548034668, "log_odds_ratio": -0.00979495607316494, "logits/chosen": -0.4049184024333954, "logits/rejected": -0.34466010332107544, "logps/chosen": -0.0033011361956596375, "logps/rejected": -0.9623667001724243, "loss": 1.0782, "nll_loss": 0.26856890320777893, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033011360210366547, "rewards/margins": 0.095906563103199, "rewards/rejected": -0.09623667597770691, "step": 6088 }, { "epoch": 4.210926694329184, "grad_norm": 9.195820808410645, "learning_rate": 3.216151836483787e-05, "log_odds_chosen": 10.497115135192871, "log_odds_ratio": -4.924969471176155e-05, "logits/chosen": -0.7107508778572083, "logits/rejected": -0.7157828211784363, "logps/chosen": -0.0003697610227391124, "logps/rejected": -2.3502285480499268, "loss": 0.9972, "nll_loss": 0.2492949366569519, "rewards/accuracies": 1.0, "rewards/chosen": -3.6976100091123953e-05, "rewards/margins": 0.23498587310314178, "rewards/rejected": -0.23502285778522491, "step": 6089 }, { "epoch": 4.211618257261411, "grad_norm": 9.332670211791992, "learning_rate": 3.215767634854772e-05, "log_odds_chosen": 9.460079193115234, "log_odds_ratio": -0.02920219488441944, "logits/chosen": -0.2606007158756256, "logits/rejected": -0.2581832706928253, "logps/chosen": -0.007522523868829012, "logps/rejected": -1.9656100273132324, "loss": 1.801, "nll_loss": 0.44732433557510376, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007522524101659656, "rewards/margins": 0.19580873847007751, "rewards/rejected": -0.19656097888946533, "step": 6090 }, { "epoch": 4.212309820193638, "grad_norm": 4.959421157836914, "learning_rate": 3.215383433225757e-05, "log_odds_chosen": 9.708085060119629, "log_odds_ratio": -0.000136367860250175, "logits/chosen": -0.41468676924705505, "logits/rejected": -0.48830556869506836, "logps/chosen": -0.00017094127542804927, "logps/rejected": -1.119558334350586, "loss": 1.7192, "nll_loss": 0.42978495359420776, "rewards/accuracies": 1.0, "rewards/chosen": -1.7094127542804927e-05, "rewards/margins": 0.11193873733282089, "rewards/rejected": -0.11195583641529083, "step": 6091 }, { "epoch": 4.213001383125865, "grad_norm": 6.860667705535889, "learning_rate": 3.214999231596742e-05, "log_odds_chosen": 11.162261962890625, "log_odds_ratio": -3.2493371691089123e-05, "logits/chosen": -0.3308905363082886, "logits/rejected": -0.3256247639656067, "logps/chosen": -0.00015509540389757603, "logps/rejected": -2.1066527366638184, "loss": 0.7364, "nll_loss": 0.18409138917922974, "rewards/accuracies": 1.0, "rewards/chosen": -1.5509540389757603e-05, "rewards/margins": 0.2106497585773468, "rewards/rejected": -0.2106652855873108, "step": 6092 }, { "epoch": 4.213692946058091, "grad_norm": 10.44353199005127, "learning_rate": 3.214615029967727e-05, "log_odds_chosen": 8.433902740478516, "log_odds_ratio": -0.012419860810041428, "logits/chosen": -0.6420484781265259, "logits/rejected": -0.7362602949142456, "logps/chosen": -0.07046063244342804, "logps/rejected": -1.4887151718139648, "loss": 1.8438, "nll_loss": 0.4597092270851135, "rewards/accuracies": 1.0, "rewards/chosen": -0.007046062964946032, "rewards/margins": 0.14182545244693756, "rewards/rejected": -0.148871511220932, "step": 6093 }, { "epoch": 4.214384508990318, "grad_norm": 9.768402099609375, "learning_rate": 3.2142308283387125e-05, "log_odds_chosen": 9.392027854919434, "log_odds_ratio": -0.0011272934498265386, "logits/chosen": -0.37182796001434326, "logits/rejected": -0.39350610971450806, "logps/chosen": -0.000708098232280463, "logps/rejected": -1.8146228790283203, "loss": 1.1423, "nll_loss": 0.28546077013015747, "rewards/accuracies": 1.0, "rewards/chosen": -7.08098232280463e-05, "rewards/margins": 0.18139147758483887, "rewards/rejected": -0.18146228790283203, "step": 6094 }, { "epoch": 4.215076071922545, "grad_norm": 9.150002479553223, "learning_rate": 3.213846626709697e-05, "log_odds_chosen": 8.399335861206055, "log_odds_ratio": -0.0009091562824323773, "logits/chosen": -0.6835078001022339, "logits/rejected": -0.685696005821228, "logps/chosen": -0.0009837490506470203, "logps/rejected": -1.2630008459091187, "loss": 1.8023, "nll_loss": 0.4504738450050354, "rewards/accuracies": 1.0, "rewards/chosen": -9.837490506470203e-05, "rewards/margins": 0.12620171904563904, "rewards/rejected": -0.12630009651184082, "step": 6095 }, { "epoch": 4.215767634854772, "grad_norm": 6.342299461364746, "learning_rate": 3.213462425080683e-05, "log_odds_chosen": 8.81633472442627, "log_odds_ratio": -0.001122811110690236, "logits/chosen": -0.24323329329490662, "logits/rejected": -0.17171940207481384, "logps/chosen": -0.0029868108686059713, "logps/rejected": -1.5076463222503662, "loss": 1.367, "nll_loss": 0.3416462242603302, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029868108686059713, "rewards/margins": 0.1504659503698349, "rewards/rejected": -0.15076464414596558, "step": 6096 }, { "epoch": 4.216459197786999, "grad_norm": 10.462725639343262, "learning_rate": 3.2130782234516675e-05, "log_odds_chosen": 10.151618957519531, "log_odds_ratio": -0.0014048486482352018, "logits/chosen": -0.5716694593429565, "logits/rejected": -0.7377324104309082, "logps/chosen": -0.0014676820719614625, "logps/rejected": -1.750305414199829, "loss": 1.2301, "nll_loss": 0.3073880076408386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014676823047921062, "rewards/margins": 0.17488376796245575, "rewards/rejected": -0.17503052949905396, "step": 6097 }, { "epoch": 4.217150760719226, "grad_norm": 7.823307037353516, "learning_rate": 3.212694021822653e-05, "log_odds_chosen": 10.025672912597656, "log_odds_ratio": -0.0014221564633771777, "logits/chosen": -0.534694492816925, "logits/rejected": -0.5644404888153076, "logps/chosen": -0.008679674938321114, "logps/rejected": -3.0878677368164062, "loss": 1.4253, "nll_loss": 0.35618501901626587, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008679674356244504, "rewards/margins": 0.30791881680488586, "rewards/rejected": -0.3087867498397827, "step": 6098 }, { "epoch": 4.217842323651452, "grad_norm": 6.906431198120117, "learning_rate": 3.212309820193638e-05, "log_odds_chosen": 10.416519165039062, "log_odds_ratio": -0.00010263586591463536, "logits/chosen": -0.6076978445053101, "logits/rejected": -0.5971615314483643, "logps/chosen": -0.001731468946672976, "logps/rejected": -2.080808162689209, "loss": 1.1623, "nll_loss": 0.29056233167648315, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017314690921921283, "rewards/margins": 0.20790766179561615, "rewards/rejected": -0.20808082818984985, "step": 6099 }, { "epoch": 4.218533886583679, "grad_norm": 3.3100812435150146, "learning_rate": 3.211925618564623e-05, "log_odds_chosen": 9.550172805786133, "log_odds_ratio": -0.002463590120896697, "logits/chosen": -0.4040372669696808, "logits/rejected": -0.3574890196323395, "logps/chosen": -0.00257070641964674, "logps/rejected": -2.1435904502868652, "loss": 1.1569, "nll_loss": 0.2889706492424011, "rewards/accuracies": 1.0, "rewards/chosen": -0.000257070641964674, "rewards/margins": 0.21410197019577026, "rewards/rejected": -0.21435905992984772, "step": 6100 }, { "epoch": 4.219225449515906, "grad_norm": 9.177789688110352, "learning_rate": 3.211541416935608e-05, "log_odds_chosen": 8.393549919128418, "log_odds_ratio": -0.07194874435663223, "logits/chosen": -0.1837049126625061, "logits/rejected": -0.1863904595375061, "logps/chosen": -0.039502695202827454, "logps/rejected": -1.401501178741455, "loss": 1.1874, "nll_loss": 0.2896571457386017, "rewards/accuracies": 1.0, "rewards/chosen": -0.003950269892811775, "rewards/margins": 0.13619986176490784, "rewards/rejected": -0.14015012979507446, "step": 6101 }, { "epoch": 4.219917012448133, "grad_norm": 7.0887579917907715, "learning_rate": 3.211157215306593e-05, "log_odds_chosen": 9.437784194946289, "log_odds_ratio": -0.00438450463116169, "logits/chosen": -0.5653685331344604, "logits/rejected": -0.6047671437263489, "logps/chosen": -0.0028303221333771944, "logps/rejected": -1.6747334003448486, "loss": 0.8287, "nll_loss": 0.2067372351884842, "rewards/accuracies": 1.0, "rewards/chosen": -0.000283032248262316, "rewards/margins": 0.1671903133392334, "rewards/rejected": -0.16747334599494934, "step": 6102 }, { "epoch": 4.22060857538036, "grad_norm": 12.398027420043945, "learning_rate": 3.210773013677578e-05, "log_odds_chosen": 10.742816925048828, "log_odds_ratio": -0.00032149453181773424, "logits/chosen": -0.616948127746582, "logits/rejected": -0.6294673085212708, "logps/chosen": -0.0004734890826512128, "logps/rejected": -2.062647819519043, "loss": 1.9112, "nll_loss": 0.47776588797569275, "rewards/accuracies": 1.0, "rewards/chosen": -4.734890899271704e-05, "rewards/margins": 0.2062174379825592, "rewards/rejected": -0.20626477897167206, "step": 6103 }, { "epoch": 4.2213001383125865, "grad_norm": 8.384824752807617, "learning_rate": 3.210388812048563e-05, "log_odds_chosen": 8.673848152160645, "log_odds_ratio": -0.013089235872030258, "logits/chosen": -0.26488542556762695, "logits/rejected": -0.3186490535736084, "logps/chosen": -0.07050205767154694, "logps/rejected": -1.806774616241455, "loss": 1.0463, "nll_loss": 0.26025834679603577, "rewards/accuracies": 1.0, "rewards/chosen": -0.007050206419080496, "rewards/margins": 0.17362727224826813, "rewards/rejected": -0.18067745864391327, "step": 6104 }, { "epoch": 4.221991701244813, "grad_norm": 8.230053901672363, "learning_rate": 3.210004610419549e-05, "log_odds_chosen": 9.35822582244873, "log_odds_ratio": -0.0004413676797412336, "logits/chosen": -0.4035882353782654, "logits/rejected": -0.570648729801178, "logps/chosen": -0.0005642552860081196, "logps/rejected": -1.2417488098144531, "loss": 1.6527, "nll_loss": 0.4131428003311157, "rewards/accuracies": 1.0, "rewards/chosen": -5.64255278732162e-05, "rewards/margins": 0.12411844730377197, "rewards/rejected": -0.12417487800121307, "step": 6105 }, { "epoch": 4.22268326417704, "grad_norm": 8.342848777770996, "learning_rate": 3.2096204087905334e-05, "log_odds_chosen": 9.080486297607422, "log_odds_ratio": -0.01734662614762783, "logits/chosen": -0.4084089398384094, "logits/rejected": -0.392818808555603, "logps/chosen": -0.00646335119381547, "logps/rejected": -1.2589843273162842, "loss": 1.5447, "nll_loss": 0.38444066047668457, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006463351310230792, "rewards/margins": 0.12525209784507751, "rewards/rejected": -0.12589843571186066, "step": 6106 }, { "epoch": 4.223374827109267, "grad_norm": 9.799078941345215, "learning_rate": 3.2092362071615186e-05, "log_odds_chosen": 10.004591941833496, "log_odds_ratio": -9.156642772722989e-05, "logits/chosen": -0.48572254180908203, "logits/rejected": -0.5722478628158569, "logps/chosen": -0.00025361331063322723, "logps/rejected": -1.7181507349014282, "loss": 1.7266, "nll_loss": 0.43164747953414917, "rewards/accuracies": 1.0, "rewards/chosen": -2.5361332518514246e-05, "rewards/margins": 0.1717897206544876, "rewards/rejected": -0.17181509733200073, "step": 6107 }, { "epoch": 4.224066390041494, "grad_norm": 7.455623149871826, "learning_rate": 3.208852005532504e-05, "log_odds_chosen": 10.768928527832031, "log_odds_ratio": -5.826863707625307e-05, "logits/chosen": -0.40005171298980713, "logits/rejected": -0.47438183426856995, "logps/chosen": -0.00014770789130125195, "logps/rejected": -1.9838204383850098, "loss": 1.1787, "nll_loss": 0.29465824365615845, "rewards/accuracies": 1.0, "rewards/chosen": -1.4770789675822016e-05, "rewards/margins": 0.19836726784706116, "rewards/rejected": -0.19838203489780426, "step": 6108 }, { "epoch": 4.224757952973721, "grad_norm": 7.88972806930542, "learning_rate": 3.208467803903489e-05, "log_odds_chosen": 10.738612174987793, "log_odds_ratio": -4.431870911503211e-05, "logits/chosen": -0.6616283059120178, "logits/rejected": -0.7461217641830444, "logps/chosen": -0.00019027273810934275, "logps/rejected": -2.072432041168213, "loss": 1.2048, "nll_loss": 0.3012027144432068, "rewards/accuracies": 1.0, "rewards/chosen": -1.9027273083338514e-05, "rewards/margins": 0.20722419023513794, "rewards/rejected": -0.20724321901798248, "step": 6109 }, { "epoch": 4.2254495159059475, "grad_norm": 13.2335844039917, "learning_rate": 3.208083602274474e-05, "log_odds_chosen": 10.215333938598633, "log_odds_ratio": -0.0009812734788283706, "logits/chosen": -0.5087462663650513, "logits/rejected": -0.5887828469276428, "logps/chosen": -0.0011902485275641084, "logps/rejected": -2.5018463134765625, "loss": 1.3774, "nll_loss": 0.3442583680152893, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011902485857717693, "rewards/margins": 0.2500656247138977, "rewards/rejected": -0.2501846253871918, "step": 6110 }, { "epoch": 4.226141078838174, "grad_norm": 7.052605152130127, "learning_rate": 3.207699400645459e-05, "log_odds_chosen": 10.94500732421875, "log_odds_ratio": -5.660950409946963e-05, "logits/chosen": -0.5007820725440979, "logits/rejected": -0.571925699710846, "logps/chosen": -0.00013881264021620154, "logps/rejected": -1.9232251644134521, "loss": 1.068, "nll_loss": 0.2669881284236908, "rewards/accuracies": 1.0, "rewards/chosen": -1.3881262930226512e-05, "rewards/margins": 0.19230863451957703, "rewards/rejected": -0.1923225224018097, "step": 6111 }, { "epoch": 4.226832641770401, "grad_norm": 7.348779678344727, "learning_rate": 3.207315199016444e-05, "log_odds_chosen": 10.577987670898438, "log_odds_ratio": -5.887104634894058e-05, "logits/chosen": -0.3846423029899597, "logits/rejected": -0.4360952079296112, "logps/chosen": -0.00017599599959794432, "logps/rejected": -1.9604169130325317, "loss": 1.1102, "nll_loss": 0.27753946185112, "rewards/accuracies": 1.0, "rewards/chosen": -1.7599601051188074e-05, "rewards/margins": 0.1960241049528122, "rewards/rejected": -0.19604171812534332, "step": 6112 }, { "epoch": 4.227524204702628, "grad_norm": 6.602800369262695, "learning_rate": 3.206930997387429e-05, "log_odds_chosen": 9.438689231872559, "log_odds_ratio": -0.000783280935138464, "logits/chosen": -0.5962222814559937, "logits/rejected": -0.612580418586731, "logps/chosen": -0.0053511569276452065, "logps/rejected": -1.8292964696884155, "loss": 1.7798, "nll_loss": 0.44486165046691895, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005351156578399241, "rewards/margins": 0.18239453434944153, "rewards/rejected": -0.1829296350479126, "step": 6113 }, { "epoch": 4.228215767634855, "grad_norm": 22.292438507080078, "learning_rate": 3.2065467957584147e-05, "log_odds_chosen": 8.830830574035645, "log_odds_ratio": -0.07559003680944443, "logits/chosen": -0.3832665681838989, "logits/rejected": -0.43286067247390747, "logps/chosen": -0.25410130620002747, "logps/rejected": -2.479095220565796, "loss": 1.4115, "nll_loss": 0.3453068137168884, "rewards/accuracies": 1.0, "rewards/chosen": -0.025410132482647896, "rewards/margins": 0.22249938547611237, "rewards/rejected": -0.2479095309972763, "step": 6114 }, { "epoch": 4.228907330567082, "grad_norm": 9.745912551879883, "learning_rate": 3.206162594129399e-05, "log_odds_chosen": 8.932182312011719, "log_odds_ratio": -0.08399414271116257, "logits/chosen": -0.11780837923288345, "logits/rejected": -0.20581534504890442, "logps/chosen": -0.06577721983194351, "logps/rejected": -1.7126953601837158, "loss": 0.9885, "nll_loss": 0.23871365189552307, "rewards/accuracies": 1.0, "rewards/chosen": -0.006577721331268549, "rewards/margins": 0.16469180583953857, "rewards/rejected": -0.17126955091953278, "step": 6115 }, { "epoch": 4.2295988934993085, "grad_norm": 8.25196647644043, "learning_rate": 3.2057783925003845e-05, "log_odds_chosen": 9.533069610595703, "log_odds_ratio": -0.03513272851705551, "logits/chosen": -0.6124993562698364, "logits/rejected": -0.5530567169189453, "logps/chosen": -0.007125381845980883, "logps/rejected": -1.7799084186553955, "loss": 1.0589, "nll_loss": 0.26121440529823303, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007125381380319595, "rewards/margins": 0.1772783100605011, "rewards/rejected": -0.1779908537864685, "step": 6116 }, { "epoch": 4.230290456431535, "grad_norm": 15.340441703796387, "learning_rate": 3.20539419087137e-05, "log_odds_chosen": 9.796329498291016, "log_odds_ratio": -0.00018708838615566492, "logits/chosen": -0.23748236894607544, "logits/rejected": -0.30873650312423706, "logps/chosen": -0.00044121668906882405, "logps/rejected": -1.911644697189331, "loss": 1.0922, "nll_loss": 0.2730366289615631, "rewards/accuracies": 1.0, "rewards/chosen": -4.4121668906882405e-05, "rewards/margins": 0.19112035632133484, "rewards/rejected": -0.19116447865962982, "step": 6117 }, { "epoch": 4.230982019363762, "grad_norm": 8.803054809570312, "learning_rate": 3.205009989242355e-05, "log_odds_chosen": 9.855988502502441, "log_odds_ratio": -0.0022794578690081835, "logits/chosen": -0.4125463366508484, "logits/rejected": -0.48373299837112427, "logps/chosen": -0.0019304307643324137, "logps/rejected": -1.82631254196167, "loss": 1.1137, "nll_loss": 0.2781933844089508, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019304307352285832, "rewards/margins": 0.1824381947517395, "rewards/rejected": -0.182631254196167, "step": 6118 }, { "epoch": 4.231673582295989, "grad_norm": 10.609899520874023, "learning_rate": 3.2046257876133395e-05, "log_odds_chosen": 9.470155715942383, "log_odds_ratio": -0.0007128569413907826, "logits/chosen": -0.9156774282455444, "logits/rejected": -0.9039434194564819, "logps/chosen": -0.0009397302637808025, "logps/rejected": -1.8147461414337158, "loss": 2.4276, "nll_loss": 0.606838583946228, "rewards/accuracies": 1.0, "rewards/chosen": -9.39730234676972e-05, "rewards/margins": 0.18138065934181213, "rewards/rejected": -0.18147462606430054, "step": 6119 }, { "epoch": 4.232365145228216, "grad_norm": 10.940679550170898, "learning_rate": 3.204241585984325e-05, "log_odds_chosen": 10.973442077636719, "log_odds_ratio": -1.9855779100907966e-05, "logits/chosen": -0.09074971824884415, "logits/rejected": -0.25256481766700745, "logps/chosen": -0.0001564031554153189, "logps/rejected": -2.141129970550537, "loss": 0.962, "nll_loss": 0.2405095249414444, "rewards/accuracies": 1.0, "rewards/chosen": -1.5640316632925533e-05, "rewards/margins": 0.21409735083580017, "rewards/rejected": -0.2141129970550537, "step": 6120 }, { "epoch": 4.233056708160443, "grad_norm": 9.780346870422363, "learning_rate": 3.20385738435531e-05, "log_odds_chosen": 10.847525596618652, "log_odds_ratio": -0.00013202108675614, "logits/chosen": -0.5023176074028015, "logits/rejected": -0.5953850746154785, "logps/chosen": -0.00022254750365391374, "logps/rejected": -2.1902830600738525, "loss": 0.767, "nll_loss": 0.19174344837665558, "rewards/accuracies": 1.0, "rewards/chosen": -2.2254751456785016e-05, "rewards/margins": 0.21900604665279388, "rewards/rejected": -0.2190282940864563, "step": 6121 }, { "epoch": 4.2337482710926695, "grad_norm": 7.069876194000244, "learning_rate": 3.2034731827262946e-05, "log_odds_chosen": 9.487290382385254, "log_odds_ratio": -0.00037358151166699827, "logits/chosen": -0.4292406439781189, "logits/rejected": -0.4715169370174408, "logps/chosen": -0.0007413811981678009, "logps/rejected": -1.953993320465088, "loss": 0.8595, "nll_loss": 0.21483467519283295, "rewards/accuracies": 1.0, "rewards/chosen": -7.413812272716314e-05, "rewards/margins": 0.19532519578933716, "rewards/rejected": -0.19539934396743774, "step": 6122 }, { "epoch": 4.234439834024896, "grad_norm": 8.1299467086792, "learning_rate": 3.20308898109728e-05, "log_odds_chosen": 9.821857452392578, "log_odds_ratio": -0.00037624494871124625, "logits/chosen": -0.34869810938835144, "logits/rejected": -0.4733930230140686, "logps/chosen": -0.005378293804824352, "logps/rejected": -2.4110655784606934, "loss": 1.1122, "nll_loss": 0.27802199125289917, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005378293571993709, "rewards/margins": 0.24056872725486755, "rewards/rejected": -0.2411065399646759, "step": 6123 }, { "epoch": 4.235131396957123, "grad_norm": 6.791889190673828, "learning_rate": 3.202704779468265e-05, "log_odds_chosen": 9.134946823120117, "log_odds_ratio": -0.000719600124284625, "logits/chosen": -0.43868210911750793, "logits/rejected": -0.429718554019928, "logps/chosen": -0.0015843857545405626, "logps/rejected": -1.8186661005020142, "loss": 1.1613, "nll_loss": 0.2902475595474243, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015843857545405626, "rewards/margins": 0.1817081868648529, "rewards/rejected": -0.1818666309118271, "step": 6124 }, { "epoch": 4.23582295988935, "grad_norm": 6.706216812133789, "learning_rate": 3.20232057783925e-05, "log_odds_chosen": 10.093393325805664, "log_odds_ratio": -0.00034954206785187125, "logits/chosen": -0.4765441417694092, "logits/rejected": -0.47055739164352417, "logps/chosen": -0.0024698227643966675, "logps/rejected": -1.943615436553955, "loss": 1.2416, "nll_loss": 0.31037530303001404, "rewards/accuracies": 1.0, "rewards/chosen": -0.000246982293901965, "rewards/margins": 0.1941145360469818, "rewards/rejected": -0.19436152279376984, "step": 6125 }, { "epoch": 4.236514522821577, "grad_norm": 7.8888983726501465, "learning_rate": 3.201936376210235e-05, "log_odds_chosen": 7.003107070922852, "log_odds_ratio": -0.03631995618343353, "logits/chosen": -0.4368114769458771, "logits/rejected": -0.5176033973693848, "logps/chosen": -0.011342689394950867, "logps/rejected": -1.1231927871704102, "loss": 1.4229, "nll_loss": 0.3520943522453308, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011342689394950867, "rewards/margins": 0.11118502169847488, "rewards/rejected": -0.11231927573680878, "step": 6126 }, { "epoch": 4.237206085753804, "grad_norm": 12.106274604797363, "learning_rate": 3.201552174581221e-05, "log_odds_chosen": 9.750165939331055, "log_odds_ratio": -0.0005116397514939308, "logits/chosen": -0.30245599150657654, "logits/rejected": -0.35725438594818115, "logps/chosen": -0.003118205117061734, "logps/rejected": -2.2988009452819824, "loss": 1.3329, "nll_loss": 0.3331792652606964, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003118205349892378, "rewards/margins": 0.2295682728290558, "rewards/rejected": -0.22988007962703705, "step": 6127 }, { "epoch": 4.2378976486860305, "grad_norm": 5.6999969482421875, "learning_rate": 3.2011679729522054e-05, "log_odds_chosen": 10.503633499145508, "log_odds_ratio": -9.613503061700612e-05, "logits/chosen": -0.5862594842910767, "logits/rejected": -0.5881119966506958, "logps/chosen": -0.0017665009945631027, "logps/rejected": -2.8170573711395264, "loss": 1.2073, "nll_loss": 0.30181971192359924, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001766501081874594, "rewards/margins": 0.28152909874916077, "rewards/rejected": -0.281705766916275, "step": 6128 }, { "epoch": 4.238589211618257, "grad_norm": 6.479320049285889, "learning_rate": 3.2007837713231906e-05, "log_odds_chosen": 8.953347206115723, "log_odds_ratio": -0.0009635446476750076, "logits/chosen": -0.5452961921691895, "logits/rejected": -0.5558996796607971, "logps/chosen": -0.014149404130876064, "logps/rejected": -2.3288004398345947, "loss": 1.6097, "nll_loss": 0.4023188054561615, "rewards/accuracies": 1.0, "rewards/chosen": -0.001414940576069057, "rewards/margins": 0.23146511614322662, "rewards/rejected": -0.23288005590438843, "step": 6129 }, { "epoch": 4.239280774550484, "grad_norm": 9.907674789428711, "learning_rate": 3.200399569694176e-05, "log_odds_chosen": 9.380223274230957, "log_odds_ratio": -0.007749211508780718, "logits/chosen": -0.5595596432685852, "logits/rejected": -0.6764500737190247, "logps/chosen": -0.002896445570513606, "logps/rejected": -2.051048755645752, "loss": 0.9159, "nll_loss": 0.22821009159088135, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002896445512305945, "rewards/margins": 0.20481520891189575, "rewards/rejected": -0.20510487258434296, "step": 6130 }, { "epoch": 4.239972337482711, "grad_norm": 8.833740234375, "learning_rate": 3.2000153680651604e-05, "log_odds_chosen": 10.789620399475098, "log_odds_ratio": -5.9470221458468586e-05, "logits/chosen": -0.7925410866737366, "logits/rejected": -0.8144933581352234, "logps/chosen": -0.00027380624669604003, "logps/rejected": -2.0729591846466064, "loss": 0.9128, "nll_loss": 0.22818849980831146, "rewards/accuracies": 1.0, "rewards/chosen": -2.7380625397199765e-05, "rewards/margins": 0.20726853609085083, "rewards/rejected": -0.20729590952396393, "step": 6131 }, { "epoch": 4.240663900414938, "grad_norm": 11.288015365600586, "learning_rate": 3.199631166436146e-05, "log_odds_chosen": 10.079020500183105, "log_odds_ratio": -8.671080286148936e-05, "logits/chosen": -0.8377615213394165, "logits/rejected": -0.9092694520950317, "logps/chosen": -0.00032979599200189114, "logps/rejected": -1.8943513631820679, "loss": 1.6129, "nll_loss": 0.4032214879989624, "rewards/accuracies": 1.0, "rewards/chosen": -3.2979594834614545e-05, "rewards/margins": 0.18940216302871704, "rewards/rejected": -0.18943513929843903, "step": 6132 }, { "epoch": 4.241355463347165, "grad_norm": 9.863534927368164, "learning_rate": 3.199246964807131e-05, "log_odds_chosen": 10.014764785766602, "log_odds_ratio": -0.00013936430332250893, "logits/chosen": -0.7015513777732849, "logits/rejected": -0.7403870224952698, "logps/chosen": -0.0027051009237766266, "logps/rejected": -2.1331331729888916, "loss": 1.6729, "nll_loss": 0.41820576786994934, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027051009237766266, "rewards/margins": 0.21304281055927277, "rewards/rejected": -0.21331332623958588, "step": 6133 }, { "epoch": 4.2420470262793915, "grad_norm": 9.676593780517578, "learning_rate": 3.198862763178116e-05, "log_odds_chosen": 9.681331634521484, "log_odds_ratio": -0.00056239910190925, "logits/chosen": -0.7100040316581726, "logits/rejected": -0.8493018746376038, "logps/chosen": -0.0024421019479632378, "logps/rejected": -2.028809070587158, "loss": 1.0036, "nll_loss": 0.25083646178245544, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002442101831547916, "rewards/margins": 0.2026366889476776, "rewards/rejected": -0.20288090407848358, "step": 6134 }, { "epoch": 4.242738589211618, "grad_norm": 20.33088493347168, "learning_rate": 3.198478561549101e-05, "log_odds_chosen": 9.505403518676758, "log_odds_ratio": -0.044929858297109604, "logits/chosen": -0.6986443996429443, "logits/rejected": -0.7685960531234741, "logps/chosen": -0.1546137034893036, "logps/rejected": -2.1600124835968018, "loss": 0.9972, "nll_loss": 0.24481570720672607, "rewards/accuracies": 1.0, "rewards/chosen": -0.015461370348930359, "rewards/margins": 0.20053988695144653, "rewards/rejected": -0.21600127220153809, "step": 6135 }, { "epoch": 4.243430152143845, "grad_norm": 11.125940322875977, "learning_rate": 3.1980943599200866e-05, "log_odds_chosen": 10.615583419799805, "log_odds_ratio": -4.1170831536874175e-05, "logits/chosen": -0.4305480718612671, "logits/rejected": -0.5694938898086548, "logps/chosen": -0.00042377153295092285, "logps/rejected": -2.0867981910705566, "loss": 1.0256, "nll_loss": 0.25639212131500244, "rewards/accuracies": 1.0, "rewards/chosen": -4.237715620547533e-05, "rewards/margins": 0.20863744616508484, "rewards/rejected": -0.20867982506752014, "step": 6136 }, { "epoch": 4.244121715076072, "grad_norm": 16.073240280151367, "learning_rate": 3.197710158291071e-05, "log_odds_chosen": 10.713496208190918, "log_odds_ratio": -4.822468326892704e-05, "logits/chosen": -0.9513100385665894, "logits/rejected": -0.9958503246307373, "logps/chosen": -0.00024816146469675004, "logps/rejected": -2.3876280784606934, "loss": 1.1473, "nll_loss": 0.2868252694606781, "rewards/accuracies": 1.0, "rewards/chosen": -2.481614501448348e-05, "rewards/margins": 0.23873798549175262, "rewards/rejected": -0.23876279592514038, "step": 6137 }, { "epoch": 4.244813278008299, "grad_norm": 7.69055700302124, "learning_rate": 3.1973259566620565e-05, "log_odds_chosen": 10.11435317993164, "log_odds_ratio": -4.604416972142644e-05, "logits/chosen": -0.3621489107608795, "logits/rejected": -0.46428364515304565, "logps/chosen": -0.00031951890559867024, "logps/rejected": -2.013823986053467, "loss": 0.9587, "nll_loss": 0.23967821896076202, "rewards/accuracies": 1.0, "rewards/chosen": -3.195188764948398e-05, "rewards/margins": 0.20135048031806946, "rewards/rejected": -0.20138242840766907, "step": 6138 }, { "epoch": 4.245504840940526, "grad_norm": 7.717804908752441, "learning_rate": 3.196941755033042e-05, "log_odds_chosen": 10.34365177154541, "log_odds_ratio": -7.24259516573511e-05, "logits/chosen": -0.28142252564430237, "logits/rejected": -0.41735485196113586, "logps/chosen": -0.0002807824348565191, "logps/rejected": -1.962577223777771, "loss": 1.4669, "nll_loss": 0.36671096086502075, "rewards/accuracies": 1.0, "rewards/chosen": -2.8078244213247672e-05, "rewards/margins": 0.19622963666915894, "rewards/rejected": -0.19625772535800934, "step": 6139 }, { "epoch": 4.246196403872752, "grad_norm": 11.58918571472168, "learning_rate": 3.196557553404026e-05, "log_odds_chosen": 9.673727035522461, "log_odds_ratio": -0.0018433015793561935, "logits/chosen": -0.54544997215271, "logits/rejected": -0.5666953325271606, "logps/chosen": -0.02644137106835842, "logps/rejected": -1.6399683952331543, "loss": 1.3217, "nll_loss": 0.3302415609359741, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026441370137035847, "rewards/margins": 0.1613527089357376, "rewards/rejected": -0.1639968454837799, "step": 6140 }, { "epoch": 4.246887966804979, "grad_norm": 8.090474128723145, "learning_rate": 3.1961733517750115e-05, "log_odds_chosen": 9.784919738769531, "log_odds_ratio": -0.00011589626228669658, "logits/chosen": -0.6708372831344604, "logits/rejected": -0.6841962337493896, "logps/chosen": -0.00021240772912278771, "logps/rejected": -1.6052401065826416, "loss": 1.5484, "nll_loss": 0.3870971202850342, "rewards/accuracies": 1.0, "rewards/chosen": -2.124077218468301e-05, "rewards/margins": 0.16050276160240173, "rewards/rejected": -0.16052399575710297, "step": 6141 }, { "epoch": 4.247579529737206, "grad_norm": 7.676374435424805, "learning_rate": 3.195789150145997e-05, "log_odds_chosen": 9.621414184570312, "log_odds_ratio": -0.00040039775194600224, "logits/chosen": -0.45882120728492737, "logits/rejected": -0.5545141100883484, "logps/chosen": -0.0005856946809217334, "logps/rejected": -1.842532992362976, "loss": 1.1008, "nll_loss": 0.27515920996665955, "rewards/accuracies": 1.0, "rewards/chosen": -5.85694688197691e-05, "rewards/margins": 0.18419474363327026, "rewards/rejected": -0.18425330519676208, "step": 6142 }, { "epoch": 4.248271092669433, "grad_norm": 13.426033973693848, "learning_rate": 3.195404948516982e-05, "log_odds_chosen": 10.054722785949707, "log_odds_ratio": -0.00023639341816306114, "logits/chosen": -0.8353263735771179, "logits/rejected": -0.8713321685791016, "logps/chosen": -0.0007747645722702146, "logps/rejected": -1.8731052875518799, "loss": 1.5143, "nll_loss": 0.3785462975502014, "rewards/accuracies": 1.0, "rewards/chosen": -7.747646304778755e-05, "rewards/margins": 0.18723304569721222, "rewards/rejected": -0.18731053173542023, "step": 6143 }, { "epoch": 4.24896265560166, "grad_norm": 4.346104145050049, "learning_rate": 3.1950207468879666e-05, "log_odds_chosen": 8.975990295410156, "log_odds_ratio": -0.001990825869143009, "logits/chosen": -0.36998724937438965, "logits/rejected": -0.3497503995895386, "logps/chosen": -0.002082593971863389, "logps/rejected": -1.9057161808013916, "loss": 1.5676, "nll_loss": 0.3916909694671631, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020825938554480672, "rewards/margins": 0.19036336243152618, "rewards/rejected": -0.1905716359615326, "step": 6144 }, { "epoch": 4.249654218533887, "grad_norm": 6.410309791564941, "learning_rate": 3.1946365452589525e-05, "log_odds_chosen": 10.1351957321167, "log_odds_ratio": -0.00023676696582697332, "logits/chosen": -0.27143821120262146, "logits/rejected": -0.30729198455810547, "logps/chosen": -0.0009003905579447746, "logps/rejected": -2.2765510082244873, "loss": 0.6741, "nll_loss": 0.16851022839546204, "rewards/accuracies": 1.0, "rewards/chosen": -9.003906598081812e-05, "rewards/margins": 0.22756507992744446, "rewards/rejected": -0.22765511274337769, "step": 6145 }, { "epoch": 4.250345781466113, "grad_norm": 8.370780944824219, "learning_rate": 3.194252343629937e-05, "log_odds_chosen": 8.947935104370117, "log_odds_ratio": -0.0002222056791651994, "logits/chosen": -0.4107566177845001, "logits/rejected": -0.4923899173736572, "logps/chosen": -0.0005199002334848046, "logps/rejected": -1.4588299989700317, "loss": 1.1181, "nll_loss": 0.27949339151382446, "rewards/accuracies": 1.0, "rewards/chosen": -5.1990024076076224e-05, "rewards/margins": 0.14583101868629456, "rewards/rejected": -0.1458830088376999, "step": 6146 }, { "epoch": 4.25103734439834, "grad_norm": 14.382808685302734, "learning_rate": 3.193868142000922e-05, "log_odds_chosen": 9.37031078338623, "log_odds_ratio": -0.1772003322839737, "logits/chosen": -0.5559031367301941, "logits/rejected": -0.5951072573661804, "logps/chosen": -0.02392636425793171, "logps/rejected": -2.083078384399414, "loss": 1.1989, "nll_loss": 0.2820078134536743, "rewards/accuracies": 0.875, "rewards/chosen": -0.002392636379227042, "rewards/margins": 0.2059151828289032, "rewards/rejected": -0.20830783247947693, "step": 6147 }, { "epoch": 4.251728907330567, "grad_norm": 11.454696655273438, "learning_rate": 3.1934839403719075e-05, "log_odds_chosen": 10.160030364990234, "log_odds_ratio": -0.00027405653963796794, "logits/chosen": -0.1272168755531311, "logits/rejected": -0.24657240509986877, "logps/chosen": -0.00119110569357872, "logps/rejected": -2.3499598503112793, "loss": 0.901, "nll_loss": 0.22522665560245514, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011911056935787201, "rewards/margins": 0.2348768562078476, "rewards/rejected": -0.2349959760904312, "step": 6148 }, { "epoch": 4.252420470262794, "grad_norm": 7.529397487640381, "learning_rate": 3.193099738742892e-05, "log_odds_chosen": 9.81788444519043, "log_odds_ratio": -0.00012479073484428227, "logits/chosen": -0.46000170707702637, "logits/rejected": -0.5587924718856812, "logps/chosen": -0.00022344016178976744, "logps/rejected": -1.3893221616744995, "loss": 1.178, "nll_loss": 0.29448843002319336, "rewards/accuracies": 1.0, "rewards/chosen": -2.2344016542774625e-05, "rewards/margins": 0.13890986144542694, "rewards/rejected": -0.1389322131872177, "step": 6149 }, { "epoch": 4.253112033195021, "grad_norm": 7.670037746429443, "learning_rate": 3.1927155371138773e-05, "log_odds_chosen": 10.348284721374512, "log_odds_ratio": -8.794210589258e-05, "logits/chosen": -0.736275315284729, "logits/rejected": -0.7626364231109619, "logps/chosen": -0.0004248587938491255, "logps/rejected": -2.323251962661743, "loss": 0.9088, "nll_loss": 0.22719423472881317, "rewards/accuracies": 1.0, "rewards/chosen": -4.2485877202125266e-05, "rewards/margins": 0.23228272795677185, "rewards/rejected": -0.23232519626617432, "step": 6150 }, { "epoch": 4.253803596127248, "grad_norm": 17.140338897705078, "learning_rate": 3.1923313354848626e-05, "log_odds_chosen": 9.948582649230957, "log_odds_ratio": -0.00014420642401091754, "logits/chosen": -0.376644492149353, "logits/rejected": -0.456285297870636, "logps/chosen": -0.00035677163396030664, "logps/rejected": -1.8041694164276123, "loss": 1.3221, "nll_loss": 0.33050060272216797, "rewards/accuracies": 1.0, "rewards/chosen": -3.5677163396030664e-05, "rewards/margins": 0.18038126826286316, "rewards/rejected": -0.18041695654392242, "step": 6151 }, { "epoch": 4.254495159059474, "grad_norm": 22.46241569519043, "learning_rate": 3.191947133855848e-05, "log_odds_chosen": 9.51242446899414, "log_odds_ratio": -0.1370656043291092, "logits/chosen": -0.9688135385513306, "logits/rejected": -1.068524718284607, "logps/chosen": -0.2347746342420578, "logps/rejected": -2.2272887229919434, "loss": 1.0347, "nll_loss": 0.2449595332145691, "rewards/accuracies": 0.875, "rewards/chosen": -0.02347746305167675, "rewards/margins": 0.19925141334533691, "rewards/rejected": -0.2227288782596588, "step": 6152 }, { "epoch": 4.255186721991701, "grad_norm": 7.2648491859436035, "learning_rate": 3.1915629322268324e-05, "log_odds_chosen": 9.57111644744873, "log_odds_ratio": -0.0026391155552119017, "logits/chosen": -0.5854471921920776, "logits/rejected": -0.6443982720375061, "logps/chosen": -0.0010829295497387648, "logps/rejected": -2.35952091217041, "loss": 1.5768, "nll_loss": 0.3939476013183594, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010829296661540866, "rewards/margins": 0.23584382236003876, "rewards/rejected": -0.23595212399959564, "step": 6153 }, { "epoch": 4.255878284923928, "grad_norm": 5.667422294616699, "learning_rate": 3.191178730597818e-05, "log_odds_chosen": 9.090551376342773, "log_odds_ratio": -0.0007788903312757611, "logits/chosen": -0.538144588470459, "logits/rejected": -0.5147003531455994, "logps/chosen": -0.006274771876633167, "logps/rejected": -1.5023272037506104, "loss": 1.3573, "nll_loss": 0.3392564654350281, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006274771294556558, "rewards/margins": 0.14960524439811707, "rewards/rejected": -0.1502327024936676, "step": 6154 }, { "epoch": 4.256569847856155, "grad_norm": 7.264804363250732, "learning_rate": 3.190794528968803e-05, "log_odds_chosen": 8.99667739868164, "log_odds_ratio": -0.004868743941187859, "logits/chosen": -0.6970754265785217, "logits/rejected": -0.6954235434532166, "logps/chosen": -0.003572126617655158, "logps/rejected": -1.9685618877410889, "loss": 1.0954, "nll_loss": 0.2733573019504547, "rewards/accuracies": 1.0, "rewards/chosen": -0.000357212673407048, "rewards/margins": 0.19649897515773773, "rewards/rejected": -0.19685617089271545, "step": 6155 }, { "epoch": 4.257261410788382, "grad_norm": 7.831867694854736, "learning_rate": 3.190410327339788e-05, "log_odds_chosen": 10.755487442016602, "log_odds_ratio": -8.687889931024984e-05, "logits/chosen": -0.25262823700904846, "logits/rejected": -0.3432296812534332, "logps/chosen": -0.0003574866277631372, "logps/rejected": -2.4831430912017822, "loss": 0.8574, "nll_loss": 0.21433907747268677, "rewards/accuracies": 1.0, "rewards/chosen": -3.5748664231505245e-05, "rewards/margins": 0.24827855825424194, "rewards/rejected": -0.24831432104110718, "step": 6156 }, { "epoch": 4.2579529737206085, "grad_norm": 9.009108543395996, "learning_rate": 3.1900261257107734e-05, "log_odds_chosen": 9.415932655334473, "log_odds_ratio": -0.00192168727517128, "logits/chosen": -0.6164818406105042, "logits/rejected": -0.704775869846344, "logps/chosen": -0.0009632366127334535, "logps/rejected": -1.8765302896499634, "loss": 1.2155, "nll_loss": 0.303676038980484, "rewards/accuracies": 1.0, "rewards/chosen": -9.632366709411144e-05, "rewards/margins": 0.18755671381950378, "rewards/rejected": -0.18765303492546082, "step": 6157 }, { "epoch": 4.258644536652835, "grad_norm": 13.167975425720215, "learning_rate": 3.189641924081758e-05, "log_odds_chosen": 9.81691837310791, "log_odds_ratio": -0.0013511620927602053, "logits/chosen": -0.5068100690841675, "logits/rejected": -0.5617337226867676, "logps/chosen": -0.0005183805478736758, "logps/rejected": -1.5767027139663696, "loss": 1.2908, "nll_loss": 0.3225646913051605, "rewards/accuracies": 1.0, "rewards/chosen": -5.183805842534639e-05, "rewards/margins": 0.1576184332370758, "rewards/rejected": -0.1576702892780304, "step": 6158 }, { "epoch": 4.259336099585062, "grad_norm": 8.414799690246582, "learning_rate": 3.189257722452743e-05, "log_odds_chosen": 10.277162551879883, "log_odds_ratio": -8.107912435662001e-05, "logits/chosen": 0.052820704877376556, "logits/rejected": -0.06814444065093994, "logps/chosen": -0.00030761188827455044, "logps/rejected": -1.597118854522705, "loss": 0.6989, "nll_loss": 0.1747276932001114, "rewards/accuracies": 1.0, "rewards/chosen": -3.076119173783809e-05, "rewards/margins": 0.15968112647533417, "rewards/rejected": -0.15971189737319946, "step": 6159 }, { "epoch": 4.260027662517289, "grad_norm": 12.140266418457031, "learning_rate": 3.1888735208237284e-05, "log_odds_chosen": 11.18593978881836, "log_odds_ratio": -0.0001817693846533075, "logits/chosen": -0.6569154262542725, "logits/rejected": -0.7095228433609009, "logps/chosen": -0.00019287460600025952, "logps/rejected": -2.2339508533477783, "loss": 1.0071, "nll_loss": 0.2517620921134949, "rewards/accuracies": 1.0, "rewards/chosen": -1.9287461327621713e-05, "rewards/margins": 0.22337576746940613, "rewards/rejected": -0.22339506447315216, "step": 6160 }, { "epoch": 4.260719225449516, "grad_norm": 7.150200366973877, "learning_rate": 3.188489319194714e-05, "log_odds_chosen": 10.353399276733398, "log_odds_ratio": -0.0003910641244146973, "logits/chosen": -0.19342248141765594, "logits/rejected": -0.1816299855709076, "logps/chosen": -0.0005973252700641751, "logps/rejected": -2.1960625648498535, "loss": 0.8542, "nll_loss": 0.21351881325244904, "rewards/accuracies": 1.0, "rewards/chosen": -5.9732523368438706e-05, "rewards/margins": 0.21954652667045593, "rewards/rejected": -0.21960625052452087, "step": 6161 }, { "epoch": 4.261410788381743, "grad_norm": 13.170788764953613, "learning_rate": 3.188105117565698e-05, "log_odds_chosen": 8.552861213684082, "log_odds_ratio": -0.0017542075365781784, "logits/chosen": -0.08158275485038757, "logits/rejected": -0.18088936805725098, "logps/chosen": -0.0019205547869205475, "logps/rejected": -1.5736401081085205, "loss": 1.5537, "nll_loss": 0.3882403075695038, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019205547869205475, "rewards/margins": 0.15717196464538574, "rewards/rejected": -0.15736402571201324, "step": 6162 }, { "epoch": 4.2621023513139695, "grad_norm": 17.794445037841797, "learning_rate": 3.187720915936684e-05, "log_odds_chosen": 10.468133926391602, "log_odds_ratio": -9.857804980129004e-05, "logits/chosen": -0.4853760302066803, "logits/rejected": -0.49158012866973877, "logps/chosen": -0.000775833148509264, "logps/rejected": -2.0211195945739746, "loss": 1.2288, "nll_loss": 0.3071891665458679, "rewards/accuracies": 1.0, "rewards/chosen": -7.758331776130944e-05, "rewards/margins": 0.2020343840122223, "rewards/rejected": -0.20211195945739746, "step": 6163 }, { "epoch": 4.262793914246196, "grad_norm": 15.141548156738281, "learning_rate": 3.187336714307669e-05, "log_odds_chosen": 10.6614351272583, "log_odds_ratio": -7.608214218635112e-05, "logits/chosen": -0.2411142736673355, "logits/rejected": -0.2971709668636322, "logps/chosen": -0.0001604513672646135, "logps/rejected": -2.035788059234619, "loss": 0.9799, "nll_loss": 0.24496668577194214, "rewards/accuracies": 1.0, "rewards/chosen": -1.6045138181652874e-05, "rewards/margins": 0.20356276631355286, "rewards/rejected": -0.20357881486415863, "step": 6164 }, { "epoch": 4.263485477178423, "grad_norm": 9.609676361083984, "learning_rate": 3.186952512678654e-05, "log_odds_chosen": 10.652984619140625, "log_odds_ratio": -4.42389864474535e-05, "logits/chosen": -0.40380859375, "logits/rejected": -0.47176897525787354, "logps/chosen": -0.0005353515734896064, "logps/rejected": -2.288562774658203, "loss": 0.9475, "nll_loss": 0.2368660271167755, "rewards/accuracies": 1.0, "rewards/chosen": -5.353515734896064e-05, "rewards/margins": 0.22880274057388306, "rewards/rejected": -0.22885626554489136, "step": 6165 }, { "epoch": 4.26417704011065, "grad_norm": 18.316030502319336, "learning_rate": 3.186568311049639e-05, "log_odds_chosen": 9.397259712219238, "log_odds_ratio": -0.00036490714410319924, "logits/chosen": -0.2662919759750366, "logits/rejected": -0.2668308615684509, "logps/chosen": -0.0013899412006139755, "logps/rejected": -1.6413580179214478, "loss": 1.5478, "nll_loss": 0.3869023323059082, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013899413170292974, "rewards/margins": 0.16399678587913513, "rewards/rejected": -0.16413578391075134, "step": 6166 }, { "epoch": 4.264868603042877, "grad_norm": 5.9011054039001465, "learning_rate": 3.186184109420624e-05, "log_odds_chosen": 9.216154098510742, "log_odds_ratio": -0.004653660114854574, "logits/chosen": -0.42850807309150696, "logits/rejected": -0.43218332529067993, "logps/chosen": -0.009668417274951935, "logps/rejected": -1.7757195234298706, "loss": 0.9863, "nll_loss": 0.24610421061515808, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009668418788351119, "rewards/margins": 0.17660510540008545, "rewards/rejected": -0.17757193744182587, "step": 6167 }, { "epoch": 4.265560165975104, "grad_norm": 7.113187313079834, "learning_rate": 3.185799907791609e-05, "log_odds_chosen": 9.243945121765137, "log_odds_ratio": -0.0005347700789570808, "logits/chosen": -0.3659195601940155, "logits/rejected": -0.4238894581794739, "logps/chosen": -0.0038989405147731304, "logps/rejected": -2.413435220718384, "loss": 1.2477, "nll_loss": 0.31186720728874207, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003898940631188452, "rewards/margins": 0.24095362424850464, "rewards/rejected": -0.24134352803230286, "step": 6168 }, { "epoch": 4.2662517289073305, "grad_norm": 9.815381050109863, "learning_rate": 3.185415706162594e-05, "log_odds_chosen": 9.05932331085205, "log_odds_ratio": -0.00020118005340918899, "logits/chosen": -0.10414472222328186, "logits/rejected": -0.23452311754226685, "logps/chosen": -0.0012550798710435629, "logps/rejected": -1.68681001663208, "loss": 1.0896, "nll_loss": 0.2723812162876129, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012550799874588847, "rewards/margins": 0.16855549812316895, "rewards/rejected": -0.16868099570274353, "step": 6169 }, { "epoch": 4.266943291839557, "grad_norm": 9.147217750549316, "learning_rate": 3.1850315045335795e-05, "log_odds_chosen": 9.957201957702637, "log_odds_ratio": -9.266338020097464e-05, "logits/chosen": -0.16600045561790466, "logits/rejected": -0.21061308681964874, "logps/chosen": -0.005030377767980099, "logps/rejected": -2.65336275100708, "loss": 1.3004, "nll_loss": 0.325092077255249, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005030377651564777, "rewards/margins": 0.2648332118988037, "rewards/rejected": -0.265336275100708, "step": 6170 }, { "epoch": 4.267634854771784, "grad_norm": 8.045340538024902, "learning_rate": 3.184647302904564e-05, "log_odds_chosen": 9.198465347290039, "log_odds_ratio": -0.0001735072728479281, "logits/chosen": -0.47205400466918945, "logits/rejected": -0.5205153226852417, "logps/chosen": -0.0007983793038874865, "logps/rejected": -1.7312718629837036, "loss": 1.3807, "nll_loss": 0.3451688587665558, "rewards/accuracies": 1.0, "rewards/chosen": -7.983793329913169e-05, "rewards/margins": 0.17304734885692596, "rewards/rejected": -0.1731271743774414, "step": 6171 }, { "epoch": 4.268326417704011, "grad_norm": 6.69143533706665, "learning_rate": 3.18426310127555e-05, "log_odds_chosen": 10.010408401489258, "log_odds_ratio": -0.0001274587557418272, "logits/chosen": -0.27231353521347046, "logits/rejected": -0.28820914030075073, "logps/chosen": -0.0008799899369478226, "logps/rejected": -2.1249094009399414, "loss": 1.2909, "nll_loss": 0.3227103352546692, "rewards/accuracies": 1.0, "rewards/chosen": -8.799900388112292e-05, "rewards/margins": 0.21240293979644775, "rewards/rejected": -0.21249093115329742, "step": 6172 }, { "epoch": 4.269017980636238, "grad_norm": 6.601019382476807, "learning_rate": 3.1838788996465346e-05, "log_odds_chosen": 9.913965225219727, "log_odds_ratio": -0.0002963221340905875, "logits/chosen": -0.46160951256752014, "logits/rejected": -0.4814470410346985, "logps/chosen": -0.00020028551807627082, "logps/rejected": -1.554571270942688, "loss": 1.0545, "nll_loss": 0.26360398530960083, "rewards/accuracies": 1.0, "rewards/chosen": -2.0028553990414366e-05, "rewards/margins": 0.15543711185455322, "rewards/rejected": -0.15545713901519775, "step": 6173 }, { "epoch": 4.269709543568465, "grad_norm": 12.24593448638916, "learning_rate": 3.18349469801752e-05, "log_odds_chosen": 9.407413482666016, "log_odds_ratio": -0.0007775399135425687, "logits/chosen": -0.32303255796432495, "logits/rejected": -0.34748154878616333, "logps/chosen": -0.0036095932591706514, "logps/rejected": -1.9337127208709717, "loss": 1.3275, "nll_loss": 0.3318028748035431, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036095932591706514, "rewards/margins": 0.1930103302001953, "rewards/rejected": -0.19337128102779388, "step": 6174 }, { "epoch": 4.2704011065006915, "grad_norm": 15.678728103637695, "learning_rate": 3.183110496388505e-05, "log_odds_chosen": 10.00384521484375, "log_odds_ratio": -0.00040300548425875604, "logits/chosen": -0.6204206943511963, "logits/rejected": -0.6617852449417114, "logps/chosen": -0.0006737353978678584, "logps/rejected": -2.5181639194488525, "loss": 1.2319, "nll_loss": 0.3079381287097931, "rewards/accuracies": 1.0, "rewards/chosen": -6.737354124197736e-05, "rewards/margins": 0.25174903869628906, "rewards/rejected": -0.25181639194488525, "step": 6175 }, { "epoch": 4.271092669432918, "grad_norm": 7.809788703918457, "learning_rate": 3.1827262947594896e-05, "log_odds_chosen": 9.518634796142578, "log_odds_ratio": -0.0002678804157767445, "logits/chosen": -0.3987533450126648, "logits/rejected": -0.49139589071273804, "logps/chosen": -0.0005189487710595131, "logps/rejected": -1.4620037078857422, "loss": 0.7988, "nll_loss": 0.19967755675315857, "rewards/accuracies": 1.0, "rewards/chosen": -5.1894880016334355e-05, "rewards/margins": 0.1461484730243683, "rewards/rejected": -0.14620037376880646, "step": 6176 }, { "epoch": 4.271784232365145, "grad_norm": 8.224957466125488, "learning_rate": 3.182342093130475e-05, "log_odds_chosen": 11.28808307647705, "log_odds_ratio": -3.974015999119729e-05, "logits/chosen": -0.4204134941101074, "logits/rejected": -0.5216044187545776, "logps/chosen": -0.00029299373272806406, "logps/rejected": -2.9208827018737793, "loss": 1.3039, "nll_loss": 0.32597795128822327, "rewards/accuracies": 1.0, "rewards/chosen": -2.9299371817614883e-05, "rewards/margins": 0.2920589745044708, "rewards/rejected": -0.29208827018737793, "step": 6177 }, { "epoch": 4.272475795297372, "grad_norm": 6.33683443069458, "learning_rate": 3.18195789150146e-05, "log_odds_chosen": 8.957198143005371, "log_odds_ratio": -0.0002472910564392805, "logits/chosen": -0.582464337348938, "logits/rejected": -0.6484727263450623, "logps/chosen": -0.0011252148542553186, "logps/rejected": -1.8942865133285522, "loss": 1.2003, "nll_loss": 0.3000393509864807, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011252149124629796, "rewards/margins": 0.18931612372398376, "rewards/rejected": -0.1894286572933197, "step": 6178 }, { "epoch": 4.273167358229599, "grad_norm": 14.971855163574219, "learning_rate": 3.1815736898724454e-05, "log_odds_chosen": 8.94025993347168, "log_odds_ratio": -0.00026140769477933645, "logits/chosen": -0.7007652521133423, "logits/rejected": -0.687438428401947, "logps/chosen": -0.0008105351589620113, "logps/rejected": -1.8527700901031494, "loss": 1.067, "nll_loss": 0.26671651005744934, "rewards/accuracies": 1.0, "rewards/chosen": -8.10535202617757e-05, "rewards/margins": 0.1851959526538849, "rewards/rejected": -0.18527701497077942, "step": 6179 }, { "epoch": 4.273858921161826, "grad_norm": 11.929402351379395, "learning_rate": 3.18118948824343e-05, "log_odds_chosen": 9.497038841247559, "log_odds_ratio": -0.002557209460064769, "logits/chosen": -0.5874805450439453, "logits/rejected": -0.5811681747436523, "logps/chosen": -0.0019133985042572021, "logps/rejected": -1.8315576314926147, "loss": 1.4228, "nll_loss": 0.3554553985595703, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019133984460495412, "rewards/margins": 0.18296441435813904, "rewards/rejected": -0.18315577507019043, "step": 6180 }, { "epoch": 4.2745504840940525, "grad_norm": 7.399724006652832, "learning_rate": 3.180805286614416e-05, "log_odds_chosen": 10.53152847290039, "log_odds_ratio": -0.0001274074602406472, "logits/chosen": -0.6589803695678711, "logits/rejected": -0.7211213111877441, "logps/chosen": -0.0033396054059267044, "logps/rejected": -3.2292990684509277, "loss": 0.9119, "nll_loss": 0.2279733568429947, "rewards/accuracies": 1.0, "rewards/chosen": -0.000333960575517267, "rewards/margins": 0.3225959539413452, "rewards/rejected": -0.32292991876602173, "step": 6181 }, { "epoch": 4.275242047026279, "grad_norm": 7.8806843757629395, "learning_rate": 3.1804210849854004e-05, "log_odds_chosen": 8.394222259521484, "log_odds_ratio": -0.050156790763139725, "logits/chosen": -0.6371682286262512, "logits/rejected": -0.6561437249183655, "logps/chosen": -0.01156701985746622, "logps/rejected": -1.389466404914856, "loss": 1.1804, "nll_loss": 0.29009053111076355, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011567020555958152, "rewards/margins": 0.13778993487358093, "rewards/rejected": -0.13894663751125336, "step": 6182 }, { "epoch": 4.275933609958506, "grad_norm": 10.309439659118652, "learning_rate": 3.180036883356386e-05, "log_odds_chosen": 11.258016586303711, "log_odds_ratio": -2.8753411243087612e-05, "logits/chosen": -0.5273156762123108, "logits/rejected": -0.5840225219726562, "logps/chosen": -0.0003254458715673536, "logps/rejected": -2.6107728481292725, "loss": 1.5655, "nll_loss": 0.391368567943573, "rewards/accuracies": 1.0, "rewards/chosen": -3.254459079471417e-05, "rewards/margins": 0.2610447406768799, "rewards/rejected": -0.26107728481292725, "step": 6183 }, { "epoch": 4.276625172890733, "grad_norm": 6.367475509643555, "learning_rate": 3.179652681727371e-05, "log_odds_chosen": 9.545506477355957, "log_odds_ratio": -0.0002627922222018242, "logits/chosen": -0.5451605916023254, "logits/rejected": -0.5639867186546326, "logps/chosen": -0.0010228962637484074, "logps/rejected": -1.7442550659179688, "loss": 1.5626, "nll_loss": 0.3906342387199402, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010228962491964921, "rewards/margins": 0.17432323098182678, "rewards/rejected": -0.17442551255226135, "step": 6184 }, { "epoch": 4.27731673582296, "grad_norm": 9.653816223144531, "learning_rate": 3.1792684800983555e-05, "log_odds_chosen": 10.542591094970703, "log_odds_ratio": -8.921239350456744e-05, "logits/chosen": -0.4528236985206604, "logits/rejected": -0.416860431432724, "logps/chosen": -0.0014932905323803425, "logps/rejected": -2.6712212562561035, "loss": 0.9257, "nll_loss": 0.23141497373580933, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014932906196918339, "rewards/margins": 0.26697278022766113, "rewards/rejected": -0.2671221196651459, "step": 6185 }, { "epoch": 4.278008298755187, "grad_norm": 7.284701824188232, "learning_rate": 3.178884278469341e-05, "log_odds_chosen": 10.624105453491211, "log_odds_ratio": -5.059983232058585e-05, "logits/chosen": -0.7741506695747375, "logits/rejected": -0.7754232883453369, "logps/chosen": -0.00011048675514757633, "logps/rejected": -1.6449944972991943, "loss": 1.0486, "nll_loss": 0.26214975118637085, "rewards/accuracies": 1.0, "rewards/chosen": -1.1048676242353395e-05, "rewards/margins": 0.16448840498924255, "rewards/rejected": -0.1644994616508484, "step": 6186 }, { "epoch": 4.2786998616874135, "grad_norm": 9.619255065917969, "learning_rate": 3.178500076840326e-05, "log_odds_chosen": 10.326604843139648, "log_odds_ratio": -5.458838495542295e-05, "logits/chosen": -0.5391168594360352, "logits/rejected": -0.6189460158348083, "logps/chosen": -0.0002454041095916182, "logps/rejected": -1.8849753141403198, "loss": 1.1774, "nll_loss": 0.29435694217681885, "rewards/accuracies": 1.0, "rewards/chosen": -2.454041168675758e-05, "rewards/margins": 0.18847298622131348, "rewards/rejected": -0.18849752843379974, "step": 6187 }, { "epoch": 4.27939142461964, "grad_norm": 11.625914573669434, "learning_rate": 3.178115875211311e-05, "log_odds_chosen": 9.457094192504883, "log_odds_ratio": -0.0007933979504741728, "logits/chosen": -0.6278635263442993, "logits/rejected": -0.6144816875457764, "logps/chosen": -0.004008726682513952, "logps/rejected": -1.4434616565704346, "loss": 1.273, "nll_loss": 0.31816577911376953, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004008726973552257, "rewards/margins": 0.14394529163837433, "rewards/rejected": -0.14434616267681122, "step": 6188 }, { "epoch": 4.280082987551867, "grad_norm": 9.663738250732422, "learning_rate": 3.177731673582296e-05, "log_odds_chosen": 10.204988479614258, "log_odds_ratio": -0.0002085541345877573, "logits/chosen": -0.5629463791847229, "logits/rejected": -0.5773528814315796, "logps/chosen": -0.0008364799432456493, "logps/rejected": -1.5560381412506104, "loss": 1.031, "nll_loss": 0.2577328085899353, "rewards/accuracies": 1.0, "rewards/chosen": -8.364799577975646e-05, "rewards/margins": 0.15552017092704773, "rewards/rejected": -0.1556038111448288, "step": 6189 }, { "epoch": 4.280774550484094, "grad_norm": 9.566874504089355, "learning_rate": 3.177347471953282e-05, "log_odds_chosen": 10.460851669311523, "log_odds_ratio": -5.972566214040853e-05, "logits/chosen": -0.4141422212123871, "logits/rejected": -0.4708189070224762, "logps/chosen": -0.00026144867297261953, "logps/rejected": -2.0418572425842285, "loss": 0.9027, "nll_loss": 0.22567889094352722, "rewards/accuracies": 1.0, "rewards/chosen": -2.6144867661059834e-05, "rewards/margins": 0.20415958762168884, "rewards/rejected": -0.20418575406074524, "step": 6190 }, { "epoch": 4.281466113416321, "grad_norm": 11.769757270812988, "learning_rate": 3.176963270324266e-05, "log_odds_chosen": 8.909507751464844, "log_odds_ratio": -0.0021207600366324186, "logits/chosen": -0.3657255172729492, "logits/rejected": -0.3715681731700897, "logps/chosen": -0.009017495438456535, "logps/rejected": -1.7288877964019775, "loss": 1.09, "nll_loss": 0.2722766399383545, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009017496486194432, "rewards/margins": 0.17198702692985535, "rewards/rejected": -0.17288878560066223, "step": 6191 }, { "epoch": 4.282157676348548, "grad_norm": 6.800326347351074, "learning_rate": 3.1765790686952515e-05, "log_odds_chosen": 9.953897476196289, "log_odds_ratio": -0.00017238502914551646, "logits/chosen": -0.557531476020813, "logits/rejected": -0.5738213062286377, "logps/chosen": -0.00028701216797344387, "logps/rejected": -2.059114456176758, "loss": 1.4842, "nll_loss": 0.37102198600769043, "rewards/accuracies": 1.0, "rewards/chosen": -2.8701215342152864e-05, "rewards/margins": 0.2058827430009842, "rewards/rejected": -0.20591145753860474, "step": 6192 }, { "epoch": 4.282849239280774, "grad_norm": 6.554988861083984, "learning_rate": 3.176194867066237e-05, "log_odds_chosen": 10.263959884643555, "log_odds_ratio": -6.602435314562172e-05, "logits/chosen": -0.3205175995826721, "logits/rejected": -0.38556766510009766, "logps/chosen": -0.0001721544103929773, "logps/rejected": -1.56480073928833, "loss": 0.9438, "nll_loss": 0.23595106601715088, "rewards/accuracies": 1.0, "rewards/chosen": -1.721544140309561e-05, "rewards/margins": 0.1564628630876541, "rewards/rejected": -0.156480073928833, "step": 6193 }, { "epoch": 4.283540802213001, "grad_norm": 5.991519927978516, "learning_rate": 3.175810665437221e-05, "log_odds_chosen": 9.662172317504883, "log_odds_ratio": -0.002522186143323779, "logits/chosen": -0.22076928615570068, "logits/rejected": -0.24936668574810028, "logps/chosen": -0.00199855281971395, "logps/rejected": -2.0251848697662354, "loss": 1.2241, "nll_loss": 0.3057834208011627, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019985527615062892, "rewards/margins": 0.20231863856315613, "rewards/rejected": -0.202518492937088, "step": 6194 }, { "epoch": 4.284232365145228, "grad_norm": 8.247119903564453, "learning_rate": 3.1754264638082066e-05, "log_odds_chosen": 8.842205047607422, "log_odds_ratio": -0.0006784327561035752, "logits/chosen": -0.5647929906845093, "logits/rejected": -0.5543622374534607, "logps/chosen": -0.01458799373358488, "logps/rejected": -1.484726905822754, "loss": 1.3746, "nll_loss": 0.34358879923820496, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014587993500754237, "rewards/margins": 0.14701390266418457, "rewards/rejected": -0.14847269654273987, "step": 6195 }, { "epoch": 4.284923928077455, "grad_norm": 13.228196144104004, "learning_rate": 3.175042262179192e-05, "log_odds_chosen": 9.361620903015137, "log_odds_ratio": -0.01754622533917427, "logits/chosen": -0.45240429043769836, "logits/rejected": -0.522560715675354, "logps/chosen": -0.005461498629301786, "logps/rejected": -1.9183518886566162, "loss": 1.6339, "nll_loss": 0.40672767162323, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005461499094963074, "rewards/margins": 0.1912890374660492, "rewards/rejected": -0.1918351948261261, "step": 6196 }, { "epoch": 4.285615491009682, "grad_norm": 10.74985408782959, "learning_rate": 3.174658060550177e-05, "log_odds_chosen": 9.648872375488281, "log_odds_ratio": -0.0002749493869487196, "logits/chosen": -0.5711510181427002, "logits/rejected": -0.667496919631958, "logps/chosen": -0.0009153534774668515, "logps/rejected": -2.0735342502593994, "loss": 0.9794, "nll_loss": 0.2448323518037796, "rewards/accuracies": 1.0, "rewards/chosen": -9.153535211225972e-05, "rewards/margins": 0.20726190507411957, "rewards/rejected": -0.20735344290733337, "step": 6197 }, { "epoch": 4.286307053941909, "grad_norm": 13.684673309326172, "learning_rate": 3.1742738589211616e-05, "log_odds_chosen": 9.251256942749023, "log_odds_ratio": -0.04825928807258606, "logits/chosen": -0.17080645263195038, "logits/rejected": -0.33765342831611633, "logps/chosen": -0.00878140889108181, "logps/rejected": -1.4698766469955444, "loss": 1.8133, "nll_loss": 0.44850003719329834, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008781409705989063, "rewards/margins": 0.14610953629016876, "rewards/rejected": -0.1469876766204834, "step": 6198 }, { "epoch": 4.286998616874135, "grad_norm": 10.797607421875, "learning_rate": 3.1738896572921475e-05, "log_odds_chosen": 9.443811416625977, "log_odds_ratio": -0.000161836898769252, "logits/chosen": -0.6183329820632935, "logits/rejected": -0.6296303272247314, "logps/chosen": -0.00029242795426398516, "logps/rejected": -1.2201664447784424, "loss": 0.8209, "nll_loss": 0.2052021622657776, "rewards/accuracies": 1.0, "rewards/chosen": -2.9242795790196396e-05, "rewards/margins": 0.12198740243911743, "rewards/rejected": -0.12201663851737976, "step": 6199 }, { "epoch": 4.287690179806362, "grad_norm": 8.2676420211792, "learning_rate": 3.173505455663132e-05, "log_odds_chosen": 9.324875831604004, "log_odds_ratio": -0.001229931483976543, "logits/chosen": -0.6896675825119019, "logits/rejected": -0.7380616068840027, "logps/chosen": -0.0015790637116879225, "logps/rejected": -1.530112862586975, "loss": 0.7646, "nll_loss": 0.19102557003498077, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001579063682584092, "rewards/margins": 0.1528533697128296, "rewards/rejected": -0.1530112773180008, "step": 6200 }, { "epoch": 4.288381742738589, "grad_norm": 6.931770324707031, "learning_rate": 3.1731212540341174e-05, "log_odds_chosen": 9.740365982055664, "log_odds_ratio": -0.00048356314073316753, "logits/chosen": -0.6223162412643433, "logits/rejected": -0.692702054977417, "logps/chosen": -0.001973244594410062, "logps/rejected": -2.078831911087036, "loss": 1.1004, "nll_loss": 0.27504581212997437, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019732445070985705, "rewards/margins": 0.20768587291240692, "rewards/rejected": -0.20788319408893585, "step": 6201 }, { "epoch": 4.289073305670816, "grad_norm": 5.612523078918457, "learning_rate": 3.1727370524051026e-05, "log_odds_chosen": 10.49863052368164, "log_odds_ratio": -0.00012968035298399627, "logits/chosen": -0.47685426473617554, "logits/rejected": -0.41339313983917236, "logps/chosen": -0.00019079844059888273, "logps/rejected": -1.6685700416564941, "loss": 0.9857, "nll_loss": 0.24640297889709473, "rewards/accuracies": 1.0, "rewards/chosen": -1.9079845515079796e-05, "rewards/margins": 0.1668379306793213, "rewards/rejected": -0.16685700416564941, "step": 6202 }, { "epoch": 4.289764868603043, "grad_norm": 15.797565460205078, "learning_rate": 3.172352850776087e-05, "log_odds_chosen": 9.270092010498047, "log_odds_ratio": -0.26327821612358093, "logits/chosen": -0.49884092807769775, "logits/rejected": -0.5388543605804443, "logps/chosen": -0.04154008626937866, "logps/rejected": -2.206192970275879, "loss": 1.4869, "nll_loss": 0.34539881348609924, "rewards/accuracies": 0.875, "rewards/chosen": -0.004154008813202381, "rewards/margins": 0.2164652943611145, "rewards/rejected": -0.2206193059682846, "step": 6203 }, { "epoch": 4.29045643153527, "grad_norm": 7.614452838897705, "learning_rate": 3.1719686491470724e-05, "log_odds_chosen": 10.388860702514648, "log_odds_ratio": -5.83199507673271e-05, "logits/chosen": -0.3829791843891144, "logits/rejected": -0.5302500128746033, "logps/chosen": -0.0001508757413830608, "logps/rejected": -1.7036199569702148, "loss": 1.3709, "nll_loss": 0.34271174669265747, "rewards/accuracies": 1.0, "rewards/chosen": -1.5087575775396544e-05, "rewards/margins": 0.17034690082073212, "rewards/rejected": -0.17036199569702148, "step": 6204 }, { "epoch": 4.291147994467496, "grad_norm": 7.703399181365967, "learning_rate": 3.1715844475180577e-05, "log_odds_chosen": 10.075663566589355, "log_odds_ratio": -0.00024130381643772125, "logits/chosen": -0.6611663103103638, "logits/rejected": -0.6568069458007812, "logps/chosen": -0.0006587379612028599, "logps/rejected": -1.9626967906951904, "loss": 0.6575, "nll_loss": 0.1643550843000412, "rewards/accuracies": 1.0, "rewards/chosen": -6.587379903066903e-05, "rewards/margins": 0.1962037980556488, "rewards/rejected": -0.1962696760892868, "step": 6205 }, { "epoch": 4.291839557399723, "grad_norm": 6.695903301239014, "learning_rate": 3.171200245889043e-05, "log_odds_chosen": 10.23775863647461, "log_odds_ratio": -0.00025228591402992606, "logits/chosen": -0.6362022161483765, "logits/rejected": -0.5913698673248291, "logps/chosen": -0.0008647385984659195, "logps/rejected": -1.8681186437606812, "loss": 1.7754, "nll_loss": 0.4438358545303345, "rewards/accuracies": 1.0, "rewards/chosen": -8.647386857774109e-05, "rewards/margins": 0.18672539293766022, "rewards/rejected": -0.1868118792772293, "step": 6206 }, { "epoch": 4.29253112033195, "grad_norm": 11.357057571411133, "learning_rate": 3.1708160442600275e-05, "log_odds_chosen": 9.543838500976562, "log_odds_ratio": -0.003506321692839265, "logits/chosen": -0.7389904856681824, "logits/rejected": -0.8157459497451782, "logps/chosen": -0.002916330471634865, "logps/rejected": -2.448678970336914, "loss": 1.3222, "nll_loss": 0.3302033245563507, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002916330413427204, "rewards/margins": 0.24457626044750214, "rewards/rejected": -0.24486789107322693, "step": 6207 }, { "epoch": 4.293222683264177, "grad_norm": 10.25238037109375, "learning_rate": 3.1704318426310134e-05, "log_odds_chosen": 9.856980323791504, "log_odds_ratio": -0.0007413313142023981, "logits/chosen": -0.6136857867240906, "logits/rejected": -0.6719917058944702, "logps/chosen": -0.0017734833527356386, "logps/rejected": -1.808726191520691, "loss": 1.108, "nll_loss": 0.2769239842891693, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001773483381839469, "rewards/margins": 0.1806952804327011, "rewards/rejected": -0.18087263405323029, "step": 6208 }, { "epoch": 4.293914246196404, "grad_norm": 9.573114395141602, "learning_rate": 3.170047641001998e-05, "log_odds_chosen": 9.353816032409668, "log_odds_ratio": -0.11365321278572083, "logits/chosen": -0.40987884998321533, "logits/rejected": -0.5696350932121277, "logps/chosen": -0.01775352470576763, "logps/rejected": -2.1001336574554443, "loss": 1.404, "nll_loss": 0.33963605761528015, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017753525171428919, "rewards/margins": 0.20823803544044495, "rewards/rejected": -0.21001337468624115, "step": 6209 }, { "epoch": 4.2946058091286305, "grad_norm": 13.752728462219238, "learning_rate": 3.169663439372983e-05, "log_odds_chosen": 10.185070037841797, "log_odds_ratio": -0.0002414601040072739, "logits/chosen": -0.6724650859832764, "logits/rejected": -0.6352249383926392, "logps/chosen": -0.00029455244657583535, "logps/rejected": -1.8959944248199463, "loss": 1.0568, "nll_loss": 0.26417943835258484, "rewards/accuracies": 1.0, "rewards/chosen": -2.9455244657583535e-05, "rewards/margins": 0.18957000970840454, "rewards/rejected": -0.18959945440292358, "step": 6210 }, { "epoch": 4.295297372060857, "grad_norm": 8.722634315490723, "learning_rate": 3.1692792377439684e-05, "log_odds_chosen": 9.901902198791504, "log_odds_ratio": -0.0005723107024095953, "logits/chosen": -0.6653061509132385, "logits/rejected": -0.745096743106842, "logps/chosen": -0.0008094396907836199, "logps/rejected": -1.9767580032348633, "loss": 0.8833, "nll_loss": 0.22076524794101715, "rewards/accuracies": 1.0, "rewards/chosen": -8.094397344393656e-05, "rewards/margins": 0.19759485125541687, "rewards/rejected": -0.19767579436302185, "step": 6211 }, { "epoch": 4.295988934993084, "grad_norm": 8.75367259979248, "learning_rate": 3.168895036114953e-05, "log_odds_chosen": 10.493782043457031, "log_odds_ratio": -4.299761349102482e-05, "logits/chosen": -0.6574910283088684, "logits/rejected": -0.6594315767288208, "logps/chosen": -0.0008926051668822765, "logps/rejected": -2.085134744644165, "loss": 1.665, "nll_loss": 0.41624513268470764, "rewards/accuracies": 1.0, "rewards/chosen": -8.926051668822765e-05, "rewards/margins": 0.20842422544956207, "rewards/rejected": -0.20851348340511322, "step": 6212 }, { "epoch": 4.296680497925311, "grad_norm": 8.879002571105957, "learning_rate": 3.168510834485938e-05, "log_odds_chosen": 10.296270370483398, "log_odds_ratio": -0.0007908447296358645, "logits/chosen": -0.4450835585594177, "logits/rejected": -0.546353280544281, "logps/chosen": -0.004724219441413879, "logps/rejected": -2.280630111694336, "loss": 1.0695, "nll_loss": 0.26730722188949585, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047242193249985576, "rewards/margins": 0.22759059071540833, "rewards/rejected": -0.22806301712989807, "step": 6213 }, { "epoch": 4.297372060857538, "grad_norm": 10.082377433776855, "learning_rate": 3.1681266328569235e-05, "log_odds_chosen": 9.513387680053711, "log_odds_ratio": -0.0001838295574998483, "logits/chosen": -0.626352071762085, "logits/rejected": -0.6274489760398865, "logps/chosen": -0.0003809796180576086, "logps/rejected": -1.644523024559021, "loss": 0.9256, "nll_loss": 0.2313939332962036, "rewards/accuracies": 1.0, "rewards/chosen": -3.809796180576086e-05, "rewards/margins": 0.1644142121076584, "rewards/rejected": -0.16445229947566986, "step": 6214 }, { "epoch": 4.298063623789765, "grad_norm": 7.109057426452637, "learning_rate": 3.167742431227909e-05, "log_odds_chosen": 9.066092491149902, "log_odds_ratio": -0.0006189306732267141, "logits/chosen": -0.8764826655387878, "logits/rejected": -0.9166613817214966, "logps/chosen": -0.007097132503986359, "logps/rejected": -2.346010684967041, "loss": 1.5469, "nll_loss": 0.38666069507598877, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007097133784554899, "rewards/margins": 0.2338913381099701, "rewards/rejected": -0.23460106551647186, "step": 6215 }, { "epoch": 4.2987551867219915, "grad_norm": 11.553153991699219, "learning_rate": 3.167358229598893e-05, "log_odds_chosen": 9.455713272094727, "log_odds_ratio": -0.0008415338816121221, "logits/chosen": -0.4361748993396759, "logits/rejected": -0.500927209854126, "logps/chosen": -0.012969731353223324, "logps/rejected": -1.6395647525787354, "loss": 1.77, "nll_loss": 0.4424084424972534, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012969732051715255, "rewards/margins": 0.16265949606895447, "rewards/rejected": -0.16395646333694458, "step": 6216 }, { "epoch": 4.299446749654218, "grad_norm": 9.645062446594238, "learning_rate": 3.166974027969879e-05, "log_odds_chosen": 9.888082504272461, "log_odds_ratio": -0.0001669441699050367, "logits/chosen": -0.1050456166267395, "logits/rejected": -0.24527603387832642, "logps/chosen": -0.00054099754197523, "logps/rejected": -1.55792236328125, "loss": 1.1636, "nll_loss": 0.2908813953399658, "rewards/accuracies": 1.0, "rewards/chosen": -5.4099757107906044e-05, "rewards/margins": 0.15573813021183014, "rewards/rejected": -0.155792236328125, "step": 6217 }, { "epoch": 4.300138312586445, "grad_norm": 5.756036758422852, "learning_rate": 3.166589826340864e-05, "log_odds_chosen": 9.767498970031738, "log_odds_ratio": -0.0009746247669681907, "logits/chosen": -0.6501675844192505, "logits/rejected": -0.6296141147613525, "logps/chosen": -0.007666432298719883, "logps/rejected": -2.4295947551727295, "loss": 1.1535, "nll_loss": 0.28826743364334106, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007666432065889239, "rewards/margins": 0.2421928197145462, "rewards/rejected": -0.24295946955680847, "step": 6218 }, { "epoch": 4.300829875518672, "grad_norm": 11.394264221191406, "learning_rate": 3.166205624711849e-05, "log_odds_chosen": 10.402400970458984, "log_odds_ratio": -9.294199844589457e-05, "logits/chosen": -0.5928707718849182, "logits/rejected": -0.5991271138191223, "logps/chosen": -0.0001955939515028149, "logps/rejected": -1.826383352279663, "loss": 1.2356, "nll_loss": 0.3088921904563904, "rewards/accuracies": 1.0, "rewards/chosen": -1.9559396605473012e-05, "rewards/margins": 0.18261878192424774, "rewards/rejected": -0.18263834714889526, "step": 6219 }, { "epoch": 4.301521438450899, "grad_norm": 11.74155044555664, "learning_rate": 3.165821423082834e-05, "log_odds_chosen": 8.715950965881348, "log_odds_ratio": -0.01336581725627184, "logits/chosen": -0.2825634181499481, "logits/rejected": -0.387153685092926, "logps/chosen": -0.012938362546265125, "logps/rejected": -1.8848291635513306, "loss": 1.0797, "nll_loss": 0.2685927152633667, "rewards/accuracies": 1.0, "rewards/chosen": -0.001293836277909577, "rewards/margins": 0.18718910217285156, "rewards/rejected": -0.18848291039466858, "step": 6220 }, { "epoch": 4.302213001383126, "grad_norm": 11.069859504699707, "learning_rate": 3.165437221453819e-05, "log_odds_chosen": 10.328326225280762, "log_odds_ratio": -6.953918637009338e-05, "logits/chosen": -0.75736403465271, "logits/rejected": -0.7972024083137512, "logps/chosen": -0.00018067838391289115, "logps/rejected": -1.670365810394287, "loss": 0.8068, "nll_loss": 0.20169062912464142, "rewards/accuracies": 1.0, "rewards/chosen": -1.8067839846480638e-05, "rewards/margins": 0.16701850295066833, "rewards/rejected": -0.16703656315803528, "step": 6221 }, { "epoch": 4.3029045643153525, "grad_norm": 9.046951293945312, "learning_rate": 3.165053019824804e-05, "log_odds_chosen": 10.310076713562012, "log_odds_ratio": -0.00022209499729797244, "logits/chosen": -0.1708485186100006, "logits/rejected": -0.25514480471611023, "logps/chosen": -0.001422532950527966, "logps/rejected": -2.1108036041259766, "loss": 1.151, "nll_loss": 0.28773412108421326, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014225330960471183, "rewards/margins": 0.21093809604644775, "rewards/rejected": -0.2110803723335266, "step": 6222 }, { "epoch": 4.303596127247579, "grad_norm": 5.351454257965088, "learning_rate": 3.1646688181957893e-05, "log_odds_chosen": 8.290478706359863, "log_odds_ratio": -0.0019745633471757174, "logits/chosen": -0.47701704502105713, "logits/rejected": -0.42275407910346985, "logps/chosen": -0.0068691265769302845, "logps/rejected": -2.001173496246338, "loss": 1.1783, "nll_loss": 0.2943659722805023, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006869126809760928, "rewards/margins": 0.199430450797081, "rewards/rejected": -0.20011736452579498, "step": 6223 }, { "epoch": 4.304287690179806, "grad_norm": 10.639006614685059, "learning_rate": 3.1642846165667746e-05, "log_odds_chosen": 7.769967079162598, "log_odds_ratio": -0.10666661709547043, "logits/chosen": -0.21983850002288818, "logits/rejected": -0.13200643658638, "logps/chosen": -0.02141474559903145, "logps/rejected": -1.41822350025177, "loss": 1.8189, "nll_loss": 0.44406723976135254, "rewards/accuracies": 0.875, "rewards/chosen": -0.002141474513337016, "rewards/margins": 0.139680877327919, "rewards/rejected": -0.14182235300540924, "step": 6224 }, { "epoch": 4.304979253112033, "grad_norm": 5.231021881103516, "learning_rate": 3.163900414937759e-05, "log_odds_chosen": 9.701105117797852, "log_odds_ratio": -0.00032720022136345506, "logits/chosen": -0.24703583121299744, "logits/rejected": -0.290180504322052, "logps/chosen": -0.006611797958612442, "logps/rejected": -2.07330322265625, "loss": 0.6531, "nll_loss": 0.1632453203201294, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006611798307858407, "rewards/margins": 0.2066691517829895, "rewards/rejected": -0.20733033120632172, "step": 6225 }, { "epoch": 4.30567081604426, "grad_norm": 11.784645080566406, "learning_rate": 3.163516213308745e-05, "log_odds_chosen": 10.546085357666016, "log_odds_ratio": -9.521207539364696e-05, "logits/chosen": -0.21548408269882202, "logits/rejected": -0.2985669672489166, "logps/chosen": -0.0006060738814994693, "logps/rejected": -2.327183485031128, "loss": 0.7961, "nll_loss": 0.1990167200565338, "rewards/accuracies": 1.0, "rewards/chosen": -6.0607391787925735e-05, "rewards/margins": 0.23265774548053741, "rewards/rejected": -0.2327183485031128, "step": 6226 }, { "epoch": 4.306362378976487, "grad_norm": 14.44436264038086, "learning_rate": 3.1631320116797296e-05, "log_odds_chosen": 10.643296241760254, "log_odds_ratio": -0.0004135722410865128, "logits/chosen": -0.5684934854507446, "logits/rejected": -0.4808126986026764, "logps/chosen": -0.001616842346265912, "logps/rejected": -2.3420138359069824, "loss": 1.3186, "nll_loss": 0.3295997381210327, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016168422007467598, "rewards/margins": 0.23403970897197723, "rewards/rejected": -0.2342013716697693, "step": 6227 }, { "epoch": 4.3070539419087135, "grad_norm": 12.899102210998535, "learning_rate": 3.162747810050715e-05, "log_odds_chosen": 8.68464469909668, "log_odds_ratio": -0.0008347373805008829, "logits/chosen": -0.35283803939819336, "logits/rejected": -0.4004957675933838, "logps/chosen": -0.0025889326352626085, "logps/rejected": -1.9696290493011475, "loss": 1.6656, "nll_loss": 0.41631948947906494, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002588932402431965, "rewards/margins": 0.19670403003692627, "rewards/rejected": -0.19696292281150818, "step": 6228 }, { "epoch": 4.30774550484094, "grad_norm": 8.760489463806152, "learning_rate": 3.1623636084217e-05, "log_odds_chosen": 10.442303657531738, "log_odds_ratio": -5.676070213667117e-05, "logits/chosen": -0.6148850917816162, "logits/rejected": -0.5957847237586975, "logps/chosen": -0.0001890771382022649, "logps/rejected": -1.8346233367919922, "loss": 0.9068, "nll_loss": 0.22670197486877441, "rewards/accuracies": 1.0, "rewards/chosen": -1.8907714547822252e-05, "rewards/margins": 0.18344342708587646, "rewards/rejected": -0.18346232175827026, "step": 6229 }, { "epoch": 4.308437067773167, "grad_norm": 12.87412166595459, "learning_rate": 3.161979406792685e-05, "log_odds_chosen": 9.761995315551758, "log_odds_ratio": -0.0008566225878894329, "logits/chosen": -0.41904377937316895, "logits/rejected": -0.5353478789329529, "logps/chosen": -0.0015510877128690481, "logps/rejected": -1.9486550092697144, "loss": 1.1563, "nll_loss": 0.2889835834503174, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001551087771076709, "rewards/margins": 0.1947103887796402, "rewards/rejected": -0.19486550986766815, "step": 6230 }, { "epoch": 4.309128630705394, "grad_norm": 6.854430198669434, "learning_rate": 3.16159520516367e-05, "log_odds_chosen": 9.272346496582031, "log_odds_ratio": -0.00026201200671494007, "logits/chosen": 0.016521714627742767, "logits/rejected": -0.06517700105905533, "logps/chosen": -0.0007249008631333709, "logps/rejected": -1.488654613494873, "loss": 0.9257, "nll_loss": 0.2313927710056305, "rewards/accuracies": 1.0, "rewards/chosen": -7.249008922372013e-05, "rewards/margins": 0.14879296720027924, "rewards/rejected": -0.1488654613494873, "step": 6231 }, { "epoch": 4.309820193637621, "grad_norm": 11.679051399230957, "learning_rate": 3.161211003534655e-05, "log_odds_chosen": 9.514870643615723, "log_odds_ratio": -0.000159199204063043, "logits/chosen": -0.3904048800468445, "logits/rejected": -0.39881831407546997, "logps/chosen": -0.0010039537446573377, "logps/rejected": -2.0680408477783203, "loss": 1.295, "nll_loss": 0.3237430453300476, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010039537301054224, "rewards/margins": 0.20670370757579803, "rewards/rejected": -0.206804096698761, "step": 6232 }, { "epoch": 4.310511756569848, "grad_norm": 6.508286476135254, "learning_rate": 3.1608268019056404e-05, "log_odds_chosen": 9.726067543029785, "log_odds_ratio": -0.0003728815645445138, "logits/chosen": -0.5468158721923828, "logits/rejected": -0.6554831266403198, "logps/chosen": -0.0006326130824163556, "logps/rejected": -1.5322580337524414, "loss": 1.2165, "nll_loss": 0.3040837049484253, "rewards/accuracies": 1.0, "rewards/chosen": -6.326130824163556e-05, "rewards/margins": 0.15316253900527954, "rewards/rejected": -0.15322580933570862, "step": 6233 }, { "epoch": 4.3112033195020745, "grad_norm": 10.364480018615723, "learning_rate": 3.160442600276625e-05, "log_odds_chosen": 9.975172996520996, "log_odds_ratio": -0.00010962605301756412, "logits/chosen": -0.60814368724823, "logits/rejected": -0.6393572688102722, "logps/chosen": -0.00020576076349243522, "logps/rejected": -1.593639850616455, "loss": 0.886, "nll_loss": 0.22148962318897247, "rewards/accuracies": 1.0, "rewards/chosen": -2.0576077076839283e-05, "rewards/margins": 0.159343421459198, "rewards/rejected": -0.1593639999628067, "step": 6234 }, { "epoch": 4.311894882434301, "grad_norm": 11.097878456115723, "learning_rate": 3.160058398647611e-05, "log_odds_chosen": 10.253120422363281, "log_odds_ratio": -7.164876296883449e-05, "logits/chosen": -0.2703378200531006, "logits/rejected": -0.33766970038414, "logps/chosen": -0.00017250265227630734, "logps/rejected": -1.751900553703308, "loss": 0.9669, "nll_loss": 0.24171766638755798, "rewards/accuracies": 1.0, "rewards/chosen": -1.7250265955226496e-05, "rewards/margins": 0.1751728057861328, "rewards/rejected": -0.1751900613307953, "step": 6235 }, { "epoch": 4.312586445366528, "grad_norm": 7.3292083740234375, "learning_rate": 3.1596741970185955e-05, "log_odds_chosen": 9.296087265014648, "log_odds_ratio": -0.00020717288134619594, "logits/chosen": -0.4497448205947876, "logits/rejected": -0.48011958599090576, "logps/chosen": -0.0043256874196231365, "logps/rejected": -1.6357231140136719, "loss": 1.1735, "nll_loss": 0.29336437582969666, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004325687186792493, "rewards/margins": 0.16313976049423218, "rewards/rejected": -0.16357232630252838, "step": 6236 }, { "epoch": 4.313278008298755, "grad_norm": 5.9431352615356445, "learning_rate": 3.159289995389581e-05, "log_odds_chosen": 10.564034461975098, "log_odds_ratio": -4.40002913819626e-05, "logits/chosen": -0.2893856465816498, "logits/rejected": -0.27585193514823914, "logps/chosen": -0.00027149386005476117, "logps/rejected": -1.8761788606643677, "loss": 0.8388, "nll_loss": 0.20970144867897034, "rewards/accuracies": 1.0, "rewards/chosen": -2.7149384550284594e-05, "rewards/margins": 0.18759074807167053, "rewards/rejected": -0.18761791288852692, "step": 6237 }, { "epoch": 4.313969571230982, "grad_norm": 5.800085067749023, "learning_rate": 3.158905793760566e-05, "log_odds_chosen": 9.571613311767578, "log_odds_ratio": -0.00014140141138341278, "logits/chosen": -0.4417632520198822, "logits/rejected": -0.5179417729377747, "logps/chosen": -0.0002516951353754848, "logps/rejected": -1.5897767543792725, "loss": 0.905, "nll_loss": 0.22622837126255035, "rewards/accuracies": 1.0, "rewards/chosen": -2.5169516447931528e-05, "rewards/margins": 0.1589525192975998, "rewards/rejected": -0.1589776873588562, "step": 6238 }, { "epoch": 4.314661134163209, "grad_norm": 7.026059150695801, "learning_rate": 3.1585215921315505e-05, "log_odds_chosen": 9.260969161987305, "log_odds_ratio": -0.0007084406679496169, "logits/chosen": -0.6787703037261963, "logits/rejected": -0.6812416911125183, "logps/chosen": -0.0006535067805089056, "logps/rejected": -1.1790461540222168, "loss": 0.7697, "nll_loss": 0.1923505812883377, "rewards/accuracies": 1.0, "rewards/chosen": -6.535067950608209e-05, "rewards/margins": 0.1178392618894577, "rewards/rejected": -0.11790461093187332, "step": 6239 }, { "epoch": 4.3153526970954355, "grad_norm": 7.0646185874938965, "learning_rate": 3.158137390502536e-05, "log_odds_chosen": 9.476990699768066, "log_odds_ratio": -0.00029607085161842406, "logits/chosen": -0.32096603512763977, "logits/rejected": -0.37244951725006104, "logps/chosen": -0.0006322484114207327, "logps/rejected": -1.5482921600341797, "loss": 0.7637, "nll_loss": 0.190888911485672, "rewards/accuracies": 1.0, "rewards/chosen": -6.322484841803089e-05, "rewards/margins": 0.1547659933567047, "rewards/rejected": -0.1548292189836502, "step": 6240 }, { "epoch": 4.316044260027662, "grad_norm": 10.357381820678711, "learning_rate": 3.157753188873521e-05, "log_odds_chosen": 10.52917766571045, "log_odds_ratio": -9.83021454885602e-05, "logits/chosen": -0.43881893157958984, "logits/rejected": -0.44511866569519043, "logps/chosen": -0.00018113931582774967, "logps/rejected": -1.9013128280639648, "loss": 1.425, "nll_loss": 0.3562512993812561, "rewards/accuracies": 1.0, "rewards/chosen": -1.8113931218977086e-05, "rewards/margins": 0.19011318683624268, "rewards/rejected": -0.1901313066482544, "step": 6241 }, { "epoch": 4.316735822959889, "grad_norm": 7.102433204650879, "learning_rate": 3.157368987244506e-05, "log_odds_chosen": 9.688699722290039, "log_odds_ratio": -8.351320866495371e-05, "logits/chosen": -0.4052876830101013, "logits/rejected": -0.48662394285202026, "logps/chosen": -0.00021907503833062947, "logps/rejected": -1.4428023099899292, "loss": 1.0505, "nll_loss": 0.2626084089279175, "rewards/accuracies": 1.0, "rewards/chosen": -2.1907504560658708e-05, "rewards/margins": 0.1442583203315735, "rewards/rejected": -0.14428022503852844, "step": 6242 }, { "epoch": 4.317427385892116, "grad_norm": 7.361631870269775, "learning_rate": 3.156984785615491e-05, "log_odds_chosen": 10.357135772705078, "log_odds_ratio": -0.00010510971333133057, "logits/chosen": -0.6652143001556396, "logits/rejected": -0.6585478186607361, "logps/chosen": -0.000304955814499408, "logps/rejected": -1.9736860990524292, "loss": 1.1196, "nll_loss": 0.27989059686660767, "rewards/accuracies": 1.0, "rewards/chosen": -3.0495582905132324e-05, "rewards/margins": 0.19733810424804688, "rewards/rejected": -0.19736860692501068, "step": 6243 }, { "epoch": 4.318118948824343, "grad_norm": 9.14620304107666, "learning_rate": 3.156600583986477e-05, "log_odds_chosen": 9.410257339477539, "log_odds_ratio": -0.00020506588043645024, "logits/chosen": -0.6210223436355591, "logits/rejected": -0.7020326256752014, "logps/chosen": -0.0007779947482049465, "logps/rejected": -1.7426540851593018, "loss": 0.9272, "nll_loss": 0.2317844182252884, "rewards/accuracies": 1.0, "rewards/chosen": -7.77994719101116e-05, "rewards/margins": 0.17418763041496277, "rewards/rejected": -0.17426541447639465, "step": 6244 }, { "epoch": 4.31881051175657, "grad_norm": 8.903249740600586, "learning_rate": 3.156216382357461e-05, "log_odds_chosen": 9.798042297363281, "log_odds_ratio": -0.00030079117277637124, "logits/chosen": -0.6821163296699524, "logits/rejected": -0.69622802734375, "logps/chosen": -0.00029044965049251914, "logps/rejected": -1.3761014938354492, "loss": 1.0466, "nll_loss": 0.26163235306739807, "rewards/accuracies": 1.0, "rewards/chosen": -2.904496795963496e-05, "rewards/margins": 0.13758111000061035, "rewards/rejected": -0.13761015236377716, "step": 6245 }, { "epoch": 4.319502074688796, "grad_norm": 13.108745574951172, "learning_rate": 3.1558321807284466e-05, "log_odds_chosen": 11.272746086120605, "log_odds_ratio": -1.8102173271472566e-05, "logits/chosen": -0.5722870826721191, "logits/rejected": -0.6570054292678833, "logps/chosen": -0.0001335109700448811, "logps/rejected": -2.3407795429229736, "loss": 0.8703, "nll_loss": 0.21758487820625305, "rewards/accuracies": 1.0, "rewards/chosen": -1.3351098459679633e-05, "rewards/margins": 0.23406460881233215, "rewards/rejected": -0.23407796025276184, "step": 6246 }, { "epoch": 4.320193637621023, "grad_norm": 4.177562713623047, "learning_rate": 3.155447979099432e-05, "log_odds_chosen": 8.351842880249023, "log_odds_ratio": -0.008854147978127003, "logits/chosen": -0.3191889226436615, "logits/rejected": -0.37525224685668945, "logps/chosen": -0.004266391042619944, "logps/rejected": -1.2601978778839111, "loss": 0.7204, "nll_loss": 0.1792147159576416, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042663910426199436, "rewards/margins": 0.1255931556224823, "rewards/rejected": -0.12601980566978455, "step": 6247 }, { "epoch": 4.32088520055325, "grad_norm": 9.98576831817627, "learning_rate": 3.1550637774704164e-05, "log_odds_chosen": 8.758403778076172, "log_odds_ratio": -0.0007475988240912557, "logits/chosen": -0.4082600772380829, "logits/rejected": -0.4571593105792999, "logps/chosen": -0.001500868471339345, "logps/rejected": -1.382610559463501, "loss": 0.8104, "nll_loss": 0.20252512395381927, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001500868529547006, "rewards/margins": 0.13811098039150238, "rewards/rejected": -0.13826106488704681, "step": 6248 }, { "epoch": 4.321576763485477, "grad_norm": 10.829977035522461, "learning_rate": 3.1546795758414016e-05, "log_odds_chosen": 10.14036750793457, "log_odds_ratio": -7.867669046390802e-05, "logits/chosen": -0.6110687851905823, "logits/rejected": -0.6491025686264038, "logps/chosen": -0.0002936455130111426, "logps/rejected": -1.9828588962554932, "loss": 1.0259, "nll_loss": 0.25645485520362854, "rewards/accuracies": 1.0, "rewards/chosen": -2.9364551664912142e-05, "rewards/margins": 0.19825652241706848, "rewards/rejected": -0.19828589260578156, "step": 6249 }, { "epoch": 4.322268326417704, "grad_norm": 10.261870384216309, "learning_rate": 3.154295374212387e-05, "log_odds_chosen": 10.657238006591797, "log_odds_ratio": -0.0002526202879380435, "logits/chosen": -0.0940227285027504, "logits/rejected": -0.11147341132164001, "logps/chosen": -0.00025920767802745104, "logps/rejected": -2.4953396320343018, "loss": 1.2257, "nll_loss": 0.3063918948173523, "rewards/accuracies": 1.0, "rewards/chosen": -2.5920769985532388e-05, "rewards/margins": 0.2495080530643463, "rewards/rejected": -0.2495339810848236, "step": 6250 }, { "epoch": 4.322959889349931, "grad_norm": 6.061576843261719, "learning_rate": 3.153911172583372e-05, "log_odds_chosen": 9.507963180541992, "log_odds_ratio": -0.009043208323419094, "logits/chosen": -0.03186788409948349, "logits/rejected": -0.15542441606521606, "logps/chosen": -0.007601817604154348, "logps/rejected": -2.2683682441711426, "loss": 1.1288, "nll_loss": 0.28129690885543823, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007601818069815636, "rewards/margins": 0.2260766476392746, "rewards/rejected": -0.22683684527873993, "step": 6251 }, { "epoch": 4.323651452282157, "grad_norm": 10.646059036254883, "learning_rate": 3.153526970954357e-05, "log_odds_chosen": 11.553914070129395, "log_odds_ratio": -2.8313286748016253e-05, "logits/chosen": -0.6371780037879944, "logits/rejected": -0.666010320186615, "logps/chosen": -0.0007130609010346234, "logps/rejected": -3.1989545822143555, "loss": 0.8338, "nll_loss": 0.20845037698745728, "rewards/accuracies": 1.0, "rewards/chosen": -7.130609446903691e-05, "rewards/margins": 0.31982409954071045, "rewards/rejected": -0.3198954463005066, "step": 6252 }, { "epoch": 4.324343015214384, "grad_norm": 6.541245460510254, "learning_rate": 3.1531427693253426e-05, "log_odds_chosen": 8.380668640136719, "log_odds_ratio": -0.001909209880977869, "logits/chosen": -0.39806440472602844, "logits/rejected": -0.35889142751693726, "logps/chosen": -0.0015298908110707998, "logps/rejected": -1.3094111680984497, "loss": 1.3593, "nll_loss": 0.3396381139755249, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015298907237593085, "rewards/margins": 0.13078811764717102, "rewards/rejected": -0.13094110786914825, "step": 6253 }, { "epoch": 4.325034578146611, "grad_norm": 12.133667945861816, "learning_rate": 3.152758567696327e-05, "log_odds_chosen": 9.911739349365234, "log_odds_ratio": -0.00013189579476602376, "logits/chosen": -0.47079241275787354, "logits/rejected": -0.4168504476547241, "logps/chosen": -0.00032032723538577557, "logps/rejected": -1.798808217048645, "loss": 1.2075, "nll_loss": 0.30186164379119873, "rewards/accuracies": 1.0, "rewards/chosen": -3.2032723538577557e-05, "rewards/margins": 0.1798487901687622, "rewards/rejected": -0.17988081276416779, "step": 6254 }, { "epoch": 4.325726141078838, "grad_norm": 7.795219421386719, "learning_rate": 3.1523743660673124e-05, "log_odds_chosen": 8.880459785461426, "log_odds_ratio": -0.008680138736963272, "logits/chosen": -0.5198830962181091, "logits/rejected": -0.5463992953300476, "logps/chosen": -0.004741398151963949, "logps/rejected": -1.9662740230560303, "loss": 0.6808, "nll_loss": 0.1693269908428192, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047413978609256446, "rewards/margins": 0.19615328311920166, "rewards/rejected": -0.1966274082660675, "step": 6255 }, { "epoch": 4.326417704011065, "grad_norm": 13.251655578613281, "learning_rate": 3.151990164438298e-05, "log_odds_chosen": 10.546645164489746, "log_odds_ratio": -0.00021031236974522471, "logits/chosen": -0.6055862307548523, "logits/rejected": -0.671293318271637, "logps/chosen": -0.001339723588898778, "logps/rejected": -2.2606611251831055, "loss": 1.0264, "nll_loss": 0.2565711438655853, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013397236762102693, "rewards/margins": 0.22593215107917786, "rewards/rejected": -0.22606611251831055, "step": 6256 }, { "epoch": 4.327109266943292, "grad_norm": 8.937263488769531, "learning_rate": 3.151605962809282e-05, "log_odds_chosen": 10.133855819702148, "log_odds_ratio": -7.504695531679317e-05, "logits/chosen": -0.540047287940979, "logits/rejected": -0.5837610363960266, "logps/chosen": -0.00033649199758656323, "logps/rejected": -1.7779037952423096, "loss": 0.6846, "nll_loss": 0.1711336225271225, "rewards/accuracies": 1.0, "rewards/chosen": -3.3649201213847846e-05, "rewards/margins": 0.17775672674179077, "rewards/rejected": -0.17779038846492767, "step": 6257 }, { "epoch": 4.327800829875518, "grad_norm": 7.280256271362305, "learning_rate": 3.1512217611802675e-05, "log_odds_chosen": 9.665289878845215, "log_odds_ratio": -0.0004072503070347011, "logits/chosen": -0.03371931612491608, "logits/rejected": -0.18863540887832642, "logps/chosen": -0.002507374854758382, "logps/rejected": -2.404458522796631, "loss": 0.9425, "nll_loss": 0.23557284474372864, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025073750293813646, "rewards/margins": 0.2401951253414154, "rewards/rejected": -0.24044585227966309, "step": 6258 }, { "epoch": 4.328492392807745, "grad_norm": 5.6786394119262695, "learning_rate": 3.150837559551252e-05, "log_odds_chosen": 9.919912338256836, "log_odds_ratio": -0.00016488172695972025, "logits/chosen": -0.6246330738067627, "logits/rejected": -0.6343197822570801, "logps/chosen": -0.0025698572862893343, "logps/rejected": -2.1179940700531006, "loss": 0.6839, "nll_loss": 0.17096826434135437, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002569857460912317, "rewards/margins": 0.21154239773750305, "rewards/rejected": -0.21179938316345215, "step": 6259 }, { "epoch": 4.329183955739972, "grad_norm": 11.633374214172363, "learning_rate": 3.150453357922238e-05, "log_odds_chosen": 9.088045120239258, "log_odds_ratio": -0.030026573687791824, "logits/chosen": -0.5848643779754639, "logits/rejected": -0.5763362050056458, "logps/chosen": -0.006531553342938423, "logps/rejected": -1.5310980081558228, "loss": 1.1287, "nll_loss": 0.2791779637336731, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006531553226523101, "rewards/margins": 0.1524566411972046, "rewards/rejected": -0.15310980379581451, "step": 6260 }, { "epoch": 4.329875518672199, "grad_norm": 10.868045806884766, "learning_rate": 3.1500691562932225e-05, "log_odds_chosen": 10.415914535522461, "log_odds_ratio": -0.00022055921726860106, "logits/chosen": -0.14247390627861023, "logits/rejected": -0.2097904235124588, "logps/chosen": -0.0006233732565306127, "logps/rejected": -2.1600489616394043, "loss": 0.8514, "nll_loss": 0.21282441914081573, "rewards/accuracies": 1.0, "rewards/chosen": -6.23373271082528e-05, "rewards/margins": 0.21594256162643433, "rewards/rejected": -0.216004878282547, "step": 6261 }, { "epoch": 4.330567081604426, "grad_norm": 12.684412002563477, "learning_rate": 3.149684954664208e-05, "log_odds_chosen": 11.01368522644043, "log_odds_ratio": -9.89335312624462e-05, "logits/chosen": -0.7479545474052429, "logits/rejected": -0.8491002321243286, "logps/chosen": -0.00012943128240294755, "logps/rejected": -1.9096832275390625, "loss": 1.6489, "nll_loss": 0.4122098684310913, "rewards/accuracies": 1.0, "rewards/chosen": -1.2943129149789456e-05, "rewards/margins": 0.19095538556575775, "rewards/rejected": -0.1909683346748352, "step": 6262 }, { "epoch": 4.3312586445366525, "grad_norm": 9.980140686035156, "learning_rate": 3.149300753035193e-05, "log_odds_chosen": 10.222119331359863, "log_odds_ratio": -0.00010748507338576019, "logits/chosen": -0.844120979309082, "logits/rejected": -0.9237422943115234, "logps/chosen": -0.0004031779244542122, "logps/rejected": -1.920005202293396, "loss": 1.0495, "nll_loss": 0.2623547315597534, "rewards/accuracies": 1.0, "rewards/chosen": -4.0317790990229696e-05, "rewards/margins": 0.19196021556854248, "rewards/rejected": -0.19200052320957184, "step": 6263 }, { "epoch": 4.331950207468879, "grad_norm": 9.320433616638184, "learning_rate": 3.148916551406178e-05, "log_odds_chosen": 10.097317695617676, "log_odds_ratio": -0.002059478312730789, "logits/chosen": -0.7607566714286804, "logits/rejected": -0.7903776168823242, "logps/chosen": -0.012105059809982777, "logps/rejected": -2.138204336166382, "loss": 1.2992, "nll_loss": 0.3245936334133148, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012105060741305351, "rewards/margins": 0.2126099318265915, "rewards/rejected": -0.2138204574584961, "step": 6264 }, { "epoch": 4.332641770401106, "grad_norm": 14.335369110107422, "learning_rate": 3.148532349777163e-05, "log_odds_chosen": 10.663875579833984, "log_odds_ratio": -5.3084073442732915e-05, "logits/chosen": -0.6468909382820129, "logits/rejected": -0.6795991063117981, "logps/chosen": -0.0003323418786749244, "logps/rejected": -2.278066396713257, "loss": 1.4742, "nll_loss": 0.36855268478393555, "rewards/accuracies": 1.0, "rewards/chosen": -3.323418786749244e-05, "rewards/margins": 0.22777341306209564, "rewards/rejected": -0.22780664265155792, "step": 6265 }, { "epoch": 4.333333333333333, "grad_norm": 9.74260139465332, "learning_rate": 3.148148148148148e-05, "log_odds_chosen": 10.406856536865234, "log_odds_ratio": -5.082024654257111e-05, "logits/chosen": -0.359982967376709, "logits/rejected": -0.37254437804222107, "logps/chosen": -0.0001446517271688208, "logps/rejected": -1.5443391799926758, "loss": 0.9381, "nll_loss": 0.23451650142669678, "rewards/accuracies": 1.0, "rewards/chosen": -1.4465171261690557e-05, "rewards/margins": 0.1544194370508194, "rewards/rejected": -0.15443390607833862, "step": 6266 }, { "epoch": 4.33402489626556, "grad_norm": 9.490935325622559, "learning_rate": 3.147763946519133e-05, "log_odds_chosen": 9.49018669128418, "log_odds_ratio": -0.003614935325458646, "logits/chosen": -0.7341195344924927, "logits/rejected": -0.7036961913108826, "logps/chosen": -0.00993090309202671, "logps/rejected": -1.7946966886520386, "loss": 1.4416, "nll_loss": 0.360050767660141, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009930903324857354, "rewards/margins": 0.17847657203674316, "rewards/rejected": -0.17946967482566833, "step": 6267 }, { "epoch": 4.334716459197787, "grad_norm": 12.99151611328125, "learning_rate": 3.147379744890118e-05, "log_odds_chosen": 8.455595970153809, "log_odds_ratio": -0.0108507564291358, "logits/chosen": -0.175982266664505, "logits/rejected": -0.15738755464553833, "logps/chosen": -0.004563149530440569, "logps/rejected": -1.2955995798110962, "loss": 1.3614, "nll_loss": 0.3392714560031891, "rewards/accuracies": 1.0, "rewards/chosen": -0.00045631497050635517, "rewards/margins": 0.1291036456823349, "rewards/rejected": -0.1295599639415741, "step": 6268 }, { "epoch": 4.3354080221300135, "grad_norm": 10.58656120300293, "learning_rate": 3.146995543261104e-05, "log_odds_chosen": 9.847926139831543, "log_odds_ratio": -0.00018483387248124927, "logits/chosen": -0.2619816064834595, "logits/rejected": -0.36310145258903503, "logps/chosen": -0.0008300583576783538, "logps/rejected": -2.2136294841766357, "loss": 0.9725, "nll_loss": 0.24309849739074707, "rewards/accuracies": 1.0, "rewards/chosen": -8.300583431264386e-05, "rewards/margins": 0.22127996385097504, "rewards/rejected": -0.22136294841766357, "step": 6269 }, { "epoch": 4.33609958506224, "grad_norm": 5.926123142242432, "learning_rate": 3.1466113416320884e-05, "log_odds_chosen": 9.165082931518555, "log_odds_ratio": -0.0006193573353812099, "logits/chosen": -0.5741816759109497, "logits/rejected": -0.5986604690551758, "logps/chosen": -0.002135189948603511, "logps/rejected": -1.836089015007019, "loss": 1.3148, "nll_loss": 0.3286486566066742, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021351898612920195, "rewards/margins": 0.1833954006433487, "rewards/rejected": -0.18360891938209534, "step": 6270 }, { "epoch": 4.336791147994467, "grad_norm": 12.717452049255371, "learning_rate": 3.1462271400030736e-05, "log_odds_chosen": 10.55025863647461, "log_odds_ratio": -8.598245040047914e-05, "logits/chosen": -0.4449648857116699, "logits/rejected": -0.6279736161231995, "logps/chosen": -0.00019479054026305676, "logps/rejected": -1.6564853191375732, "loss": 1.5201, "nll_loss": 0.380012571811676, "rewards/accuracies": 1.0, "rewards/chosen": -1.9479053662507795e-05, "rewards/margins": 0.1656290590763092, "rewards/rejected": -0.16564851999282837, "step": 6271 }, { "epoch": 4.337482710926694, "grad_norm": 7.52541971206665, "learning_rate": 3.145842938374059e-05, "log_odds_chosen": 9.816986083984375, "log_odds_ratio": -0.0005100779235363007, "logits/chosen": -0.39175713062286377, "logits/rejected": -0.45417341589927673, "logps/chosen": -0.0009921849705278873, "logps/rejected": -2.0651535987854004, "loss": 1.6299, "nll_loss": 0.4074295461177826, "rewards/accuracies": 1.0, "rewards/chosen": -9.921850869432092e-05, "rewards/margins": 0.20641617476940155, "rewards/rejected": -0.20651540160179138, "step": 6272 }, { "epoch": 4.338174273858921, "grad_norm": 5.848926067352295, "learning_rate": 3.145458736745044e-05, "log_odds_chosen": 10.613710403442383, "log_odds_ratio": -0.00014787871623411775, "logits/chosen": -0.25838908553123474, "logits/rejected": -0.3856915235519409, "logps/chosen": -0.007207159884274006, "logps/rejected": -2.8834474086761475, "loss": 1.2895, "nll_loss": 0.3223702609539032, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007207159651443362, "rewards/margins": 0.2876240313053131, "rewards/rejected": -0.28834474086761475, "step": 6273 }, { "epoch": 4.338865836791148, "grad_norm": 5.730781078338623, "learning_rate": 3.145074535116029e-05, "log_odds_chosen": 10.336332321166992, "log_odds_ratio": -9.005053289001808e-05, "logits/chosen": -0.3600419759750366, "logits/rejected": -0.4638018012046814, "logps/chosen": -0.00014610840298701078, "logps/rejected": -1.6459441184997559, "loss": 0.6049, "nll_loss": 0.15122506022453308, "rewards/accuracies": 1.0, "rewards/chosen": -1.4610840480600018e-05, "rewards/margins": 0.16457980871200562, "rewards/rejected": -0.16459441184997559, "step": 6274 }, { "epoch": 4.3395573997233745, "grad_norm": 10.16214656829834, "learning_rate": 3.144690333487014e-05, "log_odds_chosen": 11.042724609375, "log_odds_ratio": -0.00011868192814290524, "logits/chosen": -0.5489569306373596, "logits/rejected": -0.548592746257782, "logps/chosen": -0.00011969899060204625, "logps/rejected": -2.194108724594116, "loss": 1.259, "nll_loss": 0.3147435784339905, "rewards/accuracies": 1.0, "rewards/chosen": -1.1969898878305685e-05, "rewards/margins": 0.21939890086650848, "rewards/rejected": -0.21941088140010834, "step": 6275 }, { "epoch": 4.340248962655601, "grad_norm": 5.415022373199463, "learning_rate": 3.144306131857999e-05, "log_odds_chosen": 8.669913291931152, "log_odds_ratio": -0.02633490413427353, "logits/chosen": -0.24330535531044006, "logits/rejected": -0.24419859051704407, "logps/chosen": -0.009514041244983673, "logps/rejected": -2.1480984687805176, "loss": 1.089, "nll_loss": 0.26962801814079285, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009514041012153029, "rewards/margins": 0.2138584554195404, "rewards/rejected": -0.2148098647594452, "step": 6276 }, { "epoch": 4.340940525587828, "grad_norm": 10.989092826843262, "learning_rate": 3.1439219302289844e-05, "log_odds_chosen": 9.908502578735352, "log_odds_ratio": -0.0016798458527773619, "logits/chosen": -0.25677382946014404, "logits/rejected": -0.2983488440513611, "logps/chosen": -0.0018546772189438343, "logps/rejected": -2.5655713081359863, "loss": 1.1805, "nll_loss": 0.2949519157409668, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018546771025285125, "rewards/margins": 0.2563716769218445, "rewards/rejected": -0.2565571665763855, "step": 6277 }, { "epoch": 4.341632088520055, "grad_norm": 8.060803413391113, "learning_rate": 3.1435377285999696e-05, "log_odds_chosen": 8.83830451965332, "log_odds_ratio": -0.0004681129357777536, "logits/chosen": -0.5368528962135315, "logits/rejected": -0.5375138521194458, "logps/chosen": -0.00047587224980816245, "logps/rejected": -1.2851731777191162, "loss": 1.236, "nll_loss": 0.30894869565963745, "rewards/accuracies": 1.0, "rewards/chosen": -4.7587229346390814e-05, "rewards/margins": 0.12846973538398743, "rewards/rejected": -0.12851732969284058, "step": 6278 }, { "epoch": 4.342323651452282, "grad_norm": 8.903595924377441, "learning_rate": 3.143153526970954e-05, "log_odds_chosen": 10.817100524902344, "log_odds_ratio": -3.656329499790445e-05, "logits/chosen": -0.19402526319026947, "logits/rejected": -0.29778486490249634, "logps/chosen": -0.00024108865181915462, "logps/rejected": -2.496133327484131, "loss": 1.2467, "nll_loss": 0.31167399883270264, "rewards/accuracies": 1.0, "rewards/chosen": -2.4108867364702746e-05, "rewards/margins": 0.2495892345905304, "rewards/rejected": -0.24961334466934204, "step": 6279 }, { "epoch": 4.343015214384509, "grad_norm": 6.821666240692139, "learning_rate": 3.1427693253419395e-05, "log_odds_chosen": 10.714070320129395, "log_odds_ratio": -8.616933337179944e-05, "logits/chosen": -0.5038744807243347, "logits/rejected": -0.5399574041366577, "logps/chosen": -0.0026380920317023993, "logps/rejected": -3.0238983631134033, "loss": 0.9245, "nll_loss": 0.23110723495483398, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026380919734947383, "rewards/margins": 0.3021259903907776, "rewards/rejected": -0.30238983035087585, "step": 6280 }, { "epoch": 4.3437067773167355, "grad_norm": 10.771331787109375, "learning_rate": 3.142385123712925e-05, "log_odds_chosen": 10.633634567260742, "log_odds_ratio": -3.2800868211779743e-05, "logits/chosen": -0.2504116892814636, "logits/rejected": -0.2996535301208496, "logps/chosen": -0.00015955072012729943, "logps/rejected": -1.9660744667053223, "loss": 1.3028, "nll_loss": 0.32570430636405945, "rewards/accuracies": 1.0, "rewards/chosen": -1.59550709213363e-05, "rewards/margins": 0.19659149646759033, "rewards/rejected": -0.19660745561122894, "step": 6281 }, { "epoch": 4.344398340248962, "grad_norm": 5.823162078857422, "learning_rate": 3.14200092208391e-05, "log_odds_chosen": 8.517678260803223, "log_odds_ratio": -0.0007794699631631374, "logits/chosen": -0.6882646083831787, "logits/rejected": -0.7718308568000793, "logps/chosen": -0.00037540867924690247, "logps/rejected": -1.1263272762298584, "loss": 1.5819, "nll_loss": 0.39540088176727295, "rewards/accuracies": 1.0, "rewards/chosen": -3.754087083507329e-05, "rewards/margins": 0.11259518563747406, "rewards/rejected": -0.11263272911310196, "step": 6282 }, { "epoch": 4.345089903181189, "grad_norm": 6.239787578582764, "learning_rate": 3.1416167204548945e-05, "log_odds_chosen": 8.946929931640625, "log_odds_ratio": -0.004028361290693283, "logits/chosen": -0.254109263420105, "logits/rejected": -0.32316648960113525, "logps/chosen": -0.0019965223036706448, "logps/rejected": -1.1533297300338745, "loss": 1.5395, "nll_loss": 0.38447076082229614, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001996522187255323, "rewards/margins": 0.11513333022594452, "rewards/rejected": -0.11533297598361969, "step": 6283 }, { "epoch": 4.345781466113416, "grad_norm": 4.709649085998535, "learning_rate": 3.1412325188258804e-05, "log_odds_chosen": 7.86823844909668, "log_odds_ratio": -0.018861282616853714, "logits/chosen": -0.785298764705658, "logits/rejected": -0.7515165209770203, "logps/chosen": -0.007227227091789246, "logps/rejected": -1.1606642007827759, "loss": 1.1264, "nll_loss": 0.2797185480594635, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007227227324619889, "rewards/margins": 0.11534368991851807, "rewards/rejected": -0.11606641858816147, "step": 6284 }, { "epoch": 4.346473029045643, "grad_norm": 12.456677436828613, "learning_rate": 3.140848317196865e-05, "log_odds_chosen": 9.091700553894043, "log_odds_ratio": -0.0003683864197228104, "logits/chosen": -0.6549928784370422, "logits/rejected": -0.6938451528549194, "logps/chosen": -0.0008419096702709794, "logps/rejected": -1.842326045036316, "loss": 1.144, "nll_loss": 0.2859728932380676, "rewards/accuracies": 1.0, "rewards/chosen": -8.41909641167149e-05, "rewards/margins": 0.1841484010219574, "rewards/rejected": -0.18423257768154144, "step": 6285 }, { "epoch": 4.34716459197787, "grad_norm": 12.204995155334473, "learning_rate": 3.14046411556785e-05, "log_odds_chosen": 11.336495399475098, "log_odds_ratio": -9.398195834364742e-05, "logits/chosen": -0.28229689598083496, "logits/rejected": -0.40144163370132446, "logps/chosen": -0.00024727190611884, "logps/rejected": -2.923536539077759, "loss": 1.3709, "nll_loss": 0.34271159768104553, "rewards/accuracies": 1.0, "rewards/chosen": -2.4727192794671282e-05, "rewards/margins": 0.29232892394065857, "rewards/rejected": -0.29235365986824036, "step": 6286 }, { "epoch": 4.3478561549100965, "grad_norm": 13.519350051879883, "learning_rate": 3.1400799139388355e-05, "log_odds_chosen": 10.042278289794922, "log_odds_ratio": -0.00012913253158330917, "logits/chosen": -0.526986300945282, "logits/rejected": -0.6442042589187622, "logps/chosen": -0.0019563455134630203, "logps/rejected": -2.9566519260406494, "loss": 1.6818, "nll_loss": 0.4204367995262146, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001956345367943868, "rewards/margins": 0.29546958208084106, "rewards/rejected": -0.2956652045249939, "step": 6287 }, { "epoch": 4.348547717842323, "grad_norm": 6.363492965698242, "learning_rate": 3.13969571230982e-05, "log_odds_chosen": 9.305254936218262, "log_odds_ratio": -0.00025624711997807026, "logits/chosen": -0.6630715131759644, "logits/rejected": -0.6110356450080872, "logps/chosen": -0.00042787453276105225, "logps/rejected": -1.1967389583587646, "loss": 0.5836, "nll_loss": 0.14586985111236572, "rewards/accuracies": 1.0, "rewards/chosen": -4.278745473129675e-05, "rewards/margins": 0.11963111162185669, "rewards/rejected": -0.11967390775680542, "step": 6288 }, { "epoch": 4.34923928077455, "grad_norm": 7.60081672668457, "learning_rate": 3.139311510680805e-05, "log_odds_chosen": 10.671063423156738, "log_odds_ratio": -5.429089651443064e-05, "logits/chosen": -0.28530365228652954, "logits/rejected": -0.4221542775630951, "logps/chosen": -0.00037182040978223085, "logps/rejected": -2.3406002521514893, "loss": 1.2078, "nll_loss": 0.30194365978240967, "rewards/accuracies": 1.0, "rewards/chosen": -3.718204243341461e-05, "rewards/margins": 0.2340228259563446, "rewards/rejected": -0.23406001925468445, "step": 6289 }, { "epoch": 4.349930843706777, "grad_norm": 5.530222415924072, "learning_rate": 3.1389273090517905e-05, "log_odds_chosen": 9.497110366821289, "log_odds_ratio": -0.0004657884710468352, "logits/chosen": -0.3096838593482971, "logits/rejected": -0.475458562374115, "logps/chosen": -0.00798887200653553, "logps/rejected": -1.6125645637512207, "loss": 0.9461, "nll_loss": 0.23648670315742493, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007988871657289565, "rewards/margins": 0.1604575663805008, "rewards/rejected": -0.16125646233558655, "step": 6290 }, { "epoch": 4.350622406639004, "grad_norm": 8.4190673828125, "learning_rate": 3.138543107422776e-05, "log_odds_chosen": 8.526147842407227, "log_odds_ratio": -0.03643810376524925, "logits/chosen": -0.5177884697914124, "logits/rejected": -0.7100130319595337, "logps/chosen": -0.011017205193638802, "logps/rejected": -1.941372275352478, "loss": 1.2788, "nll_loss": 0.31605058908462524, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011017204960808158, "rewards/margins": 0.1930355280637741, "rewards/rejected": -0.19413723051548004, "step": 6291 }, { "epoch": 4.351313969571231, "grad_norm": 232.70631408691406, "learning_rate": 3.1381589057937604e-05, "log_odds_chosen": 8.946935653686523, "log_odds_ratio": -0.7460908889770508, "logits/chosen": -0.2698379158973694, "logits/rejected": -0.3084045648574829, "logps/chosen": -0.03984811156988144, "logps/rejected": -2.1382994651794434, "loss": 1.2856, "nll_loss": 0.2467801868915558, "rewards/accuracies": 0.875, "rewards/chosen": -0.003984811250120401, "rewards/margins": 0.2098451405763626, "rewards/rejected": -0.21382997930049896, "step": 6292 }, { "epoch": 4.3520055325034575, "grad_norm": 16.808666229248047, "learning_rate": 3.137774704164746e-05, "log_odds_chosen": 9.995148658752441, "log_odds_ratio": -5.602094461210072e-05, "logits/chosen": -0.23712369799613953, "logits/rejected": -0.39177846908569336, "logps/chosen": -0.0004925878602080047, "logps/rejected": -2.1555447578430176, "loss": 1.1607, "nll_loss": 0.2901768386363983, "rewards/accuracies": 1.0, "rewards/chosen": -4.925878965877928e-05, "rewards/margins": 0.2155052125453949, "rewards/rejected": -0.21555446088314056, "step": 6293 }, { "epoch": 4.352697095435684, "grad_norm": 14.556714057922363, "learning_rate": 3.137390502535731e-05, "log_odds_chosen": 10.153697967529297, "log_odds_ratio": -0.000147096739965491, "logits/chosen": -1.0491799116134644, "logits/rejected": -1.1178945302963257, "logps/chosen": -0.0002521372225601226, "logps/rejected": -1.6516971588134766, "loss": 1.8252, "nll_loss": 0.4562966525554657, "rewards/accuracies": 1.0, "rewards/chosen": -2.5213725166395307e-05, "rewards/margins": 0.16514450311660767, "rewards/rejected": -0.16516971588134766, "step": 6294 }, { "epoch": 4.353388658367911, "grad_norm": 6.729737758636475, "learning_rate": 3.137006300906716e-05, "log_odds_chosen": 9.903938293457031, "log_odds_ratio": -0.0006013888050802052, "logits/chosen": -0.2919148802757263, "logits/rejected": -0.3714301884174347, "logps/chosen": -0.0004974387702532113, "logps/rejected": -1.703548789024353, "loss": 1.3713, "nll_loss": 0.34275490045547485, "rewards/accuracies": 1.0, "rewards/chosen": -4.974387411493808e-05, "rewards/margins": 0.17030513286590576, "rewards/rejected": -0.17035487294197083, "step": 6295 }, { "epoch": 4.354080221300138, "grad_norm": 8.606863021850586, "learning_rate": 3.136622099277701e-05, "log_odds_chosen": 9.143202781677246, "log_odds_ratio": -0.0030732681043446064, "logits/chosen": -0.5923776030540466, "logits/rejected": -0.5626744031906128, "logps/chosen": -0.0018515931442379951, "logps/rejected": -1.5052129030227661, "loss": 1.0512, "nll_loss": 0.26248684525489807, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001851593260653317, "rewards/margins": 0.1503361314535141, "rewards/rejected": -0.15052127838134766, "step": 6296 }, { "epoch": 4.354771784232365, "grad_norm": 7.347190856933594, "learning_rate": 3.136237897648686e-05, "log_odds_chosen": 10.128762245178223, "log_odds_ratio": -0.0003108138043899089, "logits/chosen": -0.21433761715888977, "logits/rejected": -0.2856504023075104, "logps/chosen": -0.0016921722562983632, "logps/rejected": -2.441856861114502, "loss": 1.0754, "nll_loss": 0.26881054043769836, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016921722271945328, "rewards/margins": 0.24401646852493286, "rewards/rejected": -0.2441856861114502, "step": 6297 }, { "epoch": 4.355463347164592, "grad_norm": 8.210977554321289, "learning_rate": 3.135853696019671e-05, "log_odds_chosen": 9.857641220092773, "log_odds_ratio": -0.0008883035625331104, "logits/chosen": -0.40693163871765137, "logits/rejected": -0.4411735236644745, "logps/chosen": -0.0008371093426831067, "logps/rejected": -1.812008261680603, "loss": 0.8135, "nll_loss": 0.20329168438911438, "rewards/accuracies": 1.0, "rewards/chosen": -8.371093281311914e-05, "rewards/margins": 0.18111711740493774, "rewards/rejected": -0.1812008172273636, "step": 6298 }, { "epoch": 4.356154910096818, "grad_norm": 11.318500518798828, "learning_rate": 3.1354694943906564e-05, "log_odds_chosen": 8.595135688781738, "log_odds_ratio": -0.0008043124689720571, "logits/chosen": -0.5766262412071228, "logits/rejected": -0.6325322389602661, "logps/chosen": -0.003487899899482727, "logps/rejected": -1.8228943347930908, "loss": 1.0605, "nll_loss": 0.2650408446788788, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003487899957690388, "rewards/margins": 0.18194065988063812, "rewards/rejected": -0.18228945136070251, "step": 6299 }, { "epoch": 4.356846473029045, "grad_norm": 7.36438512802124, "learning_rate": 3.1350852927616416e-05, "log_odds_chosen": 8.635614395141602, "log_odds_ratio": -0.003634521272033453, "logits/chosen": -0.7639305591583252, "logits/rejected": -0.7949274778366089, "logps/chosen": -0.026982376351952553, "logps/rejected": -2.173861503601074, "loss": 1.2849, "nll_loss": 0.3208683133125305, "rewards/accuracies": 1.0, "rewards/chosen": -0.002698237542062998, "rewards/margins": 0.21468791365623474, "rewards/rejected": -0.2173861414194107, "step": 6300 }, { "epoch": 4.357538035961272, "grad_norm": 8.610318183898926, "learning_rate": 3.134701091132626e-05, "log_odds_chosen": 11.18701457977295, "log_odds_ratio": -2.265540933876764e-05, "logits/chosen": -0.5687964558601379, "logits/rejected": -0.6419916152954102, "logps/chosen": -0.00025438808370381594, "logps/rejected": -2.238746404647827, "loss": 0.8686, "nll_loss": 0.2171362340450287, "rewards/accuracies": 1.0, "rewards/chosen": -2.5438808734179474e-05, "rewards/margins": 0.22384920716285706, "rewards/rejected": -0.22387462854385376, "step": 6301 }, { "epoch": 4.358229598893499, "grad_norm": 8.536808013916016, "learning_rate": 3.134316889503612e-05, "log_odds_chosen": 9.764930725097656, "log_odds_ratio": -0.0027465950697660446, "logits/chosen": -0.7701665163040161, "logits/rejected": -0.8795304298400879, "logps/chosen": -0.0021428868640214205, "logps/rejected": -1.8732999563217163, "loss": 0.9178, "nll_loss": 0.22917912900447845, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021428869513329118, "rewards/margins": 0.18711569905281067, "rewards/rejected": -0.18733000755310059, "step": 6302 }, { "epoch": 4.358921161825726, "grad_norm": 6.359549522399902, "learning_rate": 3.133932687874597e-05, "log_odds_chosen": 8.725833892822266, "log_odds_ratio": -0.0040402826853096485, "logits/chosen": -0.015667788684368134, "logits/rejected": -0.0844547301530838, "logps/chosen": -0.0029326328076422215, "logps/rejected": -1.3450301885604858, "loss": 1.1471, "nll_loss": 0.2863708734512329, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029326329240575433, "rewards/margins": 0.1342097520828247, "rewards/rejected": -0.13450302183628082, "step": 6303 }, { "epoch": 4.359612724757953, "grad_norm": 11.345746040344238, "learning_rate": 3.133548486245582e-05, "log_odds_chosen": 9.424605369567871, "log_odds_ratio": -0.011097854934632778, "logits/chosen": -0.3127516806125641, "logits/rejected": -0.4279908537864685, "logps/chosen": -0.010858694091439247, "logps/rejected": -2.0717811584472656, "loss": 0.9532, "nll_loss": 0.2371962070465088, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010858693858608603, "rewards/margins": 0.20609226822853088, "rewards/rejected": -0.20717814564704895, "step": 6304 }, { "epoch": 4.360304287690179, "grad_norm": 10.459247589111328, "learning_rate": 3.133164284616567e-05, "log_odds_chosen": 10.67799186706543, "log_odds_ratio": -3.142370769637637e-05, "logits/chosen": -0.2904345989227295, "logits/rejected": -0.3955543637275696, "logps/chosen": -0.0003603932564146817, "logps/rejected": -2.526035785675049, "loss": 0.9621, "nll_loss": 0.24051694571971893, "rewards/accuracies": 1.0, "rewards/chosen": -3.603932782425545e-05, "rewards/margins": 0.2525675594806671, "rewards/rejected": -0.25260359048843384, "step": 6305 }, { "epoch": 4.360995850622406, "grad_norm": 7.519941806793213, "learning_rate": 3.132780082987552e-05, "log_odds_chosen": 8.597479820251465, "log_odds_ratio": -0.0254450011998415, "logits/chosen": -0.4681594967842102, "logits/rejected": -0.5076048970222473, "logps/chosen": -0.007410486228764057, "logps/rejected": -1.4946553707122803, "loss": 1.5485, "nll_loss": 0.38457822799682617, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007410486578010023, "rewards/margins": 0.1487244963645935, "rewards/rejected": -0.14946553111076355, "step": 6306 }, { "epoch": 4.361687413554633, "grad_norm": 14.771382331848145, "learning_rate": 3.132395881358537e-05, "log_odds_chosen": 10.483898162841797, "log_odds_ratio": -6.732901238137856e-05, "logits/chosen": -0.7547286152839661, "logits/rejected": -0.6616960167884827, "logps/chosen": -0.00047322813770733774, "logps/rejected": -2.4050559997558594, "loss": 0.9754, "nll_loss": 0.24383097887039185, "rewards/accuracies": 1.0, "rewards/chosen": -4.7322813770733774e-05, "rewards/margins": 0.24045827984809875, "rewards/rejected": -0.24050560593605042, "step": 6307 }, { "epoch": 4.36237897648686, "grad_norm": 10.824990272521973, "learning_rate": 3.132011679729522e-05, "log_odds_chosen": 9.566398620605469, "log_odds_ratio": -0.017600275576114655, "logits/chosen": -0.2919943034648895, "logits/rejected": -0.2732120752334595, "logps/chosen": -0.005583908874541521, "logps/rejected": -2.318268299102783, "loss": 1.231, "nll_loss": 0.3060001730918884, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005583909805864096, "rewards/margins": 0.23126843571662903, "rewards/rejected": -0.23182684183120728, "step": 6308 }, { "epoch": 4.363070539419087, "grad_norm": 6.783447742462158, "learning_rate": 3.1316274781005075e-05, "log_odds_chosen": 10.260091781616211, "log_odds_ratio": -5.726013478124514e-05, "logits/chosen": -0.5818588137626648, "logits/rejected": -0.6268680095672607, "logps/chosen": -0.00023265733034349978, "logps/rejected": -1.4145288467407227, "loss": 0.9643, "nll_loss": 0.24107356369495392, "rewards/accuracies": 1.0, "rewards/chosen": -2.3265733034349978e-05, "rewards/margins": 0.141429603099823, "rewards/rejected": -0.1414528787136078, "step": 6309 }, { "epoch": 4.363762102351314, "grad_norm": 7.472887992858887, "learning_rate": 3.131243276471492e-05, "log_odds_chosen": 10.513906478881836, "log_odds_ratio": -0.0002800179354380816, "logits/chosen": 0.02222248911857605, "logits/rejected": -0.04449723660945892, "logps/chosen": -0.017652522772550583, "logps/rejected": -2.3833508491516113, "loss": 1.0615, "nll_loss": 0.2653350234031677, "rewards/accuracies": 1.0, "rewards/chosen": -0.001765252323821187, "rewards/margins": 0.2365698516368866, "rewards/rejected": -0.23833508789539337, "step": 6310 }, { "epoch": 4.36445366528354, "grad_norm": 9.911407470703125, "learning_rate": 3.130859074842478e-05, "log_odds_chosen": 9.882485389709473, "log_odds_ratio": -6.964397471165285e-05, "logits/chosen": -0.66880202293396, "logits/rejected": -0.7102699279785156, "logps/chosen": -0.000869112613145262, "logps/rejected": -1.9686014652252197, "loss": 1.0695, "nll_loss": 0.26737716794013977, "rewards/accuracies": 1.0, "rewards/chosen": -8.691126276971772e-05, "rewards/margins": 0.1967732310295105, "rewards/rejected": -0.19686013460159302, "step": 6311 }, { "epoch": 4.365145228215767, "grad_norm": 8.024314880371094, "learning_rate": 3.1304748732134625e-05, "log_odds_chosen": 9.696148872375488, "log_odds_ratio": -0.00016530677385162562, "logits/chosen": -0.5593657493591309, "logits/rejected": -0.5686834454536438, "logps/chosen": -0.0015895981341600418, "logps/rejected": -2.1018128395080566, "loss": 0.8795, "nll_loss": 0.2198611944913864, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015895982505753636, "rewards/margins": 0.21002231538295746, "rewards/rejected": -0.21018128097057343, "step": 6312 }, { "epoch": 4.365836791147994, "grad_norm": 7.493190288543701, "learning_rate": 3.130090671584448e-05, "log_odds_chosen": 10.404024124145508, "log_odds_ratio": -0.0001439659099560231, "logits/chosen": -0.19742217659950256, "logits/rejected": -0.3219975531101227, "logps/chosen": -0.007236870937049389, "logps/rejected": -2.362229824066162, "loss": 1.133, "nll_loss": 0.28323426842689514, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007236871169880033, "rewards/margins": 0.235499307513237, "rewards/rejected": -0.2362229824066162, "step": 6313 }, { "epoch": 4.366528354080221, "grad_norm": 11.898686408996582, "learning_rate": 3.129706469955433e-05, "log_odds_chosen": 9.762331008911133, "log_odds_ratio": -0.0001290647342102602, "logits/chosen": -0.6055392026901245, "logits/rejected": -0.6020311117172241, "logps/chosen": -0.0004267815675120801, "logps/rejected": -1.929985761642456, "loss": 2.3559, "nll_loss": 0.5889644622802734, "rewards/accuracies": 1.0, "rewards/chosen": -4.2678155296016484e-05, "rewards/margins": 0.1929558962583542, "rewards/rejected": -0.19299857318401337, "step": 6314 }, { "epoch": 4.367219917012449, "grad_norm": 15.363786697387695, "learning_rate": 3.1293222683264176e-05, "log_odds_chosen": 9.683354377746582, "log_odds_ratio": -0.00012579330359585583, "logits/chosen": -0.4168752133846283, "logits/rejected": -0.44384777545928955, "logps/chosen": -0.0011151648359373212, "logps/rejected": -1.5676586627960205, "loss": 0.9781, "nll_loss": 0.24452057480812073, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011151648504892364, "rewards/margins": 0.15665435791015625, "rewards/rejected": -0.156765878200531, "step": 6315 }, { "epoch": 4.367911479944675, "grad_norm": 8.174242973327637, "learning_rate": 3.128938066697403e-05, "log_odds_chosen": 10.951789855957031, "log_odds_ratio": -2.8747548640239984e-05, "logits/chosen": -0.719359278678894, "logits/rejected": -0.7517074346542358, "logps/chosen": -9.578256140230224e-05, "logps/rejected": -1.6426482200622559, "loss": 0.8635, "nll_loss": 0.2158716768026352, "rewards/accuracies": 1.0, "rewards/chosen": -9.578255230735522e-06, "rewards/margins": 0.16425524652004242, "rewards/rejected": -0.16426481306552887, "step": 6316 }, { "epoch": 4.368603042876902, "grad_norm": 8.694875717163086, "learning_rate": 3.128553865068388e-05, "log_odds_chosen": 9.255899429321289, "log_odds_ratio": -0.0016122335800901055, "logits/chosen": -0.46941107511520386, "logits/rejected": -0.43659359216690063, "logps/chosen": -0.0011213673278689384, "logps/rejected": -1.4043328762054443, "loss": 1.3327, "nll_loss": 0.33301347494125366, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011213673133170232, "rewards/margins": 0.14032115042209625, "rewards/rejected": -0.14043329656124115, "step": 6317 }, { "epoch": 4.369294605809129, "grad_norm": 10.649413108825684, "learning_rate": 3.128169663439373e-05, "log_odds_chosen": 9.875417709350586, "log_odds_ratio": -0.00015953306865412742, "logits/chosen": -0.12319202721118927, "logits/rejected": -0.1773851215839386, "logps/chosen": -0.00036378856748342514, "logps/rejected": -1.6384358406066895, "loss": 1.3269, "nll_loss": 0.3316992223262787, "rewards/accuracies": 1.0, "rewards/chosen": -3.6378856748342514e-05, "rewards/margins": 0.163807213306427, "rewards/rejected": -0.16384358704090118, "step": 6318 }, { "epoch": 4.369986168741356, "grad_norm": 8.679130554199219, "learning_rate": 3.127785461810358e-05, "log_odds_chosen": 10.387129783630371, "log_odds_ratio": -0.00014563562581315637, "logits/chosen": -0.24549484252929688, "logits/rejected": -0.3316296339035034, "logps/chosen": -0.00041450935532338917, "logps/rejected": -2.2968225479125977, "loss": 0.9877, "nll_loss": 0.24691948294639587, "rewards/accuracies": 1.0, "rewards/chosen": -4.1450934077147394e-05, "rewards/margins": 0.22964079678058624, "rewards/rejected": -0.22968225181102753, "step": 6319 }, { "epoch": 4.370677731673583, "grad_norm": 6.213496685028076, "learning_rate": 3.127401260181344e-05, "log_odds_chosen": 9.122066497802734, "log_odds_ratio": -0.00042584422044456005, "logits/chosen": -0.34517765045166016, "logits/rejected": -0.3442385196685791, "logps/chosen": -0.003846704261377454, "logps/rejected": -2.2931177616119385, "loss": 1.6242, "nll_loss": 0.40601515769958496, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038467044942080975, "rewards/margins": 0.2289271056652069, "rewards/rejected": -0.2293117642402649, "step": 6320 }, { "epoch": 4.37136929460581, "grad_norm": 10.913430213928223, "learning_rate": 3.1270170585523284e-05, "log_odds_chosen": 10.11327838897705, "log_odds_ratio": -0.0007298666751012206, "logits/chosen": -0.581683337688446, "logits/rejected": -0.621495246887207, "logps/chosen": -0.0038006172981113195, "logps/rejected": -2.459005355834961, "loss": 0.8858, "nll_loss": 0.22136807441711426, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038006174145266414, "rewards/margins": 0.2455204725265503, "rewards/rejected": -0.24590054154396057, "step": 6321 }, { "epoch": 4.372060857538036, "grad_norm": 7.445157527923584, "learning_rate": 3.1266328569233136e-05, "log_odds_chosen": 8.718201637268066, "log_odds_ratio": -0.00027666238020174205, "logits/chosen": -0.4421314597129822, "logits/rejected": -0.3595300018787384, "logps/chosen": -0.009511098265647888, "logps/rejected": -1.9657456874847412, "loss": 1.519, "nll_loss": 0.37971681356430054, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009511099196970463, "rewards/margins": 0.1956234574317932, "rewards/rejected": -0.19657456874847412, "step": 6322 }, { "epoch": 4.372752420470263, "grad_norm": 7.530955791473389, "learning_rate": 3.126248655294299e-05, "log_odds_chosen": 10.011698722839355, "log_odds_ratio": -0.0005805735709145665, "logits/chosen": -0.4899846017360687, "logits/rejected": -0.5263997316360474, "logps/chosen": -0.000975239381659776, "logps/rejected": -2.055481195449829, "loss": 1.2532, "nll_loss": 0.31323084235191345, "rewards/accuracies": 1.0, "rewards/chosen": -9.75239381659776e-05, "rewards/margins": 0.20545059442520142, "rewards/rejected": -0.20554812252521515, "step": 6323 }, { "epoch": 4.37344398340249, "grad_norm": 5.401859283447266, "learning_rate": 3.1258644536652834e-05, "log_odds_chosen": 9.362093925476074, "log_odds_ratio": -0.0017716753063723445, "logits/chosen": -0.5734344720840454, "logits/rejected": -0.5739961862564087, "logps/chosen": -0.0014673115219920874, "logps/rejected": -1.9922149181365967, "loss": 0.9307, "nll_loss": 0.23249852657318115, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014673115219920874, "rewards/margins": 0.19907476007938385, "rewards/rejected": -0.19922150671482086, "step": 6324 }, { "epoch": 4.374135546334717, "grad_norm": 15.756174087524414, "learning_rate": 3.125480252036269e-05, "log_odds_chosen": 10.876068115234375, "log_odds_ratio": -0.0008979838457889855, "logits/chosen": -1.0250927209854126, "logits/rejected": -1.001251459121704, "logps/chosen": -0.007843158207833767, "logps/rejected": -3.035405158996582, "loss": 1.179, "nll_loss": 0.29465389251708984, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007843158091418445, "rewards/margins": 0.3027561902999878, "rewards/rejected": -0.30354049801826477, "step": 6325 }, { "epoch": 4.374827109266944, "grad_norm": 8.404903411865234, "learning_rate": 3.125096050407254e-05, "log_odds_chosen": 10.507247924804688, "log_odds_ratio": -7.449003896908835e-05, "logits/chosen": -0.7850874066352844, "logits/rejected": -0.85020512342453, "logps/chosen": -0.0002060161205008626, "logps/rejected": -2.043398857116699, "loss": 1.2735, "nll_loss": 0.318358451128006, "rewards/accuracies": 1.0, "rewards/chosen": -2.0601613869075663e-05, "rewards/margins": 0.2043193131685257, "rewards/rejected": -0.2043399214744568, "step": 6326 }, { "epoch": 4.375518672199171, "grad_norm": 6.757132053375244, "learning_rate": 3.124711848778239e-05, "log_odds_chosen": 8.752026557922363, "log_odds_ratio": -0.0007465857197530568, "logits/chosen": -0.3212154507637024, "logits/rejected": -0.4050557613372803, "logps/chosen": -0.003038684604689479, "logps/rejected": -1.4287402629852295, "loss": 1.6756, "nll_loss": 0.41883230209350586, "rewards/accuracies": 1.0, "rewards/chosen": -0.000303868466289714, "rewards/margins": 0.1425701528787613, "rewards/rejected": -0.14287403225898743, "step": 6327 }, { "epoch": 4.376210235131397, "grad_norm": 7.012808322906494, "learning_rate": 3.124327647149224e-05, "log_odds_chosen": 9.273959159851074, "log_odds_ratio": -0.0010292872320860624, "logits/chosen": -0.7706098556518555, "logits/rejected": -0.7494393587112427, "logps/chosen": -0.0008519127150066197, "logps/rejected": -1.6534192562103271, "loss": 0.9163, "nll_loss": 0.2289770245552063, "rewards/accuracies": 1.0, "rewards/chosen": -8.519127004547045e-05, "rewards/margins": 0.16525673866271973, "rewards/rejected": -0.16534192860126495, "step": 6328 }, { "epoch": 4.376901798063624, "grad_norm": 11.536850929260254, "learning_rate": 3.1239434455202097e-05, "log_odds_chosen": 10.570297241210938, "log_odds_ratio": -5.9236168453935534e-05, "logits/chosen": -0.4841303825378418, "logits/rejected": -0.5646402835845947, "logps/chosen": -0.00024312842288054526, "logps/rejected": -2.0626015663146973, "loss": 0.877, "nll_loss": 0.21923774480819702, "rewards/accuracies": 1.0, "rewards/chosen": -2.4312843379448168e-05, "rewards/margins": 0.2062358409166336, "rewards/rejected": -0.20626014471054077, "step": 6329 }, { "epoch": 4.377593360995851, "grad_norm": 8.52833080291748, "learning_rate": 3.123559243891194e-05, "log_odds_chosen": 10.625216484069824, "log_odds_ratio": -5.232186958892271e-05, "logits/chosen": -0.3533399701118469, "logits/rejected": -0.34043389558792114, "logps/chosen": -0.00021401699632406235, "logps/rejected": -2.0122456550598145, "loss": 0.9324, "nll_loss": 0.23309096693992615, "rewards/accuracies": 1.0, "rewards/chosen": -2.1401698177214712e-05, "rewards/margins": 0.20120316743850708, "rewards/rejected": -0.20122458040714264, "step": 6330 }, { "epoch": 4.378284923928078, "grad_norm": 5.83406400680542, "learning_rate": 3.1231750422621795e-05, "log_odds_chosen": 9.288875579833984, "log_odds_ratio": -0.01151504460722208, "logits/chosen": -0.4276590347290039, "logits/rejected": -0.46351057291030884, "logps/chosen": -0.004148704465478659, "logps/rejected": -1.5049939155578613, "loss": 0.697, "nll_loss": 0.17310747504234314, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004148704756516963, "rewards/margins": 0.15008452534675598, "rewards/rejected": -0.1504994034767151, "step": 6331 }, { "epoch": 4.378976486860305, "grad_norm": 33.9369010925293, "learning_rate": 3.122790840633165e-05, "log_odds_chosen": 8.93118667602539, "log_odds_ratio": -0.15788762271404266, "logits/chosen": -0.644578218460083, "logits/rejected": -0.685411274433136, "logps/chosen": -0.024288659915328026, "logps/rejected": -2.2254717350006104, "loss": 1.7872, "nll_loss": 0.43102213740348816, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024288659915328026, "rewards/margins": 0.22011832892894745, "rewards/rejected": -0.22254718840122223, "step": 6332 }, { "epoch": 4.3796680497925315, "grad_norm": 9.28854751586914, "learning_rate": 3.122406639004149e-05, "log_odds_chosen": 9.086782455444336, "log_odds_ratio": -0.001112618949264288, "logits/chosen": -0.4650830924510956, "logits/rejected": -0.495891273021698, "logps/chosen": -0.001777901779860258, "logps/rejected": -1.8395483493804932, "loss": 1.2764, "nll_loss": 0.3189956843852997, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017779016343411058, "rewards/margins": 0.18377703428268433, "rewards/rejected": -0.18395483493804932, "step": 6333 }, { "epoch": 4.380359612724758, "grad_norm": 8.1270751953125, "learning_rate": 3.1220224373751345e-05, "log_odds_chosen": 9.927444458007812, "log_odds_ratio": -0.00011174234532518312, "logits/chosen": -0.48753228783607483, "logits/rejected": -0.503075361251831, "logps/chosen": -0.0002461006515659392, "logps/rejected": -1.4193822145462036, "loss": 1.0163, "nll_loss": 0.25406113266944885, "rewards/accuracies": 1.0, "rewards/chosen": -2.4610068066976964e-05, "rewards/margins": 0.14191360771656036, "rewards/rejected": -0.1419382244348526, "step": 6334 }, { "epoch": 4.381051175656985, "grad_norm": 9.07845401763916, "learning_rate": 3.12163823574612e-05, "log_odds_chosen": 10.563726425170898, "log_odds_ratio": -0.00018957343127112836, "logits/chosen": -0.21047934889793396, "logits/rejected": -0.2606376111507416, "logps/chosen": -0.0005684728967025876, "logps/rejected": -2.5454013347625732, "loss": 1.1947, "nll_loss": 0.2986586391925812, "rewards/accuracies": 1.0, "rewards/chosen": -5.684728603227995e-05, "rewards/margins": 0.25448331236839294, "rewards/rejected": -0.2545401453971863, "step": 6335 }, { "epoch": 4.381742738589212, "grad_norm": 7.290050983428955, "learning_rate": 3.121254034117105e-05, "log_odds_chosen": 9.2028169631958, "log_odds_ratio": -0.01780695840716362, "logits/chosen": -0.3207651972770691, "logits/rejected": -0.4059835970401764, "logps/chosen": -0.005791170988231897, "logps/rejected": -1.461303949356079, "loss": 1.0177, "nll_loss": 0.2526489794254303, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005791170406155288, "rewards/margins": 0.14555127918720245, "rewards/rejected": -0.14613039791584015, "step": 6336 }, { "epoch": 4.382434301521439, "grad_norm": 10.609784126281738, "learning_rate": 3.1208698324880896e-05, "log_odds_chosen": 10.936665534973145, "log_odds_ratio": -4.089557842235081e-05, "logits/chosen": -0.6534034013748169, "logits/rejected": -0.6795228719711304, "logps/chosen": -0.0002644860651344061, "logps/rejected": -2.4768505096435547, "loss": 0.7925, "nll_loss": 0.19811291992664337, "rewards/accuracies": 1.0, "rewards/chosen": -2.6448606149642728e-05, "rewards/margins": 0.2476586103439331, "rewards/rejected": -0.24768505990505219, "step": 6337 }, { "epoch": 4.383125864453666, "grad_norm": 10.921443939208984, "learning_rate": 3.1204856308590755e-05, "log_odds_chosen": 10.012323379516602, "log_odds_ratio": -0.0001081754598999396, "logits/chosen": -0.3615493178367615, "logits/rejected": -0.44846972823143005, "logps/chosen": -0.000523362890817225, "logps/rejected": -1.896093726158142, "loss": 1.0746, "nll_loss": 0.2686314880847931, "rewards/accuracies": 1.0, "rewards/chosen": -5.233629053691402e-05, "rewards/margins": 0.1895570307970047, "rewards/rejected": -0.1896093785762787, "step": 6338 }, { "epoch": 4.3838174273858925, "grad_norm": 11.553305625915527, "learning_rate": 3.12010142923006e-05, "log_odds_chosen": 10.591811180114746, "log_odds_ratio": -0.00015707314014434814, "logits/chosen": -0.3892832398414612, "logits/rejected": -0.353343665599823, "logps/chosen": -0.0004059493658132851, "logps/rejected": -2.3211452960968018, "loss": 1.2325, "nll_loss": 0.30811676383018494, "rewards/accuracies": 1.0, "rewards/chosen": -4.059493585373275e-05, "rewards/margins": 0.23207393288612366, "rewards/rejected": -0.2321145236492157, "step": 6339 }, { "epoch": 4.384508990318119, "grad_norm": 6.500206470489502, "learning_rate": 3.119717227601045e-05, "log_odds_chosen": 10.269033432006836, "log_odds_ratio": -5.843197868671268e-05, "logits/chosen": -0.5071641802787781, "logits/rejected": -0.546855628490448, "logps/chosen": -0.0001225728919962421, "logps/rejected": -1.1638572216033936, "loss": 0.9306, "nll_loss": 0.2326522022485733, "rewards/accuracies": 1.0, "rewards/chosen": -1.225728919962421e-05, "rewards/margins": 0.1163734719157219, "rewards/rejected": -0.11638573557138443, "step": 6340 }, { "epoch": 4.385200553250346, "grad_norm": 14.92447280883789, "learning_rate": 3.1193330259720306e-05, "log_odds_chosen": 10.092230796813965, "log_odds_ratio": -0.00015422774595208466, "logits/chosen": -0.7137937545776367, "logits/rejected": -0.7913176417350769, "logps/chosen": -0.0002549771743360907, "logps/rejected": -1.6001590490341187, "loss": 1.1712, "nll_loss": 0.2927763760089874, "rewards/accuracies": 1.0, "rewards/chosen": -2.5497716706013307e-05, "rewards/margins": 0.15999040007591248, "rewards/rejected": -0.16001591086387634, "step": 6341 }, { "epoch": 4.385892116182573, "grad_norm": 10.537651062011719, "learning_rate": 3.118948824343015e-05, "log_odds_chosen": 10.228643417358398, "log_odds_ratio": -7.193143392214552e-05, "logits/chosen": -0.2224959433078766, "logits/rejected": -0.28621044754981995, "logps/chosen": -0.00033477373654022813, "logps/rejected": -1.872227668762207, "loss": 1.0915, "nll_loss": 0.2728642225265503, "rewards/accuracies": 1.0, "rewards/chosen": -3.347737219883129e-05, "rewards/margins": 0.18718931078910828, "rewards/rejected": -0.18722279369831085, "step": 6342 }, { "epoch": 4.3865836791148, "grad_norm": 9.82940673828125, "learning_rate": 3.1185646227140004e-05, "log_odds_chosen": 10.144380569458008, "log_odds_ratio": -0.022902216762304306, "logits/chosen": -0.8177899122238159, "logits/rejected": -0.8910137414932251, "logps/chosen": -0.05426086485385895, "logps/rejected": -2.715169906616211, "loss": 1.2558, "nll_loss": 0.3116682767868042, "rewards/accuracies": 1.0, "rewards/chosen": -0.005426086951047182, "rewards/margins": 0.26609086990356445, "rewards/rejected": -0.27151697874069214, "step": 6343 }, { "epoch": 4.387275242047027, "grad_norm": 11.173566818237305, "learning_rate": 3.1181804210849856e-05, "log_odds_chosen": 11.273879051208496, "log_odds_ratio": -1.8894152162829414e-05, "logits/chosen": -0.36173272132873535, "logits/rejected": -0.4322207570075989, "logps/chosen": -0.00011111483763670549, "logps/rejected": -2.1379709243774414, "loss": 1.1858, "nll_loss": 0.2964479625225067, "rewards/accuracies": 1.0, "rewards/chosen": -1.111148412746843e-05, "rewards/margins": 0.21378597617149353, "rewards/rejected": -0.21379709243774414, "step": 6344 }, { "epoch": 4.3879668049792535, "grad_norm": 9.317232131958008, "learning_rate": 3.117796219455971e-05, "log_odds_chosen": 8.893519401550293, "log_odds_ratio": -0.0006592870340682566, "logits/chosen": -0.2592763900756836, "logits/rejected": -0.31366080045700073, "logps/chosen": -0.0014565077144652605, "logps/rejected": -1.7281951904296875, "loss": 1.1022, "nll_loss": 0.27548471093177795, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014565078890882432, "rewards/margins": 0.17267388105392456, "rewards/rejected": -0.17281952500343323, "step": 6345 }, { "epoch": 4.38865836791148, "grad_norm": 11.148494720458984, "learning_rate": 3.1174120178269554e-05, "log_odds_chosen": 9.223249435424805, "log_odds_ratio": -0.0002908221213147044, "logits/chosen": -0.5727559328079224, "logits/rejected": -0.7116425037384033, "logps/chosen": -0.0018146205693483353, "logps/rejected": -1.5348750352859497, "loss": 1.2702, "nll_loss": 0.3175121545791626, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018146206275559962, "rewards/margins": 0.1533060371875763, "rewards/rejected": -0.15348750352859497, "step": 6346 }, { "epoch": 4.389349930843707, "grad_norm": 10.441041946411133, "learning_rate": 3.1170278161979413e-05, "log_odds_chosen": 8.41359806060791, "log_odds_ratio": -0.03371895104646683, "logits/chosen": -0.4587884545326233, "logits/rejected": -0.5229263305664062, "logps/chosen": -0.04264894500374794, "logps/rejected": -1.6140475273132324, "loss": 1.4225, "nll_loss": 0.3522500991821289, "rewards/accuracies": 1.0, "rewards/chosen": -0.0042648944072425365, "rewards/margins": 0.1571398675441742, "rewards/rejected": -0.16140475869178772, "step": 6347 }, { "epoch": 4.390041493775934, "grad_norm": 9.487386703491211, "learning_rate": 3.116643614568926e-05, "log_odds_chosen": 10.76372241973877, "log_odds_ratio": -9.364535799250007e-05, "logits/chosen": -0.6684786081314087, "logits/rejected": -0.7083771228790283, "logps/chosen": -0.00019393152615521103, "logps/rejected": -1.8834095001220703, "loss": 1.2263, "nll_loss": 0.30655691027641296, "rewards/accuracies": 1.0, "rewards/chosen": -1.9393151887925342e-05, "rewards/margins": 0.1883215457201004, "rewards/rejected": -0.1883409470319748, "step": 6348 }, { "epoch": 4.390733056708161, "grad_norm": 8.466039657592773, "learning_rate": 3.116259412939911e-05, "log_odds_chosen": 8.854142189025879, "log_odds_ratio": -0.0010371842654421926, "logits/chosen": -0.7207614183425903, "logits/rejected": -0.7611744999885559, "logps/chosen": -0.0017626279732212424, "logps/rejected": -1.460201621055603, "loss": 1.6915, "nll_loss": 0.42276665568351746, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017626280896365643, "rewards/margins": 0.14584389328956604, "rewards/rejected": -0.14602015912532806, "step": 6349 }, { "epoch": 4.391424619640388, "grad_norm": 9.773248672485352, "learning_rate": 3.1158752113108964e-05, "log_odds_chosen": 10.251541137695312, "log_odds_ratio": -4.190437539364211e-05, "logits/chosen": -0.4113655090332031, "logits/rejected": -0.5633144378662109, "logps/chosen": -0.0003510084352456033, "logps/rejected": -2.211672306060791, "loss": 0.7652, "nll_loss": 0.19130098819732666, "rewards/accuracies": 1.0, "rewards/chosen": -3.5100845707347617e-05, "rewards/margins": 0.22113212943077087, "rewards/rejected": -0.22116723656654358, "step": 6350 }, { "epoch": 4.3921161825726145, "grad_norm": 7.457752227783203, "learning_rate": 3.115491009681881e-05, "log_odds_chosen": 10.767346382141113, "log_odds_ratio": -6.526858487632126e-05, "logits/chosen": -0.7429406046867371, "logits/rejected": -0.7705117464065552, "logps/chosen": -0.00023447003331966698, "logps/rejected": -2.1938061714172363, "loss": 1.336, "nll_loss": 0.33399924635887146, "rewards/accuracies": 1.0, "rewards/chosen": -2.3447002604370937e-05, "rewards/margins": 0.21935716271400452, "rewards/rejected": -0.21938061714172363, "step": 6351 }, { "epoch": 4.392807745504841, "grad_norm": 10.649615287780762, "learning_rate": 3.115106808052866e-05, "log_odds_chosen": 9.68832778930664, "log_odds_ratio": -0.12552544474601746, "logits/chosen": -0.8307643532752991, "logits/rejected": -0.8816390037536621, "logps/chosen": -0.019055670127272606, "logps/rejected": -1.485122799873352, "loss": 1.8428, "nll_loss": 0.4481407105922699, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019055670127272606, "rewards/margins": 0.14660672843456268, "rewards/rejected": -0.14851228892803192, "step": 6352 }, { "epoch": 4.393499308437068, "grad_norm": 9.400788307189941, "learning_rate": 3.1147226064238515e-05, "log_odds_chosen": 10.734479904174805, "log_odds_ratio": -7.558297511423007e-05, "logits/chosen": -0.6922191977500916, "logits/rejected": -0.7359859943389893, "logps/chosen": -0.00017174580716527998, "logps/rejected": -2.06654691696167, "loss": 0.7905, "nll_loss": 0.1976223587989807, "rewards/accuracies": 1.0, "rewards/chosen": -1.717458144412376e-05, "rewards/margins": 0.20663750171661377, "rewards/rejected": -0.20665468275547028, "step": 6353 }, { "epoch": 4.394190871369295, "grad_norm": 7.185258865356445, "learning_rate": 3.114338404794837e-05, "log_odds_chosen": 9.011981964111328, "log_odds_ratio": -0.0004941418301314116, "logits/chosen": -0.5035545229911804, "logits/rejected": -0.5419560670852661, "logps/chosen": -0.0006765194702893496, "logps/rejected": -1.283606767654419, "loss": 0.939, "nll_loss": 0.2347015142440796, "rewards/accuracies": 1.0, "rewards/chosen": -6.765194848412648e-05, "rewards/margins": 0.1282930225133896, "rewards/rejected": -0.12836067378520966, "step": 6354 }, { "epoch": 4.394882434301522, "grad_norm": 8.787233352661133, "learning_rate": 3.113954203165821e-05, "log_odds_chosen": 9.925066947937012, "log_odds_ratio": -0.0003282953693997115, "logits/chosen": -0.45993995666503906, "logits/rejected": -0.4733354449272156, "logps/chosen": -0.000683745660353452, "logps/rejected": -1.690459132194519, "loss": 0.9997, "nll_loss": 0.2498854249715805, "rewards/accuracies": 1.0, "rewards/chosen": -6.83745602145791e-05, "rewards/margins": 0.1689775437116623, "rewards/rejected": -0.16904591023921967, "step": 6355 }, { "epoch": 4.395573997233749, "grad_norm": 10.263931274414062, "learning_rate": 3.113570001536807e-05, "log_odds_chosen": 8.73983097076416, "log_odds_ratio": -0.011168360710144043, "logits/chosen": -0.3822953402996063, "logits/rejected": -0.43949460983276367, "logps/chosen": -0.008786053396761417, "logps/rejected": -1.7901290655136108, "loss": 1.0338, "nll_loss": 0.25732412934303284, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008786054095253348, "rewards/margins": 0.17813430726528168, "rewards/rejected": -0.17901290953159332, "step": 6356 }, { "epoch": 4.3962655601659755, "grad_norm": 5.810608386993408, "learning_rate": 3.113185799907792e-05, "log_odds_chosen": 10.540541648864746, "log_odds_ratio": -5.537183824344538e-05, "logits/chosen": -0.2416747510433197, "logits/rejected": -0.29514622688293457, "logps/chosen": -0.007341462187469006, "logps/rejected": -2.8202743530273438, "loss": 1.2999, "nll_loss": 0.3249596357345581, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007341462187469006, "rewards/margins": 0.28129327297210693, "rewards/rejected": -0.2820274531841278, "step": 6357 }, { "epoch": 4.396957123098202, "grad_norm": 5.3782877922058105, "learning_rate": 3.112801598278777e-05, "log_odds_chosen": 9.902215957641602, "log_odds_ratio": -0.0002081981801893562, "logits/chosen": -0.5407758355140686, "logits/rejected": -0.4762265384197235, "logps/chosen": -0.0003181890642736107, "logps/rejected": -1.5239753723144531, "loss": 0.9757, "nll_loss": 0.2439117431640625, "rewards/accuracies": 1.0, "rewards/chosen": -3.1818908610148355e-05, "rewards/margins": 0.15236571431159973, "rewards/rejected": -0.1523975431919098, "step": 6358 }, { "epoch": 4.397648686030429, "grad_norm": 8.642234802246094, "learning_rate": 3.112417396649762e-05, "log_odds_chosen": 10.313976287841797, "log_odds_ratio": -0.00011284256470389664, "logits/chosen": -0.7431192398071289, "logits/rejected": -0.7744413614273071, "logps/chosen": -0.0001350109523627907, "logps/rejected": -1.5020142793655396, "loss": 1.1418, "nll_loss": 0.28544408082962036, "rewards/accuracies": 1.0, "rewards/chosen": -1.350109505438013e-05, "rewards/margins": 0.15018793940544128, "rewards/rejected": -0.1502014398574829, "step": 6359 }, { "epoch": 4.398340248962656, "grad_norm": 11.897019386291504, "learning_rate": 3.112033195020747e-05, "log_odds_chosen": 10.22813606262207, "log_odds_ratio": -0.00019520701607689261, "logits/chosen": -0.452121376991272, "logits/rejected": -0.5452828407287598, "logps/chosen": -0.0006089904345571995, "logps/rejected": -2.3550171852111816, "loss": 1.0218, "nll_loss": 0.25543949007987976, "rewards/accuracies": 1.0, "rewards/chosen": -6.0899052186869085e-05, "rewards/margins": 0.23544083535671234, "rewards/rejected": -0.2355017364025116, "step": 6360 }, { "epoch": 4.399031811894883, "grad_norm": 7.805273056030273, "learning_rate": 3.111648993391732e-05, "log_odds_chosen": 9.458495140075684, "log_odds_ratio": -0.00022262008860707283, "logits/chosen": -0.7785831093788147, "logits/rejected": -0.7868790030479431, "logps/chosen": -0.0004530495498329401, "logps/rejected": -1.3462885618209839, "loss": 0.9961, "nll_loss": 0.24900603294372559, "rewards/accuracies": 1.0, "rewards/chosen": -4.5304957893677056e-05, "rewards/margins": 0.13458356261253357, "rewards/rejected": -0.13462886214256287, "step": 6361 }, { "epoch": 4.39972337482711, "grad_norm": 5.2602338790893555, "learning_rate": 3.111264791762717e-05, "log_odds_chosen": 9.917596817016602, "log_odds_ratio": -0.00048271557898260653, "logits/chosen": -0.5518268346786499, "logits/rejected": -0.6171162128448486, "logps/chosen": -0.006441672332584858, "logps/rejected": -1.653882384300232, "loss": 1.098, "nll_loss": 0.2744504511356354, "rewards/accuracies": 1.0, "rewards/chosen": -0.000644167244900018, "rewards/margins": 0.1647440642118454, "rewards/rejected": -0.16538822650909424, "step": 6362 }, { "epoch": 4.4004149377593365, "grad_norm": 10.160521507263184, "learning_rate": 3.1108805901337025e-05, "log_odds_chosen": 10.352978706359863, "log_odds_ratio": -9.755916835274547e-05, "logits/chosen": -0.21207648515701294, "logits/rejected": -0.3640451729297638, "logps/chosen": -0.00020385342941153795, "logps/rejected": -1.6681032180786133, "loss": 1.612, "nll_loss": 0.40298330783843994, "rewards/accuracies": 1.0, "rewards/chosen": -2.038534512394108e-05, "rewards/margins": 0.1667899638414383, "rewards/rejected": -0.16681033372879028, "step": 6363 }, { "epoch": 4.401106500691563, "grad_norm": 10.753729820251465, "learning_rate": 3.110496388504687e-05, "log_odds_chosen": 10.651169776916504, "log_odds_ratio": -4.766930578625761e-05, "logits/chosen": -0.510490357875824, "logits/rejected": -0.6057192087173462, "logps/chosen": -0.0003168170223943889, "logps/rejected": -2.441676616668701, "loss": 1.157, "nll_loss": 0.28923308849334717, "rewards/accuracies": 1.0, "rewards/chosen": -3.168170223943889e-05, "rewards/margins": 0.24413597583770752, "rewards/rejected": -0.24416767060756683, "step": 6364 }, { "epoch": 4.40179806362379, "grad_norm": 9.026993751525879, "learning_rate": 3.110112186875673e-05, "log_odds_chosen": 10.503043174743652, "log_odds_ratio": -9.254955512005836e-05, "logits/chosen": -0.5653536319732666, "logits/rejected": -0.5738909244537354, "logps/chosen": -0.00036744290264323354, "logps/rejected": -2.0169739723205566, "loss": 0.8859, "nll_loss": 0.2214638888835907, "rewards/accuracies": 1.0, "rewards/chosen": -3.6744291719514877e-05, "rewards/margins": 0.2016606628894806, "rewards/rejected": -0.20169739425182343, "step": 6365 }, { "epoch": 4.402489626556017, "grad_norm": 10.971336364746094, "learning_rate": 3.1097279852466576e-05, "log_odds_chosen": 10.245159149169922, "log_odds_ratio": -0.00012390792835503817, "logits/chosen": -0.7902606725692749, "logits/rejected": -0.8872899413108826, "logps/chosen": -0.00017842264787759632, "logps/rejected": -1.6078989505767822, "loss": 0.9098, "nll_loss": 0.2274371087551117, "rewards/accuracies": 1.0, "rewards/chosen": -1.7842265151557513e-05, "rewards/margins": 0.16077205538749695, "rewards/rejected": -0.16078990697860718, "step": 6366 }, { "epoch": 4.403181189488244, "grad_norm": 17.538488388061523, "learning_rate": 3.109343783617643e-05, "log_odds_chosen": 10.931398391723633, "log_odds_ratio": -3.503288826323114e-05, "logits/chosen": -0.7548744678497314, "logits/rejected": -0.8804567456245422, "logps/chosen": -0.0001960860681720078, "logps/rejected": -2.3025503158569336, "loss": 1.2374, "nll_loss": 0.309344083070755, "rewards/accuracies": 1.0, "rewards/chosen": -1.960860754479654e-05, "rewards/margins": 0.23023542761802673, "rewards/rejected": -0.23025503754615784, "step": 6367 }, { "epoch": 4.403872752420471, "grad_norm": 6.541409015655518, "learning_rate": 3.108959581988628e-05, "log_odds_chosen": 9.534074783325195, "log_odds_ratio": -0.0003987067029811442, "logits/chosen": -0.43024882674217224, "logits/rejected": -0.44275692105293274, "logps/chosen": -0.0007749908836558461, "logps/rejected": -1.574808955192566, "loss": 0.6758, "nll_loss": 0.16890332102775574, "rewards/accuracies": 1.0, "rewards/chosen": -7.749909127596766e-05, "rewards/margins": 0.15740340948104858, "rewards/rejected": -0.1574808955192566, "step": 6368 }, { "epoch": 4.404564315352697, "grad_norm": 17.76714515686035, "learning_rate": 3.1085753803596127e-05, "log_odds_chosen": 8.868440628051758, "log_odds_ratio": -0.00660840654745698, "logits/chosen": -0.5223338007926941, "logits/rejected": -0.5196986198425293, "logps/chosen": -0.03560849279165268, "logps/rejected": -1.5677039623260498, "loss": 0.6861, "nll_loss": 0.17087195813655853, "rewards/accuracies": 1.0, "rewards/chosen": -0.0035608489997684956, "rewards/margins": 0.15320953726768494, "rewards/rejected": -0.15677037835121155, "step": 6369 }, { "epoch": 4.405255878284924, "grad_norm": 7.546093463897705, "learning_rate": 3.108191178730598e-05, "log_odds_chosen": 9.531181335449219, "log_odds_ratio": -0.029602551832795143, "logits/chosen": -0.5928676724433899, "logits/rejected": -0.4486514925956726, "logps/chosen": -0.006365879438817501, "logps/rejected": -1.5473785400390625, "loss": 0.9026, "nll_loss": 0.22268284857273102, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006365880253724754, "rewards/margins": 0.15410126745700836, "rewards/rejected": -0.15473784506320953, "step": 6370 }, { "epoch": 4.405947441217151, "grad_norm": 13.640862464904785, "learning_rate": 3.107806977101583e-05, "log_odds_chosen": 8.941659927368164, "log_odds_ratio": -0.00953722931444645, "logits/chosen": -0.16276182234287262, "logits/rejected": -0.22773955762386322, "logps/chosen": -0.05365453660488129, "logps/rejected": -2.1357078552246094, "loss": 0.9216, "nll_loss": 0.22944633662700653, "rewards/accuracies": 1.0, "rewards/chosen": -0.005365454591810703, "rewards/margins": 0.20820534229278564, "rewards/rejected": -0.21357078850269318, "step": 6371 }, { "epoch": 4.406639004149378, "grad_norm": 8.92506217956543, "learning_rate": 3.1074227754725684e-05, "log_odds_chosen": 9.07198715209961, "log_odds_ratio": -0.016817396506667137, "logits/chosen": -0.10156579315662384, "logits/rejected": -0.19527952373027802, "logps/chosen": -0.023559454828500748, "logps/rejected": -2.2837753295898438, "loss": 1.3893, "nll_loss": 0.34563571214675903, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023559455294162035, "rewards/margins": 0.22602161765098572, "rewards/rejected": -0.2283775508403778, "step": 6372 }, { "epoch": 4.407330567081605, "grad_norm": 7.370696067810059, "learning_rate": 3.107038573843553e-05, "log_odds_chosen": 9.33350944519043, "log_odds_ratio": -0.0006806927849538624, "logits/chosen": -0.6236019134521484, "logits/rejected": -0.6567580103874207, "logps/chosen": -0.007249999791383743, "logps/rejected": -1.7281427383422852, "loss": 1.4798, "nll_loss": 0.36987632513046265, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007249999325722456, "rewards/margins": 0.17208926379680634, "rewards/rejected": -0.1728142648935318, "step": 6373 }, { "epoch": 4.408022130013832, "grad_norm": 18.29096031188965, "learning_rate": 3.106654372214539e-05, "log_odds_chosen": 10.78128719329834, "log_odds_ratio": -6.995126022957265e-05, "logits/chosen": -0.6699973940849304, "logits/rejected": -0.731697678565979, "logps/chosen": -0.0005232581752352417, "logps/rejected": -2.4144153594970703, "loss": 1.048, "nll_loss": 0.2619859576225281, "rewards/accuracies": 1.0, "rewards/chosen": -5.232582043390721e-05, "rewards/margins": 0.2413892298936844, "rewards/rejected": -0.241441547870636, "step": 6374 }, { "epoch": 4.408713692946058, "grad_norm": 7.64169454574585, "learning_rate": 3.1062701705855234e-05, "log_odds_chosen": 10.888839721679688, "log_odds_ratio": -0.00017014621698763222, "logits/chosen": -0.40166181325912476, "logits/rejected": -0.4729452431201935, "logps/chosen": -0.0003403805603738874, "logps/rejected": -2.4208157062530518, "loss": 0.8522, "nll_loss": 0.21304450929164886, "rewards/accuracies": 1.0, "rewards/chosen": -3.403805385460146e-05, "rewards/margins": 0.242047518491745, "rewards/rejected": -0.24208158254623413, "step": 6375 }, { "epoch": 4.409405255878285, "grad_norm": 5.552724838256836, "learning_rate": 3.105885968956509e-05, "log_odds_chosen": 9.83051872253418, "log_odds_ratio": -0.0004149650048930198, "logits/chosen": -0.5414485931396484, "logits/rejected": -0.5747936964035034, "logps/chosen": -0.0008759694756008685, "logps/rejected": -2.1983699798583984, "loss": 0.9518, "nll_loss": 0.23790419101715088, "rewards/accuracies": 1.0, "rewards/chosen": -8.759694901527837e-05, "rewards/margins": 0.21974940598011017, "rewards/rejected": -0.2198370099067688, "step": 6376 }, { "epoch": 4.410096818810512, "grad_norm": 6.212398052215576, "learning_rate": 3.105501767327494e-05, "log_odds_chosen": 9.110426902770996, "log_odds_ratio": -0.0009867295157164335, "logits/chosen": -0.7143625617027283, "logits/rejected": -0.6959802508354187, "logps/chosen": -0.0036927165929228067, "logps/rejected": -1.8916161060333252, "loss": 1.0186, "nll_loss": 0.25453901290893555, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036927167093381286, "rewards/margins": 0.18879231810569763, "rewards/rejected": -0.18916161358356476, "step": 6377 }, { "epoch": 4.410788381742739, "grad_norm": 8.868108749389648, "learning_rate": 3.1051175656984785e-05, "log_odds_chosen": 8.546025276184082, "log_odds_ratio": -0.008631639182567596, "logits/chosen": -0.3716889023780823, "logits/rejected": -0.43401286005973816, "logps/chosen": -0.016441212967038155, "logps/rejected": -1.6564141511917114, "loss": 1.308, "nll_loss": 0.3261297941207886, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016441213665530086, "rewards/margins": 0.16399729251861572, "rewards/rejected": -0.1656414121389389, "step": 6378 }, { "epoch": 4.411479944674966, "grad_norm": 6.24019718170166, "learning_rate": 3.104733364069464e-05, "log_odds_chosen": 10.215981483459473, "log_odds_ratio": -0.00044016563333570957, "logits/chosen": -0.7779475450515747, "logits/rejected": -0.7803343534469604, "logps/chosen": -0.0005113465595059097, "logps/rejected": -2.0377840995788574, "loss": 1.1477, "nll_loss": 0.28688517212867737, "rewards/accuracies": 1.0, "rewards/chosen": -5.1134658860974014e-05, "rewards/margins": 0.20372727513313293, "rewards/rejected": -0.20377840101718903, "step": 6379 }, { "epoch": 4.412171507607193, "grad_norm": 12.92465877532959, "learning_rate": 3.104349162440449e-05, "log_odds_chosen": 11.562097549438477, "log_odds_ratio": -1.4962448403821327e-05, "logits/chosen": -0.7335084676742554, "logits/rejected": -0.7306284308433533, "logps/chosen": -0.0002169413783121854, "logps/rejected": -2.4687299728393555, "loss": 1.3994, "nll_loss": 0.34985631704330444, "rewards/accuracies": 1.0, "rewards/chosen": -2.1694138922612183e-05, "rewards/margins": 0.24685129523277283, "rewards/rejected": -0.24687300622463226, "step": 6380 }, { "epoch": 4.412863070539419, "grad_norm": 9.519418716430664, "learning_rate": 3.103964960811434e-05, "log_odds_chosen": 9.936283111572266, "log_odds_ratio": -0.00035743307671509683, "logits/chosen": -0.6547790169715881, "logits/rejected": -0.6877670288085938, "logps/chosen": -0.0006231877487152815, "logps/rejected": -2.062743663787842, "loss": 0.9886, "nll_loss": 0.2471020221710205, "rewards/accuracies": 1.0, "rewards/chosen": -6.231878069229424e-05, "rewards/margins": 0.20621204376220703, "rewards/rejected": -0.2062743753194809, "step": 6381 }, { "epoch": 4.413554633471646, "grad_norm": 11.097110748291016, "learning_rate": 3.103580759182419e-05, "log_odds_chosen": 10.018781661987305, "log_odds_ratio": -0.00010880576155614108, "logits/chosen": -0.36143845319747925, "logits/rejected": -0.40808019042015076, "logps/chosen": -0.00034866592613980174, "logps/rejected": -1.9138808250427246, "loss": 1.2034, "nll_loss": 0.30082690715789795, "rewards/accuracies": 1.0, "rewards/chosen": -3.486659625195898e-05, "rewards/margins": 0.1913532167673111, "rewards/rejected": -0.1913880705833435, "step": 6382 }, { "epoch": 4.414246196403873, "grad_norm": 6.786111831665039, "learning_rate": 3.103196557553405e-05, "log_odds_chosen": 10.440179824829102, "log_odds_ratio": -0.0006543896161019802, "logits/chosen": -0.48476123809814453, "logits/rejected": -0.5525182485580444, "logps/chosen": -0.00040884173358790576, "logps/rejected": -1.9244616031646729, "loss": 0.9288, "nll_loss": 0.23214299976825714, "rewards/accuracies": 1.0, "rewards/chosen": -4.0884173358790576e-05, "rewards/margins": 0.19240528345108032, "rewards/rejected": -0.19244614243507385, "step": 6383 }, { "epoch": 4.4149377593361, "grad_norm": 8.502795219421387, "learning_rate": 3.102812355924389e-05, "log_odds_chosen": 9.403182983398438, "log_odds_ratio": -0.0007035625749267638, "logits/chosen": -0.5949018597602844, "logits/rejected": -0.5342052578926086, "logps/chosen": -0.0016120446380227804, "logps/rejected": -1.7205469608306885, "loss": 1.262, "nll_loss": 0.31543827056884766, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016120447253342718, "rewards/margins": 0.17189349234104156, "rewards/rejected": -0.1720547080039978, "step": 6384 }, { "epoch": 4.415629322268327, "grad_norm": 6.596683025360107, "learning_rate": 3.1024281542953745e-05, "log_odds_chosen": 9.544157028198242, "log_odds_ratio": -0.00027197145391255617, "logits/chosen": -0.5985953211784363, "logits/rejected": -0.6238245964050293, "logps/chosen": -0.0003750473551917821, "logps/rejected": -1.8366684913635254, "loss": 1.1725, "nll_loss": 0.2931087613105774, "rewards/accuracies": 1.0, "rewards/chosen": -3.750473479158245e-05, "rewards/margins": 0.1836293488740921, "rewards/rejected": -0.18366685509681702, "step": 6385 }, { "epoch": 4.4163208852005535, "grad_norm": 9.299519538879395, "learning_rate": 3.10204395266636e-05, "log_odds_chosen": 10.27207088470459, "log_odds_ratio": -0.00013783818576484919, "logits/chosen": -0.26307037472724915, "logits/rejected": -0.4357321560382843, "logps/chosen": -0.0016038173343986273, "logps/rejected": -2.7095847129821777, "loss": 0.923, "nll_loss": 0.23072925209999084, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016038172179833055, "rewards/margins": 0.27079811692237854, "rewards/rejected": -0.27095848321914673, "step": 6386 }, { "epoch": 4.41701244813278, "grad_norm": 10.660256385803223, "learning_rate": 3.1016597510373443e-05, "log_odds_chosen": 9.878748893737793, "log_odds_ratio": -0.0004966585547663271, "logits/chosen": -0.390649676322937, "logits/rejected": -0.5493044853210449, "logps/chosen": -0.008036092855036259, "logps/rejected": -2.27475905418396, "loss": 0.9765, "nll_loss": 0.244069904088974, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008036093786358833, "rewards/margins": 0.22667229175567627, "rewards/rejected": -0.22747588157653809, "step": 6387 }, { "epoch": 4.417704011065007, "grad_norm": 11.35327434539795, "learning_rate": 3.1012755494083296e-05, "log_odds_chosen": 9.402727127075195, "log_odds_ratio": -0.0061118570156395435, "logits/chosen": -0.4596131145954132, "logits/rejected": -0.6097520589828491, "logps/chosen": -0.0035455625038594007, "logps/rejected": -1.9800617694854736, "loss": 0.8287, "nll_loss": 0.20656505227088928, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003545562212821096, "rewards/margins": 0.19765162467956543, "rewards/rejected": -0.19800618290901184, "step": 6388 }, { "epoch": 4.418395573997234, "grad_norm": 8.991832733154297, "learning_rate": 3.100891347779315e-05, "log_odds_chosen": 9.527231216430664, "log_odds_ratio": -0.00015416370297316462, "logits/chosen": -0.24660900235176086, "logits/rejected": -0.290757417678833, "logps/chosen": -0.001949973520822823, "logps/rejected": -2.1968846321105957, "loss": 0.8304, "nll_loss": 0.2075803279876709, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001949973520822823, "rewards/margins": 0.21949344873428345, "rewards/rejected": -0.21968846023082733, "step": 6389 }, { "epoch": 4.419087136929461, "grad_norm": 107.30193328857422, "learning_rate": 3.1005071461503e-05, "log_odds_chosen": 8.85942554473877, "log_odds_ratio": -0.4232620298862457, "logits/chosen": -0.19119130074977875, "logits/rejected": -0.2185167372226715, "logps/chosen": -0.053555119782686234, "logps/rejected": -2.0136759281158447, "loss": 1.255, "nll_loss": 0.2714240550994873, "rewards/accuracies": 0.875, "rewards/chosen": -0.005355512257665396, "rewards/margins": 0.19601207971572876, "rewards/rejected": -0.20136758685112, "step": 6390 }, { "epoch": 4.419778699861688, "grad_norm": 5.519765853881836, "learning_rate": 3.1001229445212846e-05, "log_odds_chosen": 10.144405364990234, "log_odds_ratio": -0.0001967347925528884, "logits/chosen": -0.37597039341926575, "logits/rejected": -0.4154529571533203, "logps/chosen": -0.01816502772271633, "logps/rejected": -2.4218828678131104, "loss": 1.3425, "nll_loss": 0.3355969786643982, "rewards/accuracies": 1.0, "rewards/chosen": -0.001816502888686955, "rewards/margins": 0.24037177860736847, "rewards/rejected": -0.24218828976154327, "step": 6391 }, { "epoch": 4.4204702627939145, "grad_norm": 9.79228687286377, "learning_rate": 3.0997387428922706e-05, "log_odds_chosen": 9.638640403747559, "log_odds_ratio": -0.0003855983086396009, "logits/chosen": -0.3042322099208832, "logits/rejected": -0.35943084955215454, "logps/chosen": -0.0038778900634497404, "logps/rejected": -2.6894869804382324, "loss": 1.006, "nll_loss": 0.2514561116695404, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038778901216574013, "rewards/margins": 0.2685609459877014, "rewards/rejected": -0.2689487338066101, "step": 6392 }, { "epoch": 4.421161825726141, "grad_norm": 10.364742279052734, "learning_rate": 3.099354541263255e-05, "log_odds_chosen": 10.168045043945312, "log_odds_ratio": -0.00010480596392881125, "logits/chosen": -0.30890512466430664, "logits/rejected": -0.37130510807037354, "logps/chosen": -0.0009393454529345036, "logps/rejected": -1.9779996871948242, "loss": 1.1569, "nll_loss": 0.28921347856521606, "rewards/accuracies": 1.0, "rewards/chosen": -9.393454092787579e-05, "rewards/margins": 0.19770604372024536, "rewards/rejected": -0.19779998064041138, "step": 6393 }, { "epoch": 4.421853388658368, "grad_norm": 7.593661308288574, "learning_rate": 3.0989703396342404e-05, "log_odds_chosen": 10.138404846191406, "log_odds_ratio": -0.0004462719371076673, "logits/chosen": -0.5672686100006104, "logits/rejected": -0.6298636794090271, "logps/chosen": -0.0006593248108401895, "logps/rejected": -2.124025344848633, "loss": 1.2576, "nll_loss": 0.3143640160560608, "rewards/accuracies": 1.0, "rewards/chosen": -6.593247962882742e-05, "rewards/margins": 0.21233659982681274, "rewards/rejected": -0.21240252256393433, "step": 6394 }, { "epoch": 4.422544951590595, "grad_norm": 11.665322303771973, "learning_rate": 3.098586138005225e-05, "log_odds_chosen": 10.167012214660645, "log_odds_ratio": -0.0001747005881043151, "logits/chosen": -0.9220602512359619, "logits/rejected": -0.9767961502075195, "logps/chosen": -0.0002635978162288666, "logps/rejected": -1.346954345703125, "loss": 1.2314, "nll_loss": 0.30783140659332275, "rewards/accuracies": 1.0, "rewards/chosen": -2.635978307807818e-05, "rewards/margins": 0.13466906547546387, "rewards/rejected": -0.13469544053077698, "step": 6395 }, { "epoch": 4.423236514522822, "grad_norm": 7.8194661140441895, "learning_rate": 3.09820193637621e-05, "log_odds_chosen": 10.15457534790039, "log_odds_ratio": -8.2727252447512e-05, "logits/chosen": -0.7054091095924377, "logits/rejected": -0.7562676668167114, "logps/chosen": -0.00047401481424458325, "logps/rejected": -2.0918338298797607, "loss": 1.4992, "nll_loss": 0.37479984760284424, "rewards/accuracies": 1.0, "rewards/chosen": -4.7401481424458325e-05, "rewards/margins": 0.2091359794139862, "rewards/rejected": -0.20918338000774384, "step": 6396 }, { "epoch": 4.423928077455049, "grad_norm": 6.56026029586792, "learning_rate": 3.0978177347471954e-05, "log_odds_chosen": 10.212728500366211, "log_odds_ratio": -5.804194006486796e-05, "logits/chosen": -0.5783580541610718, "logits/rejected": -0.6343774199485779, "logps/chosen": -0.0001326277997577563, "logps/rejected": -1.5737571716308594, "loss": 0.9005, "nll_loss": 0.2251134216785431, "rewards/accuracies": 1.0, "rewards/chosen": -1.3262779248179868e-05, "rewards/margins": 0.1573624610900879, "rewards/rejected": -0.15737572312355042, "step": 6397 }, { "epoch": 4.4246196403872755, "grad_norm": 8.338546752929688, "learning_rate": 3.09743353311818e-05, "log_odds_chosen": 10.239168167114258, "log_odds_ratio": -0.00016068453260231763, "logits/chosen": -0.7848932147026062, "logits/rejected": -0.8591190576553345, "logps/chosen": -0.00024313712492585182, "logps/rejected": -1.6348729133605957, "loss": 1.0218, "nll_loss": 0.2554447054862976, "rewards/accuracies": 1.0, "rewards/chosen": -2.4313714675372466e-05, "rewards/margins": 0.16346299648284912, "rewards/rejected": -0.16348731517791748, "step": 6398 }, { "epoch": 4.425311203319502, "grad_norm": 20.944805145263672, "learning_rate": 3.097049331489166e-05, "log_odds_chosen": 10.49547004699707, "log_odds_ratio": -0.0018726128619164228, "logits/chosen": -0.7779617309570312, "logits/rejected": -0.8085945844650269, "logps/chosen": -0.0009949326049536467, "logps/rejected": -1.942845344543457, "loss": 1.1049, "nll_loss": 0.27604442834854126, "rewards/accuracies": 1.0, "rewards/chosen": -9.949326340574771e-05, "rewards/margins": 0.1941850334405899, "rewards/rejected": -0.19428452849388123, "step": 6399 }, { "epoch": 4.426002766251729, "grad_norm": 16.597238540649414, "learning_rate": 3.0966651298601505e-05, "log_odds_chosen": 10.280326843261719, "log_odds_ratio": -0.0021524475887417793, "logits/chosen": -0.3056260347366333, "logits/rejected": -0.3296273350715637, "logps/chosen": -0.0016646343283355236, "logps/rejected": -2.4135384559631348, "loss": 1.0892, "nll_loss": 0.2720944285392761, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016646343283355236, "rewards/margins": 0.24118739366531372, "rewards/rejected": -0.2413538694381714, "step": 6400 }, { "epoch": 4.426694329183956, "grad_norm": 13.069977760314941, "learning_rate": 3.096280928231136e-05, "log_odds_chosen": 9.882538795471191, "log_odds_ratio": -0.00026573645300231874, "logits/chosen": -0.5362889766693115, "logits/rejected": -0.5939916968345642, "logps/chosen": -0.0003392535727471113, "logps/rejected": -1.5014183521270752, "loss": 1.0444, "nll_loss": 0.2610677480697632, "rewards/accuracies": 1.0, "rewards/chosen": -3.392536018509418e-05, "rewards/margins": 0.15010792016983032, "rewards/rejected": -0.1501418501138687, "step": 6401 }, { "epoch": 4.427385892116183, "grad_norm": 12.240621566772461, "learning_rate": 3.095896726602121e-05, "log_odds_chosen": 10.700019836425781, "log_odds_ratio": -9.252676682081074e-05, "logits/chosen": -0.3738962411880493, "logits/rejected": -0.4703482389450073, "logps/chosen": -0.0011295323492959142, "logps/rejected": -2.3972387313842773, "loss": 1.3156, "nll_loss": 0.3288910686969757, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011295323201920837, "rewards/margins": 0.23961091041564941, "rewards/rejected": -0.23972387611865997, "step": 6402 }, { "epoch": 4.42807745504841, "grad_norm": 11.340660095214844, "learning_rate": 3.095512524973106e-05, "log_odds_chosen": 10.539658546447754, "log_odds_ratio": -0.00026402639923617244, "logits/chosen": -0.5166309475898743, "logits/rejected": -0.6453360915184021, "logps/chosen": -0.0009884284809231758, "logps/rejected": -1.8809010982513428, "loss": 1.2957, "nll_loss": 0.32388707995414734, "rewards/accuracies": 1.0, "rewards/chosen": -9.884285100270063e-05, "rewards/margins": 0.18799127638339996, "rewards/rejected": -0.18809011578559875, "step": 6403 }, { "epoch": 4.4287690179806365, "grad_norm": 9.22094440460205, "learning_rate": 3.095128323344091e-05, "log_odds_chosen": 8.828676223754883, "log_odds_ratio": -0.007879544980823994, "logits/chosen": -0.6498576402664185, "logits/rejected": -0.6279178857803345, "logps/chosen": -0.04624735936522484, "logps/rejected": -1.2368273735046387, "loss": 1.3293, "nll_loss": 0.33152979612350464, "rewards/accuracies": 1.0, "rewards/chosen": -0.004624736495316029, "rewards/margins": 0.11905801296234131, "rewards/rejected": -0.12368274480104446, "step": 6404 }, { "epoch": 4.429460580912863, "grad_norm": 10.181614875793457, "learning_rate": 3.094744121715076e-05, "log_odds_chosen": 9.462580680847168, "log_odds_ratio": -0.06620946526527405, "logits/chosen": -0.7551283240318298, "logits/rejected": -0.7843388319015503, "logps/chosen": -0.014280532486736774, "logps/rejected": -1.8985339403152466, "loss": 1.3754, "nll_loss": 0.3372199535369873, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014280533650889993, "rewards/margins": 0.18842534720897675, "rewards/rejected": -0.18985339999198914, "step": 6405 }, { "epoch": 4.43015214384509, "grad_norm": 12.048741340637207, "learning_rate": 3.094359920086061e-05, "log_odds_chosen": 9.604146957397461, "log_odds_ratio": -0.0002486729063093662, "logits/chosen": -0.16522309184074402, "logits/rejected": -0.2706667184829712, "logps/chosen": -0.0008669736562296748, "logps/rejected": -1.7650139331817627, "loss": 1.1396, "nll_loss": 0.28488093614578247, "rewards/accuracies": 1.0, "rewards/chosen": -8.669737144373357e-05, "rewards/margins": 0.17641469836235046, "rewards/rejected": -0.17650139331817627, "step": 6406 }, { "epoch": 4.430843706777317, "grad_norm": 7.385758399963379, "learning_rate": 3.093975718457046e-05, "log_odds_chosen": 10.225251197814941, "log_odds_ratio": -0.0005238738958723843, "logits/chosen": -0.6043409109115601, "logits/rejected": -0.6176189184188843, "logps/chosen": -0.022112663835287094, "logps/rejected": -2.721525192260742, "loss": 2.0196, "nll_loss": 0.5048423409461975, "rewards/accuracies": 1.0, "rewards/chosen": -0.002211266430094838, "rewards/margins": 0.2699412703514099, "rewards/rejected": -0.27215251326560974, "step": 6407 }, { "epoch": 4.431535269709544, "grad_norm": 5.175098896026611, "learning_rate": 3.093591516828032e-05, "log_odds_chosen": 9.267239570617676, "log_odds_ratio": -0.0011088968021795154, "logits/chosen": -0.6202181577682495, "logits/rejected": -0.6301971673965454, "logps/chosen": -0.0027066871989518404, "logps/rejected": -2.233309030532837, "loss": 1.1131, "nll_loss": 0.2781616449356079, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002706687373574823, "rewards/margins": 0.2230602353811264, "rewards/rejected": -0.22333090007305145, "step": 6408 }, { "epoch": 4.432226832641771, "grad_norm": 9.984957695007324, "learning_rate": 3.093207315199016e-05, "log_odds_chosen": 9.920219421386719, "log_odds_ratio": -0.0005398921784944832, "logits/chosen": -0.5337777137756348, "logits/rejected": -0.5695146322250366, "logps/chosen": -0.0006002856534905732, "logps/rejected": -1.1492040157318115, "loss": 1.2768, "nll_loss": 0.3191404938697815, "rewards/accuracies": 1.0, "rewards/chosen": -6.002855661790818e-05, "rewards/margins": 0.11486037075519562, "rewards/rejected": -0.11492040008306503, "step": 6409 }, { "epoch": 4.4329183955739975, "grad_norm": 11.91898250579834, "learning_rate": 3.0928231135700016e-05, "log_odds_chosen": 10.906294822692871, "log_odds_ratio": -5.3284056775737554e-05, "logits/chosen": -0.6611661911010742, "logits/rejected": -0.7557975053787231, "logps/chosen": -0.0003193242009729147, "logps/rejected": -2.520918607711792, "loss": 1.267, "nll_loss": 0.3167416453361511, "rewards/accuracies": 1.0, "rewards/chosen": -3.193242082488723e-05, "rewards/margins": 0.2520599067211151, "rewards/rejected": -0.2520918548107147, "step": 6410 }, { "epoch": 4.433609958506224, "grad_norm": 8.004185676574707, "learning_rate": 3.092438911940987e-05, "log_odds_chosen": 10.14210319519043, "log_odds_ratio": -8.355508180102333e-05, "logits/chosen": -0.7471209764480591, "logits/rejected": -0.7696855068206787, "logps/chosen": -0.0011674391571432352, "logps/rejected": -2.0500221252441406, "loss": 1.0659, "nll_loss": 0.26646915078163147, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011674391134874895, "rewards/margins": 0.20488549768924713, "rewards/rejected": -0.20500224828720093, "step": 6411 }, { "epoch": 4.434301521438451, "grad_norm": 12.455580711364746, "learning_rate": 3.092054710311972e-05, "log_odds_chosen": 10.676868438720703, "log_odds_ratio": -0.00018224478117190301, "logits/chosen": -0.7852025032043457, "logits/rejected": -0.8711831569671631, "logps/chosen": -0.00014174616080708802, "logps/rejected": -1.6316860914230347, "loss": 1.4967, "nll_loss": 0.37414783239364624, "rewards/accuracies": 1.0, "rewards/chosen": -1.4174616808304563e-05, "rewards/margins": 0.1631544530391693, "rewards/rejected": -0.16316860914230347, "step": 6412 }, { "epoch": 4.434993084370678, "grad_norm": 7.661368370056152, "learning_rate": 3.0916705086829566e-05, "log_odds_chosen": 10.154672622680664, "log_odds_ratio": -0.00014710980758536607, "logits/chosen": -0.7227736711502075, "logits/rejected": -0.8169190883636475, "logps/chosen": -0.0003246809064876288, "logps/rejected": -2.028472661972046, "loss": 1.1493, "nll_loss": 0.2873007655143738, "rewards/accuracies": 1.0, "rewards/chosen": -3.246809137635864e-05, "rewards/margins": 0.20281481742858887, "rewards/rejected": -0.20284727215766907, "step": 6413 }, { "epoch": 4.435684647302905, "grad_norm": 8.186695098876953, "learning_rate": 3.091286307053942e-05, "log_odds_chosen": 9.463798522949219, "log_odds_ratio": -0.00047613875358365476, "logits/chosen": -0.4769138693809509, "logits/rejected": -0.4970892667770386, "logps/chosen": -0.0004365852801129222, "logps/rejected": -1.2462592124938965, "loss": 0.9771, "nll_loss": 0.24422718584537506, "rewards/accuracies": 1.0, "rewards/chosen": -4.3658525100909173e-05, "rewards/margins": 0.12458226084709167, "rewards/rejected": -0.12462591379880905, "step": 6414 }, { "epoch": 4.436376210235132, "grad_norm": 10.404152870178223, "learning_rate": 3.090902105424927e-05, "log_odds_chosen": 9.704734802246094, "log_odds_ratio": -0.0008971289498731494, "logits/chosen": -0.6355952024459839, "logits/rejected": -0.6960784196853638, "logps/chosen": -0.0017384829698130488, "logps/rejected": -2.2069873809814453, "loss": 1.1935, "nll_loss": 0.2982823848724365, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001738483115332201, "rewards/margins": 0.2205248773097992, "rewards/rejected": -0.22069872915744781, "step": 6415 }, { "epoch": 4.4370677731673585, "grad_norm": 11.764803886413574, "learning_rate": 3.090517903795912e-05, "log_odds_chosen": 10.937488555908203, "log_odds_ratio": -3.620861025410704e-05, "logits/chosen": -0.21870945394039154, "logits/rejected": -0.3768613636493683, "logps/chosen": -0.0001105781557271257, "logps/rejected": -1.834923267364502, "loss": 1.0417, "nll_loss": 0.2604144215583801, "rewards/accuracies": 1.0, "rewards/chosen": -1.105781575461151e-05, "rewards/margins": 0.18348127603530884, "rewards/rejected": -0.18349234759807587, "step": 6416 }, { "epoch": 4.437759336099585, "grad_norm": 9.569924354553223, "learning_rate": 3.0901337021668976e-05, "log_odds_chosen": 8.868842124938965, "log_odds_ratio": -0.006081722676753998, "logits/chosen": -0.6976808905601501, "logits/rejected": -0.7641477584838867, "logps/chosen": -0.0033662666101008654, "logps/rejected": -1.2174699306488037, "loss": 0.9788, "nll_loss": 0.24408631026744843, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033662666101008654, "rewards/margins": 0.12141037732362747, "rewards/rejected": -0.1217469871044159, "step": 6417 }, { "epoch": 4.438450899031812, "grad_norm": 8.44909954071045, "learning_rate": 3.089749500537882e-05, "log_odds_chosen": 10.522440910339355, "log_odds_ratio": -0.00027663138462230563, "logits/chosen": -0.876654863357544, "logits/rejected": -0.9026960134506226, "logps/chosen": -0.0003937871369998902, "logps/rejected": -2.161076545715332, "loss": 1.7714, "nll_loss": 0.44281840324401855, "rewards/accuracies": 1.0, "rewards/chosen": -3.93787122447975e-05, "rewards/margins": 0.21606828272342682, "rewards/rejected": -0.21610765159130096, "step": 6418 }, { "epoch": 4.439142461964039, "grad_norm": 7.009344577789307, "learning_rate": 3.0893652989088674e-05, "log_odds_chosen": 9.771029472351074, "log_odds_ratio": -0.0011895447969436646, "logits/chosen": -0.49803614616394043, "logits/rejected": -0.6052780151367188, "logps/chosen": -0.04391154646873474, "logps/rejected": -2.5477042198181152, "loss": 1.2561, "nll_loss": 0.313909113407135, "rewards/accuracies": 1.0, "rewards/chosen": -0.004391154740005732, "rewards/margins": 0.2503792941570282, "rewards/rejected": -0.254770427942276, "step": 6419 }, { "epoch": 4.439834024896266, "grad_norm": 6.5041093826293945, "learning_rate": 3.0889810972798527e-05, "log_odds_chosen": 9.302213668823242, "log_odds_ratio": -0.0004543719405774027, "logits/chosen": -0.7108435034751892, "logits/rejected": -0.7111088633537292, "logps/chosen": -0.0005316220922395587, "logps/rejected": -1.6619925498962402, "loss": 1.1103, "nll_loss": 0.2775220274925232, "rewards/accuracies": 1.0, "rewards/chosen": -5.316221358953044e-05, "rewards/margins": 0.16614609956741333, "rewards/rejected": -0.16619926691055298, "step": 6420 }, { "epoch": 4.440525587828493, "grad_norm": 4.889501094818115, "learning_rate": 3.088596895650838e-05, "log_odds_chosen": 9.731374740600586, "log_odds_ratio": -0.00034095943556167185, "logits/chosen": -0.3538818657398224, "logits/rejected": -0.32288655638694763, "logps/chosen": -0.0011292172130197287, "logps/rejected": -1.4478421211242676, "loss": 1.3477, "nll_loss": 0.3369019627571106, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011292171984678134, "rewards/margins": 0.14467130601406097, "rewards/rejected": -0.14478421211242676, "step": 6421 }, { "epoch": 4.441217150760719, "grad_norm": 8.64886474609375, "learning_rate": 3.0882126940218225e-05, "log_odds_chosen": 10.265148162841797, "log_odds_ratio": -0.00011314827861497179, "logits/chosen": -0.942933201789856, "logits/rejected": -0.8902233839035034, "logps/chosen": -0.000461632531369105, "logps/rejected": -1.9104479551315308, "loss": 0.6952, "nll_loss": 0.17379961907863617, "rewards/accuracies": 1.0, "rewards/chosen": -4.616325531969778e-05, "rewards/margins": 0.1909986287355423, "rewards/rejected": -0.19104479253292084, "step": 6422 }, { "epoch": 4.441908713692946, "grad_norm": 15.804269790649414, "learning_rate": 3.087828492392808e-05, "log_odds_chosen": 10.559988021850586, "log_odds_ratio": -5.758218321716413e-05, "logits/chosen": -0.8409276008605957, "logits/rejected": -0.8524722456932068, "logps/chosen": -0.0002882361295633018, "logps/rejected": -2.1920557022094727, "loss": 1.7781, "nll_loss": 0.44452598690986633, "rewards/accuracies": 1.0, "rewards/chosen": -2.8823611501138657e-05, "rewards/margins": 0.2191767692565918, "rewards/rejected": -0.2192055881023407, "step": 6423 }, { "epoch": 4.442600276625173, "grad_norm": 15.704327583312988, "learning_rate": 3.087444290763793e-05, "log_odds_chosen": 10.257490158081055, "log_odds_ratio": -0.0005293237045407295, "logits/chosen": -0.48299890756607056, "logits/rejected": -0.47468554973602295, "logps/chosen": -0.001382496440783143, "logps/rejected": -2.5976834297180176, "loss": 1.1784, "nll_loss": 0.2945585250854492, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013824964116793126, "rewards/margins": 0.25963011384010315, "rewards/rejected": -0.25976836681365967, "step": 6424 }, { "epoch": 4.4432918395574, "grad_norm": 5.465470790863037, "learning_rate": 3.0870600891347775e-05, "log_odds_chosen": 11.397879600524902, "log_odds_ratio": -1.5060177247505635e-05, "logits/chosen": -0.5835100412368774, "logits/rejected": -0.6525250673294067, "logps/chosen": -0.00014226968050934374, "logps/rejected": -2.534497022628784, "loss": 0.7325, "nll_loss": 0.1831332892179489, "rewards/accuracies": 1.0, "rewards/chosen": -1.4226966413843911e-05, "rewards/margins": 0.25343549251556396, "rewards/rejected": -0.2534497082233429, "step": 6425 }, { "epoch": 4.443983402489627, "grad_norm": 11.010247230529785, "learning_rate": 3.0866758875057634e-05, "log_odds_chosen": 10.234821319580078, "log_odds_ratio": -8.235462155425921e-05, "logits/chosen": -0.19324612617492676, "logits/rejected": -0.24131500720977783, "logps/chosen": -0.0001237574906554073, "logps/rejected": -1.435868263244629, "loss": 1.371, "nll_loss": 0.3427380323410034, "rewards/accuracies": 1.0, "rewards/chosen": -1.237574815604603e-05, "rewards/margins": 0.14357444643974304, "rewards/rejected": -0.14358682930469513, "step": 6426 }, { "epoch": 4.444674965421854, "grad_norm": 11.74059772491455, "learning_rate": 3.086291685876748e-05, "log_odds_chosen": 10.10733413696289, "log_odds_ratio": -8.586710464442149e-05, "logits/chosen": -0.42839187383651733, "logits/rejected": -0.5178070068359375, "logps/chosen": -0.0004466324462555349, "logps/rejected": -1.95094633102417, "loss": 1.1682, "nll_loss": 0.2920527458190918, "rewards/accuracies": 1.0, "rewards/chosen": -4.4663247535936534e-05, "rewards/margins": 0.19504998624324799, "rewards/rejected": -0.19509464502334595, "step": 6427 }, { "epoch": 4.44536652835408, "grad_norm": 7.334704875946045, "learning_rate": 3.085907484247733e-05, "log_odds_chosen": 8.667322158813477, "log_odds_ratio": -0.0066236890852451324, "logits/chosen": -0.44771382212638855, "logits/rejected": -0.545290470123291, "logps/chosen": -0.007994197309017181, "logps/rejected": -1.48850417137146, "loss": 1.5761, "nll_loss": 0.3933669328689575, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007994197658263147, "rewards/margins": 0.14805099368095398, "rewards/rejected": -0.1488504260778427, "step": 6428 }, { "epoch": 4.446058091286307, "grad_norm": 6.081279754638672, "learning_rate": 3.0855232826187185e-05, "log_odds_chosen": 9.644600868225098, "log_odds_ratio": -0.0006043565226718783, "logits/chosen": -0.5438819527626038, "logits/rejected": -0.6006056070327759, "logps/chosen": -0.013640167191624641, "logps/rejected": -2.112248420715332, "loss": 0.953, "nll_loss": 0.23818287253379822, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013640167890116572, "rewards/margins": 0.20986083149909973, "rewards/rejected": -0.21122485399246216, "step": 6429 }, { "epoch": 4.446749654218534, "grad_norm": 9.497159004211426, "learning_rate": 3.085139080989704e-05, "log_odds_chosen": 8.957687377929688, "log_odds_ratio": -0.001704613328911364, "logits/chosen": -0.8124638795852661, "logits/rejected": -0.7367603182792664, "logps/chosen": -0.0011274907737970352, "logps/rejected": -1.1338789463043213, "loss": 1.1416, "nll_loss": 0.28522783517837524, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011274906864855438, "rewards/margins": 0.11327514052391052, "rewards/rejected": -0.11338789016008377, "step": 6430 }, { "epoch": 4.447441217150761, "grad_norm": 7.188299179077148, "learning_rate": 3.084754879360688e-05, "log_odds_chosen": 8.436171531677246, "log_odds_ratio": -0.0023574642837047577, "logits/chosen": -0.4236351251602173, "logits/rejected": -0.4923994541168213, "logps/chosen": -0.0010589384473860264, "logps/rejected": -1.0145111083984375, "loss": 1.0346, "nll_loss": 0.25842535495758057, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010589385055936873, "rewards/margins": 0.10134520381689072, "rewards/rejected": -0.1014510989189148, "step": 6431 }, { "epoch": 4.448132780082988, "grad_norm": 8.30783462524414, "learning_rate": 3.0843706777316736e-05, "log_odds_chosen": 8.656213760375977, "log_odds_ratio": -0.04048136621713638, "logits/chosen": -0.4624170958995819, "logits/rejected": -0.4649544358253479, "logps/chosen": -0.00918676145374775, "logps/rejected": -1.6840345859527588, "loss": 1.0427, "nll_loss": 0.25663626194000244, "rewards/accuracies": 1.0, "rewards/chosen": -0.000918676087167114, "rewards/margins": 0.16748477518558502, "rewards/rejected": -0.16840344667434692, "step": 6432 }, { "epoch": 4.448824343015215, "grad_norm": 10.003007888793945, "learning_rate": 3.083986476102659e-05, "log_odds_chosen": 11.435136795043945, "log_odds_ratio": -4.180811811238527e-05, "logits/chosen": -0.6977967023849487, "logits/rejected": -0.6912387609481812, "logps/chosen": -0.00014621099398937076, "logps/rejected": -2.687011480331421, "loss": 1.6182, "nll_loss": 0.40453970432281494, "rewards/accuracies": 1.0, "rewards/chosen": -1.4621098671341315e-05, "rewards/margins": 0.26868653297424316, "rewards/rejected": -0.2687011659145355, "step": 6433 }, { "epoch": 4.449515905947441, "grad_norm": 11.087772369384766, "learning_rate": 3.0836022744736434e-05, "log_odds_chosen": 9.166482925415039, "log_odds_ratio": -0.00566418282687664, "logits/chosen": -0.45822763442993164, "logits/rejected": -0.48969143629074097, "logps/chosen": -0.0015105127822607756, "logps/rejected": -1.6272878646850586, "loss": 1.8693, "nll_loss": 0.4667499363422394, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015105126658454537, "rewards/margins": 0.16257771849632263, "rewards/rejected": -0.16272878646850586, "step": 6434 }, { "epoch": 4.450207468879668, "grad_norm": 9.10877799987793, "learning_rate": 3.083218072844629e-05, "log_odds_chosen": 9.429122924804688, "log_odds_ratio": -0.001028799219056964, "logits/chosen": -0.6517131924629211, "logits/rejected": -0.6948776841163635, "logps/chosen": -0.015063981525599957, "logps/rejected": -1.857469081878662, "loss": 0.8447, "nll_loss": 0.2110617756843567, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015063981991261244, "rewards/margins": 0.18424050509929657, "rewards/rejected": -0.1857469230890274, "step": 6435 }, { "epoch": 4.450899031811895, "grad_norm": 7.889139652252197, "learning_rate": 3.082833871215614e-05, "log_odds_chosen": 11.177837371826172, "log_odds_ratio": -2.0480580133153126e-05, "logits/chosen": -0.5944574475288391, "logits/rejected": -0.5678659677505493, "logps/chosen": -0.00023821931972634047, "logps/rejected": -2.3895411491394043, "loss": 1.0383, "nll_loss": 0.25957247614860535, "rewards/accuracies": 1.0, "rewards/chosen": -2.382193270022981e-05, "rewards/margins": 0.23893029987812042, "rewards/rejected": -0.23895412683486938, "step": 6436 }, { "epoch": 4.451590594744122, "grad_norm": 12.398857116699219, "learning_rate": 3.082449669586599e-05, "log_odds_chosen": 9.734447479248047, "log_odds_ratio": -0.0419284924864769, "logits/chosen": -0.9242825508117676, "logits/rejected": -1.0619961023330688, "logps/chosen": -0.009608970023691654, "logps/rejected": -1.9355764389038086, "loss": 0.9802, "nll_loss": 0.24086391925811768, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009608970722183585, "rewards/margins": 0.19259673357009888, "rewards/rejected": -0.19355764985084534, "step": 6437 }, { "epoch": 4.452282157676349, "grad_norm": 23.13532829284668, "learning_rate": 3.0820654679575843e-05, "log_odds_chosen": 8.321240425109863, "log_odds_ratio": -0.15772351622581482, "logits/chosen": -0.487088680267334, "logits/rejected": -0.5112364888191223, "logps/chosen": -0.022836901247501373, "logps/rejected": -1.2704685926437378, "loss": 1.113, "nll_loss": 0.2624875605106354, "rewards/accuracies": 0.875, "rewards/chosen": -0.0022836902644485235, "rewards/margins": 0.12476316839456558, "rewards/rejected": -0.1270468682050705, "step": 6438 }, { "epoch": 4.4529737206085755, "grad_norm": 11.514544486999512, "learning_rate": 3.0816812663285696e-05, "log_odds_chosen": 11.126787185668945, "log_odds_ratio": -7.575732888653874e-05, "logits/chosen": -0.7251293659210205, "logits/rejected": -0.809004545211792, "logps/chosen": -0.0003191279247403145, "logps/rejected": -2.6014175415039062, "loss": 0.9218, "nll_loss": 0.23044687509536743, "rewards/accuracies": 1.0, "rewards/chosen": -3.191279392922297e-05, "rewards/margins": 0.2601098418235779, "rewards/rejected": -0.2601417601108551, "step": 6439 }, { "epoch": 4.453665283540802, "grad_norm": 10.459208488464355, "learning_rate": 3.081297064699554e-05, "log_odds_chosen": 10.432282447814941, "log_odds_ratio": -0.0002344041276955977, "logits/chosen": -0.32301580905914307, "logits/rejected": -0.3637202978134155, "logps/chosen": -0.0006796496454626322, "logps/rejected": -2.54196834564209, "loss": 0.824, "nll_loss": 0.20596429705619812, "rewards/accuracies": 1.0, "rewards/chosen": -6.796496745664626e-05, "rewards/margins": 0.2541288733482361, "rewards/rejected": -0.2541968524456024, "step": 6440 }, { "epoch": 4.454356846473029, "grad_norm": 10.875798225402832, "learning_rate": 3.0809128630705394e-05, "log_odds_chosen": 10.470844268798828, "log_odds_ratio": -0.0001139358791988343, "logits/chosen": -0.4594096541404724, "logits/rejected": -0.5815310478210449, "logps/chosen": -0.0007354323752224445, "logps/rejected": -2.4845809936523438, "loss": 0.9644, "nll_loss": 0.24107995629310608, "rewards/accuracies": 1.0, "rewards/chosen": -7.35432404326275e-05, "rewards/margins": 0.24838455021381378, "rewards/rejected": -0.24845808744430542, "step": 6441 }, { "epoch": 4.455048409405256, "grad_norm": 9.098546981811523, "learning_rate": 3.0805286614415246e-05, "log_odds_chosen": 11.00687313079834, "log_odds_ratio": -8.472923218505457e-05, "logits/chosen": -0.6078069806098938, "logits/rejected": -0.6600771546363831, "logps/chosen": -0.00038548995507881045, "logps/rejected": -2.265836715698242, "loss": 0.6532, "nll_loss": 0.16329284012317657, "rewards/accuracies": 1.0, "rewards/chosen": -3.8548994780285284e-05, "rewards/margins": 0.22654514014720917, "rewards/rejected": -0.22658368945121765, "step": 6442 }, { "epoch": 4.455739972337483, "grad_norm": 10.756627082824707, "learning_rate": 3.080144459812509e-05, "log_odds_chosen": 10.610451698303223, "log_odds_ratio": -0.0009124780190177262, "logits/chosen": -0.6079075336456299, "logits/rejected": -0.6510196924209595, "logps/chosen": -0.0014088767347857356, "logps/rejected": -2.842536449432373, "loss": 1.286, "nll_loss": 0.3214002847671509, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014088767056819052, "rewards/margins": 0.28411275148391724, "rewards/rejected": -0.28425362706184387, "step": 6443 }, { "epoch": 4.45643153526971, "grad_norm": 6.399550914764404, "learning_rate": 3.079760258183495e-05, "log_odds_chosen": 10.297996520996094, "log_odds_ratio": -0.0007506664260290563, "logits/chosen": -0.3183915615081787, "logits/rejected": -0.41979384422302246, "logps/chosen": -0.0004420267359819263, "logps/rejected": -1.9215115308761597, "loss": 1.0138, "nll_loss": 0.253369003534317, "rewards/accuracies": 1.0, "rewards/chosen": -4.42026685050223e-05, "rewards/margins": 0.1921069324016571, "rewards/rejected": -0.19215114414691925, "step": 6444 }, { "epoch": 4.4571230982019365, "grad_norm": 7.257493019104004, "learning_rate": 3.07937605655448e-05, "log_odds_chosen": 11.059222221374512, "log_odds_ratio": -5.228218651609495e-05, "logits/chosen": -0.4313526749610901, "logits/rejected": -0.5512241721153259, "logps/chosen": -0.0009454325772821903, "logps/rejected": -2.021420955657959, "loss": 0.6734, "nll_loss": 0.16833902895450592, "rewards/accuracies": 1.0, "rewards/chosen": -9.45432620937936e-05, "rewards/margins": 0.20204755663871765, "rewards/rejected": -0.20214208960533142, "step": 6445 }, { "epoch": 4.457814661134163, "grad_norm": 7.653397560119629, "learning_rate": 3.078991854925465e-05, "log_odds_chosen": 10.379366874694824, "log_odds_ratio": -9.936068090610206e-05, "logits/chosen": -0.5582969188690186, "logits/rejected": -0.5746288299560547, "logps/chosen": -0.00010474787268321961, "logps/rejected": -1.4376403093338013, "loss": 0.9908, "nll_loss": 0.24768760800361633, "rewards/accuracies": 1.0, "rewards/chosen": -1.047478690452408e-05, "rewards/margins": 0.1437535434961319, "rewards/rejected": -0.14376403391361237, "step": 6446 }, { "epoch": 4.45850622406639, "grad_norm": 8.728631019592285, "learning_rate": 3.07860765329645e-05, "log_odds_chosen": 8.934124946594238, "log_odds_ratio": -0.012789107859134674, "logits/chosen": -0.6111518144607544, "logits/rejected": -0.6506301760673523, "logps/chosen": -0.004878000356256962, "logps/rejected": -1.5417579412460327, "loss": 1.0899, "nll_loss": 0.2711877226829529, "rewards/accuracies": 1.0, "rewards/chosen": -0.00048779998905956745, "rewards/margins": 0.153687983751297, "rewards/rejected": -0.1541757881641388, "step": 6447 }, { "epoch": 4.459197786998617, "grad_norm": 8.42003059387207, "learning_rate": 3.0782234516674354e-05, "log_odds_chosen": 9.99289321899414, "log_odds_ratio": -6.565036164829507e-05, "logits/chosen": -0.3047183156013489, "logits/rejected": -0.33375582098960876, "logps/chosen": -0.00017102361016441137, "logps/rejected": -1.4403502941131592, "loss": 0.8766, "nll_loss": 0.21914011240005493, "rewards/accuracies": 1.0, "rewards/chosen": -1.7102360288845375e-05, "rewards/margins": 0.14401793479919434, "rewards/rejected": -0.14403502643108368, "step": 6448 }, { "epoch": 4.459889349930844, "grad_norm": 7.8015594482421875, "learning_rate": 3.07783925003842e-05, "log_odds_chosen": 9.683923721313477, "log_odds_ratio": -0.00011912950139958411, "logits/chosen": -0.5940296053886414, "logits/rejected": -0.6487449407577515, "logps/chosen": -0.00021323611144907773, "logps/rejected": -1.2047635316848755, "loss": 1.4649, "nll_loss": 0.3662136495113373, "rewards/accuracies": 1.0, "rewards/chosen": -2.1323612600099295e-05, "rewards/margins": 0.12045504152774811, "rewards/rejected": -0.12047635018825531, "step": 6449 }, { "epoch": 4.460580912863071, "grad_norm": 5.781506538391113, "learning_rate": 3.077455048409405e-05, "log_odds_chosen": 9.560298919677734, "log_odds_ratio": -0.00018497445853427052, "logits/chosen": -0.45896121859550476, "logits/rejected": -0.4588027000427246, "logps/chosen": -0.00031572478474117815, "logps/rejected": -1.33500075340271, "loss": 0.8983, "nll_loss": 0.22454404830932617, "rewards/accuracies": 1.0, "rewards/chosen": -3.157247920171358e-05, "rewards/margins": 0.13346850872039795, "rewards/rejected": -0.13350006937980652, "step": 6450 }, { "epoch": 4.4612724757952975, "grad_norm": 5.633298397064209, "learning_rate": 3.0770708467803905e-05, "log_odds_chosen": 11.570259094238281, "log_odds_ratio": -3.354436921654269e-05, "logits/chosen": -0.3385199010372162, "logits/rejected": -0.44875243306159973, "logps/chosen": -0.000133796245791018, "logps/rejected": -2.294816732406616, "loss": 0.9183, "nll_loss": 0.22957469522953033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3379624761000741e-05, "rewards/margins": 0.22946830093860626, "rewards/rejected": -0.22948168218135834, "step": 6451 }, { "epoch": 4.461964038727524, "grad_norm": 13.917255401611328, "learning_rate": 3.076686645151375e-05, "log_odds_chosen": 7.53969144821167, "log_odds_ratio": -0.21071849763393402, "logits/chosen": -0.3512672185897827, "logits/rejected": -0.3968029320240021, "logps/chosen": -0.030562492087483406, "logps/rejected": -1.0880842208862305, "loss": 1.3055, "nll_loss": 0.3052915334701538, "rewards/accuracies": 0.875, "rewards/chosen": -0.003056249115616083, "rewards/margins": 0.10575217008590698, "rewards/rejected": -0.10880842059850693, "step": 6452 }, { "epoch": 4.462655601659751, "grad_norm": 10.778855323791504, "learning_rate": 3.076302443522361e-05, "log_odds_chosen": 9.772245407104492, "log_odds_ratio": -0.0013269998598843813, "logits/chosen": -0.3936904966831207, "logits/rejected": -0.530383825302124, "logps/chosen": -0.0011856453493237495, "logps/rejected": -1.6621289253234863, "loss": 1.3422, "nll_loss": 0.33542922139167786, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011856453056680039, "rewards/margins": 0.1660943180322647, "rewards/rejected": -0.16621288657188416, "step": 6453 }, { "epoch": 4.463347164591978, "grad_norm": 7.1760406494140625, "learning_rate": 3.0759182418933455e-05, "log_odds_chosen": 9.600784301757812, "log_odds_ratio": -0.0004721590084955096, "logits/chosen": -0.08456657826900482, "logits/rejected": -0.17342565953731537, "logps/chosen": -0.0016557632479816675, "logps/rejected": -2.121009588241577, "loss": 1.121, "nll_loss": 0.2802083194255829, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001655763335293159, "rewards/margins": 0.2119353711605072, "rewards/rejected": -0.21210096776485443, "step": 6454 }, { "epoch": 4.464038727524205, "grad_norm": 8.384750366210938, "learning_rate": 3.075534040264331e-05, "log_odds_chosen": 11.154319763183594, "log_odds_ratio": -3.194598320988007e-05, "logits/chosen": -0.3221395015716553, "logits/rejected": -0.3981391191482544, "logps/chosen": -0.00023946535657159984, "logps/rejected": -2.5700619220733643, "loss": 0.7746, "nll_loss": 0.19364149868488312, "rewards/accuracies": 1.0, "rewards/chosen": -2.3946535293362103e-05, "rewards/margins": 0.2569822371006012, "rewards/rejected": -0.2570061683654785, "step": 6455 }, { "epoch": 4.464730290456432, "grad_norm": 8.794690132141113, "learning_rate": 3.075149838635316e-05, "log_odds_chosen": 9.15900707244873, "log_odds_ratio": -0.0008501263218931854, "logits/chosen": -0.28918007016181946, "logits/rejected": -0.3754921555519104, "logps/chosen": -0.0006217118352651596, "logps/rejected": -1.515347957611084, "loss": 1.0513, "nll_loss": 0.26275113224983215, "rewards/accuracies": 1.0, "rewards/chosen": -6.217118789209053e-05, "rewards/margins": 0.15147262811660767, "rewards/rejected": -0.1515347957611084, "step": 6456 }, { "epoch": 4.4654218533886585, "grad_norm": 19.42965316772461, "learning_rate": 3.074765637006301e-05, "log_odds_chosen": 9.063629150390625, "log_odds_ratio": -0.0010017786407843232, "logits/chosen": -0.06623756885528564, "logits/rejected": -0.08045337349176407, "logps/chosen": -0.001309290062636137, "logps/rejected": -1.6273454427719116, "loss": 1.1708, "nll_loss": 0.29259809851646423, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013092900917399675, "rewards/margins": 0.16260361671447754, "rewards/rejected": -0.16273455321788788, "step": 6457 }, { "epoch": 4.466113416320885, "grad_norm": 6.670252799987793, "learning_rate": 3.074381435377286e-05, "log_odds_chosen": 9.334833145141602, "log_odds_ratio": -0.00016046586097218096, "logits/chosen": -0.1623792052268982, "logits/rejected": -0.1207355409860611, "logps/chosen": -0.00028514739824458957, "logps/rejected": -1.278767466545105, "loss": 0.9254, "nll_loss": 0.23132863640785217, "rewards/accuracies": 1.0, "rewards/chosen": -2.8514739824458957e-05, "rewards/margins": 0.12784823775291443, "rewards/rejected": -0.12787675857543945, "step": 6458 }, { "epoch": 4.466804979253112, "grad_norm": 9.468250274658203, "learning_rate": 3.073997233748271e-05, "log_odds_chosen": 9.812359809875488, "log_odds_ratio": -0.08580458164215088, "logits/chosen": -0.17165197432041168, "logits/rejected": -0.2862173914909363, "logps/chosen": -0.015067455358803272, "logps/rejected": -2.258084297180176, "loss": 1.4031, "nll_loss": 0.34219345450401306, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015067454660311341, "rewards/margins": 0.22430166602134705, "rewards/rejected": -0.22580842673778534, "step": 6459 }, { "epoch": 4.467496542185339, "grad_norm": 9.811150550842285, "learning_rate": 3.073613032119256e-05, "log_odds_chosen": 9.866673469543457, "log_odds_ratio": -9.497060091234744e-05, "logits/chosen": -0.4492124021053314, "logits/rejected": -0.48748695850372314, "logps/chosen": -0.0005426113493740559, "logps/rejected": -1.7650691270828247, "loss": 0.9769, "nll_loss": 0.24422520399093628, "rewards/accuracies": 1.0, "rewards/chosen": -5.426114148576744e-05, "rewards/margins": 0.1764526516199112, "rewards/rejected": -0.1765069216489792, "step": 6460 }, { "epoch": 4.468188105117566, "grad_norm": 13.469379425048828, "learning_rate": 3.0732288304902416e-05, "log_odds_chosen": 10.870882987976074, "log_odds_ratio": -0.00022644597629550844, "logits/chosen": -0.7534062266349792, "logits/rejected": -0.7338254451751709, "logps/chosen": -0.000653579889331013, "logps/rejected": -2.304877758026123, "loss": 1.039, "nll_loss": 0.25973108410835266, "rewards/accuracies": 1.0, "rewards/chosen": -6.535799184348434e-05, "rewards/margins": 0.23042240738868713, "rewards/rejected": -0.23048776388168335, "step": 6461 }, { "epoch": 4.468879668049793, "grad_norm": 6.386672019958496, "learning_rate": 3.072844628861227e-05, "log_odds_chosen": 9.686408996582031, "log_odds_ratio": -0.0006362023414112628, "logits/chosen": -0.3017466962337494, "logits/rejected": -0.290844589471817, "logps/chosen": -0.00386090693064034, "logps/rejected": -1.9374815225601196, "loss": 1.3238, "nll_loss": 0.3308817744255066, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003860906872432679, "rewards/margins": 0.19336208701133728, "rewards/rejected": -0.19374816119670868, "step": 6462 }, { "epoch": 4.4695712309820195, "grad_norm": 11.212063789367676, "learning_rate": 3.0724604272322114e-05, "log_odds_chosen": 10.193517684936523, "log_odds_ratio": -9.134951687883586e-05, "logits/chosen": -0.5317569971084595, "logits/rejected": -0.611341655254364, "logps/chosen": -0.0002531587961129844, "logps/rejected": -1.7362111806869507, "loss": 0.9071, "nll_loss": 0.2267615795135498, "rewards/accuracies": 1.0, "rewards/chosen": -2.5315879611298442e-05, "rewards/margins": 0.17359580099582672, "rewards/rejected": -0.17362111806869507, "step": 6463 }, { "epoch": 4.470262793914246, "grad_norm": 6.128077030181885, "learning_rate": 3.0720762256031966e-05, "log_odds_chosen": 8.539804458618164, "log_odds_ratio": -0.004929071757942438, "logits/chosen": -0.5161978006362915, "logits/rejected": -0.5088739395141602, "logps/chosen": -0.002719791140407324, "logps/rejected": -1.3809276819229126, "loss": 0.9154, "nll_loss": 0.2283586859703064, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027197913732379675, "rewards/margins": 0.1378207802772522, "rewards/rejected": -0.1380927711725235, "step": 6464 }, { "epoch": 4.470954356846473, "grad_norm": 6.816812992095947, "learning_rate": 3.071692023974182e-05, "log_odds_chosen": 9.135784149169922, "log_odds_ratio": -0.013926029205322266, "logits/chosen": -0.5467950105667114, "logits/rejected": -0.6252622604370117, "logps/chosen": -0.005378572270274162, "logps/rejected": -1.3269848823547363, "loss": 0.8008, "nll_loss": 0.19880104064941406, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005378572386689484, "rewards/margins": 0.13216063380241394, "rewards/rejected": -0.13269847631454468, "step": 6465 }, { "epoch": 4.4716459197787, "grad_norm": 11.138849258422852, "learning_rate": 3.071307822345167e-05, "log_odds_chosen": 10.323291778564453, "log_odds_ratio": -0.00015442782023455948, "logits/chosen": -0.30465757846832275, "logits/rejected": -0.25000882148742676, "logps/chosen": -0.00015198114851955324, "logps/rejected": -1.6996678113937378, "loss": 0.9524, "nll_loss": 0.2380809187889099, "rewards/accuracies": 1.0, "rewards/chosen": -1.5198114851955324e-05, "rewards/margins": 0.16995158791542053, "rewards/rejected": -0.16996678709983826, "step": 6466 }, { "epoch": 4.472337482710927, "grad_norm": 10.755675315856934, "learning_rate": 3.070923620716152e-05, "log_odds_chosen": 10.522125244140625, "log_odds_ratio": -3.707344876602292e-05, "logits/chosen": -0.3853769898414612, "logits/rejected": -0.44511574506759644, "logps/chosen": -0.00046376383397728205, "logps/rejected": -2.301818609237671, "loss": 1.1055, "nll_loss": 0.27637752890586853, "rewards/accuracies": 1.0, "rewards/chosen": -4.637638630811125e-05, "rewards/margins": 0.2301354855298996, "rewards/rejected": -0.23018187284469604, "step": 6467 }, { "epoch": 4.473029045643154, "grad_norm": 8.67910385131836, "learning_rate": 3.070539419087137e-05, "log_odds_chosen": 11.46983528137207, "log_odds_ratio": -2.3607193725183606e-05, "logits/chosen": -0.30719006061553955, "logits/rejected": -0.4418572783470154, "logps/chosen": -0.00018790410831570625, "logps/rejected": -2.8087122440338135, "loss": 0.8674, "nll_loss": 0.21685031056404114, "rewards/accuracies": 1.0, "rewards/chosen": -1.8790411559166387e-05, "rewards/margins": 0.28085243701934814, "rewards/rejected": -0.2808712124824524, "step": 6468 }, { "epoch": 4.4737206085753805, "grad_norm": 51.31665802001953, "learning_rate": 3.070155217458122e-05, "log_odds_chosen": 8.52737808227539, "log_odds_ratio": -0.4789644181728363, "logits/chosen": -0.2602759897708893, "logits/rejected": -0.309836745262146, "logps/chosen": -0.20493757724761963, "logps/rejected": -2.1075263023376465, "loss": 1.8546, "nll_loss": 0.41575437784194946, "rewards/accuracies": 0.875, "rewards/chosen": -0.020493758842349052, "rewards/margins": 0.19025887548923492, "rewards/rejected": -0.21075263619422913, "step": 6469 }, { "epoch": 4.474412171507607, "grad_norm": 18.089824676513672, "learning_rate": 3.0697710158291074e-05, "log_odds_chosen": 9.630249977111816, "log_odds_ratio": -0.0002327169495401904, "logits/chosen": -0.34218454360961914, "logits/rejected": -0.378109335899353, "logps/chosen": -0.000677041825838387, "logps/rejected": -2.0278592109680176, "loss": 1.1102, "nll_loss": 0.27751752734184265, "rewards/accuracies": 1.0, "rewards/chosen": -6.770417530788109e-05, "rewards/margins": 0.20271822810173035, "rewards/rejected": -0.2027859091758728, "step": 6470 }, { "epoch": 4.475103734439834, "grad_norm": 11.263705253601074, "learning_rate": 3.069386814200093e-05, "log_odds_chosen": 10.034893035888672, "log_odds_ratio": -0.00014404130342882127, "logits/chosen": -0.6196680665016174, "logits/rejected": -0.6530418395996094, "logps/chosen": -0.0002830620505847037, "logps/rejected": -1.627251386642456, "loss": 1.2924, "nll_loss": 0.323082834482193, "rewards/accuracies": 1.0, "rewards/chosen": -2.8306207241257653e-05, "rewards/margins": 0.16269683837890625, "rewards/rejected": -0.16272515058517456, "step": 6471 }, { "epoch": 4.475795297372061, "grad_norm": 10.325894355773926, "learning_rate": 3.069002612571077e-05, "log_odds_chosen": 9.838895797729492, "log_odds_ratio": -0.0002516931272111833, "logits/chosen": -0.5684917569160461, "logits/rejected": -0.574885904788971, "logps/chosen": -0.001969832694157958, "logps/rejected": -2.108513116836548, "loss": 0.7378, "nll_loss": 0.18441295623779297, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019698326650541276, "rewards/margins": 0.2106543481349945, "rewards/rejected": -0.21085131168365479, "step": 6472 }, { "epoch": 4.476486860304288, "grad_norm": 8.056211471557617, "learning_rate": 3.0686184109420625e-05, "log_odds_chosen": 10.88155460357666, "log_odds_ratio": -3.325043508084491e-05, "logits/chosen": -0.27730709314346313, "logits/rejected": -0.4008949398994446, "logps/chosen": -0.00010624650894897059, "logps/rejected": -1.9483246803283691, "loss": 1.4361, "nll_loss": 0.359016478061676, "rewards/accuracies": 1.0, "rewards/chosen": -1.0624650712998118e-05, "rewards/margins": 0.19482184946537018, "rewards/rejected": -0.1948324739933014, "step": 6473 }, { "epoch": 4.477178423236515, "grad_norm": 17.21849822998047, "learning_rate": 3.068234209313048e-05, "log_odds_chosen": 10.295563697814941, "log_odds_ratio": -0.001130000571720302, "logits/chosen": -0.6260051727294922, "logits/rejected": -0.6374498605728149, "logps/chosen": -0.0025418612640351057, "logps/rejected": -1.9003947973251343, "loss": 0.8236, "nll_loss": 0.20578572154045105, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025418613222427666, "rewards/margins": 0.18978530168533325, "rewards/rejected": -0.1900394707918167, "step": 6474 }, { "epoch": 4.477869986168741, "grad_norm": 8.649698257446289, "learning_rate": 3.067850007684033e-05, "log_odds_chosen": 9.734735488891602, "log_odds_ratio": -0.0014319606125354767, "logits/chosen": -0.45698633790016174, "logits/rejected": -0.5206592082977295, "logps/chosen": -0.0013818284496665, "logps/rejected": -1.6835741996765137, "loss": 1.5192, "nll_loss": 0.379658579826355, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013818284787703305, "rewards/margins": 0.16821923851966858, "rewards/rejected": -0.16835743188858032, "step": 6475 }, { "epoch": 4.478561549100968, "grad_norm": 22.343177795410156, "learning_rate": 3.0674658060550175e-05, "log_odds_chosen": 9.140680313110352, "log_odds_ratio": -0.00031606812262907624, "logits/chosen": -0.6005803942680359, "logits/rejected": -0.5960612893104553, "logps/chosen": -0.0007477106410078704, "logps/rejected": -1.4693350791931152, "loss": 0.8878, "nll_loss": 0.2219061553478241, "rewards/accuracies": 1.0, "rewards/chosen": -7.477107283193618e-05, "rewards/margins": 0.14685875177383423, "rewards/rejected": -0.14693352580070496, "step": 6476 }, { "epoch": 4.479253112033195, "grad_norm": 6.908090114593506, "learning_rate": 3.0670816044260035e-05, "log_odds_chosen": 10.161052703857422, "log_odds_ratio": -5.6024065997917205e-05, "logits/chosen": -0.5555582642555237, "logits/rejected": -0.4909493029117584, "logps/chosen": -0.00017970267799682915, "logps/rejected": -1.46084463596344, "loss": 1.7021, "nll_loss": 0.4255087375640869, "rewards/accuracies": 1.0, "rewards/chosen": -1.7970269254874438e-05, "rewards/margins": 0.14606650173664093, "rewards/rejected": -0.1460844725370407, "step": 6477 }, { "epoch": 4.479944674965422, "grad_norm": 9.877211570739746, "learning_rate": 3.066697402796988e-05, "log_odds_chosen": 9.934467315673828, "log_odds_ratio": -0.00014345439558383077, "logits/chosen": -0.6109917759895325, "logits/rejected": -0.7236210703849792, "logps/chosen": -0.0007814106647856534, "logps/rejected": -2.0042483806610107, "loss": 1.0619, "nll_loss": 0.2654609978199005, "rewards/accuracies": 1.0, "rewards/chosen": -7.814106356818229e-05, "rewards/margins": 0.2003467082977295, "rewards/rejected": -0.20042484998703003, "step": 6478 }, { "epoch": 4.480636237897649, "grad_norm": 10.250088691711426, "learning_rate": 3.066313201167973e-05, "log_odds_chosen": 10.149007797241211, "log_odds_ratio": -0.0005294461152516305, "logits/chosen": -0.5758021473884583, "logits/rejected": -0.6397185325622559, "logps/chosen": -0.0003705144044943154, "logps/rejected": -1.7317607402801514, "loss": 1.1609, "nll_loss": 0.29016971588134766, "rewards/accuracies": 1.0, "rewards/chosen": -3.7051438994240016e-05, "rewards/margins": 0.1731390357017517, "rewards/rejected": -0.17317607998847961, "step": 6479 }, { "epoch": 4.481327800829876, "grad_norm": 6.251984119415283, "learning_rate": 3.0659289995389585e-05, "log_odds_chosen": 9.889983177185059, "log_odds_ratio": -0.00018252171867061406, "logits/chosen": -0.23073536157608032, "logits/rejected": -0.31376710534095764, "logps/chosen": -0.00015304457338061184, "logps/rejected": -1.3972463607788086, "loss": 1.0859, "nll_loss": 0.27145934104919434, "rewards/accuracies": 1.0, "rewards/chosen": -1.5304456610465422e-05, "rewards/margins": 0.13970933854579926, "rewards/rejected": -0.13972464203834534, "step": 6480 }, { "epoch": 4.482019363762102, "grad_norm": 6.738919734954834, "learning_rate": 3.065544797909943e-05, "log_odds_chosen": 9.208086967468262, "log_odds_ratio": -0.0006657785852439702, "logits/chosen": -0.5761962532997131, "logits/rejected": -0.4735212028026581, "logps/chosen": -0.004164504818618298, "logps/rejected": -1.194311499595642, "loss": 1.0736, "nll_loss": 0.2683413624763489, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004164504643995315, "rewards/margins": 0.1190146952867508, "rewards/rejected": -0.11943115293979645, "step": 6481 }, { "epoch": 4.482710926694329, "grad_norm": 11.047977447509766, "learning_rate": 3.065160596280928e-05, "log_odds_chosen": 8.649320602416992, "log_odds_ratio": -0.0034021895844489336, "logits/chosen": 0.0005720322951674461, "logits/rejected": -0.038629673421382904, "logps/chosen": -0.0032845381647348404, "logps/rejected": -1.5344747304916382, "loss": 0.9752, "nll_loss": 0.2434644252061844, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032845381065271795, "rewards/margins": 0.1531190127134323, "rewards/rejected": -0.1534474790096283, "step": 6482 }, { "epoch": 4.483402489626556, "grad_norm": 14.265397071838379, "learning_rate": 3.0647763946519136e-05, "log_odds_chosen": 9.652074813842773, "log_odds_ratio": -0.00034976223832927644, "logits/chosen": -0.26749786734580994, "logits/rejected": -0.29552537202835083, "logps/chosen": -0.0014181090518832207, "logps/rejected": -2.2106587886810303, "loss": 1.0416, "nll_loss": 0.26037660241127014, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014181091682985425, "rewards/margins": 0.22092406451702118, "rewards/rejected": -0.22106587886810303, "step": 6483 }, { "epoch": 4.484094052558783, "grad_norm": 12.375883102416992, "learning_rate": 3.064392193022899e-05, "log_odds_chosen": 8.698393821716309, "log_odds_ratio": -0.030465498566627502, "logits/chosen": 0.23788906633853912, "logits/rejected": 0.11636831611394882, "logps/chosen": -0.010362344793975353, "logps/rejected": -2.2280845642089844, "loss": 1.5567, "nll_loss": 0.3861318528652191, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010362345492467284, "rewards/margins": 0.2217722237110138, "rewards/rejected": -0.22280846536159515, "step": 6484 }, { "epoch": 4.48478561549101, "grad_norm": 13.210124015808105, "learning_rate": 3.0640079913938834e-05, "log_odds_chosen": 10.175982475280762, "log_odds_ratio": -0.00024061251315288246, "logits/chosen": -0.5196781754493713, "logits/rejected": -0.5784398913383484, "logps/chosen": -0.0012963440967723727, "logps/rejected": -2.0012803077697754, "loss": 0.727, "nll_loss": 0.18171842396259308, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012963441258762032, "rewards/margins": 0.1999983787536621, "rewards/rejected": -0.2001280039548874, "step": 6485 }, { "epoch": 4.485477178423237, "grad_norm": 5.110747814178467, "learning_rate": 3.063623789764869e-05, "log_odds_chosen": 10.602209091186523, "log_odds_ratio": -5.619807416223921e-05, "logits/chosen": -0.5373456478118896, "logits/rejected": -0.6098401546478271, "logps/chosen": -0.00016629225865472108, "logps/rejected": -1.6668782234191895, "loss": 1.095, "nll_loss": 0.27373725175857544, "rewards/accuracies": 1.0, "rewards/chosen": -1.662922659306787e-05, "rewards/margins": 0.16667118668556213, "rewards/rejected": -0.16668781638145447, "step": 6486 }, { "epoch": 4.486168741355463, "grad_norm": 12.7305326461792, "learning_rate": 3.063239588135854e-05, "log_odds_chosen": 10.567861557006836, "log_odds_ratio": -0.00015114758571144193, "logits/chosen": -0.9034011363983154, "logits/rejected": -0.9532999992370605, "logps/chosen": -0.0006381792481988668, "logps/rejected": -2.1844778060913086, "loss": 0.9608, "nll_loss": 0.24017715454101562, "rewards/accuracies": 1.0, "rewards/chosen": -6.381792627507821e-05, "rewards/margins": 0.21838395297527313, "rewards/rejected": -0.21844777464866638, "step": 6487 }, { "epoch": 4.48686030428769, "grad_norm": 7.28488826751709, "learning_rate": 3.062855386506839e-05, "log_odds_chosen": 10.389266967773438, "log_odds_ratio": -0.00010775520786410198, "logits/chosen": -0.3085196316242218, "logits/rejected": -0.30753573775291443, "logps/chosen": -0.0011101680574938655, "logps/rejected": -2.559150218963623, "loss": 0.8785, "nll_loss": 0.21961821615695953, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001110168086597696, "rewards/margins": 0.25580400228500366, "rewards/rejected": -0.2559150457382202, "step": 6488 }, { "epoch": 4.487551867219917, "grad_norm": 10.296831130981445, "learning_rate": 3.0624711848778244e-05, "log_odds_chosen": 10.753174781799316, "log_odds_ratio": -2.9885119147365913e-05, "logits/chosen": -0.8495357036590576, "logits/rejected": -0.8660904169082642, "logps/chosen": -0.0001955320913111791, "logps/rejected": -1.82537043094635, "loss": 0.6896, "nll_loss": 0.17240005731582642, "rewards/accuracies": 1.0, "rewards/chosen": -1.955320840352215e-05, "rewards/margins": 0.18251748383045197, "rewards/rejected": -0.1825370341539383, "step": 6489 }, { "epoch": 4.488243430152144, "grad_norm": 7.585280418395996, "learning_rate": 3.062086983248809e-05, "log_odds_chosen": 11.144027709960938, "log_odds_ratio": -2.5699517209432088e-05, "logits/chosen": -0.4447624683380127, "logits/rejected": -0.526018500328064, "logps/chosen": -0.00010345203190809116, "logps/rejected": -1.9488626718521118, "loss": 0.7421, "nll_loss": 0.18552720546722412, "rewards/accuracies": 1.0, "rewards/chosen": -1.0345203008910175e-05, "rewards/margins": 0.19487592577934265, "rewards/rejected": -0.19488626718521118, "step": 6490 }, { "epoch": 4.488934993084371, "grad_norm": 7.347438812255859, "learning_rate": 3.061702781619794e-05, "log_odds_chosen": 10.01000690460205, "log_odds_ratio": -9.229998249793425e-05, "logits/chosen": -0.25663602352142334, "logits/rejected": -0.3326743245124817, "logps/chosen": -0.0004510592552833259, "logps/rejected": -1.9487425088882446, "loss": 0.8923, "nll_loss": 0.22306877374649048, "rewards/accuracies": 1.0, "rewards/chosen": -4.510592407314107e-05, "rewards/margins": 0.19482913613319397, "rewards/rejected": -0.19487425684928894, "step": 6491 }, { "epoch": 4.4896265560165975, "grad_norm": 10.743110656738281, "learning_rate": 3.0613185799907794e-05, "log_odds_chosen": 9.014936447143555, "log_odds_ratio": -0.0004175748908892274, "logits/chosen": -0.616611659526825, "logits/rejected": -0.6440533995628357, "logps/chosen": -0.0017504625720903277, "logps/rejected": -1.6501240730285645, "loss": 1.3402, "nll_loss": 0.33499693870544434, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017504626885056496, "rewards/margins": 0.16483736038208008, "rewards/rejected": -0.1650124043226242, "step": 6492 }, { "epoch": 4.490318118948824, "grad_norm": 5.233790397644043, "learning_rate": 3.0609343783617647e-05, "log_odds_chosen": 10.454694747924805, "log_odds_ratio": -9.337958181276917e-05, "logits/chosen": -0.5710014700889587, "logits/rejected": -0.5607348084449768, "logps/chosen": -0.00013453798601403832, "logps/rejected": -1.7762622833251953, "loss": 0.6604, "nll_loss": 0.16509346663951874, "rewards/accuracies": 1.0, "rewards/chosen": -1.3453798601403832e-05, "rewards/margins": 0.1776127815246582, "rewards/rejected": -0.17762622237205505, "step": 6493 }, { "epoch": 4.491009681881051, "grad_norm": 10.930121421813965, "learning_rate": 3.060550176732749e-05, "log_odds_chosen": 10.477840423583984, "log_odds_ratio": -5.168091593077406e-05, "logits/chosen": -0.3589015007019043, "logits/rejected": -0.4596545100212097, "logps/chosen": -0.00032352475682273507, "logps/rejected": -2.120523452758789, "loss": 0.9069, "nll_loss": 0.22672848403453827, "rewards/accuracies": 1.0, "rewards/chosen": -3.235247277189046e-05, "rewards/margins": 0.21202000975608826, "rewards/rejected": -0.2120523750782013, "step": 6494 }, { "epoch": 4.491701244813278, "grad_norm": 13.32094669342041, "learning_rate": 3.060165975103735e-05, "log_odds_chosen": 9.130834579467773, "log_odds_ratio": -0.0003811029309872538, "logits/chosen": -0.5448084473609924, "logits/rejected": -0.6247289776802063, "logps/chosen": -0.0015338326338678598, "logps/rejected": -1.9310564994812012, "loss": 1.1071, "nll_loss": 0.27673590183258057, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015338326920755208, "rewards/margins": 0.19295227527618408, "rewards/rejected": -0.19310565292835236, "step": 6495 }, { "epoch": 4.492392807745505, "grad_norm": 6.493412971496582, "learning_rate": 3.05978177347472e-05, "log_odds_chosen": 10.520059585571289, "log_odds_ratio": -3.027506136277225e-05, "logits/chosen": -0.3154093623161316, "logits/rejected": -0.3896132707595825, "logps/chosen": -0.00016197854711208493, "logps/rejected": -1.7350319623947144, "loss": 1.006, "nll_loss": 0.2514877915382385, "rewards/accuracies": 1.0, "rewards/chosen": -1.619785325601697e-05, "rewards/margins": 0.17348699271678925, "rewards/rejected": -0.17350319027900696, "step": 6496 }, { "epoch": 4.493084370677732, "grad_norm": 9.13463020324707, "learning_rate": 3.059397571845705e-05, "log_odds_chosen": 10.163230895996094, "log_odds_ratio": -0.0003173485165461898, "logits/chosen": -0.39032599329948425, "logits/rejected": -0.41342228651046753, "logps/chosen": -0.000749665021430701, "logps/rejected": -2.1161184310913086, "loss": 1.3606, "nll_loss": 0.34011974930763245, "rewards/accuracies": 1.0, "rewards/chosen": -7.496650505345315e-05, "rewards/margins": 0.21153688430786133, "rewards/rejected": -0.21161183714866638, "step": 6497 }, { "epoch": 4.4937759336099585, "grad_norm": 8.330862998962402, "learning_rate": 3.05901337021669e-05, "log_odds_chosen": 10.870183944702148, "log_odds_ratio": -2.5071134587051347e-05, "logits/chosen": -0.5879911184310913, "logits/rejected": -0.6214119791984558, "logps/chosen": -0.00018948808428831398, "logps/rejected": -2.0447065830230713, "loss": 1.5259, "nll_loss": 0.38148459792137146, "rewards/accuracies": 1.0, "rewards/chosen": -1.8948812794405967e-05, "rewards/margins": 0.20445170998573303, "rewards/rejected": -0.2044706642627716, "step": 6498 }, { "epoch": 4.494467496542185, "grad_norm": 11.30947208404541, "learning_rate": 3.058629168587675e-05, "log_odds_chosen": 10.4102783203125, "log_odds_ratio": -0.0002449329767841846, "logits/chosen": -0.5021913647651672, "logits/rejected": -0.5918756127357483, "logps/chosen": -0.01916118897497654, "logps/rejected": -2.790419816970825, "loss": 1.3731, "nll_loss": 0.34324946999549866, "rewards/accuracies": 1.0, "rewards/chosen": -0.001916118897497654, "rewards/margins": 0.27712589502334595, "rewards/rejected": -0.27904200553894043, "step": 6499 }, { "epoch": 4.495159059474412, "grad_norm": 7.023859977722168, "learning_rate": 3.05824496695866e-05, "log_odds_chosen": 9.978353500366211, "log_odds_ratio": -7.063882367219776e-05, "logits/chosen": -0.611998975276947, "logits/rejected": -0.649333655834198, "logps/chosen": -0.0001519985671620816, "logps/rejected": -1.3650856018066406, "loss": 0.8083, "nll_loss": 0.20207872986793518, "rewards/accuracies": 1.0, "rewards/chosen": -1.5199855624814518e-05, "rewards/margins": 0.13649335503578186, "rewards/rejected": -0.13650855422019958, "step": 6500 }, { "epoch": 4.495850622406639, "grad_norm": 8.580329895019531, "learning_rate": 3.057860765329645e-05, "log_odds_chosen": 10.203696250915527, "log_odds_ratio": -8.89887596713379e-05, "logits/chosen": -0.6458160281181335, "logits/rejected": -0.6352592706680298, "logps/chosen": -0.0002670662652235478, "logps/rejected": -1.7849435806274414, "loss": 0.5684, "nll_loss": 0.14209191501140594, "rewards/accuracies": 1.0, "rewards/chosen": -2.6706629796535708e-05, "rewards/margins": 0.17846766114234924, "rewards/rejected": -0.17849434912204742, "step": 6501 }, { "epoch": 4.496542185338866, "grad_norm": 11.85151481628418, "learning_rate": 3.0574765637006305e-05, "log_odds_chosen": 9.779272079467773, "log_odds_ratio": -0.00041906890692189336, "logits/chosen": -0.676744282245636, "logits/rejected": -0.7503317594528198, "logps/chosen": -0.004989419132471085, "logps/rejected": -2.3462343215942383, "loss": 1.365, "nll_loss": 0.34119948744773865, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004989419248886406, "rewards/margins": 0.23412448167800903, "rewards/rejected": -0.23462343215942383, "step": 6502 }, { "epoch": 4.497233748271093, "grad_norm": 10.800078392028809, "learning_rate": 3.057092362071615e-05, "log_odds_chosen": 9.714326858520508, "log_odds_ratio": -0.00018371779879089445, "logits/chosen": -0.20595361292362213, "logits/rejected": -0.2719736099243164, "logps/chosen": -0.00038460243376903236, "logps/rejected": -1.7641628980636597, "loss": 1.1704, "nll_loss": 0.2925931513309479, "rewards/accuracies": 1.0, "rewards/chosen": -3.846023537334986e-05, "rewards/margins": 0.17637783288955688, "rewards/rejected": -0.176416277885437, "step": 6503 }, { "epoch": 4.4979253112033195, "grad_norm": 8.698437690734863, "learning_rate": 3.056708160442601e-05, "log_odds_chosen": 9.91480541229248, "log_odds_ratio": -0.000267309311311692, "logits/chosen": -0.3980754315853119, "logits/rejected": -0.45291000604629517, "logps/chosen": -0.0006971318507567048, "logps/rejected": -1.9680414199829102, "loss": 1.8652, "nll_loss": 0.46628376841545105, "rewards/accuracies": 1.0, "rewards/chosen": -6.971318362047896e-05, "rewards/margins": 0.19673442840576172, "rewards/rejected": -0.19680413603782654, "step": 6504 }, { "epoch": 4.498616874135546, "grad_norm": 6.617920398712158, "learning_rate": 3.0563239588135856e-05, "log_odds_chosen": 9.921056747436523, "log_odds_ratio": -0.0003054399276152253, "logits/chosen": -0.4499887228012085, "logits/rejected": -0.48313918709754944, "logps/chosen": -0.0006958750309422612, "logps/rejected": -1.9423108100891113, "loss": 1.5065, "nll_loss": 0.3765985369682312, "rewards/accuracies": 1.0, "rewards/chosen": -6.958749872865155e-05, "rewards/margins": 0.19416150450706482, "rewards/rejected": -0.1942310780286789, "step": 6505 }, { "epoch": 4.499308437067773, "grad_norm": 8.874361991882324, "learning_rate": 3.055939757184571e-05, "log_odds_chosen": 9.87329387664795, "log_odds_ratio": -0.0001342103787465021, "logits/chosen": -0.4978351294994354, "logits/rejected": -0.5142601132392883, "logps/chosen": -0.0016254674410447478, "logps/rejected": -2.251915693283081, "loss": 0.7337, "nll_loss": 0.1834157556295395, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016254674119409174, "rewards/margins": 0.22502902150154114, "rewards/rejected": -0.22519156336784363, "step": 6506 }, { "epoch": 4.5, "grad_norm": 13.488443374633789, "learning_rate": 3.055555555555556e-05, "log_odds_chosen": 10.155275344848633, "log_odds_ratio": -5.018026786274277e-05, "logits/chosen": -0.44602784514427185, "logits/rejected": -0.6105251908302307, "logps/chosen": -0.00018949707737192512, "logps/rejected": -1.6713563203811646, "loss": 1.015, "nll_loss": 0.25374555587768555, "rewards/accuracies": 1.0, "rewards/chosen": -1.894970773719251e-05, "rewards/margins": 0.1671166867017746, "rewards/rejected": -0.16713562607765198, "step": 6507 }, { "epoch": 4.500691562932227, "grad_norm": 10.886319160461426, "learning_rate": 3.0551713539265406e-05, "log_odds_chosen": 10.691394805908203, "log_odds_ratio": -3.336165173095651e-05, "logits/chosen": -0.57883620262146, "logits/rejected": -0.6944460272789001, "logps/chosen": -0.00029359126347117126, "logps/rejected": -1.981209397315979, "loss": 1.2622, "nll_loss": 0.3155488967895508, "rewards/accuracies": 1.0, "rewards/chosen": -2.9359125619521365e-05, "rewards/margins": 0.19809159636497498, "rewards/rejected": -0.19812093675136566, "step": 6508 }, { "epoch": 4.501383125864454, "grad_norm": 7.991898059844971, "learning_rate": 3.054787152297526e-05, "log_odds_chosen": 10.266471862792969, "log_odds_ratio": -0.002679745201021433, "logits/chosen": -0.4057050943374634, "logits/rejected": -0.4801580309867859, "logps/chosen": -0.001888161525130272, "logps/rejected": -2.244232654571533, "loss": 0.9319, "nll_loss": 0.23269890248775482, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018881616415455937, "rewards/margins": 0.2242344468832016, "rewards/rejected": -0.22442325949668884, "step": 6509 }, { "epoch": 4.5020746887966805, "grad_norm": 11.097415924072266, "learning_rate": 3.054402950668511e-05, "log_odds_chosen": 8.94814682006836, "log_odds_ratio": -0.00044577824883162975, "logits/chosen": -0.31504395604133606, "logits/rejected": -0.42398470640182495, "logps/chosen": -0.0010490479180589318, "logps/rejected": -1.37216317653656, "loss": 1.145, "nll_loss": 0.28619518876075745, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010490478598512709, "rewards/margins": 0.13711142539978027, "rewards/rejected": -0.13721632957458496, "step": 6510 }, { "epoch": 4.502766251728907, "grad_norm": 10.749506950378418, "learning_rate": 3.0540187490394963e-05, "log_odds_chosen": 10.62763786315918, "log_odds_ratio": -0.0008419828373007476, "logits/chosen": -0.1942656934261322, "logits/rejected": -0.28289175033569336, "logps/chosen": -0.0006234937463887036, "logps/rejected": -2.479548454284668, "loss": 1.0909, "nll_loss": 0.27264609932899475, "rewards/accuracies": 1.0, "rewards/chosen": -6.234937609406188e-05, "rewards/margins": 0.24789251387119293, "rewards/rejected": -0.24795487523078918, "step": 6511 }, { "epoch": 4.503457814661134, "grad_norm": 5.3080244064331055, "learning_rate": 3.053634547410481e-05, "log_odds_chosen": 9.412324905395508, "log_odds_ratio": -0.00030437344685196877, "logits/chosen": -0.7221852540969849, "logits/rejected": -0.7768763303756714, "logps/chosen": -0.0006163233192637563, "logps/rejected": -1.5118619203567505, "loss": 0.9364, "nll_loss": 0.23406724631786346, "rewards/accuracies": 1.0, "rewards/chosen": -6.16323304711841e-05, "rewards/margins": 0.15112455189228058, "rewards/rejected": -0.15118618309497833, "step": 6512 }, { "epoch": 4.504149377593361, "grad_norm": 6.281959533691406, "learning_rate": 3.053250345781467e-05, "log_odds_chosen": 9.127216339111328, "log_odds_ratio": -0.0005243317573331296, "logits/chosen": -0.3163529634475708, "logits/rejected": -0.3212625980377197, "logps/chosen": -0.0006598001928068697, "logps/rejected": -1.6097946166992188, "loss": 0.9903, "nll_loss": 0.2475152164697647, "rewards/accuracies": 1.0, "rewards/chosen": -6.598001345992088e-05, "rewards/margins": 0.16091348230838776, "rewards/rejected": -0.16097944974899292, "step": 6513 }, { "epoch": 4.504840940525588, "grad_norm": 7.1390700340271, "learning_rate": 3.0528661441524514e-05, "log_odds_chosen": 10.395563125610352, "log_odds_ratio": -6.140669574961066e-05, "logits/chosen": -0.5050083994865417, "logits/rejected": -0.574657142162323, "logps/chosen": -0.013657084666192532, "logps/rejected": -2.174135684967041, "loss": 0.7363, "nll_loss": 0.1840781271457672, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013657084200531244, "rewards/margins": 0.21604785323143005, "rewards/rejected": -0.21741357445716858, "step": 6514 }, { "epoch": 4.505532503457815, "grad_norm": 8.09498119354248, "learning_rate": 3.0524819425234366e-05, "log_odds_chosen": 10.00520133972168, "log_odds_ratio": -0.00016035634325817227, "logits/chosen": -0.7113980650901794, "logits/rejected": -0.7848362326622009, "logps/chosen": -0.0005661610630340874, "logps/rejected": -1.7406871318817139, "loss": 1.0059, "nll_loss": 0.25144797563552856, "rewards/accuracies": 1.0, "rewards/chosen": -5.66161070310045e-05, "rewards/margins": 0.17401209473609924, "rewards/rejected": -0.17406870424747467, "step": 6515 }, { "epoch": 4.5062240663900415, "grad_norm": 3.606623649597168, "learning_rate": 3.052097740894422e-05, "log_odds_chosen": 8.800888061523438, "log_odds_ratio": -0.0008264086209237576, "logits/chosen": -0.6696504354476929, "logits/rejected": -0.6730940341949463, "logps/chosen": -0.005246603395789862, "logps/rejected": -2.150418758392334, "loss": 1.0627, "nll_loss": 0.26559382677078247, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005246603395789862, "rewards/margins": 0.2145172357559204, "rewards/rejected": -0.2150418758392334, "step": 6516 }, { "epoch": 4.506915629322268, "grad_norm": 8.892373085021973, "learning_rate": 3.0517135392654068e-05, "log_odds_chosen": 9.085613250732422, "log_odds_ratio": -0.017949793487787247, "logits/chosen": -0.39097902178764343, "logits/rejected": -0.41564783453941345, "logps/chosen": -0.006918535102158785, "logps/rejected": -2.56168270111084, "loss": 1.3884, "nll_loss": 0.3452964127063751, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006918534636497498, "rewards/margins": 0.2554764151573181, "rewards/rejected": -0.25616827607154846, "step": 6517 }, { "epoch": 4.507607192254495, "grad_norm": 8.086743354797363, "learning_rate": 3.0513293376363917e-05, "log_odds_chosen": 8.997087478637695, "log_odds_ratio": -0.0004165216232649982, "logits/chosen": -0.587999701499939, "logits/rejected": -0.5053616762161255, "logps/chosen": -0.0005531564820557833, "logps/rejected": -1.6527694463729858, "loss": 1.4879, "nll_loss": 0.3719237446784973, "rewards/accuracies": 1.0, "rewards/chosen": -5.531564966076985e-05, "rewards/margins": 0.16522163152694702, "rewards/rejected": -0.16527694463729858, "step": 6518 }, { "epoch": 4.508298755186722, "grad_norm": 10.235040664672852, "learning_rate": 3.050945136007377e-05, "log_odds_chosen": 9.41419506072998, "log_odds_ratio": -0.049024879932403564, "logits/chosen": -0.44523167610168457, "logits/rejected": -0.46845299005508423, "logps/chosen": -0.010855313390493393, "logps/rejected": -2.408637523651123, "loss": 1.0393, "nll_loss": 0.2549152374267578, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010855314321815968, "rewards/margins": 0.23977823555469513, "rewards/rejected": -0.24086375534534454, "step": 6519 }, { "epoch": 4.508990318118949, "grad_norm": 13.492453575134277, "learning_rate": 3.050560934378362e-05, "log_odds_chosen": 9.1912260055542, "log_odds_ratio": -0.05707675591111183, "logits/chosen": -0.22407862544059753, "logits/rejected": -0.24346131086349487, "logps/chosen": -0.011582519859075546, "logps/rejected": -1.6870681047439575, "loss": 1.1594, "nll_loss": 0.28415244817733765, "rewards/accuracies": 1.0, "rewards/chosen": -0.001158252009190619, "rewards/margins": 0.1675485521554947, "rewards/rejected": -0.16870680451393127, "step": 6520 }, { "epoch": 4.509681881051176, "grad_norm": 9.874868392944336, "learning_rate": 3.0501767327493468e-05, "log_odds_chosen": 8.210152626037598, "log_odds_ratio": -0.1597500443458557, "logits/chosen": -0.33186623454093933, "logits/rejected": -0.3340374827384949, "logps/chosen": -0.019689546898007393, "logps/rejected": -1.824156403541565, "loss": 1.8054, "nll_loss": 0.4353860020637512, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019689546898007393, "rewards/margins": 0.18044669926166534, "rewards/rejected": -0.1824156492948532, "step": 6521 }, { "epoch": 4.5103734439834025, "grad_norm": 9.211250305175781, "learning_rate": 3.0497925311203323e-05, "log_odds_chosen": 9.563366889953613, "log_odds_ratio": -0.0007702955044806004, "logits/chosen": -0.7755285501480103, "logits/rejected": -0.7752860188484192, "logps/chosen": -0.00045156345004215837, "logps/rejected": -1.422647476196289, "loss": 1.1282, "nll_loss": 0.2819834351539612, "rewards/accuracies": 1.0, "rewards/chosen": -4.515634645940736e-05, "rewards/margins": 0.14221958816051483, "rewards/rejected": -0.14226475358009338, "step": 6522 }, { "epoch": 4.511065006915629, "grad_norm": 5.907479763031006, "learning_rate": 3.0494083294913172e-05, "log_odds_chosen": 10.348957061767578, "log_odds_ratio": -0.0003926011559087783, "logits/chosen": -0.40856266021728516, "logits/rejected": -0.4991058111190796, "logps/chosen": -0.0006147118401713669, "logps/rejected": -2.118276596069336, "loss": 0.9335, "nll_loss": 0.2333364188671112, "rewards/accuracies": 1.0, "rewards/chosen": -6.147118256194517e-05, "rewards/margins": 0.21176619827747345, "rewards/rejected": -0.21182768046855927, "step": 6523 }, { "epoch": 4.511756569847856, "grad_norm": 5.421164512634277, "learning_rate": 3.049024127862302e-05, "log_odds_chosen": 9.136701583862305, "log_odds_ratio": -0.00031635037157684565, "logits/chosen": -0.5312263369560242, "logits/rejected": -0.5826066136360168, "logps/chosen": -0.0030627246014773846, "logps/rejected": -1.6864638328552246, "loss": 1.5047, "nll_loss": 0.3761472702026367, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030627247178927064, "rewards/margins": 0.16834010183811188, "rewards/rejected": -0.16864638030529022, "step": 6524 }, { "epoch": 4.512448132780083, "grad_norm": 6.3993706703186035, "learning_rate": 3.0486399262332877e-05, "log_odds_chosen": 9.904411315917969, "log_odds_ratio": -0.00023630354553461075, "logits/chosen": -0.5708715915679932, "logits/rejected": -0.6928766965866089, "logps/chosen": -0.0031272037886083126, "logps/rejected": -2.286712408065796, "loss": 1.4193, "nll_loss": 0.354805588722229, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031272039632312953, "rewards/margins": 0.22835853695869446, "rewards/rejected": -0.22867125272750854, "step": 6525 }, { "epoch": 4.51313969571231, "grad_norm": 6.600321292877197, "learning_rate": 3.0482557246042726e-05, "log_odds_chosen": 9.932918548583984, "log_odds_ratio": -0.00024182464403565973, "logits/chosen": -0.40240278840065, "logits/rejected": -0.5140780210494995, "logps/chosen": -0.0005220412276685238, "logps/rejected": -2.092681884765625, "loss": 0.9942, "nll_loss": 0.2485257387161255, "rewards/accuracies": 1.0, "rewards/chosen": -5.220412276685238e-05, "rewards/margins": 0.20921599864959717, "rewards/rejected": -0.20926819741725922, "step": 6526 }, { "epoch": 4.513831258644537, "grad_norm": 11.148608207702637, "learning_rate": 3.0478715229752575e-05, "log_odds_chosen": 10.128450393676758, "log_odds_ratio": -5.4566891776630655e-05, "logits/chosen": -0.49932363629341125, "logits/rejected": -0.5284522771835327, "logps/chosen": -0.00018085417104884982, "logps/rejected": -1.2286375761032104, "loss": 1.2895, "nll_loss": 0.3223586082458496, "rewards/accuracies": 1.0, "rewards/chosen": -1.80854167410871e-05, "rewards/margins": 0.12284567952156067, "rewards/rejected": -0.1228637546300888, "step": 6527 }, { "epoch": 4.514522821576763, "grad_norm": 8.504036903381348, "learning_rate": 3.0474873213462428e-05, "log_odds_chosen": 7.822513103485107, "log_odds_ratio": -0.05407997593283653, "logits/chosen": -0.38704991340637207, "logits/rejected": -0.42549827694892883, "logps/chosen": -0.012933324091136456, "logps/rejected": -1.0580989122390747, "loss": 0.8781, "nll_loss": 0.2141069769859314, "rewards/accuracies": 1.0, "rewards/chosen": -0.00129333243239671, "rewards/margins": 0.10451656579971313, "rewards/rejected": -0.10580989718437195, "step": 6528 }, { "epoch": 4.51521438450899, "grad_norm": 5.502275466918945, "learning_rate": 3.0471031197172277e-05, "log_odds_chosen": 8.165353775024414, "log_odds_ratio": -0.015324725769460201, "logits/chosen": -0.40444430708885193, "logits/rejected": -0.43285876512527466, "logps/chosen": -0.004935602191835642, "logps/rejected": -1.6389837265014648, "loss": 0.7102, "nll_loss": 0.17601147294044495, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004935602191835642, "rewards/margins": 0.16340482234954834, "rewards/rejected": -0.16389837861061096, "step": 6529 }, { "epoch": 4.515905947441217, "grad_norm": 10.776494026184082, "learning_rate": 3.0467189180882126e-05, "log_odds_chosen": 9.77366828918457, "log_odds_ratio": -0.0003069191880058497, "logits/chosen": -0.17800062894821167, "logits/rejected": -0.2688555121421814, "logps/chosen": -0.0002740153868217021, "logps/rejected": -1.6104249954223633, "loss": 1.5437, "nll_loss": 0.3858945965766907, "rewards/accuracies": 1.0, "rewards/chosen": -2.740153831837233e-05, "rewards/margins": 0.1610150933265686, "rewards/rejected": -0.16104251146316528, "step": 6530 }, { "epoch": 4.516597510373444, "grad_norm": 9.113598823547363, "learning_rate": 3.0463347164591975e-05, "log_odds_chosen": 9.05614948272705, "log_odds_ratio": -0.0004940159851685166, "logits/chosen": -0.371293842792511, "logits/rejected": -0.45829761028289795, "logps/chosen": -0.003316486719995737, "logps/rejected": -1.857425332069397, "loss": 0.8088, "nll_loss": 0.20213846862316132, "rewards/accuracies": 1.0, "rewards/chosen": -0.000331648625433445, "rewards/margins": 0.18541088700294495, "rewards/rejected": -0.1857425421476364, "step": 6531 }, { "epoch": 4.517289073305671, "grad_norm": 9.714481353759766, "learning_rate": 3.045950514830183e-05, "log_odds_chosen": 10.28732681274414, "log_odds_ratio": -0.0032634905073791742, "logits/chosen": -0.5074493885040283, "logits/rejected": -0.6017922759056091, "logps/chosen": -0.014635481871664524, "logps/rejected": -2.667628526687622, "loss": 1.7365, "nll_loss": 0.433795690536499, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014635482802987099, "rewards/margins": 0.26529932022094727, "rewards/rejected": -0.2667628526687622, "step": 6532 }, { "epoch": 4.517980636237898, "grad_norm": 8.554143905639648, "learning_rate": 3.045566313201168e-05, "log_odds_chosen": 10.311903953552246, "log_odds_ratio": -9.492110257269815e-05, "logits/chosen": -0.5861060619354248, "logits/rejected": -0.6993334889411926, "logps/chosen": -0.000430218962719664, "logps/rejected": -2.0976133346557617, "loss": 0.9784, "nll_loss": 0.24457991123199463, "rewards/accuracies": 1.0, "rewards/chosen": -4.3021900637540966e-05, "rewards/margins": 0.20971831679344177, "rewards/rejected": -0.2097613364458084, "step": 6533 }, { "epoch": 4.518672199170124, "grad_norm": 7.542330265045166, "learning_rate": 3.045182111572153e-05, "log_odds_chosen": 10.505435943603516, "log_odds_ratio": -5.5059033911675215e-05, "logits/chosen": -0.6798256635665894, "logits/rejected": -0.8034828305244446, "logps/chosen": -0.0003200229548383504, "logps/rejected": -1.9811010360717773, "loss": 0.6408, "nll_loss": 0.1601899266242981, "rewards/accuracies": 1.0, "rewards/chosen": -3.200229548383504e-05, "rewards/margins": 0.19807809591293335, "rewards/rejected": -0.19811010360717773, "step": 6534 }, { "epoch": 4.519363762102351, "grad_norm": 14.879232406616211, "learning_rate": 3.0447979099431385e-05, "log_odds_chosen": 8.73969841003418, "log_odds_ratio": -0.00043307646410539746, "logits/chosen": -0.3079538941383362, "logits/rejected": -0.3495687246322632, "logps/chosen": -0.001202198676764965, "logps/rejected": -1.1880905628204346, "loss": 1.2734, "nll_loss": 0.3183126151561737, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012021987640764564, "rewards/margins": 0.11868884414434433, "rewards/rejected": -0.1188090592622757, "step": 6535 }, { "epoch": 4.520055325034578, "grad_norm": 11.615407943725586, "learning_rate": 3.0444137083141234e-05, "log_odds_chosen": 10.744592666625977, "log_odds_ratio": -4.009983967989683e-05, "logits/chosen": -0.5666415095329285, "logits/rejected": -0.5954309701919556, "logps/chosen": -0.00016624416457489133, "logps/rejected": -1.9238814115524292, "loss": 0.9044, "nll_loss": 0.22610604763031006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6624417185084894e-05, "rewards/margins": 0.19237151741981506, "rewards/rejected": -0.1923881471157074, "step": 6536 }, { "epoch": 4.520746887966805, "grad_norm": 8.567818641662598, "learning_rate": 3.0440295066851083e-05, "log_odds_chosen": 10.016960144042969, "log_odds_ratio": -0.000617867975961417, "logits/chosen": -0.4099767208099365, "logits/rejected": -0.5116313099861145, "logps/chosen": -0.0036224813666194677, "logps/rejected": -2.081411838531494, "loss": 1.0917, "nll_loss": 0.27285081148147583, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036224815994501114, "rewards/margins": 0.2077789306640625, "rewards/rejected": -0.20814116299152374, "step": 6537 }, { "epoch": 4.521438450899032, "grad_norm": 11.559277534484863, "learning_rate": 3.0436453050560935e-05, "log_odds_chosen": 10.627859115600586, "log_odds_ratio": -3.2798125175759196e-05, "logits/chosen": -0.46019408106803894, "logits/rejected": -0.4336419403553009, "logps/chosen": -0.0002833693288266659, "logps/rejected": -1.8214410543441772, "loss": 1.3351, "nll_loss": 0.3337723910808563, "rewards/accuracies": 1.0, "rewards/chosen": -2.833693361026235e-05, "rewards/margins": 0.18211576342582703, "rewards/rejected": -0.18214410543441772, "step": 6538 }, { "epoch": 4.522130013831259, "grad_norm": 11.562444686889648, "learning_rate": 3.0432611034270784e-05, "log_odds_chosen": 10.475576400756836, "log_odds_ratio": -0.00014733010903000832, "logits/chosen": -0.6835733652114868, "logits/rejected": -0.7739354372024536, "logps/chosen": -0.0013565432745963335, "logps/rejected": -2.290761947631836, "loss": 1.5765, "nll_loss": 0.3941211700439453, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001356543361907825, "rewards/margins": 0.22894051671028137, "rewards/rejected": -0.22907620668411255, "step": 6539 }, { "epoch": 4.522821576763485, "grad_norm": 7.830057621002197, "learning_rate": 3.0428769017980633e-05, "log_odds_chosen": 8.738075256347656, "log_odds_ratio": -0.016228843480348587, "logits/chosen": -0.6461495161056519, "logits/rejected": -0.6525962352752686, "logps/chosen": -0.024869779124855995, "logps/rejected": -2.2403388023376465, "loss": 1.2305, "nll_loss": 0.30600258708000183, "rewards/accuracies": 1.0, "rewards/chosen": -0.0024869779590517282, "rewards/margins": 0.22154691815376282, "rewards/rejected": -0.2240338772535324, "step": 6540 }, { "epoch": 4.523513139695712, "grad_norm": 5.756031513214111, "learning_rate": 3.042492700169049e-05, "log_odds_chosen": 9.648249626159668, "log_odds_ratio": -0.0002620015002321452, "logits/chosen": -0.3646390438079834, "logits/rejected": -0.49688512086868286, "logps/chosen": -0.0002835288178175688, "logps/rejected": -1.7063332796096802, "loss": 1.483, "nll_loss": 0.37071168422698975, "rewards/accuracies": 1.0, "rewards/chosen": -2.835288250935264e-05, "rewards/margins": 0.17060495913028717, "rewards/rejected": -0.17063331604003906, "step": 6541 }, { "epoch": 4.524204702627939, "grad_norm": 5.895205497741699, "learning_rate": 3.042108498540034e-05, "log_odds_chosen": 9.715417861938477, "log_odds_ratio": -0.00039025815203785896, "logits/chosen": -0.6709186434745789, "logits/rejected": -0.7191147208213806, "logps/chosen": -0.00036088767228648067, "logps/rejected": -1.4903755187988281, "loss": 0.7793, "nll_loss": 0.19479727745056152, "rewards/accuracies": 1.0, "rewards/chosen": -3.608876795624383e-05, "rewards/margins": 0.14900147914886475, "rewards/rejected": -0.14903756976127625, "step": 6542 }, { "epoch": 4.524896265560166, "grad_norm": 14.820718765258789, "learning_rate": 3.0417242969110187e-05, "log_odds_chosen": 10.488899230957031, "log_odds_ratio": -5.811005030409433e-05, "logits/chosen": -0.25905176997184753, "logits/rejected": -0.3118452727794647, "logps/chosen": -0.00025490691768936813, "logps/rejected": -2.0734028816223145, "loss": 1.54, "nll_loss": 0.3849845230579376, "rewards/accuracies": 1.0, "rewards/chosen": -2.5490691768936813e-05, "rewards/margins": 0.20731480419635773, "rewards/rejected": -0.2073403000831604, "step": 6543 }, { "epoch": 4.525587828492393, "grad_norm": 10.09522533416748, "learning_rate": 3.0413400952820043e-05, "log_odds_chosen": 8.911540985107422, "log_odds_ratio": -0.07205822318792343, "logits/chosen": -0.7996432781219482, "logits/rejected": -0.8874554634094238, "logps/chosen": -0.20625396072864532, "logps/rejected": -2.42454195022583, "loss": 1.3134, "nll_loss": 0.3211488425731659, "rewards/accuracies": 1.0, "rewards/chosen": -0.0206253994256258, "rewards/margins": 0.22182880342006683, "rewards/rejected": -0.2424541860818863, "step": 6544 }, { "epoch": 4.5262793914246195, "grad_norm": 10.470040321350098, "learning_rate": 3.0409558936529892e-05, "log_odds_chosen": 9.751822471618652, "log_odds_ratio": -0.00018719106446951628, "logits/chosen": -0.43893668055534363, "logits/rejected": -0.5430324077606201, "logps/chosen": -0.0002558599226176739, "logps/rejected": -1.4737515449523926, "loss": 1.1636, "nll_loss": 0.29087692499160767, "rewards/accuracies": 1.0, "rewards/chosen": -2.5585992261767387e-05, "rewards/margins": 0.14734956622123718, "rewards/rejected": -0.1473751664161682, "step": 6545 }, { "epoch": 4.526970954356846, "grad_norm": 12.051141738891602, "learning_rate": 3.040571692023974e-05, "log_odds_chosen": 8.957733154296875, "log_odds_ratio": -0.035229574888944626, "logits/chosen": -0.2745705544948578, "logits/rejected": -0.42236384749412537, "logps/chosen": -0.00745656481012702, "logps/rejected": -1.6540604829788208, "loss": 0.9206, "nll_loss": 0.22662924230098724, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007456564926542342, "rewards/margins": 0.16466039419174194, "rewards/rejected": -0.16540606319904327, "step": 6546 }, { "epoch": 4.527662517289073, "grad_norm": 8.523053169250488, "learning_rate": 3.0401874903949594e-05, "log_odds_chosen": 9.304712295532227, "log_odds_ratio": -0.07581852376461029, "logits/chosen": -0.41242682933807373, "logits/rejected": -0.497077614068985, "logps/chosen": -0.013863787986338139, "logps/rejected": -2.0134148597717285, "loss": 0.9989, "nll_loss": 0.24213922023773193, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013863787753507495, "rewards/margins": 0.19995509088039398, "rewards/rejected": -0.20134148001670837, "step": 6547 }, { "epoch": 4.5283540802213, "grad_norm": 6.447927474975586, "learning_rate": 3.0398032887659443e-05, "log_odds_chosen": 8.518516540527344, "log_odds_ratio": -0.004801702219992876, "logits/chosen": -0.37904903292655945, "logits/rejected": -0.4070984125137329, "logps/chosen": -0.0038324713241308928, "logps/rejected": -1.227960228919983, "loss": 1.0774, "nll_loss": 0.2688811123371124, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038324715569615364, "rewards/margins": 0.1224127858877182, "rewards/rejected": -0.12279602885246277, "step": 6548 }, { "epoch": 4.529045643153527, "grad_norm": 6.222280025482178, "learning_rate": 3.0394190871369292e-05, "log_odds_chosen": 11.532266616821289, "log_odds_ratio": -2.348639100091532e-05, "logits/chosen": -0.692290186882019, "logits/rejected": -0.7823889255523682, "logps/chosen": -0.00016611188766546547, "logps/rejected": -2.6906960010528564, "loss": 0.8388, "nll_loss": 0.20968790352344513, "rewards/accuracies": 1.0, "rewards/chosen": -1.661118949414231e-05, "rewards/margins": 0.26905298233032227, "rewards/rejected": -0.2690696120262146, "step": 6549 }, { "epoch": 4.529737206085754, "grad_norm": 5.384705066680908, "learning_rate": 3.0390348855079148e-05, "log_odds_chosen": 9.603094100952148, "log_odds_ratio": -0.00023756190785206854, "logits/chosen": -0.7226128578186035, "logits/rejected": -0.6962193846702576, "logps/chosen": -0.0005082216230221093, "logps/rejected": -1.7970054149627686, "loss": 1.6197, "nll_loss": 0.40490108728408813, "rewards/accuracies": 1.0, "rewards/chosen": -5.082216011942364e-05, "rewards/margins": 0.1796497106552124, "rewards/rejected": -0.1797005534172058, "step": 6550 }, { "epoch": 4.5304287690179805, "grad_norm": 12.002609252929688, "learning_rate": 3.0386506838788997e-05, "log_odds_chosen": 10.036380767822266, "log_odds_ratio": -0.0004266396281309426, "logits/chosen": -0.14297831058502197, "logits/rejected": -0.21859431266784668, "logps/chosen": -0.002509652404114604, "logps/rejected": -2.0817997455596924, "loss": 1.5089, "nll_loss": 0.37718573212623596, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025096521130762994, "rewards/margins": 0.20792901515960693, "rewards/rejected": -0.20817998051643372, "step": 6551 }, { "epoch": 4.531120331950207, "grad_norm": 6.717325210571289, "learning_rate": 3.0382664822498846e-05, "log_odds_chosen": 11.133275985717773, "log_odds_ratio": -2.4246892280643806e-05, "logits/chosen": -0.5152009129524231, "logits/rejected": -0.5383793711662292, "logps/chosen": -0.0005616003763861954, "logps/rejected": -2.65570330619812, "loss": 0.9919, "nll_loss": 0.24797604978084564, "rewards/accuracies": 1.0, "rewards/chosen": -5.616003181785345e-05, "rewards/margins": 0.26551416516304016, "rewards/rejected": -0.2655703127384186, "step": 6552 }, { "epoch": 4.531811894882434, "grad_norm": 8.466388702392578, "learning_rate": 3.03788228062087e-05, "log_odds_chosen": 9.45691204071045, "log_odds_ratio": -0.0012083580950275064, "logits/chosen": -0.7959653735160828, "logits/rejected": -0.8534533977508545, "logps/chosen": -0.0015747640281915665, "logps/rejected": -1.6520307064056396, "loss": 0.7995, "nll_loss": 0.19975440204143524, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015747641737107188, "rewards/margins": 0.1650455892086029, "rewards/rejected": -0.16520309448242188, "step": 6553 }, { "epoch": 4.532503457814661, "grad_norm": 6.245697498321533, "learning_rate": 3.037498078991855e-05, "log_odds_chosen": 8.997611999511719, "log_odds_ratio": -0.003799182828515768, "logits/chosen": -0.1710015833377838, "logits/rejected": -0.26420480012893677, "logps/chosen": -0.002259556669741869, "logps/rejected": -1.2888202667236328, "loss": 1.653, "nll_loss": 0.4128655791282654, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022595567861571908, "rewards/margins": 0.1286560595035553, "rewards/rejected": -0.12888203561306, "step": 6554 }, { "epoch": 4.533195020746888, "grad_norm": 9.199127197265625, "learning_rate": 3.03711387736284e-05, "log_odds_chosen": 11.216801643371582, "log_odds_ratio": -5.718818283639848e-05, "logits/chosen": -0.8356324434280396, "logits/rejected": -0.9238957166671753, "logps/chosen": -0.000302387576084584, "logps/rejected": -2.171398162841797, "loss": 1.0406, "nll_loss": 0.2601334750652313, "rewards/accuracies": 1.0, "rewards/chosen": -3.0238759791245684e-05, "rewards/margins": 0.2171095758676529, "rewards/rejected": -0.2171398103237152, "step": 6555 }, { "epoch": 4.533886583679115, "grad_norm": 6.731876373291016, "learning_rate": 3.0367296757338252e-05, "log_odds_chosen": 9.419376373291016, "log_odds_ratio": -0.0002908431924879551, "logits/chosen": -0.7840916514396667, "logits/rejected": -0.7459971308708191, "logps/chosen": -0.0008626552298665047, "logps/rejected": -1.6166490316390991, "loss": 0.8508, "nll_loss": 0.212665855884552, "rewards/accuracies": 1.0, "rewards/chosen": -8.626552880741656e-05, "rewards/margins": 0.16157862544059753, "rewards/rejected": -0.1616649031639099, "step": 6556 }, { "epoch": 4.5345781466113415, "grad_norm": 11.93840217590332, "learning_rate": 3.03634547410481e-05, "log_odds_chosen": 10.537141799926758, "log_odds_ratio": -3.9206992369145155e-05, "logits/chosen": -0.7244991064071655, "logits/rejected": -0.853850245475769, "logps/chosen": -0.0005426603020168841, "logps/rejected": -2.082249164581299, "loss": 1.3752, "nll_loss": 0.34379222989082336, "rewards/accuracies": 1.0, "rewards/chosen": -5.426602729130536e-05, "rewards/margins": 0.20817065238952637, "rewards/rejected": -0.20822492241859436, "step": 6557 }, { "epoch": 4.535269709543568, "grad_norm": 7.598918914794922, "learning_rate": 3.0359612724757954e-05, "log_odds_chosen": 9.795328140258789, "log_odds_ratio": -0.00011863884719787166, "logits/chosen": -0.6790354251861572, "logits/rejected": -0.747089147567749, "logps/chosen": -0.0004407150554470718, "logps/rejected": -1.7884000539779663, "loss": 1.4007, "nll_loss": 0.35017552971839905, "rewards/accuracies": 1.0, "rewards/chosen": -4.407150481711142e-05, "rewards/margins": 0.1787959486246109, "rewards/rejected": -0.1788400113582611, "step": 6558 }, { "epoch": 4.535961272475795, "grad_norm": 7.28376579284668, "learning_rate": 3.0355770708467806e-05, "log_odds_chosen": 10.144323348999023, "log_odds_ratio": -0.00016057609173003584, "logits/chosen": -0.8811751008033752, "logits/rejected": -0.8111578226089478, "logps/chosen": -0.00015959145093802363, "logps/rejected": -1.4356104135513306, "loss": 1.8257, "nll_loss": 0.45640096068382263, "rewards/accuracies": 1.0, "rewards/chosen": -1.5959145457600243e-05, "rewards/margins": 0.14354507625102997, "rewards/rejected": -0.14356103539466858, "step": 6559 }, { "epoch": 4.536652835408022, "grad_norm": 8.035957336425781, "learning_rate": 3.0351928692177655e-05, "log_odds_chosen": 9.968259811401367, "log_odds_ratio": -0.00022156504564918578, "logits/chosen": -0.6383379697799683, "logits/rejected": -0.6144269704818726, "logps/chosen": -0.0004586254362948239, "logps/rejected": -1.715651512145996, "loss": 0.7176, "nll_loss": 0.1793805956840515, "rewards/accuracies": 1.0, "rewards/chosen": -4.5862547267461196e-05, "rewards/margins": 0.17151929438114166, "rewards/rejected": -0.17156516015529633, "step": 6560 }, { "epoch": 4.537344398340249, "grad_norm": 7.693199634552002, "learning_rate": 3.0348086675887504e-05, "log_odds_chosen": 10.411820411682129, "log_odds_ratio": -0.00023046185378916562, "logits/chosen": -0.6372178196907043, "logits/rejected": -0.7216589450836182, "logps/chosen": -0.00032094272319227457, "logps/rejected": -1.8828961849212646, "loss": 0.9919, "nll_loss": 0.24796098470687866, "rewards/accuracies": 1.0, "rewards/chosen": -3.2094270864035934e-05, "rewards/margins": 0.18825753033161163, "rewards/rejected": -0.18828962743282318, "step": 6561 }, { "epoch": 4.538035961272476, "grad_norm": 5.328170299530029, "learning_rate": 3.034424465959736e-05, "log_odds_chosen": 9.515695571899414, "log_odds_ratio": -0.03716710954904556, "logits/chosen": -0.6656532883644104, "logits/rejected": -0.8682924509048462, "logps/chosen": -0.019652245566248894, "logps/rejected": -1.7537295818328857, "loss": 0.8466, "nll_loss": 0.20792116224765778, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019652247428894043, "rewards/margins": 0.17340771853923798, "rewards/rejected": -0.17537295818328857, "step": 6562 }, { "epoch": 4.5387275242047025, "grad_norm": 20.874311447143555, "learning_rate": 3.034040264330721e-05, "log_odds_chosen": 9.981575965881348, "log_odds_ratio": -0.0005322595243342221, "logits/chosen": -0.4351813495159149, "logits/rejected": -0.5474547743797302, "logps/chosen": -0.0009682047530077398, "logps/rejected": -1.9328224658966064, "loss": 1.3389, "nll_loss": 0.3346773386001587, "rewards/accuracies": 1.0, "rewards/chosen": -9.682048403192312e-05, "rewards/margins": 0.1931854486465454, "rewards/rejected": -0.19328224658966064, "step": 6563 }, { "epoch": 4.539419087136929, "grad_norm": 14.790209770202637, "learning_rate": 3.0336560627017058e-05, "log_odds_chosen": 10.135683059692383, "log_odds_ratio": -0.00022825025371275842, "logits/chosen": -0.9268226027488708, "logits/rejected": -1.0066332817077637, "logps/chosen": -0.006441335193812847, "logps/rejected": -2.714794635772705, "loss": 1.5193, "nll_loss": 0.3798065483570099, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006441335426643491, "rewards/margins": 0.27083534002304077, "rewards/rejected": -0.27147945761680603, "step": 6564 }, { "epoch": 4.540110650069156, "grad_norm": 13.425837516784668, "learning_rate": 3.0332718610726914e-05, "log_odds_chosen": 9.612133979797363, "log_odds_ratio": -0.0017814553575590253, "logits/chosen": -0.5331589579582214, "logits/rejected": -0.5802890062332153, "logps/chosen": -0.002197981346398592, "logps/rejected": -1.5549582242965698, "loss": 1.1525, "nll_loss": 0.28794315457344055, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021979815210215747, "rewards/margins": 0.15527603030204773, "rewards/rejected": -0.15549582242965698, "step": 6565 }, { "epoch": 4.540802213001383, "grad_norm": 10.138154983520508, "learning_rate": 3.0328876594436763e-05, "log_odds_chosen": 10.31335735321045, "log_odds_ratio": -5.144106762600131e-05, "logits/chosen": -0.5672279596328735, "logits/rejected": -0.5084483027458191, "logps/chosen": -0.00024329419829882681, "logps/rejected": -1.719193458557129, "loss": 1.0099, "nll_loss": 0.25245851278305054, "rewards/accuracies": 1.0, "rewards/chosen": -2.432941982988268e-05, "rewards/margins": 0.17189502716064453, "rewards/rejected": -0.1719193458557129, "step": 6566 }, { "epoch": 4.54149377593361, "grad_norm": 7.8678789138793945, "learning_rate": 3.0325034578146612e-05, "log_odds_chosen": 10.099241256713867, "log_odds_ratio": -5.9387311921454966e-05, "logits/chosen": -0.49886399507522583, "logits/rejected": -0.48791369795799255, "logps/chosen": -0.0002885722788050771, "logps/rejected": -1.4851117134094238, "loss": 0.9878, "nll_loss": 0.24695292115211487, "rewards/accuracies": 1.0, "rewards/chosen": -2.8857228244305588e-05, "rewards/margins": 0.1484823226928711, "rewards/rejected": -0.14851117134094238, "step": 6567 }, { "epoch": 4.542185338865837, "grad_norm": 20.60369873046875, "learning_rate": 3.0321192561856465e-05, "log_odds_chosen": 9.15850830078125, "log_odds_ratio": -0.0005096118547953665, "logits/chosen": -0.37946510314941406, "logits/rejected": -0.4805373251438141, "logps/chosen": -0.0014331219717860222, "logps/rejected": -1.675501823425293, "loss": 1.1373, "nll_loss": 0.28428125381469727, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001433122088201344, "rewards/margins": 0.1674068719148636, "rewards/rejected": -0.167550191283226, "step": 6568 }, { "epoch": 4.5428769017980635, "grad_norm": 7.6536736488342285, "learning_rate": 3.0317350545566314e-05, "log_odds_chosen": 10.412943840026855, "log_odds_ratio": -0.0005767575348727405, "logits/chosen": -0.6405270099639893, "logits/rejected": -0.7021207809448242, "logps/chosen": -0.00048545480240136385, "logps/rejected": -2.3228001594543457, "loss": 0.7183, "nll_loss": 0.1795252412557602, "rewards/accuracies": 1.0, "rewards/chosen": -4.8545480240136385e-05, "rewards/margins": 0.23223146796226501, "rewards/rejected": -0.23228003084659576, "step": 6569 }, { "epoch": 4.54356846473029, "grad_norm": 5.79603385925293, "learning_rate": 3.0313508529276163e-05, "log_odds_chosen": 8.871224403381348, "log_odds_ratio": -0.001314416411332786, "logits/chosen": -0.34749066829681396, "logits/rejected": -0.3158177137374878, "logps/chosen": -0.0010376510908827186, "logps/rejected": -1.6415541172027588, "loss": 1.1476, "nll_loss": 0.28676947951316833, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010376510908827186, "rewards/margins": 0.16405165195465088, "rewards/rejected": -0.16415540874004364, "step": 6570 }, { "epoch": 4.544260027662517, "grad_norm": 13.5281343460083, "learning_rate": 3.030966651298602e-05, "log_odds_chosen": 8.49252986907959, "log_odds_ratio": -0.041920870542526245, "logits/chosen": -0.26285240054130554, "logits/rejected": -0.41422227025032043, "logps/chosen": -0.009888852015137672, "logps/rejected": -1.6656150817871094, "loss": 1.1532, "nll_loss": 0.28410616517066956, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009888852946460247, "rewards/margins": 0.1655726581811905, "rewards/rejected": -0.1665615290403366, "step": 6571 }, { "epoch": 4.544951590594744, "grad_norm": 9.392267227172852, "learning_rate": 3.0305824496695868e-05, "log_odds_chosen": 9.859603881835938, "log_odds_ratio": -0.00022053778229746968, "logits/chosen": -0.2102208435535431, "logits/rejected": -0.2464631199836731, "logps/chosen": -0.0003437441191636026, "logps/rejected": -1.7045176029205322, "loss": 1.5048, "nll_loss": 0.37617313861846924, "rewards/accuracies": 1.0, "rewards/chosen": -3.437441409914754e-05, "rewards/margins": 0.1704173982143402, "rewards/rejected": -0.17045177519321442, "step": 6572 }, { "epoch": 4.545643153526971, "grad_norm": 12.560931205749512, "learning_rate": 3.0301982480405717e-05, "log_odds_chosen": 10.292903900146484, "log_odds_ratio": -5.359947317629121e-05, "logits/chosen": -0.6326181292533875, "logits/rejected": -0.6115179061889648, "logps/chosen": -0.00027635638252831995, "logps/rejected": -1.6791993379592896, "loss": 1.0485, "nll_loss": 0.2621276378631592, "rewards/accuracies": 1.0, "rewards/chosen": -2.7635642254608683e-05, "rewards/margins": 0.16789229214191437, "rewards/rejected": -0.16791994869709015, "step": 6573 }, { "epoch": 4.546334716459198, "grad_norm": 12.031844139099121, "learning_rate": 3.0298140464115572e-05, "log_odds_chosen": 10.533636093139648, "log_odds_ratio": -5.4832369642099366e-05, "logits/chosen": -0.1512179672718048, "logits/rejected": -0.2702445387840271, "logps/chosen": -0.0006668913993053138, "logps/rejected": -2.733823776245117, "loss": 1.0078, "nll_loss": 0.2519344985485077, "rewards/accuracies": 1.0, "rewards/chosen": -6.668914284091443e-05, "rewards/margins": 0.2733156979084015, "rewards/rejected": -0.27338239550590515, "step": 6574 }, { "epoch": 4.5470262793914245, "grad_norm": 7.381800651550293, "learning_rate": 3.029429844782542e-05, "log_odds_chosen": 9.192039489746094, "log_odds_ratio": -0.0007829120149835944, "logits/chosen": -0.6911160945892334, "logits/rejected": -0.6769638061523438, "logps/chosen": -0.0007873540162108839, "logps/rejected": -1.4364888668060303, "loss": 1.0262, "nll_loss": 0.25647637248039246, "rewards/accuracies": 1.0, "rewards/chosen": -7.873540016589686e-05, "rewards/margins": 0.1435701549053192, "rewards/rejected": -0.1436488926410675, "step": 6575 }, { "epoch": 4.547717842323651, "grad_norm": 8.626969337463379, "learning_rate": 3.029045643153527e-05, "log_odds_chosen": 10.28857707977295, "log_odds_ratio": -0.00033702142536640167, "logits/chosen": -0.5012588500976562, "logits/rejected": -0.5341583490371704, "logps/chosen": -0.00022805618937127292, "logps/rejected": -1.5251436233520508, "loss": 0.8619, "nll_loss": 0.21544235944747925, "rewards/accuracies": 1.0, "rewards/chosen": -2.2805619664723054e-05, "rewards/margins": 0.15249155461788177, "rewards/rejected": -0.15251435339450836, "step": 6576 }, { "epoch": 4.548409405255878, "grad_norm": 8.244384765625, "learning_rate": 3.0286614415245123e-05, "log_odds_chosen": 9.377249717712402, "log_odds_ratio": -0.00011702909978339449, "logits/chosen": -0.3914976119995117, "logits/rejected": -0.46458885073661804, "logps/chosen": -0.00035335979191586375, "logps/rejected": -1.3842849731445312, "loss": 0.9243, "nll_loss": 0.23106145858764648, "rewards/accuracies": 1.0, "rewards/chosen": -3.5335979191586375e-05, "rewards/margins": 0.13839316368103027, "rewards/rejected": -0.13842850923538208, "step": 6577 }, { "epoch": 4.549100968188105, "grad_norm": 9.749436378479004, "learning_rate": 3.0282772398954972e-05, "log_odds_chosen": 11.123355865478516, "log_odds_ratio": -2.415657763776835e-05, "logits/chosen": -0.37832289934158325, "logits/rejected": -0.4357031583786011, "logps/chosen": -0.00014345777162816375, "logps/rejected": -2.0811164379119873, "loss": 1.0, "nll_loss": 0.25000274181365967, "rewards/accuracies": 1.0, "rewards/chosen": -1.4345778254210018e-05, "rewards/margins": 0.20809729397296906, "rewards/rejected": -0.20811164379119873, "step": 6578 }, { "epoch": 4.549792531120332, "grad_norm": 9.854357719421387, "learning_rate": 3.027893038266482e-05, "log_odds_chosen": 10.580504417419434, "log_odds_ratio": -0.00043930544052273035, "logits/chosen": -0.7240288853645325, "logits/rejected": -0.7891085147857666, "logps/chosen": -0.0024362949188798666, "logps/rejected": -2.18922758102417, "loss": 1.3597, "nll_loss": 0.339883416891098, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024362948897760361, "rewards/margins": 0.21867913007736206, "rewards/rejected": -0.21892276406288147, "step": 6579 }, { "epoch": 4.550484094052559, "grad_norm": 11.040898323059082, "learning_rate": 3.0275088366374677e-05, "log_odds_chosen": 10.449048042297363, "log_odds_ratio": -5.974875239189714e-05, "logits/chosen": -0.7696495652198792, "logits/rejected": -0.8473649621009827, "logps/chosen": -0.00033175962744280696, "logps/rejected": -2.1812098026275635, "loss": 1.4198, "nll_loss": 0.35495179891586304, "rewards/accuracies": 1.0, "rewards/chosen": -3.317596565466374e-05, "rewards/margins": 0.2180878072977066, "rewards/rejected": -0.2181209921836853, "step": 6580 }, { "epoch": 4.551175656984785, "grad_norm": 9.304978370666504, "learning_rate": 3.0271246350084526e-05, "log_odds_chosen": 10.155534744262695, "log_odds_ratio": -0.00018998852465301752, "logits/chosen": -0.8298165798187256, "logits/rejected": -0.8783495426177979, "logps/chosen": -0.0010503771482035518, "logps/rejected": -1.9879919290542603, "loss": 1.5645, "nll_loss": 0.3911066949367523, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010503771773073822, "rewards/margins": 0.1986941695213318, "rewards/rejected": -0.19879920780658722, "step": 6581 }, { "epoch": 4.551867219917012, "grad_norm": 7.36096715927124, "learning_rate": 3.0267404333794375e-05, "log_odds_chosen": 10.879387855529785, "log_odds_ratio": -6.295489583862945e-05, "logits/chosen": -0.4926462471485138, "logits/rejected": -0.5912123322486877, "logps/chosen": -0.0021177981980144978, "logps/rejected": -2.5622596740722656, "loss": 0.841, "nll_loss": 0.21025186777114868, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021177981398068368, "rewards/margins": 0.25601422786712646, "rewards/rejected": -0.2562260031700134, "step": 6582 }, { "epoch": 4.552558782849239, "grad_norm": 7.8839898109436035, "learning_rate": 3.026356231750423e-05, "log_odds_chosen": 10.247995376586914, "log_odds_ratio": -0.0001559545489726588, "logits/chosen": -0.7979452013969421, "logits/rejected": -0.9169696569442749, "logps/chosen": -0.0002751094871200621, "logps/rejected": -2.117699384689331, "loss": 1.2424, "nll_loss": 0.3105788230895996, "rewards/accuracies": 1.0, "rewards/chosen": -2.751094871200621e-05, "rewards/margins": 0.21174244582653046, "rewards/rejected": -0.2117699384689331, "step": 6583 }, { "epoch": 4.553250345781466, "grad_norm": 12.00692367553711, "learning_rate": 3.025972030121408e-05, "log_odds_chosen": 9.980520248413086, "log_odds_ratio": -0.00025905261281877756, "logits/chosen": -0.424941748380661, "logits/rejected": -0.5358071327209473, "logps/chosen": -0.0001751706877257675, "logps/rejected": -1.4843164682388306, "loss": 1.0665, "nll_loss": 0.26660993695259094, "rewards/accuracies": 1.0, "rewards/chosen": -1.751706986397039e-05, "rewards/margins": 0.14841413497924805, "rewards/rejected": -0.148431658744812, "step": 6584 }, { "epoch": 4.553941908713693, "grad_norm": 7.942290782928467, "learning_rate": 3.025587828492393e-05, "log_odds_chosen": 8.972213745117188, "log_odds_ratio": -0.0004230959457345307, "logits/chosen": -0.8286534547805786, "logits/rejected": -0.8645509481430054, "logps/chosen": -0.0003187672991771251, "logps/rejected": -1.2250981330871582, "loss": 1.1575, "nll_loss": 0.28933191299438477, "rewards/accuracies": 1.0, "rewards/chosen": -3.187673064530827e-05, "rewards/margins": 0.1224779412150383, "rewards/rejected": -0.12250981479883194, "step": 6585 }, { "epoch": 4.55463347164592, "grad_norm": 11.324723243713379, "learning_rate": 3.025203626863378e-05, "log_odds_chosen": 9.58649730682373, "log_odds_ratio": -0.012563646771013737, "logits/chosen": -0.7872684597969055, "logits/rejected": -0.8913931250572205, "logps/chosen": -0.0046211532317101955, "logps/rejected": -2.069636821746826, "loss": 1.3229, "nll_loss": 0.3294662833213806, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046211533481255174, "rewards/margins": 0.2065015733242035, "rewards/rejected": -0.2069637030363083, "step": 6586 }, { "epoch": 4.555325034578146, "grad_norm": 11.3200044631958, "learning_rate": 3.024819425234363e-05, "log_odds_chosen": 10.222295761108398, "log_odds_ratio": -0.0001525928091723472, "logits/chosen": -0.5663999915122986, "logits/rejected": -0.6826832294464111, "logps/chosen": -0.002781313844025135, "logps/rejected": -2.424255847930908, "loss": 0.9093, "nll_loss": 0.2273210734128952, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027813136694021523, "rewards/margins": 0.24214744567871094, "rewards/rejected": -0.2424255907535553, "step": 6587 }, { "epoch": 4.556016597510373, "grad_norm": 11.659857749938965, "learning_rate": 3.024435223605348e-05, "log_odds_chosen": 10.368592262268066, "log_odds_ratio": -0.000933436444029212, "logits/chosen": -0.918364405632019, "logits/rejected": -0.9740538597106934, "logps/chosen": -0.009822577238082886, "logps/rejected": -2.1933836936950684, "loss": 1.291, "nll_loss": 0.32265663146972656, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009822577703744173, "rewards/margins": 0.21835610270500183, "rewards/rejected": -0.21933835744857788, "step": 6588 }, { "epoch": 4.5567081604426, "grad_norm": 6.759733200073242, "learning_rate": 3.0240510219763335e-05, "log_odds_chosen": 9.884389877319336, "log_odds_ratio": -0.0005357956397347152, "logits/chosen": -0.5267442464828491, "logits/rejected": -0.5922637581825256, "logps/chosen": -0.004163261502981186, "logps/rejected": -2.33642315864563, "loss": 1.1374, "nll_loss": 0.2843058705329895, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004163261619396508, "rewards/margins": 0.2332259863615036, "rewards/rejected": -0.2336423099040985, "step": 6589 }, { "epoch": 4.557399723374827, "grad_norm": 6.967859268188477, "learning_rate": 3.0236668203473184e-05, "log_odds_chosen": 9.72664737701416, "log_odds_ratio": -0.0008469682070426643, "logits/chosen": -0.24553634226322174, "logits/rejected": -0.2926129996776581, "logps/chosen": -0.0009131368133239448, "logps/rejected": -1.4904640913009644, "loss": 0.6963, "nll_loss": 0.17398318648338318, "rewards/accuracies": 1.0, "rewards/chosen": -9.1313682787586e-05, "rewards/margins": 0.1489550918340683, "rewards/rejected": -0.1490464061498642, "step": 6590 }, { "epoch": 4.558091286307054, "grad_norm": 14.491275787353516, "learning_rate": 3.0232826187183034e-05, "log_odds_chosen": 9.976200103759766, "log_odds_ratio": -0.00011364954843884334, "logits/chosen": -0.7161533832550049, "logits/rejected": -0.7391790747642517, "logps/chosen": -0.00015934955445118248, "logps/rejected": -1.5275839567184448, "loss": 1.0246, "nll_loss": 0.2561395764350891, "rewards/accuracies": 1.0, "rewards/chosen": -1.5934954717522487e-05, "rewards/margins": 0.15274246037006378, "rewards/rejected": -0.1527584046125412, "step": 6591 }, { "epoch": 4.558782849239281, "grad_norm": 10.900084495544434, "learning_rate": 3.022898417089289e-05, "log_odds_chosen": 11.52933120727539, "log_odds_ratio": -1.7895972632686608e-05, "logits/chosen": -0.504792332649231, "logits/rejected": -0.6362007856369019, "logps/chosen": -0.00036690360866487026, "logps/rejected": -3.0588414669036865, "loss": 0.8552, "nll_loss": 0.21380570530891418, "rewards/accuracies": 1.0, "rewards/chosen": -3.669036232167855e-05, "rewards/margins": 0.3058474659919739, "rewards/rejected": -0.3058841824531555, "step": 6592 }, { "epoch": 4.559474412171507, "grad_norm": 8.694879531860352, "learning_rate": 3.022514215460274e-05, "log_odds_chosen": 8.654937744140625, "log_odds_ratio": -0.00045291121932677925, "logits/chosen": -0.46551069617271423, "logits/rejected": -0.520065426826477, "logps/chosen": -0.0035642811562865973, "logps/rejected": -1.6542596817016602, "loss": 1.5287, "nll_loss": 0.3821191191673279, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003564281214494258, "rewards/margins": 0.16506953537464142, "rewards/rejected": -0.16542597115039825, "step": 6593 }, { "epoch": 4.560165975103734, "grad_norm": 9.74400520324707, "learning_rate": 3.0221300138312587e-05, "log_odds_chosen": 9.620219230651855, "log_odds_ratio": -0.00015013368101790547, "logits/chosen": -0.6944003701210022, "logits/rejected": -0.690482497215271, "logps/chosen": -0.0019507486140355468, "logps/rejected": -1.684833288192749, "loss": 1.1181, "nll_loss": 0.2795025706291199, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001950748555827886, "rewards/margins": 0.1682882457971573, "rewards/rejected": -0.16848333179950714, "step": 6594 }, { "epoch": 4.560857538035961, "grad_norm": 5.794257164001465, "learning_rate": 3.021745812202244e-05, "log_odds_chosen": 9.387247085571289, "log_odds_ratio": -0.0009698733920231462, "logits/chosen": -0.18023420870304108, "logits/rejected": -0.19014205038547516, "logps/chosen": -0.004922578576952219, "logps/rejected": -2.4882915019989014, "loss": 1.1872, "nll_loss": 0.2966940402984619, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004922578809782863, "rewards/margins": 0.24833691120147705, "rewards/rejected": -0.2488291710615158, "step": 6595 }, { "epoch": 4.561549100968188, "grad_norm": 7.39327335357666, "learning_rate": 3.021361610573229e-05, "log_odds_chosen": 10.013833999633789, "log_odds_ratio": -0.0006084858905524015, "logits/chosen": -0.7817709445953369, "logits/rejected": -0.7556931972503662, "logps/chosen": -0.006435707677155733, "logps/rejected": -1.9030237197875977, "loss": 0.9846, "nll_loss": 0.24608328938484192, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006435707909986377, "rewards/margins": 0.18965880572795868, "rewards/rejected": -0.19030238687992096, "step": 6596 }, { "epoch": 4.562240663900415, "grad_norm": 7.989497661590576, "learning_rate": 3.0209774089442138e-05, "log_odds_chosen": 10.590084075927734, "log_odds_ratio": -0.0002257568994536996, "logits/chosen": -0.6370759010314941, "logits/rejected": -0.7055598497390747, "logps/chosen": -0.0007681610295549035, "logps/rejected": -2.212214708328247, "loss": 0.8154, "nll_loss": 0.2038293480873108, "rewards/accuracies": 1.0, "rewards/chosen": -7.681611168663949e-05, "rewards/margins": 0.2211446613073349, "rewards/rejected": -0.22122147679328918, "step": 6597 }, { "epoch": 4.5629322268326415, "grad_norm": 8.126029968261719, "learning_rate": 3.0205932073151994e-05, "log_odds_chosen": 10.009937286376953, "log_odds_ratio": -9.851453796727583e-05, "logits/chosen": -0.2403627336025238, "logits/rejected": -0.2549085319042206, "logps/chosen": -0.00030068133492022753, "logps/rejected": -1.7907345294952393, "loss": 1.3476, "nll_loss": 0.33689039945602417, "rewards/accuracies": 1.0, "rewards/chosen": -3.0068133128224872e-05, "rewards/margins": 0.17904338240623474, "rewards/rejected": -0.17907343804836273, "step": 6598 }, { "epoch": 4.563623789764868, "grad_norm": 10.333507537841797, "learning_rate": 3.0202090056861843e-05, "log_odds_chosen": 10.019725799560547, "log_odds_ratio": -0.00012765916471835226, "logits/chosen": -0.33758291602134705, "logits/rejected": -0.3801264464855194, "logps/chosen": -0.00041800137842074037, "logps/rejected": -2.058382987976074, "loss": 0.921, "nll_loss": 0.23023031651973724, "rewards/accuracies": 1.0, "rewards/chosen": -4.180014366284013e-05, "rewards/margins": 0.2057965099811554, "rewards/rejected": -0.20583830773830414, "step": 6599 }, { "epoch": 4.564315352697095, "grad_norm": 6.865717887878418, "learning_rate": 3.0198248040571692e-05, "log_odds_chosen": 9.156005859375, "log_odds_ratio": -0.00019638205412775278, "logits/chosen": -0.8517537713050842, "logits/rejected": -0.8063177466392517, "logps/chosen": -0.0004978242795914412, "logps/rejected": -1.4446221590042114, "loss": 1.0236, "nll_loss": 0.25587552785873413, "rewards/accuracies": 1.0, "rewards/chosen": -4.978242941433564e-05, "rewards/margins": 0.14441244304180145, "rewards/rejected": -0.1444622129201889, "step": 6600 }, { "epoch": 4.565006915629322, "grad_norm": 14.249171257019043, "learning_rate": 3.0194406024281548e-05, "log_odds_chosen": 9.464423179626465, "log_odds_ratio": -0.05572696775197983, "logits/chosen": -0.488655686378479, "logits/rejected": -0.6141406297683716, "logps/chosen": -0.013542162254452705, "logps/rejected": -2.2569494247436523, "loss": 0.8959, "nll_loss": 0.21839742362499237, "rewards/accuracies": 1.0, "rewards/chosen": -0.001354216248728335, "rewards/margins": 0.22434073686599731, "rewards/rejected": -0.22569496929645538, "step": 6601 }, { "epoch": 4.565698478561549, "grad_norm": 11.11478042602539, "learning_rate": 3.0190564007991397e-05, "log_odds_chosen": 11.521812438964844, "log_odds_ratio": -1.6018464521039277e-05, "logits/chosen": -0.3399466276168823, "logits/rejected": -0.4136958718299866, "logps/chosen": -8.431501191807911e-05, "logps/rejected": -2.1025118827819824, "loss": 1.0616, "nll_loss": 0.2653941512107849, "rewards/accuracies": 1.0, "rewards/chosen": -8.431501555605792e-06, "rewards/margins": 0.21024277806282043, "rewards/rejected": -0.21025118231773376, "step": 6602 }, { "epoch": 4.566390041493776, "grad_norm": 12.986268043518066, "learning_rate": 3.0186721991701246e-05, "log_odds_chosen": 9.19881820678711, "log_odds_ratio": -0.02131696231663227, "logits/chosen": -0.4616313576698303, "logits/rejected": -0.558713436126709, "logps/chosen": -0.0075446078553795815, "logps/rejected": -1.5928486585617065, "loss": 0.8881, "nll_loss": 0.21988779306411743, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007544608088210225, "rewards/margins": 0.15853041410446167, "rewards/rejected": -0.15928487479686737, "step": 6603 }, { "epoch": 4.5670816044260025, "grad_norm": 12.540678024291992, "learning_rate": 3.01828799754111e-05, "log_odds_chosen": 10.581785202026367, "log_odds_ratio": -5.553047958528623e-05, "logits/chosen": -0.42860329151153564, "logits/rejected": -0.5122525095939636, "logps/chosen": -0.0002509926271159202, "logps/rejected": -2.055676221847534, "loss": 1.0941, "nll_loss": 0.27351298928260803, "rewards/accuracies": 1.0, "rewards/chosen": -2.509926343918778e-05, "rewards/margins": 0.20554250478744507, "rewards/rejected": -0.2055675983428955, "step": 6604 }, { "epoch": 4.567773167358229, "grad_norm": 7.179683685302734, "learning_rate": 3.0179037959120947e-05, "log_odds_chosen": 9.905824661254883, "log_odds_ratio": -0.00036644781357608736, "logits/chosen": -0.6355658173561096, "logits/rejected": -0.6388107538223267, "logps/chosen": -0.00045511574717238545, "logps/rejected": -2.179323434829712, "loss": 0.9415, "nll_loss": 0.2353421449661255, "rewards/accuracies": 1.0, "rewards/chosen": -4.551157326204702e-05, "rewards/margins": 0.21788683533668518, "rewards/rejected": -0.21793235838413239, "step": 6605 }, { "epoch": 4.568464730290456, "grad_norm": 6.183163642883301, "learning_rate": 3.0175195942830796e-05, "log_odds_chosen": 10.373852729797363, "log_odds_ratio": -0.00013491684512700886, "logits/chosen": -0.4657822847366333, "logits/rejected": -0.5364692807197571, "logps/chosen": -0.01596393622457981, "logps/rejected": -2.2673511505126953, "loss": 0.8466, "nll_loss": 0.21162846684455872, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015963935293257236, "rewards/margins": 0.22513870894908905, "rewards/rejected": -0.22673510015010834, "step": 6606 }, { "epoch": 4.569156293222683, "grad_norm": 10.276148796081543, "learning_rate": 3.0171353926540652e-05, "log_odds_chosen": 10.319684982299805, "log_odds_ratio": -4.735357651952654e-05, "logits/chosen": -0.5993685126304626, "logits/rejected": -0.6914522647857666, "logps/chosen": -0.0001445843663532287, "logps/rejected": -1.6822584867477417, "loss": 0.7713, "nll_loss": 0.19281740486621857, "rewards/accuracies": 1.0, "rewards/chosen": -1.4458439181908034e-05, "rewards/margins": 0.16821140050888062, "rewards/rejected": -0.16822585463523865, "step": 6607 }, { "epoch": 4.56984785615491, "grad_norm": 11.633027076721191, "learning_rate": 3.01675119102505e-05, "log_odds_chosen": 9.985593795776367, "log_odds_ratio": -0.0004732572997454554, "logits/chosen": -0.41848012804985046, "logits/rejected": -0.4558772146701813, "logps/chosen": -0.006857479456812143, "logps/rejected": -2.1081955432891846, "loss": 1.1383, "nll_loss": 0.28452807664871216, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006857480038888752, "rewards/margins": 0.21013380587100983, "rewards/rejected": -0.2108195573091507, "step": 6608 }, { "epoch": 4.570539419087137, "grad_norm": 10.77037239074707, "learning_rate": 3.016366989396035e-05, "log_odds_chosen": 9.556285858154297, "log_odds_ratio": -0.00019223152776248753, "logits/chosen": -0.5291285514831543, "logits/rejected": -0.5538058876991272, "logps/chosen": -0.0005219366285018623, "logps/rejected": -1.4201699495315552, "loss": 1.3276, "nll_loss": 0.33189016580581665, "rewards/accuracies": 1.0, "rewards/chosen": -5.21936672157608e-05, "rewards/margins": 0.14196480810642242, "rewards/rejected": -0.14201700687408447, "step": 6609 }, { "epoch": 4.5712309820193635, "grad_norm": 6.790186882019043, "learning_rate": 3.0159827877670206e-05, "log_odds_chosen": 9.672239303588867, "log_odds_ratio": -0.00028869410743936896, "logits/chosen": -0.3598228991031647, "logits/rejected": -0.4058411419391632, "logps/chosen": -0.00029092555632814765, "logps/rejected": -1.4371891021728516, "loss": 1.1986, "nll_loss": 0.29960960149765015, "rewards/accuracies": 1.0, "rewards/chosen": -2.909255272243172e-05, "rewards/margins": 0.1436898112297058, "rewards/rejected": -0.1437188982963562, "step": 6610 }, { "epoch": 4.57192254495159, "grad_norm": 10.04366397857666, "learning_rate": 3.0155985861380055e-05, "log_odds_chosen": 10.897849082946777, "log_odds_ratio": -4.32228043791838e-05, "logits/chosen": -0.6647764444351196, "logits/rejected": -0.8023943901062012, "logps/chosen": -0.00021091816597618163, "logps/rejected": -2.00454044342041, "loss": 1.3433, "nll_loss": 0.33581259846687317, "rewards/accuracies": 1.0, "rewards/chosen": -2.1091816961416043e-05, "rewards/margins": 0.20043295621871948, "rewards/rejected": -0.20045407116413116, "step": 6611 }, { "epoch": 4.572614107883817, "grad_norm": 11.911702156066895, "learning_rate": 3.0152143845089904e-05, "log_odds_chosen": 11.172669410705566, "log_odds_ratio": -0.0001469549461035058, "logits/chosen": -0.6482466459274292, "logits/rejected": -0.6237611770629883, "logps/chosen": -0.00021537227439694107, "logps/rejected": -2.3251004219055176, "loss": 0.8104, "nll_loss": 0.20259681344032288, "rewards/accuracies": 1.0, "rewards/chosen": -2.1537227439694107e-05, "rewards/margins": 0.2324884831905365, "rewards/rejected": -0.2325100302696228, "step": 6612 }, { "epoch": 4.573305670816044, "grad_norm": 9.78806209564209, "learning_rate": 3.0148301828799757e-05, "log_odds_chosen": 9.983158111572266, "log_odds_ratio": -0.00013181474059820175, "logits/chosen": -0.7097947597503662, "logits/rejected": -0.7412753105163574, "logps/chosen": -0.003653676016256213, "logps/rejected": -2.4717650413513184, "loss": 1.0662, "nll_loss": 0.2665305435657501, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003653676249086857, "rewards/margins": 0.2468111515045166, "rewards/rejected": -0.24717652797698975, "step": 6613 }, { "epoch": 4.573997233748271, "grad_norm": 10.576286315917969, "learning_rate": 3.0144459812509606e-05, "log_odds_chosen": 10.688820838928223, "log_odds_ratio": -8.817969501251355e-05, "logits/chosen": -0.5782037377357483, "logits/rejected": -0.633255660533905, "logps/chosen": -0.00026176689425483346, "logps/rejected": -1.9208297729492188, "loss": 1.1002, "nll_loss": 0.2750369608402252, "rewards/accuracies": 1.0, "rewards/chosen": -2.6176689061685465e-05, "rewards/margins": 0.192056804895401, "rewards/rejected": -0.1920829713344574, "step": 6614 }, { "epoch": 4.574688796680498, "grad_norm": 12.326896667480469, "learning_rate": 3.0140617796219455e-05, "log_odds_chosen": 10.266992568969727, "log_odds_ratio": -5.857451105839573e-05, "logits/chosen": -0.19259458780288696, "logits/rejected": -0.33749744296073914, "logps/chosen": -0.0005202666507102549, "logps/rejected": -2.267693281173706, "loss": 1.2466, "nll_loss": 0.31163233518600464, "rewards/accuracies": 1.0, "rewards/chosen": -5.202666943660006e-05, "rewards/margins": 0.22671732306480408, "rewards/rejected": -0.2267693281173706, "step": 6615 }, { "epoch": 4.5753803596127245, "grad_norm": 8.438218116760254, "learning_rate": 3.013677577992931e-05, "log_odds_chosen": 11.793233871459961, "log_odds_ratio": -1.9513814550009556e-05, "logits/chosen": -0.4520335793495178, "logits/rejected": -0.5310763120651245, "logps/chosen": -0.00013092244626022875, "logps/rejected": -2.669154405593872, "loss": 0.7859, "nll_loss": 0.19648477435112, "rewards/accuracies": 1.0, "rewards/chosen": -1.3092246263113339e-05, "rewards/margins": 0.266902357339859, "rewards/rejected": -0.2669154405593872, "step": 6616 }, { "epoch": 4.576071922544951, "grad_norm": 5.844875335693359, "learning_rate": 3.013293376363916e-05, "log_odds_chosen": 9.786355972290039, "log_odds_ratio": -0.003753600874915719, "logits/chosen": -0.35187357664108276, "logits/rejected": -0.44419077038764954, "logps/chosen": -0.024547820910811424, "logps/rejected": -2.0552241802215576, "loss": 0.9076, "nll_loss": 0.2265336960554123, "rewards/accuracies": 1.0, "rewards/chosen": -0.002454782137647271, "rewards/margins": 0.20306764543056488, "rewards/rejected": -0.20552241802215576, "step": 6617 }, { "epoch": 4.576763485477178, "grad_norm": 10.234806060791016, "learning_rate": 3.012909174734901e-05, "log_odds_chosen": 10.920913696289062, "log_odds_ratio": -0.00010152783215744421, "logits/chosen": -0.806179404258728, "logits/rejected": -0.823478102684021, "logps/chosen": -0.017593462020158768, "logps/rejected": -2.7665202617645264, "loss": 0.8564, "nll_loss": 0.21409085392951965, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017593461088836193, "rewards/margins": 0.2748926877975464, "rewards/rejected": -0.2766520082950592, "step": 6618 }, { "epoch": 4.577455048409405, "grad_norm": 13.536564826965332, "learning_rate": 3.0125249731058865e-05, "log_odds_chosen": 9.243236541748047, "log_odds_ratio": -0.00024905483587644994, "logits/chosen": -0.6628894805908203, "logits/rejected": -0.7527157664299011, "logps/chosen": -0.0009462746093049645, "logps/rejected": -1.8274426460266113, "loss": 1.1826, "nll_loss": 0.29562896490097046, "rewards/accuracies": 1.0, "rewards/chosen": -9.462745947530493e-05, "rewards/margins": 0.1826496571302414, "rewards/rejected": -0.18274426460266113, "step": 6619 }, { "epoch": 4.578146611341632, "grad_norm": 10.473002433776855, "learning_rate": 3.0121407714768714e-05, "log_odds_chosen": 11.051270484924316, "log_odds_ratio": -3.43362771673128e-05, "logits/chosen": -0.38918614387512207, "logits/rejected": -0.44833943247795105, "logps/chosen": -0.00021262890368234366, "logps/rejected": -2.373044013977051, "loss": 0.8369, "nll_loss": 0.2092183381319046, "rewards/accuracies": 1.0, "rewards/chosen": -2.1262891095830128e-05, "rewards/margins": 0.2372831404209137, "rewards/rejected": -0.2373044192790985, "step": 6620 }, { "epoch": 4.578838174273859, "grad_norm": 9.426041603088379, "learning_rate": 3.0117565698478563e-05, "log_odds_chosen": 9.773505210876465, "log_odds_ratio": -0.010695736855268478, "logits/chosen": -0.36876046657562256, "logits/rejected": -0.4833451509475708, "logps/chosen": -0.00378953549079597, "logps/rejected": -1.9265997409820557, "loss": 0.9171, "nll_loss": 0.22820425033569336, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003789535549003631, "rewards/margins": 0.19228103756904602, "rewards/rejected": -0.19265997409820557, "step": 6621 }, { "epoch": 4.5795297372060855, "grad_norm": 7.291322708129883, "learning_rate": 3.0113723682188415e-05, "log_odds_chosen": 10.150293350219727, "log_odds_ratio": -8.896956569515169e-05, "logits/chosen": -0.40212830901145935, "logits/rejected": -0.453365683555603, "logps/chosen": -0.00020818223129026592, "logps/rejected": -1.8073318004608154, "loss": 0.8227, "nll_loss": 0.20566749572753906, "rewards/accuracies": 1.0, "rewards/chosen": -2.0818220946239308e-05, "rewards/margins": 0.18071237206459045, "rewards/rejected": -0.18073318898677826, "step": 6622 }, { "epoch": 4.580221300138312, "grad_norm": 7.882122039794922, "learning_rate": 3.0109881665898264e-05, "log_odds_chosen": 11.177387237548828, "log_odds_ratio": -2.653186493262183e-05, "logits/chosen": -0.40092629194259644, "logits/rejected": -0.4798882007598877, "logps/chosen": -0.00017878400103654712, "logps/rejected": -2.480536699295044, "loss": 1.1797, "nll_loss": 0.29491573572158813, "rewards/accuracies": 1.0, "rewards/chosen": -1.7878401195048355e-05, "rewards/margins": 0.24803580343723297, "rewards/rejected": -0.2480536699295044, "step": 6623 }, { "epoch": 4.580912863070539, "grad_norm": 14.837285041809082, "learning_rate": 3.0106039649608113e-05, "log_odds_chosen": 10.875441551208496, "log_odds_ratio": -0.00019159108342137188, "logits/chosen": -0.43085187673568726, "logits/rejected": -0.4576089382171631, "logps/chosen": -0.0002225075295427814, "logps/rejected": -1.8764679431915283, "loss": 1.2767, "nll_loss": 0.31915417313575745, "rewards/accuracies": 1.0, "rewards/chosen": -2.2250755137065426e-05, "rewards/margins": 0.18762452900409698, "rewards/rejected": -0.1876468062400818, "step": 6624 }, { "epoch": 4.581604426002766, "grad_norm": 7.431987762451172, "learning_rate": 3.010219763331797e-05, "log_odds_chosen": 11.103547096252441, "log_odds_ratio": -3.522342740325257e-05, "logits/chosen": -0.29567962884902954, "logits/rejected": -0.3659539818763733, "logps/chosen": -0.00013930797285865992, "logps/rejected": -2.1855380535125732, "loss": 0.7527, "nll_loss": 0.18817052245140076, "rewards/accuracies": 1.0, "rewards/chosen": -1.3930797649663873e-05, "rewards/margins": 0.21853987872600555, "rewards/rejected": -0.2185538113117218, "step": 6625 }, { "epoch": 4.582295988934993, "grad_norm": 12.275331497192383, "learning_rate": 3.0098355617027818e-05, "log_odds_chosen": 8.130497932434082, "log_odds_ratio": -0.25943076610565186, "logits/chosen": -0.5913535952568054, "logits/rejected": -0.6271120309829712, "logps/chosen": -0.04674238711595535, "logps/rejected": -1.2596242427825928, "loss": 1.1692, "nll_loss": 0.2663576006889343, "rewards/accuracies": 0.875, "rewards/chosen": -0.004674238618463278, "rewards/margins": 0.12128818035125732, "rewards/rejected": -0.12596242129802704, "step": 6626 }, { "epoch": 4.58298755186722, "grad_norm": 11.490532875061035, "learning_rate": 3.0094513600737667e-05, "log_odds_chosen": 10.451854705810547, "log_odds_ratio": -6.06868889008183e-05, "logits/chosen": -0.5514692664146423, "logits/rejected": -0.5786818265914917, "logps/chosen": -0.00012749088637065142, "logps/rejected": -1.7657053470611572, "loss": 0.8133, "nll_loss": 0.20331361889839172, "rewards/accuracies": 1.0, "rewards/chosen": -1.2749089364660904e-05, "rewards/margins": 0.1765577793121338, "rewards/rejected": -0.17657053470611572, "step": 6627 }, { "epoch": 4.5836791147994465, "grad_norm": 9.477652549743652, "learning_rate": 3.0090671584447523e-05, "log_odds_chosen": 10.639892578125, "log_odds_ratio": -6.321460386971012e-05, "logits/chosen": -0.5839773416519165, "logits/rejected": -0.675246000289917, "logps/chosen": -0.00020599793060682714, "logps/rejected": -2.062002420425415, "loss": 0.896, "nll_loss": 0.22398124635219574, "rewards/accuracies": 1.0, "rewards/chosen": -2.0599793060682714e-05, "rewards/margins": 0.2061796486377716, "rewards/rejected": -0.2062002420425415, "step": 6628 }, { "epoch": 4.584370677731673, "grad_norm": 9.168537139892578, "learning_rate": 3.0086829568157372e-05, "log_odds_chosen": 10.252494812011719, "log_odds_ratio": -0.00018270703731104732, "logits/chosen": -0.3839903771877289, "logits/rejected": -0.43178194761276245, "logps/chosen": -0.00047848522081039846, "logps/rejected": -2.5032529830932617, "loss": 0.9095, "nll_loss": 0.22736404836177826, "rewards/accuracies": 1.0, "rewards/chosen": -4.784852353623137e-05, "rewards/margins": 0.25027742981910706, "rewards/rejected": -0.2503252923488617, "step": 6629 }, { "epoch": 4.5850622406639, "grad_norm": 6.157984733581543, "learning_rate": 3.008298755186722e-05, "log_odds_chosen": 10.422435760498047, "log_odds_ratio": -0.00046115758595988154, "logits/chosen": -0.4528391659259796, "logits/rejected": -0.37236273288726807, "logps/chosen": -0.0007764915935695171, "logps/rejected": -2.314472198486328, "loss": 0.6048, "nll_loss": 0.15114331245422363, "rewards/accuracies": 1.0, "rewards/chosen": -7.764915062580258e-05, "rewards/margins": 0.23136958479881287, "rewards/rejected": -0.2314472198486328, "step": 6630 }, { "epoch": 4.585753803596127, "grad_norm": 8.872358322143555, "learning_rate": 3.0079145535577074e-05, "log_odds_chosen": 10.070481300354004, "log_odds_ratio": -0.00012420877465046942, "logits/chosen": -0.6568784117698669, "logits/rejected": -0.7422617673873901, "logps/chosen": -0.000309359427774325, "logps/rejected": -1.989261507987976, "loss": 1.4695, "nll_loss": 0.367355614900589, "rewards/accuracies": 1.0, "rewards/chosen": -3.093594204983674e-05, "rewards/margins": 0.19889521598815918, "rewards/rejected": -0.1989261358976364, "step": 6631 }, { "epoch": 4.586445366528354, "grad_norm": 14.159653663635254, "learning_rate": 3.0075303519286923e-05, "log_odds_chosen": 9.802715301513672, "log_odds_ratio": -0.00021838059183210135, "logits/chosen": -0.6921088099479675, "logits/rejected": -0.7150027751922607, "logps/chosen": -0.0007769321673549712, "logps/rejected": -1.8968806266784668, "loss": 1.1374, "nll_loss": 0.28433793783187866, "rewards/accuracies": 1.0, "rewards/chosen": -7.769322110107169e-05, "rewards/margins": 0.18961036205291748, "rewards/rejected": -0.1896880567073822, "step": 6632 }, { "epoch": 4.587136929460581, "grad_norm": 4.186121463775635, "learning_rate": 3.0071461502996772e-05, "log_odds_chosen": 9.516704559326172, "log_odds_ratio": -0.0005803394014947116, "logits/chosen": -0.5224634408950806, "logits/rejected": -0.5306433439254761, "logps/chosen": -0.0054843733087182045, "logps/rejected": -1.9898731708526611, "loss": 1.0931, "nll_loss": 0.27322930097579956, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005484373541548848, "rewards/margins": 0.1984388828277588, "rewards/rejected": -0.19898732006549835, "step": 6633 }, { "epoch": 4.587828492392807, "grad_norm": 9.91395092010498, "learning_rate": 3.0067619486706628e-05, "log_odds_chosen": 10.172961235046387, "log_odds_ratio": -0.00016539839270990342, "logits/chosen": -0.8813390135765076, "logits/rejected": -0.9066269397735596, "logps/chosen": -0.0005005454295314848, "logps/rejected": -1.9067983627319336, "loss": 0.9538, "nll_loss": 0.23842957615852356, "rewards/accuracies": 1.0, "rewards/chosen": -5.0054542953148484e-05, "rewards/margins": 0.190629780292511, "rewards/rejected": -0.19067983329296112, "step": 6634 }, { "epoch": 4.588520055325034, "grad_norm": 8.857604026794434, "learning_rate": 3.0063777470416477e-05, "log_odds_chosen": 10.181537628173828, "log_odds_ratio": -0.00013955699978396297, "logits/chosen": -0.8722149133682251, "logits/rejected": -0.8384766578674316, "logps/chosen": -0.0005608652718365192, "logps/rejected": -1.980457067489624, "loss": 1.0769, "nll_loss": 0.2692164480686188, "rewards/accuracies": 1.0, "rewards/chosen": -5.6086522818077356e-05, "rewards/margins": 0.19798964262008667, "rewards/rejected": -0.19804571568965912, "step": 6635 }, { "epoch": 4.589211618257261, "grad_norm": 9.029706954956055, "learning_rate": 3.0059935454126326e-05, "log_odds_chosen": 10.915395736694336, "log_odds_ratio": -3.5561828553909436e-05, "logits/chosen": -0.16081270575523376, "logits/rejected": -0.23908120393753052, "logps/chosen": -0.0010874989675357938, "logps/rejected": -2.6500515937805176, "loss": 1.1949, "nll_loss": 0.29872551560401917, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010874988947762176, "rewards/margins": 0.26489636301994324, "rewards/rejected": -0.26500511169433594, "step": 6636 }, { "epoch": 4.589903181189488, "grad_norm": 8.428266525268555, "learning_rate": 3.005609343783618e-05, "log_odds_chosen": 9.293031692504883, "log_odds_ratio": -0.07677154242992401, "logits/chosen": -0.26036933064460754, "logits/rejected": -0.34439903497695923, "logps/chosen": -0.013922506012022495, "logps/rejected": -2.2738609313964844, "loss": 1.1792, "nll_loss": 0.2871167063713074, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013922505313530564, "rewards/margins": 0.22599384188652039, "rewards/rejected": -0.22738608717918396, "step": 6637 }, { "epoch": 4.590594744121715, "grad_norm": 11.015109062194824, "learning_rate": 3.005225142154603e-05, "log_odds_chosen": 9.079282760620117, "log_odds_ratio": -0.0012578824535012245, "logits/chosen": -0.577182412147522, "logits/rejected": -0.6306379437446594, "logps/chosen": -0.0030781887471675873, "logps/rejected": -1.887145757675171, "loss": 0.8156, "nll_loss": 0.20376461744308472, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030781884561292827, "rewards/margins": 0.18840676546096802, "rewards/rejected": -0.18871459364891052, "step": 6638 }, { "epoch": 4.591286307053942, "grad_norm": 7.341928005218506, "learning_rate": 3.004840940525588e-05, "log_odds_chosen": 9.25500202178955, "log_odds_ratio": -0.00026959230308420956, "logits/chosen": -0.505585253238678, "logits/rejected": -0.5270559787750244, "logps/chosen": -0.0005544669693335891, "logps/rejected": -1.2817394733428955, "loss": 1.031, "nll_loss": 0.2577342092990875, "rewards/accuracies": 1.0, "rewards/chosen": -5.54466932953801e-05, "rewards/margins": 0.12811851501464844, "rewards/rejected": -0.12817394733428955, "step": 6639 }, { "epoch": 4.591977869986168, "grad_norm": 6.349297046661377, "learning_rate": 3.0044567388965732e-05, "log_odds_chosen": 9.84512710571289, "log_odds_ratio": -0.0002761775394901633, "logits/chosen": -0.41043686866760254, "logits/rejected": -0.4271596074104309, "logps/chosen": -0.0007267515175044537, "logps/rejected": -1.9643175601959229, "loss": 0.921, "nll_loss": 0.23021897673606873, "rewards/accuracies": 1.0, "rewards/chosen": -7.267515320563689e-05, "rewards/margins": 0.1963590681552887, "rewards/rejected": -0.1964317411184311, "step": 6640 }, { "epoch": 4.592669432918395, "grad_norm": 9.569697380065918, "learning_rate": 3.004072537267558e-05, "log_odds_chosen": 11.393840789794922, "log_odds_ratio": -2.0055922504980117e-05, "logits/chosen": -0.5695845484733582, "logits/rejected": -0.600030243396759, "logps/chosen": -9.230027353623882e-05, "logps/rejected": -2.1485538482666016, "loss": 1.2374, "nll_loss": 0.30935972929000854, "rewards/accuracies": 1.0, "rewards/chosen": -9.230027899320703e-06, "rewards/margins": 0.2148461490869522, "rewards/rejected": -0.2148553729057312, "step": 6641 }, { "epoch": 4.593360995850622, "grad_norm": 7.704564094543457, "learning_rate": 3.003688335638543e-05, "log_odds_chosen": 10.57839584350586, "log_odds_ratio": -0.00010343089525122195, "logits/chosen": -0.5169810652732849, "logits/rejected": -0.6554980874061584, "logps/chosen": -0.000415709859225899, "logps/rejected": -2.6744332313537598, "loss": 1.0457, "nll_loss": 0.2614179253578186, "rewards/accuracies": 1.0, "rewards/chosen": -4.157098737778142e-05, "rewards/margins": 0.2674017548561096, "rewards/rejected": -0.26744332909584045, "step": 6642 }, { "epoch": 4.594052558782849, "grad_norm": 8.762954711914062, "learning_rate": 3.0033041340095286e-05, "log_odds_chosen": 10.157194137573242, "log_odds_ratio": -0.0011035792995244265, "logits/chosen": -0.33303841948509216, "logits/rejected": -0.38343989849090576, "logps/chosen": -0.0021737630013376474, "logps/rejected": -1.5332502126693726, "loss": 1.388, "nll_loss": 0.3468846082687378, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002173762914026156, "rewards/margins": 0.1531076431274414, "rewards/rejected": -0.15332502126693726, "step": 6643 }, { "epoch": 4.594744121715076, "grad_norm": 8.015052795410156, "learning_rate": 3.0029199323805135e-05, "log_odds_chosen": 9.884615898132324, "log_odds_ratio": -0.00012229596904944628, "logits/chosen": -0.5854188799858093, "logits/rejected": -0.6728988885879517, "logps/chosen": -0.001457500271499157, "logps/rejected": -2.1083319187164307, "loss": 1.3311, "nll_loss": 0.33276960253715515, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001457500329706818, "rewards/margins": 0.21068742871284485, "rewards/rejected": -0.21083319187164307, "step": 6644 }, { "epoch": 4.595435684647303, "grad_norm": 6.856884002685547, "learning_rate": 3.0025357307514984e-05, "log_odds_chosen": 9.551240921020508, "log_odds_ratio": -0.00017050621681846678, "logits/chosen": -0.8499241471290588, "logits/rejected": -0.8596094250679016, "logps/chosen": -0.002166606020182371, "logps/rejected": -2.0629520416259766, "loss": 1.1256, "nll_loss": 0.2813722491264343, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021666062821168453, "rewards/margins": 0.20607852935791016, "rewards/rejected": -0.2062952071428299, "step": 6645 }, { "epoch": 4.596127247579529, "grad_norm": 8.078551292419434, "learning_rate": 3.002151529122484e-05, "log_odds_chosen": 10.179945945739746, "log_odds_ratio": -8.246686775237322e-05, "logits/chosen": -0.618579089641571, "logits/rejected": -0.6546553373336792, "logps/chosen": -0.0015918964054435492, "logps/rejected": -2.2097814083099365, "loss": 0.8485, "nll_loss": 0.21211211383342743, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015918964345473796, "rewards/margins": 0.22081895172595978, "rewards/rejected": -0.22097815573215485, "step": 6646 }, { "epoch": 4.596818810511756, "grad_norm": 8.41762924194336, "learning_rate": 3.001767327493469e-05, "log_odds_chosen": 9.121322631835938, "log_odds_ratio": -0.0009582031634636223, "logits/chosen": -0.46929413080215454, "logits/rejected": -0.4976672828197479, "logps/chosen": -0.0007144063129089773, "logps/rejected": -1.3871290683746338, "loss": 1.5951, "nll_loss": 0.39868098497390747, "rewards/accuracies": 1.0, "rewards/chosen": -7.144063420128077e-05, "rewards/margins": 0.13864146173000336, "rewards/rejected": -0.13871291279792786, "step": 6647 }, { "epoch": 4.597510373443983, "grad_norm": 12.470500946044922, "learning_rate": 3.0013831258644538e-05, "log_odds_chosen": 11.022992134094238, "log_odds_ratio": -2.5863595510600135e-05, "logits/chosen": -0.43043211102485657, "logits/rejected": -0.5416529774665833, "logps/chosen": -0.00022176577476784587, "logps/rejected": -2.37802791595459, "loss": 1.1301, "nll_loss": 0.2825261056423187, "rewards/accuracies": 1.0, "rewards/chosen": -2.2176576749188825e-05, "rewards/margins": 0.2377806156873703, "rewards/rejected": -0.23780277371406555, "step": 6648 }, { "epoch": 4.59820193637621, "grad_norm": 11.435942649841309, "learning_rate": 3.000998924235439e-05, "log_odds_chosen": 10.821590423583984, "log_odds_ratio": -2.8062824640073813e-05, "logits/chosen": -0.7152904272079468, "logits/rejected": -0.7026421427726746, "logps/chosen": -0.000827385054435581, "logps/rejected": -2.3652238845825195, "loss": 0.9629, "nll_loss": 0.2407165765762329, "rewards/accuracies": 1.0, "rewards/chosen": -8.273850107798353e-05, "rewards/margins": 0.23643964529037476, "rewards/rejected": -0.2365223914384842, "step": 6649 }, { "epoch": 4.598893499308437, "grad_norm": 9.441259384155273, "learning_rate": 3.000614722606424e-05, "log_odds_chosen": 9.505755424499512, "log_odds_ratio": -0.0006548656383529305, "logits/chosen": -0.29557374119758606, "logits/rejected": -0.3252328634262085, "logps/chosen": -0.001687394455075264, "logps/rejected": -1.5947422981262207, "loss": 1.0515, "nll_loss": 0.26281145215034485, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001687394396867603, "rewards/margins": 0.15930549800395966, "rewards/rejected": -0.1594742387533188, "step": 6650 }, { "epoch": 4.5995850622406635, "grad_norm": 6.911854267120361, "learning_rate": 3.000230520977409e-05, "log_odds_chosen": 9.908332824707031, "log_odds_ratio": -0.0005658690934069455, "logits/chosen": -0.3408907949924469, "logits/rejected": -0.42232099175453186, "logps/chosen": -0.00037419985164888203, "logps/rejected": -1.9144506454467773, "loss": 1.3619, "nll_loss": 0.3404099941253662, "rewards/accuracies": 1.0, "rewards/chosen": -3.741998807527125e-05, "rewards/margins": 0.19140766561031342, "rewards/rejected": -0.19144508242607117, "step": 6651 }, { "epoch": 4.60027662517289, "grad_norm": 25.65146827697754, "learning_rate": 2.9998463193483944e-05, "log_odds_chosen": 10.697637557983398, "log_odds_ratio": -6.30640861345455e-05, "logits/chosen": -0.5637302398681641, "logits/rejected": -0.6442798376083374, "logps/chosen": -0.00011240709864068776, "logps/rejected": -1.6637163162231445, "loss": 0.9666, "nll_loss": 0.24165429174900055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1240710591664538e-05, "rewards/margins": 0.16636039316654205, "rewards/rejected": -0.16637162864208221, "step": 6652 }, { "epoch": 4.600968188105117, "grad_norm": 9.794515609741211, "learning_rate": 2.9994621177193794e-05, "log_odds_chosen": 9.300960540771484, "log_odds_ratio": -0.0009341944241896272, "logits/chosen": -0.3178715109825134, "logits/rejected": -0.4060957133769989, "logps/chosen": -0.0024310587905347347, "logps/rejected": -1.4401997327804565, "loss": 0.8431, "nll_loss": 0.2106890082359314, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002431058674119413, "rewards/margins": 0.14377684891223907, "rewards/rejected": -0.1440199613571167, "step": 6653 }, { "epoch": 4.601659751037344, "grad_norm": 7.053846836090088, "learning_rate": 2.9990779160903643e-05, "log_odds_chosen": 8.772018432617188, "log_odds_ratio": -0.0012893974781036377, "logits/chosen": -0.3111286163330078, "logits/rejected": -0.42209941148757935, "logps/chosen": -0.0010461423080414534, "logps/rejected": -1.2797952890396118, "loss": 1.3091, "nll_loss": 0.3271556794643402, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001046142351697199, "rewards/margins": 0.1278749257326126, "rewards/rejected": -0.12797953188419342, "step": 6654 }, { "epoch": 4.602351313969571, "grad_norm": 5.7435407638549805, "learning_rate": 2.99869371446135e-05, "log_odds_chosen": 9.842205047607422, "log_odds_ratio": -0.010721296072006226, "logits/chosen": -0.5009051561355591, "logits/rejected": -0.5647544264793396, "logps/chosen": -0.0034427782520651817, "logps/rejected": -2.2130255699157715, "loss": 1.0543, "nll_loss": 0.2624974548816681, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003442778252065182, "rewards/margins": 0.22095826268196106, "rewards/rejected": -0.22130253911018372, "step": 6655 }, { "epoch": 4.603042876901798, "grad_norm": 7.0054779052734375, "learning_rate": 2.9983095128323347e-05, "log_odds_chosen": 8.576118469238281, "log_odds_ratio": -0.0006403782172128558, "logits/chosen": -0.6368761658668518, "logits/rejected": -0.7257099151611328, "logps/chosen": -0.0013416606234386563, "logps/rejected": -1.0327801704406738, "loss": 0.9233, "nll_loss": 0.2307559698820114, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001341660536127165, "rewards/margins": 0.10314386337995529, "rewards/rejected": -0.1032780259847641, "step": 6656 }, { "epoch": 4.6037344398340245, "grad_norm": 14.184807777404785, "learning_rate": 2.9979253112033196e-05, "log_odds_chosen": 8.543257713317871, "log_odds_ratio": -0.002161895390599966, "logits/chosen": -0.7052701711654663, "logits/rejected": -0.6956675052642822, "logps/chosen": -0.0020439941436052322, "logps/rejected": -1.7696683406829834, "loss": 1.2847, "nll_loss": 0.3209618330001831, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020439941727090627, "rewards/margins": 0.17676246166229248, "rewards/rejected": -0.1769668459892273, "step": 6657 }, { "epoch": 4.604426002766251, "grad_norm": 15.939481735229492, "learning_rate": 2.997541109574305e-05, "log_odds_chosen": 10.017705917358398, "log_odds_ratio": -0.00015626417007297277, "logits/chosen": -1.0082440376281738, "logits/rejected": -1.1075737476348877, "logps/chosen": -0.0001465157838538289, "logps/rejected": -1.5758423805236816, "loss": 0.8355, "nll_loss": 0.20884928107261658, "rewards/accuracies": 1.0, "rewards/chosen": -1.4651579476776533e-05, "rewards/margins": 0.15756958723068237, "rewards/rejected": -0.15758424997329712, "step": 6658 }, { "epoch": 4.605117565698478, "grad_norm": 5.694345951080322, "learning_rate": 2.9971569079452898e-05, "log_odds_chosen": 9.860860824584961, "log_odds_ratio": -9.92153218248859e-05, "logits/chosen": -0.67734694480896, "logits/rejected": -0.6845455169677734, "logps/chosen": -0.0003241387603338808, "logps/rejected": -1.8322769403457642, "loss": 0.8143, "nll_loss": 0.20356862246990204, "rewards/accuracies": 1.0, "rewards/chosen": -3.241387821617536e-05, "rewards/margins": 0.1831952929496765, "rewards/rejected": -0.18322770297527313, "step": 6659 }, { "epoch": 4.605809128630705, "grad_norm": 18.043245315551758, "learning_rate": 2.9967727063162747e-05, "log_odds_chosen": 9.604475021362305, "log_odds_ratio": -0.09748678654432297, "logits/chosen": -0.9602915644645691, "logits/rejected": -1.0716558694839478, "logps/chosen": -0.015980206429958344, "logps/rejected": -1.7664905786514282, "loss": 1.471, "nll_loss": 0.3579968810081482, "rewards/accuracies": 0.875, "rewards/chosen": -0.0015980205498635769, "rewards/margins": 0.17505104839801788, "rewards/rejected": -0.1766490638256073, "step": 6660 }, { "epoch": 4.606500691562932, "grad_norm": 14.086503028869629, "learning_rate": 2.9963885046872603e-05, "log_odds_chosen": 9.341814041137695, "log_odds_ratio": -0.0008506190497428179, "logits/chosen": -0.623991847038269, "logits/rejected": -0.6753250360488892, "logps/chosen": -0.10699453204870224, "logps/rejected": -2.235908031463623, "loss": 1.6336, "nll_loss": 0.4083053469657898, "rewards/accuracies": 1.0, "rewards/chosen": -0.010699453763663769, "rewards/margins": 0.2128913700580597, "rewards/rejected": -0.22359079122543335, "step": 6661 }, { "epoch": 4.607192254495159, "grad_norm": 13.7391996383667, "learning_rate": 2.9960043030582452e-05, "log_odds_chosen": 10.506267547607422, "log_odds_ratio": -0.00021020628628320992, "logits/chosen": -1.1174015998840332, "logits/rejected": -1.1321393251419067, "logps/chosen": -0.00016034118016250432, "logps/rejected": -1.6753671169281006, "loss": 1.1868, "nll_loss": 0.2966703772544861, "rewards/accuracies": 1.0, "rewards/chosen": -1.603411692485679e-05, "rewards/margins": 0.1675206869840622, "rewards/rejected": -0.16753672063350677, "step": 6662 }, { "epoch": 4.6078838174273855, "grad_norm": 7.0717267990112305, "learning_rate": 2.99562010142923e-05, "log_odds_chosen": 10.244396209716797, "log_odds_ratio": -0.0005225987988524139, "logits/chosen": -0.3503478765487671, "logits/rejected": -0.42929190397262573, "logps/chosen": -0.0022827768698334694, "logps/rejected": -2.1327946186065674, "loss": 0.9079, "nll_loss": 0.2269156128168106, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022827771317679435, "rewards/margins": 0.21305117011070251, "rewards/rejected": -0.21327945590019226, "step": 6663 }, { "epoch": 4.608575380359612, "grad_norm": 8.944998741149902, "learning_rate": 2.9952358998002157e-05, "log_odds_chosen": 10.742843627929688, "log_odds_ratio": -6.899197614984587e-05, "logits/chosen": -0.6813872456550598, "logits/rejected": -0.7148157358169556, "logps/chosen": -0.00014297892630565912, "logps/rejected": -1.8107256889343262, "loss": 0.9531, "nll_loss": 0.23827821016311646, "rewards/accuracies": 1.0, "rewards/chosen": -1.4297892448666971e-05, "rewards/margins": 0.18105828762054443, "rewards/rejected": -0.18107259273529053, "step": 6664 }, { "epoch": 4.609266943291839, "grad_norm": 7.837378978729248, "learning_rate": 2.9948516981712006e-05, "log_odds_chosen": 10.556222915649414, "log_odds_ratio": -0.0005359348724596202, "logits/chosen": -0.6345524787902832, "logits/rejected": -0.6113138794898987, "logps/chosen": -0.00039145839400589466, "logps/rejected": -1.9433598518371582, "loss": 1.4939, "nll_loss": 0.37342697381973267, "rewards/accuracies": 1.0, "rewards/chosen": -3.914583794539794e-05, "rewards/margins": 0.19429683685302734, "rewards/rejected": -0.1943359673023224, "step": 6665 }, { "epoch": 4.609958506224066, "grad_norm": 7.503310203552246, "learning_rate": 2.9944674965421855e-05, "log_odds_chosen": 10.827353477478027, "log_odds_ratio": -7.019040640443563e-05, "logits/chosen": -0.9183793067932129, "logits/rejected": -0.9392495155334473, "logps/chosen": -0.00016982029774226248, "logps/rejected": -1.765545129776001, "loss": 0.7157, "nll_loss": 0.17891749739646912, "rewards/accuracies": 1.0, "rewards/chosen": -1.698203050182201e-05, "rewards/margins": 0.17653754353523254, "rewards/rejected": -0.17655451595783234, "step": 6666 }, { "epoch": 4.610650069156293, "grad_norm": 7.25770378112793, "learning_rate": 2.9940832949131704e-05, "log_odds_chosen": 9.678262710571289, "log_odds_ratio": -0.000425715115852654, "logits/chosen": -0.500344455242157, "logits/rejected": -0.5148862600326538, "logps/chosen": -0.0018647005781531334, "logps/rejected": -2.219193458557129, "loss": 1.1087, "nll_loss": 0.2771277129650116, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018647006072569638, "rewards/margins": 0.22173286974430084, "rewards/rejected": -0.22191934287548065, "step": 6667 }, { "epoch": 4.61134163208852, "grad_norm": 10.05141830444336, "learning_rate": 2.9936990932841556e-05, "log_odds_chosen": 10.017276763916016, "log_odds_ratio": -0.0006272240425460041, "logits/chosen": -0.5370017290115356, "logits/rejected": -0.625502347946167, "logps/chosen": -0.020561806857585907, "logps/rejected": -2.392383098602295, "loss": 1.302, "nll_loss": 0.3254457712173462, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020561807323247194, "rewards/margins": 0.2371821403503418, "rewards/rejected": -0.23923832178115845, "step": 6668 }, { "epoch": 4.6120331950207465, "grad_norm": 8.749216079711914, "learning_rate": 2.9933148916551405e-05, "log_odds_chosen": 9.878754615783691, "log_odds_ratio": -0.00017952104099094868, "logits/chosen": -0.6044222116470337, "logits/rejected": -0.6368842124938965, "logps/chosen": -0.0011460966197773814, "logps/rejected": -1.6823590993881226, "loss": 0.9386, "nll_loss": 0.2346310317516327, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011460966197773814, "rewards/margins": 0.1681213080883026, "rewards/rejected": -0.1682359278202057, "step": 6669 }, { "epoch": 4.612724757952973, "grad_norm": 8.973983764648438, "learning_rate": 2.9929306900261255e-05, "log_odds_chosen": 10.453866958618164, "log_odds_ratio": -0.00011220773740205914, "logits/chosen": -0.29473161697387695, "logits/rejected": -0.3772168755531311, "logps/chosen": -0.0007200624095275998, "logps/rejected": -1.8716492652893066, "loss": 1.2924, "nll_loss": 0.32308429479599, "rewards/accuracies": 1.0, "rewards/chosen": -7.200624531833455e-05, "rewards/margins": 0.18709293007850647, "rewards/rejected": -0.18716493248939514, "step": 6670 }, { "epoch": 4.6134163208852, "grad_norm": 11.71799373626709, "learning_rate": 2.992546488397111e-05, "log_odds_chosen": 11.238321304321289, "log_odds_ratio": -1.8379301764070988e-05, "logits/chosen": -0.7530157566070557, "logits/rejected": -0.7985197901725769, "logps/chosen": -7.886964158387855e-05, "logps/rejected": -1.859510898590088, "loss": 0.8885, "nll_loss": 0.22211512923240662, "rewards/accuracies": 1.0, "rewards/chosen": -7.886964340286795e-06, "rewards/margins": 0.18594320118427277, "rewards/rejected": -0.1859510838985443, "step": 6671 }, { "epoch": 4.614107883817427, "grad_norm": 11.972702980041504, "learning_rate": 2.992162286768096e-05, "log_odds_chosen": 10.402179718017578, "log_odds_ratio": -4.1663435695227236e-05, "logits/chosen": -0.7485939860343933, "logits/rejected": -0.8353657126426697, "logps/chosen": -0.0001987726427614689, "logps/rejected": -1.8271870613098145, "loss": 0.9217, "nll_loss": 0.23042625188827515, "rewards/accuracies": 1.0, "rewards/chosen": -1.987726500374265e-05, "rewards/margins": 0.1826988309621811, "rewards/rejected": -0.18271872401237488, "step": 6672 }, { "epoch": 4.614799446749654, "grad_norm": 6.361045837402344, "learning_rate": 2.991778085139081e-05, "log_odds_chosen": 9.737350463867188, "log_odds_ratio": -0.0010446513770148158, "logits/chosen": -0.6925402283668518, "logits/rejected": -0.7096436023712158, "logps/chosen": -0.0007525760447606444, "logps/rejected": -1.0624769926071167, "loss": 1.0312, "nll_loss": 0.2577068507671356, "rewards/accuracies": 1.0, "rewards/chosen": -7.52576015656814e-05, "rewards/margins": 0.10617244243621826, "rewards/rejected": -0.10624770075082779, "step": 6673 }, { "epoch": 4.615491009681881, "grad_norm": 8.369453430175781, "learning_rate": 2.9913938835100664e-05, "log_odds_chosen": 9.764873504638672, "log_odds_ratio": -0.0006577305030077696, "logits/chosen": -0.2709742486476898, "logits/rejected": -0.31986692547798157, "logps/chosen": -0.001359713263809681, "logps/rejected": -1.6538102626800537, "loss": 0.9001, "nll_loss": 0.22496314346790314, "rewards/accuracies": 1.0, "rewards/chosen": -0.000135971320560202, "rewards/margins": 0.16524508595466614, "rewards/rejected": -0.1653810441493988, "step": 6674 }, { "epoch": 4.6161825726141075, "grad_norm": 9.031880378723145, "learning_rate": 2.9910096818810513e-05, "log_odds_chosen": 11.301053047180176, "log_odds_ratio": -1.9644532585516572e-05, "logits/chosen": -0.4663847088813782, "logits/rejected": -0.5085259079933167, "logps/chosen": -0.0001327487698290497, "logps/rejected": -2.331967830657959, "loss": 0.7171, "nll_loss": 0.1792832463979721, "rewards/accuracies": 1.0, "rewards/chosen": -1.327487643720815e-05, "rewards/margins": 0.23318353295326233, "rewards/rejected": -0.23319679498672485, "step": 6675 }, { "epoch": 4.616874135546334, "grad_norm": 6.838178634643555, "learning_rate": 2.9906254802520362e-05, "log_odds_chosen": 10.975125312805176, "log_odds_ratio": -0.0002458269300404936, "logits/chosen": -0.6259713172912598, "logits/rejected": -0.676510214805603, "logps/chosen": -0.0003355609951540828, "logps/rejected": -2.4494009017944336, "loss": 0.725, "nll_loss": 0.18123364448547363, "rewards/accuracies": 1.0, "rewards/chosen": -3.3556098060216755e-05, "rewards/margins": 0.24490654468536377, "rewards/rejected": -0.2449401170015335, "step": 6676 }, { "epoch": 4.617565698478561, "grad_norm": 5.756682872772217, "learning_rate": 2.9902412786230215e-05, "log_odds_chosen": 10.777154922485352, "log_odds_ratio": -7.546051347162575e-05, "logits/chosen": -0.6829020380973816, "logits/rejected": -0.7713437080383301, "logps/chosen": -0.00017902077524922788, "logps/rejected": -2.108368158340454, "loss": 0.6858, "nll_loss": 0.17143920063972473, "rewards/accuracies": 1.0, "rewards/chosen": -1.790207898011431e-05, "rewards/margins": 0.21081890165805817, "rewards/rejected": -0.21083681285381317, "step": 6677 }, { "epoch": 4.618257261410788, "grad_norm": 11.32691764831543, "learning_rate": 2.9898570769940064e-05, "log_odds_chosen": 10.667409896850586, "log_odds_ratio": -2.6573019567877054e-05, "logits/chosen": -0.674767255783081, "logits/rejected": -0.7759418487548828, "logps/chosen": -0.00016158992366399616, "logps/rejected": -1.9589648246765137, "loss": 1.1013, "nll_loss": 0.27531570196151733, "rewards/accuracies": 1.0, "rewards/chosen": -1.6158992366399616e-05, "rewards/margins": 0.19588032364845276, "rewards/rejected": -0.1958964765071869, "step": 6678 }, { "epoch": 4.618948824343015, "grad_norm": 13.166940689086914, "learning_rate": 2.9894728753649913e-05, "log_odds_chosen": 10.21392822265625, "log_odds_ratio": -0.000317515863571316, "logits/chosen": -0.8075710535049438, "logits/rejected": -0.8033132553100586, "logps/chosen": -0.003124582814052701, "logps/rejected": -2.2208189964294434, "loss": 1.0702, "nll_loss": 0.2675139307975769, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003124582872260362, "rewards/margins": 0.22176943719387054, "rewards/rejected": -0.22208191454410553, "step": 6679 }, { "epoch": 4.619640387275242, "grad_norm": 9.520648956298828, "learning_rate": 2.989088673735977e-05, "log_odds_chosen": 10.81861686706543, "log_odds_ratio": -6.064638728275895e-05, "logits/chosen": -0.5004931092262268, "logits/rejected": -0.6433913707733154, "logps/chosen": -0.0003089347155764699, "logps/rejected": -2.1328179836273193, "loss": 0.746, "nll_loss": 0.1864846795797348, "rewards/accuracies": 1.0, "rewards/chosen": -3.089347228524275e-05, "rewards/margins": 0.21325090527534485, "rewards/rejected": -0.2132818102836609, "step": 6680 }, { "epoch": 4.6203319502074685, "grad_norm": 12.623364448547363, "learning_rate": 2.9887044721069618e-05, "log_odds_chosen": 8.48847770690918, "log_odds_ratio": -0.19390060007572174, "logits/chosen": -0.7172576189041138, "logits/rejected": -0.793793261051178, "logps/chosen": -0.027234375476837158, "logps/rejected": -1.615464448928833, "loss": 0.7798, "nll_loss": 0.17555385828018188, "rewards/accuracies": 0.875, "rewards/chosen": -0.002723437501117587, "rewards/margins": 0.15882301330566406, "rewards/rejected": -0.16154645383358002, "step": 6681 }, { "epoch": 4.621023513139695, "grad_norm": 16.15542221069336, "learning_rate": 2.9883202704779467e-05, "log_odds_chosen": 10.003597259521484, "log_odds_ratio": -0.0015024865278974175, "logits/chosen": -0.5204415917396545, "logits/rejected": -0.625032901763916, "logps/chosen": -0.0031819252762943506, "logps/rejected": -2.1989731788635254, "loss": 0.9172, "nll_loss": 0.2291489839553833, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031819252762943506, "rewards/margins": 0.21957910060882568, "rewards/rejected": -0.2198973149061203, "step": 6682 }, { "epoch": 4.621715076071922, "grad_norm": 15.578567504882812, "learning_rate": 2.9879360688489323e-05, "log_odds_chosen": 11.986330032348633, "log_odds_ratio": -2.9383349101408385e-05, "logits/chosen": -0.6631225943565369, "logits/rejected": -0.7706457376480103, "logps/chosen": -0.0003626207762863487, "logps/rejected": -3.311293125152588, "loss": 1.02, "nll_loss": 0.2550080120563507, "rewards/accuracies": 1.0, "rewards/chosen": -3.626207762863487e-05, "rewards/margins": 0.3310930132865906, "rewards/rejected": -0.3311293125152588, "step": 6683 }, { "epoch": 4.622406639004149, "grad_norm": 10.762539863586426, "learning_rate": 2.9875518672199172e-05, "log_odds_chosen": 9.830475807189941, "log_odds_ratio": -0.0014509977772831917, "logits/chosen": -0.36546534299850464, "logits/rejected": -0.35679420828819275, "logps/chosen": -0.002010797383263707, "logps/rejected": -1.9924137592315674, "loss": 0.9966, "nll_loss": 0.2490108609199524, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020107974705751985, "rewards/margins": 0.19904029369354248, "rewards/rejected": -0.19924138486385345, "step": 6684 }, { "epoch": 4.623098201936376, "grad_norm": 8.75740909576416, "learning_rate": 2.987167665590902e-05, "log_odds_chosen": 9.379545211791992, "log_odds_ratio": -0.0076197548769414425, "logits/chosen": -0.4256450831890106, "logits/rejected": -0.4568532109260559, "logps/chosen": -0.0030377001967281103, "logps/rejected": -1.3667492866516113, "loss": 0.8778, "nll_loss": 0.21868903934955597, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030377000803127885, "rewards/margins": 0.13637115061283112, "rewards/rejected": -0.1366749256849289, "step": 6685 }, { "epoch": 4.623789764868603, "grad_norm": 14.677732467651367, "learning_rate": 2.9867834639618873e-05, "log_odds_chosen": 10.845108032226562, "log_odds_ratio": -0.001355968415737152, "logits/chosen": -0.3229242265224457, "logits/rejected": -0.4597609043121338, "logps/chosen": -0.00671126926317811, "logps/rejected": -2.8743770122528076, "loss": 0.995, "nll_loss": 0.2486252337694168, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006711269379593432, "rewards/margins": 0.28676658868789673, "rewards/rejected": -0.2874377369880676, "step": 6686 }, { "epoch": 4.624481327800829, "grad_norm": 15.354605674743652, "learning_rate": 2.9863992623328722e-05, "log_odds_chosen": 9.914780616760254, "log_odds_ratio": -0.0001268537453142926, "logits/chosen": -0.42282068729400635, "logits/rejected": -0.5017947554588318, "logps/chosen": -0.0005255751311779022, "logps/rejected": -1.6391267776489258, "loss": 0.8269, "nll_loss": 0.20670977234840393, "rewards/accuracies": 1.0, "rewards/chosen": -5.255751602817327e-05, "rewards/margins": 0.16386012732982635, "rewards/rejected": -0.16391268372535706, "step": 6687 }, { "epoch": 4.625172890733056, "grad_norm": 14.23159408569336, "learning_rate": 2.986015060703857e-05, "log_odds_chosen": 9.756415367126465, "log_odds_ratio": -0.00016371147648897022, "logits/chosen": -0.33216923475265503, "logits/rejected": -0.42489707469940186, "logps/chosen": -0.0005953738000243902, "logps/rejected": -1.8340036869049072, "loss": 0.952, "nll_loss": 0.23797619342803955, "rewards/accuracies": 1.0, "rewards/chosen": -5.95373785472475e-05, "rewards/margins": 0.18334081768989563, "rewards/rejected": -0.18340037763118744, "step": 6688 }, { "epoch": 4.625864453665283, "grad_norm": 8.754727363586426, "learning_rate": 2.9856308590748427e-05, "log_odds_chosen": 10.663585662841797, "log_odds_ratio": -0.00016630203754175454, "logits/chosen": -0.46455100178718567, "logits/rejected": -0.48470622301101685, "logps/chosen": -0.00017484716954641044, "logps/rejected": -1.9499876499176025, "loss": 0.7495, "nll_loss": 0.187362939119339, "rewards/accuracies": 1.0, "rewards/chosen": -1.7484717318438925e-05, "rewards/margins": 0.19498127698898315, "rewards/rejected": -0.19499877095222473, "step": 6689 }, { "epoch": 4.62655601659751, "grad_norm": 16.4685115814209, "learning_rate": 2.9852466574458276e-05, "log_odds_chosen": 9.15806770324707, "log_odds_ratio": -0.31617483496665955, "logits/chosen": -0.44033369421958923, "logits/rejected": -0.5191015601158142, "logps/chosen": -0.040293145924806595, "logps/rejected": -2.562631130218506, "loss": 1.799, "nll_loss": 0.418133020401001, "rewards/accuracies": 0.875, "rewards/chosen": -0.004029314499348402, "rewards/margins": 0.2522338032722473, "rewards/rejected": -0.2562631368637085, "step": 6690 }, { "epoch": 4.627247579529737, "grad_norm": 13.75235366821289, "learning_rate": 2.9848624558168125e-05, "log_odds_chosen": 10.918336868286133, "log_odds_ratio": -0.00023003183014225215, "logits/chosen": -0.15504327416419983, "logits/rejected": -0.15456047654151917, "logps/chosen": -0.001960280817002058, "logps/rejected": -2.830104351043701, "loss": 1.0103, "nll_loss": 0.25255751609802246, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019602806423790753, "rewards/margins": 0.2828144133090973, "rewards/rejected": -0.28301042318344116, "step": 6691 }, { "epoch": 4.627939142461964, "grad_norm": 6.367249965667725, "learning_rate": 2.984478254187798e-05, "log_odds_chosen": 10.774747848510742, "log_odds_ratio": -0.00012701345258392394, "logits/chosen": -0.5788582563400269, "logits/rejected": -0.6444706916809082, "logps/chosen": -0.00046742905396968126, "logps/rejected": -2.019412040710449, "loss": 1.008, "nll_loss": 0.25199609994888306, "rewards/accuracies": 1.0, "rewards/chosen": -4.6742905396968126e-05, "rewards/margins": 0.20189446210861206, "rewards/rejected": -0.20194122195243835, "step": 6692 }, { "epoch": 4.62863070539419, "grad_norm": 11.876404762268066, "learning_rate": 2.984094052558783e-05, "log_odds_chosen": 9.97553539276123, "log_odds_ratio": -0.0002181089366786182, "logits/chosen": 0.052924394607543945, "logits/rejected": -0.01140899583697319, "logps/chosen": -0.0006699280929751694, "logps/rejected": -2.2521538734436035, "loss": 1.353, "nll_loss": 0.33823224902153015, "rewards/accuracies": 1.0, "rewards/chosen": -6.699281220789999e-05, "rewards/margins": 0.22514837980270386, "rewards/rejected": -0.2252153903245926, "step": 6693 }, { "epoch": 4.629322268326417, "grad_norm": 8.747437477111816, "learning_rate": 2.983709850929768e-05, "log_odds_chosen": 9.873016357421875, "log_odds_ratio": -0.0003475734847597778, "logits/chosen": -0.8836798071861267, "logits/rejected": -0.9364238977432251, "logps/chosen": -0.0031481364276260138, "logps/rejected": -2.0192229747772217, "loss": 1.5869, "nll_loss": 0.39668264985084534, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003148136311210692, "rewards/margins": 0.20160748064517975, "rewards/rejected": -0.20192229747772217, "step": 6694 }, { "epoch": 4.630013831258644, "grad_norm": 14.169708251953125, "learning_rate": 2.9833256493007532e-05, "log_odds_chosen": 10.764555931091309, "log_odds_ratio": -2.6315743525628932e-05, "logits/chosen": -0.5093426704406738, "logits/rejected": -0.5909894108772278, "logps/chosen": -0.0002890737378038466, "logps/rejected": -2.246802806854248, "loss": 0.9648, "nll_loss": 0.24118672311306, "rewards/accuracies": 1.0, "rewards/chosen": -2.890737414418254e-05, "rewards/margins": 0.22465135157108307, "rewards/rejected": -0.22468025982379913, "step": 6695 }, { "epoch": 4.630705394190871, "grad_norm": 10.401350975036621, "learning_rate": 2.982941447671738e-05, "log_odds_chosen": 8.530662536621094, "log_odds_ratio": -0.04185483232140541, "logits/chosen": -0.09511050581932068, "logits/rejected": -0.1275017112493515, "logps/chosen": -0.013998882845044136, "logps/rejected": -1.9299473762512207, "loss": 0.9557, "nll_loss": 0.23473045229911804, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013998884242027998, "rewards/margins": 0.19159486889839172, "rewards/rejected": -0.19299474358558655, "step": 6696 }, { "epoch": 4.631396957123098, "grad_norm": 12.37734603881836, "learning_rate": 2.982557246042723e-05, "log_odds_chosen": 9.830785751342773, "log_odds_ratio": -0.0006041385349817574, "logits/chosen": -0.5130643844604492, "logits/rejected": -0.6958015561103821, "logps/chosen": -0.0009830754715949297, "logps/rejected": -1.7544587850570679, "loss": 1.1146, "nll_loss": 0.27858293056488037, "rewards/accuracies": 1.0, "rewards/chosen": -9.830755152506754e-05, "rewards/margins": 0.17534756660461426, "rewards/rejected": -0.17544588446617126, "step": 6697 }, { "epoch": 4.632088520055325, "grad_norm": 14.123680114746094, "learning_rate": 2.9821730444137086e-05, "log_odds_chosen": 10.47813606262207, "log_odds_ratio": -5.120155037730001e-05, "logits/chosen": -0.6141031980514526, "logits/rejected": -0.6569243669509888, "logps/chosen": -0.00016339441935997456, "logps/rejected": -1.7974165678024292, "loss": 1.1983, "nll_loss": 0.29958146810531616, "rewards/accuracies": 1.0, "rewards/chosen": -1.6339441572199576e-05, "rewards/margins": 0.17972531914710999, "rewards/rejected": -0.17974165081977844, "step": 6698 }, { "epoch": 4.632780082987551, "grad_norm": 9.06856918334961, "learning_rate": 2.9817888427846935e-05, "log_odds_chosen": 10.46054458618164, "log_odds_ratio": -7.147344149416313e-05, "logits/chosen": -0.7953734397888184, "logits/rejected": -0.850338339805603, "logps/chosen": -0.0003077928558923304, "logps/rejected": -1.904348611831665, "loss": 1.7438, "nll_loss": 0.43594974279403687, "rewards/accuracies": 1.0, "rewards/chosen": -3.0779283406445757e-05, "rewards/margins": 0.19040407240390778, "rewards/rejected": -0.19043487310409546, "step": 6699 }, { "epoch": 4.633471645919778, "grad_norm": 8.853693008422852, "learning_rate": 2.9814046411556784e-05, "log_odds_chosen": 9.68885326385498, "log_odds_ratio": -0.0001513104361947626, "logits/chosen": -0.6373350620269775, "logits/rejected": -0.6955586671829224, "logps/chosen": -0.008956330828368664, "logps/rejected": -1.9807580709457397, "loss": 1.4056, "nll_loss": 0.35137683153152466, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008956331294029951, "rewards/margins": 0.19718018174171448, "rewards/rejected": -0.1980758011341095, "step": 6700 }, { "epoch": 4.634163208852005, "grad_norm": 6.464890480041504, "learning_rate": 2.981020439526664e-05, "log_odds_chosen": 9.772090911865234, "log_odds_ratio": -0.000506377371493727, "logits/chosen": -0.8277691602706909, "logits/rejected": -0.8677041530609131, "logps/chosen": -0.007026453502476215, "logps/rejected": -1.6709896326065063, "loss": 0.7766, "nll_loss": 0.19409973919391632, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007026453968137503, "rewards/margins": 0.16639631986618042, "rewards/rejected": -0.1670989692211151, "step": 6701 }, { "epoch": 4.634854771784232, "grad_norm": 7.539913654327393, "learning_rate": 2.980636237897649e-05, "log_odds_chosen": 10.478494644165039, "log_odds_ratio": -0.0006916585261933506, "logits/chosen": -1.0056345462799072, "logits/rejected": -0.9335618019104004, "logps/chosen": -0.0024261826183646917, "logps/rejected": -2.136960029602051, "loss": 1.0764, "nll_loss": 0.26902419328689575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024261824728455395, "rewards/margins": 0.21345339715480804, "rewards/rejected": -0.21369600296020508, "step": 6702 }, { "epoch": 4.635546334716459, "grad_norm": 12.231609344482422, "learning_rate": 2.9802520362686338e-05, "log_odds_chosen": 10.67332649230957, "log_odds_ratio": -3.185367677360773e-05, "logits/chosen": -0.632622241973877, "logits/rejected": -0.6896726489067078, "logps/chosen": -0.00011290707334410399, "logps/rejected": -1.5393511056900024, "loss": 1.2231, "nll_loss": 0.30576059222221375, "rewards/accuracies": 1.0, "rewards/chosen": -1.1290708243905101e-05, "rewards/margins": 0.15392382442951202, "rewards/rejected": -0.15393511950969696, "step": 6703 }, { "epoch": 4.6362378976486855, "grad_norm": 9.596137046813965, "learning_rate": 2.979867834639619e-05, "log_odds_chosen": 10.572694778442383, "log_odds_ratio": -9.505627531325445e-05, "logits/chosen": -0.7113038897514343, "logits/rejected": -0.7726638317108154, "logps/chosen": -0.0003162118955515325, "logps/rejected": -2.251645088195801, "loss": 1.2044, "nll_loss": 0.30109548568725586, "rewards/accuracies": 1.0, "rewards/chosen": -3.162118809996173e-05, "rewards/margins": 0.22513288259506226, "rewards/rejected": -0.2251645028591156, "step": 6704 }, { "epoch": 4.636929460580912, "grad_norm": 7.570445537567139, "learning_rate": 2.979483633010604e-05, "log_odds_chosen": 10.802045822143555, "log_odds_ratio": -4.337707287049852e-05, "logits/chosen": -0.5815101861953735, "logits/rejected": -0.5723499059677124, "logps/chosen": -0.00032818655017763376, "logps/rejected": -2.4690756797790527, "loss": 1.1733, "nll_loss": 0.29332929849624634, "rewards/accuracies": 1.0, "rewards/chosen": -3.2818654290167615e-05, "rewards/margins": 0.24687474966049194, "rewards/rejected": -0.2469075620174408, "step": 6705 }, { "epoch": 4.637621023513139, "grad_norm": 11.220012664794922, "learning_rate": 2.9790994313815888e-05, "log_odds_chosen": 10.171663284301758, "log_odds_ratio": -0.00024045373720582575, "logits/chosen": -0.6686966419219971, "logits/rejected": -0.7283098697662354, "logps/chosen": -0.00045101437717676163, "logps/rejected": -1.970219612121582, "loss": 1.1927, "nll_loss": 0.29814326763153076, "rewards/accuracies": 1.0, "rewards/chosen": -4.5101438445271924e-05, "rewards/margins": 0.19697685539722443, "rewards/rejected": -0.1970219612121582, "step": 6706 }, { "epoch": 4.638312586445366, "grad_norm": 10.200971603393555, "learning_rate": 2.9787152297525744e-05, "log_odds_chosen": 9.55989933013916, "log_odds_ratio": -0.001099316868931055, "logits/chosen": -0.6456737518310547, "logits/rejected": -0.6532997488975525, "logps/chosen": -0.0012371373595669866, "logps/rejected": -2.002163887023926, "loss": 1.0476, "nll_loss": 0.26179665327072144, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001237137330463156, "rewards/margins": 0.20009265840053558, "rewards/rejected": -0.2002163827419281, "step": 6707 }, { "epoch": 4.639004149377593, "grad_norm": 7.387045860290527, "learning_rate": 2.9783310281235593e-05, "log_odds_chosen": 9.868995666503906, "log_odds_ratio": -0.00022995221661403775, "logits/chosen": -0.5058600306510925, "logits/rejected": -0.5563660264015198, "logps/chosen": -0.0004528115096036345, "logps/rejected": -2.0728092193603516, "loss": 0.7037, "nll_loss": 0.17589400708675385, "rewards/accuracies": 1.0, "rewards/chosen": -4.528115096036345e-05, "rewards/margins": 0.2072356641292572, "rewards/rejected": -0.2072809487581253, "step": 6708 }, { "epoch": 4.63969571230982, "grad_norm": 6.884265899658203, "learning_rate": 2.9779468264945442e-05, "log_odds_chosen": 9.497474670410156, "log_odds_ratio": -0.006152651272714138, "logits/chosen": -0.453260600566864, "logits/rejected": -0.4585949778556824, "logps/chosen": -0.015122218057513237, "logps/rejected": -2.5086190700531006, "loss": 1.8145, "nll_loss": 0.4530009627342224, "rewards/accuracies": 1.0, "rewards/chosen": -0.001512221759185195, "rewards/margins": 0.2493496686220169, "rewards/rejected": -0.25086188316345215, "step": 6709 }, { "epoch": 4.6403872752420465, "grad_norm": 5.996611595153809, "learning_rate": 2.9775626248655298e-05, "log_odds_chosen": 9.754794120788574, "log_odds_ratio": -0.0006676408229395747, "logits/chosen": -0.34979021549224854, "logits/rejected": -0.3468340039253235, "logps/chosen": -0.0010450142435729504, "logps/rejected": -2.0860064029693604, "loss": 1.5725, "nll_loss": 0.39306214451789856, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010450142144691199, "rewards/margins": 0.20849615335464478, "rewards/rejected": -0.20860064029693604, "step": 6710 }, { "epoch": 4.641078838174274, "grad_norm": 15.907814025878906, "learning_rate": 2.9771784232365147e-05, "log_odds_chosen": 10.96168327331543, "log_odds_ratio": -6.310870230663568e-05, "logits/chosen": -0.27718105912208557, "logits/rejected": -0.4333638846874237, "logps/chosen": -0.00034265409340150654, "logps/rejected": -2.578684091567993, "loss": 0.8577, "nll_loss": 0.21442833542823792, "rewards/accuracies": 1.0, "rewards/chosen": -3.426541297812946e-05, "rewards/margins": 0.25783413648605347, "rewards/rejected": -0.2578684091567993, "step": 6711 }, { "epoch": 4.641770401106501, "grad_norm": 6.910033226013184, "learning_rate": 2.9767942216074996e-05, "log_odds_chosen": 10.08421516418457, "log_odds_ratio": -0.00010271971405018121, "logits/chosen": -0.7868772745132446, "logits/rejected": -0.9348423480987549, "logps/chosen": -0.00020229278015904129, "logps/rejected": -1.6391842365264893, "loss": 0.7629, "nll_loss": 0.19071084260940552, "rewards/accuracies": 1.0, "rewards/chosen": -2.0229277652106248e-05, "rewards/margins": 0.16389819979667664, "rewards/rejected": -0.1639184206724167, "step": 6712 }, { "epoch": 4.642461964038728, "grad_norm": 10.08545970916748, "learning_rate": 2.976410019978485e-05, "log_odds_chosen": 11.719478607177734, "log_odds_ratio": -2.821564339683391e-05, "logits/chosen": -0.612644612789154, "logits/rejected": -0.666480541229248, "logps/chosen": -0.00043208003626205027, "logps/rejected": -3.3896830081939697, "loss": 1.0204, "nll_loss": 0.25510790944099426, "rewards/accuracies": 1.0, "rewards/chosen": -4.320800144341774e-05, "rewards/margins": 0.3389251232147217, "rewards/rejected": -0.33896830677986145, "step": 6713 }, { "epoch": 4.643153526970955, "grad_norm": 8.085092544555664, "learning_rate": 2.9760258183494698e-05, "log_odds_chosen": 9.24812126159668, "log_odds_ratio": -0.00013786503404844552, "logits/chosen": -0.44048011302948, "logits/rejected": -0.4726550877094269, "logps/chosen": -0.0008194476831704378, "logps/rejected": -1.8271818161010742, "loss": 1.387, "nll_loss": 0.34672486782073975, "rewards/accuracies": 1.0, "rewards/chosen": -8.194477413780987e-05, "rewards/margins": 0.18263621628284454, "rewards/rejected": -0.1827181577682495, "step": 6714 }, { "epoch": 4.643845089903182, "grad_norm": 15.791669845581055, "learning_rate": 2.9756416167204547e-05, "log_odds_chosen": 10.043708801269531, "log_odds_ratio": -0.00042942730942741036, "logits/chosen": -0.10093079507350922, "logits/rejected": -0.1990877091884613, "logps/chosen": -0.0007182598346844316, "logps/rejected": -1.9333560466766357, "loss": 0.9672, "nll_loss": 0.24174702167510986, "rewards/accuracies": 1.0, "rewards/chosen": -7.182598346844316e-05, "rewards/margins": 0.19326378405094147, "rewards/rejected": -0.1933356076478958, "step": 6715 }, { "epoch": 4.644536652835408, "grad_norm": 20.379234313964844, "learning_rate": 2.9752574150914403e-05, "log_odds_chosen": 10.522616386413574, "log_odds_ratio": -0.0002614251570776105, "logits/chosen": -0.4869431257247925, "logits/rejected": -0.6128153800964355, "logps/chosen": -0.00046562464558519423, "logps/rejected": -2.1855411529541016, "loss": 0.9109, "nll_loss": 0.2276930809020996, "rewards/accuracies": 1.0, "rewards/chosen": -4.6562465286115184e-05, "rewards/margins": 0.2185075581073761, "rewards/rejected": -0.21855413913726807, "step": 6716 }, { "epoch": 4.645228215767635, "grad_norm": 8.88293743133545, "learning_rate": 2.974873213462425e-05, "log_odds_chosen": 10.745494842529297, "log_odds_ratio": -0.0006010960787534714, "logits/chosen": -0.540441632270813, "logits/rejected": -0.6040372252464294, "logps/chosen": -0.0009351319749839604, "logps/rejected": -1.978389859199524, "loss": 0.9779, "nll_loss": 0.24442104995250702, "rewards/accuracies": 1.0, "rewards/chosen": -9.351320477435365e-05, "rewards/margins": 0.19774548709392548, "rewards/rejected": -0.19783899188041687, "step": 6717 }, { "epoch": 4.645919778699862, "grad_norm": 9.662896156311035, "learning_rate": 2.97448901183341e-05, "log_odds_chosen": 10.538196563720703, "log_odds_ratio": -0.03499438986182213, "logits/chosen": -0.39672914147377014, "logits/rejected": -0.4780488908290863, "logps/chosen": -0.007500995881855488, "logps/rejected": -3.2057886123657227, "loss": 1.1877, "nll_loss": 0.29342931509017944, "rewards/accuracies": 1.0, "rewards/chosen": -0.000750099599827081, "rewards/margins": 0.3198287785053253, "rewards/rejected": -0.3205788731575012, "step": 6718 }, { "epoch": 4.646611341632089, "grad_norm": 12.759986877441406, "learning_rate": 2.9741048102043957e-05, "log_odds_chosen": 9.300508499145508, "log_odds_ratio": -0.0013926469255238771, "logits/chosen": -0.0835866630077362, "logits/rejected": -0.16006775200366974, "logps/chosen": -0.002518139313906431, "logps/rejected": -2.1676414012908936, "loss": 1.5449, "nll_loss": 0.3860914707183838, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025181396631523967, "rewards/margins": 0.21651235222816467, "rewards/rejected": -0.2167641669511795, "step": 6719 }, { "epoch": 4.647302904564316, "grad_norm": 14.2500581741333, "learning_rate": 2.9737206085753806e-05, "log_odds_chosen": 11.568772315979004, "log_odds_ratio": -1.765798151609488e-05, "logits/chosen": -0.4689595401287079, "logits/rejected": -0.598329484462738, "logps/chosen": -0.00032325286883860826, "logps/rejected": -3.025010585784912, "loss": 0.9417, "nll_loss": 0.23542800545692444, "rewards/accuracies": 1.0, "rewards/chosen": -3.232528979424387e-05, "rewards/margins": 0.30246874690055847, "rewards/rejected": -0.30250105261802673, "step": 6720 }, { "epoch": 4.6479944674965425, "grad_norm": 6.891107082366943, "learning_rate": 2.9733364069463655e-05, "log_odds_chosen": 10.009754180908203, "log_odds_ratio": -0.00011356735194567591, "logits/chosen": -1.004388689994812, "logits/rejected": -1.014186143875122, "logps/chosen": -0.0022962328512221575, "logps/rejected": -2.5089192390441895, "loss": 1.0871, "nll_loss": 0.2717720866203308, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002296233142260462, "rewards/margins": 0.25066232681274414, "rewards/rejected": -0.25089192390441895, "step": 6721 }, { "epoch": 4.648686030428769, "grad_norm": 15.071685791015625, "learning_rate": 2.9729522053173507e-05, "log_odds_chosen": 10.099405288696289, "log_odds_ratio": -0.00013610887981485575, "logits/chosen": -0.996317982673645, "logits/rejected": -1.0054402351379395, "logps/chosen": -0.00043816506513394415, "logps/rejected": -1.9022955894470215, "loss": 1.911, "nll_loss": 0.47772806882858276, "rewards/accuracies": 1.0, "rewards/chosen": -4.381650796858594e-05, "rewards/margins": 0.19018574059009552, "rewards/rejected": -0.19022956490516663, "step": 6722 }, { "epoch": 4.649377593360996, "grad_norm": 10.62243938446045, "learning_rate": 2.9725680036883356e-05, "log_odds_chosen": 10.941905975341797, "log_odds_ratio": -3.1653813493903726e-05, "logits/chosen": -0.5696731209754944, "logits/rejected": -0.5938451886177063, "logps/chosen": -0.00010829799430212006, "logps/rejected": -1.8097453117370605, "loss": 0.9972, "nll_loss": 0.24928641319274902, "rewards/accuracies": 1.0, "rewards/chosen": -1.0829799066414125e-05, "rewards/margins": 0.1809636950492859, "rewards/rejected": -0.180974543094635, "step": 6723 }, { "epoch": 4.650069156293223, "grad_norm": 6.662535667419434, "learning_rate": 2.9721838020593205e-05, "log_odds_chosen": 9.740900993347168, "log_odds_ratio": -0.00012547167716547847, "logits/chosen": -0.6915889978408813, "logits/rejected": -0.6611911058425903, "logps/chosen": -0.000395122857298702, "logps/rejected": -1.763127088546753, "loss": 1.0219, "nll_loss": 0.2554568648338318, "rewards/accuracies": 1.0, "rewards/chosen": -3.9512287912657484e-05, "rewards/margins": 0.1762731969356537, "rewards/rejected": -0.17631271481513977, "step": 6724 }, { "epoch": 4.65076071922545, "grad_norm": 8.516170501708984, "learning_rate": 2.971799600430306e-05, "log_odds_chosen": 9.447857856750488, "log_odds_ratio": -0.0008436216157861054, "logits/chosen": -0.9329970479011536, "logits/rejected": -0.9244383573532104, "logps/chosen": -0.002910643583163619, "logps/rejected": -2.0838112831115723, "loss": 1.6259, "nll_loss": 0.40639549493789673, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002910643524955958, "rewards/margins": 0.20809006690979004, "rewards/rejected": -0.20838113129138947, "step": 6725 }, { "epoch": 4.651452282157677, "grad_norm": 7.389032363891602, "learning_rate": 2.971415398801291e-05, "log_odds_chosen": 10.360387802124023, "log_odds_ratio": -4.4586155127035454e-05, "logits/chosen": -0.48345616459846497, "logits/rejected": -0.48519355058670044, "logps/chosen": -0.0008797052432782948, "logps/rejected": -2.5228095054626465, "loss": 0.9653, "nll_loss": 0.2413209080696106, "rewards/accuracies": 1.0, "rewards/chosen": -8.7970525783021e-05, "rewards/margins": 0.25219297409057617, "rewards/rejected": -0.25228095054626465, "step": 6726 }, { "epoch": 4.6521438450899035, "grad_norm": 10.816040992736816, "learning_rate": 2.971031197172276e-05, "log_odds_chosen": 11.398842811584473, "log_odds_ratio": -1.864444675447885e-05, "logits/chosen": -0.472339928150177, "logits/rejected": -0.501425564289093, "logps/chosen": -0.00012852560030296445, "logps/rejected": -2.451239585876465, "loss": 0.8833, "nll_loss": 0.22082111239433289, "rewards/accuracies": 1.0, "rewards/chosen": -1.2852560757892206e-05, "rewards/margins": 0.2451111227273941, "rewards/rejected": -0.2451239675283432, "step": 6727 }, { "epoch": 4.65283540802213, "grad_norm": 9.398346900939941, "learning_rate": 2.9706469955432615e-05, "log_odds_chosen": 9.201940536499023, "log_odds_ratio": -0.0012506047496572137, "logits/chosen": -0.456091046333313, "logits/rejected": -0.5010344982147217, "logps/chosen": -0.018052559345960617, "logps/rejected": -2.53879451751709, "loss": 1.0523, "nll_loss": 0.26295000314712524, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018052560044452548, "rewards/margins": 0.2520741820335388, "rewards/rejected": -0.2538794279098511, "step": 6728 }, { "epoch": 4.653526970954357, "grad_norm": 9.349139213562012, "learning_rate": 2.9702627939142464e-05, "log_odds_chosen": 8.978045463562012, "log_odds_ratio": -0.0026159649714827538, "logits/chosen": -0.6020793914794922, "logits/rejected": -0.6840202808380127, "logps/chosen": -0.044595979154109955, "logps/rejected": -2.194462776184082, "loss": 1.3426, "nll_loss": 0.3353860378265381, "rewards/accuracies": 1.0, "rewards/chosen": -0.004459597636014223, "rewards/margins": 0.2149866819381714, "rewards/rejected": -0.2194463014602661, "step": 6729 }, { "epoch": 4.654218533886584, "grad_norm": 6.017029285430908, "learning_rate": 2.9698785922852313e-05, "log_odds_chosen": 10.089128494262695, "log_odds_ratio": -0.00041553491610102355, "logits/chosen": -0.7684400081634521, "logits/rejected": -0.7028689384460449, "logps/chosen": -0.00038414757000282407, "logps/rejected": -1.840576171875, "loss": 0.5227, "nll_loss": 0.13064205646514893, "rewards/accuracies": 1.0, "rewards/chosen": -3.841475700028241e-05, "rewards/margins": 0.18401920795440674, "rewards/rejected": -0.18405762314796448, "step": 6730 }, { "epoch": 4.654910096818811, "grad_norm": 22.103477478027344, "learning_rate": 2.9694943906562165e-05, "log_odds_chosen": 9.41515827178955, "log_odds_ratio": -0.23566730320453644, "logits/chosen": -0.9601256251335144, "logits/rejected": -1.0031406879425049, "logps/chosen": -0.037202395498752594, "logps/rejected": -2.241128444671631, "loss": 1.5142, "nll_loss": 0.3549814224243164, "rewards/accuracies": 0.875, "rewards/chosen": -0.003720239968970418, "rewards/margins": 0.2203925997018814, "rewards/rejected": -0.2241128385066986, "step": 6731 }, { "epoch": 4.655601659751038, "grad_norm": 8.17127799987793, "learning_rate": 2.9691101890272015e-05, "log_odds_chosen": 9.61920166015625, "log_odds_ratio": -0.0006140960031189024, "logits/chosen": -0.8896629810333252, "logits/rejected": -0.9282146692276001, "logps/chosen": -0.0010857881279662251, "logps/rejected": -1.5764262676239014, "loss": 0.6273, "nll_loss": 0.1567581444978714, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010857881716219708, "rewards/margins": 0.15753406286239624, "rewards/rejected": -0.1576426476240158, "step": 6732 }, { "epoch": 4.6562932226832645, "grad_norm": 6.767904281616211, "learning_rate": 2.9687259873981864e-05, "log_odds_chosen": 11.021403312683105, "log_odds_ratio": -7.743245805613697e-05, "logits/chosen": -0.7251055836677551, "logits/rejected": -0.6987432837486267, "logps/chosen": -0.00013259478146210313, "logps/rejected": -1.8017305135726929, "loss": 0.5248, "nll_loss": 0.13118384778499603, "rewards/accuracies": 1.0, "rewards/chosen": -1.3259479601401836e-05, "rewards/margins": 0.1801597774028778, "rewards/rejected": -0.18017305433750153, "step": 6733 }, { "epoch": 4.656984785615491, "grad_norm": 14.025473594665527, "learning_rate": 2.968341785769172e-05, "log_odds_chosen": 9.692667007446289, "log_odds_ratio": -0.005310252774506807, "logits/chosen": -0.8718782067298889, "logits/rejected": -0.9011925458908081, "logps/chosen": -0.00242875749245286, "logps/rejected": -1.8067294359207153, "loss": 1.2001, "nll_loss": 0.2994995713233948, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002428757434245199, "rewards/margins": 0.180430069565773, "rewards/rejected": -0.18067294359207153, "step": 6734 }, { "epoch": 4.657676348547718, "grad_norm": 10.400674819946289, "learning_rate": 2.967957584140157e-05, "log_odds_chosen": 10.835912704467773, "log_odds_ratio": -0.00011656482820399106, "logits/chosen": -0.751828134059906, "logits/rejected": -0.7273005247116089, "logps/chosen": -0.00023792957654222846, "logps/rejected": -2.1163973808288574, "loss": 1.4838, "nll_loss": 0.3709476590156555, "rewards/accuracies": 1.0, "rewards/chosen": -2.3792958018020727e-05, "rewards/margins": 0.21161596477031708, "rewards/rejected": -0.21163977682590485, "step": 6735 }, { "epoch": 4.658367911479945, "grad_norm": 9.04215145111084, "learning_rate": 2.9675733825111418e-05, "log_odds_chosen": 10.640592575073242, "log_odds_ratio": -6.056567144696601e-05, "logits/chosen": -0.47394564747810364, "logits/rejected": -0.48224952816963196, "logps/chosen": -0.00046629508142359555, "logps/rejected": -2.1928296089172363, "loss": 0.8759, "nll_loss": 0.21896211802959442, "rewards/accuracies": 1.0, "rewards/chosen": -4.662950959755108e-05, "rewards/margins": 0.2192363440990448, "rewards/rejected": -0.21928296983242035, "step": 6736 }, { "epoch": 4.659059474412172, "grad_norm": 10.252370834350586, "learning_rate": 2.9671891808821273e-05, "log_odds_chosen": 9.582907676696777, "log_odds_ratio": -0.0002878558880183846, "logits/chosen": -0.41817861795425415, "logits/rejected": -0.48817765712738037, "logps/chosen": -0.013663535937666893, "logps/rejected": -2.3412153720855713, "loss": 1.0591, "nll_loss": 0.2647481858730316, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013663534773513675, "rewards/margins": 0.23275518417358398, "rewards/rejected": -0.23412156105041504, "step": 6737 }, { "epoch": 4.659751037344399, "grad_norm": 8.530871391296387, "learning_rate": 2.9668049792531122e-05, "log_odds_chosen": 10.306585311889648, "log_odds_ratio": -0.00010736883996287361, "logits/chosen": -0.975071907043457, "logits/rejected": -0.9711197018623352, "logps/chosen": -0.00044086261186748743, "logps/rejected": -1.8548561334609985, "loss": 0.894, "nll_loss": 0.22348177433013916, "rewards/accuracies": 1.0, "rewards/chosen": -4.408626045915298e-05, "rewards/margins": 0.1854415386915207, "rewards/rejected": -0.1854856312274933, "step": 6738 }, { "epoch": 4.6604426002766255, "grad_norm": 8.039578437805176, "learning_rate": 2.966420777624097e-05, "log_odds_chosen": 10.206424713134766, "log_odds_ratio": -0.00015417771646752954, "logits/chosen": -0.329103946685791, "logits/rejected": -0.362267404794693, "logps/chosen": -0.0006552881095558405, "logps/rejected": -2.4694182872772217, "loss": 0.8473, "nll_loss": 0.21181637048721313, "rewards/accuracies": 1.0, "rewards/chosen": -6.552880950039253e-05, "rewards/margins": 0.2468762993812561, "rewards/rejected": -0.24694183468818665, "step": 6739 }, { "epoch": 4.661134163208852, "grad_norm": 25.52229118347168, "learning_rate": 2.9660365759950824e-05, "log_odds_chosen": 9.05695629119873, "log_odds_ratio": -0.016237886622548103, "logits/chosen": -0.40049076080322266, "logits/rejected": -0.47549617290496826, "logps/chosen": -0.036526232957839966, "logps/rejected": -1.8961656093597412, "loss": 1.2379, "nll_loss": 0.3078601658344269, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036526236217468977, "rewards/margins": 0.1859639585018158, "rewards/rejected": -0.18961656093597412, "step": 6740 }, { "epoch": 4.661825726141079, "grad_norm": 9.990700721740723, "learning_rate": 2.9656523743660673e-05, "log_odds_chosen": 11.605825424194336, "log_odds_ratio": -2.0440449588932097e-05, "logits/chosen": -0.7656416296958923, "logits/rejected": -0.8104311227798462, "logps/chosen": -0.00011805635585915297, "logps/rejected": -2.349353075027466, "loss": 0.9251, "nll_loss": 0.2312828004360199, "rewards/accuracies": 1.0, "rewards/chosen": -1.1805635949713178e-05, "rewards/margins": 0.23492351174354553, "rewards/rejected": -0.23493532836437225, "step": 6741 }, { "epoch": 4.662517289073306, "grad_norm": 13.435784339904785, "learning_rate": 2.9652681727370525e-05, "log_odds_chosen": 10.331426620483398, "log_odds_ratio": -0.0005047993618063629, "logits/chosen": -0.8178121447563171, "logits/rejected": -0.8129633665084839, "logps/chosen": -0.0013931768480688334, "logps/rejected": -2.429903984069824, "loss": 1.2838, "nll_loss": 0.3208959102630615, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013931769353803247, "rewards/margins": 0.2428511083126068, "rewards/rejected": -0.2429904341697693, "step": 6742 }, { "epoch": 4.663208852005533, "grad_norm": 15.896458625793457, "learning_rate": 2.9648839711080378e-05, "log_odds_chosen": 9.113409996032715, "log_odds_ratio": -0.00859312154352665, "logits/chosen": -0.47966164350509644, "logits/rejected": -0.512639582157135, "logps/chosen": -0.004731173627078533, "logps/rejected": -2.3525338172912598, "loss": 1.606, "nll_loss": 0.4006463289260864, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047311733942478895, "rewards/margins": 0.23478025197982788, "rewards/rejected": -0.23525336384773254, "step": 6743 }, { "epoch": 4.66390041493776, "grad_norm": 22.674076080322266, "learning_rate": 2.9644997694790227e-05, "log_odds_chosen": 10.03700065612793, "log_odds_ratio": -0.00355674815364182, "logits/chosen": -0.22789748013019562, "logits/rejected": -0.30582594871520996, "logps/chosen": -0.02659655548632145, "logps/rejected": -2.2190628051757812, "loss": 1.1222, "nll_loss": 0.2802049517631531, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026596554089337587, "rewards/margins": 0.21924662590026855, "rewards/rejected": -0.22190627455711365, "step": 6744 }, { "epoch": 4.6645919778699865, "grad_norm": 6.5764689445495605, "learning_rate": 2.9641155678500076e-05, "log_odds_chosen": 8.920831680297852, "log_odds_ratio": -0.0003492921532597393, "logits/chosen": -0.4340789318084717, "logits/rejected": -0.47069665789604187, "logps/chosen": -0.0005598999559879303, "logps/rejected": -1.394984483718872, "loss": 1.0783, "nll_loss": 0.26954346895217896, "rewards/accuracies": 1.0, "rewards/chosen": -5.5989992688409984e-05, "rewards/margins": 0.13944245874881744, "rewards/rejected": -0.13949845731258392, "step": 6745 }, { "epoch": 4.665283540802213, "grad_norm": 9.251626014709473, "learning_rate": 2.9637313662209932e-05, "log_odds_chosen": 9.983993530273438, "log_odds_ratio": -0.00015731611347291619, "logits/chosen": -0.758710503578186, "logits/rejected": -0.8479531407356262, "logps/chosen": -0.001559579512104392, "logps/rejected": -2.231616973876953, "loss": 1.3107, "nll_loss": 0.3276580274105072, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015595793956890702, "rewards/margins": 0.2230057418346405, "rewards/rejected": -0.2231617271900177, "step": 6746 }, { "epoch": 4.66597510373444, "grad_norm": 14.86572551727295, "learning_rate": 2.963347164591978e-05, "log_odds_chosen": 9.572809219360352, "log_odds_ratio": -0.005756652448326349, "logits/chosen": -0.013816140592098236, "logits/rejected": -0.1874159425497055, "logps/chosen": -0.05472245439887047, "logps/rejected": -1.966597318649292, "loss": 1.0252, "nll_loss": 0.2557242214679718, "rewards/accuracies": 1.0, "rewards/chosen": -0.005472245626151562, "rewards/margins": 0.19118750095367432, "rewards/rejected": -0.19665974378585815, "step": 6747 }, { "epoch": 4.666666666666667, "grad_norm": 8.244834899902344, "learning_rate": 2.962962962962963e-05, "log_odds_chosen": 9.516754150390625, "log_odds_ratio": -0.00035925908014178276, "logits/chosen": -0.8049564361572266, "logits/rejected": -0.857810914516449, "logps/chosen": -0.0058831567876040936, "logps/rejected": -2.215717315673828, "loss": 0.9812, "nll_loss": 0.24527311325073242, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005883157136850059, "rewards/margins": 0.22098343074321747, "rewards/rejected": -0.22157174348831177, "step": 6748 }, { "epoch": 4.667358229598894, "grad_norm": 7.945059299468994, "learning_rate": 2.9625787613339482e-05, "log_odds_chosen": 9.842604637145996, "log_odds_ratio": -0.0027358822990208864, "logits/chosen": -0.5420747995376587, "logits/rejected": -0.5979031324386597, "logps/chosen": -0.0017957030795514584, "logps/rejected": -2.231954336166382, "loss": 1.0751, "nll_loss": 0.26851096749305725, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017957028467208147, "rewards/margins": 0.22301585972309113, "rewards/rejected": -0.22319543361663818, "step": 6749 }, { "epoch": 4.668049792531121, "grad_norm": 8.897541999816895, "learning_rate": 2.9621945597049335e-05, "log_odds_chosen": 10.37528133392334, "log_odds_ratio": -0.0002767530968412757, "logits/chosen": -0.3038465976715088, "logits/rejected": -0.3836557865142822, "logps/chosen": -0.0015654441667720675, "logps/rejected": -2.045405626296997, "loss": 0.9581, "nll_loss": 0.23949073255062103, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015654441085644066, "rewards/margins": 0.20438402891159058, "rewards/rejected": -0.20454056560993195, "step": 6750 }, { "epoch": 4.6687413554633475, "grad_norm": 14.269709587097168, "learning_rate": 2.9618103580759184e-05, "log_odds_chosen": 9.976061820983887, "log_odds_ratio": -0.003950594458729029, "logits/chosen": -0.5603251457214355, "logits/rejected": -0.6679450869560242, "logps/chosen": -0.0018375938525423408, "logps/rejected": -1.8488361835479736, "loss": 0.9504, "nll_loss": 0.23719294369220734, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001837593736127019, "rewards/margins": 0.1846998631954193, "rewards/rejected": -0.18488362431526184, "step": 6751 }, { "epoch": 4.669432918395574, "grad_norm": 9.75061321258545, "learning_rate": 2.9614261564469036e-05, "log_odds_chosen": 10.370887756347656, "log_odds_ratio": -0.0001470722199883312, "logits/chosen": -0.7315962910652161, "logits/rejected": -0.8423492312431335, "logps/chosen": -0.0003737437364179641, "logps/rejected": -1.7472317218780518, "loss": 0.6407, "nll_loss": 0.16015413403511047, "rewards/accuracies": 1.0, "rewards/chosen": -3.737437509698793e-05, "rewards/margins": 0.1746857762336731, "rewards/rejected": -0.17472316324710846, "step": 6752 }, { "epoch": 4.670124481327801, "grad_norm": 6.04262113571167, "learning_rate": 2.9610419548178885e-05, "log_odds_chosen": 9.649259567260742, "log_odds_ratio": -0.00010987659334205091, "logits/chosen": -0.5185578465461731, "logits/rejected": -0.5149763822555542, "logps/chosen": -0.00021657871548086405, "logps/rejected": -1.3449153900146484, "loss": 0.8995, "nll_loss": 0.22486598789691925, "rewards/accuracies": 1.0, "rewards/chosen": -2.1657871911884286e-05, "rewards/margins": 0.1344698965549469, "rewards/rejected": -0.13449154794216156, "step": 6753 }, { "epoch": 4.670816044260028, "grad_norm": 8.17538070678711, "learning_rate": 2.9606577531888734e-05, "log_odds_chosen": 10.649566650390625, "log_odds_ratio": -5.23365379194729e-05, "logits/chosen": -0.3867025673389435, "logits/rejected": -0.5043997764587402, "logps/chosen": -0.00031243887497112155, "logps/rejected": -2.469163179397583, "loss": 1.3914, "nll_loss": 0.3478538990020752, "rewards/accuracies": 1.0, "rewards/chosen": -3.124388604192063e-05, "rewards/margins": 0.24688507616519928, "rewards/rejected": -0.24691632390022278, "step": 6754 }, { "epoch": 4.671507607192255, "grad_norm": 7.780572414398193, "learning_rate": 2.960273551559859e-05, "log_odds_chosen": 9.025650978088379, "log_odds_ratio": -0.013080338016152382, "logits/chosen": -0.8236268162727356, "logits/rejected": -0.9438607692718506, "logps/chosen": -0.008138904348015785, "logps/rejected": -1.5952751636505127, "loss": 0.9462, "nll_loss": 0.2352454960346222, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008138904813677073, "rewards/margins": 0.15871362388134003, "rewards/rejected": -0.1595275104045868, "step": 6755 }, { "epoch": 4.672199170124482, "grad_norm": 10.750596046447754, "learning_rate": 2.959889349930844e-05, "log_odds_chosen": 10.115555763244629, "log_odds_ratio": -0.00031970939016900957, "logits/chosen": -0.7900500297546387, "logits/rejected": -0.8546708822250366, "logps/chosen": -0.00033075647661462426, "logps/rejected": -1.9938645362854004, "loss": 1.102, "nll_loss": 0.27545589208602905, "rewards/accuracies": 1.0, "rewards/chosen": -3.307564475107938e-05, "rewards/margins": 0.1993533968925476, "rewards/rejected": -0.19938646256923676, "step": 6756 }, { "epoch": 4.672890733056708, "grad_norm": 7.898308277130127, "learning_rate": 2.959505148301829e-05, "log_odds_chosen": 9.88327693939209, "log_odds_ratio": -0.00012817922106478363, "logits/chosen": -0.6938830614089966, "logits/rejected": -0.7637455463409424, "logps/chosen": -0.00039965560426935554, "logps/rejected": -1.8069370985031128, "loss": 1.6343, "nll_loss": 0.4085546135902405, "rewards/accuracies": 1.0, "rewards/chosen": -3.996555824414827e-05, "rewards/margins": 0.18065372109413147, "rewards/rejected": -0.18069370090961456, "step": 6757 }, { "epoch": 4.673582295988935, "grad_norm": 12.937272071838379, "learning_rate": 2.9591209466728144e-05, "log_odds_chosen": 11.028230667114258, "log_odds_ratio": -1.9701441487995908e-05, "logits/chosen": -0.4903797507286072, "logits/rejected": -0.5154451131820679, "logps/chosen": -0.00011774554150179029, "logps/rejected": -2.0176382064819336, "loss": 0.9532, "nll_loss": 0.238307923078537, "rewards/accuracies": 1.0, "rewards/chosen": -1.177455487777479e-05, "rewards/margins": 0.20175206661224365, "rewards/rejected": -0.2017638385295868, "step": 6758 }, { "epoch": 4.674273858921162, "grad_norm": 9.832000732421875, "learning_rate": 2.9587367450437993e-05, "log_odds_chosen": 11.508541107177734, "log_odds_ratio": -1.2472798516682815e-05, "logits/chosen": -0.28365594148635864, "logits/rejected": -0.37036430835723877, "logps/chosen": -0.00010214448411716148, "logps/rejected": -2.215914249420166, "loss": 0.8492, "nll_loss": 0.21230274438858032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0214447684120387e-05, "rewards/margins": 0.22158122062683105, "rewards/rejected": -0.22159142792224884, "step": 6759 }, { "epoch": 4.674965421853389, "grad_norm": 18.991636276245117, "learning_rate": 2.9583525434147842e-05, "log_odds_chosen": 10.068572998046875, "log_odds_ratio": -0.00012414647790137678, "logits/chosen": -0.45053717494010925, "logits/rejected": -0.5302484035491943, "logps/chosen": -0.000786515069194138, "logps/rejected": -2.0815036296844482, "loss": 0.9152, "nll_loss": 0.22878772020339966, "rewards/accuracies": 1.0, "rewards/chosen": -7.865150837460533e-05, "rewards/margins": 0.2080717235803604, "rewards/rejected": -0.20815038681030273, "step": 6760 }, { "epoch": 4.675656984785616, "grad_norm": 10.791906356811523, "learning_rate": 2.9579683417857695e-05, "log_odds_chosen": 10.770875930786133, "log_odds_ratio": -4.5727851102128625e-05, "logits/chosen": -0.692563533782959, "logits/rejected": -0.785979151725769, "logps/chosen": -0.00026987557066604495, "logps/rejected": -2.103133201599121, "loss": 0.9404, "nll_loss": 0.23508614301681519, "rewards/accuracies": 1.0, "rewards/chosen": -2.6987556339008734e-05, "rewards/margins": 0.21028634905815125, "rewards/rejected": -0.2103133201599121, "step": 6761 }, { "epoch": 4.676348547717843, "grad_norm": 5.9802565574646, "learning_rate": 2.9575841401567544e-05, "log_odds_chosen": 10.031012535095215, "log_odds_ratio": -0.00016975995094981045, "logits/chosen": -0.717975378036499, "logits/rejected": -0.7597091197967529, "logps/chosen": -0.0008287794189527631, "logps/rejected": -2.2093329429626465, "loss": 0.8268, "nll_loss": 0.2066713571548462, "rewards/accuracies": 1.0, "rewards/chosen": -8.287794480565935e-05, "rewards/margins": 0.22085043787956238, "rewards/rejected": -0.22093330323696136, "step": 6762 }, { "epoch": 4.677040110650069, "grad_norm": 11.813547134399414, "learning_rate": 2.9571999385277393e-05, "log_odds_chosen": 10.200616836547852, "log_odds_ratio": -0.0007247006869874895, "logits/chosen": -0.20666056871414185, "logits/rejected": -0.32452890276908875, "logps/chosen": -0.0031705538276582956, "logps/rejected": -2.181896209716797, "loss": 0.8624, "nll_loss": 0.21553808450698853, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031705538276582956, "rewards/margins": 0.21787258982658386, "rewards/rejected": -0.21818962693214417, "step": 6763 }, { "epoch": 4.677731673582296, "grad_norm": 13.046178817749023, "learning_rate": 2.956815736898725e-05, "log_odds_chosen": 9.904052734375, "log_odds_ratio": -0.0009172922000288963, "logits/chosen": -0.33849143981933594, "logits/rejected": -0.4988017678260803, "logps/chosen": -0.0004887538962066174, "logps/rejected": -1.6745448112487793, "loss": 1.2527, "nll_loss": 0.31307268142700195, "rewards/accuracies": 1.0, "rewards/chosen": -4.887538671027869e-05, "rewards/margins": 0.1674056053161621, "rewards/rejected": -0.16745448112487793, "step": 6764 }, { "epoch": 4.678423236514523, "grad_norm": 16.583633422851562, "learning_rate": 2.9564315352697098e-05, "log_odds_chosen": 11.349023818969727, "log_odds_ratio": -2.5245026336051524e-05, "logits/chosen": -0.42005637288093567, "logits/rejected": -0.4679103493690491, "logps/chosen": -0.0001921855000546202, "logps/rejected": -2.653156280517578, "loss": 0.9384, "nll_loss": 0.23458930850028992, "rewards/accuracies": 1.0, "rewards/chosen": -1.9218550733057782e-05, "rewards/margins": 0.26529639959335327, "rewards/rejected": -0.2653156518936157, "step": 6765 }, { "epoch": 4.67911479944675, "grad_norm": 9.391258239746094, "learning_rate": 2.9560473336406947e-05, "log_odds_chosen": 10.527746200561523, "log_odds_ratio": -3.056988862226717e-05, "logits/chosen": -0.516139566898346, "logits/rejected": -0.5341812968254089, "logps/chosen": -0.0001992563484236598, "logps/rejected": -1.9834439754486084, "loss": 0.8513, "nll_loss": 0.21281671524047852, "rewards/accuracies": 1.0, "rewards/chosen": -1.992563556996174e-05, "rewards/margins": 0.19832447171211243, "rewards/rejected": -0.1983444094657898, "step": 6766 }, { "epoch": 4.679806362378977, "grad_norm": 7.875655651092529, "learning_rate": 2.9556631320116803e-05, "log_odds_chosen": 9.65318489074707, "log_odds_ratio": -0.001145646208897233, "logits/chosen": -0.3034501075744629, "logits/rejected": -0.3228107988834381, "logps/chosen": -0.0034056699369102716, "logps/rejected": -1.8219208717346191, "loss": 0.8401, "nll_loss": 0.2099209874868393, "rewards/accuracies": 1.0, "rewards/chosen": -0.00034056699951179326, "rewards/margins": 0.18185152113437653, "rewards/rejected": -0.18219208717346191, "step": 6767 }, { "epoch": 4.680497925311204, "grad_norm": 15.802404403686523, "learning_rate": 2.955278930382665e-05, "log_odds_chosen": 10.450215339660645, "log_odds_ratio": -0.00019292582874186337, "logits/chosen": -0.48566311597824097, "logits/rejected": -0.5424797534942627, "logps/chosen": -0.0002574539976194501, "logps/rejected": -2.1139955520629883, "loss": 1.3705, "nll_loss": 0.3426019549369812, "rewards/accuracies": 1.0, "rewards/chosen": -2.5745401217136532e-05, "rewards/margins": 0.21137382090091705, "rewards/rejected": -0.21139955520629883, "step": 6768 }, { "epoch": 4.68118948824343, "grad_norm": 11.465133666992188, "learning_rate": 2.95489472875365e-05, "log_odds_chosen": 10.353599548339844, "log_odds_ratio": -6.696392665617168e-05, "logits/chosen": -0.6689953207969666, "logits/rejected": -0.7008723020553589, "logps/chosen": -0.0002801914815790951, "logps/rejected": -1.9261640310287476, "loss": 1.2188, "nll_loss": 0.3047032356262207, "rewards/accuracies": 1.0, "rewards/chosen": -2.8019148885505274e-05, "rewards/margins": 0.19258840382099152, "rewards/rejected": -0.19261643290519714, "step": 6769 }, { "epoch": 4.681881051175657, "grad_norm": 5.23617696762085, "learning_rate": 2.9545105271246353e-05, "log_odds_chosen": 9.388136863708496, "log_odds_ratio": -0.0003933067782782018, "logits/chosen": -0.0530322790145874, "logits/rejected": -0.10469657182693481, "logps/chosen": -0.0008069298346526921, "logps/rejected": -1.6773535013198853, "loss": 0.7605, "nll_loss": 0.19009101390838623, "rewards/accuracies": 1.0, "rewards/chosen": -8.069298201007769e-05, "rewards/margins": 0.16765466332435608, "rewards/rejected": -0.16773535311222076, "step": 6770 }, { "epoch": 4.682572614107884, "grad_norm": 6.162405490875244, "learning_rate": 2.9541263254956202e-05, "log_odds_chosen": 10.414556503295898, "log_odds_ratio": -0.00015066277410369366, "logits/chosen": -0.8306690454483032, "logits/rejected": -0.8582077026367188, "logps/chosen": -0.00023762512137182057, "logps/rejected": -1.9316320419311523, "loss": 0.9783, "nll_loss": 0.24455897510051727, "rewards/accuracies": 1.0, "rewards/chosen": -2.376251359237358e-05, "rewards/margins": 0.1931394636631012, "rewards/rejected": -0.1931632161140442, "step": 6771 }, { "epoch": 4.683264177040111, "grad_norm": 8.889054298400879, "learning_rate": 2.953742123866605e-05, "log_odds_chosen": 10.634115219116211, "log_odds_ratio": -5.0110269512515515e-05, "logits/chosen": -0.12986613810062408, "logits/rejected": -0.28570762276649475, "logps/chosen": -0.0003423771704547107, "logps/rejected": -2.379361391067505, "loss": 1.523, "nll_loss": 0.3807332515716553, "rewards/accuracies": 1.0, "rewards/chosen": -3.423772068344988e-05, "rewards/margins": 0.23790189623832703, "rewards/rejected": -0.2379361242055893, "step": 6772 }, { "epoch": 4.683955739972338, "grad_norm": 9.803380966186523, "learning_rate": 2.9533579222375907e-05, "log_odds_chosen": 9.352560043334961, "log_odds_ratio": -0.04867429658770561, "logits/chosen": -0.12249897420406342, "logits/rejected": -0.13682352006435394, "logps/chosen": -0.2188890129327774, "logps/rejected": -1.6129618883132935, "loss": 1.1042, "nll_loss": 0.2711852788925171, "rewards/accuracies": 1.0, "rewards/chosen": -0.02188890241086483, "rewards/margins": 0.13940727710723877, "rewards/rejected": -0.16129618883132935, "step": 6773 }, { "epoch": 4.6846473029045645, "grad_norm": 9.887007713317871, "learning_rate": 2.9529737206085756e-05, "log_odds_chosen": 9.593646049499512, "log_odds_ratio": -0.00030946338665671647, "logits/chosen": -0.2552592158317566, "logits/rejected": -0.3207557499408722, "logps/chosen": -0.004805741831660271, "logps/rejected": -1.8524577617645264, "loss": 1.0016, "nll_loss": 0.2503683865070343, "rewards/accuracies": 1.0, "rewards/chosen": -0.000480574177345261, "rewards/margins": 0.18476520478725433, "rewards/rejected": -0.18524578213691711, "step": 6774 }, { "epoch": 4.685338865836791, "grad_norm": 9.28144359588623, "learning_rate": 2.9525895189795605e-05, "log_odds_chosen": 8.273391723632812, "log_odds_ratio": -0.039550162851810455, "logits/chosen": -0.5619184970855713, "logits/rejected": -0.6418176293373108, "logps/chosen": -0.014480775222182274, "logps/rejected": -1.4526318311691284, "loss": 2.157, "nll_loss": 0.5352879762649536, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014480776153504848, "rewards/margins": 0.14381510019302368, "rewards/rejected": -0.1452631652355194, "step": 6775 }, { "epoch": 4.686030428769018, "grad_norm": 11.07194995880127, "learning_rate": 2.952205317350546e-05, "log_odds_chosen": 9.83755111694336, "log_odds_ratio": -0.0002198005822720006, "logits/chosen": -0.5238659381866455, "logits/rejected": -0.5820973515510559, "logps/chosen": -0.00036986565100960433, "logps/rejected": -1.439079999923706, "loss": 1.0547, "nll_loss": 0.263644814491272, "rewards/accuracies": 1.0, "rewards/chosen": -3.6986566556151956e-05, "rewards/margins": 0.143871009349823, "rewards/rejected": -0.14390799403190613, "step": 6776 }, { "epoch": 4.686721991701245, "grad_norm": 5.638495445251465, "learning_rate": 2.951821115721531e-05, "log_odds_chosen": 10.339385986328125, "log_odds_ratio": -8.466203871648759e-05, "logits/chosen": -0.391963392496109, "logits/rejected": -0.4621970057487488, "logps/chosen": -0.00017859251238405704, "logps/rejected": -1.751267910003662, "loss": 1.2182, "nll_loss": 0.30454888939857483, "rewards/accuracies": 1.0, "rewards/chosen": -1.7859250874607824e-05, "rewards/margins": 0.17510893940925598, "rewards/rejected": -0.17512677609920502, "step": 6777 }, { "epoch": 4.687413554633472, "grad_norm": 7.306526184082031, "learning_rate": 2.951436914092516e-05, "log_odds_chosen": 8.994571685791016, "log_odds_ratio": -0.0004123014223296195, "logits/chosen": -0.48963162302970886, "logits/rejected": -0.5226364731788635, "logps/chosen": -0.0007051755674183369, "logps/rejected": -1.1294283866882324, "loss": 0.9442, "nll_loss": 0.23601235449314117, "rewards/accuracies": 1.0, "rewards/chosen": -7.051755528664216e-05, "rewards/margins": 0.11287231743335724, "rewards/rejected": -0.11294284462928772, "step": 6778 }, { "epoch": 4.688105117565699, "grad_norm": 10.07761287689209, "learning_rate": 2.951052712463501e-05, "log_odds_chosen": 11.039661407470703, "log_odds_ratio": -3.118627500953153e-05, "logits/chosen": -0.7747355699539185, "logits/rejected": -0.7683219909667969, "logps/chosen": -0.0003158682957291603, "logps/rejected": -2.298959493637085, "loss": 1.2224, "nll_loss": 0.3056063652038574, "rewards/accuracies": 1.0, "rewards/chosen": -3.1586831028107554e-05, "rewards/margins": 0.22986435890197754, "rewards/rejected": -0.2298959493637085, "step": 6779 }, { "epoch": 4.6887966804979255, "grad_norm": 10.291252136230469, "learning_rate": 2.950668510834486e-05, "log_odds_chosen": 11.137717247009277, "log_odds_ratio": -2.4501310690538958e-05, "logits/chosen": -0.6697853207588196, "logits/rejected": -0.7131739854812622, "logps/chosen": -0.00019688297470565885, "logps/rejected": -2.2473249435424805, "loss": 0.7454, "nll_loss": 0.18635433912277222, "rewards/accuracies": 1.0, "rewards/chosen": -1.9688297470565885e-05, "rewards/margins": 0.2247128188610077, "rewards/rejected": -0.22473251819610596, "step": 6780 }, { "epoch": 4.689488243430152, "grad_norm": 6.318768501281738, "learning_rate": 2.950284309205471e-05, "log_odds_chosen": 11.049291610717773, "log_odds_ratio": -2.7139243684359826e-05, "logits/chosen": -0.8393341302871704, "logits/rejected": -0.8682998418807983, "logps/chosen": -0.000242653870373033, "logps/rejected": -2.561972141265869, "loss": 0.9783, "nll_loss": 0.24456657469272614, "rewards/accuracies": 1.0, "rewards/chosen": -2.4265385945909657e-05, "rewards/margins": 0.25617295503616333, "rewards/rejected": -0.2561972141265869, "step": 6781 }, { "epoch": 4.690179806362379, "grad_norm": 12.883466720581055, "learning_rate": 2.9499001075764566e-05, "log_odds_chosen": 9.238396644592285, "log_odds_ratio": -0.0039947787299752235, "logits/chosen": -0.43368393182754517, "logits/rejected": -0.5059782266616821, "logps/chosen": -0.0029813749715685844, "logps/rejected": -2.0180764198303223, "loss": 0.9323, "nll_loss": 0.23266473412513733, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029813748551532626, "rewards/margins": 0.2015094757080078, "rewards/rejected": -0.2018076330423355, "step": 6782 }, { "epoch": 4.690871369294606, "grad_norm": 5.375931262969971, "learning_rate": 2.9495159059474415e-05, "log_odds_chosen": 8.770668029785156, "log_odds_ratio": -0.002029500436037779, "logits/chosen": -0.3985563814640045, "logits/rejected": -0.4183902144432068, "logps/chosen": -0.011122825555503368, "logps/rejected": -1.9470322132110596, "loss": 1.6924, "nll_loss": 0.4229055643081665, "rewards/accuracies": 1.0, "rewards/chosen": -0.00111228262539953, "rewards/margins": 0.1935909390449524, "rewards/rejected": -0.19470323622226715, "step": 6783 }, { "epoch": 4.691562932226833, "grad_norm": 9.022359848022461, "learning_rate": 2.9491317043184264e-05, "log_odds_chosen": 10.731500625610352, "log_odds_ratio": -4.688445551437326e-05, "logits/chosen": -0.31862491369247437, "logits/rejected": -0.39162176847457886, "logps/chosen": -0.0008599141729064286, "logps/rejected": -3.046576738357544, "loss": 1.2466, "nll_loss": 0.3116372227668762, "rewards/accuracies": 1.0, "rewards/chosen": -8.599141438025981e-05, "rewards/margins": 0.30457165837287903, "rewards/rejected": -0.3046576678752899, "step": 6784 }, { "epoch": 4.69225449515906, "grad_norm": 14.344029426574707, "learning_rate": 2.948747502689412e-05, "log_odds_chosen": 11.06399917602539, "log_odds_ratio": -3.6989782529417425e-05, "logits/chosen": -0.3671875, "logits/rejected": -0.46688222885131836, "logps/chosen": -0.00018051578081212938, "logps/rejected": -2.368514060974121, "loss": 0.9802, "nll_loss": 0.24503794312477112, "rewards/accuracies": 1.0, "rewards/chosen": -1.8051578081212938e-05, "rewards/margins": 0.2368333637714386, "rewards/rejected": -0.23685140907764435, "step": 6785 }, { "epoch": 4.6929460580912865, "grad_norm": 61.70487594604492, "learning_rate": 2.948363301060397e-05, "log_odds_chosen": 9.329105377197266, "log_odds_ratio": -0.15523530542850494, "logits/chosen": -0.5905463695526123, "logits/rejected": -0.6490134000778198, "logps/chosen": -0.009185466915369034, "logps/rejected": -1.9207974672317505, "loss": 1.0517, "nll_loss": 0.2474137246608734, "rewards/accuracies": 0.875, "rewards/chosen": -0.0009185466915369034, "rewards/margins": 0.19116121530532837, "rewards/rejected": -0.19207975268363953, "step": 6786 }, { "epoch": 4.693637621023513, "grad_norm": 15.269037246704102, "learning_rate": 2.9479790994313818e-05, "log_odds_chosen": 10.547459602355957, "log_odds_ratio": -0.0004707665357273072, "logits/chosen": -0.39600372314453125, "logits/rejected": -0.43267565965652466, "logps/chosen": -0.0004747907514683902, "logps/rejected": -2.174017906188965, "loss": 2.1666, "nll_loss": 0.541592538356781, "rewards/accuracies": 1.0, "rewards/chosen": -4.747907951241359e-05, "rewards/margins": 0.21735432744026184, "rewards/rejected": -0.21740183234214783, "step": 6787 }, { "epoch": 4.69432918395574, "grad_norm": 7.543539047241211, "learning_rate": 2.947594897802367e-05, "log_odds_chosen": 9.116312026977539, "log_odds_ratio": -0.00036397125222720206, "logits/chosen": -0.35737016797065735, "logits/rejected": -0.4064314067363739, "logps/chosen": -0.0007826816872693598, "logps/rejected": -1.6374640464782715, "loss": 0.819, "nll_loss": 0.20471778512001038, "rewards/accuracies": 1.0, "rewards/chosen": -7.826816727174446e-05, "rewards/margins": 0.16366812586784363, "rewards/rejected": -0.16374638676643372, "step": 6788 }, { "epoch": 4.695020746887967, "grad_norm": 10.115254402160645, "learning_rate": 2.947210696173352e-05, "log_odds_chosen": 10.277286529541016, "log_odds_ratio": -7.167382864281535e-05, "logits/chosen": -0.6997770071029663, "logits/rejected": -0.701981246471405, "logps/chosen": -0.0008305530645884573, "logps/rejected": -2.0024566650390625, "loss": 0.7632, "nll_loss": 0.19080215692520142, "rewards/accuracies": 1.0, "rewards/chosen": -8.30553108244203e-05, "rewards/margins": 0.2001626044511795, "rewards/rejected": -0.20024564862251282, "step": 6789 }, { "epoch": 4.695712309820194, "grad_norm": 12.732426643371582, "learning_rate": 2.9468264945443368e-05, "log_odds_chosen": 10.833272933959961, "log_odds_ratio": -9.282723476644605e-05, "logits/chosen": -0.6699355244636536, "logits/rejected": -0.5854648351669312, "logps/chosen": -0.0005036251386627555, "logps/rejected": -2.8181521892547607, "loss": 1.1987, "nll_loss": 0.29966798424720764, "rewards/accuracies": 1.0, "rewards/chosen": -5.036251968704164e-05, "rewards/margins": 0.2817648649215698, "rewards/rejected": -0.28181523084640503, "step": 6790 }, { "epoch": 4.696403872752421, "grad_norm": 7.614867210388184, "learning_rate": 2.9464422929153224e-05, "log_odds_chosen": 9.232556343078613, "log_odds_ratio": -0.0149430176243186, "logits/chosen": -0.5365698337554932, "logits/rejected": -0.542522132396698, "logps/chosen": -0.021838007494807243, "logps/rejected": -2.3092708587646484, "loss": 0.9792, "nll_loss": 0.2432941496372223, "rewards/accuracies": 1.0, "rewards/chosen": -0.002183800796046853, "rewards/margins": 0.228743314743042, "rewards/rejected": -0.23092710971832275, "step": 6791 }, { "epoch": 4.6970954356846475, "grad_norm": 11.789693832397461, "learning_rate": 2.9460580912863073e-05, "log_odds_chosen": 10.382354736328125, "log_odds_ratio": -0.00018708905554376543, "logits/chosen": -0.47791990637779236, "logits/rejected": -0.5692132115364075, "logps/chosen": -0.00045102040166966617, "logps/rejected": -2.1589295864105225, "loss": 0.9743, "nll_loss": 0.24356132745742798, "rewards/accuracies": 1.0, "rewards/chosen": -4.51020423497539e-05, "rewards/margins": 0.21584787964820862, "rewards/rejected": -0.2158929705619812, "step": 6792 }, { "epoch": 4.697786998616874, "grad_norm": 15.505138397216797, "learning_rate": 2.9456738896572922e-05, "log_odds_chosen": 10.668023109436035, "log_odds_ratio": -3.8151094486238435e-05, "logits/chosen": -0.767189621925354, "logits/rejected": -0.7929633855819702, "logps/chosen": -0.00047995190834626555, "logps/rejected": -2.420189380645752, "loss": 1.1575, "nll_loss": 0.28936582803726196, "rewards/accuracies": 1.0, "rewards/chosen": -4.799519228981808e-05, "rewards/margins": 0.241970956325531, "rewards/rejected": -0.2420189529657364, "step": 6793 }, { "epoch": 4.698478561549101, "grad_norm": 7.923763751983643, "learning_rate": 2.9452896880282778e-05, "log_odds_chosen": 10.526309967041016, "log_odds_ratio": -6.047174974810332e-05, "logits/chosen": -0.7259615659713745, "logits/rejected": -0.8289102911949158, "logps/chosen": -0.0004142590332776308, "logps/rejected": -2.5150129795074463, "loss": 0.9214, "nll_loss": 0.23034964501857758, "rewards/accuracies": 1.0, "rewards/chosen": -4.1425904782954603e-05, "rewards/margins": 0.25145989656448364, "rewards/rejected": -0.25150129199028015, "step": 6794 }, { "epoch": 4.699170124481328, "grad_norm": 8.245205879211426, "learning_rate": 2.9449054863992627e-05, "log_odds_chosen": 9.464591979980469, "log_odds_ratio": -0.00016946755931712687, "logits/chosen": -0.5645436644554138, "logits/rejected": -0.6196568608283997, "logps/chosen": -0.00036220205947756767, "logps/rejected": -1.5313994884490967, "loss": 0.859, "nll_loss": 0.2147316336631775, "rewards/accuracies": 1.0, "rewards/chosen": -3.622020813054405e-05, "rewards/margins": 0.15310373902320862, "rewards/rejected": -0.15313996374607086, "step": 6795 }, { "epoch": 4.699861687413555, "grad_norm": 7.095120906829834, "learning_rate": 2.9445212847702476e-05, "log_odds_chosen": 10.993948936462402, "log_odds_ratio": -0.0001342833274975419, "logits/chosen": -0.5555762052536011, "logits/rejected": -0.6703388094902039, "logps/chosen": -0.0022787712514400482, "logps/rejected": -2.9189109802246094, "loss": 0.7338, "nll_loss": 0.1834450513124466, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002278771426063031, "rewards/margins": 0.29166319966316223, "rewards/rejected": -0.29189109802246094, "step": 6796 }, { "epoch": 4.700553250345782, "grad_norm": 10.602357864379883, "learning_rate": 2.944137083141233e-05, "log_odds_chosen": 10.636397361755371, "log_odds_ratio": -3.553461283445358e-05, "logits/chosen": -0.6809238791465759, "logits/rejected": -0.6949537396430969, "logps/chosen": -0.0002272947458550334, "logps/rejected": -1.9710800647735596, "loss": 0.7617, "nll_loss": 0.19042199850082397, "rewards/accuracies": 1.0, "rewards/chosen": -2.2729476768290624e-05, "rewards/margins": 0.19708527624607086, "rewards/rejected": -0.19710800051689148, "step": 6797 }, { "epoch": 4.7012448132780085, "grad_norm": 7.388942718505859, "learning_rate": 2.9437528815122178e-05, "log_odds_chosen": 9.084571838378906, "log_odds_ratio": -0.0004327888018451631, "logits/chosen": -0.2653324604034424, "logits/rejected": -0.25310999155044556, "logps/chosen": -0.00107409933116287, "logps/rejected": -1.4760522842407227, "loss": 1.0311, "nll_loss": 0.25771990418434143, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001074099272955209, "rewards/margins": 0.14749783277511597, "rewards/rejected": -0.14760524034500122, "step": 6798 }, { "epoch": 4.701936376210235, "grad_norm": 9.694303512573242, "learning_rate": 2.9433686798832027e-05, "log_odds_chosen": 10.724264144897461, "log_odds_ratio": -9.620962373446673e-05, "logits/chosen": -0.4596264362335205, "logits/rejected": -0.6023463606834412, "logps/chosen": -0.00062222481938079, "logps/rejected": -2.387749195098877, "loss": 0.8183, "nll_loss": 0.20455913245677948, "rewards/accuracies": 1.0, "rewards/chosen": -6.222247611731291e-05, "rewards/margins": 0.23871271312236786, "rewards/rejected": -0.23877492547035217, "step": 6799 }, { "epoch": 4.702627939142462, "grad_norm": 8.140204429626465, "learning_rate": 2.9429844782541882e-05, "log_odds_chosen": 9.990387916564941, "log_odds_ratio": -6.377643148880452e-05, "logits/chosen": -0.6972765326499939, "logits/rejected": -0.6815809011459351, "logps/chosen": -0.000211845021112822, "logps/rejected": -1.403898000717163, "loss": 0.6581, "nll_loss": 0.1645159125328064, "rewards/accuracies": 1.0, "rewards/chosen": -2.118450174748432e-05, "rewards/margins": 0.14036861062049866, "rewards/rejected": -0.1403898000717163, "step": 6800 }, { "epoch": 4.703319502074689, "grad_norm": 9.126114845275879, "learning_rate": 2.942600276625173e-05, "log_odds_chosen": 9.252166748046875, "log_odds_ratio": -0.003878280520439148, "logits/chosen": -0.42757394909858704, "logits/rejected": -0.43310630321502686, "logps/chosen": -0.002153117908164859, "logps/rejected": -1.7966618537902832, "loss": 0.9991, "nll_loss": 0.2493833601474762, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021531182574108243, "rewards/margins": 0.1794508695602417, "rewards/rejected": -0.1796661913394928, "step": 6801 }, { "epoch": 4.704011065006916, "grad_norm": 10.610753059387207, "learning_rate": 2.942216074996158e-05, "log_odds_chosen": 10.365510940551758, "log_odds_ratio": -6.0238788137212396e-05, "logits/chosen": -0.6273984313011169, "logits/rejected": -0.7365648746490479, "logps/chosen": -0.0004422231577336788, "logps/rejected": -1.8178184032440186, "loss": 1.3457, "nll_loss": 0.336431086063385, "rewards/accuracies": 1.0, "rewards/chosen": -4.4222317228559405e-05, "rewards/margins": 0.18173763155937195, "rewards/rejected": -0.1817818433046341, "step": 6802 }, { "epoch": 4.704702627939143, "grad_norm": 7.150958061218262, "learning_rate": 2.941831873367143e-05, "log_odds_chosen": 9.965583801269531, "log_odds_ratio": -0.00029022121452726424, "logits/chosen": -0.0954047217965126, "logits/rejected": -0.19729548692703247, "logps/chosen": -0.0007090939325280488, "logps/rejected": -1.9928028583526611, "loss": 0.7298, "nll_loss": 0.18241362273693085, "rewards/accuracies": 1.0, "rewards/chosen": -7.09093947079964e-05, "rewards/margins": 0.19920937716960907, "rewards/rejected": -0.1992802768945694, "step": 6803 }, { "epoch": 4.7053941908713695, "grad_norm": 11.659815788269043, "learning_rate": 2.9414476717381285e-05, "log_odds_chosen": 11.124892234802246, "log_odds_ratio": -9.604761726222932e-05, "logits/chosen": -0.016843080520629883, "logits/rejected": -0.1653369814157486, "logps/chosen": -0.0004137184005230665, "logps/rejected": -2.80672025680542, "loss": 1.8558, "nll_loss": 0.4639298915863037, "rewards/accuracies": 1.0, "rewards/chosen": -4.1371840779902413e-05, "rewards/margins": 0.2806306481361389, "rewards/rejected": -0.28067201375961304, "step": 6804 }, { "epoch": 4.706085753803596, "grad_norm": 6.719081401824951, "learning_rate": 2.9410634701091134e-05, "log_odds_chosen": 9.30002498626709, "log_odds_ratio": -0.0004191567131783813, "logits/chosen": -0.4892222285270691, "logits/rejected": -0.5243061780929565, "logps/chosen": -0.0003415496030356735, "logps/rejected": -1.4499791860580444, "loss": 0.7867, "nll_loss": 0.19663241505622864, "rewards/accuracies": 1.0, "rewards/chosen": -3.415496030356735e-05, "rewards/margins": 0.14496377110481262, "rewards/rejected": -0.14499792456626892, "step": 6805 }, { "epoch": 4.706777316735823, "grad_norm": 10.404929161071777, "learning_rate": 2.9406792684800984e-05, "log_odds_chosen": 9.235153198242188, "log_odds_ratio": -0.01029971707612276, "logits/chosen": -0.48685115575790405, "logits/rejected": -0.5751073360443115, "logps/chosen": -0.00509566580876708, "logps/rejected": -2.1039867401123047, "loss": 0.7998, "nll_loss": 0.19892632961273193, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005095665692351758, "rewards/margins": 0.20988911390304565, "rewards/rejected": -0.21039867401123047, "step": 6806 }, { "epoch": 4.70746887966805, "grad_norm": 10.810821533203125, "learning_rate": 2.9402950668510836e-05, "log_odds_chosen": 8.567014694213867, "log_odds_ratio": -0.16073843836784363, "logits/chosen": -0.3772326707839966, "logits/rejected": -0.3111382722854614, "logps/chosen": -0.026545803993940353, "logps/rejected": -1.2393962144851685, "loss": 1.171, "nll_loss": 0.2766638696193695, "rewards/accuracies": 0.875, "rewards/chosen": -0.0026545801665633917, "rewards/margins": 0.12128505110740662, "rewards/rejected": -0.1239396333694458, "step": 6807 }, { "epoch": 4.708160442600277, "grad_norm": 9.465850830078125, "learning_rate": 2.9399108652220685e-05, "log_odds_chosen": 10.434772491455078, "log_odds_ratio": -8.693186100572348e-05, "logits/chosen": -0.27980881929397583, "logits/rejected": -0.2941112518310547, "logps/chosen": -0.0003163870715070516, "logps/rejected": -1.8002769947052002, "loss": 1.1756, "nll_loss": 0.29389774799346924, "rewards/accuracies": 1.0, "rewards/chosen": -3.163870860589668e-05, "rewards/margins": 0.1799960732460022, "rewards/rejected": -0.18002769351005554, "step": 6808 }, { "epoch": 4.708852005532504, "grad_norm": 13.45466423034668, "learning_rate": 2.9395266635930534e-05, "log_odds_chosen": 9.926715850830078, "log_odds_ratio": -0.00035792539711110294, "logits/chosen": -0.47436320781707764, "logits/rejected": -0.4171980321407318, "logps/chosen": -0.00326165952719748, "logps/rejected": -1.8485264778137207, "loss": 1.2901, "nll_loss": 0.32247692346572876, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003261659585405141, "rewards/margins": 0.1845264881849289, "rewards/rejected": -0.18485264480113983, "step": 6809 }, { "epoch": 4.70954356846473, "grad_norm": 7.703126430511475, "learning_rate": 2.939142461964039e-05, "log_odds_chosen": 11.158015251159668, "log_odds_ratio": -2.2229780370253138e-05, "logits/chosen": -0.27547189593315125, "logits/rejected": -0.5276872515678406, "logps/chosen": -8.773449371801689e-05, "logps/rejected": -1.8417258262634277, "loss": 0.9583, "nll_loss": 0.23958033323287964, "rewards/accuracies": 1.0, "rewards/chosen": -8.773448826104868e-06, "rewards/margins": 0.18416382372379303, "rewards/rejected": -0.18417257070541382, "step": 6810 }, { "epoch": 4.710235131396957, "grad_norm": 12.61095142364502, "learning_rate": 2.938758260335024e-05, "log_odds_chosen": 9.955721855163574, "log_odds_ratio": -0.00022129954595584422, "logits/chosen": -0.5432331562042236, "logits/rejected": -0.5477120876312256, "logps/chosen": -0.0053403074853122234, "logps/rejected": -2.3294897079467773, "loss": 1.3859, "nll_loss": 0.34646186232566833, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005340307834558189, "rewards/margins": 0.23241494596004486, "rewards/rejected": -0.23294898867607117, "step": 6811 }, { "epoch": 4.710926694329184, "grad_norm": 6.147278785705566, "learning_rate": 2.9383740587060088e-05, "log_odds_chosen": 7.364340305328369, "log_odds_ratio": -0.009784827940165997, "logits/chosen": -0.7493782639503479, "logits/rejected": -0.7773865461349487, "logps/chosen": -0.004916701465845108, "logps/rejected": -0.7641756534576416, "loss": 1.4489, "nll_loss": 0.36123567819595337, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004916701000183821, "rewards/margins": 0.07592590153217316, "rewards/rejected": -0.07641757279634476, "step": 6812 }, { "epoch": 4.711618257261411, "grad_norm": 9.089730262756348, "learning_rate": 2.9379898570769944e-05, "log_odds_chosen": 9.9685640335083, "log_odds_ratio": -0.00010080543870572001, "logits/chosen": -0.4879155158996582, "logits/rejected": -0.5909554362297058, "logps/chosen": -0.00021607377857435495, "logps/rejected": -1.5597730875015259, "loss": 1.2522, "nll_loss": 0.31304243206977844, "rewards/accuracies": 1.0, "rewards/chosen": -2.1607378585031256e-05, "rewards/margins": 0.15595568716526031, "rewards/rejected": -0.1559773087501526, "step": 6813 }, { "epoch": 4.712309820193638, "grad_norm": 9.110308647155762, "learning_rate": 2.9376056554479793e-05, "log_odds_chosen": 9.04574203491211, "log_odds_ratio": -0.0009579684119671583, "logits/chosen": -0.40703973174095154, "logits/rejected": -0.46325159072875977, "logps/chosen": -0.0026330589316785336, "logps/rejected": -1.9530531167984009, "loss": 0.9536, "nll_loss": 0.23829597234725952, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026330590480938554, "rewards/margins": 0.19504201412200928, "rewards/rejected": -0.19530531764030457, "step": 6814 }, { "epoch": 4.713001383125865, "grad_norm": 9.90314769744873, "learning_rate": 2.9372214538189642e-05, "log_odds_chosen": 9.989084243774414, "log_odds_ratio": -0.0004183893615845591, "logits/chosen": -0.8353537321090698, "logits/rejected": -0.8668037056922913, "logps/chosen": -0.0021147681400179863, "logps/rejected": -1.9255058765411377, "loss": 1.2717, "nll_loss": 0.31787949800491333, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002114767994498834, "rewards/margins": 0.19233912229537964, "rewards/rejected": -0.19255059957504272, "step": 6815 }, { "epoch": 4.713692946058091, "grad_norm": 14.415225982666016, "learning_rate": 2.9368372521899494e-05, "log_odds_chosen": 10.086376190185547, "log_odds_ratio": -0.0005009483429603279, "logits/chosen": -0.4449045658111572, "logits/rejected": -0.47074446082115173, "logps/chosen": -0.0013042137725278735, "logps/rejected": -2.4045162200927734, "loss": 1.4341, "nll_loss": 0.3584754467010498, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001304213801631704, "rewards/margins": 0.24032121896743774, "rewards/rejected": -0.2404516339302063, "step": 6816 }, { "epoch": 4.714384508990318, "grad_norm": 10.669299125671387, "learning_rate": 2.9364530505609343e-05, "log_odds_chosen": 11.092180252075195, "log_odds_ratio": -3.602875949582085e-05, "logits/chosen": -0.5740249156951904, "logits/rejected": -0.7104464173316956, "logps/chosen": -0.00017480396491009742, "logps/rejected": -2.3945038318634033, "loss": 0.8741, "nll_loss": 0.2185106873512268, "rewards/accuracies": 1.0, "rewards/chosen": -1.7480397218605503e-05, "rewards/margins": 0.2394329011440277, "rewards/rejected": -0.2394503802061081, "step": 6817 }, { "epoch": 4.715076071922545, "grad_norm": 12.476048469543457, "learning_rate": 2.9360688489319193e-05, "log_odds_chosen": 10.14078140258789, "log_odds_ratio": -0.0008515632362104952, "logits/chosen": -0.34306657314300537, "logits/rejected": -0.4307641386985779, "logps/chosen": -0.00048003694973886013, "logps/rejected": -2.012887716293335, "loss": 0.7408, "nll_loss": 0.18511009216308594, "rewards/accuracies": 1.0, "rewards/chosen": -4.800369788426906e-05, "rewards/margins": 0.20124077796936035, "rewards/rejected": -0.20128877460956573, "step": 6818 }, { "epoch": 4.715767634854772, "grad_norm": 5.907188892364502, "learning_rate": 2.935684647302905e-05, "log_odds_chosen": 10.725056648254395, "log_odds_ratio": -6.276977364905179e-05, "logits/chosen": -0.5912138223648071, "logits/rejected": -0.5068807601928711, "logps/chosen": -0.00017866448615677655, "logps/rejected": -2.0803234577178955, "loss": 1.3981, "nll_loss": 0.34951943159103394, "rewards/accuracies": 1.0, "rewards/chosen": -1.7866448615677655e-05, "rewards/margins": 0.20801447331905365, "rewards/rejected": -0.20803233981132507, "step": 6819 }, { "epoch": 4.716459197786999, "grad_norm": 11.002961158752441, "learning_rate": 2.9353004456738897e-05, "log_odds_chosen": 10.004314422607422, "log_odds_ratio": -0.0002287816460011527, "logits/chosen": -0.515632152557373, "logits/rejected": -0.7423189878463745, "logps/chosen": -0.00037495637661777437, "logps/rejected": -1.7557978630065918, "loss": 1.0634, "nll_loss": 0.26582497358322144, "rewards/accuracies": 1.0, "rewards/chosen": -3.7495636206585914e-05, "rewards/margins": 0.17554229497909546, "rewards/rejected": -0.17557978630065918, "step": 6820 }, { "epoch": 4.717150760719226, "grad_norm": 5.770873069763184, "learning_rate": 2.9349162440448746e-05, "log_odds_chosen": 9.757637023925781, "log_odds_ratio": -0.00031008111545816064, "logits/chosen": -0.37380141019821167, "logits/rejected": -0.4303475320339203, "logps/chosen": -0.00025031069526448846, "logps/rejected": -1.5108685493469238, "loss": 1.1455, "nll_loss": 0.2863425016403198, "rewards/accuracies": 1.0, "rewards/chosen": -2.5031067707459442e-05, "rewards/margins": 0.15106181800365448, "rewards/rejected": -0.15108685195446014, "step": 6821 }, { "epoch": 4.717842323651452, "grad_norm": 7.252089977264404, "learning_rate": 2.9345320424158602e-05, "log_odds_chosen": 10.901185989379883, "log_odds_ratio": -4.147412983002141e-05, "logits/chosen": -0.5456046462059021, "logits/rejected": -0.6253412961959839, "logps/chosen": -0.00021441985154524446, "logps/rejected": -2.2036609649658203, "loss": 0.9259, "nll_loss": 0.2314816415309906, "rewards/accuracies": 1.0, "rewards/chosen": -2.144198697351385e-05, "rewards/margins": 0.2203446626663208, "rewards/rejected": -0.22036609053611755, "step": 6822 }, { "epoch": 4.718533886583679, "grad_norm": 15.433568954467773, "learning_rate": 2.934147840786845e-05, "log_odds_chosen": 10.816705703735352, "log_odds_ratio": -6.392317300196737e-05, "logits/chosen": -0.1447380781173706, "logits/rejected": -0.2733813524246216, "logps/chosen": -7.367720536421984e-05, "logps/rejected": -1.4634238481521606, "loss": 1.1584, "nll_loss": 0.2896004319190979, "rewards/accuracies": 1.0, "rewards/chosen": -7.367720172624104e-06, "rewards/margins": 0.14633502066135406, "rewards/rejected": -0.14634239673614502, "step": 6823 }, { "epoch": 4.719225449515906, "grad_norm": 5.212158679962158, "learning_rate": 2.93376363915783e-05, "log_odds_chosen": 9.519055366516113, "log_odds_ratio": -0.0023928459268063307, "logits/chosen": -0.5571908950805664, "logits/rejected": -0.5788753032684326, "logps/chosen": -0.004182653967291117, "logps/rejected": -1.5381381511688232, "loss": 0.6618, "nll_loss": 0.16522231698036194, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004182653210591525, "rewards/margins": 0.15339556336402893, "rewards/rejected": -0.15381380915641785, "step": 6824 }, { "epoch": 4.719917012448133, "grad_norm": 6.5578203201293945, "learning_rate": 2.9333794375288153e-05, "log_odds_chosen": 9.255062103271484, "log_odds_ratio": -0.005615750327706337, "logits/chosen": -0.46286746859550476, "logits/rejected": -0.5253312587738037, "logps/chosen": -0.004062678664922714, "logps/rejected": -2.030621290206909, "loss": 1.6068, "nll_loss": 0.4011441767215729, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040626790723763406, "rewards/margins": 0.2026558518409729, "rewards/rejected": -0.20306211709976196, "step": 6825 }, { "epoch": 4.72060857538036, "grad_norm": 6.186581134796143, "learning_rate": 2.9329952358998002e-05, "log_odds_chosen": 10.776817321777344, "log_odds_ratio": -3.6418459785636514e-05, "logits/chosen": -0.2636483907699585, "logits/rejected": -0.3309072256088257, "logps/chosen": -0.0003143001231364906, "logps/rejected": -2.2573060989379883, "loss": 1.2208, "nll_loss": 0.3051925003528595, "rewards/accuracies": 1.0, "rewards/chosen": -3.143001231364906e-05, "rewards/margins": 0.22569917142391205, "rewards/rejected": -0.22573062777519226, "step": 6826 }, { "epoch": 4.7213001383125865, "grad_norm": 14.888287544250488, "learning_rate": 2.932611034270785e-05, "log_odds_chosen": 7.935850620269775, "log_odds_ratio": -0.18102018535137177, "logits/chosen": -0.5066199898719788, "logits/rejected": -0.3732776343822479, "logps/chosen": -0.09034513682126999, "logps/rejected": -2.89693021774292, "loss": 1.7254, "nll_loss": 0.41325053572654724, "rewards/accuracies": 0.875, "rewards/chosen": -0.009034513495862484, "rewards/margins": 0.2806585133075714, "rewards/rejected": -0.28969305753707886, "step": 6827 }, { "epoch": 4.721991701244813, "grad_norm": 6.673293590545654, "learning_rate": 2.9322268326417707e-05, "log_odds_chosen": 9.484441757202148, "log_odds_ratio": -0.00032633356750011444, "logits/chosen": -0.3358270823955536, "logits/rejected": -0.429345965385437, "logps/chosen": -0.015161263756453991, "logps/rejected": -2.182262420654297, "loss": 0.8388, "nll_loss": 0.20967671275138855, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015161264454945922, "rewards/margins": 0.21671010553836823, "rewards/rejected": -0.21822622418403625, "step": 6828 }, { "epoch": 4.72268326417704, "grad_norm": 11.503182411193848, "learning_rate": 2.9318426310127556e-05, "log_odds_chosen": 9.144638061523438, "log_odds_ratio": -0.0009379127295687795, "logits/chosen": -0.14240528643131256, "logits/rejected": -0.28116291761398315, "logps/chosen": -0.0031801690347492695, "logps/rejected": -2.391510486602783, "loss": 0.652, "nll_loss": 0.16289681196212769, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031801691511645913, "rewards/margins": 0.23883303999900818, "rewards/rejected": -0.23915104568004608, "step": 6829 }, { "epoch": 4.723374827109267, "grad_norm": 12.40114974975586, "learning_rate": 2.9314584293837405e-05, "log_odds_chosen": 9.611428260803223, "log_odds_ratio": -0.0002756261674221605, "logits/chosen": -0.016961220651865005, "logits/rejected": -0.09042391180992126, "logps/chosen": -0.0017954304348677397, "logps/rejected": -2.43148136138916, "loss": 1.3062, "nll_loss": 0.3265213966369629, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017954302893485874, "rewards/margins": 0.2429685890674591, "rewards/rejected": -0.24314813315868378, "step": 6830 }, { "epoch": 4.724066390041494, "grad_norm": 14.327370643615723, "learning_rate": 2.931074227754726e-05, "log_odds_chosen": 11.19952392578125, "log_odds_ratio": -2.174938163079787e-05, "logits/chosen": -0.6013371348381042, "logits/rejected": -0.5689268708229065, "logps/chosen": -0.00013965470134280622, "logps/rejected": -2.163695812225342, "loss": 0.7687, "nll_loss": 0.1921716034412384, "rewards/accuracies": 1.0, "rewards/chosen": -1.396546940668486e-05, "rewards/margins": 0.21635562181472778, "rewards/rejected": -0.2163695991039276, "step": 6831 }, { "epoch": 4.724757952973721, "grad_norm": 6.135366439819336, "learning_rate": 2.930690026125711e-05, "log_odds_chosen": 9.640559196472168, "log_odds_ratio": -9.722045069793239e-05, "logits/chosen": -0.5914327502250671, "logits/rejected": -0.5960256457328796, "logps/chosen": -0.00017935251526068896, "logps/rejected": -1.1278718709945679, "loss": 0.82, "nll_loss": 0.20498919486999512, "rewards/accuracies": 1.0, "rewards/chosen": -1.7935251889866777e-05, "rewards/margins": 0.11276926100254059, "rewards/rejected": -0.11278717964887619, "step": 6832 }, { "epoch": 4.7254495159059475, "grad_norm": 9.616642951965332, "learning_rate": 2.930305824496696e-05, "log_odds_chosen": 10.35329818725586, "log_odds_ratio": -0.00021150981774553657, "logits/chosen": -0.3476685881614685, "logits/rejected": -0.427687406539917, "logps/chosen": -0.00041303792386315763, "logps/rejected": -2.0688071250915527, "loss": 1.236, "nll_loss": 0.3089710474014282, "rewards/accuracies": 1.0, "rewards/chosen": -4.130379602429457e-05, "rewards/margins": 0.2068394124507904, "rewards/rejected": -0.20688071846961975, "step": 6833 }, { "epoch": 4.726141078838174, "grad_norm": 9.028099060058594, "learning_rate": 2.929921622867681e-05, "log_odds_chosen": 10.01650619506836, "log_odds_ratio": -0.00020999395928811282, "logits/chosen": -0.3519641160964966, "logits/rejected": -0.47386786341667175, "logps/chosen": -0.0007251882343553007, "logps/rejected": -2.154243230819702, "loss": 0.8524, "nll_loss": 0.21308580040931702, "rewards/accuracies": 1.0, "rewards/chosen": -7.251882925629616e-05, "rewards/margins": 0.21535180509090424, "rewards/rejected": -0.2154243290424347, "step": 6834 }, { "epoch": 4.726832641770401, "grad_norm": 6.53804874420166, "learning_rate": 2.929537421238666e-05, "log_odds_chosen": 9.094326972961426, "log_odds_ratio": -0.0007695475942455232, "logits/chosen": -0.5255453586578369, "logits/rejected": -0.4840846061706543, "logps/chosen": -0.007395816966891289, "logps/rejected": -2.5426483154296875, "loss": 0.8317, "nll_loss": 0.20785032212734222, "rewards/accuracies": 1.0, "rewards/chosen": -0.000739581766538322, "rewards/margins": 0.2535252869129181, "rewards/rejected": -0.25426483154296875, "step": 6835 }, { "epoch": 4.727524204702628, "grad_norm": 13.52680778503418, "learning_rate": 2.929153219609651e-05, "log_odds_chosen": 9.964470863342285, "log_odds_ratio": -0.00014706116053275764, "logits/chosen": -0.9494578838348389, "logits/rejected": -0.9703213572502136, "logps/chosen": -0.01268111914396286, "logps/rejected": -2.3685264587402344, "loss": 1.1147, "nll_loss": 0.2786535620689392, "rewards/accuracies": 1.0, "rewards/chosen": -0.001268112100660801, "rewards/margins": 0.23558452725410461, "rewards/rejected": -0.23685264587402344, "step": 6836 }, { "epoch": 4.728215767634855, "grad_norm": 8.254740715026855, "learning_rate": 2.9287690179806365e-05, "log_odds_chosen": 9.981782913208008, "log_odds_ratio": -0.00010545213444856927, "logits/chosen": -0.5261760354042053, "logits/rejected": -0.6000370979309082, "logps/chosen": -0.001380457542836666, "logps/rejected": -2.0302622318267822, "loss": 0.9971, "nll_loss": 0.24925638735294342, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013804573973175138, "rewards/margins": 0.20288817584514618, "rewards/rejected": -0.20302622020244598, "step": 6837 }, { "epoch": 4.728907330567082, "grad_norm": 14.098108291625977, "learning_rate": 2.9283848163516214e-05, "log_odds_chosen": 10.196685791015625, "log_odds_ratio": -0.00651351734995842, "logits/chosen": -0.5857874751091003, "logits/rejected": -0.6654095649719238, "logps/chosen": -0.002720482647418976, "logps/rejected": -1.7905726432800293, "loss": 0.8894, "nll_loss": 0.22168682515621185, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002720482589211315, "rewards/margins": 0.17878523468971252, "rewards/rejected": -0.1790572851896286, "step": 6838 }, { "epoch": 4.7295988934993085, "grad_norm": 15.732999801635742, "learning_rate": 2.9280006147226063e-05, "log_odds_chosen": 10.79551887512207, "log_odds_ratio": -0.00019581010565161705, "logits/chosen": -0.34896230697631836, "logits/rejected": -0.44549262523651123, "logps/chosen": -0.0003223324310965836, "logps/rejected": -2.4413161277770996, "loss": 1.4394, "nll_loss": 0.35984066128730774, "rewards/accuracies": 1.0, "rewards/chosen": -3.223324165446684e-05, "rewards/margins": 0.24409937858581543, "rewards/rejected": -0.24413160979747772, "step": 6839 }, { "epoch": 4.730290456431535, "grad_norm": 6.080704689025879, "learning_rate": 2.927616413093592e-05, "log_odds_chosen": 11.242830276489258, "log_odds_ratio": -8.2915517850779e-05, "logits/chosen": -0.5376768112182617, "logits/rejected": -0.5856378078460693, "logps/chosen": -0.0006905734771862626, "logps/rejected": -3.2126030921936035, "loss": 0.9296, "nll_loss": 0.23237961530685425, "rewards/accuracies": 1.0, "rewards/chosen": -6.90573506290093e-05, "rewards/margins": 0.3211912512779236, "rewards/rejected": -0.32126033306121826, "step": 6840 }, { "epoch": 4.730982019363762, "grad_norm": 10.972521781921387, "learning_rate": 2.9272322114645768e-05, "log_odds_chosen": 10.36833381652832, "log_odds_ratio": -4.3805652239825577e-05, "logits/chosen": -0.86460942029953, "logits/rejected": -0.9351431131362915, "logps/chosen": -0.0007878030883148313, "logps/rejected": -2.054579019546509, "loss": 1.3458, "nll_loss": 0.3364557921886444, "rewards/accuracies": 1.0, "rewards/chosen": -7.87803073762916e-05, "rewards/margins": 0.20537912845611572, "rewards/rejected": -0.2054579108953476, "step": 6841 }, { "epoch": 4.731673582295989, "grad_norm": 9.36708927154541, "learning_rate": 2.9268480098355617e-05, "log_odds_chosen": 8.909065246582031, "log_odds_ratio": -0.009562905877828598, "logits/chosen": -0.41860371828079224, "logits/rejected": -0.43275922536849976, "logps/chosen": -0.005152152851223946, "logps/rejected": -1.9994559288024902, "loss": 1.0685, "nll_loss": 0.2661742568016052, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005152152734808624, "rewards/margins": 0.19943037629127502, "rewards/rejected": -0.1999455988407135, "step": 6842 }, { "epoch": 4.732365145228216, "grad_norm": 17.818878173828125, "learning_rate": 2.926463808206547e-05, "log_odds_chosen": 11.118095397949219, "log_odds_ratio": -2.2907794118509628e-05, "logits/chosen": -0.6833236813545227, "logits/rejected": -0.7515619993209839, "logps/chosen": -0.0003684722469188273, "logps/rejected": -2.491759777069092, "loss": 0.9781, "nll_loss": 0.24452534317970276, "rewards/accuracies": 1.0, "rewards/chosen": -3.684722105390392e-05, "rewards/margins": 0.24913913011550903, "rewards/rejected": -0.24917598068714142, "step": 6843 }, { "epoch": 4.733056708160443, "grad_norm": 9.49808406829834, "learning_rate": 2.926079606577532e-05, "log_odds_chosen": 10.65138053894043, "log_odds_ratio": -4.175342837697826e-05, "logits/chosen": -0.7912575006484985, "logits/rejected": -0.7968187928199768, "logps/chosen": -0.0003375337109901011, "logps/rejected": -2.363999128341675, "loss": 1.4575, "nll_loss": 0.3643767833709717, "rewards/accuracies": 1.0, "rewards/chosen": -3.375337109901011e-05, "rewards/margins": 0.2363661676645279, "rewards/rejected": -0.23639993369579315, "step": 6844 }, { "epoch": 4.7337482710926695, "grad_norm": 5.025328159332275, "learning_rate": 2.9256954049485168e-05, "log_odds_chosen": 9.391399383544922, "log_odds_ratio": -0.0013213662896305323, "logits/chosen": -0.6365231871604919, "logits/rejected": -0.6581612825393677, "logps/chosen": -0.0017269393429160118, "logps/rejected": -2.219834089279175, "loss": 1.985, "nll_loss": 0.4961143434047699, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017269395175389946, "rewards/margins": 0.2218107283115387, "rewards/rejected": -0.221983402967453, "step": 6845 }, { "epoch": 4.734439834024896, "grad_norm": 9.549975395202637, "learning_rate": 2.9253112033195024e-05, "log_odds_chosen": 9.500479698181152, "log_odds_ratio": -0.0008104207227006555, "logits/chosen": -0.44024544954299927, "logits/rejected": -0.5364531874656677, "logps/chosen": -0.001147839822806418, "logps/rejected": -1.707168459892273, "loss": 1.1765, "nll_loss": 0.2940320372581482, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011478398664621636, "rewards/margins": 0.17060205340385437, "rewards/rejected": -0.17071683704853058, "step": 6846 }, { "epoch": 4.735131396957123, "grad_norm": 7.652698993682861, "learning_rate": 2.9249270016904873e-05, "log_odds_chosen": 10.424861907958984, "log_odds_ratio": -5.540785423363559e-05, "logits/chosen": -0.8006737232208252, "logits/rejected": -0.8251084089279175, "logps/chosen": -0.0002055955264950171, "logps/rejected": -1.7508656978607178, "loss": 0.7046, "nll_loss": 0.1761522740125656, "rewards/accuracies": 1.0, "rewards/chosen": -2.0559553377097473e-05, "rewards/margins": 0.1750660091638565, "rewards/rejected": -0.17508655786514282, "step": 6847 }, { "epoch": 4.73582295988935, "grad_norm": 11.136614799499512, "learning_rate": 2.9245428000614722e-05, "log_odds_chosen": 11.116748809814453, "log_odds_ratio": -1.792412695067469e-05, "logits/chosen": -0.6997305154800415, "logits/rejected": -0.7258143424987793, "logps/chosen": -0.00017949882021639496, "logps/rejected": -2.3606107234954834, "loss": 1.0916, "nll_loss": 0.27288857102394104, "rewards/accuracies": 1.0, "rewards/chosen": -1.7949882021639496e-05, "rewards/margins": 0.23604314029216766, "rewards/rejected": -0.23606108129024506, "step": 6848 }, { "epoch": 4.736514522821577, "grad_norm": 6.447893142700195, "learning_rate": 2.9241585984324578e-05, "log_odds_chosen": 9.89136791229248, "log_odds_ratio": -0.00013746600598096848, "logits/chosen": -0.31088072061538696, "logits/rejected": -0.37981265783309937, "logps/chosen": -0.0003857373376376927, "logps/rejected": -1.7355481386184692, "loss": 0.8318, "nll_loss": 0.20794863998889923, "rewards/accuracies": 1.0, "rewards/chosen": -3.857373303617351e-05, "rewards/margins": 0.17351624369621277, "rewards/rejected": -0.17355480790138245, "step": 6849 }, { "epoch": 4.737206085753804, "grad_norm": 4.731841564178467, "learning_rate": 2.9237743968034427e-05, "log_odds_chosen": 10.951615333557129, "log_odds_ratio": -6.95253474987112e-05, "logits/chosen": -0.2506423592567444, "logits/rejected": -0.26202479004859924, "logps/chosen": -0.00010808245133375749, "logps/rejected": -1.7184841632843018, "loss": 0.5642, "nll_loss": 0.14103692770004272, "rewards/accuracies": 1.0, "rewards/chosen": -1.080824586097151e-05, "rewards/margins": 0.17183761298656464, "rewards/rejected": -0.17184841632843018, "step": 6850 }, { "epoch": 4.7378976486860305, "grad_norm": 6.2841339111328125, "learning_rate": 2.9233901951744276e-05, "log_odds_chosen": 9.07808780670166, "log_odds_ratio": -0.0015401438577100635, "logits/chosen": -0.6515994071960449, "logits/rejected": -0.670263409614563, "logps/chosen": -0.0038971693720668554, "logps/rejected": -1.562159776687622, "loss": 0.9586, "nll_loss": 0.23948949575424194, "rewards/accuracies": 1.0, "rewards/chosen": -0.000389716966310516, "rewards/margins": 0.15582627058029175, "rewards/rejected": -0.15621599555015564, "step": 6851 }, { "epoch": 4.738589211618257, "grad_norm": 14.31385612487793, "learning_rate": 2.9230059935454128e-05, "log_odds_chosen": 10.582845687866211, "log_odds_ratio": -0.00012897816486656666, "logits/chosen": -0.2380611151456833, "logits/rejected": -0.2947745621204376, "logps/chosen": -0.0022557827178388834, "logps/rejected": -2.843648910522461, "loss": 1.2785, "nll_loss": 0.31961789727211, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022557828924618661, "rewards/margins": 0.28413933515548706, "rewards/rejected": -0.28436487913131714, "step": 6852 }, { "epoch": 4.739280774550484, "grad_norm": 18.392780303955078, "learning_rate": 2.9226217919163977e-05, "log_odds_chosen": 9.665806770324707, "log_odds_ratio": -0.0036647897213697433, "logits/chosen": 0.08682667464017868, "logits/rejected": 0.012252740561962128, "logps/chosen": -0.002794300438836217, "logps/rejected": -1.483237624168396, "loss": 0.7833, "nll_loss": 0.19546152651309967, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027943006716668606, "rewards/margins": 0.14804433286190033, "rewards/rejected": -0.14832375943660736, "step": 6853 }, { "epoch": 4.739972337482711, "grad_norm": 8.896881103515625, "learning_rate": 2.9222375902873826e-05, "log_odds_chosen": 9.814126968383789, "log_odds_ratio": -0.002193056046962738, "logits/chosen": -0.3735635280609131, "logits/rejected": -0.40650835633277893, "logps/chosen": -0.016320127993822098, "logps/rejected": -2.8269453048706055, "loss": 0.9176, "nll_loss": 0.22918462753295898, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016320126596838236, "rewards/margins": 0.2810625433921814, "rewards/rejected": -0.2826945185661316, "step": 6854 }, { "epoch": 4.740663900414938, "grad_norm": 12.259506225585938, "learning_rate": 2.9218533886583682e-05, "log_odds_chosen": 10.840656280517578, "log_odds_ratio": -3.960132016800344e-05, "logits/chosen": -0.402145117521286, "logits/rejected": -0.424393892288208, "logps/chosen": -0.00025384893524460495, "logps/rejected": -1.978333592414856, "loss": 1.0334, "nll_loss": 0.25833389163017273, "rewards/accuracies": 1.0, "rewards/chosen": -2.5384893888258375e-05, "rewards/margins": 0.19780796766281128, "rewards/rejected": -0.1978333741426468, "step": 6855 }, { "epoch": 4.741355463347165, "grad_norm": 10.188279151916504, "learning_rate": 2.921469187029353e-05, "log_odds_chosen": 10.682395935058594, "log_odds_ratio": -4.144748891121708e-05, "logits/chosen": -0.2968418598175049, "logits/rejected": -0.28772151470184326, "logps/chosen": -0.00025522822397761047, "logps/rejected": -2.024625301361084, "loss": 1.1162, "nll_loss": 0.27904069423675537, "rewards/accuracies": 1.0, "rewards/chosen": -2.5522822397761047e-05, "rewards/margins": 0.20243701338768005, "rewards/rejected": -0.20246252417564392, "step": 6856 }, { "epoch": 4.7420470262793915, "grad_norm": 12.18539810180664, "learning_rate": 2.921084985400338e-05, "log_odds_chosen": 10.338611602783203, "log_odds_ratio": -6.693832256132737e-05, "logits/chosen": -0.431133896112442, "logits/rejected": -0.46184462308883667, "logps/chosen": -0.00026900594821199775, "logps/rejected": -1.8532644510269165, "loss": 0.7654, "nll_loss": 0.1913406252861023, "rewards/accuracies": 1.0, "rewards/chosen": -2.690059773158282e-05, "rewards/margins": 0.18529954552650452, "rewards/rejected": -0.1853264570236206, "step": 6857 }, { "epoch": 4.742738589211618, "grad_norm": 12.808045387268066, "learning_rate": 2.9207007837713236e-05, "log_odds_chosen": 7.228079795837402, "log_odds_ratio": -0.03855385258793831, "logits/chosen": -0.7129365801811218, "logits/rejected": -0.6985089778900146, "logps/chosen": -0.05296050012111664, "logps/rejected": -1.012087106704712, "loss": 0.9236, "nll_loss": 0.2270495742559433, "rewards/accuracies": 1.0, "rewards/chosen": -0.005296050570905209, "rewards/margins": 0.09591265767812729, "rewards/rejected": -0.10120870918035507, "step": 6858 }, { "epoch": 4.743430152143845, "grad_norm": 12.793832778930664, "learning_rate": 2.9203165821423085e-05, "log_odds_chosen": 9.732256889343262, "log_odds_ratio": -0.0052786460146307945, "logits/chosen": -0.6305315494537354, "logits/rejected": -0.7240175008773804, "logps/chosen": -0.0033087388146668673, "logps/rejected": -1.9252982139587402, "loss": 1.2624, "nll_loss": 0.31506380438804626, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003308738814666867, "rewards/margins": 0.1921989470720291, "rewards/rejected": -0.1925298124551773, "step": 6859 }, { "epoch": 4.744121715076072, "grad_norm": 12.654400825500488, "learning_rate": 2.9199323805132934e-05, "log_odds_chosen": 9.324658393859863, "log_odds_ratio": -0.024602821096777916, "logits/chosen": -0.8696132898330688, "logits/rejected": -0.9861457347869873, "logps/chosen": -0.0066756573505699635, "logps/rejected": -2.0346033573150635, "loss": 1.4154, "nll_loss": 0.3513864278793335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006675656768493354, "rewards/margins": 0.20279279351234436, "rewards/rejected": -0.20346036553382874, "step": 6860 }, { "epoch": 4.744813278008299, "grad_norm": 8.300000190734863, "learning_rate": 2.9195481788842787e-05, "log_odds_chosen": 10.4987154006958, "log_odds_ratio": -5.7042387197725475e-05, "logits/chosen": -0.3381150960922241, "logits/rejected": -0.46609026193618774, "logps/chosen": -0.00037316203815862536, "logps/rejected": -1.864619493484497, "loss": 0.7372, "nll_loss": 0.18428348004817963, "rewards/accuracies": 1.0, "rewards/chosen": -3.7316207453841344e-05, "rewards/margins": 0.1864246428012848, "rewards/rejected": -0.18646195530891418, "step": 6861 }, { "epoch": 4.745504840940526, "grad_norm": 7.251094341278076, "learning_rate": 2.9191639772552636e-05, "log_odds_chosen": 11.42138385772705, "log_odds_ratio": -1.9719334886758588e-05, "logits/chosen": -0.4525086283683777, "logits/rejected": -0.44969817996025085, "logps/chosen": -0.00018888995691668242, "logps/rejected": -2.7225518226623535, "loss": 0.6575, "nll_loss": 0.16436880826950073, "rewards/accuracies": 1.0, "rewards/chosen": -1.888899532787036e-05, "rewards/margins": 0.27223631739616394, "rewards/rejected": -0.27225518226623535, "step": 6862 }, { "epoch": 4.746196403872752, "grad_norm": 8.857154846191406, "learning_rate": 2.9187797756262485e-05, "log_odds_chosen": 9.985649108886719, "log_odds_ratio": -5.7827353884931654e-05, "logits/chosen": -0.5242704749107361, "logits/rejected": -0.5169914364814758, "logps/chosen": -0.00016215082723647356, "logps/rejected": -1.2925277948379517, "loss": 0.8603, "nll_loss": 0.21506929397583008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6215082723647356e-05, "rewards/margins": 0.12923656404018402, "rewards/rejected": -0.12925279140472412, "step": 6863 }, { "epoch": 4.746887966804979, "grad_norm": 9.788476943969727, "learning_rate": 2.918395573997234e-05, "log_odds_chosen": 10.543601036071777, "log_odds_ratio": -6.150496483314782e-05, "logits/chosen": -0.5418503880500793, "logits/rejected": -0.599971354007721, "logps/chosen": -0.00018318725051358342, "logps/rejected": -1.932619333267212, "loss": 0.7238, "nll_loss": 0.1809331774711609, "rewards/accuracies": 1.0, "rewards/chosen": -1.8318725778954104e-05, "rewards/margins": 0.193243607878685, "rewards/rejected": -0.19326192140579224, "step": 6864 }, { "epoch": 4.747579529737206, "grad_norm": 9.583395957946777, "learning_rate": 2.918011372368219e-05, "log_odds_chosen": 8.546165466308594, "log_odds_ratio": -0.002676730277016759, "logits/chosen": -0.3589290380477905, "logits/rejected": -0.3195071220397949, "logps/chosen": -0.0016474723815917969, "logps/rejected": -1.278213620185852, "loss": 1.1595, "nll_loss": 0.28960418701171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001647472381591797, "rewards/margins": 0.12765660881996155, "rewards/rejected": -0.12782135605812073, "step": 6865 }, { "epoch": 4.748271092669433, "grad_norm": 9.593999862670898, "learning_rate": 2.917627170739204e-05, "log_odds_chosen": 10.382241249084473, "log_odds_ratio": -0.00010884056246140972, "logits/chosen": -0.20423482358455658, "logits/rejected": -0.24997065961360931, "logps/chosen": -0.0003937993897125125, "logps/rejected": -2.322338581085205, "loss": 1.2207, "nll_loss": 0.3051546514034271, "rewards/accuracies": 1.0, "rewards/chosen": -3.937993824365549e-05, "rewards/margins": 0.23219448328018188, "rewards/rejected": -0.23223388195037842, "step": 6866 }, { "epoch": 4.74896265560166, "grad_norm": 5.929619789123535, "learning_rate": 2.9172429691101894e-05, "log_odds_chosen": 9.464195251464844, "log_odds_ratio": -0.0007255043019540608, "logits/chosen": -0.45147010684013367, "logits/rejected": -0.62174391746521, "logps/chosen": -0.006944280583411455, "logps/rejected": -2.5819144248962402, "loss": 1.4374, "nll_loss": 0.35927289724349976, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006944281049072742, "rewards/margins": 0.25749704241752625, "rewards/rejected": -0.25819146633148193, "step": 6867 }, { "epoch": 4.749654218533887, "grad_norm": 6.857417583465576, "learning_rate": 2.9168587674811744e-05, "log_odds_chosen": 10.632462501525879, "log_odds_ratio": -0.0001170021205325611, "logits/chosen": -0.8040260076522827, "logits/rejected": -0.8201639652252197, "logps/chosen": -0.001678706961683929, "logps/rejected": -2.558511734008789, "loss": 1.5705, "nll_loss": 0.3926093280315399, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001678706903476268, "rewards/margins": 0.2556833028793335, "rewards/rejected": -0.255851149559021, "step": 6868 }, { "epoch": 4.750345781466113, "grad_norm": 9.932305335998535, "learning_rate": 2.9164745658521593e-05, "log_odds_chosen": 10.164902687072754, "log_odds_ratio": -0.000663359765894711, "logits/chosen": -0.8776459693908691, "logits/rejected": -0.9496436715126038, "logps/chosen": -0.0014868256403133273, "logps/rejected": -1.9420034885406494, "loss": 1.1408, "nll_loss": 0.2851316034793854, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014868256403133273, "rewards/margins": 0.19405166804790497, "rewards/rejected": -0.194200336933136, "step": 6869 }, { "epoch": 4.75103734439834, "grad_norm": 11.761977195739746, "learning_rate": 2.9160903642231445e-05, "log_odds_chosen": 11.273965835571289, "log_odds_ratio": -1.961475209100172e-05, "logits/chosen": -0.9739042520523071, "logits/rejected": -1.0394816398620605, "logps/chosen": -0.00017334794392809272, "logps/rejected": -2.333815574645996, "loss": 1.4907, "nll_loss": 0.37267541885375977, "rewards/accuracies": 1.0, "rewards/chosen": -1.7334794392809272e-05, "rewards/margins": 0.23336420953273773, "rewards/rejected": -0.23338152468204498, "step": 6870 }, { "epoch": 4.751728907330567, "grad_norm": 4.890721321105957, "learning_rate": 2.9157061625941294e-05, "log_odds_chosen": 9.884302139282227, "log_odds_ratio": -0.0005739243933930993, "logits/chosen": -0.5567978620529175, "logits/rejected": -0.5279030799865723, "logps/chosen": -0.0034497843589633703, "logps/rejected": -2.587547540664673, "loss": 1.3542, "nll_loss": 0.3385036587715149, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003449784417171031, "rewards/margins": 0.2584097981452942, "rewards/rejected": -0.25875476002693176, "step": 6871 }, { "epoch": 4.752420470262794, "grad_norm": 8.71142578125, "learning_rate": 2.9153219609651143e-05, "log_odds_chosen": 10.85763931274414, "log_odds_ratio": -3.525703868945129e-05, "logits/chosen": -0.7038207054138184, "logits/rejected": -0.6762052178382874, "logps/chosen": -0.0002038024686044082, "logps/rejected": -2.1100528240203857, "loss": 1.3561, "nll_loss": 0.33902662992477417, "rewards/accuracies": 1.0, "rewards/chosen": -2.0380248315632343e-05, "rewards/margins": 0.21098490059375763, "rewards/rejected": -0.21100527048110962, "step": 6872 }, { "epoch": 4.753112033195021, "grad_norm": 16.125587463378906, "learning_rate": 2.9149377593361e-05, "log_odds_chosen": 10.835062026977539, "log_odds_ratio": -8.323652582475916e-05, "logits/chosen": -0.7778855562210083, "logits/rejected": -0.897720217704773, "logps/chosen": -0.0006206457619555295, "logps/rejected": -2.8107075691223145, "loss": 1.0211, "nll_loss": 0.255276620388031, "rewards/accuracies": 1.0, "rewards/chosen": -6.20645732851699e-05, "rewards/margins": 0.28100869059562683, "rewards/rejected": -0.281070739030838, "step": 6873 }, { "epoch": 4.753803596127248, "grad_norm": 5.896915435791016, "learning_rate": 2.9145535577070848e-05, "log_odds_chosen": 10.160299301147461, "log_odds_ratio": -5.5131924455054104e-05, "logits/chosen": -0.7155916690826416, "logits/rejected": -0.6823525428771973, "logps/chosen": -0.0005040961550548673, "logps/rejected": -2.3147830963134766, "loss": 0.819, "nll_loss": 0.20473623275756836, "rewards/accuracies": 1.0, "rewards/chosen": -5.040961696067825e-05, "rewards/margins": 0.23142790794372559, "rewards/rejected": -0.23147833347320557, "step": 6874 }, { "epoch": 4.754495159059474, "grad_norm": 10.174572944641113, "learning_rate": 2.9141693560780697e-05, "log_odds_chosen": 9.549734115600586, "log_odds_ratio": -0.0004066435503773391, "logits/chosen": -0.4515111446380615, "logits/rejected": -0.5325211882591248, "logps/chosen": -0.002241352340206504, "logps/rejected": -1.8842123746871948, "loss": 1.1395, "nll_loss": 0.28482872247695923, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022413526312448084, "rewards/margins": 0.18819710612297058, "rewards/rejected": -0.18842124938964844, "step": 6875 }, { "epoch": 4.755186721991701, "grad_norm": 13.478486061096191, "learning_rate": 2.9137851544490553e-05, "log_odds_chosen": 11.715140342712402, "log_odds_ratio": -1.260038152395282e-05, "logits/chosen": -0.6951454281806946, "logits/rejected": -0.664121687412262, "logps/chosen": -0.00015733492909930646, "logps/rejected": -2.7464029788970947, "loss": 0.9162, "nll_loss": 0.22904790937900543, "rewards/accuracies": 1.0, "rewards/chosen": -1.573349436512217e-05, "rewards/margins": 0.2746245563030243, "rewards/rejected": -0.274640291929245, "step": 6876 }, { "epoch": 4.755878284923928, "grad_norm": 9.419013977050781, "learning_rate": 2.9134009528200402e-05, "log_odds_chosen": 10.264875411987305, "log_odds_ratio": -8.103143773041666e-05, "logits/chosen": -0.9880549907684326, "logits/rejected": -1.0210462808609009, "logps/chosen": -0.0005025397404097021, "logps/rejected": -2.2138586044311523, "loss": 0.8977, "nll_loss": 0.22442609071731567, "rewards/accuracies": 1.0, "rewards/chosen": -5.0253973313374445e-05, "rewards/margins": 0.22133558988571167, "rewards/rejected": -0.2213858664035797, "step": 6877 }, { "epoch": 4.756569847856155, "grad_norm": 9.813071250915527, "learning_rate": 2.913016751191025e-05, "log_odds_chosen": 10.428069114685059, "log_odds_ratio": -6.0980233683949336e-05, "logits/chosen": -0.4846192002296448, "logits/rejected": -0.5764331221580505, "logps/chosen": -0.0006243964890018106, "logps/rejected": -2.4788196086883545, "loss": 0.7249, "nll_loss": 0.1812150776386261, "rewards/accuracies": 1.0, "rewards/chosen": -6.243964890018106e-05, "rewards/margins": 0.24781951308250427, "rewards/rejected": -0.2478819638490677, "step": 6878 }, { "epoch": 4.757261410788382, "grad_norm": 8.915365219116211, "learning_rate": 2.9126325495620103e-05, "log_odds_chosen": 11.576354026794434, "log_odds_ratio": -1.2781900295522064e-05, "logits/chosen": -1.0454456806182861, "logits/rejected": -1.0896204710006714, "logps/chosen": -0.00016211725596804172, "logps/rejected": -2.7902166843414307, "loss": 0.8306, "nll_loss": 0.20765334367752075, "rewards/accuracies": 1.0, "rewards/chosen": -1.6211726688197814e-05, "rewards/margins": 0.2790054678916931, "rewards/rejected": -0.279021680355072, "step": 6879 }, { "epoch": 4.7579529737206085, "grad_norm": 13.442499160766602, "learning_rate": 2.9122483479329953e-05, "log_odds_chosen": 9.946861267089844, "log_odds_ratio": -0.0005052194464951754, "logits/chosen": -0.7467355728149414, "logits/rejected": -0.7708727121353149, "logps/chosen": -0.0042512849904596806, "logps/rejected": -2.125576972961426, "loss": 1.4994, "nll_loss": 0.3748067021369934, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042512849904596806, "rewards/margins": 0.21213257312774658, "rewards/rejected": -0.21255768835544586, "step": 6880 }, { "epoch": 4.758644536652835, "grad_norm": 4.847219944000244, "learning_rate": 2.91186414630398e-05, "log_odds_chosen": 9.050625801086426, "log_odds_ratio": -0.017848612740635872, "logits/chosen": -0.199097141623497, "logits/rejected": -0.2752664089202881, "logps/chosen": -0.006846034899353981, "logps/rejected": -1.7001570463180542, "loss": 1.1058, "nll_loss": 0.2746548652648926, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006846035248599946, "rewards/margins": 0.16933110356330872, "rewards/rejected": -0.17001570761203766, "step": 6881 }, { "epoch": 4.759336099585062, "grad_norm": 13.729485511779785, "learning_rate": 2.9114799446749657e-05, "log_odds_chosen": 9.440155029296875, "log_odds_ratio": -0.00013803632464259863, "logits/chosen": -0.2779269814491272, "logits/rejected": -0.3506007492542267, "logps/chosen": -0.00039239716716110706, "logps/rejected": -1.6433520317077637, "loss": 0.9868, "nll_loss": 0.2466796338558197, "rewards/accuracies": 1.0, "rewards/chosen": -3.9239715988514945e-05, "rewards/margins": 0.164295956492424, "rewards/rejected": -0.1643352061510086, "step": 6882 }, { "epoch": 4.760027662517289, "grad_norm": 7.702147960662842, "learning_rate": 2.9110957430459506e-05, "log_odds_chosen": 10.948719024658203, "log_odds_ratio": -7.656020170543343e-05, "logits/chosen": -0.36923375725746155, "logits/rejected": -0.3752695918083191, "logps/chosen": -0.00018135455320589244, "logps/rejected": -2.171945810317993, "loss": 0.9153, "nll_loss": 0.22881731390953064, "rewards/accuracies": 1.0, "rewards/chosen": -1.8135455320589244e-05, "rewards/margins": 0.2171764373779297, "rewards/rejected": -0.2171945720911026, "step": 6883 }, { "epoch": 4.760719225449516, "grad_norm": 9.405759811401367, "learning_rate": 2.9107115414169356e-05, "log_odds_chosen": 10.371192932128906, "log_odds_ratio": -3.8315774872899055e-05, "logits/chosen": -0.370733380317688, "logits/rejected": -0.4181835949420929, "logps/chosen": -0.00018985618953593075, "logps/rejected": -1.7754912376403809, "loss": 0.741, "nll_loss": 0.1852462887763977, "rewards/accuracies": 1.0, "rewards/chosen": -1.8985620044986717e-05, "rewards/margins": 0.17753013968467712, "rewards/rejected": -0.17754912376403809, "step": 6884 }, { "epoch": 4.761410788381743, "grad_norm": 6.149433135986328, "learning_rate": 2.910327339787921e-05, "log_odds_chosen": 9.182944297790527, "log_odds_ratio": -0.004304947331547737, "logits/chosen": -0.48084306716918945, "logits/rejected": -0.5079762935638428, "logps/chosen": -0.004953990690410137, "logps/rejected": -2.269893169403076, "loss": 0.9361, "nll_loss": 0.23360510170459747, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004953990573994815, "rewards/margins": 0.2264939248561859, "rewards/rejected": -0.22698931396007538, "step": 6885 }, { "epoch": 4.7621023513139695, "grad_norm": 10.778986930847168, "learning_rate": 2.909943138158906e-05, "log_odds_chosen": 10.12750244140625, "log_odds_ratio": -0.0005071478663012385, "logits/chosen": -0.8861774802207947, "logits/rejected": -0.950569212436676, "logps/chosen": -0.0003685950068756938, "logps/rejected": -1.6489312648773193, "loss": 1.4241, "nll_loss": 0.35597312450408936, "rewards/accuracies": 1.0, "rewards/chosen": -3.685949923237786e-05, "rewards/margins": 0.16485625505447388, "rewards/rejected": -0.16489310562610626, "step": 6886 }, { "epoch": 4.762793914246196, "grad_norm": 11.243009567260742, "learning_rate": 2.909558936529891e-05, "log_odds_chosen": 9.722782135009766, "log_odds_ratio": -0.002325496170669794, "logits/chosen": -0.7638850212097168, "logits/rejected": -0.7820266485214233, "logps/chosen": -0.002243445487692952, "logps/rejected": -2.244614362716675, "loss": 1.1933, "nll_loss": 0.2981026768684387, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022434454876929522, "rewards/margins": 0.2242370843887329, "rewards/rejected": -0.2244614213705063, "step": 6887 }, { "epoch": 4.763485477178423, "grad_norm": 7.690074920654297, "learning_rate": 2.9091747349008762e-05, "log_odds_chosen": 10.77735710144043, "log_odds_ratio": -0.0010174752678722143, "logits/chosen": -0.7401840090751648, "logits/rejected": -0.8211042284965515, "logps/chosen": -0.0016121247317641973, "logps/rejected": -3.074896812438965, "loss": 0.8382, "nll_loss": 0.20945952832698822, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016121247608680278, "rewards/margins": 0.307328462600708, "rewards/rejected": -0.30748969316482544, "step": 6888 }, { "epoch": 4.76417704011065, "grad_norm": 9.203757286071777, "learning_rate": 2.908790533271861e-05, "log_odds_chosen": 9.383280754089355, "log_odds_ratio": -0.004346000496298075, "logits/chosen": -0.9272629022598267, "logits/rejected": -0.9472211599349976, "logps/chosen": -0.0026920849923044443, "logps/rejected": -1.4084715843200684, "loss": 1.5712, "nll_loss": 0.39237409830093384, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026920848176814616, "rewards/margins": 0.14057794213294983, "rewards/rejected": -0.14084716141223907, "step": 6889 }, { "epoch": 4.764868603042877, "grad_norm": 10.183735847473145, "learning_rate": 2.908406331642846e-05, "log_odds_chosen": 9.105962753295898, "log_odds_ratio": -0.0018381911795586348, "logits/chosen": -0.7122400999069214, "logits/rejected": -0.8362756371498108, "logps/chosen": -0.0014686386566609144, "logps/rejected": -1.8283908367156982, "loss": 0.8191, "nll_loss": 0.20458099246025085, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014686388021800667, "rewards/margins": 0.18269222974777222, "rewards/rejected": -0.1828390657901764, "step": 6890 }, { "epoch": 4.765560165975104, "grad_norm": 12.078200340270996, "learning_rate": 2.9080221300138316e-05, "log_odds_chosen": 8.823981285095215, "log_odds_ratio": -0.014966276474297047, "logits/chosen": -0.7389836311340332, "logits/rejected": -0.7439814805984497, "logps/chosen": -0.005864677019417286, "logps/rejected": -2.1136207580566406, "loss": 1.0892, "nll_loss": 0.27079248428344727, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005864677368663251, "rewards/margins": 0.21077561378479004, "rewards/rejected": -0.2113620638847351, "step": 6891 }, { "epoch": 4.7662517289073305, "grad_norm": 9.881623268127441, "learning_rate": 2.9076379283848165e-05, "log_odds_chosen": 10.488458633422852, "log_odds_ratio": -0.00015489206998609006, "logits/chosen": -0.6555848121643066, "logits/rejected": -0.6229965686798096, "logps/chosen": -0.000846775365062058, "logps/rejected": -2.6446585655212402, "loss": 1.1214, "nll_loss": 0.28033578395843506, "rewards/accuracies": 1.0, "rewards/chosen": -8.46775365062058e-05, "rewards/margins": 0.26438117027282715, "rewards/rejected": -0.264465868473053, "step": 6892 }, { "epoch": 4.766943291839557, "grad_norm": 10.037402153015137, "learning_rate": 2.9072537267558014e-05, "log_odds_chosen": 10.82370376586914, "log_odds_ratio": -6.398136611096561e-05, "logits/chosen": -0.12798471748828888, "logits/rejected": -0.1659693568944931, "logps/chosen": -0.0010921619832515717, "logps/rejected": -2.795205593109131, "loss": 1.1708, "nll_loss": 0.2927056849002838, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010921619832515717, "rewards/margins": 0.27941134572029114, "rewards/rejected": -0.27952057123184204, "step": 6893 }, { "epoch": 4.767634854771784, "grad_norm": 6.187631607055664, "learning_rate": 2.906869525126787e-05, "log_odds_chosen": 10.588701248168945, "log_odds_ratio": -6.957812001928687e-05, "logits/chosen": -0.5498100519180298, "logits/rejected": -0.6913700103759766, "logps/chosen": -0.000816626416053623, "logps/rejected": -2.9310169219970703, "loss": 0.8164, "nll_loss": 0.20408526062965393, "rewards/accuracies": 1.0, "rewards/chosen": -8.166264888131991e-05, "rewards/margins": 0.29302000999450684, "rewards/rejected": -0.2931016981601715, "step": 6894 }, { "epoch": 4.768326417704011, "grad_norm": 10.408868789672852, "learning_rate": 2.906485323497772e-05, "log_odds_chosen": 9.834101676940918, "log_odds_ratio": -0.02988821268081665, "logits/chosen": -0.3863828182220459, "logits/rejected": -0.5003218054771423, "logps/chosen": -0.008340238593518734, "logps/rejected": -2.438636302947998, "loss": 0.918, "nll_loss": 0.2265024036169052, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008340239292010665, "rewards/margins": 0.2430296242237091, "rewards/rejected": -0.24386364221572876, "step": 6895 }, { "epoch": 4.769017980636238, "grad_norm": 9.838829040527344, "learning_rate": 2.9061011218687568e-05, "log_odds_chosen": 9.691939353942871, "log_odds_ratio": -0.0018959257286041975, "logits/chosen": -0.6795417070388794, "logits/rejected": -0.7193290591239929, "logps/chosen": -0.0024579009041190147, "logps/rejected": -2.421403646469116, "loss": 0.9841, "nll_loss": 0.24582822620868683, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024579011369496584, "rewards/margins": 0.24189457297325134, "rewards/rejected": -0.24214035272598267, "step": 6896 }, { "epoch": 4.769709543568465, "grad_norm": 9.481804847717285, "learning_rate": 2.905716920239742e-05, "log_odds_chosen": 9.713380813598633, "log_odds_ratio": -0.0004562270478345454, "logits/chosen": -0.9557902812957764, "logits/rejected": -0.9785552620887756, "logps/chosen": -0.0007574007613584399, "logps/rejected": -1.7303085327148438, "loss": 1.0776, "nll_loss": 0.26935750246047974, "rewards/accuracies": 1.0, "rewards/chosen": -7.57400703150779e-05, "rewards/margins": 0.17295509576797485, "rewards/rejected": -0.17303083837032318, "step": 6897 }, { "epoch": 4.7704011065006915, "grad_norm": 15.33812141418457, "learning_rate": 2.905332718610727e-05, "log_odds_chosen": 10.432807922363281, "log_odds_ratio": -5.272179623716511e-05, "logits/chosen": -0.8567076325416565, "logits/rejected": -0.8398425579071045, "logps/chosen": -0.00020101238624192774, "logps/rejected": -1.5139999389648438, "loss": 1.3735, "nll_loss": 0.3433811366558075, "rewards/accuracies": 1.0, "rewards/chosen": -2.010124080698006e-05, "rewards/margins": 0.15137988328933716, "rewards/rejected": -0.15139998495578766, "step": 6898 }, { "epoch": 4.771092669432918, "grad_norm": 8.230432510375977, "learning_rate": 2.904948516981712e-05, "log_odds_chosen": 10.600896835327148, "log_odds_ratio": -0.00017199788999278098, "logits/chosen": -0.5474926233291626, "logits/rejected": -0.5394644737243652, "logps/chosen": -0.00012897196575067937, "logps/rejected": -1.8010598421096802, "loss": 0.979, "nll_loss": 0.24473422765731812, "rewards/accuracies": 1.0, "rewards/chosen": -1.2897196938865818e-05, "rewards/margins": 0.18009309470653534, "rewards/rejected": -0.18010598421096802, "step": 6899 }, { "epoch": 4.771784232365145, "grad_norm": 6.7657151222229, "learning_rate": 2.9045643153526974e-05, "log_odds_chosen": 9.881768226623535, "log_odds_ratio": -0.0007866210653446615, "logits/chosen": -0.6773836016654968, "logits/rejected": -0.7118434906005859, "logps/chosen": -0.0003718239313457161, "logps/rejected": -1.8470865488052368, "loss": 1.406, "nll_loss": 0.3514169752597809, "rewards/accuracies": 1.0, "rewards/chosen": -3.718238804140128e-05, "rewards/margins": 0.18467146158218384, "rewards/rejected": -0.18470865488052368, "step": 6900 }, { "epoch": 4.772475795297372, "grad_norm": 7.639023780822754, "learning_rate": 2.9041801137236823e-05, "log_odds_chosen": 10.500572204589844, "log_odds_ratio": -3.654634565464221e-05, "logits/chosen": -0.7020278573036194, "logits/rejected": -0.7347521781921387, "logps/chosen": -0.00024071653024293482, "logps/rejected": -1.8748853206634521, "loss": 0.8128, "nll_loss": 0.20319299399852753, "rewards/accuracies": 1.0, "rewards/chosen": -2.4071654479485005e-05, "rewards/margins": 0.18746446073055267, "rewards/rejected": -0.18748852610588074, "step": 6901 }, { "epoch": 4.773167358229599, "grad_norm": 17.48367691040039, "learning_rate": 2.9037959120946672e-05, "log_odds_chosen": 11.267285346984863, "log_odds_ratio": -7.754185207886621e-05, "logits/chosen": -0.3741925358772278, "logits/rejected": -0.33184921741485596, "logps/chosen": -0.0003230986767448485, "logps/rejected": -2.374788284301758, "loss": 0.8192, "nll_loss": 0.20479440689086914, "rewards/accuracies": 1.0, "rewards/chosen": -3.230986840208061e-05, "rewards/margins": 0.23744650185108185, "rewards/rejected": -0.2374788224697113, "step": 6902 }, { "epoch": 4.773858921161826, "grad_norm": 12.380620002746582, "learning_rate": 2.9034117104656528e-05, "log_odds_chosen": 9.672462463378906, "log_odds_ratio": -0.0028193434700369835, "logits/chosen": -0.5997171401977539, "logits/rejected": -0.651904284954071, "logps/chosen": -0.019300207495689392, "logps/rejected": -1.9906163215637207, "loss": 1.003, "nll_loss": 0.25047749280929565, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019300205167382956, "rewards/margins": 0.19713161885738373, "rewards/rejected": -0.19906164705753326, "step": 6903 }, { "epoch": 4.7745504840940525, "grad_norm": 11.972405433654785, "learning_rate": 2.9030275088366377e-05, "log_odds_chosen": 9.76927661895752, "log_odds_ratio": -0.0007554758340120316, "logits/chosen": -0.7512736320495605, "logits/rejected": -0.8519138693809509, "logps/chosen": -0.0012535990681499243, "logps/rejected": -2.0746865272521973, "loss": 0.9445, "nll_loss": 0.23604007065296173, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012535988935269415, "rewards/margins": 0.20734326541423798, "rewards/rejected": -0.20746862888336182, "step": 6904 }, { "epoch": 4.775242047026279, "grad_norm": 8.559404373168945, "learning_rate": 2.9026433072076226e-05, "log_odds_chosen": 10.705414772033691, "log_odds_ratio": -4.388433080748655e-05, "logits/chosen": -0.17357826232910156, "logits/rejected": -0.2473318874835968, "logps/chosen": -0.00044088560389354825, "logps/rejected": -2.56740403175354, "loss": 1.0881, "nll_loss": 0.27202650904655457, "rewards/accuracies": 1.0, "rewards/chosen": -4.408856329973787e-05, "rewards/margins": 0.25669634342193604, "rewards/rejected": -0.25674039125442505, "step": 6905 }, { "epoch": 4.775933609958506, "grad_norm": 9.186101913452148, "learning_rate": 2.902259105578608e-05, "log_odds_chosen": 9.613700866699219, "log_odds_ratio": -0.00048775109462440014, "logits/chosen": -0.17357760667800903, "logits/rejected": -0.20531266927719116, "logps/chosen": -0.0005673019913956523, "logps/rejected": -1.8855153322219849, "loss": 1.0829, "nll_loss": 0.27068501710891724, "rewards/accuracies": 1.0, "rewards/chosen": -5.6730197684373707e-05, "rewards/margins": 0.18849480152130127, "rewards/rejected": -0.18855154514312744, "step": 6906 }, { "epoch": 4.776625172890733, "grad_norm": 11.298944473266602, "learning_rate": 2.9018749039495928e-05, "log_odds_chosen": 11.490623474121094, "log_odds_ratio": -3.84019294870086e-05, "logits/chosen": -0.6738793253898621, "logits/rejected": -0.7730412483215332, "logps/chosen": -0.00020757513993885368, "logps/rejected": -2.5061240196228027, "loss": 1.0231, "nll_loss": 0.25577008724212646, "rewards/accuracies": 1.0, "rewards/chosen": -2.0757517631864175e-05, "rewards/margins": 0.2505916357040405, "rewards/rejected": -0.25061243772506714, "step": 6907 }, { "epoch": 4.77731673582296, "grad_norm": 5.515003204345703, "learning_rate": 2.9014907023205777e-05, "log_odds_chosen": 9.199296951293945, "log_odds_ratio": -0.000681077188346535, "logits/chosen": -0.410367488861084, "logits/rejected": -0.46633514761924744, "logps/chosen": -0.016598787158727646, "logps/rejected": -2.2026970386505127, "loss": 1.5174, "nll_loss": 0.3792707920074463, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016598786460235715, "rewards/margins": 0.21860985457897186, "rewards/rejected": -0.22026972472667694, "step": 6908 }, { "epoch": 4.778008298755187, "grad_norm": 8.467878341674805, "learning_rate": 2.9011065006915633e-05, "log_odds_chosen": 10.931889533996582, "log_odds_ratio": -0.00010012050188379362, "logits/chosen": -0.6796239614486694, "logits/rejected": -0.5677720308303833, "logps/chosen": -0.00023513483756687492, "logps/rejected": -2.034045934677124, "loss": 0.8391, "nll_loss": 0.20975741744041443, "rewards/accuracies": 1.0, "rewards/chosen": -2.3513486667070538e-05, "rewards/margins": 0.20338107645511627, "rewards/rejected": -0.20340459048748016, "step": 6909 }, { "epoch": 4.7786998616874135, "grad_norm": 7.321889400482178, "learning_rate": 2.9007222990625482e-05, "log_odds_chosen": 9.465747833251953, "log_odds_ratio": -0.00028930528787896037, "logits/chosen": -0.7294983863830566, "logits/rejected": -0.7918939590454102, "logps/chosen": -0.000825887662358582, "logps/rejected": -1.7185850143432617, "loss": 0.8373, "nll_loss": 0.2092876136302948, "rewards/accuracies": 1.0, "rewards/chosen": -8.258876914624125e-05, "rewards/margins": 0.1717759221792221, "rewards/rejected": -0.1718585044145584, "step": 6910 }, { "epoch": 4.77939142461964, "grad_norm": 5.821832656860352, "learning_rate": 2.900338097433533e-05, "log_odds_chosen": 9.919578552246094, "log_odds_ratio": -0.00020514108473435044, "logits/chosen": -0.3137022852897644, "logits/rejected": -0.3225104808807373, "logps/chosen": -0.013574248179793358, "logps/rejected": -2.6222431659698486, "loss": 1.6493, "nll_loss": 0.4123116731643677, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013574250042438507, "rewards/margins": 0.2608668804168701, "rewards/rejected": -0.26222431659698486, "step": 6911 }, { "epoch": 4.780082987551867, "grad_norm": 5.551862716674805, "learning_rate": 2.8999538958045187e-05, "log_odds_chosen": 10.600739479064941, "log_odds_ratio": -4.490656647249125e-05, "logits/chosen": -0.9346702694892883, "logits/rejected": -0.8946956992149353, "logps/chosen": -9.617433534003794e-05, "logps/rejected": -1.5342824459075928, "loss": 0.8074, "nll_loss": 0.20183630287647247, "rewards/accuracies": 1.0, "rewards/chosen": -9.617434443498496e-06, "rewards/margins": 0.1534186154603958, "rewards/rejected": -0.15342822670936584, "step": 6912 }, { "epoch": 4.780774550484094, "grad_norm": 7.531041622161865, "learning_rate": 2.8995696941755036e-05, "log_odds_chosen": 10.089245796203613, "log_odds_ratio": -6.500841118395329e-05, "logits/chosen": -0.8466919660568237, "logits/rejected": -0.8212411403656006, "logps/chosen": -0.0002608322538435459, "logps/rejected": -1.7257875204086304, "loss": 1.0531, "nll_loss": 0.26327186822891235, "rewards/accuracies": 1.0, "rewards/chosen": -2.6083227567141876e-05, "rewards/margins": 0.1725526750087738, "rewards/rejected": -0.17257876694202423, "step": 6913 }, { "epoch": 4.781466113416321, "grad_norm": 15.634546279907227, "learning_rate": 2.8991854925464885e-05, "log_odds_chosen": 11.55288314819336, "log_odds_ratio": -1.260529097635299e-05, "logits/chosen": -0.9646028876304626, "logits/rejected": -0.9988402128219604, "logps/chosen": -0.000123454665299505, "logps/rejected": -2.4872028827667236, "loss": 1.3473, "nll_loss": 0.3368183672428131, "rewards/accuracies": 1.0, "rewards/chosen": -1.2345464710961096e-05, "rewards/margins": 0.24870797991752625, "rewards/rejected": -0.24872031807899475, "step": 6914 }, { "epoch": 4.782157676348548, "grad_norm": 6.2947916984558105, "learning_rate": 2.8988012909174737e-05, "log_odds_chosen": 10.003267288208008, "log_odds_ratio": -7.919715426396579e-05, "logits/chosen": -0.7195393443107605, "logits/rejected": -0.7393893003463745, "logps/chosen": -0.0005369444843381643, "logps/rejected": -1.976283073425293, "loss": 0.7504, "nll_loss": 0.18759770691394806, "rewards/accuracies": 1.0, "rewards/chosen": -5.369445352698676e-05, "rewards/margins": 0.19757461547851562, "rewards/rejected": -0.19762831926345825, "step": 6915 }, { "epoch": 4.782849239280774, "grad_norm": 9.371685981750488, "learning_rate": 2.8984170892884586e-05, "log_odds_chosen": 9.39436149597168, "log_odds_ratio": -0.029092937707901, "logits/chosen": -0.7665365934371948, "logits/rejected": -0.7613022327423096, "logps/chosen": -0.009415735490620136, "logps/rejected": -1.9681740999221802, "loss": 0.8, "nll_loss": 0.197085440158844, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009415735257789493, "rewards/margins": 0.1958758533000946, "rewards/rejected": -0.19681741297245026, "step": 6916 }, { "epoch": 4.783540802213001, "grad_norm": 8.897584915161133, "learning_rate": 2.8980328876594435e-05, "log_odds_chosen": 8.432744026184082, "log_odds_ratio": -0.006203706841915846, "logits/chosen": -1.054870843887329, "logits/rejected": -1.0128101110458374, "logps/chosen": -0.0035966283176094294, "logps/rejected": -1.6831315755844116, "loss": 1.1094, "nll_loss": 0.2767356038093567, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035966283758170903, "rewards/margins": 0.1679534912109375, "rewards/rejected": -0.1683131456375122, "step": 6917 }, { "epoch": 4.784232365145228, "grad_norm": 9.129132270812988, "learning_rate": 2.897648686030429e-05, "log_odds_chosen": 10.543561935424805, "log_odds_ratio": -6.228917482076213e-05, "logits/chosen": -0.9463077187538147, "logits/rejected": -0.9844076037406921, "logps/chosen": -0.00043040671152994037, "logps/rejected": -2.390472412109375, "loss": 0.8443, "nll_loss": 0.2110660970211029, "rewards/accuracies": 1.0, "rewards/chosen": -4.304067260818556e-05, "rewards/margins": 0.23900417983531952, "rewards/rejected": -0.23904724419116974, "step": 6918 }, { "epoch": 4.784923928077455, "grad_norm": 12.14979362487793, "learning_rate": 2.897264484401414e-05, "log_odds_chosen": 9.436766624450684, "log_odds_ratio": -0.002850313438102603, "logits/chosen": -0.7446776628494263, "logits/rejected": -0.7585545778274536, "logps/chosen": -0.007610342465341091, "logps/rejected": -1.6009125709533691, "loss": 0.6803, "nll_loss": 0.16978859901428223, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007610342581756413, "rewards/margins": 0.15933021903038025, "rewards/rejected": -0.16009125113487244, "step": 6919 }, { "epoch": 4.785615491009682, "grad_norm": 9.8521146774292, "learning_rate": 2.896880282772399e-05, "log_odds_chosen": 10.63294506072998, "log_odds_ratio": -0.0001836131705204025, "logits/chosen": -0.8492832183837891, "logits/rejected": -0.9085828065872192, "logps/chosen": -0.0003643471281975508, "logps/rejected": -2.2177181243896484, "loss": 1.1283, "nll_loss": 0.2820499837398529, "rewards/accuracies": 1.0, "rewards/chosen": -3.64347142749466e-05, "rewards/margins": 0.2217353880405426, "rewards/rejected": -0.22177180647850037, "step": 6920 }, { "epoch": 4.786307053941909, "grad_norm": 6.861515998840332, "learning_rate": 2.8964960811433845e-05, "log_odds_chosen": 8.752782821655273, "log_odds_ratio": -0.007573779206722975, "logits/chosen": -0.6578083038330078, "logits/rejected": -0.7704108357429504, "logps/chosen": -0.006619012448936701, "logps/rejected": -1.6753588914871216, "loss": 0.8632, "nll_loss": 0.2150411754846573, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006619012565352023, "rewards/margins": 0.1668739914894104, "rewards/rejected": -0.16753588616847992, "step": 6921 }, { "epoch": 4.786998616874135, "grad_norm": 7.481729030609131, "learning_rate": 2.8961118795143694e-05, "log_odds_chosen": 9.5519437789917, "log_odds_ratio": -0.002999893156811595, "logits/chosen": -0.6682575345039368, "logits/rejected": -0.7227833867073059, "logps/chosen": -0.0021522603929042816, "logps/rejected": -1.8938777446746826, "loss": 0.7787, "nll_loss": 0.19438041746616364, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021522602764889598, "rewards/margins": 0.18917253613471985, "rewards/rejected": -0.18938776850700378, "step": 6922 }, { "epoch": 4.787690179806362, "grad_norm": 16.188194274902344, "learning_rate": 2.8957276778853543e-05, "log_odds_chosen": 10.199849128723145, "log_odds_ratio": -0.00026228633942082524, "logits/chosen": -0.32946816086769104, "logits/rejected": -0.42485928535461426, "logps/chosen": -0.0007974720792844892, "logps/rejected": -2.426337242126465, "loss": 1.249, "nll_loss": 0.31223201751708984, "rewards/accuracies": 1.0, "rewards/chosen": -7.974720938364044e-05, "rewards/margins": 0.2425539791584015, "rewards/rejected": -0.24263374507427216, "step": 6923 }, { "epoch": 4.788381742738589, "grad_norm": 10.802689552307129, "learning_rate": 2.8953434762563396e-05, "log_odds_chosen": 9.829700469970703, "log_odds_ratio": -0.002595470752567053, "logits/chosen": -0.7074468731880188, "logits/rejected": -0.7966371774673462, "logps/chosen": -0.004519506823271513, "logps/rejected": -2.0750341415405273, "loss": 0.8556, "nll_loss": 0.21363969147205353, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004519507347140461, "rewards/margins": 0.20705148577690125, "rewards/rejected": -0.20750342309474945, "step": 6924 }, { "epoch": 4.789073305670816, "grad_norm": 11.141003608703613, "learning_rate": 2.8949592746273245e-05, "log_odds_chosen": 10.795259475708008, "log_odds_ratio": -5.5939930462045595e-05, "logits/chosen": -0.5992904901504517, "logits/rejected": -0.584691047668457, "logps/chosen": -0.00029081429238431156, "logps/rejected": -2.1754791736602783, "loss": 1.3131, "nll_loss": 0.3282589316368103, "rewards/accuracies": 1.0, "rewards/chosen": -2.9081427783239633e-05, "rewards/margins": 0.21751883625984192, "rewards/rejected": -0.21754790842533112, "step": 6925 }, { "epoch": 4.789764868603043, "grad_norm": 7.459900856018066, "learning_rate": 2.8945750729983094e-05, "log_odds_chosen": 9.909021377563477, "log_odds_ratio": -0.0002982753503601998, "logits/chosen": -0.25852513313293457, "logits/rejected": -0.30407804250717163, "logps/chosen": -0.0006219090428203344, "logps/rejected": -2.053335666656494, "loss": 0.9451, "nll_loss": 0.236250102519989, "rewards/accuracies": 1.0, "rewards/chosen": -6.219090573722497e-05, "rewards/margins": 0.2052713930606842, "rewards/rejected": -0.20533359050750732, "step": 6926 }, { "epoch": 4.79045643153527, "grad_norm": 11.572301864624023, "learning_rate": 2.894190871369295e-05, "log_odds_chosen": 10.061004638671875, "log_odds_ratio": -0.0001310033112531528, "logits/chosen": -0.1928636133670807, "logits/rejected": -0.28308868408203125, "logps/chosen": -0.002506204880774021, "logps/rejected": -1.9154969453811646, "loss": 1.1552, "nll_loss": 0.2887880206108093, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025062047643586993, "rewards/margins": 0.19129906594753265, "rewards/rejected": -0.19154968857765198, "step": 6927 }, { "epoch": 4.791147994467496, "grad_norm": 9.191129684448242, "learning_rate": 2.89380666974028e-05, "log_odds_chosen": 9.518013000488281, "log_odds_ratio": -0.0001670484634814784, "logits/chosen": -0.460419237613678, "logits/rejected": -0.42179930210113525, "logps/chosen": -0.0162142775952816, "logps/rejected": -2.1252405643463135, "loss": 1.0023, "nll_loss": 0.25056424736976624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016214278293773532, "rewards/margins": 0.2109026312828064, "rewards/rejected": -0.21252407133579254, "step": 6928 }, { "epoch": 4.791839557399723, "grad_norm": 7.784974575042725, "learning_rate": 2.8934224681112648e-05, "log_odds_chosen": 9.323896408081055, "log_odds_ratio": -0.0005712855490855873, "logits/chosen": -0.7076651453971863, "logits/rejected": -0.7251378297805786, "logps/chosen": -0.004440257325768471, "logps/rejected": -2.320218324661255, "loss": 1.2093, "nll_loss": 0.30227917432785034, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004440257325768471, "rewards/margins": 0.2315778136253357, "rewards/rejected": -0.23202183842658997, "step": 6929 }, { "epoch": 4.79253112033195, "grad_norm": 7.4357476234436035, "learning_rate": 2.8930382664822504e-05, "log_odds_chosen": 9.982955932617188, "log_odds_ratio": -0.0002719922049436718, "logits/chosen": -0.6078499555587769, "logits/rejected": -0.6500560641288757, "logps/chosen": -0.0003774300857912749, "logps/rejected": -1.7039949893951416, "loss": 1.2563, "nll_loss": 0.3140376806259155, "rewards/accuracies": 1.0, "rewards/chosen": -3.774300785153173e-05, "rewards/margins": 0.17036175727844238, "rewards/rejected": -0.1703994870185852, "step": 6930 }, { "epoch": 4.793222683264177, "grad_norm": 7.098720073699951, "learning_rate": 2.8926540648532353e-05, "log_odds_chosen": 10.781723022460938, "log_odds_ratio": -2.407423744443804e-05, "logits/chosen": -0.47065818309783936, "logits/rejected": -0.5136620998382568, "logps/chosen": -0.00033951684599742293, "logps/rejected": -2.5275063514709473, "loss": 0.654, "nll_loss": 0.1635047197341919, "rewards/accuracies": 1.0, "rewards/chosen": -3.395168459974229e-05, "rewards/margins": 0.25271666049957275, "rewards/rejected": -0.2527506351470947, "step": 6931 }, { "epoch": 4.793914246196404, "grad_norm": 8.527052879333496, "learning_rate": 2.89226986322422e-05, "log_odds_chosen": 10.775848388671875, "log_odds_ratio": -3.8844307709950954e-05, "logits/chosen": -0.12336946278810501, "logits/rejected": -0.16141879558563232, "logps/chosen": -0.0003509092202875763, "logps/rejected": -2.6161699295043945, "loss": 0.7901, "nll_loss": 0.1975254863500595, "rewards/accuracies": 1.0, "rewards/chosen": -3.509092493914068e-05, "rewards/margins": 0.2615818977355957, "rewards/rejected": -0.2616170048713684, "step": 6932 }, { "epoch": 4.7946058091286305, "grad_norm": 9.076807975769043, "learning_rate": 2.8918856615952054e-05, "log_odds_chosen": 11.056121826171875, "log_odds_ratio": -2.2091770006227307e-05, "logits/chosen": -0.25333231687545776, "logits/rejected": -0.30482399463653564, "logps/chosen": -0.0003058099828194827, "logps/rejected": -2.733250617980957, "loss": 0.8876, "nll_loss": 0.22190183401107788, "rewards/accuracies": 1.0, "rewards/chosen": -3.058099900954403e-05, "rewards/margins": 0.27329447865486145, "rewards/rejected": -0.2733250856399536, "step": 6933 }, { "epoch": 4.795297372060857, "grad_norm": 9.712004661560059, "learning_rate": 2.8915014599661907e-05, "log_odds_chosen": 10.782461166381836, "log_odds_ratio": -4.8897571105044335e-05, "logits/chosen": -0.5463672876358032, "logits/rejected": -0.6378750801086426, "logps/chosen": -0.0003280511009506881, "logps/rejected": -2.362738609313965, "loss": 1.1677, "nll_loss": 0.29191526770591736, "rewards/accuracies": 1.0, "rewards/chosen": -3.280511009506881e-05, "rewards/margins": 0.23624107241630554, "rewards/rejected": -0.2362738847732544, "step": 6934 }, { "epoch": 4.795988934993084, "grad_norm": 10.277175903320312, "learning_rate": 2.8911172583371756e-05, "log_odds_chosen": 10.328908920288086, "log_odds_ratio": -0.00012547847290989012, "logits/chosen": -0.8211703896522522, "logits/rejected": -0.8321343660354614, "logps/chosen": -0.00041813915595412254, "logps/rejected": -2.0395636558532715, "loss": 0.8992, "nll_loss": 0.22478783130645752, "rewards/accuracies": 1.0, "rewards/chosen": -4.181391341262497e-05, "rewards/margins": 0.2039145529270172, "rewards/rejected": -0.20395638048648834, "step": 6935 }, { "epoch": 4.796680497925311, "grad_norm": 4.285028457641602, "learning_rate": 2.8907330567081608e-05, "log_odds_chosen": 9.490989685058594, "log_odds_ratio": -0.0008424659026786685, "logits/chosen": -0.1907077431678772, "logits/rejected": -0.25019198656082153, "logps/chosen": -0.0022601436357945204, "logps/rejected": -2.04176664352417, "loss": 1.5389, "nll_loss": 0.38463160395622253, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022601439559366554, "rewards/margins": 0.2039506435394287, "rewards/rejected": -0.204176664352417, "step": 6936 }, { "epoch": 4.797372060857538, "grad_norm": 11.503093719482422, "learning_rate": 2.8903488550791457e-05, "log_odds_chosen": 10.554071426391602, "log_odds_ratio": -0.0002274377184221521, "logits/chosen": -0.6637147665023804, "logits/rejected": -0.6363039016723633, "logps/chosen": -0.00023640092695131898, "logps/rejected": -2.3359031677246094, "loss": 1.4716, "nll_loss": 0.36787402629852295, "rewards/accuracies": 1.0, "rewards/chosen": -2.3640095605514944e-05, "rewards/margins": 0.23356668651103973, "rewards/rejected": -0.2335902899503708, "step": 6937 }, { "epoch": 4.798063623789765, "grad_norm": 13.230957984924316, "learning_rate": 2.8899646534501306e-05, "log_odds_chosen": 9.819869041442871, "log_odds_ratio": -0.0002255823346786201, "logits/chosen": -0.4164173901081085, "logits/rejected": -0.47124138474464417, "logps/chosen": -0.00040291884215548635, "logps/rejected": -1.3943617343902588, "loss": 1.0964, "nll_loss": 0.27407020330429077, "rewards/accuracies": 1.0, "rewards/chosen": -4.029188130516559e-05, "rewards/margins": 0.13939589262008667, "rewards/rejected": -0.13943618535995483, "step": 6938 }, { "epoch": 4.7987551867219915, "grad_norm": 6.186459541320801, "learning_rate": 2.8895804518211155e-05, "log_odds_chosen": 10.776379585266113, "log_odds_ratio": -2.662005317688454e-05, "logits/chosen": -0.652151882648468, "logits/rejected": -0.6157358288764954, "logps/chosen": -0.0001764782064128667, "logps/rejected": -1.6163636445999146, "loss": 1.0078, "nll_loss": 0.2519550919532776, "rewards/accuracies": 1.0, "rewards/chosen": -1.764782064128667e-05, "rewards/margins": 0.16161870956420898, "rewards/rejected": -0.1616363674402237, "step": 6939 }, { "epoch": 4.799446749654218, "grad_norm": 14.097229957580566, "learning_rate": 2.889196250192101e-05, "log_odds_chosen": 10.038839340209961, "log_odds_ratio": -0.00015504976909141988, "logits/chosen": -0.4597371518611908, "logits/rejected": -0.544449508190155, "logps/chosen": -0.0006131302798166871, "logps/rejected": -1.6367748975753784, "loss": 1.2847, "nll_loss": 0.3211716413497925, "rewards/accuracies": 1.0, "rewards/chosen": -6.131303234724328e-05, "rewards/margins": 0.16361618041992188, "rewards/rejected": -0.16367748379707336, "step": 6940 }, { "epoch": 4.800138312586445, "grad_norm": 9.109824180603027, "learning_rate": 2.888812048563086e-05, "log_odds_chosen": 10.58295726776123, "log_odds_ratio": -5.280112964101136e-05, "logits/chosen": -0.5606398582458496, "logits/rejected": -0.6050992012023926, "logps/chosen": -0.0002038203674601391, "logps/rejected": -1.8081471920013428, "loss": 0.7582, "nll_loss": 0.18954379856586456, "rewards/accuracies": 1.0, "rewards/chosen": -2.038203638221603e-05, "rewards/margins": 0.1807943433523178, "rewards/rejected": -0.180814728140831, "step": 6941 }, { "epoch": 4.800829875518672, "grad_norm": 9.375306129455566, "learning_rate": 2.888427846934071e-05, "log_odds_chosen": 9.825284004211426, "log_odds_ratio": -0.006563317961990833, "logits/chosen": -0.10774577409029007, "logits/rejected": -0.20767997205257416, "logps/chosen": -0.019267892464995384, "logps/rejected": -2.3732481002807617, "loss": 1.004, "nll_loss": 0.250335156917572, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019267891766503453, "rewards/margins": 0.23539802432060242, "rewards/rejected": -0.2373248189687729, "step": 6942 }, { "epoch": 4.801521438450899, "grad_norm": 7.552305221557617, "learning_rate": 2.8880436453050565e-05, "log_odds_chosen": 10.752092361450195, "log_odds_ratio": -2.9556467779912055e-05, "logits/chosen": -0.6632587909698486, "logits/rejected": -0.6243708729743958, "logps/chosen": -0.00019705788872670382, "logps/rejected": -2.006537914276123, "loss": 0.8495, "nll_loss": 0.21236670017242432, "rewards/accuracies": 1.0, "rewards/chosen": -1.9705788872670382e-05, "rewards/margins": 0.20063409209251404, "rewards/rejected": -0.2006537914276123, "step": 6943 }, { "epoch": 4.802213001383126, "grad_norm": 12.195704460144043, "learning_rate": 2.8876594436760414e-05, "log_odds_chosen": 10.260480880737305, "log_odds_ratio": -0.00013605033745989203, "logits/chosen": -0.6474810242652893, "logits/rejected": -0.7049196362495422, "logps/chosen": -0.001023083459585905, "logps/rejected": -2.2923169136047363, "loss": 0.8756, "nll_loss": 0.218886598944664, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010230834595859051, "rewards/margins": 0.2291293889284134, "rewards/rejected": -0.22923171520233154, "step": 6944 }, { "epoch": 4.8029045643153525, "grad_norm": 5.283492088317871, "learning_rate": 2.8872752420470263e-05, "log_odds_chosen": 10.04088020324707, "log_odds_ratio": -6.29095738986507e-05, "logits/chosen": -0.645579993724823, "logits/rejected": -0.6827752590179443, "logps/chosen": -0.00022650565369985998, "logps/rejected": -1.4571340084075928, "loss": 0.6205, "nll_loss": 0.15512149035930634, "rewards/accuracies": 1.0, "rewards/chosen": -2.2650565369985998e-05, "rewards/margins": 0.14569075405597687, "rewards/rejected": -0.14571340382099152, "step": 6945 }, { "epoch": 4.803596127247579, "grad_norm": 6.904294967651367, "learning_rate": 2.8868910404180116e-05, "log_odds_chosen": 10.803171157836914, "log_odds_ratio": -0.00027451833011582494, "logits/chosen": -0.5428705215454102, "logits/rejected": -0.5604684948921204, "logps/chosen": -0.00022846259525977075, "logps/rejected": -2.276235342025757, "loss": 1.5928, "nll_loss": 0.3981609046459198, "rewards/accuracies": 1.0, "rewards/chosen": -2.2846259525977075e-05, "rewards/margins": 0.22760069370269775, "rewards/rejected": -0.22762353718280792, "step": 6946 }, { "epoch": 4.804287690179806, "grad_norm": 10.15185546875, "learning_rate": 2.8865068387889965e-05, "log_odds_chosen": 9.572251319885254, "log_odds_ratio": -0.000584468012675643, "logits/chosen": -0.568747341632843, "logits/rejected": -0.5709191560745239, "logps/chosen": -0.0007926247199065983, "logps/rejected": -1.5048116445541382, "loss": 1.4307, "nll_loss": 0.3576134443283081, "rewards/accuracies": 1.0, "rewards/chosen": -7.926247781142592e-05, "rewards/margins": 0.15040190517902374, "rewards/rejected": -0.150481179356575, "step": 6947 }, { "epoch": 4.804979253112033, "grad_norm": 7.460203170776367, "learning_rate": 2.8861226371599814e-05, "log_odds_chosen": 9.168164253234863, "log_odds_ratio": -0.0009161671041510999, "logits/chosen": -0.5154004096984863, "logits/rejected": -0.5313372015953064, "logps/chosen": -0.001504352898336947, "logps/rejected": -1.4024771451950073, "loss": 0.778, "nll_loss": 0.19440346956253052, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015043529856484383, "rewards/margins": 0.14009729027748108, "rewards/rejected": -0.14024771749973297, "step": 6948 }, { "epoch": 4.80567081604426, "grad_norm": 8.82359504699707, "learning_rate": 2.885738435530967e-05, "log_odds_chosen": 10.764310836791992, "log_odds_ratio": -3.1426825444214046e-05, "logits/chosen": -0.5414690971374512, "logits/rejected": -0.5535372495651245, "logps/chosen": -0.0002997158153448254, "logps/rejected": -2.4745771884918213, "loss": 0.8529, "nll_loss": 0.2132188379764557, "rewards/accuracies": 1.0, "rewards/chosen": -2.9971581170684658e-05, "rewards/margins": 0.24742776155471802, "rewards/rejected": -0.24745772778987885, "step": 6949 }, { "epoch": 4.806362378976487, "grad_norm": 8.205737113952637, "learning_rate": 2.885354233901952e-05, "log_odds_chosen": 10.115606307983398, "log_odds_ratio": -0.0003620981296990067, "logits/chosen": -0.7475257515907288, "logits/rejected": -0.8717905879020691, "logps/chosen": -0.0007908196421340108, "logps/rejected": -1.9689276218414307, "loss": 1.127, "nll_loss": 0.2817220389842987, "rewards/accuracies": 1.0, "rewards/chosen": -7.908196130301803e-05, "rewards/margins": 0.1968136727809906, "rewards/rejected": -0.19689278304576874, "step": 6950 }, { "epoch": 4.8070539419087135, "grad_norm": 8.313371658325195, "learning_rate": 2.8849700322729368e-05, "log_odds_chosen": 8.338306427001953, "log_odds_ratio": -0.006208367180079222, "logits/chosen": -0.356538861989975, "logits/rejected": -0.37412649393081665, "logps/chosen": -0.0012255026958882809, "logps/rejected": -1.243715524673462, "loss": 1.6415, "nll_loss": 0.4097641706466675, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012255027831997722, "rewards/margins": 0.12424901127815247, "rewards/rejected": -0.12437155842781067, "step": 6951 }, { "epoch": 4.80774550484094, "grad_norm": 6.488125801086426, "learning_rate": 2.8845858306439223e-05, "log_odds_chosen": 9.322070121765137, "log_odds_ratio": -0.0010448751272633672, "logits/chosen": -0.4465481638908386, "logits/rejected": -0.5063095688819885, "logps/chosen": -0.001341567956842482, "logps/rejected": -1.9834538698196411, "loss": 0.856, "nll_loss": 0.21390560269355774, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013415678404271603, "rewards/margins": 0.19821123778820038, "rewards/rejected": -0.1983453780412674, "step": 6952 }, { "epoch": 4.808437067773167, "grad_norm": 7.999490261077881, "learning_rate": 2.8842016290149072e-05, "log_odds_chosen": 11.270200729370117, "log_odds_ratio": -2.683803359104786e-05, "logits/chosen": -0.26797768473625183, "logits/rejected": -0.31660839915275574, "logps/chosen": -0.00011563336738618091, "logps/rejected": -2.1397147178649902, "loss": 1.0249, "nll_loss": 0.25621429085731506, "rewards/accuracies": 1.0, "rewards/chosen": -1.1563337466213852e-05, "rewards/margins": 0.2139599323272705, "rewards/rejected": -0.21397149562835693, "step": 6953 }, { "epoch": 4.809128630705394, "grad_norm": 8.063843727111816, "learning_rate": 2.883817427385892e-05, "log_odds_chosen": 8.964914321899414, "log_odds_ratio": -0.04095021262764931, "logits/chosen": -0.2575796842575073, "logits/rejected": -0.3876993656158447, "logps/chosen": -0.009480806067585945, "logps/rejected": -2.020704746246338, "loss": 1.1192, "nll_loss": 0.27570194005966187, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009480806766077876, "rewards/margins": 0.20112240314483643, "rewards/rejected": -0.2020704746246338, "step": 6954 }, { "epoch": 4.809820193637621, "grad_norm": 4.980713367462158, "learning_rate": 2.8834332257568774e-05, "log_odds_chosen": 9.91356372833252, "log_odds_ratio": -0.00010528671555221081, "logits/chosen": -0.5045120120048523, "logits/rejected": -0.5280041694641113, "logps/chosen": -0.0001594717614352703, "logps/rejected": -1.4369676113128662, "loss": 0.914, "nll_loss": 0.2284928560256958, "rewards/accuracies": 1.0, "rewards/chosen": -1.5947174688335508e-05, "rewards/margins": 0.14368081092834473, "rewards/rejected": -0.14369675517082214, "step": 6955 }, { "epoch": 4.810511756569848, "grad_norm": 10.412702560424805, "learning_rate": 2.8830490241278623e-05, "log_odds_chosen": 10.395805358886719, "log_odds_ratio": -7.902117067715153e-05, "logits/chosen": -0.2385944426059723, "logits/rejected": -0.3181019425392151, "logps/chosen": -0.0003422585141379386, "logps/rejected": -2.067488431930542, "loss": 1.0646, "nll_loss": 0.26614975929260254, "rewards/accuracies": 1.0, "rewards/chosen": -3.422584995860234e-05, "rewards/margins": 0.20671463012695312, "rewards/rejected": -0.2067488580942154, "step": 6956 }, { "epoch": 4.8112033195020745, "grad_norm": 6.461427211761475, "learning_rate": 2.8826648224988472e-05, "log_odds_chosen": 9.895060539245605, "log_odds_ratio": -0.00010841433686437085, "logits/chosen": -0.5785304307937622, "logits/rejected": -0.6983880996704102, "logps/chosen": -0.0001771124079823494, "logps/rejected": -1.1903425455093384, "loss": 1.2011, "nll_loss": 0.3002629280090332, "rewards/accuracies": 1.0, "rewards/chosen": -1.77112415258307e-05, "rewards/margins": 0.11901654303073883, "rewards/rejected": -0.11903425306081772, "step": 6957 }, { "epoch": 4.811894882434301, "grad_norm": 10.47810173034668, "learning_rate": 2.8822806208698328e-05, "log_odds_chosen": 9.634604454040527, "log_odds_ratio": -0.00022385548800230026, "logits/chosen": -0.31653398275375366, "logits/rejected": -0.3796745538711548, "logps/chosen": -0.0011035995557904243, "logps/rejected": -2.300333023071289, "loss": 0.8245, "nll_loss": 0.20609070360660553, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011035994975827634, "rewards/margins": 0.22992295026779175, "rewards/rejected": -0.230033278465271, "step": 6958 }, { "epoch": 4.812586445366528, "grad_norm": 10.434643745422363, "learning_rate": 2.8818964192408177e-05, "log_odds_chosen": 8.491510391235352, "log_odds_ratio": -0.010704604908823967, "logits/chosen": -0.6227741837501526, "logits/rejected": -0.6373161673545837, "logps/chosen": -0.00477764243260026, "logps/rejected": -1.6983509063720703, "loss": 1.2679, "nll_loss": 0.31591036915779114, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047776426072232425, "rewards/margins": 0.1693573296070099, "rewards/rejected": -0.16983510553836823, "step": 6959 }, { "epoch": 4.813278008298755, "grad_norm": 17.066213607788086, "learning_rate": 2.8815122176118026e-05, "log_odds_chosen": 10.402935028076172, "log_odds_ratio": -0.00013788833166472614, "logits/chosen": -0.44745564460754395, "logits/rejected": -0.5753912329673767, "logps/chosen": -0.0005248216330073774, "logps/rejected": -2.174628734588623, "loss": 1.7491, "nll_loss": 0.43725520372390747, "rewards/accuracies": 1.0, "rewards/chosen": -5.248216257314198e-05, "rewards/margins": 0.2174103856086731, "rewards/rejected": -0.21746288239955902, "step": 6960 }, { "epoch": 4.813969571230982, "grad_norm": 12.821651458740234, "learning_rate": 2.8811280159827882e-05, "log_odds_chosen": 10.219192504882812, "log_odds_ratio": -0.0001237966789631173, "logits/chosen": -0.627615213394165, "logits/rejected": -0.6616013646125793, "logps/chosen": -0.0002719200274441391, "logps/rejected": -1.7700080871582031, "loss": 0.9606, "nll_loss": 0.24014118313789368, "rewards/accuracies": 1.0, "rewards/chosen": -2.719200165302027e-05, "rewards/margins": 0.1769736111164093, "rewards/rejected": -0.17700082063674927, "step": 6961 }, { "epoch": 4.814661134163209, "grad_norm": 11.982014656066895, "learning_rate": 2.880743814353773e-05, "log_odds_chosen": 8.90776538848877, "log_odds_ratio": -0.05733673647046089, "logits/chosen": -0.6063227653503418, "logits/rejected": -0.7158474326133728, "logps/chosen": -0.012450098991394043, "logps/rejected": -1.8934953212738037, "loss": 1.4886, "nll_loss": 0.3664115071296692, "rewards/accuracies": 1.0, "rewards/chosen": -0.001245009945705533, "rewards/margins": 0.188104510307312, "rewards/rejected": -0.18934953212738037, "step": 6962 }, { "epoch": 4.8153526970954355, "grad_norm": 9.394391059875488, "learning_rate": 2.880359612724758e-05, "log_odds_chosen": 10.815286636352539, "log_odds_ratio": -3.915661363862455e-05, "logits/chosen": -0.7519272565841675, "logits/rejected": -0.8650994300842285, "logps/chosen": -0.00031088574905879796, "logps/rejected": -2.595947742462158, "loss": 0.7355, "nll_loss": 0.18388155102729797, "rewards/accuracies": 1.0, "rewards/chosen": -3.108857345068827e-05, "rewards/margins": 0.259563684463501, "rewards/rejected": -0.25959476828575134, "step": 6963 }, { "epoch": 4.816044260027662, "grad_norm": 8.491909980773926, "learning_rate": 2.8799754110957432e-05, "log_odds_chosen": 10.1375732421875, "log_odds_ratio": -0.00012776776566170156, "logits/chosen": -0.4785597622394562, "logits/rejected": -0.5305933952331543, "logps/chosen": -0.000312845193548128, "logps/rejected": -1.6996479034423828, "loss": 0.8897, "nll_loss": 0.2224000245332718, "rewards/accuracies": 1.0, "rewards/chosen": -3.128451862721704e-05, "rewards/margins": 0.1699334979057312, "rewards/rejected": -0.16996479034423828, "step": 6964 }, { "epoch": 4.816735822959889, "grad_norm": 7.3495073318481445, "learning_rate": 2.879591209466728e-05, "log_odds_chosen": 10.194000244140625, "log_odds_ratio": -0.0005381208611652255, "logits/chosen": -0.5459569096565247, "logits/rejected": -0.5725046992301941, "logps/chosen": -0.00019581487867981195, "logps/rejected": -1.9669796228408813, "loss": 1.2325, "nll_loss": 0.30806687474250793, "rewards/accuracies": 1.0, "rewards/chosen": -1.9581486412789673e-05, "rewards/margins": 0.19667838513851166, "rewards/rejected": -0.19669798016548157, "step": 6965 }, { "epoch": 4.817427385892116, "grad_norm": 10.577987670898438, "learning_rate": 2.879207007837713e-05, "log_odds_chosen": 9.530058860778809, "log_odds_ratio": -0.0007562245009467006, "logits/chosen": -0.7595070004463196, "logits/rejected": -0.7329185009002686, "logps/chosen": -0.01416083239018917, "logps/rejected": -2.174567461013794, "loss": 1.1374, "nll_loss": 0.2842686176300049, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014160831924527884, "rewards/margins": 0.21604067087173462, "rewards/rejected": -0.21745674312114716, "step": 6966 }, { "epoch": 4.818118948824343, "grad_norm": 8.284407615661621, "learning_rate": 2.8788228062086986e-05, "log_odds_chosen": 9.523808479309082, "log_odds_ratio": -0.00033556658308953047, "logits/chosen": -0.4195675849914551, "logits/rejected": -0.4756912291049957, "logps/chosen": -0.0008030449389480054, "logps/rejected": -1.5736851692199707, "loss": 1.4564, "nll_loss": 0.364074170589447, "rewards/accuracies": 1.0, "rewards/chosen": -8.030448952922598e-05, "rewards/margins": 0.15728820860385895, "rewards/rejected": -0.1573685258626938, "step": 6967 }, { "epoch": 4.81881051175657, "grad_norm": 8.860459327697754, "learning_rate": 2.8784386045796835e-05, "log_odds_chosen": 10.552122116088867, "log_odds_ratio": -0.0001919109927257523, "logits/chosen": -0.9970296025276184, "logits/rejected": -1.0275627374649048, "logps/chosen": -0.0004448512918315828, "logps/rejected": -2.17071533203125, "loss": 0.8643, "nll_loss": 0.21605589985847473, "rewards/accuracies": 1.0, "rewards/chosen": -4.448512481758371e-05, "rewards/margins": 0.21702706813812256, "rewards/rejected": -0.2170715630054474, "step": 6968 }, { "epoch": 4.819502074688796, "grad_norm": 8.528804779052734, "learning_rate": 2.8780544029506684e-05, "log_odds_chosen": 10.231672286987305, "log_odds_ratio": -0.00021445140009745955, "logits/chosen": -0.7950515151023865, "logits/rejected": -0.8399736881256104, "logps/chosen": -0.0004185446014162153, "logps/rejected": -1.7746440172195435, "loss": 0.9838, "nll_loss": 0.24592530727386475, "rewards/accuracies": 1.0, "rewards/chosen": -4.185445868643001e-05, "rewards/margins": 0.17742255330085754, "rewards/rejected": -0.17746439576148987, "step": 6969 }, { "epoch": 4.820193637621023, "grad_norm": 7.747142791748047, "learning_rate": 2.877670201321654e-05, "log_odds_chosen": 9.8212251663208, "log_odds_ratio": -0.00026398696354590356, "logits/chosen": -0.6424762010574341, "logits/rejected": -0.7730410099029541, "logps/chosen": -0.00431458605453372, "logps/rejected": -2.3638572692871094, "loss": 1.0091, "nll_loss": 0.25225630402565, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043145858217030764, "rewards/margins": 0.23595428466796875, "rewards/rejected": -0.23638573288917542, "step": 6970 }, { "epoch": 4.82088520055325, "grad_norm": 7.259042739868164, "learning_rate": 2.877285999692639e-05, "log_odds_chosen": 10.099575996398926, "log_odds_ratio": -0.0002996406110469252, "logits/chosen": -0.4730404019355774, "logits/rejected": -0.535588264465332, "logps/chosen": -0.0002547122712712735, "logps/rejected": -1.7693817615509033, "loss": 1.0013, "nll_loss": 0.25030577182769775, "rewards/accuracies": 1.0, "rewards/chosen": -2.547122676332947e-05, "rewards/margins": 0.17691272497177124, "rewards/rejected": -0.17693819105625153, "step": 6971 }, { "epoch": 4.821576763485477, "grad_norm": 6.7854509353637695, "learning_rate": 2.876901798063624e-05, "log_odds_chosen": 10.333219528198242, "log_odds_ratio": -0.0025019964668899775, "logits/chosen": -0.3313809037208557, "logits/rejected": -0.3299313187599182, "logps/chosen": -0.0035334054846316576, "logps/rejected": -2.027862071990967, "loss": 0.9505, "nll_loss": 0.23737327754497528, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003533405833877623, "rewards/margins": 0.20243285596370697, "rewards/rejected": -0.20278620719909668, "step": 6972 }, { "epoch": 4.822268326417704, "grad_norm": 13.108231544494629, "learning_rate": 2.876517596434609e-05, "log_odds_chosen": 9.169116973876953, "log_odds_ratio": -0.021218866109848022, "logits/chosen": -0.5680206418037415, "logits/rejected": -0.6453905701637268, "logps/chosen": -0.006536668166518211, "logps/rejected": -1.1700851917266846, "loss": 0.6656, "nll_loss": 0.16427476704120636, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006536668515764177, "rewards/margins": 0.11635485291481018, "rewards/rejected": -0.1170085221529007, "step": 6973 }, { "epoch": 4.822959889349931, "grad_norm": 6.058053970336914, "learning_rate": 2.876133394805594e-05, "log_odds_chosen": 9.656610488891602, "log_odds_ratio": -0.00018438557162880898, "logits/chosen": -0.2926297187805176, "logits/rejected": -0.3582976758480072, "logps/chosen": -0.007463652174919844, "logps/rejected": -2.2228102684020996, "loss": 0.9425, "nll_loss": 0.2356175184249878, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007463652291335166, "rewards/margins": 0.2215346395969391, "rewards/rejected": -0.22228102385997772, "step": 6974 }, { "epoch": 4.823651452282157, "grad_norm": 8.221524238586426, "learning_rate": 2.875749193176579e-05, "log_odds_chosen": 8.097722053527832, "log_odds_ratio": -0.012327348813414574, "logits/chosen": -0.2094922661781311, "logits/rejected": -0.2288666069507599, "logps/chosen": -0.006490036379545927, "logps/rejected": -1.4914829730987549, "loss": 1.5915, "nll_loss": 0.3966403007507324, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006490036030299962, "rewards/margins": 0.1484992951154709, "rewards/rejected": -0.14914830029010773, "step": 6975 }, { "epoch": 4.824343015214384, "grad_norm": 7.369786262512207, "learning_rate": 2.8753649915475645e-05, "log_odds_chosen": 11.036026954650879, "log_odds_ratio": -5.5173979490064085e-05, "logits/chosen": -0.25902676582336426, "logits/rejected": -0.28121620416641235, "logps/chosen": -0.0019505223026499152, "logps/rejected": -2.6775636672973633, "loss": 1.3716, "nll_loss": 0.34289392828941345, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019505222735460848, "rewards/margins": 0.26756131649017334, "rewards/rejected": -0.2677563726902008, "step": 6976 }, { "epoch": 4.825034578146611, "grad_norm": 5.510984897613525, "learning_rate": 2.8749807899185494e-05, "log_odds_chosen": 9.41744613647461, "log_odds_ratio": -0.0012238244526088238, "logits/chosen": -0.10021279007196426, "logits/rejected": -0.1837349683046341, "logps/chosen": -0.0012109712697565556, "logps/rejected": -1.9403990507125854, "loss": 1.0165, "nll_loss": 0.2540140151977539, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012109712406527251, "rewards/margins": 0.19391882419586182, "rewards/rejected": -0.19403991103172302, "step": 6977 }, { "epoch": 4.825726141078838, "grad_norm": 10.768878936767578, "learning_rate": 2.8745965882895343e-05, "log_odds_chosen": 11.214912414550781, "log_odds_ratio": -4.51734995294828e-05, "logits/chosen": -0.8881174325942993, "logits/rejected": -0.8989272713661194, "logps/chosen": -0.00020168480114080012, "logps/rejected": -2.6242618560791016, "loss": 1.1215, "nll_loss": 0.28037723898887634, "rewards/accuracies": 1.0, "rewards/chosen": -2.016847975028213e-05, "rewards/margins": 0.26240602135658264, "rewards/rejected": -0.2624261975288391, "step": 6978 }, { "epoch": 4.826417704011065, "grad_norm": 6.553099155426025, "learning_rate": 2.87421238666052e-05, "log_odds_chosen": 9.763384819030762, "log_odds_ratio": -0.00017629990179557353, "logits/chosen": -0.584071695804596, "logits/rejected": -0.5996840000152588, "logps/chosen": -0.0059030367992818356, "logps/rejected": -1.842178463935852, "loss": 1.1901, "nll_loss": 0.29749614000320435, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005903037963435054, "rewards/margins": 0.18362754583358765, "rewards/rejected": -0.18421784043312073, "step": 6979 }, { "epoch": 4.827109266943292, "grad_norm": 9.622838973999023, "learning_rate": 2.8738281850315048e-05, "log_odds_chosen": 10.019664764404297, "log_odds_ratio": -0.0002728485851548612, "logits/chosen": -0.7304296493530273, "logits/rejected": -0.7358352541923523, "logps/chosen": -0.0003393233346287161, "logps/rejected": -1.7413800954818726, "loss": 1.1464, "nll_loss": 0.28656476736068726, "rewards/accuracies": 1.0, "rewards/chosen": -3.393233419046737e-05, "rewards/margins": 0.17410407960414886, "rewards/rejected": -0.17413800954818726, "step": 6980 }, { "epoch": 4.827800829875518, "grad_norm": 7.335915565490723, "learning_rate": 2.8734439834024897e-05, "log_odds_chosen": 9.990732192993164, "log_odds_ratio": -8.258214802481234e-05, "logits/chosen": -0.3298628330230713, "logits/rejected": -0.2266494482755661, "logps/chosen": -0.0004717921547126025, "logps/rejected": -1.9673501253128052, "loss": 0.9213, "nll_loss": 0.23031499981880188, "rewards/accuracies": 1.0, "rewards/chosen": -4.717921547126025e-05, "rewards/margins": 0.19668781757354736, "rewards/rejected": -0.19673500955104828, "step": 6981 }, { "epoch": 4.828492392807745, "grad_norm": 9.187235832214355, "learning_rate": 2.873059781773475e-05, "log_odds_chosen": 10.611069679260254, "log_odds_ratio": -9.819894330576062e-05, "logits/chosen": -0.44648605585098267, "logits/rejected": -0.5582661032676697, "logps/chosen": -0.00030931332730688155, "logps/rejected": -2.1796324253082275, "loss": 0.9753, "nll_loss": 0.24380630254745483, "rewards/accuracies": 1.0, "rewards/chosen": -3.093133636866696e-05, "rewards/margins": 0.2179323136806488, "rewards/rejected": -0.21796324849128723, "step": 6982 }, { "epoch": 4.829183955739972, "grad_norm": 16.725278854370117, "learning_rate": 2.87267558014446e-05, "log_odds_chosen": 9.19306755065918, "log_odds_ratio": -0.00032923344406299293, "logits/chosen": -0.07299592345952988, "logits/rejected": -0.12843886017799377, "logps/chosen": -0.0005057539092376828, "logps/rejected": -1.6225523948669434, "loss": 1.0773, "nll_loss": 0.269281268119812, "rewards/accuracies": 1.0, "rewards/chosen": -5.057539601693861e-05, "rewards/margins": 0.16220466792583466, "rewards/rejected": -0.16225524246692657, "step": 6983 }, { "epoch": 4.829875518672199, "grad_norm": 13.59363842010498, "learning_rate": 2.8722913785154447e-05, "log_odds_chosen": 10.44688606262207, "log_odds_ratio": -0.000149241866893135, "logits/chosen": -0.41675865650177, "logits/rejected": -0.5150689482688904, "logps/chosen": -0.0002905310539063066, "logps/rejected": -1.9604620933532715, "loss": 1.1596, "nll_loss": 0.28988033533096313, "rewards/accuracies": 1.0, "rewards/chosen": -2.905310429923702e-05, "rewards/margins": 0.19601714611053467, "rewards/rejected": -0.19604620337486267, "step": 6984 }, { "epoch": 4.830567081604426, "grad_norm": 12.085798263549805, "learning_rate": 2.8719071768864303e-05, "log_odds_chosen": 11.489034652709961, "log_odds_ratio": -1.973729013116099e-05, "logits/chosen": -0.454725980758667, "logits/rejected": -0.5726956725120544, "logps/chosen": -0.00019914706354029477, "logps/rejected": -2.6913857460021973, "loss": 0.9174, "nll_loss": 0.22934210300445557, "rewards/accuracies": 1.0, "rewards/chosen": -1.9914707081625238e-05, "rewards/margins": 0.26911866664886475, "rewards/rejected": -0.2691385746002197, "step": 6985 }, { "epoch": 4.8312586445366525, "grad_norm": 12.423589706420898, "learning_rate": 2.8715229752574152e-05, "log_odds_chosen": 9.722612380981445, "log_odds_ratio": -0.10177444666624069, "logits/chosen": -0.38128212094306946, "logits/rejected": -0.39975857734680176, "logps/chosen": -0.25344493985176086, "logps/rejected": -2.009124279022217, "loss": 0.9418, "nll_loss": 0.22527502477169037, "rewards/accuracies": 0.875, "rewards/chosen": -0.025344498455524445, "rewards/margins": 0.17556792497634888, "rewards/rejected": -0.2009124457836151, "step": 6986 }, { "epoch": 4.831950207468879, "grad_norm": 13.209919929504395, "learning_rate": 2.8711387736284e-05, "log_odds_chosen": 10.672974586486816, "log_odds_ratio": -0.020712081342935562, "logits/chosen": -0.2563137114048004, "logits/rejected": -0.28360506892204285, "logps/chosen": -0.00576416403055191, "logps/rejected": -2.3452091217041016, "loss": 1.2627, "nll_loss": 0.3136104345321655, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005764163797721267, "rewards/margins": 0.23394452035427094, "rewards/rejected": -0.23452092707157135, "step": 6987 }, { "epoch": 4.832641770401106, "grad_norm": 8.65539264678955, "learning_rate": 2.8707545719993857e-05, "log_odds_chosen": 10.266307830810547, "log_odds_ratio": -7.117915811249986e-05, "logits/chosen": -0.08035125583410263, "logits/rejected": -0.17950567603111267, "logps/chosen": -0.0002787821867968887, "logps/rejected": -2.0251896381378174, "loss": 1.1863, "nll_loss": 0.296577513217926, "rewards/accuracies": 1.0, "rewards/chosen": -2.787821904348675e-05, "rewards/margins": 0.20249108970165253, "rewards/rejected": -0.20251896977424622, "step": 6988 }, { "epoch": 4.833333333333333, "grad_norm": 17.296316146850586, "learning_rate": 2.8703703703703706e-05, "log_odds_chosen": 9.091753005981445, "log_odds_ratio": -0.048046279698610306, "logits/chosen": -0.3633042275905609, "logits/rejected": -0.3943461775779724, "logps/chosen": -0.01235463097691536, "logps/rejected": -2.001265048980713, "loss": 1.1691, "nll_loss": 0.2874664068222046, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012354630744084716, "rewards/margins": 0.19889101386070251, "rewards/rejected": -0.2001264989376068, "step": 6989 }, { "epoch": 4.83402489626556, "grad_norm": 12.84473705291748, "learning_rate": 2.8699861687413555e-05, "log_odds_chosen": 10.6940279006958, "log_odds_ratio": -0.00028842902975156903, "logits/chosen": -0.41434893012046814, "logits/rejected": -0.48944592475891113, "logps/chosen": -0.00044106371933594346, "logps/rejected": -2.1125221252441406, "loss": 2.0763, "nll_loss": 0.519040584564209, "rewards/accuracies": 1.0, "rewards/chosen": -4.4106371205998585e-05, "rewards/margins": 0.21120813488960266, "rewards/rejected": -0.21125222742557526, "step": 6990 }, { "epoch": 4.834716459197787, "grad_norm": 8.409330368041992, "learning_rate": 2.8696019671123408e-05, "log_odds_chosen": 10.525460243225098, "log_odds_ratio": -0.0009910862427204847, "logits/chosen": -0.2692156136035919, "logits/rejected": -0.24852915108203888, "logps/chosen": -0.015924440696835518, "logps/rejected": -2.109683036804199, "loss": 1.0572, "nll_loss": 0.2641902565956116, "rewards/accuracies": 1.0, "rewards/chosen": -0.001592444023117423, "rewards/margins": 0.20937585830688477, "rewards/rejected": -0.21096831560134888, "step": 6991 }, { "epoch": 4.8354080221300135, "grad_norm": 8.460469245910645, "learning_rate": 2.8692177654833257e-05, "log_odds_chosen": 9.595645904541016, "log_odds_ratio": -0.00011171397636644542, "logits/chosen": -0.2434559166431427, "logits/rejected": -0.3016476631164551, "logps/chosen": -0.0008374938042834401, "logps/rejected": -1.8347755670547485, "loss": 1.1008, "nll_loss": 0.2751849889755249, "rewards/accuracies": 1.0, "rewards/chosen": -8.37493862491101e-05, "rewards/margins": 0.18339380621910095, "rewards/rejected": -0.18347755074501038, "step": 6992 }, { "epoch": 4.83609958506224, "grad_norm": 12.384178161621094, "learning_rate": 2.8688335638543106e-05, "log_odds_chosen": 10.073097229003906, "log_odds_ratio": -0.00015708267164882272, "logits/chosen": -0.8474388718605042, "logits/rejected": -0.8515263795852661, "logps/chosen": -0.00024788122391328216, "logps/rejected": -1.6563420295715332, "loss": 0.7981, "nll_loss": 0.1995052993297577, "rewards/accuracies": 1.0, "rewards/chosen": -2.4788121663732454e-05, "rewards/margins": 0.1656094193458557, "rewards/rejected": -0.1656341850757599, "step": 6993 }, { "epoch": 4.836791147994467, "grad_norm": 18.428924560546875, "learning_rate": 2.868449362225296e-05, "log_odds_chosen": 8.584789276123047, "log_odds_ratio": -0.01099585834890604, "logits/chosen": -0.6042527556419373, "logits/rejected": -0.6270811557769775, "logps/chosen": -0.10832807421684265, "logps/rejected": -2.304208993911743, "loss": 1.3723, "nll_loss": 0.3419734239578247, "rewards/accuracies": 1.0, "rewards/chosen": -0.01083280798047781, "rewards/margins": 0.21958810091018677, "rewards/rejected": -0.23042091727256775, "step": 6994 }, { "epoch": 4.837482710926694, "grad_norm": 13.578597068786621, "learning_rate": 2.868065160596281e-05, "log_odds_chosen": 9.709571838378906, "log_odds_ratio": -0.0009176249150186777, "logits/chosen": -0.922393798828125, "logits/rejected": -0.9200150966644287, "logps/chosen": -0.012347464449703693, "logps/rejected": -1.901907205581665, "loss": 1.4535, "nll_loss": 0.3632957339286804, "rewards/accuracies": 1.0, "rewards/chosen": -0.001234746421687305, "rewards/margins": 0.18895597755908966, "rewards/rejected": -0.19019073247909546, "step": 6995 }, { "epoch": 4.838174273858921, "grad_norm": 8.315958023071289, "learning_rate": 2.867680958967266e-05, "log_odds_chosen": 9.92038345336914, "log_odds_ratio": -0.0030150411184877157, "logits/chosen": -0.27385860681533813, "logits/rejected": -0.32332712411880493, "logps/chosen": -0.0008827511919662356, "logps/rejected": -2.2805981636047363, "loss": 1.1089, "nll_loss": 0.27691400051116943, "rewards/accuracies": 1.0, "rewards/chosen": -8.827511919662356e-05, "rewards/margins": 0.22797155380249023, "rewards/rejected": -0.2280598282814026, "step": 6996 }, { "epoch": 4.838865836791148, "grad_norm": 7.787275791168213, "learning_rate": 2.8672967573382516e-05, "log_odds_chosen": 10.508706092834473, "log_odds_ratio": -9.931164822774008e-05, "logits/chosen": -0.5204631686210632, "logits/rejected": -0.5042663812637329, "logps/chosen": -0.0002793048042804003, "logps/rejected": -2.2063889503479004, "loss": 0.8487, "nll_loss": 0.21217229962348938, "rewards/accuracies": 1.0, "rewards/chosen": -2.793048224702943e-05, "rewards/margins": 0.22061097621917725, "rewards/rejected": -0.2206389158964157, "step": 6997 }, { "epoch": 4.8395573997233745, "grad_norm": 9.41072940826416, "learning_rate": 2.8669125557092365e-05, "log_odds_chosen": 10.23240852355957, "log_odds_ratio": -5.44110698683653e-05, "logits/chosen": -0.5337792634963989, "logits/rejected": -0.634337842464447, "logps/chosen": -0.0015219207853078842, "logps/rejected": -2.103792190551758, "loss": 0.7162, "nll_loss": 0.17903977632522583, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001521920639788732, "rewards/margins": 0.21022702753543854, "rewards/rejected": -0.2103792130947113, "step": 6998 }, { "epoch": 4.840248962655601, "grad_norm": 13.882169723510742, "learning_rate": 2.8665283540802214e-05, "log_odds_chosen": 10.442000389099121, "log_odds_ratio": -0.00022704862931277603, "logits/chosen": -0.5982738137245178, "logits/rejected": -0.7945213317871094, "logps/chosen": -0.003369520418345928, "logps/rejected": -2.137873888015747, "loss": 0.9619, "nll_loss": 0.24045637249946594, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003369520418345928, "rewards/margins": 0.21345043182373047, "rewards/rejected": -0.21378740668296814, "step": 6999 }, { "epoch": 4.840940525587828, "grad_norm": 7.5849409103393555, "learning_rate": 2.8661441524512066e-05, "log_odds_chosen": 8.664499282836914, "log_odds_ratio": -0.0005070245242677629, "logits/chosen": -0.5758646130561829, "logits/rejected": -0.5692422986030579, "logps/chosen": -0.0009210213320329785, "logps/rejected": -1.2004210948944092, "loss": 0.7885, "nll_loss": 0.19706769287586212, "rewards/accuracies": 1.0, "rewards/chosen": -9.210212738253176e-05, "rewards/margins": 0.11995001137256622, "rewards/rejected": -0.1200421079993248, "step": 7000 }, { "epoch": 4.841632088520055, "grad_norm": 6.227912902832031, "learning_rate": 2.8657599508221915e-05, "log_odds_chosen": 9.58493709564209, "log_odds_ratio": -0.0006402077851817012, "logits/chosen": -0.5111309885978699, "logits/rejected": -0.5352218747138977, "logps/chosen": -0.0005271884147077799, "logps/rejected": -1.3354413509368896, "loss": 0.7259, "nll_loss": 0.1814233660697937, "rewards/accuracies": 1.0, "rewards/chosen": -5.271883856039494e-05, "rewards/margins": 0.1334914267063141, "rewards/rejected": -0.13354414701461792, "step": 7001 }, { "epoch": 4.842323651452282, "grad_norm": 11.530202865600586, "learning_rate": 2.8653757491931764e-05, "log_odds_chosen": 10.981690406799316, "log_odds_ratio": -2.312998731213156e-05, "logits/chosen": -0.8751830458641052, "logits/rejected": -0.8583577871322632, "logps/chosen": -0.00014386913971975446, "logps/rejected": -2.095196008682251, "loss": 1.1425, "nll_loss": 0.2856108248233795, "rewards/accuracies": 1.0, "rewards/chosen": -1.4386915609065909e-05, "rewards/margins": 0.20950521528720856, "rewards/rejected": -0.20951959490776062, "step": 7002 }, { "epoch": 4.843015214384509, "grad_norm": 5.708413124084473, "learning_rate": 2.864991547564162e-05, "log_odds_chosen": 9.564705848693848, "log_odds_ratio": -0.000253094854997471, "logits/chosen": -0.4983338713645935, "logits/rejected": -0.4991118907928467, "logps/chosen": -0.00042362918611615896, "logps/rejected": -1.703924298286438, "loss": 1.1459, "nll_loss": 0.2864604890346527, "rewards/accuracies": 1.0, "rewards/chosen": -4.2362917156424373e-05, "rewards/margins": 0.1703500598669052, "rewards/rejected": -0.17039242386817932, "step": 7003 }, { "epoch": 4.8437067773167355, "grad_norm": 6.260480880737305, "learning_rate": 2.864607345935147e-05, "log_odds_chosen": 9.71456527709961, "log_odds_ratio": -0.00022481786436401308, "logits/chosen": -0.38238197565078735, "logits/rejected": -0.4862619638442993, "logps/chosen": -0.006300437729805708, "logps/rejected": -2.351069450378418, "loss": 1.3251, "nll_loss": 0.3312584161758423, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006300437962636352, "rewards/margins": 0.23447692394256592, "rewards/rejected": -0.235106959939003, "step": 7004 }, { "epoch": 4.844398340248962, "grad_norm": 10.423199653625488, "learning_rate": 2.8642231443061318e-05, "log_odds_chosen": 9.46685791015625, "log_odds_ratio": -0.0017505851574242115, "logits/chosen": -0.457302451133728, "logits/rejected": -0.4845985770225525, "logps/chosen": -0.0013079033233225346, "logps/rejected": -1.8090081214904785, "loss": 1.1001, "nll_loss": 0.2748434543609619, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001307903294218704, "rewards/margins": 0.18077000975608826, "rewards/rejected": -0.18090081214904785, "step": 7005 }, { "epoch": 4.845089903181189, "grad_norm": 11.410719871520996, "learning_rate": 2.8638389426771174e-05, "log_odds_chosen": 10.684247970581055, "log_odds_ratio": -0.00015776841610204428, "logits/chosen": -1.0845293998718262, "logits/rejected": -1.1410024166107178, "logps/chosen": -0.0006103913183324039, "logps/rejected": -2.401841640472412, "loss": 1.2811, "nll_loss": 0.32025855779647827, "rewards/accuracies": 1.0, "rewards/chosen": -6.103913619881496e-05, "rewards/margins": 0.24012315273284912, "rewards/rejected": -0.24018418788909912, "step": 7006 }, { "epoch": 4.845781466113416, "grad_norm": 6.313530445098877, "learning_rate": 2.8634547410481023e-05, "log_odds_chosen": 10.980323791503906, "log_odds_ratio": -0.0005783824599348009, "logits/chosen": -0.41384977102279663, "logits/rejected": -0.47831642627716064, "logps/chosen": -0.00023087850422598422, "logps/rejected": -2.7741446495056152, "loss": 1.0061, "nll_loss": 0.25145581364631653, "rewards/accuracies": 1.0, "rewards/chosen": -2.308785042259842e-05, "rewards/margins": 0.27739137411117554, "rewards/rejected": -0.277414470911026, "step": 7007 }, { "epoch": 4.846473029045643, "grad_norm": 11.239285469055176, "learning_rate": 2.8630705394190872e-05, "log_odds_chosen": 11.080429077148438, "log_odds_ratio": -3.404112794669345e-05, "logits/chosen": -0.07994014769792557, "logits/rejected": -0.24010121822357178, "logps/chosen": -0.0002886131114792079, "logps/rejected": -2.016791582107544, "loss": 0.9119, "nll_loss": 0.2279655486345291, "rewards/accuracies": 1.0, "rewards/chosen": -2.8861313694505952e-05, "rewards/margins": 0.20165029168128967, "rewards/rejected": -0.20167915523052216, "step": 7008 }, { "epoch": 4.84716459197787, "grad_norm": 10.09489917755127, "learning_rate": 2.8626863377900725e-05, "log_odds_chosen": 10.397052764892578, "log_odds_ratio": -0.0014343769289553165, "logits/chosen": -0.4876552224159241, "logits/rejected": -0.5108673572540283, "logps/chosen": -0.0017708453815430403, "logps/rejected": -2.815434217453003, "loss": 1.2702, "nll_loss": 0.3173943758010864, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017708452651277184, "rewards/margins": 0.28136634826660156, "rewards/rejected": -0.28154343366622925, "step": 7009 }, { "epoch": 4.8478561549100965, "grad_norm": 5.118479251861572, "learning_rate": 2.8623021361610574e-05, "log_odds_chosen": 10.387958526611328, "log_odds_ratio": -0.0003742785775102675, "logits/chosen": -0.7877905368804932, "logits/rejected": -0.8531047105789185, "logps/chosen": -0.004792619496583939, "logps/rejected": -2.6325814723968506, "loss": 0.7409, "nll_loss": 0.18518517911434174, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004792619729414582, "rewards/margins": 0.2627788782119751, "rewards/rejected": -0.263258159160614, "step": 7010 }, { "epoch": 4.848547717842323, "grad_norm": 8.308259010314941, "learning_rate": 2.8619179345320423e-05, "log_odds_chosen": 10.528495788574219, "log_odds_ratio": -3.735262362170033e-05, "logits/chosen": -0.07867129892110825, "logits/rejected": -0.20108526945114136, "logps/chosen": -0.0003321646945551038, "logps/rejected": -2.288141965866089, "loss": 0.8417, "nll_loss": 0.2104172706604004, "rewards/accuracies": 1.0, "rewards/chosen": -3.32164709107019e-05, "rewards/margins": 0.22878098487854004, "rewards/rejected": -0.22881419956684113, "step": 7011 }, { "epoch": 4.84923928077455, "grad_norm": 6.285458564758301, "learning_rate": 2.861533732903028e-05, "log_odds_chosen": 9.053383827209473, "log_odds_ratio": -0.018818309530615807, "logits/chosen": -0.09861317276954651, "logits/rejected": -0.31000280380249023, "logps/chosen": -0.0163175780326128, "logps/rejected": -1.712219476699829, "loss": 1.0068, "nll_loss": 0.24982908368110657, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016317577101290226, "rewards/margins": 0.16959017515182495, "rewards/rejected": -0.17122194170951843, "step": 7012 }, { "epoch": 4.849930843706777, "grad_norm": 17.221817016601562, "learning_rate": 2.8611495312740128e-05, "log_odds_chosen": 10.633796691894531, "log_odds_ratio": -4.5296914322534576e-05, "logits/chosen": -0.3142685890197754, "logits/rejected": -0.3037078380584717, "logps/chosen": -0.00020182921434752643, "logps/rejected": -1.8377907276153564, "loss": 0.9146, "nll_loss": 0.2286510318517685, "rewards/accuracies": 1.0, "rewards/chosen": -2.018292070715688e-05, "rewards/margins": 0.18375888466835022, "rewards/rejected": -0.1837790608406067, "step": 7013 }, { "epoch": 4.850622406639004, "grad_norm": 5.6909990310668945, "learning_rate": 2.8607653296449977e-05, "log_odds_chosen": 9.910855293273926, "log_odds_ratio": -0.0014734583673998713, "logits/chosen": -0.3158652186393738, "logits/rejected": -0.31831100583076477, "logps/chosen": -0.0014183268649503589, "logps/rejected": -2.021665096282959, "loss": 1.088, "nll_loss": 0.27186426520347595, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001418326864950359, "rewards/margins": 0.2020246684551239, "rewards/rejected": -0.20216649770736694, "step": 7014 }, { "epoch": 4.851313969571231, "grad_norm": 10.531902313232422, "learning_rate": 2.8603811280159832e-05, "log_odds_chosen": 11.14271354675293, "log_odds_ratio": -3.343543357914314e-05, "logits/chosen": -0.63079434633255, "logits/rejected": -0.6695267558097839, "logps/chosen": -9.733759361552075e-05, "logps/rejected": -1.8637280464172363, "loss": 0.9089, "nll_loss": 0.22723162174224854, "rewards/accuracies": 1.0, "rewards/chosen": -9.733759725349955e-06, "rewards/margins": 0.1863630712032318, "rewards/rejected": -0.1863728165626526, "step": 7015 }, { "epoch": 4.8520055325034575, "grad_norm": 12.947765350341797, "learning_rate": 2.859996926386968e-05, "log_odds_chosen": 11.508096694946289, "log_odds_ratio": -2.6764759240904823e-05, "logits/chosen": -1.0222357511520386, "logits/rejected": -1.0842270851135254, "logps/chosen": -0.00017423175449948758, "logps/rejected": -2.8178083896636963, "loss": 1.558, "nll_loss": 0.3895052671432495, "rewards/accuracies": 1.0, "rewards/chosen": -1.7423175449948758e-05, "rewards/margins": 0.2817634344100952, "rewards/rejected": -0.281780868768692, "step": 7016 }, { "epoch": 4.852697095435684, "grad_norm": 20.07524871826172, "learning_rate": 2.859612724757953e-05, "log_odds_chosen": 11.379074096679688, "log_odds_ratio": -1.8707838535192423e-05, "logits/chosen": -0.13574762642383575, "logits/rejected": -0.19763877987861633, "logps/chosen": -0.0002729441621340811, "logps/rejected": -2.996054172515869, "loss": 1.7399, "nll_loss": 0.4349702298641205, "rewards/accuracies": 1.0, "rewards/chosen": -2.7294418032397516e-05, "rewards/margins": 0.29957813024520874, "rewards/rejected": -0.29960542917251587, "step": 7017 }, { "epoch": 4.853388658367911, "grad_norm": 7.500363826751709, "learning_rate": 2.8592285231289383e-05, "log_odds_chosen": 9.627655982971191, "log_odds_ratio": -0.0007912968285381794, "logits/chosen": -0.3740471601486206, "logits/rejected": -0.49173182249069214, "logps/chosen": -0.0012759091332554817, "logps/rejected": -2.257746696472168, "loss": 1.0627, "nll_loss": 0.2655911445617676, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012759091623593122, "rewards/margins": 0.22564706206321716, "rewards/rejected": -0.22577466070652008, "step": 7018 }, { "epoch": 4.854080221300138, "grad_norm": 8.293935775756836, "learning_rate": 2.8588443214999232e-05, "log_odds_chosen": 10.88177490234375, "log_odds_ratio": -2.4730215955059975e-05, "logits/chosen": -0.7868159413337708, "logits/rejected": -0.8734317421913147, "logps/chosen": -0.00011132473446195945, "logps/rejected": -1.7708433866500854, "loss": 0.6616, "nll_loss": 0.16538910567760468, "rewards/accuracies": 1.0, "rewards/chosen": -1.1132473446195945e-05, "rewards/margins": 0.17707321047782898, "rewards/rejected": -0.17708435654640198, "step": 7019 }, { "epoch": 4.854771784232365, "grad_norm": 8.555705070495605, "learning_rate": 2.858460119870908e-05, "log_odds_chosen": 10.491169929504395, "log_odds_ratio": -5.717053500120528e-05, "logits/chosen": -0.24532398581504822, "logits/rejected": -0.32132548093795776, "logps/chosen": -0.0002730230917222798, "logps/rejected": -2.187023162841797, "loss": 0.7002, "nll_loss": 0.1750478297472, "rewards/accuracies": 1.0, "rewards/chosen": -2.7302312446408905e-05, "rewards/margins": 0.21867501735687256, "rewards/rejected": -0.2187023162841797, "step": 7020 }, { "epoch": 4.855463347164592, "grad_norm": 10.897348403930664, "learning_rate": 2.8580759182418937e-05, "log_odds_chosen": 10.51218032836914, "log_odds_ratio": -4.524239921011031e-05, "logits/chosen": -0.3870241045951843, "logits/rejected": -0.4795756936073303, "logps/chosen": -0.00019211246399208903, "logps/rejected": -1.650565505027771, "loss": 0.9654, "nll_loss": 0.24134519696235657, "rewards/accuracies": 1.0, "rewards/chosen": -1.9211245671613142e-05, "rewards/margins": 0.1650373339653015, "rewards/rejected": -0.16505655646324158, "step": 7021 }, { "epoch": 4.856154910096818, "grad_norm": 9.62659740447998, "learning_rate": 2.8576917166128786e-05, "log_odds_chosen": 10.597943305969238, "log_odds_ratio": -9.808303730096668e-05, "logits/chosen": -0.6156014204025269, "logits/rejected": -0.6655142903327942, "logps/chosen": -0.0015430478379130363, "logps/rejected": -2.7433271408081055, "loss": 1.0996, "nll_loss": 0.2748914062976837, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015430479834321886, "rewards/margins": 0.2741783857345581, "rewards/rejected": -0.2743327021598816, "step": 7022 }, { "epoch": 4.856846473029045, "grad_norm": 16.081926345825195, "learning_rate": 2.8573075149838635e-05, "log_odds_chosen": 10.689600944519043, "log_odds_ratio": -0.0007699825218878686, "logits/chosen": -0.5941533446311951, "logits/rejected": -0.5842257738113403, "logps/chosen": -0.0030286216642707586, "logps/rejected": -2.574476718902588, "loss": 0.8608, "nll_loss": 0.21511797606945038, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030286217224784195, "rewards/margins": 0.2571448087692261, "rewards/rejected": -0.25744765996932983, "step": 7023 }, { "epoch": 4.857538035961272, "grad_norm": 9.591567993164062, "learning_rate": 2.856923313354849e-05, "log_odds_chosen": 11.012365341186523, "log_odds_ratio": -3.3873315260279924e-05, "logits/chosen": -0.5489360094070435, "logits/rejected": -0.6033146977424622, "logps/chosen": -0.00011565893510123715, "logps/rejected": -1.677449107170105, "loss": 0.6449, "nll_loss": 0.16123202443122864, "rewards/accuracies": 1.0, "rewards/chosen": -1.1565893146325834e-05, "rewards/margins": 0.1677333414554596, "rewards/rejected": -0.16774490475654602, "step": 7024 }, { "epoch": 4.858229598893499, "grad_norm": 22.097078323364258, "learning_rate": 2.856539111725834e-05, "log_odds_chosen": 8.750368118286133, "log_odds_ratio": -0.19398075342178345, "logits/chosen": -0.41244447231292725, "logits/rejected": -0.40756645798683167, "logps/chosen": -0.022159744054079056, "logps/rejected": -1.5988523960113525, "loss": 0.9419, "nll_loss": 0.21607756614685059, "rewards/accuracies": 0.875, "rewards/chosen": -0.002215974498540163, "rewards/margins": 0.1576692759990692, "rewards/rejected": -0.1598852425813675, "step": 7025 }, { "epoch": 4.858921161825726, "grad_norm": 8.726022720336914, "learning_rate": 2.856154910096819e-05, "log_odds_chosen": 9.916749954223633, "log_odds_ratio": -0.00024162212503142655, "logits/chosen": -0.3288005590438843, "logits/rejected": -0.5277249813079834, "logps/chosen": -0.0008131344802677631, "logps/rejected": -1.8410141468048096, "loss": 1.2813, "nll_loss": 0.3203001022338867, "rewards/accuracies": 1.0, "rewards/chosen": -8.13134538475424e-05, "rewards/margins": 0.18402010202407837, "rewards/rejected": -0.1841014176607132, "step": 7026 }, { "epoch": 4.8596127247579535, "grad_norm": 5.5701704025268555, "learning_rate": 2.855770708467804e-05, "log_odds_chosen": 10.251022338867188, "log_odds_ratio": -0.00010756327537819743, "logits/chosen": -0.4105292558670044, "logits/rejected": -0.47585058212280273, "logps/chosen": -0.0001659254776313901, "logps/rejected": -1.7069003582000732, "loss": 0.636, "nll_loss": 0.1589965969324112, "rewards/accuracies": 1.0, "rewards/chosen": -1.659254849073477e-05, "rewards/margins": 0.1706734299659729, "rewards/rejected": -0.17069002985954285, "step": 7027 }, { "epoch": 4.86030428769018, "grad_norm": 8.971461296081543, "learning_rate": 2.855386506838789e-05, "log_odds_chosen": 11.052430152893066, "log_odds_ratio": -3.381207716302015e-05, "logits/chosen": -0.43431317806243896, "logits/rejected": -0.46402910351753235, "logps/chosen": -0.00028077964088879526, "logps/rejected": -2.6134989261627197, "loss": 0.7517, "nll_loss": 0.18791866302490234, "rewards/accuracies": 1.0, "rewards/chosen": -2.807796590786893e-05, "rewards/margins": 0.2613218128681183, "rewards/rejected": -0.2613498866558075, "step": 7028 }, { "epoch": 4.860995850622407, "grad_norm": 8.809112548828125, "learning_rate": 2.855002305209774e-05, "log_odds_chosen": 8.960859298706055, "log_odds_ratio": -0.0003623150405474007, "logits/chosen": -0.8475053310394287, "logits/rejected": -0.8835227489471436, "logps/chosen": -0.0005206743371672928, "logps/rejected": -1.0224153995513916, "loss": 1.1576, "nll_loss": 0.2893637418746948, "rewards/accuracies": 1.0, "rewards/chosen": -5.2067429351154715e-05, "rewards/margins": 0.10218948125839233, "rewards/rejected": -0.10224154591560364, "step": 7029 }, { "epoch": 4.861687413554634, "grad_norm": 8.545720100402832, "learning_rate": 2.8546181035807595e-05, "log_odds_chosen": 9.107865333557129, "log_odds_ratio": -0.004004120826721191, "logits/chosen": -0.6395995616912842, "logits/rejected": -0.6510448455810547, "logps/chosen": -0.010273730382323265, "logps/rejected": -2.113751173019409, "loss": 1.369, "nll_loss": 0.34184643626213074, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010273730149492621, "rewards/margins": 0.2103477418422699, "rewards/rejected": -0.2113751322031021, "step": 7030 }, { "epoch": 4.862378976486861, "grad_norm": 9.708702087402344, "learning_rate": 2.8542339019517444e-05, "log_odds_chosen": 10.770926475524902, "log_odds_ratio": -3.156045568175614e-05, "logits/chosen": -0.30541956424713135, "logits/rejected": -0.33389005064964294, "logps/chosen": -0.0002467721060384065, "logps/rejected": -2.4047722816467285, "loss": 1.1034, "nll_loss": 0.27585557103157043, "rewards/accuracies": 1.0, "rewards/chosen": -2.467721060384065e-05, "rewards/margins": 0.24045255780220032, "rewards/rejected": -0.24047723412513733, "step": 7031 }, { "epoch": 4.863070539419088, "grad_norm": 5.549326419830322, "learning_rate": 2.8538497003227294e-05, "log_odds_chosen": 10.177644729614258, "log_odds_ratio": -7.75428197812289e-05, "logits/chosen": -0.5677988529205322, "logits/rejected": -0.5510009527206421, "logps/chosen": -0.000404900754801929, "logps/rejected": -1.6147857904434204, "loss": 1.0492, "nll_loss": 0.26228851079940796, "rewards/accuracies": 1.0, "rewards/chosen": -4.049007475259714e-05, "rewards/margins": 0.16143809258937836, "rewards/rejected": -0.16147857904434204, "step": 7032 }, { "epoch": 4.8637621023513145, "grad_norm": 7.651041030883789, "learning_rate": 2.853465498693715e-05, "log_odds_chosen": 9.282649993896484, "log_odds_ratio": -0.0006824180600233376, "logits/chosen": -0.8561673164367676, "logits/rejected": -0.893168032169342, "logps/chosen": -0.007127598859369755, "logps/rejected": -1.5555087327957153, "loss": 1.0692, "nll_loss": 0.2672296166419983, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007127599674277008, "rewards/margins": 0.15483811497688293, "rewards/rejected": -0.15555086731910706, "step": 7033 }, { "epoch": 4.864453665283541, "grad_norm": 13.443490982055664, "learning_rate": 2.8530812970647e-05, "log_odds_chosen": 7.656833648681641, "log_odds_ratio": -0.35919690132141113, "logits/chosen": -0.3445935845375061, "logits/rejected": -0.3648972809314728, "logps/chosen": -0.051969386637210846, "logps/rejected": -2.030459403991699, "loss": 1.4184, "nll_loss": 0.31868651509284973, "rewards/accuracies": 0.875, "rewards/chosen": -0.005196938756853342, "rewards/margins": 0.19784902036190033, "rewards/rejected": -0.20304596424102783, "step": 7034 }, { "epoch": 4.865145228215768, "grad_norm": 7.0616278648376465, "learning_rate": 2.8526970954356847e-05, "log_odds_chosen": 9.736413955688477, "log_odds_ratio": -0.00019674711802508682, "logits/chosen": -0.4405166804790497, "logits/rejected": -0.5529405474662781, "logps/chosen": -0.0004194923967588693, "logps/rejected": -1.604013204574585, "loss": 1.2811, "nll_loss": 0.32026198506355286, "rewards/accuracies": 1.0, "rewards/chosen": -4.194923531031236e-05, "rewards/margins": 0.16035938262939453, "rewards/rejected": -0.16040131449699402, "step": 7035 }, { "epoch": 4.865836791147995, "grad_norm": 6.439877986907959, "learning_rate": 2.85231289380667e-05, "log_odds_chosen": 9.398344039916992, "log_odds_ratio": -0.0017098677344620228, "logits/chosen": -0.5163373351097107, "logits/rejected": -0.5247904062271118, "logps/chosen": -0.001775981392711401, "logps/rejected": -1.9271403551101685, "loss": 0.9267, "nll_loss": 0.23149679601192474, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017759812180884182, "rewards/margins": 0.19253644347190857, "rewards/rejected": -0.19271403551101685, "step": 7036 }, { "epoch": 4.866528354080222, "grad_norm": 16.124523162841797, "learning_rate": 2.851928692177655e-05, "log_odds_chosen": 10.64400863647461, "log_odds_ratio": -6.0084301367169246e-05, "logits/chosen": -0.8334964513778687, "logits/rejected": -0.8010177612304688, "logps/chosen": -0.0005602404708042741, "logps/rejected": -2.1358814239501953, "loss": 0.9616, "nll_loss": 0.2403859794139862, "rewards/accuracies": 1.0, "rewards/chosen": -5.602405144600198e-05, "rewards/margins": 0.21353211998939514, "rewards/rejected": -0.213588148355484, "step": 7037 }, { "epoch": 4.867219917012449, "grad_norm": 6.622456073760986, "learning_rate": 2.8515444905486398e-05, "log_odds_chosen": 10.069990158081055, "log_odds_ratio": -0.0001646141754463315, "logits/chosen": -0.4626843333244324, "logits/rejected": -0.5455853343009949, "logps/chosen": -0.004076323471963406, "logps/rejected": -2.6835875511169434, "loss": 1.5953, "nll_loss": 0.3988024592399597, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040763235301710665, "rewards/margins": 0.2679511308670044, "rewards/rejected": -0.2683587670326233, "step": 7038 }, { "epoch": 4.867911479944675, "grad_norm": 6.874902725219727, "learning_rate": 2.8511602889196254e-05, "log_odds_chosen": 10.539509773254395, "log_odds_ratio": -4.3701493268599734e-05, "logits/chosen": -0.6427075862884521, "logits/rejected": -0.727043092250824, "logps/chosen": -0.00018337485380470753, "logps/rejected": -1.7754486799240112, "loss": 0.85, "nll_loss": 0.21248428523540497, "rewards/accuracies": 1.0, "rewards/chosen": -1.8337485016672872e-05, "rewards/margins": 0.1775265336036682, "rewards/rejected": -0.17754487693309784, "step": 7039 }, { "epoch": 4.868603042876902, "grad_norm": 5.979666233062744, "learning_rate": 2.8507760872906103e-05, "log_odds_chosen": 10.752289772033691, "log_odds_ratio": -6.954609852982685e-05, "logits/chosen": -0.3465648889541626, "logits/rejected": -0.4407429099082947, "logps/chosen": -0.000171839288668707, "logps/rejected": -1.8220527172088623, "loss": 0.9089, "nll_loss": 0.2272091805934906, "rewards/accuracies": 1.0, "rewards/chosen": -1.7183931049657986e-05, "rewards/margins": 0.18218809366226196, "rewards/rejected": -0.18220525979995728, "step": 7040 }, { "epoch": 4.869294605809129, "grad_norm": 3.938154935836792, "learning_rate": 2.8503918856615952e-05, "log_odds_chosen": 9.546857833862305, "log_odds_ratio": -0.0006905286572873592, "logits/chosen": -0.13503634929656982, "logits/rejected": -0.14862604439258575, "logps/chosen": -0.0007053675362840295, "logps/rejected": -1.8098433017730713, "loss": 0.9726, "nll_loss": 0.24308504164218903, "rewards/accuracies": 1.0, "rewards/chosen": -7.053676381474361e-05, "rewards/margins": 0.1809138059616089, "rewards/rejected": -0.18098433315753937, "step": 7041 }, { "epoch": 4.869986168741356, "grad_norm": 8.758733749389648, "learning_rate": 2.8500076840325808e-05, "log_odds_chosen": 9.40017318725586, "log_odds_ratio": -0.00036749555147252977, "logits/chosen": -0.7903873920440674, "logits/rejected": -0.8652528524398804, "logps/chosen": -0.007027729880064726, "logps/rejected": -2.663066864013672, "loss": 1.3046, "nll_loss": 0.32612505555152893, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007027729880064726, "rewards/margins": 0.2656038999557495, "rewards/rejected": -0.26630666851997375, "step": 7042 }, { "epoch": 4.870677731673583, "grad_norm": 12.399148941040039, "learning_rate": 2.8496234824035657e-05, "log_odds_chosen": 9.200675010681152, "log_odds_ratio": -0.0005780202336609364, "logits/chosen": -0.45060375332832336, "logits/rejected": -0.4034438133239746, "logps/chosen": -0.003487096168100834, "logps/rejected": -1.8712232112884521, "loss": 1.0474, "nll_loss": 0.26178497076034546, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003487096109893173, "rewards/margins": 0.18677359819412231, "rewards/rejected": -0.18712231516838074, "step": 7043 }, { "epoch": 4.87136929460581, "grad_norm": 17.244375228881836, "learning_rate": 2.8492392807745506e-05, "log_odds_chosen": 10.586780548095703, "log_odds_ratio": -0.0002845847629942, "logits/chosen": -0.7591636776924133, "logits/rejected": -0.7484526634216309, "logps/chosen": -0.00022362990421243012, "logps/rejected": -2.1559767723083496, "loss": 1.7396, "nll_loss": 0.4348672032356262, "rewards/accuracies": 1.0, "rewards/chosen": -2.2362992240232415e-05, "rewards/margins": 0.21557533740997314, "rewards/rejected": -0.2155977040529251, "step": 7044 }, { "epoch": 4.872060857538036, "grad_norm": 8.071002960205078, "learning_rate": 2.848855079145536e-05, "log_odds_chosen": 9.05749225616455, "log_odds_ratio": -0.11127299070358276, "logits/chosen": -0.3790587782859802, "logits/rejected": -0.41623783111572266, "logps/chosen": -0.021599093452095985, "logps/rejected": -2.0281240940093994, "loss": 1.1519, "nll_loss": 0.27685868740081787, "rewards/accuracies": 0.875, "rewards/chosen": -0.0021599093452095985, "rewards/margins": 0.20065252482891083, "rewards/rejected": -0.20281243324279785, "step": 7045 }, { "epoch": 4.872752420470263, "grad_norm": 6.992929935455322, "learning_rate": 2.8484708775165207e-05, "log_odds_chosen": 11.098962783813477, "log_odds_ratio": -5.613052780972794e-05, "logits/chosen": -0.5032197833061218, "logits/rejected": -0.4811609387397766, "logps/chosen": -0.00013649038737639785, "logps/rejected": -2.3080101013183594, "loss": 0.7479, "nll_loss": 0.1869587004184723, "rewards/accuracies": 1.0, "rewards/chosen": -1.3649039829033427e-05, "rewards/margins": 0.23078739643096924, "rewards/rejected": -0.2308010309934616, "step": 7046 }, { "epoch": 4.87344398340249, "grad_norm": 9.915785789489746, "learning_rate": 2.8480866758875056e-05, "log_odds_chosen": 9.496391296386719, "log_odds_ratio": -0.02560707926750183, "logits/chosen": -0.6899237036705017, "logits/rejected": -0.7254764437675476, "logps/chosen": -0.011045449413359165, "logps/rejected": -1.974931240081787, "loss": 1.4553, "nll_loss": 0.3612610399723053, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011045449646189809, "rewards/margins": 0.19638857245445251, "rewards/rejected": -0.19749312102794647, "step": 7047 }, { "epoch": 4.874135546334717, "grad_norm": 7.999567031860352, "learning_rate": 2.8477024742584912e-05, "log_odds_chosen": 9.674551963806152, "log_odds_ratio": -0.0001579619711264968, "logits/chosen": -0.13578563928604126, "logits/rejected": -0.24292123317718506, "logps/chosen": -0.0016413903795182705, "logps/rejected": -2.1390011310577393, "loss": 0.7133, "nll_loss": 0.1783016175031662, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016413902631029487, "rewards/margins": 0.21373598277568817, "rewards/rejected": -0.2139001190662384, "step": 7048 }, { "epoch": 4.874827109266944, "grad_norm": 11.47680950164795, "learning_rate": 2.847318272629476e-05, "log_odds_chosen": 9.738288879394531, "log_odds_ratio": -0.005372277460992336, "logits/chosen": -0.3812277615070343, "logits/rejected": -0.4588232636451721, "logps/chosen": -0.0028886208310723305, "logps/rejected": -2.0673182010650635, "loss": 0.9856, "nll_loss": 0.24587345123291016, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002888621238525957, "rewards/margins": 0.2064429521560669, "rewards/rejected": -0.20673182606697083, "step": 7049 }, { "epoch": 4.875518672199171, "grad_norm": 7.6114935874938965, "learning_rate": 2.846934071000461e-05, "log_odds_chosen": 9.799309730529785, "log_odds_ratio": -9.547019726596773e-05, "logits/chosen": -0.5523931384086609, "logits/rejected": -0.5366276502609253, "logps/chosen": -0.00037644183612428606, "logps/rejected": -1.6992120742797852, "loss": 1.224, "nll_loss": 0.30597805976867676, "rewards/accuracies": 1.0, "rewards/chosen": -3.764418579521589e-05, "rewards/margins": 0.1698835790157318, "rewards/rejected": -0.16992120444774628, "step": 7050 }, { "epoch": 4.876210235131397, "grad_norm": 9.410329818725586, "learning_rate": 2.8465498693714466e-05, "log_odds_chosen": 7.662788391113281, "log_odds_ratio": -0.014153570868074894, "logits/chosen": -0.4911881387233734, "logits/rejected": -0.4591422975063324, "logps/chosen": -0.07480232417583466, "logps/rejected": -1.362932801246643, "loss": 0.779, "nll_loss": 0.19333398342132568, "rewards/accuracies": 1.0, "rewards/chosen": -0.007480232045054436, "rewards/margins": 0.12881305813789368, "rewards/rejected": -0.13629327714443207, "step": 7051 }, { "epoch": 4.876901798063624, "grad_norm": 11.054753303527832, "learning_rate": 2.8461656677424315e-05, "log_odds_chosen": 10.068439483642578, "log_odds_ratio": -9.35841744649224e-05, "logits/chosen": -0.5366771817207336, "logits/rejected": -0.556289792060852, "logps/chosen": -0.0003213782620150596, "logps/rejected": -1.6700217723846436, "loss": 1.0708, "nll_loss": 0.2676818370819092, "rewards/accuracies": 1.0, "rewards/chosen": -3.2137824746314436e-05, "rewards/margins": 0.16697004437446594, "rewards/rejected": -0.16700220108032227, "step": 7052 }, { "epoch": 4.877593360995851, "grad_norm": 23.047603607177734, "learning_rate": 2.8457814661134164e-05, "log_odds_chosen": 10.605345726013184, "log_odds_ratio": -4.5849927118979394e-05, "logits/chosen": -0.6093308925628662, "logits/rejected": -0.5740828514099121, "logps/chosen": -0.0003509022935759276, "logps/rejected": -2.091120958328247, "loss": 0.9871, "nll_loss": 0.2467714101076126, "rewards/accuracies": 1.0, "rewards/chosen": -3.509023008518852e-05, "rewards/margins": 0.20907700061798096, "rewards/rejected": -0.20911210775375366, "step": 7053 }, { "epoch": 4.878284923928078, "grad_norm": 6.905877590179443, "learning_rate": 2.8453972644844017e-05, "log_odds_chosen": 9.971370697021484, "log_odds_ratio": -0.0015021146973595023, "logits/chosen": -0.6287491321563721, "logits/rejected": -0.7699867486953735, "logps/chosen": -0.002430099993944168, "logps/rejected": -2.4299771785736084, "loss": 0.7239, "nll_loss": 0.18082213401794434, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024300998484250158, "rewards/margins": 0.24275469779968262, "rewards/rejected": -0.24299770593643188, "step": 7054 }, { "epoch": 4.878976486860305, "grad_norm": 16.925321578979492, "learning_rate": 2.8450130628553866e-05, "log_odds_chosen": 9.296889305114746, "log_odds_ratio": -0.004414747469127178, "logits/chosen": -0.6305980086326599, "logits/rejected": -0.6567308306694031, "logps/chosen": -0.003485491033643484, "logps/rejected": -2.01444673538208, "loss": 1.4225, "nll_loss": 0.3551926016807556, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003485491033643484, "rewards/margins": 0.20109611749649048, "rewards/rejected": -0.20144465565681458, "step": 7055 }, { "epoch": 4.8796680497925315, "grad_norm": 7.701079368591309, "learning_rate": 2.8446288612263715e-05, "log_odds_chosen": 10.75440788269043, "log_odds_ratio": -3.690827725222334e-05, "logits/chosen": -0.5414289236068726, "logits/rejected": -0.6197251677513123, "logps/chosen": -0.0001932993473019451, "logps/rejected": -2.0484259128570557, "loss": 0.8056, "nll_loss": 0.20138521492481232, "rewards/accuracies": 1.0, "rewards/chosen": -1.9329936549183913e-05, "rewards/margins": 0.20482327044010162, "rewards/rejected": -0.20484259724617004, "step": 7056 }, { "epoch": 4.880359612724758, "grad_norm": 7.884543418884277, "learning_rate": 2.844244659597357e-05, "log_odds_chosen": 10.570260047912598, "log_odds_ratio": -4.935210745315999e-05, "logits/chosen": -0.2323668897151947, "logits/rejected": -0.3230237662792206, "logps/chosen": -0.00016158647486008704, "logps/rejected": -1.9191505908966064, "loss": 0.8309, "nll_loss": 0.207707941532135, "rewards/accuracies": 1.0, "rewards/chosen": -1.6158646758412942e-05, "rewards/margins": 0.191898912191391, "rewards/rejected": -0.19191506505012512, "step": 7057 }, { "epoch": 4.881051175656985, "grad_norm": 13.345209121704102, "learning_rate": 2.843860457968342e-05, "log_odds_chosen": 10.143343925476074, "log_odds_ratio": -8.016329229576513e-05, "logits/chosen": -0.8679466247558594, "logits/rejected": -0.9242144227027893, "logps/chosen": -0.0001684374874457717, "logps/rejected": -1.3030248880386353, "loss": 1.1419, "nll_loss": 0.2854706943035126, "rewards/accuracies": 1.0, "rewards/chosen": -1.684374910837505e-05, "rewards/margins": 0.13028565049171448, "rewards/rejected": -0.13030248880386353, "step": 7058 }, { "epoch": 4.881742738589212, "grad_norm": 9.145713806152344, "learning_rate": 2.843476256339327e-05, "log_odds_chosen": 10.114141464233398, "log_odds_ratio": -0.0005261494661681354, "logits/chosen": -0.6389458775520325, "logits/rejected": -0.6943326592445374, "logps/chosen": -0.000395385519368574, "logps/rejected": -1.9835808277130127, "loss": 0.7446, "nll_loss": 0.18608568608760834, "rewards/accuracies": 1.0, "rewards/chosen": -3.9538554119644687e-05, "rewards/margins": 0.19831854104995728, "rewards/rejected": -0.19835807383060455, "step": 7059 }, { "epoch": 4.882434301521439, "grad_norm": 9.99998950958252, "learning_rate": 2.8430920547103125e-05, "log_odds_chosen": 10.815114974975586, "log_odds_ratio": -0.0009209688869304955, "logits/chosen": -0.2506277561187744, "logits/rejected": -0.2800312638282776, "logps/chosen": -0.000968419888522476, "logps/rejected": -2.3028392791748047, "loss": 1.0574, "nll_loss": 0.2642573118209839, "rewards/accuracies": 1.0, "rewards/chosen": -9.684199176263064e-05, "rewards/margins": 0.2301870882511139, "rewards/rejected": -0.2302839159965515, "step": 7060 }, { "epoch": 4.883125864453666, "grad_norm": 8.69649887084961, "learning_rate": 2.8427078530812974e-05, "log_odds_chosen": 10.826623916625977, "log_odds_ratio": -0.00020552946079988033, "logits/chosen": -0.5005373358726501, "logits/rejected": -0.5175811052322388, "logps/chosen": -0.00024266143736895174, "logps/rejected": -2.551815986633301, "loss": 0.9927, "nll_loss": 0.24816061556339264, "rewards/accuracies": 1.0, "rewards/chosen": -2.426614264550153e-05, "rewards/margins": 0.25515735149383545, "rewards/rejected": -0.25518161058425903, "step": 7061 }, { "epoch": 4.8838174273858925, "grad_norm": 6.960662841796875, "learning_rate": 2.8423236514522823e-05, "log_odds_chosen": 10.739348411560059, "log_odds_ratio": -0.00014458272198680788, "logits/chosen": -0.3545234203338623, "logits/rejected": -0.43811148405075073, "logps/chosen": -0.0006923141772858799, "logps/rejected": -2.5062310695648193, "loss": 0.7574, "nll_loss": 0.18933825194835663, "rewards/accuracies": 1.0, "rewards/chosen": -6.923142063897103e-05, "rewards/margins": 0.25055384635925293, "rewards/rejected": -0.25062310695648193, "step": 7062 }, { "epoch": 4.884508990318119, "grad_norm": 7.970198631286621, "learning_rate": 2.8419394498232675e-05, "log_odds_chosen": 10.426850318908691, "log_odds_ratio": -6.352874333970249e-05, "logits/chosen": -0.18800753355026245, "logits/rejected": -0.37803998589515686, "logps/chosen": -0.0001434234291082248, "logps/rejected": -1.4915777444839478, "loss": 0.55, "nll_loss": 0.1375032365322113, "rewards/accuracies": 1.0, "rewards/chosen": -1.4342344002216123e-05, "rewards/margins": 0.14914342761039734, "rewards/rejected": -0.14915776252746582, "step": 7063 }, { "epoch": 4.885200553250346, "grad_norm": 10.392330169677734, "learning_rate": 2.8415552481942524e-05, "log_odds_chosen": 10.369099617004395, "log_odds_ratio": -0.00018756282224785537, "logits/chosen": -0.5978999733924866, "logits/rejected": -0.6408947706222534, "logps/chosen": -0.00031749578192830086, "logps/rejected": -2.243783473968506, "loss": 0.7938, "nll_loss": 0.1984332650899887, "rewards/accuracies": 1.0, "rewards/chosen": -3.174957964802161e-05, "rewards/margins": 0.2243466079235077, "rewards/rejected": -0.22437834739685059, "step": 7064 }, { "epoch": 4.885892116182573, "grad_norm": 8.495529174804688, "learning_rate": 2.8411710465652373e-05, "log_odds_chosen": 10.161759376525879, "log_odds_ratio": -9.85856240731664e-05, "logits/chosen": -0.2775004804134369, "logits/rejected": -0.3813784718513489, "logps/chosen": -0.00040236441418528557, "logps/rejected": -1.987892508506775, "loss": 0.7487, "nll_loss": 0.18717548251152039, "rewards/accuracies": 1.0, "rewards/chosen": -4.0236445784103125e-05, "rewards/margins": 0.19874900579452515, "rewards/rejected": -0.19878923892974854, "step": 7065 }, { "epoch": 4.8865836791148, "grad_norm": 7.843557357788086, "learning_rate": 2.840786844936223e-05, "log_odds_chosen": 9.858935356140137, "log_odds_ratio": -0.0006947257206775248, "logits/chosen": -0.4358893930912018, "logits/rejected": -0.47982048988342285, "logps/chosen": -0.001503008883446455, "logps/rejected": -2.213164806365967, "loss": 1.4627, "nll_loss": 0.36561495065689087, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015030089707579464, "rewards/margins": 0.22116619348526, "rewards/rejected": -0.22131648659706116, "step": 7066 }, { "epoch": 4.887275242047027, "grad_norm": 8.010547637939453, "learning_rate": 2.8404026433072078e-05, "log_odds_chosen": 9.644980430603027, "log_odds_ratio": -0.0008684393833391368, "logits/chosen": -0.25602462887763977, "logits/rejected": -0.3479723632335663, "logps/chosen": -0.053071144968271255, "logps/rejected": -2.9593915939331055, "loss": 1.121, "nll_loss": 0.2801644206047058, "rewards/accuracies": 1.0, "rewards/chosen": -0.0053071146830916405, "rewards/margins": 0.29063206911087036, "rewards/rejected": -0.295939177274704, "step": 7067 }, { "epoch": 4.8879668049792535, "grad_norm": 7.515931606292725, "learning_rate": 2.8400184416781927e-05, "log_odds_chosen": 9.98626708984375, "log_odds_ratio": -8.94946715561673e-05, "logits/chosen": -0.7496136426925659, "logits/rejected": -0.7395554780960083, "logps/chosen": -0.0012114938581362367, "logps/rejected": -2.4176342487335205, "loss": 0.9002, "nll_loss": 0.22504302859306335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001211493945447728, "rewards/margins": 0.24164226651191711, "rewards/rejected": -0.2417634129524231, "step": 7068 }, { "epoch": 4.88865836791148, "grad_norm": 7.007376670837402, "learning_rate": 2.8396342400491783e-05, "log_odds_chosen": 10.143765449523926, "log_odds_ratio": -7.062454096740112e-05, "logits/chosen": -0.6521151065826416, "logits/rejected": -0.6742969751358032, "logps/chosen": -0.0001779589947545901, "logps/rejected": -1.59269380569458, "loss": 0.5192, "nll_loss": 0.12978312373161316, "rewards/accuracies": 1.0, "rewards/chosen": -1.7795900930650532e-05, "rewards/margins": 0.15925158560276031, "rewards/rejected": -0.15926937758922577, "step": 7069 }, { "epoch": 4.889349930843707, "grad_norm": 7.883659362792969, "learning_rate": 2.8392500384201632e-05, "log_odds_chosen": 9.163580894470215, "log_odds_ratio": -0.0011066696606576443, "logits/chosen": -0.008930400013923645, "logits/rejected": -0.0676572397351265, "logps/chosen": -0.0009055061964318156, "logps/rejected": -1.5239794254302979, "loss": 1.0772, "nll_loss": 0.2691981792449951, "rewards/accuracies": 1.0, "rewards/chosen": -9.055061673279852e-05, "rewards/margins": 0.1523074060678482, "rewards/rejected": -0.15239794552326202, "step": 7070 }, { "epoch": 4.890041493775934, "grad_norm": 9.622332572937012, "learning_rate": 2.838865836791148e-05, "log_odds_chosen": 10.254562377929688, "log_odds_ratio": -0.0001300430449191481, "logits/chosen": -0.43594586849212646, "logits/rejected": -0.46360427141189575, "logps/chosen": -0.0010361828608438373, "logps/rejected": -2.300558567047119, "loss": 0.848, "nll_loss": 0.2119934856891632, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010361829481553286, "rewards/margins": 0.22995226085186005, "rewards/rejected": -0.23005586862564087, "step": 7071 }, { "epoch": 4.890733056708161, "grad_norm": 6.660597324371338, "learning_rate": 2.838481635162133e-05, "log_odds_chosen": 9.821388244628906, "log_odds_ratio": -0.0002597160346340388, "logits/chosen": -0.09285390377044678, "logits/rejected": -0.23921740055084229, "logps/chosen": -0.0010828624945133924, "logps/rejected": -2.154656171798706, "loss": 0.6899, "nll_loss": 0.17244890332221985, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010828624363057315, "rewards/margins": 0.21535731852054596, "rewards/rejected": -0.21546560525894165, "step": 7072 }, { "epoch": 4.891424619640388, "grad_norm": 10.330202102661133, "learning_rate": 2.8380974335331183e-05, "log_odds_chosen": 11.040985107421875, "log_odds_ratio": -2.5466662918915972e-05, "logits/chosen": -0.39895886182785034, "logits/rejected": -0.3868694007396698, "logps/chosen": -0.00016861945914570242, "logps/rejected": -2.319244146347046, "loss": 1.0313, "nll_loss": 0.2578234076499939, "rewards/accuracies": 1.0, "rewards/chosen": -1.6861946278368123e-05, "rewards/margins": 0.23190754652023315, "rewards/rejected": -0.2319244146347046, "step": 7073 }, { "epoch": 4.8921161825726145, "grad_norm": 10.073043823242188, "learning_rate": 2.8377132319041032e-05, "log_odds_chosen": 9.917614936828613, "log_odds_ratio": -0.06503642350435257, "logits/chosen": -0.6255109906196594, "logits/rejected": -0.6862890720367432, "logps/chosen": -0.019769448786973953, "logps/rejected": -2.653496503829956, "loss": 1.3958, "nll_loss": 0.34243640303611755, "rewards/accuracies": 1.0, "rewards/chosen": -0.001976944738999009, "rewards/margins": 0.2633727192878723, "rewards/rejected": -0.2653496563434601, "step": 7074 }, { "epoch": 4.892807745504841, "grad_norm": 9.930667877197266, "learning_rate": 2.837329030275088e-05, "log_odds_chosen": 10.876503944396973, "log_odds_ratio": -2.9976836231071502e-05, "logits/chosen": -0.6453677415847778, "logits/rejected": -0.6829240322113037, "logps/chosen": -8.378988422919065e-05, "logps/rejected": -1.5633125305175781, "loss": 0.7998, "nll_loss": 0.19993676245212555, "rewards/accuracies": 1.0, "rewards/chosen": -8.378989150514826e-06, "rewards/margins": 0.15632286667823792, "rewards/rejected": -0.15633124113082886, "step": 7075 }, { "epoch": 4.893499308437068, "grad_norm": 8.007487297058105, "learning_rate": 2.8369448286460737e-05, "log_odds_chosen": 10.561995506286621, "log_odds_ratio": -0.0045930189080536366, "logits/chosen": -0.25250446796417236, "logits/rejected": -0.4099150002002716, "logps/chosen": -0.002251701895147562, "logps/rejected": -2.5110809803009033, "loss": 0.8776, "nll_loss": 0.21893686056137085, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022517020988743752, "rewards/margins": 0.25088292360305786, "rewards/rejected": -0.2511081099510193, "step": 7076 }, { "epoch": 4.894190871369295, "grad_norm": 7.112217903137207, "learning_rate": 2.8365606270170586e-05, "log_odds_chosen": 10.102551460266113, "log_odds_ratio": -0.0003846402687486261, "logits/chosen": -0.4543187916278839, "logits/rejected": -0.5494933128356934, "logps/chosen": -0.0003145010559819639, "logps/rejected": -1.8796675205230713, "loss": 1.1046, "nll_loss": 0.27610817551612854, "rewards/accuracies": 1.0, "rewards/chosen": -3.145010850857943e-05, "rewards/margins": 0.18793530762195587, "rewards/rejected": -0.1879667490720749, "step": 7077 }, { "epoch": 4.894882434301522, "grad_norm": 10.677894592285156, "learning_rate": 2.8361764253880435e-05, "log_odds_chosen": 9.235824584960938, "log_odds_ratio": -0.0006279587978497148, "logits/chosen": -0.7199358344078064, "logits/rejected": -0.71857088804245, "logps/chosen": -0.000889734597876668, "logps/rejected": -1.353074312210083, "loss": 1.3665, "nll_loss": 0.34156861901283264, "rewards/accuracies": 1.0, "rewards/chosen": -8.897345833247527e-05, "rewards/margins": 0.13521845638751984, "rewards/rejected": -0.1353074163198471, "step": 7078 }, { "epoch": 4.895573997233749, "grad_norm": 5.909156322479248, "learning_rate": 2.835792223759029e-05, "log_odds_chosen": 9.649864196777344, "log_odds_ratio": -0.0005108444020152092, "logits/chosen": -0.32373201847076416, "logits/rejected": -0.37446802854537964, "logps/chosen": -0.0009714511688798666, "logps/rejected": -1.7121704816818237, "loss": 1.1864, "nll_loss": 0.2965487241744995, "rewards/accuracies": 1.0, "rewards/chosen": -9.714511543279514e-05, "rewards/margins": 0.17111989855766296, "rewards/rejected": -0.17121705412864685, "step": 7079 }, { "epoch": 4.8962655601659755, "grad_norm": 7.765115737915039, "learning_rate": 2.835408022130014e-05, "log_odds_chosen": 11.179744720458984, "log_odds_ratio": -6.359211693052202e-05, "logits/chosen": -0.48739296197891235, "logits/rejected": -0.5888567566871643, "logps/chosen": -0.00020552228670567274, "logps/rejected": -2.5004618167877197, "loss": 0.647, "nll_loss": 0.16173242032527924, "rewards/accuracies": 1.0, "rewards/chosen": -2.0552230125758797e-05, "rewards/margins": 0.2500256299972534, "rewards/rejected": -0.2500461935997009, "step": 7080 }, { "epoch": 4.896957123098202, "grad_norm": 6.932773590087891, "learning_rate": 2.835023820500999e-05, "log_odds_chosen": 10.954737663269043, "log_odds_ratio": -8.48414929350838e-05, "logits/chosen": -0.5642880201339722, "logits/rejected": -0.6650699973106384, "logps/chosen": -0.00023761890770401806, "logps/rejected": -2.4510226249694824, "loss": 0.9495, "nll_loss": 0.23736077547073364, "rewards/accuracies": 1.0, "rewards/chosen": -2.3761891497997567e-05, "rewards/margins": 0.24507847428321838, "rewards/rejected": -0.24510225653648376, "step": 7081 }, { "epoch": 4.897648686030429, "grad_norm": 7.493467330932617, "learning_rate": 2.834639618871984e-05, "log_odds_chosen": 10.005603790283203, "log_odds_ratio": -0.0001240583078470081, "logits/chosen": -0.7019184827804565, "logits/rejected": -0.8387026786804199, "logps/chosen": -0.00021465314785018563, "logps/rejected": -1.8058335781097412, "loss": 1.0316, "nll_loss": 0.25787556171417236, "rewards/accuracies": 1.0, "rewards/chosen": -2.1465315512614325e-05, "rewards/margins": 0.1805618852376938, "rewards/rejected": -0.18058335781097412, "step": 7082 }, { "epoch": 4.898340248962656, "grad_norm": 8.680520057678223, "learning_rate": 2.834255417242969e-05, "log_odds_chosen": 9.830467224121094, "log_odds_ratio": -0.00024964113254100084, "logits/chosen": -0.6379505395889282, "logits/rejected": -0.6919896006584167, "logps/chosen": -0.0005304609076119959, "logps/rejected": -1.8459217548370361, "loss": 0.787, "nll_loss": 0.1967170685529709, "rewards/accuracies": 1.0, "rewards/chosen": -5.304608930600807e-05, "rewards/margins": 0.18453910946846008, "rewards/rejected": -0.18459217250347137, "step": 7083 }, { "epoch": 4.899031811894883, "grad_norm": 12.539398193359375, "learning_rate": 2.833871215613954e-05, "log_odds_chosen": 9.541128158569336, "log_odds_ratio": -0.0040365769527852535, "logits/chosen": -0.973002016544342, "logits/rejected": -0.9376418590545654, "logps/chosen": -0.12223078310489655, "logps/rejected": -2.7071738243103027, "loss": 0.8637, "nll_loss": 0.21553358435630798, "rewards/accuracies": 1.0, "rewards/chosen": -0.012223077937960625, "rewards/margins": 0.2584943175315857, "rewards/rejected": -0.27071741223335266, "step": 7084 }, { "epoch": 4.89972337482711, "grad_norm": 6.391757965087891, "learning_rate": 2.8334870139849395e-05, "log_odds_chosen": 9.40736198425293, "log_odds_ratio": -0.0002655688440427184, "logits/chosen": -0.6223936080932617, "logits/rejected": -0.7707822322845459, "logps/chosen": -0.006117125973105431, "logps/rejected": -2.1625566482543945, "loss": 0.8992, "nll_loss": 0.22478394210338593, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006117126322351396, "rewards/margins": 0.21564392745494843, "rewards/rejected": -0.21625563502311707, "step": 7085 }, { "epoch": 4.9004149377593365, "grad_norm": 5.640289783477783, "learning_rate": 2.8331028123559244e-05, "log_odds_chosen": 9.61543083190918, "log_odds_ratio": -0.00035600896808318794, "logits/chosen": -0.4709721505641937, "logits/rejected": -0.504892110824585, "logps/chosen": -0.001349491416476667, "logps/rejected": -1.618531346321106, "loss": 1.3241, "nll_loss": 0.3310004472732544, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001349491358269006, "rewards/margins": 0.1617181897163391, "rewards/rejected": -0.1618531197309494, "step": 7086 }, { "epoch": 4.901106500691563, "grad_norm": 18.874542236328125, "learning_rate": 2.8327186107269093e-05, "log_odds_chosen": 10.87725830078125, "log_odds_ratio": -3.1916541047394276e-05, "logits/chosen": -0.390527606010437, "logits/rejected": -0.5419718027114868, "logps/chosen": -0.00040930911200121045, "logps/rejected": -2.722926378250122, "loss": 1.035, "nll_loss": 0.25874730944633484, "rewards/accuracies": 1.0, "rewards/chosen": -4.0930910472525284e-05, "rewards/margins": 0.2722517251968384, "rewards/rejected": -0.2722926437854767, "step": 7087 }, { "epoch": 4.90179806362379, "grad_norm": 8.923735618591309, "learning_rate": 2.832334409097895e-05, "log_odds_chosen": 10.070110321044922, "log_odds_ratio": -6.992067937972024e-05, "logits/chosen": -0.4296715259552002, "logits/rejected": -0.47074654698371887, "logps/chosen": -0.00021902300068177283, "logps/rejected": -1.5266903638839722, "loss": 1.0921, "nll_loss": 0.27300915122032166, "rewards/accuracies": 1.0, "rewards/chosen": -2.190229861298576e-05, "rewards/margins": 0.15264713764190674, "rewards/rejected": -0.1526690423488617, "step": 7088 }, { "epoch": 4.902489626556017, "grad_norm": 12.43345832824707, "learning_rate": 2.8319502074688798e-05, "log_odds_chosen": 10.237215042114258, "log_odds_ratio": -8.461821562377736e-05, "logits/chosen": -0.7066891193389893, "logits/rejected": -0.7382270097732544, "logps/chosen": -0.0008181549492292106, "logps/rejected": -2.758671283721924, "loss": 1.1065, "nll_loss": 0.2766094207763672, "rewards/accuracies": 1.0, "rewards/chosen": -8.181549492292106e-05, "rewards/margins": 0.27578532695770264, "rewards/rejected": -0.27586713433265686, "step": 7089 }, { "epoch": 4.903181189488244, "grad_norm": 9.602688789367676, "learning_rate": 2.8315660058398647e-05, "log_odds_chosen": 9.875539779663086, "log_odds_ratio": -0.00012150348629802465, "logits/chosen": -0.832602858543396, "logits/rejected": -0.7552993297576904, "logps/chosen": -0.0005037355585955083, "logps/rejected": -1.9359135627746582, "loss": 2.19, "nll_loss": 0.5474786758422852, "rewards/accuracies": 1.0, "rewards/chosen": -5.0373557314742357e-05, "rewards/margins": 0.19354099035263062, "rewards/rejected": -0.19359135627746582, "step": 7090 }, { "epoch": 4.903872752420471, "grad_norm": 8.904827117919922, "learning_rate": 2.83118180421085e-05, "log_odds_chosen": 9.818799018859863, "log_odds_ratio": -0.0008873422048054636, "logits/chosen": -0.8685452938079834, "logits/rejected": -0.9092991352081299, "logps/chosen": -0.0036431835032999516, "logps/rejected": -1.6690925359725952, "loss": 0.7524, "nll_loss": 0.18801911175251007, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003643183736130595, "rewards/margins": 0.16654494404792786, "rewards/rejected": -0.16690924763679504, "step": 7091 }, { "epoch": 4.904564315352697, "grad_norm": 7.237812042236328, "learning_rate": 2.830797602581835e-05, "log_odds_chosen": 9.628220558166504, "log_odds_ratio": -0.0002161394222639501, "logits/chosen": -0.759405791759491, "logits/rejected": -0.7982853055000305, "logps/chosen": -0.0005565644823946059, "logps/rejected": -1.7072293758392334, "loss": 0.6136, "nll_loss": 0.1533796787261963, "rewards/accuracies": 1.0, "rewards/chosen": -5.5656451877439395e-05, "rewards/margins": 0.17066729068756104, "rewards/rejected": -0.17072294652462006, "step": 7092 }, { "epoch": 4.905255878284924, "grad_norm": 6.623456001281738, "learning_rate": 2.8304134009528198e-05, "log_odds_chosen": 9.786426544189453, "log_odds_ratio": -0.0002996628754772246, "logits/chosen": -0.399116575717926, "logits/rejected": -0.4428660273551941, "logps/chosen": -0.004113033413887024, "logps/rejected": -2.5627689361572266, "loss": 1.4367, "nll_loss": 0.3591574728488922, "rewards/accuracies": 1.0, "rewards/chosen": -0.00041130336467176676, "rewards/margins": 0.25586557388305664, "rewards/rejected": -0.2562769055366516, "step": 7093 }, { "epoch": 4.905947441217151, "grad_norm": 12.373395919799805, "learning_rate": 2.8300291993238054e-05, "log_odds_chosen": 9.527667045593262, "log_odds_ratio": -0.0003717107174452394, "logits/chosen": -0.9429394006729126, "logits/rejected": -0.9072678089141846, "logps/chosen": -0.0004049554408993572, "logps/rejected": -1.8451400995254517, "loss": 1.1802, "nll_loss": 0.295009046792984, "rewards/accuracies": 1.0, "rewards/chosen": -4.0495546272723004e-05, "rewards/margins": 0.18447351455688477, "rewards/rejected": -0.18451401591300964, "step": 7094 }, { "epoch": 4.906639004149378, "grad_norm": 6.210014820098877, "learning_rate": 2.8296449976947903e-05, "log_odds_chosen": 10.036115646362305, "log_odds_ratio": -0.0001186348672490567, "logits/chosen": -0.6889455318450928, "logits/rejected": -0.7251041531562805, "logps/chosen": -0.0006964325439184904, "logps/rejected": -1.879245400428772, "loss": 0.9805, "nll_loss": 0.2451135814189911, "rewards/accuracies": 1.0, "rewards/chosen": -6.964324711589143e-05, "rewards/margins": 0.18785491585731506, "rewards/rejected": -0.1879245489835739, "step": 7095 }, { "epoch": 4.907330567081605, "grad_norm": 10.427117347717285, "learning_rate": 2.829260796065775e-05, "log_odds_chosen": 10.610797882080078, "log_odds_ratio": -0.00011776221072068438, "logits/chosen": -0.610815167427063, "logits/rejected": -0.6836470365524292, "logps/chosen": -0.0021078032441437244, "logps/rejected": -2.2562201023101807, "loss": 1.006, "nll_loss": 0.2514980435371399, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002107803156832233, "rewards/margins": 0.22541120648384094, "rewards/rejected": -0.2256219983100891, "step": 7096 }, { "epoch": 4.908022130013832, "grad_norm": 8.188974380493164, "learning_rate": 2.8288765944367607e-05, "log_odds_chosen": 10.249313354492188, "log_odds_ratio": -0.014942159876227379, "logits/chosen": -0.2611965835094452, "logits/rejected": -0.36121267080307007, "logps/chosen": -0.0052610295824706554, "logps/rejected": -2.3836238384246826, "loss": 0.8521, "nll_loss": 0.21152344346046448, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005261029582470655, "rewards/margins": 0.23783627152442932, "rewards/rejected": -0.2383623719215393, "step": 7097 }, { "epoch": 4.908713692946058, "grad_norm": 7.87142276763916, "learning_rate": 2.8284923928077457e-05, "log_odds_chosen": 10.834593772888184, "log_odds_ratio": -0.0003307293518446386, "logits/chosen": -0.7625287771224976, "logits/rejected": -0.6767745018005371, "logps/chosen": -0.0034907907247543335, "logps/rejected": -3.2138097286224365, "loss": 1.5473, "nll_loss": 0.3867877721786499, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003490790550131351, "rewards/margins": 0.32103192806243896, "rewards/rejected": -0.32138100266456604, "step": 7098 }, { "epoch": 4.909405255878285, "grad_norm": 6.212900638580322, "learning_rate": 2.8281081911787306e-05, "log_odds_chosen": 8.325897216796875, "log_odds_ratio": -0.005754491779953241, "logits/chosen": -0.5125560164451599, "logits/rejected": -0.4441109001636505, "logps/chosen": -0.0019233720377087593, "logps/rejected": -1.003936767578125, "loss": 0.8757, "nll_loss": 0.21836042404174805, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019233721832279116, "rewards/margins": 0.10020134598016739, "rewards/rejected": -0.10039368271827698, "step": 7099 }, { "epoch": 4.910096818810512, "grad_norm": 11.607251167297363, "learning_rate": 2.8277239895497158e-05, "log_odds_chosen": 9.61685848236084, "log_odds_ratio": -0.056531526148319244, "logits/chosen": -0.47347909212112427, "logits/rejected": -0.487918883562088, "logps/chosen": -0.08753280341625214, "logps/rejected": -1.9314079284667969, "loss": 1.0522, "nll_loss": 0.25739336013793945, "rewards/accuracies": 1.0, "rewards/chosen": -0.008753281086683273, "rewards/margins": 0.18438750505447388, "rewards/rejected": -0.19314078986644745, "step": 7100 }, { "epoch": 4.910788381742739, "grad_norm": 8.894390106201172, "learning_rate": 2.8273397879207007e-05, "log_odds_chosen": 10.345115661621094, "log_odds_ratio": -0.00022988113050814718, "logits/chosen": -0.7872570157051086, "logits/rejected": -0.8193588852882385, "logps/chosen": -0.0006045059417374432, "logps/rejected": -1.8319408893585205, "loss": 0.8216, "nll_loss": 0.20537596940994263, "rewards/accuracies": 1.0, "rewards/chosen": -6.0450591263361275e-05, "rewards/margins": 0.18313364684581757, "rewards/rejected": -0.183194100856781, "step": 7101 }, { "epoch": 4.911479944674966, "grad_norm": 9.430028915405273, "learning_rate": 2.8269555862916856e-05, "log_odds_chosen": 10.861661911010742, "log_odds_ratio": -5.2497900469461456e-05, "logits/chosen": -0.6316702365875244, "logits/rejected": -0.6554051041603088, "logps/chosen": -0.00034378620330244303, "logps/rejected": -2.400463819503784, "loss": 0.7852, "nll_loss": 0.19629782438278198, "rewards/accuracies": 1.0, "rewards/chosen": -3.437861960264854e-05, "rewards/margins": 0.2400120198726654, "rewards/rejected": -0.2400463968515396, "step": 7102 }, { "epoch": 4.912171507607193, "grad_norm": 9.566428184509277, "learning_rate": 2.8265713846626712e-05, "log_odds_chosen": 9.804618835449219, "log_odds_ratio": -8.933767821872607e-05, "logits/chosen": -0.5262328386306763, "logits/rejected": -0.556152880191803, "logps/chosen": -0.0026998610701411963, "logps/rejected": -2.2227210998535156, "loss": 1.2569, "nll_loss": 0.31422197818756104, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002699861070141196, "rewards/margins": 0.22200213372707367, "rewards/rejected": -0.2222720980644226, "step": 7103 }, { "epoch": 4.912863070539419, "grad_norm": 7.35467529296875, "learning_rate": 2.826187183033656e-05, "log_odds_chosen": 10.32733154296875, "log_odds_ratio": -0.000474480475531891, "logits/chosen": -0.6774312257766724, "logits/rejected": -0.7373085021972656, "logps/chosen": -0.0009708892903290689, "logps/rejected": -2.3438022136688232, "loss": 1.1322, "nll_loss": 0.2829919159412384, "rewards/accuracies": 1.0, "rewards/chosen": -9.708893776405603e-05, "rewards/margins": 0.23428313434123993, "rewards/rejected": -0.23438023030757904, "step": 7104 }, { "epoch": 4.913554633471646, "grad_norm": 4.576963901519775, "learning_rate": 2.825802981404641e-05, "log_odds_chosen": 9.78951358795166, "log_odds_ratio": -0.0002390899317106232, "logits/chosen": -0.5074937343597412, "logits/rejected": -0.4873002767562866, "logps/chosen": -0.00036632048431783915, "logps/rejected": -1.5628546476364136, "loss": 1.3782, "nll_loss": 0.34453463554382324, "rewards/accuracies": 1.0, "rewards/chosen": -3.663205279735848e-05, "rewards/margins": 0.15624882280826569, "rewards/rejected": -0.15628546476364136, "step": 7105 }, { "epoch": 4.914246196403873, "grad_norm": 10.732585906982422, "learning_rate": 2.8254187797756266e-05, "log_odds_chosen": 9.88197135925293, "log_odds_ratio": -0.0027279232162982225, "logits/chosen": -0.714326024055481, "logits/rejected": -0.7508030533790588, "logps/chosen": -0.0015103038167580962, "logps/rejected": -1.6368227005004883, "loss": 0.7312, "nll_loss": 0.18253552913665771, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001510303991381079, "rewards/margins": 0.16353122889995575, "rewards/rejected": -0.1636822670698166, "step": 7106 }, { "epoch": 4.9149377593361, "grad_norm": 5.092597961425781, "learning_rate": 2.8250345781466115e-05, "log_odds_chosen": 8.184118270874023, "log_odds_ratio": -0.002079986035823822, "logits/chosen": -0.513596773147583, "logits/rejected": -0.6206921339035034, "logps/chosen": -0.0010421369224786758, "logps/rejected": -1.033446192741394, "loss": 1.1106, "nll_loss": 0.27744877338409424, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010421368642710149, "rewards/margins": 0.10324040055274963, "rewards/rejected": -0.10334461182355881, "step": 7107 }, { "epoch": 4.915629322268327, "grad_norm": 9.54631519317627, "learning_rate": 2.8246503765175964e-05, "log_odds_chosen": 10.113319396972656, "log_odds_ratio": -0.0001325900957453996, "logits/chosen": -0.5567490458488464, "logits/rejected": -0.6400761008262634, "logps/chosen": -0.0008515854133293033, "logps/rejected": -2.127261161804199, "loss": 0.7234, "nll_loss": 0.18084672093391418, "rewards/accuracies": 1.0, "rewards/chosen": -8.515853551216424e-05, "rewards/margins": 0.21264095604419708, "rewards/rejected": -0.21272613108158112, "step": 7108 }, { "epoch": 4.9163208852005535, "grad_norm": 5.1094560623168945, "learning_rate": 2.8242661748885816e-05, "log_odds_chosen": 10.541118621826172, "log_odds_ratio": -0.00010706786997616291, "logits/chosen": -0.7783686518669128, "logits/rejected": -0.6153342723846436, "logps/chosen": -0.0002766298421192914, "logps/rejected": -1.9104892015457153, "loss": 0.7444, "nll_loss": 0.18608124554157257, "rewards/accuracies": 1.0, "rewards/chosen": -2.7662985303322785e-05, "rewards/margins": 0.19102126359939575, "rewards/rejected": -0.19104892015457153, "step": 7109 }, { "epoch": 4.91701244813278, "grad_norm": 6.176060676574707, "learning_rate": 2.8238819732595665e-05, "log_odds_chosen": 9.143302917480469, "log_odds_ratio": -0.002591161523014307, "logits/chosen": -0.28634414076805115, "logits/rejected": -0.4724721312522888, "logps/chosen": -0.003382663941010833, "logps/rejected": -1.5004411935806274, "loss": 0.9143, "nll_loss": 0.22832506895065308, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033826639992184937, "rewards/margins": 0.14970585703849792, "rewards/rejected": -0.15004411339759827, "step": 7110 }, { "epoch": 4.917704011065007, "grad_norm": 5.31650447845459, "learning_rate": 2.8234977716305518e-05, "log_odds_chosen": 9.15156364440918, "log_odds_ratio": -0.000578921171836555, "logits/chosen": -0.691253125667572, "logits/rejected": -0.7496019005775452, "logps/chosen": -0.0018787914887070656, "logps/rejected": -1.509812831878662, "loss": 1.1418, "nll_loss": 0.28538262844085693, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018787916633300483, "rewards/margins": 0.1507934182882309, "rewards/rejected": -0.15098129212856293, "step": 7111 }, { "epoch": 4.918395573997234, "grad_norm": 5.6800856590271, "learning_rate": 2.823113570001537e-05, "log_odds_chosen": 8.849769592285156, "log_odds_ratio": -0.000252558384090662, "logits/chosen": -0.5067331790924072, "logits/rejected": -0.6426399946212769, "logps/chosen": -0.0010645565344020724, "logps/rejected": -1.4535598754882812, "loss": 0.9729, "nll_loss": 0.24319294095039368, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001064556636265479, "rewards/margins": 0.14524953067302704, "rewards/rejected": -0.1453559845685959, "step": 7112 }, { "epoch": 4.919087136929461, "grad_norm": 17.448562622070312, "learning_rate": 2.822729368372522e-05, "log_odds_chosen": 10.552684783935547, "log_odds_ratio": -0.00010548812861088663, "logits/chosen": -0.7737633585929871, "logits/rejected": -0.8090916275978088, "logps/chosen": -0.0003632918233051896, "logps/rejected": -1.9622200727462769, "loss": 0.7682, "nll_loss": 0.19204050302505493, "rewards/accuracies": 1.0, "rewards/chosen": -3.6329183785710484e-05, "rewards/margins": 0.19618570804595947, "rewards/rejected": -0.19622200727462769, "step": 7113 }, { "epoch": 4.919778699861688, "grad_norm": 8.25586223602295, "learning_rate": 2.822345166743507e-05, "log_odds_chosen": 10.54365348815918, "log_odds_ratio": -9.378491813549772e-05, "logits/chosen": -0.5775281190872192, "logits/rejected": -0.6037197113037109, "logps/chosen": -0.0002865190908778459, "logps/rejected": -1.636115312576294, "loss": 0.8237, "nll_loss": 0.2059212476015091, "rewards/accuracies": 1.0, "rewards/chosen": -2.8651907996390946e-05, "rewards/margins": 0.16358289122581482, "rewards/rejected": -0.1636115312576294, "step": 7114 }, { "epoch": 4.9204702627939145, "grad_norm": 7.199887275695801, "learning_rate": 2.8219609651144924e-05, "log_odds_chosen": 10.180112838745117, "log_odds_ratio": -0.00013731225044466555, "logits/chosen": -0.2917217016220093, "logits/rejected": -0.3498440384864807, "logps/chosen": -0.00036871773772872984, "logps/rejected": -2.0209054946899414, "loss": 0.7018, "nll_loss": 0.1754380762577057, "rewards/accuracies": 1.0, "rewards/chosen": -3.6871773772872984e-05, "rewards/margins": 0.20205369591712952, "rewards/rejected": -0.2020905613899231, "step": 7115 }, { "epoch": 4.921161825726141, "grad_norm": 9.046202659606934, "learning_rate": 2.8215767634854773e-05, "log_odds_chosen": 10.894472122192383, "log_odds_ratio": -4.8574976972304285e-05, "logits/chosen": -0.4562535285949707, "logits/rejected": -0.4837872087955475, "logps/chosen": -0.00042225071229040623, "logps/rejected": -2.4725146293640137, "loss": 1.2179, "nll_loss": 0.30447250604629517, "rewards/accuracies": 1.0, "rewards/chosen": -4.222507413942367e-05, "rewards/margins": 0.24720925092697144, "rewards/rejected": -0.2472514808177948, "step": 7116 }, { "epoch": 4.921853388658368, "grad_norm": 11.569889068603516, "learning_rate": 2.8211925618564622e-05, "log_odds_chosen": 10.427947044372559, "log_odds_ratio": -0.00016333235544152558, "logits/chosen": -0.44238170981407166, "logits/rejected": -0.5646301507949829, "logps/chosen": -0.0005494217621162534, "logps/rejected": -2.6457605361938477, "loss": 1.1981, "nll_loss": 0.2994979918003082, "rewards/accuracies": 1.0, "rewards/chosen": -5.494217839441262e-05, "rewards/margins": 0.26452112197875977, "rewards/rejected": -0.2645760476589203, "step": 7117 }, { "epoch": 4.922544951590595, "grad_norm": 8.835566520690918, "learning_rate": 2.8208083602274475e-05, "log_odds_chosen": 9.710332870483398, "log_odds_ratio": -0.00014777285105083138, "logits/chosen": -0.5706153512001038, "logits/rejected": -0.6766719818115234, "logps/chosen": -0.0003430528158787638, "logps/rejected": -1.5136841535568237, "loss": 1.0568, "nll_loss": 0.2641783356666565, "rewards/accuracies": 1.0, "rewards/chosen": -3.430528158787638e-05, "rewards/margins": 0.15133410692214966, "rewards/rejected": -0.1513684093952179, "step": 7118 }, { "epoch": 4.923236514522822, "grad_norm": 11.772375106811523, "learning_rate": 2.8204241585984327e-05, "log_odds_chosen": 11.087120056152344, "log_odds_ratio": -4.5395812776405364e-05, "logits/chosen": -0.5636887550354004, "logits/rejected": -0.5888187289237976, "logps/chosen": -0.0004168281448073685, "logps/rejected": -1.8479971885681152, "loss": 0.8673, "nll_loss": 0.21683260798454285, "rewards/accuracies": 1.0, "rewards/chosen": -4.1682811570353806e-05, "rewards/margins": 0.1847580522298813, "rewards/rejected": -0.18479973077774048, "step": 7119 }, { "epoch": 4.923928077455049, "grad_norm": 7.783324718475342, "learning_rate": 2.8200399569694176e-05, "log_odds_chosen": 9.75548267364502, "log_odds_ratio": -0.00065141316736117, "logits/chosen": -0.3341536819934845, "logits/rejected": -0.3753906488418579, "logps/chosen": -0.0032086719293147326, "logps/rejected": -2.3842759132385254, "loss": 0.8875, "nll_loss": 0.22179758548736572, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032086719875223935, "rewards/margins": 0.23810669779777527, "rewards/rejected": -0.23842757940292358, "step": 7120 }, { "epoch": 4.9246196403872755, "grad_norm": 8.361810684204102, "learning_rate": 2.819655755340403e-05, "log_odds_chosen": 10.507516860961914, "log_odds_ratio": -3.8556561776204035e-05, "logits/chosen": -0.3292539715766907, "logits/rejected": -0.4065108299255371, "logps/chosen": -0.0001679821580182761, "logps/rejected": -1.817864179611206, "loss": 0.7462, "nll_loss": 0.1865575909614563, "rewards/accuracies": 1.0, "rewards/chosen": -1.6798217984614894e-05, "rewards/margins": 0.18176962435245514, "rewards/rejected": -0.1817864179611206, "step": 7121 }, { "epoch": 4.925311203319502, "grad_norm": 20.088041305541992, "learning_rate": 2.8192715537113878e-05, "log_odds_chosen": 12.001582145690918, "log_odds_ratio": -1.2325194802542683e-05, "logits/chosen": -0.48205146193504333, "logits/rejected": -0.5669339299201965, "logps/chosen": -0.00016101561777759343, "logps/rejected": -2.9314229488372803, "loss": 1.2894, "nll_loss": 0.322337806224823, "rewards/accuracies": 1.0, "rewards/chosen": -1.6101563232950866e-05, "rewards/margins": 0.2931261956691742, "rewards/rejected": -0.29314231872558594, "step": 7122 }, { "epoch": 4.926002766251729, "grad_norm": 46.864810943603516, "learning_rate": 2.8188873520823727e-05, "log_odds_chosen": 8.082642555236816, "log_odds_ratio": -0.6128248572349548, "logits/chosen": -0.5687606334686279, "logits/rejected": -0.6862469911575317, "logps/chosen": -0.0712505653500557, "logps/rejected": -1.3998901844024658, "loss": 1.241, "nll_loss": 0.24896618723869324, "rewards/accuracies": 0.875, "rewards/chosen": -0.007125055883079767, "rewards/margins": 0.13286396861076355, "rewards/rejected": -0.13998901844024658, "step": 7123 }, { "epoch": 4.926694329183956, "grad_norm": 10.988321304321289, "learning_rate": 2.8185031504533583e-05, "log_odds_chosen": 10.095243453979492, "log_odds_ratio": -0.0001219494006363675, "logits/chosen": -0.7098445892333984, "logits/rejected": -0.8073045015335083, "logps/chosen": -0.00027236994355916977, "logps/rejected": -1.8411864042282104, "loss": 1.0152, "nll_loss": 0.25377851724624634, "rewards/accuracies": 1.0, "rewards/chosen": -2.7236994355916977e-05, "rewards/margins": 0.18409138917922974, "rewards/rejected": -0.18411864340305328, "step": 7124 }, { "epoch": 4.927385892116183, "grad_norm": 8.891566276550293, "learning_rate": 2.8181189488243432e-05, "log_odds_chosen": 9.694140434265137, "log_odds_ratio": -0.0010962700471282005, "logits/chosen": -0.39752835035324097, "logits/rejected": -0.41798263788223267, "logps/chosen": -0.0014211098896339536, "logps/rejected": -1.951611876487732, "loss": 0.8885, "nll_loss": 0.22201602160930634, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001421109918737784, "rewards/margins": 0.19501908123493195, "rewards/rejected": -0.19516119360923767, "step": 7125 }, { "epoch": 4.92807745504841, "grad_norm": 16.277318954467773, "learning_rate": 2.817734747195328e-05, "log_odds_chosen": 10.069537162780762, "log_odds_ratio": -0.0037255052011460066, "logits/chosen": -0.7205549478530884, "logits/rejected": -0.6914384961128235, "logps/chosen": -0.006732581183314323, "logps/rejected": -1.870539903640747, "loss": 1.5036, "nll_loss": 0.3755381405353546, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006732581532560289, "rewards/margins": 0.18638074398040771, "rewards/rejected": -0.18705399334430695, "step": 7126 }, { "epoch": 4.9287690179806365, "grad_norm": 7.239488124847412, "learning_rate": 2.8173505455663137e-05, "log_odds_chosen": 10.33989143371582, "log_odds_ratio": -7.038117473712191e-05, "logits/chosen": -0.11135803908109665, "logits/rejected": -0.1713535189628601, "logps/chosen": -0.00010937307524727657, "logps/rejected": -1.5407695770263672, "loss": 1.2532, "nll_loss": 0.31329673528671265, "rewards/accuracies": 1.0, "rewards/chosen": -1.09373086161213e-05, "rewards/margins": 0.1540660262107849, "rewards/rejected": -0.1540769636631012, "step": 7127 }, { "epoch": 4.929460580912863, "grad_norm": 8.722808837890625, "learning_rate": 2.8169663439372986e-05, "log_odds_chosen": 10.938897132873535, "log_odds_ratio": -4.421987250680104e-05, "logits/chosen": -0.2911446690559387, "logits/rejected": -0.4413534700870514, "logps/chosen": -0.00021581347391474992, "logps/rejected": -2.3797059059143066, "loss": 0.9606, "nll_loss": 0.24014431238174438, "rewards/accuracies": 1.0, "rewards/chosen": -2.1581348846666515e-05, "rewards/margins": 0.23794902861118317, "rewards/rejected": -0.23797062039375305, "step": 7128 }, { "epoch": 4.93015214384509, "grad_norm": 12.4188814163208, "learning_rate": 2.8165821423082835e-05, "log_odds_chosen": 9.827089309692383, "log_odds_ratio": -0.00031211768509820104, "logits/chosen": -0.584121584892273, "logits/rejected": -0.5907494425773621, "logps/chosen": -0.0003768773749470711, "logps/rejected": -1.5813324451446533, "loss": 1.3272, "nll_loss": 0.3317672908306122, "rewards/accuracies": 1.0, "rewards/chosen": -3.768773967749439e-05, "rewards/margins": 0.1580955684185028, "rewards/rejected": -0.15813326835632324, "step": 7129 }, { "epoch": 4.930843706777317, "grad_norm": 8.122848510742188, "learning_rate": 2.8161979406792687e-05, "log_odds_chosen": 10.707836151123047, "log_odds_ratio": -4.415389776113443e-05, "logits/chosen": -0.373961478471756, "logits/rejected": -0.4320948123931885, "logps/chosen": -0.0007988472352735698, "logps/rejected": -2.38476300239563, "loss": 0.7998, "nll_loss": 0.1999528706073761, "rewards/accuracies": 1.0, "rewards/chosen": -7.98847249825485e-05, "rewards/margins": 0.23839640617370605, "rewards/rejected": -0.23847629129886627, "step": 7130 }, { "epoch": 4.931535269709544, "grad_norm": 5.815088272094727, "learning_rate": 2.8158137390502536e-05, "log_odds_chosen": 9.746646881103516, "log_odds_ratio": -0.00011611563240876421, "logits/chosen": -0.42615267634391785, "logits/rejected": -0.4549277722835541, "logps/chosen": -0.0003674745967146009, "logps/rejected": -1.1377838850021362, "loss": 1.0002, "nll_loss": 0.2500423192977905, "rewards/accuracies": 1.0, "rewards/chosen": -3.6747456761077046e-05, "rewards/margins": 0.11374164372682571, "rewards/rejected": -0.11377838999032974, "step": 7131 }, { "epoch": 4.932226832641771, "grad_norm": 8.254155158996582, "learning_rate": 2.8154295374212385e-05, "log_odds_chosen": 9.269465446472168, "log_odds_ratio": -0.0017738983733579516, "logits/chosen": -0.3891604542732239, "logits/rejected": -0.3855813145637512, "logps/chosen": -0.0017140365671366453, "logps/rejected": -1.8060569763183594, "loss": 1.0991, "nll_loss": 0.27460891008377075, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017140366253443062, "rewards/margins": 0.18043428659439087, "rewards/rejected": -0.1806056946516037, "step": 7132 }, { "epoch": 4.9329183955739975, "grad_norm": 5.22756290435791, "learning_rate": 2.815045335792224e-05, "log_odds_chosen": 8.7532377243042, "log_odds_ratio": -0.0049340082332491875, "logits/chosen": -0.23282581567764282, "logits/rejected": -0.3273771405220032, "logps/chosen": -0.0010553733445703983, "logps/rejected": -1.7135753631591797, "loss": 1.1454, "nll_loss": 0.2858678102493286, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010553734318818897, "rewards/margins": 0.17125201225280762, "rewards/rejected": -0.17135754227638245, "step": 7133 }, { "epoch": 4.933609958506224, "grad_norm": 11.229767799377441, "learning_rate": 2.814661134163209e-05, "log_odds_chosen": 11.21669864654541, "log_odds_ratio": -2.4041275537456386e-05, "logits/chosen": -0.3968821167945862, "logits/rejected": -0.48639771342277527, "logps/chosen": -0.00011265697685303167, "logps/rejected": -2.149205207824707, "loss": 1.2412, "nll_loss": 0.31030285358428955, "rewards/accuracies": 1.0, "rewards/chosen": -1.1265698049101047e-05, "rewards/margins": 0.21490925550460815, "rewards/rejected": -0.2149205207824707, "step": 7134 }, { "epoch": 4.934301521438451, "grad_norm": 5.015374660491943, "learning_rate": 2.814276932534194e-05, "log_odds_chosen": 9.667210578918457, "log_odds_ratio": -0.001268291613087058, "logits/chosen": -0.48809224367141724, "logits/rejected": -0.6303143501281738, "logps/chosen": -0.0014353214064612985, "logps/rejected": -1.379345417022705, "loss": 0.7382, "nll_loss": 0.18442240357398987, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014353214646689594, "rewards/margins": 0.13779102265834808, "rewards/rejected": -0.13793453574180603, "step": 7135 }, { "epoch": 4.934993084370678, "grad_norm": 5.099924564361572, "learning_rate": 2.8138927309051795e-05, "log_odds_chosen": 11.092218399047852, "log_odds_ratio": -4.954072937835008e-05, "logits/chosen": -0.5482072234153748, "logits/rejected": -0.5716742277145386, "logps/chosen": -0.0003784211294259876, "logps/rejected": -3.1070432662963867, "loss": 0.7207, "nll_loss": 0.18016774952411652, "rewards/accuracies": 1.0, "rewards/chosen": -3.7842110032215714e-05, "rewards/margins": 0.3106665015220642, "rewards/rejected": -0.3107043206691742, "step": 7136 }, { "epoch": 4.935684647302905, "grad_norm": 6.366814136505127, "learning_rate": 2.8135085292761644e-05, "log_odds_chosen": 10.189170837402344, "log_odds_ratio": -0.0005262996419332922, "logits/chosen": -0.0720197930932045, "logits/rejected": -0.11023354530334473, "logps/chosen": -0.000608351023402065, "logps/rejected": -2.044856548309326, "loss": 0.9023, "nll_loss": 0.22551508247852325, "rewards/accuracies": 1.0, "rewards/chosen": -6.083510379539803e-05, "rewards/margins": 0.20442481338977814, "rewards/rejected": -0.20448563992977142, "step": 7137 }, { "epoch": 4.936376210235132, "grad_norm": 11.162799835205078, "learning_rate": 2.8131243276471493e-05, "log_odds_chosen": 9.664872169494629, "log_odds_ratio": -0.021280528977513313, "logits/chosen": -0.3223462998867035, "logits/rejected": -0.4159541428089142, "logps/chosen": -0.006408995017409325, "logps/rejected": -1.8025219440460205, "loss": 1.3599, "nll_loss": 0.3378509283065796, "rewards/accuracies": 1.0, "rewards/chosen": -0.000640899408608675, "rewards/margins": 0.17961131036281586, "rewards/rejected": -0.18025220930576324, "step": 7138 }, { "epoch": 4.9370677731673585, "grad_norm": 8.728205680847168, "learning_rate": 2.8127401260181346e-05, "log_odds_chosen": 10.00544548034668, "log_odds_ratio": -0.00018134855781681836, "logits/chosen": -0.6819294691085815, "logits/rejected": -0.7730222940444946, "logps/chosen": -0.0005425587296485901, "logps/rejected": -1.7225030660629272, "loss": 0.7433, "nll_loss": 0.18581163883209229, "rewards/accuracies": 1.0, "rewards/chosen": -5.425587005447596e-05, "rewards/margins": 0.17219604551792145, "rewards/rejected": -0.17225030064582825, "step": 7139 }, { "epoch": 4.937759336099585, "grad_norm": 17.405189514160156, "learning_rate": 2.8123559243891195e-05, "log_odds_chosen": 10.889158248901367, "log_odds_ratio": -2.6139588953810744e-05, "logits/chosen": -0.37216049432754517, "logits/rejected": -0.45730894804000854, "logps/chosen": -0.00019618839723989367, "logps/rejected": -2.3127663135528564, "loss": 1.0295, "nll_loss": 0.2573762536048889, "rewards/accuracies": 1.0, "rewards/chosen": -1.961884117918089e-05, "rewards/margins": 0.23125702142715454, "rewards/rejected": -0.23127663135528564, "step": 7140 }, { "epoch": 4.938450899031812, "grad_norm": 7.012986183166504, "learning_rate": 2.8119717227601044e-05, "log_odds_chosen": 10.775957107543945, "log_odds_ratio": -3.96674768126104e-05, "logits/chosen": -0.3651018738746643, "logits/rejected": -0.4808062016963959, "logps/chosen": -0.00010476561146788299, "logps/rejected": -1.7233092784881592, "loss": 0.7375, "nll_loss": 0.18437014520168304, "rewards/accuracies": 1.0, "rewards/chosen": -1.0476562238181941e-05, "rewards/margins": 0.17232045531272888, "rewards/rejected": -0.17233094573020935, "step": 7141 }, { "epoch": 4.939142461964039, "grad_norm": 7.365050315856934, "learning_rate": 2.81158752113109e-05, "log_odds_chosen": 10.282831192016602, "log_odds_ratio": -6.569912511622533e-05, "logits/chosen": -0.5295911431312561, "logits/rejected": -0.49910950660705566, "logps/chosen": -0.00039605889469385147, "logps/rejected": -2.092613697052002, "loss": 0.8776, "nll_loss": 0.21940162777900696, "rewards/accuracies": 1.0, "rewards/chosen": -3.960588946938515e-05, "rewards/margins": 0.2092217653989792, "rewards/rejected": -0.20926138758659363, "step": 7142 }, { "epoch": 4.939834024896266, "grad_norm": 8.847102165222168, "learning_rate": 2.811203319502075e-05, "log_odds_chosen": 10.336320877075195, "log_odds_ratio": -6.936783756827936e-05, "logits/chosen": -0.3571697771549225, "logits/rejected": -0.36846020817756653, "logps/chosen": -0.00019183357653673738, "logps/rejected": -1.95271897315979, "loss": 0.8321, "nll_loss": 0.20801444351673126, "rewards/accuracies": 1.0, "rewards/chosen": -1.9183356926077977e-05, "rewards/margins": 0.1952527016401291, "rewards/rejected": -0.19527189433574677, "step": 7143 }, { "epoch": 4.940525587828493, "grad_norm": 9.471915245056152, "learning_rate": 2.8108191178730598e-05, "log_odds_chosen": 11.665063858032227, "log_odds_ratio": -1.2130387403885834e-05, "logits/chosen": -0.4402073621749878, "logits/rejected": -0.6375857591629028, "logps/chosen": -9.851202776189893e-05, "logps/rejected": -2.3184638023376465, "loss": 0.9339, "nll_loss": 0.23347574472427368, "rewards/accuracies": 1.0, "rewards/chosen": -9.851202776189893e-06, "rewards/margins": 0.23183652758598328, "rewards/rejected": -0.2318463772535324, "step": 7144 }, { "epoch": 4.941217150760719, "grad_norm": 10.781583786010742, "learning_rate": 2.8104349162440454e-05, "log_odds_chosen": 9.704204559326172, "log_odds_ratio": -0.0012408954789862037, "logits/chosen": -0.6811150908470154, "logits/rejected": -0.6740538477897644, "logps/chosen": -0.005846824496984482, "logps/rejected": -2.4501309394836426, "loss": 0.9965, "nll_loss": 0.24899712204933167, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005846824496984482, "rewards/margins": 0.24442841112613678, "rewards/rejected": -0.24501308798789978, "step": 7145 }, { "epoch": 4.941908713692946, "grad_norm": 7.452633857727051, "learning_rate": 2.8100507146150303e-05, "log_odds_chosen": 9.428400039672852, "log_odds_ratio": -0.0004272775840945542, "logits/chosen": -0.3518384099006653, "logits/rejected": -0.36135247349739075, "logps/chosen": -0.0015641606878489256, "logps/rejected": -1.492548942565918, "loss": 1.0784, "nll_loss": 0.26955974102020264, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015641606296412647, "rewards/margins": 0.1490984857082367, "rewards/rejected": -0.1492549031972885, "step": 7146 }, { "epoch": 4.942600276625173, "grad_norm": 10.414713859558105, "learning_rate": 2.809666512986015e-05, "log_odds_chosen": 11.029153823852539, "log_odds_ratio": -0.0001740563748171553, "logits/chosen": -0.7576574087142944, "logits/rejected": -0.8908603191375732, "logps/chosen": -0.00031457317527383566, "logps/rejected": -2.333446502685547, "loss": 1.0123, "nll_loss": 0.25305071473121643, "rewards/accuracies": 1.0, "rewards/chosen": -3.145731534459628e-05, "rewards/margins": 0.2333131730556488, "rewards/rejected": -0.2333446443080902, "step": 7147 }, { "epoch": 4.9432918395574, "grad_norm": 7.546963691711426, "learning_rate": 2.8092823113570004e-05, "log_odds_chosen": 10.32200813293457, "log_odds_ratio": -4.672615250456147e-05, "logits/chosen": -1.0263385772705078, "logits/rejected": -1.0607138872146606, "logps/chosen": -0.00017116457456722856, "logps/rejected": -1.5321038961410522, "loss": 0.8777, "nll_loss": 0.2194298803806305, "rewards/accuracies": 1.0, "rewards/chosen": -1.711645927571226e-05, "rewards/margins": 0.15319326519966125, "rewards/rejected": -0.15321038663387299, "step": 7148 }, { "epoch": 4.943983402489627, "grad_norm": 8.159418106079102, "learning_rate": 2.8088981097279853e-05, "log_odds_chosen": 9.63131046295166, "log_odds_ratio": -0.012594000436365604, "logits/chosen": -0.610599160194397, "logits/rejected": -0.6731650233268738, "logps/chosen": -0.007956145331263542, "logps/rejected": -2.213127851486206, "loss": 0.9725, "nll_loss": 0.2418547123670578, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007956145564094186, "rewards/margins": 0.22051715850830078, "rewards/rejected": -0.2213127762079239, "step": 7149 }, { "epoch": 4.944674965421854, "grad_norm": 10.675069808959961, "learning_rate": 2.8085139080989702e-05, "log_odds_chosen": 8.94044017791748, "log_odds_ratio": -0.001029696548357606, "logits/chosen": -0.011049099266529083, "logits/rejected": -0.12137185037136078, "logps/chosen": -0.03985258564352989, "logps/rejected": -2.9449570178985596, "loss": 1.2629, "nll_loss": 0.3156171143054962, "rewards/accuracies": 1.0, "rewards/chosen": -0.003985258284956217, "rewards/margins": 0.2905104458332062, "rewards/rejected": -0.29449570178985596, "step": 7150 }, { "epoch": 4.94536652835408, "grad_norm": 23.415855407714844, "learning_rate": 2.8081297064699558e-05, "log_odds_chosen": 9.784978866577148, "log_odds_ratio": -0.12540185451507568, "logits/chosen": -0.3491271436214447, "logits/rejected": -0.4001074433326721, "logps/chosen": -0.01915506273508072, "logps/rejected": -2.304745674133301, "loss": 0.9497, "nll_loss": 0.22488565742969513, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019155063200742006, "rewards/margins": 0.22855907678604126, "rewards/rejected": -0.2304745614528656, "step": 7151 }, { "epoch": 4.946058091286307, "grad_norm": 6.221928596496582, "learning_rate": 2.8077455048409407e-05, "log_odds_chosen": 10.982514381408691, "log_odds_ratio": -2.903457971115131e-05, "logits/chosen": -0.07943695038557053, "logits/rejected": -0.1861354261636734, "logps/chosen": -0.0002079754340229556, "logps/rejected": -1.972883939743042, "loss": 0.6171, "nll_loss": 0.15427665412425995, "rewards/accuracies": 1.0, "rewards/chosen": -2.0797542674699798e-05, "rewards/margins": 0.1972675919532776, "rewards/rejected": -0.1972883939743042, "step": 7152 }, { "epoch": 4.946749654218534, "grad_norm": 9.135435104370117, "learning_rate": 2.8073613032119256e-05, "log_odds_chosen": 10.792257308959961, "log_odds_ratio": -8.120985148707405e-05, "logits/chosen": -0.48409971594810486, "logits/rejected": -0.4654289186000824, "logps/chosen": -0.0002383202954661101, "logps/rejected": -1.8510081768035889, "loss": 1.2696, "nll_loss": 0.31740251183509827, "rewards/accuracies": 1.0, "rewards/chosen": -2.3832026272430085e-05, "rewards/margins": 0.1850769966840744, "rewards/rejected": -0.18510082364082336, "step": 7153 }, { "epoch": 4.947441217150761, "grad_norm": 4.866416931152344, "learning_rate": 2.8069771015829112e-05, "log_odds_chosen": 9.464195251464844, "log_odds_ratio": -0.0020351430866867304, "logits/chosen": -0.3395107388496399, "logits/rejected": -0.2789302468299866, "logps/chosen": -0.0017817820189520717, "logps/rejected": -1.871229887008667, "loss": 1.2115, "nll_loss": 0.3026636838912964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017817817570175976, "rewards/margins": 0.18694482743740082, "rewards/rejected": -0.18712300062179565, "step": 7154 }, { "epoch": 4.948132780082988, "grad_norm": 8.04308032989502, "learning_rate": 2.806592899953896e-05, "log_odds_chosen": 10.79904556274414, "log_odds_ratio": -4.452460052561946e-05, "logits/chosen": -0.4465823769569397, "logits/rejected": -0.5061565637588501, "logps/chosen": -0.00028760547866113484, "logps/rejected": -1.9426701068878174, "loss": 0.9098, "nll_loss": 0.22744998335838318, "rewards/accuracies": 1.0, "rewards/chosen": -2.8760547138517722e-05, "rewards/margins": 0.19423826038837433, "rewards/rejected": -0.19426700472831726, "step": 7155 }, { "epoch": 4.948824343015215, "grad_norm": 13.30815601348877, "learning_rate": 2.806208698324881e-05, "log_odds_chosen": 10.1293306350708, "log_odds_ratio": -0.00010954002937069163, "logits/chosen": -0.6125750541687012, "logits/rejected": -0.6850671768188477, "logps/chosen": -0.0003307849692646414, "logps/rejected": -1.939996600151062, "loss": 0.9422, "nll_loss": 0.23554423451423645, "rewards/accuracies": 1.0, "rewards/chosen": -3.307849692646414e-05, "rewards/margins": 0.1939665675163269, "rewards/rejected": -0.19399964809417725, "step": 7156 }, { "epoch": 4.949515905947441, "grad_norm": 6.909346103668213, "learning_rate": 2.8058244966958663e-05, "log_odds_chosen": 9.113512992858887, "log_odds_ratio": -0.0011480834800750017, "logits/chosen": -0.3116508424282074, "logits/rejected": -0.4106285572052002, "logps/chosen": -0.001304534263908863, "logps/rejected": -1.1426763534545898, "loss": 1.0868, "nll_loss": 0.2715791165828705, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013045342348050326, "rewards/margins": 0.1141371801495552, "rewards/rejected": -0.11426763981580734, "step": 7157 }, { "epoch": 4.950207468879668, "grad_norm": 15.718669891357422, "learning_rate": 2.805440295066851e-05, "log_odds_chosen": 6.992504596710205, "log_odds_ratio": -0.10088855028152466, "logits/chosen": -0.43080681562423706, "logits/rejected": -0.49850064516067505, "logps/chosen": -0.01973787322640419, "logps/rejected": -1.4150389432907104, "loss": 1.1032, "nll_loss": 0.2657163739204407, "rewards/accuracies": 1.0, "rewards/chosen": -0.001973787322640419, "rewards/margins": 0.13953009247779846, "rewards/rejected": -0.14150390028953552, "step": 7158 }, { "epoch": 4.950899031811895, "grad_norm": 10.54096794128418, "learning_rate": 2.805056093437836e-05, "log_odds_chosen": 9.446221351623535, "log_odds_ratio": -0.0005509358597919345, "logits/chosen": -0.09963419288396835, "logits/rejected": -0.15448901057243347, "logps/chosen": -0.0333227813243866, "logps/rejected": -1.847109317779541, "loss": 0.9162, "nll_loss": 0.22899924218654633, "rewards/accuracies": 1.0, "rewards/chosen": -0.003332278225570917, "rewards/margins": 0.18137866258621216, "rewards/rejected": -0.18471094965934753, "step": 7159 }, { "epoch": 4.951590594744122, "grad_norm": 9.546717643737793, "learning_rate": 2.8046718918088217e-05, "log_odds_chosen": 11.02538013458252, "log_odds_ratio": -4.7218050895025954e-05, "logits/chosen": -0.509639322757721, "logits/rejected": -0.5242058038711548, "logps/chosen": -0.0010804994963109493, "logps/rejected": -2.4925341606140137, "loss": 0.7729, "nll_loss": 0.1932220757007599, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010804997145896778, "rewards/margins": 0.24914538860321045, "rewards/rejected": -0.24925342202186584, "step": 7160 }, { "epoch": 4.952282157676349, "grad_norm": 9.92019271850586, "learning_rate": 2.8042876901798066e-05, "log_odds_chosen": 11.617349624633789, "log_odds_ratio": -1.9354396499693394e-05, "logits/chosen": -0.3191656470298767, "logits/rejected": -0.3231254816055298, "logps/chosen": -0.00020129804033786058, "logps/rejected": -2.650850296020508, "loss": 1.3355, "nll_loss": 0.33388522267341614, "rewards/accuracies": 1.0, "rewards/chosen": -2.0129802578594536e-05, "rewards/margins": 0.26506489515304565, "rewards/rejected": -0.26508504152297974, "step": 7161 }, { "epoch": 4.9529737206085755, "grad_norm": 9.240811347961426, "learning_rate": 2.8039034885507915e-05, "log_odds_chosen": 10.988016128540039, "log_odds_ratio": -4.003685899078846e-05, "logits/chosen": -0.3881503939628601, "logits/rejected": -0.32961368560791016, "logps/chosen": -0.0013664222788065672, "logps/rejected": -2.931913137435913, "loss": 1.1138, "nll_loss": 0.27844059467315674, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013664223661180586, "rewards/margins": 0.2930546700954437, "rewards/rejected": -0.2931913137435913, "step": 7162 }, { "epoch": 4.953665283540802, "grad_norm": 12.443624496459961, "learning_rate": 2.803519286921777e-05, "log_odds_chosen": 11.71902084350586, "log_odds_ratio": -3.129677497781813e-05, "logits/chosen": -0.5320737361907959, "logits/rejected": -0.5102954506874084, "logps/chosen": -0.0008184172911569476, "logps/rejected": -3.300198554992676, "loss": 0.7979, "nll_loss": 0.19947592914104462, "rewards/accuracies": 1.0, "rewards/chosen": -8.184173202607781e-05, "rewards/margins": 0.32993799448013306, "rewards/rejected": -0.33001986145973206, "step": 7163 }, { "epoch": 4.954356846473029, "grad_norm": 19.79650115966797, "learning_rate": 2.803135085292762e-05, "log_odds_chosen": 7.7862958908081055, "log_odds_ratio": -0.358162522315979, "logits/chosen": -0.2744470238685608, "logits/rejected": -0.352327823638916, "logps/chosen": -0.06442242860794067, "logps/rejected": -1.561842679977417, "loss": 1.1889, "nll_loss": 0.2614128291606903, "rewards/accuracies": 0.875, "rewards/chosen": -0.006442242302000523, "rewards/margins": 0.1497420370578766, "rewards/rejected": -0.15618427097797394, "step": 7164 }, { "epoch": 4.955048409405256, "grad_norm": 8.847530364990234, "learning_rate": 2.802750883663747e-05, "log_odds_chosen": 9.146276473999023, "log_odds_ratio": -0.0027183485217392445, "logits/chosen": -0.18752719461917877, "logits/rejected": -0.289531409740448, "logps/chosen": -0.005841393955051899, "logps/rejected": -1.2292550802230835, "loss": 0.9267, "nll_loss": 0.23141279816627502, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005841394304297864, "rewards/margins": 0.12234137952327728, "rewards/rejected": -0.1229255199432373, "step": 7165 }, { "epoch": 4.955739972337483, "grad_norm": 6.832130432128906, "learning_rate": 2.802366682034732e-05, "log_odds_chosen": 9.88257122039795, "log_odds_ratio": -0.0002631518291309476, "logits/chosen": -0.34477508068084717, "logits/rejected": -0.43727973103523254, "logps/chosen": -0.0010147679131478071, "logps/rejected": -2.3121986389160156, "loss": 0.9036, "nll_loss": 0.22586682438850403, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010147679859073833, "rewards/margins": 0.23111838102340698, "rewards/rejected": -0.2312198430299759, "step": 7166 }, { "epoch": 4.95643153526971, "grad_norm": 11.183934211730957, "learning_rate": 2.801982480405717e-05, "log_odds_chosen": 9.253469467163086, "log_odds_ratio": -0.0005922535201534629, "logits/chosen": -0.5971043705940247, "logits/rejected": -0.584150493144989, "logps/chosen": -0.0020415710750967264, "logps/rejected": -1.5619051456451416, "loss": 1.2975, "nll_loss": 0.3243168592453003, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020415711333043873, "rewards/margins": 0.15598636865615845, "rewards/rejected": -0.15619052946567535, "step": 7167 }, { "epoch": 4.9571230982019365, "grad_norm": 14.405940055847168, "learning_rate": 2.801598278776702e-05, "log_odds_chosen": 11.010702133178711, "log_odds_ratio": -2.6105004508281127e-05, "logits/chosen": -0.33093854784965515, "logits/rejected": -0.40159279108047485, "logps/chosen": -9.661898366175592e-05, "logps/rejected": -1.8814085721969604, "loss": 1.1622, "nll_loss": 0.29055318236351013, "rewards/accuracies": 1.0, "rewards/chosen": -9.661898729973473e-06, "rewards/margins": 0.1881311982870102, "rewards/rejected": -0.1881408542394638, "step": 7168 }, { "epoch": 4.957814661134163, "grad_norm": 10.01386547088623, "learning_rate": 2.8012140771476875e-05, "log_odds_chosen": 10.201593399047852, "log_odds_ratio": -7.670342893106863e-05, "logits/chosen": -0.613256573677063, "logits/rejected": -0.6443556547164917, "logps/chosen": -0.002180723240599036, "logps/rejected": -2.1001086235046387, "loss": 0.8454, "nll_loss": 0.21134643256664276, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002180723095079884, "rewards/margins": 0.20979279279708862, "rewards/rejected": -0.21001088619232178, "step": 7169 }, { "epoch": 4.95850622406639, "grad_norm": 8.180740356445312, "learning_rate": 2.8008298755186724e-05, "log_odds_chosen": 9.803583145141602, "log_odds_ratio": -0.00024330374435521662, "logits/chosen": -0.5474072098731995, "logits/rejected": -0.5633154511451721, "logps/chosen": -0.0019682589918375015, "logps/rejected": -2.279942750930786, "loss": 1.1217, "nll_loss": 0.28041279315948486, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019682593119796365, "rewards/margins": 0.22779744863510132, "rewards/rejected": -0.22799426317214966, "step": 7170 }, { "epoch": 4.959197786998617, "grad_norm": 9.358325958251953, "learning_rate": 2.8004456738896573e-05, "log_odds_chosen": 8.370705604553223, "log_odds_ratio": -0.009722158312797546, "logits/chosen": -0.43345358967781067, "logits/rejected": -0.562606930732727, "logps/chosen": -0.006823183968663216, "logps/rejected": -1.5810699462890625, "loss": 0.9671, "nll_loss": 0.24081367254257202, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006823184085078537, "rewards/margins": 0.1574246734380722, "rewards/rejected": -0.1581069827079773, "step": 7171 }, { "epoch": 4.959889349930844, "grad_norm": 11.307122230529785, "learning_rate": 2.800061472260643e-05, "log_odds_chosen": 11.312531471252441, "log_odds_ratio": -1.7610067516216077e-05, "logits/chosen": -0.4398566484451294, "logits/rejected": -0.49697765707969666, "logps/chosen": -0.00024596997536718845, "logps/rejected": -2.504828691482544, "loss": 0.7498, "nll_loss": 0.18744593858718872, "rewards/accuracies": 1.0, "rewards/chosen": -2.459700044710189e-05, "rewards/margins": 0.250458300113678, "rewards/rejected": -0.2504828870296478, "step": 7172 }, { "epoch": 4.960580912863071, "grad_norm": 11.591588973999023, "learning_rate": 2.7996772706316278e-05, "log_odds_chosen": 11.175124168395996, "log_odds_ratio": -3.3854037610581145e-05, "logits/chosen": -0.2384709119796753, "logits/rejected": -0.3869898319244385, "logps/chosen": -0.0001587655715411529, "logps/rejected": -2.376006603240967, "loss": 1.0801, "nll_loss": 0.2700336277484894, "rewards/accuracies": 1.0, "rewards/chosen": -1.587655788171105e-05, "rewards/margins": 0.23758478462696075, "rewards/rejected": -0.2376006543636322, "step": 7173 }, { "epoch": 4.9612724757952975, "grad_norm": 5.465503215789795, "learning_rate": 2.7992930690026127e-05, "log_odds_chosen": 10.007530212402344, "log_odds_ratio": -8.857608190737665e-05, "logits/chosen": -0.4582262337207794, "logits/rejected": -0.506310224533081, "logps/chosen": -0.000461061776150018, "logps/rejected": -2.222295045852661, "loss": 0.8939, "nll_loss": 0.22345994412899017, "rewards/accuracies": 1.0, "rewards/chosen": -4.610617907019332e-05, "rewards/margins": 0.22218340635299683, "rewards/rejected": -0.2222295105457306, "step": 7174 }, { "epoch": 4.961964038727524, "grad_norm": 6.3152289390563965, "learning_rate": 2.798908867373598e-05, "log_odds_chosen": 9.170822143554688, "log_odds_ratio": -0.00022362380695994943, "logits/chosen": -0.39062464237213135, "logits/rejected": -0.4400513768196106, "logps/chosen": -0.0003000005381181836, "logps/rejected": -1.2168389558792114, "loss": 1.2484, "nll_loss": 0.31207793951034546, "rewards/accuracies": 1.0, "rewards/chosen": -3.0000055630807765e-05, "rewards/margins": 0.12165389209985733, "rewards/rejected": -0.12168390303850174, "step": 7175 }, { "epoch": 4.962655601659751, "grad_norm": 7.906181335449219, "learning_rate": 2.798524665744583e-05, "log_odds_chosen": 10.254146575927734, "log_odds_ratio": -0.00024489304632879794, "logits/chosen": -0.5680465698242188, "logits/rejected": -0.5180726647377014, "logps/chosen": -0.00018432183424010873, "logps/rejected": -1.705075979232788, "loss": 1.3018, "nll_loss": 0.3254315257072449, "rewards/accuracies": 1.0, "rewards/chosen": -1.8432185243000276e-05, "rewards/margins": 0.17048917710781097, "rewards/rejected": -0.17050760984420776, "step": 7176 }, { "epoch": 4.963347164591978, "grad_norm": 7.250458240509033, "learning_rate": 2.7981404641155678e-05, "log_odds_chosen": 9.816474914550781, "log_odds_ratio": -0.000194189342437312, "logits/chosen": -0.23732808232307434, "logits/rejected": -0.27680909633636475, "logps/chosen": -0.00036069515044800937, "logps/rejected": -1.7613012790679932, "loss": 1.253, "nll_loss": 0.31322553753852844, "rewards/accuracies": 1.0, "rewards/chosen": -3.606951213441789e-05, "rewards/margins": 0.17609405517578125, "rewards/rejected": -0.17613013088703156, "step": 7177 }, { "epoch": 4.964038727524205, "grad_norm": 8.668585777282715, "learning_rate": 2.7977562624865533e-05, "log_odds_chosen": 10.292119026184082, "log_odds_ratio": -0.0008847196586430073, "logits/chosen": -0.1949756145477295, "logits/rejected": -0.23017552495002747, "logps/chosen": -0.0005422401009127498, "logps/rejected": -1.8730604648590088, "loss": 0.7748, "nll_loss": 0.19360435009002686, "rewards/accuracies": 1.0, "rewards/chosen": -5.422401227406226e-05, "rewards/margins": 0.18725183606147766, "rewards/rejected": -0.18730604648590088, "step": 7178 }, { "epoch": 4.964730290456432, "grad_norm": 7.899402141571045, "learning_rate": 2.7973720608575382e-05, "log_odds_chosen": 10.159967422485352, "log_odds_ratio": -0.00010139452933799475, "logits/chosen": -0.19844774901866913, "logits/rejected": -0.2780728042125702, "logps/chosen": -0.00024211732670664787, "logps/rejected": -1.8791581392288208, "loss": 0.7403, "nll_loss": 0.1850677728652954, "rewards/accuracies": 1.0, "rewards/chosen": -2.4211733034462668e-05, "rewards/margins": 0.1878916174173355, "rewards/rejected": -0.1879158318042755, "step": 7179 }, { "epoch": 4.9654218533886585, "grad_norm": 8.387503623962402, "learning_rate": 2.796987859228523e-05, "log_odds_chosen": 9.664134979248047, "log_odds_ratio": -0.00022844914929009974, "logits/chosen": -0.39181891083717346, "logits/rejected": -0.3865174949169159, "logps/chosen": -0.00030988542130216956, "logps/rejected": -1.6006946563720703, "loss": 0.8644, "nll_loss": 0.21606658399105072, "rewards/accuracies": 1.0, "rewards/chosen": -3.098854358540848e-05, "rewards/margins": 0.16003848612308502, "rewards/rejected": -0.16006948053836823, "step": 7180 }, { "epoch": 4.966113416320885, "grad_norm": 8.570829391479492, "learning_rate": 2.7966036575995087e-05, "log_odds_chosen": 8.541007995605469, "log_odds_ratio": -0.0035613575018942356, "logits/chosen": -0.13719545304775238, "logits/rejected": -0.14414338767528534, "logps/chosen": -0.004820042755454779, "logps/rejected": -1.8059543371200562, "loss": 1.2728, "nll_loss": 0.3178354799747467, "rewards/accuracies": 1.0, "rewards/chosen": -0.00048200428136624396, "rewards/margins": 0.18011343479156494, "rewards/rejected": -0.18059545755386353, "step": 7181 }, { "epoch": 4.966804979253112, "grad_norm": 10.654635429382324, "learning_rate": 2.7962194559704936e-05, "log_odds_chosen": 10.080081939697266, "log_odds_ratio": -0.00025363650638610125, "logits/chosen": 0.14342381060123444, "logits/rejected": 0.042509760707616806, "logps/chosen": -0.0006070620147511363, "logps/rejected": -1.8312735557556152, "loss": 0.6985, "nll_loss": 0.1746012568473816, "rewards/accuracies": 1.0, "rewards/chosen": -6.070620656828396e-05, "rewards/margins": 0.18306663632392883, "rewards/rejected": -0.18312734365463257, "step": 7182 }, { "epoch": 4.967496542185339, "grad_norm": 8.130351066589355, "learning_rate": 2.7958352543414785e-05, "log_odds_chosen": 10.405618667602539, "log_odds_ratio": -5.621470700134523e-05, "logits/chosen": -0.11633479595184326, "logits/rejected": -0.1447855681180954, "logps/chosen": -0.002370691392570734, "logps/rejected": -1.6240513324737549, "loss": 1.2587, "nll_loss": 0.3146775960922241, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023706913634669036, "rewards/margins": 0.16216805577278137, "rewards/rejected": -0.1624051332473755, "step": 7183 }, { "epoch": 4.968188105117566, "grad_norm": 8.688759803771973, "learning_rate": 2.7954510527124638e-05, "log_odds_chosen": 10.065336227416992, "log_odds_ratio": -0.00010105091496370733, "logits/chosen": -0.4148634374141693, "logits/rejected": -0.4110666513442993, "logps/chosen": -0.00040934234857559204, "logps/rejected": -1.5639256238937378, "loss": 0.7909, "nll_loss": 0.1977076232433319, "rewards/accuracies": 1.0, "rewards/chosen": -4.0934235585154966e-05, "rewards/margins": 0.15635162591934204, "rewards/rejected": -0.15639255940914154, "step": 7184 }, { "epoch": 4.968879668049793, "grad_norm": 7.783679485321045, "learning_rate": 2.7950668510834487e-05, "log_odds_chosen": 10.572954177856445, "log_odds_ratio": -5.103446892462671e-05, "logits/chosen": -0.26260286569595337, "logits/rejected": -0.28437915444374084, "logps/chosen": -0.0011634384281933308, "logps/rejected": -2.1006100177764893, "loss": 0.8019, "nll_loss": 0.20047153532505035, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011634385009529069, "rewards/margins": 0.20994466543197632, "rewards/rejected": -0.21006101369857788, "step": 7185 }, { "epoch": 4.9695712309820195, "grad_norm": 9.223774909973145, "learning_rate": 2.7946826494544336e-05, "log_odds_chosen": 10.326700210571289, "log_odds_ratio": -0.00012215416063554585, "logits/chosen": -0.22556382417678833, "logits/rejected": -0.2984452545642853, "logps/chosen": -0.00024488597409799695, "logps/rejected": -2.0094027519226074, "loss": 0.6946, "nll_loss": 0.17364706099033356, "rewards/accuracies": 1.0, "rewards/chosen": -2.4488595954608172e-05, "rewards/margins": 0.20091575384140015, "rewards/rejected": -0.20094025135040283, "step": 7186 }, { "epoch": 4.970262793914246, "grad_norm": 11.973540306091309, "learning_rate": 2.7942984478254192e-05, "log_odds_chosen": 10.546344757080078, "log_odds_ratio": -0.00012341087858658284, "logits/chosen": -0.5267561674118042, "logits/rejected": -0.5304883718490601, "logps/chosen": -0.0005036446964368224, "logps/rejected": -2.3137152194976807, "loss": 1.2295, "nll_loss": 0.3073546886444092, "rewards/accuracies": 1.0, "rewards/chosen": -5.036446964368224e-05, "rewards/margins": 0.23132115602493286, "rewards/rejected": -0.23137152194976807, "step": 7187 }, { "epoch": 4.970954356846473, "grad_norm": 11.056028366088867, "learning_rate": 2.793914246196404e-05, "log_odds_chosen": 10.640401840209961, "log_odds_ratio": -3.979918619734235e-05, "logits/chosen": -0.5876413583755493, "logits/rejected": -0.5758023858070374, "logps/chosen": -0.00012887921184301376, "logps/rejected": -1.7237038612365723, "loss": 0.9218, "nll_loss": 0.23043809831142426, "rewards/accuracies": 1.0, "rewards/chosen": -1.2887921911897138e-05, "rewards/margins": 0.1723574995994568, "rewards/rejected": -0.17237038910388947, "step": 7188 }, { "epoch": 4.9716459197787, "grad_norm": 5.960145950317383, "learning_rate": 2.793530044567389e-05, "log_odds_chosen": 11.051551818847656, "log_odds_ratio": -2.116498217219487e-05, "logits/chosen": -0.35925549268722534, "logits/rejected": -0.4869048595428467, "logps/chosen": -0.00010636688966769725, "logps/rejected": -1.9271949529647827, "loss": 0.6382, "nll_loss": 0.1595507115125656, "rewards/accuracies": 1.0, "rewards/chosen": -1.0636688784870785e-05, "rewards/margins": 0.1927088499069214, "rewards/rejected": -0.1927194893360138, "step": 7189 }, { "epoch": 4.972337482710927, "grad_norm": 8.614933013916016, "learning_rate": 2.7931458429383746e-05, "log_odds_chosen": 11.913887977600098, "log_odds_ratio": -8.357697879546322e-06, "logits/chosen": -0.6220681071281433, "logits/rejected": -0.646041214466095, "logps/chosen": -9.512872929917648e-05, "logps/rejected": -2.6356735229492188, "loss": 0.9438, "nll_loss": 0.23593756556510925, "rewards/accuracies": 1.0, "rewards/chosen": -9.512872566119768e-06, "rewards/margins": 0.2635578513145447, "rewards/rejected": -0.26356735825538635, "step": 7190 }, { "epoch": 4.973029045643154, "grad_norm": 12.957245826721191, "learning_rate": 2.7927616413093595e-05, "log_odds_chosen": 11.058793067932129, "log_odds_ratio": -0.00019432292901910841, "logits/chosen": -0.27864354848861694, "logits/rejected": -0.3478084206581116, "logps/chosen": -0.0017332588322460651, "logps/rejected": -2.526738405227661, "loss": 0.5722, "nll_loss": 0.1430254876613617, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017332589777652174, "rewards/margins": 0.2525005340576172, "rewards/rejected": -0.252673864364624, "step": 7191 }, { "epoch": 4.9737206085753805, "grad_norm": 16.11777687072754, "learning_rate": 2.7923774396803444e-05, "log_odds_chosen": 10.152289390563965, "log_odds_ratio": -0.00022718863328918815, "logits/chosen": -0.40325024724006653, "logits/rejected": -0.4796661138534546, "logps/chosen": -0.003221320454031229, "logps/rejected": -2.233243942260742, "loss": 0.9004, "nll_loss": 0.2250874638557434, "rewards/accuracies": 1.0, "rewards/chosen": -0.000322132051223889, "rewards/margins": 0.22300225496292114, "rewards/rejected": -0.22332441806793213, "step": 7192 }, { "epoch": 4.974412171507607, "grad_norm": 9.064397811889648, "learning_rate": 2.7919932380513296e-05, "log_odds_chosen": 10.586275100708008, "log_odds_ratio": -5.1827355491695926e-05, "logits/chosen": -0.28463008999824524, "logits/rejected": -0.487646222114563, "logps/chosen": -0.0004171731125097722, "logps/rejected": -2.5234835147857666, "loss": 0.8846, "nll_loss": 0.2211383581161499, "rewards/accuracies": 1.0, "rewards/chosen": -4.171731052338146e-05, "rewards/margins": 0.25230664014816284, "rewards/rejected": -0.2523483335971832, "step": 7193 }, { "epoch": 4.975103734439834, "grad_norm": 12.817768096923828, "learning_rate": 2.7916090364223145e-05, "log_odds_chosen": 10.754351615905762, "log_odds_ratio": -4.719572461908683e-05, "logits/chosen": -0.7275233864784241, "logits/rejected": -0.7562810778617859, "logps/chosen": -0.00023445190163329244, "logps/rejected": -2.0250232219696045, "loss": 0.5522, "nll_loss": 0.13805551826953888, "rewards/accuracies": 1.0, "rewards/chosen": -2.3445190890925005e-05, "rewards/margins": 0.20247888565063477, "rewards/rejected": -0.2025023102760315, "step": 7194 }, { "epoch": 4.975795297372061, "grad_norm": 12.647860527038574, "learning_rate": 2.7912248347932994e-05, "log_odds_chosen": 7.500641822814941, "log_odds_ratio": -0.31233054399490356, "logits/chosen": -0.5068243741989136, "logits/rejected": -0.5850129127502441, "logps/chosen": -0.19046106934547424, "logps/rejected": -1.045350432395935, "loss": 1.1691, "nll_loss": 0.26104840636253357, "rewards/accuracies": 0.875, "rewards/chosen": -0.019046107307076454, "rewards/margins": 0.0854889377951622, "rewards/rejected": -0.1045350506901741, "step": 7195 }, { "epoch": 4.976486860304288, "grad_norm": 11.317513465881348, "learning_rate": 2.790840633164285e-05, "log_odds_chosen": 10.024337768554688, "log_odds_ratio": -0.0001522502425359562, "logits/chosen": -0.4047994613647461, "logits/rejected": -0.49443766474723816, "logps/chosen": -0.00027306549600325525, "logps/rejected": -1.7729493379592896, "loss": 0.6732, "nll_loss": 0.16828899085521698, "rewards/accuracies": 1.0, "rewards/chosen": -2.7306548872729763e-05, "rewards/margins": 0.1772676259279251, "rewards/rejected": -0.17729492485523224, "step": 7196 }, { "epoch": 4.977178423236515, "grad_norm": 12.056559562683105, "learning_rate": 2.79045643153527e-05, "log_odds_chosen": 9.828668594360352, "log_odds_ratio": -0.00015435523528140038, "logits/chosen": -0.42752793431282043, "logits/rejected": -0.5025291442871094, "logps/chosen": -0.0008905132999643683, "logps/rejected": -1.8689935207366943, "loss": 0.9808, "nll_loss": 0.24518075585365295, "rewards/accuracies": 1.0, "rewards/chosen": -8.905133290681988e-05, "rewards/margins": 0.18681031465530396, "rewards/rejected": -0.1868993490934372, "step": 7197 }, { "epoch": 4.977869986168741, "grad_norm": 7.273468017578125, "learning_rate": 2.790072229906255e-05, "log_odds_chosen": 10.339853286743164, "log_odds_ratio": -0.0003497989382594824, "logits/chosen": -0.5535563230514526, "logits/rejected": -0.6326808333396912, "logps/chosen": -0.0003252012247685343, "logps/rejected": -2.080918788909912, "loss": 0.9554, "nll_loss": 0.2388230413198471, "rewards/accuracies": 1.0, "rewards/chosen": -3.252012174925767e-05, "rewards/margins": 0.2080593854188919, "rewards/rejected": -0.20809191465377808, "step": 7198 }, { "epoch": 4.978561549100968, "grad_norm": 7.87250280380249, "learning_rate": 2.7896880282772404e-05, "log_odds_chosen": 9.142486572265625, "log_odds_ratio": -0.0016601777169853449, "logits/chosen": -0.298819899559021, "logits/rejected": -0.3006221055984497, "logps/chosen": -0.00171962333843112, "logps/rejected": -1.6306955814361572, "loss": 1.2686, "nll_loss": 0.31698286533355713, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001719623542157933, "rewards/margins": 0.16289760172367096, "rewards/rejected": -0.16306956112384796, "step": 7199 }, { "epoch": 4.979253112033195, "grad_norm": 7.517648220062256, "learning_rate": 2.7893038266482253e-05, "log_odds_chosen": 9.140575408935547, "log_odds_ratio": -0.00042438553646206856, "logits/chosen": -0.47512996196746826, "logits/rejected": -0.43797892332077026, "logps/chosen": -0.0017509328899905086, "logps/rejected": -1.7444941997528076, "loss": 0.7559, "nll_loss": 0.18893952667713165, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017509330064058304, "rewards/margins": 0.17427432537078857, "rewards/rejected": -0.17444941401481628, "step": 7200 }, { "epoch": 4.979944674965422, "grad_norm": 14.481325149536133, "learning_rate": 2.7889196250192102e-05, "log_odds_chosen": 11.609492301940918, "log_odds_ratio": -2.4778462829999626e-05, "logits/chosen": -0.6213642358779907, "logits/rejected": -0.6164488196372986, "logps/chosen": -0.00010396288416814059, "logps/rejected": -2.2234671115875244, "loss": 0.9045, "nll_loss": 0.22612226009368896, "rewards/accuracies": 1.0, "rewards/chosen": -1.0396288416814059e-05, "rewards/margins": 0.2223363071680069, "rewards/rejected": -0.2223467081785202, "step": 7201 }, { "epoch": 4.980636237897649, "grad_norm": 10.874162673950195, "learning_rate": 2.7885354233901955e-05, "log_odds_chosen": 9.768011093139648, "log_odds_ratio": -0.0003575875307433307, "logits/chosen": -0.9017777442932129, "logits/rejected": -0.8507659435272217, "logps/chosen": -0.0015366828301921487, "logps/rejected": -1.9805184602737427, "loss": 1.0513, "nll_loss": 0.26278311014175415, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015366828301921487, "rewards/margins": 0.19789819419384003, "rewards/rejected": -0.1980518400669098, "step": 7202 }, { "epoch": 4.981327800829876, "grad_norm": 13.416150093078613, "learning_rate": 2.7881512217611804e-05, "log_odds_chosen": 10.33548355102539, "log_odds_ratio": -8.06988391559571e-05, "logits/chosen": -0.5759831070899963, "logits/rejected": -0.6761252284049988, "logps/chosen": -0.000314599514240399, "logps/rejected": -2.159302234649658, "loss": 1.2363, "nll_loss": 0.3090607225894928, "rewards/accuracies": 1.0, "rewards/chosen": -3.145995287923142e-05, "rewards/margins": 0.21589875221252441, "rewards/rejected": -0.21593022346496582, "step": 7203 }, { "epoch": 4.982019363762102, "grad_norm": 9.852493286132812, "learning_rate": 2.7877670201321653e-05, "log_odds_chosen": 11.337201118469238, "log_odds_ratio": -2.104753366438672e-05, "logits/chosen": -0.7978764772415161, "logits/rejected": -0.9169949889183044, "logps/chosen": -0.0002170755760744214, "logps/rejected": -2.6007652282714844, "loss": 0.8359, "nll_loss": 0.20897330343723297, "rewards/accuracies": 1.0, "rewards/chosen": -2.170755760744214e-05, "rewards/margins": 0.2600547671318054, "rewards/rejected": -0.26007649302482605, "step": 7204 }, { "epoch": 4.982710926694329, "grad_norm": 6.295581340789795, "learning_rate": 2.787382818503151e-05, "log_odds_chosen": 10.652413368225098, "log_odds_ratio": -4.092457311344333e-05, "logits/chosen": -0.35074669122695923, "logits/rejected": -0.4243549406528473, "logps/chosen": -0.0004147972504142672, "logps/rejected": -2.2696712017059326, "loss": 1.1565, "nll_loss": 0.28911954164505005, "rewards/accuracies": 1.0, "rewards/chosen": -4.147972504142672e-05, "rewards/margins": 0.22692564129829407, "rewards/rejected": -0.22696711122989655, "step": 7205 }, { "epoch": 4.983402489626556, "grad_norm": 6.3069329261779785, "learning_rate": 2.7869986168741358e-05, "log_odds_chosen": 10.849081993103027, "log_odds_ratio": -6.578413012903184e-05, "logits/chosen": -0.30424827337265015, "logits/rejected": -0.5037944912910461, "logps/chosen": -0.0002677696757018566, "logps/rejected": -2.5074574947357178, "loss": 1.0339, "nll_loss": 0.2584582567214966, "rewards/accuracies": 1.0, "rewards/chosen": -2.6776968297781423e-05, "rewards/margins": 0.25071901082992554, "rewards/rejected": -0.2507457733154297, "step": 7206 }, { "epoch": 4.984094052558783, "grad_norm": 7.28257942199707, "learning_rate": 2.7866144152451207e-05, "log_odds_chosen": 10.359865188598633, "log_odds_ratio": -8.066420559771359e-05, "logits/chosen": -0.40999922156333923, "logits/rejected": -0.4879832863807678, "logps/chosen": -0.0004006815142929554, "logps/rejected": -2.262136220932007, "loss": 0.694, "nll_loss": 0.17349769175052643, "rewards/accuracies": 1.0, "rewards/chosen": -4.006815288448706e-05, "rewards/margins": 0.22617356479167938, "rewards/rejected": -0.22621361911296844, "step": 7207 }, { "epoch": 4.98478561549101, "grad_norm": 15.563834190368652, "learning_rate": 2.7862302136161056e-05, "log_odds_chosen": 8.274723052978516, "log_odds_ratio": -0.7375338077545166, "logits/chosen": 0.01283140480518341, "logits/rejected": -0.041081175208091736, "logps/chosen": -0.1275419443845749, "logps/rejected": -2.047375202178955, "loss": 1.3067, "nll_loss": 0.25292617082595825, "rewards/accuracies": 0.875, "rewards/chosen": -0.012754194438457489, "rewards/margins": 0.19198331236839294, "rewards/rejected": -0.20473751425743103, "step": 7208 }, { "epoch": 4.985477178423237, "grad_norm": 13.405542373657227, "learning_rate": 2.785846011987091e-05, "log_odds_chosen": 10.846002578735352, "log_odds_ratio": -4.18073614127934e-05, "logits/chosen": -0.6093755960464478, "logits/rejected": -0.7273604273796082, "logps/chosen": -0.00028661603573709726, "logps/rejected": -2.459135055541992, "loss": 0.7619, "nll_loss": 0.1904742419719696, "rewards/accuracies": 1.0, "rewards/chosen": -2.8661603209911846e-05, "rewards/margins": 0.24588486552238464, "rewards/rejected": -0.2459135204553604, "step": 7209 }, { "epoch": 4.986168741355463, "grad_norm": 6.826845169067383, "learning_rate": 2.785461810358076e-05, "log_odds_chosen": 9.772747993469238, "log_odds_ratio": -0.0009639077470637858, "logits/chosen": -0.49732786417007446, "logits/rejected": -0.5413493514060974, "logps/chosen": -0.001487374771386385, "logps/rejected": -1.667379379272461, "loss": 0.4762, "nll_loss": 0.11894873529672623, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001487374829594046, "rewards/margins": 0.16658920049667358, "rewards/rejected": -0.16673794388771057, "step": 7210 }, { "epoch": 4.98686030428769, "grad_norm": 6.9307122230529785, "learning_rate": 2.785077608729061e-05, "log_odds_chosen": 9.978912353515625, "log_odds_ratio": -0.00010862557246582583, "logits/chosen": -0.4285060167312622, "logits/rejected": -0.4519844651222229, "logps/chosen": -0.000182077128556557, "logps/rejected": -1.3714263439178467, "loss": 0.7845, "nll_loss": 0.19610899686813354, "rewards/accuracies": 1.0, "rewards/chosen": -1.8207714674645104e-05, "rewards/margins": 0.1371244341135025, "rewards/rejected": -0.1371426284313202, "step": 7211 }, { "epoch": 4.987551867219917, "grad_norm": 10.555225372314453, "learning_rate": 2.7846934071000462e-05, "log_odds_chosen": 10.284852027893066, "log_odds_ratio": -0.04881151393055916, "logits/chosen": -0.6675065159797668, "logits/rejected": -0.7506628036499023, "logps/chosen": -0.010504978708922863, "logps/rejected": -2.603940010070801, "loss": 1.0814, "nll_loss": 0.26547157764434814, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010504978708922863, "rewards/margins": 0.2593434751033783, "rewards/rejected": -0.26039397716522217, "step": 7212 }, { "epoch": 4.988243430152144, "grad_norm": 8.400644302368164, "learning_rate": 2.784309205471031e-05, "log_odds_chosen": 10.498527526855469, "log_odds_ratio": -5.62083805561997e-05, "logits/chosen": -0.379863977432251, "logits/rejected": -0.45190173387527466, "logps/chosen": -0.0003514225536491722, "logps/rejected": -2.075934886932373, "loss": 1.04, "nll_loss": 0.25999289751052856, "rewards/accuracies": 1.0, "rewards/chosen": -3.5142253182129934e-05, "rewards/margins": 0.20755833387374878, "rewards/rejected": -0.20759347081184387, "step": 7213 }, { "epoch": 4.988934993084371, "grad_norm": 7.191450595855713, "learning_rate": 2.783925003842016e-05, "log_odds_chosen": 10.816597938537598, "log_odds_ratio": -3.74543851648923e-05, "logits/chosen": -0.23765279352664948, "logits/rejected": -0.39486026763916016, "logps/chosen": -0.0001370952813886106, "logps/rejected": -1.9729348421096802, "loss": 0.8655, "nll_loss": 0.2163611650466919, "rewards/accuracies": 1.0, "rewards/chosen": -1.3709528502658941e-05, "rewards/margins": 0.19727978110313416, "rewards/rejected": -0.1972934901714325, "step": 7214 }, { "epoch": 4.9896265560165975, "grad_norm": 6.2734246253967285, "learning_rate": 2.7835408022130016e-05, "log_odds_chosen": 8.643949508666992, "log_odds_ratio": -0.0012646322138607502, "logits/chosen": -0.25888389348983765, "logits/rejected": -0.31428784132003784, "logps/chosen": -0.003130083903670311, "logps/rejected": -2.032175064086914, "loss": 0.6893, "nll_loss": 0.17218798398971558, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031300840782932937, "rewards/margins": 0.20290449261665344, "rewards/rejected": -0.2032174915075302, "step": 7215 }, { "epoch": 4.990318118948824, "grad_norm": 6.024848461151123, "learning_rate": 2.7831566005839865e-05, "log_odds_chosen": 10.480557441711426, "log_odds_ratio": -0.00015379002434201539, "logits/chosen": -0.3707646131515503, "logits/rejected": -0.5105197429656982, "logps/chosen": -0.010387999936938286, "logps/rejected": -2.8377151489257812, "loss": 0.935, "nll_loss": 0.2337241768836975, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010387999936938286, "rewards/margins": 0.2827327251434326, "rewards/rejected": -0.2837715148925781, "step": 7216 }, { "epoch": 4.991009681881051, "grad_norm": 5.96161413192749, "learning_rate": 2.7827723989549714e-05, "log_odds_chosen": 8.475927352905273, "log_odds_ratio": -0.01788167841732502, "logits/chosen": -0.4176740050315857, "logits/rejected": -0.5164976119995117, "logps/chosen": -0.005372497718781233, "logps/rejected": -1.046644687652588, "loss": 0.6608, "nll_loss": 0.16340121626853943, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005372497835196555, "rewards/margins": 0.10412721335887909, "rewards/rejected": -0.10466445982456207, "step": 7217 }, { "epoch": 4.991701244813278, "grad_norm": 10.724852561950684, "learning_rate": 2.782388197325957e-05, "log_odds_chosen": 10.809329986572266, "log_odds_ratio": -3.2888256100704893e-05, "logits/chosen": -0.26822346448898315, "logits/rejected": -0.33375710248947144, "logps/chosen": -0.00012892685481347144, "logps/rejected": -1.8372243642807007, "loss": 0.6983, "nll_loss": 0.17457273602485657, "rewards/accuracies": 1.0, "rewards/chosen": -1.2892686754639726e-05, "rewards/margins": 0.1837095469236374, "rewards/rejected": -0.18372243642807007, "step": 7218 }, { "epoch": 4.992392807745505, "grad_norm": 15.549614906311035, "learning_rate": 2.782003995696942e-05, "log_odds_chosen": 9.113948822021484, "log_odds_ratio": -0.008212381973862648, "logits/chosen": -0.5484795570373535, "logits/rejected": -0.6041221618652344, "logps/chosen": -0.03184864670038223, "logps/rejected": -1.857212781906128, "loss": 0.8248, "nll_loss": 0.20537768304347992, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031848649960011244, "rewards/margins": 0.18253639340400696, "rewards/rejected": -0.1857212781906128, "step": 7219 }, { "epoch": 4.993084370677732, "grad_norm": 9.258271217346191, "learning_rate": 2.7816197940679268e-05, "log_odds_chosen": 10.35129451751709, "log_odds_ratio": -0.0004313408280722797, "logits/chosen": -0.5563446879386902, "logits/rejected": -0.4806095361709595, "logps/chosen": -0.012187390588223934, "logps/rejected": -2.4066896438598633, "loss": 0.9017, "nll_loss": 0.22538450360298157, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012187390821054578, "rewards/margins": 0.23945021629333496, "rewards/rejected": -0.24066895246505737, "step": 7220 }, { "epoch": 4.9937759336099585, "grad_norm": 8.295549392700195, "learning_rate": 2.781235592438912e-05, "log_odds_chosen": 10.136941909790039, "log_odds_ratio": -0.00016842293553054333, "logits/chosen": -0.7361425161361694, "logits/rejected": -0.7589133381843567, "logps/chosen": -0.000340710103046149, "logps/rejected": -1.756840467453003, "loss": 0.8442, "nll_loss": 0.21102437376976013, "rewards/accuracies": 1.0, "rewards/chosen": -3.407101394259371e-05, "rewards/margins": 0.1756500005722046, "rewards/rejected": -0.17568406462669373, "step": 7221 }, { "epoch": 4.994467496542185, "grad_norm": 7.772411346435547, "learning_rate": 2.780851390809897e-05, "log_odds_chosen": 8.843536376953125, "log_odds_ratio": -0.002176887821406126, "logits/chosen": -0.32384103536605835, "logits/rejected": -0.4336056709289551, "logps/chosen": -0.0019777941051870584, "logps/rejected": -1.5082619190216064, "loss": 1.6187, "nll_loss": 0.4044612646102905, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019777943089138716, "rewards/margins": 0.15062838792800903, "rewards/rejected": -0.15082618594169617, "step": 7222 }, { "epoch": 4.995159059474412, "grad_norm": 5.236146926879883, "learning_rate": 2.780467189180882e-05, "log_odds_chosen": 8.80754280090332, "log_odds_ratio": -0.014589495956897736, "logits/chosen": -0.2995303273200989, "logits/rejected": -0.38191771507263184, "logps/chosen": -0.00858377292752266, "logps/rejected": -1.7309571504592896, "loss": 1.0827, "nll_loss": 0.26920682191848755, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008583773742429912, "rewards/margins": 0.1722373366355896, "rewards/rejected": -0.1730957180261612, "step": 7223 }, { "epoch": 4.995850622406639, "grad_norm": 7.186947822570801, "learning_rate": 2.7800829875518675e-05, "log_odds_chosen": 7.228179454803467, "log_odds_ratio": -0.026927338913083076, "logits/chosen": -0.7289636135101318, "logits/rejected": -0.668282151222229, "logps/chosen": -0.07573521882295609, "logps/rejected": -1.5513174533843994, "loss": 0.9258, "nll_loss": 0.2287617325782776, "rewards/accuracies": 1.0, "rewards/chosen": -0.007573522161692381, "rewards/margins": 0.14755821228027344, "rewards/rejected": -0.1551317423582077, "step": 7224 }, { "epoch": 4.996542185338866, "grad_norm": 10.229729652404785, "learning_rate": 2.7796987859228524e-05, "log_odds_chosen": 9.654919624328613, "log_odds_ratio": -0.0002468491729814559, "logits/chosen": -0.253642201423645, "logits/rejected": -0.3441639840602875, "logps/chosen": -0.0013339454308152199, "logps/rejected": -2.110731840133667, "loss": 0.7524, "nll_loss": 0.1880699098110199, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013339455472305417, "rewards/margins": 0.21093979477882385, "rewards/rejected": -0.21107318997383118, "step": 7225 }, { "epoch": 4.997233748271093, "grad_norm": 7.260266304016113, "learning_rate": 2.7793145842938373e-05, "log_odds_chosen": 9.03848934173584, "log_odds_ratio": -0.0012857945403084159, "logits/chosen": -0.6567116975784302, "logits/rejected": -0.632990837097168, "logps/chosen": -0.0010413994314149022, "logps/rejected": -1.5133137702941895, "loss": 0.9957, "nll_loss": 0.24879108369350433, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010413994459668174, "rewards/margins": 0.15122725069522858, "rewards/rejected": -0.15133139491081238, "step": 7226 }, { "epoch": 4.9979253112033195, "grad_norm": 5.915309429168701, "learning_rate": 2.778930382664823e-05, "log_odds_chosen": 10.508499145507812, "log_odds_ratio": -4.196947702439502e-05, "logits/chosen": -0.5167663097381592, "logits/rejected": -0.6766731142997742, "logps/chosen": -0.002997783711180091, "logps/rejected": -3.167526960372925, "loss": 0.9541, "nll_loss": 0.23851193487644196, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002997783594764769, "rewards/margins": 0.31645292043685913, "rewards/rejected": -0.31675270199775696, "step": 7227 }, { "epoch": 4.998616874135546, "grad_norm": 9.131293296813965, "learning_rate": 2.7785461810358078e-05, "log_odds_chosen": 9.421924591064453, "log_odds_ratio": -0.00113877619151026, "logits/chosen": -0.8295114040374756, "logits/rejected": -0.8581159710884094, "logps/chosen": -0.0010786966886371374, "logps/rejected": -1.5355415344238281, "loss": 1.6171, "nll_loss": 0.404154896736145, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001078696659533307, "rewards/margins": 0.1534462869167328, "rewards/rejected": -0.15355415642261505, "step": 7228 }, { "epoch": 4.999308437067773, "grad_norm": 9.949240684509277, "learning_rate": 2.7781619794067927e-05, "log_odds_chosen": 9.65066146850586, "log_odds_ratio": -0.0008199802250601351, "logits/chosen": -0.8208500742912292, "logits/rejected": -0.8325585126876831, "logps/chosen": -0.004914685618132353, "logps/rejected": -2.468127489089966, "loss": 0.8896, "nll_loss": 0.2223082333803177, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004914685850962996, "rewards/margins": 0.24632127583026886, "rewards/rejected": -0.24681273102760315, "step": 7229 }, { "epoch": 5.0, "grad_norm": 7.1928815841674805, "learning_rate": 2.777777777777778e-05, "log_odds_chosen": 10.277364730834961, "log_odds_ratio": -0.0025406470522284508, "logits/chosen": -0.13449889421463013, "logits/rejected": -0.21222570538520813, "logps/chosen": -0.0019070475827902555, "logps/rejected": -2.5370426177978516, "loss": 1.0403, "nll_loss": 0.25981298089027405, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019070478447247297, "rewards/margins": 0.2535135746002197, "rewards/rejected": -0.2537042796611786, "step": 7230 }, { "epoch": 5.000691562932227, "grad_norm": 7.312842845916748, "learning_rate": 2.7773935761487628e-05, "log_odds_chosen": 11.099580764770508, "log_odds_ratio": -5.0948812713613734e-05, "logits/chosen": -0.7584883570671082, "logits/rejected": -0.7560362815856934, "logps/chosen": -0.0005695630679838359, "logps/rejected": -2.5925183296203613, "loss": 0.9966, "nll_loss": 0.24914821982383728, "rewards/accuracies": 1.0, "rewards/chosen": -5.695630898117088e-05, "rewards/margins": 0.25919491052627563, "rewards/rejected": -0.25925183296203613, "step": 7231 }, { "epoch": 5.001383125864454, "grad_norm": 6.061283111572266, "learning_rate": 2.7770093745197477e-05, "log_odds_chosen": 11.408888816833496, "log_odds_ratio": -4.97270593768917e-05, "logits/chosen": -0.45496147871017456, "logits/rejected": -0.5137842893600464, "logps/chosen": -0.00019665747822728008, "logps/rejected": -2.7457075119018555, "loss": 0.6718, "nll_loss": 0.1679365336894989, "rewards/accuracies": 1.0, "rewards/chosen": -1.966574927791953e-05, "rewards/margins": 0.2745510935783386, "rewards/rejected": -0.2745707631111145, "step": 7232 }, { "epoch": 5.0020746887966805, "grad_norm": 5.719961166381836, "learning_rate": 2.7766251728907333e-05, "log_odds_chosen": 10.571435928344727, "log_odds_ratio": -8.010861347429454e-05, "logits/chosen": -0.6030963659286499, "logits/rejected": -0.5233713388442993, "logps/chosen": -0.0022244458086788654, "logps/rejected": -1.9987239837646484, "loss": 0.6556, "nll_loss": 0.16389495134353638, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002224445779575035, "rewards/margins": 0.19964995980262756, "rewards/rejected": -0.19987240433692932, "step": 7233 }, { "epoch": 5.002766251728907, "grad_norm": 12.926962852478027, "learning_rate": 2.7762409712617182e-05, "log_odds_chosen": 10.400911331176758, "log_odds_ratio": -0.0005559362471103668, "logits/chosen": -0.11768453568220139, "logits/rejected": -0.13520941138267517, "logps/chosen": -0.0016663175774738193, "logps/rejected": -1.829831838607788, "loss": 0.9972, "nll_loss": 0.24924415349960327, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016663174028508365, "rewards/margins": 0.1828165352344513, "rewards/rejected": -0.1829831600189209, "step": 7234 }, { "epoch": 5.003457814661134, "grad_norm": 5.983175754547119, "learning_rate": 2.775856769632703e-05, "log_odds_chosen": 10.03680419921875, "log_odds_ratio": -0.0002549725177232176, "logits/chosen": -0.49738869071006775, "logits/rejected": -0.5541632771492004, "logps/chosen": -0.0003511386748868972, "logps/rejected": -1.8507411479949951, "loss": 0.4813, "nll_loss": 0.12028904259204865, "rewards/accuracies": 1.0, "rewards/chosen": -3.511387330945581e-05, "rewards/margins": 0.1850389987230301, "rewards/rejected": -0.185074120759964, "step": 7235 }, { "epoch": 5.004149377593361, "grad_norm": 6.617901802062988, "learning_rate": 2.7754725680036887e-05, "log_odds_chosen": 10.194670677185059, "log_odds_ratio": -5.910087929805741e-05, "logits/chosen": -0.5459590554237366, "logits/rejected": -0.5887160897254944, "logps/chosen": -0.0002461395342834294, "logps/rejected": -1.8155051469802856, "loss": 0.5684, "nll_loss": 0.14209213852882385, "rewards/accuracies": 1.0, "rewards/chosen": -2.4613957066321746e-05, "rewards/margins": 0.18152591586112976, "rewards/rejected": -0.1815505027770996, "step": 7236 }, { "epoch": 5.004840940525588, "grad_norm": 6.73017692565918, "learning_rate": 2.7750883663746736e-05, "log_odds_chosen": 9.60072135925293, "log_odds_ratio": -0.0004344746412243694, "logits/chosen": -0.5849573612213135, "logits/rejected": -0.6097676753997803, "logps/chosen": -0.0005806325352750719, "logps/rejected": -1.6669625043869019, "loss": 0.7656, "nll_loss": 0.1913515031337738, "rewards/accuracies": 1.0, "rewards/chosen": -5.8063258620677516e-05, "rewards/margins": 0.16663819551467896, "rewards/rejected": -0.16669625043869019, "step": 7237 }, { "epoch": 5.005532503457815, "grad_norm": 11.552806854248047, "learning_rate": 2.7747041647456585e-05, "log_odds_chosen": 11.124504089355469, "log_odds_ratio": -6.782137643313035e-05, "logits/chosen": -0.3950381577014923, "logits/rejected": -0.577907383441925, "logps/chosen": -0.00024894665693864226, "logps/rejected": -2.787130117416382, "loss": 0.6576, "nll_loss": 0.16438151895999908, "rewards/accuracies": 1.0, "rewards/chosen": -2.489466714905575e-05, "rewards/margins": 0.27868813276290894, "rewards/rejected": -0.2787129878997803, "step": 7238 }, { "epoch": 5.0062240663900415, "grad_norm": 8.356595039367676, "learning_rate": 2.7743199631166438e-05, "log_odds_chosen": 10.580275535583496, "log_odds_ratio": -4.7172503400361165e-05, "logits/chosen": -0.29390430450439453, "logits/rejected": -0.38561365008354187, "logps/chosen": -0.0001928009296534583, "logps/rejected": -2.058584213256836, "loss": 0.4335, "nll_loss": 0.10836201161146164, "rewards/accuracies": 1.0, "rewards/chosen": -1.9280094420537353e-05, "rewards/margins": 0.2058391571044922, "rewards/rejected": -0.20585842430591583, "step": 7239 }, { "epoch": 5.006915629322268, "grad_norm": 4.8350982666015625, "learning_rate": 2.7739357614876287e-05, "log_odds_chosen": 9.761199951171875, "log_odds_ratio": -0.01201231125742197, "logits/chosen": -0.42824968695640564, "logits/rejected": -0.5191749930381775, "logps/chosen": -0.009147546254098415, "logps/rejected": -2.1493453979492188, "loss": 0.6846, "nll_loss": 0.1699400395154953, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009147546952590346, "rewards/margins": 0.2140198051929474, "rewards/rejected": -0.2149345576763153, "step": 7240 }, { "epoch": 5.007607192254495, "grad_norm": 12.261672973632812, "learning_rate": 2.7735515598586136e-05, "log_odds_chosen": 11.049886703491211, "log_odds_ratio": -0.00011858274228870869, "logits/chosen": -0.6273783445358276, "logits/rejected": -0.633759617805481, "logps/chosen": -0.00024194586148951203, "logps/rejected": -2.5788331031799316, "loss": 0.688, "nll_loss": 0.17198488116264343, "rewards/accuracies": 1.0, "rewards/chosen": -2.419458542135544e-05, "rewards/margins": 0.25785911083221436, "rewards/rejected": -0.25788331031799316, "step": 7241 }, { "epoch": 5.008298755186722, "grad_norm": 16.330045700073242, "learning_rate": 2.773167358229599e-05, "log_odds_chosen": 11.262862205505371, "log_odds_ratio": -7.536452176282182e-05, "logits/chosen": -0.5741608142852783, "logits/rejected": -0.6448097229003906, "logps/chosen": -0.00036441374686546624, "logps/rejected": -2.5372867584228516, "loss": 0.7904, "nll_loss": 0.19759222865104675, "rewards/accuracies": 1.0, "rewards/chosen": -3.644137905212119e-05, "rewards/margins": 0.25369223952293396, "rewards/rejected": -0.2537286877632141, "step": 7242 }, { "epoch": 5.008990318118949, "grad_norm": 7.2465386390686035, "learning_rate": 2.772783156600584e-05, "log_odds_chosen": 9.80888843536377, "log_odds_ratio": -0.0011844683904200792, "logits/chosen": -0.689521074295044, "logits/rejected": -0.6528528332710266, "logps/chosen": -0.001049173646606505, "logps/rejected": -1.6593786478042603, "loss": 0.6494, "nll_loss": 0.16222906112670898, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010491736611584201, "rewards/margins": 0.16583296656608582, "rewards/rejected": -0.1659378856420517, "step": 7243 }, { "epoch": 5.009681881051176, "grad_norm": 10.320951461791992, "learning_rate": 2.772398954971569e-05, "log_odds_chosen": 10.070535659790039, "log_odds_ratio": -0.00011470584286144003, "logits/chosen": -0.40506500005722046, "logits/rejected": -0.4523780643939972, "logps/chosen": -0.0030764671973884106, "logps/rejected": -2.1235923767089844, "loss": 0.8448, "nll_loss": 0.2111879140138626, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003076466964557767, "rewards/margins": 0.2120516151189804, "rewards/rejected": -0.2123592644929886, "step": 7244 }, { "epoch": 5.0103734439834025, "grad_norm": 14.852372169494629, "learning_rate": 2.7720147533425545e-05, "log_odds_chosen": 10.029454231262207, "log_odds_ratio": -0.00010282750736223534, "logits/chosen": -0.50398850440979, "logits/rejected": -0.6273823380470276, "logps/chosen": -0.0004358980804681778, "logps/rejected": -2.150851011276245, "loss": 0.8415, "nll_loss": 0.21036122739315033, "rewards/accuracies": 1.0, "rewards/chosen": -4.358980731922202e-05, "rewards/margins": 0.21504151821136475, "rewards/rejected": -0.21508511900901794, "step": 7245 }, { "epoch": 5.011065006915629, "grad_norm": 11.475605010986328, "learning_rate": 2.7716305517135394e-05, "log_odds_chosen": 10.059589385986328, "log_odds_ratio": -0.00017879570077639073, "logits/chosen": -0.5300588607788086, "logits/rejected": -0.6242033243179321, "logps/chosen": -0.0006024139001965523, "logps/rejected": -1.965653419494629, "loss": 0.6115, "nll_loss": 0.15284880995750427, "rewards/accuracies": 1.0, "rewards/chosen": -6.0241389292059466e-05, "rewards/margins": 0.1965051293373108, "rewards/rejected": -0.19656535983085632, "step": 7246 }, { "epoch": 5.011756569847856, "grad_norm": 4.2483062744140625, "learning_rate": 2.7712463500845244e-05, "log_odds_chosen": 10.718931198120117, "log_odds_ratio": -5.807676279800944e-05, "logits/chosen": -0.6688945889472961, "logits/rejected": -0.6880615949630737, "logps/chosen": -0.00013566450797952712, "logps/rejected": -1.7550441026687622, "loss": 0.3919, "nll_loss": 0.09796933084726334, "rewards/accuracies": 1.0, "rewards/chosen": -1.3566450434154831e-05, "rewards/margins": 0.1754908561706543, "rewards/rejected": -0.1755044162273407, "step": 7247 }, { "epoch": 5.012448132780083, "grad_norm": 7.51168966293335, "learning_rate": 2.7708621484555096e-05, "log_odds_chosen": 9.49942684173584, "log_odds_ratio": -0.0002443839912302792, "logits/chosen": -0.49505460262298584, "logits/rejected": -0.5230669975280762, "logps/chosen": -0.0012824471341446042, "logps/rejected": -1.38538658618927, "loss": 1.1477, "nll_loss": 0.28689271211624146, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001282447192352265, "rewards/margins": 0.13841041922569275, "rewards/rejected": -0.1385386735200882, "step": 7248 }, { "epoch": 5.01313969571231, "grad_norm": 6.2242889404296875, "learning_rate": 2.7704779468264945e-05, "log_odds_chosen": 9.833250999450684, "log_odds_ratio": -0.00011980785347986966, "logits/chosen": -0.4105455279350281, "logits/rejected": -0.45921966433525085, "logps/chosen": -0.00029663456371054053, "logps/rejected": -1.7124733924865723, "loss": 0.5858, "nll_loss": 0.14644891023635864, "rewards/accuracies": 1.0, "rewards/chosen": -2.966345527966041e-05, "rewards/margins": 0.171217679977417, "rewards/rejected": -0.17124733328819275, "step": 7249 }, { "epoch": 5.013831258644537, "grad_norm": 8.974833488464355, "learning_rate": 2.7700937451974794e-05, "log_odds_chosen": 9.523042678833008, "log_odds_ratio": -0.0032822166103869677, "logits/chosen": -0.5493214130401611, "logits/rejected": -0.5524061918258667, "logps/chosen": -0.009554987773299217, "logps/rejected": -2.449441432952881, "loss": 0.964, "nll_loss": 0.24066194891929626, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009554987191222608, "rewards/margins": 0.24398863315582275, "rewards/rejected": -0.24494412541389465, "step": 7250 }, { "epoch": 5.014522821576763, "grad_norm": 11.83836555480957, "learning_rate": 2.769709543568465e-05, "log_odds_chosen": 10.720413208007812, "log_odds_ratio": -0.00014413942699320614, "logits/chosen": -0.815521240234375, "logits/rejected": -0.9365243911743164, "logps/chosen": -0.0003172925498802215, "logps/rejected": -2.3871865272521973, "loss": 0.8637, "nll_loss": 0.21590059995651245, "rewards/accuracies": 1.0, "rewards/chosen": -3.1729257898405194e-05, "rewards/margins": 0.2386869490146637, "rewards/rejected": -0.2387186884880066, "step": 7251 }, { "epoch": 5.01521438450899, "grad_norm": 6.946929931640625, "learning_rate": 2.76932534193945e-05, "log_odds_chosen": 9.54383659362793, "log_odds_ratio": -0.00016248160682152957, "logits/chosen": -0.4637078642845154, "logits/rejected": -0.5163708329200745, "logps/chosen": -0.0003959203895647079, "logps/rejected": -1.657344937324524, "loss": 0.7034, "nll_loss": 0.17582513391971588, "rewards/accuracies": 1.0, "rewards/chosen": -3.959203968406655e-05, "rewards/margins": 0.16569490730762482, "rewards/rejected": -0.16573449969291687, "step": 7252 }, { "epoch": 5.015905947441217, "grad_norm": 6.577551364898682, "learning_rate": 2.7689411403104348e-05, "log_odds_chosen": 9.708788871765137, "log_odds_ratio": -0.0003885244659613818, "logits/chosen": -0.1782318502664566, "logits/rejected": -0.23638510704040527, "logps/chosen": -0.0003596053284127265, "logps/rejected": -1.6300208568572998, "loss": 0.9062, "nll_loss": 0.2265045940876007, "rewards/accuracies": 1.0, "rewards/chosen": -3.596053284127265e-05, "rewards/margins": 0.16296613216400146, "rewards/rejected": -0.16300208866596222, "step": 7253 }, { "epoch": 5.016597510373444, "grad_norm": 7.003281593322754, "learning_rate": 2.7685569386814204e-05, "log_odds_chosen": 10.26560115814209, "log_odds_ratio": -7.491197175113484e-05, "logits/chosen": -0.275127112865448, "logits/rejected": -0.3356919288635254, "logps/chosen": -0.000538189138751477, "logps/rejected": -2.156519889831543, "loss": 0.7139, "nll_loss": 0.1784665733575821, "rewards/accuracies": 1.0, "rewards/chosen": -5.3818916057934985e-05, "rewards/margins": 0.21559816598892212, "rewards/rejected": -0.2156520038843155, "step": 7254 }, { "epoch": 5.017289073305671, "grad_norm": 8.421640396118164, "learning_rate": 2.7681727370524053e-05, "log_odds_chosen": 11.054540634155273, "log_odds_ratio": -1.7887143258121796e-05, "logits/chosen": 0.0565011166036129, "logits/rejected": -0.08696407079696655, "logps/chosen": -0.00017183017916977406, "logps/rejected": -2.2747251987457275, "loss": 0.7991, "nll_loss": 0.19977036118507385, "rewards/accuracies": 1.0, "rewards/chosen": -1.7183017916977406e-05, "rewards/margins": 0.22745534777641296, "rewards/rejected": -0.22747252881526947, "step": 7255 }, { "epoch": 5.017980636237898, "grad_norm": 9.712133407592773, "learning_rate": 2.7677885354233902e-05, "log_odds_chosen": 10.734992027282715, "log_odds_ratio": -0.0001417091116309166, "logits/chosen": -0.5213885307312012, "logits/rejected": -0.534921407699585, "logps/chosen": -0.0018052636878564954, "logps/rejected": -2.5130221843719482, "loss": 1.0096, "nll_loss": 0.25238972902297974, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018052638915833086, "rewards/margins": 0.2511216998100281, "rewards/rejected": -0.25130224227905273, "step": 7256 }, { "epoch": 5.018672199170124, "grad_norm": 6.777373790740967, "learning_rate": 2.7674043337943754e-05, "log_odds_chosen": 9.396719932556152, "log_odds_ratio": -0.00021104556799400598, "logits/chosen": -0.6140926480293274, "logits/rejected": -0.7252911925315857, "logps/chosen": -0.0006496300920844078, "logps/rejected": -1.5737005472183228, "loss": 0.9829, "nll_loss": 0.24570497870445251, "rewards/accuracies": 1.0, "rewards/chosen": -6.496300920844078e-05, "rewards/margins": 0.15730509161949158, "rewards/rejected": -0.15737006068229675, "step": 7257 }, { "epoch": 5.019363762102351, "grad_norm": 5.959966659545898, "learning_rate": 2.7670201321653603e-05, "log_odds_chosen": 9.696066856384277, "log_odds_ratio": -0.00020695854618679732, "logits/chosen": -0.46064212918281555, "logits/rejected": -0.5616683959960938, "logps/chosen": -0.0001567387516843155, "logps/rejected": -1.2842741012573242, "loss": 0.7888, "nll_loss": 0.19716843962669373, "rewards/accuracies": 1.0, "rewards/chosen": -1.567387516843155e-05, "rewards/margins": 0.12841174006462097, "rewards/rejected": -0.1284274160861969, "step": 7258 }, { "epoch": 5.020055325034578, "grad_norm": 10.227088928222656, "learning_rate": 2.7666359305363453e-05, "log_odds_chosen": 10.79662036895752, "log_odds_ratio": -0.0001424798829248175, "logits/chosen": -0.46717405319213867, "logits/rejected": -0.5267354846000671, "logps/chosen": -0.00027820401010103524, "logps/rejected": -1.9651286602020264, "loss": 0.8254, "nll_loss": 0.20633377134799957, "rewards/accuracies": 1.0, "rewards/chosen": -2.7820402465295047e-05, "rewards/margins": 0.19648504257202148, "rewards/rejected": -0.1965128481388092, "step": 7259 }, { "epoch": 5.020746887966805, "grad_norm": 8.242298126220703, "learning_rate": 2.766251728907331e-05, "log_odds_chosen": 10.206131935119629, "log_odds_ratio": -0.002142932265996933, "logits/chosen": -0.7705875039100647, "logits/rejected": -0.8422242999076843, "logps/chosen": -0.0015574386343359947, "logps/rejected": -2.130014657974243, "loss": 0.8617, "nll_loss": 0.21520650386810303, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015574386634398252, "rewards/margins": 0.21284572780132294, "rewards/rejected": -0.21300145983695984, "step": 7260 }, { "epoch": 5.021438450899032, "grad_norm": 9.300799369812012, "learning_rate": 2.7658675272783157e-05, "log_odds_chosen": 10.081828117370605, "log_odds_ratio": -9.091423271456733e-05, "logits/chosen": -0.43533748388290405, "logits/rejected": -0.4441641867160797, "logps/chosen": -0.000783787458203733, "logps/rejected": -2.2557663917541504, "loss": 0.6918, "nll_loss": 0.1729460507631302, "rewards/accuracies": 1.0, "rewards/chosen": -7.837874727556482e-05, "rewards/margins": 0.2254982590675354, "rewards/rejected": -0.22557662427425385, "step": 7261 }, { "epoch": 5.022130013831259, "grad_norm": 7.336118221282959, "learning_rate": 2.7654833256493006e-05, "log_odds_chosen": 10.239450454711914, "log_odds_ratio": -0.0003725987917277962, "logits/chosen": -0.6500371694564819, "logits/rejected": -0.6669209003448486, "logps/chosen": -0.0006331615149974823, "logps/rejected": -1.8425328731536865, "loss": 0.7544, "nll_loss": 0.1885695457458496, "rewards/accuracies": 1.0, "rewards/chosen": -6.331615441013128e-05, "rewards/margins": 0.18418999016284943, "rewards/rejected": -0.18425330519676208, "step": 7262 }, { "epoch": 5.022821576763485, "grad_norm": 9.546539306640625, "learning_rate": 2.7650991240202862e-05, "log_odds_chosen": 10.925753593444824, "log_odds_ratio": -4.614323552232236e-05, "logits/chosen": -0.5992140769958496, "logits/rejected": -0.65520840883255, "logps/chosen": -0.0001765150809660554, "logps/rejected": -2.0011026859283447, "loss": 1.016, "nll_loss": 0.2540021240711212, "rewards/accuracies": 1.0, "rewards/chosen": -1.7651509551797062e-05, "rewards/margins": 0.200092613697052, "rewards/rejected": -0.2001102864742279, "step": 7263 }, { "epoch": 5.023513139695712, "grad_norm": 6.58445930480957, "learning_rate": 2.764714922391271e-05, "log_odds_chosen": 10.151383399963379, "log_odds_ratio": -6.918309372849762e-05, "logits/chosen": -0.6456372737884521, "logits/rejected": -0.7023369073867798, "logps/chosen": -0.0014546513557434082, "logps/rejected": -1.9670419692993164, "loss": 1.2457, "nll_loss": 0.3114243149757385, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014546513557434082, "rewards/margins": 0.19655874371528625, "rewards/rejected": -0.1967042088508606, "step": 7264 }, { "epoch": 5.024204702627939, "grad_norm": 5.597912311553955, "learning_rate": 2.764330720762256e-05, "log_odds_chosen": 10.879961013793945, "log_odds_ratio": -5.5353310017380863e-05, "logits/chosen": -0.20097941160202026, "logits/rejected": -0.3843633830547333, "logps/chosen": -0.0003638151101768017, "logps/rejected": -2.6702191829681396, "loss": 0.5997, "nll_loss": 0.14991742372512817, "rewards/accuracies": 1.0, "rewards/chosen": -3.638151247287169e-05, "rewards/margins": 0.26698553562164307, "rewards/rejected": -0.26702192425727844, "step": 7265 }, { "epoch": 5.024896265560166, "grad_norm": 6.466996669769287, "learning_rate": 2.7639465191332413e-05, "log_odds_chosen": 9.323081016540527, "log_odds_ratio": -0.0008883294649422169, "logits/chosen": -0.4426640570163727, "logits/rejected": -0.47450733184814453, "logps/chosen": -0.0012046220945194364, "logps/rejected": -1.328416109085083, "loss": 0.7256, "nll_loss": 0.18131092190742493, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012046222400385886, "rewards/margins": 0.1327211558818817, "rewards/rejected": -0.13284161686897278, "step": 7266 }, { "epoch": 5.025587828492393, "grad_norm": 7.6741228103637695, "learning_rate": 2.7635623175042262e-05, "log_odds_chosen": 10.802885055541992, "log_odds_ratio": -0.00011306915985187516, "logits/chosen": -0.40469425916671753, "logits/rejected": -0.47581946849823, "logps/chosen": -0.00025673132040537894, "logps/rejected": -2.3214166164398193, "loss": 0.5633, "nll_loss": 0.1408044546842575, "rewards/accuracies": 1.0, "rewards/chosen": -2.5673132768133655e-05, "rewards/margins": 0.23211601376533508, "rewards/rejected": -0.2321416735649109, "step": 7267 }, { "epoch": 5.0262793914246195, "grad_norm": 6.7040815353393555, "learning_rate": 2.763178115875211e-05, "log_odds_chosen": 10.767393112182617, "log_odds_ratio": -3.190072311554104e-05, "logits/chosen": -0.6101112365722656, "logits/rejected": -0.6725625395774841, "logps/chosen": -0.0001772954419720918, "logps/rejected": -2.14335036277771, "loss": 0.7389, "nll_loss": 0.18472109735012054, "rewards/accuracies": 1.0, "rewards/chosen": -1.772954419720918e-05, "rewards/margins": 0.21431732177734375, "rewards/rejected": -0.21433503925800323, "step": 7268 }, { "epoch": 5.026970954356846, "grad_norm": 11.36666488647461, "learning_rate": 2.7627939142461967e-05, "log_odds_chosen": 9.944623947143555, "log_odds_ratio": -0.004825376905500889, "logits/chosen": -0.14808431267738342, "logits/rejected": -0.07526087760925293, "logps/chosen": -0.019000336527824402, "logps/rejected": -2.203162670135498, "loss": 1.2546, "nll_loss": 0.31317025423049927, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019000339088961482, "rewards/margins": 0.218416228890419, "rewards/rejected": -0.22031627595424652, "step": 7269 }, { "epoch": 5.027662517289073, "grad_norm": 8.129460334777832, "learning_rate": 2.7624097126171816e-05, "log_odds_chosen": 10.38253116607666, "log_odds_ratio": -0.0021352546755224466, "logits/chosen": -0.7980105876922607, "logits/rejected": -0.8242998719215393, "logps/chosen": -0.0009977391455322504, "logps/rejected": -2.466899871826172, "loss": 0.8465, "nll_loss": 0.21141372621059418, "rewards/accuracies": 1.0, "rewards/chosen": -9.977391891879961e-05, "rewards/margins": 0.24659022688865662, "rewards/rejected": -0.24668997526168823, "step": 7270 }, { "epoch": 5.0283540802213, "grad_norm": 6.1578898429870605, "learning_rate": 2.7620255109881665e-05, "log_odds_chosen": 10.121634483337402, "log_odds_ratio": -0.0015102961333468556, "logits/chosen": -0.8386377096176147, "logits/rejected": -0.8806687593460083, "logps/chosen": -0.0004191715852357447, "logps/rejected": -2.1044983863830566, "loss": 0.9582, "nll_loss": 0.23941028118133545, "rewards/accuracies": 1.0, "rewards/chosen": -4.191716288914904e-05, "rewards/margins": 0.21040791273117065, "rewards/rejected": -0.21044982969760895, "step": 7271 }, { "epoch": 5.029045643153527, "grad_norm": 8.820459365844727, "learning_rate": 2.761641309359152e-05, "log_odds_chosen": 10.972347259521484, "log_odds_ratio": -2.266202136524953e-05, "logits/chosen": -0.33121025562286377, "logits/rejected": -0.3720765709877014, "logps/chosen": -0.00029626936884596944, "logps/rejected": -2.2669551372528076, "loss": 0.6732, "nll_loss": 0.16830147802829742, "rewards/accuracies": 1.0, "rewards/chosen": -2.962693542940542e-05, "rewards/margins": 0.2266658991575241, "rewards/rejected": -0.22669553756713867, "step": 7272 }, { "epoch": 5.029737206085754, "grad_norm": 11.16493034362793, "learning_rate": 2.761257107730137e-05, "log_odds_chosen": 11.278556823730469, "log_odds_ratio": -2.037870217463933e-05, "logits/chosen": -0.4210745096206665, "logits/rejected": -0.4552077054977417, "logps/chosen": -0.00014141926658339798, "logps/rejected": -2.2942113876342773, "loss": 0.9183, "nll_loss": 0.229561448097229, "rewards/accuracies": 1.0, "rewards/chosen": -1.4141928659228142e-05, "rewards/margins": 0.22940698266029358, "rewards/rejected": -0.22942113876342773, "step": 7273 }, { "epoch": 5.0304287690179805, "grad_norm": 8.953804016113281, "learning_rate": 2.760872906101122e-05, "log_odds_chosen": 9.440807342529297, "log_odds_ratio": -0.00022025183716323227, "logits/chosen": -0.49695202708244324, "logits/rejected": -0.5508847236633301, "logps/chosen": -0.00028177339117974043, "logps/rejected": -1.4282622337341309, "loss": 0.8057, "nll_loss": 0.20140813291072845, "rewards/accuracies": 1.0, "rewards/chosen": -2.8177339117974043e-05, "rewards/margins": 0.14279805123806, "rewards/rejected": -0.14282622933387756, "step": 7274 }, { "epoch": 5.031120331950207, "grad_norm": 13.167290687561035, "learning_rate": 2.760488704472107e-05, "log_odds_chosen": 10.361078262329102, "log_odds_ratio": -0.0012096577556803823, "logits/chosen": -0.6894783973693848, "logits/rejected": -0.7884131073951721, "logps/chosen": -0.0008816584595479071, "logps/rejected": -1.886361837387085, "loss": 1.0739, "nll_loss": 0.2683638632297516, "rewards/accuracies": 1.0, "rewards/chosen": -8.816583431325853e-05, "rewards/margins": 0.1885480284690857, "rewards/rejected": -0.1886361837387085, "step": 7275 }, { "epoch": 5.031811894882434, "grad_norm": 5.316054344177246, "learning_rate": 2.760104502843092e-05, "log_odds_chosen": 9.974425315856934, "log_odds_ratio": -0.00026799910119734704, "logits/chosen": -0.34158962965011597, "logits/rejected": -0.3852129578590393, "logps/chosen": -0.000451413361588493, "logps/rejected": -1.9002914428710938, "loss": 0.8266, "nll_loss": 0.20661142468452454, "rewards/accuracies": 1.0, "rewards/chosen": -4.51413361588493e-05, "rewards/margins": 0.1899840086698532, "rewards/rejected": -0.19002914428710938, "step": 7276 }, { "epoch": 5.032503457814661, "grad_norm": 7.2751569747924805, "learning_rate": 2.759720301214077e-05, "log_odds_chosen": 10.29867172241211, "log_odds_ratio": -5.476947262650356e-05, "logits/chosen": -0.22670063376426697, "logits/rejected": -0.23984383046627045, "logps/chosen": -0.0004981955280527472, "logps/rejected": -1.9702985286712646, "loss": 0.6271, "nll_loss": 0.15675778687000275, "rewards/accuracies": 1.0, "rewards/chosen": -4.98195513500832e-05, "rewards/margins": 0.1969800591468811, "rewards/rejected": -0.19702985882759094, "step": 7277 }, { "epoch": 5.033195020746888, "grad_norm": 8.760295867919922, "learning_rate": 2.7593360995850625e-05, "log_odds_chosen": 10.028202056884766, "log_odds_ratio": -0.00024880870478227735, "logits/chosen": -0.4461353123188019, "logits/rejected": -0.5395488739013672, "logps/chosen": -0.000239362838328816, "logps/rejected": -1.6638593673706055, "loss": 0.8082, "nll_loss": 0.2020374834537506, "rewards/accuracies": 1.0, "rewards/chosen": -2.393628346908372e-05, "rewards/margins": 0.166362002491951, "rewards/rejected": -0.1663859486579895, "step": 7278 }, { "epoch": 5.033886583679115, "grad_norm": 8.667499542236328, "learning_rate": 2.7589518979560474e-05, "log_odds_chosen": 10.32560920715332, "log_odds_ratio": -9.192282595904544e-05, "logits/chosen": -0.7508265376091003, "logits/rejected": -0.8265612125396729, "logps/chosen": -0.00017297209706157446, "logps/rejected": -1.863836407661438, "loss": 0.7363, "nll_loss": 0.18405523896217346, "rewards/accuracies": 1.0, "rewards/chosen": -1.7297210433753207e-05, "rewards/margins": 0.18636634945869446, "rewards/rejected": -0.18638364970684052, "step": 7279 }, { "epoch": 5.0345781466113415, "grad_norm": 8.381052017211914, "learning_rate": 2.7585676963270323e-05, "log_odds_chosen": 10.612117767333984, "log_odds_ratio": -9.46400105021894e-05, "logits/chosen": -0.5738332867622375, "logits/rejected": -0.6243719458580017, "logps/chosen": -0.006455769296735525, "logps/rejected": -2.590888500213623, "loss": 0.6972, "nll_loss": 0.1742965281009674, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006455769180320203, "rewards/margins": 0.25844329595565796, "rewards/rejected": -0.2590888738632202, "step": 7280 }, { "epoch": 5.035269709543568, "grad_norm": 5.367280960083008, "learning_rate": 2.758183494698018e-05, "log_odds_chosen": 11.281238555908203, "log_odds_ratio": -1.8807790183927864e-05, "logits/chosen": -0.22567470371723175, "logits/rejected": -0.32583460211753845, "logps/chosen": -0.0001243324513779953, "logps/rejected": -2.2370567321777344, "loss": 0.4467, "nll_loss": 0.11168432235717773, "rewards/accuracies": 1.0, "rewards/chosen": -1.2433246411092114e-05, "rewards/margins": 0.22369323670864105, "rewards/rejected": -0.22370567917823792, "step": 7281 }, { "epoch": 5.035961272475795, "grad_norm": 8.441225051879883, "learning_rate": 2.7577992930690028e-05, "log_odds_chosen": 9.585426330566406, "log_odds_ratio": -0.0005702337948605418, "logits/chosen": -0.26939302682876587, "logits/rejected": -0.31572502851486206, "logps/chosen": -0.0015275696059688926, "logps/rejected": -1.8812816143035889, "loss": 0.8862, "nll_loss": 0.2214984893798828, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001527569693280384, "rewards/margins": 0.18797540664672852, "rewards/rejected": -0.18812817335128784, "step": 7282 }, { "epoch": 5.036652835408022, "grad_norm": 7.499341011047363, "learning_rate": 2.7574150914399877e-05, "log_odds_chosen": 9.687578201293945, "log_odds_ratio": -0.0005626450874842703, "logits/chosen": -0.4453797936439514, "logits/rejected": -0.4718892574310303, "logps/chosen": -0.0005303797079250216, "logps/rejected": -1.8079785108566284, "loss": 0.9008, "nll_loss": 0.22513696551322937, "rewards/accuracies": 1.0, "rewards/chosen": -5.3037976613268256e-05, "rewards/margins": 0.18074482679367065, "rewards/rejected": -0.18079787492752075, "step": 7283 }, { "epoch": 5.037344398340249, "grad_norm": 7.906566143035889, "learning_rate": 2.757030889810973e-05, "log_odds_chosen": 11.452741622924805, "log_odds_ratio": -2.4343857148778625e-05, "logits/chosen": -0.21651677787303925, "logits/rejected": -0.2888755798339844, "logps/chosen": -0.00034281317493878305, "logps/rejected": -3.1185402870178223, "loss": 1.0789, "nll_loss": 0.269711434841156, "rewards/accuracies": 1.0, "rewards/chosen": -3.4281318221474066e-05, "rewards/margins": 0.31181973218917847, "rewards/rejected": -0.3118540346622467, "step": 7284 }, { "epoch": 5.038035961272476, "grad_norm": 7.8299384117126465, "learning_rate": 2.756646688181958e-05, "log_odds_chosen": 9.673845291137695, "log_odds_ratio": -0.00026930117746815085, "logits/chosen": -0.5036507844924927, "logits/rejected": -0.6252288818359375, "logps/chosen": -0.0005948683246970177, "logps/rejected": -1.937495231628418, "loss": 0.8598, "nll_loss": 0.2149186134338379, "rewards/accuracies": 1.0, "rewards/chosen": -5.948682883172296e-05, "rewards/margins": 0.19369004666805267, "rewards/rejected": -0.1937495321035385, "step": 7285 }, { "epoch": 5.0387275242047025, "grad_norm": 10.01733684539795, "learning_rate": 2.7562624865529428e-05, "log_odds_chosen": 10.357145309448242, "log_odds_ratio": -0.0006056310376152396, "logits/chosen": -0.21898457407951355, "logits/rejected": -0.2727906405925751, "logps/chosen": -0.0008674904238432646, "logps/rejected": -2.3992435932159424, "loss": 0.846, "nll_loss": 0.21144171059131622, "rewards/accuracies": 1.0, "rewards/chosen": -8.67490452947095e-05, "rewards/margins": 0.23983760178089142, "rewards/rejected": -0.2399243712425232, "step": 7286 }, { "epoch": 5.039419087136929, "grad_norm": 7.567254066467285, "learning_rate": 2.7558782849239284e-05, "log_odds_chosen": 9.905765533447266, "log_odds_ratio": -0.0006043082103133202, "logits/chosen": -0.45671600103378296, "logits/rejected": -0.5718494653701782, "logps/chosen": -0.0013060432393103838, "logps/rejected": -1.7504911422729492, "loss": 0.6407, "nll_loss": 0.16010862588882446, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013060432684142143, "rewards/margins": 0.1749185174703598, "rewards/rejected": -0.17504912614822388, "step": 7287 }, { "epoch": 5.040110650069156, "grad_norm": 9.728728294372559, "learning_rate": 2.7554940832949133e-05, "log_odds_chosen": 9.93949031829834, "log_odds_ratio": -0.00012081762542948127, "logits/chosen": -0.3270997405052185, "logits/rejected": -0.40328624844551086, "logps/chosen": -0.00031918910099193454, "logps/rejected": -1.9016844034194946, "loss": 0.6612, "nll_loss": 0.16527943313121796, "rewards/accuracies": 1.0, "rewards/chosen": -3.1918905733618885e-05, "rewards/margins": 0.19013653695583344, "rewards/rejected": -0.19016844034194946, "step": 7288 }, { "epoch": 5.040802213001383, "grad_norm": 6.479235649108887, "learning_rate": 2.7551098816658982e-05, "log_odds_chosen": 10.745867729187012, "log_odds_ratio": -0.0001523199025541544, "logits/chosen": -0.10512614250183105, "logits/rejected": -0.025525301694869995, "logps/chosen": -0.000199339963728562, "logps/rejected": -2.0925867557525635, "loss": 1.1551, "nll_loss": 0.28876954317092896, "rewards/accuracies": 1.0, "rewards/chosen": -1.993399564526044e-05, "rewards/margins": 0.20923873782157898, "rewards/rejected": -0.20925866067409515, "step": 7289 }, { "epoch": 5.04149377593361, "grad_norm": 11.356147766113281, "learning_rate": 2.7547256800368838e-05, "log_odds_chosen": 10.036778450012207, "log_odds_ratio": -8.606391202192754e-05, "logits/chosen": -0.4650914669036865, "logits/rejected": -0.6013932228088379, "logps/chosen": -0.001222763443365693, "logps/rejected": -1.8660969734191895, "loss": 1.1791, "nll_loss": 0.29475611448287964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012227633851580322, "rewards/margins": 0.18648743629455566, "rewards/rejected": -0.18660971522331238, "step": 7290 }, { "epoch": 5.042185338865837, "grad_norm": 5.68522834777832, "learning_rate": 2.7543414784078687e-05, "log_odds_chosen": 9.280818939208984, "log_odds_ratio": -0.001574191846884787, "logits/chosen": -0.30058932304382324, "logits/rejected": -0.3923916220664978, "logps/chosen": -0.003772944677621126, "logps/rejected": -2.097243547439575, "loss": 0.7078, "nll_loss": 0.17678092420101166, "rewards/accuracies": 1.0, "rewards/chosen": -0.000377294491045177, "rewards/margins": 0.20934706926345825, "rewards/rejected": -0.20972435176372528, "step": 7291 }, { "epoch": 5.0428769017980635, "grad_norm": 24.780282974243164, "learning_rate": 2.7539572767788536e-05, "log_odds_chosen": 7.424704551696777, "log_odds_ratio": -0.23528823256492615, "logits/chosen": -0.8567458391189575, "logits/rejected": -0.8633086085319519, "logps/chosen": -0.035740286111831665, "logps/rejected": -1.280559778213501, "loss": 0.8606, "nll_loss": 0.19162975251674652, "rewards/accuracies": 0.875, "rewards/chosen": -0.00357402884401381, "rewards/margins": 0.12448194622993469, "rewards/rejected": -0.12805597484111786, "step": 7292 }, { "epoch": 5.04356846473029, "grad_norm": 7.827977657318115, "learning_rate": 2.7535730751498388e-05, "log_odds_chosen": 10.188173294067383, "log_odds_ratio": -0.00014951504999771714, "logits/chosen": -0.6818506121635437, "logits/rejected": -0.7837034463882446, "logps/chosen": -0.0002506999298930168, "logps/rejected": -1.8527156114578247, "loss": 0.9025, "nll_loss": 0.22561517357826233, "rewards/accuracies": 1.0, "rewards/chosen": -2.506999226170592e-05, "rewards/margins": 0.1852465122938156, "rewards/rejected": -0.18527157604694366, "step": 7293 }, { "epoch": 5.044260027662517, "grad_norm": 7.4346842765808105, "learning_rate": 2.7531888735208237e-05, "log_odds_chosen": 9.61988639831543, "log_odds_ratio": -0.015739865601062775, "logits/chosen": -0.6515390872955322, "logits/rejected": -0.7199310064315796, "logps/chosen": -0.0055860369466245174, "logps/rejected": -1.8236140012741089, "loss": 0.6803, "nll_loss": 0.168507918715477, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005586037295870483, "rewards/margins": 0.18180277943611145, "rewards/rejected": -0.1823614090681076, "step": 7294 }, { "epoch": 5.044951590594744, "grad_norm": 15.84222412109375, "learning_rate": 2.7528046718918086e-05, "log_odds_chosen": 11.02806282043457, "log_odds_ratio": -2.5386010747752152e-05, "logits/chosen": -0.5460261702537537, "logits/rejected": -0.6166951060295105, "logps/chosen": -0.000791882339399308, "logps/rejected": -2.6014323234558105, "loss": 1.221, "nll_loss": 0.3052525818347931, "rewards/accuracies": 1.0, "rewards/chosen": -7.91882339399308e-05, "rewards/margins": 0.260064035654068, "rewards/rejected": -0.2601432204246521, "step": 7295 }, { "epoch": 5.045643153526971, "grad_norm": 7.974617958068848, "learning_rate": 2.7524204702627942e-05, "log_odds_chosen": 11.188053131103516, "log_odds_ratio": -5.614932888420299e-05, "logits/chosen": -0.27668648958206177, "logits/rejected": -0.37699154019355774, "logps/chosen": -0.00037966459058225155, "logps/rejected": -2.7652714252471924, "loss": 1.0089, "nll_loss": 0.25221163034439087, "rewards/accuracies": 1.0, "rewards/chosen": -3.796645614784211e-05, "rewards/margins": 0.27648916840553284, "rewards/rejected": -0.27652713656425476, "step": 7296 }, { "epoch": 5.046334716459198, "grad_norm": 19.900930404663086, "learning_rate": 2.752036268633779e-05, "log_odds_chosen": 11.485966682434082, "log_odds_ratio": -2.118528209393844e-05, "logits/chosen": -0.5126395225524902, "logits/rejected": -0.601915717124939, "logps/chosen": -0.0002069872571155429, "logps/rejected": -2.5941524505615234, "loss": 0.9578, "nll_loss": 0.23944054543972015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0698724256362766e-05, "rewards/margins": 0.2593945264816284, "rewards/rejected": -0.25941523909568787, "step": 7297 }, { "epoch": 5.0470262793914245, "grad_norm": 7.200878620147705, "learning_rate": 2.751652067004764e-05, "log_odds_chosen": 10.318074226379395, "log_odds_ratio": -0.0001252438232768327, "logits/chosen": -0.71943199634552, "logits/rejected": -0.792367160320282, "logps/chosen": -0.0020204363390803337, "logps/rejected": -2.4491629600524902, "loss": 0.6531, "nll_loss": 0.16327083110809326, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002020436222665012, "rewards/margins": 0.24471423029899597, "rewards/rejected": -0.24491627514362335, "step": 7298 }, { "epoch": 5.047717842323651, "grad_norm": 7.3060221672058105, "learning_rate": 2.7512678653757496e-05, "log_odds_chosen": 9.833172798156738, "log_odds_ratio": -0.00016102934023365378, "logits/chosen": -0.6357143521308899, "logits/rejected": -0.6339840888977051, "logps/chosen": -0.0005614220863208175, "logps/rejected": -2.1665122509002686, "loss": 0.8519, "nll_loss": 0.21295638382434845, "rewards/accuracies": 1.0, "rewards/chosen": -5.61422057216987e-05, "rewards/margins": 0.2165950983762741, "rewards/rejected": -0.21665123105049133, "step": 7299 }, { "epoch": 5.048409405255878, "grad_norm": 9.484922409057617, "learning_rate": 2.7508836637467345e-05, "log_odds_chosen": 9.575933456420898, "log_odds_ratio": -0.0013263956643640995, "logits/chosen": -0.818328320980072, "logits/rejected": -0.7978004813194275, "logps/chosen": -0.0018352242186665535, "logps/rejected": -2.237055778503418, "loss": 0.7705, "nll_loss": 0.1924881637096405, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001835224247770384, "rewards/margins": 0.22352203726768494, "rewards/rejected": -0.22370555996894836, "step": 7300 }, { "epoch": 5.049100968188105, "grad_norm": 5.502103328704834, "learning_rate": 2.7504994621177194e-05, "log_odds_chosen": 11.25271987915039, "log_odds_ratio": -2.2424899725592695e-05, "logits/chosen": -0.7001416683197021, "logits/rejected": -0.7362242341041565, "logps/chosen": -0.00014827025006525218, "logps/rejected": -2.2779436111450195, "loss": 1.0947, "nll_loss": 0.27368372678756714, "rewards/accuracies": 1.0, "rewards/chosen": -1.4827023733232636e-05, "rewards/margins": 0.22777950763702393, "rewards/rejected": -0.227794349193573, "step": 7301 }, { "epoch": 5.049792531120332, "grad_norm": 8.534310340881348, "learning_rate": 2.7501152604887047e-05, "log_odds_chosen": 10.193739891052246, "log_odds_ratio": -7.256300887092948e-05, "logits/chosen": -0.5930569767951965, "logits/rejected": -0.5903542637825012, "logps/chosen": -0.004373773001134396, "logps/rejected": -1.9866235256195068, "loss": 0.7531, "nll_loss": 0.18827757239341736, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043737731175497174, "rewards/margins": 0.1982249766588211, "rewards/rejected": -0.19866235554218292, "step": 7302 }, { "epoch": 5.050484094052559, "grad_norm": 10.663469314575195, "learning_rate": 2.7497310588596896e-05, "log_odds_chosen": 11.076746940612793, "log_odds_ratio": -3.118633685517125e-05, "logits/chosen": -0.9300976991653442, "logits/rejected": -0.972251296043396, "logps/chosen": -0.00029384903609752655, "logps/rejected": -2.5527446269989014, "loss": 0.7093, "nll_loss": 0.1773204654455185, "rewards/accuracies": 1.0, "rewards/chosen": -2.9384904337348416e-05, "rewards/margins": 0.2552450895309448, "rewards/rejected": -0.2552744746208191, "step": 7303 }, { "epoch": 5.051175656984785, "grad_norm": 10.101683616638184, "learning_rate": 2.7493468572306748e-05, "log_odds_chosen": 9.031973838806152, "log_odds_ratio": -0.00035060991649515927, "logits/chosen": -0.14871618151664734, "logits/rejected": -0.2351362407207489, "logps/chosen": -0.000733660242985934, "logps/rejected": -1.4281890392303467, "loss": 0.7831, "nll_loss": 0.19575095176696777, "rewards/accuracies": 1.0, "rewards/chosen": -7.336602720897645e-05, "rewards/margins": 0.14274555444717407, "rewards/rejected": -0.14281892776489258, "step": 7304 }, { "epoch": 5.051867219917012, "grad_norm": 7.80236291885376, "learning_rate": 2.74896265560166e-05, "log_odds_chosen": 10.683979034423828, "log_odds_ratio": -4.006546805612743e-05, "logits/chosen": -0.5397990345954895, "logits/rejected": -0.6449406743049622, "logps/chosen": -0.0005074103828519583, "logps/rejected": -2.488528251647949, "loss": 0.9742, "nll_loss": 0.24354231357574463, "rewards/accuracies": 1.0, "rewards/chosen": -5.07410331920255e-05, "rewards/margins": 0.2488020658493042, "rewards/rejected": -0.24885281920433044, "step": 7305 }, { "epoch": 5.052558782849239, "grad_norm": 15.392332077026367, "learning_rate": 2.748578453972645e-05, "log_odds_chosen": 9.010686874389648, "log_odds_ratio": -0.0005412200698629022, "logits/chosen": -0.5475982427597046, "logits/rejected": -0.5727043747901917, "logps/chosen": -0.0009371995693072677, "logps/rejected": -1.937116265296936, "loss": 0.8893, "nll_loss": 0.2222771793603897, "rewards/accuracies": 1.0, "rewards/chosen": -9.371995110996068e-05, "rewards/margins": 0.19361791014671326, "rewards/rejected": -0.19371163845062256, "step": 7306 }, { "epoch": 5.053250345781466, "grad_norm": 4.404837131500244, "learning_rate": 2.74819425234363e-05, "log_odds_chosen": 10.56973648071289, "log_odds_ratio": -0.00021840019326191396, "logits/chosen": -0.4324110448360443, "logits/rejected": -0.4252541661262512, "logps/chosen": -0.0003612586879171431, "logps/rejected": -2.1879324913024902, "loss": 0.7681, "nll_loss": 0.19199644029140472, "rewards/accuracies": 1.0, "rewards/chosen": -3.6125871702097356e-05, "rewards/margins": 0.21875713765621185, "rewards/rejected": -0.21879325807094574, "step": 7307 }, { "epoch": 5.053941908713693, "grad_norm": 5.678922176361084, "learning_rate": 2.7478100507146154e-05, "log_odds_chosen": 9.939447402954102, "log_odds_ratio": -0.0010102377273142338, "logits/chosen": -0.25071343779563904, "logits/rejected": -0.2884729504585266, "logps/chosen": -0.001003112643957138, "logps/rejected": -1.7373353242874146, "loss": 0.8569, "nll_loss": 0.2141135334968567, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001003112702164799, "rewards/margins": 0.17363321781158447, "rewards/rejected": -0.17373353242874146, "step": 7308 }, { "epoch": 5.05463347164592, "grad_norm": 7.243196487426758, "learning_rate": 2.7474258490856004e-05, "log_odds_chosen": 10.330777168273926, "log_odds_ratio": -0.00013779370055999607, "logits/chosen": -0.38157716393470764, "logits/rejected": -0.43057459592819214, "logps/chosen": -0.0005845414707437158, "logps/rejected": -2.229933977127075, "loss": 0.7357, "nll_loss": 0.18391013145446777, "rewards/accuracies": 1.0, "rewards/chosen": -5.845414489158429e-05, "rewards/margins": 0.22293496131896973, "rewards/rejected": -0.222993403673172, "step": 7309 }, { "epoch": 5.055325034578146, "grad_norm": 10.608085632324219, "learning_rate": 2.7470416474565853e-05, "log_odds_chosen": 10.949653625488281, "log_odds_ratio": -0.00029080972308292985, "logits/chosen": -0.2496720403432846, "logits/rejected": -0.3969630002975464, "logps/chosen": -0.0003734467609319836, "logps/rejected": -2.2889750003814697, "loss": 0.9614, "nll_loss": 0.2403145283460617, "rewards/accuracies": 1.0, "rewards/chosen": -3.7344674638006836e-05, "rewards/margins": 0.22886013984680176, "rewards/rejected": -0.22889748215675354, "step": 7310 }, { "epoch": 5.056016597510373, "grad_norm": 6.699407577514648, "learning_rate": 2.746657445827571e-05, "log_odds_chosen": 8.956399917602539, "log_odds_ratio": -0.001024306402541697, "logits/chosen": -0.3196716904640198, "logits/rejected": -0.3115221858024597, "logps/chosen": -0.0013646759325638413, "logps/rejected": -1.663041591644287, "loss": 0.7016, "nll_loss": 0.17530988156795502, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013646761362906545, "rewards/margins": 0.16616767644882202, "rewards/rejected": -0.16630415618419647, "step": 7311 }, { "epoch": 5.0567081604426, "grad_norm": 7.288957118988037, "learning_rate": 2.7462732441985557e-05, "log_odds_chosen": 10.09207534790039, "log_odds_ratio": -0.003282478777691722, "logits/chosen": -0.40368348360061646, "logits/rejected": -0.5203957557678223, "logps/chosen": -0.0042105019092559814, "logps/rejected": -1.6452527046203613, "loss": 0.7136, "nll_loss": 0.17807680368423462, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042105026659555733, "rewards/margins": 0.16410420835018158, "rewards/rejected": -0.16452525556087494, "step": 7312 }, { "epoch": 5.057399723374827, "grad_norm": 6.980625629425049, "learning_rate": 2.7458890425695407e-05, "log_odds_chosen": 9.844117164611816, "log_odds_ratio": -0.00035047222627326846, "logits/chosen": -0.41656243801116943, "logits/rejected": -0.37486982345581055, "logps/chosen": -0.00019175885245203972, "logps/rejected": -1.1824655532836914, "loss": 0.8217, "nll_loss": 0.2053995132446289, "rewards/accuracies": 1.0, "rewards/chosen": -1.917588451760821e-05, "rewards/margins": 0.11822737753391266, "rewards/rejected": -0.11824654787778854, "step": 7313 }, { "epoch": 5.058091286307054, "grad_norm": 13.701227188110352, "learning_rate": 2.745504840940526e-05, "log_odds_chosen": 11.325311660766602, "log_odds_ratio": -0.00020471982134040445, "logits/chosen": -0.414910227060318, "logits/rejected": -0.5247098803520203, "logps/chosen": -0.0010233953362330794, "logps/rejected": -2.807422399520874, "loss": 0.7551, "nll_loss": 0.1887424886226654, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001023395307129249, "rewards/margins": 0.2806398868560791, "rewards/rejected": -0.28074222803115845, "step": 7314 }, { "epoch": 5.058782849239281, "grad_norm": 10.800251007080078, "learning_rate": 2.7451206393115108e-05, "log_odds_chosen": 12.077896118164062, "log_odds_ratio": -9.185761882690713e-06, "logits/chosen": -0.5681400299072266, "logits/rejected": -0.5691365003585815, "logps/chosen": -0.0001330829836660996, "logps/rejected": -2.8732047080993652, "loss": 1.2474, "nll_loss": 0.31185391545295715, "rewards/accuracies": 1.0, "rewards/chosen": -1.3308298548508901e-05, "rewards/margins": 0.28730714321136475, "rewards/rejected": -0.28732046484947205, "step": 7315 }, { "epoch": 5.059474412171507, "grad_norm": 14.8779935836792, "learning_rate": 2.7447364376824957e-05, "log_odds_chosen": 10.254196166992188, "log_odds_ratio": -7.926978287287056e-05, "logits/chosen": -0.1750495433807373, "logits/rejected": -0.273913711309433, "logps/chosen": -0.0005362760275602341, "logps/rejected": -2.002532482147217, "loss": 0.845, "nll_loss": 0.21123018860816956, "rewards/accuracies": 1.0, "rewards/chosen": -5.362760566640645e-05, "rewards/margins": 0.20019961893558502, "rewards/rejected": -0.20025324821472168, "step": 7316 }, { "epoch": 5.060165975103734, "grad_norm": 6.954501152038574, "learning_rate": 2.7443522360534813e-05, "log_odds_chosen": 8.149320602416992, "log_odds_ratio": -0.000663488288410008, "logits/chosen": -0.4530641436576843, "logits/rejected": -0.469480037689209, "logps/chosen": -0.0008364081149920821, "logps/rejected": -1.3134746551513672, "loss": 1.0815, "nll_loss": 0.2703148126602173, "rewards/accuracies": 1.0, "rewards/chosen": -8.364080713363364e-05, "rewards/margins": 0.1312638372182846, "rewards/rejected": -0.13134747743606567, "step": 7317 }, { "epoch": 5.060857538035961, "grad_norm": 5.969472885131836, "learning_rate": 2.7439680344244662e-05, "log_odds_chosen": 10.451945304870605, "log_odds_ratio": -0.0001245027524419129, "logits/chosen": -0.5547382831573486, "logits/rejected": -0.5758868455886841, "logps/chosen": -0.005893957335501909, "logps/rejected": -2.719212770462036, "loss": 0.8003, "nll_loss": 0.20006687939167023, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005893956986255944, "rewards/margins": 0.27133187651634216, "rewards/rejected": -0.2719212770462036, "step": 7318 }, { "epoch": 5.061549100968188, "grad_norm": 13.521698951721191, "learning_rate": 2.743583832795451e-05, "log_odds_chosen": 9.812695503234863, "log_odds_ratio": -0.00014395485050044954, "logits/chosen": -0.48720940947532654, "logits/rejected": -0.6409310102462769, "logps/chosen": -0.0002551696088630706, "logps/rejected": -1.7317194938659668, "loss": 0.6948, "nll_loss": 0.17368364334106445, "rewards/accuracies": 1.0, "rewards/chosen": -2.551695979491342e-05, "rewards/margins": 0.17314641177654266, "rewards/rejected": -0.17317193746566772, "step": 7319 }, { "epoch": 5.062240663900415, "grad_norm": 9.973909378051758, "learning_rate": 2.7431996311664367e-05, "log_odds_chosen": 9.408581733703613, "log_odds_ratio": -0.0003007837221957743, "logits/chosen": -0.9688934087753296, "logits/rejected": -0.948418378829956, "logps/chosen": -0.001488229027017951, "logps/rejected": -1.7405766248703003, "loss": 1.0078, "nll_loss": 0.2519216537475586, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001488229027017951, "rewards/margins": 0.17390884459018707, "rewards/rejected": -0.17405766248703003, "step": 7320 }, { "epoch": 5.0629322268326415, "grad_norm": 16.34572410583496, "learning_rate": 2.7428154295374216e-05, "log_odds_chosen": 10.63037109375, "log_odds_ratio": -0.0007433656137436628, "logits/chosen": -0.5187327861785889, "logits/rejected": -0.5670936107635498, "logps/chosen": -0.0008166545303538442, "logps/rejected": -2.5092692375183105, "loss": 1.3585, "nll_loss": 0.33956095576286316, "rewards/accuracies": 1.0, "rewards/chosen": -8.166545012500137e-05, "rewards/margins": 0.2508452534675598, "rewards/rejected": -0.2509269118309021, "step": 7321 }, { "epoch": 5.063623789764868, "grad_norm": 7.8338212966918945, "learning_rate": 2.7424312279084065e-05, "log_odds_chosen": 10.2802734375, "log_odds_ratio": -6.797789683332667e-05, "logits/chosen": -0.6781834959983826, "logits/rejected": -0.8167017102241516, "logps/chosen": -0.0003164065128657967, "logps/rejected": -2.061220645904541, "loss": 0.7272, "nll_loss": 0.18178972601890564, "rewards/accuracies": 1.0, "rewards/chosen": -3.164065128657967e-05, "rewards/margins": 0.20609039068222046, "rewards/rejected": -0.206122025847435, "step": 7322 }, { "epoch": 5.064315352697095, "grad_norm": 10.21096134185791, "learning_rate": 2.7420470262793917e-05, "log_odds_chosen": 10.725156784057617, "log_odds_ratio": -5.2025687182322145e-05, "logits/chosen": -0.781559944152832, "logits/rejected": -0.8966841697692871, "logps/chosen": -0.00017686965293250978, "logps/rejected": -1.9730557203292847, "loss": 0.5743, "nll_loss": 0.14357338845729828, "rewards/accuracies": 1.0, "rewards/chosen": -1.7686963474261574e-05, "rewards/margins": 0.19728787243366241, "rewards/rejected": -0.1973055601119995, "step": 7323 }, { "epoch": 5.065006915629322, "grad_norm": 10.335928916931152, "learning_rate": 2.7416628246503766e-05, "log_odds_chosen": 10.741065979003906, "log_odds_ratio": -0.000119962845928967, "logits/chosen": -0.5748330354690552, "logits/rejected": -0.6753748655319214, "logps/chosen": -0.0003316613147035241, "logps/rejected": -2.3247146606445312, "loss": 1.0588, "nll_loss": 0.26468318700790405, "rewards/accuracies": 1.0, "rewards/chosen": -3.316613219794817e-05, "rewards/margins": 0.23243829607963562, "rewards/rejected": -0.23247148096561432, "step": 7324 }, { "epoch": 5.065698478561549, "grad_norm": 5.846102714538574, "learning_rate": 2.7412786230213616e-05, "log_odds_chosen": 11.197221755981445, "log_odds_ratio": -5.849684021086432e-05, "logits/chosen": -0.5642470717430115, "logits/rejected": -0.5235009789466858, "logps/chosen": -0.0014971166383475065, "logps/rejected": -2.859696388244629, "loss": 1.5394, "nll_loss": 0.38484257459640503, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001497116609243676, "rewards/margins": 0.2858199179172516, "rewards/rejected": -0.28596964478492737, "step": 7325 }, { "epoch": 5.066390041493776, "grad_norm": 6.826424598693848, "learning_rate": 2.740894421392347e-05, "log_odds_chosen": 10.296684265136719, "log_odds_ratio": -0.0001223309664055705, "logits/chosen": -0.4076666235923767, "logits/rejected": -0.46820512413978577, "logps/chosen": -0.0005055164219811559, "logps/rejected": -2.137256622314453, "loss": 0.6483, "nll_loss": 0.16206586360931396, "rewards/accuracies": 1.0, "rewards/chosen": -5.055164729128592e-05, "rewards/margins": 0.2136751115322113, "rewards/rejected": -0.21372565627098083, "step": 7326 }, { "epoch": 5.0670816044260025, "grad_norm": 7.7066450119018555, "learning_rate": 2.740510219763332e-05, "log_odds_chosen": 11.011165618896484, "log_odds_ratio": -0.001046941615641117, "logits/chosen": -0.7362217903137207, "logits/rejected": -0.830572247505188, "logps/chosen": -0.004729542415589094, "logps/rejected": -3.1200973987579346, "loss": 0.6127, "nll_loss": 0.1530696600675583, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047295421245507896, "rewards/margins": 0.3115368187427521, "rewards/rejected": -0.3120097517967224, "step": 7327 }, { "epoch": 5.067773167358229, "grad_norm": 14.417914390563965, "learning_rate": 2.740126018134317e-05, "log_odds_chosen": 10.635137557983398, "log_odds_ratio": -6.574903090950102e-05, "logits/chosen": -0.5258181095123291, "logits/rejected": -0.568513035774231, "logps/chosen": -0.00028529533301480114, "logps/rejected": -2.3477580547332764, "loss": 0.7924, "nll_loss": 0.1981056034564972, "rewards/accuracies": 1.0, "rewards/chosen": -2.8529530027299188e-05, "rewards/margins": 0.23474730551242828, "rewards/rejected": -0.23477581143379211, "step": 7328 }, { "epoch": 5.068464730290456, "grad_norm": 10.248303413391113, "learning_rate": 2.7397418165053025e-05, "log_odds_chosen": 10.010307312011719, "log_odds_ratio": -0.0003728670999407768, "logits/chosen": -0.05163934826850891, "logits/rejected": -0.262073278427124, "logps/chosen": -0.0023998182732611895, "logps/rejected": -2.346627712249756, "loss": 0.9532, "nll_loss": 0.23825368285179138, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002399818185949698, "rewards/margins": 0.23442280292510986, "rewards/rejected": -0.23466277122497559, "step": 7329 }, { "epoch": 5.069156293222683, "grad_norm": 8.726041793823242, "learning_rate": 2.7393576148762874e-05, "log_odds_chosen": 10.123150825500488, "log_odds_ratio": -6.815577944507822e-05, "logits/chosen": -0.5572763085365295, "logits/rejected": -0.5811817646026611, "logps/chosen": -0.00027071969816461205, "logps/rejected": -1.9652814865112305, "loss": 0.6151, "nll_loss": 0.15378038585186005, "rewards/accuracies": 1.0, "rewards/chosen": -2.707197199924849e-05, "rewards/margins": 0.19650107622146606, "rewards/rejected": -0.19652815163135529, "step": 7330 }, { "epoch": 5.06984785615491, "grad_norm": 6.076335430145264, "learning_rate": 2.7389734132472723e-05, "log_odds_chosen": 10.358654975891113, "log_odds_ratio": -0.0001471362920710817, "logits/chosen": -0.6471813917160034, "logits/rejected": -0.7003373503684998, "logps/chosen": -0.0006569478427991271, "logps/rejected": -1.9787683486938477, "loss": 0.9182, "nll_loss": 0.22953800857067108, "rewards/accuracies": 1.0, "rewards/chosen": -6.569478864548728e-05, "rewards/margins": 0.19781114161014557, "rewards/rejected": -0.19787684082984924, "step": 7331 }, { "epoch": 5.070539419087137, "grad_norm": 8.848478317260742, "learning_rate": 2.7385892116182576e-05, "log_odds_chosen": 10.515992164611816, "log_odds_ratio": -8.050731412367895e-05, "logits/chosen": -0.7456662058830261, "logits/rejected": -0.645315945148468, "logps/chosen": -0.0002744827070273459, "logps/rejected": -1.9360021352767944, "loss": 1.0445, "nll_loss": 0.2611117362976074, "rewards/accuracies": 1.0, "rewards/chosen": -2.7448269975138828e-05, "rewards/margins": 0.1935727596282959, "rewards/rejected": -0.19360020756721497, "step": 7332 }, { "epoch": 5.0712309820193635, "grad_norm": 6.7651472091674805, "learning_rate": 2.7382050099892425e-05, "log_odds_chosen": 9.035691261291504, "log_odds_ratio": -0.0013945155078545213, "logits/chosen": -0.1447266787290573, "logits/rejected": -0.22644981741905212, "logps/chosen": -0.0022594670299440622, "logps/rejected": -1.7893898487091064, "loss": 0.7243, "nll_loss": 0.18092525005340576, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002259467146359384, "rewards/margins": 0.17871303856372833, "rewards/rejected": -0.17893898487091064, "step": 7333 }, { "epoch": 5.07192254495159, "grad_norm": 9.09512996673584, "learning_rate": 2.7378208083602274e-05, "log_odds_chosen": 11.089442253112793, "log_odds_ratio": -2.5343755623907782e-05, "logits/chosen": -0.3498130440711975, "logits/rejected": -0.3135882019996643, "logps/chosen": -0.00015191901184152812, "logps/rejected": -2.361804485321045, "loss": 0.9629, "nll_loss": 0.24072042107582092, "rewards/accuracies": 1.0, "rewards/chosen": -1.5191902093647514e-05, "rewards/margins": 0.23616525530815125, "rewards/rejected": -0.23618043959140778, "step": 7334 }, { "epoch": 5.072614107883817, "grad_norm": 8.32583999633789, "learning_rate": 2.737436606731213e-05, "log_odds_chosen": 9.538285255432129, "log_odds_ratio": -0.0002937999670393765, "logits/chosen": -0.7607426047325134, "logits/rejected": -0.8624207973480225, "logps/chosen": -0.0005625184276141226, "logps/rejected": -1.5791518688201904, "loss": 0.7533, "nll_loss": 0.1882893443107605, "rewards/accuracies": 1.0, "rewards/chosen": -5.625184712698683e-05, "rewards/margins": 0.1578589528799057, "rewards/rejected": -0.15791520476341248, "step": 7335 }, { "epoch": 5.073305670816044, "grad_norm": 9.384604454040527, "learning_rate": 2.737052405102198e-05, "log_odds_chosen": 9.253545761108398, "log_odds_ratio": -0.006250377744436264, "logits/chosen": -0.2119435966014862, "logits/rejected": -0.2176445722579956, "logps/chosen": -0.004084007814526558, "logps/rejected": -1.9495849609375, "loss": 0.8884, "nll_loss": 0.22148054838180542, "rewards/accuracies": 1.0, "rewards/chosen": -0.00040840081055648625, "rewards/margins": 0.19455008208751678, "rewards/rejected": -0.19495847821235657, "step": 7336 }, { "epoch": 5.073997233748271, "grad_norm": 6.155551433563232, "learning_rate": 2.7366682034731828e-05, "log_odds_chosen": 11.532613754272461, "log_odds_ratio": -2.104714985762257e-05, "logits/chosen": -0.16297248005867004, "logits/rejected": -0.21947115659713745, "logps/chosen": -8.627733041066676e-05, "logps/rejected": -2.2461464405059814, "loss": 0.843, "nll_loss": 0.21074417233467102, "rewards/accuracies": 1.0, "rewards/chosen": -8.627734132460319e-06, "rewards/margins": 0.22460602223873138, "rewards/rejected": -0.22461465001106262, "step": 7337 }, { "epoch": 5.074688796680498, "grad_norm": 4.333896636962891, "learning_rate": 2.7362840018441684e-05, "log_odds_chosen": 11.221064567565918, "log_odds_ratio": -4.2140080040553585e-05, "logits/chosen": -0.5966606140136719, "logits/rejected": -0.6740995049476624, "logps/chosen": -0.00012577198504004627, "logps/rejected": -2.4205164909362793, "loss": 0.6636, "nll_loss": 0.1658967137336731, "rewards/accuracies": 1.0, "rewards/chosen": -1.2577198504004627e-05, "rewards/margins": 0.24203908443450928, "rewards/rejected": -0.2420516312122345, "step": 7338 }, { "epoch": 5.0753803596127245, "grad_norm": 7.258881092071533, "learning_rate": 2.7358998002151533e-05, "log_odds_chosen": 9.731328964233398, "log_odds_ratio": -0.0002475330838933587, "logits/chosen": -0.43540024757385254, "logits/rejected": -0.541413426399231, "logps/chosen": -0.0006951102986931801, "logps/rejected": -1.5809580087661743, "loss": 0.8467, "nll_loss": 0.21164801716804504, "rewards/accuracies": 1.0, "rewards/chosen": -6.951102841412649e-05, "rewards/margins": 0.1580262929201126, "rewards/rejected": -0.1580958068370819, "step": 7339 }, { "epoch": 5.076071922544951, "grad_norm": 11.20781135559082, "learning_rate": 2.7355155985861382e-05, "log_odds_chosen": 10.96607780456543, "log_odds_ratio": -5.328706538421102e-05, "logits/chosen": -0.6485635638237, "logits/rejected": -0.6323709487915039, "logps/chosen": -0.0002674778224900365, "logps/rejected": -2.192117691040039, "loss": 0.9264, "nll_loss": 0.23160181939601898, "rewards/accuracies": 1.0, "rewards/chosen": -2.6747784431790933e-05, "rewards/margins": 0.2191850244998932, "rewards/rejected": -0.21921177208423615, "step": 7340 }, { "epoch": 5.076763485477178, "grad_norm": 8.58651351928711, "learning_rate": 2.7351313969571234e-05, "log_odds_chosen": 10.983704566955566, "log_odds_ratio": -6.344070425257087e-05, "logits/chosen": -0.5228769779205322, "logits/rejected": -0.5102678537368774, "logps/chosen": -0.0003842473088297993, "logps/rejected": -2.586697816848755, "loss": 0.5418, "nll_loss": 0.1354333907365799, "rewards/accuracies": 1.0, "rewards/chosen": -3.842473233817145e-05, "rewards/margins": 0.2586313486099243, "rewards/rejected": -0.25866979360580444, "step": 7341 }, { "epoch": 5.077455048409405, "grad_norm": 7.6569695472717285, "learning_rate": 2.7347471953281083e-05, "log_odds_chosen": 10.862194061279297, "log_odds_ratio": -2.9732314942521043e-05, "logits/chosen": -0.5924553871154785, "logits/rejected": -0.6317382454872131, "logps/chosen": -0.0003253734321333468, "logps/rejected": -2.4295787811279297, "loss": 0.6985, "nll_loss": 0.1746188998222351, "rewards/accuracies": 1.0, "rewards/chosen": -3.253734394093044e-05, "rewards/margins": 0.24292536079883575, "rewards/rejected": -0.24295789003372192, "step": 7342 }, { "epoch": 5.078146611341632, "grad_norm": 9.416374206542969, "learning_rate": 2.7343629936990932e-05, "log_odds_chosen": 9.677162170410156, "log_odds_ratio": -0.001104559632949531, "logits/chosen": -0.07008127868175507, "logits/rejected": -0.10235333442687988, "logps/chosen": -0.0016829818487167358, "logps/rejected": -2.11376953125, "loss": 0.9175, "nll_loss": 0.22925275564193726, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016829819651320577, "rewards/margins": 0.21120867133140564, "rewards/rejected": -0.21137696504592896, "step": 7343 }, { "epoch": 5.078838174273859, "grad_norm": 6.895681858062744, "learning_rate": 2.733978792070078e-05, "log_odds_chosen": 8.807646751403809, "log_odds_ratio": -0.03008407913148403, "logits/chosen": -0.398873507976532, "logits/rejected": -0.3212706446647644, "logps/chosen": -0.008143252693116665, "logps/rejected": -1.5997668504714966, "loss": 0.7238, "nll_loss": 0.1779380440711975, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008143253508023918, "rewards/margins": 0.15916235744953156, "rewards/rejected": -0.15997669100761414, "step": 7344 }, { "epoch": 5.0795297372060855, "grad_norm": 11.062602996826172, "learning_rate": 2.7335945904410637e-05, "log_odds_chosen": 10.790531158447266, "log_odds_ratio": -5.3668307373300195e-05, "logits/chosen": -0.5789155960083008, "logits/rejected": -0.5793518424034119, "logps/chosen": -0.00012543403136078268, "logps/rejected": -1.6924831867218018, "loss": 0.9506, "nll_loss": 0.23764421045780182, "rewards/accuracies": 1.0, "rewards/chosen": -1.2543401680886745e-05, "rewards/margins": 0.16923576593399048, "rewards/rejected": -0.1692483127117157, "step": 7345 }, { "epoch": 5.080221300138312, "grad_norm": 9.413389205932617, "learning_rate": 2.7332103888120486e-05, "log_odds_chosen": 10.895193099975586, "log_odds_ratio": -2.4430100893368945e-05, "logits/chosen": -0.7501481771469116, "logits/rejected": -0.772050142288208, "logps/chosen": -0.00023057861835695803, "logps/rejected": -2.3615636825561523, "loss": 0.9952, "nll_loss": 0.24878647923469543, "rewards/accuracies": 1.0, "rewards/chosen": -2.3057862563291565e-05, "rewards/margins": 0.23613335192203522, "rewards/rejected": -0.2361564040184021, "step": 7346 }, { "epoch": 5.080912863070539, "grad_norm": 7.451417446136475, "learning_rate": 2.7328261871830335e-05, "log_odds_chosen": 11.018407821655273, "log_odds_ratio": -3.6289315175963566e-05, "logits/chosen": -0.28731396794319153, "logits/rejected": -0.30185893177986145, "logps/chosen": -0.00014567398466169834, "logps/rejected": -2.0808353424072266, "loss": 0.905, "nll_loss": 0.22625084221363068, "rewards/accuracies": 1.0, "rewards/chosen": -1.4567398466169834e-05, "rewards/margins": 0.20806896686553955, "rewards/rejected": -0.20808354020118713, "step": 7347 }, { "epoch": 5.081604426002766, "grad_norm": 5.167357444763184, "learning_rate": 2.732441985554019e-05, "log_odds_chosen": 10.189430236816406, "log_odds_ratio": -0.00752821983769536, "logits/chosen": -0.004103332757949829, "logits/rejected": -0.07965162396430969, "logps/chosen": -0.005710722412914038, "logps/rejected": -2.303321361541748, "loss": 0.7793, "nll_loss": 0.19407445192337036, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005710722180083394, "rewards/margins": 0.2297610640525818, "rewards/rejected": -0.230332151055336, "step": 7348 }, { "epoch": 5.082295988934993, "grad_norm": 7.182365417480469, "learning_rate": 2.732057783925004e-05, "log_odds_chosen": 11.068100929260254, "log_odds_ratio": -8.026784053072333e-05, "logits/chosen": -1.1031522750854492, "logits/rejected": -1.079147458076477, "logps/chosen": -0.00028857134748250246, "logps/rejected": -2.3670783042907715, "loss": 0.7401, "nll_loss": 0.18502512574195862, "rewards/accuracies": 1.0, "rewards/chosen": -2.8857135475846007e-05, "rewards/margins": 0.23667895793914795, "rewards/rejected": -0.23670782148838043, "step": 7349 }, { "epoch": 5.08298755186722, "grad_norm": 7.975959300994873, "learning_rate": 2.731673582295989e-05, "log_odds_chosen": 11.034915924072266, "log_odds_ratio": -2.3781507479725406e-05, "logits/chosen": -0.046159759163856506, "logits/rejected": -0.14203815162181854, "logps/chosen": -0.00022653871565125883, "logps/rejected": -2.30277419090271, "loss": 0.9624, "nll_loss": 0.24060288071632385, "rewards/accuracies": 1.0, "rewards/chosen": -2.265387047373224e-05, "rewards/margins": 0.23025476932525635, "rewards/rejected": -0.230277419090271, "step": 7350 }, { "epoch": 5.0836791147994465, "grad_norm": 6.001211643218994, "learning_rate": 2.7312893806669742e-05, "log_odds_chosen": 8.615678787231445, "log_odds_ratio": -0.0018858063267543912, "logits/chosen": -0.41074180603027344, "logits/rejected": -0.4159555733203888, "logps/chosen": -0.002324719214811921, "logps/rejected": -1.4358938932418823, "loss": 0.831, "nll_loss": 0.20755891501903534, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023247190983965993, "rewards/margins": 0.14335691928863525, "rewards/rejected": -0.14358939230442047, "step": 7351 }, { "epoch": 5.084370677731673, "grad_norm": 8.391213417053223, "learning_rate": 2.730905179037959e-05, "log_odds_chosen": 11.10920524597168, "log_odds_ratio": -9.447715274291113e-05, "logits/chosen": -0.3689385652542114, "logits/rejected": -0.40068185329437256, "logps/chosen": -0.006074007600545883, "logps/rejected": -3.171781063079834, "loss": 0.7202, "nll_loss": 0.18004505336284637, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006074007251299918, "rewards/margins": 0.3165706992149353, "rewards/rejected": -0.31717807054519653, "step": 7352 }, { "epoch": 5.0850622406639, "grad_norm": 11.358319282531738, "learning_rate": 2.730520977408944e-05, "log_odds_chosen": 10.263338088989258, "log_odds_ratio": -0.0001083470560843125, "logits/chosen": -0.030622661113739014, "logits/rejected": -0.029181431978940964, "logps/chosen": -0.00030826477450318635, "logps/rejected": -1.6327803134918213, "loss": 0.6405, "nll_loss": 0.1601022183895111, "rewards/accuracies": 1.0, "rewards/chosen": -3.082647890551016e-05, "rewards/margins": 0.16324719786643982, "rewards/rejected": -0.1632780283689499, "step": 7353 }, { "epoch": 5.085753803596127, "grad_norm": 11.445735931396484, "learning_rate": 2.7301367757799296e-05, "log_odds_chosen": 9.003986358642578, "log_odds_ratio": -0.114555723965168, "logits/chosen": -0.5085049271583557, "logits/rejected": -0.3850405812263489, "logps/chosen": -0.01812593825161457, "logps/rejected": -2.1957643032073975, "loss": 1.3575, "nll_loss": 0.32793062925338745, "rewards/accuracies": 0.875, "rewards/chosen": -0.0018125936621800065, "rewards/margins": 0.21776384115219116, "rewards/rejected": -0.21957644820213318, "step": 7354 }, { "epoch": 5.086445366528354, "grad_norm": 5.323635101318359, "learning_rate": 2.7297525741509145e-05, "log_odds_chosen": 9.547505378723145, "log_odds_ratio": -0.00031736362143419683, "logits/chosen": -0.5309441685676575, "logits/rejected": -0.579699695110321, "logps/chosen": -0.00028090961859561503, "logps/rejected": -1.4435864686965942, "loss": 0.7717, "nll_loss": 0.19289781153202057, "rewards/accuracies": 1.0, "rewards/chosen": -2.8090962587157264e-05, "rewards/margins": 0.14433056116104126, "rewards/rejected": -0.14435866475105286, "step": 7355 }, { "epoch": 5.087136929460581, "grad_norm": 6.2045698165893555, "learning_rate": 2.7293683725218994e-05, "log_odds_chosen": 11.104674339294434, "log_odds_ratio": -3.998466127086431e-05, "logits/chosen": -0.5010840892791748, "logits/rejected": -0.5729426145553589, "logps/chosen": -0.00047244172310456634, "logps/rejected": -2.5478477478027344, "loss": 0.8783, "nll_loss": 0.21957121789455414, "rewards/accuracies": 1.0, "rewards/chosen": -4.724417158286087e-05, "rewards/margins": 0.25473752617836, "rewards/rejected": -0.2547847628593445, "step": 7356 }, { "epoch": 5.087828492392807, "grad_norm": 10.404763221740723, "learning_rate": 2.728984170892885e-05, "log_odds_chosen": 8.978584289550781, "log_odds_ratio": -0.023966118693351746, "logits/chosen": -0.5867160558700562, "logits/rejected": -0.536573588848114, "logps/chosen": -0.010090148076415062, "logps/rejected": -1.3400592803955078, "loss": 0.7064, "nll_loss": 0.17420047521591187, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010090150171890855, "rewards/margins": 0.13299691677093506, "rewards/rejected": -0.13400593400001526, "step": 7357 }, { "epoch": 5.088520055325034, "grad_norm": 5.697388172149658, "learning_rate": 2.72859996926387e-05, "log_odds_chosen": 10.12976360321045, "log_odds_ratio": -0.0003553791902959347, "logits/chosen": -0.4104516804218292, "logits/rejected": -0.41303762793540955, "logps/chosen": -0.0001722878951113671, "logps/rejected": -1.5895068645477295, "loss": 0.8449, "nll_loss": 0.2111791968345642, "rewards/accuracies": 1.0, "rewards/chosen": -1.722878914733883e-05, "rewards/margins": 0.15893347561359406, "rewards/rejected": -0.15895068645477295, "step": 7358 }, { "epoch": 5.089211618257261, "grad_norm": 7.838433265686035, "learning_rate": 2.7282157676348548e-05, "log_odds_chosen": 10.014269828796387, "log_odds_ratio": -0.0005774472956545651, "logits/chosen": -0.841597855091095, "logits/rejected": -0.8463683128356934, "logps/chosen": -0.00038939566002227366, "logps/rejected": -1.616848111152649, "loss": 1.211, "nll_loss": 0.30269354581832886, "rewards/accuracies": 1.0, "rewards/chosen": -3.893956454703584e-05, "rewards/margins": 0.16164587438106537, "rewards/rejected": -0.1616848260164261, "step": 7359 }, { "epoch": 5.089903181189488, "grad_norm": 10.013843536376953, "learning_rate": 2.72783156600584e-05, "log_odds_chosen": 9.890030860900879, "log_odds_ratio": -0.0004454090667422861, "logits/chosen": -0.6855981945991516, "logits/rejected": -0.7428559064865112, "logps/chosen": -0.000614183722063899, "logps/rejected": -1.476334571838379, "loss": 0.8343, "nll_loss": 0.20853474736213684, "rewards/accuracies": 1.0, "rewards/chosen": -6.141837366158143e-05, "rewards/margins": 0.14757204055786133, "rewards/rejected": -0.14763344824314117, "step": 7360 }, { "epoch": 5.090594744121715, "grad_norm": 11.054221153259277, "learning_rate": 2.727447364376825e-05, "log_odds_chosen": 9.4026517868042, "log_odds_ratio": -0.00012418953701853752, "logits/chosen": -0.2300967276096344, "logits/rejected": -0.28243911266326904, "logps/chosen": -0.0014360551722347736, "logps/rejected": -1.9370076656341553, "loss": 0.6715, "nll_loss": 0.1678735762834549, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001436055317753926, "rewards/margins": 0.19355714321136475, "rewards/rejected": -0.19370076060295105, "step": 7361 }, { "epoch": 5.091286307053942, "grad_norm": 5.798583984375, "learning_rate": 2.72706316274781e-05, "log_odds_chosen": 10.562169075012207, "log_odds_ratio": -0.0001344050106126815, "logits/chosen": -0.6078465580940247, "logits/rejected": -0.6434661149978638, "logps/chosen": -0.000248884956818074, "logps/rejected": -2.1479055881500244, "loss": 0.6105, "nll_loss": 0.15260586142539978, "rewards/accuracies": 1.0, "rewards/chosen": -2.4888498955988325e-05, "rewards/margins": 0.21476566791534424, "rewards/rejected": -0.21479055285453796, "step": 7362 }, { "epoch": 5.091977869986168, "grad_norm": 17.576988220214844, "learning_rate": 2.7266789611187954e-05, "log_odds_chosen": 9.467916488647461, "log_odds_ratio": -0.0028660153038799763, "logits/chosen": -0.6397823691368103, "logits/rejected": -0.6845981478691101, "logps/chosen": -0.022505810484290123, "logps/rejected": -2.307908773422241, "loss": 0.9296, "nll_loss": 0.23210205137729645, "rewards/accuracies": 1.0, "rewards/chosen": -0.002250581281259656, "rewards/margins": 0.228540301322937, "rewards/rejected": -0.2307908833026886, "step": 7363 }, { "epoch": 5.092669432918395, "grad_norm": 7.059711933135986, "learning_rate": 2.7262947594897803e-05, "log_odds_chosen": 11.390132904052734, "log_odds_ratio": -3.355491207912564e-05, "logits/chosen": -0.42600852251052856, "logits/rejected": -0.4258362650871277, "logps/chosen": -0.013769086450338364, "logps/rejected": -3.572200298309326, "loss": 0.6484, "nll_loss": 0.16209310293197632, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013769087381660938, "rewards/margins": 0.35584312677383423, "rewards/rejected": -0.35722002387046814, "step": 7364 }, { "epoch": 5.093360995850622, "grad_norm": 6.885730266571045, "learning_rate": 2.7259105578607652e-05, "log_odds_chosen": 11.165336608886719, "log_odds_ratio": -2.7307323762215674e-05, "logits/chosen": -0.5768116116523743, "logits/rejected": -0.5612725019454956, "logps/chosen": -0.00014302022464107722, "logps/rejected": -2.2993600368499756, "loss": 0.6686, "nll_loss": 0.16714143753051758, "rewards/accuracies": 1.0, "rewards/chosen": -1.4302024283097126e-05, "rewards/margins": 0.22992171347141266, "rewards/rejected": -0.22993600368499756, "step": 7365 }, { "epoch": 5.094052558782849, "grad_norm": 5.898820877075195, "learning_rate": 2.7255263562317508e-05, "log_odds_chosen": 9.465486526489258, "log_odds_ratio": -0.00036768452264368534, "logits/chosen": -0.23210309445858002, "logits/rejected": -0.25932738184928894, "logps/chosen": -0.0003661748196464032, "logps/rejected": -1.7317314147949219, "loss": 0.7547, "nll_loss": 0.1886340081691742, "rewards/accuracies": 1.0, "rewards/chosen": -3.661748269223608e-05, "rewards/margins": 0.17313653230667114, "rewards/rejected": -0.17317314445972443, "step": 7366 }, { "epoch": 5.094744121715076, "grad_norm": 5.994511127471924, "learning_rate": 2.7251421546027357e-05, "log_odds_chosen": 10.102039337158203, "log_odds_ratio": -0.00019134912872686982, "logits/chosen": -0.47489118576049805, "logits/rejected": -0.5151061415672302, "logps/chosen": -0.0013110683066770434, "logps/rejected": -2.2649006843566895, "loss": 1.1379, "nll_loss": 0.28446272015571594, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013110681902617216, "rewards/margins": 0.22635895013809204, "rewards/rejected": -0.2264900803565979, "step": 7367 }, { "epoch": 5.095435684647303, "grad_norm": 8.04350757598877, "learning_rate": 2.7247579529737206e-05, "log_odds_chosen": 11.359281539916992, "log_odds_ratio": -4.83084877487272e-05, "logits/chosen": -0.5122644901275635, "logits/rejected": -0.5524216890335083, "logps/chosen": -0.00036712043220177293, "logps/rejected": -2.3471951484680176, "loss": 1.5251, "nll_loss": 0.38128161430358887, "rewards/accuracies": 1.0, "rewards/chosen": -3.6712044675368816e-05, "rewards/margins": 0.23468279838562012, "rewards/rejected": -0.23471948504447937, "step": 7368 }, { "epoch": 5.096127247579529, "grad_norm": 5.575632095336914, "learning_rate": 2.724373751344706e-05, "log_odds_chosen": 10.116594314575195, "log_odds_ratio": -0.0032851833384484053, "logits/chosen": -0.2861647605895996, "logits/rejected": -0.35802027583122253, "logps/chosen": -0.002376972232013941, "logps/rejected": -2.0754401683807373, "loss": 1.079, "nll_loss": 0.26942795515060425, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002376972115598619, "rewards/margins": 0.20730631053447723, "rewards/rejected": -0.2075439989566803, "step": 7369 }, { "epoch": 5.096818810511756, "grad_norm": 9.367115020751953, "learning_rate": 2.7239895497156908e-05, "log_odds_chosen": 10.316278457641602, "log_odds_ratio": -0.00023210124345496297, "logits/chosen": -0.25167056918144226, "logits/rejected": -0.30842748284339905, "logps/chosen": -0.0013250727206468582, "logps/rejected": -2.0884361267089844, "loss": 0.7695, "nll_loss": 0.1923428624868393, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013250726624391973, "rewards/margins": 0.20871111750602722, "rewards/rejected": -0.20884361863136292, "step": 7370 }, { "epoch": 5.097510373443983, "grad_norm": 8.676201820373535, "learning_rate": 2.7236053480866757e-05, "log_odds_chosen": 10.798030853271484, "log_odds_ratio": -2.836514249793254e-05, "logits/chosen": -0.15103256702423096, "logits/rejected": -0.24257981777191162, "logps/chosen": -0.00027271793805994093, "logps/rejected": -2.4594173431396484, "loss": 0.9249, "nll_loss": 0.2312135100364685, "rewards/accuracies": 1.0, "rewards/chosen": -2.7271795261185616e-05, "rewards/margins": 0.24591445922851562, "rewards/rejected": -0.24594172835350037, "step": 7371 }, { "epoch": 5.09820193637621, "grad_norm": 6.839383602142334, "learning_rate": 2.7232211464576613e-05, "log_odds_chosen": 10.274839401245117, "log_odds_ratio": -7.13355912012048e-05, "logits/chosen": -0.4008041322231293, "logits/rejected": -0.5498058795928955, "logps/chosen": -0.00037555681774392724, "logps/rejected": -2.326822280883789, "loss": 0.9025, "nll_loss": 0.22562111914157867, "rewards/accuracies": 1.0, "rewards/chosen": -3.755568468477577e-05, "rewards/margins": 0.2326447069644928, "rewards/rejected": -0.2326822429895401, "step": 7372 }, { "epoch": 5.098893499308437, "grad_norm": 5.040561676025391, "learning_rate": 2.722836944828646e-05, "log_odds_chosen": 10.802556991577148, "log_odds_ratio": -4.267587428330444e-05, "logits/chosen": -0.7771573066711426, "logits/rejected": -0.7587347030639648, "logps/chosen": -0.0003114896244369447, "logps/rejected": -2.4623873233795166, "loss": 0.9528, "nll_loss": 0.23819348216056824, "rewards/accuracies": 1.0, "rewards/chosen": -3.1148963898885995e-05, "rewards/margins": 0.246207594871521, "rewards/rejected": -0.24623876810073853, "step": 7373 }, { "epoch": 5.0995850622406635, "grad_norm": 8.665498733520508, "learning_rate": 2.722452743199631e-05, "log_odds_chosen": 10.829631805419922, "log_odds_ratio": -7.544008258264512e-05, "logits/chosen": -0.11957578361034393, "logits/rejected": -0.16547317802906036, "logps/chosen": -0.00036867347080260515, "logps/rejected": -2.133676052093506, "loss": 0.7278, "nll_loss": 0.181938037276268, "rewards/accuracies": 1.0, "rewards/chosen": -3.6867346352664754e-05, "rewards/margins": 0.21333074569702148, "rewards/rejected": -0.21336761116981506, "step": 7374 }, { "epoch": 5.10027662517289, "grad_norm": 6.870170593261719, "learning_rate": 2.7220685415706167e-05, "log_odds_chosen": 11.019067764282227, "log_odds_ratio": -4.180756150162779e-05, "logits/chosen": -0.6265878081321716, "logits/rejected": -0.6470605134963989, "logps/chosen": -0.0006792093627154827, "logps/rejected": -2.668718099594116, "loss": 0.6031, "nll_loss": 0.1507745087146759, "rewards/accuracies": 1.0, "rewards/chosen": -6.792093336116523e-05, "rewards/margins": 0.26680392026901245, "rewards/rejected": -0.2668718099594116, "step": 7375 }, { "epoch": 5.100968188105117, "grad_norm": 5.347779273986816, "learning_rate": 2.7216843399416016e-05, "log_odds_chosen": 9.326358795166016, "log_odds_ratio": -0.000986828817985952, "logits/chosen": -0.5040815472602844, "logits/rejected": -0.5364580750465393, "logps/chosen": -0.0005646024364978075, "logps/rejected": -1.3842899799346924, "loss": 0.74, "nll_loss": 0.1848924309015274, "rewards/accuracies": 1.0, "rewards/chosen": -5.646024510497227e-05, "rewards/margins": 0.138372540473938, "rewards/rejected": -0.13842900097370148, "step": 7376 }, { "epoch": 5.101659751037344, "grad_norm": 7.426041126251221, "learning_rate": 2.7213001383125865e-05, "log_odds_chosen": 11.608118057250977, "log_odds_ratio": -1.4023098628968e-05, "logits/chosen": -0.6992658972740173, "logits/rejected": -0.7811527252197266, "logps/chosen": -0.0001724398462101817, "logps/rejected": -2.5273571014404297, "loss": 0.6181, "nll_loss": 0.15453001856803894, "rewards/accuracies": 1.0, "rewards/chosen": -1.7243986803805456e-05, "rewards/margins": 0.252718448638916, "rewards/rejected": -0.2527356743812561, "step": 7377 }, { "epoch": 5.102351313969571, "grad_norm": 5.961256980895996, "learning_rate": 2.7209159366835717e-05, "log_odds_chosen": 9.610187530517578, "log_odds_ratio": -0.0005559841520152986, "logits/chosen": -0.3288503587245941, "logits/rejected": -0.42061495780944824, "logps/chosen": -0.0010864774230867624, "logps/rejected": -2.516143321990967, "loss": 0.7354, "nll_loss": 0.1837823987007141, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010864774230867624, "rewards/margins": 0.25150567293167114, "rewards/rejected": -0.2516143321990967, "step": 7378 }, { "epoch": 5.103042876901798, "grad_norm": 7.212077617645264, "learning_rate": 2.7205317350545566e-05, "log_odds_chosen": 9.604665756225586, "log_odds_ratio": -0.0002615238190628588, "logits/chosen": -0.39613234996795654, "logits/rejected": -0.3910067081451416, "logps/chosen": -0.00038741217576898634, "logps/rejected": -1.587263584136963, "loss": 0.91, "nll_loss": 0.2274717539548874, "rewards/accuracies": 1.0, "rewards/chosen": -3.87412219424732e-05, "rewards/margins": 0.15868760645389557, "rewards/rejected": -0.15872636437416077, "step": 7379 }, { "epoch": 5.1037344398340245, "grad_norm": 7.643638610839844, "learning_rate": 2.7201475334255415e-05, "log_odds_chosen": 10.92262077331543, "log_odds_ratio": -8.597999112680554e-05, "logits/chosen": -0.23542064428329468, "logits/rejected": -0.249238058924675, "logps/chosen": -0.00019922290812246501, "logps/rejected": -2.349911689758301, "loss": 0.6484, "nll_loss": 0.1620965451002121, "rewards/accuracies": 1.0, "rewards/chosen": -1.9922288629459217e-05, "rewards/margins": 0.23497125506401062, "rewards/rejected": -0.234991192817688, "step": 7380 }, { "epoch": 5.104426002766251, "grad_norm": 9.340903282165527, "learning_rate": 2.719763331796527e-05, "log_odds_chosen": 12.061431884765625, "log_odds_ratio": -2.306019450770691e-05, "logits/chosen": -0.42079007625579834, "logits/rejected": -0.5221748948097229, "logps/chosen": -0.00023580492415931076, "logps/rejected": -3.28064227104187, "loss": 1.0093, "nll_loss": 0.25232964754104614, "rewards/accuracies": 1.0, "rewards/chosen": -2.3580492779728957e-05, "rewards/margins": 0.32804062962532043, "rewards/rejected": -0.3280642330646515, "step": 7381 }, { "epoch": 5.105117565698478, "grad_norm": 6.661879062652588, "learning_rate": 2.719379130167512e-05, "log_odds_chosen": 10.242103576660156, "log_odds_ratio": -4.760442243423313e-05, "logits/chosen": -0.4171237349510193, "logits/rejected": -0.3926810622215271, "logps/chosen": -0.00015924654144328088, "logps/rejected": -1.5589709281921387, "loss": 0.5406, "nll_loss": 0.13514304161071777, "rewards/accuracies": 1.0, "rewards/chosen": -1.592465559951961e-05, "rewards/margins": 0.15588116645812988, "rewards/rejected": -0.1558970957994461, "step": 7382 }, { "epoch": 5.105809128630705, "grad_norm": 9.575935363769531, "learning_rate": 2.718994928538497e-05, "log_odds_chosen": 11.292200088500977, "log_odds_ratio": -3.7497273297049105e-05, "logits/chosen": -0.48665758967399597, "logits/rejected": -0.5950102806091309, "logps/chosen": -0.0019470170373097062, "logps/rejected": -2.7821574211120605, "loss": 1.4374, "nll_loss": 0.35933566093444824, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019470170082058758, "rewards/margins": 0.27802103757858276, "rewards/rejected": -0.2782157361507416, "step": 7383 }, { "epoch": 5.106500691562932, "grad_norm": 6.445347785949707, "learning_rate": 2.7186107269094825e-05, "log_odds_chosen": 10.10672378540039, "log_odds_ratio": -6.40135767753236e-05, "logits/chosen": -0.3899781405925751, "logits/rejected": -0.4565275311470032, "logps/chosen": -0.00048476149095222354, "logps/rejected": -2.0757288932800293, "loss": 0.617, "nll_loss": 0.15423867106437683, "rewards/accuracies": 1.0, "rewards/chosen": -4.847614764003083e-05, "rewards/margins": 0.20752441883087158, "rewards/rejected": -0.20757289230823517, "step": 7384 }, { "epoch": 5.107192254495159, "grad_norm": 7.290511131286621, "learning_rate": 2.7182265252804674e-05, "log_odds_chosen": 10.069574356079102, "log_odds_ratio": -0.00016815030539873987, "logits/chosen": -0.5361185073852539, "logits/rejected": -0.5250629186630249, "logps/chosen": -0.0002581964072305709, "logps/rejected": -1.5850675106048584, "loss": 0.5924, "nll_loss": 0.14807942509651184, "rewards/accuracies": 1.0, "rewards/chosen": -2.5819641450652853e-05, "rewards/margins": 0.1584809273481369, "rewards/rejected": -0.15850675106048584, "step": 7385 }, { "epoch": 5.1078838174273855, "grad_norm": 7.795459270477295, "learning_rate": 2.7178423236514523e-05, "log_odds_chosen": 10.93010139465332, "log_odds_ratio": -4.356444696895778e-05, "logits/chosen": -0.19619879126548767, "logits/rejected": -0.21324469149112701, "logps/chosen": -0.0002850884629879147, "logps/rejected": -2.712125539779663, "loss": 1.3588, "nll_loss": 0.33969220519065857, "rewards/accuracies": 1.0, "rewards/chosen": -2.8508844479802065e-05, "rewards/margins": 0.2711840271949768, "rewards/rejected": -0.2712125778198242, "step": 7386 }, { "epoch": 5.108575380359612, "grad_norm": 5.70391845703125, "learning_rate": 2.7174581220224376e-05, "log_odds_chosen": 10.274075508117676, "log_odds_ratio": -0.00015871970390435308, "logits/chosen": -0.36832761764526367, "logits/rejected": -0.3919983506202698, "logps/chosen": -0.00020493712509050965, "logps/rejected": -1.8412601947784424, "loss": 0.4894, "nll_loss": 0.12234241515398026, "rewards/accuracies": 1.0, "rewards/chosen": -2.049371505563613e-05, "rewards/margins": 0.1841055452823639, "rewards/rejected": -0.18412603437900543, "step": 7387 }, { "epoch": 5.109266943291839, "grad_norm": 68.21056365966797, "learning_rate": 2.7170739203934225e-05, "log_odds_chosen": 8.288837432861328, "log_odds_ratio": -0.08500274270772934, "logits/chosen": -0.46375998854637146, "logits/rejected": -0.4677826464176178, "logps/chosen": -0.012799981981515884, "logps/rejected": -1.4783885478973389, "loss": 0.9068, "nll_loss": 0.21818780899047852, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012799982214346528, "rewards/margins": 0.14655886590480804, "rewards/rejected": -0.14783886075019836, "step": 7388 }, { "epoch": 5.109958506224066, "grad_norm": 5.482029438018799, "learning_rate": 2.7166897187644074e-05, "log_odds_chosen": 10.815271377563477, "log_odds_ratio": -4.882266148342751e-05, "logits/chosen": -0.7241552472114563, "logits/rejected": -0.8382655382156372, "logps/chosen": -0.00020358421897981316, "logps/rejected": -2.069589138031006, "loss": 0.6462, "nll_loss": 0.16153374314308167, "rewards/accuracies": 1.0, "rewards/chosen": -2.0358422261779197e-05, "rewards/margins": 0.20693853497505188, "rewards/rejected": -0.20695888996124268, "step": 7389 }, { "epoch": 5.110650069156293, "grad_norm": 6.103107929229736, "learning_rate": 2.716305517135393e-05, "log_odds_chosen": 9.955148696899414, "log_odds_ratio": -0.0003933612897526473, "logits/chosen": -0.18130913376808167, "logits/rejected": -0.1627679169178009, "logps/chosen": -0.0005892410408705473, "logps/rejected": -1.9700955152511597, "loss": 0.4972, "nll_loss": 0.12425161898136139, "rewards/accuracies": 1.0, "rewards/chosen": -5.892410263186321e-05, "rewards/margins": 0.19695061445236206, "rewards/rejected": -0.19700954854488373, "step": 7390 }, { "epoch": 5.11134163208852, "grad_norm": 11.400738716125488, "learning_rate": 2.715921315506378e-05, "log_odds_chosen": 10.500256538391113, "log_odds_ratio": -8.448248263448477e-05, "logits/chosen": -0.44659101963043213, "logits/rejected": -0.5452315211296082, "logps/chosen": -0.00030397262889891863, "logps/rejected": -2.122675657272339, "loss": 0.6142, "nll_loss": 0.15353217720985413, "rewards/accuracies": 1.0, "rewards/chosen": -3.0397262889891863e-05, "rewards/margins": 0.212237149477005, "rewards/rejected": -0.21226757764816284, "step": 7391 }, { "epoch": 5.1120331950207465, "grad_norm": 9.719880104064941, "learning_rate": 2.7155371138773628e-05, "log_odds_chosen": 10.642242431640625, "log_odds_ratio": -3.789024049183354e-05, "logits/chosen": -0.05491916835308075, "logits/rejected": -0.18953341245651245, "logps/chosen": -0.004866994917392731, "logps/rejected": -3.1003847122192383, "loss": 0.9004, "nll_loss": 0.22509226202964783, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004866994568146765, "rewards/margins": 0.30955177545547485, "rewards/rejected": -0.3100384771823883, "step": 7392 }, { "epoch": 5.112724757952973, "grad_norm": 7.443755149841309, "learning_rate": 2.7151529122483483e-05, "log_odds_chosen": 10.646068572998047, "log_odds_ratio": -4.681744030676782e-05, "logits/chosen": -0.2412424087524414, "logits/rejected": -0.2810656428337097, "logps/chosen": -0.0002580955915618688, "logps/rejected": -1.8962551355361938, "loss": 0.7401, "nll_loss": 0.185030996799469, "rewards/accuracies": 1.0, "rewards/chosen": -2.58095606113784e-05, "rewards/margins": 0.18959970772266388, "rewards/rejected": -0.18962550163269043, "step": 7393 }, { "epoch": 5.1134163208852, "grad_norm": 13.804279327392578, "learning_rate": 2.7147687106193332e-05, "log_odds_chosen": 9.380505561828613, "log_odds_ratio": -0.003324545454233885, "logits/chosen": -0.2428872287273407, "logits/rejected": -0.3313140869140625, "logps/chosen": -0.0022368980571627617, "logps/rejected": -2.006077766418457, "loss": 0.9655, "nll_loss": 0.24104931950569153, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002236898144474253, "rewards/margins": 0.20038409531116486, "rewards/rejected": -0.2006077766418457, "step": 7394 }, { "epoch": 5.114107883817427, "grad_norm": 9.437492370605469, "learning_rate": 2.714384508990318e-05, "log_odds_chosen": 11.143658638000488, "log_odds_ratio": -0.0001585199497640133, "logits/chosen": -0.45222657918930054, "logits/rejected": -0.5829071998596191, "logps/chosen": -0.00032591738272458315, "logps/rejected": -2.8368942737579346, "loss": 0.6586, "nll_loss": 0.16463643312454224, "rewards/accuracies": 1.0, "rewards/chosen": -3.2591739000054076e-05, "rewards/margins": 0.2836568355560303, "rewards/rejected": -0.2836894392967224, "step": 7395 }, { "epoch": 5.114799446749654, "grad_norm": 6.6503753662109375, "learning_rate": 2.7140003073613034e-05, "log_odds_chosen": 9.678725242614746, "log_odds_ratio": -0.00023803582007531077, "logits/chosen": -0.3980298638343811, "logits/rejected": -0.35562199354171753, "logps/chosen": -0.0006691630696877837, "logps/rejected": -1.5765386819839478, "loss": 1.2738, "nll_loss": 0.31843432784080505, "rewards/accuracies": 1.0, "rewards/chosen": -6.691631278954446e-05, "rewards/margins": 0.157586932182312, "rewards/rejected": -0.15765386819839478, "step": 7396 }, { "epoch": 5.115491009681881, "grad_norm": 5.985082626342773, "learning_rate": 2.7136161057322883e-05, "log_odds_chosen": 9.743041038513184, "log_odds_ratio": -0.0058269198052585125, "logits/chosen": -0.20725753903388977, "logits/rejected": -0.20159000158309937, "logps/chosen": -0.03767280653119087, "logps/rejected": -1.575268030166626, "loss": 0.8821, "nll_loss": 0.2199416160583496, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037672806065529585, "rewards/margins": 0.15375953912734985, "rewards/rejected": -0.15752682089805603, "step": 7397 }, { "epoch": 5.1161825726141075, "grad_norm": 9.815032958984375, "learning_rate": 2.7132319041032732e-05, "log_odds_chosen": 9.699831008911133, "log_odds_ratio": -0.0003538834862411022, "logits/chosen": -0.39152687788009644, "logits/rejected": -0.4046846032142639, "logps/chosen": -0.0007243537111207843, "logps/rejected": -1.5959980487823486, "loss": 0.8071, "nll_loss": 0.20174673199653625, "rewards/accuracies": 1.0, "rewards/chosen": -7.243537402246147e-05, "rewards/margins": 0.15952739119529724, "rewards/rejected": -0.15959982573986053, "step": 7398 }, { "epoch": 5.116874135546334, "grad_norm": 7.80418062210083, "learning_rate": 2.7128477024742588e-05, "log_odds_chosen": 9.415637969970703, "log_odds_ratio": -0.00019955117022618651, "logits/chosen": -0.3784105181694031, "logits/rejected": -0.3838611841201782, "logps/chosen": -0.00038098107324913144, "logps/rejected": -1.6047699451446533, "loss": 0.5059, "nll_loss": 0.12646718323230743, "rewards/accuracies": 1.0, "rewards/chosen": -3.809811096289195e-05, "rewards/margins": 0.1604388952255249, "rewards/rejected": -0.16047699749469757, "step": 7399 }, { "epoch": 5.117565698478561, "grad_norm": 6.374980926513672, "learning_rate": 2.7124635008452437e-05, "log_odds_chosen": 10.56325626373291, "log_odds_ratio": -4.551166057353839e-05, "logits/chosen": 0.02074243128299713, "logits/rejected": -0.06978891789913177, "logps/chosen": -0.0002741274074651301, "logps/rejected": -2.0129191875457764, "loss": 0.5533, "nll_loss": 0.13831853866577148, "rewards/accuracies": 1.0, "rewards/chosen": -2.7412745112087578e-05, "rewards/margins": 0.20126450061798096, "rewards/rejected": -0.20129193365573883, "step": 7400 }, { "epoch": 5.118257261410788, "grad_norm": 11.832037925720215, "learning_rate": 2.7120792992162286e-05, "log_odds_chosen": 10.429668426513672, "log_odds_ratio": -0.0001599583774805069, "logits/chosen": -0.6950970888137817, "logits/rejected": -0.7039766311645508, "logps/chosen": -0.0002300713094882667, "logps/rejected": -1.8066484928131104, "loss": 0.5992, "nll_loss": 0.14977310597896576, "rewards/accuracies": 1.0, "rewards/chosen": -2.300713094882667e-05, "rewards/margins": 0.18064185976982117, "rewards/rejected": -0.18066485226154327, "step": 7401 }, { "epoch": 5.118948824343015, "grad_norm": 6.376369476318359, "learning_rate": 2.7116950975872142e-05, "log_odds_chosen": 10.112187385559082, "log_odds_ratio": -0.0005594309768639505, "logits/chosen": -0.2071499228477478, "logits/rejected": -0.26662901043891907, "logps/chosen": -0.0005525099113583565, "logps/rejected": -2.110633373260498, "loss": 0.8324, "nll_loss": 0.208037868142128, "rewards/accuracies": 1.0, "rewards/chosen": -5.525099186343141e-05, "rewards/margins": 0.21100810170173645, "rewards/rejected": -0.21106334030628204, "step": 7402 }, { "epoch": 5.119640387275242, "grad_norm": 9.686321258544922, "learning_rate": 2.711310895958199e-05, "log_odds_chosen": 10.839088439941406, "log_odds_ratio": -0.00028093927539885044, "logits/chosen": 0.02540695294737816, "logits/rejected": -0.06891262531280518, "logps/chosen": -0.0037065306678414345, "logps/rejected": -3.1824331283569336, "loss": 0.8034, "nll_loss": 0.20082277059555054, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003706530842464417, "rewards/margins": 0.31787267327308655, "rewards/rejected": -0.3182433247566223, "step": 7403 }, { "epoch": 5.1203319502074685, "grad_norm": 8.942167282104492, "learning_rate": 2.710926694329184e-05, "log_odds_chosen": 10.840582847595215, "log_odds_ratio": -8.063986024353653e-05, "logits/chosen": -0.23275119066238403, "logits/rejected": -0.25833365321159363, "logps/chosen": -0.012653451412916183, "logps/rejected": -2.892709970474243, "loss": 1.3948, "nll_loss": 0.34869325160980225, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012653451412916183, "rewards/margins": 0.28800565004348755, "rewards/rejected": -0.2892709970474243, "step": 7404 }, { "epoch": 5.121023513139695, "grad_norm": 8.973079681396484, "learning_rate": 2.7105424927001692e-05, "log_odds_chosen": 11.203123092651367, "log_odds_ratio": -2.6533847631071694e-05, "logits/chosen": -0.4045684337615967, "logits/rejected": -0.4558801054954529, "logps/chosen": -8.019142842385918e-05, "logps/rejected": -1.9351506233215332, "loss": 0.5621, "nll_loss": 0.1405208706855774, "rewards/accuracies": 1.0, "rewards/chosen": -8.019143024284858e-06, "rewards/margins": 0.19350704550743103, "rewards/rejected": -0.19351506233215332, "step": 7405 }, { "epoch": 5.121715076071922, "grad_norm": 12.250910758972168, "learning_rate": 2.710158291071154e-05, "log_odds_chosen": 9.914047241210938, "log_odds_ratio": -0.00020869742729701102, "logits/chosen": -0.42722779512405396, "logits/rejected": -0.46873965859413147, "logps/chosen": -0.0006569348042830825, "logps/rejected": -1.443729043006897, "loss": 0.8885, "nll_loss": 0.22209203243255615, "rewards/accuracies": 1.0, "rewards/chosen": -6.569348624907434e-05, "rewards/margins": 0.1443072259426117, "rewards/rejected": -0.14437291026115417, "step": 7406 }, { "epoch": 5.122406639004149, "grad_norm": 9.799444198608398, "learning_rate": 2.709774089442139e-05, "log_odds_chosen": 9.920234680175781, "log_odds_ratio": -0.0002802509116008878, "logits/chosen": -0.10245361179113388, "logits/rejected": -0.1588355451822281, "logps/chosen": -0.0006374450167641044, "logps/rejected": -2.294321298599243, "loss": 0.7491, "nll_loss": 0.18724007904529572, "rewards/accuracies": 1.0, "rewards/chosen": -6.374450458679348e-05, "rewards/margins": 0.22936837375164032, "rewards/rejected": -0.2294321358203888, "step": 7407 }, { "epoch": 5.123098201936376, "grad_norm": 7.869354248046875, "learning_rate": 2.7093898878131246e-05, "log_odds_chosen": 10.49201774597168, "log_odds_ratio": -3.221644146833569e-05, "logits/chosen": -0.8095996975898743, "logits/rejected": -0.8921639919281006, "logps/chosen": -0.00012403872096911073, "logps/rejected": -1.7007167339324951, "loss": 0.8265, "nll_loss": 0.20662826299667358, "rewards/accuracies": 1.0, "rewards/chosen": -1.2403872460708953e-05, "rewards/margins": 0.17005929350852966, "rewards/rejected": -0.17007167637348175, "step": 7408 }, { "epoch": 5.123789764868603, "grad_norm": 13.822428703308105, "learning_rate": 2.7090056861841095e-05, "log_odds_chosen": 10.582257270812988, "log_odds_ratio": -8.250211976701394e-05, "logits/chosen": -0.2686958611011505, "logits/rejected": -0.37263888120651245, "logps/chosen": -0.00015262029774021357, "logps/rejected": -1.6685211658477783, "loss": 0.8688, "nll_loss": 0.21719194948673248, "rewards/accuracies": 1.0, "rewards/chosen": -1.526203050161712e-05, "rewards/margins": 0.16683685779571533, "rewards/rejected": -0.16685211658477783, "step": 7409 }, { "epoch": 5.124481327800829, "grad_norm": 6.8588714599609375, "learning_rate": 2.7086214845550944e-05, "log_odds_chosen": 9.660439491271973, "log_odds_ratio": -0.00010367112554376945, "logits/chosen": -0.35745152831077576, "logits/rejected": -0.3240693211555481, "logps/chosen": -0.0005797590129077435, "logps/rejected": -1.9160172939300537, "loss": 0.8896, "nll_loss": 0.22238633036613464, "rewards/accuracies": 1.0, "rewards/chosen": -5.7975896197604015e-05, "rewards/margins": 0.19154374301433563, "rewards/rejected": -0.1916017234325409, "step": 7410 }, { "epoch": 5.125172890733056, "grad_norm": 6.4284138679504395, "learning_rate": 2.70823728292608e-05, "log_odds_chosen": 9.837509155273438, "log_odds_ratio": -0.00026661824085749686, "logits/chosen": -0.2379743456840515, "logits/rejected": -0.26656097173690796, "logps/chosen": -0.0002663254563231021, "logps/rejected": -1.4803067445755005, "loss": 0.812, "nll_loss": 0.202966570854187, "rewards/accuracies": 1.0, "rewards/chosen": -2.6632545996108092e-05, "rewards/margins": 0.14800405502319336, "rewards/rejected": -0.14803066849708557, "step": 7411 }, { "epoch": 5.125864453665283, "grad_norm": 4.05260705947876, "learning_rate": 2.707853081297065e-05, "log_odds_chosen": 9.609614372253418, "log_odds_ratio": -0.00021065973851364106, "logits/chosen": -0.36739036440849304, "logits/rejected": -0.41593652963638306, "logps/chosen": -0.0002198894217144698, "logps/rejected": -1.3649433851242065, "loss": 0.6855, "nll_loss": 0.1713617742061615, "rewards/accuracies": 1.0, "rewards/chosen": -2.1988940716255456e-05, "rewards/margins": 0.13647235929965973, "rewards/rejected": -0.13649435341358185, "step": 7412 }, { "epoch": 5.12655601659751, "grad_norm": 14.065872192382812, "learning_rate": 2.70746887966805e-05, "log_odds_chosen": 10.061935424804688, "log_odds_ratio": -0.00015091894601937383, "logits/chosen": -0.4934547543525696, "logits/rejected": -0.4658409357070923, "logps/chosen": -0.00045286474050953984, "logps/rejected": -1.942396879196167, "loss": 0.5004, "nll_loss": 0.12507638335227966, "rewards/accuracies": 1.0, "rewards/chosen": -4.528647332335822e-05, "rewards/margins": 0.19419440627098083, "rewards/rejected": -0.19423969089984894, "step": 7413 }, { "epoch": 5.127247579529737, "grad_norm": 9.801753044128418, "learning_rate": 2.707084678039035e-05, "log_odds_chosen": 9.888916015625, "log_odds_ratio": -0.0004568792355712503, "logits/chosen": -0.6383622884750366, "logits/rejected": -0.6947815418243408, "logps/chosen": -0.00046679971273988485, "logps/rejected": -1.9457203149795532, "loss": 0.5612, "nll_loss": 0.14025507867336273, "rewards/accuracies": 1.0, "rewards/chosen": -4.6679975639563054e-05, "rewards/margins": 0.194525346159935, "rewards/rejected": -0.19457201659679413, "step": 7414 }, { "epoch": 5.127939142461964, "grad_norm": 6.193912982940674, "learning_rate": 2.70670047641002e-05, "log_odds_chosen": 9.266860961914062, "log_odds_ratio": -0.0017042263643816113, "logits/chosen": -0.3525208830833435, "logits/rejected": -0.4817659258842468, "logps/chosen": -0.001416980056092143, "logps/rejected": -1.485273003578186, "loss": 0.7362, "nll_loss": 0.18388216197490692, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014169800851959735, "rewards/margins": 0.14838561415672302, "rewards/rejected": -0.14852730929851532, "step": 7415 }, { "epoch": 5.12863070539419, "grad_norm": 12.201619148254395, "learning_rate": 2.706316274781005e-05, "log_odds_chosen": 10.946922302246094, "log_odds_ratio": -3.287233994342387e-05, "logits/chosen": -0.041007209569215775, "logits/rejected": -0.1741712987422943, "logps/chosen": -0.00024743779795244336, "logps/rejected": -2.4667646884918213, "loss": 0.7509, "nll_loss": 0.18771187961101532, "rewards/accuracies": 1.0, "rewards/chosen": -2.4743778340052813e-05, "rewards/margins": 0.24665173888206482, "rewards/rejected": -0.2466764748096466, "step": 7416 }, { "epoch": 5.129322268326418, "grad_norm": 7.043701171875, "learning_rate": 2.7059320731519905e-05, "log_odds_chosen": 10.921808242797852, "log_odds_ratio": -3.566598388715647e-05, "logits/chosen": -0.6520669460296631, "logits/rejected": -0.744756817817688, "logps/chosen": -0.00022421804897021502, "logps/rejected": -1.977895975112915, "loss": 0.6564, "nll_loss": 0.1640891134738922, "rewards/accuracies": 1.0, "rewards/chosen": -2.242180380562786e-05, "rewards/margins": 0.19776716828346252, "rewards/rejected": -0.19778959453105927, "step": 7417 }, { "epoch": 5.130013831258645, "grad_norm": 8.132904052734375, "learning_rate": 2.7055478715229754e-05, "log_odds_chosen": 9.147832870483398, "log_odds_ratio": -0.0003267722495365888, "logits/chosen": -0.30655428767204285, "logits/rejected": -0.38167956471443176, "logps/chosen": -0.0005435227649286389, "logps/rejected": -1.5238276720046997, "loss": 0.6515, "nll_loss": 0.16284248232841492, "rewards/accuracies": 1.0, "rewards/chosen": -5.4352272854885086e-05, "rewards/margins": 0.15232841670513153, "rewards/rejected": -0.1523827612400055, "step": 7418 }, { "epoch": 5.130705394190872, "grad_norm": 12.80761432647705, "learning_rate": 2.7051636698939603e-05, "log_odds_chosen": 10.520578384399414, "log_odds_ratio": -0.0029854183085262775, "logits/chosen": -0.4390121400356293, "logits/rejected": -0.5808690786361694, "logps/chosen": -0.0020407303236424923, "logps/rejected": -3.0300936698913574, "loss": 0.8269, "nll_loss": 0.20642107725143433, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020407306146807969, "rewards/margins": 0.3028053045272827, "rewards/rejected": -0.30300936102867126, "step": 7419 }, { "epoch": 5.131396957123099, "grad_norm": 10.896114349365234, "learning_rate": 2.704779468264946e-05, "log_odds_chosen": 9.920369148254395, "log_odds_ratio": -0.0003378927940502763, "logits/chosen": -0.8067368865013123, "logits/rejected": -0.8925114870071411, "logps/chosen": -0.0007252587238326669, "logps/rejected": -1.8626281023025513, "loss": 1.059, "nll_loss": 0.2647111713886261, "rewards/accuracies": 1.0, "rewards/chosen": -7.252587238326669e-05, "rewards/margins": 0.18619027733802795, "rewards/rejected": -0.1862628161907196, "step": 7420 }, { "epoch": 5.1320885200553255, "grad_norm": 9.972339630126953, "learning_rate": 2.7043952666359308e-05, "log_odds_chosen": 9.74928092956543, "log_odds_ratio": -0.0004444057121872902, "logits/chosen": -0.3922783136367798, "logits/rejected": -0.4056027829647064, "logps/chosen": -0.0012801011325791478, "logps/rejected": -2.130465507507324, "loss": 0.9444, "nll_loss": 0.23606690764427185, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012801012780983, "rewards/margins": 0.21291851997375488, "rewards/rejected": -0.21304653584957123, "step": 7421 }, { "epoch": 5.132780082987552, "grad_norm": 12.015710830688477, "learning_rate": 2.7040110650069157e-05, "log_odds_chosen": 10.567075729370117, "log_odds_ratio": -8.119967969832942e-05, "logits/chosen": -0.2184918373823166, "logits/rejected": -0.3011958599090576, "logps/chosen": -0.008143655024468899, "logps/rejected": -2.781130790710449, "loss": 0.8808, "nll_loss": 0.2202032208442688, "rewards/accuracies": 1.0, "rewards/chosen": -0.000814365572296083, "rewards/margins": 0.2772987484931946, "rewards/rejected": -0.27811309695243835, "step": 7422 }, { "epoch": 5.133471645919779, "grad_norm": 5.104509353637695, "learning_rate": 2.703626863377901e-05, "log_odds_chosen": 9.874157905578613, "log_odds_ratio": -0.00014664784248452634, "logits/chosen": -0.47639042139053345, "logits/rejected": -0.47348520159721375, "logps/chosen": -0.0013921656645834446, "logps/rejected": -2.016475200653076, "loss": 0.592, "nll_loss": 0.14799407124519348, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013921657227911055, "rewards/margins": 0.2015082985162735, "rewards/rejected": -0.20164752006530762, "step": 7423 }, { "epoch": 5.134163208852006, "grad_norm": 7.860300540924072, "learning_rate": 2.703242661748886e-05, "log_odds_chosen": 9.348637580871582, "log_odds_ratio": -0.001988054485991597, "logits/chosen": -0.6254793405532837, "logits/rejected": -0.6899417042732239, "logps/chosen": -0.00868395809084177, "logps/rejected": -1.6159273386001587, "loss": 0.8476, "nll_loss": 0.21169663965702057, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008683958440087736, "rewards/margins": 0.16072434186935425, "rewards/rejected": -0.16159272193908691, "step": 7424 }, { "epoch": 5.134854771784233, "grad_norm": 10.588888168334961, "learning_rate": 2.7028584601198707e-05, "log_odds_chosen": 10.115678787231445, "log_odds_ratio": -0.0012466337066143751, "logits/chosen": -0.4161772131919861, "logits/rejected": -0.4731995463371277, "logps/chosen": -0.0017289479728788137, "logps/rejected": -1.8897874355316162, "loss": 0.7414, "nll_loss": 0.18522275984287262, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017289479728788137, "rewards/margins": 0.18880584836006165, "rewards/rejected": -0.18897874653339386, "step": 7425 }, { "epoch": 5.13554633471646, "grad_norm": 4.885624408721924, "learning_rate": 2.7024742584908563e-05, "log_odds_chosen": 10.32637882232666, "log_odds_ratio": -0.00019028893439099193, "logits/chosen": -0.4755975604057312, "logits/rejected": -0.5734898447990417, "logps/chosen": -0.00046299712266772985, "logps/rejected": -2.3506150245666504, "loss": 2.0045, "nll_loss": 0.5011166930198669, "rewards/accuracies": 1.0, "rewards/chosen": -4.6299712266772985e-05, "rewards/margins": 0.23501522839069366, "rewards/rejected": -0.23506152629852295, "step": 7426 }, { "epoch": 5.136237897648686, "grad_norm": 11.235721588134766, "learning_rate": 2.7020900568618412e-05, "log_odds_chosen": 11.286067962646484, "log_odds_ratio": -1.9410845197853632e-05, "logits/chosen": -0.1929822564125061, "logits/rejected": -0.2506953179836273, "logps/chosen": -0.00020312087144702673, "logps/rejected": -2.4689719676971436, "loss": 0.8238, "nll_loss": 0.20594075322151184, "rewards/accuracies": 1.0, "rewards/chosen": -2.0312087144702673e-05, "rewards/margins": 0.24687688052654266, "rewards/rejected": -0.24689719080924988, "step": 7427 }, { "epoch": 5.136929460580913, "grad_norm": 9.159810066223145, "learning_rate": 2.701705855232826e-05, "log_odds_chosen": 9.450736999511719, "log_odds_ratio": -0.002133977599442005, "logits/chosen": -0.48191481828689575, "logits/rejected": -0.49468135833740234, "logps/chosen": -0.01258176565170288, "logps/rejected": -1.7106521129608154, "loss": 0.899, "nll_loss": 0.22454796731472015, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012581765186041594, "rewards/margins": 0.1698070466518402, "rewards/rejected": -0.17106521129608154, "step": 7428 }, { "epoch": 5.13762102351314, "grad_norm": 11.671100616455078, "learning_rate": 2.7013216536038117e-05, "log_odds_chosen": 10.852087020874023, "log_odds_ratio": -9.678960486780852e-05, "logits/chosen": -0.39900654554367065, "logits/rejected": -0.47518616914749146, "logps/chosen": -0.00019505196542013437, "logps/rejected": -2.3899760246276855, "loss": 0.7959, "nll_loss": 0.19896200299263, "rewards/accuracies": 1.0, "rewards/chosen": -1.950519799720496e-05, "rewards/margins": 0.23897811770439148, "rewards/rejected": -0.23899762332439423, "step": 7429 }, { "epoch": 5.138312586445367, "grad_norm": 6.089036464691162, "learning_rate": 2.7009374519747966e-05, "log_odds_chosen": 10.770848274230957, "log_odds_ratio": -2.904493157984689e-05, "logits/chosen": -0.09202180802822113, "logits/rejected": -0.08273860067129135, "logps/chosen": -0.00025757448747754097, "logps/rejected": -2.359013319015503, "loss": 0.8615, "nll_loss": 0.21537932753562927, "rewards/accuracies": 1.0, "rewards/chosen": -2.5757448383956216e-05, "rewards/margins": 0.23587557673454285, "rewards/rejected": -0.23590132594108582, "step": 7430 }, { "epoch": 5.139004149377594, "grad_norm": 12.126273155212402, "learning_rate": 2.7005532503457815e-05, "log_odds_chosen": 12.461263656616211, "log_odds_ratio": -1.4111486052570399e-05, "logits/chosen": -0.42405080795288086, "logits/rejected": -0.38803666830062866, "logps/chosen": -0.0001690139906713739, "logps/rejected": -3.408078193664551, "loss": 1.0464, "nll_loss": 0.2615966796875, "rewards/accuracies": 1.0, "rewards/chosen": -1.690139833954163e-05, "rewards/margins": 0.34079092741012573, "rewards/rejected": -0.34080779552459717, "step": 7431 }, { "epoch": 5.139695712309821, "grad_norm": 6.1468305587768555, "learning_rate": 2.7001690487167668e-05, "log_odds_chosen": 9.392050743103027, "log_odds_ratio": -0.0002480958355590701, "logits/chosen": -0.47275733947753906, "logits/rejected": -0.47395336627960205, "logps/chosen": -0.0005006209248676896, "logps/rejected": -1.3755102157592773, "loss": 0.9054, "nll_loss": 0.2263253629207611, "rewards/accuracies": 1.0, "rewards/chosen": -5.006208812119439e-05, "rewards/margins": 0.13750094175338745, "rewards/rejected": -0.13755100965499878, "step": 7432 }, { "epoch": 5.140387275242047, "grad_norm": 12.276866912841797, "learning_rate": 2.6997848470877517e-05, "log_odds_chosen": 10.621747970581055, "log_odds_ratio": -0.020847471430897713, "logits/chosen": -0.467585027217865, "logits/rejected": -0.47685593366622925, "logps/chosen": -0.0061169276013970375, "logps/rejected": -2.9758238792419434, "loss": 1.0375, "nll_loss": 0.25729823112487793, "rewards/accuracies": 1.0, "rewards/chosen": -0.000611692841630429, "rewards/margins": 0.2969706952571869, "rewards/rejected": -0.29758238792419434, "step": 7433 }, { "epoch": 5.141078838174274, "grad_norm": 5.268993854522705, "learning_rate": 2.6994006454587366e-05, "log_odds_chosen": 9.679532051086426, "log_odds_ratio": -0.0001878141483757645, "logits/chosen": -0.28364884853363037, "logits/rejected": -0.30598005652427673, "logps/chosen": -0.0004549617297016084, "logps/rejected": -2.0728020668029785, "loss": 0.581, "nll_loss": 0.14523589611053467, "rewards/accuracies": 1.0, "rewards/chosen": -4.5496170059777796e-05, "rewards/margins": 0.2072347104549408, "rewards/rejected": -0.2072802186012268, "step": 7434 }, { "epoch": 5.141770401106501, "grad_norm": 5.956173896789551, "learning_rate": 2.699016443829722e-05, "log_odds_chosen": 9.59054183959961, "log_odds_ratio": -0.00022657515364699066, "logits/chosen": -0.17377327382564545, "logits/rejected": -0.20633672177791595, "logps/chosen": -0.0018869942286983132, "logps/rejected": -1.6651039123535156, "loss": 0.4865, "nll_loss": 0.12160070985555649, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018869942869059741, "rewards/margins": 0.16632169485092163, "rewards/rejected": -0.16651038825511932, "step": 7435 }, { "epoch": 5.142461964038728, "grad_norm": 6.142175197601318, "learning_rate": 2.698632242200707e-05, "log_odds_chosen": 10.699289321899414, "log_odds_ratio": -3.694228871609084e-05, "logits/chosen": -0.7496898174285889, "logits/rejected": -0.757624089717865, "logps/chosen": -0.00015292735770344734, "logps/rejected": -1.9251577854156494, "loss": 0.6407, "nll_loss": 0.16017428040504456, "rewards/accuracies": 1.0, "rewards/chosen": -1.5292735042748973e-05, "rewards/margins": 0.192500501871109, "rewards/rejected": -0.1925157755613327, "step": 7436 }, { "epoch": 5.143153526970955, "grad_norm": 5.331760406494141, "learning_rate": 2.698248040571692e-05, "log_odds_chosen": 9.898192405700684, "log_odds_ratio": -0.002496067201718688, "logits/chosen": -0.3681006133556366, "logits/rejected": -0.4699682891368866, "logps/chosen": -0.0007896803435869515, "logps/rejected": -1.9627407789230347, "loss": 0.7278, "nll_loss": 0.18171098828315735, "rewards/accuracies": 1.0, "rewards/chosen": -7.896803435869515e-05, "rewards/margins": 0.19619512557983398, "rewards/rejected": -0.19627408683300018, "step": 7437 }, { "epoch": 5.143845089903182, "grad_norm": 6.796148777008057, "learning_rate": 2.6978638389426776e-05, "log_odds_chosen": 10.226568222045898, "log_odds_ratio": -0.00014895365166012198, "logits/chosen": -0.4499202370643616, "logits/rejected": -0.516467273235321, "logps/chosen": -0.00055777991656214, "logps/rejected": -2.1390910148620605, "loss": 0.8184, "nll_loss": 0.20458026230335236, "rewards/accuracies": 1.0, "rewards/chosen": -5.577799311140552e-05, "rewards/margins": 0.21385329961776733, "rewards/rejected": -0.2139090895652771, "step": 7438 }, { "epoch": 5.144536652835408, "grad_norm": 16.08154296875, "learning_rate": 2.6974796373136625e-05, "log_odds_chosen": 7.4634013175964355, "log_odds_ratio": -0.14611497521400452, "logits/chosen": -0.4200138449668884, "logits/rejected": -0.5181282758712769, "logps/chosen": -0.0243761595338583, "logps/rejected": -1.3469064235687256, "loss": 0.709, "nll_loss": 0.16263723373413086, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024376162327826023, "rewards/margins": 0.132253035902977, "rewards/rejected": -0.13469064235687256, "step": 7439 }, { "epoch": 5.145228215767635, "grad_norm": 8.133143424987793, "learning_rate": 2.6970954356846474e-05, "log_odds_chosen": 9.185934066772461, "log_odds_ratio": -0.0035839800257235765, "logits/chosen": -0.6846455931663513, "logits/rejected": -0.700886607170105, "logps/chosen": -0.0032714849803596735, "logps/rejected": -2.566293716430664, "loss": 0.928, "nll_loss": 0.23163369297981262, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032714850385673344, "rewards/margins": 0.25630223751068115, "rewards/rejected": -0.2566293776035309, "step": 7440 }, { "epoch": 5.145919778699862, "grad_norm": 9.776525497436523, "learning_rate": 2.6967112340556326e-05, "log_odds_chosen": 10.732635498046875, "log_odds_ratio": -5.480859545059502e-05, "logits/chosen": -0.36197394132614136, "logits/rejected": -0.3192319869995117, "logps/chosen": -0.0002830323646776378, "logps/rejected": -2.5188982486724854, "loss": 0.8279, "nll_loss": 0.2069777250289917, "rewards/accuracies": 1.0, "rewards/chosen": -2.8303238650551066e-05, "rewards/margins": 0.2518615126609802, "rewards/rejected": -0.25188982486724854, "step": 7441 }, { "epoch": 5.146611341632089, "grad_norm": 7.6851606369018555, "learning_rate": 2.6963270324266175e-05, "log_odds_chosen": 10.15788459777832, "log_odds_ratio": -0.0003483621112536639, "logits/chosen": -0.30846232175827026, "logits/rejected": -0.4022744297981262, "logps/chosen": -0.00043301653931848705, "logps/rejected": -1.8087468147277832, "loss": 0.6652, "nll_loss": 0.16627439856529236, "rewards/accuracies": 1.0, "rewards/chosen": -4.330165756982751e-05, "rewards/margins": 0.1808314025402069, "rewards/rejected": -0.18087470531463623, "step": 7442 }, { "epoch": 5.147302904564316, "grad_norm": 9.387350082397461, "learning_rate": 2.6959428307976024e-05, "log_odds_chosen": 9.48453140258789, "log_odds_ratio": -0.00023655460972804576, "logits/chosen": -0.8036692142486572, "logits/rejected": -0.7573479413986206, "logps/chosen": -0.0005227526417002082, "logps/rejected": -1.2725003957748413, "loss": 0.6543, "nll_loss": 0.16355383396148682, "rewards/accuracies": 1.0, "rewards/chosen": -5.227526344242506e-05, "rewards/margins": 0.1271977722644806, "rewards/rejected": -0.1272500455379486, "step": 7443 }, { "epoch": 5.1479944674965425, "grad_norm": 7.64993143081665, "learning_rate": 2.695558629168588e-05, "log_odds_chosen": 10.165771484375, "log_odds_ratio": -8.179282303899527e-05, "logits/chosen": -0.6584855914115906, "logits/rejected": -0.578942060470581, "logps/chosen": -0.0001937196939252317, "logps/rejected": -1.4448562860488892, "loss": 0.798, "nll_loss": 0.1994938999414444, "rewards/accuracies": 1.0, "rewards/chosen": -1.937196975632105e-05, "rewards/margins": 0.14446625113487244, "rewards/rejected": -0.14448562264442444, "step": 7444 }, { "epoch": 5.148686030428769, "grad_norm": 7.556155681610107, "learning_rate": 2.695174427539573e-05, "log_odds_chosen": 9.396717071533203, "log_odds_ratio": -0.0032166705932468176, "logits/chosen": -0.920258641242981, "logits/rejected": -0.89804607629776, "logps/chosen": -0.0029146973975002766, "logps/rejected": -1.8120362758636475, "loss": 0.8244, "nll_loss": 0.20578640699386597, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029146973975002766, "rewards/margins": 0.18091216683387756, "rewards/rejected": -0.18120363354682922, "step": 7445 }, { "epoch": 5.149377593360996, "grad_norm": 9.94237995147705, "learning_rate": 2.6947902259105578e-05, "log_odds_chosen": 10.113234519958496, "log_odds_ratio": -0.00012419956328812987, "logits/chosen": -0.2607835531234741, "logits/rejected": -0.2949237823486328, "logps/chosen": -0.0004108600551262498, "logps/rejected": -2.0484108924865723, "loss": 0.6527, "nll_loss": 0.16316281259059906, "rewards/accuracies": 1.0, "rewards/chosen": -4.1086008423008025e-05, "rewards/margins": 0.20479997992515564, "rewards/rejected": -0.20484107732772827, "step": 7446 }, { "epoch": 5.150069156293223, "grad_norm": 8.69245719909668, "learning_rate": 2.6944060242815434e-05, "log_odds_chosen": 10.116579055786133, "log_odds_ratio": -0.00012764372513629496, "logits/chosen": -0.7028722763061523, "logits/rejected": -0.7171040773391724, "logps/chosen": -0.0006051872624084353, "logps/rejected": -2.0514676570892334, "loss": 0.4936, "nll_loss": 0.12338022142648697, "rewards/accuracies": 1.0, "rewards/chosen": -6.051873424439691e-05, "rewards/margins": 0.20508623123168945, "rewards/rejected": -0.20514675974845886, "step": 7447 }, { "epoch": 5.15076071922545, "grad_norm": 7.142855167388916, "learning_rate": 2.6940218226525283e-05, "log_odds_chosen": 10.812362670898438, "log_odds_ratio": -5.71875243622344e-05, "logits/chosen": -0.4539200961589813, "logits/rejected": -0.47321540117263794, "logps/chosen": -0.00028848316287621856, "logps/rejected": -2.2997775077819824, "loss": 0.9367, "nll_loss": 0.23417231440544128, "rewards/accuracies": 1.0, "rewards/chosen": -2.8848317015217617e-05, "rewards/margins": 0.22994890809059143, "rewards/rejected": -0.2299777716398239, "step": 7448 }, { "epoch": 5.151452282157677, "grad_norm": 8.273560523986816, "learning_rate": 2.6936376210235132e-05, "log_odds_chosen": 9.92202377319336, "log_odds_ratio": -0.00017694597772788256, "logits/chosen": -0.35956692695617676, "logits/rejected": -0.41068869829177856, "logps/chosen": -0.0004035543533973396, "logps/rejected": -1.659806251525879, "loss": 0.4615, "nll_loss": 0.11536431312561035, "rewards/accuracies": 1.0, "rewards/chosen": -4.035543679492548e-05, "rewards/margins": 0.1659402698278427, "rewards/rejected": -0.16598062217235565, "step": 7449 }, { "epoch": 5.1521438450899035, "grad_norm": 6.735435962677002, "learning_rate": 2.6932534193944985e-05, "log_odds_chosen": 9.072761535644531, "log_odds_ratio": -0.0005184119800105691, "logits/chosen": -0.17102433741092682, "logits/rejected": -0.24381959438323975, "logps/chosen": -0.0023045637644827366, "logps/rejected": -1.9304804801940918, "loss": 0.8306, "nll_loss": 0.20758703351020813, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023045638226903975, "rewards/margins": 0.1928175985813141, "rewards/rejected": -0.19304805994033813, "step": 7450 }, { "epoch": 5.15283540802213, "grad_norm": 7.467382431030273, "learning_rate": 2.6928692177654834e-05, "log_odds_chosen": 10.252504348754883, "log_odds_ratio": -0.00010818125156220049, "logits/chosen": -0.3678957521915436, "logits/rejected": -0.4237433671951294, "logps/chosen": -0.0002906073350459337, "logps/rejected": -2.056698799133301, "loss": 0.5276, "nll_loss": 0.13188603520393372, "rewards/accuracies": 1.0, "rewards/chosen": -2.90607367787743e-05, "rewards/margins": 0.20564082264900208, "rewards/rejected": -0.20566987991333008, "step": 7451 }, { "epoch": 5.153526970954357, "grad_norm": 8.431239128112793, "learning_rate": 2.6924850161364683e-05, "log_odds_chosen": 10.633581161499023, "log_odds_ratio": -8.072963100858033e-05, "logits/chosen": -0.427499383687973, "logits/rejected": -0.4580521285533905, "logps/chosen": -0.00054318638285622, "logps/rejected": -2.396068572998047, "loss": 0.9553, "nll_loss": 0.23881791532039642, "rewards/accuracies": 1.0, "rewards/chosen": -5.431864701677114e-05, "rewards/margins": 0.23955252766609192, "rewards/rejected": -0.2396068572998047, "step": 7452 }, { "epoch": 5.154218533886584, "grad_norm": 6.476319789886475, "learning_rate": 2.692100814507454e-05, "log_odds_chosen": 10.82221508026123, "log_odds_ratio": -5.586762563325465e-05, "logits/chosen": -0.3435792326927185, "logits/rejected": -0.32957378029823303, "logps/chosen": -0.0001679111155681312, "logps/rejected": -1.8454334735870361, "loss": 0.5417, "nll_loss": 0.13541817665100098, "rewards/accuracies": 1.0, "rewards/chosen": -1.6791113012004644e-05, "rewards/margins": 0.18452654778957367, "rewards/rejected": -0.18454334139823914, "step": 7453 }, { "epoch": 5.154910096818811, "grad_norm": 6.970516681671143, "learning_rate": 2.6917166128784388e-05, "log_odds_chosen": 10.277645111083984, "log_odds_ratio": -6.148001557448879e-05, "logits/chosen": -0.5003536343574524, "logits/rejected": -0.5715048909187317, "logps/chosen": -0.00022070945124141872, "logps/rejected": -2.006925106048584, "loss": 0.9131, "nll_loss": 0.22826507687568665, "rewards/accuracies": 1.0, "rewards/chosen": -2.2070948034524918e-05, "rewards/margins": 0.20067042112350464, "rewards/rejected": -0.20069250464439392, "step": 7454 }, { "epoch": 5.155601659751038, "grad_norm": 11.107111930847168, "learning_rate": 2.6913324112494237e-05, "log_odds_chosen": 9.70268440246582, "log_odds_ratio": -0.0004626112640835345, "logits/chosen": -0.372115820646286, "logits/rejected": -0.3687889575958252, "logps/chosen": -0.001481476123444736, "logps/rejected": -1.5637428760528564, "loss": 0.7513, "nll_loss": 0.18778753280639648, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001481476065237075, "rewards/margins": 0.15622614324092865, "rewards/rejected": -0.15637429058551788, "step": 7455 }, { "epoch": 5.1562932226832645, "grad_norm": 10.2018404006958, "learning_rate": 2.6909482096204092e-05, "log_odds_chosen": 11.103732109069824, "log_odds_ratio": -2.6793713914230466e-05, "logits/chosen": -0.4589177966117859, "logits/rejected": -0.5208662152290344, "logps/chosen": -0.0001817662123357877, "logps/rejected": -2.4946489334106445, "loss": 0.8262, "nll_loss": 0.2065504789352417, "rewards/accuracies": 1.0, "rewards/chosen": -1.817662086978089e-05, "rewards/margins": 0.24944674968719482, "rewards/rejected": -0.24946492910385132, "step": 7456 }, { "epoch": 5.156984785615491, "grad_norm": 11.086647033691406, "learning_rate": 2.690564007991394e-05, "log_odds_chosen": 10.889501571655273, "log_odds_ratio": -3.276738425483927e-05, "logits/chosen": -0.3176249861717224, "logits/rejected": -0.41258126497268677, "logps/chosen": -0.00030722259543836117, "logps/rejected": -2.074706554412842, "loss": 0.5755, "nll_loss": 0.1438601016998291, "rewards/accuracies": 1.0, "rewards/chosen": -3.07222617266234e-05, "rewards/margins": 0.20743992924690247, "rewards/rejected": -0.20747067034244537, "step": 7457 }, { "epoch": 5.157676348547718, "grad_norm": 7.860065460205078, "learning_rate": 2.690179806362379e-05, "log_odds_chosen": 10.567296028137207, "log_odds_ratio": -0.00013941126235295087, "logits/chosen": -0.6222514510154724, "logits/rejected": -0.7001794576644897, "logps/chosen": -0.0014393426245078444, "logps/rejected": -2.4629440307617188, "loss": 0.7707, "nll_loss": 0.19265933334827423, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014393427409231663, "rewards/margins": 0.24615047872066498, "rewards/rejected": -0.24629440903663635, "step": 7458 }, { "epoch": 5.158367911479945, "grad_norm": 9.500434875488281, "learning_rate": 2.6897956047333643e-05, "log_odds_chosen": 11.329741477966309, "log_odds_ratio": -4.6019948058528826e-05, "logits/chosen": -0.7677967548370361, "logits/rejected": -0.7199459671974182, "logps/chosen": -0.00035254136309958994, "logps/rejected": -2.6514434814453125, "loss": 0.6871, "nll_loss": 0.17177699506282806, "rewards/accuracies": 1.0, "rewards/chosen": -3.5254131944384426e-05, "rewards/margins": 0.2651090919971466, "rewards/rejected": -0.26514434814453125, "step": 7459 }, { "epoch": 5.159059474412172, "grad_norm": 5.759252548217773, "learning_rate": 2.6894114031043492e-05, "log_odds_chosen": 8.701586723327637, "log_odds_ratio": -0.000857122300658375, "logits/chosen": -0.5436146259307861, "logits/rejected": -0.5152435302734375, "logps/chosen": -0.001559797441586852, "logps/rejected": -2.025024175643921, "loss": 1.0732, "nll_loss": 0.268216073513031, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015597973833791912, "rewards/margins": 0.20234644412994385, "rewards/rejected": -0.20250242948532104, "step": 7460 }, { "epoch": 5.159751037344399, "grad_norm": 8.084737777709961, "learning_rate": 2.689027201475334e-05, "log_odds_chosen": 10.72265625, "log_odds_ratio": -3.390820711501874e-05, "logits/chosen": -0.4589902460575104, "logits/rejected": -0.464423805475235, "logps/chosen": -0.00032347970409318805, "logps/rejected": -2.2925033569335938, "loss": 0.7002, "nll_loss": 0.17503926157951355, "rewards/accuracies": 1.0, "rewards/chosen": -3.234797259210609e-05, "rewards/margins": 0.229217991232872, "rewards/rejected": -0.22925034165382385, "step": 7461 }, { "epoch": 5.1604426002766255, "grad_norm": 8.944840431213379, "learning_rate": 2.6886429998463197e-05, "log_odds_chosen": 9.330341339111328, "log_odds_ratio": -0.0014703095657750964, "logits/chosen": -0.4748300313949585, "logits/rejected": -0.5044779777526855, "logps/chosen": -0.0012496764538809657, "logps/rejected": -1.0973037481307983, "loss": 0.7171, "nll_loss": 0.17911916971206665, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012496765702962875, "rewards/margins": 0.10960540920495987, "rewards/rejected": -0.10973037779331207, "step": 7462 }, { "epoch": 5.161134163208852, "grad_norm": 9.398916244506836, "learning_rate": 2.6882587982173046e-05, "log_odds_chosen": 10.321409225463867, "log_odds_ratio": -0.00010626899893395603, "logits/chosen": -0.6396389007568359, "logits/rejected": -0.7592541575431824, "logps/chosen": -0.0009459155262447894, "logps/rejected": -1.6495240926742554, "loss": 0.7927, "nll_loss": 0.19815877079963684, "rewards/accuracies": 1.0, "rewards/chosen": -9.459155262447894e-05, "rewards/margins": 0.16485781967639923, "rewards/rejected": -0.16495241224765778, "step": 7463 }, { "epoch": 5.161825726141079, "grad_norm": 7.023201942443848, "learning_rate": 2.6878745965882895e-05, "log_odds_chosen": 10.7672119140625, "log_odds_ratio": -6.182586366776377e-05, "logits/chosen": -0.4947451651096344, "logits/rejected": -0.5416525602340698, "logps/chosen": -0.00021441548597067595, "logps/rejected": -2.317390203475952, "loss": 0.6882, "nll_loss": 0.17205506563186646, "rewards/accuracies": 1.0, "rewards/chosen": -2.1441548597067595e-05, "rewards/margins": 0.23171758651733398, "rewards/rejected": -0.23173902928829193, "step": 7464 }, { "epoch": 5.162517289073306, "grad_norm": 264.32476806640625, "learning_rate": 2.687490394959275e-05, "log_odds_chosen": 8.581357955932617, "log_odds_ratio": -0.7932350635528564, "logits/chosen": -0.44917917251586914, "logits/rejected": -0.46375998854637146, "logps/chosen": -0.05425819382071495, "logps/rejected": -1.6769574880599976, "loss": 0.99, "nll_loss": 0.16817449033260345, "rewards/accuracies": 0.875, "rewards/chosen": -0.00542581919580698, "rewards/margins": 0.16226992011070251, "rewards/rejected": -0.16769574582576752, "step": 7465 }, { "epoch": 5.163208852005533, "grad_norm": 14.085131645202637, "learning_rate": 2.68710619333026e-05, "log_odds_chosen": 11.090156555175781, "log_odds_ratio": -5.3086037951288745e-05, "logits/chosen": -0.36317867040634155, "logits/rejected": -0.48448365926742554, "logps/chosen": -0.00022466076188720763, "logps/rejected": -2.407838821411133, "loss": 0.978, "nll_loss": 0.24448320269584656, "rewards/accuracies": 1.0, "rewards/chosen": -2.2466076188720763e-05, "rewards/margins": 0.240761399269104, "rewards/rejected": -0.24078388512134552, "step": 7466 }, { "epoch": 5.16390041493776, "grad_norm": 6.401428699493408, "learning_rate": 2.686721991701245e-05, "log_odds_chosen": 10.487455368041992, "log_odds_ratio": -7.464921509381384e-05, "logits/chosen": -0.5477094054222107, "logits/rejected": -0.5565400123596191, "logps/chosen": -0.00031943750218488276, "logps/rejected": -1.8109780550003052, "loss": 0.8222, "nll_loss": 0.2055395245552063, "rewards/accuracies": 1.0, "rewards/chosen": -3.194374585291371e-05, "rewards/margins": 0.1810658574104309, "rewards/rejected": -0.18109779059886932, "step": 7467 }, { "epoch": 5.1645919778699865, "grad_norm": 12.549816131591797, "learning_rate": 2.68633779007223e-05, "log_odds_chosen": 9.737430572509766, "log_odds_ratio": -0.0002125693717971444, "logits/chosen": -0.3117648661136627, "logits/rejected": -0.41824281215667725, "logps/chosen": -0.00019726053869817406, "logps/rejected": -1.5196945667266846, "loss": 1.9219, "nll_loss": 0.4804571866989136, "rewards/accuracies": 1.0, "rewards/chosen": -1.972605605260469e-05, "rewards/margins": 0.15194973349571228, "rewards/rejected": -0.15196946263313293, "step": 7468 }, { "epoch": 5.165283540802213, "grad_norm": 5.628894329071045, "learning_rate": 2.685953588443215e-05, "log_odds_chosen": 10.469507217407227, "log_odds_ratio": -0.00010986346023855731, "logits/chosen": -0.37138351798057556, "logits/rejected": -0.4485434889793396, "logps/chosen": -0.004168296232819557, "logps/rejected": -2.2689802646636963, "loss": 0.7922, "nll_loss": 0.1980508416891098, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004168296291027218, "rewards/margins": 0.22648121416568756, "rewards/rejected": -0.22689804434776306, "step": 7469 }, { "epoch": 5.16597510373444, "grad_norm": 8.571297645568848, "learning_rate": 2.6855693868142e-05, "log_odds_chosen": 9.631418228149414, "log_odds_ratio": -8.710901602171361e-05, "logits/chosen": -0.3490288257598877, "logits/rejected": -0.3959527611732483, "logps/chosen": -0.00012631932622753084, "logps/rejected": -0.9703894853591919, "loss": 1.1691, "nll_loss": 0.2922728955745697, "rewards/accuracies": 1.0, "rewards/chosen": -1.2631931895157322e-05, "rewards/margins": 0.09702632576227188, "rewards/rejected": -0.09703895449638367, "step": 7470 }, { "epoch": 5.166666666666667, "grad_norm": 5.723106861114502, "learning_rate": 2.6851851851851855e-05, "log_odds_chosen": 10.555776596069336, "log_odds_ratio": -5.606668128166348e-05, "logits/chosen": -0.6238350868225098, "logits/rejected": -0.6863256692886353, "logps/chosen": -0.0005328550469130278, "logps/rejected": -2.041658401489258, "loss": 0.5854, "nll_loss": 0.14635531604290009, "rewards/accuracies": 1.0, "rewards/chosen": -5.3285504691302776e-05, "rewards/margins": 0.20411255955696106, "rewards/rejected": -0.20416586101055145, "step": 7471 }, { "epoch": 5.167358229598894, "grad_norm": 7.153631210327148, "learning_rate": 2.6848009835561704e-05, "log_odds_chosen": 9.928325653076172, "log_odds_ratio": -0.0010404024505987763, "logits/chosen": -0.36878323554992676, "logits/rejected": -0.4676334261894226, "logps/chosen": -0.001209319569170475, "logps/rejected": -2.2380104064941406, "loss": 0.6111, "nll_loss": 0.15266205370426178, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012093195982743055, "rewards/margins": 0.22368010878562927, "rewards/rejected": -0.22380106151103973, "step": 7472 }, { "epoch": 5.168049792531121, "grad_norm": 4.801445484161377, "learning_rate": 2.6844167819271554e-05, "log_odds_chosen": 9.519782066345215, "log_odds_ratio": -0.0011550523340702057, "logits/chosen": -0.4875209629535675, "logits/rejected": -0.37438157200813293, "logps/chosen": -0.002648913534358144, "logps/rejected": -2.1521012783050537, "loss": 0.5119, "nll_loss": 0.12784883379936218, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026489136507734656, "rewards/margins": 0.2149452269077301, "rewards/rejected": -0.21521010994911194, "step": 7473 }, { "epoch": 5.1687413554633475, "grad_norm": 10.136579513549805, "learning_rate": 2.684032580298141e-05, "log_odds_chosen": 11.658432006835938, "log_odds_ratio": -1.348641853837762e-05, "logits/chosen": -0.07469991594552994, "logits/rejected": -0.1592363864183426, "logps/chosen": -0.00022109775454737246, "logps/rejected": -3.093412160873413, "loss": 1.05, "nll_loss": 0.2624865174293518, "rewards/accuracies": 1.0, "rewards/chosen": -2.2109776182333007e-05, "rewards/margins": 0.3093191385269165, "rewards/rejected": -0.3093412518501282, "step": 7474 }, { "epoch": 5.169432918395574, "grad_norm": 8.295631408691406, "learning_rate": 2.683648378669126e-05, "log_odds_chosen": 11.123706817626953, "log_odds_ratio": -2.043310450972058e-05, "logits/chosen": -0.3312394917011261, "logits/rejected": -0.36622753739356995, "logps/chosen": -9.362811397295445e-05, "logps/rejected": -1.9202853441238403, "loss": 0.8433, "nll_loss": 0.21081441640853882, "rewards/accuracies": 1.0, "rewards/chosen": -9.362811397295445e-06, "rewards/margins": 0.19201916456222534, "rewards/rejected": -0.19202853739261627, "step": 7475 }, { "epoch": 5.170124481327801, "grad_norm": 7.556997299194336, "learning_rate": 2.6832641770401107e-05, "log_odds_chosen": 9.98617172241211, "log_odds_ratio": -8.398250793106854e-05, "logits/chosen": -0.28513598442077637, "logits/rejected": -0.35176780819892883, "logps/chosen": -0.0001761521416483447, "logps/rejected": -1.447932481765747, "loss": 0.6559, "nll_loss": 0.16395564377307892, "rewards/accuracies": 1.0, "rewards/chosen": -1.761521525622811e-05, "rewards/margins": 0.1447756290435791, "rewards/rejected": -0.14479325711727142, "step": 7476 }, { "epoch": 5.170816044260028, "grad_norm": 6.122884273529053, "learning_rate": 2.682879975411096e-05, "log_odds_chosen": 10.983901977539062, "log_odds_ratio": -4.2492694774409756e-05, "logits/chosen": -0.2037186324596405, "logits/rejected": -0.3470858931541443, "logps/chosen": -0.0003173601580783725, "logps/rejected": -2.748629570007324, "loss": 0.5459, "nll_loss": 0.13647319376468658, "rewards/accuracies": 1.0, "rewards/chosen": -3.173601726302877e-05, "rewards/margins": 0.27483123540878296, "rewards/rejected": -0.27486297488212585, "step": 7477 }, { "epoch": 5.171507607192255, "grad_norm": 9.355021476745605, "learning_rate": 2.682495773782081e-05, "log_odds_chosen": 10.416082382202148, "log_odds_ratio": -0.00011976615496678278, "logits/chosen": -0.3434603810310364, "logits/rejected": -0.34404170513153076, "logps/chosen": -0.00027853366918861866, "logps/rejected": -1.650794506072998, "loss": 0.7857, "nll_loss": 0.19641204178333282, "rewards/accuracies": 1.0, "rewards/chosen": -2.7853368010255508e-05, "rewards/margins": 0.16505160927772522, "rewards/rejected": -0.16507945954799652, "step": 7478 }, { "epoch": 5.172199170124482, "grad_norm": 10.500981330871582, "learning_rate": 2.6821115721530658e-05, "log_odds_chosen": 8.71304702758789, "log_odds_ratio": -0.1506773829460144, "logits/chosen": -0.4835907816886902, "logits/rejected": -0.5638684034347534, "logps/chosen": -0.15316888689994812, "logps/rejected": -1.914087176322937, "loss": 1.1405, "nll_loss": 0.27004915475845337, "rewards/accuracies": 0.875, "rewards/chosen": -0.015316887758672237, "rewards/margins": 0.17609182000160217, "rewards/rejected": -0.19140870869159698, "step": 7479 }, { "epoch": 5.172890733056708, "grad_norm": 10.053938865661621, "learning_rate": 2.6817273705240507e-05, "log_odds_chosen": 10.712471008300781, "log_odds_ratio": -0.0001415243314113468, "logits/chosen": -0.1618555337190628, "logits/rejected": -0.18134653568267822, "logps/chosen": -0.0003034502442460507, "logps/rejected": -1.9955106973648071, "loss": 0.755, "nll_loss": 0.18873383104801178, "rewards/accuracies": 1.0, "rewards/chosen": -3.0345025152200833e-05, "rewards/margins": 0.19952073693275452, "rewards/rejected": -0.1995510756969452, "step": 7480 }, { "epoch": 5.173582295988935, "grad_norm": 13.536983489990234, "learning_rate": 2.6813431688950363e-05, "log_odds_chosen": 10.499656677246094, "log_odds_ratio": -0.00011437821376603097, "logits/chosen": -0.613703727722168, "logits/rejected": -0.7598463296890259, "logps/chosen": -0.0006375666707754135, "logps/rejected": -2.5778279304504395, "loss": 0.8122, "nll_loss": 0.20303978025913239, "rewards/accuracies": 1.0, "rewards/chosen": -6.37566699879244e-05, "rewards/margins": 0.2577190399169922, "rewards/rejected": -0.25778278708457947, "step": 7481 }, { "epoch": 5.174273858921162, "grad_norm": 7.290798187255859, "learning_rate": 2.6809589672660212e-05, "log_odds_chosen": 10.250022888183594, "log_odds_ratio": -0.00485006021335721, "logits/chosen": -0.1034635454416275, "logits/rejected": -0.08264364302158356, "logps/chosen": -0.002070134272798896, "logps/rejected": -2.0911502838134766, "loss": 0.6515, "nll_loss": 0.1624019742012024, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020701343601103872, "rewards/margins": 0.20890799164772034, "rewards/rejected": -0.20911502838134766, "step": 7482 }, { "epoch": 5.174965421853389, "grad_norm": 6.1445841789245605, "learning_rate": 2.680574765637006e-05, "log_odds_chosen": 11.127788543701172, "log_odds_ratio": -1.904199962154962e-05, "logits/chosen": -0.28616607189178467, "logits/rejected": -0.33013203740119934, "logps/chosen": -0.0003823993029072881, "logps/rejected": -2.642538070678711, "loss": 0.9725, "nll_loss": 0.24311384558677673, "rewards/accuracies": 1.0, "rewards/chosen": -3.823993029072881e-05, "rewards/margins": 0.2642155587673187, "rewards/rejected": -0.26425379514694214, "step": 7483 }, { "epoch": 5.175656984785616, "grad_norm": 8.26850414276123, "learning_rate": 2.6801905640079917e-05, "log_odds_chosen": 11.149333953857422, "log_odds_ratio": -8.133323717629537e-05, "logits/chosen": -0.40619760751724243, "logits/rejected": -0.4585905969142914, "logps/chosen": -0.0030611082911491394, "logps/rejected": -2.522730588912964, "loss": 0.744, "nll_loss": 0.18599961698055267, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003061108582187444, "rewards/margins": 0.2519669532775879, "rewards/rejected": -0.2522730529308319, "step": 7484 }, { "epoch": 5.176348547717843, "grad_norm": 11.564437866210938, "learning_rate": 2.6798063623789766e-05, "log_odds_chosen": 9.528322219848633, "log_odds_ratio": -0.0002743283985182643, "logits/chosen": -0.33204129338264465, "logits/rejected": -0.3911939263343811, "logps/chosen": -0.0010037249885499477, "logps/rejected": -1.6887990236282349, "loss": 1.0054, "nll_loss": 0.25131741166114807, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010037249739980325, "rewards/margins": 0.16877952218055725, "rewards/rejected": -0.168879896402359, "step": 7485 }, { "epoch": 5.177040110650069, "grad_norm": 7.996581077575684, "learning_rate": 2.6794221607499615e-05, "log_odds_chosen": 10.887833595275879, "log_odds_ratio": -3.939437010558322e-05, "logits/chosen": -0.38903307914733887, "logits/rejected": -0.4599970877170563, "logps/chosen": -0.0002921771665569395, "logps/rejected": -2.234811305999756, "loss": 0.562, "nll_loss": 0.14050793647766113, "rewards/accuracies": 1.0, "rewards/chosen": -2.921771738328971e-05, "rewards/margins": 0.2234519124031067, "rewards/rejected": -0.22348114848136902, "step": 7486 }, { "epoch": 5.177731673582296, "grad_norm": 5.6479692459106445, "learning_rate": 2.6790379591209467e-05, "log_odds_chosen": 10.950244903564453, "log_odds_ratio": -3.814305455307476e-05, "logits/chosen": -0.6038317680358887, "logits/rejected": -0.6398409605026245, "logps/chosen": -0.00017186827608384192, "logps/rejected": -2.3177053928375244, "loss": 0.6308, "nll_loss": 0.1576993316411972, "rewards/accuracies": 1.0, "rewards/chosen": -1.7186828699777834e-05, "rewards/margins": 0.23175334930419922, "rewards/rejected": -0.23177054524421692, "step": 7487 }, { "epoch": 5.178423236514523, "grad_norm": 7.2999982833862305, "learning_rate": 2.678653757491932e-05, "log_odds_chosen": 10.90628719329834, "log_odds_ratio": -0.00015409404295496643, "logits/chosen": -0.3412237763404846, "logits/rejected": -0.44108980894088745, "logps/chosen": -0.00016340138972736895, "logps/rejected": -2.2473998069763184, "loss": 0.9075, "nll_loss": 0.2268695831298828, "rewards/accuracies": 1.0, "rewards/chosen": -1.6340140064130537e-05, "rewards/margins": 0.22472365200519562, "rewards/rejected": -0.22473998367786407, "step": 7488 }, { "epoch": 5.17911479944675, "grad_norm": 9.799997329711914, "learning_rate": 2.678269555862917e-05, "log_odds_chosen": 10.669675827026367, "log_odds_ratio": -9.51956317294389e-05, "logits/chosen": -0.427339643239975, "logits/rejected": -0.4833088517189026, "logps/chosen": -0.00027621022309176624, "logps/rejected": -2.317042350769043, "loss": 1.0254, "nll_loss": 0.2563331127166748, "rewards/accuracies": 1.0, "rewards/chosen": -2.7621019398793578e-05, "rewards/margins": 0.2316766232252121, "rewards/rejected": -0.23170426487922668, "step": 7489 }, { "epoch": 5.179806362378977, "grad_norm": 3.7129058837890625, "learning_rate": 2.677885354233902e-05, "log_odds_chosen": 10.49702262878418, "log_odds_ratio": -5.258437158772722e-05, "logits/chosen": -0.7177293300628662, "logits/rejected": -0.6209688186645508, "logps/chosen": -0.0001718494895612821, "logps/rejected": -1.7512952089309692, "loss": 0.4853, "nll_loss": 0.12132173776626587, "rewards/accuracies": 1.0, "rewards/chosen": -1.718494968372397e-05, "rewards/margins": 0.17511233687400818, "rewards/rejected": -0.17512951791286469, "step": 7490 }, { "epoch": 5.180497925311204, "grad_norm": 11.894420623779297, "learning_rate": 2.677501152604887e-05, "log_odds_chosen": 11.11728286743164, "log_odds_ratio": -2.9727882065344602e-05, "logits/chosen": -0.5175437927246094, "logits/rejected": -0.5285531878471375, "logps/chosen": -0.0001054564054356888, "logps/rejected": -2.058851957321167, "loss": 0.9157, "nll_loss": 0.22891131043434143, "rewards/accuracies": 1.0, "rewards/chosen": -1.05456410892657e-05, "rewards/margins": 0.20587465167045593, "rewards/rejected": -0.20588520169258118, "step": 7491 }, { "epoch": 5.18118948824343, "grad_norm": 7.50294303894043, "learning_rate": 2.677116950975872e-05, "log_odds_chosen": 10.425357818603516, "log_odds_ratio": -0.001152600278146565, "logits/chosen": -0.45772436261177063, "logits/rejected": -0.43121635913848877, "logps/chosen": -0.0005207476206123829, "logps/rejected": -1.9553520679473877, "loss": 0.7615, "nll_loss": 0.19025787711143494, "rewards/accuracies": 1.0, "rewards/chosen": -5.207476351642981e-05, "rewards/margins": 0.19548313319683075, "rewards/rejected": -0.19553521275520325, "step": 7492 }, { "epoch": 5.181881051175657, "grad_norm": 6.374143123626709, "learning_rate": 2.6767327493468575e-05, "log_odds_chosen": 8.970407485961914, "log_odds_ratio": -0.00023207120830193162, "logits/chosen": -0.2817067801952362, "logits/rejected": -0.27195069193840027, "logps/chosen": -0.00061708630528301, "logps/rejected": -1.4450798034667969, "loss": 0.8252, "nll_loss": 0.2062731385231018, "rewards/accuracies": 1.0, "rewards/chosen": -6.170863343868405e-05, "rewards/margins": 0.14444628357887268, "rewards/rejected": -0.1445080041885376, "step": 7493 }, { "epoch": 5.182572614107884, "grad_norm": 5.238719463348389, "learning_rate": 2.6763485477178424e-05, "log_odds_chosen": 9.787389755249023, "log_odds_ratio": -0.002000561449676752, "logits/chosen": -0.5416412353515625, "logits/rejected": -0.5525435209274292, "logps/chosen": -0.009776615537703037, "logps/rejected": -2.6586475372314453, "loss": 0.6396, "nll_loss": 0.15969637036323547, "rewards/accuracies": 1.0, "rewards/chosen": -0.000977661577053368, "rewards/margins": 0.26488709449768066, "rewards/rejected": -0.2658647894859314, "step": 7494 }, { "epoch": 5.183264177040111, "grad_norm": 8.104238510131836, "learning_rate": 2.6759643460888273e-05, "log_odds_chosen": 9.921451568603516, "log_odds_ratio": -0.00029444476240314543, "logits/chosen": 0.10883663594722748, "logits/rejected": -0.02936922013759613, "logps/chosen": -0.0010627154260873795, "logps/rejected": -1.91524076461792, "loss": 1.0661, "nll_loss": 0.2664946913719177, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010627156007103622, "rewards/margins": 0.19141781330108643, "rewards/rejected": -0.19152405858039856, "step": 7495 }, { "epoch": 5.183955739972338, "grad_norm": 11.126547813415527, "learning_rate": 2.675580144459813e-05, "log_odds_chosen": 10.28394889831543, "log_odds_ratio": -0.00021344845299609005, "logits/chosen": -0.46144601702690125, "logits/rejected": -0.6137267351150513, "logps/chosen": -0.0002671529073268175, "logps/rejected": -1.7458523511886597, "loss": 0.8458, "nll_loss": 0.21143922209739685, "rewards/accuracies": 1.0, "rewards/chosen": -2.6715293643064797e-05, "rewards/margins": 0.17455853521823883, "rewards/rejected": -0.1745852380990982, "step": 7496 }, { "epoch": 5.1846473029045645, "grad_norm": 8.785880088806152, "learning_rate": 2.6751959428307978e-05, "log_odds_chosen": 9.732967376708984, "log_odds_ratio": -0.00011404632095945999, "logits/chosen": -0.4368380904197693, "logits/rejected": -0.49852725863456726, "logps/chosen": -0.0003329858591314405, "logps/rejected": -1.4203979969024658, "loss": 0.682, "nll_loss": 0.17047792673110962, "rewards/accuracies": 1.0, "rewards/chosen": -3.329858373035677e-05, "rewards/margins": 0.142006516456604, "rewards/rejected": -0.14203980565071106, "step": 7497 }, { "epoch": 5.185338865836791, "grad_norm": 7.608089923858643, "learning_rate": 2.6748117412017827e-05, "log_odds_chosen": 10.018511772155762, "log_odds_ratio": -0.00010477846080902964, "logits/chosen": -0.33977967500686646, "logits/rejected": -0.3529067635536194, "logps/chosen": -0.0014785649254918098, "logps/rejected": -1.9228357076644897, "loss": 1.3271, "nll_loss": 0.33175739645957947, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001478564809076488, "rewards/margins": 0.19213572144508362, "rewards/rejected": -0.19228358566761017, "step": 7498 }, { "epoch": 5.186030428769018, "grad_norm": 7.914434909820557, "learning_rate": 2.674427539572768e-05, "log_odds_chosen": 10.729510307312012, "log_odds_ratio": -0.00024930204381234944, "logits/chosen": -0.34463316202163696, "logits/rejected": -0.36579248309135437, "logps/chosen": -0.0005520581617020071, "logps/rejected": -2.259552478790283, "loss": 0.7027, "nll_loss": 0.17564159631729126, "rewards/accuracies": 1.0, "rewards/chosen": -5.520581908058375e-05, "rewards/margins": 0.22590003907680511, "rewards/rejected": -0.22595523297786713, "step": 7499 }, { "epoch": 5.186721991701245, "grad_norm": 4.368546009063721, "learning_rate": 2.674043337943753e-05, "log_odds_chosen": 10.761625289916992, "log_odds_ratio": -3.10266186716035e-05, "logits/chosen": -0.5241531133651733, "logits/rejected": -0.5674874782562256, "logps/chosen": -0.00037843859172426164, "logps/rejected": -2.337672710418701, "loss": 0.5361, "nll_loss": 0.13401782512664795, "rewards/accuracies": 1.0, "rewards/chosen": -3.7843859900021926e-05, "rewards/margins": 0.23372939229011536, "rewards/rejected": -0.23376727104187012, "step": 7500 }, { "epoch": 5.187413554633472, "grad_norm": 9.344945907592773, "learning_rate": 2.6736591363147378e-05, "log_odds_chosen": 9.932015419006348, "log_odds_ratio": -0.00025062222266569734, "logits/chosen": -0.024730026721954346, "logits/rejected": -0.1187935620546341, "logps/chosen": -0.0003689700970426202, "logps/rejected": -2.0399227142333984, "loss": 0.5947, "nll_loss": 0.1486581265926361, "rewards/accuracies": 1.0, "rewards/chosen": -3.689701406983659e-05, "rewards/margins": 0.20395538210868835, "rewards/rejected": -0.20399229228496552, "step": 7501 }, { "epoch": 5.188105117565699, "grad_norm": 6.984439373016357, "learning_rate": 2.6732749346857234e-05, "log_odds_chosen": 9.949422836303711, "log_odds_ratio": -0.0003776532830670476, "logits/chosen": -0.6317130327224731, "logits/rejected": -0.6177940368652344, "logps/chosen": -0.0006253727478906512, "logps/rejected": -1.4834885597229004, "loss": 0.5595, "nll_loss": 0.13984926044940948, "rewards/accuracies": 1.0, "rewards/chosen": -6.253727769944817e-05, "rewards/margins": 0.14828631281852722, "rewards/rejected": -0.1483488529920578, "step": 7502 }, { "epoch": 5.1887966804979255, "grad_norm": 7.357310771942139, "learning_rate": 2.6728907330567083e-05, "log_odds_chosen": 10.67965316772461, "log_odds_ratio": -5.9429581597214565e-05, "logits/chosen": -0.7382419109344482, "logits/rejected": -0.8030633330345154, "logps/chosen": -0.00030407847953028977, "logps/rejected": -2.4250638484954834, "loss": 0.7071, "nll_loss": 0.17677223682403564, "rewards/accuracies": 1.0, "rewards/chosen": -3.04078494082205e-05, "rewards/margins": 0.2424759864807129, "rewards/rejected": -0.24250638484954834, "step": 7503 }, { "epoch": 5.189488243430152, "grad_norm": 8.235445976257324, "learning_rate": 2.6725065314276932e-05, "log_odds_chosen": 9.837135314941406, "log_odds_ratio": -0.00010966081754304469, "logits/chosen": -0.4385377764701843, "logits/rejected": -0.4710184633731842, "logps/chosen": -0.0004885304369963706, "logps/rejected": -2.0021209716796875, "loss": 1.0324, "nll_loss": 0.2580997943878174, "rewards/accuracies": 1.0, "rewards/chosen": -4.885304588242434e-05, "rewards/margins": 0.20016326010227203, "rewards/rejected": -0.20021212100982666, "step": 7504 }, { "epoch": 5.190179806362379, "grad_norm": 6.21942138671875, "learning_rate": 2.6721223297986788e-05, "log_odds_chosen": 10.279722213745117, "log_odds_ratio": -0.00023209169739857316, "logits/chosen": -0.28356489539146423, "logits/rejected": -0.35319802165031433, "logps/chosen": -0.0003053020918741822, "logps/rejected": -1.986649513244629, "loss": 0.7567, "nll_loss": 0.18914204835891724, "rewards/accuracies": 1.0, "rewards/chosen": -3.053020918741822e-05, "rewards/margins": 0.19863444566726685, "rewards/rejected": -0.19866496324539185, "step": 7505 }, { "epoch": 5.190871369294606, "grad_norm": 5.215263366699219, "learning_rate": 2.6717381281696637e-05, "log_odds_chosen": 9.759660720825195, "log_odds_ratio": -0.0009383500437252223, "logits/chosen": -0.45862168073654175, "logits/rejected": -0.5552151799201965, "logps/chosen": -0.0033110720105469227, "logps/rejected": -1.696666955947876, "loss": 0.906, "nll_loss": 0.22640550136566162, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033110720687545836, "rewards/margins": 0.16933558881282806, "rewards/rejected": -0.16966669261455536, "step": 7506 }, { "epoch": 5.191562932226833, "grad_norm": 13.544431686401367, "learning_rate": 2.6713539265406486e-05, "log_odds_chosen": 10.83315372467041, "log_odds_ratio": -0.0009716550703160465, "logits/chosen": -0.5825919508934021, "logits/rejected": -0.6007131934165955, "logps/chosen": -0.0005834702169522643, "logps/rejected": -2.635345935821533, "loss": 1.0757, "nll_loss": 0.2688401937484741, "rewards/accuracies": 1.0, "rewards/chosen": -5.834702096763067e-05, "rewards/margins": 0.2634762227535248, "rewards/rejected": -0.2635346055030823, "step": 7507 }, { "epoch": 5.19225449515906, "grad_norm": 4.866628170013428, "learning_rate": 2.6709697249116338e-05, "log_odds_chosen": 8.859233856201172, "log_odds_ratio": -0.0007630664622411132, "logits/chosen": -0.362945556640625, "logits/rejected": -0.17942696809768677, "logps/chosen": -0.004629339091479778, "logps/rejected": -2.050640821456909, "loss": 1.2956, "nll_loss": 0.3238285481929779, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046293390914797783, "rewards/margins": 0.20460115373134613, "rewards/rejected": -0.20506411790847778, "step": 7508 }, { "epoch": 5.1929460580912865, "grad_norm": 5.803537845611572, "learning_rate": 2.6705855232826187e-05, "log_odds_chosen": 9.54582405090332, "log_odds_ratio": -0.0006485036574304104, "logits/chosen": -0.5477535724639893, "logits/rejected": -0.5572305917739868, "logps/chosen": -0.007590239401906729, "logps/rejected": -2.4489498138427734, "loss": 0.9146, "nll_loss": 0.22859731316566467, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007590239401906729, "rewards/margins": 0.24413597583770752, "rewards/rejected": -0.24489499628543854, "step": 7509 }, { "epoch": 5.193637621023513, "grad_norm": 5.946167945861816, "learning_rate": 2.6702013216536036e-05, "log_odds_chosen": 10.172664642333984, "log_odds_ratio": -0.00047986849676817656, "logits/chosen": -0.6982347369194031, "logits/rejected": -0.703554093837738, "logps/chosen": -0.0008831677259877324, "logps/rejected": -2.2994911670684814, "loss": 0.8376, "nll_loss": 0.20934252440929413, "rewards/accuracies": 1.0, "rewards/chosen": -8.831676677800715e-05, "rewards/margins": 0.22986078262329102, "rewards/rejected": -0.22994911670684814, "step": 7510 }, { "epoch": 5.19432918395574, "grad_norm": 6.564667701721191, "learning_rate": 2.6698171200245892e-05, "log_odds_chosen": 10.04134464263916, "log_odds_ratio": -9.779791434993967e-05, "logits/chosen": -0.5647412538528442, "logits/rejected": -0.5988651514053345, "logps/chosen": -0.0001927485573105514, "logps/rejected": -1.5764150619506836, "loss": 1.0745, "nll_loss": 0.26861101388931274, "rewards/accuracies": 1.0, "rewards/chosen": -1.927485573105514e-05, "rewards/margins": 0.15762223303318024, "rewards/rejected": -0.15764150023460388, "step": 7511 }, { "epoch": 5.195020746887967, "grad_norm": 6.056695938110352, "learning_rate": 2.669432918395574e-05, "log_odds_chosen": 9.677583694458008, "log_odds_ratio": -0.003684965195134282, "logits/chosen": -0.4713554084300995, "logits/rejected": -0.5660151243209839, "logps/chosen": -0.0013247056631371379, "logps/rejected": -2.0641555786132812, "loss": 1.2825, "nll_loss": 0.320260226726532, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001324705663137138, "rewards/margins": 0.2062830775976181, "rewards/rejected": -0.2064155638217926, "step": 7512 }, { "epoch": 5.195712309820194, "grad_norm": 7.452434539794922, "learning_rate": 2.669048716766559e-05, "log_odds_chosen": 10.309123992919922, "log_odds_ratio": -6.893956015119329e-05, "logits/chosen": -0.45162904262542725, "logits/rejected": -0.49090200662612915, "logps/chosen": -0.00016838658484630287, "logps/rejected": -1.5242056846618652, "loss": 0.7136, "nll_loss": 0.17838457226753235, "rewards/accuracies": 1.0, "rewards/chosen": -1.6838657757034525e-05, "rewards/margins": 0.15240372717380524, "rewards/rejected": -0.15242056548595428, "step": 7513 }, { "epoch": 5.196403872752421, "grad_norm": 6.046849250793457, "learning_rate": 2.6686645151375446e-05, "log_odds_chosen": 10.795588493347168, "log_odds_ratio": -5.696194421034306e-05, "logits/chosen": -0.31942465901374817, "logits/rejected": -0.33498522639274597, "logps/chosen": -0.0003312878543511033, "logps/rejected": -2.5096311569213867, "loss": 1.0985, "nll_loss": 0.2746131122112274, "rewards/accuracies": 1.0, "rewards/chosen": -3.3128788345493376e-05, "rewards/margins": 0.2509300112724304, "rewards/rejected": -0.25096315145492554, "step": 7514 }, { "epoch": 5.1970954356846475, "grad_norm": 7.981918811798096, "learning_rate": 2.6682803135085295e-05, "log_odds_chosen": 9.353009223937988, "log_odds_ratio": -0.007541028317064047, "logits/chosen": -0.14878655970096588, "logits/rejected": -0.24716559052467346, "logps/chosen": -0.05903133004903793, "logps/rejected": -2.631260871887207, "loss": 0.8076, "nll_loss": 0.20113366842269897, "rewards/accuracies": 1.0, "rewards/chosen": -0.005903133191168308, "rewards/margins": 0.257222980260849, "rewards/rejected": -0.26312610507011414, "step": 7515 }, { "epoch": 5.197786998616874, "grad_norm": 9.73996639251709, "learning_rate": 2.6678961118795144e-05, "log_odds_chosen": 10.649823188781738, "log_odds_ratio": -4.373151023173705e-05, "logits/chosen": -0.6496425867080688, "logits/rejected": -0.8041089773178101, "logps/chosen": -0.00027097389101982117, "logps/rejected": -2.0425217151641846, "loss": 0.6071, "nll_loss": 0.15177232027053833, "rewards/accuracies": 1.0, "rewards/chosen": -2.7097388738184236e-05, "rewards/margins": 0.2042250782251358, "rewards/rejected": -0.2042521834373474, "step": 7516 }, { "epoch": 5.198478561549101, "grad_norm": 6.225884437561035, "learning_rate": 2.6675119102504997e-05, "log_odds_chosen": 10.766189575195312, "log_odds_ratio": -0.0013470555422827601, "logits/chosen": -0.20639252662658691, "logits/rejected": -0.2310691624879837, "logps/chosen": -0.000624267035163939, "logps/rejected": -2.141397714614868, "loss": 0.9381, "nll_loss": 0.23438991606235504, "rewards/accuracies": 1.0, "rewards/chosen": -6.242669769562781e-05, "rewards/margins": 0.21407735347747803, "rewards/rejected": -0.21413977444171906, "step": 7517 }, { "epoch": 5.199170124481328, "grad_norm": 6.064988136291504, "learning_rate": 2.6671277086214846e-05, "log_odds_chosen": 9.96807861328125, "log_odds_ratio": -0.00041285439510829747, "logits/chosen": -0.6431621313095093, "logits/rejected": -0.766636848449707, "logps/chosen": -0.002066761488094926, "logps/rejected": -2.4709391593933105, "loss": 0.744, "nll_loss": 0.185963436961174, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020667616627179086, "rewards/margins": 0.24688725173473358, "rewards/rejected": -0.24709393084049225, "step": 7518 }, { "epoch": 5.199861687413555, "grad_norm": 8.340907096862793, "learning_rate": 2.6667435069924695e-05, "log_odds_chosen": 8.846458435058594, "log_odds_ratio": -0.0005865695420652628, "logits/chosen": -0.24171032011508942, "logits/rejected": -0.4122176766395569, "logps/chosen": -0.0007772729732096195, "logps/rejected": -1.002537488937378, "loss": 1.177, "nll_loss": 0.294181764125824, "rewards/accuracies": 1.0, "rewards/chosen": -7.772730168653652e-05, "rewards/margins": 0.10017602145671844, "rewards/rejected": -0.10025375336408615, "step": 7519 }, { "epoch": 5.200553250345782, "grad_norm": 10.035767555236816, "learning_rate": 2.666359305363455e-05, "log_odds_chosen": 10.078902244567871, "log_odds_ratio": -0.000265401613432914, "logits/chosen": -0.48366451263427734, "logits/rejected": -0.5781936645507812, "logps/chosen": -0.00022505372180603445, "logps/rejected": -1.8617677688598633, "loss": 0.9025, "nll_loss": 0.22558824717998505, "rewards/accuracies": 1.0, "rewards/chosen": -2.2505371816805564e-05, "rewards/margins": 0.18615427613258362, "rewards/rejected": -0.18617677688598633, "step": 7520 }, { "epoch": 5.2012448132780085, "grad_norm": 10.484762191772461, "learning_rate": 2.66597510373444e-05, "log_odds_chosen": 10.321274757385254, "log_odds_ratio": -0.0001440600899513811, "logits/chosen": -0.7540542483329773, "logits/rejected": -0.7682251334190369, "logps/chosen": -0.000749644823372364, "logps/rejected": -2.1048970222473145, "loss": 1.059, "nll_loss": 0.26474690437316895, "rewards/accuracies": 1.0, "rewards/chosen": -7.49644823372364e-05, "rewards/margins": 0.21041473746299744, "rewards/rejected": -0.21048972010612488, "step": 7521 }, { "epoch": 5.201936376210235, "grad_norm": 10.707878112792969, "learning_rate": 2.665590902105425e-05, "log_odds_chosen": 10.353931427001953, "log_odds_ratio": -0.00016276889073196799, "logits/chosen": -0.08579882979393005, "logits/rejected": -0.1549488604068756, "logps/chosen": -0.00023727120424155146, "logps/rejected": -1.4980394840240479, "loss": 1.1418, "nll_loss": 0.2854325771331787, "rewards/accuracies": 1.0, "rewards/chosen": -2.3727119696559384e-05, "rewards/margins": 0.14978022873401642, "rewards/rejected": -0.14980396628379822, "step": 7522 }, { "epoch": 5.202627939142462, "grad_norm": 16.987855911254883, "learning_rate": 2.6652067004764105e-05, "log_odds_chosen": 11.349204063415527, "log_odds_ratio": -6.56421689200215e-05, "logits/chosen": -0.16829033195972443, "logits/rejected": -0.2914985418319702, "logps/chosen": -0.00012773709022440016, "logps/rejected": -2.0962631702423096, "loss": 0.7947, "nll_loss": 0.19866439700126648, "rewards/accuracies": 1.0, "rewards/chosen": -1.2773710295732599e-05, "rewards/margins": 0.20961356163024902, "rewards/rejected": -0.20962633192539215, "step": 7523 }, { "epoch": 5.203319502074689, "grad_norm": 12.814325332641602, "learning_rate": 2.6648224988473954e-05, "log_odds_chosen": 10.35975170135498, "log_odds_ratio": -0.00010918633779510856, "logits/chosen": -0.32887110114097595, "logits/rejected": -0.4124990999698639, "logps/chosen": -0.0007760451408103108, "logps/rejected": -2.509739398956299, "loss": 0.8109, "nll_loss": 0.20270992815494537, "rewards/accuracies": 1.0, "rewards/chosen": -7.760451262583956e-05, "rewards/margins": 0.2508963346481323, "rewards/rejected": -0.2509739100933075, "step": 7524 }, { "epoch": 5.204011065006916, "grad_norm": 18.52150535583496, "learning_rate": 2.6644382972183803e-05, "log_odds_chosen": 10.428438186645508, "log_odds_ratio": -5.791490548290312e-05, "logits/chosen": -0.35513511300086975, "logits/rejected": -0.3227297067642212, "logps/chosen": -0.00031158284400589764, "logps/rejected": -2.353970766067505, "loss": 0.9356, "nll_loss": 0.23389127850532532, "rewards/accuracies": 1.0, "rewards/chosen": -3.1158284400589764e-05, "rewards/margins": 0.23536591231822968, "rewards/rejected": -0.235397070646286, "step": 7525 }, { "epoch": 5.204702627939143, "grad_norm": 7.042150974273682, "learning_rate": 2.6640540955893655e-05, "log_odds_chosen": 9.846081733703613, "log_odds_ratio": -0.0005817461060360074, "logits/chosen": -0.5193594098091125, "logits/rejected": -0.5902177095413208, "logps/chosen": -0.02698969841003418, "logps/rejected": -2.1079354286193848, "loss": 0.8825, "nll_loss": 0.2205757051706314, "rewards/accuracies": 1.0, "rewards/chosen": -0.002698970027267933, "rewards/margins": 0.20809456706047058, "rewards/rejected": -0.21079353988170624, "step": 7526 }, { "epoch": 5.2053941908713695, "grad_norm": 7.628520488739014, "learning_rate": 2.6636698939603504e-05, "log_odds_chosen": 11.020240783691406, "log_odds_ratio": -0.0005862127291038632, "logits/chosen": -0.2743455171585083, "logits/rejected": -0.2303268015384674, "logps/chosen": -0.0004525255935732275, "logps/rejected": -2.5413033962249756, "loss": 1.0111, "nll_loss": 0.2527076303958893, "rewards/accuracies": 1.0, "rewards/chosen": -4.5252560084918514e-05, "rewards/margins": 0.25408506393432617, "rewards/rejected": -0.2541303336620331, "step": 7527 }, { "epoch": 5.206085753803596, "grad_norm": 7.396105766296387, "learning_rate": 2.6632856923313353e-05, "log_odds_chosen": 11.09078311920166, "log_odds_ratio": -3.599739648052491e-05, "logits/chosen": -0.522323727607727, "logits/rejected": -0.5345219373703003, "logps/chosen": -0.0001351584796793759, "logps/rejected": -2.2688066959381104, "loss": 0.6788, "nll_loss": 0.16970443725585938, "rewards/accuracies": 1.0, "rewards/chosen": -1.3515847058442887e-05, "rewards/margins": 0.22686713933944702, "rewards/rejected": -0.22688066959381104, "step": 7528 }, { "epoch": 5.206777316735823, "grad_norm": 8.117511749267578, "learning_rate": 2.662901490702321e-05, "log_odds_chosen": 10.91192626953125, "log_odds_ratio": -3.3292169973719865e-05, "logits/chosen": -0.2902049422264099, "logits/rejected": -0.3024437427520752, "logps/chosen": -0.00023253005929291248, "logps/rejected": -2.238485336303711, "loss": 0.9055, "nll_loss": 0.22636531293392181, "rewards/accuracies": 1.0, "rewards/chosen": -2.3253005565493368e-05, "rewards/margins": 0.22382529079914093, "rewards/rejected": -0.22384853661060333, "step": 7529 }, { "epoch": 5.20746887966805, "grad_norm": 8.098960876464844, "learning_rate": 2.6625172890733058e-05, "log_odds_chosen": 11.654924392700195, "log_odds_ratio": -1.6672003766871057e-05, "logits/chosen": -0.6740207672119141, "logits/rejected": -0.7895724773406982, "logps/chosen": -0.00012381460692267865, "logps/rejected": -2.4390528202056885, "loss": 0.7303, "nll_loss": 0.18256288766860962, "rewards/accuracies": 1.0, "rewards/chosen": -1.2381460692267865e-05, "rewards/margins": 0.24389290809631348, "rewards/rejected": -0.24390527606010437, "step": 7530 }, { "epoch": 5.208160442600277, "grad_norm": 7.831466197967529, "learning_rate": 2.6621330874442907e-05, "log_odds_chosen": 11.02690601348877, "log_odds_ratio": -2.8968894184799865e-05, "logits/chosen": -0.7625277638435364, "logits/rejected": -0.8582916259765625, "logps/chosen": -0.00014673579426016659, "logps/rejected": -2.1968586444854736, "loss": 0.7601, "nll_loss": 0.1900177001953125, "rewards/accuracies": 1.0, "rewards/chosen": -1.4673579244117718e-05, "rewards/margins": 0.21967121958732605, "rewards/rejected": -0.2196858823299408, "step": 7531 }, { "epoch": 5.208852005532504, "grad_norm": 9.437521934509277, "learning_rate": 2.6617488858152763e-05, "log_odds_chosen": 9.570180892944336, "log_odds_ratio": -0.00027336826315149665, "logits/chosen": -0.15018832683563232, "logits/rejected": -0.3415522575378418, "logps/chosen": -0.0005937953246757388, "logps/rejected": -1.3931825160980225, "loss": 0.8658, "nll_loss": 0.21641241014003754, "rewards/accuracies": 1.0, "rewards/chosen": -5.9379533922765404e-05, "rewards/margins": 0.13925886154174805, "rewards/rejected": -0.13931825757026672, "step": 7532 }, { "epoch": 5.20954356846473, "grad_norm": 10.934542655944824, "learning_rate": 2.6613646841862612e-05, "log_odds_chosen": 10.167157173156738, "log_odds_ratio": -8.426292333751917e-05, "logits/chosen": -0.5966573357582092, "logits/rejected": -0.6782484650611877, "logps/chosen": -0.000651887443382293, "logps/rejected": -1.9049832820892334, "loss": 1.2315, "nll_loss": 0.3078649044036865, "rewards/accuracies": 1.0, "rewards/chosen": -6.518874579342082e-05, "rewards/margins": 0.19043314456939697, "rewards/rejected": -0.19049833714962006, "step": 7533 }, { "epoch": 5.210235131396957, "grad_norm": 9.348286628723145, "learning_rate": 2.660980482557246e-05, "log_odds_chosen": 9.688858032226562, "log_odds_ratio": -0.004896416794508696, "logits/chosen": -0.4835703670978546, "logits/rejected": -0.6681973934173584, "logps/chosen": -0.002467024838551879, "logps/rejected": -2.4683291912078857, "loss": 0.7232, "nll_loss": 0.18031813204288483, "rewards/accuracies": 1.0, "rewards/chosen": -0.000246702489675954, "rewards/margins": 0.24658623337745667, "rewards/rejected": -0.2468329221010208, "step": 7534 }, { "epoch": 5.210926694329184, "grad_norm": 8.002618789672852, "learning_rate": 2.6605962809282314e-05, "log_odds_chosen": 10.053289413452148, "log_odds_ratio": -0.00027556309942156076, "logits/chosen": -0.28471508622169495, "logits/rejected": -0.2700623571872711, "logps/chosen": -0.0002975426323246211, "logps/rejected": -1.8266332149505615, "loss": 1.0783, "nll_loss": 0.2695525884628296, "rewards/accuracies": 1.0, "rewards/chosen": -2.9754261049674824e-05, "rewards/margins": 0.18263356387615204, "rewards/rejected": -0.18266333639621735, "step": 7535 }, { "epoch": 5.211618257261411, "grad_norm": 9.897976875305176, "learning_rate": 2.6602120792992163e-05, "log_odds_chosen": 9.274949073791504, "log_odds_ratio": -0.00024828262394294143, "logits/chosen": 0.012322517111897469, "logits/rejected": -0.06911614537239075, "logps/chosen": -0.0011005365522578359, "logps/rejected": -1.8486918210983276, "loss": 1.2116, "nll_loss": 0.30286920070648193, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011005365377059206, "rewards/margins": 0.18475914001464844, "rewards/rejected": -0.1848691999912262, "step": 7536 }, { "epoch": 5.212309820193638, "grad_norm": 5.6801934242248535, "learning_rate": 2.659827877670201e-05, "log_odds_chosen": 9.88988971710205, "log_odds_ratio": -8.744581282371655e-05, "logits/chosen": -0.3401045799255371, "logits/rejected": -0.4230215847492218, "logps/chosen": -0.000338320794980973, "logps/rejected": -1.680631160736084, "loss": 0.6281, "nll_loss": 0.15702250599861145, "rewards/accuracies": 1.0, "rewards/chosen": -3.383207877050154e-05, "rewards/margins": 0.1680292785167694, "rewards/rejected": -0.16806311905384064, "step": 7537 }, { "epoch": 5.213001383125865, "grad_norm": 7.96742582321167, "learning_rate": 2.6594436760411867e-05, "log_odds_chosen": 11.109058380126953, "log_odds_ratio": -7.072191510815173e-05, "logits/chosen": -0.35100793838500977, "logits/rejected": -0.40755900740623474, "logps/chosen": -0.00039556881529279053, "logps/rejected": -2.5512094497680664, "loss": 0.6497, "nll_loss": 0.16241194307804108, "rewards/accuracies": 1.0, "rewards/chosen": -3.9556882256874815e-05, "rewards/margins": 0.2550814151763916, "rewards/rejected": -0.2551209628582001, "step": 7538 }, { "epoch": 5.213692946058091, "grad_norm": 8.653180122375488, "learning_rate": 2.6590594744121717e-05, "log_odds_chosen": 10.56446361541748, "log_odds_ratio": -0.0005890832981094718, "logits/chosen": -0.39483940601348877, "logits/rejected": -0.4310857355594635, "logps/chosen": -0.0010279006091877818, "logps/rejected": -2.5635743141174316, "loss": 0.9006, "nll_loss": 0.22507987916469574, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010279006528435275, "rewards/margins": 0.256254643201828, "rewards/rejected": -0.25635743141174316, "step": 7539 }, { "epoch": 5.214384508990318, "grad_norm": 7.798567295074463, "learning_rate": 2.6586752727831566e-05, "log_odds_chosen": 11.562299728393555, "log_odds_ratio": -3.5191998904338107e-05, "logits/chosen": -0.5837400555610657, "logits/rejected": -0.5905430912971497, "logps/chosen": -0.00020522023260127753, "logps/rejected": -2.3719401359558105, "loss": 0.7627, "nll_loss": 0.19066068530082703, "rewards/accuracies": 1.0, "rewards/chosen": -2.052202216873411e-05, "rewards/margins": 0.23717349767684937, "rewards/rejected": -0.2371940165758133, "step": 7540 }, { "epoch": 5.215076071922545, "grad_norm": 14.764293670654297, "learning_rate": 2.658291071154142e-05, "log_odds_chosen": 10.847089767456055, "log_odds_ratio": -3.9235426811501384e-05, "logits/chosen": -0.2103194147348404, "logits/rejected": -0.24678084254264832, "logps/chosen": -0.0003935906570404768, "logps/rejected": -2.3666625022888184, "loss": 0.7798, "nll_loss": 0.1949402391910553, "rewards/accuracies": 1.0, "rewards/chosen": -3.9359063521260396e-05, "rewards/margins": 0.23662689328193665, "rewards/rejected": -0.2366662621498108, "step": 7541 }, { "epoch": 5.215767634854772, "grad_norm": 7.255589485168457, "learning_rate": 2.657906869525127e-05, "log_odds_chosen": 10.368997573852539, "log_odds_ratio": -0.0005649956874549389, "logits/chosen": -0.33564862608909607, "logits/rejected": -0.42874041199684143, "logps/chosen": -0.0008114329539239407, "logps/rejected": -2.0210976600646973, "loss": 0.6028, "nll_loss": 0.15064597129821777, "rewards/accuracies": 1.0, "rewards/chosen": -8.11432910268195e-05, "rewards/margins": 0.20202863216400146, "rewards/rejected": -0.20210978388786316, "step": 7542 }, { "epoch": 5.216459197786999, "grad_norm": 6.3288350105285645, "learning_rate": 2.657522667896112e-05, "log_odds_chosen": 10.309673309326172, "log_odds_ratio": -0.0007493701996281743, "logits/chosen": -0.528795599937439, "logits/rejected": -0.4613361358642578, "logps/chosen": -0.0005491769406944513, "logps/rejected": -1.9666938781738281, "loss": 0.7648, "nll_loss": 0.19112075865268707, "rewards/accuracies": 1.0, "rewards/chosen": -5.4917694797040895e-05, "rewards/margins": 0.19661447405815125, "rewards/rejected": -0.19666936993598938, "step": 7543 }, { "epoch": 5.217150760719226, "grad_norm": 4.966573715209961, "learning_rate": 2.6571384662670972e-05, "log_odds_chosen": 10.328919410705566, "log_odds_ratio": -4.260972491465509e-05, "logits/chosen": -0.5825825929641724, "logits/rejected": -0.6369431614875793, "logps/chosen": -0.00013902317732572556, "logps/rejected": -1.5345927476882935, "loss": 0.5713, "nll_loss": 0.14283086359500885, "rewards/accuracies": 1.0, "rewards/chosen": -1.3902318642067257e-05, "rewards/margins": 0.15344536304473877, "rewards/rejected": -0.15345928072929382, "step": 7544 }, { "epoch": 5.217842323651452, "grad_norm": 8.431014060974121, "learning_rate": 2.656754264638082e-05, "log_odds_chosen": 10.66819953918457, "log_odds_ratio": -4.8884499847190455e-05, "logits/chosen": -0.7278584241867065, "logits/rejected": -0.7446757555007935, "logps/chosen": -0.0006141972844488919, "logps/rejected": -2.559722423553467, "loss": 0.7858, "nll_loss": 0.1964486688375473, "rewards/accuracies": 1.0, "rewards/chosen": -6.141973426565528e-05, "rewards/margins": 0.25591087341308594, "rewards/rejected": -0.2559722661972046, "step": 7545 }, { "epoch": 5.218533886583679, "grad_norm": 19.670764923095703, "learning_rate": 2.656370063009067e-05, "log_odds_chosen": 10.58185863494873, "log_odds_ratio": -7.996035856194794e-05, "logits/chosen": -0.1423773467540741, "logits/rejected": -0.23043018579483032, "logps/chosen": -0.00021502267918549478, "logps/rejected": -2.1016366481781006, "loss": 1.1633, "nll_loss": 0.29081207513809204, "rewards/accuracies": 1.0, "rewards/chosen": -2.150226828234736e-05, "rewards/margins": 0.21014216542243958, "rewards/rejected": -0.2101636528968811, "step": 7546 }, { "epoch": 5.219225449515906, "grad_norm": 22.079612731933594, "learning_rate": 2.6559858613800526e-05, "log_odds_chosen": 9.88987922668457, "log_odds_ratio": -0.0003320075338706374, "logits/chosen": -0.7029349207878113, "logits/rejected": -0.8374977111816406, "logps/chosen": -0.0006211322615854442, "logps/rejected": -1.8706246614456177, "loss": 0.6639, "nll_loss": 0.16593071818351746, "rewards/accuracies": 1.0, "rewards/chosen": -6.211322033777833e-05, "rewards/margins": 0.1870003342628479, "rewards/rejected": -0.18706245720386505, "step": 7547 }, { "epoch": 5.219917012448133, "grad_norm": 8.58415699005127, "learning_rate": 2.6556016597510375e-05, "log_odds_chosen": 10.443418502807617, "log_odds_ratio": -4.11863875342533e-05, "logits/chosen": -0.5435079336166382, "logits/rejected": -0.5880762338638306, "logps/chosen": -0.0008465655264444649, "logps/rejected": -2.0463433265686035, "loss": 0.979, "nll_loss": 0.2447490394115448, "rewards/accuracies": 1.0, "rewards/chosen": -8.46565599204041e-05, "rewards/margins": 0.2045496702194214, "rewards/rejected": -0.20463432371616364, "step": 7548 }, { "epoch": 5.22060857538036, "grad_norm": 19.33316421508789, "learning_rate": 2.6552174581220224e-05, "log_odds_chosen": 9.962740898132324, "log_odds_ratio": -0.0001467976690037176, "logits/chosen": -0.22345323860645294, "logits/rejected": -0.23257654905319214, "logps/chosen": -0.0007146099815145135, "logps/rejected": -1.6609793901443481, "loss": 1.134, "nll_loss": 0.28347694873809814, "rewards/accuracies": 1.0, "rewards/chosen": -7.146099233068526e-05, "rewards/margins": 0.16602648794651031, "rewards/rejected": -0.16609793901443481, "step": 7549 }, { "epoch": 5.2213001383125865, "grad_norm": 7.09939432144165, "learning_rate": 2.654833256493008e-05, "log_odds_chosen": 9.59273910522461, "log_odds_ratio": -0.0005566454492509365, "logits/chosen": -0.44439682364463806, "logits/rejected": -0.3926330804824829, "logps/chosen": -0.0005894859787076712, "logps/rejected": -1.5625590085983276, "loss": 0.9567, "nll_loss": 0.2391187995672226, "rewards/accuracies": 1.0, "rewards/chosen": -5.8948597143171355e-05, "rewards/margins": 0.1561969518661499, "rewards/rejected": -0.15625590085983276, "step": 7550 }, { "epoch": 5.221991701244813, "grad_norm": 23.660127639770508, "learning_rate": 2.654449054863993e-05, "log_odds_chosen": 10.068212509155273, "log_odds_ratio": -0.00010939614730887115, "logits/chosen": -0.6090282797813416, "logits/rejected": -0.6502014994621277, "logps/chosen": -0.0002628647198434919, "logps/rejected": -1.9712339639663696, "loss": 0.8848, "nll_loss": 0.22119513154029846, "rewards/accuracies": 1.0, "rewards/chosen": -2.6286472348147072e-05, "rewards/margins": 0.19709712266921997, "rewards/rejected": -0.19712340831756592, "step": 7551 }, { "epoch": 5.22268326417704, "grad_norm": 8.631831169128418, "learning_rate": 2.6540648532349778e-05, "log_odds_chosen": 9.057621955871582, "log_odds_ratio": -0.0005410752492025495, "logits/chosen": -0.7084760665893555, "logits/rejected": -0.5085403919219971, "logps/chosen": -0.0005544874002225697, "logps/rejected": -1.2910645008087158, "loss": 1.2109, "nll_loss": 0.3026607632637024, "rewards/accuracies": 1.0, "rewards/chosen": -5.54487451154273e-05, "rewards/margins": 0.12905099987983704, "rewards/rejected": -0.12910646200180054, "step": 7552 }, { "epoch": 5.223374827109267, "grad_norm": 10.814403533935547, "learning_rate": 2.653680651605963e-05, "log_odds_chosen": 11.15772819519043, "log_odds_ratio": -3.051651037822012e-05, "logits/chosen": -0.7379977107048035, "logits/rejected": -0.7769466638565063, "logps/chosen": -0.00015586796507705003, "logps/rejected": -1.9965726137161255, "loss": 1.3236, "nll_loss": 0.330906480550766, "rewards/accuracies": 1.0, "rewards/chosen": -1.5586796507705003e-05, "rewards/margins": 0.19964167475700378, "rewards/rejected": -0.19965726137161255, "step": 7553 }, { "epoch": 5.224066390041494, "grad_norm": 10.185839653015137, "learning_rate": 2.653296449976948e-05, "log_odds_chosen": 10.938926696777344, "log_odds_ratio": -5.6243785365950316e-05, "logits/chosen": -0.8716825246810913, "logits/rejected": -0.8651461601257324, "logps/chosen": -0.0005617404822260141, "logps/rejected": -2.5082576274871826, "loss": 0.7537, "nll_loss": 0.18841758370399475, "rewards/accuracies": 1.0, "rewards/chosen": -5.617404531221837e-05, "rewards/margins": 0.25076958537101746, "rewards/rejected": -0.25082576274871826, "step": 7554 }, { "epoch": 5.224757952973721, "grad_norm": 10.396666526794434, "learning_rate": 2.652912248347933e-05, "log_odds_chosen": 10.43869686126709, "log_odds_ratio": -7.070177525747567e-05, "logits/chosen": -0.36217403411865234, "logits/rejected": -0.410467267036438, "logps/chosen": -0.00030194493592716753, "logps/rejected": -1.7846906185150146, "loss": 1.4544, "nll_loss": 0.3636040687561035, "rewards/accuracies": 1.0, "rewards/chosen": -3.01944965030998e-05, "rewards/margins": 0.17843888700008392, "rewards/rejected": -0.17846906185150146, "step": 7555 }, { "epoch": 5.2254495159059475, "grad_norm": 7.918328285217285, "learning_rate": 2.6525280467189184e-05, "log_odds_chosen": 10.289556503295898, "log_odds_ratio": -0.00011836655903607607, "logits/chosen": -0.7381885647773743, "logits/rejected": -0.7799155712127686, "logps/chosen": -0.0009419352281838655, "logps/rejected": -2.351292133331299, "loss": 0.7091, "nll_loss": 0.1772594451904297, "rewards/accuracies": 1.0, "rewards/chosen": -9.419352136319503e-05, "rewards/margins": 0.2350350320339203, "rewards/rejected": -0.2351292073726654, "step": 7556 }, { "epoch": 5.226141078838174, "grad_norm": 11.101275444030762, "learning_rate": 2.6521438450899033e-05, "log_odds_chosen": 11.636258125305176, "log_odds_ratio": -1.3528469025914092e-05, "logits/chosen": -0.5852770209312439, "logits/rejected": -0.699225127696991, "logps/chosen": -0.00014542852295562625, "logps/rejected": -2.403111457824707, "loss": 0.6832, "nll_loss": 0.17079591751098633, "rewards/accuracies": 1.0, "rewards/chosen": -1.4542852113663685e-05, "rewards/margins": 0.2402966022491455, "rewards/rejected": -0.2403111457824707, "step": 7557 }, { "epoch": 5.226832641770401, "grad_norm": 6.832596302032471, "learning_rate": 2.6517596434608882e-05, "log_odds_chosen": 9.619921684265137, "log_odds_ratio": -0.0004081670194864273, "logits/chosen": -0.6781374216079712, "logits/rejected": -0.6394006013870239, "logps/chosen": -0.00045663962373510003, "logps/rejected": -1.9987691640853882, "loss": 1.0768, "nll_loss": 0.2691575586795807, "rewards/accuracies": 1.0, "rewards/chosen": -4.566396091831848e-05, "rewards/margins": 0.1998312622308731, "rewards/rejected": -0.19987693428993225, "step": 7558 }, { "epoch": 5.227524204702628, "grad_norm": 15.910563468933105, "learning_rate": 2.6513754418318738e-05, "log_odds_chosen": 11.075983047485352, "log_odds_ratio": -3.4982949728146195e-05, "logits/chosen": -0.7346349358558655, "logits/rejected": -0.8451979756355286, "logps/chosen": -0.00029519235249608755, "logps/rejected": -2.4787511825561523, "loss": 0.5291, "nll_loss": 0.13227322697639465, "rewards/accuracies": 1.0, "rewards/chosen": -2.9519236704800278e-05, "rewards/margins": 0.2478456050157547, "rewards/rejected": -0.2478751242160797, "step": 7559 }, { "epoch": 5.228215767634855, "grad_norm": 4.949207305908203, "learning_rate": 2.6509912402028587e-05, "log_odds_chosen": 10.036555290222168, "log_odds_ratio": -7.955866021802649e-05, "logits/chosen": -0.5480470657348633, "logits/rejected": -0.4952780604362488, "logps/chosen": -0.000600629486143589, "logps/rejected": -2.0421175956726074, "loss": 1.3625, "nll_loss": 0.34061944484710693, "rewards/accuracies": 1.0, "rewards/chosen": -6.0062950069550425e-05, "rewards/margins": 0.2041517198085785, "rewards/rejected": -0.2042117714881897, "step": 7560 }, { "epoch": 5.228907330567082, "grad_norm": 7.809098720550537, "learning_rate": 2.6506070385738436e-05, "log_odds_chosen": 10.17052936553955, "log_odds_ratio": -0.00016491710266564041, "logits/chosen": -0.6219543814659119, "logits/rejected": -0.6439845561981201, "logps/chosen": -0.00039262970676645637, "logps/rejected": -2.2306129932403564, "loss": 0.7724, "nll_loss": 0.19308488070964813, "rewards/accuracies": 1.0, "rewards/chosen": -3.9262969949049875e-05, "rewards/margins": 0.22302204370498657, "rewards/rejected": -0.22306130826473236, "step": 7561 }, { "epoch": 5.2295988934993085, "grad_norm": 6.184190273284912, "learning_rate": 2.650222836944829e-05, "log_odds_chosen": 10.522407531738281, "log_odds_ratio": -0.0001425845839548856, "logits/chosen": -0.6801462769508362, "logits/rejected": -0.6958547234535217, "logps/chosen": -0.005194925703108311, "logps/rejected": -2.2992043495178223, "loss": 0.6993, "nll_loss": 0.1748044192790985, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005194926052354276, "rewards/margins": 0.22940094769001007, "rewards/rejected": -0.22992043197155, "step": 7562 }, { "epoch": 5.230290456431535, "grad_norm": 6.153726100921631, "learning_rate": 2.6498386353158138e-05, "log_odds_chosen": 10.447622299194336, "log_odds_ratio": -0.0019986850675195456, "logits/chosen": -0.2738765478134155, "logits/rejected": -0.29188835620880127, "logps/chosen": -0.00196833279915154, "logps/rejected": -2.315131664276123, "loss": 0.8464, "nll_loss": 0.21140965819358826, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001968332944670692, "rewards/margins": 0.23131635785102844, "rewards/rejected": -0.23151318728923798, "step": 7563 }, { "epoch": 5.230982019363762, "grad_norm": 8.70680046081543, "learning_rate": 2.6494544336867987e-05, "log_odds_chosen": 9.893153190612793, "log_odds_ratio": -0.0008131297072395682, "logits/chosen": -0.281112939119339, "logits/rejected": -0.32066017389297485, "logps/chosen": -0.0007761477027088404, "logps/rejected": -1.8777573108673096, "loss": 1.1029, "nll_loss": 0.2756534218788147, "rewards/accuracies": 1.0, "rewards/chosen": -7.761476445011795e-05, "rewards/margins": 0.1876981258392334, "rewards/rejected": -0.18777573108673096, "step": 7564 }, { "epoch": 5.231673582295989, "grad_norm": 12.37846851348877, "learning_rate": 2.6490702320577843e-05, "log_odds_chosen": 12.095224380493164, "log_odds_ratio": -1.5422276192111894e-05, "logits/chosen": -0.6123469471931458, "logits/rejected": -0.6750447750091553, "logps/chosen": -7.237125828396529e-05, "logps/rejected": -2.5945072174072266, "loss": 0.7965, "nll_loss": 0.19912873208522797, "rewards/accuracies": 1.0, "rewards/chosen": -7.23712628314388e-06, "rewards/margins": 0.259443461894989, "rewards/rejected": -0.2594507038593292, "step": 7565 }, { "epoch": 5.232365145228216, "grad_norm": 6.599029541015625, "learning_rate": 2.6486860304287692e-05, "log_odds_chosen": 9.663243293762207, "log_odds_ratio": -0.00901144091039896, "logits/chosen": -0.5348978042602539, "logits/rejected": -0.5369443893432617, "logps/chosen": -0.002337898127734661, "logps/rejected": -2.131387710571289, "loss": 0.6999, "nll_loss": 0.17407315969467163, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023378983314614743, "rewards/margins": 0.21290498971939087, "rewards/rejected": -0.21313877403736115, "step": 7566 }, { "epoch": 5.233056708160443, "grad_norm": 17.254377365112305, "learning_rate": 2.648301828799754e-05, "log_odds_chosen": 10.758286476135254, "log_odds_ratio": -8.570626232540235e-05, "logits/chosen": -0.32616153359413147, "logits/rejected": -0.43125247955322266, "logps/chosen": -0.0003337445668876171, "logps/rejected": -2.5154995918273926, "loss": 0.9858, "nll_loss": 0.24644559621810913, "rewards/accuracies": 1.0, "rewards/chosen": -3.3374453778378665e-05, "rewards/margins": 0.2515166103839874, "rewards/rejected": -0.25154998898506165, "step": 7567 }, { "epoch": 5.2337482710926695, "grad_norm": 8.67546558380127, "learning_rate": 2.6479176271707397e-05, "log_odds_chosen": 10.991742134094238, "log_odds_ratio": -5.405677075032145e-05, "logits/chosen": -0.7275859117507935, "logits/rejected": -0.7737497091293335, "logps/chosen": -8.817527850624174e-05, "logps/rejected": -1.8147327899932861, "loss": 0.8772, "nll_loss": 0.21929170191287994, "rewards/accuracies": 1.0, "rewards/chosen": -8.817527486826293e-06, "rewards/margins": 0.18146446347236633, "rewards/rejected": -0.18147329986095428, "step": 7568 }, { "epoch": 5.234439834024896, "grad_norm": 11.16156005859375, "learning_rate": 2.6475334255417246e-05, "log_odds_chosen": 10.978002548217773, "log_odds_ratio": -3.5411394492257386e-05, "logits/chosen": -0.5314689874649048, "logits/rejected": -0.5226581692695618, "logps/chosen": -0.00018502044258639216, "logps/rejected": -2.1848983764648438, "loss": 0.8016, "nll_loss": 0.20040425658226013, "rewards/accuracies": 1.0, "rewards/chosen": -1.850204535003286e-05, "rewards/margins": 0.21847134828567505, "rewards/rejected": -0.2184898406267166, "step": 7569 }, { "epoch": 5.235131396957123, "grad_norm": 4.604856491088867, "learning_rate": 2.6471492239127095e-05, "log_odds_chosen": 10.077240943908691, "log_odds_ratio": -0.0003931020328309387, "logits/chosen": -0.3274500370025635, "logits/rejected": -0.30237939953804016, "logps/chosen": -0.00028508197283372283, "logps/rejected": -1.58601713180542, "loss": 0.5674, "nll_loss": 0.14182157814502716, "rewards/accuracies": 1.0, "rewards/chosen": -2.8508195100585e-05, "rewards/margins": 0.1585732102394104, "rewards/rejected": -0.15860171616077423, "step": 7570 }, { "epoch": 5.23582295988935, "grad_norm": 9.677464485168457, "learning_rate": 2.6467650222836947e-05, "log_odds_chosen": 11.102912902832031, "log_odds_ratio": -5.850612433278002e-05, "logits/chosen": -0.3548862338066101, "logits/rejected": -0.38852009177207947, "logps/chosen": -0.00034190929727628827, "logps/rejected": -3.049135684967041, "loss": 0.7139, "nll_loss": 0.17846885323524475, "rewards/accuracies": 1.0, "rewards/chosen": -3.4190929000033066e-05, "rewards/margins": 0.304879367351532, "rewards/rejected": -0.30491358041763306, "step": 7571 }, { "epoch": 5.236514522821577, "grad_norm": 6.259531497955322, "learning_rate": 2.6463808206546796e-05, "log_odds_chosen": 10.08462142944336, "log_odds_ratio": -0.00012527560465969145, "logits/chosen": -0.8586251735687256, "logits/rejected": -0.914375901222229, "logps/chosen": -0.0004578919615596533, "logps/rejected": -1.9007015228271484, "loss": 0.6399, "nll_loss": 0.1599629819393158, "rewards/accuracies": 1.0, "rewards/chosen": -4.5789194700773805e-05, "rewards/margins": 0.19002437591552734, "rewards/rejected": -0.19007018208503723, "step": 7572 }, { "epoch": 5.237206085753804, "grad_norm": 9.60282039642334, "learning_rate": 2.6459966190256645e-05, "log_odds_chosen": 10.403770446777344, "log_odds_ratio": -0.00023783154028933495, "logits/chosen": -0.27836769819259644, "logits/rejected": -0.3159245252609253, "logps/chosen": -0.0008468222804367542, "logps/rejected": -2.479321002960205, "loss": 1.2405, "nll_loss": 0.3101038932800293, "rewards/accuracies": 1.0, "rewards/chosen": -8.468222949886695e-05, "rewards/margins": 0.24784742295742035, "rewards/rejected": -0.24793210625648499, "step": 7573 }, { "epoch": 5.2378976486860305, "grad_norm": 10.499727249145508, "learning_rate": 2.64561241739665e-05, "log_odds_chosen": 11.214341163635254, "log_odds_ratio": -5.0808059313567355e-05, "logits/chosen": -0.6644982099533081, "logits/rejected": -0.6907986998558044, "logps/chosen": -0.00016903187497518957, "logps/rejected": -2.415891408920288, "loss": 0.7948, "nll_loss": 0.19869248569011688, "rewards/accuracies": 1.0, "rewards/chosen": -1.6903188225114718e-05, "rewards/margins": 0.24157223105430603, "rewards/rejected": -0.24158914387226105, "step": 7574 }, { "epoch": 5.238589211618257, "grad_norm": 19.77359962463379, "learning_rate": 2.645228215767635e-05, "log_odds_chosen": 7.320486068725586, "log_odds_ratio": -0.5365301966667175, "logits/chosen": -0.5406480431556702, "logits/rejected": -0.6060658693313599, "logps/chosen": -0.06338723003864288, "logps/rejected": -1.2642097473144531, "loss": 0.8243, "nll_loss": 0.15242083370685577, "rewards/accuracies": 0.875, "rewards/chosen": -0.006338723469525576, "rewards/margins": 0.12008225172758102, "rewards/rejected": -0.1264209747314453, "step": 7575 }, { "epoch": 5.239280774550484, "grad_norm": 9.19914436340332, "learning_rate": 2.64484401413862e-05, "log_odds_chosen": 10.021259307861328, "log_odds_ratio": -0.0002777604095172137, "logits/chosen": -0.5509160757064819, "logits/rejected": -0.5600121021270752, "logps/chosen": -0.0009588706307113171, "logps/rejected": -2.2768490314483643, "loss": 2.0222, "nll_loss": 0.5055317878723145, "rewards/accuracies": 1.0, "rewards/chosen": -9.588706598151475e-05, "rewards/margins": 0.22758901119232178, "rewards/rejected": -0.22768491506576538, "step": 7576 }, { "epoch": 5.239972337482711, "grad_norm": 9.574323654174805, "learning_rate": 2.6444598125096055e-05, "log_odds_chosen": 9.933778762817383, "log_odds_ratio": -0.00023947448062244803, "logits/chosen": -0.6763154864311218, "logits/rejected": -0.6849706172943115, "logps/chosen": -0.00020450836746022105, "logps/rejected": -1.3118788003921509, "loss": 0.9615, "nll_loss": 0.24034947156906128, "rewards/accuracies": 1.0, "rewards/chosen": -2.0450837837415747e-05, "rewards/margins": 0.1311674416065216, "rewards/rejected": -0.13118787109851837, "step": 7577 }, { "epoch": 5.240663900414938, "grad_norm": 8.033780097961426, "learning_rate": 2.6440756108805904e-05, "log_odds_chosen": 9.859085083007812, "log_odds_ratio": -0.00016901653725653887, "logits/chosen": -0.3645017743110657, "logits/rejected": -0.5088723301887512, "logps/chosen": -0.00046245334669947624, "logps/rejected": -1.7504345178604126, "loss": 0.9783, "nll_loss": 0.24455362558364868, "rewards/accuracies": 1.0, "rewards/chosen": -4.624533903552219e-05, "rewards/margins": 0.1749972254037857, "rewards/rejected": -0.17504346370697021, "step": 7578 }, { "epoch": 5.241355463347165, "grad_norm": 11.642791748046875, "learning_rate": 2.6436914092515753e-05, "log_odds_chosen": 10.092130661010742, "log_odds_ratio": -7.946189725771546e-05, "logits/chosen": -0.39425206184387207, "logits/rejected": -0.43070298433303833, "logps/chosen": -0.0004464397206902504, "logps/rejected": -1.8897475004196167, "loss": 1.096, "nll_loss": 0.27399927377700806, "rewards/accuracies": 1.0, "rewards/chosen": -4.464397352421656e-05, "rewards/margins": 0.18893010914325714, "rewards/rejected": -0.1889747530221939, "step": 7579 }, { "epoch": 5.2420470262793915, "grad_norm": 12.233444213867188, "learning_rate": 2.6433072076225606e-05, "log_odds_chosen": 10.869236946105957, "log_odds_ratio": -4.346840432845056e-05, "logits/chosen": -0.49191814661026, "logits/rejected": -0.5358452796936035, "logps/chosen": -0.0001388008677167818, "logps/rejected": -1.8795957565307617, "loss": 0.7285, "nll_loss": 0.18211990594863892, "rewards/accuracies": 1.0, "rewards/chosen": -1.3880086044082418e-05, "rewards/margins": 0.18794569373130798, "rewards/rejected": -0.18795958161354065, "step": 7580 }, { "epoch": 5.242738589211618, "grad_norm": 8.853819847106934, "learning_rate": 2.6429230059935455e-05, "log_odds_chosen": 10.513277053833008, "log_odds_ratio": -0.0003534628194756806, "logits/chosen": -0.4861745536327362, "logits/rejected": -0.5915360450744629, "logps/chosen": -0.0008426437852904201, "logps/rejected": -2.690995693206787, "loss": 1.1245, "nll_loss": 0.2810984253883362, "rewards/accuracies": 1.0, "rewards/chosen": -8.4264378529042e-05, "rewards/margins": 0.2690153121948242, "rewards/rejected": -0.2690995931625366, "step": 7581 }, { "epoch": 5.243430152143845, "grad_norm": 6.510776996612549, "learning_rate": 2.6425388043645304e-05, "log_odds_chosen": 8.518279075622559, "log_odds_ratio": -0.000700904696714133, "logits/chosen": -0.6766083240509033, "logits/rejected": -0.7357896566390991, "logps/chosen": -0.0011913528433069587, "logps/rejected": -1.1983290910720825, "loss": 1.104, "nll_loss": 0.2759358286857605, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011913527850992978, "rewards/margins": 0.11971378326416016, "rewards/rejected": -0.11983291804790497, "step": 7582 }, { "epoch": 5.244121715076072, "grad_norm": 7.311406135559082, "learning_rate": 2.642154602735516e-05, "log_odds_chosen": 10.2103271484375, "log_odds_ratio": -0.0016313835512846708, "logits/chosen": 0.12058839201927185, "logits/rejected": 0.015313982963562012, "logps/chosen": -0.005035826470702887, "logps/rejected": -2.050142526626587, "loss": 1.1695, "nll_loss": 0.2922120690345764, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005035826470702887, "rewards/margins": 0.2045106738805771, "rewards/rejected": -0.20501424372196198, "step": 7583 }, { "epoch": 5.244813278008299, "grad_norm": 6.708953857421875, "learning_rate": 2.641770401106501e-05, "log_odds_chosen": 10.654993057250977, "log_odds_ratio": -6.913335528224707e-05, "logits/chosen": -0.2991570234298706, "logits/rejected": -0.3160810172557831, "logps/chosen": -0.0002667972003109753, "logps/rejected": -2.0909149646759033, "loss": 0.691, "nll_loss": 0.172745019197464, "rewards/accuracies": 1.0, "rewards/chosen": -2.667971966729965e-05, "rewards/margins": 0.2090648114681244, "rewards/rejected": -0.20909149944782257, "step": 7584 }, { "epoch": 5.245504840940526, "grad_norm": 5.9692559242248535, "learning_rate": 2.6413861994774858e-05, "log_odds_chosen": 10.247108459472656, "log_odds_ratio": -0.00015192184946499765, "logits/chosen": -0.4084897041320801, "logits/rejected": -0.46908169984817505, "logps/chosen": -0.0007824224303476512, "logps/rejected": -2.4277756214141846, "loss": 0.6644, "nll_loss": 0.1660842001438141, "rewards/accuracies": 1.0, "rewards/chosen": -7.824225031072274e-05, "rewards/margins": 0.24269933998584747, "rewards/rejected": -0.24277758598327637, "step": 7585 }, { "epoch": 5.246196403872752, "grad_norm": 9.228209495544434, "learning_rate": 2.6410019978484714e-05, "log_odds_chosen": 10.015410423278809, "log_odds_ratio": -0.002055589109659195, "logits/chosen": -0.5356808304786682, "logits/rejected": -0.6216482520103455, "logps/chosen": -0.007103268522769213, "logps/rejected": -1.934248685836792, "loss": 0.9288, "nll_loss": 0.2319985330104828, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007103268289938569, "rewards/margins": 0.19271454215049744, "rewards/rejected": -0.19342486560344696, "step": 7586 }, { "epoch": 5.246887966804979, "grad_norm": 7.3460917472839355, "learning_rate": 2.6406177962194563e-05, "log_odds_chosen": 10.928125381469727, "log_odds_ratio": -0.0001309488870901987, "logits/chosen": -0.6458200216293335, "logits/rejected": -0.7437701225280762, "logps/chosen": -0.000363502447726205, "logps/rejected": -2.271428108215332, "loss": 0.6664, "nll_loss": 0.16659240424633026, "rewards/accuracies": 1.0, "rewards/chosen": -3.6350247683003545e-05, "rewards/margins": 0.2271064817905426, "rewards/rejected": -0.2271428257226944, "step": 7587 }, { "epoch": 5.247579529737206, "grad_norm": 7.9204511642456055, "learning_rate": 2.640233594590441e-05, "log_odds_chosen": 11.812535285949707, "log_odds_ratio": -1.5402521967189386e-05, "logits/chosen": -0.6082082390785217, "logits/rejected": -0.641418993473053, "logps/chosen": -0.00013046017556916922, "logps/rejected": -2.6230921745300293, "loss": 0.5233, "nll_loss": 0.13082760572433472, "rewards/accuracies": 1.0, "rewards/chosen": -1.3046018466411624e-05, "rewards/margins": 0.2622961401939392, "rewards/rejected": -0.262309193611145, "step": 7588 }, { "epoch": 5.248271092669433, "grad_norm": 9.620582580566406, "learning_rate": 2.6398493929614264e-05, "log_odds_chosen": 10.287154197692871, "log_odds_ratio": -5.5621800129301846e-05, "logits/chosen": -0.5223538279533386, "logits/rejected": -0.5681127309799194, "logps/chosen": -0.0006379556725732982, "logps/rejected": -2.0750598907470703, "loss": 0.7044, "nll_loss": 0.1761023998260498, "rewards/accuracies": 1.0, "rewards/chosen": -6.379557453328744e-05, "rewards/margins": 0.20744217932224274, "rewards/rejected": -0.2075059860944748, "step": 7589 }, { "epoch": 5.24896265560166, "grad_norm": 12.833014488220215, "learning_rate": 2.6394651913324113e-05, "log_odds_chosen": 10.22948169708252, "log_odds_ratio": -7.96320236986503e-05, "logits/chosen": -0.8496423959732056, "logits/rejected": -0.8584446907043457, "logps/chosen": -0.0002447866427246481, "logps/rejected": -2.0432004928588867, "loss": 0.6975, "nll_loss": 0.17437176406383514, "rewards/accuracies": 1.0, "rewards/chosen": -2.4478662453475408e-05, "rewards/margins": 0.2042955756187439, "rewards/rejected": -0.204320028424263, "step": 7590 }, { "epoch": 5.249654218533887, "grad_norm": 8.8342924118042, "learning_rate": 2.6390809897033962e-05, "log_odds_chosen": 9.66745662689209, "log_odds_ratio": -8.37321495055221e-05, "logits/chosen": -0.5921209454536438, "logits/rejected": -0.7187113761901855, "logps/chosen": -0.00023703300394117832, "logps/rejected": -1.3444901704788208, "loss": 0.7808, "nll_loss": 0.1951880007982254, "rewards/accuracies": 1.0, "rewards/chosen": -2.3703301849309355e-05, "rewards/margins": 0.1344253122806549, "rewards/rejected": -0.13444900512695312, "step": 7591 }, { "epoch": 5.250345781466113, "grad_norm": 8.780054092407227, "learning_rate": 2.6386967880743818e-05, "log_odds_chosen": 10.155247688293457, "log_odds_ratio": -0.00027468675398267806, "logits/chosen": -0.6420718431472778, "logits/rejected": -0.6700660586357117, "logps/chosen": -0.001097345957532525, "logps/rejected": -2.030292510986328, "loss": 0.5962, "nll_loss": 0.14901979267597198, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001097345957532525, "rewards/margins": 0.20291951298713684, "rewards/rejected": -0.20302924513816833, "step": 7592 }, { "epoch": 5.25103734439834, "grad_norm": 8.005521774291992, "learning_rate": 2.6383125864453667e-05, "log_odds_chosen": 10.611947059631348, "log_odds_ratio": -0.00013667433813679963, "logits/chosen": -0.7551765441894531, "logits/rejected": -0.7871332168579102, "logps/chosen": -0.005232673604041338, "logps/rejected": -2.3519179821014404, "loss": 0.6701, "nll_loss": 0.16752111911773682, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005232674302533269, "rewards/margins": 0.2346685528755188, "rewards/rejected": -0.23519182205200195, "step": 7593 }, { "epoch": 5.251728907330567, "grad_norm": 6.8082685470581055, "learning_rate": 2.6379283848163516e-05, "log_odds_chosen": 10.906329154968262, "log_odds_ratio": -3.830662171822041e-05, "logits/chosen": -1.0006284713745117, "logits/rejected": -0.8602535724639893, "logps/chosen": -0.000224283488932997, "logps/rejected": -2.3183956146240234, "loss": 0.8172, "nll_loss": 0.20428450405597687, "rewards/accuracies": 1.0, "rewards/chosen": -2.242835034849122e-05, "rewards/margins": 0.23181715607643127, "rewards/rejected": -0.23183956742286682, "step": 7594 }, { "epoch": 5.252420470262794, "grad_norm": 8.90805435180664, "learning_rate": 2.6375441831873372e-05, "log_odds_chosen": 10.537378311157227, "log_odds_ratio": -0.00014934049977455288, "logits/chosen": -0.6920749545097351, "logits/rejected": -0.8484786748886108, "logps/chosen": -0.0003269157896284014, "logps/rejected": -1.866767406463623, "loss": 0.9527, "nll_loss": 0.23815563321113586, "rewards/accuracies": 1.0, "rewards/chosen": -3.26915796904359e-05, "rewards/margins": 0.1866440623998642, "rewards/rejected": -0.1866767406463623, "step": 7595 }, { "epoch": 5.253112033195021, "grad_norm": 6.233585834503174, "learning_rate": 2.637159981558322e-05, "log_odds_chosen": 10.72461986541748, "log_odds_ratio": -2.351926923438441e-05, "logits/chosen": -0.2917868494987488, "logits/rejected": -0.38088148832321167, "logps/chosen": -0.00023079040693119168, "logps/rejected": -2.118159770965576, "loss": 0.8197, "nll_loss": 0.20491938292980194, "rewards/accuracies": 1.0, "rewards/chosen": -2.3079042875906453e-05, "rewards/margins": 0.21179290115833282, "rewards/rejected": -0.2118159681558609, "step": 7596 }, { "epoch": 5.253803596127248, "grad_norm": 9.819358825683594, "learning_rate": 2.636775779929307e-05, "log_odds_chosen": 9.54849624633789, "log_odds_ratio": -0.0014317891327664256, "logits/chosen": -0.7229546308517456, "logits/rejected": -0.8251336216926575, "logps/chosen": -0.008922244422137737, "logps/rejected": -1.8008694648742676, "loss": 1.1143, "nll_loss": 0.27843788266181946, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008922244887799025, "rewards/margins": 0.17919471859931946, "rewards/rejected": -0.18008697032928467, "step": 7597 }, { "epoch": 5.254495159059474, "grad_norm": 11.097025871276855, "learning_rate": 2.6363915783002923e-05, "log_odds_chosen": 10.830201148986816, "log_odds_ratio": -6.067188223823905e-05, "logits/chosen": -0.41811248660087585, "logits/rejected": -0.5653706789016724, "logps/chosen": -0.00023369134578388184, "logps/rejected": -2.0545437335968018, "loss": 0.7246, "nll_loss": 0.18113973736763, "rewards/accuracies": 1.0, "rewards/chosen": -2.3369135305983946e-05, "rewards/margins": 0.2054310142993927, "rewards/rejected": -0.20545436441898346, "step": 7598 }, { "epoch": 5.255186721991701, "grad_norm": 17.628812789916992, "learning_rate": 2.636007376671277e-05, "log_odds_chosen": 10.451534271240234, "log_odds_ratio": -0.000129957843455486, "logits/chosen": -0.6208571791648865, "logits/rejected": -0.7254254221916199, "logps/chosen": -0.00016725575551390648, "logps/rejected": -1.9300652742385864, "loss": 0.8079, "nll_loss": 0.20194993913173676, "rewards/accuracies": 1.0, "rewards/chosen": -1.6725574823794886e-05, "rewards/margins": 0.1929897964000702, "rewards/rejected": -0.1930065155029297, "step": 7599 }, { "epoch": 5.255878284923928, "grad_norm": 7.975822448730469, "learning_rate": 2.635623175042262e-05, "log_odds_chosen": 10.203317642211914, "log_odds_ratio": -0.00014215914416126907, "logits/chosen": -0.25304776430130005, "logits/rejected": -0.3041943907737732, "logps/chosen": -0.000454009510576725, "logps/rejected": -1.9212093353271484, "loss": 0.7088, "nll_loss": 0.1771775186061859, "rewards/accuracies": 1.0, "rewards/chosen": -4.540094960248098e-05, "rewards/margins": 0.19207553565502167, "rewards/rejected": -0.19212093949317932, "step": 7600 }, { "epoch": 5.256569847856155, "grad_norm": 14.046150207519531, "learning_rate": 2.6352389734132477e-05, "log_odds_chosen": 10.884321212768555, "log_odds_ratio": -5.141493966220878e-05, "logits/chosen": -0.62176513671875, "logits/rejected": -0.5501907467842102, "logps/chosen": -0.00033163276384584606, "logps/rejected": -2.3565614223480225, "loss": 1.0363, "nll_loss": 0.25907301902770996, "rewards/accuracies": 1.0, "rewards/chosen": -3.3163276384584606e-05, "rewards/margins": 0.23562298715114594, "rewards/rejected": -0.23565614223480225, "step": 7601 }, { "epoch": 5.257261410788382, "grad_norm": 8.92452335357666, "learning_rate": 2.6348547717842326e-05, "log_odds_chosen": 11.022520065307617, "log_odds_ratio": -0.000125913429656066, "logits/chosen": -0.35074371099472046, "logits/rejected": -0.3500364422798157, "logps/chosen": -0.0002775251923594624, "logps/rejected": -2.63907527923584, "loss": 0.7324, "nll_loss": 0.18308815360069275, "rewards/accuracies": 1.0, "rewards/chosen": -2.7752517780754715e-05, "rewards/margins": 0.26387977600097656, "rewards/rejected": -0.2639075219631195, "step": 7602 }, { "epoch": 5.2579529737206085, "grad_norm": 9.57458782196045, "learning_rate": 2.6344705701552175e-05, "log_odds_chosen": 9.637293815612793, "log_odds_ratio": -0.0007488789851777256, "logits/chosen": -0.3129688799381256, "logits/rejected": -0.338850736618042, "logps/chosen": -0.0013760539004579186, "logps/rejected": -1.7696377038955688, "loss": 0.6114, "nll_loss": 0.15276853740215302, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013760538422502577, "rewards/margins": 0.17682614922523499, "rewards/rejected": -0.17696376144886017, "step": 7603 }, { "epoch": 5.258644536652835, "grad_norm": 9.156579971313477, "learning_rate": 2.634086368526203e-05, "log_odds_chosen": 10.068866729736328, "log_odds_ratio": -0.00020604400197044015, "logits/chosen": -0.17679022252559662, "logits/rejected": -0.2397395372390747, "logps/chosen": -0.00029306631768122315, "logps/rejected": -1.9695006608963013, "loss": 0.571, "nll_loss": 0.14272595942020416, "rewards/accuracies": 1.0, "rewards/chosen": -2.9306633223313838e-05, "rewards/margins": 0.19692076742649078, "rewards/rejected": -0.1969500631093979, "step": 7604 }, { "epoch": 5.259336099585062, "grad_norm": 6.802250862121582, "learning_rate": 2.633702166897188e-05, "log_odds_chosen": 9.829263687133789, "log_odds_ratio": -0.0001831715926527977, "logits/chosen": -0.2670425474643707, "logits/rejected": -0.39818575978279114, "logps/chosen": -0.0004503652162384242, "logps/rejected": -1.573431134223938, "loss": 0.9043, "nll_loss": 0.22605912387371063, "rewards/accuracies": 1.0, "rewards/chosen": -4.503652235143818e-05, "rewards/margins": 0.15729807317256927, "rewards/rejected": -0.15734311938285828, "step": 7605 }, { "epoch": 5.260027662517289, "grad_norm": 7.160187721252441, "learning_rate": 2.633317965268173e-05, "log_odds_chosen": 10.128305435180664, "log_odds_ratio": -0.00012072191020706668, "logits/chosen": -0.4220190644264221, "logits/rejected": -0.54075688123703, "logps/chosen": -0.0007584138656966388, "logps/rejected": -1.8679054975509644, "loss": 0.8598, "nll_loss": 0.21494436264038086, "rewards/accuracies": 1.0, "rewards/chosen": -7.58413880248554e-05, "rewards/margins": 0.18671470880508423, "rewards/rejected": -0.1867905557155609, "step": 7606 }, { "epoch": 5.260719225449516, "grad_norm": 7.017876148223877, "learning_rate": 2.632933763639158e-05, "log_odds_chosen": 9.915639877319336, "log_odds_ratio": -0.0001403828791808337, "logits/chosen": -0.32453134655952454, "logits/rejected": -0.42478859424591064, "logps/chosen": -0.00033312488812953234, "logps/rejected": -1.84895658493042, "loss": 0.587, "nll_loss": 0.14674820005893707, "rewards/accuracies": 1.0, "rewards/chosen": -3.331249172333628e-05, "rewards/margins": 0.18486234545707703, "rewards/rejected": -0.18489566445350647, "step": 7607 }, { "epoch": 5.261410788381743, "grad_norm": 5.297868251800537, "learning_rate": 2.632549562010143e-05, "log_odds_chosen": 9.418892860412598, "log_odds_ratio": -0.00041185441659763455, "logits/chosen": -0.6980939507484436, "logits/rejected": -0.6271117925643921, "logps/chosen": -0.0027880629058927298, "logps/rejected": -2.140965223312378, "loss": 1.0844, "nll_loss": 0.27104663848876953, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027880631387233734, "rewards/margins": 0.21381771564483643, "rewards/rejected": -0.21409651637077332, "step": 7608 }, { "epoch": 5.2621023513139695, "grad_norm": 8.745232582092285, "learning_rate": 2.632165360381128e-05, "log_odds_chosen": 10.277860641479492, "log_odds_ratio": -0.00029809505213052034, "logits/chosen": -0.7317168712615967, "logits/rejected": -0.8170226812362671, "logps/chosen": -0.0035285335034132004, "logps/rejected": -2.215935230255127, "loss": 0.7868, "nll_loss": 0.19666241109371185, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035285335616208613, "rewards/margins": 0.22124065458774567, "rewards/rejected": -0.22159351408481598, "step": 7609 }, { "epoch": 5.262793914246196, "grad_norm": 7.425640106201172, "learning_rate": 2.6317811587521135e-05, "log_odds_chosen": 10.117514610290527, "log_odds_ratio": -5.87234681006521e-05, "logits/chosen": -0.2286212295293808, "logits/rejected": -0.2650706171989441, "logps/chosen": -0.000267807423369959, "logps/rejected": -1.8272347450256348, "loss": 1.0774, "nll_loss": 0.26933613419532776, "rewards/accuracies": 1.0, "rewards/chosen": -2.678074270079378e-05, "rewards/margins": 0.18269670009613037, "rewards/rejected": -0.18272347748279572, "step": 7610 }, { "epoch": 5.263485477178423, "grad_norm": 5.914295196533203, "learning_rate": 2.6313969571230984e-05, "log_odds_chosen": 9.658827781677246, "log_odds_ratio": -0.0001679986744420603, "logits/chosen": -0.4364164471626282, "logits/rejected": -0.510414719581604, "logps/chosen": -0.0007456679595634341, "logps/rejected": -1.650540828704834, "loss": 0.7927, "nll_loss": 0.1981654018163681, "rewards/accuracies": 1.0, "rewards/chosen": -7.456679304596037e-05, "rewards/margins": 0.16497951745986938, "rewards/rejected": -0.1650540977716446, "step": 7611 }, { "epoch": 5.26417704011065, "grad_norm": 12.017693519592285, "learning_rate": 2.6310127554940833e-05, "log_odds_chosen": 11.355411529541016, "log_odds_ratio": -2.2519958292832598e-05, "logits/chosen": -0.6643273830413818, "logits/rejected": -0.6930115222930908, "logps/chosen": -8.34651873447001e-05, "logps/rejected": -1.9710543155670166, "loss": 0.5848, "nll_loss": 0.14619022607803345, "rewards/accuracies": 1.0, "rewards/chosen": -8.346518370672129e-06, "rewards/margins": 0.19709710776805878, "rewards/rejected": -0.19710545241832733, "step": 7612 }, { "epoch": 5.264868603042877, "grad_norm": 9.172362327575684, "learning_rate": 2.630628553865069e-05, "log_odds_chosen": 9.445535659790039, "log_odds_ratio": -0.0003532869159244001, "logits/chosen": -0.44796112179756165, "logits/rejected": -0.5494440197944641, "logps/chosen": -0.000467819394543767, "logps/rejected": -1.4349019527435303, "loss": 0.8907, "nll_loss": 0.22263102233409882, "rewards/accuracies": 1.0, "rewards/chosen": -4.678194090956822e-05, "rewards/margins": 0.14344340562820435, "rewards/rejected": -0.14349019527435303, "step": 7613 }, { "epoch": 5.265560165975104, "grad_norm": 5.42615270614624, "learning_rate": 2.6302443522360538e-05, "log_odds_chosen": 10.002979278564453, "log_odds_ratio": -0.0002709925174713135, "logits/chosen": -0.5812947154045105, "logits/rejected": -0.4607744514942169, "logps/chosen": -0.0012366706505417824, "logps/rejected": -1.862508773803711, "loss": 0.875, "nll_loss": 0.21871274709701538, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012366706505417824, "rewards/margins": 0.1861272156238556, "rewards/rejected": -0.18625088036060333, "step": 7614 }, { "epoch": 5.2662517289073305, "grad_norm": 13.714395523071289, "learning_rate": 2.6298601506070387e-05, "log_odds_chosen": 10.179404258728027, "log_odds_ratio": -6.211637810338289e-05, "logits/chosen": -0.911484956741333, "logits/rejected": -0.9424377679824829, "logps/chosen": -0.00022070165141485631, "logps/rejected": -1.6600805521011353, "loss": 0.7411, "nll_loss": 0.18527226150035858, "rewards/accuracies": 1.0, "rewards/chosen": -2.2070165869081393e-05, "rewards/margins": 0.16598597168922424, "rewards/rejected": -0.16600805521011353, "step": 7615 }, { "epoch": 5.266943291839557, "grad_norm": 8.581096649169922, "learning_rate": 2.6294759489780236e-05, "log_odds_chosen": 9.999594688415527, "log_odds_ratio": -0.00010349383228458464, "logits/chosen": -0.5014063119888306, "logits/rejected": -0.4965861141681671, "logps/chosen": -0.000183465686859563, "logps/rejected": -1.4133416414260864, "loss": 0.6778, "nll_loss": 0.16944223642349243, "rewards/accuracies": 1.0, "rewards/chosen": -1.8346567230764776e-05, "rewards/margins": 0.14131581783294678, "rewards/rejected": -0.1413341611623764, "step": 7616 }, { "epoch": 5.267634854771784, "grad_norm": 10.020563125610352, "learning_rate": 2.629091747349009e-05, "log_odds_chosen": 11.29733657836914, "log_odds_ratio": -2.104317354678642e-05, "logits/chosen": -0.7738044857978821, "logits/rejected": -0.8598242402076721, "logps/chosen": -0.00018091258243657649, "logps/rejected": -2.586355686187744, "loss": 1.2626, "nll_loss": 0.3156498074531555, "rewards/accuracies": 1.0, "rewards/chosen": -1.8091257516061887e-05, "rewards/margins": 0.25861749053001404, "rewards/rejected": -0.25863558053970337, "step": 7617 }, { "epoch": 5.268326417704011, "grad_norm": 15.1841459274292, "learning_rate": 2.6287075457199938e-05, "log_odds_chosen": 11.176735877990723, "log_odds_ratio": -4.5400127419270575e-05, "logits/chosen": -0.12970086932182312, "logits/rejected": -0.3043827414512634, "logps/chosen": -0.0007847197121009231, "logps/rejected": -3.0710134506225586, "loss": 1.185, "nll_loss": 0.29624485969543457, "rewards/accuracies": 1.0, "rewards/chosen": -7.847197412047535e-05, "rewards/margins": 0.3070228695869446, "rewards/rejected": -0.3071013391017914, "step": 7618 }, { "epoch": 5.269017980636238, "grad_norm": 9.741006851196289, "learning_rate": 2.6283233440909787e-05, "log_odds_chosen": 11.197010040283203, "log_odds_ratio": -7.193641067715362e-05, "logits/chosen": -0.18809190392494202, "logits/rejected": -0.3500503897666931, "logps/chosen": -0.0011983285658061504, "logps/rejected": -2.8563578128814697, "loss": 1.0726, "nll_loss": 0.26814720034599304, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011983286822214723, "rewards/margins": 0.2855159640312195, "rewards/rejected": -0.2856357991695404, "step": 7619 }, { "epoch": 5.269709543568465, "grad_norm": 12.667189598083496, "learning_rate": 2.6279391424619642e-05, "log_odds_chosen": 11.548139572143555, "log_odds_ratio": -1.4151154573482927e-05, "logits/chosen": -0.6309748291969299, "logits/rejected": -0.8380830883979797, "logps/chosen": -0.0001395690196659416, "logps/rejected": -2.490206003189087, "loss": 1.1593, "nll_loss": 0.28981447219848633, "rewards/accuracies": 1.0, "rewards/chosen": -1.3956902876088861e-05, "rewards/margins": 0.24900665879249573, "rewards/rejected": -0.24902060627937317, "step": 7620 }, { "epoch": 5.2704011065006915, "grad_norm": 5.913382053375244, "learning_rate": 2.627554940832949e-05, "log_odds_chosen": 9.914276123046875, "log_odds_ratio": -0.0004736467672046274, "logits/chosen": -0.16976572573184967, "logits/rejected": -0.2518896758556366, "logps/chosen": -0.001111007179133594, "logps/rejected": -2.4549412727355957, "loss": 0.806, "nll_loss": 0.20144861936569214, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011110070045106113, "rewards/margins": 0.24538302421569824, "rewards/rejected": -0.24549412727355957, "step": 7621 }, { "epoch": 5.271092669432918, "grad_norm": 10.705941200256348, "learning_rate": 2.627170739203934e-05, "log_odds_chosen": 10.679733276367188, "log_odds_ratio": -0.000957089476287365, "logits/chosen": -0.9041181206703186, "logits/rejected": -1.0080416202545166, "logps/chosen": -0.0002205181517638266, "logps/rejected": -2.086775779724121, "loss": 0.9697, "nll_loss": 0.24233216047286987, "rewards/accuracies": 1.0, "rewards/chosen": -2.2051815903978422e-05, "rewards/margins": 0.20865552127361298, "rewards/rejected": -0.20867758989334106, "step": 7622 }, { "epoch": 5.271784232365145, "grad_norm": 9.783222198486328, "learning_rate": 2.6267865375749196e-05, "log_odds_chosen": 10.436159133911133, "log_odds_ratio": -0.0005258338060230017, "logits/chosen": -0.5189238786697388, "logits/rejected": -0.537483811378479, "logps/chosen": -0.0305920522660017, "logps/rejected": -2.636291742324829, "loss": 1.1168, "nll_loss": 0.2791406512260437, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030592053662985563, "rewards/margins": 0.2605699896812439, "rewards/rejected": -0.26362916827201843, "step": 7623 }, { "epoch": 5.272475795297372, "grad_norm": 10.855855941772461, "learning_rate": 2.6264023359459045e-05, "log_odds_chosen": 10.203224182128906, "log_odds_ratio": -0.0001932304003275931, "logits/chosen": -0.32453393936157227, "logits/rejected": -0.3398202061653137, "logps/chosen": -0.0005054904613643885, "logps/rejected": -2.162875175476074, "loss": 0.8921, "nll_loss": 0.22300416231155396, "rewards/accuracies": 1.0, "rewards/chosen": -5.0549046136438847e-05, "rewards/margins": 0.21623697876930237, "rewards/rejected": -0.2162875235080719, "step": 7624 }, { "epoch": 5.273167358229599, "grad_norm": 8.04382038116455, "learning_rate": 2.6260181343168894e-05, "log_odds_chosen": 9.691407203674316, "log_odds_ratio": -0.00022404029732570052, "logits/chosen": -0.36626651883125305, "logits/rejected": -0.39901232719421387, "logps/chosen": -0.00020038278307765722, "logps/rejected": -1.4379754066467285, "loss": 0.8116, "nll_loss": 0.20287036895751953, "rewards/accuracies": 1.0, "rewards/chosen": -2.0038278307765722e-05, "rewards/margins": 0.1437775194644928, "rewards/rejected": -0.14379754662513733, "step": 7625 }, { "epoch": 5.273858921161826, "grad_norm": 8.791499137878418, "learning_rate": 2.6256339326878747e-05, "log_odds_chosen": 10.086090087890625, "log_odds_ratio": -6.677798228338361e-05, "logits/chosen": -0.6222400665283203, "logits/rejected": -0.6852139234542847, "logps/chosen": -0.00019318165141157806, "logps/rejected": -1.5650521516799927, "loss": 1.1248, "nll_loss": 0.28118351101875305, "rewards/accuracies": 1.0, "rewards/chosen": -1.9318165868753567e-05, "rewards/margins": 0.1564859002828598, "rewards/rejected": -0.15650522708892822, "step": 7626 }, { "epoch": 5.2745504840940525, "grad_norm": 13.350688934326172, "learning_rate": 2.6252497310588596e-05, "log_odds_chosen": 10.088001251220703, "log_odds_ratio": -0.00010822910553542897, "logits/chosen": -0.3409346342086792, "logits/rejected": -0.35213860869407654, "logps/chosen": -0.0001726550399325788, "logps/rejected": -1.3348233699798584, "loss": 0.9251, "nll_loss": 0.2312626987695694, "rewards/accuracies": 1.0, "rewards/chosen": -1.726550362946e-05, "rewards/margins": 0.13346508145332336, "rewards/rejected": -0.13348235189914703, "step": 7627 }, { "epoch": 5.275242047026279, "grad_norm": 18.314821243286133, "learning_rate": 2.6248655294298445e-05, "log_odds_chosen": 11.440445899963379, "log_odds_ratio": -3.44728869094979e-05, "logits/chosen": -0.0827302485704422, "logits/rejected": -0.22736823558807373, "logps/chosen": -0.00017195713007822633, "logps/rejected": -2.4652082920074463, "loss": 1.3187, "nll_loss": 0.32966870069503784, "rewards/accuracies": 1.0, "rewards/chosen": -1.7195712644024752e-05, "rewards/margins": 0.24650365114212036, "rewards/rejected": -0.24652084708213806, "step": 7628 }, { "epoch": 5.275933609958506, "grad_norm": 8.562300682067871, "learning_rate": 2.62448132780083e-05, "log_odds_chosen": 12.402145385742188, "log_odds_ratio": -1.9039196558878757e-05, "logits/chosen": -0.12419766187667847, "logits/rejected": -0.24297639727592468, "logps/chosen": -9.086302452487871e-05, "logps/rejected": -3.0359091758728027, "loss": 0.928, "nll_loss": 0.23200340569019318, "rewards/accuracies": 1.0, "rewards/chosen": -9.086303180083632e-06, "rewards/margins": 0.3035818636417389, "rewards/rejected": -0.30359092354774475, "step": 7629 }, { "epoch": 5.276625172890733, "grad_norm": 12.37198257446289, "learning_rate": 2.624097126171815e-05, "log_odds_chosen": 10.71961784362793, "log_odds_ratio": -3.985036164522171e-05, "logits/chosen": -0.6938871145248413, "logits/rejected": -0.6587687134742737, "logps/chosen": -0.00035433750599622726, "logps/rejected": -2.0899598598480225, "loss": 0.841, "nll_loss": 0.21023572981357574, "rewards/accuracies": 1.0, "rewards/chosen": -3.543375351000577e-05, "rewards/margins": 0.20896054804325104, "rewards/rejected": -0.20899598300457, "step": 7630 }, { "epoch": 5.27731673582296, "grad_norm": 9.847604751586914, "learning_rate": 2.6237129245428e-05, "log_odds_chosen": 10.25374984741211, "log_odds_ratio": -0.00017980107804760337, "logits/chosen": -0.2884005904197693, "logits/rejected": -0.314953088760376, "logps/chosen": -0.00047901368816383183, "logps/rejected": -2.042052984237671, "loss": 0.8726, "nll_loss": 0.21812944114208221, "rewards/accuracies": 1.0, "rewards/chosen": -4.790136881638318e-05, "rewards/margins": 0.20415738224983215, "rewards/rejected": -0.20420530438423157, "step": 7631 }, { "epoch": 5.278008298755187, "grad_norm": 5.961386680603027, "learning_rate": 2.6233287229137855e-05, "log_odds_chosen": 10.006340026855469, "log_odds_ratio": -8.721081394469365e-05, "logits/chosen": -0.18717193603515625, "logits/rejected": -0.266289621591568, "logps/chosen": -0.00018941918096970767, "logps/rejected": -1.5128713846206665, "loss": 0.8028, "nll_loss": 0.2007022500038147, "rewards/accuracies": 1.0, "rewards/chosen": -1.8941918824566528e-05, "rewards/margins": 0.15126819908618927, "rewards/rejected": -0.15128713846206665, "step": 7632 }, { "epoch": 5.2786998616874135, "grad_norm": 8.070467948913574, "learning_rate": 2.6229445212847704e-05, "log_odds_chosen": 9.738497734069824, "log_odds_ratio": -0.00011974801600445062, "logits/chosen": -0.25868844985961914, "logits/rejected": -0.3139762282371521, "logps/chosen": -0.002189639024436474, "logps/rejected": -2.289876937866211, "loss": 1.0836, "nll_loss": 0.27089813351631165, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002189638908021152, "rewards/margins": 0.2287687510251999, "rewards/rejected": -0.22898772358894348, "step": 7633 }, { "epoch": 5.27939142461964, "grad_norm": 4.927910327911377, "learning_rate": 2.6225603196557553e-05, "log_odds_chosen": 9.209070205688477, "log_odds_ratio": -0.0003817295946646482, "logits/chosen": -0.19250504672527313, "logits/rejected": -0.22361023724079132, "logps/chosen": -0.00035569886676967144, "logps/rejected": -1.3613628149032593, "loss": 0.8083, "nll_loss": 0.20204555988311768, "rewards/accuracies": 1.0, "rewards/chosen": -3.5569886676967144e-05, "rewards/margins": 0.13610070943832397, "rewards/rejected": -0.1361362785100937, "step": 7634 }, { "epoch": 5.280082987551867, "grad_norm": 6.918251991271973, "learning_rate": 2.6221761180267405e-05, "log_odds_chosen": 9.60433292388916, "log_odds_ratio": -0.0013130803126841784, "logits/chosen": -0.3779219388961792, "logits/rejected": -0.43389493227005005, "logps/chosen": -0.0008963979780673981, "logps/rejected": -1.7923884391784668, "loss": 0.7408, "nll_loss": 0.18507538735866547, "rewards/accuracies": 1.0, "rewards/chosen": -8.963979780673981e-05, "rewards/margins": 0.17914921045303345, "rewards/rejected": -0.17923887073993683, "step": 7635 }, { "epoch": 5.280774550484094, "grad_norm": 11.273725509643555, "learning_rate": 2.6217919163977254e-05, "log_odds_chosen": 10.878364562988281, "log_odds_ratio": -9.522255277261138e-05, "logits/chosen": -0.7540746927261353, "logits/rejected": -0.7857592105865479, "logps/chosen": -0.00027051271172240376, "logps/rejected": -1.8876264095306396, "loss": 0.7663, "nll_loss": 0.19157616794109344, "rewards/accuracies": 1.0, "rewards/chosen": -2.7051271899836138e-05, "rewards/margins": 0.18873558938503265, "rewards/rejected": -0.1887626349925995, "step": 7636 }, { "epoch": 5.281466113416321, "grad_norm": 8.270740509033203, "learning_rate": 2.6214077147687103e-05, "log_odds_chosen": 10.152772903442383, "log_odds_ratio": -0.0005649271188303828, "logits/chosen": -0.48451852798461914, "logits/rejected": -0.49426236748695374, "logps/chosen": -0.0005245262291282415, "logps/rejected": -2.122857093811035, "loss": 0.9035, "nll_loss": 0.22582755982875824, "rewards/accuracies": 1.0, "rewards/chosen": -5.245262946118601e-05, "rewards/margins": 0.2122332602739334, "rewards/rejected": -0.21228571236133575, "step": 7637 }, { "epoch": 5.282157676348548, "grad_norm": 10.418851852416992, "learning_rate": 2.621023513139696e-05, "log_odds_chosen": 10.892932891845703, "log_odds_ratio": -9.03589534573257e-05, "logits/chosen": -0.2918834686279297, "logits/rejected": -0.41665488481521606, "logps/chosen": -0.0004215552762616426, "logps/rejected": -2.230041027069092, "loss": 0.6557, "nll_loss": 0.16391390562057495, "rewards/accuracies": 1.0, "rewards/chosen": -4.21555305365473e-05, "rewards/margins": 0.22296196222305298, "rewards/rejected": -0.22300411760807037, "step": 7638 }, { "epoch": 5.282849239280774, "grad_norm": 10.69952392578125, "learning_rate": 2.620639311510681e-05, "log_odds_chosen": 10.622076034545898, "log_odds_ratio": -7.079998613335192e-05, "logits/chosen": -0.5640475153923035, "logits/rejected": -0.605728030204773, "logps/chosen": -0.0003474515688139945, "logps/rejected": -2.118351936340332, "loss": 1.0377, "nll_loss": 0.2594177722930908, "rewards/accuracies": 1.0, "rewards/chosen": -3.474515688139945e-05, "rewards/margins": 0.2118004560470581, "rewards/rejected": -0.21183519065380096, "step": 7639 }, { "epoch": 5.283540802213001, "grad_norm": 3.728268623352051, "learning_rate": 2.6202551098816657e-05, "log_odds_chosen": 10.98335075378418, "log_odds_ratio": -2.5416817152290605e-05, "logits/chosen": -0.40420278906822205, "logits/rejected": -0.43859580159187317, "logps/chosen": -0.0003488770453259349, "logps/rejected": -2.3899548053741455, "loss": 0.5625, "nll_loss": 0.14062948524951935, "rewards/accuracies": 1.0, "rewards/chosen": -3.488770380499773e-05, "rewards/margins": 0.23896059393882751, "rewards/rejected": -0.2389954924583435, "step": 7640 }, { "epoch": 5.284232365145228, "grad_norm": 8.159320831298828, "learning_rate": 2.6198709082526513e-05, "log_odds_chosen": 10.255733489990234, "log_odds_ratio": -0.00036923857987858355, "logits/chosen": 0.03266778588294983, "logits/rejected": 0.07152489572763443, "logps/chosen": -0.022674523293972015, "logps/rejected": -2.343322277069092, "loss": 1.1866, "nll_loss": 0.2966251075267792, "rewards/accuracies": 1.0, "rewards/chosen": -0.002267452422529459, "rewards/margins": 0.23206476867198944, "rewards/rejected": -0.23433223366737366, "step": 7641 }, { "epoch": 5.284923928077455, "grad_norm": 13.458017349243164, "learning_rate": 2.6194867066236362e-05, "log_odds_chosen": 9.971641540527344, "log_odds_ratio": -0.0003074409323744476, "logits/chosen": -0.7888079881668091, "logits/rejected": -0.8920333981513977, "logps/chosen": -0.0006763875717297196, "logps/rejected": -2.1958963871002197, "loss": 1.1482, "nll_loss": 0.2870239019393921, "rewards/accuracies": 1.0, "rewards/chosen": -6.763875717297196e-05, "rewards/margins": 0.21952198445796967, "rewards/rejected": -0.21958963572978973, "step": 7642 }, { "epoch": 5.285615491009682, "grad_norm": 8.60274887084961, "learning_rate": 2.619102504994621e-05, "log_odds_chosen": 10.84687614440918, "log_odds_ratio": -4.1135343053610995e-05, "logits/chosen": -0.19892117381095886, "logits/rejected": -0.26732897758483887, "logps/chosen": -0.0014369181590154767, "logps/rejected": -2.524735689163208, "loss": 1.2549, "nll_loss": 0.3137156665325165, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001436918246326968, "rewards/margins": 0.25232988595962524, "rewards/rejected": -0.2524735927581787, "step": 7643 }, { "epoch": 5.286307053941909, "grad_norm": 6.520380973815918, "learning_rate": 2.6187183033656064e-05, "log_odds_chosen": 11.032618522644043, "log_odds_ratio": -5.6647313613211736e-05, "logits/chosen": -0.40544480085372925, "logits/rejected": -0.5566476583480835, "logps/chosen": -0.0011669609230011702, "logps/rejected": -2.725712299346924, "loss": 1.3036, "nll_loss": 0.3258954882621765, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011669609375530854, "rewards/margins": 0.27245455980300903, "rewards/rejected": -0.27257123589515686, "step": 7644 }, { "epoch": 5.286998616874135, "grad_norm": 3.9610953330993652, "learning_rate": 2.6183341017365913e-05, "log_odds_chosen": 9.115592002868652, "log_odds_ratio": -0.0002037350641330704, "logits/chosen": -0.02345031499862671, "logits/rejected": -0.0046147629618644714, "logps/chosen": -0.00043547700624912977, "logps/rejected": -1.3381257057189941, "loss": 0.6619, "nll_loss": 0.16544975340366364, "rewards/accuracies": 1.0, "rewards/chosen": -4.354770135250874e-05, "rewards/margins": 0.13376902043819427, "rewards/rejected": -0.1338125616312027, "step": 7645 }, { "epoch": 5.287690179806362, "grad_norm": 7.417776107788086, "learning_rate": 2.6179499001075762e-05, "log_odds_chosen": 9.859108924865723, "log_odds_ratio": -0.00011395730689400807, "logits/chosen": -0.11562471091747284, "logits/rejected": -0.15001115202903748, "logps/chosen": -0.0002473094209562987, "logps/rejected": -1.3960421085357666, "loss": 0.6926, "nll_loss": 0.17313078045845032, "rewards/accuracies": 1.0, "rewards/chosen": -2.4730943550821394e-05, "rewards/margins": 0.13957948982715607, "rewards/rejected": -0.13960421085357666, "step": 7646 }, { "epoch": 5.288381742738589, "grad_norm": 7.190175533294678, "learning_rate": 2.6175656984785618e-05, "log_odds_chosen": 9.759132385253906, "log_odds_ratio": -0.0004232939681969583, "logits/chosen": -0.1860746145248413, "logits/rejected": -0.2614567279815674, "logps/chosen": -0.0029081744141876698, "logps/rejected": -2.041231632232666, "loss": 0.842, "nll_loss": 0.21046897768974304, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002908174355980009, "rewards/margins": 0.20383237302303314, "rewards/rejected": -0.20412318408489227, "step": 7647 }, { "epoch": 5.289073305670816, "grad_norm": 6.9479498863220215, "learning_rate": 2.6171814968495467e-05, "log_odds_chosen": 9.591582298278809, "log_odds_ratio": -0.00018219949561171234, "logits/chosen": -0.2017437219619751, "logits/rejected": -0.20229317247867584, "logps/chosen": -0.0020538540557026863, "logps/rejected": -2.25608491897583, "loss": 0.6829, "nll_loss": 0.17071056365966797, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002053853968391195, "rewards/margins": 0.2254031002521515, "rewards/rejected": -0.2256084829568863, "step": 7648 }, { "epoch": 5.289764868603043, "grad_norm": 12.096253395080566, "learning_rate": 2.6167972952205316e-05, "log_odds_chosen": 10.28390884399414, "log_odds_ratio": -0.0001337239664280787, "logits/chosen": 0.2107926309108734, "logits/rejected": 0.14508889615535736, "logps/chosen": -0.0010782841127365828, "logps/rejected": -2.0446832180023193, "loss": 0.799, "nll_loss": 0.19973695278167725, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010782841127365828, "rewards/margins": 0.2043604850769043, "rewards/rejected": -0.20446830987930298, "step": 7649 }, { "epoch": 5.29045643153527, "grad_norm": 14.159560203552246, "learning_rate": 2.616413093591517e-05, "log_odds_chosen": 10.812664031982422, "log_odds_ratio": -2.9879665817134082e-05, "logits/chosen": -0.6208158731460571, "logits/rejected": -0.6005766987800598, "logps/chosen": -0.00011174961400683969, "logps/rejected": -1.6413708925247192, "loss": 0.926, "nll_loss": 0.23150420188903809, "rewards/accuracies": 1.0, "rewards/chosen": -1.1174963219673373e-05, "rewards/margins": 0.16412591934204102, "rewards/rejected": -0.1641370952129364, "step": 7650 }, { "epoch": 5.291147994467496, "grad_norm": 17.602048873901367, "learning_rate": 2.616028891962502e-05, "log_odds_chosen": 10.549786567687988, "log_odds_ratio": -8.301087655127048e-05, "logits/chosen": -0.6232462525367737, "logits/rejected": -0.6550066471099854, "logps/chosen": -0.000313351396471262, "logps/rejected": -2.012857675552368, "loss": 0.9327, "nll_loss": 0.23317351937294006, "rewards/accuracies": 1.0, "rewards/chosen": -3.133514110231772e-05, "rewards/margins": 0.20125442743301392, "rewards/rejected": -0.20128576457500458, "step": 7651 }, { "epoch": 5.291839557399723, "grad_norm": 5.173848628997803, "learning_rate": 2.615644690333487e-05, "log_odds_chosen": 10.600738525390625, "log_odds_ratio": -6.898707943037152e-05, "logits/chosen": -0.22983574867248535, "logits/rejected": -0.23931768536567688, "logps/chosen": -0.0003584368387237191, "logps/rejected": -2.2960362434387207, "loss": 0.5126, "nll_loss": 0.1281490921974182, "rewards/accuracies": 1.0, "rewards/chosen": -3.5843684599967673e-05, "rewards/margins": 0.22956779599189758, "rewards/rejected": -0.22960364818572998, "step": 7652 }, { "epoch": 5.29253112033195, "grad_norm": 11.524662017822266, "learning_rate": 2.6152604887044722e-05, "log_odds_chosen": 10.889970779418945, "log_odds_ratio": -5.783965752925724e-05, "logits/chosen": -0.4245738685131073, "logits/rejected": -0.6069134473800659, "logps/chosen": -0.0002389351575402543, "logps/rejected": -2.2320656776428223, "loss": 0.6865, "nll_loss": 0.1716289222240448, "rewards/accuracies": 1.0, "rewards/chosen": -2.389351539022755e-05, "rewards/margins": 0.22318267822265625, "rewards/rejected": -0.22320657968521118, "step": 7653 }, { "epoch": 5.293222683264177, "grad_norm": 11.817529678344727, "learning_rate": 2.614876287075457e-05, "log_odds_chosen": 10.150485038757324, "log_odds_ratio": -0.0003673167375382036, "logits/chosen": -0.4654601216316223, "logits/rejected": -0.4384010434150696, "logps/chosen": -0.00017272391414735466, "logps/rejected": -1.4797208309173584, "loss": 1.7016, "nll_loss": 0.4253644645214081, "rewards/accuracies": 1.0, "rewards/chosen": -1.7272392142331228e-05, "rewards/margins": 0.1479548066854477, "rewards/rejected": -0.14797207713127136, "step": 7654 }, { "epoch": 5.293914246196404, "grad_norm": 8.556180000305176, "learning_rate": 2.614492085446442e-05, "log_odds_chosen": 9.928142547607422, "log_odds_ratio": -0.0005739845219068229, "logits/chosen": 0.10154339671134949, "logits/rejected": -0.16080422699451447, "logps/chosen": -0.0033231317065656185, "logps/rejected": -2.4263432025909424, "loss": 1.3841, "nll_loss": 0.3459640145301819, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003323132114019245, "rewards/margins": 0.24230200052261353, "rewards/rejected": -0.24263429641723633, "step": 7655 }, { "epoch": 5.2946058091286305, "grad_norm": 6.121419906616211, "learning_rate": 2.6141078838174276e-05, "log_odds_chosen": 10.052181243896484, "log_odds_ratio": -6.283888797042891e-05, "logits/chosen": -0.30488666892051697, "logits/rejected": -0.36098307371139526, "logps/chosen": -0.0031215278431773186, "logps/rejected": -2.2946300506591797, "loss": 0.5584, "nll_loss": 0.1395900547504425, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031215278431773186, "rewards/margins": 0.22915084660053253, "rewards/rejected": -0.22946301102638245, "step": 7656 }, { "epoch": 5.295297372060857, "grad_norm": 5.950735569000244, "learning_rate": 2.6137236821884125e-05, "log_odds_chosen": 10.034515380859375, "log_odds_ratio": -0.00034096435410901904, "logits/chosen": -0.06915253400802612, "logits/rejected": -0.15333367884159088, "logps/chosen": -0.00016374155529774725, "logps/rejected": -1.260401964187622, "loss": 0.92, "nll_loss": 0.22996269166469574, "rewards/accuracies": 1.0, "rewards/chosen": -1.6374155165976845e-05, "rewards/margins": 0.1260238140821457, "rewards/rejected": -0.12604019045829773, "step": 7657 }, { "epoch": 5.295988934993084, "grad_norm": 6.171073913574219, "learning_rate": 2.6133394805593974e-05, "log_odds_chosen": 7.855501174926758, "log_odds_ratio": -0.046969350427389145, "logits/chosen": -0.45245859026908875, "logits/rejected": -0.44178035855293274, "logps/chosen": -0.012497956864535809, "logps/rejected": -1.1013507843017578, "loss": 0.9854, "nll_loss": 0.2416542023420334, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012497956631705165, "rewards/margins": 0.10888528823852539, "rewards/rejected": -0.11013508588075638, "step": 7658 }, { "epoch": 5.296680497925311, "grad_norm": 6.538420677185059, "learning_rate": 2.612955278930383e-05, "log_odds_chosen": 9.587251663208008, "log_odds_ratio": -0.006791528780013323, "logits/chosen": -0.4349149763584137, "logits/rejected": -0.5419043302536011, "logps/chosen": -0.003675490617752075, "logps/rejected": -2.00207781791687, "loss": 1.0787, "nll_loss": 0.2690025269985199, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003675490734167397, "rewards/margins": 0.1998402178287506, "rewards/rejected": -0.20020776987075806, "step": 7659 }, { "epoch": 5.297372060857538, "grad_norm": 6.733508110046387, "learning_rate": 2.612571077301368e-05, "log_odds_chosen": 9.530517578125, "log_odds_ratio": -0.00025011191610246897, "logits/chosen": -0.4764153063297272, "logits/rejected": -0.5095908641815186, "logps/chosen": -0.00014602337614633143, "logps/rejected": -1.0838871002197266, "loss": 0.7234, "nll_loss": 0.1808272898197174, "rewards/accuracies": 1.0, "rewards/chosen": -1.4602336705138441e-05, "rewards/margins": 0.10837408900260925, "rewards/rejected": -0.10838870704174042, "step": 7660 }, { "epoch": 5.298063623789765, "grad_norm": 8.761924743652344, "learning_rate": 2.6121868756723528e-05, "log_odds_chosen": 9.830541610717773, "log_odds_ratio": -0.0003514336422085762, "logits/chosen": -0.43194514513015747, "logits/rejected": -0.5670047402381897, "logps/chosen": -0.001400099485181272, "logps/rejected": -2.193282127380371, "loss": 0.9972, "nll_loss": 0.24927306175231934, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001400099426973611, "rewards/margins": 0.21918819844722748, "rewards/rejected": -0.21932819485664368, "step": 7661 }, { "epoch": 5.2987551867219915, "grad_norm": 8.479170799255371, "learning_rate": 2.611802674043338e-05, "log_odds_chosen": 9.956790924072266, "log_odds_ratio": -0.0007199362153187394, "logits/chosen": -0.38241422176361084, "logits/rejected": -0.513526439666748, "logps/chosen": -0.004810015205293894, "logps/rejected": -2.448887825012207, "loss": 0.9497, "nll_loss": 0.23735857009887695, "rewards/accuracies": 1.0, "rewards/chosen": -0.00048100153799168766, "rewards/margins": 0.2444078028202057, "rewards/rejected": -0.2448887974023819, "step": 7662 }, { "epoch": 5.299446749654218, "grad_norm": 7.522983551025391, "learning_rate": 2.611418472414323e-05, "log_odds_chosen": 10.109183311462402, "log_odds_ratio": -0.00017479847883805633, "logits/chosen": -0.39235997200012207, "logits/rejected": -0.3930438458919525, "logps/chosen": -0.00037281305412761867, "logps/rejected": -1.6620938777923584, "loss": 0.5012, "nll_loss": 0.12528148293495178, "rewards/accuracies": 1.0, "rewards/chosen": -3.728130832314491e-05, "rewards/margins": 0.1661721169948578, "rewards/rejected": -0.1662093997001648, "step": 7663 }, { "epoch": 5.300138312586445, "grad_norm": 6.93427848815918, "learning_rate": 2.611034270785308e-05, "log_odds_chosen": 9.354527473449707, "log_odds_ratio": -0.0008370226132683456, "logits/chosen": -0.37404438853263855, "logits/rejected": -0.3976355195045471, "logps/chosen": -0.0009613793808966875, "logps/rejected": -1.5531013011932373, "loss": 0.9309, "nll_loss": 0.23264700174331665, "rewards/accuracies": 1.0, "rewards/chosen": -9.61379410000518e-05, "rewards/margins": 0.15521399676799774, "rewards/rejected": -0.15531013906002045, "step": 7664 }, { "epoch": 5.300829875518672, "grad_norm": 6.72873592376709, "learning_rate": 2.6106500691562935e-05, "log_odds_chosen": 11.079107284545898, "log_odds_ratio": -0.0003697759239003062, "logits/chosen": -0.592350959777832, "logits/rejected": -0.5782504081726074, "logps/chosen": -0.00016440243052784353, "logps/rejected": -2.4741718769073486, "loss": 0.6636, "nll_loss": 0.16586294770240784, "rewards/accuracies": 1.0, "rewards/chosen": -1.6440242688986473e-05, "rewards/margins": 0.24740076065063477, "rewards/rejected": -0.24741718173027039, "step": 7665 }, { "epoch": 5.301521438450899, "grad_norm": 7.152824878692627, "learning_rate": 2.6102658675272784e-05, "log_odds_chosen": 10.747650146484375, "log_odds_ratio": -3.955703868996352e-05, "logits/chosen": -0.44400927424430847, "logits/rejected": -0.5418944358825684, "logps/chosen": -0.0003771585179492831, "logps/rejected": -2.3978171348571777, "loss": 0.8891, "nll_loss": 0.22226016223430634, "rewards/accuracies": 1.0, "rewards/chosen": -3.771585033973679e-05, "rewards/margins": 0.23974400758743286, "rewards/rejected": -0.2397817224264145, "step": 7666 }, { "epoch": 5.302213001383126, "grad_norm": 8.8451566696167, "learning_rate": 2.6098816658982633e-05, "log_odds_chosen": 10.385505676269531, "log_odds_ratio": -0.00013336195843294263, "logits/chosen": -0.21721792221069336, "logits/rejected": -0.16161830723285675, "logps/chosen": -0.0003452481469139457, "logps/rejected": -2.1876940727233887, "loss": 0.8405, "nll_loss": 0.21011018753051758, "rewards/accuracies": 1.0, "rewards/chosen": -3.4524819056969136e-05, "rewards/margins": 0.21873487532138824, "rewards/rejected": -0.21876941621303558, "step": 7667 }, { "epoch": 5.3029045643153525, "grad_norm": 8.409830093383789, "learning_rate": 2.609497464269249e-05, "log_odds_chosen": 9.87546443939209, "log_odds_ratio": -0.00024321695673279464, "logits/chosen": -0.7340776324272156, "logits/rejected": -0.8499897718429565, "logps/chosen": -0.0005925848963670433, "logps/rejected": -1.650931477546692, "loss": 0.7627, "nll_loss": 0.19064688682556152, "rewards/accuracies": 1.0, "rewards/chosen": -5.925849109189585e-05, "rewards/margins": 0.16503389179706573, "rewards/rejected": -0.16509315371513367, "step": 7668 }, { "epoch": 5.303596127247579, "grad_norm": 12.800469398498535, "learning_rate": 2.6091132626402338e-05, "log_odds_chosen": 10.688926696777344, "log_odds_ratio": -3.9617676520720124e-05, "logits/chosen": -0.5846214890480042, "logits/rejected": -0.5911470055580139, "logps/chosen": -0.00027569776284508407, "logps/rejected": -2.2901434898376465, "loss": 0.8275, "nll_loss": 0.20687484741210938, "rewards/accuracies": 1.0, "rewards/chosen": -2.756977846729569e-05, "rewards/margins": 0.22898676991462708, "rewards/rejected": -0.2290143519639969, "step": 7669 }, { "epoch": 5.304287690179806, "grad_norm": 11.251484870910645, "learning_rate": 2.6087290610112187e-05, "log_odds_chosen": 9.43034553527832, "log_odds_ratio": -0.00011557983089005575, "logits/chosen": -0.6348074674606323, "logits/rejected": -0.62444669008255, "logps/chosen": -0.000787832832429558, "logps/rejected": -1.6137120723724365, "loss": 0.7808, "nll_loss": 0.19517762959003448, "rewards/accuracies": 1.0, "rewards/chosen": -7.878329051891342e-05, "rewards/margins": 0.1612924337387085, "rewards/rejected": -0.16137121617794037, "step": 7670 }, { "epoch": 5.304979253112033, "grad_norm": 7.849981307983398, "learning_rate": 2.608344859382204e-05, "log_odds_chosen": 8.77843952178955, "log_odds_ratio": -0.01902155764400959, "logits/chosen": -0.5401623845100403, "logits/rejected": -0.3866721987724304, "logps/chosen": -0.008708938956260681, "logps/rejected": -1.9028129577636719, "loss": 1.5558, "nll_loss": 0.38705021142959595, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008708939421921968, "rewards/margins": 0.18941040337085724, "rewards/rejected": -0.19028130173683167, "step": 7671 }, { "epoch": 5.30567081604426, "grad_norm": 12.56816291809082, "learning_rate": 2.6079606577531888e-05, "log_odds_chosen": 9.128028869628906, "log_odds_ratio": -0.00227135862223804, "logits/chosen": -0.6871810555458069, "logits/rejected": -0.6655930280685425, "logps/chosen": -0.001776855206117034, "logps/rejected": -1.6447970867156982, "loss": 2.39, "nll_loss": 0.5972785949707031, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001776855206117034, "rewards/margins": 0.1643020212650299, "rewards/rejected": -0.16447971761226654, "step": 7672 }, { "epoch": 5.306362378976487, "grad_norm": 3.994396448135376, "learning_rate": 2.607576456124174e-05, "log_odds_chosen": 10.866761207580566, "log_odds_ratio": -2.0611427316907793e-05, "logits/chosen": -0.6379934549331665, "logits/rejected": -0.6666987538337708, "logps/chosen": -0.00013821777247358114, "logps/rejected": -1.997601866722107, "loss": 0.3953, "nll_loss": 0.0988292247056961, "rewards/accuracies": 1.0, "rewards/chosen": -1.3821777429257054e-05, "rewards/margins": 0.19974635541439056, "rewards/rejected": -0.19976019859313965, "step": 7673 }, { "epoch": 5.3070539419087135, "grad_norm": 8.11078929901123, "learning_rate": 2.6071922544951593e-05, "log_odds_chosen": 9.82305908203125, "log_odds_ratio": -0.00041845531086437404, "logits/chosen": -0.6439027786254883, "logits/rejected": -0.6215129494667053, "logps/chosen": -0.000763900694437325, "logps/rejected": -1.9046980142593384, "loss": 0.9606, "nll_loss": 0.24010974168777466, "rewards/accuracies": 1.0, "rewards/chosen": -7.639007526449859e-05, "rewards/margins": 0.19039341807365417, "rewards/rejected": -0.19046981632709503, "step": 7674 }, { "epoch": 5.30774550484094, "grad_norm": 9.360323905944824, "learning_rate": 2.6068080528661442e-05, "log_odds_chosen": 10.177734375, "log_odds_ratio": -0.0011329721892252564, "logits/chosen": -0.3226807415485382, "logits/rejected": -0.4182065427303314, "logps/chosen": -0.006573481019586325, "logps/rejected": -1.8262217044830322, "loss": 0.6999, "nll_loss": 0.17485645413398743, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006573480786755681, "rewards/margins": 0.18196482956409454, "rewards/rejected": -0.18262219429016113, "step": 7675 }, { "epoch": 5.308437067773167, "grad_norm": 7.601252555847168, "learning_rate": 2.606423851237129e-05, "log_odds_chosen": 8.919883728027344, "log_odds_ratio": -0.006886746268719435, "logits/chosen": -0.41399550437927246, "logits/rejected": -0.4040910303592682, "logps/chosen": -0.004403387662023306, "logps/rejected": -1.3676029443740845, "loss": 0.5948, "nll_loss": 0.14801070094108582, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004403387720230967, "rewards/margins": 0.13631996512413025, "rewards/rejected": -0.13676030933856964, "step": 7676 }, { "epoch": 5.309128630705394, "grad_norm": 11.422032356262207, "learning_rate": 2.6060396496081147e-05, "log_odds_chosen": 10.98823070526123, "log_odds_ratio": -3.098735760431737e-05, "logits/chosen": -0.5396237373352051, "logits/rejected": -0.6037777066230774, "logps/chosen": -0.00012345438881311566, "logps/rejected": -1.9171665906906128, "loss": 0.7916, "nll_loss": 0.19789397716522217, "rewards/accuracies": 1.0, "rewards/chosen": -1.2345439245109446e-05, "rewards/margins": 0.19170431792736053, "rewards/rejected": -0.19171667098999023, "step": 7677 }, { "epoch": 5.309820193637621, "grad_norm": 7.421292304992676, "learning_rate": 2.6056554479790996e-05, "log_odds_chosen": 9.832866668701172, "log_odds_ratio": -0.0001473495940444991, "logits/chosen": -0.4560951590538025, "logits/rejected": -0.554593563079834, "logps/chosen": -0.00036930685746483505, "logps/rejected": -1.706931471824646, "loss": 0.8516, "nll_loss": 0.2128933072090149, "rewards/accuracies": 1.0, "rewards/chosen": -3.693068720167503e-05, "rewards/margins": 0.1706562340259552, "rewards/rejected": -0.17069315910339355, "step": 7678 }, { "epoch": 5.310511756569848, "grad_norm": 5.684186935424805, "learning_rate": 2.6052712463500845e-05, "log_odds_chosen": 10.158199310302734, "log_odds_ratio": -0.00011456047650426626, "logits/chosen": -0.30376410484313965, "logits/rejected": -0.34192806482315063, "logps/chosen": -0.0003021803859155625, "logps/rejected": -1.7264450788497925, "loss": 0.7734, "nll_loss": 0.19334980845451355, "rewards/accuracies": 1.0, "rewards/chosen": -3.021803786396049e-05, "rewards/margins": 0.17261429131031036, "rewards/rejected": -0.1726444959640503, "step": 7679 }, { "epoch": 5.3112033195020745, "grad_norm": 8.601361274719238, "learning_rate": 2.60488704472107e-05, "log_odds_chosen": 10.844970703125, "log_odds_ratio": -3.83031765522901e-05, "logits/chosen": -0.14833518862724304, "logits/rejected": -0.11479371786117554, "logps/chosen": -0.00024344338453374803, "logps/rejected": -2.3909902572631836, "loss": 0.9726, "nll_loss": 0.2431459277868271, "rewards/accuracies": 1.0, "rewards/chosen": -2.4344339180970564e-05, "rewards/margins": 0.23907466232776642, "rewards/rejected": -0.23909902572631836, "step": 7680 }, { "epoch": 5.311894882434301, "grad_norm": 5.738802433013916, "learning_rate": 2.604502843092055e-05, "log_odds_chosen": 10.435100555419922, "log_odds_ratio": -7.32573025743477e-05, "logits/chosen": -0.5592867136001587, "logits/rejected": -0.5970466136932373, "logps/chosen": -0.0003621687355916947, "logps/rejected": -2.1743826866149902, "loss": 0.691, "nll_loss": 0.17275381088256836, "rewards/accuracies": 1.0, "rewards/chosen": -3.6216875741956756e-05, "rewards/margins": 0.21740205585956573, "rewards/rejected": -0.21743828058242798, "step": 7681 }, { "epoch": 5.312586445366528, "grad_norm": 6.579929828643799, "learning_rate": 2.60411864146304e-05, "log_odds_chosen": 10.444208145141602, "log_odds_ratio": -0.0007665945449844003, "logits/chosen": -0.6170370578765869, "logits/rejected": -0.5970430970191956, "logps/chosen": -0.0007927232072688639, "logps/rejected": -2.231851100921631, "loss": 0.7417, "nll_loss": 0.18535269796848297, "rewards/accuracies": 1.0, "rewards/chosen": -7.927232218207791e-05, "rewards/margins": 0.22310584783554077, "rewards/rejected": -0.22318512201309204, "step": 7682 }, { "epoch": 5.313278008298755, "grad_norm": 8.819731712341309, "learning_rate": 2.603734439834025e-05, "log_odds_chosen": 10.286642074584961, "log_odds_ratio": -6.957343430258334e-05, "logits/chosen": -0.7522540092468262, "logits/rejected": -0.8117721080780029, "logps/chosen": -0.00026084392447955906, "logps/rejected": -1.9778194427490234, "loss": 0.575, "nll_loss": 0.14375334978103638, "rewards/accuracies": 1.0, "rewards/chosen": -2.6084395358338952e-05, "rewards/margins": 0.1977558732032776, "rewards/rejected": -0.19778196513652802, "step": 7683 }, { "epoch": 5.313969571230982, "grad_norm": 8.924551010131836, "learning_rate": 2.60335023820501e-05, "log_odds_chosen": 10.201484680175781, "log_odds_ratio": -0.0005286703235469759, "logits/chosen": -0.8851473927497864, "logits/rejected": -0.8970750570297241, "logps/chosen": -0.0006871483637951314, "logps/rejected": -2.151824474334717, "loss": 0.9841, "nll_loss": 0.2459702491760254, "rewards/accuracies": 1.0, "rewards/chosen": -6.871483492432162e-05, "rewards/margins": 0.21511372923851013, "rewards/rejected": -0.21518243849277496, "step": 7684 }, { "epoch": 5.314661134163209, "grad_norm": 9.595136642456055, "learning_rate": 2.602966036575995e-05, "log_odds_chosen": 8.468236923217773, "log_odds_ratio": -0.010804870165884495, "logits/chosen": -0.4198509752750397, "logits/rejected": -0.37303659319877625, "logps/chosen": -0.01878383569419384, "logps/rejected": -1.6183393001556396, "loss": 0.7602, "nll_loss": 0.18898017704486847, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018783833365887403, "rewards/margins": 0.15995556116104126, "rewards/rejected": -0.16183394193649292, "step": 7685 }, { "epoch": 5.3153526970954355, "grad_norm": 11.497410774230957, "learning_rate": 2.6025818349469805e-05, "log_odds_chosen": 10.909662246704102, "log_odds_ratio": -2.5831925086094998e-05, "logits/chosen": -0.6138488054275513, "logits/rejected": -0.6145757436752319, "logps/chosen": -0.00014651940728072077, "logps/rejected": -1.8224164247512817, "loss": 0.9949, "nll_loss": 0.2487286627292633, "rewards/accuracies": 1.0, "rewards/chosen": -1.4651941455667838e-05, "rewards/margins": 0.1822269856929779, "rewards/rejected": -0.18224164843559265, "step": 7686 }, { "epoch": 5.316044260027662, "grad_norm": 5.951731204986572, "learning_rate": 2.6021976333179654e-05, "log_odds_chosen": 10.142402648925781, "log_odds_ratio": -0.00015327542496379465, "logits/chosen": -0.4024023711681366, "logits/rejected": -0.5670183300971985, "logps/chosen": -0.000891193572897464, "logps/rejected": -2.0417609214782715, "loss": 0.6139, "nll_loss": 0.15346777439117432, "rewards/accuracies": 1.0, "rewards/chosen": -8.91193631105125e-05, "rewards/margins": 0.20408698916435242, "rewards/rejected": -0.204176127910614, "step": 7687 }, { "epoch": 5.316735822959889, "grad_norm": 6.343304634094238, "learning_rate": 2.6018134316889504e-05, "log_odds_chosen": 10.47935676574707, "log_odds_ratio": -0.00012434548989403993, "logits/chosen": -0.5548829436302185, "logits/rejected": -0.5281838774681091, "logps/chosen": -0.0002495267253834754, "logps/rejected": -1.912071943283081, "loss": 0.5336, "nll_loss": 0.1333903819322586, "rewards/accuracies": 1.0, "rewards/chosen": -2.4952674721134827e-05, "rewards/margins": 0.19118225574493408, "rewards/rejected": -0.19120720028877258, "step": 7688 }, { "epoch": 5.317427385892116, "grad_norm": 7.945797443389893, "learning_rate": 2.601429230059936e-05, "log_odds_chosen": 10.331096649169922, "log_odds_ratio": -6.510312960017473e-05, "logits/chosen": -0.15395867824554443, "logits/rejected": -0.3068086802959442, "logps/chosen": -0.0003511592512950301, "logps/rejected": -2.1909170150756836, "loss": 0.6705, "nll_loss": 0.16760849952697754, "rewards/accuracies": 1.0, "rewards/chosen": -3.511592512950301e-05, "rewards/margins": 0.2190566062927246, "rewards/rejected": -0.2190917283296585, "step": 7689 }, { "epoch": 5.318118948824343, "grad_norm": 7.558305740356445, "learning_rate": 2.601045028430921e-05, "log_odds_chosen": 9.520155906677246, "log_odds_ratio": -0.00021912460215389729, "logits/chosen": -0.7176843285560608, "logits/rejected": -0.7478047609329224, "logps/chosen": -0.000420411117374897, "logps/rejected": -1.4949798583984375, "loss": 0.538, "nll_loss": 0.134473979473114, "rewards/accuracies": 1.0, "rewards/chosen": -4.204111246508546e-05, "rewards/margins": 0.1494559645652771, "rewards/rejected": -0.14949798583984375, "step": 7690 }, { "epoch": 5.31881051175657, "grad_norm": 8.577528953552246, "learning_rate": 2.6006608268019057e-05, "log_odds_chosen": 10.675228118896484, "log_odds_ratio": -0.00035642407601699233, "logits/chosen": -0.5709743499755859, "logits/rejected": -0.5321433544158936, "logps/chosen": -0.0003995354054495692, "logps/rejected": -2.767270088195801, "loss": 0.6596, "nll_loss": 0.16486123204231262, "rewards/accuracies": 1.0, "rewards/chosen": -3.995353836216964e-05, "rewards/margins": 0.27668705582618713, "rewards/rejected": -0.2767270505428314, "step": 7691 }, { "epoch": 5.319502074688796, "grad_norm": 6.864575386047363, "learning_rate": 2.600276625172891e-05, "log_odds_chosen": 9.34201431274414, "log_odds_ratio": -0.0005491009214892983, "logits/chosen": -0.36394619941711426, "logits/rejected": -0.4056907296180725, "logps/chosen": -0.001376256812363863, "logps/rejected": -1.977386713027954, "loss": 0.6747, "nll_loss": 0.1686277985572815, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013762569869868457, "rewards/margins": 0.1976010501384735, "rewards/rejected": -0.1977386772632599, "step": 7692 }, { "epoch": 5.320193637621023, "grad_norm": 8.476661682128906, "learning_rate": 2.599892423543876e-05, "log_odds_chosen": 10.700014114379883, "log_odds_ratio": -7.971821469254792e-05, "logits/chosen": -0.47770819067955017, "logits/rejected": -0.5215426683425903, "logps/chosen": -0.005434725899249315, "logps/rejected": -2.4658029079437256, "loss": 1.0269, "nll_loss": 0.25670719146728516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005434725899249315, "rewards/margins": 0.2460368126630783, "rewards/rejected": -0.24658028781414032, "step": 7693 }, { "epoch": 5.32088520055325, "grad_norm": 6.076938629150391, "learning_rate": 2.5995082219148608e-05, "log_odds_chosen": 9.699786186218262, "log_odds_ratio": -0.0005650972598232329, "logits/chosen": -0.43687766790390015, "logits/rejected": -0.5232003331184387, "logps/chosen": -0.001041522016748786, "logps/rejected": -1.5237171649932861, "loss": 1.6113, "nll_loss": 0.40276864171028137, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010415221186121926, "rewards/margins": 0.15226754546165466, "rewards/rejected": -0.15237170457839966, "step": 7694 }, { "epoch": 5.321576763485477, "grad_norm": 6.3537187576293945, "learning_rate": 2.5991240202858464e-05, "log_odds_chosen": 9.630485534667969, "log_odds_ratio": -0.00017862251843325794, "logits/chosen": -0.4959850609302521, "logits/rejected": -0.4872666895389557, "logps/chosen": -0.001289563486352563, "logps/rejected": -2.0892245769500732, "loss": 0.7614, "nll_loss": 0.19032001495361328, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012895635154563934, "rewards/margins": 0.208793506026268, "rewards/rejected": -0.20892247557640076, "step": 7695 }, { "epoch": 5.322268326417704, "grad_norm": 6.292471885681152, "learning_rate": 2.5987398186568313e-05, "log_odds_chosen": 11.091445922851562, "log_odds_ratio": -3.2769057725090533e-05, "logits/chosen": -0.3431626558303833, "logits/rejected": -0.43052709102630615, "logps/chosen": -0.00016557855997234583, "logps/rejected": -2.2294929027557373, "loss": 0.5896, "nll_loss": 0.1474042385816574, "rewards/accuracies": 1.0, "rewards/chosen": -1.6557856724830344e-05, "rewards/margins": 0.22293275594711304, "rewards/rejected": -0.2229493111371994, "step": 7696 }, { "epoch": 5.322959889349931, "grad_norm": 6.6983723640441895, "learning_rate": 2.5983556170278162e-05, "log_odds_chosen": 10.316198348999023, "log_odds_ratio": -0.00027626199880614877, "logits/chosen": -0.4761522114276886, "logits/rejected": -0.43214714527130127, "logps/chosen": -0.000250997458351776, "logps/rejected": -1.6024224758148193, "loss": 0.6304, "nll_loss": 0.15757060050964355, "rewards/accuracies": 1.0, "rewards/chosen": -2.5099743652390316e-05, "rewards/margins": 0.16021715104579926, "rewards/rejected": -0.1602422595024109, "step": 7697 }, { "epoch": 5.323651452282157, "grad_norm": 7.137821674346924, "learning_rate": 2.5979714153988018e-05, "log_odds_chosen": 8.657133102416992, "log_odds_ratio": -0.005025691352784634, "logits/chosen": -0.5021120309829712, "logits/rejected": -0.5828627943992615, "logps/chosen": -0.002383360406383872, "logps/rejected": -1.3815230131149292, "loss": 1.4318, "nll_loss": 0.3574507534503937, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023833605519030243, "rewards/margins": 0.13791395723819733, "rewards/rejected": -0.13815230131149292, "step": 7698 }, { "epoch": 5.324343015214384, "grad_norm": 10.63414192199707, "learning_rate": 2.5975872137697867e-05, "log_odds_chosen": 10.593761444091797, "log_odds_ratio": -0.00010258699330734089, "logits/chosen": -0.7671674489974976, "logits/rejected": -0.883109986782074, "logps/chosen": -0.00022933653963264078, "logps/rejected": -1.8197206258773804, "loss": 0.804, "nll_loss": 0.2009904384613037, "rewards/accuracies": 1.0, "rewards/chosen": -2.2933651052881032e-05, "rewards/margins": 0.18194912374019623, "rewards/rejected": -0.18197205662727356, "step": 7699 }, { "epoch": 5.325034578146611, "grad_norm": 6.8750152587890625, "learning_rate": 2.5972030121407716e-05, "log_odds_chosen": 11.195669174194336, "log_odds_ratio": -0.00011681480100378394, "logits/chosen": 0.256054162979126, "logits/rejected": 0.06311047077178955, "logps/chosen": -0.0006633030134253204, "logps/rejected": -2.8899636268615723, "loss": 0.7191, "nll_loss": 0.17976562678813934, "rewards/accuracies": 1.0, "rewards/chosen": -6.633029988734052e-05, "rewards/margins": 0.2889300286769867, "rewards/rejected": -0.2889963388442993, "step": 7700 }, { "epoch": 5.325726141078838, "grad_norm": 9.112584114074707, "learning_rate": 2.596818810511757e-05, "log_odds_chosen": 8.883886337280273, "log_odds_ratio": -0.0013188114389777184, "logits/chosen": -0.27402448654174805, "logits/rejected": -0.3972470760345459, "logps/chosen": -0.0013482251670211554, "logps/rejected": -1.3245115280151367, "loss": 1.1057, "nll_loss": 0.2763024568557739, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013482251961249858, "rewards/margins": 0.13231633603572845, "rewards/rejected": -0.1324511468410492, "step": 7701 }, { "epoch": 5.326417704011065, "grad_norm": 6.682565212249756, "learning_rate": 2.5964346088827417e-05, "log_odds_chosen": 10.133316993713379, "log_odds_ratio": -0.00011569785419851542, "logits/chosen": -0.41242727637290955, "logits/rejected": -0.42250245809555054, "logps/chosen": -0.00038382125785574317, "logps/rejected": -2.057230234146118, "loss": 0.5205, "nll_loss": 0.13012166321277618, "rewards/accuracies": 1.0, "rewards/chosen": -3.8382124330382794e-05, "rewards/margins": 0.205684632062912, "rewards/rejected": -0.20572303235530853, "step": 7702 }, { "epoch": 5.327109266943292, "grad_norm": 7.458148002624512, "learning_rate": 2.5960504072537266e-05, "log_odds_chosen": 10.32491683959961, "log_odds_ratio": -6.329259485937655e-05, "logits/chosen": -0.15579473972320557, "logits/rejected": -0.2219676375389099, "logps/chosen": -0.0004390804679132998, "logps/rejected": -2.2576074600219727, "loss": 0.9177, "nll_loss": 0.22940751910209656, "rewards/accuracies": 1.0, "rewards/chosen": -4.390804679132998e-05, "rewards/margins": 0.22571685910224915, "rewards/rejected": -0.22576075792312622, "step": 7703 }, { "epoch": 5.327800829875518, "grad_norm": 6.013815402984619, "learning_rate": 2.5956662056247122e-05, "log_odds_chosen": 11.237785339355469, "log_odds_ratio": -2.3372362193185836e-05, "logits/chosen": -0.17733371257781982, "logits/rejected": -0.2612563967704773, "logps/chosen": -0.00018692499725148082, "logps/rejected": -2.361201763153076, "loss": 0.6259, "nll_loss": 0.1564616858959198, "rewards/accuracies": 1.0, "rewards/chosen": -1.869249899755232e-05, "rewards/margins": 0.23610147833824158, "rewards/rejected": -0.23612019419670105, "step": 7704 }, { "epoch": 5.328492392807745, "grad_norm": 9.530840873718262, "learning_rate": 2.595282003995697e-05, "log_odds_chosen": 10.308015823364258, "log_odds_ratio": -0.0008663894259370863, "logits/chosen": -0.428272008895874, "logits/rejected": -0.5346933603286743, "logps/chosen": -0.00248519005253911, "logps/rejected": -2.5558435916900635, "loss": 0.8536, "nll_loss": 0.21330133080482483, "rewards/accuracies": 1.0, "rewards/chosen": -0.000248519005253911, "rewards/margins": 0.25533586740493774, "rewards/rejected": -0.25558435916900635, "step": 7705 }, { "epoch": 5.329183955739972, "grad_norm": 11.138049125671387, "learning_rate": 2.594897802366682e-05, "log_odds_chosen": 9.924549102783203, "log_odds_ratio": -0.014975260011851788, "logits/chosen": -0.5903857946395874, "logits/rejected": -0.49607712030410767, "logps/chosen": -0.004796158522367477, "logps/rejected": -2.1003305912017822, "loss": 0.6667, "nll_loss": 0.1651792675256729, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004796158755198121, "rewards/margins": 0.20955345034599304, "rewards/rejected": -0.21003307402133942, "step": 7706 }, { "epoch": 5.329875518672199, "grad_norm": 12.312597274780273, "learning_rate": 2.5945136007376676e-05, "log_odds_chosen": 10.369390487670898, "log_odds_ratio": -0.00017965630104299635, "logits/chosen": -0.5108599662780762, "logits/rejected": -0.6373491883277893, "logps/chosen": -0.00028219789965078235, "logps/rejected": -2.1200790405273438, "loss": 0.8651, "nll_loss": 0.21625776588916779, "rewards/accuracies": 1.0, "rewards/chosen": -2.8219790692673996e-05, "rewards/margins": 0.2119797021150589, "rewards/rejected": -0.21200791001319885, "step": 7707 }, { "epoch": 5.330567081604426, "grad_norm": 9.67041301727295, "learning_rate": 2.5941293991086525e-05, "log_odds_chosen": 10.308876037597656, "log_odds_ratio": -0.00018406190793029964, "logits/chosen": -0.6033331751823425, "logits/rejected": -0.6974876523017883, "logps/chosen": -0.00024687196128070354, "logps/rejected": -1.9381176233291626, "loss": 0.8257, "nll_loss": 0.20640414953231812, "rewards/accuracies": 1.0, "rewards/chosen": -2.4687196855666116e-05, "rewards/margins": 0.193787083029747, "rewards/rejected": -0.19381175935268402, "step": 7708 }, { "epoch": 5.3312586445366525, "grad_norm": 10.314027786254883, "learning_rate": 2.5937451974796374e-05, "log_odds_chosen": 10.448393821716309, "log_odds_ratio": -0.0001520294463261962, "logits/chosen": -0.3476250171661377, "logits/rejected": -0.4068318009376526, "logps/chosen": -0.00025387192727066576, "logps/rejected": -2.4243698120117188, "loss": 0.9056, "nll_loss": 0.22639591991901398, "rewards/accuracies": 1.0, "rewards/chosen": -2.5387191271875054e-05, "rewards/margins": 0.24241161346435547, "rewards/rejected": -0.24243700504302979, "step": 7709 }, { "epoch": 5.331950207468879, "grad_norm": 4.918290615081787, "learning_rate": 2.5933609958506227e-05, "log_odds_chosen": 10.350467681884766, "log_odds_ratio": -0.00013474108709488064, "logits/chosen": -0.4226555824279785, "logits/rejected": -0.4367438554763794, "logps/chosen": -0.0006977645098231733, "logps/rejected": -2.2893643379211426, "loss": 0.5785, "nll_loss": 0.14459995925426483, "rewards/accuracies": 1.0, "rewards/chosen": -6.977644807193428e-05, "rewards/margins": 0.22886666655540466, "rewards/rejected": -0.22893644869327545, "step": 7710 }, { "epoch": 5.332641770401106, "grad_norm": 5.445563793182373, "learning_rate": 2.5929767942216076e-05, "log_odds_chosen": 10.40871810913086, "log_odds_ratio": -0.00029386027017608285, "logits/chosen": -0.039409562945365906, "logits/rejected": -0.16003747284412384, "logps/chosen": -0.0002922165731433779, "logps/rejected": -2.41654634475708, "loss": 1.0912, "nll_loss": 0.27277228236198425, "rewards/accuracies": 1.0, "rewards/chosen": -2.922165731433779e-05, "rewards/margins": 0.24162541329860687, "rewards/rejected": -0.24165461957454681, "step": 7711 }, { "epoch": 5.333333333333333, "grad_norm": 5.648160457611084, "learning_rate": 2.5925925925925925e-05, "log_odds_chosen": 10.892804145812988, "log_odds_ratio": -5.025168866268359e-05, "logits/chosen": -0.28121262788772583, "logits/rejected": -0.25881797075271606, "logps/chosen": -0.00030297267949208617, "logps/rejected": -2.5124268531799316, "loss": 0.4989, "nll_loss": 0.12472567707300186, "rewards/accuracies": 1.0, "rewards/chosen": -3.029726940440014e-05, "rewards/margins": 0.25121238827705383, "rewards/rejected": -0.2512426972389221, "step": 7712 }, { "epoch": 5.33402489626556, "grad_norm": 4.784796714782715, "learning_rate": 2.592208390963578e-05, "log_odds_chosen": 8.724172592163086, "log_odds_ratio": -0.004657501820474863, "logits/chosen": -0.30892983078956604, "logits/rejected": -0.35402122139930725, "logps/chosen": -0.00851732399314642, "logps/rejected": -1.452544927597046, "loss": 1.1248, "nll_loss": 0.28072503209114075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008517323876731098, "rewards/margins": 0.14440277218818665, "rewards/rejected": -0.14525450766086578, "step": 7713 }, { "epoch": 5.334716459197787, "grad_norm": 6.670574188232422, "learning_rate": 2.591824189334563e-05, "log_odds_chosen": 9.750624656677246, "log_odds_ratio": -0.00017071192269213498, "logits/chosen": -0.28526899218559265, "logits/rejected": -0.36482855677604675, "logps/chosen": -0.00034535228041931987, "logps/rejected": -1.7057344913482666, "loss": 0.8604, "nll_loss": 0.21508340537548065, "rewards/accuracies": 1.0, "rewards/chosen": -3.4535227314336225e-05, "rewards/margins": 0.17053891718387604, "rewards/rejected": -0.17057345807552338, "step": 7714 }, { "epoch": 5.3354080221300135, "grad_norm": 18.297407150268555, "learning_rate": 2.591439987705548e-05, "log_odds_chosen": 10.761435508728027, "log_odds_ratio": -6.893004319863394e-05, "logits/chosen": -0.8565076589584351, "logits/rejected": -0.8568417429924011, "logps/chosen": -0.0004628521273843944, "logps/rejected": -2.4840097427368164, "loss": 0.785, "nll_loss": 0.19624441862106323, "rewards/accuracies": 1.0, "rewards/chosen": -4.6285214921226725e-05, "rewards/margins": 0.2483547031879425, "rewards/rejected": -0.2484009861946106, "step": 7715 }, { "epoch": 5.33609958506224, "grad_norm": 10.90261459350586, "learning_rate": 2.5910557860765335e-05, "log_odds_chosen": 12.301939964294434, "log_odds_ratio": -5.705363946617581e-06, "logits/chosen": -0.38555872440338135, "logits/rejected": -0.37881430983543396, "logps/chosen": -0.00010718798876041546, "logps/rejected": -3.048621654510498, "loss": 0.9846, "nll_loss": 0.24614211916923523, "rewards/accuracies": 1.0, "rewards/chosen": -1.0718798876041546e-05, "rewards/margins": 0.3048514723777771, "rewards/rejected": -0.30486220121383667, "step": 7716 }, { "epoch": 5.336791147994467, "grad_norm": 10.90868091583252, "learning_rate": 2.5906715844475184e-05, "log_odds_chosen": 9.353933334350586, "log_odds_ratio": -0.00036395888309925795, "logits/chosen": -0.5651636719703674, "logits/rejected": -0.6099424958229065, "logps/chosen": -0.0007730339420959353, "logps/rejected": -1.5617212057113647, "loss": 0.9819, "nll_loss": 0.24544121325016022, "rewards/accuracies": 1.0, "rewards/chosen": -7.730339711997658e-05, "rewards/margins": 0.15609481930732727, "rewards/rejected": -0.15617212653160095, "step": 7717 }, { "epoch": 5.337482710926694, "grad_norm": 5.436877250671387, "learning_rate": 2.5902873828185033e-05, "log_odds_chosen": 10.302694320678711, "log_odds_ratio": -0.00012022980081383139, "logits/chosen": -0.3647574484348297, "logits/rejected": -0.4723901152610779, "logps/chosen": -0.0003554256691131741, "logps/rejected": -1.8892847299575806, "loss": 0.7578, "nll_loss": 0.1894378662109375, "rewards/accuracies": 1.0, "rewards/chosen": -3.5542565456125885e-05, "rewards/margins": 0.1888929307460785, "rewards/rejected": -0.18892847001552582, "step": 7718 }, { "epoch": 5.338174273858921, "grad_norm": 11.403399467468262, "learning_rate": 2.5899031811894885e-05, "log_odds_chosen": 11.144704818725586, "log_odds_ratio": -2.1073818061267957e-05, "logits/chosen": -0.6162378191947937, "logits/rejected": -0.6216781139373779, "logps/chosen": -0.00015070113295223564, "logps/rejected": -2.228614330291748, "loss": 0.6812, "nll_loss": 0.17030274868011475, "rewards/accuracies": 1.0, "rewards/chosen": -1.5070114386617206e-05, "rewards/margins": 0.2228463739156723, "rewards/rejected": -0.2228614240884781, "step": 7719 }, { "epoch": 5.338865836791148, "grad_norm": 9.804717063903809, "learning_rate": 2.5895189795604734e-05, "log_odds_chosen": 11.019902229309082, "log_odds_ratio": -3.0428185709752142e-05, "logits/chosen": -0.879685640335083, "logits/rejected": -0.9076402187347412, "logps/chosen": -0.0002764484379440546, "logps/rejected": -1.8999154567718506, "loss": 0.6518, "nll_loss": 0.1629561185836792, "rewards/accuracies": 1.0, "rewards/chosen": -2.7644842703011818e-05, "rewards/margins": 0.18996389210224152, "rewards/rejected": -0.1899915337562561, "step": 7720 }, { "epoch": 5.3395573997233745, "grad_norm": 9.187143325805664, "learning_rate": 2.5891347779314583e-05, "log_odds_chosen": 10.921390533447266, "log_odds_ratio": -2.9327873562579043e-05, "logits/chosen": -0.517184853553772, "logits/rejected": -0.597442626953125, "logps/chosen": -0.00017777850735001266, "logps/rejected": -2.066096782684326, "loss": 0.7333, "nll_loss": 0.18332186341285706, "rewards/accuracies": 1.0, "rewards/chosen": -1.777785291778855e-05, "rewards/margins": 0.2065919041633606, "rewards/rejected": -0.20660969614982605, "step": 7721 }, { "epoch": 5.340248962655601, "grad_norm": 5.599289894104004, "learning_rate": 2.588750576302444e-05, "log_odds_chosen": 10.52072525024414, "log_odds_ratio": -3.783634747378528e-05, "logits/chosen": -0.5504370927810669, "logits/rejected": -0.5719676613807678, "logps/chosen": -0.00017955718794837594, "logps/rejected": -1.7687039375305176, "loss": 0.6403, "nll_loss": 0.16006891429424286, "rewards/accuracies": 1.0, "rewards/chosen": -1.7955719158635475e-05, "rewards/margins": 0.17685243487358093, "rewards/rejected": -0.17687039077281952, "step": 7722 }, { "epoch": 5.340940525587828, "grad_norm": 6.724042892456055, "learning_rate": 2.5883663746734288e-05, "log_odds_chosen": 10.5478515625, "log_odds_ratio": -6.430500070564449e-05, "logits/chosen": -0.7374582886695862, "logits/rejected": -0.7794753909111023, "logps/chosen": -0.0006023314199410379, "logps/rejected": -2.344709634780884, "loss": 1.372, "nll_loss": 0.342988520860672, "rewards/accuracies": 1.0, "rewards/chosen": -6.0233145632082596e-05, "rewards/margins": 0.23441073298454285, "rewards/rejected": -0.23447097837924957, "step": 7723 }, { "epoch": 5.341632088520055, "grad_norm": 8.0201416015625, "learning_rate": 2.5879821730444137e-05, "log_odds_chosen": 11.215263366699219, "log_odds_ratio": -1.7243946786038578e-05, "logits/chosen": -0.5369030237197876, "logits/rejected": -0.5722092390060425, "logps/chosen": -0.0001140009262599051, "logps/rejected": -2.092883586883545, "loss": 0.7565, "nll_loss": 0.18912063539028168, "rewards/accuracies": 1.0, "rewards/chosen": -1.1400094081182033e-05, "rewards/margins": 0.2092769593000412, "rewards/rejected": -0.2092883586883545, "step": 7724 }, { "epoch": 5.342323651452282, "grad_norm": 5.6271209716796875, "learning_rate": 2.5875979714153993e-05, "log_odds_chosen": 9.305371284484863, "log_odds_ratio": -0.00024319568183273077, "logits/chosen": -0.46023258566856384, "logits/rejected": -0.4424994885921478, "logps/chosen": -0.0005978612462058663, "logps/rejected": -1.4785716533660889, "loss": 0.6067, "nll_loss": 0.15165482461452484, "rewards/accuracies": 1.0, "rewards/chosen": -5.97861289861612e-05, "rewards/margins": 0.1477973759174347, "rewards/rejected": -0.1478571742773056, "step": 7725 }, { "epoch": 5.343015214384509, "grad_norm": 8.000357627868652, "learning_rate": 2.5872137697863842e-05, "log_odds_chosen": 10.568313598632812, "log_odds_ratio": -6.980830221436918e-05, "logits/chosen": -0.7129371762275696, "logits/rejected": -0.7479875087738037, "logps/chosen": -0.0003442858287598938, "logps/rejected": -2.159754991531372, "loss": 0.5499, "nll_loss": 0.13745707273483276, "rewards/accuracies": 1.0, "rewards/chosen": -3.442858360358514e-05, "rewards/margins": 0.21594107151031494, "rewards/rejected": -0.21597550809383392, "step": 7726 }, { "epoch": 5.3437067773167355, "grad_norm": 11.518319129943848, "learning_rate": 2.586829568157369e-05, "log_odds_chosen": 11.02501392364502, "log_odds_ratio": -3.7570040149148554e-05, "logits/chosen": -0.9000644683837891, "logits/rejected": -1.0022200345993042, "logps/chosen": -0.00020582509750965983, "logps/rejected": -2.083603620529175, "loss": 1.2189, "nll_loss": 0.30470970273017883, "rewards/accuracies": 1.0, "rewards/chosen": -2.058250902337022e-05, "rewards/margins": 0.20833978056907654, "rewards/rejected": -0.20836035907268524, "step": 7727 }, { "epoch": 5.344398340248962, "grad_norm": 7.7212982177734375, "learning_rate": 2.5864453665283544e-05, "log_odds_chosen": 10.258532524108887, "log_odds_ratio": -0.00015420767886098474, "logits/chosen": -0.5833723545074463, "logits/rejected": -0.5658838152885437, "logps/chosen": -0.0007139868102967739, "logps/rejected": -2.0901403427124023, "loss": 0.9233, "nll_loss": 0.23081792891025543, "rewards/accuracies": 1.0, "rewards/chosen": -7.139868102967739e-05, "rewards/margins": 0.20894262194633484, "rewards/rejected": -0.20901402831077576, "step": 7728 }, { "epoch": 5.345089903181189, "grad_norm": 7.630891799926758, "learning_rate": 2.5860611648993393e-05, "log_odds_chosen": 11.153766632080078, "log_odds_ratio": -3.3938224078156054e-05, "logits/chosen": -0.83609539270401, "logits/rejected": -0.9128667116165161, "logps/chosen": -0.00012093692203052342, "logps/rejected": -2.11665415763855, "loss": 0.9034, "nll_loss": 0.22585429251194, "rewards/accuracies": 1.0, "rewards/chosen": -1.2093692021153402e-05, "rewards/margins": 0.21165332198143005, "rewards/rejected": -0.21166542172431946, "step": 7729 }, { "epoch": 5.345781466113416, "grad_norm": 4.917819976806641, "learning_rate": 2.5856769632703242e-05, "log_odds_chosen": 11.009265899658203, "log_odds_ratio": -0.0003466054331511259, "logits/chosen": -0.9085547924041748, "logits/rejected": -0.9079838395118713, "logps/chosen": -0.0005740531487390399, "logps/rejected": -2.7738754749298096, "loss": 0.4157, "nll_loss": 0.10390262305736542, "rewards/accuracies": 1.0, "rewards/chosen": -5.740531923947856e-05, "rewards/margins": 0.2773301601409912, "rewards/rejected": -0.2773875594139099, "step": 7730 }, { "epoch": 5.346473029045643, "grad_norm": 7.660555839538574, "learning_rate": 2.5852927616413098e-05, "log_odds_chosen": 10.09063720703125, "log_odds_ratio": -0.00017584474699106067, "logits/chosen": -0.5640292763710022, "logits/rejected": -0.5959721207618713, "logps/chosen": -0.0001486010878579691, "logps/rejected": -1.3079724311828613, "loss": 0.5152, "nll_loss": 0.12878504395484924, "rewards/accuracies": 1.0, "rewards/chosen": -1.486010842199903e-05, "rewards/margins": 0.1307823657989502, "rewards/rejected": -0.13079723715782166, "step": 7731 }, { "epoch": 5.34716459197787, "grad_norm": 9.647659301757812, "learning_rate": 2.5849085600122947e-05, "log_odds_chosen": 10.084922790527344, "log_odds_ratio": -0.0005560660501942039, "logits/chosen": -0.3433188796043396, "logits/rejected": -0.3567750155925751, "logps/chosen": -0.016425875946879387, "logps/rejected": -3.150747537612915, "loss": 0.8037, "nll_loss": 0.20086315274238586, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016425875946879387, "rewards/margins": 0.31343215703964233, "rewards/rejected": -0.31507474184036255, "step": 7732 }, { "epoch": 5.3478561549100965, "grad_norm": 8.467803955078125, "learning_rate": 2.5845243583832796e-05, "log_odds_chosen": 9.264129638671875, "log_odds_ratio": -0.0006215933244675398, "logits/chosen": -0.7157605290412903, "logits/rejected": -0.8103622794151306, "logps/chosen": -0.0010265086311846972, "logps/rejected": -1.4580941200256348, "loss": 0.7277, "nll_loss": 0.18185096979141235, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010265086166327819, "rewards/margins": 0.14570675790309906, "rewards/rejected": -0.14580941200256348, "step": 7733 }, { "epoch": 5.348547717842323, "grad_norm": 5.992339611053467, "learning_rate": 2.584140156754265e-05, "log_odds_chosen": 9.315893173217773, "log_odds_ratio": -0.0001914792082970962, "logits/chosen": -0.5987723469734192, "logits/rejected": -0.6892966628074646, "logps/chosen": -0.0003230468137189746, "logps/rejected": -1.2014394998550415, "loss": 0.4648, "nll_loss": 0.11617596447467804, "rewards/accuracies": 1.0, "rewards/chosen": -3.23046806443017e-05, "rewards/margins": 0.12011165916919708, "rewards/rejected": -0.12014396488666534, "step": 7734 }, { "epoch": 5.34923928077455, "grad_norm": 6.232851982116699, "learning_rate": 2.58375595512525e-05, "log_odds_chosen": 10.405027389526367, "log_odds_ratio": -0.00011137073306599632, "logits/chosen": -0.4917852282524109, "logits/rejected": -0.48989009857177734, "logps/chosen": -0.0002072631032206118, "logps/rejected": -1.7466228008270264, "loss": 0.8998, "nll_loss": 0.224933922290802, "rewards/accuracies": 1.0, "rewards/chosen": -2.0726312868646346e-05, "rewards/margins": 0.17464156448841095, "rewards/rejected": -0.1746622771024704, "step": 7735 }, { "epoch": 5.349930843706777, "grad_norm": 15.150169372558594, "learning_rate": 2.583371753496235e-05, "log_odds_chosen": 9.73324203491211, "log_odds_ratio": -0.00026712685939855874, "logits/chosen": -0.8724220395088196, "logits/rejected": -0.7735554575920105, "logps/chosen": -0.00018509995425119996, "logps/rejected": -1.3555516004562378, "loss": 0.9373, "nll_loss": 0.23430290818214417, "rewards/accuracies": 1.0, "rewards/chosen": -1.8509996152715757e-05, "rewards/margins": 0.13553665578365326, "rewards/rejected": -0.1355551779270172, "step": 7736 }, { "epoch": 5.350622406639004, "grad_norm": 45.932159423828125, "learning_rate": 2.5829875518672202e-05, "log_odds_chosen": 9.237135887145996, "log_odds_ratio": -0.13657166063785553, "logits/chosen": -0.49428650736808777, "logits/rejected": -0.5516210794448853, "logps/chosen": -0.03275838866829872, "logps/rejected": -2.303534984588623, "loss": 1.0329, "nll_loss": 0.24456636607646942, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032758391462266445, "rewards/margins": 0.2270776629447937, "rewards/rejected": -0.23035350441932678, "step": 7737 }, { "epoch": 5.351313969571231, "grad_norm": 10.989821434020996, "learning_rate": 2.582603350238205e-05, "log_odds_chosen": 10.879453659057617, "log_odds_ratio": -9.712464816402644e-05, "logits/chosen": -0.32066982984542847, "logits/rejected": -0.4816090166568756, "logps/chosen": -0.00041363947093486786, "logps/rejected": -2.4920613765716553, "loss": 0.6973, "nll_loss": 0.17431020736694336, "rewards/accuracies": 1.0, "rewards/chosen": -4.136395000386983e-05, "rewards/margins": 0.24916478991508484, "rewards/rejected": -0.24920612573623657, "step": 7738 }, { "epoch": 5.3520055325034575, "grad_norm": 7.630071640014648, "learning_rate": 2.58221914860919e-05, "log_odds_chosen": 10.645535469055176, "log_odds_ratio": -0.00012921505549456924, "logits/chosen": -0.3419490456581116, "logits/rejected": -0.4747019410133362, "logps/chosen": -0.0005621293094009161, "logps/rejected": -2.0010368824005127, "loss": 0.7174, "nll_loss": 0.17933741211891174, "rewards/accuracies": 1.0, "rewards/chosen": -5.621293530566618e-05, "rewards/margins": 0.20004747807979584, "rewards/rejected": -0.20010370016098022, "step": 7739 }, { "epoch": 5.352697095435684, "grad_norm": 11.037864685058594, "learning_rate": 2.5818349469801756e-05, "log_odds_chosen": 10.154804229736328, "log_odds_ratio": -8.114433148875833e-05, "logits/chosen": -0.5941439867019653, "logits/rejected": -0.7260634303092957, "logps/chosen": -0.00017833770834840834, "logps/rejected": -1.7538602352142334, "loss": 0.6191, "nll_loss": 0.1547648012638092, "rewards/accuracies": 1.0, "rewards/chosen": -1.7833772290032357e-05, "rewards/margins": 0.17536818981170654, "rewards/rejected": -0.17538602650165558, "step": 7740 }, { "epoch": 5.353388658367911, "grad_norm": 7.120156764984131, "learning_rate": 2.5814507453511605e-05, "log_odds_chosen": 10.758832931518555, "log_odds_ratio": -9.678566129878163e-05, "logits/chosen": -0.42007553577423096, "logits/rejected": -0.56074458360672, "logps/chosen": -0.00023292946571018547, "logps/rejected": -2.15881609916687, "loss": 0.7465, "nll_loss": 0.18660315871238708, "rewards/accuracies": 1.0, "rewards/chosen": -2.3292946934816428e-05, "rewards/margins": 0.2158583104610443, "rewards/rejected": -0.2158816009759903, "step": 7741 }, { "epoch": 5.354080221300138, "grad_norm": 5.136490345001221, "learning_rate": 2.5810665437221454e-05, "log_odds_chosen": 10.323856353759766, "log_odds_ratio": -9.061383025255054e-05, "logits/chosen": -0.5882817506790161, "logits/rejected": -0.6239702701568604, "logps/chosen": -0.001000424730591476, "logps/rejected": -2.1509175300598145, "loss": 0.7211, "nll_loss": 0.18025416135787964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010004247451433912, "rewards/margins": 0.2149917185306549, "rewards/rejected": -0.2150917649269104, "step": 7742 }, { "epoch": 5.354771784232365, "grad_norm": 11.205146789550781, "learning_rate": 2.580682342093131e-05, "log_odds_chosen": 9.922714233398438, "log_odds_ratio": -0.00022928789258003235, "logits/chosen": -0.7218578457832336, "logits/rejected": -0.817878007888794, "logps/chosen": -0.0009287429274991155, "logps/rejected": -2.1084470748901367, "loss": 1.1835, "nll_loss": 0.29585108160972595, "rewards/accuracies": 1.0, "rewards/chosen": -9.287429566029459e-05, "rewards/margins": 0.21075184643268585, "rewards/rejected": -0.2108447253704071, "step": 7743 }, { "epoch": 5.355463347164592, "grad_norm": 14.219990730285645, "learning_rate": 2.580298140464116e-05, "log_odds_chosen": 10.053268432617188, "log_odds_ratio": -0.0001390389952575788, "logits/chosen": -0.5563762187957764, "logits/rejected": -0.5396439433097839, "logps/chosen": -0.0015786489238962531, "logps/rejected": -2.2421164512634277, "loss": 0.9543, "nll_loss": 0.23855391144752502, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015786488074809313, "rewards/margins": 0.2240537703037262, "rewards/rejected": -0.22421163320541382, "step": 7744 }, { "epoch": 5.356154910096818, "grad_norm": 7.2350358963012695, "learning_rate": 2.5799139388351008e-05, "log_odds_chosen": 9.452655792236328, "log_odds_ratio": -0.0013515216996893287, "logits/chosen": -0.33016490936279297, "logits/rejected": -0.45986154675483704, "logps/chosen": -0.004266166128218174, "logps/rejected": -1.9477782249450684, "loss": 0.6906, "nll_loss": 0.1725178062915802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004266166070010513, "rewards/margins": 0.1943511962890625, "rewards/rejected": -0.19477781653404236, "step": 7745 }, { "epoch": 5.356846473029045, "grad_norm": 6.503514289855957, "learning_rate": 2.579529737206086e-05, "log_odds_chosen": 9.286174774169922, "log_odds_ratio": -0.0001534043112769723, "logits/chosen": -0.6407766342163086, "logits/rejected": -0.6709181666374207, "logps/chosen": -0.00034943222999572754, "logps/rejected": -1.4314804077148438, "loss": 0.6638, "nll_loss": 0.16594256460666656, "rewards/accuracies": 1.0, "rewards/chosen": -3.4943222999572754e-05, "rewards/margins": 0.14311309158802032, "rewards/rejected": -0.1431480348110199, "step": 7746 }, { "epoch": 5.357538035961272, "grad_norm": 5.2143120765686035, "learning_rate": 2.579145535577071e-05, "log_odds_chosen": 10.401906967163086, "log_odds_ratio": -0.0001116981657105498, "logits/chosen": -0.5172398686408997, "logits/rejected": -0.5327882170677185, "logps/chosen": -0.00015428580809384584, "logps/rejected": -1.7058733701705933, "loss": 0.7849, "nll_loss": 0.19621866941452026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5428580809384584e-05, "rewards/margins": 0.17057189345359802, "rewards/rejected": -0.17058733105659485, "step": 7747 }, { "epoch": 5.358229598893499, "grad_norm": 9.994341850280762, "learning_rate": 2.578761333948056e-05, "log_odds_chosen": 10.679950714111328, "log_odds_ratio": -4.546328273136169e-05, "logits/chosen": -0.8168638944625854, "logits/rejected": -0.8961727023124695, "logps/chosen": -0.00011033992632292211, "logps/rejected": -1.660325527191162, "loss": 0.6641, "nll_loss": 0.16603140532970428, "rewards/accuracies": 1.0, "rewards/chosen": -1.1033993359887972e-05, "rewards/margins": 0.16602152585983276, "rewards/rejected": -0.1660325527191162, "step": 7748 }, { "epoch": 5.358921161825726, "grad_norm": 5.2905168533325195, "learning_rate": 2.5783771323190414e-05, "log_odds_chosen": 10.401647567749023, "log_odds_ratio": -6.03256412432529e-05, "logits/chosen": -0.7885475158691406, "logits/rejected": -0.825690746307373, "logps/chosen": -0.0001715484686428681, "logps/rejected": -1.7799302339553833, "loss": 0.633, "nll_loss": 0.1582336723804474, "rewards/accuracies": 1.0, "rewards/chosen": -1.715484722808469e-05, "rewards/margins": 0.1779758632183075, "rewards/rejected": -0.1779930293560028, "step": 7749 }, { "epoch": 5.359612724757953, "grad_norm": 8.384246826171875, "learning_rate": 2.5779929306900264e-05, "log_odds_chosen": 9.969493865966797, "log_odds_ratio": -0.0010337287094444036, "logits/chosen": -0.5449033379554749, "logits/rejected": -0.5999699831008911, "logps/chosen": -0.003291376167908311, "logps/rejected": -1.331317663192749, "loss": 0.7406, "nll_loss": 0.1850452423095703, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003291376051492989, "rewards/margins": 0.13280263543128967, "rewards/rejected": -0.13313177227973938, "step": 7750 }, { "epoch": 5.360304287690179, "grad_norm": 11.273735046386719, "learning_rate": 2.5776087290610113e-05, "log_odds_chosen": 9.195895195007324, "log_odds_ratio": -0.009233876131474972, "logits/chosen": -0.07361237704753876, "logits/rejected": -0.27805620431900024, "logps/chosen": -0.0033656263258308172, "logps/rejected": -1.2893497943878174, "loss": 1.0303, "nll_loss": 0.25664210319519043, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003365626325830817, "rewards/margins": 0.1285984218120575, "rewards/rejected": -0.12893497943878174, "step": 7751 }, { "epoch": 5.360995850622406, "grad_norm": 5.421497344970703, "learning_rate": 2.577224527431996e-05, "log_odds_chosen": 10.941364288330078, "log_odds_ratio": -0.00016141105152200907, "logits/chosen": -0.5322350263595581, "logits/rejected": -0.5843270421028137, "logps/chosen": -0.00016066545504145324, "logps/rejected": -2.216162919998169, "loss": 0.6924, "nll_loss": 0.173092782497406, "rewards/accuracies": 1.0, "rewards/chosen": -1.6066545867943205e-05, "rewards/margins": 0.22160020470619202, "rewards/rejected": -0.22161628305912018, "step": 7752 }, { "epoch": 5.361687413554633, "grad_norm": 15.62678337097168, "learning_rate": 2.5768403258029817e-05, "log_odds_chosen": 10.776522636413574, "log_odds_ratio": -0.0001856583112385124, "logits/chosen": -0.1284799724817276, "logits/rejected": -0.24484391510486603, "logps/chosen": -0.00035133378696627915, "logps/rejected": -2.2453131675720215, "loss": 0.9448, "nll_loss": 0.23618479073047638, "rewards/accuracies": 1.0, "rewards/chosen": -3.513338015181944e-05, "rewards/margins": 0.22449618577957153, "rewards/rejected": -0.22453130781650543, "step": 7753 }, { "epoch": 5.36237897648686, "grad_norm": 7.678762912750244, "learning_rate": 2.5764561241739667e-05, "log_odds_chosen": 9.862929344177246, "log_odds_ratio": -7.821543113095686e-05, "logits/chosen": -0.69775390625, "logits/rejected": -0.7313964366912842, "logps/chosen": -0.0011458772933110595, "logps/rejected": -1.9824557304382324, "loss": 0.8888, "nll_loss": 0.22219723463058472, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011458773224148899, "rewards/margins": 0.19813098013401031, "rewards/rejected": -0.198245570063591, "step": 7754 }, { "epoch": 5.363070539419087, "grad_norm": 5.978641986846924, "learning_rate": 2.5760719225449516e-05, "log_odds_chosen": 9.407076835632324, "log_odds_ratio": -0.001449758536182344, "logits/chosen": -0.5870097875595093, "logits/rejected": -0.5762468576431274, "logps/chosen": -0.0018696343759074807, "logps/rejected": -1.773676872253418, "loss": 0.8295, "nll_loss": 0.20723845064640045, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001869634579634294, "rewards/margins": 0.17718073725700378, "rewards/rejected": -0.1773676872253418, "step": 7755 }, { "epoch": 5.363762102351314, "grad_norm": 10.33115291595459, "learning_rate": 2.5756877209159368e-05, "log_odds_chosen": 10.442926406860352, "log_odds_ratio": -0.00020072895858902484, "logits/chosen": -0.6961485147476196, "logits/rejected": -0.6396393775939941, "logps/chosen": -0.0009271060116589069, "logps/rejected": -2.2403993606567383, "loss": 0.8586, "nll_loss": 0.2146410495042801, "rewards/accuracies": 1.0, "rewards/chosen": -9.27106011658907e-05, "rewards/margins": 0.22394722700119019, "rewards/rejected": -0.2240399271249771, "step": 7756 }, { "epoch": 5.36445366528354, "grad_norm": 7.273455619812012, "learning_rate": 2.5753035192869217e-05, "log_odds_chosen": 9.373745918273926, "log_odds_ratio": -0.0005260632606223226, "logits/chosen": -0.5656231045722961, "logits/rejected": -0.5640733242034912, "logps/chosen": -0.0008249300881288946, "logps/rejected": -1.4367049932479858, "loss": 0.8845, "nll_loss": 0.22107894718647003, "rewards/accuracies": 1.0, "rewards/chosen": -8.24930175440386e-05, "rewards/margins": 0.14358802139759064, "rewards/rejected": -0.14367049932479858, "step": 7757 }, { "epoch": 5.365145228215767, "grad_norm": 6.712815761566162, "learning_rate": 2.5749193176579066e-05, "log_odds_chosen": 10.585709571838379, "log_odds_ratio": -8.377588528674096e-05, "logits/chosen": -0.11127348244190216, "logits/rejected": -0.2184278815984726, "logps/chosen": -0.0002512220526114106, "logps/rejected": -2.0447144508361816, "loss": 0.7724, "nll_loss": 0.19309033453464508, "rewards/accuracies": 1.0, "rewards/chosen": -2.51222045335453e-05, "rewards/margins": 0.20444634556770325, "rewards/rejected": -0.20447146892547607, "step": 7758 }, { "epoch": 5.365836791147994, "grad_norm": 7.707136154174805, "learning_rate": 2.5745351160288922e-05, "log_odds_chosen": 10.819369316101074, "log_odds_ratio": -4.1901796066667885e-05, "logits/chosen": -0.35066330432891846, "logits/rejected": -0.33344167470932007, "logps/chosen": -0.0001883797231130302, "logps/rejected": -1.9678874015808105, "loss": 0.6437, "nll_loss": 0.1609291285276413, "rewards/accuracies": 1.0, "rewards/chosen": -1.88379726751009e-05, "rewards/margins": 0.19676992297172546, "rewards/rejected": -0.1967887580394745, "step": 7759 }, { "epoch": 5.366528354080221, "grad_norm": 10.013188362121582, "learning_rate": 2.574150914399877e-05, "log_odds_chosen": 10.439899444580078, "log_odds_ratio": -9.335255163023248e-05, "logits/chosen": -0.6593368053436279, "logits/rejected": -0.7114875316619873, "logps/chosen": -0.00029753416310995817, "logps/rejected": -1.846423625946045, "loss": 1.0063, "nll_loss": 0.25155696272850037, "rewards/accuracies": 1.0, "rewards/chosen": -2.975341703859158e-05, "rewards/margins": 0.18461261689662933, "rewards/rejected": -0.18464237451553345, "step": 7760 }, { "epoch": 5.367219917012449, "grad_norm": 12.000226020812988, "learning_rate": 2.573766712770862e-05, "log_odds_chosen": 9.719730377197266, "log_odds_ratio": -0.0006029088981449604, "logits/chosen": -0.8230023980140686, "logits/rejected": -0.8593472242355347, "logps/chosen": -0.0007920662756077945, "logps/rejected": -1.9467694759368896, "loss": 0.8142, "nll_loss": 0.20348705351352692, "rewards/accuracies": 1.0, "rewards/chosen": -7.920662756077945e-05, "rewards/margins": 0.1945977509021759, "rewards/rejected": -0.1946769654750824, "step": 7761 }, { "epoch": 5.367911479944675, "grad_norm": 8.159753799438477, "learning_rate": 2.5733825111418476e-05, "log_odds_chosen": 9.705583572387695, "log_odds_ratio": -0.00017643548198975623, "logits/chosen": -0.550877571105957, "logits/rejected": -0.6108225584030151, "logps/chosen": -0.0006110378890298307, "logps/rejected": -1.8862289190292358, "loss": 0.6646, "nll_loss": 0.16613613069057465, "rewards/accuracies": 1.0, "rewards/chosen": -6.110379035817459e-05, "rewards/margins": 0.18856178224086761, "rewards/rejected": -0.18862289190292358, "step": 7762 }, { "epoch": 5.368603042876902, "grad_norm": 7.820406436920166, "learning_rate": 2.5729983095128325e-05, "log_odds_chosen": 9.865856170654297, "log_odds_ratio": -0.0018542808247730136, "logits/chosen": -0.38348299264907837, "logits/rejected": -0.452808141708374, "logps/chosen": -0.0012067710049450397, "logps/rejected": -2.1156463623046875, "loss": 0.8963, "nll_loss": 0.2238771766424179, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012067711213603616, "rewards/margins": 0.2114439606666565, "rewards/rejected": -0.21156466007232666, "step": 7763 }, { "epoch": 5.369294605809129, "grad_norm": 14.397409439086914, "learning_rate": 2.5726141078838174e-05, "log_odds_chosen": 10.043509483337402, "log_odds_ratio": -0.00018490861111786216, "logits/chosen": -0.34730908274650574, "logits/rejected": -0.4017433524131775, "logps/chosen": -0.0008323953370563686, "logps/rejected": -2.294076919555664, "loss": 1.3105, "nll_loss": 0.3275982439517975, "rewards/accuracies": 1.0, "rewards/chosen": -8.323953079525381e-05, "rewards/margins": 0.22932444512844086, "rewards/rejected": -0.2294076830148697, "step": 7764 }, { "epoch": 5.369986168741356, "grad_norm": 6.62226676940918, "learning_rate": 2.5722299062548026e-05, "log_odds_chosen": 9.916425704956055, "log_odds_ratio": -0.0005616866401396692, "logits/chosen": -0.20786112546920776, "logits/rejected": -0.32927942276000977, "logps/chosen": -0.0004251671489328146, "logps/rejected": -1.9319912195205688, "loss": 1.6643, "nll_loss": 0.4160114526748657, "rewards/accuracies": 1.0, "rewards/chosen": -4.251671634847298e-05, "rewards/margins": 0.19315659999847412, "rewards/rejected": -0.19319912791252136, "step": 7765 }, { "epoch": 5.370677731673583, "grad_norm": 11.139594078063965, "learning_rate": 2.5718457046257876e-05, "log_odds_chosen": 10.382858276367188, "log_odds_ratio": -0.00011644057667581365, "logits/chosen": -0.2646900415420532, "logits/rejected": -0.3348369300365448, "logps/chosen": -0.0034151228610426188, "logps/rejected": -2.536651134490967, "loss": 1.7052, "nll_loss": 0.4263002276420593, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003415122628211975, "rewards/margins": 0.25332361459732056, "rewards/rejected": -0.25366511940956116, "step": 7766 }, { "epoch": 5.37136929460581, "grad_norm": 7.636089324951172, "learning_rate": 2.5714615029967725e-05, "log_odds_chosen": 10.559492111206055, "log_odds_ratio": -0.00017273104458581656, "logits/chosen": -0.10843844711780548, "logits/rejected": -0.2018280327320099, "logps/chosen": -0.000510864017996937, "logps/rejected": -2.173729658126831, "loss": 0.6438, "nll_loss": 0.1609428972005844, "rewards/accuracies": 1.0, "rewards/chosen": -5.1086408348055556e-05, "rewards/margins": 0.21732190251350403, "rewards/rejected": -0.21737296879291534, "step": 7767 }, { "epoch": 5.372060857538036, "grad_norm": 8.192671775817871, "learning_rate": 2.571077301367758e-05, "log_odds_chosen": 10.112607955932617, "log_odds_ratio": -0.00016352730744984, "logits/chosen": -0.7211679220199585, "logits/rejected": -0.739108681678772, "logps/chosen": -0.0004298785061109811, "logps/rejected": -1.8741475343704224, "loss": 0.918, "nll_loss": 0.22947227954864502, "rewards/accuracies": 1.0, "rewards/chosen": -4.298784915590659e-05, "rewards/margins": 0.18737177550792694, "rewards/rejected": -0.18741475045681, "step": 7768 }, { "epoch": 5.372752420470263, "grad_norm": 9.19978141784668, "learning_rate": 2.570693099738743e-05, "log_odds_chosen": 9.75778865814209, "log_odds_ratio": -0.0023856342304497957, "logits/chosen": -0.2091895490884781, "logits/rejected": -0.2485579550266266, "logps/chosen": -0.0016922859940677881, "logps/rejected": -2.306070327758789, "loss": 0.7422, "nll_loss": 0.18530671298503876, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016922858776524663, "rewards/margins": 0.23043778538703918, "rewards/rejected": -0.2306070327758789, "step": 7769 }, { "epoch": 5.37344398340249, "grad_norm": 10.220325469970703, "learning_rate": 2.570308898109728e-05, "log_odds_chosen": 10.165271759033203, "log_odds_ratio": -0.00017575306992512196, "logits/chosen": -0.8035585880279541, "logits/rejected": -0.88808673620224, "logps/chosen": -0.0012654714519158006, "logps/rejected": -2.365483283996582, "loss": 0.783, "nll_loss": 0.19573785364627838, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012654715101234615, "rewards/margins": 0.23642179369926453, "rewards/rejected": -0.23654834926128387, "step": 7770 }, { "epoch": 5.374135546334717, "grad_norm": 5.651019096374512, "learning_rate": 2.5699246964807134e-05, "log_odds_chosen": 11.098930358886719, "log_odds_ratio": -2.716020208026748e-05, "logits/chosen": -0.5472127199172974, "logits/rejected": -0.569158673286438, "logps/chosen": -0.0001069469508365728, "logps/rejected": -1.6667897701263428, "loss": 0.9309, "nll_loss": 0.2327098548412323, "rewards/accuracies": 1.0, "rewards/chosen": -1.069469544745516e-05, "rewards/margins": 0.16666828095912933, "rewards/rejected": -0.16667896509170532, "step": 7771 }, { "epoch": 5.374827109266944, "grad_norm": 7.243309497833252, "learning_rate": 2.5695404948516983e-05, "log_odds_chosen": 10.71410083770752, "log_odds_ratio": -0.00019144202815368772, "logits/chosen": -0.5720975399017334, "logits/rejected": -0.6623126864433289, "logps/chosen": -0.0006748468731530011, "logps/rejected": -2.124738931655884, "loss": 0.6301, "nll_loss": 0.15749889612197876, "rewards/accuracies": 1.0, "rewards/chosen": -6.748468149453402e-05, "rewards/margins": 0.21240642666816711, "rewards/rejected": -0.21247389912605286, "step": 7772 }, { "epoch": 5.375518672199171, "grad_norm": 7.8964924812316895, "learning_rate": 2.5691562932226832e-05, "log_odds_chosen": 10.098054885864258, "log_odds_ratio": -0.00017532003403175622, "logits/chosen": -0.8081597089767456, "logits/rejected": -0.803846001625061, "logps/chosen": -0.0003364419681020081, "logps/rejected": -1.8087891340255737, "loss": 1.2479, "nll_loss": 0.3119489550590515, "rewards/accuracies": 1.0, "rewards/chosen": -3.364419535500929e-05, "rewards/margins": 0.1808452606201172, "rewards/rejected": -0.1808789074420929, "step": 7773 }, { "epoch": 5.376210235131397, "grad_norm": 7.9021782875061035, "learning_rate": 2.5687720915936685e-05, "log_odds_chosen": 10.178018569946289, "log_odds_ratio": -0.00031960944761522114, "logits/chosen": -0.47578150033950806, "logits/rejected": -0.4738832116127014, "logps/chosen": -0.00027857403620146215, "logps/rejected": -2.0880024433135986, "loss": 1.0168, "nll_loss": 0.2541689872741699, "rewards/accuracies": 1.0, "rewards/chosen": -2.7857404347741976e-05, "rewards/margins": 0.20877239108085632, "rewards/rejected": -0.20880025625228882, "step": 7774 }, { "epoch": 5.376901798063624, "grad_norm": 7.102752685546875, "learning_rate": 2.5683878899646534e-05, "log_odds_chosen": 11.256190299987793, "log_odds_ratio": -5.332418731995858e-05, "logits/chosen": -0.4997277855873108, "logits/rejected": -0.6343384981155396, "logps/chosen": -0.000341162143740803, "logps/rejected": -2.9551258087158203, "loss": 0.9257, "nll_loss": 0.23142942786216736, "rewards/accuracies": 1.0, "rewards/chosen": -3.411621582927182e-05, "rewards/margins": 0.295478492975235, "rewards/rejected": -0.2955126166343689, "step": 7775 }, { "epoch": 5.377593360995851, "grad_norm": 8.244155883789062, "learning_rate": 2.5680036883356383e-05, "log_odds_chosen": 10.558370590209961, "log_odds_ratio": -6.264346302486956e-05, "logits/chosen": -0.0697668194770813, "logits/rejected": -0.22308999300003052, "logps/chosen": -0.01101109478622675, "logps/rejected": -2.8141188621520996, "loss": 0.9669, "nll_loss": 0.2417076975107193, "rewards/accuracies": 1.0, "rewards/chosen": -0.001101109548471868, "rewards/margins": 0.28031080961227417, "rewards/rejected": -0.28141191601753235, "step": 7776 }, { "epoch": 5.378284923928078, "grad_norm": 7.367611408233643, "learning_rate": 2.567619486706624e-05, "log_odds_chosen": 9.765209197998047, "log_odds_ratio": -0.0051423623226583, "logits/chosen": -0.8637277483940125, "logits/rejected": -0.8014044761657715, "logps/chosen": -0.0039863623678684235, "logps/rejected": -2.177565336227417, "loss": 0.7636, "nll_loss": 0.19038967788219452, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003986362717114389, "rewards/margins": 0.21735788881778717, "rewards/rejected": -0.21775653958320618, "step": 7777 }, { "epoch": 5.378976486860305, "grad_norm": 8.670293807983398, "learning_rate": 2.5672352850776088e-05, "log_odds_chosen": 10.498734474182129, "log_odds_ratio": -0.001384987379424274, "logits/chosen": -0.26026180386543274, "logits/rejected": -0.41038990020751953, "logps/chosen": -0.0014264382189139724, "logps/rejected": -2.3605895042419434, "loss": 0.9921, "nll_loss": 0.2478928565979004, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014264382480178028, "rewards/margins": 0.23591631650924683, "rewards/rejected": -0.23605896532535553, "step": 7778 }, { "epoch": 5.3796680497925315, "grad_norm": 4.998514652252197, "learning_rate": 2.5668510834485937e-05, "log_odds_chosen": 10.203685760498047, "log_odds_ratio": -0.03531178459525108, "logits/chosen": -0.11789742857217789, "logits/rejected": -0.19914287328720093, "logps/chosen": -0.008793053217232227, "logps/rejected": -2.459259510040283, "loss": 0.6583, "nll_loss": 0.16104447841644287, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008793053566478193, "rewards/margins": 0.24504666030406952, "rewards/rejected": -0.24592596292495728, "step": 7779 }, { "epoch": 5.380359612724758, "grad_norm": 9.771400451660156, "learning_rate": 2.5664668818195793e-05, "log_odds_chosen": 10.093647956848145, "log_odds_ratio": -0.0003640882787294686, "logits/chosen": -0.3619929552078247, "logits/rejected": -0.4933694899082184, "logps/chosen": -0.003856194205582142, "logps/rejected": -2.7831075191497803, "loss": 0.9292, "nll_loss": 0.2322622537612915, "rewards/accuracies": 1.0, "rewards/chosen": -0.000385619408916682, "rewards/margins": 0.27792513370513916, "rewards/rejected": -0.27831077575683594, "step": 7780 }, { "epoch": 5.381051175656985, "grad_norm": 5.72971773147583, "learning_rate": 2.5660826801905642e-05, "log_odds_chosen": 10.461682319641113, "log_odds_ratio": -0.0001288391649723053, "logits/chosen": -0.6851260662078857, "logits/rejected": -0.5563321709632874, "logps/chosen": -0.00017066244618035853, "logps/rejected": -1.617461919784546, "loss": 0.4388, "nll_loss": 0.10969004780054092, "rewards/accuracies": 1.0, "rewards/chosen": -1.7066246073227376e-05, "rewards/margins": 0.161729097366333, "rewards/rejected": -0.16174617409706116, "step": 7781 }, { "epoch": 5.381742738589212, "grad_norm": 6.927793025970459, "learning_rate": 2.565698478561549e-05, "log_odds_chosen": 10.049091339111328, "log_odds_ratio": -0.0001307661586906761, "logits/chosen": -0.46505558490753174, "logits/rejected": -0.5025767087936401, "logps/chosen": -0.0006050034426152706, "logps/rejected": -1.6383215188980103, "loss": 0.5501, "nll_loss": 0.13750982284545898, "rewards/accuracies": 1.0, "rewards/chosen": -6.050034789950587e-05, "rewards/margins": 0.1637716442346573, "rewards/rejected": -0.1638321429491043, "step": 7782 }, { "epoch": 5.382434301521439, "grad_norm": 7.09966516494751, "learning_rate": 2.5653142769325343e-05, "log_odds_chosen": 9.871310234069824, "log_odds_ratio": -0.00041894649621099234, "logits/chosen": -0.6862342357635498, "logits/rejected": -0.7124555706977844, "logps/chosen": -0.0006207457045093179, "logps/rejected": -2.264399766921997, "loss": 0.7967, "nll_loss": 0.19914031028747559, "rewards/accuracies": 1.0, "rewards/chosen": -6.207457045093179e-05, "rewards/margins": 0.22637790441513062, "rewards/rejected": -0.226439967751503, "step": 7783 }, { "epoch": 5.383125864453666, "grad_norm": 10.186842918395996, "learning_rate": 2.5649300753035192e-05, "log_odds_chosen": 11.019811630249023, "log_odds_ratio": -0.00015502631140407175, "logits/chosen": -0.20022788643836975, "logits/rejected": -0.2286909967660904, "logps/chosen": -0.000486434088088572, "logps/rejected": -2.827481508255005, "loss": 1.0997, "nll_loss": 0.2749202251434326, "rewards/accuracies": 1.0, "rewards/chosen": -4.864340735366568e-05, "rewards/margins": 0.2826995253562927, "rewards/rejected": -0.28274816274642944, "step": 7784 }, { "epoch": 5.3838174273858925, "grad_norm": 11.512709617614746, "learning_rate": 2.564545873674504e-05, "log_odds_chosen": 11.191658020019531, "log_odds_ratio": -2.234236671938561e-05, "logits/chosen": -0.6829967498779297, "logits/rejected": -0.7049492001533508, "logps/chosen": -0.00013569237489718944, "logps/rejected": -2.2471554279327393, "loss": 0.743, "nll_loss": 0.1857379525899887, "rewards/accuracies": 1.0, "rewards/chosen": -1.3569238944910467e-05, "rewards/margins": 0.22470197081565857, "rewards/rejected": -0.22471553087234497, "step": 7785 }, { "epoch": 5.384508990318119, "grad_norm": 8.908424377441406, "learning_rate": 2.5641616720454897e-05, "log_odds_chosen": 8.431398391723633, "log_odds_ratio": -0.018461300060153008, "logits/chosen": -0.46144843101501465, "logits/rejected": -0.542721688747406, "logps/chosen": -0.006432386115193367, "logps/rejected": -1.5983421802520752, "loss": 1.1868, "nll_loss": 0.2948574721813202, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006432385998778045, "rewards/margins": 0.15919098258018494, "rewards/rejected": -0.15983420610427856, "step": 7786 }, { "epoch": 5.385200553250346, "grad_norm": 10.535396575927734, "learning_rate": 2.5637774704164746e-05, "log_odds_chosen": 11.600221633911133, "log_odds_ratio": -2.1177609596634284e-05, "logits/chosen": -0.837311863899231, "logits/rejected": -0.8921162486076355, "logps/chosen": -0.0001190628536278382, "logps/rejected": -2.426147937774658, "loss": 0.8309, "nll_loss": 0.2077161967754364, "rewards/accuracies": 1.0, "rewards/chosen": -1.1906286090379581e-05, "rewards/margins": 0.24260291457176208, "rewards/rejected": -0.24261482059955597, "step": 7787 }, { "epoch": 5.385892116182573, "grad_norm": 7.593047618865967, "learning_rate": 2.5633932687874595e-05, "log_odds_chosen": 9.907499313354492, "log_odds_ratio": -0.00018442222790326923, "logits/chosen": -0.50394207239151, "logits/rejected": -0.508385956287384, "logps/chosen": -0.00045533262891694903, "logps/rejected": -1.5362834930419922, "loss": 0.6708, "nll_loss": 0.1676906943321228, "rewards/accuracies": 1.0, "rewards/chosen": -4.55332628916949e-05, "rewards/margins": 0.15358281135559082, "rewards/rejected": -0.15362833440303802, "step": 7788 }, { "epoch": 5.3865836791148, "grad_norm": 6.909388542175293, "learning_rate": 2.563009067158445e-05, "log_odds_chosen": 10.73196029663086, "log_odds_ratio": -8.174381946446374e-05, "logits/chosen": 0.10891541838645935, "logits/rejected": 0.029914073646068573, "logps/chosen": -0.00043416203698143363, "logps/rejected": -2.2666399478912354, "loss": 0.6813, "nll_loss": 0.1703110784292221, "rewards/accuracies": 1.0, "rewards/chosen": -4.341620660852641e-05, "rewards/margins": 0.2266205996274948, "rewards/rejected": -0.2266639769077301, "step": 7789 }, { "epoch": 5.387275242047027, "grad_norm": 5.53429651260376, "learning_rate": 2.56262486552943e-05, "log_odds_chosen": 9.661088943481445, "log_odds_ratio": -0.0002739218180067837, "logits/chosen": 0.0766930878162384, "logits/rejected": 0.019603468477725983, "logps/chosen": -0.0010696876561269164, "logps/rejected": -2.4559895992279053, "loss": 0.8723, "nll_loss": 0.2180538773536682, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010696877870941535, "rewards/margins": 0.24549199640750885, "rewards/rejected": -0.24559897184371948, "step": 7790 }, { "epoch": 5.3879668049792535, "grad_norm": 6.531269073486328, "learning_rate": 2.562240663900415e-05, "log_odds_chosen": 9.580755233764648, "log_odds_ratio": -0.00013173968181945384, "logits/chosen": -0.1554318070411682, "logits/rejected": -0.27889397740364075, "logps/chosen": -0.0011233543045818806, "logps/rejected": -1.9057151079177856, "loss": 0.742, "nll_loss": 0.1854807585477829, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011233543045818806, "rewards/margins": 0.19045919179916382, "rewards/rejected": -0.19057151675224304, "step": 7791 }, { "epoch": 5.38865836791148, "grad_norm": 9.96691608428955, "learning_rate": 2.5618564622714002e-05, "log_odds_chosen": 10.92404842376709, "log_odds_ratio": -5.517835961654782e-05, "logits/chosen": -0.2640071511268616, "logits/rejected": -0.3610392212867737, "logps/chosen": -0.00020617686095647514, "logps/rejected": -2.3490943908691406, "loss": 0.7292, "nll_loss": 0.18229231238365173, "rewards/accuracies": 1.0, "rewards/chosen": -2.0617686459445395e-05, "rewards/margins": 0.23488885164260864, "rewards/rejected": -0.23490947484970093, "step": 7792 }, { "epoch": 5.389349930843707, "grad_norm": 10.167924880981445, "learning_rate": 2.561472260642385e-05, "log_odds_chosen": 10.396146774291992, "log_odds_ratio": -0.0002577454433776438, "logits/chosen": -0.1070237010717392, "logits/rejected": -0.10790125280618668, "logps/chosen": -0.0011379396310076118, "logps/rejected": -2.7146997451782227, "loss": 0.5973, "nll_loss": 0.14929774403572083, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011379396892152727, "rewards/margins": 0.2713561952114105, "rewards/rejected": -0.27146998047828674, "step": 7793 }, { "epoch": 5.390041493775934, "grad_norm": 8.253470420837402, "learning_rate": 2.56108805901337e-05, "log_odds_chosen": 10.578184127807617, "log_odds_ratio": -6.63495811750181e-05, "logits/chosen": -0.11157053709030151, "logits/rejected": -0.1310674250125885, "logps/chosen": -0.000345208594808355, "logps/rejected": -1.878300428390503, "loss": 0.7129, "nll_loss": 0.17821325361728668, "rewards/accuracies": 1.0, "rewards/chosen": -3.452086093602702e-05, "rewards/margins": 0.18779553472995758, "rewards/rejected": -0.18783004581928253, "step": 7794 }, { "epoch": 5.390733056708161, "grad_norm": 7.623827934265137, "learning_rate": 2.5607038573843556e-05, "log_odds_chosen": 9.883403778076172, "log_odds_ratio": -0.00019677457748912275, "logits/chosen": -0.6082909107208252, "logits/rejected": -0.6662979125976562, "logps/chosen": -0.0005204399349167943, "logps/rejected": -2.115126848220825, "loss": 0.8763, "nll_loss": 0.2190508097410202, "rewards/accuracies": 1.0, "rewards/chosen": -5.204399349167943e-05, "rewards/margins": 0.2114606499671936, "rewards/rejected": -0.21151268482208252, "step": 7795 }, { "epoch": 5.391424619640388, "grad_norm": 12.549150466918945, "learning_rate": 2.5603196557553405e-05, "log_odds_chosen": 10.353691101074219, "log_odds_ratio": -6.125812797108665e-05, "logits/chosen": -0.1614176332950592, "logits/rejected": -0.20764128863811493, "logps/chosen": -0.00044496028567664325, "logps/rejected": -2.1650960445404053, "loss": 0.9377, "nll_loss": 0.23442500829696655, "rewards/accuracies": 1.0, "rewards/chosen": -4.449603147804737e-05, "rewards/margins": 0.21646510064601898, "rewards/rejected": -0.21650958061218262, "step": 7796 }, { "epoch": 5.3921161825726145, "grad_norm": 11.16163158416748, "learning_rate": 2.5599354541263254e-05, "log_odds_chosen": 10.391372680664062, "log_odds_ratio": -0.0002313799923285842, "logits/chosen": -0.6086483597755432, "logits/rejected": -0.7471935749053955, "logps/chosen": -0.0004083913518115878, "logps/rejected": -1.9547507762908936, "loss": 0.7049, "nll_loss": 0.1762014776468277, "rewards/accuracies": 1.0, "rewards/chosen": -4.083913518115878e-05, "rewards/margins": 0.19543424248695374, "rewards/rejected": -0.19547508656978607, "step": 7797 }, { "epoch": 5.392807745504841, "grad_norm": 4.64284610748291, "learning_rate": 2.559551252497311e-05, "log_odds_chosen": 10.95727825164795, "log_odds_ratio": -3.449685391387902e-05, "logits/chosen": -0.517857551574707, "logits/rejected": -0.5917432904243469, "logps/chosen": -0.00013936441973783076, "logps/rejected": -2.081711769104004, "loss": 0.6214, "nll_loss": 0.1553504914045334, "rewards/accuracies": 1.0, "rewards/chosen": -1.3936441973783076e-05, "rewards/margins": 0.2081572711467743, "rewards/rejected": -0.20817118883132935, "step": 7798 }, { "epoch": 5.393499308437068, "grad_norm": 6.72576379776001, "learning_rate": 2.559167050868296e-05, "log_odds_chosen": 11.150545120239258, "log_odds_ratio": -3.067057696171105e-05, "logits/chosen": -0.11584608256816864, "logits/rejected": -0.23737332224845886, "logps/chosen": -0.003040261333808303, "logps/rejected": -2.970217704772949, "loss": 0.8037, "nll_loss": 0.2009156048297882, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030402615084312856, "rewards/margins": 0.2967177629470825, "rewards/rejected": -0.2970217764377594, "step": 7799 }, { "epoch": 5.394190871369295, "grad_norm": 6.495701789855957, "learning_rate": 2.5587828492392808e-05, "log_odds_chosen": 10.355062484741211, "log_odds_ratio": -8.241965406341478e-05, "logits/chosen": -0.7206138372421265, "logits/rejected": -0.7242903709411621, "logps/chosen": -0.0006676479242742062, "logps/rejected": -2.488015651702881, "loss": 0.6441, "nll_loss": 0.16100604832172394, "rewards/accuracies": 1.0, "rewards/chosen": -6.67647909722291e-05, "rewards/margins": 0.24873481690883636, "rewards/rejected": -0.2488015741109848, "step": 7800 }, { "epoch": 5.394882434301522, "grad_norm": 7.106627941131592, "learning_rate": 2.558398647610266e-05, "log_odds_chosen": 9.96081829071045, "log_odds_ratio": -0.000366711406968534, "logits/chosen": -0.4516948461532593, "logits/rejected": -0.522978663444519, "logps/chosen": -0.00035521230893209577, "logps/rejected": -1.8816453218460083, "loss": 0.5956, "nll_loss": 0.14885476231575012, "rewards/accuracies": 1.0, "rewards/chosen": -3.55212323484011e-05, "rewards/margins": 0.1881290078163147, "rewards/rejected": -0.18816451728343964, "step": 7801 }, { "epoch": 5.395573997233749, "grad_norm": 7.80407190322876, "learning_rate": 2.558014445981251e-05, "log_odds_chosen": 10.894344329833984, "log_odds_ratio": -3.5703680623555556e-05, "logits/chosen": -0.4218696653842926, "logits/rejected": -0.5361448526382446, "logps/chosen": -0.0003464347682893276, "logps/rejected": -2.5303425788879395, "loss": 0.76, "nll_loss": 0.19000676274299622, "rewards/accuracies": 1.0, "rewards/chosen": -3.4643479011720046e-05, "rewards/margins": 0.25299960374832153, "rewards/rejected": -0.25303423404693604, "step": 7802 }, { "epoch": 5.3962655601659755, "grad_norm": 10.6310396194458, "learning_rate": 2.557630244352236e-05, "log_odds_chosen": 11.177322387695312, "log_odds_ratio": -5.937780952081084e-05, "logits/chosen": -0.3804362416267395, "logits/rejected": -0.47340330481529236, "logps/chosen": -0.0002439522068016231, "logps/rejected": -2.295778274536133, "loss": 0.7703, "nll_loss": 0.19257938861846924, "rewards/accuracies": 1.0, "rewards/chosen": -2.4395221771555953e-05, "rewards/margins": 0.2295534312725067, "rewards/rejected": -0.22957783937454224, "step": 7803 }, { "epoch": 5.396957123098202, "grad_norm": 6.042616367340088, "learning_rate": 2.5572460427232214e-05, "log_odds_chosen": 10.209222793579102, "log_odds_ratio": -0.00022868410451337695, "logits/chosen": -0.3529431223869324, "logits/rejected": -0.3970162868499756, "logps/chosen": -0.0002680581819731742, "logps/rejected": -1.7455283403396606, "loss": 1.1763, "nll_loss": 0.29406148195266724, "rewards/accuracies": 1.0, "rewards/chosen": -2.6805821107700467e-05, "rewards/margins": 0.17452603578567505, "rewards/rejected": -0.17455284297466278, "step": 7804 }, { "epoch": 5.397648686030429, "grad_norm": 8.033905982971191, "learning_rate": 2.5568618410942063e-05, "log_odds_chosen": 9.660449981689453, "log_odds_ratio": -0.00011870273010572419, "logits/chosen": -0.0120609812438488, "logits/rejected": -0.07766593247652054, "logps/chosen": -0.0003880371223203838, "logps/rejected": -1.8114503622055054, "loss": 0.7018, "nll_loss": 0.17542925477027893, "rewards/accuracies": 1.0, "rewards/chosen": -3.880371150444262e-05, "rewards/margins": 0.18110623955726624, "rewards/rejected": -0.18114504218101501, "step": 7805 }, { "epoch": 5.398340248962656, "grad_norm": 11.184370040893555, "learning_rate": 2.5564776394651912e-05, "log_odds_chosen": 9.650315284729004, "log_odds_ratio": -0.000734238070435822, "logits/chosen": -0.34173810482025146, "logits/rejected": -0.34528568387031555, "logps/chosen": -0.0028360248543322086, "logps/rejected": -2.4724929332733154, "loss": 0.8162, "nll_loss": 0.20397043228149414, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002836025378201157, "rewards/margins": 0.246965691447258, "rewards/rejected": -0.2472492754459381, "step": 7806 }, { "epoch": 5.399031811894883, "grad_norm": 6.66445779800415, "learning_rate": 2.5560934378361768e-05, "log_odds_chosen": 9.032700538635254, "log_odds_ratio": -0.0004254723316989839, "logits/chosen": -0.38099467754364014, "logits/rejected": -0.3853090703487396, "logps/chosen": -0.0005114672239869833, "logps/rejected": -1.1772360801696777, "loss": 0.764, "nll_loss": 0.19094544649124146, "rewards/accuracies": 1.0, "rewards/chosen": -5.114672603667714e-05, "rewards/margins": 0.11767245829105377, "rewards/rejected": -0.11772359907627106, "step": 7807 }, { "epoch": 5.39972337482711, "grad_norm": 5.572554111480713, "learning_rate": 2.5557092362071617e-05, "log_odds_chosen": 10.41073226928711, "log_odds_ratio": -0.00024940905859693885, "logits/chosen": -0.3622361123561859, "logits/rejected": -0.4100070297718048, "logps/chosen": -0.00029475893825292587, "logps/rejected": -1.9671523571014404, "loss": 0.5987, "nll_loss": 0.1496485322713852, "rewards/accuracies": 1.0, "rewards/chosen": -2.9475893825292587e-05, "rewards/margins": 0.19668574631214142, "rewards/rejected": -0.19671523571014404, "step": 7808 }, { "epoch": 5.4004149377593365, "grad_norm": 8.154836654663086, "learning_rate": 2.5553250345781466e-05, "log_odds_chosen": 9.927988052368164, "log_odds_ratio": -0.0009140261099673808, "logits/chosen": -0.1996062695980072, "logits/rejected": -0.30821940302848816, "logps/chosen": -0.003140796907246113, "logps/rejected": -2.3840131759643555, "loss": 0.5726, "nll_loss": 0.14306655526161194, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003140796907246113, "rewards/margins": 0.2380872368812561, "rewards/rejected": -0.23840132355690002, "step": 7809 }, { "epoch": 5.401106500691563, "grad_norm": 9.915299415588379, "learning_rate": 2.554940832949132e-05, "log_odds_chosen": 9.086884498596191, "log_odds_ratio": -0.0034978806506842375, "logits/chosen": -0.4298107922077179, "logits/rejected": -0.47236326336860657, "logps/chosen": -0.022841552272439003, "logps/rejected": -1.7702233791351318, "loss": 1.1822, "nll_loss": 0.29521122574806213, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022841552272439003, "rewards/margins": 0.17473816871643066, "rewards/rejected": -0.17702233791351318, "step": 7810 }, { "epoch": 5.40179806362379, "grad_norm": 5.638696670532227, "learning_rate": 2.5545566313201168e-05, "log_odds_chosen": 9.845707893371582, "log_odds_ratio": -0.0009368436876684427, "logits/chosen": -0.3803238272666931, "logits/rejected": -0.28517425060272217, "logps/chosen": -0.0005764992092736065, "logps/rejected": -1.737668752670288, "loss": 0.523, "nll_loss": 0.13065117597579956, "rewards/accuracies": 1.0, "rewards/chosen": -5.764992238255218e-05, "rewards/margins": 0.1737092286348343, "rewards/rejected": -0.1737668663263321, "step": 7811 }, { "epoch": 5.402489626556017, "grad_norm": 5.462301254272461, "learning_rate": 2.5541724296911017e-05, "log_odds_chosen": 11.049300193786621, "log_odds_ratio": -3.35049771820195e-05, "logits/chosen": -0.4539565443992615, "logits/rejected": -0.527484655380249, "logps/chosen": -0.00025346927577629685, "logps/rejected": -2.4482388496398926, "loss": 0.5746, "nll_loss": 0.1436401754617691, "rewards/accuracies": 1.0, "rewards/chosen": -2.534692976041697e-05, "rewards/margins": 0.24479854106903076, "rewards/rejected": -0.2448238879442215, "step": 7812 }, { "epoch": 5.403181189488244, "grad_norm": 8.376514434814453, "learning_rate": 2.5537882280620873e-05, "log_odds_chosen": 9.106565475463867, "log_odds_ratio": -0.0010129621950909495, "logits/chosen": -0.5715546011924744, "logits/rejected": -0.5758917331695557, "logps/chosen": -0.0011959555558860302, "logps/rejected": -1.408020257949829, "loss": 0.6462, "nll_loss": 0.16143743693828583, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011959556286456063, "rewards/margins": 0.1406824290752411, "rewards/rejected": -0.1408020406961441, "step": 7813 }, { "epoch": 5.403872752420471, "grad_norm": 8.575206756591797, "learning_rate": 2.553404026433072e-05, "log_odds_chosen": 9.173189163208008, "log_odds_ratio": -0.0004603645938914269, "logits/chosen": -0.23822470009326935, "logits/rejected": -0.33198216557502747, "logps/chosen": -0.0019623057451099157, "logps/rejected": -2.119011163711548, "loss": 1.0927, "nll_loss": 0.2731224596500397, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001962305832421407, "rewards/margins": 0.21170490980148315, "rewards/rejected": -0.21190112829208374, "step": 7814 }, { "epoch": 5.404564315352697, "grad_norm": 7.439986705780029, "learning_rate": 2.553019824804057e-05, "log_odds_chosen": 11.508347511291504, "log_odds_ratio": -2.3410131689161062e-05, "logits/chosen": -0.23676235973834991, "logits/rejected": -0.39286863803863525, "logps/chosen": -0.0002546714968048036, "logps/rejected": -2.7765936851501465, "loss": 1.0065, "nll_loss": 0.25163280963897705, "rewards/accuracies": 1.0, "rewards/chosen": -2.5467150408076122e-05, "rewards/margins": 0.2776338756084442, "rewards/rejected": -0.2776593565940857, "step": 7815 }, { "epoch": 5.405255878284924, "grad_norm": 5.230106353759766, "learning_rate": 2.5526356231750427e-05, "log_odds_chosen": 9.520034790039062, "log_odds_ratio": -0.00026278512086719275, "logits/chosen": -0.4936428666114807, "logits/rejected": -0.5037153363227844, "logps/chosen": -0.00033657433232292533, "logps/rejected": -1.4637000560760498, "loss": 0.679, "nll_loss": 0.16972285509109497, "rewards/accuracies": 1.0, "rewards/chosen": -3.3657433959888294e-05, "rewards/margins": 0.14633634686470032, "rewards/rejected": -0.1463700234889984, "step": 7816 }, { "epoch": 5.405947441217151, "grad_norm": 7.410196781158447, "learning_rate": 2.5522514215460276e-05, "log_odds_chosen": 10.929586410522461, "log_odds_ratio": -5.880877506569959e-05, "logits/chosen": -0.26691627502441406, "logits/rejected": -0.3176085352897644, "logps/chosen": -0.0001894081215141341, "logps/rejected": -2.1916801929473877, "loss": 0.4472, "nll_loss": 0.11179463565349579, "rewards/accuracies": 1.0, "rewards/chosen": -1.894081106001977e-05, "rewards/margins": 0.21914908289909363, "rewards/rejected": -0.2191680371761322, "step": 7817 }, { "epoch": 5.406639004149378, "grad_norm": 10.585952758789062, "learning_rate": 2.5518672199170125e-05, "log_odds_chosen": 9.789392471313477, "log_odds_ratio": -9.345363650936633e-05, "logits/chosen": -0.5864452123641968, "logits/rejected": -0.47814223170280457, "logps/chosen": -0.0005826476262882352, "logps/rejected": -2.074592113494873, "loss": 0.6702, "nll_loss": 0.1675419807434082, "rewards/accuracies": 1.0, "rewards/chosen": -5.826475899084471e-05, "rewards/margins": 0.20740094780921936, "rewards/rejected": -0.2074592113494873, "step": 7818 }, { "epoch": 5.407330567081605, "grad_norm": 11.783663749694824, "learning_rate": 2.5514830182879977e-05, "log_odds_chosen": 10.423219680786133, "log_odds_ratio": -0.00042906455928459764, "logits/chosen": -0.22940194606781006, "logits/rejected": -0.24586878716945648, "logps/chosen": -0.0012691940646618605, "logps/rejected": -2.0153493881225586, "loss": 1.043, "nll_loss": 0.2606947720050812, "rewards/accuracies": 1.0, "rewards/chosen": -0.000126919403555803, "rewards/margins": 0.2014080137014389, "rewards/rejected": -0.2015349417924881, "step": 7819 }, { "epoch": 5.408022130013832, "grad_norm": 8.057897567749023, "learning_rate": 2.5510988166589826e-05, "log_odds_chosen": 10.591058731079102, "log_odds_ratio": -5.7730518165044487e-05, "logits/chosen": -0.3266027569770813, "logits/rejected": -0.33674854040145874, "logps/chosen": -0.00021130419918335974, "logps/rejected": -2.0841257572174072, "loss": 0.9355, "nll_loss": 0.23385721445083618, "rewards/accuracies": 1.0, "rewards/chosen": -2.1130421373527497e-05, "rewards/margins": 0.20839142799377441, "rewards/rejected": -0.2084125578403473, "step": 7820 }, { "epoch": 5.408713692946058, "grad_norm": 6.1873579025268555, "learning_rate": 2.5507146150299675e-05, "log_odds_chosen": 9.934715270996094, "log_odds_ratio": -0.0001489290443714708, "logits/chosen": -0.4274921417236328, "logits/rejected": -0.41432029008865356, "logps/chosen": -0.013626918196678162, "logps/rejected": -2.4014713764190674, "loss": 0.7978, "nll_loss": 0.19942784309387207, "rewards/accuracies": 1.0, "rewards/chosen": -0.001362691866233945, "rewards/margins": 0.23878444731235504, "rewards/rejected": -0.24014714360237122, "step": 7821 }, { "epoch": 5.409405255878285, "grad_norm": 7.516190528869629, "learning_rate": 2.550330413400953e-05, "log_odds_chosen": 10.188470840454102, "log_odds_ratio": -0.00020180402498226613, "logits/chosen": -0.3321394920349121, "logits/rejected": -0.39233285188674927, "logps/chosen": -0.0009289323934353888, "logps/rejected": -2.192671537399292, "loss": 0.8631, "nll_loss": 0.2157595008611679, "rewards/accuracies": 1.0, "rewards/chosen": -9.289323497796431e-05, "rewards/margins": 0.21917426586151123, "rewards/rejected": -0.21926715970039368, "step": 7822 }, { "epoch": 5.410096818810512, "grad_norm": 11.857353210449219, "learning_rate": 2.549946211771938e-05, "log_odds_chosen": 9.52438735961914, "log_odds_ratio": -0.0005739558837376535, "logits/chosen": -0.26063770055770874, "logits/rejected": -0.2853577733039856, "logps/chosen": -0.0016212889458984137, "logps/rejected": -1.7387852668762207, "loss": 0.6358, "nll_loss": 0.15889419615268707, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001621288975002244, "rewards/margins": 0.17371639609336853, "rewards/rejected": -0.1738785207271576, "step": 7823 }, { "epoch": 5.410788381742739, "grad_norm": 8.935547828674316, "learning_rate": 2.549562010142923e-05, "log_odds_chosen": 10.768202781677246, "log_odds_ratio": -3.928116348106414e-05, "logits/chosen": -0.6923472285270691, "logits/rejected": -0.6736399531364441, "logps/chosen": -0.00015976907161530107, "logps/rejected": -1.725914716720581, "loss": 0.6046, "nll_loss": 0.15114957094192505, "rewards/accuracies": 1.0, "rewards/chosen": -1.597690788912587e-05, "rewards/margins": 0.17257550358772278, "rewards/rejected": -0.17259149253368378, "step": 7824 }, { "epoch": 5.411479944674966, "grad_norm": 6.82484769821167, "learning_rate": 2.5491778085139085e-05, "log_odds_chosen": 10.331005096435547, "log_odds_ratio": -6.685660628136247e-05, "logits/chosen": -0.06196293607354164, "logits/rejected": -0.18631382286548615, "logps/chosen": -0.0006202163640409708, "logps/rejected": -2.1389901638031006, "loss": 0.8618, "nll_loss": 0.21543405950069427, "rewards/accuracies": 1.0, "rewards/chosen": -6.20216378592886e-05, "rewards/margins": 0.21383699774742126, "rewards/rejected": -0.21389901638031006, "step": 7825 }, { "epoch": 5.412171507607193, "grad_norm": 5.5454583168029785, "learning_rate": 2.5487936068848934e-05, "log_odds_chosen": 10.644842147827148, "log_odds_ratio": -7.072136213537306e-05, "logits/chosen": -0.3575887084007263, "logits/rejected": -0.3973062038421631, "logps/chosen": -0.0005669151432812214, "logps/rejected": -2.4327518939971924, "loss": 0.821, "nll_loss": 0.20524708926677704, "rewards/accuracies": 1.0, "rewards/chosen": -5.669151869369671e-05, "rewards/margins": 0.24321848154067993, "rewards/rejected": -0.24327519536018372, "step": 7826 }, { "epoch": 5.412863070539419, "grad_norm": 9.592449188232422, "learning_rate": 2.5484094052558783e-05, "log_odds_chosen": 8.673015594482422, "log_odds_ratio": -0.013189301826059818, "logits/chosen": -0.21795159578323364, "logits/rejected": -0.34566575288772583, "logps/chosen": -0.011491509154438972, "logps/rejected": -1.5653188228607178, "loss": 1.0418, "nll_loss": 0.2591352164745331, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011491508921608329, "rewards/margins": 0.15538272261619568, "rewards/rejected": -0.15653188526630402, "step": 7827 }, { "epoch": 5.413554633471646, "grad_norm": 8.43200397491455, "learning_rate": 2.5480252036268636e-05, "log_odds_chosen": 9.651618957519531, "log_odds_ratio": -0.00014595997345168144, "logits/chosen": -0.795384407043457, "logits/rejected": -0.8214473724365234, "logps/chosen": -0.0004256881948094815, "logps/rejected": -1.5659555196762085, "loss": 0.5753, "nll_loss": 0.14379899203777313, "rewards/accuracies": 1.0, "rewards/chosen": -4.256881948094815e-05, "rewards/margins": 0.1565529853105545, "rewards/rejected": -0.15659555792808533, "step": 7828 }, { "epoch": 5.414246196403873, "grad_norm": 6.174905776977539, "learning_rate": 2.5476410019978485e-05, "log_odds_chosen": 10.615562438964844, "log_odds_ratio": -9.659776696935296e-05, "logits/chosen": -0.12677879631519318, "logits/rejected": -0.31755977869033813, "logps/chosen": -0.002218089997768402, "logps/rejected": -2.367126941680908, "loss": 0.6761, "nll_loss": 0.1690124273300171, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022180900850798935, "rewards/margins": 0.2364909052848816, "rewards/rejected": -0.2367127239704132, "step": 7829 }, { "epoch": 5.4149377593361, "grad_norm": 5.8143439292907715, "learning_rate": 2.5472568003688334e-05, "log_odds_chosen": 10.541342735290527, "log_odds_ratio": -0.00013124076940584928, "logits/chosen": -0.693324089050293, "logits/rejected": -0.8504449725151062, "logps/chosen": -0.00027568236691877246, "logps/rejected": -1.851475715637207, "loss": 0.4963, "nll_loss": 0.12405158579349518, "rewards/accuracies": 1.0, "rewards/chosen": -2.756823414529208e-05, "rewards/margins": 0.18512000143527985, "rewards/rejected": -0.18514756858348846, "step": 7830 }, { "epoch": 5.415629322268327, "grad_norm": 8.409075736999512, "learning_rate": 2.546872598739819e-05, "log_odds_chosen": 10.672347068786621, "log_odds_ratio": -6.198248593136668e-05, "logits/chosen": -0.43167707324028015, "logits/rejected": -0.536747395992279, "logps/chosen": -0.00020294116984587163, "logps/rejected": -2.2104036808013916, "loss": 0.6968, "nll_loss": 0.1741933524608612, "rewards/accuracies": 1.0, "rewards/chosen": -2.0294119167374447e-05, "rewards/margins": 0.22102010250091553, "rewards/rejected": -0.22104039788246155, "step": 7831 }, { "epoch": 5.4163208852005535, "grad_norm": 6.756588935852051, "learning_rate": 2.546488397110804e-05, "log_odds_chosen": 9.484219551086426, "log_odds_ratio": -0.003364234697073698, "logits/chosen": -0.5741379261016846, "logits/rejected": -0.5597481727600098, "logps/chosen": -0.00293903099372983, "logps/rejected": -2.191380500793457, "loss": 1.3862, "nll_loss": 0.3462083637714386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029390311101451516, "rewards/margins": 0.21884416043758392, "rewards/rejected": -0.21913805603981018, "step": 7832 }, { "epoch": 5.41701244813278, "grad_norm": 5.635895729064941, "learning_rate": 2.5461041954817888e-05, "log_odds_chosen": 10.222651481628418, "log_odds_ratio": -0.0007151153404265642, "logits/chosen": -0.44129547476768494, "logits/rejected": -0.5282646417617798, "logps/chosen": -0.0005037469090893865, "logps/rejected": -2.230571746826172, "loss": 1.3282, "nll_loss": 0.3319754898548126, "rewards/accuracies": 1.0, "rewards/chosen": -5.037469236413017e-05, "rewards/margins": 0.22300681471824646, "rewards/rejected": -0.22305719554424286, "step": 7833 }, { "epoch": 5.417704011065007, "grad_norm": 6.464162349700928, "learning_rate": 2.5457199938527743e-05, "log_odds_chosen": 10.352190017700195, "log_odds_ratio": -6.672489689663053e-05, "logits/chosen": -0.5189627408981323, "logits/rejected": -0.5931606292724609, "logps/chosen": -0.0005168755888007581, "logps/rejected": -2.325748920440674, "loss": 0.7537, "nll_loss": 0.18841908872127533, "rewards/accuracies": 1.0, "rewards/chosen": -5.168755888007581e-05, "rewards/margins": 0.23252321779727936, "rewards/rejected": -0.23257490992546082, "step": 7834 }, { "epoch": 5.418395573997234, "grad_norm": 9.047159194946289, "learning_rate": 2.5453357922237592e-05, "log_odds_chosen": 9.300627708435059, "log_odds_ratio": -0.0006379535770975053, "logits/chosen": -0.1021367758512497, "logits/rejected": -0.06073558330535889, "logps/chosen": -0.0013150572776794434, "logps/rejected": -2.060987949371338, "loss": 0.7732, "nll_loss": 0.19324207305908203, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013150573067832738, "rewards/margins": 0.20596730709075928, "rewards/rejected": -0.2060987949371338, "step": 7835 }, { "epoch": 5.419087136929461, "grad_norm": 4.95590353012085, "learning_rate": 2.544951590594744e-05, "log_odds_chosen": 9.185993194580078, "log_odds_ratio": -0.000421573146013543, "logits/chosen": -0.27182701230049133, "logits/rejected": -0.27344971895217896, "logps/chosen": -0.0005388180143199861, "logps/rejected": -1.6429342031478882, "loss": 0.747, "nll_loss": 0.1866980791091919, "rewards/accuracies": 1.0, "rewards/chosen": -5.388180579757318e-05, "rewards/margins": 0.1642395406961441, "rewards/rejected": -0.16429343819618225, "step": 7836 }, { "epoch": 5.419778699861688, "grad_norm": 7.58992862701416, "learning_rate": 2.5445673889657294e-05, "log_odds_chosen": 10.388230323791504, "log_odds_ratio": -0.00028462830232456326, "logits/chosen": -0.27032992243766785, "logits/rejected": -0.3360634446144104, "logps/chosen": -0.014370308257639408, "logps/rejected": -2.8050293922424316, "loss": 0.5867, "nll_loss": 0.14663758873939514, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014370307326316833, "rewards/margins": 0.27906593680381775, "rewards/rejected": -0.28050294518470764, "step": 7837 }, { "epoch": 5.4204702627939145, "grad_norm": 8.065587043762207, "learning_rate": 2.5441831873367143e-05, "log_odds_chosen": 12.526787757873535, "log_odds_ratio": -7.872034984757192e-06, "logits/chosen": -0.7101952433586121, "logits/rejected": -0.7076274752616882, "logps/chosen": -4.7266785259125754e-05, "logps/rejected": -2.2666127681732178, "loss": 0.6991, "nll_loss": 0.17478647828102112, "rewards/accuracies": 1.0, "rewards/chosen": -4.7266785259125754e-06, "rewards/margins": 0.22665655612945557, "rewards/rejected": -0.2266612946987152, "step": 7838 }, { "epoch": 5.421161825726141, "grad_norm": 6.4186787605285645, "learning_rate": 2.5437989857076992e-05, "log_odds_chosen": 10.256253242492676, "log_odds_ratio": -0.00030515273101627827, "logits/chosen": -0.3902437090873718, "logits/rejected": -0.510823667049408, "logps/chosen": -0.0005199067527428269, "logps/rejected": -2.0400826930999756, "loss": 0.5576, "nll_loss": 0.13937097787857056, "rewards/accuracies": 1.0, "rewards/chosen": -5.19906789122615e-05, "rewards/margins": 0.20395630598068237, "rewards/rejected": -0.20400826632976532, "step": 7839 }, { "epoch": 5.421853388658368, "grad_norm": 5.648606300354004, "learning_rate": 2.5434147840786848e-05, "log_odds_chosen": 9.406319618225098, "log_odds_ratio": -0.00028232004842720926, "logits/chosen": -0.4357467591762543, "logits/rejected": -0.4736112058162689, "logps/chosen": -0.0003766193403862417, "logps/rejected": -1.1548511981964111, "loss": 1.2206, "nll_loss": 0.3051202893257141, "rewards/accuracies": 1.0, "rewards/chosen": -3.766193549381569e-05, "rewards/margins": 0.11544745415449142, "rewards/rejected": -0.11548513174057007, "step": 7840 }, { "epoch": 5.422544951590595, "grad_norm": 6.648719310760498, "learning_rate": 2.5430305824496697e-05, "log_odds_chosen": 9.577982902526855, "log_odds_ratio": -0.007578455377370119, "logits/chosen": -0.40829968452453613, "logits/rejected": -0.46084004640579224, "logps/chosen": -0.0030157307628542185, "logps/rejected": -1.722627878189087, "loss": 1.2203, "nll_loss": 0.30430811643600464, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030157307628542185, "rewards/margins": 0.1719612181186676, "rewards/rejected": -0.1722627878189087, "step": 7841 }, { "epoch": 5.423236514522822, "grad_norm": 6.960811138153076, "learning_rate": 2.5426463808206546e-05, "log_odds_chosen": 11.941719055175781, "log_odds_ratio": -1.1457333130238112e-05, "logits/chosen": -0.8144433498382568, "logits/rejected": -0.9371423125267029, "logps/chosen": -8.856329077389091e-05, "logps/rejected": -2.627164363861084, "loss": 0.4427, "nll_loss": 0.11068376898765564, "rewards/accuracies": 1.0, "rewards/chosen": -8.856329259288032e-06, "rewards/margins": 0.26270759105682373, "rewards/rejected": -0.2627164423465729, "step": 7842 }, { "epoch": 5.423928077455049, "grad_norm": 5.834754943847656, "learning_rate": 2.5422621791916402e-05, "log_odds_chosen": 9.961302757263184, "log_odds_ratio": -8.535663073416799e-05, "logits/chosen": -0.6488773226737976, "logits/rejected": -0.695549726486206, "logps/chosen": -0.0005070774932391942, "logps/rejected": -1.7984031438827515, "loss": 0.6351, "nll_loss": 0.1587558388710022, "rewards/accuracies": 1.0, "rewards/chosen": -5.070775296189822e-05, "rewards/margins": 0.17978960275650024, "rewards/rejected": -0.1798403263092041, "step": 7843 }, { "epoch": 5.4246196403872755, "grad_norm": 4.278073310852051, "learning_rate": 2.541877977562625e-05, "log_odds_chosen": 9.657689094543457, "log_odds_ratio": -0.0005896209622733295, "logits/chosen": -0.5128522515296936, "logits/rejected": -0.5324857234954834, "logps/chosen": -0.009259654209017754, "logps/rejected": -2.4090094566345215, "loss": 0.7183, "nll_loss": 0.17950932681560516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009259654907509685, "rewards/margins": 0.2399749755859375, "rewards/rejected": -0.24090096354484558, "step": 7844 }, { "epoch": 5.425311203319502, "grad_norm": 7.503888130187988, "learning_rate": 2.54149377593361e-05, "log_odds_chosen": 9.949324607849121, "log_odds_ratio": -0.00013019111065659672, "logits/chosen": -0.16912402212619781, "logits/rejected": -0.2177458107471466, "logps/chosen": -0.0010880143381655216, "logps/rejected": -2.2526049613952637, "loss": 0.8531, "nll_loss": 0.2132684886455536, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010880143236136064, "rewards/margins": 0.22515171766281128, "rewards/rejected": -0.22526051104068756, "step": 7845 }, { "epoch": 5.426002766251729, "grad_norm": 10.34386920928955, "learning_rate": 2.5411095743045952e-05, "log_odds_chosen": 11.487712860107422, "log_odds_ratio": -0.00019205230637453496, "logits/chosen": -0.636671245098114, "logits/rejected": -0.6072683930397034, "logps/chosen": -0.0009793839417397976, "logps/rejected": -3.500851631164551, "loss": 0.8111, "nll_loss": 0.20274467766284943, "rewards/accuracies": 1.0, "rewards/chosen": -9.793839126359671e-05, "rewards/margins": 0.3499872088432312, "rewards/rejected": -0.35008513927459717, "step": 7846 }, { "epoch": 5.426694329183956, "grad_norm": 5.985348701477051, "learning_rate": 2.54072537267558e-05, "log_odds_chosen": 10.38613510131836, "log_odds_ratio": -0.00016703951405361295, "logits/chosen": -0.29771584272384644, "logits/rejected": -0.3260590434074402, "logps/chosen": -0.0007466238457709551, "logps/rejected": -2.1024794578552246, "loss": 0.5324, "nll_loss": 0.13309422135353088, "rewards/accuracies": 1.0, "rewards/chosen": -7.466238457709551e-05, "rewards/margins": 0.21017327904701233, "rewards/rejected": -0.2102479487657547, "step": 7847 }, { "epoch": 5.427385892116183, "grad_norm": 11.137646675109863, "learning_rate": 2.540341171046565e-05, "log_odds_chosen": 10.203710556030273, "log_odds_ratio": -0.0006535428110510111, "logits/chosen": -0.46439170837402344, "logits/rejected": -0.4716670513153076, "logps/chosen": -0.0006691356538794935, "logps/rejected": -2.228005886077881, "loss": 0.7658, "nll_loss": 0.19138406217098236, "rewards/accuracies": 1.0, "rewards/chosen": -6.69135624775663e-05, "rewards/margins": 0.22273366153240204, "rewards/rejected": -0.2228005826473236, "step": 7848 }, { "epoch": 5.42807745504841, "grad_norm": 9.139507293701172, "learning_rate": 2.5399569694175506e-05, "log_odds_chosen": 11.50609302520752, "log_odds_ratio": -2.8523911169031635e-05, "logits/chosen": -0.2181396782398224, "logits/rejected": -0.2987217903137207, "logps/chosen": -0.00016175236669369042, "logps/rejected": -2.743394136428833, "loss": 0.8292, "nll_loss": 0.2072906345129013, "rewards/accuracies": 1.0, "rewards/chosen": -1.617523594177328e-05, "rewards/margins": 0.2743232548236847, "rewards/rejected": -0.2743394374847412, "step": 7849 }, { "epoch": 5.4287690179806365, "grad_norm": 9.508968353271484, "learning_rate": 2.5395727677885355e-05, "log_odds_chosen": 10.997293472290039, "log_odds_ratio": -4.560018714983016e-05, "logits/chosen": -0.3897295892238617, "logits/rejected": -0.3831002116203308, "logps/chosen": -0.00018428656039759517, "logps/rejected": -2.2266499996185303, "loss": 0.6922, "nll_loss": 0.17304514348506927, "rewards/accuracies": 1.0, "rewards/chosen": -1.8428656403557397e-05, "rewards/margins": 0.222646564245224, "rewards/rejected": -0.2226649969816208, "step": 7850 }, { "epoch": 5.429460580912863, "grad_norm": 7.224493026733398, "learning_rate": 2.5391885661595204e-05, "log_odds_chosen": 11.01947021484375, "log_odds_ratio": -8.331074059242383e-05, "logits/chosen": -0.1935908943414688, "logits/rejected": -0.2113337516784668, "logps/chosen": -0.00044694929965771735, "logps/rejected": -2.6879687309265137, "loss": 0.8935, "nll_loss": 0.22337090969085693, "rewards/accuracies": 1.0, "rewards/chosen": -4.469492705538869e-05, "rewards/margins": 0.26875215768814087, "rewards/rejected": -0.2687968909740448, "step": 7851 }, { "epoch": 5.43015214384509, "grad_norm": 9.00788688659668, "learning_rate": 2.538804364530506e-05, "log_odds_chosen": 9.374788284301758, "log_odds_ratio": -0.05502600222826004, "logits/chosen": -0.4989396929740906, "logits/rejected": -0.6203451156616211, "logps/chosen": -0.010878579691052437, "logps/rejected": -1.516574740409851, "loss": 0.7964, "nll_loss": 0.19360893964767456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010878578759729862, "rewards/margins": 0.1505696177482605, "rewards/rejected": -0.15165749192237854, "step": 7852 }, { "epoch": 5.430843706777317, "grad_norm": 7.597384452819824, "learning_rate": 2.538420162901491e-05, "log_odds_chosen": 10.482349395751953, "log_odds_ratio": -0.0003454138641245663, "logits/chosen": -0.0877026915550232, "logits/rejected": -0.19539429247379303, "logps/chosen": -0.0005087396712042391, "logps/rejected": -2.267861843109131, "loss": 0.8494, "nll_loss": 0.21232616901397705, "rewards/accuracies": 1.0, "rewards/chosen": -5.0873968575615436e-05, "rewards/margins": 0.22673532366752625, "rewards/rejected": -0.22678618133068085, "step": 7853 }, { "epoch": 5.431535269709544, "grad_norm": 9.940882682800293, "learning_rate": 2.538035961272476e-05, "log_odds_chosen": 11.412620544433594, "log_odds_ratio": -0.0010527351405471563, "logits/chosen": -0.6566622257232666, "logits/rejected": -0.7543309926986694, "logps/chosen": -0.0007822321495041251, "logps/rejected": -2.9906747341156006, "loss": 0.7132, "nll_loss": 0.17820365726947784, "rewards/accuracies": 1.0, "rewards/chosen": -7.822322368156165e-05, "rewards/margins": 0.29898926615715027, "rewards/rejected": -0.2990674674510956, "step": 7854 }, { "epoch": 5.432226832641771, "grad_norm": 8.040964126586914, "learning_rate": 2.537651759643461e-05, "log_odds_chosen": 10.689407348632812, "log_odds_ratio": -4.262772563379258e-05, "logits/chosen": -0.6748466491699219, "logits/rejected": -0.7293582558631897, "logps/chosen": -0.0002847913419827819, "logps/rejected": -2.2315211296081543, "loss": 0.6307, "nll_loss": 0.1576675921678543, "rewards/accuracies": 1.0, "rewards/chosen": -2.847913492587395e-05, "rewards/margins": 0.22312362492084503, "rewards/rejected": -0.22315210103988647, "step": 7855 }, { "epoch": 5.4329183955739975, "grad_norm": 4.731971263885498, "learning_rate": 2.537267558014446e-05, "log_odds_chosen": 10.669794082641602, "log_odds_ratio": -0.00023917089856695384, "logits/chosen": -0.5203498005867004, "logits/rejected": -0.4933302402496338, "logps/chosen": -0.0003398106200620532, "logps/rejected": -2.070086717605591, "loss": 0.6356, "nll_loss": 0.15886807441711426, "rewards/accuracies": 1.0, "rewards/chosen": -3.398106127860956e-05, "rewards/margins": 0.20697468519210815, "rewards/rejected": -0.20700865983963013, "step": 7856 }, { "epoch": 5.433609958506224, "grad_norm": 5.539132118225098, "learning_rate": 2.5368833563854312e-05, "log_odds_chosen": 10.369670867919922, "log_odds_ratio": -0.000260966713540256, "logits/chosen": -0.24483546614646912, "logits/rejected": -0.3387295603752136, "logps/chosen": -0.00038009221316315234, "logps/rejected": -2.261167049407959, "loss": 0.83, "nll_loss": 0.20746630430221558, "rewards/accuracies": 1.0, "rewards/chosen": -3.800922058871947e-05, "rewards/margins": 0.22607870399951935, "rewards/rejected": -0.22611671686172485, "step": 7857 }, { "epoch": 5.434301521438451, "grad_norm": 6.399085998535156, "learning_rate": 2.5364991547564165e-05, "log_odds_chosen": 10.487403869628906, "log_odds_ratio": -0.0002748219412751496, "logits/chosen": -0.4126533567905426, "logits/rejected": -0.4159373342990875, "logps/chosen": -0.0016300861025229096, "logps/rejected": -2.318737268447876, "loss": 0.8799, "nll_loss": 0.2199430912733078, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016300860443152487, "rewards/margins": 0.23171071708202362, "rewards/rejected": -0.23187373578548431, "step": 7858 }, { "epoch": 5.434993084370678, "grad_norm": 6.040285110473633, "learning_rate": 2.5361149531274014e-05, "log_odds_chosen": 11.001068115234375, "log_odds_ratio": -2.4839646357577294e-05, "logits/chosen": -0.692878007888794, "logits/rejected": -0.7236531972885132, "logps/chosen": -0.0001058193010976538, "logps/rejected": -1.721745252609253, "loss": 0.5957, "nll_loss": 0.1489114761352539, "rewards/accuracies": 1.0, "rewards/chosen": -1.058192992786644e-05, "rewards/margins": 0.1721639335155487, "rewards/rejected": -0.17217451333999634, "step": 7859 }, { "epoch": 5.435684647302905, "grad_norm": 8.008295059204102, "learning_rate": 2.5357307514983863e-05, "log_odds_chosen": 9.715714454650879, "log_odds_ratio": -0.0013938343618065119, "logits/chosen": -0.24811391532421112, "logits/rejected": -0.3216937780380249, "logps/chosen": -0.0010375329293310642, "logps/rejected": -1.7235355377197266, "loss": 0.8535, "nll_loss": 0.2132408618927002, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010375330020906404, "rewards/margins": 0.17224982380867004, "rewards/rejected": -0.17235355079174042, "step": 7860 }, { "epoch": 5.436376210235132, "grad_norm": 7.474475383758545, "learning_rate": 2.535346549869372e-05, "log_odds_chosen": 9.584784507751465, "log_odds_ratio": -0.0005862210527993739, "logits/chosen": -0.5556076765060425, "logits/rejected": -0.557613730430603, "logps/chosen": -0.0021440701093524694, "logps/rejected": -1.8439087867736816, "loss": 0.8209, "nll_loss": 0.20515908300876617, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002144070458598435, "rewards/margins": 0.1841764748096466, "rewards/rejected": -0.18439088761806488, "step": 7861 }, { "epoch": 5.4370677731673585, "grad_norm": 7.606071472167969, "learning_rate": 2.5349623482403568e-05, "log_odds_chosen": 9.713556289672852, "log_odds_ratio": -0.0002427960280328989, "logits/chosen": -0.11545206606388092, "logits/rejected": -0.1437520831823349, "logps/chosen": -0.0006739971577189863, "logps/rejected": -1.5161678791046143, "loss": 0.8651, "nll_loss": 0.2162489891052246, "rewards/accuracies": 1.0, "rewards/chosen": -6.739972741343081e-05, "rewards/margins": 0.15154938399791718, "rewards/rejected": -0.15161678194999695, "step": 7862 }, { "epoch": 5.437759336099585, "grad_norm": 8.389718055725098, "learning_rate": 2.5345781466113417e-05, "log_odds_chosen": 10.20302963256836, "log_odds_ratio": -4.7732421080581844e-05, "logits/chosen": -0.5267475843429565, "logits/rejected": -0.5469604730606079, "logps/chosen": -0.00023220572620630264, "logps/rejected": -1.7363343238830566, "loss": 1.0272, "nll_loss": 0.25679296255111694, "rewards/accuracies": 1.0, "rewards/chosen": -2.322057480341755e-05, "rewards/margins": 0.17361021041870117, "rewards/rejected": -0.1736334264278412, "step": 7863 }, { "epoch": 5.438450899031812, "grad_norm": 13.0947847366333, "learning_rate": 2.534193944982327e-05, "log_odds_chosen": 9.859249114990234, "log_odds_ratio": -0.00023818403133191168, "logits/chosen": -0.2789401710033417, "logits/rejected": -0.3713934123516083, "logps/chosen": -0.0011656444985419512, "logps/rejected": -1.5852174758911133, "loss": 0.7659, "nll_loss": 0.19143958389759064, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001165644425782375, "rewards/margins": 0.15840518474578857, "rewards/rejected": -0.15852174162864685, "step": 7864 }, { "epoch": 5.439142461964039, "grad_norm": 6.816270351409912, "learning_rate": 2.5338097433533122e-05, "log_odds_chosen": 10.205053329467773, "log_odds_ratio": -0.00020811142167076468, "logits/chosen": -0.780189037322998, "logits/rejected": -0.7887230515480042, "logps/chosen": -0.007137411739677191, "logps/rejected": -2.1368024349212646, "loss": 0.8766, "nll_loss": 0.219136044383049, "rewards/accuracies": 1.0, "rewards/chosen": -0.00071374123217538, "rewards/margins": 0.21296651661396027, "rewards/rejected": -0.21368026733398438, "step": 7865 }, { "epoch": 5.439834024896266, "grad_norm": 8.272232055664062, "learning_rate": 2.533425541724297e-05, "log_odds_chosen": 8.850973129272461, "log_odds_ratio": -0.0006840628921054304, "logits/chosen": -0.629523515701294, "logits/rejected": -0.6524733304977417, "logps/chosen": -0.0012333383783698082, "logps/rejected": -1.3839800357818604, "loss": 0.9089, "nll_loss": 0.2271665334701538, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012333384074736387, "rewards/margins": 0.1382746696472168, "rewards/rejected": -0.13839800655841827, "step": 7866 }, { "epoch": 5.440525587828493, "grad_norm": 7.405073165893555, "learning_rate": 2.5330413400952823e-05, "log_odds_chosen": 10.685062408447266, "log_odds_ratio": -2.844108166755177e-05, "logits/chosen": -0.437447190284729, "logits/rejected": -0.5611270070075989, "logps/chosen": -0.00013399166346061975, "logps/rejected": -1.8456788063049316, "loss": 0.4605, "nll_loss": 0.11511488258838654, "rewards/accuracies": 1.0, "rewards/chosen": -1.3399166164163034e-05, "rewards/margins": 0.18455448746681213, "rewards/rejected": -0.1845678687095642, "step": 7867 }, { "epoch": 5.441217150760719, "grad_norm": 6.375338554382324, "learning_rate": 2.5326571384662672e-05, "log_odds_chosen": 10.531484603881836, "log_odds_ratio": -0.00015703555254731327, "logits/chosen": -0.2637036144733429, "logits/rejected": -0.17880676686763763, "logps/chosen": -0.00035209517227485776, "logps/rejected": -1.7615535259246826, "loss": 0.923, "nll_loss": 0.23073497414588928, "rewards/accuracies": 1.0, "rewards/chosen": -3.520952304825187e-05, "rewards/margins": 0.17612014710903168, "rewards/rejected": -0.17615535855293274, "step": 7868 }, { "epoch": 5.441908713692946, "grad_norm": 12.731097221374512, "learning_rate": 2.532272936837252e-05, "log_odds_chosen": 10.44438362121582, "log_odds_ratio": -5.5176606110762805e-05, "logits/chosen": -1.0070915222167969, "logits/rejected": -1.0459587574005127, "logps/chosen": -0.00043362006545066833, "logps/rejected": -2.1274542808532715, "loss": 1.1053, "nll_loss": 0.2763287425041199, "rewards/accuracies": 1.0, "rewards/chosen": -4.3362008000258356e-05, "rewards/margins": 0.21270209550857544, "rewards/rejected": -0.21274544298648834, "step": 7869 }, { "epoch": 5.442600276625173, "grad_norm": 18.94190216064453, "learning_rate": 2.5318887352082377e-05, "log_odds_chosen": 11.627334594726562, "log_odds_ratio": -2.15392756217625e-05, "logits/chosen": -0.7026960849761963, "logits/rejected": -0.7298768162727356, "logps/chosen": -0.00012318039080128074, "logps/rejected": -2.5806596279144287, "loss": 0.8875, "nll_loss": 0.22187533974647522, "rewards/accuracies": 1.0, "rewards/chosen": -1.2318038898229133e-05, "rewards/margins": 0.25805363059043884, "rewards/rejected": -0.25806596875190735, "step": 7870 }, { "epoch": 5.4432918395574, "grad_norm": 12.605120658874512, "learning_rate": 2.5315045335792226e-05, "log_odds_chosen": 10.456953048706055, "log_odds_ratio": -0.0006705054547637701, "logits/chosen": -0.6541532874107361, "logits/rejected": -0.7368367910385132, "logps/chosen": -0.0007939254865050316, "logps/rejected": -2.2013332843780518, "loss": 0.6147, "nll_loss": 0.15360420942306519, "rewards/accuracies": 1.0, "rewards/chosen": -7.939254282973707e-05, "rewards/margins": 0.22005394101142883, "rewards/rejected": -0.22013333439826965, "step": 7871 }, { "epoch": 5.443983402489627, "grad_norm": 5.563952922821045, "learning_rate": 2.5311203319502075e-05, "log_odds_chosen": 10.210861206054688, "log_odds_ratio": -0.00012154671276221052, "logits/chosen": -0.2771844267845154, "logits/rejected": -0.2716318666934967, "logps/chosen": -0.00019371393136680126, "logps/rejected": -1.819549798965454, "loss": 0.7497, "nll_loss": 0.18740403652191162, "rewards/accuracies": 1.0, "rewards/chosen": -1.9371393136680126e-05, "rewards/margins": 0.1819356083869934, "rewards/rejected": -0.1819549798965454, "step": 7872 }, { "epoch": 5.444674965421854, "grad_norm": 8.425066947937012, "learning_rate": 2.530736130321193e-05, "log_odds_chosen": 11.025166511535645, "log_odds_ratio": -4.0440820157527924e-05, "logits/chosen": -0.34361937642097473, "logits/rejected": -0.40224599838256836, "logps/chosen": -0.00013009503891225904, "logps/rejected": -2.163750648498535, "loss": 0.5686, "nll_loss": 0.14215293526649475, "rewards/accuracies": 1.0, "rewards/chosen": -1.3009504073124845e-05, "rewards/margins": 0.21636205911636353, "rewards/rejected": -0.21637505292892456, "step": 7873 }, { "epoch": 5.44536652835408, "grad_norm": 3.8120367527008057, "learning_rate": 2.530351928692178e-05, "log_odds_chosen": 10.694634437561035, "log_odds_ratio": -4.566337156575173e-05, "logits/chosen": -0.42301592230796814, "logits/rejected": -0.48795831203460693, "logps/chosen": -0.00040328531758859754, "logps/rejected": -2.773578405380249, "loss": 0.6298, "nll_loss": 0.15745729207992554, "rewards/accuracies": 1.0, "rewards/chosen": -4.032853394164704e-05, "rewards/margins": 0.27731749415397644, "rewards/rejected": -0.277357816696167, "step": 7874 }, { "epoch": 5.446058091286307, "grad_norm": 4.726737022399902, "learning_rate": 2.529967727063163e-05, "log_odds_chosen": 10.373005867004395, "log_odds_ratio": -8.296048326883465e-05, "logits/chosen": -0.2723003029823303, "logits/rejected": -0.2989640235900879, "logps/chosen": -0.0003351265622768551, "logps/rejected": -2.3543386459350586, "loss": 0.562, "nll_loss": 0.14049817621707916, "rewards/accuracies": 1.0, "rewards/chosen": -3.3512653317302465e-05, "rewards/margins": 0.23540034890174866, "rewards/rejected": -0.23543386161327362, "step": 7875 }, { "epoch": 5.446749654218534, "grad_norm": 8.28689956665039, "learning_rate": 2.529583525434148e-05, "log_odds_chosen": 9.111331939697266, "log_odds_ratio": -0.0001572294277139008, "logits/chosen": -0.2043474316596985, "logits/rejected": -0.28499382734298706, "logps/chosen": -0.0006082933978177607, "logps/rejected": -1.5229251384735107, "loss": 1.0661, "nll_loss": 0.26651203632354736, "rewards/accuracies": 1.0, "rewards/chosen": -6.0829341236967593e-05, "rewards/margins": 0.15223167836666107, "rewards/rejected": -0.15229250490665436, "step": 7876 }, { "epoch": 5.447441217150761, "grad_norm": 6.75670862197876, "learning_rate": 2.529199323805133e-05, "log_odds_chosen": 10.351332664489746, "log_odds_ratio": -0.0002016138460021466, "logits/chosen": -0.27136003971099854, "logits/rejected": -0.29065850377082825, "logps/chosen": -0.0005154769751243293, "logps/rejected": -2.0906341075897217, "loss": 0.6432, "nll_loss": 0.16078633069992065, "rewards/accuracies": 1.0, "rewards/chosen": -5.154770042281598e-05, "rewards/margins": 0.20901186764240265, "rewards/rejected": -0.20906341075897217, "step": 7877 }, { "epoch": 5.448132780082988, "grad_norm": 8.170028686523438, "learning_rate": 2.528815122176118e-05, "log_odds_chosen": 10.666536331176758, "log_odds_ratio": -3.752233897102997e-05, "logits/chosen": -0.25186508893966675, "logits/rejected": -0.37342870235443115, "logps/chosen": -0.0002312668802915141, "logps/rejected": -2.112480640411377, "loss": 0.7806, "nll_loss": 0.1951507329940796, "rewards/accuracies": 1.0, "rewards/chosen": -2.312668766535353e-05, "rewards/margins": 0.21122492849826813, "rewards/rejected": -0.21124804019927979, "step": 7878 }, { "epoch": 5.448824343015215, "grad_norm": 12.511398315429688, "learning_rate": 2.5284309205471036e-05, "log_odds_chosen": 10.229702949523926, "log_odds_ratio": -0.00016912652063183486, "logits/chosen": -0.4138595461845398, "logits/rejected": -0.3729810416698456, "logps/chosen": -0.0001521167578175664, "logps/rejected": -1.7475824356079102, "loss": 0.792, "nll_loss": 0.1979808211326599, "rewards/accuracies": 1.0, "rewards/chosen": -1.521167632745346e-05, "rewards/margins": 0.17474302649497986, "rewards/rejected": -0.17475822567939758, "step": 7879 }, { "epoch": 5.449515905947441, "grad_norm": 7.0651116371154785, "learning_rate": 2.5280467189180885e-05, "log_odds_chosen": 10.682268142700195, "log_odds_ratio": -0.017535412684082985, "logits/chosen": -0.3367564082145691, "logits/rejected": -0.3902648985385895, "logps/chosen": -0.006567737087607384, "logps/rejected": -2.294995069503784, "loss": 0.7196, "nll_loss": 0.17814846336841583, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006567737436853349, "rewards/margins": 0.22884273529052734, "rewards/rejected": -0.22949950397014618, "step": 7880 }, { "epoch": 5.450207468879668, "grad_norm": 6.419574737548828, "learning_rate": 2.5276625172890734e-05, "log_odds_chosen": 10.409414291381836, "log_odds_ratio": -0.00017506652511656284, "logits/chosen": -0.5267450213432312, "logits/rejected": -0.5546387434005737, "logps/chosen": -0.0007220894913189113, "logps/rejected": -2.721928596496582, "loss": 0.7508, "nll_loss": 0.18768687546253204, "rewards/accuracies": 1.0, "rewards/chosen": -7.220894622150809e-05, "rewards/margins": 0.2721206843852997, "rewards/rejected": -0.2721928656101227, "step": 7881 }, { "epoch": 5.450899031811895, "grad_norm": 5.396580219268799, "learning_rate": 2.527278315660059e-05, "log_odds_chosen": 10.267948150634766, "log_odds_ratio": -0.0004927387926727533, "logits/chosen": -0.31401973962783813, "logits/rejected": -0.41773149371147156, "logps/chosen": -0.0007646388257853687, "logps/rejected": -2.140446424484253, "loss": 0.9892, "nll_loss": 0.24724820256233215, "rewards/accuracies": 1.0, "rewards/chosen": -7.646388257853687e-05, "rewards/margins": 0.2139681726694107, "rewards/rejected": -0.21404464542865753, "step": 7882 }, { "epoch": 5.451590594744122, "grad_norm": 7.157034873962402, "learning_rate": 2.526894114031044e-05, "log_odds_chosen": 10.70358657836914, "log_odds_ratio": -0.00021359114907681942, "logits/chosen": -0.3688841760158539, "logits/rejected": -0.40278881788253784, "logps/chosen": -0.00044239018461667, "logps/rejected": -2.261518716812134, "loss": 0.7828, "nll_loss": 0.19567003846168518, "rewards/accuracies": 1.0, "rewards/chosen": -4.423902282724157e-05, "rewards/margins": 0.2261076420545578, "rewards/rejected": -0.22615188360214233, "step": 7883 }, { "epoch": 5.452282157676349, "grad_norm": 7.7425432205200195, "learning_rate": 2.5265099124020288e-05, "log_odds_chosen": 10.416945457458496, "log_odds_ratio": -0.00010456659219926223, "logits/chosen": -0.07597717642784119, "logits/rejected": -0.18601390719413757, "logps/chosen": -0.0006473706453107297, "logps/rejected": -2.18353271484375, "loss": 0.4489, "nll_loss": 0.11221115291118622, "rewards/accuracies": 1.0, "rewards/chosen": -6.473706162068993e-05, "rewards/margins": 0.21828854084014893, "rewards/rejected": -0.2183532863855362, "step": 7884 }, { "epoch": 5.4529737206085755, "grad_norm": 6.269292831420898, "learning_rate": 2.526125710773014e-05, "log_odds_chosen": 10.223175048828125, "log_odds_ratio": -0.0005372378509491682, "logits/chosen": -0.2822244167327881, "logits/rejected": -0.3845008313655853, "logps/chosen": -0.0014499751850962639, "logps/rejected": -2.3291826248168945, "loss": 0.615, "nll_loss": 0.15368527173995972, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014499750977847725, "rewards/margins": 0.23277327418327332, "rewards/rejected": -0.23291826248168945, "step": 7885 }, { "epoch": 5.453665283540802, "grad_norm": 7.0991387367248535, "learning_rate": 2.525741509143999e-05, "log_odds_chosen": 10.426545143127441, "log_odds_ratio": -0.00014646339695900679, "logits/chosen": -0.6692524552345276, "logits/rejected": -0.6565045118331909, "logps/chosen": -0.000264812697423622, "logps/rejected": -2.0045952796936035, "loss": 0.8776, "nll_loss": 0.21939438581466675, "rewards/accuracies": 1.0, "rewards/chosen": -2.64812697423622e-05, "rewards/margins": 0.20043303072452545, "rewards/rejected": -0.20045951008796692, "step": 7886 }, { "epoch": 5.454356846473029, "grad_norm": 9.188990592956543, "learning_rate": 2.5253573075149838e-05, "log_odds_chosen": 11.181268692016602, "log_odds_ratio": -0.0002525774762034416, "logits/chosen": -0.7016425132751465, "logits/rejected": -0.7341064214706421, "logps/chosen": -0.0011649903608486056, "logps/rejected": -2.7058682441711426, "loss": 0.6038, "nll_loss": 0.15092355012893677, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011649904627120122, "rewards/margins": 0.2704703211784363, "rewards/rejected": -0.27058684825897217, "step": 7887 }, { "epoch": 5.455048409405256, "grad_norm": 6.652552604675293, "learning_rate": 2.5249731058859687e-05, "log_odds_chosen": 10.284914016723633, "log_odds_ratio": -0.00010503552039153874, "logits/chosen": -0.14076370000839233, "logits/rejected": -0.18022692203521729, "logps/chosen": -0.0003460772568359971, "logps/rejected": -2.018402576446533, "loss": 0.697, "nll_loss": 0.17424136400222778, "rewards/accuracies": 1.0, "rewards/chosen": -3.4607728593982756e-05, "rewards/margins": 0.20180565118789673, "rewards/rejected": -0.20184025168418884, "step": 7888 }, { "epoch": 5.455739972337483, "grad_norm": 5.636088848114014, "learning_rate": 2.5245889042569543e-05, "log_odds_chosen": 10.133207321166992, "log_odds_ratio": -0.00039851610199548304, "logits/chosen": -0.4633323550224304, "logits/rejected": -0.46017763018608093, "logps/chosen": -0.0005366819095797837, "logps/rejected": -2.1516566276550293, "loss": 1.1516, "nll_loss": 0.2878515124320984, "rewards/accuracies": 1.0, "rewards/chosen": -5.366818731999956e-05, "rewards/margins": 0.21511197090148926, "rewards/rejected": -0.21516567468643188, "step": 7889 }, { "epoch": 5.45643153526971, "grad_norm": 5.4103498458862305, "learning_rate": 2.5242047026279392e-05, "log_odds_chosen": 10.09362506866455, "log_odds_ratio": -0.00035184432636015117, "logits/chosen": -0.4552658200263977, "logits/rejected": -0.5115972757339478, "logps/chosen": -0.0001713481906335801, "logps/rejected": -1.8507417440414429, "loss": 1.1351, "nll_loss": 0.28374433517456055, "rewards/accuracies": 1.0, "rewards/chosen": -1.7134818335762247e-05, "rewards/margins": 0.18505704402923584, "rewards/rejected": -0.18507418036460876, "step": 7890 }, { "epoch": 5.4571230982019365, "grad_norm": 14.919554710388184, "learning_rate": 2.523820500998924e-05, "log_odds_chosen": 10.883527755737305, "log_odds_ratio": -0.0007609869935549796, "logits/chosen": -0.5898886919021606, "logits/rejected": -0.566375195980072, "logps/chosen": -0.005747266113758087, "logps/rejected": -2.3063547611236572, "loss": 0.9042, "nll_loss": 0.22596535086631775, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005747266695834696, "rewards/margins": 0.23006075620651245, "rewards/rejected": -0.23063546419143677, "step": 7891 }, { "epoch": 5.457814661134163, "grad_norm": 10.642051696777344, "learning_rate": 2.5234362993699097e-05, "log_odds_chosen": 9.415760040283203, "log_odds_ratio": -0.000675417308229953, "logits/chosen": -0.38749903440475464, "logits/rejected": -0.5239227414131165, "logps/chosen": -0.0054013486951589584, "logps/rejected": -1.73267662525177, "loss": 1.5004, "nll_loss": 0.37502381205558777, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005401347880251706, "rewards/margins": 0.1727275252342224, "rewards/rejected": -0.1732676774263382, "step": 7892 }, { "epoch": 5.45850622406639, "grad_norm": 17.498640060424805, "learning_rate": 2.5230520977408946e-05, "log_odds_chosen": 10.213619232177734, "log_odds_ratio": -7.920338248368353e-05, "logits/chosen": -0.36259281635284424, "logits/rejected": -0.41285136342048645, "logps/chosen": -0.0004386402724776417, "logps/rejected": -1.7193783521652222, "loss": 1.2039, "nll_loss": 0.3009600341320038, "rewards/accuracies": 1.0, "rewards/chosen": -4.386402724776417e-05, "rewards/margins": 0.17189398407936096, "rewards/rejected": -0.17193783819675446, "step": 7893 }, { "epoch": 5.459197786998617, "grad_norm": 8.615107536315918, "learning_rate": 2.5226678961118795e-05, "log_odds_chosen": 9.743375778198242, "log_odds_ratio": -0.00016872762353159487, "logits/chosen": -0.6855210065841675, "logits/rejected": -0.7205241322517395, "logps/chosen": -0.0006472010281868279, "logps/rejected": -2.015528440475464, "loss": 0.8585, "nll_loss": 0.214613139629364, "rewards/accuracies": 1.0, "rewards/chosen": -6.472010136349127e-05, "rewards/margins": 0.20148813724517822, "rewards/rejected": -0.2015528529882431, "step": 7894 }, { "epoch": 5.459889349930844, "grad_norm": 8.549186706542969, "learning_rate": 2.5222836944828648e-05, "log_odds_chosen": 11.554786682128906, "log_odds_ratio": -2.5646073481766507e-05, "logits/chosen": -0.9103915691375732, "logits/rejected": -0.9481276273727417, "logps/chosen": -0.00018213970179203898, "logps/rejected": -2.588318347930908, "loss": 1.7569, "nll_loss": 0.43922534584999084, "rewards/accuracies": 1.0, "rewards/chosen": -1.82139719981933e-05, "rewards/margins": 0.25881361961364746, "rewards/rejected": -0.25883182883262634, "step": 7895 }, { "epoch": 5.460580912863071, "grad_norm": 11.719233512878418, "learning_rate": 2.5218994928538497e-05, "log_odds_chosen": 10.113941192626953, "log_odds_ratio": -0.00013782066525891423, "logits/chosen": -0.4043325185775757, "logits/rejected": -0.47121208906173706, "logps/chosen": -0.0033909042831510305, "logps/rejected": -2.3903987407684326, "loss": 0.6943, "nll_loss": 0.17356136441230774, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033909041667357087, "rewards/margins": 0.2387007772922516, "rewards/rejected": -0.23903986811637878, "step": 7896 }, { "epoch": 5.4612724757952975, "grad_norm": 8.379502296447754, "learning_rate": 2.5215152912248346e-05, "log_odds_chosen": 9.217211723327637, "log_odds_ratio": -0.0010226225713267922, "logits/chosen": -0.4326026439666748, "logits/rejected": -0.5274725556373596, "logps/chosen": -0.004476041067391634, "logps/rejected": -2.342529535293579, "loss": 0.9241, "nll_loss": 0.2309216856956482, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004476041067391634, "rewards/margins": 0.2338053584098816, "rewards/rejected": -0.2342529594898224, "step": 7897 }, { "epoch": 5.461964038727524, "grad_norm": 7.358403205871582, "learning_rate": 2.52113108959582e-05, "log_odds_chosen": 11.019567489624023, "log_odds_ratio": -2.6093295673490502e-05, "logits/chosen": -0.5179281830787659, "logits/rejected": -0.6355370283126831, "logps/chosen": -0.00020142251742072403, "logps/rejected": -2.1744496822357178, "loss": 0.6061, "nll_loss": 0.15153148770332336, "rewards/accuracies": 1.0, "rewards/chosen": -2.0142253561061807e-05, "rewards/margins": 0.21742482483386993, "rewards/rejected": -0.21744495630264282, "step": 7898 }, { "epoch": 5.462655601659751, "grad_norm": 15.733071327209473, "learning_rate": 2.520746887966805e-05, "log_odds_chosen": 10.189199447631836, "log_odds_ratio": -0.0006109835230745375, "logits/chosen": -0.6819102764129639, "logits/rejected": -0.7697743773460388, "logps/chosen": -0.0003226564731448889, "logps/rejected": -2.198263168334961, "loss": 1.0409, "nll_loss": 0.260160356760025, "rewards/accuracies": 1.0, "rewards/chosen": -3.226565240765922e-05, "rewards/margins": 0.21979409456253052, "rewards/rejected": -0.219826340675354, "step": 7899 }, { "epoch": 5.463347164591978, "grad_norm": 13.659838676452637, "learning_rate": 2.52036268633779e-05, "log_odds_chosen": 10.806734085083008, "log_odds_ratio": -0.00038712259265594184, "logits/chosen": -0.5743763446807861, "logits/rejected": -0.7685285806655884, "logps/chosen": -0.0010596952633932233, "logps/rejected": -2.2519259452819824, "loss": 0.7574, "nll_loss": 0.18931885063648224, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010596953507047147, "rewards/margins": 0.22508664429187775, "rewards/rejected": -0.2251926213502884, "step": 7900 }, { "epoch": 5.464038727524205, "grad_norm": 9.067700386047363, "learning_rate": 2.5199784847087755e-05, "log_odds_chosen": 10.845809936523438, "log_odds_ratio": -0.0001876501482911408, "logits/chosen": -0.37300464510917664, "logits/rejected": -0.43012484908103943, "logps/chosen": -0.0001886295503936708, "logps/rejected": -2.392216205596924, "loss": 0.8364, "nll_loss": 0.20909173786640167, "rewards/accuracies": 1.0, "rewards/chosen": -1.8862956494558603e-05, "rewards/margins": 0.23920276761054993, "rewards/rejected": -0.23922163248062134, "step": 7901 }, { "epoch": 5.464730290456432, "grad_norm": 11.004956245422363, "learning_rate": 2.5195942830797605e-05, "log_odds_chosen": 9.733002662658691, "log_odds_ratio": -0.0004925908870063722, "logits/chosen": -0.7571265697479248, "logits/rejected": -0.8239815831184387, "logps/chosen": -0.00042578810825943947, "logps/rejected": -1.9153045415878296, "loss": 0.7991, "nll_loss": 0.19973230361938477, "rewards/accuracies": 1.0, "rewards/chosen": -4.257881300873123e-05, "rewards/margins": 0.1914878785610199, "rewards/rejected": -0.19153046607971191, "step": 7902 }, { "epoch": 5.4654218533886585, "grad_norm": 9.545635223388672, "learning_rate": 2.5192100814507454e-05, "log_odds_chosen": 9.390292167663574, "log_odds_ratio": -0.03421199694275856, "logits/chosen": 0.020793870091438293, "logits/rejected": -0.06223713606595993, "logps/chosen": -0.007960923947393894, "logps/rejected": -1.421905279159546, "loss": 0.7789, "nll_loss": 0.19130827486515045, "rewards/accuracies": 1.0, "rewards/chosen": -0.00079609255772084, "rewards/margins": 0.14139443635940552, "rewards/rejected": -0.14219053089618683, "step": 7903 }, { "epoch": 5.466113416320885, "grad_norm": 10.32170295715332, "learning_rate": 2.5188258798217306e-05, "log_odds_chosen": 9.949746131896973, "log_odds_ratio": -0.25190046429634094, "logits/chosen": -0.5395863652229309, "logits/rejected": -0.49859222769737244, "logps/chosen": -0.04036932438611984, "logps/rejected": -3.2632665634155273, "loss": 1.3503, "nll_loss": 0.31238770484924316, "rewards/accuracies": 0.875, "rewards/chosen": -0.004036932718008757, "rewards/margins": 0.32228973507881165, "rewards/rejected": -0.3263266682624817, "step": 7904 }, { "epoch": 5.466804979253112, "grad_norm": 10.649049758911133, "learning_rate": 2.5184416781927155e-05, "log_odds_chosen": 9.888246536254883, "log_odds_ratio": -0.00021169688261579722, "logits/chosen": -0.7000361084938049, "logits/rejected": -0.668064534664154, "logps/chosen": -0.0006827355246059597, "logps/rejected": -1.8547999858856201, "loss": 0.9118, "nll_loss": 0.22792655229568481, "rewards/accuracies": 1.0, "rewards/chosen": -6.827355537097901e-05, "rewards/margins": 0.1854117214679718, "rewards/rejected": -0.185479998588562, "step": 7905 }, { "epoch": 5.467496542185339, "grad_norm": 6.560869216918945, "learning_rate": 2.5180574765637004e-05, "log_odds_chosen": 11.247020721435547, "log_odds_ratio": -6.718430813634768e-05, "logits/chosen": -0.39155805110931396, "logits/rejected": -0.431242436170578, "logps/chosen": -0.0002432766486890614, "logps/rejected": -2.5565779209136963, "loss": 0.6568, "nll_loss": 0.16420210897922516, "rewards/accuracies": 1.0, "rewards/chosen": -2.432766450510826e-05, "rewards/margins": 0.25563347339630127, "rewards/rejected": -0.25565779209136963, "step": 7906 }, { "epoch": 5.468188105117566, "grad_norm": 49.997371673583984, "learning_rate": 2.517673274934686e-05, "log_odds_chosen": 9.113655090332031, "log_odds_ratio": -0.06486863642930984, "logits/chosen": -0.48760855197906494, "logits/rejected": -0.5602211952209473, "logps/chosen": -0.01464058831334114, "logps/rejected": -1.5333609580993652, "loss": 0.653, "nll_loss": 0.15675517916679382, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014640586450695992, "rewards/margins": 0.15187203884124756, "rewards/rejected": -0.15333609282970428, "step": 7907 }, { "epoch": 5.468879668049793, "grad_norm": 7.984647274017334, "learning_rate": 2.517289073305671e-05, "log_odds_chosen": 11.545648574829102, "log_odds_ratio": -1.5041207916510757e-05, "logits/chosen": -0.33951878547668457, "logits/rejected": -0.40240412950515747, "logps/chosen": -0.00020837641204707325, "logps/rejected": -2.9692435264587402, "loss": 0.5283, "nll_loss": 0.13208217918872833, "rewards/accuracies": 1.0, "rewards/chosen": -2.0837640477111563e-05, "rewards/margins": 0.29690349102020264, "rewards/rejected": -0.296924352645874, "step": 7908 }, { "epoch": 5.4695712309820195, "grad_norm": 7.5629425048828125, "learning_rate": 2.5169048716766558e-05, "log_odds_chosen": 8.79336166381836, "log_odds_ratio": -0.0004576348583213985, "logits/chosen": -0.48704391717910767, "logits/rejected": -0.5239378809928894, "logps/chosen": -0.0007803332409821451, "logps/rejected": -1.5541610717773438, "loss": 0.627, "nll_loss": 0.15671171247959137, "rewards/accuracies": 1.0, "rewards/chosen": -7.803332846378908e-05, "rewards/margins": 0.1553380787372589, "rewards/rejected": -0.1554161012172699, "step": 7909 }, { "epoch": 5.470262793914246, "grad_norm": 6.970637321472168, "learning_rate": 2.5165206700476414e-05, "log_odds_chosen": 9.41334342956543, "log_odds_ratio": -0.0017763872165232897, "logits/chosen": -0.6047480702400208, "logits/rejected": -0.6281265616416931, "logps/chosen": -0.008626021444797516, "logps/rejected": -1.9706454277038574, "loss": 1.2315, "nll_loss": 0.30770647525787354, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008626022026874125, "rewards/margins": 0.19620195031166077, "rewards/rejected": -0.19706454873085022, "step": 7910 }, { "epoch": 5.470954356846473, "grad_norm": 9.45356273651123, "learning_rate": 2.5161364684186263e-05, "log_odds_chosen": 9.752571105957031, "log_odds_ratio": -0.00014655350241810083, "logits/chosen": -0.5641658306121826, "logits/rejected": -0.6220308542251587, "logps/chosen": -0.0004932170268148184, "logps/rejected": -1.6327717304229736, "loss": 0.6847, "nll_loss": 0.17116187512874603, "rewards/accuracies": 1.0, "rewards/chosen": -4.9321701226290315e-05, "rewards/margins": 0.1632278561592102, "rewards/rejected": -0.16327717900276184, "step": 7911 }, { "epoch": 5.4716459197787, "grad_norm": 6.888040065765381, "learning_rate": 2.5157522667896112e-05, "log_odds_chosen": 11.008320808410645, "log_odds_ratio": -2.1486168407136574e-05, "logits/chosen": -0.8454128503799438, "logits/rejected": -0.849215030670166, "logps/chosen": -0.00010806650971062481, "logps/rejected": -1.9455052614212036, "loss": 0.515, "nll_loss": 0.12873561680316925, "rewards/accuracies": 1.0, "rewards/chosen": -1.08066506072646e-05, "rewards/margins": 0.19453972578048706, "rewards/rejected": -0.1945505440235138, "step": 7912 }, { "epoch": 5.472337482710927, "grad_norm": 7.035167217254639, "learning_rate": 2.5153680651605964e-05, "log_odds_chosen": 9.783241271972656, "log_odds_ratio": -0.0006064789486117661, "logits/chosen": -0.18023613095283508, "logits/rejected": -0.41178882122039795, "logps/chosen": -0.0021076889242976904, "logps/rejected": -2.3890411853790283, "loss": 1.0119, "nll_loss": 0.25290894508361816, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021076889242976904, "rewards/margins": 0.23869335651397705, "rewards/rejected": -0.23890413343906403, "step": 7913 }, { "epoch": 5.473029045643154, "grad_norm": 5.894155025482178, "learning_rate": 2.5149838635315814e-05, "log_odds_chosen": 9.392170906066895, "log_odds_ratio": -0.00023966014850884676, "logits/chosen": -0.40104061365127563, "logits/rejected": -0.29543566703796387, "logps/chosen": -0.0006486907368525863, "logps/rejected": -1.4715025424957275, "loss": 0.4484, "nll_loss": 0.11208631843328476, "rewards/accuracies": 1.0, "rewards/chosen": -6.486906931968406e-05, "rewards/margins": 0.14708539843559265, "rewards/rejected": -0.14715026319026947, "step": 7914 }, { "epoch": 5.4737206085753805, "grad_norm": 8.190710067749023, "learning_rate": 2.5145996619025663e-05, "log_odds_chosen": 10.513279914855957, "log_odds_ratio": -8.20760615170002e-05, "logits/chosen": -0.7377473711967468, "logits/rejected": -0.8403040170669556, "logps/chosen": -0.00011504338181111962, "logps/rejected": -1.5232871770858765, "loss": 0.8404, "nll_loss": 0.21009854972362518, "rewards/accuracies": 1.0, "rewards/chosen": -1.150433763541514e-05, "rewards/margins": 0.15231722593307495, "rewards/rejected": -0.1523287296295166, "step": 7915 }, { "epoch": 5.474412171507607, "grad_norm": 5.56616735458374, "learning_rate": 2.514215460273552e-05, "log_odds_chosen": 10.981778144836426, "log_odds_ratio": -6.831878272350878e-05, "logits/chosen": -0.657960057258606, "logits/rejected": -0.801915168762207, "logps/chosen": -0.0005254566203802824, "logps/rejected": -2.9949893951416016, "loss": 0.7411, "nll_loss": 0.18527188897132874, "rewards/accuracies": 1.0, "rewards/chosen": -5.2545659855240956e-05, "rewards/margins": 0.2994464039802551, "rewards/rejected": -0.29949894547462463, "step": 7916 }, { "epoch": 5.475103734439834, "grad_norm": 6.962458610534668, "learning_rate": 2.5138312586445367e-05, "log_odds_chosen": 10.380294799804688, "log_odds_ratio": -0.0003663065726868808, "logits/chosen": -0.727442741394043, "logits/rejected": -0.8181726932525635, "logps/chosen": -0.0008164530154317617, "logps/rejected": -2.5967469215393066, "loss": 0.8706, "nll_loss": 0.21762053668498993, "rewards/accuracies": 1.0, "rewards/chosen": -8.164531027432531e-05, "rewards/margins": 0.25959306955337524, "rewards/rejected": -0.25967469811439514, "step": 7917 }, { "epoch": 5.475795297372061, "grad_norm": 5.841886043548584, "learning_rate": 2.5134470570155217e-05, "log_odds_chosen": 11.438337326049805, "log_odds_ratio": -1.5561568943667226e-05, "logits/chosen": -0.18600064516067505, "logits/rejected": -0.32965904474258423, "logps/chosen": -0.00011055743379984051, "logps/rejected": -2.105381727218628, "loss": 0.6327, "nll_loss": 0.15817975997924805, "rewards/accuracies": 1.0, "rewards/chosen": -1.1055743925680872e-05, "rewards/margins": 0.21052710711956024, "rewards/rejected": -0.21053817868232727, "step": 7918 }, { "epoch": 5.476486860304288, "grad_norm": 5.203618049621582, "learning_rate": 2.5130628553865072e-05, "log_odds_chosen": 9.663179397583008, "log_odds_ratio": -0.000736804970074445, "logits/chosen": -0.05351455509662628, "logits/rejected": -0.050800621509552, "logps/chosen": -0.0022640221286565065, "logps/rejected": -2.2554354667663574, "loss": 0.5911, "nll_loss": 0.1477089375257492, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022640220413450152, "rewards/margins": 0.225317120552063, "rewards/rejected": -0.2255435287952423, "step": 7919 }, { "epoch": 5.477178423236515, "grad_norm": 8.263466835021973, "learning_rate": 2.512678653757492e-05, "log_odds_chosen": 10.013599395751953, "log_odds_ratio": -0.00011523398279678077, "logits/chosen": -0.7380114197731018, "logits/rejected": -0.6945633292198181, "logps/chosen": -0.0003955452120862901, "logps/rejected": -1.6594562530517578, "loss": 0.8181, "nll_loss": 0.20452088117599487, "rewards/accuracies": 1.0, "rewards/chosen": -3.955452120862901e-05, "rewards/margins": 0.16590608656406403, "rewards/rejected": -0.1659456342458725, "step": 7920 }, { "epoch": 5.477869986168741, "grad_norm": 5.862675189971924, "learning_rate": 2.512294452128477e-05, "log_odds_chosen": 10.199264526367188, "log_odds_ratio": -0.00010353358811698854, "logits/chosen": -0.6615251302719116, "logits/rejected": -0.6607353091239929, "logps/chosen": -0.0004281606525182724, "logps/rejected": -1.998523473739624, "loss": 1.0327, "nll_loss": 0.2581551671028137, "rewards/accuracies": 1.0, "rewards/chosen": -4.281606015865691e-05, "rewards/margins": 0.19980952143669128, "rewards/rejected": -0.1998523622751236, "step": 7921 }, { "epoch": 5.478561549100968, "grad_norm": 9.82187557220459, "learning_rate": 2.5119102504994623e-05, "log_odds_chosen": 10.85956859588623, "log_odds_ratio": -2.9763910788460635e-05, "logits/chosen": -0.6906986832618713, "logits/rejected": -0.7038851380348206, "logps/chosen": -0.0005827648565173149, "logps/rejected": -2.5121445655822754, "loss": 0.7615, "nll_loss": 0.19037176668643951, "rewards/accuracies": 1.0, "rewards/chosen": -5.827648419653997e-05, "rewards/margins": 0.25115618109703064, "rewards/rejected": -0.2512144446372986, "step": 7922 }, { "epoch": 5.479253112033195, "grad_norm": 7.418650150299072, "learning_rate": 2.5115260488704472e-05, "log_odds_chosen": 10.056888580322266, "log_odds_ratio": -0.00015551802061963826, "logits/chosen": -0.8092662692070007, "logits/rejected": -0.8078435659408569, "logps/chosen": -0.0003808286564890295, "logps/rejected": -2.0566279888153076, "loss": 0.7577, "nll_loss": 0.18939712643623352, "rewards/accuracies": 1.0, "rewards/chosen": -3.808286419371143e-05, "rewards/margins": 0.20562469959259033, "rewards/rejected": -0.205662801861763, "step": 7923 }, { "epoch": 5.479944674965422, "grad_norm": 6.861764907836914, "learning_rate": 2.511141847241432e-05, "log_odds_chosen": 10.926447868347168, "log_odds_ratio": -9.362271521240473e-05, "logits/chosen": -0.466507226228714, "logits/rejected": -0.4524589776992798, "logps/chosen": -0.0002040699328063056, "logps/rejected": -2.489025831222534, "loss": 0.8015, "nll_loss": 0.20037364959716797, "rewards/accuracies": 1.0, "rewards/chosen": -2.0406996554811485e-05, "rewards/margins": 0.2488822042942047, "rewards/rejected": -0.2489026039838791, "step": 7924 }, { "epoch": 5.480636237897649, "grad_norm": 10.226140975952148, "learning_rate": 2.5107576456124177e-05, "log_odds_chosen": 10.256708145141602, "log_odds_ratio": -0.00020014113397337496, "logits/chosen": -0.9860405325889587, "logits/rejected": -0.9984095096588135, "logps/chosen": -0.0006605676026083529, "logps/rejected": -1.8433809280395508, "loss": 0.8948, "nll_loss": 0.22368690371513367, "rewards/accuracies": 1.0, "rewards/chosen": -6.605676026083529e-05, "rewards/margins": 0.18427203595638275, "rewards/rejected": -0.18433809280395508, "step": 7925 }, { "epoch": 5.481327800829876, "grad_norm": 8.794382095336914, "learning_rate": 2.5103734439834026e-05, "log_odds_chosen": 11.23933219909668, "log_odds_ratio": -2.205545206379611e-05, "logits/chosen": -0.6451046466827393, "logits/rejected": -0.7187870740890503, "logps/chosen": -0.0001990120654227212, "logps/rejected": -2.49930477142334, "loss": 0.8034, "nll_loss": 0.20085430145263672, "rewards/accuracies": 1.0, "rewards/chosen": -1.990120654227212e-05, "rewards/margins": 0.24991059303283691, "rewards/rejected": -0.2499305009841919, "step": 7926 }, { "epoch": 5.482019363762102, "grad_norm": 9.740113258361816, "learning_rate": 2.5099892423543875e-05, "log_odds_chosen": 10.139154434204102, "log_odds_ratio": -0.0001262970909010619, "logits/chosen": -0.7651572227478027, "logits/rejected": -0.8166838884353638, "logps/chosen": -0.0006389489863067865, "logps/rejected": -1.9527884721755981, "loss": 0.8119, "nll_loss": 0.2029644101858139, "rewards/accuracies": 1.0, "rewards/chosen": -6.389489863067865e-05, "rewards/margins": 0.19521497189998627, "rewards/rejected": -0.1952788531780243, "step": 7927 }, { "epoch": 5.482710926694329, "grad_norm": 15.382346153259277, "learning_rate": 2.509605040725373e-05, "log_odds_chosen": 10.098079681396484, "log_odds_ratio": -8.8360087829642e-05, "logits/chosen": -0.3574230372905731, "logits/rejected": -0.33704087138175964, "logps/chosen": -0.0006936791469343007, "logps/rejected": -2.5140581130981445, "loss": 1.2812, "nll_loss": 0.3202976882457733, "rewards/accuracies": 1.0, "rewards/chosen": -6.93679103278555e-05, "rewards/margins": 0.2513364553451538, "rewards/rejected": -0.25140583515167236, "step": 7928 }, { "epoch": 5.483402489626556, "grad_norm": 9.151076316833496, "learning_rate": 2.509220839096358e-05, "log_odds_chosen": 10.143035888671875, "log_odds_ratio": -0.00013026421947870404, "logits/chosen": -0.2792324125766754, "logits/rejected": -0.39061158895492554, "logps/chosen": -0.000424693338572979, "logps/rejected": -1.8352653980255127, "loss": 1.1901, "nll_loss": 0.2975192666053772, "rewards/accuracies": 1.0, "rewards/chosen": -4.246933531248942e-05, "rewards/margins": 0.18348407745361328, "rewards/rejected": -0.18352654576301575, "step": 7929 }, { "epoch": 5.484094052558783, "grad_norm": 15.15778636932373, "learning_rate": 2.508836637467343e-05, "log_odds_chosen": 11.682640075683594, "log_odds_ratio": -1.2657160368689802e-05, "logits/chosen": -0.23138004541397095, "logits/rejected": -0.32260462641716003, "logps/chosen": -0.00038684680475853384, "logps/rejected": -3.3264083862304688, "loss": 0.6705, "nll_loss": 0.16762800514698029, "rewards/accuracies": 1.0, "rewards/chosen": -3.868468411383219e-05, "rewards/margins": 0.3326021432876587, "rewards/rejected": -0.3326408565044403, "step": 7930 }, { "epoch": 5.48478561549101, "grad_norm": 7.947671413421631, "learning_rate": 2.508452435838328e-05, "log_odds_chosen": 9.699936866760254, "log_odds_ratio": -0.0006529848906211555, "logits/chosen": -0.5883873701095581, "logits/rejected": -0.7178964614868164, "logps/chosen": -0.001084399176761508, "logps/rejected": -1.9630234241485596, "loss": 0.9102, "nll_loss": 0.22749494016170502, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010843992640729994, "rewards/margins": 0.1961939036846161, "rewards/rejected": -0.1963023543357849, "step": 7931 }, { "epoch": 5.485477178423237, "grad_norm": 6.651211261749268, "learning_rate": 2.508068234209313e-05, "log_odds_chosen": 10.307214736938477, "log_odds_ratio": -0.0004254367668181658, "logits/chosen": -0.8200116157531738, "logits/rejected": -0.7713389992713928, "logps/chosen": -0.00029836269095540047, "logps/rejected": -1.8111873865127563, "loss": 0.6126, "nll_loss": 0.1531156450510025, "rewards/accuracies": 1.0, "rewards/chosen": -2.983627018693369e-05, "rewards/margins": 0.18108892440795898, "rewards/rejected": -0.18111875653266907, "step": 7932 }, { "epoch": 5.486168741355463, "grad_norm": 5.8201704025268555, "learning_rate": 2.507684032580298e-05, "log_odds_chosen": 10.988911628723145, "log_odds_ratio": -0.0004914018791168928, "logits/chosen": -0.3212338387966156, "logits/rejected": -0.4956795573234558, "logps/chosen": -0.0008269093232229352, "logps/rejected": -2.6963746547698975, "loss": 0.7461, "nll_loss": 0.1864856332540512, "rewards/accuracies": 1.0, "rewards/chosen": -8.269093814305961e-05, "rewards/margins": 0.2695547938346863, "rewards/rejected": -0.26963746547698975, "step": 7933 }, { "epoch": 5.48686030428769, "grad_norm": 7.1769938468933105, "learning_rate": 2.5072998309512835e-05, "log_odds_chosen": 11.084684371948242, "log_odds_ratio": -2.3583517759107053e-05, "logits/chosen": -0.574668824672699, "logits/rejected": -0.7622121572494507, "logps/chosen": -0.0001388926466461271, "logps/rejected": -2.050570249557495, "loss": 0.5676, "nll_loss": 0.1418919414281845, "rewards/accuracies": 1.0, "rewards/chosen": -1.388926466461271e-05, "rewards/margins": 0.20504313707351685, "rewards/rejected": -0.2050570249557495, "step": 7934 }, { "epoch": 5.487551867219917, "grad_norm": 9.00568675994873, "learning_rate": 2.5069156293222684e-05, "log_odds_chosen": 10.233102798461914, "log_odds_ratio": -8.496645023114979e-05, "logits/chosen": -0.5563417077064514, "logits/rejected": -0.588212788105011, "logps/chosen": -0.0002815087791532278, "logps/rejected": -2.1203181743621826, "loss": 0.579, "nll_loss": 0.1447538137435913, "rewards/accuracies": 1.0, "rewards/chosen": -2.8150876460131258e-05, "rewards/margins": 0.2120036780834198, "rewards/rejected": -0.21203184127807617, "step": 7935 }, { "epoch": 5.488243430152144, "grad_norm": 17.79596519470215, "learning_rate": 2.5065314276932533e-05, "log_odds_chosen": 9.840686798095703, "log_odds_ratio": -0.00029457185883075, "logits/chosen": -0.20178522169589996, "logits/rejected": -0.3377896547317505, "logps/chosen": -0.00044397261808626354, "logps/rejected": -1.8732333183288574, "loss": 0.7773, "nll_loss": 0.19428730010986328, "rewards/accuracies": 1.0, "rewards/chosen": -4.439726035343483e-05, "rewards/margins": 0.18727895617485046, "rewards/rejected": -0.18732333183288574, "step": 7936 }, { "epoch": 5.488934993084371, "grad_norm": 9.293213844299316, "learning_rate": 2.506147226064239e-05, "log_odds_chosen": 10.284903526306152, "log_odds_ratio": -6.364515866152942e-05, "logits/chosen": -0.3089835047721863, "logits/rejected": -0.39305680990219116, "logps/chosen": -0.0027302736416459084, "logps/rejected": -2.2307722568511963, "loss": 0.7385, "nll_loss": 0.18461675941944122, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002730273699853569, "rewards/margins": 0.22280417382717133, "rewards/rejected": -0.2230772227048874, "step": 7937 }, { "epoch": 5.4896265560165975, "grad_norm": 5.33021879196167, "learning_rate": 2.5057630244352238e-05, "log_odds_chosen": 9.169672966003418, "log_odds_ratio": -0.000621531275101006, "logits/chosen": -0.2677467465400696, "logits/rejected": -0.3390297293663025, "logps/chosen": -0.0015468818601220846, "logps/rejected": -1.889085054397583, "loss": 0.8048, "nll_loss": 0.20113667845726013, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015468819765374064, "rewards/margins": 0.18875382840633392, "rewards/rejected": -0.18890851736068726, "step": 7938 }, { "epoch": 5.490318118948824, "grad_norm": 5.521744251251221, "learning_rate": 2.5053788228062087e-05, "log_odds_chosen": 9.12801456451416, "log_odds_ratio": -0.0012813995126634836, "logits/chosen": -0.7456037402153015, "logits/rejected": -0.750789999961853, "logps/chosen": -0.0036856280639767647, "logps/rejected": -1.3551989793777466, "loss": 1.1409, "nll_loss": 0.2850930094718933, "rewards/accuracies": 1.0, "rewards/chosen": -0.00036856281803920865, "rewards/margins": 0.13515134155750275, "rewards/rejected": -0.13551990687847137, "step": 7939 }, { "epoch": 5.491009681881051, "grad_norm": 8.740033149719238, "learning_rate": 2.504994621177194e-05, "log_odds_chosen": 8.394597053527832, "log_odds_ratio": -0.0022611194290220737, "logits/chosen": -0.43547797203063965, "logits/rejected": -0.5125408172607422, "logps/chosen": -0.005125186871737242, "logps/rejected": -1.7335705757141113, "loss": 1.0796, "nll_loss": 0.2696824371814728, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005125186871737242, "rewards/margins": 0.1728445589542389, "rewards/rejected": -0.1733570694923401, "step": 7940 }, { "epoch": 5.491701244813278, "grad_norm": 5.807063102722168, "learning_rate": 2.504610419548179e-05, "log_odds_chosen": 10.93294906616211, "log_odds_ratio": -6.808589387219399e-05, "logits/chosen": -0.6323756575584412, "logits/rejected": -0.6467557549476624, "logps/chosen": -0.00018673163140192628, "logps/rejected": -1.8192808628082275, "loss": 0.4817, "nll_loss": 0.12041807919740677, "rewards/accuracies": 1.0, "rewards/chosen": -1.8673163140192628e-05, "rewards/margins": 0.18190941214561462, "rewards/rejected": -0.18192808330059052, "step": 7941 }, { "epoch": 5.492392807745505, "grad_norm": 9.008139610290527, "learning_rate": 2.5042262179191638e-05, "log_odds_chosen": 10.336362838745117, "log_odds_ratio": -0.00031754738301970065, "logits/chosen": -0.75423264503479, "logits/rejected": -0.7876052856445312, "logps/chosen": -0.0003842760343104601, "logps/rejected": -1.8387316465377808, "loss": 0.5582, "nll_loss": 0.13952915370464325, "rewards/accuracies": 1.0, "rewards/chosen": -3.842760270345025e-05, "rewards/margins": 0.1838347315788269, "rewards/rejected": -0.18387316167354584, "step": 7942 }, { "epoch": 5.493084370677732, "grad_norm": 7.10367488861084, "learning_rate": 2.5038420162901494e-05, "log_odds_chosen": 10.97952651977539, "log_odds_ratio": -3.581916098482907e-05, "logits/chosen": -0.8699790239334106, "logits/rejected": -0.8966847658157349, "logps/chosen": -0.00038829189725220203, "logps/rejected": -2.5607616901397705, "loss": 0.6365, "nll_loss": 0.1591133177280426, "rewards/accuracies": 1.0, "rewards/chosen": -3.882919190800749e-05, "rewards/margins": 0.25603732466697693, "rewards/rejected": -0.2560761570930481, "step": 7943 }, { "epoch": 5.4937759336099585, "grad_norm": 5.826725959777832, "learning_rate": 2.5034578146611343e-05, "log_odds_chosen": 10.67078971862793, "log_odds_ratio": -8.722233178559691e-05, "logits/chosen": -0.6411142349243164, "logits/rejected": -0.7242069244384766, "logps/chosen": -0.0007470400887541473, "logps/rejected": -2.562070608139038, "loss": 0.6689, "nll_loss": 0.16722771525382996, "rewards/accuracies": 1.0, "rewards/chosen": -7.470400305464864e-05, "rewards/margins": 0.2561323642730713, "rewards/rejected": -0.25620707869529724, "step": 7944 }, { "epoch": 5.494467496542185, "grad_norm": 9.011191368103027, "learning_rate": 2.5030736130321192e-05, "log_odds_chosen": 10.33845043182373, "log_odds_ratio": -0.0010209310567006469, "logits/chosen": -0.8604280948638916, "logits/rejected": -0.891929030418396, "logps/chosen": -0.0017238704022020102, "logps/rejected": -2.7034897804260254, "loss": 0.6467, "nll_loss": 0.16158509254455566, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017238703730981797, "rewards/margins": 0.27017658948898315, "rewards/rejected": -0.2703489661216736, "step": 7945 }, { "epoch": 5.495159059474412, "grad_norm": 6.513817310333252, "learning_rate": 2.5026894114031048e-05, "log_odds_chosen": 11.323633193969727, "log_odds_ratio": -1.6800902812974527e-05, "logits/chosen": 0.04846089333295822, "logits/rejected": -0.07541124522686005, "logps/chosen": -0.00011946099402848631, "logps/rejected": -2.3040506839752197, "loss": 0.7299, "nll_loss": 0.18246236443519592, "rewards/accuracies": 1.0, "rewards/chosen": -1.1946100130444393e-05, "rewards/margins": 0.23039312660694122, "rewards/rejected": -0.2304050773382187, "step": 7946 }, { "epoch": 5.495850622406639, "grad_norm": 10.008655548095703, "learning_rate": 2.5023052097740897e-05, "log_odds_chosen": 10.430981636047363, "log_odds_ratio": -4.921500658383593e-05, "logits/chosen": -0.12481583654880524, "logits/rejected": -0.24347051978111267, "logps/chosen": -0.00031723693246021867, "logps/rejected": -2.288741111755371, "loss": 0.7818, "nll_loss": 0.19544847309589386, "rewards/accuracies": 1.0, "rewards/chosen": -3.172369542880915e-05, "rewards/margins": 0.22884240746498108, "rewards/rejected": -0.2288741171360016, "step": 7947 }, { "epoch": 5.496542185338866, "grad_norm": 15.212750434875488, "learning_rate": 2.5019210081450746e-05, "log_odds_chosen": 10.807697296142578, "log_odds_ratio": -5.2383267757249996e-05, "logits/chosen": -0.460726261138916, "logits/rejected": -0.5388174057006836, "logps/chosen": -0.00018959477893076837, "logps/rejected": -1.999808669090271, "loss": 1.155, "nll_loss": 0.28874483704566956, "rewards/accuracies": 1.0, "rewards/chosen": -1.895947934826836e-05, "rewards/margins": 0.19996190071105957, "rewards/rejected": -0.19998085498809814, "step": 7948 }, { "epoch": 5.497233748271093, "grad_norm": 10.610461235046387, "learning_rate": 2.5015368065160598e-05, "log_odds_chosen": 10.641895294189453, "log_odds_ratio": -0.0001160370884463191, "logits/chosen": -0.6269339919090271, "logits/rejected": -0.6266019344329834, "logps/chosen": -0.00038971190224401653, "logps/rejected": -2.3446550369262695, "loss": 1.2292, "nll_loss": 0.307287335395813, "rewards/accuracies": 1.0, "rewards/chosen": -3.89711931347847e-05, "rewards/margins": 0.23442654311656952, "rewards/rejected": -0.23446550965309143, "step": 7949 }, { "epoch": 5.4979253112033195, "grad_norm": 10.49134635925293, "learning_rate": 2.5011526048870447e-05, "log_odds_chosen": 10.224824905395508, "log_odds_ratio": -0.000180298593477346, "logits/chosen": -0.6823133826255798, "logits/rejected": -0.756210446357727, "logps/chosen": -0.010154692456126213, "logps/rejected": -3.193361759185791, "loss": 0.6622, "nll_loss": 0.1655343770980835, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010154691990464926, "rewards/margins": 0.31832069158554077, "rewards/rejected": -0.3193361759185791, "step": 7950 }, { "epoch": 5.498616874135546, "grad_norm": 8.006492614746094, "learning_rate": 2.5007684032580296e-05, "log_odds_chosen": 10.028341293334961, "log_odds_ratio": -0.00015695212641730905, "logits/chosen": -0.12197916209697723, "logits/rejected": -0.190192312002182, "logps/chosen": -0.00036239047767594457, "logps/rejected": -1.8425757884979248, "loss": 0.7822, "nll_loss": 0.1955282837152481, "rewards/accuracies": 1.0, "rewards/chosen": -3.623904922278598e-05, "rewards/margins": 0.18422134220600128, "rewards/rejected": -0.18425756692886353, "step": 7951 }, { "epoch": 5.499308437067773, "grad_norm": 6.36076545715332, "learning_rate": 2.5003842016290152e-05, "log_odds_chosen": 10.715845108032227, "log_odds_ratio": -5.2525760111166164e-05, "logits/chosen": -0.1273401528596878, "logits/rejected": -0.13696612417697906, "logps/chosen": -0.0002070648333756253, "logps/rejected": -2.113837718963623, "loss": 0.9156, "nll_loss": 0.2289057821035385, "rewards/accuracies": 1.0, "rewards/chosen": -2.0706484065158293e-05, "rewards/margins": 0.2113630771636963, "rewards/rejected": -0.21138378977775574, "step": 7952 }, { "epoch": 5.5, "grad_norm": 10.263921737670898, "learning_rate": 2.5e-05, "log_odds_chosen": 10.461770057678223, "log_odds_ratio": -0.00011564150190679356, "logits/chosen": -0.663473904132843, "logits/rejected": -0.5993514657020569, "logps/chosen": -0.000487415527459234, "logps/rejected": -1.8845086097717285, "loss": 0.6197, "nll_loss": 0.15491798520088196, "rewards/accuracies": 1.0, "rewards/chosen": -4.874155638390221e-05, "rewards/margins": 0.18840211629867554, "rewards/rejected": -0.1884508728981018, "step": 7953 }, { "epoch": 5.500691562932227, "grad_norm": 8.521260261535645, "learning_rate": 2.4996157983709854e-05, "log_odds_chosen": 9.991600036621094, "log_odds_ratio": -0.0003730976313818246, "logits/chosen": 0.06851185858249664, "logits/rejected": 0.07069035619497299, "logps/chosen": -0.00043773557990789413, "logps/rejected": -1.9327276945114136, "loss": 1.147, "nll_loss": 0.2867090106010437, "rewards/accuracies": 1.0, "rewards/chosen": -4.3773557990789413e-05, "rewards/margins": 0.19322898983955383, "rewards/rejected": -0.19327276945114136, "step": 7954 }, { "epoch": 5.501383125864454, "grad_norm": 7.410879135131836, "learning_rate": 2.4992315967419703e-05, "log_odds_chosen": 11.399124145507812, "log_odds_ratio": -3.132876372546889e-05, "logits/chosen": -0.29479166865348816, "logits/rejected": -0.32007062435150146, "logps/chosen": -0.00012927822535857558, "logps/rejected": -2.465428590774536, "loss": 0.6375, "nll_loss": 0.15937091410160065, "rewards/accuracies": 1.0, "rewards/chosen": -1.2927822353958618e-05, "rewards/margins": 0.2465299367904663, "rewards/rejected": -0.24654288589954376, "step": 7955 }, { "epoch": 5.5020746887966805, "grad_norm": 6.070524215698242, "learning_rate": 2.4988473951129555e-05, "log_odds_chosen": 10.64219856262207, "log_odds_ratio": -4.545422052615322e-05, "logits/chosen": -0.4476628601551056, "logits/rejected": -0.507516086101532, "logps/chosen": -0.0005662592011503875, "logps/rejected": -2.5192782878875732, "loss": 0.746, "nll_loss": 0.18649649620056152, "rewards/accuracies": 1.0, "rewards/chosen": -5.662592229782604e-05, "rewards/margins": 0.2518712282180786, "rewards/rejected": -0.25192785263061523, "step": 7956 }, { "epoch": 5.502766251728907, "grad_norm": 9.197659492492676, "learning_rate": 2.4984631934839408e-05, "log_odds_chosen": 10.960630416870117, "log_odds_ratio": -5.631362000713125e-05, "logits/chosen": -0.7002097368240356, "logits/rejected": -0.6325714588165283, "logps/chosen": -0.0001856803719419986, "logps/rejected": -2.307868003845215, "loss": 1.0023, "nll_loss": 0.2505730390548706, "rewards/accuracies": 1.0, "rewards/chosen": -1.85680364666041e-05, "rewards/margins": 0.23076826333999634, "rewards/rejected": -0.23078681528568268, "step": 7957 }, { "epoch": 5.503457814661134, "grad_norm": 6.928858280181885, "learning_rate": 2.4980789918549257e-05, "log_odds_chosen": 10.45057487487793, "log_odds_ratio": -6.252497405512258e-05, "logits/chosen": -0.41174808144569397, "logits/rejected": -0.4137668311595917, "logps/chosen": -0.00021452132205013186, "logps/rejected": -1.9052519798278809, "loss": 0.7878, "nll_loss": 0.19695553183555603, "rewards/accuracies": 1.0, "rewards/chosen": -2.1452131477417424e-05, "rewards/margins": 0.19050373136997223, "rewards/rejected": -0.19052520394325256, "step": 7958 }, { "epoch": 5.504149377593361, "grad_norm": 6.849351406097412, "learning_rate": 2.4976947902259106e-05, "log_odds_chosen": 10.479252815246582, "log_odds_ratio": -8.19520719232969e-05, "logits/chosen": -0.41458866000175476, "logits/rejected": -0.5210939049720764, "logps/chosen": -0.0002766270481515676, "logps/rejected": -1.998887538909912, "loss": 0.7945, "nll_loss": 0.19861416518688202, "rewards/accuracies": 1.0, "rewards/chosen": -2.766270517895464e-05, "rewards/margins": 0.19986110925674438, "rewards/rejected": -0.19988876581192017, "step": 7959 }, { "epoch": 5.504840940525588, "grad_norm": 9.421629905700684, "learning_rate": 2.4973105885968958e-05, "log_odds_chosen": 10.660806655883789, "log_odds_ratio": -3.9099551941035315e-05, "logits/chosen": -0.23674964904785156, "logits/rejected": -0.33446353673934937, "logps/chosen": -0.00019968458218500018, "logps/rejected": -1.9553388357162476, "loss": 0.762, "nll_loss": 0.19048884510993958, "rewards/accuracies": 1.0, "rewards/chosen": -1.9968458218500018e-05, "rewards/margins": 0.19551391899585724, "rewards/rejected": -0.195533886551857, "step": 7960 }, { "epoch": 5.505532503457815, "grad_norm": 5.590500831604004, "learning_rate": 2.4969263869678807e-05, "log_odds_chosen": 10.218223571777344, "log_odds_ratio": -7.917553011793643e-05, "logits/chosen": -0.07494551688432693, "logits/rejected": -0.1157187670469284, "logps/chosen": -0.00028149288846179843, "logps/rejected": -1.8711258172988892, "loss": 0.8577, "nll_loss": 0.21441730856895447, "rewards/accuracies": 1.0, "rewards/chosen": -2.814928666339256e-05, "rewards/margins": 0.18708443641662598, "rewards/rejected": -0.18711256980895996, "step": 7961 }, { "epoch": 5.5062240663900415, "grad_norm": 12.93747615814209, "learning_rate": 2.496542185338866e-05, "log_odds_chosen": 9.34193229675293, "log_odds_ratio": -0.011150677688419819, "logits/chosen": -0.5461189150810242, "logits/rejected": -0.4774158000946045, "logps/chosen": -0.04011606052517891, "logps/rejected": -1.7543762922286987, "loss": 0.703, "nll_loss": 0.17463496327400208, "rewards/accuracies": 1.0, "rewards/chosen": -0.004011606797575951, "rewards/margins": 0.17142602801322937, "rewards/rejected": -0.17543762922286987, "step": 7962 }, { "epoch": 5.506915629322268, "grad_norm": 16.010683059692383, "learning_rate": 2.4961579837098512e-05, "log_odds_chosen": 11.137772560119629, "log_odds_ratio": -1.815756331779994e-05, "logits/chosen": -0.25531166791915894, "logits/rejected": -0.24413050711154938, "logps/chosen": -0.00016972131561487913, "logps/rejected": -2.17885422706604, "loss": 0.7363, "nll_loss": 0.18407143652439117, "rewards/accuracies": 1.0, "rewards/chosen": -1.6972131561487913e-05, "rewards/margins": 0.21786844730377197, "rewards/rejected": -0.21788541972637177, "step": 7963 }, { "epoch": 5.507607192254495, "grad_norm": 11.521442413330078, "learning_rate": 2.495773782080836e-05, "log_odds_chosen": 9.831197738647461, "log_odds_ratio": -0.00015496321429964155, "logits/chosen": -0.6120339035987854, "logits/rejected": -0.727606475353241, "logps/chosen": -0.0003907257050741464, "logps/rejected": -1.8259804248809814, "loss": 0.8695, "nll_loss": 0.21735124289989471, "rewards/accuracies": 1.0, "rewards/chosen": -3.907257269020192e-05, "rewards/margins": 0.18255898356437683, "rewards/rejected": -0.1825980544090271, "step": 7964 }, { "epoch": 5.508298755186722, "grad_norm": 8.496864318847656, "learning_rate": 2.4953895804518214e-05, "log_odds_chosen": 11.090476036071777, "log_odds_ratio": -0.00013023380597587675, "logits/chosen": -0.1817966103553772, "logits/rejected": -0.22730708122253418, "logps/chosen": -0.00041753993718884885, "logps/rejected": -2.8603012561798096, "loss": 0.7038, "nll_loss": 0.1759316325187683, "rewards/accuracies": 1.0, "rewards/chosen": -4.175399590167217e-05, "rewards/margins": 0.28598839044570923, "rewards/rejected": -0.2860301434993744, "step": 7965 }, { "epoch": 5.508990318118949, "grad_norm": 5.603028297424316, "learning_rate": 2.4950053788228066e-05, "log_odds_chosen": 10.303237915039062, "log_odds_ratio": -0.0003845428582280874, "logits/chosen": -0.7924565672874451, "logits/rejected": -0.8252934813499451, "logps/chosen": -0.000411234941566363, "logps/rejected": -1.761689305305481, "loss": 0.5459, "nll_loss": 0.1364300549030304, "rewards/accuracies": 1.0, "rewards/chosen": -4.11234941566363e-05, "rewards/margins": 0.1761278212070465, "rewards/rejected": -0.17616893351078033, "step": 7966 }, { "epoch": 5.509681881051176, "grad_norm": 8.586468696594238, "learning_rate": 2.4946211771937915e-05, "log_odds_chosen": 10.925909042358398, "log_odds_ratio": -9.190478158416227e-05, "logits/chosen": -0.11336120218038559, "logits/rejected": -0.14280270040035248, "logps/chosen": -0.00026982746203429997, "logps/rejected": -2.5007822513580322, "loss": 0.9702, "nll_loss": 0.24254389107227325, "rewards/accuracies": 1.0, "rewards/chosen": -2.6982745112036355e-05, "rewards/margins": 0.25005125999450684, "rewards/rejected": -0.2500782310962677, "step": 7967 }, { "epoch": 5.5103734439834025, "grad_norm": 5.238374710083008, "learning_rate": 2.4942369755647764e-05, "log_odds_chosen": 10.48822021484375, "log_odds_ratio": -9.922584285959601e-05, "logits/chosen": -0.5154739618301392, "logits/rejected": -0.5209592580795288, "logps/chosen": -0.00020251476962585002, "logps/rejected": -2.038828134536743, "loss": 0.8264, "nll_loss": 0.2065943479537964, "rewards/accuracies": 1.0, "rewards/chosen": -2.025147659878712e-05, "rewards/margins": 0.20386257767677307, "rewards/rejected": -0.20388279855251312, "step": 7968 }, { "epoch": 5.511065006915629, "grad_norm": 7.684095859527588, "learning_rate": 2.4938527739357617e-05, "log_odds_chosen": 9.44205093383789, "log_odds_ratio": -0.0006889720680192113, "logits/chosen": -0.6913365721702576, "logits/rejected": -0.6879432797431946, "logps/chosen": -0.0010971655137836933, "logps/rejected": -1.7808010578155518, "loss": 1.0883, "nll_loss": 0.2720167338848114, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010971655137836933, "rewards/margins": 0.17797040939331055, "rewards/rejected": -0.17808012664318085, "step": 7969 }, { "epoch": 5.511756569847856, "grad_norm": 12.717248916625977, "learning_rate": 2.4934685723067466e-05, "log_odds_chosen": 9.68883991241455, "log_odds_ratio": -0.00018611655104905367, "logits/chosen": -0.4414333701133728, "logits/rejected": -0.5569271445274353, "logps/chosen": -0.00047959090443328023, "logps/rejected": -1.9319571256637573, "loss": 0.7711, "nll_loss": 0.19275924563407898, "rewards/accuracies": 1.0, "rewards/chosen": -4.795909262611531e-05, "rewards/margins": 0.19314777851104736, "rewards/rejected": -0.19319573044776917, "step": 7970 }, { "epoch": 5.512448132780083, "grad_norm": 6.287275314331055, "learning_rate": 2.4930843706777318e-05, "log_odds_chosen": 10.674774169921875, "log_odds_ratio": -9.911719826050103e-05, "logits/chosen": -0.6898698806762695, "logits/rejected": -0.5760419368743896, "logps/chosen": -0.0015942247118800879, "logps/rejected": -2.917684555053711, "loss": 0.6086, "nll_loss": 0.15215209126472473, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015942247409839183, "rewards/margins": 0.2916090488433838, "rewards/rejected": -0.29176849126815796, "step": 7971 }, { "epoch": 5.51313969571231, "grad_norm": 8.057446479797363, "learning_rate": 2.492700169048717e-05, "log_odds_chosen": 9.769332885742188, "log_odds_ratio": -0.002421770943328738, "logits/chosen": -0.9956812858581543, "logits/rejected": -0.9617546796798706, "logps/chosen": -0.0010418831370770931, "logps/rejected": -1.563226580619812, "loss": 0.8939, "nll_loss": 0.22323346138000488, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010418832243885845, "rewards/margins": 0.15621846914291382, "rewards/rejected": -0.1563226580619812, "step": 7972 }, { "epoch": 5.513831258644537, "grad_norm": 7.7207512855529785, "learning_rate": 2.492315967419702e-05, "log_odds_chosen": 9.700448989868164, "log_odds_ratio": -0.001893568434752524, "logits/chosen": -0.9668991565704346, "logits/rejected": -0.9769569635391235, "logps/chosen": -0.001259871176443994, "logps/rejected": -1.7321535348892212, "loss": 1.0863, "nll_loss": 0.2713871896266937, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012598712055478245, "rewards/margins": 0.1730893850326538, "rewards/rejected": -0.1732153594493866, "step": 7973 }, { "epoch": 5.514522821576763, "grad_norm": 7.019569396972656, "learning_rate": 2.4919317657906872e-05, "log_odds_chosen": 9.588005065917969, "log_odds_ratio": -0.00033628003438934684, "logits/chosen": -0.6726071834564209, "logits/rejected": -0.6700811982154846, "logps/chosen": -0.0003584343066904694, "logps/rejected": -1.4035160541534424, "loss": 0.9721, "nll_loss": 0.24299171566963196, "rewards/accuracies": 1.0, "rewards/chosen": -3.584342994145118e-05, "rewards/margins": 0.14031577110290527, "rewards/rejected": -0.14035160839557648, "step": 7974 }, { "epoch": 5.51521438450899, "grad_norm": 8.323095321655273, "learning_rate": 2.4915475641616724e-05, "log_odds_chosen": 10.26290512084961, "log_odds_ratio": -0.00018353183986619115, "logits/chosen": -0.5947859287261963, "logits/rejected": -0.6203582286834717, "logps/chosen": -0.0007037975010462105, "logps/rejected": -1.9799339771270752, "loss": 0.6118, "nll_loss": 0.15292108058929443, "rewards/accuracies": 1.0, "rewards/chosen": -7.037974864942953e-05, "rewards/margins": 0.19792300462722778, "rewards/rejected": -0.19799339771270752, "step": 7975 }, { "epoch": 5.515905947441217, "grad_norm": 6.35712194442749, "learning_rate": 2.4911633625326574e-05, "log_odds_chosen": 10.924556732177734, "log_odds_ratio": -3.232357630622573e-05, "logits/chosen": -0.9577485918998718, "logits/rejected": -0.9427499175071716, "logps/chosen": -0.00012175530719105154, "logps/rejected": -1.7347506284713745, "loss": 0.6837, "nll_loss": 0.17091642320156097, "rewards/accuracies": 1.0, "rewards/chosen": -1.2175531082903035e-05, "rewards/margins": 0.1734628826379776, "rewards/rejected": -0.17347505688667297, "step": 7976 }, { "epoch": 5.516597510373444, "grad_norm": 10.328906059265137, "learning_rate": 2.4907791609036423e-05, "log_odds_chosen": 10.467013359069824, "log_odds_ratio": -3.795954035012983e-05, "logits/chosen": -0.6780118942260742, "logits/rejected": -0.7425776124000549, "logps/chosen": -0.00019371267990209162, "logps/rejected": -1.8615496158599854, "loss": 0.582, "nll_loss": 0.14549127221107483, "rewards/accuracies": 1.0, "rewards/chosen": -1.937126762641128e-05, "rewards/margins": 0.18613559007644653, "rewards/rejected": -0.18615497648715973, "step": 7977 }, { "epoch": 5.517289073305671, "grad_norm": 10.159599304199219, "learning_rate": 2.4903949592746275e-05, "log_odds_chosen": 10.206741333007812, "log_odds_ratio": -0.00017410822329111397, "logits/chosen": -0.7243431210517883, "logits/rejected": -0.6271113157272339, "logps/chosen": -0.00040067086229100823, "logps/rejected": -2.076680898666382, "loss": 0.9364, "nll_loss": 0.23409080505371094, "rewards/accuracies": 1.0, "rewards/chosen": -4.006709059467539e-05, "rewards/margins": 0.2076280117034912, "rewards/rejected": -0.20766809582710266, "step": 7978 }, { "epoch": 5.517980636237898, "grad_norm": 9.997716903686523, "learning_rate": 2.4900107576456124e-05, "log_odds_chosen": 10.515052795410156, "log_odds_ratio": -6.538545130752027e-05, "logits/chosen": -0.13770362734794617, "logits/rejected": -0.2249658703804016, "logps/chosen": -0.000640476297121495, "logps/rejected": -2.286221981048584, "loss": 0.9074, "nll_loss": 0.22684696316719055, "rewards/accuracies": 1.0, "rewards/chosen": -6.404763553291559e-05, "rewards/margins": 0.22855816781520844, "rewards/rejected": -0.2286222130060196, "step": 7979 }, { "epoch": 5.518672199170124, "grad_norm": 4.5267333984375, "learning_rate": 2.4896265560165977e-05, "log_odds_chosen": 10.762895584106445, "log_odds_ratio": -5.7566088798921555e-05, "logits/chosen": -0.5054532885551453, "logits/rejected": -0.5565618872642517, "logps/chosen": -0.00048600853187963367, "logps/rejected": -2.163853168487549, "loss": 0.473, "nll_loss": 0.11825583875179291, "rewards/accuracies": 1.0, "rewards/chosen": -4.8600857553537935e-05, "rewards/margins": 0.2163367122411728, "rewards/rejected": -0.21638531982898712, "step": 7980 }, { "epoch": 5.519363762102351, "grad_norm": 6.196623802185059, "learning_rate": 2.489242354387583e-05, "log_odds_chosen": 9.516175270080566, "log_odds_ratio": -0.000546960742212832, "logits/chosen": -0.4045857787132263, "logits/rejected": -0.4056215286254883, "logps/chosen": -0.0007767517236061394, "logps/rejected": -1.6184048652648926, "loss": 0.5242, "nll_loss": 0.1309964507818222, "rewards/accuracies": 1.0, "rewards/chosen": -7.767517672618851e-05, "rewards/margins": 0.16176281869411469, "rewards/rejected": -0.1618404984474182, "step": 7981 }, { "epoch": 5.520055325034578, "grad_norm": 10.751822471618652, "learning_rate": 2.4888581527585678e-05, "log_odds_chosen": 8.485633850097656, "log_odds_ratio": -0.002912183292210102, "logits/chosen": -0.6579504013061523, "logits/rejected": -0.7018341422080994, "logps/chosen": -0.0013555031036958098, "logps/rejected": -1.6597306728363037, "loss": 0.8041, "nll_loss": 0.20074039697647095, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001355503045488149, "rewards/margins": 0.16583752632141113, "rewards/rejected": -0.16597306728363037, "step": 7982 }, { "epoch": 5.520746887966805, "grad_norm": 7.2890238761901855, "learning_rate": 2.488473951129553e-05, "log_odds_chosen": 10.734992980957031, "log_odds_ratio": -0.00011404085671529174, "logits/chosen": -0.47404229640960693, "logits/rejected": -0.5771169662475586, "logps/chosen": -0.00018186001398134977, "logps/rejected": -1.957323431968689, "loss": 0.787, "nll_loss": 0.1967388391494751, "rewards/accuracies": 1.0, "rewards/chosen": -1.8186001398134977e-05, "rewards/margins": 0.19571414589881897, "rewards/rejected": -0.19573235511779785, "step": 7983 }, { "epoch": 5.521438450899032, "grad_norm": 10.685153007507324, "learning_rate": 2.4880897495005383e-05, "log_odds_chosen": 11.264336585998535, "log_odds_ratio": -2.648890222189948e-05, "logits/chosen": -0.4374861419200897, "logits/rejected": -0.4763266444206238, "logps/chosen": -0.00012154224532423541, "logps/rejected": -2.124026298522949, "loss": 1.0, "nll_loss": 0.25000786781311035, "rewards/accuracies": 1.0, "rewards/chosen": -1.21542243505246e-05, "rewards/margins": 0.2123904675245285, "rewards/rejected": -0.21240262687206268, "step": 7984 }, { "epoch": 5.522130013831259, "grad_norm": 7.698362827301025, "learning_rate": 2.4877055478715232e-05, "log_odds_chosen": 9.903738021850586, "log_odds_ratio": -0.00037517695454880595, "logits/chosen": -0.3922664523124695, "logits/rejected": -0.4175853729248047, "logps/chosen": -0.0004097001510672271, "logps/rejected": -1.7772531509399414, "loss": 1.0402, "nll_loss": 0.26001253724098206, "rewards/accuracies": 1.0, "rewards/chosen": -4.097001510672271e-05, "rewards/margins": 0.17768435180187225, "rewards/rejected": -0.17772531509399414, "step": 7985 }, { "epoch": 5.522821576763485, "grad_norm": 7.038385391235352, "learning_rate": 2.487321346242508e-05, "log_odds_chosen": 10.166360855102539, "log_odds_ratio": -0.0013821757165715098, "logits/chosen": -0.6075230240821838, "logits/rejected": -0.6644953489303589, "logps/chosen": -0.0011727921664714813, "logps/rejected": -1.869389533996582, "loss": 0.8812, "nll_loss": 0.22015592455863953, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011727922537829727, "rewards/margins": 0.18682168424129486, "rewards/rejected": -0.18693895637989044, "step": 7986 }, { "epoch": 5.523513139695712, "grad_norm": 23.00648307800293, "learning_rate": 2.486937144613493e-05, "log_odds_chosen": 11.663230895996094, "log_odds_ratio": -3.127233503619209e-05, "logits/chosen": -0.5745376944541931, "logits/rejected": -0.5894800424575806, "logps/chosen": -0.00014439536607824266, "logps/rejected": -2.702629327774048, "loss": 0.8924, "nll_loss": 0.22308455407619476, "rewards/accuracies": 1.0, "rewards/chosen": -1.4439538063015789e-05, "rewards/margins": 0.27024850249290466, "rewards/rejected": -0.2702629566192627, "step": 7987 }, { "epoch": 5.524204702627939, "grad_norm": 9.516694068908691, "learning_rate": 2.4865529429844783e-05, "log_odds_chosen": 10.172452926635742, "log_odds_ratio": -0.00201642164029181, "logits/chosen": -0.44605493545532227, "logits/rejected": -0.4708332419395447, "logps/chosen": -0.000771133229136467, "logps/rejected": -2.4048895835876465, "loss": 0.9287, "nll_loss": 0.231984481215477, "rewards/accuracies": 1.0, "rewards/chosen": -7.711332000326365e-05, "rewards/margins": 0.24041184782981873, "rewards/rejected": -0.24048897624015808, "step": 7988 }, { "epoch": 5.524896265560166, "grad_norm": 6.859710216522217, "learning_rate": 2.4861687413554635e-05, "log_odds_chosen": 10.065300941467285, "log_odds_ratio": -0.0002263882925035432, "logits/chosen": -0.42298412322998047, "logits/rejected": -0.4986242651939392, "logps/chosen": -0.0020351733546704054, "logps/rejected": -2.262023448944092, "loss": 0.7278, "nll_loss": 0.18191887438297272, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020351732382550836, "rewards/margins": 0.22599883377552032, "rewards/rejected": -0.2262023538351059, "step": 7989 }, { "epoch": 5.525587828492393, "grad_norm": 6.190978050231934, "learning_rate": 2.4857845397264484e-05, "log_odds_chosen": 10.284591674804688, "log_odds_ratio": -8.91397285158746e-05, "logits/chosen": -0.26648449897766113, "logits/rejected": -0.4036068320274353, "logps/chosen": -0.00047370928223244846, "logps/rejected": -2.147756338119507, "loss": 0.7173, "nll_loss": 0.17931276559829712, "rewards/accuracies": 1.0, "rewards/chosen": -4.737092967843637e-05, "rewards/margins": 0.2147282510995865, "rewards/rejected": -0.21477562189102173, "step": 7990 }, { "epoch": 5.5262793914246195, "grad_norm": 10.940460205078125, "learning_rate": 2.4854003380974336e-05, "log_odds_chosen": 10.07880973815918, "log_odds_ratio": -0.00037312853964976966, "logits/chosen": -0.18514373898506165, "logits/rejected": -0.27846235036849976, "logps/chosen": -0.0007444091606885195, "logps/rejected": -1.682682991027832, "loss": 1.0049, "nll_loss": 0.2511962652206421, "rewards/accuracies": 1.0, "rewards/chosen": -7.444091170327738e-05, "rewards/margins": 0.16819386184215546, "rewards/rejected": -0.16826829314231873, "step": 7991 }, { "epoch": 5.526970954356846, "grad_norm": 5.758509635925293, "learning_rate": 2.485016136468419e-05, "log_odds_chosen": 9.71127700805664, "log_odds_ratio": -0.0002266637166030705, "logits/chosen": -0.5368736386299133, "logits/rejected": -0.5184175968170166, "logps/chosen": -0.0008136904216371477, "logps/rejected": -2.2877635955810547, "loss": 0.7681, "nll_loss": 0.1920141577720642, "rewards/accuracies": 1.0, "rewards/chosen": -8.136904216371477e-05, "rewards/margins": 0.22869496047496796, "rewards/rejected": -0.22877633571624756, "step": 7992 }, { "epoch": 5.527662517289073, "grad_norm": 6.862176418304443, "learning_rate": 2.4846319348394038e-05, "log_odds_chosen": 10.76097583770752, "log_odds_ratio": -4.926729525323026e-05, "logits/chosen": -0.7725235223770142, "logits/rejected": -0.7454330325126648, "logps/chosen": -0.00017461538664065301, "logps/rejected": -2.0369513034820557, "loss": 0.5055, "nll_loss": 0.12636780738830566, "rewards/accuracies": 1.0, "rewards/chosen": -1.7461539755458944e-05, "rewards/margins": 0.20367765426635742, "rewards/rejected": -0.2036951184272766, "step": 7993 }, { "epoch": 5.5283540802213, "grad_norm": 5.5887556076049805, "learning_rate": 2.484247733210389e-05, "log_odds_chosen": 9.259298324584961, "log_odds_ratio": -0.0001776816789060831, "logits/chosen": -0.32677367329597473, "logits/rejected": -0.3950062096118927, "logps/chosen": -0.00048437563236802816, "logps/rejected": -1.6748768091201782, "loss": 0.6943, "nll_loss": 0.17354771494865417, "rewards/accuracies": 1.0, "rewards/chosen": -4.8437563236802816e-05, "rewards/margins": 0.16743925213813782, "rewards/rejected": -0.16748769581317902, "step": 7994 }, { "epoch": 5.529045643153527, "grad_norm": 7.050952911376953, "learning_rate": 2.483863531581374e-05, "log_odds_chosen": 10.473284721374512, "log_odds_ratio": -0.00017908669542521238, "logits/chosen": -0.49899330735206604, "logits/rejected": -0.49205830693244934, "logps/chosen": -0.00046343091526068747, "logps/rejected": -1.8278075456619263, "loss": 0.5457, "nll_loss": 0.13640612363815308, "rewards/accuracies": 1.0, "rewards/chosen": -4.634309152606875e-05, "rewards/margins": 0.182734414935112, "rewards/rejected": -0.18278075754642487, "step": 7995 }, { "epoch": 5.529737206085754, "grad_norm": 9.834431648254395, "learning_rate": 2.483479329952359e-05, "log_odds_chosen": 10.868444442749023, "log_odds_ratio": -5.240093742031604e-05, "logits/chosen": -0.4045601189136505, "logits/rejected": -0.42231157422065735, "logps/chosen": -0.0003390312194824219, "logps/rejected": -2.556269407272339, "loss": 0.4876, "nll_loss": 0.1218939870595932, "rewards/accuracies": 1.0, "rewards/chosen": -3.3903121220646426e-05, "rewards/margins": 0.25559306144714355, "rewards/rejected": -0.25562694668769836, "step": 7996 }, { "epoch": 5.5304287690179805, "grad_norm": 7.154923439025879, "learning_rate": 2.483095128323344e-05, "log_odds_chosen": 9.480171203613281, "log_odds_ratio": -0.0006049070507287979, "logits/chosen": -0.34929412603378296, "logits/rejected": -0.3010343015193939, "logps/chosen": -0.0012120280880481005, "logps/rejected": -2.1136090755462646, "loss": 0.9824, "nll_loss": 0.24554985761642456, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012120280007366091, "rewards/margins": 0.21123971045017242, "rewards/rejected": -0.211360901594162, "step": 7997 }, { "epoch": 5.531120331950207, "grad_norm": 7.135680675506592, "learning_rate": 2.4827109266943293e-05, "log_odds_chosen": 9.703398704528809, "log_odds_ratio": -0.0005645658238790929, "logits/chosen": -0.6417530179023743, "logits/rejected": -0.698527455329895, "logps/chosen": -0.0014950187178328633, "logps/rejected": -2.188197135925293, "loss": 1.2605, "nll_loss": 0.3150603771209717, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001495018950663507, "rewards/margins": 0.21867021918296814, "rewards/rejected": -0.21881970763206482, "step": 7998 }, { "epoch": 5.531811894882434, "grad_norm": 6.10271692276001, "learning_rate": 2.4823267250653142e-05, "log_odds_chosen": 9.906564712524414, "log_odds_ratio": -0.00015931145753711462, "logits/chosen": -0.23830081522464752, "logits/rejected": -0.39299190044403076, "logps/chosen": -0.00039393879706040025, "logps/rejected": -1.9749970436096191, "loss": 1.171, "nll_loss": 0.2927406430244446, "rewards/accuracies": 1.0, "rewards/chosen": -3.939388625440188e-05, "rewards/margins": 0.1974603235721588, "rewards/rejected": -0.19749972224235535, "step": 7999 }, { "epoch": 5.532503457814661, "grad_norm": 5.048730850219727, "learning_rate": 2.4819425234362995e-05, "log_odds_chosen": 11.018048286437988, "log_odds_ratio": -2.6650952349882573e-05, "logits/chosen": -0.6066503524780273, "logits/rejected": -0.6664042472839355, "logps/chosen": -7.129425648599863e-05, "logps/rejected": -1.5724921226501465, "loss": 0.7817, "nll_loss": 0.19543345272541046, "rewards/accuracies": 1.0, "rewards/chosen": -7.129426194296684e-06, "rewards/margins": 0.157242089509964, "rewards/rejected": -0.15724921226501465, "step": 8000 }, { "epoch": 5.533195020746888, "grad_norm": 10.095208168029785, "learning_rate": 2.4815583218072847e-05, "log_odds_chosen": 11.383611679077148, "log_odds_ratio": -2.3711505491519347e-05, "logits/chosen": -0.2679492235183716, "logits/rejected": -0.3012941777706146, "logps/chosen": -0.0003241963859181851, "logps/rejected": -3.117649793624878, "loss": 0.6962, "nll_loss": 0.17404018342494965, "rewards/accuracies": 1.0, "rewards/chosen": -3.241963713662699e-05, "rewards/margins": 0.31173259019851685, "rewards/rejected": -0.31176501512527466, "step": 8001 }, { "epoch": 5.533886583679115, "grad_norm": 7.097454071044922, "learning_rate": 2.4811741201782696e-05, "log_odds_chosen": 10.319755554199219, "log_odds_ratio": -0.00022292044013738632, "logits/chosen": -0.2506617605686188, "logits/rejected": -0.3690025210380554, "logps/chosen": -0.0026361849159002304, "logps/rejected": -2.5132508277893066, "loss": 0.9051, "nll_loss": 0.22624385356903076, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026361847994849086, "rewards/margins": 0.25106143951416016, "rewards/rejected": -0.2513250708580017, "step": 8002 }, { "epoch": 5.5345781466113415, "grad_norm": 9.05012035369873, "learning_rate": 2.480789918549255e-05, "log_odds_chosen": 11.596423149108887, "log_odds_ratio": -2.853182377293706e-05, "logits/chosen": -0.5594992637634277, "logits/rejected": -0.7029032707214355, "logps/chosen": -0.00015006544708739966, "logps/rejected": -2.5829501152038574, "loss": 0.8926, "nll_loss": 0.22315490245819092, "rewards/accuracies": 1.0, "rewards/chosen": -1.5006546163931489e-05, "rewards/margins": 0.25828003883361816, "rewards/rejected": -0.2582949995994568, "step": 8003 }, { "epoch": 5.535269709543568, "grad_norm": 13.613221168518066, "learning_rate": 2.4804057169202398e-05, "log_odds_chosen": 11.267099380493164, "log_odds_ratio": -2.884993955376558e-05, "logits/chosen": -0.017788421362638474, "logits/rejected": -0.1763039231300354, "logps/chosen": -0.0003624186501838267, "logps/rejected": -2.790835380554199, "loss": 0.8132, "nll_loss": 0.20329706370830536, "rewards/accuracies": 1.0, "rewards/chosen": -3.6241868656361476e-05, "rewards/margins": 0.27904731035232544, "rewards/rejected": -0.2790835499763489, "step": 8004 }, { "epoch": 5.535961272475795, "grad_norm": 7.166762351989746, "learning_rate": 2.4800215152912247e-05, "log_odds_chosen": 9.902912139892578, "log_odds_ratio": -0.0005022470140829682, "logits/chosen": -0.49830174446105957, "logits/rejected": -0.5488074421882629, "logps/chosen": -0.00040694093331694603, "logps/rejected": -1.8442189693450928, "loss": 0.5014, "nll_loss": 0.12530532479286194, "rewards/accuracies": 1.0, "rewards/chosen": -4.069409624207765e-05, "rewards/margins": 0.18438121676445007, "rewards/rejected": -0.18442192673683167, "step": 8005 }, { "epoch": 5.536652835408022, "grad_norm": 7.92393684387207, "learning_rate": 2.47963731366221e-05, "log_odds_chosen": 11.132036209106445, "log_odds_ratio": -2.9587336030090228e-05, "logits/chosen": 0.26792770624160767, "logits/rejected": 0.16624970734119415, "logps/chosen": -0.0002462788834236562, "logps/rejected": -2.610889196395874, "loss": 0.7446, "nll_loss": 0.18615420162677765, "rewards/accuracies": 1.0, "rewards/chosen": -2.4627888706163503e-05, "rewards/margins": 0.2610642910003662, "rewards/rejected": -0.26108893752098083, "step": 8006 }, { "epoch": 5.537344398340249, "grad_norm": 12.280511856079102, "learning_rate": 2.4792531120331952e-05, "log_odds_chosen": 10.223596572875977, "log_odds_ratio": -4.838874883716926e-05, "logits/chosen": -0.27258747816085815, "logits/rejected": -0.316699743270874, "logps/chosen": -0.00025811471277847886, "logps/rejected": -1.9246526956558228, "loss": 0.743, "nll_loss": 0.18575647473335266, "rewards/accuracies": 1.0, "rewards/chosen": -2.5811470550252125e-05, "rewards/margins": 0.19243945181369781, "rewards/rejected": -0.19246527552604675, "step": 8007 }, { "epoch": 5.538035961272476, "grad_norm": 7.187430381774902, "learning_rate": 2.47886891040418e-05, "log_odds_chosen": 10.459199905395508, "log_odds_ratio": -0.0001508681889390573, "logits/chosen": -0.23558424413204193, "logits/rejected": -0.32377859950065613, "logps/chosen": -0.00045977221452631056, "logps/rejected": -1.9837918281555176, "loss": 0.6488, "nll_loss": 0.16219037771224976, "rewards/accuracies": 1.0, "rewards/chosen": -4.597722363541834e-05, "rewards/margins": 0.19833320379257202, "rewards/rejected": -0.19837918877601624, "step": 8008 }, { "epoch": 5.5387275242047025, "grad_norm": 8.177022933959961, "learning_rate": 2.4784847087751653e-05, "log_odds_chosen": 9.602645874023438, "log_odds_ratio": -0.00019704359874594957, "logits/chosen": -0.11307889968156815, "logits/rejected": -0.11641175299882889, "logps/chosen": -0.0002687516971491277, "logps/rejected": -1.5583631992340088, "loss": 1.0271, "nll_loss": 0.2567659020423889, "rewards/accuracies": 1.0, "rewards/chosen": -2.6875170078710653e-05, "rewards/margins": 0.1558094471693039, "rewards/rejected": -0.1558363288640976, "step": 8009 }, { "epoch": 5.539419087136929, "grad_norm": 6.553852558135986, "learning_rate": 2.4781005071461506e-05, "log_odds_chosen": 9.845769882202148, "log_odds_ratio": -0.01491259504109621, "logits/chosen": -0.4053501486778259, "logits/rejected": -0.48310720920562744, "logps/chosen": -0.08075077831745148, "logps/rejected": -1.9933180809020996, "loss": 0.6495, "nll_loss": 0.1608942449092865, "rewards/accuracies": 1.0, "rewards/chosen": -0.008075077086687088, "rewards/margins": 0.19125673174858093, "rewards/rejected": -0.19933182001113892, "step": 8010 }, { "epoch": 5.540110650069156, "grad_norm": 4.823723793029785, "learning_rate": 2.4777163055171355e-05, "log_odds_chosen": 9.852107048034668, "log_odds_ratio": -0.00047949200961738825, "logits/chosen": -0.5386756658554077, "logits/rejected": -0.5551636815071106, "logps/chosen": -0.00034617114579305053, "logps/rejected": -1.7934048175811768, "loss": 0.6276, "nll_loss": 0.1568397879600525, "rewards/accuracies": 1.0, "rewards/chosen": -3.4617110941326246e-05, "rewards/margins": 0.17930585145950317, "rewards/rejected": -0.17934048175811768, "step": 8011 }, { "epoch": 5.540802213001383, "grad_norm": 6.76782751083374, "learning_rate": 2.4773321038881207e-05, "log_odds_chosen": 11.475250244140625, "log_odds_ratio": -2.2427797375712544e-05, "logits/chosen": -0.5119101405143738, "logits/rejected": -0.5618435740470886, "logps/chosen": -0.0002971701032947749, "logps/rejected": -2.7157227993011475, "loss": 0.7586, "nll_loss": 0.18965360522270203, "rewards/accuracies": 1.0, "rewards/chosen": -2.9717009965679608e-05, "rewards/margins": 0.27154257893562317, "rewards/rejected": -0.2715722918510437, "step": 8012 }, { "epoch": 5.54149377593361, "grad_norm": 6.336251258850098, "learning_rate": 2.4769479022591056e-05, "log_odds_chosen": 10.323335647583008, "log_odds_ratio": -8.404036634601653e-05, "logits/chosen": -0.17968213558197021, "logits/rejected": -0.1371138095855713, "logps/chosen": -0.00026224643806926906, "logps/rejected": -2.03709077835083, "loss": 0.914, "nll_loss": 0.22849002480506897, "rewards/accuracies": 1.0, "rewards/chosen": -2.6224643079331145e-05, "rewards/margins": 0.20368286967277527, "rewards/rejected": -0.20370909571647644, "step": 8013 }, { "epoch": 5.542185338865837, "grad_norm": 5.572230815887451, "learning_rate": 2.4765637006300905e-05, "log_odds_chosen": 10.898839950561523, "log_odds_ratio": -0.00018718911451287568, "logits/chosen": -0.6403128504753113, "logits/rejected": -0.6781945824623108, "logps/chosen": -0.0006806739838793874, "logps/rejected": -1.9784750938415527, "loss": 0.5278, "nll_loss": 0.13193657994270325, "rewards/accuracies": 1.0, "rewards/chosen": -6.806739838793874e-05, "rewards/margins": 0.19777946174144745, "rewards/rejected": -0.19784751534461975, "step": 8014 }, { "epoch": 5.5428769017980635, "grad_norm": 7.013886451721191, "learning_rate": 2.4761794990010758e-05, "log_odds_chosen": 10.597729682922363, "log_odds_ratio": -4.4307264033704996e-05, "logits/chosen": -0.4737222194671631, "logits/rejected": -0.6006163358688354, "logps/chosen": -0.0001434629812138155, "logps/rejected": -1.7592902183532715, "loss": 0.7723, "nll_loss": 0.1930730640888214, "rewards/accuracies": 1.0, "rewards/chosen": -1.4346298485179432e-05, "rewards/margins": 0.1759146749973297, "rewards/rejected": -0.1759290248155594, "step": 8015 }, { "epoch": 5.54356846473029, "grad_norm": 9.871222496032715, "learning_rate": 2.475795297372061e-05, "log_odds_chosen": 10.386920928955078, "log_odds_ratio": -7.108970748959109e-05, "logits/chosen": -0.12642526626586914, "logits/rejected": -0.1811293661594391, "logps/chosen": -0.00018795863434206694, "logps/rejected": -2.037794589996338, "loss": 1.0485, "nll_loss": 0.2621132433414459, "rewards/accuracies": 1.0, "rewards/chosen": -1.8795864889398217e-05, "rewards/margins": 0.20376065373420715, "rewards/rejected": -0.2037794589996338, "step": 8016 }, { "epoch": 5.544260027662517, "grad_norm": 8.90102767944336, "learning_rate": 2.475411095743046e-05, "log_odds_chosen": 10.551826477050781, "log_odds_ratio": -0.0010956140467897058, "logits/chosen": -0.4647817611694336, "logits/rejected": -0.46479907631874084, "logps/chosen": -0.0002952416252810508, "logps/rejected": -2.0322060585021973, "loss": 0.983, "nll_loss": 0.24563650786876678, "rewards/accuracies": 1.0, "rewards/chosen": -2.952416252810508e-05, "rewards/margins": 0.20319105684757233, "rewards/rejected": -0.20322057604789734, "step": 8017 }, { "epoch": 5.544951590594744, "grad_norm": 10.191629409790039, "learning_rate": 2.4750268941140312e-05, "log_odds_chosen": 10.009177207946777, "log_odds_ratio": -0.00033783228718675673, "logits/chosen": -0.4004913866519928, "logits/rejected": -0.42859768867492676, "logps/chosen": -0.00042483455035835505, "logps/rejected": -1.8237648010253906, "loss": 0.5796, "nll_loss": 0.14487770199775696, "rewards/accuracies": 1.0, "rewards/chosen": -4.248345430823974e-05, "rewards/margins": 0.18233400583267212, "rewards/rejected": -0.18237647414207458, "step": 8018 }, { "epoch": 5.545643153526971, "grad_norm": 4.802680492401123, "learning_rate": 2.4746426924850164e-05, "log_odds_chosen": 10.210495948791504, "log_odds_ratio": -0.00011646961502265185, "logits/chosen": -0.39877617359161377, "logits/rejected": -0.42844846844673157, "logps/chosen": -0.0003031216620001942, "logps/rejected": -1.4773494005203247, "loss": 0.581, "nll_loss": 0.14524339139461517, "rewards/accuracies": 1.0, "rewards/chosen": -3.031216692761518e-05, "rewards/margins": 0.14770463109016418, "rewards/rejected": -0.14773495495319366, "step": 8019 }, { "epoch": 5.546334716459198, "grad_norm": 20.15610694885254, "learning_rate": 2.4742584908560013e-05, "log_odds_chosen": 11.423358917236328, "log_odds_ratio": -0.00012067429634043947, "logits/chosen": -0.6458957195281982, "logits/rejected": -0.7440388202667236, "logps/chosen": -0.00017651362577453256, "logps/rejected": -1.9940600395202637, "loss": 0.715, "nll_loss": 0.17874941229820251, "rewards/accuracies": 1.0, "rewards/chosen": -1.765136403264478e-05, "rewards/margins": 0.19938836991786957, "rewards/rejected": -0.19940602779388428, "step": 8020 }, { "epoch": 5.5470262793914245, "grad_norm": 8.5133695602417, "learning_rate": 2.4738742892269866e-05, "log_odds_chosen": 10.65363883972168, "log_odds_ratio": -6.911178934387863e-05, "logits/chosen": -0.026836829259991646, "logits/rejected": -0.21755240857601166, "logps/chosen": -0.000281484768493101, "logps/rejected": -2.1441502571105957, "loss": 0.7558, "nll_loss": 0.18893516063690186, "rewards/accuracies": 1.0, "rewards/chosen": -2.8148479032097384e-05, "rewards/margins": 0.21438689529895782, "rewards/rejected": -0.214415043592453, "step": 8021 }, { "epoch": 5.547717842323651, "grad_norm": 6.64901065826416, "learning_rate": 2.4734900875979715e-05, "log_odds_chosen": 10.427520751953125, "log_odds_ratio": -0.00015523642650805414, "logits/chosen": -0.5124070048332214, "logits/rejected": -0.7381057143211365, "logps/chosen": -0.0002842023386619985, "logps/rejected": -2.0599887371063232, "loss": 0.7097, "nll_loss": 0.17740392684936523, "rewards/accuracies": 1.0, "rewards/chosen": -2.8420234229997732e-05, "rewards/margins": 0.20597046613693237, "rewards/rejected": -0.20599888265132904, "step": 8022 }, { "epoch": 5.548409405255878, "grad_norm": 15.249797821044922, "learning_rate": 2.4731058859689564e-05, "log_odds_chosen": 10.686328887939453, "log_odds_ratio": -5.325684469426051e-05, "logits/chosen": -0.36337143182754517, "logits/rejected": -0.3869422376155853, "logps/chosen": -0.00022905900550540537, "logps/rejected": -2.438955545425415, "loss": 0.997, "nll_loss": 0.24923908710479736, "rewards/accuracies": 1.0, "rewards/chosen": -2.2905900550540537e-05, "rewards/margins": 0.24387264251708984, "rewards/rejected": -0.2438955456018448, "step": 8023 }, { "epoch": 5.549100968188105, "grad_norm": 11.313993453979492, "learning_rate": 2.4727216843399416e-05, "log_odds_chosen": 9.633194923400879, "log_odds_ratio": -0.00018383633869234473, "logits/chosen": -0.22703269124031067, "logits/rejected": -0.29582494497299194, "logps/chosen": -0.00045589538058266044, "logps/rejected": -1.6704440116882324, "loss": 0.6371, "nll_loss": 0.15925177931785583, "rewards/accuracies": 1.0, "rewards/chosen": -4.5589538785861805e-05, "rewards/margins": 0.16699880361557007, "rewards/rejected": -0.16704440116882324, "step": 8024 }, { "epoch": 5.549792531120332, "grad_norm": 11.974188804626465, "learning_rate": 2.472337482710927e-05, "log_odds_chosen": 10.81893539428711, "log_odds_ratio": -0.0012266793055459857, "logits/chosen": -0.4919191598892212, "logits/rejected": -0.5107102394104004, "logps/chosen": -0.0006843972951173782, "logps/rejected": -2.7139744758605957, "loss": 0.9274, "nll_loss": 0.23171493411064148, "rewards/accuracies": 1.0, "rewards/chosen": -6.843973824288696e-05, "rewards/margins": 0.27132901549339294, "rewards/rejected": -0.2713974714279175, "step": 8025 }, { "epoch": 5.550484094052559, "grad_norm": 6.510634422302246, "learning_rate": 2.4719532810819118e-05, "log_odds_chosen": 10.264190673828125, "log_odds_ratio": -0.0001600035757292062, "logits/chosen": -0.5555983781814575, "logits/rejected": -0.6701858043670654, "logps/chosen": -0.0001303990138694644, "logps/rejected": -1.360192894935608, "loss": 1.1732, "nll_loss": 0.2932942509651184, "rewards/accuracies": 1.0, "rewards/chosen": -1.3039902114542201e-05, "rewards/margins": 0.13600623607635498, "rewards/rejected": -0.1360192894935608, "step": 8026 }, { "epoch": 5.551175656984785, "grad_norm": 13.490435600280762, "learning_rate": 2.471569079452897e-05, "log_odds_chosen": 11.536421775817871, "log_odds_ratio": -2.0468776710913517e-05, "logits/chosen": -0.7267881631851196, "logits/rejected": -0.7671307325363159, "logps/chosen": -0.00020429975120350718, "logps/rejected": -2.4622702598571777, "loss": 0.646, "nll_loss": 0.1615058183670044, "rewards/accuracies": 1.0, "rewards/chosen": -2.042997584794648e-05, "rewards/margins": 0.246206596493721, "rewards/rejected": -0.24622704088687897, "step": 8027 }, { "epoch": 5.551867219917012, "grad_norm": 8.800284385681152, "learning_rate": 2.4711848778238823e-05, "log_odds_chosen": 9.855973243713379, "log_odds_ratio": -0.00014719019236508757, "logits/chosen": -0.531029224395752, "logits/rejected": -0.5820093750953674, "logps/chosen": -0.00027673933072946966, "logps/rejected": -1.5422172546386719, "loss": 0.6295, "nll_loss": 0.1573716104030609, "rewards/accuracies": 1.0, "rewards/chosen": -2.7673933800542727e-05, "rewards/margins": 0.15419405698776245, "rewards/rejected": -0.15422172844409943, "step": 8028 }, { "epoch": 5.552558782849239, "grad_norm": 5.500239372253418, "learning_rate": 2.470800676194867e-05, "log_odds_chosen": 10.604162216186523, "log_odds_ratio": -3.668230419862084e-05, "logits/chosen": -0.3131680488586426, "logits/rejected": -0.3498471677303314, "logps/chosen": -0.00015500406152568758, "logps/rejected": -1.7472177743911743, "loss": 0.6977, "nll_loss": 0.17442475259304047, "rewards/accuracies": 1.0, "rewards/chosen": -1.55004072439624e-05, "rewards/margins": 0.17470628023147583, "rewards/rejected": -0.17472177743911743, "step": 8029 }, { "epoch": 5.553250345781466, "grad_norm": 9.993817329406738, "learning_rate": 2.4704164745658524e-05, "log_odds_chosen": 11.406761169433594, "log_odds_ratio": -2.9171018468332477e-05, "logits/chosen": -0.45108741521835327, "logits/rejected": -0.48169055581092834, "logps/chosen": -0.00019415847782511264, "logps/rejected": -2.8017964363098145, "loss": 0.8608, "nll_loss": 0.21519702672958374, "rewards/accuracies": 1.0, "rewards/chosen": -1.9415847418713383e-05, "rewards/margins": 0.28016021847724915, "rewards/rejected": -0.28017961978912354, "step": 8030 }, { "epoch": 5.553941908713693, "grad_norm": 6.946387767791748, "learning_rate": 2.4700322729368373e-05, "log_odds_chosen": 11.587224960327148, "log_odds_ratio": -2.163035787816625e-05, "logits/chosen": -0.49010270833969116, "logits/rejected": -0.4291403591632843, "logps/chosen": -0.0002493963693268597, "logps/rejected": -3.0701684951782227, "loss": 0.7018, "nll_loss": 0.1754557192325592, "rewards/accuracies": 1.0, "rewards/chosen": -2.493963620509021e-05, "rewards/margins": 0.30699190497398376, "rewards/rejected": -0.30701684951782227, "step": 8031 }, { "epoch": 5.55463347164592, "grad_norm": 7.238589763641357, "learning_rate": 2.4696480713078222e-05, "log_odds_chosen": 9.71479606628418, "log_odds_ratio": -0.0008989711641333997, "logits/chosen": -0.19155974686145782, "logits/rejected": -0.2716420292854309, "logps/chosen": -0.0017191801453009248, "logps/rejected": -1.540884017944336, "loss": 0.6655, "nll_loss": 0.16629430651664734, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017191801453009248, "rewards/margins": 0.15391649305820465, "rewards/rejected": -0.15408840775489807, "step": 8032 }, { "epoch": 5.555325034578146, "grad_norm": 9.536120414733887, "learning_rate": 2.4692638696788075e-05, "log_odds_chosen": 10.171594619750977, "log_odds_ratio": -0.0008030205499380827, "logits/chosen": -0.537517786026001, "logits/rejected": -0.603646993637085, "logps/chosen": -0.0011483978014439344, "logps/rejected": -2.086796998977661, "loss": 1.2286, "nll_loss": 0.30706262588500977, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011483977141324431, "rewards/margins": 0.2085648626089096, "rewards/rejected": -0.2086797058582306, "step": 8033 }, { "epoch": 5.556016597510373, "grad_norm": 8.908292770385742, "learning_rate": 2.4688796680497927e-05, "log_odds_chosen": 8.988523483276367, "log_odds_ratio": -0.00041150639299303293, "logits/chosen": -0.355111300945282, "logits/rejected": -0.3641355633735657, "logps/chosen": -0.0014900579117238522, "logps/rejected": -1.5867036581039429, "loss": 0.6441, "nll_loss": 0.16098034381866455, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014900577662047, "rewards/margins": 0.1585213541984558, "rewards/rejected": -0.1586703658103943, "step": 8034 }, { "epoch": 5.5567081604426, "grad_norm": 6.447203636169434, "learning_rate": 2.4684954664207776e-05, "log_odds_chosen": 10.461267471313477, "log_odds_ratio": -4.435352821019478e-05, "logits/chosen": -0.19314274191856384, "logits/rejected": -0.2793979048728943, "logps/chosen": -0.0004317377461120486, "logps/rejected": -2.267042875289917, "loss": 0.753, "nll_loss": 0.18823593854904175, "rewards/accuracies": 1.0, "rewards/chosen": -4.317377170082182e-05, "rewards/margins": 0.22666111588478088, "rewards/rejected": -0.22670429944992065, "step": 8035 }, { "epoch": 5.557399723374827, "grad_norm": 9.434931755065918, "learning_rate": 2.468111264791763e-05, "log_odds_chosen": 10.66131591796875, "log_odds_ratio": -0.00020637440320570022, "logits/chosen": 0.12125441431999207, "logits/rejected": 0.10425134003162384, "logps/chosen": -0.0005198074504733086, "logps/rejected": -2.5221619606018066, "loss": 0.7512, "nll_loss": 0.18778873980045319, "rewards/accuracies": 1.0, "rewards/chosen": -5.198074723011814e-05, "rewards/margins": 0.25216418504714966, "rewards/rejected": -0.2522161900997162, "step": 8036 }, { "epoch": 5.558091286307054, "grad_norm": 7.9632768630981445, "learning_rate": 2.467727063162748e-05, "log_odds_chosen": 10.371261596679688, "log_odds_ratio": -0.00016782450256869197, "logits/chosen": -0.43128275871276855, "logits/rejected": -0.4339713752269745, "logps/chosen": -0.00034248243900947273, "logps/rejected": -1.950977087020874, "loss": 0.7265, "nll_loss": 0.1816154569387436, "rewards/accuracies": 1.0, "rewards/chosen": -3.4248245356138796e-05, "rewards/margins": 0.19506347179412842, "rewards/rejected": -0.19509771466255188, "step": 8037 }, { "epoch": 5.558782849239281, "grad_norm": 12.128067016601562, "learning_rate": 2.467342861533733e-05, "log_odds_chosen": 11.665319442749023, "log_odds_ratio": -1.4122078027867246e-05, "logits/chosen": -0.3573598861694336, "logits/rejected": -0.5253264904022217, "logps/chosen": -0.00014026931603439152, "logps/rejected": -2.598935127258301, "loss": 0.7445, "nll_loss": 0.18612857162952423, "rewards/accuracies": 1.0, "rewards/chosen": -1.4026931239641272e-05, "rewards/margins": 0.2598794996738434, "rewards/rejected": -0.2598935067653656, "step": 8038 }, { "epoch": 5.559474412171507, "grad_norm": 4.508486747741699, "learning_rate": 2.4669586599047183e-05, "log_odds_chosen": 10.604143142700195, "log_odds_ratio": -0.00021092304086778313, "logits/chosen": -0.2617264688014984, "logits/rejected": -0.3032621145248413, "logps/chosen": -0.00018225214444100857, "logps/rejected": -2.072490692138672, "loss": 0.8293, "nll_loss": 0.2073136270046234, "rewards/accuracies": 1.0, "rewards/chosen": -1.8225213352707215e-05, "rewards/margins": 0.2072308361530304, "rewards/rejected": -0.20724907517433167, "step": 8039 }, { "epoch": 5.560165975103734, "grad_norm": 7.761422634124756, "learning_rate": 2.466574458275703e-05, "log_odds_chosen": 10.363710403442383, "log_odds_ratio": -0.00037738552782684565, "logits/chosen": -0.173141211271286, "logits/rejected": -0.13690513372421265, "logps/chosen": -0.0004011451092083007, "logps/rejected": -2.5756120681762695, "loss": 0.827, "nll_loss": 0.20670370757579803, "rewards/accuracies": 1.0, "rewards/chosen": -4.011451164842583e-05, "rewards/margins": 0.2575211226940155, "rewards/rejected": -0.25756123661994934, "step": 8040 }, { "epoch": 5.560857538035961, "grad_norm": 8.741007804870605, "learning_rate": 2.466190256646688e-05, "log_odds_chosen": 10.194473266601562, "log_odds_ratio": -0.0012418972328305244, "logits/chosen": -0.004908490926027298, "logits/rejected": -0.020960234105587006, "logps/chosen": -0.0012074820697307587, "logps/rejected": -2.2647933959960938, "loss": 1.0754, "nll_loss": 0.26873573660850525, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012074820551788434, "rewards/margins": 0.2263585925102234, "rewards/rejected": -0.22647933661937714, "step": 8041 }, { "epoch": 5.561549100968188, "grad_norm": 5.996640682220459, "learning_rate": 2.4658060550176733e-05, "log_odds_chosen": 9.534639358520508, "log_odds_ratio": -0.0032207153271883726, "logits/chosen": -0.05922443047165871, "logits/rejected": -0.16529792547225952, "logps/chosen": -0.0024670644197613, "logps/rejected": -2.0751233100891113, "loss": 0.9233, "nll_loss": 0.23050865530967712, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024670641869306564, "rewards/margins": 0.20726561546325684, "rewards/rejected": -0.20751231908798218, "step": 8042 }, { "epoch": 5.562240663900415, "grad_norm": 7.457037448883057, "learning_rate": 2.4654218533886586e-05, "log_odds_chosen": 10.855881690979004, "log_odds_ratio": -3.0194203645805828e-05, "logits/chosen": -0.41566288471221924, "logits/rejected": -0.40151041746139526, "logps/chosen": -0.00016004889039322734, "logps/rejected": -1.8453633785247803, "loss": 0.6157, "nll_loss": 0.15391522645950317, "rewards/accuracies": 1.0, "rewards/chosen": -1.600488758413121e-05, "rewards/margins": 0.18452034890651703, "rewards/rejected": -0.18453636765480042, "step": 8043 }, { "epoch": 5.5629322268326415, "grad_norm": 7.482526779174805, "learning_rate": 2.4650376517596435e-05, "log_odds_chosen": 10.949602127075195, "log_odds_ratio": -3.0145914934109896e-05, "logits/chosen": -0.474301278591156, "logits/rejected": -0.4571562111377716, "logps/chosen": -0.0003064905758947134, "logps/rejected": -2.0776028633117676, "loss": 1.0127, "nll_loss": 0.2531779408454895, "rewards/accuracies": 1.0, "rewards/chosen": -3.064906195504591e-05, "rewards/margins": 0.20772963762283325, "rewards/rejected": -0.2077602744102478, "step": 8044 }, { "epoch": 5.563623789764868, "grad_norm": 8.855326652526855, "learning_rate": 2.4646534501306287e-05, "log_odds_chosen": 11.166003227233887, "log_odds_ratio": -2.6106763471034355e-05, "logits/chosen": -0.6545218229293823, "logits/rejected": -0.6752309799194336, "logps/chosen": -0.00016588937432970852, "logps/rejected": -2.249631643295288, "loss": 0.5967, "nll_loss": 0.1491810381412506, "rewards/accuracies": 1.0, "rewards/chosen": -1.6588937796768732e-05, "rewards/margins": 0.22494655847549438, "rewards/rejected": -0.22496315836906433, "step": 8045 }, { "epoch": 5.564315352697095, "grad_norm": 14.56978988647461, "learning_rate": 2.464269248501614e-05, "log_odds_chosen": 10.544530868530273, "log_odds_ratio": -0.000425957259722054, "logits/chosen": -0.20443707704544067, "logits/rejected": -0.1254967600107193, "logps/chosen": -0.00042200577445328236, "logps/rejected": -2.852787494659424, "loss": 1.0257, "nll_loss": 0.2563808560371399, "rewards/accuracies": 1.0, "rewards/chosen": -4.220057962811552e-05, "rewards/margins": 0.2852365970611572, "rewards/rejected": -0.2852787673473358, "step": 8046 }, { "epoch": 5.565006915629322, "grad_norm": 10.46658706665039, "learning_rate": 2.463885046872599e-05, "log_odds_chosen": 10.141487121582031, "log_odds_ratio": -0.0002209717349614948, "logits/chosen": -0.49858924746513367, "logits/rejected": -0.6654733419418335, "logps/chosen": -0.0006808140315115452, "logps/rejected": -2.273078680038452, "loss": 0.6522, "nll_loss": 0.16302204132080078, "rewards/accuracies": 1.0, "rewards/chosen": -6.808139733038843e-05, "rewards/margins": 0.22723978757858276, "rewards/rejected": -0.22730787098407745, "step": 8047 }, { "epoch": 5.565698478561549, "grad_norm": 6.561436653137207, "learning_rate": 2.463500845243584e-05, "log_odds_chosen": 10.76079273223877, "log_odds_ratio": -4.994025221094489e-05, "logits/chosen": -0.3768499493598938, "logits/rejected": -0.5194529294967651, "logps/chosen": -0.00028103572549298406, "logps/rejected": -2.0566999912261963, "loss": 0.6791, "nll_loss": 0.16976627707481384, "rewards/accuracies": 1.0, "rewards/chosen": -2.8103571821702644e-05, "rewards/margins": 0.20564191043376923, "rewards/rejected": -0.20566999912261963, "step": 8048 }, { "epoch": 5.566390041493776, "grad_norm": 6.7705488204956055, "learning_rate": 2.463116643614569e-05, "log_odds_chosen": 9.857308387756348, "log_odds_ratio": -0.0005722575588151813, "logits/chosen": -0.5434191823005676, "logits/rejected": -0.5727492570877075, "logps/chosen": -0.0007818934391252697, "logps/rejected": -1.9093397855758667, "loss": 0.8424, "nll_loss": 0.21055403351783752, "rewards/accuracies": 1.0, "rewards/chosen": -7.818934682291001e-05, "rewards/margins": 0.19085580110549927, "rewards/rejected": -0.1909339725971222, "step": 8049 }, { "epoch": 5.5670816044260025, "grad_norm": 9.348870277404785, "learning_rate": 2.4627324419855543e-05, "log_odds_chosen": 10.814437866210938, "log_odds_ratio": -9.064251207746565e-05, "logits/chosen": -0.3218635320663452, "logits/rejected": -0.3573150038719177, "logps/chosen": -0.0009795472724363208, "logps/rejected": -2.4565610885620117, "loss": 0.6158, "nll_loss": 0.15393370389938354, "rewards/accuracies": 1.0, "rewards/chosen": -9.795472578844056e-05, "rewards/margins": 0.24555814266204834, "rewards/rejected": -0.2456560879945755, "step": 8050 }, { "epoch": 5.567773167358229, "grad_norm": 10.63403034210205, "learning_rate": 2.462348240356539e-05, "log_odds_chosen": 10.409517288208008, "log_odds_ratio": -0.0005241333856247365, "logits/chosen": -0.7085733413696289, "logits/rejected": -0.8216226100921631, "logps/chosen": -0.0009803158463910222, "logps/rejected": -2.1244659423828125, "loss": 0.9471, "nll_loss": 0.2367316335439682, "rewards/accuracies": 1.0, "rewards/chosen": -9.803157445276156e-05, "rewards/margins": 0.2123485803604126, "rewards/rejected": -0.21244660019874573, "step": 8051 }, { "epoch": 5.568464730290456, "grad_norm": 10.331066131591797, "learning_rate": 2.4619640387275244e-05, "log_odds_chosen": 10.168068885803223, "log_odds_ratio": -0.0004346623900346458, "logits/chosen": -0.7611827850341797, "logits/rejected": -0.6889389157295227, "logps/chosen": -0.0005256303120404482, "logps/rejected": -1.8769570589065552, "loss": 1.1829, "nll_loss": 0.29568618535995483, "rewards/accuracies": 1.0, "rewards/chosen": -5.256303120404482e-05, "rewards/margins": 0.1876431405544281, "rewards/rejected": -0.1876957267522812, "step": 8052 }, { "epoch": 5.569156293222683, "grad_norm": 6.900964736938477, "learning_rate": 2.4615798370985093e-05, "log_odds_chosen": 9.281791687011719, "log_odds_ratio": -0.007029072381556034, "logits/chosen": -0.28015413880348206, "logits/rejected": -0.38101398944854736, "logps/chosen": -0.0018885629251599312, "logps/rejected": -1.5742026567459106, "loss": 0.5586, "nll_loss": 0.13894297182559967, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001888562983367592, "rewards/margins": 0.1572314202785492, "rewards/rejected": -0.15742027759552002, "step": 8053 }, { "epoch": 5.56984785615491, "grad_norm": 7.115659713745117, "learning_rate": 2.4611956354694945e-05, "log_odds_chosen": 11.122929573059082, "log_odds_ratio": -5.1810442528221756e-05, "logits/chosen": -0.014425862580537796, "logits/rejected": -0.17043359577655792, "logps/chosen": -0.000720279582310468, "logps/rejected": -2.988020420074463, "loss": 0.7804, "nll_loss": 0.195082426071167, "rewards/accuracies": 1.0, "rewards/chosen": -7.202795677585527e-05, "rewards/margins": 0.2987300157546997, "rewards/rejected": -0.29880204796791077, "step": 8054 }, { "epoch": 5.570539419087137, "grad_norm": 7.839625358581543, "learning_rate": 2.4608114338404795e-05, "log_odds_chosen": 8.786584854125977, "log_odds_ratio": -0.0005181143060326576, "logits/chosen": 0.19618989527225494, "logits/rejected": 0.024336382746696472, "logps/chosen": -0.001151248230598867, "logps/rejected": -1.499411702156067, "loss": 0.6895, "nll_loss": 0.17232772707939148, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011512481432873756, "rewards/margins": 0.1498260498046875, "rewards/rejected": -0.14994117617607117, "step": 8055 }, { "epoch": 5.5712309820193635, "grad_norm": 6.1973772048950195, "learning_rate": 2.4604272322114647e-05, "log_odds_chosen": 11.382923126220703, "log_odds_ratio": -2.2351225197780877e-05, "logits/chosen": -0.7432894110679626, "logits/rejected": -0.838476300239563, "logps/chosen": -0.00017028761794790626, "logps/rejected": -2.575474977493286, "loss": 0.6824, "nll_loss": 0.17060251533985138, "rewards/accuracies": 1.0, "rewards/chosen": -1.702876397757791e-05, "rewards/margins": 0.25753045082092285, "rewards/rejected": -0.2575474977493286, "step": 8056 }, { "epoch": 5.57192254495159, "grad_norm": 7.494674205780029, "learning_rate": 2.46004303058245e-05, "log_odds_chosen": 10.927709579467773, "log_odds_ratio": -3.457109414739534e-05, "logits/chosen": -0.8123071193695068, "logits/rejected": -0.8477652072906494, "logps/chosen": -0.00015166602679528296, "logps/rejected": -2.0031211376190186, "loss": 0.4584, "nll_loss": 0.11458532512187958, "rewards/accuracies": 1.0, "rewards/chosen": -1.51666044985177e-05, "rewards/margins": 0.20029696822166443, "rewards/rejected": -0.20031213760375977, "step": 8057 }, { "epoch": 5.572614107883817, "grad_norm": 9.052752494812012, "learning_rate": 2.459658828953435e-05, "log_odds_chosen": 11.031610488891602, "log_odds_ratio": -2.8391477826517075e-05, "logits/chosen": -0.11535578966140747, "logits/rejected": -0.1948067545890808, "logps/chosen": -0.00014932632620912045, "logps/rejected": -2.1253697872161865, "loss": 0.932, "nll_loss": 0.23300620913505554, "rewards/accuracies": 1.0, "rewards/chosen": -1.4932633348507807e-05, "rewards/margins": 0.21252202987670898, "rewards/rejected": -0.21253696084022522, "step": 8058 }, { "epoch": 5.573305670816044, "grad_norm": 8.160527229309082, "learning_rate": 2.45927462732442e-05, "log_odds_chosen": 10.030344009399414, "log_odds_ratio": -0.0001031822175718844, "logits/chosen": -0.7391003370285034, "logits/rejected": -0.8036350607872009, "logps/chosen": -0.00034203121322207153, "logps/rejected": -1.551897644996643, "loss": 0.6604, "nll_loss": 0.1650814414024353, "rewards/accuracies": 1.0, "rewards/chosen": -3.420311986701563e-05, "rewards/margins": 0.15515556931495667, "rewards/rejected": -0.15518975257873535, "step": 8059 }, { "epoch": 5.573997233748271, "grad_norm": 9.76441764831543, "learning_rate": 2.458890425695405e-05, "log_odds_chosen": 10.707071304321289, "log_odds_ratio": -7.481992361135781e-05, "logits/chosen": -0.37856489419937134, "logits/rejected": -0.5111147165298462, "logps/chosen": -0.0011090300977230072, "logps/rejected": -2.6076529026031494, "loss": 1.0368, "nll_loss": 0.25918781757354736, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011090300540672615, "rewards/margins": 0.26065438985824585, "rewards/rejected": -0.26076528429985046, "step": 8060 }, { "epoch": 5.574688796680498, "grad_norm": 7.49188232421875, "learning_rate": 2.45850622406639e-05, "log_odds_chosen": 9.97515869140625, "log_odds_ratio": -0.0002881538530346006, "logits/chosen": -0.33292925357818604, "logits/rejected": -0.47597840428352356, "logps/chosen": -0.0007897530449554324, "logps/rejected": -2.464226722717285, "loss": 0.7122, "nll_loss": 0.1780133843421936, "rewards/accuracies": 1.0, "rewards/chosen": -7.89752957643941e-05, "rewards/margins": 0.2463436722755432, "rewards/rejected": -0.2464226335287094, "step": 8061 }, { "epoch": 5.5753803596127245, "grad_norm": 10.136432647705078, "learning_rate": 2.458122022437375e-05, "log_odds_chosen": 10.503422737121582, "log_odds_ratio": -7.251178612932563e-05, "logits/chosen": -0.33820295333862305, "logits/rejected": -0.2976325750350952, "logps/chosen": -0.0005803690291941166, "logps/rejected": -2.2288050651550293, "loss": 0.6395, "nll_loss": 0.15985599160194397, "rewards/accuracies": 1.0, "rewards/chosen": -5.8036905102198943e-05, "rewards/margins": 0.22282248735427856, "rewards/rejected": -0.2228805124759674, "step": 8062 }, { "epoch": 5.576071922544951, "grad_norm": 20.0115966796875, "learning_rate": 2.4577378208083604e-05, "log_odds_chosen": 10.732137680053711, "log_odds_ratio": -7.273490336956456e-05, "logits/chosen": -0.15554040670394897, "logits/rejected": -0.23485851287841797, "logps/chosen": -0.0004803851479664445, "logps/rejected": -2.2087879180908203, "loss": 1.1601, "nll_loss": 0.29002463817596436, "rewards/accuracies": 1.0, "rewards/chosen": -4.803850970347412e-05, "rewards/margins": 0.2208307683467865, "rewards/rejected": -0.22087879478931427, "step": 8063 }, { "epoch": 5.576763485477178, "grad_norm": 6.817218780517578, "learning_rate": 2.4573536191793453e-05, "log_odds_chosen": 11.50283145904541, "log_odds_ratio": -1.474907730880659e-05, "logits/chosen": -0.3653714954853058, "logits/rejected": -0.3387664258480072, "logps/chosen": -0.00011848987196572125, "logps/rejected": -2.3012490272521973, "loss": 0.5957, "nll_loss": 0.14892232418060303, "rewards/accuracies": 1.0, "rewards/chosen": -1.1848987014673185e-05, "rewards/margins": 0.23011307418346405, "rewards/rejected": -0.23012492060661316, "step": 8064 }, { "epoch": 5.577455048409405, "grad_norm": 8.593660354614258, "learning_rate": 2.4569694175503305e-05, "log_odds_chosen": 10.678092002868652, "log_odds_ratio": -0.00027599811437539756, "logits/chosen": -0.37883827090263367, "logits/rejected": -0.4286655783653259, "logps/chosen": -0.00045390904415398836, "logps/rejected": -2.3745663166046143, "loss": 0.8728, "nll_loss": 0.2181757539510727, "rewards/accuracies": 1.0, "rewards/chosen": -4.53909051429946e-05, "rewards/margins": 0.2374112457036972, "rewards/rejected": -0.23745664954185486, "step": 8065 }, { "epoch": 5.578146611341632, "grad_norm": 9.433917999267578, "learning_rate": 2.4565852159213158e-05, "log_odds_chosen": 11.234939575195312, "log_odds_ratio": -0.00016438095190096647, "logits/chosen": -0.3239496946334839, "logits/rejected": -0.45599889755249023, "logps/chosen": -0.0002022422559093684, "logps/rejected": -2.838594913482666, "loss": 1.1254, "nll_loss": 0.28134027123451233, "rewards/accuracies": 1.0, "rewards/chosen": -2.0224228137522005e-05, "rewards/margins": 0.28383928537368774, "rewards/rejected": -0.283859521150589, "step": 8066 }, { "epoch": 5.578838174273859, "grad_norm": 5.817116737365723, "learning_rate": 2.4562010142923007e-05, "log_odds_chosen": 11.082422256469727, "log_odds_ratio": -0.0002813279570546001, "logits/chosen": -0.47998160123825073, "logits/rejected": -0.5466978549957275, "logps/chosen": -0.001538085751235485, "logps/rejected": -3.1759636402130127, "loss": 0.9083, "nll_loss": 0.22704046964645386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015380859258584678, "rewards/margins": 0.3174425959587097, "rewards/rejected": -0.3175963759422302, "step": 8067 }, { "epoch": 5.5795297372060855, "grad_norm": 11.252291679382324, "learning_rate": 2.455816812663286e-05, "log_odds_chosen": 11.147133827209473, "log_odds_ratio": -4.1483330278424546e-05, "logits/chosen": -0.722510814666748, "logits/rejected": -0.667180597782135, "logps/chosen": -0.0001893240405479446, "logps/rejected": -2.4593329429626465, "loss": 1.4768, "nll_loss": 0.3691995143890381, "rewards/accuracies": 1.0, "rewards/chosen": -1.893240369099658e-05, "rewards/margins": 0.24591434001922607, "rewards/rejected": -0.24593327939510345, "step": 8068 }, { "epoch": 5.580221300138312, "grad_norm": 6.8589582443237305, "learning_rate": 2.455432611034271e-05, "log_odds_chosen": 9.747818946838379, "log_odds_ratio": -0.0002661866310518235, "logits/chosen": -0.017276108264923096, "logits/rejected": -0.12848007678985596, "logps/chosen": -0.0009187893010675907, "logps/rejected": -1.8754096031188965, "loss": 0.768, "nll_loss": 0.191975936293602, "rewards/accuracies": 1.0, "rewards/chosen": -9.187893010675907e-05, "rewards/margins": 0.18744908273220062, "rewards/rejected": -0.1875409632921219, "step": 8069 }, { "epoch": 5.580912863070539, "grad_norm": 5.451083660125732, "learning_rate": 2.4550484094052557e-05, "log_odds_chosen": 9.641411781311035, "log_odds_ratio": -0.000142205273732543, "logits/chosen": -0.6447650790214539, "logits/rejected": -0.639355480670929, "logps/chosen": -0.0003977498272433877, "logps/rejected": -1.4326138496398926, "loss": 0.6992, "nll_loss": 0.17477968335151672, "rewards/accuracies": 1.0, "rewards/chosen": -3.977498272433877e-05, "rewards/margins": 0.14322161674499512, "rewards/rejected": -0.1432614028453827, "step": 8070 }, { "epoch": 5.581604426002766, "grad_norm": 11.98173999786377, "learning_rate": 2.454664207776241e-05, "log_odds_chosen": 8.304815292358398, "log_odds_ratio": -0.3241727948188782, "logits/chosen": -0.649227499961853, "logits/rejected": -0.7476930618286133, "logps/chosen": -0.05667625367641449, "logps/rejected": -1.2627215385437012, "loss": 0.9664, "nll_loss": 0.20918306708335876, "rewards/accuracies": 0.875, "rewards/chosen": -0.005667625926434994, "rewards/margins": 0.12060452997684479, "rewards/rejected": -0.12627214193344116, "step": 8071 }, { "epoch": 5.582295988934993, "grad_norm": 6.674464225769043, "learning_rate": 2.4542800061472262e-05, "log_odds_chosen": 11.908063888549805, "log_odds_ratio": -1.3823148037772626e-05, "logits/chosen": -0.11755906790494919, "logits/rejected": -0.20479317009449005, "logps/chosen": -0.00016248153406195343, "logps/rejected": -3.069481611251831, "loss": 0.8086, "nll_loss": 0.2021464854478836, "rewards/accuracies": 1.0, "rewards/chosen": -1.6248153769993223e-05, "rewards/margins": 0.30693191289901733, "rewards/rejected": -0.306948184967041, "step": 8072 }, { "epoch": 5.58298755186722, "grad_norm": 9.36412239074707, "learning_rate": 2.453895804518211e-05, "log_odds_chosen": 11.2120361328125, "log_odds_ratio": -2.773918276943732e-05, "logits/chosen": -0.6709299087524414, "logits/rejected": -0.7922207713127136, "logps/chosen": -0.00016998070350382477, "logps/rejected": -2.340764045715332, "loss": 0.6088, "nll_loss": 0.15218935906887054, "rewards/accuracies": 1.0, "rewards/chosen": -1.6998070350382477e-05, "rewards/margins": 0.2340594232082367, "rewards/rejected": -0.23407642543315887, "step": 8073 }, { "epoch": 5.5836791147994465, "grad_norm": 5.475508689880371, "learning_rate": 2.4535116028891964e-05, "log_odds_chosen": 11.978597640991211, "log_odds_ratio": -6.813578238507034e-06, "logits/chosen": -0.49684345722198486, "logits/rejected": -0.4248015284538269, "logps/chosen": -9.586186206433922e-05, "logps/rejected": -2.6206562519073486, "loss": 0.4513, "nll_loss": 0.11282727867364883, "rewards/accuracies": 1.0, "rewards/chosen": -9.586186934029683e-06, "rewards/margins": 0.26205605268478394, "rewards/rejected": -0.2620656192302704, "step": 8074 }, { "epoch": 5.584370677731673, "grad_norm": 10.361490249633789, "learning_rate": 2.4531274012601816e-05, "log_odds_chosen": 9.624015808105469, "log_odds_ratio": -0.00036905109300278127, "logits/chosen": -0.8618177175521851, "logits/rejected": -0.8672319054603577, "logps/chosen": -0.0006858786218799651, "logps/rejected": -2.188358783721924, "loss": 1.0405, "nll_loss": 0.26007890701293945, "rewards/accuracies": 1.0, "rewards/chosen": -6.858785491203889e-05, "rewards/margins": 0.21876728534698486, "rewards/rejected": -0.21883587539196014, "step": 8075 }, { "epoch": 5.5850622406639, "grad_norm": 7.7408246994018555, "learning_rate": 2.4527431996311665e-05, "log_odds_chosen": 10.29294204711914, "log_odds_ratio": -0.00012540553871076554, "logits/chosen": -0.3908573389053345, "logits/rejected": -0.5217314958572388, "logps/chosen": -0.001282507088035345, "logps/rejected": -1.9427863359451294, "loss": 0.537, "nll_loss": 0.134227454662323, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001282507146243006, "rewards/margins": 0.1941503882408142, "rewards/rejected": -0.19427862763404846, "step": 8076 }, { "epoch": 5.585753803596127, "grad_norm": 11.349954605102539, "learning_rate": 2.4523589980021518e-05, "log_odds_chosen": 11.313045501708984, "log_odds_ratio": -1.8136362996301614e-05, "logits/chosen": -0.5860576033592224, "logits/rejected": -0.5836232304573059, "logps/chosen": -0.00015536148566752672, "logps/rejected": -2.554374933242798, "loss": 0.7355, "nll_loss": 0.18388128280639648, "rewards/accuracies": 1.0, "rewards/chosen": -1.5536148566752672e-05, "rewards/margins": 0.2554219365119934, "rewards/rejected": -0.2554374933242798, "step": 8077 }, { "epoch": 5.586445366528354, "grad_norm": 9.04966926574707, "learning_rate": 2.4519747963731367e-05, "log_odds_chosen": 10.225482940673828, "log_odds_ratio": -0.00024371693143621087, "logits/chosen": -0.7126716375350952, "logits/rejected": -0.6592588424682617, "logps/chosen": -0.0001922544906847179, "logps/rejected": -1.6873338222503662, "loss": 1.5855, "nll_loss": 0.3963471055030823, "rewards/accuracies": 1.0, "rewards/chosen": -1.9225448340876028e-05, "rewards/margins": 0.16871415078639984, "rewards/rejected": -0.1687333732843399, "step": 8078 }, { "epoch": 5.587136929460581, "grad_norm": 9.121757507324219, "learning_rate": 2.4515905947441216e-05, "log_odds_chosen": 10.379926681518555, "log_odds_ratio": -0.0003530043177306652, "logits/chosen": -0.5633220076560974, "logits/rejected": -0.5946686267852783, "logps/chosen": -0.0005274852155707777, "logps/rejected": -1.8887830972671509, "loss": 0.6953, "nll_loss": 0.17379090189933777, "rewards/accuracies": 1.0, "rewards/chosen": -5.274852446746081e-05, "rewards/margins": 0.1888255774974823, "rewards/rejected": -0.18887831270694733, "step": 8079 }, { "epoch": 5.587828492392807, "grad_norm": 25.334081649780273, "learning_rate": 2.451206393115107e-05, "log_odds_chosen": 9.471872329711914, "log_odds_ratio": -0.03043685294687748, "logits/chosen": -0.5243476033210754, "logits/rejected": -0.5439872741699219, "logps/chosen": -0.0027094304095953703, "logps/rejected": -1.7234371900558472, "loss": 0.6609, "nll_loss": 0.16217103600502014, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027094304095953703, "rewards/margins": 0.17207276821136475, "rewards/rejected": -0.17234373092651367, "step": 8080 }, { "epoch": 5.588520055325034, "grad_norm": 19.014925003051758, "learning_rate": 2.450822191486092e-05, "log_odds_chosen": 10.891995429992676, "log_odds_ratio": -0.00017133046640083194, "logits/chosen": -0.5247626900672913, "logits/rejected": -0.5682399868965149, "logps/chosen": -0.0002994315873365849, "logps/rejected": -1.8587620258331299, "loss": 0.8665, "nll_loss": 0.21661463379859924, "rewards/accuracies": 1.0, "rewards/chosen": -2.9943159461254254e-05, "rewards/margins": 0.1858462542295456, "rewards/rejected": -0.18587620556354523, "step": 8081 }, { "epoch": 5.589211618257261, "grad_norm": 19.9434757232666, "learning_rate": 2.450437989857077e-05, "log_odds_chosen": 8.52181339263916, "log_odds_ratio": -0.08861760050058365, "logits/chosen": -0.6639509201049805, "logits/rejected": -0.7798012495040894, "logps/chosen": -0.01545221172273159, "logps/rejected": -1.6898785829544067, "loss": 1.7577, "nll_loss": 0.4305575489997864, "rewards/accuracies": 0.875, "rewards/chosen": -0.0015452211955562234, "rewards/margins": 0.16744264960289001, "rewards/rejected": -0.16898785531520844, "step": 8082 }, { "epoch": 5.589903181189488, "grad_norm": 6.002845287322998, "learning_rate": 2.4500537882280622e-05, "log_odds_chosen": 9.620186805725098, "log_odds_ratio": -0.00030638324096798897, "logits/chosen": -0.7766826748847961, "logits/rejected": -0.7715072631835938, "logps/chosen": -0.00021891409414820373, "logps/rejected": -1.4006415605545044, "loss": 0.7964, "nll_loss": 0.1990627646446228, "rewards/accuracies": 1.0, "rewards/chosen": -2.1891410142416134e-05, "rewards/margins": 0.1400422751903534, "rewards/rejected": -0.14006415009498596, "step": 8083 }, { "epoch": 5.590594744121715, "grad_norm": 6.572117805480957, "learning_rate": 2.4496695865990475e-05, "log_odds_chosen": 9.993795394897461, "log_odds_ratio": -0.0008166706538759172, "logits/chosen": -0.8760631084442139, "logits/rejected": -0.8676169514656067, "logps/chosen": -0.005875871051102877, "logps/rejected": -2.340808629989624, "loss": 0.7076, "nll_loss": 0.1768127679824829, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005875870701856911, "rewards/margins": 0.23349328339099884, "rewards/rejected": -0.23408088088035583, "step": 8084 }, { "epoch": 5.591286307053942, "grad_norm": 4.220310688018799, "learning_rate": 2.4492853849700324e-05, "log_odds_chosen": 9.948683738708496, "log_odds_ratio": -0.00011734232248272747, "logits/chosen": -0.276368111371994, "logits/rejected": -0.278658002614975, "logps/chosen": -0.0008274052524939179, "logps/rejected": -2.1559712886810303, "loss": 0.6354, "nll_loss": 0.1588326096534729, "rewards/accuracies": 1.0, "rewards/chosen": -8.274052379420027e-05, "rewards/margins": 0.2155143916606903, "rewards/rejected": -0.21559712290763855, "step": 8085 }, { "epoch": 5.591977869986168, "grad_norm": 8.20447063446045, "learning_rate": 2.4489011833410176e-05, "log_odds_chosen": 10.30103874206543, "log_odds_ratio": -0.00018550232925917953, "logits/chosen": -0.1794845461845398, "logits/rejected": -0.23964820802211761, "logps/chosen": -0.0002999446587637067, "logps/rejected": -1.9575010538101196, "loss": 0.668, "nll_loss": 0.1669834852218628, "rewards/accuracies": 1.0, "rewards/chosen": -2.9994469514349476e-05, "rewards/margins": 0.1957201063632965, "rewards/rejected": -0.19575008749961853, "step": 8086 }, { "epoch": 5.592669432918395, "grad_norm": 6.297646522521973, "learning_rate": 2.4485169817120025e-05, "log_odds_chosen": 9.927389144897461, "log_odds_ratio": -0.0001191746341646649, "logits/chosen": -0.42461729049682617, "logits/rejected": -0.46698299050331116, "logps/chosen": -0.000342499086400494, "logps/rejected": -1.638321876525879, "loss": 0.7794, "nll_loss": 0.19483135640621185, "rewards/accuracies": 1.0, "rewards/chosen": -3.4249907912453637e-05, "rewards/margins": 0.16379794478416443, "rewards/rejected": -0.1638321876525879, "step": 8087 }, { "epoch": 5.593360995850622, "grad_norm": 6.690411567687988, "learning_rate": 2.4481327800829874e-05, "log_odds_chosen": 10.245779991149902, "log_odds_ratio": -0.00010225032747257501, "logits/chosen": -0.5880808234214783, "logits/rejected": -0.6006646156311035, "logps/chosen": -0.00046540662879124284, "logps/rejected": -2.1198089122772217, "loss": 0.7397, "nll_loss": 0.18490742146968842, "rewards/accuracies": 1.0, "rewards/chosen": -4.6540662879124284e-05, "rewards/margins": 0.21193435788154602, "rewards/rejected": -0.2119809091091156, "step": 8088 }, { "epoch": 5.594052558782849, "grad_norm": 4.878581523895264, "learning_rate": 2.4477485784539727e-05, "log_odds_chosen": 10.35411548614502, "log_odds_ratio": -4.369155431049876e-05, "logits/chosen": -0.6042921543121338, "logits/rejected": -0.6532620191574097, "logps/chosen": -0.00029760473989881575, "logps/rejected": -2.0455756187438965, "loss": 0.5802, "nll_loss": 0.14505022764205933, "rewards/accuracies": 1.0, "rewards/chosen": -2.9760474717477337e-05, "rewards/margins": 0.20452779531478882, "rewards/rejected": -0.20455756783485413, "step": 8089 }, { "epoch": 5.594744121715076, "grad_norm": 6.3988752365112305, "learning_rate": 2.447364376824958e-05, "log_odds_chosen": 10.794388771057129, "log_odds_ratio": -0.0006846464239060879, "logits/chosen": -0.2714294195175171, "logits/rejected": -0.3008936643600464, "logps/chosen": -0.000839495100080967, "logps/rejected": -2.3594367504119873, "loss": 0.7923, "nll_loss": 0.19801412522792816, "rewards/accuracies": 1.0, "rewards/chosen": -8.394951873924583e-05, "rewards/margins": 0.2358597218990326, "rewards/rejected": -0.23594367504119873, "step": 8090 }, { "epoch": 5.595435684647303, "grad_norm": 11.49247932434082, "learning_rate": 2.4469801751959428e-05, "log_odds_chosen": 9.74952507019043, "log_odds_ratio": -0.0001429698895663023, "logits/chosen": -0.4237690567970276, "logits/rejected": -0.4887728691101074, "logps/chosen": -0.000436846079537645, "logps/rejected": -1.6562985181808472, "loss": 0.6642, "nll_loss": 0.16604474186897278, "rewards/accuracies": 1.0, "rewards/chosen": -4.3684605770977214e-05, "rewards/margins": 0.1655861735343933, "rewards/rejected": -0.16562986373901367, "step": 8091 }, { "epoch": 5.596127247579529, "grad_norm": 8.297944068908691, "learning_rate": 2.446595973566928e-05, "log_odds_chosen": 9.776349067687988, "log_odds_ratio": -0.0016647065058350563, "logits/chosen": -0.1512414813041687, "logits/rejected": -0.22371408343315125, "logps/chosen": -0.0034234109334647655, "logps/rejected": -1.9482197761535645, "loss": 1.1818, "nll_loss": 0.29528945684432983, "rewards/accuracies": 1.0, "rewards/chosen": -0.00034234108170494437, "rewards/margins": 0.19447962939739227, "rewards/rejected": -0.19482198357582092, "step": 8092 }, { "epoch": 5.596818810511756, "grad_norm": 7.137149333953857, "learning_rate": 2.4462117719379133e-05, "log_odds_chosen": 10.748970031738281, "log_odds_ratio": -9.89637992461212e-05, "logits/chosen": -0.22393687069416046, "logits/rejected": -0.3681349456310272, "logps/chosen": -0.00027920937282033265, "logps/rejected": -2.4082417488098145, "loss": 0.9565, "nll_loss": 0.2391146719455719, "rewards/accuracies": 1.0, "rewards/chosen": -2.7920936190639623e-05, "rewards/margins": 0.2407962530851364, "rewards/rejected": -0.24082417786121368, "step": 8093 }, { "epoch": 5.597510373443983, "grad_norm": 10.234183311462402, "learning_rate": 2.4458275703088982e-05, "log_odds_chosen": 10.590971946716309, "log_odds_ratio": -0.00014004560944158584, "logits/chosen": -0.488928884267807, "logits/rejected": -0.5948966145515442, "logps/chosen": -0.0004322922322899103, "logps/rejected": -2.103107452392578, "loss": 0.6438, "nll_loss": 0.16093306243419647, "rewards/accuracies": 1.0, "rewards/chosen": -4.3229225411778316e-05, "rewards/margins": 0.21026751399040222, "rewards/rejected": -0.21031074225902557, "step": 8094 }, { "epoch": 5.59820193637621, "grad_norm": 5.393209457397461, "learning_rate": 2.4454433686798835e-05, "log_odds_chosen": 10.531394004821777, "log_odds_ratio": -0.00012310939200688154, "logits/chosen": -0.3937787711620331, "logits/rejected": -0.35000520944595337, "logps/chosen": -0.00021687900880351663, "logps/rejected": -2.040586471557617, "loss": 0.7389, "nll_loss": 0.18470054864883423, "rewards/accuracies": 1.0, "rewards/chosen": -2.1687901607947424e-05, "rewards/margins": 0.20403696596622467, "rewards/rejected": -0.20405864715576172, "step": 8095 }, { "epoch": 5.598893499308437, "grad_norm": 7.336994171142578, "learning_rate": 2.4450591670508684e-05, "log_odds_chosen": 10.63749885559082, "log_odds_ratio": -0.00015697650087531656, "logits/chosen": -0.22074763476848602, "logits/rejected": -0.3110903203487396, "logps/chosen": -0.000745423894841224, "logps/rejected": -2.6639904975891113, "loss": 0.8247, "nll_loss": 0.20615951716899872, "rewards/accuracies": 1.0, "rewards/chosen": -7.45423894841224e-05, "rewards/margins": 0.266324520111084, "rewards/rejected": -0.2663990557193756, "step": 8096 }, { "epoch": 5.5995850622406635, "grad_norm": 7.200364589691162, "learning_rate": 2.4446749654218533e-05, "log_odds_chosen": 10.061790466308594, "log_odds_ratio": -0.00010069488052977249, "logits/chosen": -0.04585893079638481, "logits/rejected": -0.07798229157924652, "logps/chosen": -0.00022760449792258441, "logps/rejected": -1.6503199338912964, "loss": 0.4874, "nll_loss": 0.12184731662273407, "rewards/accuracies": 1.0, "rewards/chosen": -2.2760450519854203e-05, "rewards/margins": 0.16500923037528992, "rewards/rejected": -0.16503199934959412, "step": 8097 }, { "epoch": 5.60027662517289, "grad_norm": 12.905299186706543, "learning_rate": 2.4442907637928385e-05, "log_odds_chosen": 10.574941635131836, "log_odds_ratio": -4.7053843445610255e-05, "logits/chosen": -0.5514289140701294, "logits/rejected": -0.5167350769042969, "logps/chosen": -0.0002096430107485503, "logps/rejected": -2.0648980140686035, "loss": 1.3079, "nll_loss": 0.32696911692619324, "rewards/accuracies": 1.0, "rewards/chosen": -2.0964303985238075e-05, "rewards/margins": 0.2064688503742218, "rewards/rejected": -0.20648980140686035, "step": 8098 }, { "epoch": 5.600968188105117, "grad_norm": 10.966777801513672, "learning_rate": 2.4439065621638238e-05, "log_odds_chosen": 10.464959144592285, "log_odds_ratio": -4.483927841647528e-05, "logits/chosen": -0.3929779827594757, "logits/rejected": -0.4391102194786072, "logps/chosen": -0.00016973260790109634, "logps/rejected": -1.95558500289917, "loss": 0.9996, "nll_loss": 0.2498941719532013, "rewards/accuracies": 1.0, "rewards/chosen": -1.6973261153907515e-05, "rewards/margins": 0.19554153084754944, "rewards/rejected": -0.19555850327014923, "step": 8099 }, { "epoch": 5.601659751037344, "grad_norm": 4.830338001251221, "learning_rate": 2.4435223605348087e-05, "log_odds_chosen": 9.880056381225586, "log_odds_ratio": -0.00016596855130046606, "logits/chosen": -0.3157750964164734, "logits/rejected": -0.29224520921707153, "logps/chosen": -0.0003259595832787454, "logps/rejected": -1.9344159364700317, "loss": 0.3731, "nll_loss": 0.09325173497200012, "rewards/accuracies": 1.0, "rewards/chosen": -3.259596269344911e-05, "rewards/margins": 0.1934089958667755, "rewards/rejected": -0.19344159960746765, "step": 8100 }, { "epoch": 5.602351313969571, "grad_norm": 7.5184454917907715, "learning_rate": 2.443138158905794e-05, "log_odds_chosen": 12.033928871154785, "log_odds_ratio": -7.474442099919543e-06, "logits/chosen": -0.6768447756767273, "logits/rejected": -0.6699569821357727, "logps/chosen": -0.0002758900518529117, "logps/rejected": -3.3420660495758057, "loss": 0.5762, "nll_loss": 0.1440436989068985, "rewards/accuracies": 1.0, "rewards/chosen": -2.7589003366301768e-05, "rewards/margins": 0.3341790437698364, "rewards/rejected": -0.33420661091804504, "step": 8101 }, { "epoch": 5.603042876901798, "grad_norm": 7.3457350730896, "learning_rate": 2.442753957276779e-05, "log_odds_chosen": 10.256988525390625, "log_odds_ratio": -0.00010181563993683085, "logits/chosen": -0.43973827362060547, "logits/rejected": -0.5459225177764893, "logps/chosen": -0.00029382610227912664, "logps/rejected": -2.0015015602111816, "loss": 0.8933, "nll_loss": 0.22332513332366943, "rewards/accuracies": 1.0, "rewards/chosen": -2.9382606953731738e-05, "rewards/margins": 0.20012077689170837, "rewards/rejected": -0.20015016198158264, "step": 8102 }, { "epoch": 5.6037344398340245, "grad_norm": 12.629637718200684, "learning_rate": 2.442369755647764e-05, "log_odds_chosen": 9.829996109008789, "log_odds_ratio": -0.00035466509871184826, "logits/chosen": -0.3330022990703583, "logits/rejected": -0.402560293674469, "logps/chosen": -0.005177430808544159, "logps/rejected": -1.6692194938659668, "loss": 0.8688, "nll_loss": 0.21716631948947906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005177431157790124, "rewards/margins": 0.16640420258045197, "rewards/rejected": -0.1669219434261322, "step": 8103 }, { "epoch": 5.604426002766251, "grad_norm": 11.547345161437988, "learning_rate": 2.4419855540187493e-05, "log_odds_chosen": 9.086764335632324, "log_odds_ratio": -0.14109911024570465, "logits/chosen": -0.4889862537384033, "logits/rejected": -0.4533969759941101, "logps/chosen": -0.021350393071770668, "logps/rejected": -1.995664358139038, "loss": 0.9185, "nll_loss": 0.21551468968391418, "rewards/accuracies": 0.875, "rewards/chosen": -0.002135039307177067, "rewards/margins": 0.19743140041828156, "rewards/rejected": -0.19956645369529724, "step": 8104 }, { "epoch": 5.605117565698478, "grad_norm": 10.75623893737793, "learning_rate": 2.4416013523897342e-05, "log_odds_chosen": 10.713278770446777, "log_odds_ratio": -6.872645462863147e-05, "logits/chosen": -0.4476158618927002, "logits/rejected": -0.3338536322116852, "logps/chosen": -0.00020456750644370914, "logps/rejected": -1.8309910297393799, "loss": 0.7068, "nll_loss": 0.17670387029647827, "rewards/accuracies": 1.0, "rewards/chosen": -2.045674955297727e-05, "rewards/margins": 0.18307864665985107, "rewards/rejected": -0.18309910595417023, "step": 8105 }, { "epoch": 5.605809128630705, "grad_norm": 8.851183891296387, "learning_rate": 2.441217150760719e-05, "log_odds_chosen": 9.805652618408203, "log_odds_ratio": -0.00024620784097351134, "logits/chosen": -0.5062170624732971, "logits/rejected": -0.5972744822502136, "logps/chosen": -0.0004057588812429458, "logps/rejected": -1.4951839447021484, "loss": 1.1216, "nll_loss": 0.28038015961647034, "rewards/accuracies": 1.0, "rewards/chosen": -4.057588739669882e-05, "rewards/margins": 0.14947780966758728, "rewards/rejected": -0.14951838552951813, "step": 8106 }, { "epoch": 5.606500691562932, "grad_norm": 9.78542423248291, "learning_rate": 2.4408329491317044e-05, "log_odds_chosen": 10.101738929748535, "log_odds_ratio": -0.00014487582666333765, "logits/chosen": -0.38510605692863464, "logits/rejected": -0.43797144293785095, "logps/chosen": -0.00026860134676098824, "logps/rejected": -1.763177752494812, "loss": 0.8307, "nll_loss": 0.2076645791530609, "rewards/accuracies": 1.0, "rewards/chosen": -2.6860136131290346e-05, "rewards/margins": 0.17629091441631317, "rewards/rejected": -0.17631778120994568, "step": 8107 }, { "epoch": 5.607192254495159, "grad_norm": 9.541324615478516, "learning_rate": 2.4404487475026896e-05, "log_odds_chosen": 11.488207817077637, "log_odds_ratio": -2.1646897948812693e-05, "logits/chosen": -0.32397058606147766, "logits/rejected": -0.3785797953605652, "logps/chosen": -0.0003754164499696344, "logps/rejected": -2.5622153282165527, "loss": 0.7442, "nll_loss": 0.1860416829586029, "rewards/accuracies": 1.0, "rewards/chosen": -3.75416457245592e-05, "rewards/margins": 0.25618401169776917, "rewards/rejected": -0.25622156262397766, "step": 8108 }, { "epoch": 5.6078838174273855, "grad_norm": 12.605222702026367, "learning_rate": 2.4400645458736745e-05, "log_odds_chosen": 11.033609390258789, "log_odds_ratio": -4.730310320155695e-05, "logits/chosen": -0.603103518486023, "logits/rejected": -0.6246871948242188, "logps/chosen": -0.000758894719183445, "logps/rejected": -2.748445749282837, "loss": 1.1526, "nll_loss": 0.28813713788986206, "rewards/accuracies": 1.0, "rewards/chosen": -7.588947482872754e-05, "rewards/margins": 0.2747687101364136, "rewards/rejected": -0.27484458684921265, "step": 8109 }, { "epoch": 5.608575380359612, "grad_norm": 15.241668701171875, "learning_rate": 2.4396803442446598e-05, "log_odds_chosen": 10.998993873596191, "log_odds_ratio": -2.9695545890717767e-05, "logits/chosen": -0.374109148979187, "logits/rejected": -0.5247286558151245, "logps/chosen": -0.0002444365236442536, "logps/rejected": -2.5976760387420654, "loss": 1.0804, "nll_loss": 0.27008968591690063, "rewards/accuracies": 1.0, "rewards/chosen": -2.444365236442536e-05, "rewards/margins": 0.2597431540489197, "rewards/rejected": -0.2597675919532776, "step": 8110 }, { "epoch": 5.609266943291839, "grad_norm": 6.355210781097412, "learning_rate": 2.439296142615645e-05, "log_odds_chosen": 10.260903358459473, "log_odds_ratio": -0.0003000001597683877, "logits/chosen": -0.5140043497085571, "logits/rejected": -0.5887265205383301, "logps/chosen": -0.00032185198506340384, "logps/rejected": -1.8163726329803467, "loss": 0.7539, "nll_loss": 0.18843594193458557, "rewards/accuracies": 1.0, "rewards/chosen": -3.218520214431919e-05, "rewards/margins": 0.18160510063171387, "rewards/rejected": -0.18163727223873138, "step": 8111 }, { "epoch": 5.609958506224066, "grad_norm": 6.719587802886963, "learning_rate": 2.43891194098663e-05, "log_odds_chosen": 11.945234298706055, "log_odds_ratio": -8.13683436717838e-06, "logits/chosen": -0.4414019286632538, "logits/rejected": -0.47166669368743896, "logps/chosen": -0.00016639998648315668, "logps/rejected": -2.941502571105957, "loss": 0.7043, "nll_loss": 0.17606189846992493, "rewards/accuracies": 1.0, "rewards/chosen": -1.6639998648315668e-05, "rewards/margins": 0.29413360357284546, "rewards/rejected": -0.2941502630710602, "step": 8112 }, { "epoch": 5.610650069156293, "grad_norm": 10.019978523254395, "learning_rate": 2.438527739357615e-05, "log_odds_chosen": 8.934112548828125, "log_odds_ratio": -0.004202369134873152, "logits/chosen": -0.646615743637085, "logits/rejected": -0.5332063436508179, "logps/chosen": -0.03276941925287247, "logps/rejected": -1.848158836364746, "loss": 0.5842, "nll_loss": 0.14562909305095673, "rewards/accuracies": 1.0, "rewards/chosen": -0.003276942064985633, "rewards/margins": 0.18153896927833557, "rewards/rejected": -0.1848158836364746, "step": 8113 }, { "epoch": 5.61134163208852, "grad_norm": 8.241288185119629, "learning_rate": 2.4381435377286e-05, "log_odds_chosen": 10.67772388458252, "log_odds_ratio": -9.114733984461054e-05, "logits/chosen": -0.32889315485954285, "logits/rejected": -0.4711022675037384, "logps/chosen": -0.000362161808880046, "logps/rejected": -2.067312717437744, "loss": 0.9802, "nll_loss": 0.24502873420715332, "rewards/accuracies": 1.0, "rewards/chosen": -3.6216177250025794e-05, "rewards/margins": 0.2066950649023056, "rewards/rejected": -0.20673127472400665, "step": 8114 }, { "epoch": 5.6120331950207465, "grad_norm": 10.448286056518555, "learning_rate": 2.437759336099585e-05, "log_odds_chosen": 10.521625518798828, "log_odds_ratio": -0.0002287977113155648, "logits/chosen": -0.395813912153244, "logits/rejected": -0.3993910551071167, "logps/chosen": -0.0018196626333519816, "logps/rejected": -2.648771286010742, "loss": 0.8287, "nll_loss": 0.20715875923633575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018196628661826253, "rewards/margins": 0.2646951675415039, "rewards/rejected": -0.2648771405220032, "step": 8115 }, { "epoch": 5.612724757952973, "grad_norm": 6.530020236968994, "learning_rate": 2.4373751344705702e-05, "log_odds_chosen": 9.301420211791992, "log_odds_ratio": -0.000275790982414037, "logits/chosen": -0.31070441007614136, "logits/rejected": -0.37882116436958313, "logps/chosen": -0.000820478075183928, "logps/rejected": -2.0555219650268555, "loss": 0.8836, "nll_loss": 0.2208763211965561, "rewards/accuracies": 1.0, "rewards/chosen": -8.204780897358432e-05, "rewards/margins": 0.20547014474868774, "rewards/rejected": -0.20555220544338226, "step": 8116 }, { "epoch": 5.6134163208852, "grad_norm": 9.40421199798584, "learning_rate": 2.4369909328415555e-05, "log_odds_chosen": 11.07664966583252, "log_odds_ratio": -4.299749343772419e-05, "logits/chosen": -0.7671568393707275, "logits/rejected": -0.845366358757019, "logps/chosen": -0.00028596227639354765, "logps/rejected": -2.3645904064178467, "loss": 1.0057, "nll_loss": 0.25142791867256165, "rewards/accuracies": 1.0, "rewards/chosen": -2.8596226911759004e-05, "rewards/margins": 0.23643043637275696, "rewards/rejected": -0.23645903170108795, "step": 8117 }, { "epoch": 5.614107883817427, "grad_norm": 7.280426502227783, "learning_rate": 2.4366067312125404e-05, "log_odds_chosen": 10.041878700256348, "log_odds_ratio": -0.0001619046670384705, "logits/chosen": -0.46100568771362305, "logits/rejected": -0.5172097086906433, "logps/chosen": -0.0005833308678120375, "logps/rejected": -1.877671480178833, "loss": 1.0205, "nll_loss": 0.2551140785217285, "rewards/accuracies": 1.0, "rewards/chosen": -5.833308750879951e-05, "rewards/margins": 0.1877087950706482, "rewards/rejected": -0.1877671331167221, "step": 8118 }, { "epoch": 5.614799446749654, "grad_norm": 7.466595649719238, "learning_rate": 2.4362225295835256e-05, "log_odds_chosen": 10.243062973022461, "log_odds_ratio": -0.00020536058582365513, "logits/chosen": -0.8286471962928772, "logits/rejected": -0.8110532164573669, "logps/chosen": -0.00029074729536660016, "logps/rejected": -1.445319414138794, "loss": 0.5988, "nll_loss": 0.1496695578098297, "rewards/accuracies": 1.0, "rewards/chosen": -2.907473208324518e-05, "rewards/margins": 0.14450286328792572, "rewards/rejected": -0.14453193545341492, "step": 8119 }, { "epoch": 5.615491009681881, "grad_norm": 8.413311004638672, "learning_rate": 2.435838327954511e-05, "log_odds_chosen": 9.944595336914062, "log_odds_ratio": -0.000541605637408793, "logits/chosen": -0.521904706954956, "logits/rejected": -0.5767476558685303, "logps/chosen": -0.00025783380260691047, "logps/rejected": -1.7686951160430908, "loss": 1.2197, "nll_loss": 0.3048711121082306, "rewards/accuracies": 1.0, "rewards/chosen": -2.578338171588257e-05, "rewards/margins": 0.17684374749660492, "rewards/rejected": -0.17686951160430908, "step": 8120 }, { "epoch": 5.6161825726141075, "grad_norm": 15.419757843017578, "learning_rate": 2.4354541263254958e-05, "log_odds_chosen": 9.785442352294922, "log_odds_ratio": -0.005669338628649712, "logits/chosen": -0.20900648832321167, "logits/rejected": -0.3056734800338745, "logps/chosen": -0.004202909301966429, "logps/rejected": -2.0534253120422363, "loss": 0.8243, "nll_loss": 0.20550069212913513, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004202909185551107, "rewards/margins": 0.20492225885391235, "rewards/rejected": -0.20534256100654602, "step": 8121 }, { "epoch": 5.616874135546334, "grad_norm": 9.012137413024902, "learning_rate": 2.435069924696481e-05, "log_odds_chosen": 10.545869827270508, "log_odds_ratio": -3.9361391827696934e-05, "logits/chosen": -0.28159162402153015, "logits/rejected": -0.3681046962738037, "logps/chosen": -0.00032194817322306335, "logps/rejected": -1.9497275352478027, "loss": 0.6193, "nll_loss": 0.15483029186725616, "rewards/accuracies": 1.0, "rewards/chosen": -3.219482096028514e-05, "rewards/margins": 0.19494055211544037, "rewards/rejected": -0.19497275352478027, "step": 8122 }, { "epoch": 5.617565698478561, "grad_norm": 4.492483139038086, "learning_rate": 2.434685723067466e-05, "log_odds_chosen": 10.464592933654785, "log_odds_ratio": -8.870554302120581e-05, "logits/chosen": -0.3013230562210083, "logits/rejected": -0.35164928436279297, "logps/chosen": -0.00027655542362481356, "logps/rejected": -2.1586222648620605, "loss": 0.543, "nll_loss": 0.13574101030826569, "rewards/accuracies": 1.0, "rewards/chosen": -2.7655545636662282e-05, "rewards/margins": 0.2158345729112625, "rewards/rejected": -0.2158622443675995, "step": 8123 }, { "epoch": 5.618257261410788, "grad_norm": 5.90981912612915, "learning_rate": 2.4343015214384508e-05, "log_odds_chosen": 10.498150825500488, "log_odds_ratio": -0.0004647416644729674, "logits/chosen": -0.26439282298088074, "logits/rejected": -0.31085023283958435, "logps/chosen": -0.0028593679890036583, "logps/rejected": -2.0355193614959717, "loss": 0.7325, "nll_loss": 0.1830860823392868, "rewards/accuracies": 1.0, "rewards/chosen": -0.000285936810541898, "rewards/margins": 0.2032659947872162, "rewards/rejected": -0.20355194807052612, "step": 8124 }, { "epoch": 5.618948824343015, "grad_norm": 14.861536026000977, "learning_rate": 2.433917319809436e-05, "log_odds_chosen": 9.945615768432617, "log_odds_ratio": -0.00018102941976394504, "logits/chosen": 0.029468819499015808, "logits/rejected": -0.10923825204372406, "logps/chosen": -0.000667485233861953, "logps/rejected": -2.044044017791748, "loss": 0.9351, "nll_loss": 0.23375779390335083, "rewards/accuracies": 1.0, "rewards/chosen": -6.67485292069614e-05, "rewards/margins": 0.20433764159679413, "rewards/rejected": -0.20440438389778137, "step": 8125 }, { "epoch": 5.619640387275242, "grad_norm": 11.529810905456543, "learning_rate": 2.433533118180421e-05, "log_odds_chosen": 9.585453987121582, "log_odds_ratio": -0.004293727222830057, "logits/chosen": -0.5194653272628784, "logits/rejected": -0.5726852416992188, "logps/chosen": -0.0029624204616993666, "logps/rejected": -1.9861929416656494, "loss": 0.9043, "nll_loss": 0.22564168274402618, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029624204034917057, "rewards/margins": 0.198323056101799, "rewards/rejected": -0.1986193060874939, "step": 8126 }, { "epoch": 5.6203319502074685, "grad_norm": 6.89424467086792, "learning_rate": 2.4331489165514062e-05, "log_odds_chosen": 10.434301376342773, "log_odds_ratio": -7.22405529813841e-05, "logits/chosen": -0.5234087705612183, "logits/rejected": -0.48809075355529785, "logps/chosen": -0.0002671776164788753, "logps/rejected": -1.732217788696289, "loss": 0.7066, "nll_loss": 0.17664460837841034, "rewards/accuracies": 1.0, "rewards/chosen": -2.6717760192696005e-05, "rewards/margins": 0.17319506406784058, "rewards/rejected": -0.17322179675102234, "step": 8127 }, { "epoch": 5.621023513139695, "grad_norm": 5.283146381378174, "learning_rate": 2.4327647149223914e-05, "log_odds_chosen": 10.077682495117188, "log_odds_ratio": -0.0005050359759479761, "logits/chosen": -0.4807586967945099, "logits/rejected": -0.4631732106208801, "logps/chosen": -0.0011460325913503766, "logps/rejected": -2.214632034301758, "loss": 0.8259, "nll_loss": 0.20641423761844635, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011460327368695289, "rewards/margins": 0.2213486135005951, "rewards/rejected": -0.22146320343017578, "step": 8128 }, { "epoch": 5.621715076071922, "grad_norm": 3.413188934326172, "learning_rate": 2.4323805132933764e-05, "log_odds_chosen": 10.446636199951172, "log_odds_ratio": -0.010576541535556316, "logits/chosen": -0.3370027542114258, "logits/rejected": -0.30192655324935913, "logps/chosen": -0.0044626230373978615, "logps/rejected": -2.717942476272583, "loss": 0.8401, "nll_loss": 0.20897215604782104, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044626230373978615, "rewards/margins": 0.2713479995727539, "rewards/rejected": -0.27179425954818726, "step": 8129 }, { "epoch": 5.622406639004149, "grad_norm": 4.8018269538879395, "learning_rate": 2.4319963116643616e-05, "log_odds_chosen": 10.509857177734375, "log_odds_ratio": -8.940868428908288e-05, "logits/chosen": -0.34783679246902466, "logits/rejected": -0.45641839504241943, "logps/chosen": -0.000150371779454872, "logps/rejected": -2.042797565460205, "loss": 0.4591, "nll_loss": 0.11477848887443542, "rewards/accuracies": 1.0, "rewards/chosen": -1.5037178854981903e-05, "rewards/margins": 0.20426471531391144, "rewards/rejected": -0.20427973568439484, "step": 8130 }, { "epoch": 5.623098201936376, "grad_norm": 7.478186130523682, "learning_rate": 2.431612110035347e-05, "log_odds_chosen": 10.237602233886719, "log_odds_ratio": -9.592981223249808e-05, "logits/chosen": -0.6704153418540955, "logits/rejected": -0.6934966444969177, "logps/chosen": -0.0002596440608613193, "logps/rejected": -1.833032488822937, "loss": 0.6939, "nll_loss": 0.17345938086509705, "rewards/accuracies": 1.0, "rewards/chosen": -2.5964407541323453e-05, "rewards/margins": 0.18327729403972626, "rewards/rejected": -0.18330325186252594, "step": 8131 }, { "epoch": 5.623789764868603, "grad_norm": 8.362983703613281, "learning_rate": 2.4312279084063317e-05, "log_odds_chosen": 9.720096588134766, "log_odds_ratio": -0.029921630397439003, "logits/chosen": -0.3918308615684509, "logits/rejected": -0.4415450692176819, "logps/chosen": -0.007875367067754269, "logps/rejected": -2.011821746826172, "loss": 0.7132, "nll_loss": 0.1753058284521103, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007875366136431694, "rewards/margins": 0.2003946602344513, "rewards/rejected": -0.20118218660354614, "step": 8132 }, { "epoch": 5.624481327800829, "grad_norm": 5.304823875427246, "learning_rate": 2.4308437067773167e-05, "log_odds_chosen": 10.959489822387695, "log_odds_ratio": -4.239015470375307e-05, "logits/chosen": -0.517112672328949, "logits/rejected": -0.6122016906738281, "logps/chosen": -0.007303733378648758, "logps/rejected": -2.872983455657959, "loss": 0.7552, "nll_loss": 0.1887841820716858, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007303733727894723, "rewards/margins": 0.2865679860115051, "rewards/rejected": -0.287298321723938, "step": 8133 }, { "epoch": 5.625172890733056, "grad_norm": 8.693367958068848, "learning_rate": 2.430459505148302e-05, "log_odds_chosen": 10.738483428955078, "log_odds_ratio": -6.522463081637397e-05, "logits/chosen": -0.42843499779701233, "logits/rejected": -0.4606199860572815, "logps/chosen": -0.00020155473612248898, "logps/rejected": -1.9086081981658936, "loss": 0.6322, "nll_loss": 0.15804407000541687, "rewards/accuracies": 1.0, "rewards/chosen": -2.0155472157057375e-05, "rewards/margins": 0.19084066152572632, "rewards/rejected": -0.1908608227968216, "step": 8134 }, { "epoch": 5.625864453665283, "grad_norm": 5.344371318817139, "learning_rate": 2.4300753035192868e-05, "log_odds_chosen": 9.759532928466797, "log_odds_ratio": -0.0003999543550889939, "logits/chosen": -0.32385164499282837, "logits/rejected": -0.38953524827957153, "logps/chosen": -0.005059612449258566, "logps/rejected": -2.5394933223724365, "loss": 0.6739, "nll_loss": 0.1684274673461914, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005059613031335175, "rewards/margins": 0.2534433603286743, "rewards/rejected": -0.2539493441581726, "step": 8135 }, { "epoch": 5.62655601659751, "grad_norm": 11.509753227233887, "learning_rate": 2.429691101890272e-05, "log_odds_chosen": 10.994524002075195, "log_odds_ratio": -2.6269664886058308e-05, "logits/chosen": -0.5848830342292786, "logits/rejected": -0.606230616569519, "logps/chosen": -0.00015633998555131257, "logps/rejected": -1.9623017311096191, "loss": 0.9263, "nll_loss": 0.23158368468284607, "rewards/accuracies": 1.0, "rewards/chosen": -1.5633999282727018e-05, "rewards/margins": 0.19621454179286957, "rewards/rejected": -0.1962301880121231, "step": 8136 }, { "epoch": 5.627247579529737, "grad_norm": 7.090087413787842, "learning_rate": 2.4293069002612573e-05, "log_odds_chosen": 10.250383377075195, "log_odds_ratio": -0.0006723024416714907, "logits/chosen": -0.14481787383556366, "logits/rejected": -0.26430898904800415, "logps/chosen": -0.0007214234792627394, "logps/rejected": -1.9152991771697998, "loss": 0.8962, "nll_loss": 0.2239772081375122, "rewards/accuracies": 1.0, "rewards/chosen": -7.214234938146546e-05, "rewards/margins": 0.19145777821540833, "rewards/rejected": -0.19152991473674774, "step": 8137 }, { "epoch": 5.627939142461964, "grad_norm": 7.703522682189941, "learning_rate": 2.4289226986322422e-05, "log_odds_chosen": 9.376312255859375, "log_odds_ratio": -0.0018738940125331283, "logits/chosen": -0.5013612508773804, "logits/rejected": -0.541246771812439, "logps/chosen": -0.006598047912120819, "logps/rejected": -2.103334426879883, "loss": 0.8793, "nll_loss": 0.21962577104568481, "rewards/accuracies": 1.0, "rewards/chosen": -0.000659804733004421, "rewards/margins": 0.20967364311218262, "rewards/rejected": -0.2103334367275238, "step": 8138 }, { "epoch": 5.62863070539419, "grad_norm": 8.062605857849121, "learning_rate": 2.4285384970032274e-05, "log_odds_chosen": 10.10983657836914, "log_odds_ratio": -0.003539876313880086, "logits/chosen": -0.43089261651039124, "logits/rejected": -0.5064984560012817, "logps/chosen": -0.0018464041640982032, "logps/rejected": -2.3213188648223877, "loss": 0.6593, "nll_loss": 0.1644791215658188, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018464041932020336, "rewards/margins": 0.23194727301597595, "rewards/rejected": -0.23213189840316772, "step": 8139 }, { "epoch": 5.629322268326417, "grad_norm": 8.136240005493164, "learning_rate": 2.4281542953742127e-05, "log_odds_chosen": 10.913151741027832, "log_odds_ratio": -5.67370698263403e-05, "logits/chosen": -0.5349059104919434, "logits/rejected": -0.5520635843276978, "logps/chosen": -0.00027292617596685886, "logps/rejected": -2.168395519256592, "loss": 0.5939, "nll_loss": 0.14847087860107422, "rewards/accuracies": 1.0, "rewards/chosen": -2.7292615413898602e-05, "rewards/margins": 0.21681226789951324, "rewards/rejected": -0.21683958172798157, "step": 8140 }, { "epoch": 5.630013831258644, "grad_norm": 8.027983665466309, "learning_rate": 2.4277700937451976e-05, "log_odds_chosen": 9.770769119262695, "log_odds_ratio": -0.0002399134391453117, "logits/chosen": -0.5105567574501038, "logits/rejected": -0.5048942565917969, "logps/chosen": -0.0008296390878967941, "logps/rejected": -2.3705339431762695, "loss": 1.3542, "nll_loss": 0.33851632475852966, "rewards/accuracies": 1.0, "rewards/chosen": -8.296391752082855e-05, "rewards/margins": 0.2369704246520996, "rewards/rejected": -0.23705337941646576, "step": 8141 }, { "epoch": 5.630705394190871, "grad_norm": 9.292349815368652, "learning_rate": 2.427385892116183e-05, "log_odds_chosen": 11.085734367370605, "log_odds_ratio": -0.0005495705408975482, "logits/chosen": -0.3071434795856476, "logits/rejected": -0.34941795468330383, "logps/chosen": -0.0024962667375802994, "logps/rejected": -2.7942733764648438, "loss": 0.6718, "nll_loss": 0.1678960621356964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024962666793726385, "rewards/margins": 0.279177725315094, "rewards/rejected": -0.27942734956741333, "step": 8142 }, { "epoch": 5.631396957123098, "grad_norm": 10.542267799377441, "learning_rate": 2.4270016904871677e-05, "log_odds_chosen": 10.499855995178223, "log_odds_ratio": -0.0001602061529411003, "logits/chosen": -0.4694616198539734, "logits/rejected": -0.4889953136444092, "logps/chosen": -0.0002664076164364815, "logps/rejected": -2.212634563446045, "loss": 0.6688, "nll_loss": 0.16717346012592316, "rewards/accuracies": 1.0, "rewards/chosen": -2.664076237124391e-05, "rewards/margins": 0.22123682498931885, "rewards/rejected": -0.22126345336437225, "step": 8143 }, { "epoch": 5.632088520055325, "grad_norm": 15.375788688659668, "learning_rate": 2.4266174888581526e-05, "log_odds_chosen": 7.971059322357178, "log_odds_ratio": -0.06600493937730789, "logits/chosen": -0.3449121117591858, "logits/rejected": -0.39886587858200073, "logps/chosen": -0.15883807837963104, "logps/rejected": -1.4472795724868774, "loss": 1.1476, "nll_loss": 0.2803100347518921, "rewards/accuracies": 1.0, "rewards/chosen": -0.015883808955550194, "rewards/margins": 0.12884415686130524, "rewards/rejected": -0.14472796022891998, "step": 8144 }, { "epoch": 5.632780082987551, "grad_norm": 6.314674377441406, "learning_rate": 2.426233287229138e-05, "log_odds_chosen": 9.617402076721191, "log_odds_ratio": -0.024739326909184456, "logits/chosen": -0.5911489725112915, "logits/rejected": -0.5527961254119873, "logps/chosen": -0.006679967511445284, "logps/rejected": -2.2330830097198486, "loss": 0.7778, "nll_loss": 0.19198457896709442, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006679967627860606, "rewards/margins": 0.22264030575752258, "rewards/rejected": -0.22330829501152039, "step": 8145 }, { "epoch": 5.633471645919778, "grad_norm": 6.1393022537231445, "learning_rate": 2.425849085600123e-05, "log_odds_chosen": 8.443960189819336, "log_odds_ratio": -0.0038430807180702686, "logits/chosen": -0.5928643345832825, "logits/rejected": -0.6108096837997437, "logps/chosen": -0.003301118966192007, "logps/rejected": -1.4561421871185303, "loss": 0.9987, "nll_loss": 0.249294251203537, "rewards/accuracies": 1.0, "rewards/chosen": -0.000330111914081499, "rewards/margins": 0.14528410136699677, "rewards/rejected": -0.14561422169208527, "step": 8146 }, { "epoch": 5.634163208852005, "grad_norm": 6.993804931640625, "learning_rate": 2.425464883971108e-05, "log_odds_chosen": 11.066886901855469, "log_odds_ratio": -3.2031512091634795e-05, "logits/chosen": -0.5116040706634521, "logits/rejected": -0.4701327085494995, "logps/chosen": -0.00016026015509851277, "logps/rejected": -1.5866247415542603, "loss": 1.1816, "nll_loss": 0.2954084575176239, "rewards/accuracies": 1.0, "rewards/chosen": -1.6026015146053396e-05, "rewards/margins": 0.15864643454551697, "rewards/rejected": -0.15866246819496155, "step": 8147 }, { "epoch": 5.634854771784232, "grad_norm": 12.311450004577637, "learning_rate": 2.4250806823420933e-05, "log_odds_chosen": 9.662727355957031, "log_odds_ratio": -0.0002951676433440298, "logits/chosen": -0.4856759011745453, "logits/rejected": -0.667373538017273, "logps/chosen": -0.014049791730940342, "logps/rejected": -2.104684829711914, "loss": 0.6139, "nll_loss": 0.15344290435314178, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014049792662262917, "rewards/margins": 0.20906350016593933, "rewards/rejected": -0.21046847105026245, "step": 8148 }, { "epoch": 5.635546334716459, "grad_norm": 12.116528511047363, "learning_rate": 2.4246964807130785e-05, "log_odds_chosen": 9.53546142578125, "log_odds_ratio": -0.0001511829177616164, "logits/chosen": -0.5348033308982849, "logits/rejected": -0.59422767162323, "logps/chosen": -0.00030178771703504026, "logps/rejected": -1.5057218074798584, "loss": 1.1536, "nll_loss": 0.2883729040622711, "rewards/accuracies": 1.0, "rewards/chosen": -3.0178771339706145e-05, "rewards/margins": 0.1505420058965683, "rewards/rejected": -0.15057218074798584, "step": 8149 }, { "epoch": 5.6362378976486855, "grad_norm": 6.730164527893066, "learning_rate": 2.4243122790840634e-05, "log_odds_chosen": 10.801505088806152, "log_odds_ratio": -6.559064786415547e-05, "logits/chosen": -0.3491261899471283, "logits/rejected": -0.5007361173629761, "logps/chosen": -0.00025117339100688696, "logps/rejected": -1.964350938796997, "loss": 0.9513, "nll_loss": 0.23780661821365356, "rewards/accuracies": 1.0, "rewards/chosen": -2.5117338736890815e-05, "rewards/margins": 0.19640997052192688, "rewards/rejected": -0.1964350789785385, "step": 8150 }, { "epoch": 5.636929460580912, "grad_norm": 5.858931541442871, "learning_rate": 2.4239280774550487e-05, "log_odds_chosen": 9.36701488494873, "log_odds_ratio": -0.00018923338211607188, "logits/chosen": -0.22614255547523499, "logits/rejected": -0.21489690244197845, "logps/chosen": -0.0004743195604532957, "logps/rejected": -1.5576481819152832, "loss": 0.5359, "nll_loss": 0.1339491903781891, "rewards/accuracies": 1.0, "rewards/chosen": -4.7431953134946525e-05, "rewards/margins": 0.1557173877954483, "rewards/rejected": -0.15576481819152832, "step": 8151 }, { "epoch": 5.637621023513139, "grad_norm": 6.291555404663086, "learning_rate": 2.4235438758260336e-05, "log_odds_chosen": 9.558046340942383, "log_odds_ratio": -0.0004121177480556071, "logits/chosen": -0.5385869145393372, "logits/rejected": -0.6188297867774963, "logps/chosen": -0.0006326594157144427, "logps/rejected": -1.8800450563430786, "loss": 0.7444, "nll_loss": 0.18606965243816376, "rewards/accuracies": 1.0, "rewards/chosen": -6.326595030259341e-05, "rewards/margins": 0.18794125318527222, "rewards/rejected": -0.1880044937133789, "step": 8152 }, { "epoch": 5.638312586445366, "grad_norm": 13.652946472167969, "learning_rate": 2.4231596741970185e-05, "log_odds_chosen": 10.601496696472168, "log_odds_ratio": -6.46712287561968e-05, "logits/chosen": -0.8333589434623718, "logits/rejected": -0.8563560247421265, "logps/chosen": -0.00015697132039349526, "logps/rejected": -1.7056736946105957, "loss": 0.69, "nll_loss": 0.17250311374664307, "rewards/accuracies": 1.0, "rewards/chosen": -1.5697132766945288e-05, "rewards/margins": 0.17055165767669678, "rewards/rejected": -0.1705673635005951, "step": 8153 }, { "epoch": 5.639004149377593, "grad_norm": 20.094688415527344, "learning_rate": 2.4227754725680037e-05, "log_odds_chosen": 11.596328735351562, "log_odds_ratio": -2.2379195797839202e-05, "logits/chosen": -0.296434223651886, "logits/rejected": -0.39348912239074707, "logps/chosen": -0.0016546223778277636, "logps/rejected": -2.9314146041870117, "loss": 1.1227, "nll_loss": 0.28066858649253845, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001654622465139255, "rewards/margins": 0.2929759621620178, "rewards/rejected": -0.2931414544582367, "step": 8154 }, { "epoch": 5.63969571230982, "grad_norm": 7.812084674835205, "learning_rate": 2.422391270938989e-05, "log_odds_chosen": 10.527837753295898, "log_odds_ratio": -7.580976671306416e-05, "logits/chosen": -0.3038010895252228, "logits/rejected": -0.37021446228027344, "logps/chosen": -0.0003315204521641135, "logps/rejected": -2.5236568450927734, "loss": 0.7297, "nll_loss": 0.1824055016040802, "rewards/accuracies": 1.0, "rewards/chosen": -3.3152045944007114e-05, "rewards/margins": 0.25233250856399536, "rewards/rejected": -0.2523656487464905, "step": 8155 }, { "epoch": 5.6403872752420465, "grad_norm": 7.516757011413574, "learning_rate": 2.422007069309974e-05, "log_odds_chosen": 11.295770645141602, "log_odds_ratio": -1.5275883924914524e-05, "logits/chosen": -0.44369572401046753, "logits/rejected": -0.5033272504806519, "logps/chosen": -0.00013083986414130777, "logps/rejected": -2.203603982925415, "loss": 0.7065, "nll_loss": 0.17663389444351196, "rewards/accuracies": 1.0, "rewards/chosen": -1.3083986232231837e-05, "rewards/margins": 0.2203473150730133, "rewards/rejected": -0.22036036849021912, "step": 8156 }, { "epoch": 5.641078838174274, "grad_norm": 5.2864885330200195, "learning_rate": 2.421622867680959e-05, "log_odds_chosen": 9.633533477783203, "log_odds_ratio": -0.00018607992387842387, "logits/chosen": -0.41946959495544434, "logits/rejected": -0.34882277250289917, "logps/chosen": -0.0006838082917965949, "logps/rejected": -1.833829641342163, "loss": 1.038, "nll_loss": 0.2594764530658722, "rewards/accuracies": 1.0, "rewards/chosen": -6.838083209004253e-05, "rewards/margins": 0.18331459164619446, "rewards/rejected": -0.18338295817375183, "step": 8157 }, { "epoch": 5.641770401106501, "grad_norm": 4.267787456512451, "learning_rate": 2.4212386660519444e-05, "log_odds_chosen": 10.599780082702637, "log_odds_ratio": -5.783616870758124e-05, "logits/chosen": -0.6049816012382507, "logits/rejected": -0.5448726415634155, "logps/chosen": -0.000193988045793958, "logps/rejected": -1.547009825706482, "loss": 0.4527, "nll_loss": 0.11316224932670593, "rewards/accuracies": 1.0, "rewards/chosen": -1.9398805306991562e-05, "rewards/margins": 0.15468159317970276, "rewards/rejected": -0.15470099449157715, "step": 8158 }, { "epoch": 5.642461964038728, "grad_norm": 27.239734649658203, "learning_rate": 2.4208544644229293e-05, "log_odds_chosen": 9.927078247070312, "log_odds_ratio": -0.00020649611542467028, "logits/chosen": -0.5524391531944275, "logits/rejected": -0.6222734451293945, "logps/chosen": -0.005761809181421995, "logps/rejected": -2.021272659301758, "loss": 1.2179, "nll_loss": 0.30445796251296997, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005761808715760708, "rewards/margins": 0.20155107975006104, "rewards/rejected": -0.20212724804878235, "step": 8159 }, { "epoch": 5.643153526970955, "grad_norm": 9.34630012512207, "learning_rate": 2.4204702627939145e-05, "log_odds_chosen": 10.594392776489258, "log_odds_ratio": -5.7553992519387975e-05, "logits/chosen": -0.5342981815338135, "logits/rejected": -0.5811108350753784, "logps/chosen": -0.0016168851871043444, "logps/rejected": -2.659160614013672, "loss": 0.7788, "nll_loss": 0.19469034671783447, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016168851288966835, "rewards/margins": 0.265754371881485, "rewards/rejected": -0.2659160792827606, "step": 8160 }, { "epoch": 5.643845089903182, "grad_norm": 13.244446754455566, "learning_rate": 2.4200860611648994e-05, "log_odds_chosen": 10.221586227416992, "log_odds_ratio": -8.015262574190274e-05, "logits/chosen": -0.3327646851539612, "logits/rejected": -0.35911014676094055, "logps/chosen": -0.0005617007846012712, "logps/rejected": -2.2518398761749268, "loss": 0.815, "nll_loss": 0.20373176038265228, "rewards/accuracies": 1.0, "rewards/chosen": -5.617008719127625e-05, "rewards/margins": 0.22512781620025635, "rewards/rejected": -0.22518399357795715, "step": 8161 }, { "epoch": 5.644536652835408, "grad_norm": 5.6324639320373535, "learning_rate": 2.4197018595358843e-05, "log_odds_chosen": 10.490815162658691, "log_odds_ratio": -3.7659163353964686e-05, "logits/chosen": -0.3393840789794922, "logits/rejected": -0.383233904838562, "logps/chosen": -0.00018157096928916872, "logps/rejected": -1.9173825979232788, "loss": 0.805, "nll_loss": 0.20124170184135437, "rewards/accuracies": 1.0, "rewards/chosen": -1.815709583752323e-05, "rewards/margins": 0.191720113158226, "rewards/rejected": -0.19173826277256012, "step": 8162 }, { "epoch": 5.645228215767635, "grad_norm": 12.69321060180664, "learning_rate": 2.4193176579068696e-05, "log_odds_chosen": 10.602964401245117, "log_odds_ratio": -4.337333666626364e-05, "logits/chosen": -0.27991557121276855, "logits/rejected": -0.3347739577293396, "logps/chosen": -0.00010848429519683123, "logps/rejected": -1.7074754238128662, "loss": 0.6757, "nll_loss": 0.16892284154891968, "rewards/accuracies": 1.0, "rewards/chosen": -1.0848429155885242e-05, "rewards/margins": 0.17073668539524078, "rewards/rejected": -0.1707475334405899, "step": 8163 }, { "epoch": 5.645919778699862, "grad_norm": 8.432408332824707, "learning_rate": 2.4189334562778548e-05, "log_odds_chosen": 10.296133041381836, "log_odds_ratio": -8.676405559526756e-05, "logits/chosen": -0.3905819356441498, "logits/rejected": -0.5043076872825623, "logps/chosen": -0.00029500541859306395, "logps/rejected": -1.99964439868927, "loss": 0.7268, "nll_loss": 0.181697279214859, "rewards/accuracies": 1.0, "rewards/chosen": -2.9500542950700037e-05, "rewards/margins": 0.1999349594116211, "rewards/rejected": -0.19996444880962372, "step": 8164 }, { "epoch": 5.646611341632089, "grad_norm": 6.604633808135986, "learning_rate": 2.4185492546488397e-05, "log_odds_chosen": 10.595582962036133, "log_odds_ratio": -0.00013819042942486703, "logits/chosen": -0.05515572428703308, "logits/rejected": -0.09381835162639618, "logps/chosen": -0.0016885169316083193, "logps/rejected": -3.0337023735046387, "loss": 0.8599, "nll_loss": 0.214948832988739, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016885170771274716, "rewards/margins": 0.3032013773918152, "rewards/rejected": -0.30337023735046387, "step": 8165 }, { "epoch": 5.647302904564316, "grad_norm": 11.411890983581543, "learning_rate": 2.418165053019825e-05, "log_odds_chosen": 11.249796867370605, "log_odds_ratio": -2.3611090000486e-05, "logits/chosen": -0.3377041220664978, "logits/rejected": -0.35702985525131226, "logps/chosen": -0.0001584252022439614, "logps/rejected": -2.2942593097686768, "loss": 0.6065, "nll_loss": 0.15161898732185364, "rewards/accuracies": 1.0, "rewards/chosen": -1.5842519133002497e-05, "rewards/margins": 0.2294101119041443, "rewards/rejected": -0.22942596673965454, "step": 8166 }, { "epoch": 5.6479944674965425, "grad_norm": 7.92568826675415, "learning_rate": 2.4177808513908102e-05, "log_odds_chosen": 10.211087226867676, "log_odds_ratio": -9.13483600015752e-05, "logits/chosen": -0.5297777056694031, "logits/rejected": -0.485273152589798, "logps/chosen": -0.00019397379946894944, "logps/rejected": -1.8004730939865112, "loss": 0.5342, "nll_loss": 0.13353081047534943, "rewards/accuracies": 1.0, "rewards/chosen": -1.9397381038288586e-05, "rewards/margins": 0.18002791702747345, "rewards/rejected": -0.18004733324050903, "step": 8167 }, { "epoch": 5.648686030428769, "grad_norm": 12.89731502532959, "learning_rate": 2.417396649761795e-05, "log_odds_chosen": 8.56032943725586, "log_odds_ratio": -0.00191538967192173, "logits/chosen": -0.3405977785587311, "logits/rejected": -0.3541436791419983, "logps/chosen": -0.002999624703079462, "logps/rejected": -1.421423316001892, "loss": 0.6859, "nll_loss": 0.17127352952957153, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002999624703079462, "rewards/margins": 0.14184238016605377, "rewards/rejected": -0.14214232563972473, "step": 8168 }, { "epoch": 5.649377593360996, "grad_norm": 5.056807518005371, "learning_rate": 2.4170124481327804e-05, "log_odds_chosen": 9.410223007202148, "log_odds_ratio": -0.00013891287380829453, "logits/chosen": -0.030084922909736633, "logits/rejected": -0.058445125818252563, "logps/chosen": -0.002498415531590581, "logps/rejected": -1.902077555656433, "loss": 0.6037, "nll_loss": 0.15090909600257874, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024984157062135637, "rewards/margins": 0.18995791673660278, "rewards/rejected": -0.19020774960517883, "step": 8169 }, { "epoch": 5.650069156293223, "grad_norm": 13.368850708007812, "learning_rate": 2.4166282465037653e-05, "log_odds_chosen": 11.036503791809082, "log_odds_ratio": -3.4123506338801235e-05, "logits/chosen": -0.49376970529556274, "logits/rejected": -0.48467227816581726, "logps/chosen": -0.00012498226715251803, "logps/rejected": -1.9384177923202515, "loss": 0.8761, "nll_loss": 0.2190292477607727, "rewards/accuracies": 1.0, "rewards/chosen": -1.2498228898039088e-05, "rewards/margins": 0.19382928311824799, "rewards/rejected": -0.19384178519248962, "step": 8170 }, { "epoch": 5.65076071922545, "grad_norm": 16.7672061920166, "learning_rate": 2.4162440448747502e-05, "log_odds_chosen": 10.526063919067383, "log_odds_ratio": -0.00012096527643734589, "logits/chosen": -0.7498366832733154, "logits/rejected": -0.8215652704238892, "logps/chosen": -0.00040370371425524354, "logps/rejected": -2.3306567668914795, "loss": 0.8662, "nll_loss": 0.21654112637043, "rewards/accuracies": 1.0, "rewards/chosen": -4.037037069792859e-05, "rewards/margins": 0.23302532732486725, "rewards/rejected": -0.23306570947170258, "step": 8171 }, { "epoch": 5.651452282157677, "grad_norm": 8.703173637390137, "learning_rate": 2.4158598432457354e-05, "log_odds_chosen": 11.517191886901855, "log_odds_ratio": -8.211346721509472e-05, "logits/chosen": -0.12739884853363037, "logits/rejected": -0.23411113023757935, "logps/chosen": -0.0003812021459452808, "logps/rejected": -2.476501703262329, "loss": 0.8198, "nll_loss": 0.2049528807401657, "rewards/accuracies": 1.0, "rewards/chosen": -3.812021896010265e-05, "rewards/margins": 0.24761205911636353, "rewards/rejected": -0.24765019118785858, "step": 8172 }, { "epoch": 5.6521438450899035, "grad_norm": 6.470282554626465, "learning_rate": 2.4154756416167207e-05, "log_odds_chosen": 11.416342735290527, "log_odds_ratio": -3.046138590434566e-05, "logits/chosen": -0.05274605005979538, "logits/rejected": -0.18470771610736847, "logps/chosen": -0.0006778776296414435, "logps/rejected": -3.102085828781128, "loss": 0.7421, "nll_loss": 0.18551874160766602, "rewards/accuracies": 1.0, "rewards/chosen": -6.778776878491044e-05, "rewards/margins": 0.31014078855514526, "rewards/rejected": -0.31020858883857727, "step": 8173 }, { "epoch": 5.65283540802213, "grad_norm": 8.105291366577148, "learning_rate": 2.4150914399877056e-05, "log_odds_chosen": 10.079261779785156, "log_odds_ratio": -0.0001657334651099518, "logits/chosen": -0.2964015007019043, "logits/rejected": -0.39093565940856934, "logps/chosen": -0.0011884081177413464, "logps/rejected": -2.2544288635253906, "loss": 0.7485, "nll_loss": 0.18709859251976013, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011884080595336854, "rewards/margins": 0.22532406449317932, "rewards/rejected": -0.22544291615486145, "step": 8174 }, { "epoch": 5.653526970954357, "grad_norm": 10.007908821105957, "learning_rate": 2.4147072383586908e-05, "log_odds_chosen": 11.506814956665039, "log_odds_ratio": -2.823519753292203e-05, "logits/chosen": -0.42628705501556396, "logits/rejected": -0.4609462022781372, "logps/chosen": -0.002497493987902999, "logps/rejected": -3.4516782760620117, "loss": 0.6041, "nll_loss": 0.15103143453598022, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002497493987902999, "rewards/margins": 0.3449181020259857, "rewards/rejected": -0.3451678454875946, "step": 8175 }, { "epoch": 5.654218533886584, "grad_norm": 5.072340965270996, "learning_rate": 2.414323036729676e-05, "log_odds_chosen": 11.129064559936523, "log_odds_ratio": -2.1794527128804475e-05, "logits/chosen": 0.02416606992483139, "logits/rejected": -0.08635728806257248, "logps/chosen": -0.00016831500397529453, "logps/rejected": -2.3612351417541504, "loss": 0.6541, "nll_loss": 0.1635279506444931, "rewards/accuracies": 1.0, "rewards/chosen": -1.6831501852720976e-05, "rewards/margins": 0.23610667884349823, "rewards/rejected": -0.23612351715564728, "step": 8176 }, { "epoch": 5.654910096818811, "grad_norm": 8.00340461730957, "learning_rate": 2.413938835100661e-05, "log_odds_chosen": 10.177129745483398, "log_odds_ratio": -0.00021432647190522403, "logits/chosen": -0.37250402569770813, "logits/rejected": -0.48559921979904175, "logps/chosen": -0.0009563150233589113, "logps/rejected": -2.075915813446045, "loss": 1.0559, "nll_loss": 0.26395127177238464, "rewards/accuracies": 1.0, "rewards/chosen": -9.563150524627417e-05, "rewards/margins": 0.20749595761299133, "rewards/rejected": -0.20759157836437225, "step": 8177 }, { "epoch": 5.655601659751038, "grad_norm": 7.277695178985596, "learning_rate": 2.4135546334716462e-05, "log_odds_chosen": 10.896219253540039, "log_odds_ratio": -3.1958810723153874e-05, "logits/chosen": -0.5712587833404541, "logits/rejected": -0.5515532493591309, "logps/chosen": -0.0001701847359072417, "logps/rejected": -1.8836162090301514, "loss": 0.9981, "nll_loss": 0.24951490759849548, "rewards/accuracies": 1.0, "rewards/chosen": -1.701847395452205e-05, "rewards/margins": 0.18834459781646729, "rewards/rejected": -0.18836161494255066, "step": 8178 }, { "epoch": 5.6562932226832645, "grad_norm": 13.938727378845215, "learning_rate": 2.413170431842631e-05, "log_odds_chosen": 11.479532241821289, "log_odds_ratio": -8.935096411732957e-05, "logits/chosen": -0.624695897102356, "logits/rejected": -0.625409722328186, "logps/chosen": -0.0005521889543160796, "logps/rejected": -3.179570436477661, "loss": 0.626, "nll_loss": 0.15649336576461792, "rewards/accuracies": 1.0, "rewards/chosen": -5.5218897614395246e-05, "rewards/margins": 0.3179018497467041, "rewards/rejected": -0.3179570436477661, "step": 8179 }, { "epoch": 5.656984785615491, "grad_norm": 10.801321029663086, "learning_rate": 2.412786230213616e-05, "log_odds_chosen": 10.582452774047852, "log_odds_ratio": -3.776304583880119e-05, "logits/chosen": -0.6883202791213989, "logits/rejected": -0.7378414869308472, "logps/chosen": -0.001248332904651761, "logps/rejected": -2.509331703186035, "loss": 0.7602, "nll_loss": 0.19005697965621948, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012483329919632524, "rewards/margins": 0.25080832839012146, "rewards/rejected": -0.2509331703186035, "step": 8180 }, { "epoch": 5.657676348547718, "grad_norm": 8.140530586242676, "learning_rate": 2.4124020285846013e-05, "log_odds_chosen": 11.58713150024414, "log_odds_ratio": -4.277328844182193e-05, "logits/chosen": -0.4091223478317261, "logits/rejected": -0.49574363231658936, "logps/chosen": -0.0004341943422332406, "logps/rejected": -2.724029064178467, "loss": 0.7815, "nll_loss": 0.1953831911087036, "rewards/accuracies": 1.0, "rewards/chosen": -4.341944077168591e-05, "rewards/margins": 0.2723594903945923, "rewards/rejected": -0.27240291237831116, "step": 8181 }, { "epoch": 5.658367911479945, "grad_norm": 10.485898971557617, "learning_rate": 2.4120178269555865e-05, "log_odds_chosen": 10.399928092956543, "log_odds_ratio": -8.76165067893453e-05, "logits/chosen": -0.24331486225128174, "logits/rejected": -0.3691559433937073, "logps/chosen": -0.0004011784621980041, "logps/rejected": -2.289734363555908, "loss": 0.8195, "nll_loss": 0.20486295223236084, "rewards/accuracies": 1.0, "rewards/chosen": -4.011784403701313e-05, "rewards/margins": 0.22893333435058594, "rewards/rejected": -0.22897344827651978, "step": 8182 }, { "epoch": 5.659059474412172, "grad_norm": 5.636170864105225, "learning_rate": 2.4116336253265714e-05, "log_odds_chosen": 10.645550727844238, "log_odds_ratio": -4.1069572034757584e-05, "logits/chosen": -0.39882680773735046, "logits/rejected": -0.4428432583808899, "logps/chosen": -0.0010795921552926302, "logps/rejected": -2.0664827823638916, "loss": 0.7889, "nll_loss": 0.19723322987556458, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010795921116368845, "rewards/margins": 0.2065403163433075, "rewards/rejected": -0.20664829015731812, "step": 8183 }, { "epoch": 5.659751037344399, "grad_norm": 8.679171562194824, "learning_rate": 2.4112494236975567e-05, "log_odds_chosen": 11.246646881103516, "log_odds_ratio": -9.160442277789116e-05, "logits/chosen": -0.30560818314552307, "logits/rejected": -0.3293704092502594, "logps/chosen": -0.0003360637929290533, "logps/rejected": -3.327373504638672, "loss": 0.7894, "nll_loss": 0.19733411073684692, "rewards/accuracies": 1.0, "rewards/chosen": -3.3606382203288376e-05, "rewards/margins": 0.33270376920700073, "rewards/rejected": -0.33273738622665405, "step": 8184 }, { "epoch": 5.6604426002766255, "grad_norm": 17.666276931762695, "learning_rate": 2.410865222068542e-05, "log_odds_chosen": 10.4041166305542, "log_odds_ratio": -6.354991637635976e-05, "logits/chosen": -0.48255521059036255, "logits/rejected": -0.5338969230651855, "logps/chosen": -0.0003337644156999886, "logps/rejected": -2.2116596698760986, "loss": 0.9727, "nll_loss": 0.24315854907035828, "rewards/accuracies": 1.0, "rewards/chosen": -3.3376443752786145e-05, "rewards/margins": 0.22113259136676788, "rewards/rejected": -0.2211659699678421, "step": 8185 }, { "epoch": 5.661134163208852, "grad_norm": 11.990691184997559, "learning_rate": 2.4104810204395268e-05, "log_odds_chosen": 10.493095397949219, "log_odds_ratio": -8.417104254476726e-05, "logits/chosen": -0.14247873425483704, "logits/rejected": -0.22682204842567444, "logps/chosen": -0.002159011783078313, "logps/rejected": -2.218174457550049, "loss": 0.8291, "nll_loss": 0.20726041495800018, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021590119285974652, "rewards/margins": 0.22160154581069946, "rewards/rejected": -0.22181746363639832, "step": 8186 }, { "epoch": 5.661825726141079, "grad_norm": 9.079957962036133, "learning_rate": 2.410096818810512e-05, "log_odds_chosen": 10.605042457580566, "log_odds_ratio": -0.0003826943866442889, "logits/chosen": -0.6163223385810852, "logits/rejected": -0.7284992933273315, "logps/chosen": -0.0005659597227349877, "logps/rejected": -2.3373608589172363, "loss": 0.9552, "nll_loss": 0.2387738823890686, "rewards/accuracies": 1.0, "rewards/chosen": -5.6595963542349637e-05, "rewards/margins": 0.23367950320243835, "rewards/rejected": -0.2337360978126526, "step": 8187 }, { "epoch": 5.662517289073306, "grad_norm": 10.778707504272461, "learning_rate": 2.409712617181497e-05, "log_odds_chosen": 11.336727142333984, "log_odds_ratio": -3.515938078635372e-05, "logits/chosen": -0.5271604061126709, "logits/rejected": -0.6426557898521423, "logps/chosen": -0.0004096640623174608, "logps/rejected": -2.7991998195648193, "loss": 0.7284, "nll_loss": 0.18209651112556458, "rewards/accuracies": 1.0, "rewards/chosen": -4.096640623174608e-05, "rewards/margins": 0.27987903356552124, "rewards/rejected": -0.27991998195648193, "step": 8188 }, { "epoch": 5.663208852005533, "grad_norm": 11.966964721679688, "learning_rate": 2.409328415552482e-05, "log_odds_chosen": 11.431020736694336, "log_odds_ratio": -3.573830326786265e-05, "logits/chosen": -0.8616727590560913, "logits/rejected": -0.8452792167663574, "logps/chosen": -9.532944386592135e-05, "logps/rejected": -2.1406638622283936, "loss": 0.7947, "nll_loss": 0.19867615401744843, "rewards/accuracies": 1.0, "rewards/chosen": -9.532944204693194e-06, "rewards/margins": 0.2140568494796753, "rewards/rejected": -0.21406638622283936, "step": 8189 }, { "epoch": 5.66390041493776, "grad_norm": 7.023293495178223, "learning_rate": 2.408944213923467e-05, "log_odds_chosen": 9.780372619628906, "log_odds_ratio": -0.0003126821538899094, "logits/chosen": -0.4998539984226227, "logits/rejected": -0.5834687948226929, "logps/chosen": -0.001264077378436923, "logps/rejected": -1.733214259147644, "loss": 0.7783, "nll_loss": 0.19453226029872894, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001264077436644584, "rewards/margins": 0.173195019364357, "rewards/rejected": -0.1733214110136032, "step": 8190 }, { "epoch": 5.6645919778699865, "grad_norm": 7.998936653137207, "learning_rate": 2.408560012294452e-05, "log_odds_chosen": 9.439737319946289, "log_odds_ratio": -0.0011585770407691598, "logits/chosen": -0.6191846132278442, "logits/rejected": -0.7086983323097229, "logps/chosen": -0.0018918003188446164, "logps/rejected": -1.6204073429107666, "loss": 0.8352, "nll_loss": 0.20867592096328735, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018918004934675992, "rewards/margins": 0.16185154020786285, "rewards/rejected": -0.16204072535037994, "step": 8191 }, { "epoch": 5.665283540802213, "grad_norm": 8.45009994506836, "learning_rate": 2.4081758106654373e-05, "log_odds_chosen": 10.186800956726074, "log_odds_ratio": -6.632292206631973e-05, "logits/chosen": -0.5729277729988098, "logits/rejected": -0.6806793808937073, "logps/chosen": -0.0007412948179990053, "logps/rejected": -2.046152114868164, "loss": 0.7685, "nll_loss": 0.19211547076702118, "rewards/accuracies": 1.0, "rewards/chosen": -7.41294861654751e-05, "rewards/margins": 0.20454107224941254, "rewards/rejected": -0.20461520552635193, "step": 8192 }, { "epoch": 5.66597510373444, "grad_norm": 8.021780967712402, "learning_rate": 2.4077916090364225e-05, "log_odds_chosen": 9.615251541137695, "log_odds_ratio": -0.000610757211688906, "logits/chosen": -0.6830639243125916, "logits/rejected": -0.7671900391578674, "logps/chosen": -0.001944072311744094, "logps/rejected": -2.287820816040039, "loss": 1.549, "nll_loss": 0.3871961236000061, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019440722826402634, "rewards/margins": 0.22858770191669464, "rewards/rejected": -0.22878210246562958, "step": 8193 }, { "epoch": 5.666666666666667, "grad_norm": 14.318559646606445, "learning_rate": 2.4074074074074074e-05, "log_odds_chosen": 11.765233039855957, "log_odds_ratio": -1.4429515431402251e-05, "logits/chosen": -0.749580979347229, "logits/rejected": -0.7655578851699829, "logps/chosen": -0.00037591400905512273, "logps/rejected": -3.43573260307312, "loss": 0.7071, "nll_loss": 0.17676472663879395, "rewards/accuracies": 1.0, "rewards/chosen": -3.7591402360703796e-05, "rewards/margins": 0.3435356914997101, "rewards/rejected": -0.34357327222824097, "step": 8194 }, { "epoch": 5.667358229598894, "grad_norm": 9.44308853149414, "learning_rate": 2.4070232057783927e-05, "log_odds_chosen": 9.825872421264648, "log_odds_ratio": -0.0001476307079428807, "logits/chosen": -0.5155048370361328, "logits/rejected": -0.6302860975265503, "logps/chosen": -0.0007964313263073564, "logps/rejected": -1.5924385786056519, "loss": 0.6867, "nll_loss": 0.17166659235954285, "rewards/accuracies": 1.0, "rewards/chosen": -7.964312680996954e-05, "rewards/margins": 0.1591642200946808, "rewards/rejected": -0.1592438519001007, "step": 8195 }, { "epoch": 5.668049792531121, "grad_norm": 7.392195701599121, "learning_rate": 2.406639004149378e-05, "log_odds_chosen": 9.350540161132812, "log_odds_ratio": -0.0004433426365721971, "logits/chosen": -0.7200472950935364, "logits/rejected": -0.722077488899231, "logps/chosen": -0.00044103802065365016, "logps/rejected": -1.473872184753418, "loss": 0.7089, "nll_loss": 0.17717213928699493, "rewards/accuracies": 1.0, "rewards/chosen": -4.410380279296078e-05, "rewards/margins": 0.14734309911727905, "rewards/rejected": -0.14738722145557404, "step": 8196 }, { "epoch": 5.6687413554633475, "grad_norm": 7.103992938995361, "learning_rate": 2.4062548025203628e-05, "log_odds_chosen": 9.922134399414062, "log_odds_ratio": -0.00039362561074085534, "logits/chosen": -0.593850314617157, "logits/rejected": -0.6985044479370117, "logps/chosen": -0.000883720291312784, "logps/rejected": -2.1663904190063477, "loss": 0.6011, "nll_loss": 0.15024788677692413, "rewards/accuracies": 1.0, "rewards/chosen": -8.837203495204449e-05, "rewards/margins": 0.21655067801475525, "rewards/rejected": -0.21663904190063477, "step": 8197 }, { "epoch": 5.669432918395574, "grad_norm": 7.624302864074707, "learning_rate": 2.4058706008913477e-05, "log_odds_chosen": 10.729737281799316, "log_odds_ratio": -3.654578540590592e-05, "logits/chosen": -0.7158301472663879, "logits/rejected": -0.7326931357383728, "logps/chosen": -0.00021854013903066516, "logps/rejected": -1.8644185066223145, "loss": 0.8006, "nll_loss": 0.20013828575611115, "rewards/accuracies": 1.0, "rewards/chosen": -2.185401535825804e-05, "rewards/margins": 0.1864199936389923, "rewards/rejected": -0.18644185364246368, "step": 8198 }, { "epoch": 5.670124481327801, "grad_norm": 7.759194374084473, "learning_rate": 2.405486399262333e-05, "log_odds_chosen": 9.836369514465332, "log_odds_ratio": -0.00013164187839720398, "logits/chosen": -0.2198760211467743, "logits/rejected": -0.27906516194343567, "logps/chosen": -0.00028999499045312405, "logps/rejected": -1.5851798057556152, "loss": 0.6143, "nll_loss": 0.1535702645778656, "rewards/accuracies": 1.0, "rewards/chosen": -2.899950231949333e-05, "rewards/margins": 0.15848898887634277, "rewards/rejected": -0.158517986536026, "step": 8199 }, { "epoch": 5.670816044260028, "grad_norm": 9.081013679504395, "learning_rate": 2.405102197633318e-05, "log_odds_chosen": 11.407434463500977, "log_odds_ratio": -4.3108979298267514e-05, "logits/chosen": -0.48705050349235535, "logits/rejected": -0.5852835178375244, "logps/chosen": -0.00021016478422097862, "logps/rejected": -2.620225667953491, "loss": 0.7386, "nll_loss": 0.18464510142803192, "rewards/accuracies": 1.0, "rewards/chosen": -2.1016479877289385e-05, "rewards/margins": 0.26200154423713684, "rewards/rejected": -0.26202255487442017, "step": 8200 }, { "epoch": 5.671507607192255, "grad_norm": 6.312060356140137, "learning_rate": 2.404717996004303e-05, "log_odds_chosen": 10.28929328918457, "log_odds_ratio": -4.510658254730515e-05, "logits/chosen": -0.25805893540382385, "logits/rejected": -0.3633671998977661, "logps/chosen": -0.0004313248791731894, "logps/rejected": -1.6498507261276245, "loss": 0.4647, "nll_loss": 0.1161627247929573, "rewards/accuracies": 1.0, "rewards/chosen": -4.313248791731894e-05, "rewards/margins": 0.1649419367313385, "rewards/rejected": -0.16498509049415588, "step": 8201 }, { "epoch": 5.672199170124482, "grad_norm": 5.8020734786987305, "learning_rate": 2.4043337943752883e-05, "log_odds_chosen": 10.381085395812988, "log_odds_ratio": -8.950331539381295e-05, "logits/chosen": -0.6993118524551392, "logits/rejected": -0.7206007838249207, "logps/chosen": -0.000795810017734766, "logps/rejected": -2.066084146499634, "loss": 0.4755, "nll_loss": 0.11886313557624817, "rewards/accuracies": 1.0, "rewards/chosen": -7.958101195981726e-05, "rewards/margins": 0.20652884244918823, "rewards/rejected": -0.20660841464996338, "step": 8202 }, { "epoch": 5.672890733056708, "grad_norm": 6.812307834625244, "learning_rate": 2.4039495927462733e-05, "log_odds_chosen": 11.133984565734863, "log_odds_ratio": -3.28252644976601e-05, "logits/chosen": -0.28554531931877136, "logits/rejected": -0.43246108293533325, "logps/chosen": -0.0005199001170694828, "logps/rejected": -2.497281789779663, "loss": 0.8954, "nll_loss": 0.22383980453014374, "rewards/accuracies": 1.0, "rewards/chosen": -5.199001680011861e-05, "rewards/margins": 0.24967621266841888, "rewards/rejected": -0.24972817301750183, "step": 8203 }, { "epoch": 5.673582295988935, "grad_norm": 9.337517738342285, "learning_rate": 2.4035653911172585e-05, "log_odds_chosen": 10.392212867736816, "log_odds_ratio": -0.0002654620911926031, "logits/chosen": -0.16084915399551392, "logits/rejected": -0.23642441630363464, "logps/chosen": -0.001370608457364142, "logps/rejected": -2.3316516876220703, "loss": 0.6279, "nll_loss": 0.15694421529769897, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013706083700526506, "rewards/margins": 0.2330280989408493, "rewards/rejected": -0.23316514492034912, "step": 8204 }, { "epoch": 5.674273858921162, "grad_norm": 8.094923973083496, "learning_rate": 2.4031811894882437e-05, "log_odds_chosen": 9.047698974609375, "log_odds_ratio": -0.0011066325241699815, "logits/chosen": -0.4470326900482178, "logits/rejected": -0.4333973824977875, "logps/chosen": -0.0011469994205981493, "logps/rejected": -1.259209156036377, "loss": 0.735, "nll_loss": 0.18363803625106812, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011469994933577254, "rewards/margins": 0.12580621242523193, "rewards/rejected": -0.12592092156410217, "step": 8205 }, { "epoch": 5.674965421853389, "grad_norm": 38.148014068603516, "learning_rate": 2.4027969878592286e-05, "log_odds_chosen": 10.174893379211426, "log_odds_ratio": -9.261524974135682e-05, "logits/chosen": -0.3130311965942383, "logits/rejected": -0.3069872260093689, "logps/chosen": -0.00035005720565095544, "logps/rejected": -1.4493703842163086, "loss": 0.7833, "nll_loss": 0.19582557678222656, "rewards/accuracies": 1.0, "rewards/chosen": -3.500572347547859e-05, "rewards/margins": 0.14490202069282532, "rewards/rejected": -0.14493703842163086, "step": 8206 }, { "epoch": 5.675656984785616, "grad_norm": 10.254268646240234, "learning_rate": 2.4024127862302136e-05, "log_odds_chosen": 10.973251342773438, "log_odds_ratio": -0.00014097840175963938, "logits/chosen": -0.5504222512245178, "logits/rejected": -0.6547200083732605, "logps/chosen": -0.00023056677309796214, "logps/rejected": -2.453969955444336, "loss": 0.6504, "nll_loss": 0.16258879005908966, "rewards/accuracies": 1.0, "rewards/chosen": -2.3056678401189856e-05, "rewards/margins": 0.24537393450737, "rewards/rejected": -0.24539700150489807, "step": 8207 }, { "epoch": 5.676348547717843, "grad_norm": 8.336179733276367, "learning_rate": 2.4020285846011988e-05, "log_odds_chosen": 9.055521965026855, "log_odds_ratio": -0.0005176339182071388, "logits/chosen": -0.435626745223999, "logits/rejected": -0.360824853181839, "logps/chosen": -0.0011783160734921694, "logps/rejected": -1.8434216976165771, "loss": 0.7676, "nll_loss": 0.19185706973075867, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011783160152845085, "rewards/margins": 0.18422432243824005, "rewards/rejected": -0.1843421757221222, "step": 8208 }, { "epoch": 5.677040110650069, "grad_norm": 14.023844718933105, "learning_rate": 2.4016443829721837e-05, "log_odds_chosen": 10.210799217224121, "log_odds_ratio": -0.14266715943813324, "logits/chosen": -0.8347749710083008, "logits/rejected": -0.8222837448120117, "logps/chosen": -0.020706988871097565, "logps/rejected": -2.22721266746521, "loss": 0.7187, "nll_loss": 0.16540297865867615, "rewards/accuracies": 0.875, "rewards/chosen": -0.0020706988871097565, "rewards/margins": 0.2206505537033081, "rewards/rejected": -0.22272126376628876, "step": 8209 }, { "epoch": 5.677731673582296, "grad_norm": 8.138181686401367, "learning_rate": 2.401260181343169e-05, "log_odds_chosen": 9.880661964416504, "log_odds_ratio": -0.0003724874695762992, "logits/chosen": -0.6499980092048645, "logits/rejected": -0.6886671185493469, "logps/chosen": -0.003275522030889988, "logps/rejected": -2.115614175796509, "loss": 0.8486, "nll_loss": 0.2121048867702484, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032755223219282925, "rewards/margins": 0.21123388409614563, "rewards/rejected": -0.2115614414215088, "step": 8210 }, { "epoch": 5.678423236514523, "grad_norm": 6.953858852386475, "learning_rate": 2.4008759797141542e-05, "log_odds_chosen": 10.449197769165039, "log_odds_ratio": -5.541814607568085e-05, "logits/chosen": -0.3932764530181885, "logits/rejected": -0.3913537561893463, "logps/chosen": -0.0018914844840765, "logps/rejected": -2.5113325119018555, "loss": 0.7375, "nll_loss": 0.18436847627162933, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018914842803496867, "rewards/margins": 0.2509440779685974, "rewards/rejected": -0.2511332333087921, "step": 8211 }, { "epoch": 5.67911479944675, "grad_norm": 4.819328308105469, "learning_rate": 2.400491778085139e-05, "log_odds_chosen": 10.556605339050293, "log_odds_ratio": -0.00014853657921776175, "logits/chosen": -0.684946596622467, "logits/rejected": -0.7343550324440002, "logps/chosen": -0.00017646751075517386, "logps/rejected": -1.8235328197479248, "loss": 0.7751, "nll_loss": 0.19377098977565765, "rewards/accuracies": 1.0, "rewards/chosen": -1.7646751075517386e-05, "rewards/margins": 0.18233563005924225, "rewards/rejected": -0.18235328793525696, "step": 8212 }, { "epoch": 5.679806362378977, "grad_norm": 6.816978454589844, "learning_rate": 2.4001075764561243e-05, "log_odds_chosen": 9.348443031311035, "log_odds_ratio": -0.0002749827108345926, "logits/chosen": -0.8403669595718384, "logits/rejected": -0.8626226186752319, "logps/chosen": -0.0004887762479484081, "logps/rejected": -1.6191821098327637, "loss": 0.7655, "nll_loss": 0.19135594367980957, "rewards/accuracies": 1.0, "rewards/chosen": -4.887762406724505e-05, "rewards/margins": 0.1618693470954895, "rewards/rejected": -0.16191820800304413, "step": 8213 }, { "epoch": 5.680497925311204, "grad_norm": 8.590983390808105, "learning_rate": 2.3997233748271096e-05, "log_odds_chosen": 9.74236011505127, "log_odds_ratio": -0.002336436416953802, "logits/chosen": -0.39976224303245544, "logits/rejected": -0.4997917413711548, "logps/chosen": -0.001402735011652112, "logps/rejected": -1.6303997039794922, "loss": 0.9188, "nll_loss": 0.22947055101394653, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001402735069859773, "rewards/margins": 0.1628997027873993, "rewards/rejected": -0.16303998231887817, "step": 8214 }, { "epoch": 5.68118948824343, "grad_norm": 4.530670166015625, "learning_rate": 2.3993391731980945e-05, "log_odds_chosen": 9.391895294189453, "log_odds_ratio": -0.00036676725721918046, "logits/chosen": -0.7029848098754883, "logits/rejected": -0.7017989158630371, "logps/chosen": -0.00031811019289307296, "logps/rejected": -1.4524152278900146, "loss": 0.5978, "nll_loss": 0.14941361546516418, "rewards/accuracies": 1.0, "rewards/chosen": -3.1811017834115773e-05, "rewards/margins": 0.14520972967147827, "rewards/rejected": -0.14524152874946594, "step": 8215 }, { "epoch": 5.681881051175657, "grad_norm": 12.992388725280762, "learning_rate": 2.3989549715690794e-05, "log_odds_chosen": 9.962270736694336, "log_odds_ratio": -7.181529508670792e-05, "logits/chosen": -0.7905853986740112, "logits/rejected": -0.7792447209358215, "logps/chosen": -0.00018229984561912715, "logps/rejected": -1.4809046983718872, "loss": 0.903, "nll_loss": 0.2257525473833084, "rewards/accuracies": 1.0, "rewards/chosen": -1.8229984561912715e-05, "rewards/margins": 0.1480722278356552, "rewards/rejected": -0.14809046685695648, "step": 8216 }, { "epoch": 5.682572614107884, "grad_norm": 10.572747230529785, "learning_rate": 2.3985707699400646e-05, "log_odds_chosen": 10.466547012329102, "log_odds_ratio": -0.00023784548102412373, "logits/chosen": -0.533780574798584, "logits/rejected": -0.5297945737838745, "logps/chosen": -0.00042985755135305226, "logps/rejected": -2.4228312969207764, "loss": 0.9092, "nll_loss": 0.2272777557373047, "rewards/accuracies": 1.0, "rewards/chosen": -4.298575731809251e-05, "rewards/margins": 0.24224016070365906, "rewards/rejected": -0.24228313565254211, "step": 8217 }, { "epoch": 5.683264177040111, "grad_norm": 24.881248474121094, "learning_rate": 2.3981865683110495e-05, "log_odds_chosen": 10.501619338989258, "log_odds_ratio": -0.00023195492394734174, "logits/chosen": -0.6462554335594177, "logits/rejected": -0.7131510376930237, "logps/chosen": -0.005770097486674786, "logps/rejected": -2.762514114379883, "loss": 1.5618, "nll_loss": 0.3904319107532501, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005770098650828004, "rewards/margins": 0.275674432516098, "rewards/rejected": -0.2762514352798462, "step": 8218 }, { "epoch": 5.683955739972338, "grad_norm": 8.164917945861816, "learning_rate": 2.3978023666820348e-05, "log_odds_chosen": 10.335380554199219, "log_odds_ratio": -0.00032323403866030276, "logits/chosen": -0.3388156592845917, "logits/rejected": -0.4354027211666107, "logps/chosen": -0.0008530388004146516, "logps/rejected": -2.367396354675293, "loss": 0.7178, "nll_loss": 0.17940829694271088, "rewards/accuracies": 1.0, "rewards/chosen": -8.530388004146516e-05, "rewards/margins": 0.2366543412208557, "rewards/rejected": -0.2367396354675293, "step": 8219 }, { "epoch": 5.6846473029045645, "grad_norm": 16.99516487121582, "learning_rate": 2.39741816505302e-05, "log_odds_chosen": 10.967035293579102, "log_odds_ratio": -9.04684275155887e-05, "logits/chosen": -0.31332796812057495, "logits/rejected": -0.46520695090293884, "logps/chosen": -0.0002737205068115145, "logps/rejected": -2.4684104919433594, "loss": 0.9361, "nll_loss": 0.23400893807411194, "rewards/accuracies": 1.0, "rewards/chosen": -2.7372052500140853e-05, "rewards/margins": 0.24681369960308075, "rewards/rejected": -0.24684105813503265, "step": 8220 }, { "epoch": 5.685338865836791, "grad_norm": 7.044379234313965, "learning_rate": 2.397033963424005e-05, "log_odds_chosen": 9.70529556274414, "log_odds_ratio": -0.0028542920481413603, "logits/chosen": -0.5774523019790649, "logits/rejected": -0.5707895755767822, "logps/chosen": -0.0018815601943060756, "logps/rejected": -2.0981202125549316, "loss": 0.7339, "nll_loss": 0.18318060040473938, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001881560165202245, "rewards/margins": 0.20962387323379517, "rewards/rejected": -0.2098120152950287, "step": 8221 }, { "epoch": 5.686030428769018, "grad_norm": 5.558027267456055, "learning_rate": 2.3966497617949902e-05, "log_odds_chosen": 11.028006553649902, "log_odds_ratio": -3.5399825719650835e-05, "logits/chosen": -0.2784597873687744, "logits/rejected": -0.24438714981079102, "logps/chosen": -0.00037131988210603595, "logps/rejected": -2.662822723388672, "loss": 1.0955, "nll_loss": 0.2738625109195709, "rewards/accuracies": 1.0, "rewards/chosen": -3.713199112098664e-05, "rewards/margins": 0.26624512672424316, "rewards/rejected": -0.26628226041793823, "step": 8222 }, { "epoch": 5.686721991701245, "grad_norm": 8.467771530151367, "learning_rate": 2.3962655601659754e-05, "log_odds_chosen": 10.364076614379883, "log_odds_ratio": -0.00023806243552826345, "logits/chosen": -0.7695846557617188, "logits/rejected": -0.7961927652359009, "logps/chosen": -0.00040172922308556736, "logps/rejected": -2.285043716430664, "loss": 0.5618, "nll_loss": 0.14042823016643524, "rewards/accuracies": 1.0, "rewards/chosen": -4.0172926674131304e-05, "rewards/margins": 0.22846421599388123, "rewards/rejected": -0.22850438952445984, "step": 8223 }, { "epoch": 5.687413554633472, "grad_norm": 5.40841007232666, "learning_rate": 2.3958813585369603e-05, "log_odds_chosen": 9.848953247070312, "log_odds_ratio": -0.00010321783338440582, "logits/chosen": -0.3513679802417755, "logits/rejected": -0.4765387177467346, "logps/chosen": -0.0004271346260793507, "logps/rejected": -1.7631869316101074, "loss": 0.7031, "nll_loss": 0.17576465010643005, "rewards/accuracies": 1.0, "rewards/chosen": -4.271346551831812e-05, "rewards/margins": 0.17627596855163574, "rewards/rejected": -0.1763186901807785, "step": 8224 }, { "epoch": 5.688105117565699, "grad_norm": 4.302876949310303, "learning_rate": 2.3954971569079452e-05, "log_odds_chosen": 10.70728874206543, "log_odds_ratio": -5.083745054434985e-05, "logits/chosen": -0.33189859986305237, "logits/rejected": -0.4616236984729767, "logps/chosen": -0.00015526746574323624, "logps/rejected": -2.0939488410949707, "loss": 0.5166, "nll_loss": 0.1291552633047104, "rewards/accuracies": 1.0, "rewards/chosen": -1.5526748029515147e-05, "rewards/margins": 0.20937936007976532, "rewards/rejected": -0.2093949019908905, "step": 8225 }, { "epoch": 5.6887966804979255, "grad_norm": 5.944268703460693, "learning_rate": 2.3951129552789305e-05, "log_odds_chosen": 10.232353210449219, "log_odds_ratio": -0.0002500133996363729, "logits/chosen": -0.46743786334991455, "logits/rejected": -0.5347243547439575, "logps/chosen": -0.00020009730360470712, "logps/rejected": -1.7333909273147583, "loss": 0.7712, "nll_loss": 0.19276997447013855, "rewards/accuracies": 1.0, "rewards/chosen": -2.000972926907707e-05, "rewards/margins": 0.17331908643245697, "rewards/rejected": -0.1733390986919403, "step": 8226 }, { "epoch": 5.689488243430152, "grad_norm": 10.213455200195312, "learning_rate": 2.3947287536499154e-05, "log_odds_chosen": 9.59089469909668, "log_odds_ratio": -0.0028409319929778576, "logits/chosen": -0.5655418038368225, "logits/rejected": -0.6786340475082397, "logps/chosen": -0.0021009810734540224, "logps/rejected": -1.8002028465270996, "loss": 0.6108, "nll_loss": 0.15240830183029175, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021009810734540224, "rewards/margins": 0.17981019616127014, "rewards/rejected": -0.1800202876329422, "step": 8227 }, { "epoch": 5.690179806362379, "grad_norm": 9.121550559997559, "learning_rate": 2.3943445520209006e-05, "log_odds_chosen": 10.495368957519531, "log_odds_ratio": -4.995317794964649e-05, "logits/chosen": -0.7145277261734009, "logits/rejected": -0.744437038898468, "logps/chosen": -0.0002535338862799108, "logps/rejected": -1.7618240118026733, "loss": 0.5839, "nll_loss": 0.14596140384674072, "rewards/accuracies": 1.0, "rewards/chosen": -2.535338899178896e-05, "rewards/margins": 0.17615705728530884, "rewards/rejected": -0.17618238925933838, "step": 8228 }, { "epoch": 5.690871369294606, "grad_norm": 6.346843242645264, "learning_rate": 2.393960350391886e-05, "log_odds_chosen": 10.27423095703125, "log_odds_ratio": -7.538552745245397e-05, "logits/chosen": -0.4880000352859497, "logits/rejected": -0.5067594647407532, "logps/chosen": -0.00036941375583410263, "logps/rejected": -1.9501086473464966, "loss": 0.6427, "nll_loss": 0.16067828238010406, "rewards/accuracies": 1.0, "rewards/chosen": -3.694137558341026e-05, "rewards/margins": 0.1949739158153534, "rewards/rejected": -0.19501087069511414, "step": 8229 }, { "epoch": 5.691562932226833, "grad_norm": 7.4739484786987305, "learning_rate": 2.3935761487628708e-05, "log_odds_chosen": 10.826186180114746, "log_odds_ratio": -3.0438441172009334e-05, "logits/chosen": -0.5610729455947876, "logits/rejected": -0.6000049710273743, "logps/chosen": -0.0002225643111160025, "logps/rejected": -2.1735219955444336, "loss": 0.5268, "nll_loss": 0.1317044347524643, "rewards/accuracies": 1.0, "rewards/chosen": -2.225643038400449e-05, "rewards/margins": 0.2173299342393875, "rewards/rejected": -0.21735221147537231, "step": 8230 }, { "epoch": 5.69225449515906, "grad_norm": 8.88032341003418, "learning_rate": 2.393191947133856e-05, "log_odds_chosen": 9.630340576171875, "log_odds_ratio": -0.00022998328495305032, "logits/chosen": -0.8728919625282288, "logits/rejected": -0.9178202152252197, "logps/chosen": -0.00024191971169784665, "logps/rejected": -1.42867112159729, "loss": 0.9837, "nll_loss": 0.24591073393821716, "rewards/accuracies": 1.0, "rewards/chosen": -2.4191969714593142e-05, "rewards/margins": 0.14284291863441467, "rewards/rejected": -0.14286711812019348, "step": 8231 }, { "epoch": 5.6929460580912865, "grad_norm": 8.377534866333008, "learning_rate": 2.3928077455048413e-05, "log_odds_chosen": 10.767512321472168, "log_odds_ratio": -2.962576581921894e-05, "logits/chosen": -0.4958113431930542, "logits/rejected": -0.6360245943069458, "logps/chosen": -0.00018656565225683153, "logps/rejected": -2.044919729232788, "loss": 1.079, "nll_loss": 0.2697392702102661, "rewards/accuracies": 1.0, "rewards/chosen": -1.8656564861885272e-05, "rewards/margins": 0.2044733464717865, "rewards/rejected": -0.2044919729232788, "step": 8232 }, { "epoch": 5.693637621023513, "grad_norm": 8.898459434509277, "learning_rate": 2.3924235438758262e-05, "log_odds_chosen": 11.142088890075684, "log_odds_ratio": -8.624765905551612e-05, "logits/chosen": -0.6141859889030457, "logits/rejected": -0.613359272480011, "logps/chosen": -0.00017005293921101838, "logps/rejected": -2.2443342208862305, "loss": 0.6842, "nll_loss": 0.1710437536239624, "rewards/accuracies": 1.0, "rewards/chosen": -1.700529537629336e-05, "rewards/margins": 0.22441640496253967, "rewards/rejected": -0.22443342208862305, "step": 8233 }, { "epoch": 5.69432918395574, "grad_norm": 8.689697265625, "learning_rate": 2.3920393422468114e-05, "log_odds_chosen": 9.910636901855469, "log_odds_ratio": -0.00013076815230306238, "logits/chosen": -0.5209373235702515, "logits/rejected": -0.7035219669342041, "logps/chosen": -0.0003667376295197755, "logps/rejected": -1.636942744255066, "loss": 0.6499, "nll_loss": 0.16246803104877472, "rewards/accuracies": 1.0, "rewards/chosen": -3.667376222438179e-05, "rewards/margins": 0.16365760564804077, "rewards/rejected": -0.16369427740573883, "step": 8234 }, { "epoch": 5.695020746887967, "grad_norm": 11.052765846252441, "learning_rate": 2.3916551406177963e-05, "log_odds_chosen": 10.682456970214844, "log_odds_ratio": -4.524372343439609e-05, "logits/chosen": -0.676239550113678, "logits/rejected": -0.8159259557723999, "logps/chosen": -0.00013625116844195873, "logps/rejected": -1.5824081897735596, "loss": 0.8183, "nll_loss": 0.20456570386886597, "rewards/accuracies": 1.0, "rewards/chosen": -1.3625117389892694e-05, "rewards/margins": 0.15822717547416687, "rewards/rejected": -0.15824081003665924, "step": 8235 }, { "epoch": 5.695712309820194, "grad_norm": 9.405180931091309, "learning_rate": 2.3912709389887812e-05, "log_odds_chosen": 11.243492126464844, "log_odds_ratio": -1.532965325168334e-05, "logits/chosen": -0.4327881336212158, "logits/rejected": -0.6123930811882019, "logps/chosen": -0.0012353898491710424, "logps/rejected": -2.936528205871582, "loss": 0.87, "nll_loss": 0.21749062836170197, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012353899364825338, "rewards/margins": 0.29352930188179016, "rewards/rejected": -0.29365283250808716, "step": 8236 }, { "epoch": 5.696403872752421, "grad_norm": 12.553094863891602, "learning_rate": 2.3908867373597665e-05, "log_odds_chosen": 6.747748851776123, "log_odds_ratio": -0.231939896941185, "logits/chosen": -0.6761006116867065, "logits/rejected": -0.734649658203125, "logps/chosen": -0.037190742790699005, "logps/rejected": -1.02030611038208, "loss": 1.3833, "nll_loss": 0.32261955738067627, "rewards/accuracies": 0.875, "rewards/chosen": -0.003719074185937643, "rewards/margins": 0.09831153601408005, "rewards/rejected": -0.10203061252832413, "step": 8237 }, { "epoch": 5.6970954356846475, "grad_norm": 5.807753086090088, "learning_rate": 2.3905025357307517e-05, "log_odds_chosen": 10.674144744873047, "log_odds_ratio": -0.00039669463876634836, "logits/chosen": -0.3693387508392334, "logits/rejected": -0.5821853280067444, "logps/chosen": -0.0008466947474516928, "logps/rejected": -3.156538724899292, "loss": 1.0424, "nll_loss": 0.2605629563331604, "rewards/accuracies": 1.0, "rewards/chosen": -8.466947474516928e-05, "rewards/margins": 0.3155692219734192, "rewards/rejected": -0.31565389037132263, "step": 8238 }, { "epoch": 5.697786998616874, "grad_norm": 9.770577430725098, "learning_rate": 2.3901183341017366e-05, "log_odds_chosen": 10.510368347167969, "log_odds_ratio": -4.034125231555663e-05, "logits/chosen": -0.5765312910079956, "logits/rejected": -0.7340801954269409, "logps/chosen": -0.0008137425757013261, "logps/rejected": -2.114612102508545, "loss": 0.5254, "nll_loss": 0.1313486248254776, "rewards/accuracies": 1.0, "rewards/chosen": -8.137425174936652e-05, "rewards/margins": 0.21137984097003937, "rewards/rejected": -0.21146121621131897, "step": 8239 }, { "epoch": 5.698478561549101, "grad_norm": 7.401946544647217, "learning_rate": 2.389734132472722e-05, "log_odds_chosen": 9.060220718383789, "log_odds_ratio": -0.00040474371053278446, "logits/chosen": -0.37978866696357727, "logits/rejected": -0.4097916781902313, "logps/chosen": -0.0008790887659415603, "logps/rejected": -1.4040875434875488, "loss": 0.8951, "nll_loss": 0.22374194860458374, "rewards/accuracies": 1.0, "rewards/chosen": -8.790888387011364e-05, "rewards/margins": 0.14032085239887238, "rewards/rejected": -0.14040875434875488, "step": 8240 }, { "epoch": 5.699170124481328, "grad_norm": 7.6191277503967285, "learning_rate": 2.389349930843707e-05, "log_odds_chosen": 11.199530601501465, "log_odds_ratio": -3.895882036886178e-05, "logits/chosen": -0.3387885093688965, "logits/rejected": -0.5780792236328125, "logps/chosen": -0.00025989252026192844, "logps/rejected": -2.5432190895080566, "loss": 1.016, "nll_loss": 0.2539976239204407, "rewards/accuracies": 1.0, "rewards/chosen": -2.5989251298597082e-05, "rewards/margins": 0.2542959451675415, "rewards/rejected": -0.2543219327926636, "step": 8241 }, { "epoch": 5.699861687413555, "grad_norm": 5.339159965515137, "learning_rate": 2.388965729214692e-05, "log_odds_chosen": 11.320333480834961, "log_odds_ratio": -7.026216917438433e-05, "logits/chosen": -0.19970078766345978, "logits/rejected": -0.21898649632930756, "logps/chosen": -0.00022263993741944432, "logps/rejected": -2.338035821914673, "loss": 0.5952, "nll_loss": 0.1487809121608734, "rewards/accuracies": 1.0, "rewards/chosen": -2.2263993741944432e-05, "rewards/margins": 0.23378130793571472, "rewards/rejected": -0.23380357027053833, "step": 8242 }, { "epoch": 5.700553250345782, "grad_norm": 10.900066375732422, "learning_rate": 2.3885815275856773e-05, "log_odds_chosen": 10.417686462402344, "log_odds_ratio": -8.494217763654888e-05, "logits/chosen": -0.42949870228767395, "logits/rejected": -0.45903027057647705, "logps/chosen": -0.00041172222699970007, "logps/rejected": -2.4149889945983887, "loss": 0.875, "nll_loss": 0.21874701976776123, "rewards/accuracies": 1.0, "rewards/chosen": -4.1172221244778484e-05, "rewards/margins": 0.2414577305316925, "rewards/rejected": -0.2414989024400711, "step": 8243 }, { "epoch": 5.7012448132780085, "grad_norm": 8.390003204345703, "learning_rate": 2.3881973259566622e-05, "log_odds_chosen": 11.345580101013184, "log_odds_ratio": -0.0001110144512495026, "logits/chosen": -0.23835702240467072, "logits/rejected": -0.4055521786212921, "logps/chosen": -0.00022283539874479175, "logps/rejected": -2.9258501529693604, "loss": 1.1082, "nll_loss": 0.27704259753227234, "rewards/accuracies": 1.0, "rewards/chosen": -2.2283540602074936e-05, "rewards/margins": 0.29256272315979004, "rewards/rejected": -0.29258501529693604, "step": 8244 }, { "epoch": 5.701936376210235, "grad_norm": 8.030402183532715, "learning_rate": 2.387813124327647e-05, "log_odds_chosen": 9.59226131439209, "log_odds_ratio": -0.0007807939546182752, "logits/chosen": -0.3640380799770355, "logits/rejected": -0.3463056683540344, "logps/chosen": -0.0004926139954477549, "logps/rejected": -1.7567813396453857, "loss": 0.7978, "nll_loss": 0.19936777651309967, "rewards/accuracies": 1.0, "rewards/chosen": -4.926140536554158e-05, "rewards/margins": 0.1756288707256317, "rewards/rejected": -0.17567814886569977, "step": 8245 }, { "epoch": 5.702627939142462, "grad_norm": 11.18429946899414, "learning_rate": 2.3874289226986323e-05, "log_odds_chosen": 10.842290878295898, "log_odds_ratio": -3.7047873775009066e-05, "logits/chosen": -0.0423475056886673, "logits/rejected": -0.15324100852012634, "logps/chosen": -0.00022486448870040476, "logps/rejected": -2.397068977355957, "loss": 0.7768, "nll_loss": 0.19419454038143158, "rewards/accuracies": 1.0, "rewards/chosen": -2.2486448870040476e-05, "rewards/margins": 0.23968440294265747, "rewards/rejected": -0.23970690369606018, "step": 8246 }, { "epoch": 5.703319502074689, "grad_norm": 12.94444751739502, "learning_rate": 2.3870447210696176e-05, "log_odds_chosen": 10.377115249633789, "log_odds_ratio": -0.0001268679043278098, "logits/chosen": -0.14172455668449402, "logits/rejected": -0.20555134117603302, "logps/chosen": -0.00018239016935694963, "logps/rejected": -1.9219377040863037, "loss": 0.5551, "nll_loss": 0.138749897480011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8239019482280128e-05, "rewards/margins": 0.19217553734779358, "rewards/rejected": -0.19219377636909485, "step": 8247 }, { "epoch": 5.704011065006916, "grad_norm": 7.755146503448486, "learning_rate": 2.3866605194406025e-05, "log_odds_chosen": 10.609502792358398, "log_odds_ratio": -0.0001665048039285466, "logits/chosen": -0.09565743803977966, "logits/rejected": -0.1093120276927948, "logps/chosen": -0.00014979354455135763, "logps/rejected": -1.856912612915039, "loss": 0.8389, "nll_loss": 0.2096976339817047, "rewards/accuracies": 1.0, "rewards/chosen": -1.4979355000832584e-05, "rewards/margins": 0.18567627668380737, "rewards/rejected": -0.18569126725196838, "step": 8248 }, { "epoch": 5.704702627939143, "grad_norm": 4.313599586486816, "learning_rate": 2.3862763178115877e-05, "log_odds_chosen": 10.78399658203125, "log_odds_ratio": -8.890665776561946e-05, "logits/chosen": -0.40093475580215454, "logits/rejected": -0.5203821063041687, "logps/chosen": -0.0001348076475551352, "logps/rejected": -1.8851039409637451, "loss": 0.4644, "nll_loss": 0.116102434694767, "rewards/accuracies": 1.0, "rewards/chosen": -1.34807651193114e-05, "rewards/margins": 0.1884969174861908, "rewards/rejected": -0.18851038813591003, "step": 8249 }, { "epoch": 5.7053941908713695, "grad_norm": 4.284151554107666, "learning_rate": 2.385892116182573e-05, "log_odds_chosen": 9.743794441223145, "log_odds_ratio": -0.00013424563803710043, "logits/chosen": -0.0006545856595039368, "logits/rejected": 0.04994429275393486, "logps/chosen": -0.0003481783205643296, "logps/rejected": -1.907433271408081, "loss": 0.9276, "nll_loss": 0.23188096284866333, "rewards/accuracies": 1.0, "rewards/chosen": -3.481783642200753e-05, "rewards/margins": 0.19070850312709808, "rewards/rejected": -0.1907433271408081, "step": 8250 }, { "epoch": 5.706085753803596, "grad_norm": 10.36178970336914, "learning_rate": 2.385507914553558e-05, "log_odds_chosen": 9.836285591125488, "log_odds_ratio": -0.00011164380703121424, "logits/chosen": -0.49485906958580017, "logits/rejected": -0.6330108642578125, "logps/chosen": -0.00021051692601758987, "logps/rejected": -1.2412182092666626, "loss": 0.7839, "nll_loss": 0.19596289098262787, "rewards/accuracies": 1.0, "rewards/chosen": -2.105169369315263e-05, "rewards/margins": 0.12410077452659607, "rewards/rejected": -0.12412182241678238, "step": 8251 }, { "epoch": 5.706777316735823, "grad_norm": 6.942699909210205, "learning_rate": 2.385123712924543e-05, "log_odds_chosen": 8.719355583190918, "log_odds_ratio": -0.00041597173549234867, "logits/chosen": -0.5135871171951294, "logits/rejected": -0.5049526691436768, "logps/chosen": -0.0006854881066828966, "logps/rejected": -1.5492440462112427, "loss": 1.1859, "nll_loss": 0.29642635583877563, "rewards/accuracies": 1.0, "rewards/chosen": -6.854880484752357e-05, "rewards/margins": 0.1548558473587036, "rewards/rejected": -0.1549244076013565, "step": 8252 }, { "epoch": 5.70746887966805, "grad_norm": 6.982730865478516, "learning_rate": 2.384739511295528e-05, "log_odds_chosen": 10.45567798614502, "log_odds_ratio": -0.00011374377936590463, "logits/chosen": -0.31738218665122986, "logits/rejected": -0.371385395526886, "logps/chosen": -0.00034341655555181205, "logps/rejected": -2.1118595600128174, "loss": 0.7938, "nll_loss": 0.1984386444091797, "rewards/accuracies": 1.0, "rewards/chosen": -3.434165409998968e-05, "rewards/margins": 0.2111515998840332, "rewards/rejected": -0.21118594706058502, "step": 8253 }, { "epoch": 5.708160442600277, "grad_norm": 9.220938682556152, "learning_rate": 2.384355309666513e-05, "log_odds_chosen": 10.828641891479492, "log_odds_ratio": -4.158323281444609e-05, "logits/chosen": -0.21871013939380646, "logits/rejected": -0.33926716446876526, "logps/chosen": -0.000680235680192709, "logps/rejected": -2.519893169403076, "loss": 0.8333, "nll_loss": 0.20831838250160217, "rewards/accuracies": 1.0, "rewards/chosen": -6.802357529522851e-05, "rewards/margins": 0.25192129611968994, "rewards/rejected": -0.25198930501937866, "step": 8254 }, { "epoch": 5.708852005532504, "grad_norm": 7.605830669403076, "learning_rate": 2.383971108037498e-05, "log_odds_chosen": 9.904296875, "log_odds_ratio": -0.0009204599191434681, "logits/chosen": -0.8972567319869995, "logits/rejected": -0.9149336814880371, "logps/chosen": -0.001136539620347321, "logps/rejected": -1.9943983554840088, "loss": 0.7697, "nll_loss": 0.19232803583145142, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011365396494511515, "rewards/margins": 0.19932620227336884, "rewards/rejected": -0.1994398534297943, "step": 8255 }, { "epoch": 5.70954356846473, "grad_norm": 11.07079792022705, "learning_rate": 2.3835869064084834e-05, "log_odds_chosen": 8.659165382385254, "log_odds_ratio": -0.12604594230651855, "logits/chosen": -0.6595401763916016, "logits/rejected": -0.7154828310012817, "logps/chosen": -0.019718142226338387, "logps/rejected": -1.4792919158935547, "loss": 0.9043, "nll_loss": 0.2134617567062378, "rewards/accuracies": 0.875, "rewards/chosen": -0.0019718140829354525, "rewards/margins": 0.14595738053321838, "rewards/rejected": -0.14792919158935547, "step": 8256 }, { "epoch": 5.710235131396957, "grad_norm": 8.167619705200195, "learning_rate": 2.3832027047794683e-05, "log_odds_chosen": 9.714550018310547, "log_odds_ratio": -0.00034440302988514304, "logits/chosen": -0.48485106229782104, "logits/rejected": -0.5268182158470154, "logps/chosen": -0.0009396661771461368, "logps/rejected": -1.8895883560180664, "loss": 0.7271, "nll_loss": 0.18174050748348236, "rewards/accuracies": 1.0, "rewards/chosen": -9.396662062499672e-05, "rewards/margins": 0.18886488676071167, "rewards/rejected": -0.18895885348320007, "step": 8257 }, { "epoch": 5.710926694329184, "grad_norm": 15.704752922058105, "learning_rate": 2.3828185031504536e-05, "log_odds_chosen": 11.482129096984863, "log_odds_ratio": -2.9630047720274888e-05, "logits/chosen": -0.26579806208610535, "logits/rejected": -0.2275182008743286, "logps/chosen": -0.00010128448047908023, "logps/rejected": -2.2797024250030518, "loss": 1.0707, "nll_loss": 0.2676713764667511, "rewards/accuracies": 1.0, "rewards/chosen": -1.0128448593604844e-05, "rewards/margins": 0.22796010971069336, "rewards/rejected": -0.22797025740146637, "step": 8258 }, { "epoch": 5.711618257261411, "grad_norm": 5.285425662994385, "learning_rate": 2.3824343015214385e-05, "log_odds_chosen": 9.573975563049316, "log_odds_ratio": -0.0006210988503880799, "logits/chosen": -0.48438677191734314, "logits/rejected": -0.5001848936080933, "logps/chosen": -0.0025780226569622755, "logps/rejected": -1.3919250965118408, "loss": 0.6277, "nll_loss": 0.15686482191085815, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002578022249508649, "rewards/margins": 0.13893470168113708, "rewards/rejected": -0.13919252157211304, "step": 8259 }, { "epoch": 5.712309820193638, "grad_norm": 6.764977931976318, "learning_rate": 2.3820500998924237e-05, "log_odds_chosen": 10.443593978881836, "log_odds_ratio": -7.994850602699444e-05, "logits/chosen": -0.5136489868164062, "logits/rejected": -0.5443819165229797, "logps/chosen": -0.00024153507547453046, "logps/rejected": -1.9305315017700195, "loss": 0.9562, "nll_loss": 0.23904123902320862, "rewards/accuracies": 1.0, "rewards/chosen": -2.415350536466576e-05, "rewards/margins": 0.19302898645401, "rewards/rejected": -0.19305315613746643, "step": 8260 }, { "epoch": 5.713001383125865, "grad_norm": 5.804108619689941, "learning_rate": 2.381665898263409e-05, "log_odds_chosen": 9.328962326049805, "log_odds_ratio": -0.0003930005186703056, "logits/chosen": -0.5408710837364197, "logits/rejected": -0.6241236329078674, "logps/chosen": -0.0005625694757327437, "logps/rejected": -1.4583903551101685, "loss": 0.7266, "nll_loss": 0.1816195398569107, "rewards/accuracies": 1.0, "rewards/chosen": -5.625695121125318e-05, "rewards/margins": 0.14578276872634888, "rewards/rejected": -0.14583903551101685, "step": 8261 }, { "epoch": 5.713692946058091, "grad_norm": 8.310803413391113, "learning_rate": 2.381281696634394e-05, "log_odds_chosen": 8.8518648147583, "log_odds_ratio": -0.0004088030837010592, "logits/chosen": -0.7182442545890808, "logits/rejected": -0.7691875100135803, "logps/chosen": -0.0009055974660441279, "logps/rejected": -1.6568962335586548, "loss": 0.7927, "nll_loss": 0.19813647866249084, "rewards/accuracies": 1.0, "rewards/chosen": -9.055974805960432e-05, "rewards/margins": 0.16559907793998718, "rewards/rejected": -0.1656896471977234, "step": 8262 }, { "epoch": 5.714384508990318, "grad_norm": 5.538098335266113, "learning_rate": 2.3808974950053788e-05, "log_odds_chosen": 9.040349960327148, "log_odds_ratio": -0.00048364241956733167, "logits/chosen": -0.5108079314231873, "logits/rejected": -0.5672467947006226, "logps/chosen": -0.0017460084054619074, "logps/rejected": -1.8536648750305176, "loss": 0.7697, "nll_loss": 0.1923697143793106, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017460084927733988, "rewards/margins": 0.18519188463687897, "rewards/rejected": -0.18536648154258728, "step": 8263 }, { "epoch": 5.715076071922545, "grad_norm": 4.487985134124756, "learning_rate": 2.380513293376364e-05, "log_odds_chosen": 9.942371368408203, "log_odds_ratio": -0.0001654111547395587, "logits/chosen": -0.5469893217086792, "logits/rejected": -0.5883113145828247, "logps/chosen": -0.000214441679418087, "logps/rejected": -1.191213846206665, "loss": 0.7364, "nll_loss": 0.18408462405204773, "rewards/accuracies": 1.0, "rewards/chosen": -2.14441679418087e-05, "rewards/margins": 0.1190999448299408, "rewards/rejected": -0.11912138015031815, "step": 8264 }, { "epoch": 5.715767634854772, "grad_norm": 5.776425361633301, "learning_rate": 2.380129091747349e-05, "log_odds_chosen": 11.487066268920898, "log_odds_ratio": -7.581858517369255e-05, "logits/chosen": -0.38149914145469666, "logits/rejected": -0.47518178820610046, "logps/chosen": -0.00017071723414119333, "logps/rejected": -2.243966579437256, "loss": 0.73, "nll_loss": 0.18249055743217468, "rewards/accuracies": 1.0, "rewards/chosen": -1.7071723050321452e-05, "rewards/margins": 0.22437959909439087, "rewards/rejected": -0.22439667582511902, "step": 8265 }, { "epoch": 5.716459197786999, "grad_norm": 9.092817306518555, "learning_rate": 2.379744890118334e-05, "log_odds_chosen": 10.952812194824219, "log_odds_ratio": -4.257060936652124e-05, "logits/chosen": -0.6757196187973022, "logits/rejected": -0.6657350063323975, "logps/chosen": -0.001055280677974224, "logps/rejected": -2.6038944721221924, "loss": 0.964, "nll_loss": 0.24098624289035797, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010552808089414611, "rewards/margins": 0.2602839469909668, "rewards/rejected": -0.2603894770145416, "step": 8266 }, { "epoch": 5.717150760719226, "grad_norm": 5.761097431182861, "learning_rate": 2.3793606884893194e-05, "log_odds_chosen": 10.911550521850586, "log_odds_ratio": -4.369751695776358e-05, "logits/chosen": -0.5501573085784912, "logits/rejected": -0.622515857219696, "logps/chosen": -0.00017440738156437874, "logps/rejected": -2.022120237350464, "loss": 0.6955, "nll_loss": 0.17388296127319336, "rewards/accuracies": 1.0, "rewards/chosen": -1.7440739611629397e-05, "rewards/margins": 0.20219460129737854, "rewards/rejected": -0.20221203565597534, "step": 8267 }, { "epoch": 5.717842323651452, "grad_norm": 7.399377822875977, "learning_rate": 2.3789764868603043e-05, "log_odds_chosen": 8.654727935791016, "log_odds_ratio": -0.0005356417968869209, "logits/chosen": -0.3415067791938782, "logits/rejected": -0.369626522064209, "logps/chosen": -0.004428997170180082, "logps/rejected": -1.865692138671875, "loss": 0.8491, "nll_loss": 0.21222692728042603, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044289970537647605, "rewards/margins": 0.18612630665302277, "rewards/rejected": -0.1865692138671875, "step": 8268 }, { "epoch": 5.718533886583679, "grad_norm": 8.111486434936523, "learning_rate": 2.3785922852312896e-05, "log_odds_chosen": 10.670412063598633, "log_odds_ratio": -6.3931496697478e-05, "logits/chosen": -0.624998152256012, "logits/rejected": -0.6532515287399292, "logps/chosen": -0.0002908039605244994, "logps/rejected": -1.9806416034698486, "loss": 0.8571, "nll_loss": 0.21425631642341614, "rewards/accuracies": 1.0, "rewards/chosen": -2.9080396416247822e-05, "rewards/margins": 0.1980350911617279, "rewards/rejected": -0.1980641782283783, "step": 8269 }, { "epoch": 5.719225449515906, "grad_norm": 8.084224700927734, "learning_rate": 2.3782080836022748e-05, "log_odds_chosen": 11.551338195800781, "log_odds_ratio": -1.5403145880554803e-05, "logits/chosen": -0.40487638115882874, "logits/rejected": -0.5136086344718933, "logps/chosen": -0.00013520754873752594, "logps/rejected": -2.4729225635528564, "loss": 0.6435, "nll_loss": 0.16087886691093445, "rewards/accuracies": 1.0, "rewards/chosen": -1.3520754691853654e-05, "rewards/margins": 0.24727874994277954, "rewards/rejected": -0.24729228019714355, "step": 8270 }, { "epoch": 5.719917012448133, "grad_norm": 7.687830924987793, "learning_rate": 2.3778238819732597e-05, "log_odds_chosen": 10.32638168334961, "log_odds_ratio": -0.0001268651831196621, "logits/chosen": -0.4124564230442047, "logits/rejected": -0.46572574973106384, "logps/chosen": -0.00041066654375754297, "logps/rejected": -2.4294347763061523, "loss": 0.5519, "nll_loss": 0.13796168565750122, "rewards/accuracies": 1.0, "rewards/chosen": -4.1066658013733104e-05, "rewards/margins": 0.24290242791175842, "rewards/rejected": -0.24294348061084747, "step": 8271 }, { "epoch": 5.72060857538036, "grad_norm": 6.698635578155518, "learning_rate": 2.3774396803442446e-05, "log_odds_chosen": 9.576118469238281, "log_odds_ratio": -0.0013523140223696828, "logits/chosen": -0.2256212830543518, "logits/rejected": -0.3111118972301483, "logps/chosen": -0.005982575472444296, "logps/rejected": -2.017214775085449, "loss": 0.5258, "nll_loss": 0.13131293654441833, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005982575239613652, "rewards/margins": 0.2011232078075409, "rewards/rejected": -0.20172148942947388, "step": 8272 }, { "epoch": 5.7213001383125865, "grad_norm": 8.149397850036621, "learning_rate": 2.37705547871523e-05, "log_odds_chosen": 10.184762954711914, "log_odds_ratio": -0.00050318893045187, "logits/chosen": -0.5884162187576294, "logits/rejected": -0.6161515712738037, "logps/chosen": -0.0006764706922695041, "logps/rejected": -2.1969189643859863, "loss": 0.6321, "nll_loss": 0.15796729922294617, "rewards/accuracies": 1.0, "rewards/chosen": -6.764707359252498e-05, "rewards/margins": 0.21962425112724304, "rewards/rejected": -0.2196919023990631, "step": 8273 }, { "epoch": 5.721991701244813, "grad_norm": 5.989152431488037, "learning_rate": 2.3766712770862148e-05, "log_odds_chosen": 10.977209091186523, "log_odds_ratio": -0.00010491662396816537, "logits/chosen": -0.5030311346054077, "logits/rejected": -0.5460888147354126, "logps/chosen": -0.00025720163830555975, "logps/rejected": -2.2601795196533203, "loss": 0.5065, "nll_loss": 0.12660832703113556, "rewards/accuracies": 1.0, "rewards/chosen": -2.5720162739162333e-05, "rewards/margins": 0.22599226236343384, "rewards/rejected": -0.22601798176765442, "step": 8274 }, { "epoch": 5.72268326417704, "grad_norm": 7.687798023223877, "learning_rate": 2.3762870754572e-05, "log_odds_chosen": 11.152847290039062, "log_odds_ratio": -8.137941040331498e-05, "logits/chosen": -0.33489474654197693, "logits/rejected": -0.3867482542991638, "logps/chosen": -0.00017074895731639117, "logps/rejected": -2.54575777053833, "loss": 1.1304, "nll_loss": 0.28258955478668213, "rewards/accuracies": 1.0, "rewards/chosen": -1.7074895367841236e-05, "rewards/margins": 0.2545586824417114, "rewards/rejected": -0.2545757591724396, "step": 8275 }, { "epoch": 5.723374827109267, "grad_norm": 8.79166316986084, "learning_rate": 2.3759028738281852e-05, "log_odds_chosen": 10.480096817016602, "log_odds_ratio": -7.668240141356364e-05, "logits/chosen": -0.5375388264656067, "logits/rejected": -0.5943214893341064, "logps/chosen": -0.0003251215966884047, "logps/rejected": -2.195033073425293, "loss": 0.6961, "nll_loss": 0.17401918768882751, "rewards/accuracies": 1.0, "rewards/chosen": -3.2512158213648945e-05, "rewards/margins": 0.2194707840681076, "rewards/rejected": -0.21950331330299377, "step": 8276 }, { "epoch": 5.724066390041494, "grad_norm": 11.357304573059082, "learning_rate": 2.37551867219917e-05, "log_odds_chosen": 10.792548179626465, "log_odds_ratio": -4.351916868472472e-05, "logits/chosen": -0.5698350071907043, "logits/rejected": -0.6279833316802979, "logps/chosen": -0.0003448599891271442, "logps/rejected": -2.2444381713867188, "loss": 0.6254, "nll_loss": 0.15633320808410645, "rewards/accuracies": 1.0, "rewards/chosen": -3.448599818511866e-05, "rewards/margins": 0.2244093418121338, "rewards/rejected": -0.22444380819797516, "step": 8277 }, { "epoch": 5.724757952973721, "grad_norm": 11.965065002441406, "learning_rate": 2.3751344705701554e-05, "log_odds_chosen": 10.649494171142578, "log_odds_ratio": -3.594794179662131e-05, "logits/chosen": -0.3620974123477936, "logits/rejected": -0.45498672127723694, "logps/chosen": -0.0004868964897468686, "logps/rejected": -2.1574292182922363, "loss": 0.4775, "nll_loss": 0.11938130855560303, "rewards/accuracies": 1.0, "rewards/chosen": -4.868964970228262e-05, "rewards/margins": 0.21569424867630005, "rewards/rejected": -0.21574293076992035, "step": 8278 }, { "epoch": 5.7254495159059475, "grad_norm": 7.889781951904297, "learning_rate": 2.3747502689411406e-05, "log_odds_chosen": 9.396888732910156, "log_odds_ratio": -0.0008098037214949727, "logits/chosen": -0.6873564720153809, "logits/rejected": -0.6436290144920349, "logps/chosen": -0.0009592488058842719, "logps/rejected": -1.727072834968567, "loss": 0.8465, "nll_loss": 0.21155577898025513, "rewards/accuracies": 1.0, "rewards/chosen": -9.592487913323566e-05, "rewards/margins": 0.17261135578155518, "rewards/rejected": -0.17270728945732117, "step": 8279 }, { "epoch": 5.726141078838174, "grad_norm": 14.263683319091797, "learning_rate": 2.3743660673121255e-05, "log_odds_chosen": 10.230692863464355, "log_odds_ratio": -0.00189583795145154, "logits/chosen": -0.6234297752380371, "logits/rejected": -0.7451103925704956, "logps/chosen": -0.013375808484852314, "logps/rejected": -2.628518581390381, "loss": 0.4403, "nll_loss": 0.10988589376211166, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013375808484852314, "rewards/margins": 0.261514276266098, "rewards/rejected": -0.2628518342971802, "step": 8280 }, { "epoch": 5.726832641770401, "grad_norm": 7.551321983337402, "learning_rate": 2.3739818656831105e-05, "log_odds_chosen": 9.866402626037598, "log_odds_ratio": -0.0008807494305074215, "logits/chosen": -0.5730447173118591, "logits/rejected": -0.5540003776550293, "logps/chosen": -0.000991776934824884, "logps/rejected": -1.3968546390533447, "loss": 0.439, "nll_loss": 0.10965709388256073, "rewards/accuracies": 1.0, "rewards/chosen": -9.917769057210535e-05, "rewards/margins": 0.13958629965782166, "rewards/rejected": -0.1396854668855667, "step": 8281 }, { "epoch": 5.727524204702628, "grad_norm": 8.65972900390625, "learning_rate": 2.3735976640540957e-05, "log_odds_chosen": 10.859101295471191, "log_odds_ratio": -6.727507570758462e-05, "logits/chosen": -0.6840611696243286, "logits/rejected": -0.6930698752403259, "logps/chosen": -0.0003340707626193762, "logps/rejected": -2.5840611457824707, "loss": 0.6857, "nll_loss": 0.17142435908317566, "rewards/accuracies": 1.0, "rewards/chosen": -3.3407079172320664e-05, "rewards/margins": 0.2583727240562439, "rewards/rejected": -0.2584061026573181, "step": 8282 }, { "epoch": 5.728215767634855, "grad_norm": 15.318198204040527, "learning_rate": 2.3732134624250806e-05, "log_odds_chosen": 9.745034217834473, "log_odds_ratio": -0.000523479888215661, "logits/chosen": -0.7020419836044312, "logits/rejected": -0.69991135597229, "logps/chosen": -0.0028948565013706684, "logps/rejected": -2.178514003753662, "loss": 0.6398, "nll_loss": 0.15990720689296722, "rewards/accuracies": 1.0, "rewards/chosen": -0.00028948564431630075, "rewards/margins": 0.21756193041801453, "rewards/rejected": -0.2178514152765274, "step": 8283 }, { "epoch": 5.728907330567082, "grad_norm": 10.038471221923828, "learning_rate": 2.372829260796066e-05, "log_odds_chosen": 10.421679496765137, "log_odds_ratio": -4.225273005431518e-05, "logits/chosen": -0.6500394940376282, "logits/rejected": -0.5979303121566772, "logps/chosen": -0.00040729602915234864, "logps/rejected": -2.4221601486206055, "loss": 1.2355, "nll_loss": 0.3088797926902771, "rewards/accuracies": 1.0, "rewards/chosen": -4.072960655321367e-05, "rewards/margins": 0.24217528104782104, "rewards/rejected": -0.24221599102020264, "step": 8284 }, { "epoch": 5.7295988934993085, "grad_norm": 7.296390533447266, "learning_rate": 2.372445059167051e-05, "log_odds_chosen": 10.552610397338867, "log_odds_ratio": -4.067786721861921e-05, "logits/chosen": -0.259600967168808, "logits/rejected": -0.38736289739608765, "logps/chosen": -0.00021202483912929893, "logps/rejected": -2.1145904064178467, "loss": 0.828, "nll_loss": 0.20700691640377045, "rewards/accuracies": 1.0, "rewards/chosen": -2.1202484276727773e-05, "rewards/margins": 0.21143783628940582, "rewards/rejected": -0.21145904064178467, "step": 8285 }, { "epoch": 5.730290456431535, "grad_norm": 9.419576644897461, "learning_rate": 2.372060857538036e-05, "log_odds_chosen": 9.820661544799805, "log_odds_ratio": -0.0002437801013002172, "logits/chosen": -0.41803741455078125, "logits/rejected": -0.5837389826774597, "logps/chosen": -0.0005225567147135735, "logps/rejected": -1.7315198183059692, "loss": 0.6794, "nll_loss": 0.16982489824295044, "rewards/accuracies": 1.0, "rewards/chosen": -5.225568020250648e-05, "rewards/margins": 0.17309972643852234, "rewards/rejected": -0.17315198481082916, "step": 8286 }, { "epoch": 5.730982019363762, "grad_norm": 9.923052787780762, "learning_rate": 2.3716766559090212e-05, "log_odds_chosen": 11.267467498779297, "log_odds_ratio": -2.611328636703547e-05, "logits/chosen": -0.2683943212032318, "logits/rejected": -0.3119713068008423, "logps/chosen": -0.0002064039435936138, "logps/rejected": -2.690495252609253, "loss": 0.721, "nll_loss": 0.1802428811788559, "rewards/accuracies": 1.0, "rewards/chosen": -2.064039472315926e-05, "rewards/margins": 0.269028902053833, "rewards/rejected": -0.2690495252609253, "step": 8287 }, { "epoch": 5.731673582295989, "grad_norm": 6.981366157531738, "learning_rate": 2.3712924542800065e-05, "log_odds_chosen": 10.127792358398438, "log_odds_ratio": -0.0002459138340782374, "logits/chosen": -0.10545951128005981, "logits/rejected": -0.11636913567781448, "logps/chosen": -0.0017972186906263232, "logps/rejected": -2.027972459793091, "loss": 0.7241, "nll_loss": 0.18100659549236298, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017972187197301537, "rewards/margins": 0.20261752605438232, "rewards/rejected": -0.20279724895954132, "step": 8288 }, { "epoch": 5.732365145228216, "grad_norm": 8.613247871398926, "learning_rate": 2.3709082526509914e-05, "log_odds_chosen": 10.950531959533691, "log_odds_ratio": -6.212839798536152e-05, "logits/chosen": -0.49585795402526855, "logits/rejected": -0.5744040012359619, "logps/chosen": -0.0002617794962134212, "logps/rejected": -2.5418455600738525, "loss": 1.2651, "nll_loss": 0.3162631690502167, "rewards/accuracies": 1.0, "rewards/chosen": -2.6177949621342123e-05, "rewards/margins": 0.2541584074497223, "rewards/rejected": -0.2541845738887787, "step": 8289 }, { "epoch": 5.733056708160443, "grad_norm": 5.971242427825928, "learning_rate": 2.3705240510219763e-05, "log_odds_chosen": 9.750470161437988, "log_odds_ratio": -0.00015549581439699978, "logits/chosen": -0.6675084829330444, "logits/rejected": -0.6928970813751221, "logps/chosen": -0.000278493738733232, "logps/rejected": -1.3103184700012207, "loss": 0.5449, "nll_loss": 0.13620929419994354, "rewards/accuracies": 1.0, "rewards/chosen": -2.7849375328514725e-05, "rewards/margins": 0.13100400567054749, "rewards/rejected": -0.1310318410396576, "step": 8290 }, { "epoch": 5.7337482710926695, "grad_norm": 9.22486400604248, "learning_rate": 2.3701398493929615e-05, "log_odds_chosen": 10.807173728942871, "log_odds_ratio": -4.7663743316661566e-05, "logits/chosen": -0.46042823791503906, "logits/rejected": -0.4756005108356476, "logps/chosen": -0.0004922206280753016, "logps/rejected": -2.3511030673980713, "loss": 0.9725, "nll_loss": 0.24313212931156158, "rewards/accuracies": 1.0, "rewards/chosen": -4.922206426272169e-05, "rewards/margins": 0.23506109416484833, "rewards/rejected": -0.2351103127002716, "step": 8291 }, { "epoch": 5.734439834024896, "grad_norm": 7.93418550491333, "learning_rate": 2.3697556477639464e-05, "log_odds_chosen": 10.170130729675293, "log_odds_ratio": -0.00011982273281319067, "logits/chosen": -0.5709888935089111, "logits/rejected": -0.5875977277755737, "logps/chosen": -0.0004376860451884568, "logps/rejected": -1.9982610940933228, "loss": 0.6622, "nll_loss": 0.16554518043994904, "rewards/accuracies": 1.0, "rewards/chosen": -4.3768603063654155e-05, "rewards/margins": 0.1997823566198349, "rewards/rejected": -0.19982610642910004, "step": 8292 }, { "epoch": 5.735131396957123, "grad_norm": 10.620789527893066, "learning_rate": 2.3693714461349317e-05, "log_odds_chosen": 10.807987213134766, "log_odds_ratio": -0.00010772298264782876, "logits/chosen": -0.4360056519508362, "logits/rejected": -0.5167050361633301, "logps/chosen": -0.0010975584154948592, "logps/rejected": -2.4652209281921387, "loss": 0.5281, "nll_loss": 0.13200679421424866, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010975583427352831, "rewards/margins": 0.24641233682632446, "rewards/rejected": -0.24652206897735596, "step": 8293 }, { "epoch": 5.73582295988935, "grad_norm": 8.644923210144043, "learning_rate": 2.368987244505917e-05, "log_odds_chosen": 10.958145141601562, "log_odds_ratio": -3.0786028219154105e-05, "logits/chosen": -0.44990062713623047, "logits/rejected": -0.5274561047554016, "logps/chosen": -0.00019371551752556115, "logps/rejected": -2.090069055557251, "loss": 0.7833, "nll_loss": 0.19581928849220276, "rewards/accuracies": 1.0, "rewards/chosen": -1.9371555026737042e-05, "rewards/margins": 0.2089875340461731, "rewards/rejected": -0.2090069055557251, "step": 8294 }, { "epoch": 5.736514522821577, "grad_norm": 4.456380367279053, "learning_rate": 2.368603042876902e-05, "log_odds_chosen": 9.164673805236816, "log_odds_ratio": -0.006682587321847677, "logits/chosen": -0.15113608539104462, "logits/rejected": -0.227360799908638, "logps/chosen": -0.0032797495368868113, "logps/rejected": -2.0092923641204834, "loss": 0.9032, "nll_loss": 0.22512412071228027, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032797493622638285, "rewards/margins": 0.2006012499332428, "rewards/rejected": -0.20092923939228058, "step": 8295 }, { "epoch": 5.737206085753804, "grad_norm": 7.534974098205566, "learning_rate": 2.368218841247887e-05, "log_odds_chosen": 10.547529220581055, "log_odds_ratio": -0.0002148733037756756, "logits/chosen": -0.44200295209884644, "logits/rejected": -0.5098553895950317, "logps/chosen": -0.0008662916370667517, "logps/rejected": -2.1029608249664307, "loss": 0.5069, "nll_loss": 0.1267036497592926, "rewards/accuracies": 1.0, "rewards/chosen": -8.662916661705822e-05, "rewards/margins": 0.210209459066391, "rewards/rejected": -0.21029609441757202, "step": 8296 }, { "epoch": 5.7378976486860305, "grad_norm": 9.358753204345703, "learning_rate": 2.3678346396188723e-05, "log_odds_chosen": 10.221595764160156, "log_odds_ratio": -0.0011085877195000648, "logits/chosen": -0.2000870555639267, "logits/rejected": -0.22448882460594177, "logps/chosen": -0.0008927193703129888, "logps/rejected": -1.9040881395339966, "loss": 1.1827, "nll_loss": 0.29555743932724, "rewards/accuracies": 1.0, "rewards/chosen": -8.927193994168192e-05, "rewards/margins": 0.19031953811645508, "rewards/rejected": -0.19040879607200623, "step": 8297 }, { "epoch": 5.738589211618257, "grad_norm": 7.413073539733887, "learning_rate": 2.3674504379898572e-05, "log_odds_chosen": 9.21097469329834, "log_odds_ratio": -0.00043296560761518776, "logits/chosen": -0.333681583404541, "logits/rejected": -0.4281213879585266, "logps/chosen": -0.0006029916112311184, "logps/rejected": -1.3785467147827148, "loss": 0.6211, "nll_loss": 0.15522557497024536, "rewards/accuracies": 1.0, "rewards/chosen": -6.029916039551608e-05, "rewards/margins": 0.1377943903207779, "rewards/rejected": -0.1378546804189682, "step": 8298 }, { "epoch": 5.739280774550484, "grad_norm": 6.0836992263793945, "learning_rate": 2.367066236360842e-05, "log_odds_chosen": 10.23131275177002, "log_odds_ratio": -0.00010292407387169078, "logits/chosen": -0.3584192395210266, "logits/rejected": -0.46633875370025635, "logps/chosen": -0.0003847597981803119, "logps/rejected": -1.8952624797821045, "loss": 0.5546, "nll_loss": 0.13863810896873474, "rewards/accuracies": 1.0, "rewards/chosen": -3.847598054562695e-05, "rewards/margins": 0.1894877552986145, "rewards/rejected": -0.18952623009681702, "step": 8299 }, { "epoch": 5.739972337482711, "grad_norm": 7.007051467895508, "learning_rate": 2.3666820347318274e-05, "log_odds_chosen": 9.6890869140625, "log_odds_ratio": -0.00029695930425077677, "logits/chosen": -0.7456952333450317, "logits/rejected": -0.7626982927322388, "logps/chosen": -0.011276169680058956, "logps/rejected": -2.1853461265563965, "loss": 0.7108, "nll_loss": 0.17767959833145142, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011276170844212174, "rewards/margins": 0.2174069881439209, "rewards/rejected": -0.21853460371494293, "step": 8300 }, { "epoch": 5.740663900414938, "grad_norm": 7.881728172302246, "learning_rate": 2.3662978331028123e-05, "log_odds_chosen": 10.861671447753906, "log_odds_ratio": -8.425668784184381e-05, "logits/chosen": -0.25384053587913513, "logits/rejected": -0.31320399045944214, "logps/chosen": -0.0002361613733228296, "logps/rejected": -2.5008726119995117, "loss": 0.601, "nll_loss": 0.15024977922439575, "rewards/accuracies": 1.0, "rewards/chosen": -2.361613769608084e-05, "rewards/margins": 0.2500636577606201, "rewards/rejected": -0.25008726119995117, "step": 8301 }, { "epoch": 5.741355463347165, "grad_norm": 5.908812999725342, "learning_rate": 2.3659136314737975e-05, "log_odds_chosen": 11.111687660217285, "log_odds_ratio": -3.3626787626417354e-05, "logits/chosen": -0.4688529372215271, "logits/rejected": -0.41200196743011475, "logps/chosen": -0.00020868267165496945, "logps/rejected": -2.5753095149993896, "loss": 0.8032, "nll_loss": 0.20080536603927612, "rewards/accuracies": 1.0, "rewards/chosen": -2.0868266801699065e-05, "rewards/margins": 0.25751006603240967, "rewards/rejected": -0.25753095746040344, "step": 8302 }, { "epoch": 5.7420470262793915, "grad_norm": 8.06871509552002, "learning_rate": 2.3655294298447828e-05, "log_odds_chosen": 9.761555671691895, "log_odds_ratio": -0.00042566165211610496, "logits/chosen": -0.6165575981140137, "logits/rejected": -0.6146994829177856, "logps/chosen": -0.00041026706458069384, "logps/rejected": -1.7203035354614258, "loss": 0.7944, "nll_loss": 0.19855532050132751, "rewards/accuracies": 1.0, "rewards/chosen": -4.1026702092494816e-05, "rewards/margins": 0.1719893217086792, "rewards/rejected": -0.17203034460544586, "step": 8303 }, { "epoch": 5.742738589211618, "grad_norm": 8.098464012145996, "learning_rate": 2.3651452282157677e-05, "log_odds_chosen": 10.305068016052246, "log_odds_ratio": -0.00018514314433559775, "logits/chosen": -0.7303239107131958, "logits/rejected": -0.6914010047912598, "logps/chosen": -0.0005240870523266494, "logps/rejected": -1.8838518857955933, "loss": 0.5746, "nll_loss": 0.14362111687660217, "rewards/accuracies": 1.0, "rewards/chosen": -5.240870814304799e-05, "rewards/margins": 0.18833279609680176, "rewards/rejected": -0.18838520348072052, "step": 8304 }, { "epoch": 5.743430152143845, "grad_norm": 8.639131546020508, "learning_rate": 2.364761026586753e-05, "log_odds_chosen": 11.053112983703613, "log_odds_ratio": -3.3354688639519736e-05, "logits/chosen": -0.3220217823982239, "logits/rejected": -0.3667411208152771, "logps/chosen": -0.00018558189913164824, "logps/rejected": -2.0954384803771973, "loss": 0.6619, "nll_loss": 0.1654653400182724, "rewards/accuracies": 1.0, "rewards/chosen": -1.8558190276962705e-05, "rewards/margins": 0.20952528715133667, "rewards/rejected": -0.2095438539981842, "step": 8305 }, { "epoch": 5.744121715076072, "grad_norm": 9.16308879852295, "learning_rate": 2.3643768249577382e-05, "log_odds_chosen": 9.564486503601074, "log_odds_ratio": -0.00025545209064148366, "logits/chosen": -0.4439648985862732, "logits/rejected": -0.4105756878852844, "logps/chosen": -0.0007857171585783362, "logps/rejected": -1.612653136253357, "loss": 1.0796, "nll_loss": 0.2698758542537689, "rewards/accuracies": 1.0, "rewards/chosen": -7.857171294745058e-05, "rewards/margins": 0.16118673980236053, "rewards/rejected": -0.1612653136253357, "step": 8306 }, { "epoch": 5.744813278008299, "grad_norm": 9.436599731445312, "learning_rate": 2.363992623328723e-05, "log_odds_chosen": 10.715935707092285, "log_odds_ratio": -3.4208103897981346e-05, "logits/chosen": -0.6278020143508911, "logits/rejected": -0.7652894258499146, "logps/chosen": -0.0002282711793668568, "logps/rejected": -1.8947447538375854, "loss": 0.7806, "nll_loss": 0.19513654708862305, "rewards/accuracies": 1.0, "rewards/chosen": -2.282711648149416e-05, "rewards/margins": 0.1894516497850418, "rewards/rejected": -0.18947447836399078, "step": 8307 }, { "epoch": 5.745504840940526, "grad_norm": 10.356780052185059, "learning_rate": 2.363608421699708e-05, "log_odds_chosen": 10.806076049804688, "log_odds_ratio": -2.597077764221467e-05, "logits/chosen": -0.5891835689544678, "logits/rejected": -0.6955010294914246, "logps/chosen": -0.00022716508829034865, "logps/rejected": -2.125861167907715, "loss": 1.0314, "nll_loss": 0.2578585147857666, "rewards/accuracies": 1.0, "rewards/chosen": -2.2716509192832746e-05, "rewards/margins": 0.2125634104013443, "rewards/rejected": -0.2125861495733261, "step": 8308 }, { "epoch": 5.746196403872752, "grad_norm": 6.441484451293945, "learning_rate": 2.3632242200706932e-05, "log_odds_chosen": 10.06994342803955, "log_odds_ratio": -0.00023832148872315884, "logits/chosen": -0.2060949206352234, "logits/rejected": -0.24644814431667328, "logps/chosen": -0.001094201346859336, "logps/rejected": -2.2449793815612793, "loss": 0.6008, "nll_loss": 0.15017682313919067, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010942013614112511, "rewards/margins": 0.2243885099887848, "rewards/rejected": -0.2244979292154312, "step": 8309 }, { "epoch": 5.746887966804979, "grad_norm": 6.610924243927002, "learning_rate": 2.362840018441678e-05, "log_odds_chosen": 10.011903762817383, "log_odds_ratio": -7.450394332408905e-05, "logits/chosen": -0.5421202182769775, "logits/rejected": -0.48485058546066284, "logps/chosen": -0.0002872304758056998, "logps/rejected": -1.7974812984466553, "loss": 0.9211, "nll_loss": 0.23026490211486816, "rewards/accuracies": 1.0, "rewards/chosen": -2.8723048671963625e-05, "rewards/margins": 0.17971941828727722, "rewards/rejected": -0.17974814772605896, "step": 8310 }, { "epoch": 5.747579529737206, "grad_norm": 7.050563812255859, "learning_rate": 2.3624558168126634e-05, "log_odds_chosen": 12.180794715881348, "log_odds_ratio": -6.411512913473416e-06, "logits/chosen": -0.4151165783405304, "logits/rejected": -0.5160906910896301, "logps/chosen": -0.0001374170824419707, "logps/rejected": -2.9264304637908936, "loss": 0.5877, "nll_loss": 0.14693555235862732, "rewards/accuracies": 1.0, "rewards/chosen": -1.3741709153691772e-05, "rewards/margins": 0.29262930154800415, "rewards/rejected": -0.29264307022094727, "step": 8311 }, { "epoch": 5.748271092669433, "grad_norm": 6.463709354400635, "learning_rate": 2.3620716151836486e-05, "log_odds_chosen": 9.185981750488281, "log_odds_ratio": -0.010154437273740768, "logits/chosen": -0.40779149532318115, "logits/rejected": -0.3674390912055969, "logps/chosen": -0.003874379675835371, "logps/rejected": -1.2728424072265625, "loss": 0.7317, "nll_loss": 0.18190543353557587, "rewards/accuracies": 1.0, "rewards/chosen": -0.000387437961762771, "rewards/margins": 0.12689681351184845, "rewards/rejected": -0.12728425860404968, "step": 8312 }, { "epoch": 5.74896265560166, "grad_norm": 6.199563980102539, "learning_rate": 2.3616874135546335e-05, "log_odds_chosen": 10.591329574584961, "log_odds_ratio": -9.259778744308278e-05, "logits/chosen": -0.5904867053031921, "logits/rejected": -0.6726003885269165, "logps/chosen": -0.0002847728901542723, "logps/rejected": -2.216808557510376, "loss": 0.5315, "nll_loss": 0.13287466764450073, "rewards/accuracies": 1.0, "rewards/chosen": -2.8477286832639948e-05, "rewards/margins": 0.22165238857269287, "rewards/rejected": -0.22168086469173431, "step": 8313 }, { "epoch": 5.749654218533887, "grad_norm": 7.432782173156738, "learning_rate": 2.3613032119256188e-05, "log_odds_chosen": 9.365463256835938, "log_odds_ratio": -0.00235193083062768, "logits/chosen": -0.6185011267662048, "logits/rejected": -0.6692292094230652, "logps/chosen": -0.0015238788910210133, "logps/rejected": -1.8370863199234009, "loss": 0.7764, "nll_loss": 0.19387227296829224, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001523878745501861, "rewards/margins": 0.18355624377727509, "rewards/rejected": -0.18370862305164337, "step": 8314 }, { "epoch": 5.750345781466113, "grad_norm": 9.817612648010254, "learning_rate": 2.360919010296604e-05, "log_odds_chosen": 10.95273494720459, "log_odds_ratio": -6.1401427956298e-05, "logits/chosen": -0.7160186767578125, "logits/rejected": -0.801353931427002, "logps/chosen": -0.00029561432893387973, "logps/rejected": -2.404662847518921, "loss": 0.7411, "nll_loss": 0.18526718020439148, "rewards/accuracies": 1.0, "rewards/chosen": -2.9561435439973138e-05, "rewards/margins": 0.24043673276901245, "rewards/rejected": -0.24046629667282104, "step": 8315 }, { "epoch": 5.75103734439834, "grad_norm": 6.344141960144043, "learning_rate": 2.360534808667589e-05, "log_odds_chosen": 9.526826858520508, "log_odds_ratio": -0.00014862409443594515, "logits/chosen": -0.5527998208999634, "logits/rejected": -0.6022185683250427, "logps/chosen": -0.0007526268600486219, "logps/rejected": -1.5950901508331299, "loss": 0.9076, "nll_loss": 0.22688600420951843, "rewards/accuracies": 1.0, "rewards/chosen": -7.526268746005371e-05, "rewards/margins": 0.1594337522983551, "rewards/rejected": -0.15950901806354523, "step": 8316 }, { "epoch": 5.751728907330567, "grad_norm": 9.710481643676758, "learning_rate": 2.3601506070385738e-05, "log_odds_chosen": 10.60885238647461, "log_odds_ratio": -4.971360249328427e-05, "logits/chosen": -0.39266735315322876, "logits/rejected": -0.5182227492332458, "logps/chosen": -0.00034685549326241016, "logps/rejected": -2.6334073543548584, "loss": 0.7297, "nll_loss": 0.18243081867694855, "rewards/accuracies": 1.0, "rewards/chosen": -3.468555223662406e-05, "rewards/margins": 0.2633060812950134, "rewards/rejected": -0.2633407413959503, "step": 8317 }, { "epoch": 5.752420470262794, "grad_norm": 6.257876396179199, "learning_rate": 2.359766405409559e-05, "log_odds_chosen": 9.356744766235352, "log_odds_ratio": -0.0002441833494231105, "logits/chosen": -0.6548734307289124, "logits/rejected": -0.7048206925392151, "logps/chosen": -0.0006509888335131109, "logps/rejected": -1.4409332275390625, "loss": 0.4796, "nll_loss": 0.11986897885799408, "rewards/accuracies": 1.0, "rewards/chosen": -6.509888771688566e-05, "rewards/margins": 0.14402823150157928, "rewards/rejected": -0.1440933346748352, "step": 8318 }, { "epoch": 5.753112033195021, "grad_norm": 5.713196754455566, "learning_rate": 2.359382203780544e-05, "log_odds_chosen": 10.923623085021973, "log_odds_ratio": -4.0328122850041837e-05, "logits/chosen": 0.05549601465463638, "logits/rejected": 0.0023378878831863403, "logps/chosen": -0.0001473123993491754, "logps/rejected": -2.0698325634002686, "loss": 1.0181, "nll_loss": 0.25452303886413574, "rewards/accuracies": 1.0, "rewards/chosen": -1.4731239389220718e-05, "rewards/margins": 0.2069685459136963, "rewards/rejected": -0.2069832682609558, "step": 8319 }, { "epoch": 5.753803596127248, "grad_norm": 9.718740463256836, "learning_rate": 2.3589980021515292e-05, "log_odds_chosen": 9.84650993347168, "log_odds_ratio": -0.00020443143148440868, "logits/chosen": -0.5786601305007935, "logits/rejected": -0.5279185771942139, "logps/chosen": -0.0007319730357266963, "logps/rejected": -1.9794467687606812, "loss": 1.0748, "nll_loss": 0.2686849534511566, "rewards/accuracies": 1.0, "rewards/chosen": -7.319729775190353e-05, "rewards/margins": 0.19787146151065826, "rewards/rejected": -0.19794468581676483, "step": 8320 }, { "epoch": 5.754495159059474, "grad_norm": 7.701869964599609, "learning_rate": 2.3586138005225145e-05, "log_odds_chosen": 10.897615432739258, "log_odds_ratio": -5.009109008824453e-05, "logits/chosen": -0.20346534252166748, "logits/rejected": -0.18375641107559204, "logps/chosen": -0.00037128583062440157, "logps/rejected": -2.7812089920043945, "loss": 1.3457, "nll_loss": 0.33642303943634033, "rewards/accuracies": 1.0, "rewards/chosen": -3.71285859728232e-05, "rewards/margins": 0.27808377146720886, "rewards/rejected": -0.27812087535858154, "step": 8321 }, { "epoch": 5.755186721991701, "grad_norm": 9.211677551269531, "learning_rate": 2.3582295988934994e-05, "log_odds_chosen": 9.959250450134277, "log_odds_ratio": -0.00010035712330136448, "logits/chosen": -0.5219177007675171, "logits/rejected": -0.46816885471343994, "logps/chosen": -0.0002476215595379472, "logps/rejected": -1.614699363708496, "loss": 0.9682, "nll_loss": 0.2420455813407898, "rewards/accuracies": 1.0, "rewards/chosen": -2.4762153771007434e-05, "rewards/margins": 0.16144520044326782, "rewards/rejected": -0.161469966173172, "step": 8322 }, { "epoch": 5.755878284923928, "grad_norm": 8.527338981628418, "learning_rate": 2.3578453972644846e-05, "log_odds_chosen": 10.829414367675781, "log_odds_ratio": -2.493345527909696e-05, "logits/chosen": -0.22919800877571106, "logits/rejected": -0.3545800447463989, "logps/chosen": -0.00017396353359799832, "logps/rejected": -2.0452499389648438, "loss": 0.6785, "nll_loss": 0.16962985694408417, "rewards/accuracies": 1.0, "rewards/chosen": -1.7396354451193474e-05, "rewards/margins": 0.20450758934020996, "rewards/rejected": -0.20452499389648438, "step": 8323 }, { "epoch": 5.756569847856155, "grad_norm": 14.861743927001953, "learning_rate": 2.35746119563547e-05, "log_odds_chosen": 9.110204696655273, "log_odds_ratio": -0.0011775546008720994, "logits/chosen": -0.42979809641838074, "logits/rejected": -0.4159172773361206, "logps/chosen": -0.0021905952598899603, "logps/rejected": -1.6844552755355835, "loss": 0.8767, "nll_loss": 0.21905720233917236, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021905952598899603, "rewards/margins": 0.1682264804840088, "rewards/rejected": -0.16844552755355835, "step": 8324 }, { "epoch": 5.757261410788382, "grad_norm": 7.052018642425537, "learning_rate": 2.3570769940064548e-05, "log_odds_chosen": 10.287298202514648, "log_odds_ratio": -0.0001158514860435389, "logits/chosen": -0.0011706706136465073, "logits/rejected": -0.10996723920106888, "logps/chosen": -0.0002825699921231717, "logps/rejected": -2.024381399154663, "loss": 0.8779, "nll_loss": 0.2194562554359436, "rewards/accuracies": 1.0, "rewards/chosen": -2.8256998120923527e-05, "rewards/margins": 0.20240987837314606, "rewards/rejected": -0.2024381309747696, "step": 8325 }, { "epoch": 5.7579529737206085, "grad_norm": 6.528735160827637, "learning_rate": 2.3566927923774397e-05, "log_odds_chosen": 10.121028900146484, "log_odds_ratio": -0.0001131187891587615, "logits/chosen": -0.5546838641166687, "logits/rejected": -0.48821526765823364, "logps/chosen": -0.0006948888767510653, "logps/rejected": -2.0356459617614746, "loss": 0.3869, "nll_loss": 0.09670525789260864, "rewards/accuracies": 1.0, "rewards/chosen": -6.948888767510653e-05, "rewards/margins": 0.2034951150417328, "rewards/rejected": -0.2035646140575409, "step": 8326 }, { "epoch": 5.758644536652835, "grad_norm": 9.359241485595703, "learning_rate": 2.356308590748425e-05, "log_odds_chosen": 10.889639854431152, "log_odds_ratio": -4.8169935325859115e-05, "logits/chosen": -0.1331821084022522, "logits/rejected": -0.2709449529647827, "logps/chosen": -0.0002415928611299023, "logps/rejected": -2.5324788093566895, "loss": 0.877, "nll_loss": 0.21924549341201782, "rewards/accuracies": 1.0, "rewards/chosen": -2.4159287931979634e-05, "rewards/margins": 0.253223717212677, "rewards/rejected": -0.25324785709381104, "step": 8327 }, { "epoch": 5.759336099585062, "grad_norm": 10.904271125793457, "learning_rate": 2.3559243891194098e-05, "log_odds_chosen": 11.361100196838379, "log_odds_ratio": -2.6015641196863726e-05, "logits/chosen": -0.5011187195777893, "logits/rejected": -0.5023812055587769, "logps/chosen": -0.0006036162376403809, "logps/rejected": -3.1411614418029785, "loss": 0.9301, "nll_loss": 0.2325301468372345, "rewards/accuracies": 1.0, "rewards/chosen": -6.036162449163385e-05, "rewards/margins": 0.31405580043792725, "rewards/rejected": -0.31411615014076233, "step": 8328 }, { "epoch": 5.760027662517289, "grad_norm": 5.806125164031982, "learning_rate": 2.355540187490395e-05, "log_odds_chosen": 9.143113136291504, "log_odds_ratio": -0.00015040890139061958, "logits/chosen": -0.3005862534046173, "logits/rejected": -0.32919326424598694, "logps/chosen": -0.0002952023351099342, "logps/rejected": -1.053991436958313, "loss": 0.842, "nll_loss": 0.21049299836158752, "rewards/accuracies": 1.0, "rewards/chosen": -2.952023351099342e-05, "rewards/margins": 0.10536962747573853, "rewards/rejected": -0.10539913922548294, "step": 8329 }, { "epoch": 5.760719225449516, "grad_norm": 6.955360412597656, "learning_rate": 2.35515598586138e-05, "log_odds_chosen": 9.990994453430176, "log_odds_ratio": -0.00024183614004869014, "logits/chosen": -0.36879050731658936, "logits/rejected": -0.40905478596687317, "logps/chosen": -0.0004077651828993112, "logps/rejected": -1.8470849990844727, "loss": 1.0752, "nll_loss": 0.268771231174469, "rewards/accuracies": 1.0, "rewards/chosen": -4.0776521927909926e-05, "rewards/margins": 0.18466772139072418, "rewards/rejected": -0.18470850586891174, "step": 8330 }, { "epoch": 5.761410788381743, "grad_norm": 5.922750473022461, "learning_rate": 2.3547717842323652e-05, "log_odds_chosen": 11.753957748413086, "log_odds_ratio": -2.4064009267021902e-05, "logits/chosen": -0.8286944031715393, "logits/rejected": -0.8924739360809326, "logps/chosen": -0.00015205403906293213, "logps/rejected": -2.853480577468872, "loss": 0.4743, "nll_loss": 0.11856278777122498, "rewards/accuracies": 1.0, "rewards/chosen": -1.5205403542495333e-05, "rewards/margins": 0.2853328287601471, "rewards/rejected": -0.2853480577468872, "step": 8331 }, { "epoch": 5.7621023513139695, "grad_norm": 7.735132217407227, "learning_rate": 2.3543875826033505e-05, "log_odds_chosen": 11.60269546508789, "log_odds_ratio": -4.3830594222527e-05, "logits/chosen": -0.26348862051963806, "logits/rejected": -0.32002776861190796, "logps/chosen": -0.00020181875152047724, "logps/rejected": -2.8781893253326416, "loss": 0.6063, "nll_loss": 0.15156348049640656, "rewards/accuracies": 1.0, "rewards/chosen": -2.0181880245218053e-05, "rewards/margins": 0.28779876232147217, "rewards/rejected": -0.28781893849372864, "step": 8332 }, { "epoch": 5.762793914246196, "grad_norm": 10.662870407104492, "learning_rate": 2.3540033809743354e-05, "log_odds_chosen": 9.17403507232666, "log_odds_ratio": -0.006309689488261938, "logits/chosen": -0.21587823331356049, "logits/rejected": -0.20806483924388885, "logps/chosen": -0.0029637019615620375, "logps/rejected": -1.7408331632614136, "loss": 0.7234, "nll_loss": 0.18022409081459045, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029637018451467156, "rewards/margins": 0.1737869530916214, "rewards/rejected": -0.17408332228660583, "step": 8333 }, { "epoch": 5.763485477178423, "grad_norm": 7.98430871963501, "learning_rate": 2.3536191793453206e-05, "log_odds_chosen": 10.42989730834961, "log_odds_ratio": -0.00014048349112272263, "logits/chosen": -0.5470676422119141, "logits/rejected": -0.7323271632194519, "logps/chosen": -0.00022380598238669336, "logps/rejected": -1.730640172958374, "loss": 1.0507, "nll_loss": 0.26266804337501526, "rewards/accuracies": 1.0, "rewards/chosen": -2.2380598238669336e-05, "rewards/margins": 0.17304162681102753, "rewards/rejected": -0.17306400835514069, "step": 8334 }, { "epoch": 5.76417704011065, "grad_norm": 8.684687614440918, "learning_rate": 2.353234977716306e-05, "log_odds_chosen": 10.68388557434082, "log_odds_ratio": -6.952737021492794e-05, "logits/chosen": -0.6682871580123901, "logits/rejected": -0.6703358888626099, "logps/chosen": -0.0003899956354871392, "logps/rejected": -2.3728811740875244, "loss": 0.8976, "nll_loss": 0.22438865900039673, "rewards/accuracies": 1.0, "rewards/chosen": -3.899956573150121e-05, "rewards/margins": 0.23724913597106934, "rewards/rejected": -0.23728811740875244, "step": 8335 }, { "epoch": 5.764868603042877, "grad_norm": 5.415703773498535, "learning_rate": 2.3528507760872908e-05, "log_odds_chosen": 9.89802360534668, "log_odds_ratio": -0.0012788517633453012, "logits/chosen": -0.927123486995697, "logits/rejected": -0.8529543280601501, "logps/chosen": -0.0020643535535782576, "logps/rejected": -1.9083293676376343, "loss": 1.323, "nll_loss": 0.33061519265174866, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020643536117859185, "rewards/margins": 0.19062651693820953, "rewards/rejected": -0.1908329427242279, "step": 8336 }, { "epoch": 5.765560165975104, "grad_norm": 14.877506256103516, "learning_rate": 2.3524665744582757e-05, "log_odds_chosen": 11.388561248779297, "log_odds_ratio": -2.0636227418435737e-05, "logits/chosen": -1.0710829496383667, "logits/rejected": -1.0672526359558105, "logps/chosen": -0.00018354799249209464, "logps/rejected": -2.684891700744629, "loss": 0.9869, "nll_loss": 0.24672679603099823, "rewards/accuracies": 1.0, "rewards/chosen": -1.8354799976805225e-05, "rewards/margins": 0.268470823764801, "rewards/rejected": -0.26848918199539185, "step": 8337 }, { "epoch": 5.7662517289073305, "grad_norm": 6.486876964569092, "learning_rate": 2.352082372829261e-05, "log_odds_chosen": 10.3418550491333, "log_odds_ratio": -4.532843013294041e-05, "logits/chosen": -0.5621716976165771, "logits/rejected": -0.5832915306091309, "logps/chosen": -0.0004285993636585772, "logps/rejected": -2.2560110092163086, "loss": 0.5395, "nll_loss": 0.13485826551914215, "rewards/accuracies": 1.0, "rewards/chosen": -4.2859934183070436e-05, "rewards/margins": 0.22555822134017944, "rewards/rejected": -0.22560109198093414, "step": 8338 }, { "epoch": 5.766943291839557, "grad_norm": 8.831564903259277, "learning_rate": 2.3516981712002458e-05, "log_odds_chosen": 9.910039901733398, "log_odds_ratio": -0.0002273163408972323, "logits/chosen": -0.9114158153533936, "logits/rejected": -0.9666690826416016, "logps/chosen": -0.010054264217615128, "logps/rejected": -2.3426384925842285, "loss": 0.7889, "nll_loss": 0.1972012221813202, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010054263984784484, "rewards/margins": 0.2332584261894226, "rewards/rejected": -0.2342638522386551, "step": 8339 }, { "epoch": 5.767634854771784, "grad_norm": 12.703185081481934, "learning_rate": 2.351313969571231e-05, "log_odds_chosen": 10.88131332397461, "log_odds_ratio": -0.00015934542170725763, "logits/chosen": -0.9918793439865112, "logits/rejected": -1.0424768924713135, "logps/chosen": -0.00022023438941687346, "logps/rejected": -2.2530806064605713, "loss": 0.983, "nll_loss": 0.2457437366247177, "rewards/accuracies": 1.0, "rewards/chosen": -2.2023437850293703e-05, "rewards/margins": 0.22528605163097382, "rewards/rejected": -0.22530806064605713, "step": 8340 }, { "epoch": 5.768326417704011, "grad_norm": 11.430049896240234, "learning_rate": 2.3509297679422163e-05, "log_odds_chosen": 10.878573417663574, "log_odds_ratio": -3.2517738873139024e-05, "logits/chosen": -0.5243549942970276, "logits/rejected": -0.507844090461731, "logps/chosen": -0.00019536682520993054, "logps/rejected": -2.2270255088806152, "loss": 0.7372, "nll_loss": 0.18429552018642426, "rewards/accuracies": 1.0, "rewards/chosen": -1.9536684703780338e-05, "rewards/margins": 0.22268301248550415, "rewards/rejected": -0.22270256280899048, "step": 8341 }, { "epoch": 5.769017980636238, "grad_norm": 7.633252143859863, "learning_rate": 2.3505455663132012e-05, "log_odds_chosen": 10.390115737915039, "log_odds_ratio": -0.000257474574027583, "logits/chosen": -0.5723543763160706, "logits/rejected": -0.537143349647522, "logps/chosen": -0.0002853441401384771, "logps/rejected": -2.1401093006134033, "loss": 1.1687, "nll_loss": 0.29213932156562805, "rewards/accuracies": 1.0, "rewards/chosen": -2.8534417651826516e-05, "rewards/margins": 0.21398240327835083, "rewards/rejected": -0.21401095390319824, "step": 8342 }, { "epoch": 5.769709543568465, "grad_norm": 5.9149169921875, "learning_rate": 2.3501613646841865e-05, "log_odds_chosen": 10.216432571411133, "log_odds_ratio": -6.506162026198581e-05, "logits/chosen": -0.3109501898288727, "logits/rejected": -0.35374629497528076, "logps/chosen": -0.00036072812508791685, "logps/rejected": -1.8220794200897217, "loss": 0.5343, "nll_loss": 0.13356661796569824, "rewards/accuracies": 1.0, "rewards/chosen": -3.607281541917473e-05, "rewards/margins": 0.18217185139656067, "rewards/rejected": -0.18220794200897217, "step": 8343 }, { "epoch": 5.7704011065006915, "grad_norm": 6.765550136566162, "learning_rate": 2.3497771630551717e-05, "log_odds_chosen": 10.58395767211914, "log_odds_ratio": -0.00010712833318393677, "logits/chosen": -0.22515490651130676, "logits/rejected": -0.4198504090309143, "logps/chosen": -0.00018988562806043774, "logps/rejected": -1.9974274635314941, "loss": 0.6781, "nll_loss": 0.16950182616710663, "rewards/accuracies": 1.0, "rewards/chosen": -1.8988564988831058e-05, "rewards/margins": 0.1997237652540207, "rewards/rejected": -0.19974276423454285, "step": 8344 }, { "epoch": 5.771092669432918, "grad_norm": 6.1564154624938965, "learning_rate": 2.3493929614261566e-05, "log_odds_chosen": 9.40694522857666, "log_odds_ratio": -0.00018372457998339087, "logits/chosen": -0.5075855851173401, "logits/rejected": -0.5839619040489197, "logps/chosen": -0.000857932900544256, "logps/rejected": -1.6731497049331665, "loss": 0.7596, "nll_loss": 0.1898796111345291, "rewards/accuracies": 1.0, "rewards/chosen": -8.579329733038321e-05, "rewards/margins": 0.16722919046878815, "rewards/rejected": -0.16731497645378113, "step": 8345 }, { "epoch": 5.771784232365145, "grad_norm": 13.756538391113281, "learning_rate": 2.3490087597971415e-05, "log_odds_chosen": 10.903304100036621, "log_odds_ratio": -6.926347123226151e-05, "logits/chosen": -0.8468804359436035, "logits/rejected": -0.880153238773346, "logps/chosen": -0.00027108131325803697, "logps/rejected": -2.4493632316589355, "loss": 0.8235, "nll_loss": 0.2058658003807068, "rewards/accuracies": 1.0, "rewards/chosen": -2.7108131689601578e-05, "rewards/margins": 0.24490921199321747, "rewards/rejected": -0.24493633210659027, "step": 8346 }, { "epoch": 5.772475795297372, "grad_norm": 5.244549751281738, "learning_rate": 2.3486245581681268e-05, "log_odds_chosen": 8.939018249511719, "log_odds_ratio": -0.0004187691956758499, "logits/chosen": -0.5194487571716309, "logits/rejected": -0.6112475395202637, "logps/chosen": -0.0007814795826561749, "logps/rejected": -1.6546971797943115, "loss": 0.8936, "nll_loss": 0.2233474999666214, "rewards/accuracies": 1.0, "rewards/chosen": -7.814796117600054e-05, "rewards/margins": 0.16539156436920166, "rewards/rejected": -0.1654697060585022, "step": 8347 }, { "epoch": 5.773167358229599, "grad_norm": 6.745221138000488, "learning_rate": 2.3482403565391117e-05, "log_odds_chosen": 10.619478225708008, "log_odds_ratio": -7.211588672362268e-05, "logits/chosen": -0.386459618806839, "logits/rejected": -0.5498947501182556, "logps/chosen": -0.00022541302314493805, "logps/rejected": -1.8751081228256226, "loss": 0.9235, "nll_loss": 0.23086822032928467, "rewards/accuracies": 1.0, "rewards/chosen": -2.25413004955044e-05, "rewards/margins": 0.18748828768730164, "rewards/rejected": -0.18751080334186554, "step": 8348 }, { "epoch": 5.773858921161826, "grad_norm": 8.773056030273438, "learning_rate": 2.347856154910097e-05, "log_odds_chosen": 11.348367691040039, "log_odds_ratio": -5.642771429847926e-05, "logits/chosen": 0.013702712953090668, "logits/rejected": -0.09357339143753052, "logps/chosen": -0.00038130112807266414, "logps/rejected": -2.4233832359313965, "loss": 1.7573, "nll_loss": 0.43933171033859253, "rewards/accuracies": 1.0, "rewards/chosen": -3.813011062447913e-05, "rewards/margins": 0.24230018258094788, "rewards/rejected": -0.24233832955360413, "step": 8349 }, { "epoch": 5.7745504840940525, "grad_norm": 7.7265143394470215, "learning_rate": 2.347471953281082e-05, "log_odds_chosen": 10.363457679748535, "log_odds_ratio": -0.00023995987430680543, "logits/chosen": -0.232026606798172, "logits/rejected": -0.28352096676826477, "logps/chosen": -0.005336123518645763, "logps/rejected": -2.446638584136963, "loss": 0.6538, "nll_loss": 0.16343380510807037, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005336122703738511, "rewards/margins": 0.24413025379180908, "rewards/rejected": -0.24466384947299957, "step": 8350 }, { "epoch": 5.775242047026279, "grad_norm": 9.928838729858398, "learning_rate": 2.347087751652067e-05, "log_odds_chosen": 9.737348556518555, "log_odds_ratio": -0.00023867032723501325, "logits/chosen": -0.06464526057243347, "logits/rejected": -0.08520027995109558, "logps/chosen": -0.0003639034694060683, "logps/rejected": -1.6743927001953125, "loss": 0.6192, "nll_loss": 0.15477892756462097, "rewards/accuracies": 1.0, "rewards/chosen": -3.639034548541531e-05, "rewards/margins": 0.16740287840366364, "rewards/rejected": -0.167439267039299, "step": 8351 }, { "epoch": 5.775933609958506, "grad_norm": 10.34325885772705, "learning_rate": 2.3467035500230523e-05, "log_odds_chosen": 11.383646011352539, "log_odds_ratio": -2.3865615730755962e-05, "logits/chosen": -0.4863215684890747, "logits/rejected": -0.634797990322113, "logps/chosen": -0.00019501753558870405, "logps/rejected": -2.7335824966430664, "loss": 0.6773, "nll_loss": 0.16931481659412384, "rewards/accuracies": 1.0, "rewards/chosen": -1.9501752831274644e-05, "rewards/margins": 0.2733387351036072, "rewards/rejected": -0.27335822582244873, "step": 8352 }, { "epoch": 5.776625172890733, "grad_norm": 6.895632266998291, "learning_rate": 2.3463193483940375e-05, "log_odds_chosen": 10.131932258605957, "log_odds_ratio": -7.790548261255026e-05, "logits/chosen": -0.478166788816452, "logits/rejected": -0.4578874707221985, "logps/chosen": -0.00029212809749878943, "logps/rejected": -1.9623818397521973, "loss": 0.4387, "nll_loss": 0.10965972393751144, "rewards/accuracies": 1.0, "rewards/chosen": -2.921280793088954e-05, "rewards/margins": 0.19620898365974426, "rewards/rejected": -0.1962381899356842, "step": 8353 }, { "epoch": 5.77731673582296, "grad_norm": 25.984107971191406, "learning_rate": 2.3459351467650224e-05, "log_odds_chosen": 8.111963272094727, "log_odds_ratio": -0.6164579391479492, "logits/chosen": -0.5472182035446167, "logits/rejected": -0.5658654570579529, "logps/chosen": -0.07941090315580368, "logps/rejected": -1.5097594261169434, "loss": 1.2514, "nll_loss": 0.25121480226516724, "rewards/accuracies": 0.875, "rewards/chosen": -0.007941090501844883, "rewards/margins": 0.14303484559059143, "rewards/rejected": -0.15097594261169434, "step": 8354 }, { "epoch": 5.778008298755187, "grad_norm": 7.861454963684082, "learning_rate": 2.3455509451360074e-05, "log_odds_chosen": 11.025238037109375, "log_odds_ratio": -4.6407567424466833e-05, "logits/chosen": -0.7130393981933594, "logits/rejected": -0.7995960712432861, "logps/chosen": -0.0007563336985185742, "logps/rejected": -2.4245080947875977, "loss": 0.5134, "nll_loss": 0.1283499151468277, "rewards/accuracies": 1.0, "rewards/chosen": -7.563337567262352e-05, "rewards/margins": 0.2423751950263977, "rewards/rejected": -0.24245081841945648, "step": 8355 }, { "epoch": 5.7786998616874135, "grad_norm": 6.390862941741943, "learning_rate": 2.3451667435069926e-05, "log_odds_chosen": 9.716100692749023, "log_odds_ratio": -0.00018891274521593004, "logits/chosen": -0.27408623695373535, "logits/rejected": -0.37599632143974304, "logps/chosen": -0.0005538674304261804, "logps/rejected": -1.9498720169067383, "loss": 0.4609, "nll_loss": 0.11519482731819153, "rewards/accuracies": 1.0, "rewards/chosen": -5.538674668059684e-05, "rewards/margins": 0.19493183493614197, "rewards/rejected": -0.1949872076511383, "step": 8356 }, { "epoch": 5.77939142461964, "grad_norm": 7.093240737915039, "learning_rate": 2.3447825418779775e-05, "log_odds_chosen": 9.832173347473145, "log_odds_ratio": -0.00016536461771465838, "logits/chosen": -0.534382700920105, "logits/rejected": -0.6079778075218201, "logps/chosen": -0.00042304108501411974, "logps/rejected": -1.752963662147522, "loss": 0.5897, "nll_loss": 0.14741525053977966, "rewards/accuracies": 1.0, "rewards/chosen": -4.230411286698654e-05, "rewards/margins": 0.17525407671928406, "rewards/rejected": -0.1752963811159134, "step": 8357 }, { "epoch": 5.780082987551867, "grad_norm": 4.863102436065674, "learning_rate": 2.3443983402489627e-05, "log_odds_chosen": 10.297584533691406, "log_odds_ratio": -9.968863741960377e-05, "logits/chosen": -0.21967944502830505, "logits/rejected": -0.31562548875808716, "logps/chosen": -0.00019475305452942848, "logps/rejected": -1.7844955921173096, "loss": 0.44, "nll_loss": 0.10999009013175964, "rewards/accuracies": 1.0, "rewards/chosen": -1.9475304725347087e-05, "rewards/margins": 0.17843008041381836, "rewards/rejected": -0.1784495711326599, "step": 8358 }, { "epoch": 5.780774550484094, "grad_norm": 5.4476752281188965, "learning_rate": 2.344014138619948e-05, "log_odds_chosen": 10.860854148864746, "log_odds_ratio": -3.658882633317262e-05, "logits/chosen": -0.47734540700912476, "logits/rejected": -0.4496386647224426, "logps/chosen": -0.00026548956520855427, "logps/rejected": -2.257863759994507, "loss": 1.2608, "nll_loss": 0.3152031898498535, "rewards/accuracies": 1.0, "rewards/chosen": -2.654895797604695e-05, "rewards/margins": 0.22575980424880981, "rewards/rejected": -0.22578637301921844, "step": 8359 }, { "epoch": 5.781466113416321, "grad_norm": 5.5818705558776855, "learning_rate": 2.343629936990933e-05, "log_odds_chosen": 10.45132064819336, "log_odds_ratio": -0.0004984838888049126, "logits/chosen": -0.627118706703186, "logits/rejected": -0.6624408960342407, "logps/chosen": -0.0015973912086337805, "logps/rejected": -2.520289421081543, "loss": 0.7724, "nll_loss": 0.19305981695652008, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015973913832567632, "rewards/margins": 0.25186920166015625, "rewards/rejected": -0.2520289421081543, "step": 8360 }, { "epoch": 5.782157676348548, "grad_norm": 10.007711410522461, "learning_rate": 2.343245735361918e-05, "log_odds_chosen": 10.941266059875488, "log_odds_ratio": -6.014830432832241e-05, "logits/chosen": -0.5361988544464111, "logits/rejected": -0.5562364459037781, "logps/chosen": -0.0003670321893878281, "logps/rejected": -2.7064778804779053, "loss": 0.6319, "nll_loss": 0.15796193480491638, "rewards/accuracies": 1.0, "rewards/chosen": -3.670322257676162e-05, "rewards/margins": 0.27061107754707336, "rewards/rejected": -0.2706477642059326, "step": 8361 }, { "epoch": 5.782849239280774, "grad_norm": 7.794462203979492, "learning_rate": 2.3428615337329034e-05, "log_odds_chosen": 10.351293563842773, "log_odds_ratio": -0.0006059914594516158, "logits/chosen": -0.6464826464653015, "logits/rejected": -0.7297243475914001, "logps/chosen": -0.00021089179790578783, "logps/rejected": -1.6023046970367432, "loss": 1.1555, "nll_loss": 0.28881415724754333, "rewards/accuracies": 1.0, "rewards/chosen": -2.1089181245770305e-05, "rewards/margins": 0.16020938754081726, "rewards/rejected": -0.16023047268390656, "step": 8362 }, { "epoch": 5.783540802213001, "grad_norm": 6.460762023925781, "learning_rate": 2.3424773321038883e-05, "log_odds_chosen": 10.685081481933594, "log_odds_ratio": -0.00012143061030656099, "logits/chosen": -0.7190501689910889, "logits/rejected": -0.7460399866104126, "logps/chosen": -0.00029704137705266476, "logps/rejected": -2.504964828491211, "loss": 0.4641, "nll_loss": 0.11600157618522644, "rewards/accuracies": 1.0, "rewards/chosen": -2.970413515868131e-05, "rewards/margins": 0.25046679377555847, "rewards/rejected": -0.2504964768886566, "step": 8363 }, { "epoch": 5.784232365145228, "grad_norm": 6.547046184539795, "learning_rate": 2.3420931304748732e-05, "log_odds_chosen": 10.335028648376465, "log_odds_ratio": -0.0002512763603590429, "logits/chosen": -0.5884988903999329, "logits/rejected": -0.6217831373214722, "logps/chosen": -0.0014175032265484333, "logps/rejected": -2.6600277423858643, "loss": 0.9214, "nll_loss": 0.23032043874263763, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014175032265484333, "rewards/margins": 0.26586103439331055, "rewards/rejected": -0.2660028040409088, "step": 8364 }, { "epoch": 5.784923928077455, "grad_norm": 7.295699596405029, "learning_rate": 2.3417089288458584e-05, "log_odds_chosen": 9.605745315551758, "log_odds_ratio": -0.0009946682257577777, "logits/chosen": 0.16253028810024261, "logits/rejected": 0.00862811878323555, "logps/chosen": -0.0010614326456561685, "logps/rejected": -1.5691893100738525, "loss": 0.7776, "nll_loss": 0.19429834187030792, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010614325583446771, "rewards/margins": 0.15681278705596924, "rewards/rejected": -0.1569189429283142, "step": 8365 }, { "epoch": 5.785615491009682, "grad_norm": 11.699624061584473, "learning_rate": 2.3413247272168433e-05, "log_odds_chosen": 9.368026733398438, "log_odds_ratio": -0.0008493943023495376, "logits/chosen": -0.38463887572288513, "logits/rejected": -0.34834763407707214, "logps/chosen": -0.00212243665009737, "logps/rejected": -2.0978848934173584, "loss": 0.7825, "nll_loss": 0.19554492831230164, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021224364172667265, "rewards/margins": 0.20957626402378082, "rewards/rejected": -0.2097885012626648, "step": 8366 }, { "epoch": 5.786307053941909, "grad_norm": 6.0530500411987305, "learning_rate": 2.3409405255878286e-05, "log_odds_chosen": 11.640003204345703, "log_odds_ratio": -2.191989915445447e-05, "logits/chosen": -0.32898950576782227, "logits/rejected": -0.30571579933166504, "logps/chosen": -0.0001186039880849421, "logps/rejected": -2.5280473232269287, "loss": 0.4237, "nll_loss": 0.10592895746231079, "rewards/accuracies": 1.0, "rewards/chosen": -1.186039844469633e-05, "rewards/margins": 0.2527928650379181, "rewards/rejected": -0.2528047263622284, "step": 8367 }, { "epoch": 5.786998616874135, "grad_norm": 5.655771732330322, "learning_rate": 2.340556323958814e-05, "log_odds_chosen": 10.02695369720459, "log_odds_ratio": -0.0017944334540516138, "logits/chosen": -0.2847994267940521, "logits/rejected": -0.4468628466129303, "logps/chosen": -0.0016070208512246609, "logps/rejected": -1.8412233591079712, "loss": 0.6046, "nll_loss": 0.15095919370651245, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016070209676399827, "rewards/margins": 0.1839616298675537, "rewards/rejected": -0.18412232398986816, "step": 8368 }, { "epoch": 5.787690179806362, "grad_norm": 8.640703201293945, "learning_rate": 2.3401721223297987e-05, "log_odds_chosen": 9.762921333312988, "log_odds_ratio": -0.00027630673139356077, "logits/chosen": -0.21005013585090637, "logits/rejected": -0.32674640417099, "logps/chosen": -0.0015161462360993028, "logps/rejected": -1.6819090843200684, "loss": 0.6451, "nll_loss": 0.16124506294727325, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015161462943069637, "rewards/margins": 0.16803930699825287, "rewards/rejected": -0.16819091141223907, "step": 8369 }, { "epoch": 5.788381742738589, "grad_norm": 7.583675384521484, "learning_rate": 2.339787920700784e-05, "log_odds_chosen": 11.343730926513672, "log_odds_ratio": -1.8906674085883424e-05, "logits/chosen": -0.6724814176559448, "logits/rejected": -0.7422870397567749, "logps/chosen": -0.00013959617353975773, "logps/rejected": -2.027846574783325, "loss": 0.4644, "nll_loss": 0.11610476672649384, "rewards/accuracies": 1.0, "rewards/chosen": -1.3959616808278952e-05, "rewards/margins": 0.20277069509029388, "rewards/rejected": -0.20278465747833252, "step": 8370 }, { "epoch": 5.789073305670816, "grad_norm": 9.348284721374512, "learning_rate": 2.3394037190717692e-05, "log_odds_chosen": 9.624040603637695, "log_odds_ratio": -0.0005165559705346823, "logits/chosen": -0.32689768075942993, "logits/rejected": -0.353707492351532, "logps/chosen": -0.00031788769410923123, "logps/rejected": -1.819197654724121, "loss": 0.9245, "nll_loss": 0.23106229305267334, "rewards/accuracies": 1.0, "rewards/chosen": -3.178877159371041e-05, "rewards/margins": 0.18188798427581787, "rewards/rejected": -0.18191976845264435, "step": 8371 }, { "epoch": 5.789764868603043, "grad_norm": 6.802423000335693, "learning_rate": 2.339019517442754e-05, "log_odds_chosen": 10.92711067199707, "log_odds_ratio": -4.961782178725116e-05, "logits/chosen": -0.48822087049484253, "logits/rejected": -0.5076096653938293, "logps/chosen": -0.00014275651483330876, "logps/rejected": -1.920109510421753, "loss": 0.5638, "nll_loss": 0.14093977212905884, "rewards/accuracies": 1.0, "rewards/chosen": -1.4275651665229816e-05, "rewards/margins": 0.19199667870998383, "rewards/rejected": -0.19201095402240753, "step": 8372 }, { "epoch": 5.79045643153527, "grad_norm": 6.595789432525635, "learning_rate": 2.338635315813739e-05, "log_odds_chosen": 10.505535125732422, "log_odds_ratio": -9.082785982172936e-05, "logits/chosen": -0.8474116325378418, "logits/rejected": -0.8347305059432983, "logps/chosen": -0.0002792773593682796, "logps/rejected": -1.8048241138458252, "loss": 0.7605, "nll_loss": 0.19012302160263062, "rewards/accuracies": 1.0, "rewards/chosen": -2.792773739201948e-05, "rewards/margins": 0.18045447766780853, "rewards/rejected": -0.1804824024438858, "step": 8373 }, { "epoch": 5.791147994467496, "grad_norm": 8.353302955627441, "learning_rate": 2.3382511141847243e-05, "log_odds_chosen": 10.95888900756836, "log_odds_ratio": -7.381556497421116e-05, "logits/chosen": -0.043954428285360336, "logits/rejected": -0.19600719213485718, "logps/chosen": -0.0006513033295050263, "logps/rejected": -2.3824691772460938, "loss": 0.7341, "nll_loss": 0.18351265788078308, "rewards/accuracies": 1.0, "rewards/chosen": -6.513033440569416e-05, "rewards/margins": 0.23818179965019226, "rewards/rejected": -0.23824693262577057, "step": 8374 }, { "epoch": 5.791839557399723, "grad_norm": 6.531155586242676, "learning_rate": 2.3378669125557092e-05, "log_odds_chosen": 8.23431396484375, "log_odds_ratio": -0.009316143579781055, "logits/chosen": -0.7907726168632507, "logits/rejected": -0.8477673530578613, "logps/chosen": -0.004934323951601982, "logps/rejected": -1.4430415630340576, "loss": 0.7477, "nll_loss": 0.18599727749824524, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004934323951601982, "rewards/margins": 0.1438107192516327, "rewards/rejected": -0.14430415630340576, "step": 8375 }, { "epoch": 5.79253112033195, "grad_norm": 11.56829833984375, "learning_rate": 2.3374827109266944e-05, "log_odds_chosen": 9.17873764038086, "log_odds_ratio": -0.0017092173220589757, "logits/chosen": -0.5897649526596069, "logits/rejected": -0.669125497341156, "logps/chosen": -0.0021838941611349583, "logps/rejected": -1.4593961238861084, "loss": 0.6345, "nll_loss": 0.1584663838148117, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021838942484464496, "rewards/margins": 0.1457212269306183, "rewards/rejected": -0.14593960344791412, "step": 8376 }, { "epoch": 5.793222683264177, "grad_norm": 10.690628051757812, "learning_rate": 2.3370985092976797e-05, "log_odds_chosen": 11.137996673583984, "log_odds_ratio": -1.9885355868609622e-05, "logits/chosen": -0.21123374998569489, "logits/rejected": -0.23481081426143646, "logps/chosen": -0.00025429227389395237, "logps/rejected": -2.4706366062164307, "loss": 1.1716, "nll_loss": 0.2928870916366577, "rewards/accuracies": 1.0, "rewards/chosen": -2.5429229935980402e-05, "rewards/margins": 0.24703821539878845, "rewards/rejected": -0.24706363677978516, "step": 8377 }, { "epoch": 5.793914246196404, "grad_norm": 5.095751762390137, "learning_rate": 2.3367143076686646e-05, "log_odds_chosen": 10.155561447143555, "log_odds_ratio": -0.0005075469962321222, "logits/chosen": -0.2100965529680252, "logits/rejected": -0.24129444360733032, "logps/chosen": -0.0019794083200395107, "logps/rejected": -2.64514422416687, "loss": 0.7971, "nll_loss": 0.19922536611557007, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001979408407351002, "rewards/margins": 0.26431649923324585, "rewards/rejected": -0.2645144462585449, "step": 8378 }, { "epoch": 5.7946058091286305, "grad_norm": 9.237796783447266, "learning_rate": 2.3363301060396498e-05, "log_odds_chosen": 11.54646110534668, "log_odds_ratio": -3.1261472031474113e-05, "logits/chosen": -0.5789388418197632, "logits/rejected": -0.6498785614967346, "logps/chosen": -0.00018026272300630808, "logps/rejected": -2.9494409561157227, "loss": 1.1371, "nll_loss": 0.28427499532699585, "rewards/accuracies": 1.0, "rewards/chosen": -1.8026272300630808e-05, "rewards/margins": 0.2949260473251343, "rewards/rejected": -0.29494407773017883, "step": 8379 }, { "epoch": 5.795297372060857, "grad_norm": 9.438192367553711, "learning_rate": 2.335945904410635e-05, "log_odds_chosen": 10.218551635742188, "log_odds_ratio": -0.00047488469863310456, "logits/chosen": -0.473160058259964, "logits/rejected": -0.4454137682914734, "logps/chosen": -0.0006715701892971992, "logps/rejected": -1.9091229438781738, "loss": 0.6901, "nll_loss": 0.17248502373695374, "rewards/accuracies": 1.0, "rewards/chosen": -6.715701601933688e-05, "rewards/margins": 0.19084513187408447, "rewards/rejected": -0.19091227650642395, "step": 8380 }, { "epoch": 5.795988934993084, "grad_norm": 6.330174922943115, "learning_rate": 2.33556170278162e-05, "log_odds_chosen": 10.507976531982422, "log_odds_ratio": -5.8574034483172e-05, "logits/chosen": -0.6390625834465027, "logits/rejected": -0.7654403448104858, "logps/chosen": -0.00023662333842366934, "logps/rejected": -1.9224880933761597, "loss": 0.608, "nll_loss": 0.15200158953666687, "rewards/accuracies": 1.0, "rewards/chosen": -2.3662332750973292e-05, "rewards/margins": 0.1922251582145691, "rewards/rejected": -0.19224882125854492, "step": 8381 }, { "epoch": 5.796680497925311, "grad_norm": 10.470353126525879, "learning_rate": 2.335177501152605e-05, "log_odds_chosen": 9.854674339294434, "log_odds_ratio": -9.551268158247694e-05, "logits/chosen": -0.5654609799385071, "logits/rejected": -0.7642248272895813, "logps/chosen": -0.00030991079984232783, "logps/rejected": -1.416645884513855, "loss": 0.8086, "nll_loss": 0.2021281123161316, "rewards/accuracies": 1.0, "rewards/chosen": -3.099108289461583e-05, "rewards/margins": 0.14163358509540558, "rewards/rejected": -0.14166459441184998, "step": 8382 }, { "epoch": 5.797372060857538, "grad_norm": 6.718166351318359, "learning_rate": 2.33479329952359e-05, "log_odds_chosen": 8.857773780822754, "log_odds_ratio": -0.0011921023251488805, "logits/chosen": -0.36666426062583923, "logits/rejected": -0.33660316467285156, "logps/chosen": -0.002264556474983692, "logps/rejected": -1.3710294961929321, "loss": 0.7864, "nll_loss": 0.19647863507270813, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002264556533191353, "rewards/margins": 0.13687649369239807, "rewards/rejected": -0.13710294663906097, "step": 8383 }, { "epoch": 5.798063623789765, "grad_norm": 6.389466285705566, "learning_rate": 2.334409097894575e-05, "log_odds_chosen": 10.509028434753418, "log_odds_ratio": -5.556520045502111e-05, "logits/chosen": -0.4471544027328491, "logits/rejected": -0.5482914447784424, "logps/chosen": -0.0002809996949508786, "logps/rejected": -2.2582454681396484, "loss": 0.5929, "nll_loss": 0.14821313321590424, "rewards/accuracies": 1.0, "rewards/chosen": -2.8099970222683623e-05, "rewards/margins": 0.22579646110534668, "rewards/rejected": -0.22582454979419708, "step": 8384 }, { "epoch": 5.7987551867219915, "grad_norm": 8.72469425201416, "learning_rate": 2.3340248962655603e-05, "log_odds_chosen": 8.631034851074219, "log_odds_ratio": -0.002588872332125902, "logits/chosen": -0.4895836114883423, "logits/rejected": -0.4251309931278229, "logps/chosen": -0.004463810473680496, "logps/rejected": -1.896122694015503, "loss": 1.116, "nll_loss": 0.27873343229293823, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004463810764718801, "rewards/margins": 0.1891658902168274, "rewards/rejected": -0.1896122843027115, "step": 8385 }, { "epoch": 5.799446749654218, "grad_norm": 8.63010025024414, "learning_rate": 2.3336406946365455e-05, "log_odds_chosen": 10.331777572631836, "log_odds_ratio": -6.749943713657558e-05, "logits/chosen": -0.1513824462890625, "logits/rejected": -0.1929640769958496, "logps/chosen": -0.0006063561886548996, "logps/rejected": -1.767856240272522, "loss": 0.5939, "nll_loss": 0.14847230911254883, "rewards/accuracies": 1.0, "rewards/chosen": -6.063562250346877e-05, "rewards/margins": 0.17672500014305115, "rewards/rejected": -0.1767856478691101, "step": 8386 }, { "epoch": 5.800138312586445, "grad_norm": 6.971400737762451, "learning_rate": 2.3332564930075304e-05, "log_odds_chosen": 10.246747970581055, "log_odds_ratio": -0.0002245830837637186, "logits/chosen": -0.6562935709953308, "logits/rejected": -0.7062948942184448, "logps/chosen": -0.0005609599174931645, "logps/rejected": -2.1932871341705322, "loss": 0.92, "nll_loss": 0.22997446358203888, "rewards/accuracies": 1.0, "rewards/chosen": -5.609598883893341e-05, "rewards/margins": 0.21927259862422943, "rewards/rejected": -0.21932871639728546, "step": 8387 }, { "epoch": 5.800829875518672, "grad_norm": 12.3055419921875, "learning_rate": 2.3328722913785157e-05, "log_odds_chosen": 9.929397583007812, "log_odds_ratio": -0.0008865576237440109, "logits/chosen": -0.4087953567504883, "logits/rejected": -0.3798828125, "logps/chosen": -0.0010064990492537618, "logps/rejected": -2.1022868156433105, "loss": 0.7364, "nll_loss": 0.1840183436870575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010064992238767445, "rewards/margins": 0.2101280391216278, "rewards/rejected": -0.21022869646549225, "step": 8388 }, { "epoch": 5.801521438450899, "grad_norm": 8.019001007080078, "learning_rate": 2.332488089749501e-05, "log_odds_chosen": 10.398614883422852, "log_odds_ratio": -5.484620851348154e-05, "logits/chosen": -0.4774879515171051, "logits/rejected": -0.5907200574874878, "logps/chosen": -0.0003679244837258011, "logps/rejected": -2.316585063934326, "loss": 0.7131, "nll_loss": 0.17826199531555176, "rewards/accuracies": 1.0, "rewards/chosen": -3.679245128296316e-05, "rewards/margins": 0.23162171244621277, "rewards/rejected": -0.23165848851203918, "step": 8389 }, { "epoch": 5.802213001383126, "grad_norm": 6.085483551025391, "learning_rate": 2.3321038881204858e-05, "log_odds_chosen": 11.327494621276855, "log_odds_ratio": -2.2759963030694053e-05, "logits/chosen": -0.22526970505714417, "logits/rejected": -0.23279313743114471, "logps/chosen": -0.00042107899207621813, "logps/rejected": -3.112298011779785, "loss": 0.9084, "nll_loss": 0.22709017992019653, "rewards/accuracies": 1.0, "rewards/chosen": -4.210790211800486e-05, "rewards/margins": 0.3111876845359802, "rewards/rejected": -0.3112298250198364, "step": 8390 }, { "epoch": 5.8029045643153525, "grad_norm": 6.497158527374268, "learning_rate": 2.3317196864914707e-05, "log_odds_chosen": 9.713647842407227, "log_odds_ratio": -0.0035168626345694065, "logits/chosen": -0.6375054121017456, "logits/rejected": -0.6635434031486511, "logps/chosen": -0.0017270109383389354, "logps/rejected": -1.250663161277771, "loss": 0.9453, "nll_loss": 0.23597703874111176, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001727011112961918, "rewards/margins": 0.12489361315965652, "rewards/rejected": -0.12506631016731262, "step": 8391 }, { "epoch": 5.803596127247579, "grad_norm": 8.719135284423828, "learning_rate": 2.331335484862456e-05, "log_odds_chosen": 10.795459747314453, "log_odds_ratio": -4.9957008741330355e-05, "logits/chosen": 0.13198542594909668, "logits/rejected": -0.0006924569606781006, "logps/chosen": -0.0006249711732380092, "logps/rejected": -2.9145002365112305, "loss": 0.7778, "nll_loss": 0.19443432986736298, "rewards/accuracies": 1.0, "rewards/chosen": -6.249711441341788e-05, "rewards/margins": 0.29138755798339844, "rewards/rejected": -0.29145002365112305, "step": 8392 }, { "epoch": 5.804287690179806, "grad_norm": 7.647108554840088, "learning_rate": 2.330951283233441e-05, "log_odds_chosen": 10.523514747619629, "log_odds_ratio": -7.726266630925238e-05, "logits/chosen": -0.607114851474762, "logits/rejected": -0.6458864808082581, "logps/chosen": -0.00021323721739463508, "logps/rejected": -2.1598801612854004, "loss": 0.5307, "nll_loss": 0.13266785442829132, "rewards/accuracies": 1.0, "rewards/chosen": -2.1323721739463508e-05, "rewards/margins": 0.21596670150756836, "rewards/rejected": -0.21598801016807556, "step": 8393 }, { "epoch": 5.804979253112033, "grad_norm": 9.371009826660156, "learning_rate": 2.330567081604426e-05, "log_odds_chosen": 11.301060676574707, "log_odds_ratio": -4.154463022132404e-05, "logits/chosen": -0.39531388878822327, "logits/rejected": -0.5437592267990112, "logps/chosen": -0.00044395128497853875, "logps/rejected": -2.742654323577881, "loss": 0.7487, "nll_loss": 0.18716083467006683, "rewards/accuracies": 1.0, "rewards/chosen": -4.439512485987507e-05, "rewards/margins": 0.2742210626602173, "rewards/rejected": -0.27426543831825256, "step": 8394 }, { "epoch": 5.80567081604426, "grad_norm": 6.178443908691406, "learning_rate": 2.330182879975411e-05, "log_odds_chosen": 9.948263168334961, "log_odds_ratio": -0.0005975314998067915, "logits/chosen": -0.48438113927841187, "logits/rejected": -0.4877588152885437, "logps/chosen": -0.001075923559255898, "logps/rejected": -2.2288899421691895, "loss": 0.8015, "nll_loss": 0.20030581951141357, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001075923428288661, "rewards/margins": 0.22278141975402832, "rewards/rejected": -0.2228890061378479, "step": 8395 }, { "epoch": 5.806362378976487, "grad_norm": 6.832706928253174, "learning_rate": 2.3297986783463963e-05, "log_odds_chosen": 8.992963790893555, "log_odds_ratio": -0.001389929442666471, "logits/chosen": -0.5586123466491699, "logits/rejected": -0.6410993337631226, "logps/chosen": -0.007598080672323704, "logps/rejected": -2.2602124214172363, "loss": 1.4837, "nll_loss": 0.3707854151725769, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007598081137984991, "rewards/margins": 0.22526142001152039, "rewards/rejected": -0.22602123022079468, "step": 8396 }, { "epoch": 5.8070539419087135, "grad_norm": 6.229443073272705, "learning_rate": 2.3294144767173815e-05, "log_odds_chosen": 10.032766342163086, "log_odds_ratio": -0.00092526082880795, "logits/chosen": -0.6621145606040955, "logits/rejected": -0.7454342246055603, "logps/chosen": -0.011643901467323303, "logps/rejected": -2.058879852294922, "loss": 0.7335, "nll_loss": 0.18328972160816193, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011643902398645878, "rewards/margins": 0.20472361147403717, "rewards/rejected": -0.20588800311088562, "step": 8397 }, { "epoch": 5.80774550484094, "grad_norm": 7.834509372711182, "learning_rate": 2.3290302750883664e-05, "log_odds_chosen": 10.701813697814941, "log_odds_ratio": -0.0012211342109367251, "logits/chosen": -0.31548330187797546, "logits/rejected": -0.366915225982666, "logps/chosen": -0.003109922166913748, "logps/rejected": -2.5836844444274902, "loss": 0.5284, "nll_loss": 0.13197240233421326, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003109922108706087, "rewards/margins": 0.25805744528770447, "rewards/rejected": -0.25836843252182007, "step": 8398 }, { "epoch": 5.808437067773167, "grad_norm": 8.876883506774902, "learning_rate": 2.3286460734593517e-05, "log_odds_chosen": 9.514881134033203, "log_odds_ratio": -0.021531281992793083, "logits/chosen": -0.385174036026001, "logits/rejected": -0.4180731177330017, "logps/chosen": -0.0053784530609846115, "logps/rejected": -1.6292874813079834, "loss": 0.555, "nll_loss": 0.13659876585006714, "rewards/accuracies": 1.0, "rewards/chosen": -0.000537845422513783, "rewards/margins": 0.16239090263843536, "rewards/rejected": -0.1629287600517273, "step": 8399 }, { "epoch": 5.809128630705394, "grad_norm": 7.484838962554932, "learning_rate": 2.3282618718303366e-05, "log_odds_chosen": 10.81995964050293, "log_odds_ratio": -3.512339389999397e-05, "logits/chosen": -0.687035083770752, "logits/rejected": -0.7899414300918579, "logps/chosen": -0.00020911633328069001, "logps/rejected": -2.0662503242492676, "loss": 0.6707, "nll_loss": 0.16767863929271698, "rewards/accuracies": 1.0, "rewards/chosen": -2.0911633328069e-05, "rewards/margins": 0.20660412311553955, "rewards/rejected": -0.2066250443458557, "step": 8400 }, { "epoch": 5.809820193637621, "grad_norm": 18.49502182006836, "learning_rate": 2.3278776702013215e-05, "log_odds_chosen": 9.66842269897461, "log_odds_ratio": -0.2323729246854782, "logits/chosen": -0.5169225335121155, "logits/rejected": -0.574622392654419, "logps/chosen": -0.025232313200831413, "logps/rejected": -1.8740395307540894, "loss": 1.1176, "nll_loss": 0.2561640739440918, "rewards/accuracies": 0.875, "rewards/chosen": -0.0025232313200831413, "rewards/margins": 0.18488073348999023, "rewards/rejected": -0.18740394711494446, "step": 8401 }, { "epoch": 5.810511756569848, "grad_norm": 9.298235893249512, "learning_rate": 2.3274934685723067e-05, "log_odds_chosen": 10.226746559143066, "log_odds_ratio": -0.00023452314781025052, "logits/chosen": -0.4971470236778259, "logits/rejected": -0.48891282081604004, "logps/chosen": -0.0006599174812436104, "logps/rejected": -1.9210774898529053, "loss": 0.6072, "nll_loss": 0.15176746249198914, "rewards/accuracies": 1.0, "rewards/chosen": -6.599174957955256e-05, "rewards/margins": 0.19204173982143402, "rewards/rejected": -0.19210773706436157, "step": 8402 }, { "epoch": 5.8112033195020745, "grad_norm": 6.863306045532227, "learning_rate": 2.327109266943292e-05, "log_odds_chosen": 9.36366081237793, "log_odds_ratio": -0.000508427619934082, "logits/chosen": -0.5794307589530945, "logits/rejected": -0.6291723251342773, "logps/chosen": -0.0017660510493442416, "logps/rejected": -1.9207453727722168, "loss": 1.1281, "nll_loss": 0.281978964805603, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017660509911365807, "rewards/margins": 0.1918979436159134, "rewards/rejected": -0.19207455217838287, "step": 8403 }, { "epoch": 5.811894882434301, "grad_norm": 9.127928733825684, "learning_rate": 2.326725065314277e-05, "log_odds_chosen": 11.82565689086914, "log_odds_ratio": -1.8272336092195474e-05, "logits/chosen": -0.3629467189311981, "logits/rejected": -0.43086594343185425, "logps/chosen": -0.00016442629566881806, "logps/rejected": -2.7578787803649902, "loss": 0.6727, "nll_loss": 0.16816404461860657, "rewards/accuracies": 1.0, "rewards/chosen": -1.644263102207333e-05, "rewards/margins": 0.27577143907546997, "rewards/rejected": -0.275787889957428, "step": 8404 }, { "epoch": 5.812586445366528, "grad_norm": 14.229921340942383, "learning_rate": 2.326340863685262e-05, "log_odds_chosen": 10.126533508300781, "log_odds_ratio": -8.251607505371794e-05, "logits/chosen": -0.7604619264602661, "logits/rejected": -0.8193801641464233, "logps/chosen": -0.00019167909340467304, "logps/rejected": -1.3991739749908447, "loss": 0.6961, "nll_loss": 0.1740119755268097, "rewards/accuracies": 1.0, "rewards/chosen": -1.9167910068063065e-05, "rewards/margins": 0.13989822566509247, "rewards/rejected": -0.13991738855838776, "step": 8405 }, { "epoch": 5.813278008298755, "grad_norm": 7.9956374168396, "learning_rate": 2.3259566620562474e-05, "log_odds_chosen": 8.889430046081543, "log_odds_ratio": -0.04909144714474678, "logits/chosen": -0.560508131980896, "logits/rejected": -0.6312997341156006, "logps/chosen": -0.010527387261390686, "logps/rejected": -1.4551292657852173, "loss": 1.1541, "nll_loss": 0.28362512588500977, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010527388658374548, "rewards/margins": 0.14446018636226654, "rewards/rejected": -0.1455129235982895, "step": 8406 }, { "epoch": 5.813969571230982, "grad_norm": 8.152767181396484, "learning_rate": 2.3255724604272323e-05, "log_odds_chosen": 10.798035621643066, "log_odds_ratio": -4.214973887428641e-05, "logits/chosen": -0.13847284018993378, "logits/rejected": -0.19017915427684784, "logps/chosen": -0.00013657697127200663, "logps/rejected": -1.9986261129379272, "loss": 0.6238, "nll_loss": 0.1559559553861618, "rewards/accuracies": 1.0, "rewards/chosen": -1.3657696399604902e-05, "rewards/margins": 0.1998489648103714, "rewards/rejected": -0.19986259937286377, "step": 8407 }, { "epoch": 5.814661134163209, "grad_norm": 5.268293380737305, "learning_rate": 2.3251882587982175e-05, "log_odds_chosen": 10.488828659057617, "log_odds_ratio": -0.00019341889128554612, "logits/chosen": -0.3686334192752838, "logits/rejected": -0.41108012199401855, "logps/chosen": -0.0010957105550915003, "logps/rejected": -2.32481050491333, "loss": 0.7747, "nll_loss": 0.19365090131759644, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001095710540539585, "rewards/margins": 0.2323714792728424, "rewards/rejected": -0.23248103260993958, "step": 8408 }, { "epoch": 5.8153526970954355, "grad_norm": 8.308907508850098, "learning_rate": 2.3248040571692024e-05, "log_odds_chosen": 8.83240032196045, "log_odds_ratio": -0.0071347374469041824, "logits/chosen": -0.38757187128067017, "logits/rejected": -0.47507134079933167, "logps/chosen": -0.009843516163527966, "logps/rejected": -1.6178112030029297, "loss": 0.8248, "nll_loss": 0.20549696683883667, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009843516163527966, "rewards/margins": 0.16079676151275635, "rewards/rejected": -0.16178111732006073, "step": 8409 }, { "epoch": 5.816044260027662, "grad_norm": 7.885484218597412, "learning_rate": 2.3244198555401873e-05, "log_odds_chosen": 10.769173622131348, "log_odds_ratio": -5.721321213059127e-05, "logits/chosen": -0.28124937415122986, "logits/rejected": -0.4066743552684784, "logps/chosen": -0.00032837747130542994, "logps/rejected": -1.9929379224777222, "loss": 0.648, "nll_loss": 0.1619967371225357, "rewards/accuracies": 1.0, "rewards/chosen": -3.283774640294723e-05, "rewards/margins": 0.19926095008850098, "rewards/rejected": -0.19929379224777222, "step": 8410 }, { "epoch": 5.816735822959889, "grad_norm": 6.666642665863037, "learning_rate": 2.3240356539111726e-05, "log_odds_chosen": 10.48222827911377, "log_odds_ratio": -0.0004556652274914086, "logits/chosen": -0.1470799446105957, "logits/rejected": -0.25690242648124695, "logps/chosen": -0.0005777716287411749, "logps/rejected": -2.4178152084350586, "loss": 0.7361, "nll_loss": 0.18398535251617432, "rewards/accuracies": 1.0, "rewards/chosen": -5.7777167967287824e-05, "rewards/margins": 0.24172374606132507, "rewards/rejected": -0.24178150296211243, "step": 8411 }, { "epoch": 5.817427385892116, "grad_norm": 6.081725597381592, "learning_rate": 2.3236514522821578e-05, "log_odds_chosen": 10.020877838134766, "log_odds_ratio": -0.0009894585236907005, "logits/chosen": -0.49546316266059875, "logits/rejected": -0.48959237337112427, "logps/chosen": -0.0033304309472441673, "logps/rejected": -1.8964627981185913, "loss": 0.5553, "nll_loss": 0.13872453570365906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003330431063659489, "rewards/margins": 0.18931323289871216, "rewards/rejected": -0.18964628875255585, "step": 8412 }, { "epoch": 5.818118948824343, "grad_norm": 5.866261959075928, "learning_rate": 2.3232672506531427e-05, "log_odds_chosen": 10.42142391204834, "log_odds_ratio": -6.498560833279043e-05, "logits/chosen": -0.15770825743675232, "logits/rejected": -0.13760429620742798, "logps/chosen": -0.0002847444557119161, "logps/rejected": -1.9210240840911865, "loss": 0.6556, "nll_loss": 0.16390159726142883, "rewards/accuracies": 1.0, "rewards/chosen": -2.8474447390181012e-05, "rewards/margins": 0.19207392632961273, "rewards/rejected": -0.19210243225097656, "step": 8413 }, { "epoch": 5.81881051175657, "grad_norm": 5.274764060974121, "learning_rate": 2.322883049024128e-05, "log_odds_chosen": 10.923952102661133, "log_odds_ratio": -0.00017226138152182102, "logits/chosen": -0.13174067437648773, "logits/rejected": -0.2675361633300781, "logps/chosen": -0.00036066051688976586, "logps/rejected": -2.3805484771728516, "loss": 0.7038, "nll_loss": 0.17594024538993835, "rewards/accuracies": 1.0, "rewards/chosen": -3.606605241657235e-05, "rewards/margins": 0.23801881074905396, "rewards/rejected": -0.23805485665798187, "step": 8414 }, { "epoch": 5.819502074688796, "grad_norm": 11.084059715270996, "learning_rate": 2.3224988473951132e-05, "log_odds_chosen": 9.231254577636719, "log_odds_ratio": -0.0006525219068862498, "logits/chosen": -0.06093751639127731, "logits/rejected": -0.19099873304367065, "logps/chosen": -0.001553456182591617, "logps/rejected": -1.725694179534912, "loss": 1.0024, "nll_loss": 0.2505434453487396, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015534560952801257, "rewards/margins": 0.17241407930850983, "rewards/rejected": -0.1725694239139557, "step": 8415 }, { "epoch": 5.820193637621023, "grad_norm": 7.154449462890625, "learning_rate": 2.322114645766098e-05, "log_odds_chosen": 10.379135131835938, "log_odds_ratio": -9.017589763971046e-05, "logits/chosen": -0.49093595147132874, "logits/rejected": -0.5859791040420532, "logps/chosen": -0.0006343638524413109, "logps/rejected": -2.3570706844329834, "loss": 0.6112, "nll_loss": 0.1527821123600006, "rewards/accuracies": 1.0, "rewards/chosen": -6.343638233374804e-05, "rewards/margins": 0.2356436401605606, "rewards/rejected": -0.23570707440376282, "step": 8416 }, { "epoch": 5.82088520055325, "grad_norm": 6.823001861572266, "learning_rate": 2.3217304441370834e-05, "log_odds_chosen": 10.937118530273438, "log_odds_ratio": -3.171800926793367e-05, "logits/chosen": -0.024569761008024216, "logits/rejected": -0.06036039814352989, "logps/chosen": -0.00010773000394692644, "logps/rejected": -1.862146258354187, "loss": 0.5502, "nll_loss": 0.137550950050354, "rewards/accuracies": 1.0, "rewards/chosen": -1.0773001122288406e-05, "rewards/margins": 0.18620386719703674, "rewards/rejected": -0.1862146258354187, "step": 8417 }, { "epoch": 5.821576763485477, "grad_norm": 10.990660667419434, "learning_rate": 2.3213462425080683e-05, "log_odds_chosen": 10.83702278137207, "log_odds_ratio": -0.0001108981596189551, "logits/chosen": -0.432903528213501, "logits/rejected": -0.4672655761241913, "logps/chosen": -0.00029598127002827823, "logps/rejected": -2.406498432159424, "loss": 0.793, "nll_loss": 0.19824771583080292, "rewards/accuracies": 1.0, "rewards/chosen": -2.9598128094221465e-05, "rewards/margins": 0.24062024056911469, "rewards/rejected": -0.24064984917640686, "step": 8418 }, { "epoch": 5.822268326417704, "grad_norm": 8.261746406555176, "learning_rate": 2.3209620408790535e-05, "log_odds_chosen": 10.909168243408203, "log_odds_ratio": -4.1487171984044835e-05, "logits/chosen": -0.3953424394130707, "logits/rejected": -0.41297727823257446, "logps/chosen": -0.0001901828363770619, "logps/rejected": -1.8639588356018066, "loss": 0.481, "nll_loss": 0.1202574297785759, "rewards/accuracies": 1.0, "rewards/chosen": -1.901828363770619e-05, "rewards/margins": 0.18637686967849731, "rewards/rejected": -0.18639588356018066, "step": 8419 }, { "epoch": 5.822959889349931, "grad_norm": 6.4954142570495605, "learning_rate": 2.3205778392500384e-05, "log_odds_chosen": 11.166680335998535, "log_odds_ratio": -0.00025789288338273764, "logits/chosen": -0.6210584044456482, "logits/rejected": -0.7248342633247375, "logps/chosen": -0.00025950977578759193, "logps/rejected": -2.8732190132141113, "loss": 0.7263, "nll_loss": 0.18155349791049957, "rewards/accuracies": 1.0, "rewards/chosen": -2.595098158053588e-05, "rewards/margins": 0.287295937538147, "rewards/rejected": -0.28732186555862427, "step": 8420 }, { "epoch": 5.823651452282157, "grad_norm": 10.566604614257812, "learning_rate": 2.3201936376210237e-05, "log_odds_chosen": 11.431151390075684, "log_odds_ratio": -1.713056735752616e-05, "logits/chosen": -0.5161327123641968, "logits/rejected": -0.5762264728546143, "logps/chosen": -0.00018493003153707832, "logps/rejected": -2.3237714767456055, "loss": 0.6122, "nll_loss": 0.15303756296634674, "rewards/accuracies": 1.0, "rewards/chosen": -1.849300133471843e-05, "rewards/margins": 0.2323586344718933, "rewards/rejected": -0.23237714171409607, "step": 8421 }, { "epoch": 5.824343015214384, "grad_norm": 7.167019844055176, "learning_rate": 2.3198094359920086e-05, "log_odds_chosen": 11.25281047821045, "log_odds_ratio": -2.5544428353896365e-05, "logits/chosen": -0.38715946674346924, "logits/rejected": -0.45937803387641907, "logps/chosen": -0.00012793140194844455, "logps/rejected": -2.1478474140167236, "loss": 0.7055, "nll_loss": 0.1763816773891449, "rewards/accuracies": 1.0, "rewards/chosen": -1.2793139831046574e-05, "rewards/margins": 0.21477195620536804, "rewards/rejected": -0.21478475630283356, "step": 8422 }, { "epoch": 5.825034578146611, "grad_norm": 8.895520210266113, "learning_rate": 2.3194252343629938e-05, "log_odds_chosen": 10.265355110168457, "log_odds_ratio": -0.00013882019266020507, "logits/chosen": -0.25306057929992676, "logits/rejected": -0.3418146073818207, "logps/chosen": -0.0015197532484307885, "logps/rejected": -2.4995551109313965, "loss": 0.8472, "nll_loss": 0.21178866922855377, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001519753277534619, "rewards/margins": 0.2498035430908203, "rewards/rejected": -0.24995550513267517, "step": 8423 }, { "epoch": 5.825726141078838, "grad_norm": 9.67374038696289, "learning_rate": 2.319041032733979e-05, "log_odds_chosen": 11.97836685180664, "log_odds_ratio": -1.1517046914377715e-05, "logits/chosen": -0.3377426862716675, "logits/rejected": -0.48516958951950073, "logps/chosen": -0.0001647754106670618, "logps/rejected": -2.926840305328369, "loss": 0.885, "nll_loss": 0.2212529331445694, "rewards/accuracies": 1.0, "rewards/chosen": -1.647754106670618e-05, "rewards/margins": 0.2926675081253052, "rewards/rejected": -0.29268401861190796, "step": 8424 }, { "epoch": 5.826417704011065, "grad_norm": 3.828767776489258, "learning_rate": 2.318656831104964e-05, "log_odds_chosen": 10.650819778442383, "log_odds_ratio": -0.00011364149395376444, "logits/chosen": -0.7182785272598267, "logits/rejected": -0.7033266425132751, "logps/chosen": -0.0006292496109381318, "logps/rejected": -2.3739876747131348, "loss": 0.8316, "nll_loss": 0.207891583442688, "rewards/accuracies": 1.0, "rewards/chosen": -6.29249625490047e-05, "rewards/margins": 0.23733583092689514, "rewards/rejected": -0.23739874362945557, "step": 8425 }, { "epoch": 5.827109266943292, "grad_norm": 8.507972717285156, "learning_rate": 2.3182726294759492e-05, "log_odds_chosen": 10.733532905578613, "log_odds_ratio": -4.510982762440108e-05, "logits/chosen": -0.49858397245407104, "logits/rejected": -0.5925998687744141, "logps/chosen": -0.005823322106152773, "logps/rejected": -2.722111701965332, "loss": 1.0338, "nll_loss": 0.25843608379364014, "rewards/accuracies": 1.0, "rewards/chosen": -0.000582332257181406, "rewards/margins": 0.27162885665893555, "rewards/rejected": -0.2722111940383911, "step": 8426 }, { "epoch": 5.827800829875518, "grad_norm": 9.715168952941895, "learning_rate": 2.3178884278469344e-05, "log_odds_chosen": 10.892751693725586, "log_odds_ratio": -2.8781119908671826e-05, "logits/chosen": -0.36615926027297974, "logits/rejected": -0.417987585067749, "logps/chosen": -0.0003425172180868685, "logps/rejected": -2.6365199089050293, "loss": 0.6392, "nll_loss": 0.15979956090450287, "rewards/accuracies": 1.0, "rewards/chosen": -3.4251723263878375e-05, "rewards/margins": 0.26361775398254395, "rewards/rejected": -0.2636519968509674, "step": 8427 }, { "epoch": 5.828492392807745, "grad_norm": 12.601354598999023, "learning_rate": 2.3175042262179193e-05, "log_odds_chosen": 10.926346778869629, "log_odds_ratio": -3.957270018872805e-05, "logits/chosen": -0.5197044610977173, "logits/rejected": -0.5732940435409546, "logps/chosen": -0.0004308591887820512, "logps/rejected": -2.6111674308776855, "loss": 0.7898, "nll_loss": 0.1974467784166336, "rewards/accuracies": 1.0, "rewards/chosen": -4.3085921788588166e-05, "rewards/margins": 0.26107367873191833, "rewards/rejected": -0.26111674308776855, "step": 8428 }, { "epoch": 5.829183955739972, "grad_norm": 12.75105094909668, "learning_rate": 2.3171200245889043e-05, "log_odds_chosen": 10.6773681640625, "log_odds_ratio": -4.829808676731773e-05, "logits/chosen": -0.23225167393684387, "logits/rejected": -0.31541967391967773, "logps/chosen": -0.00018530743545852602, "logps/rejected": -2.179605484008789, "loss": 0.775, "nll_loss": 0.19375410676002502, "rewards/accuracies": 1.0, "rewards/chosen": -1.85307435458526e-05, "rewards/margins": 0.21794205904006958, "rewards/rejected": -0.21796056628227234, "step": 8429 }, { "epoch": 5.829875518672199, "grad_norm": 7.408595561981201, "learning_rate": 2.3167358229598895e-05, "log_odds_chosen": 9.79969596862793, "log_odds_ratio": -0.0001708334602881223, "logits/chosen": -0.47640442848205566, "logits/rejected": -0.5898884534835815, "logps/chosen": -0.0005523377330973744, "logps/rejected": -2.0489134788513184, "loss": 0.8691, "nll_loss": 0.21726754307746887, "rewards/accuracies": 1.0, "rewards/chosen": -5.523378058569506e-05, "rewards/margins": 0.20483613014221191, "rewards/rejected": -0.2048913538455963, "step": 8430 }, { "epoch": 5.830567081604426, "grad_norm": 7.56455135345459, "learning_rate": 2.3163516213308744e-05, "log_odds_chosen": 10.20605182647705, "log_odds_ratio": -6.57220371067524e-05, "logits/chosen": -0.8905206918716431, "logits/rejected": -0.9065886735916138, "logps/chosen": -0.00035831076093018055, "logps/rejected": -1.6489109992980957, "loss": 0.7195, "nll_loss": 0.17987284064292908, "rewards/accuracies": 1.0, "rewards/chosen": -3.5831071727443486e-05, "rewards/margins": 0.16485527157783508, "rewards/rejected": -0.1648910939693451, "step": 8431 }, { "epoch": 5.8312586445366525, "grad_norm": 6.3840012550354, "learning_rate": 2.3159674197018596e-05, "log_odds_chosen": 9.805391311645508, "log_odds_ratio": -0.0001165514113381505, "logits/chosen": -0.5324653387069702, "logits/rejected": -0.4614141583442688, "logps/chosen": -0.0006516007706522942, "logps/rejected": -1.4721413850784302, "loss": 0.9642, "nll_loss": 0.24104109406471252, "rewards/accuracies": 1.0, "rewards/chosen": -6.516007124446332e-05, "rewards/margins": 0.1471489667892456, "rewards/rejected": -0.1472141444683075, "step": 8432 }, { "epoch": 5.831950207468879, "grad_norm": 8.392349243164062, "learning_rate": 2.315583218072845e-05, "log_odds_chosen": 9.441329956054688, "log_odds_ratio": -0.0006476533599197865, "logits/chosen": -0.6239942908287048, "logits/rejected": -0.6620289087295532, "logps/chosen": -0.0011929383035749197, "logps/rejected": -2.1252613067626953, "loss": 0.8785, "nll_loss": 0.21956561505794525, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011929382890230045, "rewards/margins": 0.21240685880184174, "rewards/rejected": -0.2125261425971985, "step": 8433 }, { "epoch": 5.832641770401106, "grad_norm": 7.291143894195557, "learning_rate": 2.3151990164438298e-05, "log_odds_chosen": 10.459549903869629, "log_odds_ratio": -8.78508435562253e-05, "logits/chosen": -0.43349847197532654, "logits/rejected": -0.5388727784156799, "logps/chosen": -0.0004004551446996629, "logps/rejected": -2.0080487728118896, "loss": 0.7219, "nll_loss": 0.18047361075878143, "rewards/accuracies": 1.0, "rewards/chosen": -4.004551374237053e-05, "rewards/margins": 0.20076483488082886, "rewards/rejected": -0.20080485939979553, "step": 8434 }, { "epoch": 5.833333333333333, "grad_norm": 5.595971584320068, "learning_rate": 2.314814814814815e-05, "log_odds_chosen": 11.08672046661377, "log_odds_ratio": -2.201294591941405e-05, "logits/chosen": -0.39860230684280396, "logits/rejected": -0.45757389068603516, "logps/chosen": -9.895279799820855e-05, "logps/rejected": -1.784722924232483, "loss": 0.5515, "nll_loss": 0.1378733068704605, "rewards/accuracies": 1.0, "rewards/chosen": -9.895280527416617e-06, "rewards/margins": 0.17846240103244781, "rewards/rejected": -0.17847228050231934, "step": 8435 }, { "epoch": 5.83402489626556, "grad_norm": 5.454908847808838, "learning_rate": 2.3144306131858003e-05, "log_odds_chosen": 9.911537170410156, "log_odds_ratio": -0.0003854777605738491, "logits/chosen": -0.33123037219047546, "logits/rejected": -0.4306354224681854, "logps/chosen": -0.0004738108837045729, "logps/rejected": -1.5103071928024292, "loss": 0.6818, "nll_loss": 0.17041829228401184, "rewards/accuracies": 1.0, "rewards/chosen": -4.738109419122338e-05, "rewards/margins": 0.15098334848880768, "rewards/rejected": -0.15103071928024292, "step": 8436 }, { "epoch": 5.834716459197787, "grad_norm": 6.055264472961426, "learning_rate": 2.3140464115567852e-05, "log_odds_chosen": 9.830744743347168, "log_odds_ratio": -0.00022968296252656728, "logits/chosen": -0.39797988533973694, "logits/rejected": -0.44242775440216064, "logps/chosen": -0.0002832105674315244, "logps/rejected": -1.6076643466949463, "loss": 0.5417, "nll_loss": 0.1353919506072998, "rewards/accuracies": 1.0, "rewards/chosen": -2.8321059289737605e-05, "rewards/margins": 0.16073811054229736, "rewards/rejected": -0.16076642274856567, "step": 8437 }, { "epoch": 5.8354080221300135, "grad_norm": 6.931327819824219, "learning_rate": 2.31366220992777e-05, "log_odds_chosen": 10.32223892211914, "log_odds_ratio": -0.0001639589318074286, "logits/chosen": -0.196787491440773, "logits/rejected": -0.25322869420051575, "logps/chosen": -0.00022969470592215657, "logps/rejected": -1.8739651441574097, "loss": 0.7697, "nll_loss": 0.19240155816078186, "rewards/accuracies": 1.0, "rewards/chosen": -2.2969470592215657e-05, "rewards/margins": 0.187373548746109, "rewards/rejected": -0.18739651143550873, "step": 8438 }, { "epoch": 5.83609958506224, "grad_norm": 7.6645588874816895, "learning_rate": 2.3132780082987553e-05, "log_odds_chosen": 10.39122200012207, "log_odds_ratio": -0.00011932729103136808, "logits/chosen": -0.17069807648658752, "logits/rejected": -0.25050681829452515, "logps/chosen": -0.0007452214485965669, "logps/rejected": -2.2647957801818848, "loss": 1.1396, "nll_loss": 0.2848907709121704, "rewards/accuracies": 1.0, "rewards/chosen": -7.452214049408212e-05, "rewards/margins": 0.2264050841331482, "rewards/rejected": -0.22647960484027863, "step": 8439 }, { "epoch": 5.836791147994467, "grad_norm": 8.279850959777832, "learning_rate": 2.3128938066697402e-05, "log_odds_chosen": 10.228134155273438, "log_odds_ratio": -6.32415249128826e-05, "logits/chosen": -0.4481656551361084, "logits/rejected": -0.5319783687591553, "logps/chosen": -0.0004316547710914165, "logps/rejected": -1.857574701309204, "loss": 0.592, "nll_loss": 0.14799726009368896, "rewards/accuracies": 1.0, "rewards/chosen": -4.316547710914165e-05, "rewards/margins": 0.18571430444717407, "rewards/rejected": -0.18575747311115265, "step": 8440 }, { "epoch": 5.837482710926694, "grad_norm": 6.741942405700684, "learning_rate": 2.3125096050407255e-05, "log_odds_chosen": 10.6365327835083, "log_odds_ratio": -4.813177292817272e-05, "logits/chosen": -0.09993970394134521, "logits/rejected": -0.25238558650016785, "logps/chosen": -0.00024143440532498062, "logps/rejected": -2.303591728210449, "loss": 0.8639, "nll_loss": 0.21596278250217438, "rewards/accuracies": 1.0, "rewards/chosen": -2.414343907730654e-05, "rewards/margins": 0.2303350269794464, "rewards/rejected": -0.23035918176174164, "step": 8441 }, { "epoch": 5.838174273858921, "grad_norm": 7.0618181228637695, "learning_rate": 2.3121254034117107e-05, "log_odds_chosen": 11.856544494628906, "log_odds_ratio": -1.502203394920798e-05, "logits/chosen": -0.34077247977256775, "logits/rejected": -0.3906419277191162, "logps/chosen": -0.00016831861285027117, "logps/rejected": -2.935901165008545, "loss": 0.7305, "nll_loss": 0.18261991441249847, "rewards/accuracies": 1.0, "rewards/chosen": -1.6831862012622878e-05, "rewards/margins": 0.2935732901096344, "rewards/rejected": -0.29359012842178345, "step": 8442 }, { "epoch": 5.838865836791148, "grad_norm": 6.268857002258301, "learning_rate": 2.3117412017826956e-05, "log_odds_chosen": 11.079063415527344, "log_odds_ratio": -0.00011699595052050427, "logits/chosen": 0.002030055969953537, "logits/rejected": -0.13568267226219177, "logps/chosen": -0.00037257670192047954, "logps/rejected": -2.717937707901001, "loss": 0.651, "nll_loss": 0.16273711621761322, "rewards/accuracies": 1.0, "rewards/chosen": -3.725767237483524e-05, "rewards/margins": 0.27175652980804443, "rewards/rejected": -0.27179378271102905, "step": 8443 }, { "epoch": 5.8395573997233745, "grad_norm": 6.871936321258545, "learning_rate": 2.311357000153681e-05, "log_odds_chosen": 10.119974136352539, "log_odds_ratio": -0.00012443287414498627, "logits/chosen": -0.23293182253837585, "logits/rejected": -0.2058962881565094, "logps/chosen": -0.0004133795737288892, "logps/rejected": -2.0336642265319824, "loss": 0.8331, "nll_loss": 0.2082645148038864, "rewards/accuracies": 1.0, "rewards/chosen": -4.133796028327197e-05, "rewards/margins": 0.203325092792511, "rewards/rejected": -0.20336642861366272, "step": 8444 }, { "epoch": 5.840248962655601, "grad_norm": 6.8992719650268555, "learning_rate": 2.310972798524666e-05, "log_odds_chosen": 9.559707641601562, "log_odds_ratio": -0.0006719698430970311, "logits/chosen": -0.2077520489692688, "logits/rejected": -0.28626370429992676, "logps/chosen": -0.0007084297831170261, "logps/rejected": -1.8092963695526123, "loss": 1.4427, "nll_loss": 0.3606107532978058, "rewards/accuracies": 1.0, "rewards/chosen": -7.084297976689413e-05, "rewards/margins": 0.1808588206768036, "rewards/rejected": -0.18092964589595795, "step": 8445 }, { "epoch": 5.840940525587828, "grad_norm": 6.642013072967529, "learning_rate": 2.310588596895651e-05, "log_odds_chosen": 11.043392181396484, "log_odds_ratio": -4.813253690372221e-05, "logits/chosen": -0.478056937456131, "logits/rejected": -0.43620866537094116, "logps/chosen": -0.00011894687486346811, "logps/rejected": -1.8128962516784668, "loss": 1.0769, "nll_loss": 0.2692229747772217, "rewards/accuracies": 1.0, "rewards/chosen": -1.1894688213942572e-05, "rewards/margins": 0.18127773702144623, "rewards/rejected": -0.1812896430492401, "step": 8446 }, { "epoch": 5.841632088520055, "grad_norm": 8.432222366333008, "learning_rate": 2.310204395266636e-05, "log_odds_chosen": 9.543682098388672, "log_odds_ratio": -0.00022593011090066284, "logits/chosen": -0.5476033687591553, "logits/rejected": -0.4643722176551819, "logps/chosen": -0.00032802880741655827, "logps/rejected": -1.440566897392273, "loss": 0.7975, "nll_loss": 0.1993521898984909, "rewards/accuracies": 1.0, "rewards/chosen": -3.2802880014060065e-05, "rewards/margins": 0.14402389526367188, "rewards/rejected": -0.14405669271945953, "step": 8447 }, { "epoch": 5.842323651452282, "grad_norm": 9.15583610534668, "learning_rate": 2.3098201936376212e-05, "log_odds_chosen": 10.55355453491211, "log_odds_ratio": -4.6456440031761304e-05, "logits/chosen": -0.14537620544433594, "logits/rejected": -0.12754163146018982, "logps/chosen": -0.0002685963409021497, "logps/rejected": -1.9353593587875366, "loss": 0.6313, "nll_loss": 0.1578291952610016, "rewards/accuracies": 1.0, "rewards/chosen": -2.6859634090214968e-05, "rewards/margins": 0.19350910186767578, "rewards/rejected": -0.1935359537601471, "step": 8448 }, { "epoch": 5.843015214384509, "grad_norm": 7.750082492828369, "learning_rate": 2.309435992008606e-05, "log_odds_chosen": 10.369003295898438, "log_odds_ratio": -9.04241023818031e-05, "logits/chosen": -0.43339040875434875, "logits/rejected": -0.41327208280563354, "logps/chosen": -0.0021025664173066616, "logps/rejected": -2.554117441177368, "loss": 0.5103, "nll_loss": 0.1275673806667328, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021025663590990007, "rewards/margins": 0.2552014887332916, "rewards/rejected": -0.2554117441177368, "step": 8449 }, { "epoch": 5.8437067773167355, "grad_norm": 8.610663414001465, "learning_rate": 2.3090517903795913e-05, "log_odds_chosen": 10.283554077148438, "log_odds_ratio": -6.271598249441013e-05, "logits/chosen": -0.20354630053043365, "logits/rejected": -0.2865172028541565, "logps/chosen": -0.0012952906545251608, "logps/rejected": -2.354818105697632, "loss": 0.953, "nll_loss": 0.23823662102222443, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012952905672136694, "rewards/margins": 0.2353522777557373, "rewards/rejected": -0.23548182845115662, "step": 8450 }, { "epoch": 5.844398340248962, "grad_norm": 14.888447761535645, "learning_rate": 2.3086675887505766e-05, "log_odds_chosen": 10.123775482177734, "log_odds_ratio": -0.00026565161533653736, "logits/chosen": -0.7436781525611877, "logits/rejected": -0.876754879951477, "logps/chosen": -0.0005735972663387656, "logps/rejected": -2.3147449493408203, "loss": 1.1257, "nll_loss": 0.2813999056816101, "rewards/accuracies": 1.0, "rewards/chosen": -5.735972081311047e-05, "rewards/margins": 0.23141713440418243, "rewards/rejected": -0.23147448897361755, "step": 8451 }, { "epoch": 5.845089903181189, "grad_norm": 9.175409317016602, "learning_rate": 2.3082833871215615e-05, "log_odds_chosen": 10.095325469970703, "log_odds_ratio": -0.002730314154177904, "logits/chosen": -0.44304752349853516, "logits/rejected": -0.567151665687561, "logps/chosen": -0.021072175353765488, "logps/rejected": -2.3713250160217285, "loss": 0.7359, "nll_loss": 0.18370933830738068, "rewards/accuracies": 1.0, "rewards/chosen": -0.002107217675074935, "rewards/margins": 0.2350253015756607, "rewards/rejected": -0.23713251948356628, "step": 8452 }, { "epoch": 5.845781466113416, "grad_norm": 9.673622131347656, "learning_rate": 2.3078991854925467e-05, "log_odds_chosen": 10.642251014709473, "log_odds_ratio": -9.646148100728169e-05, "logits/chosen": -0.7856646776199341, "logits/rejected": -0.8123069405555725, "logps/chosen": -0.000291735224891454, "logps/rejected": -2.3011388778686523, "loss": 0.8692, "nll_loss": 0.21730202436447144, "rewards/accuracies": 1.0, "rewards/chosen": -2.917352321674116e-05, "rewards/margins": 0.23008470237255096, "rewards/rejected": -0.23011387884616852, "step": 8453 }, { "epoch": 5.846473029045643, "grad_norm": 8.762003898620605, "learning_rate": 2.307514983863532e-05, "log_odds_chosen": 10.296168327331543, "log_odds_ratio": -0.00019660551333799958, "logits/chosen": -0.41896605491638184, "logits/rejected": -0.4700847268104553, "logps/chosen": -0.00015691027510911226, "logps/rejected": -1.820425271987915, "loss": 0.8769, "nll_loss": 0.21919672191143036, "rewards/accuracies": 1.0, "rewards/chosen": -1.5691028238506988e-05, "rewards/margins": 0.18202683329582214, "rewards/rejected": -0.18204253911972046, "step": 8454 }, { "epoch": 5.84716459197787, "grad_norm": 4.631357669830322, "learning_rate": 2.307130782234517e-05, "log_odds_chosen": 9.847299575805664, "log_odds_ratio": -0.0001940663787536323, "logits/chosen": -0.48244351148605347, "logits/rejected": -0.5233024954795837, "logps/chosen": -0.0012303382391110063, "logps/rejected": -1.9997535943984985, "loss": 0.7227, "nll_loss": 0.18065957725048065, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012303382391110063, "rewards/margins": 0.1998523324728012, "rewards/rejected": -0.1999753713607788, "step": 8455 }, { "epoch": 5.8478561549100965, "grad_norm": 5.259738445281982, "learning_rate": 2.3067465806055018e-05, "log_odds_chosen": 11.067523956298828, "log_odds_ratio": -8.703301136847585e-05, "logits/chosen": -0.2891809940338135, "logits/rejected": -0.3176378309726715, "logps/chosen": -0.00046851314255036414, "logps/rejected": -3.0254478454589844, "loss": 0.7031, "nll_loss": 0.1757761538028717, "rewards/accuracies": 1.0, "rewards/chosen": -4.685131716541946e-05, "rewards/margins": 0.302497923374176, "rewards/rejected": -0.30254480242729187, "step": 8456 }, { "epoch": 5.848547717842323, "grad_norm": 5.415672779083252, "learning_rate": 2.306362378976487e-05, "log_odds_chosen": 10.416397094726562, "log_odds_ratio": -8.140011050272733e-05, "logits/chosen": -0.6477643847465515, "logits/rejected": -0.7383682727813721, "logps/chosen": -0.00015809826436452568, "logps/rejected": -1.8434655666351318, "loss": 0.6798, "nll_loss": 0.16995397210121155, "rewards/accuracies": 1.0, "rewards/chosen": -1.5809826436452568e-05, "rewards/margins": 0.1843307614326477, "rewards/rejected": -0.18434655666351318, "step": 8457 }, { "epoch": 5.84923928077455, "grad_norm": 9.015101432800293, "learning_rate": 2.305978177347472e-05, "log_odds_chosen": 9.226083755493164, "log_odds_ratio": -0.048988040536642075, "logits/chosen": -0.7032791376113892, "logits/rejected": -0.7187217473983765, "logps/chosen": -0.01049938052892685, "logps/rejected": -1.548127293586731, "loss": 1.0571, "nll_loss": 0.2593871057033539, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010499380296096206, "rewards/margins": 0.1537627875804901, "rewards/rejected": -0.15481272339820862, "step": 8458 }, { "epoch": 5.849930843706777, "grad_norm": 6.82561731338501, "learning_rate": 2.3055939757184572e-05, "log_odds_chosen": 10.352500915527344, "log_odds_ratio": -0.00015991966938599944, "logits/chosen": -0.7856279015541077, "logits/rejected": -0.8276427388191223, "logps/chosen": -0.0003398554399609566, "logps/rejected": -2.0386099815368652, "loss": 0.9899, "nll_loss": 0.2474551498889923, "rewards/accuracies": 1.0, "rewards/chosen": -3.398553963052109e-05, "rewards/margins": 0.20382700860500336, "rewards/rejected": -0.20386099815368652, "step": 8459 }, { "epoch": 5.850622406639004, "grad_norm": 5.151954650878906, "learning_rate": 2.3052097740894424e-05, "log_odds_chosen": 10.417545318603516, "log_odds_ratio": -0.0009327520965598524, "logits/chosen": -0.5495296120643616, "logits/rejected": -0.5445981025695801, "logps/chosen": -0.0010982422390952706, "logps/rejected": -2.3092572689056396, "loss": 0.718, "nll_loss": 0.1794186234474182, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010982423555105925, "rewards/margins": 0.23081590235233307, "rewards/rejected": -0.23092572391033173, "step": 8460 }, { "epoch": 5.851313969571231, "grad_norm": 8.871502876281738, "learning_rate": 2.3048255724604273e-05, "log_odds_chosen": 11.5017728805542, "log_odds_ratio": -1.7553564248373732e-05, "logits/chosen": -0.6545240879058838, "logits/rejected": -0.648597776889801, "logps/chosen": -0.000455363595392555, "logps/rejected": -2.6390507221221924, "loss": 0.5919, "nll_loss": 0.14797112345695496, "rewards/accuracies": 1.0, "rewards/chosen": -4.553635881165974e-05, "rewards/margins": 0.2638595402240753, "rewards/rejected": -0.2639050781726837, "step": 8461 }, { "epoch": 5.8520055325034575, "grad_norm": 13.104969024658203, "learning_rate": 2.3044413708314126e-05, "log_odds_chosen": 9.82265567779541, "log_odds_ratio": -0.0008504237048327923, "logits/chosen": -0.2843659520149231, "logits/rejected": -0.33131715655326843, "logps/chosen": -0.0010565564734861255, "logps/rejected": -1.8024723529815674, "loss": 0.7141, "nll_loss": 0.17844170331954956, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010565565753495321, "rewards/margins": 0.1801415979862213, "rewards/rejected": -0.1802472323179245, "step": 8462 }, { "epoch": 5.852697095435684, "grad_norm": 7.004133701324463, "learning_rate": 2.3040571692023975e-05, "log_odds_chosen": 10.034337997436523, "log_odds_ratio": -4.633550997823477e-05, "logits/chosen": -0.47206172347068787, "logits/rejected": -0.4350772500038147, "logps/chosen": -0.00018470632494427264, "logps/rejected": -1.5162315368652344, "loss": 0.535, "nll_loss": 0.13374532759189606, "rewards/accuracies": 1.0, "rewards/chosen": -1.847063140303362e-05, "rewards/margins": 0.15160468220710754, "rewards/rejected": -0.15162315964698792, "step": 8463 }, { "epoch": 5.853388658367911, "grad_norm": 4.642582893371582, "learning_rate": 2.3036729675733827e-05, "log_odds_chosen": 9.964178085327148, "log_odds_ratio": -0.0001939919893629849, "logits/chosen": -0.43271350860595703, "logits/rejected": -0.43284517526626587, "logps/chosen": -0.0008543849689885974, "logps/rejected": -1.6528716087341309, "loss": 0.3916, "nll_loss": 0.09787382185459137, "rewards/accuracies": 1.0, "rewards/chosen": -8.543848525732756e-05, "rewards/margins": 0.16520172357559204, "rewards/rejected": -0.16528716683387756, "step": 8464 }, { "epoch": 5.854080221300138, "grad_norm": 7.169305801391602, "learning_rate": 2.3032887659443676e-05, "log_odds_chosen": 11.115387916564941, "log_odds_ratio": -2.8890877729281783e-05, "logits/chosen": -0.10302860289812088, "logits/rejected": -0.27272582054138184, "logps/chosen": -0.00022147822892293334, "logps/rejected": -2.375821590423584, "loss": 0.8242, "nll_loss": 0.20604780316352844, "rewards/accuracies": 1.0, "rewards/chosen": -2.2147822164697573e-05, "rewards/margins": 0.23756001889705658, "rewards/rejected": -0.23758217692375183, "step": 8465 }, { "epoch": 5.854771784232365, "grad_norm": 11.38037395477295, "learning_rate": 2.3029045643153525e-05, "log_odds_chosen": 11.165085792541504, "log_odds_ratio": -4.8637983127264306e-05, "logits/chosen": -0.23498114943504333, "logits/rejected": -0.31281086802482605, "logps/chosen": -0.00021896889666095376, "logps/rejected": -2.051191806793213, "loss": 0.7301, "nll_loss": 0.18253003060817719, "rewards/accuracies": 1.0, "rewards/chosen": -2.1896890757489018e-05, "rewards/margins": 0.2050972878932953, "rewards/rejected": -0.20511919260025024, "step": 8466 }, { "epoch": 5.855463347164592, "grad_norm": 7.457212924957275, "learning_rate": 2.3025203626863378e-05, "log_odds_chosen": 10.323047637939453, "log_odds_ratio": -0.00025923640350811183, "logits/chosen": -0.6115937829017639, "logits/rejected": -0.5882790684700012, "logps/chosen": -0.0003199709753971547, "logps/rejected": -2.251497745513916, "loss": 0.7397, "nll_loss": 0.18489199876785278, "rewards/accuracies": 1.0, "rewards/chosen": -3.1997100450098515e-05, "rewards/margins": 0.2251177728176117, "rewards/rejected": -0.22514976561069489, "step": 8467 }, { "epoch": 5.856154910096818, "grad_norm": 15.208779335021973, "learning_rate": 2.302136161057323e-05, "log_odds_chosen": 11.957347869873047, "log_odds_ratio": -8.310971679748036e-06, "logits/chosen": -0.6097729206085205, "logits/rejected": -0.7365176677703857, "logps/chosen": -7.011348498053849e-05, "logps/rejected": -2.17049503326416, "loss": 0.7336, "nll_loss": 0.1834029257297516, "rewards/accuracies": 1.0, "rewards/chosen": -7.01134922564961e-06, "rewards/margins": 0.21704250574111938, "rewards/rejected": -0.2170495092868805, "step": 8468 }, { "epoch": 5.856846473029045, "grad_norm": 7.276225566864014, "learning_rate": 2.301751959428308e-05, "log_odds_chosen": 10.8690824508667, "log_odds_ratio": -0.00017169021884910762, "logits/chosen": -0.36788827180862427, "logits/rejected": -0.4121812880039215, "logps/chosen": -0.0006079672602936625, "logps/rejected": -2.42014741897583, "loss": 0.6196, "nll_loss": 0.1548839658498764, "rewards/accuracies": 1.0, "rewards/chosen": -6.079672311898321e-05, "rewards/margins": 0.24195393919944763, "rewards/rejected": -0.24201476573944092, "step": 8469 }, { "epoch": 5.857538035961272, "grad_norm": 7.84228515625, "learning_rate": 2.301367757799293e-05, "log_odds_chosen": 8.987767219543457, "log_odds_ratio": -0.0005815964541397989, "logits/chosen": -0.4216848611831665, "logits/rejected": -0.46568408608436584, "logps/chosen": -0.000269006792223081, "logps/rejected": -1.2155593633651733, "loss": 0.7729, "nll_loss": 0.19317123293876648, "rewards/accuracies": 1.0, "rewards/chosen": -2.6900681405095384e-05, "rewards/margins": 0.12152904272079468, "rewards/rejected": -0.12155595421791077, "step": 8470 }, { "epoch": 5.858229598893499, "grad_norm": 11.462625503540039, "learning_rate": 2.3009835561702784e-05, "log_odds_chosen": 9.116348266601562, "log_odds_ratio": -0.0001660315610934049, "logits/chosen": -0.7824064493179321, "logits/rejected": -0.8699042201042175, "logps/chosen": -0.00036246480885893106, "logps/rejected": -1.1887366771697998, "loss": 0.7694, "nll_loss": 0.19234436750411987, "rewards/accuracies": 1.0, "rewards/chosen": -3.624648161348887e-05, "rewards/margins": 0.11883742362260818, "rewards/rejected": -0.11887366324663162, "step": 8471 }, { "epoch": 5.858921161825726, "grad_norm": 7.8624067306518555, "learning_rate": 2.3005993545412633e-05, "log_odds_chosen": 9.661598205566406, "log_odds_ratio": -0.00036091357469558716, "logits/chosen": -0.0834280401468277, "logits/rejected": 0.008592572063207626, "logps/chosen": -0.0011003809049725533, "logps/rejected": -2.2244157791137695, "loss": 0.8687, "nll_loss": 0.21713170409202576, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011003809777321294, "rewards/margins": 0.22233150899410248, "rewards/rejected": -0.22244155406951904, "step": 8472 }, { "epoch": 5.8596127247579535, "grad_norm": 5.414401054382324, "learning_rate": 2.3002151529122486e-05, "log_odds_chosen": 10.450469970703125, "log_odds_ratio": -0.0006526052602566779, "logits/chosen": -0.48692911863327026, "logits/rejected": -0.5583130121231079, "logps/chosen": -0.0008224752964451909, "logps/rejected": -2.2898762226104736, "loss": 0.5195, "nll_loss": 0.12981073558330536, "rewards/accuracies": 1.0, "rewards/chosen": -8.224751945817843e-05, "rewards/margins": 0.22890537977218628, "rewards/rejected": -0.22898761928081512, "step": 8473 }, { "epoch": 5.86030428769018, "grad_norm": 4.766345500946045, "learning_rate": 2.2998309512832335e-05, "log_odds_chosen": 10.06740951538086, "log_odds_ratio": -0.00011042284313589334, "logits/chosen": -0.5878250598907471, "logits/rejected": -0.6204366087913513, "logps/chosen": -0.00036290791467763484, "logps/rejected": -1.7573728561401367, "loss": 0.6145, "nll_loss": 0.1536146104335785, "rewards/accuracies": 1.0, "rewards/chosen": -3.629079583333805e-05, "rewards/margins": 0.17570099234580994, "rewards/rejected": -0.17573729157447815, "step": 8474 }, { "epoch": 5.860995850622407, "grad_norm": 7.62743616104126, "learning_rate": 2.2994467496542184e-05, "log_odds_chosen": 10.28510856628418, "log_odds_ratio": -0.00019185362907592207, "logits/chosen": -0.22833538055419922, "logits/rejected": -0.2660457491874695, "logps/chosen": -0.0003476017154753208, "logps/rejected": -1.9011214971542358, "loss": 0.6462, "nll_loss": 0.16153304278850555, "rewards/accuracies": 1.0, "rewards/chosen": -3.476017445791513e-05, "rewards/margins": 0.19007739424705505, "rewards/rejected": -0.1901121437549591, "step": 8475 }, { "epoch": 5.861687413554634, "grad_norm": 10.680320739746094, "learning_rate": 2.2990625480252036e-05, "log_odds_chosen": 11.026237487792969, "log_odds_ratio": -0.0001369381498079747, "logits/chosen": -0.4240252375602722, "logits/rejected": -0.43849751353263855, "logps/chosen": -0.000934754207264632, "logps/rejected": -2.4850332736968994, "loss": 0.6348, "nll_loss": 0.15868297219276428, "rewards/accuracies": 1.0, "rewards/chosen": -9.347541345050558e-05, "rewards/margins": 0.24840986728668213, "rewards/rejected": -0.24850334227085114, "step": 8476 }, { "epoch": 5.862378976486861, "grad_norm": 5.56638765335083, "learning_rate": 2.298678346396189e-05, "log_odds_chosen": 10.3011474609375, "log_odds_ratio": -0.00010999199002981186, "logits/chosen": -0.46072691679000854, "logits/rejected": -0.5030690431594849, "logps/chosen": -0.00021592274424619973, "logps/rejected": -1.7911337614059448, "loss": 0.6528, "nll_loss": 0.1632012128829956, "rewards/accuracies": 1.0, "rewards/chosen": -2.1592275516013615e-05, "rewards/margins": 0.17909179627895355, "rewards/rejected": -0.17911338806152344, "step": 8477 }, { "epoch": 5.863070539419088, "grad_norm": 6.392146587371826, "learning_rate": 2.2982941447671738e-05, "log_odds_chosen": 8.475443840026855, "log_odds_ratio": -0.000854872923810035, "logits/chosen": -0.28882694244384766, "logits/rejected": -0.32335835695266724, "logps/chosen": -0.0007878682808950543, "logps/rejected": -1.0920076370239258, "loss": 0.6953, "nll_loss": 0.1737397462129593, "rewards/accuracies": 1.0, "rewards/chosen": -7.878682663431391e-05, "rewards/margins": 0.10912197828292847, "rewards/rejected": -0.10920076072216034, "step": 8478 }, { "epoch": 5.8637621023513145, "grad_norm": 9.73110294342041, "learning_rate": 2.297909943138159e-05, "log_odds_chosen": 9.462718963623047, "log_odds_ratio": -0.00021635452867485583, "logits/chosen": -0.6231610774993896, "logits/rejected": -0.7080822587013245, "logps/chosen": -0.0003653976018540561, "logps/rejected": -1.4073103666305542, "loss": 0.6395, "nll_loss": 0.15986159443855286, "rewards/accuracies": 1.0, "rewards/chosen": -3.6539760913001373e-05, "rewards/margins": 0.1406944990158081, "rewards/rejected": -0.14073103666305542, "step": 8479 }, { "epoch": 5.864453665283541, "grad_norm": 7.435009479522705, "learning_rate": 2.2975257415091443e-05, "log_odds_chosen": 9.775884628295898, "log_odds_ratio": -0.0010301289148628712, "logits/chosen": -0.3054084777832031, "logits/rejected": -0.41487932205200195, "logps/chosen": -0.0004547960124909878, "logps/rejected": -1.4178364276885986, "loss": 0.7897, "nll_loss": 0.19732072949409485, "rewards/accuracies": 1.0, "rewards/chosen": -4.54796027042903e-05, "rewards/margins": 0.141738161444664, "rewards/rejected": -0.14178363978862762, "step": 8480 }, { "epoch": 5.865145228215768, "grad_norm": 8.806510925292969, "learning_rate": 2.297141539880129e-05, "log_odds_chosen": 10.327197074890137, "log_odds_ratio": -0.00014382670633494854, "logits/chosen": -0.3441201150417328, "logits/rejected": -0.33835649490356445, "logps/chosen": -0.00020382092043291777, "logps/rejected": -1.5859711170196533, "loss": 0.8675, "nll_loss": 0.21685351431369781, "rewards/accuracies": 1.0, "rewards/chosen": -2.0382089132908732e-05, "rewards/margins": 0.15857672691345215, "rewards/rejected": -0.15859711170196533, "step": 8481 }, { "epoch": 5.865836791147995, "grad_norm": 7.617472171783447, "learning_rate": 2.2967573382511144e-05, "log_odds_chosen": 10.687834739685059, "log_odds_ratio": -6.997709715506062e-05, "logits/chosen": -0.09275079518556595, "logits/rejected": -0.2638387680053711, "logps/chosen": -0.00020644580945372581, "logps/rejected": -1.818613052368164, "loss": 0.6018, "nll_loss": 0.15043112635612488, "rewards/accuracies": 1.0, "rewards/chosen": -2.0644582036766224e-05, "rewards/margins": 0.1818406581878662, "rewards/rejected": -0.1818612813949585, "step": 8482 }, { "epoch": 5.866528354080222, "grad_norm": 10.971071243286133, "learning_rate": 2.2963731366220993e-05, "log_odds_chosen": 9.51954460144043, "log_odds_ratio": -0.0009347121231257915, "logits/chosen": -0.49512526392936707, "logits/rejected": -0.43952494859695435, "logps/chosen": -0.0011396382469683886, "logps/rejected": -1.9709765911102295, "loss": 0.83, "nll_loss": 0.2073979377746582, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011396382615203038, "rewards/margins": 0.1969837099313736, "rewards/rejected": -0.19709765911102295, "step": 8483 }, { "epoch": 5.867219917012449, "grad_norm": 14.948137283325195, "learning_rate": 2.2959889349930842e-05, "log_odds_chosen": 10.836645126342773, "log_odds_ratio": -4.2490642954362556e-05, "logits/chosen": -0.5271219611167908, "logits/rejected": -0.587436318397522, "logps/chosen": -0.00012515847629401833, "logps/rejected": -1.9035587310791016, "loss": 0.8411, "nll_loss": 0.21027767658233643, "rewards/accuracies": 1.0, "rewards/chosen": -1.2515848538896535e-05, "rewards/margins": 0.19034337997436523, "rewards/rejected": -0.19035589694976807, "step": 8484 }, { "epoch": 5.867911479944675, "grad_norm": 13.141921043395996, "learning_rate": 2.2956047333640695e-05, "log_odds_chosen": 11.41067886352539, "log_odds_ratio": -5.523320214706473e-05, "logits/chosen": -1.1916627883911133, "logits/rejected": -1.240216612815857, "logps/chosen": -0.00011970465129707009, "logps/rejected": -2.4381299018859863, "loss": 0.7998, "nll_loss": 0.1999463587999344, "rewards/accuracies": 1.0, "rewards/chosen": -1.197046549350489e-05, "rewards/margins": 0.2438010424375534, "rewards/rejected": -0.24381300806999207, "step": 8485 }, { "epoch": 5.868603042876902, "grad_norm": 5.228637218475342, "learning_rate": 2.2952205317350547e-05, "log_odds_chosen": 8.226043701171875, "log_odds_ratio": -0.0019224716816097498, "logits/chosen": -0.5227464437484741, "logits/rejected": -0.5976300239562988, "logps/chosen": -0.0015107663348317146, "logps/rejected": -1.1950490474700928, "loss": 0.6394, "nll_loss": 0.15966679155826569, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015107664512470365, "rewards/margins": 0.11935383081436157, "rewards/rejected": -0.1195048987865448, "step": 8486 }, { "epoch": 5.869294605809129, "grad_norm": 5.083609104156494, "learning_rate": 2.2948363301060396e-05, "log_odds_chosen": 10.229864120483398, "log_odds_ratio": -6.585160735994577e-05, "logits/chosen": -0.5051541328430176, "logits/rejected": -0.5046700835227966, "logps/chosen": -0.0003552216512616724, "logps/rejected": -1.795076608657837, "loss": 0.5168, "nll_loss": 0.1291866898536682, "rewards/accuracies": 1.0, "rewards/chosen": -3.552216730895452e-05, "rewards/margins": 0.17947213351726532, "rewards/rejected": -0.17950765788555145, "step": 8487 }, { "epoch": 5.869986168741356, "grad_norm": 5.455687999725342, "learning_rate": 2.294452128477025e-05, "log_odds_chosen": 10.526873588562012, "log_odds_ratio": -0.0004474801244214177, "logits/chosen": -0.5565546751022339, "logits/rejected": -0.6399465799331665, "logps/chosen": -0.0018108977237716317, "logps/rejected": -2.827514886856079, "loss": 0.9766, "nll_loss": 0.24409957230091095, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018108976655639708, "rewards/margins": 0.28257042169570923, "rewards/rejected": -0.28275150060653687, "step": 8488 }, { "epoch": 5.870677731673583, "grad_norm": 6.853804111480713, "learning_rate": 2.29406792684801e-05, "log_odds_chosen": 10.368824005126953, "log_odds_ratio": -5.3320181905291975e-05, "logits/chosen": -0.259101927280426, "logits/rejected": -0.3078291714191437, "logps/chosen": -0.0004884085501544178, "logps/rejected": -2.398629665374756, "loss": 0.6442, "nll_loss": 0.16105058789253235, "rewards/accuracies": 1.0, "rewards/chosen": -4.8840855015441775e-05, "rewards/margins": 0.23981411755084991, "rewards/rejected": -0.23986297845840454, "step": 8489 }, { "epoch": 5.87136929460581, "grad_norm": 7.719622611999512, "learning_rate": 2.293683725218995e-05, "log_odds_chosen": 10.203547477722168, "log_odds_ratio": -0.0001927811244968325, "logits/chosen": -0.5728868842124939, "logits/rejected": -0.5559225082397461, "logps/chosen": -0.0007595854112878442, "logps/rejected": -2.2591919898986816, "loss": 0.7431, "nll_loss": 0.18574857711791992, "rewards/accuracies": 1.0, "rewards/chosen": -7.595854549435899e-05, "rewards/margins": 0.22584323585033417, "rewards/rejected": -0.2259191870689392, "step": 8490 }, { "epoch": 5.872060857538036, "grad_norm": 10.853219985961914, "learning_rate": 2.2932995235899803e-05, "log_odds_chosen": 10.449564933776855, "log_odds_ratio": -8.658177102915943e-05, "logits/chosen": -0.4404873549938202, "logits/rejected": -0.49861884117126465, "logps/chosen": -0.0005348151898942888, "logps/rejected": -1.8154845237731934, "loss": 1.0555, "nll_loss": 0.263855516910553, "rewards/accuracies": 1.0, "rewards/chosen": -5.3481522627407685e-05, "rewards/margins": 0.18149496614933014, "rewards/rejected": -0.18154844641685486, "step": 8491 }, { "epoch": 5.872752420470263, "grad_norm": 10.383162498474121, "learning_rate": 2.292915321960965e-05, "log_odds_chosen": 9.47683048248291, "log_odds_ratio": -0.0003712968318723142, "logits/chosen": -0.6891586780548096, "logits/rejected": -0.7439630627632141, "logps/chosen": -0.0007887138053774834, "logps/rejected": -2.0626349449157715, "loss": 0.996, "nll_loss": 0.24896374344825745, "rewards/accuracies": 1.0, "rewards/chosen": -7.887138781370595e-05, "rewards/margins": 0.20618462562561035, "rewards/rejected": -0.2062634974718094, "step": 8492 }, { "epoch": 5.87344398340249, "grad_norm": 5.884782314300537, "learning_rate": 2.29253112033195e-05, "log_odds_chosen": 10.168281555175781, "log_odds_ratio": -0.0001475010794820264, "logits/chosen": -0.8397963047027588, "logits/rejected": -0.9277424216270447, "logps/chosen": -0.0025121637154370546, "logps/rejected": -2.0754339694976807, "loss": 0.5356, "nll_loss": 0.1338898241519928, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025121637736447155, "rewards/margins": 0.20729219913482666, "rewards/rejected": -0.20754341781139374, "step": 8493 }, { "epoch": 5.874135546334717, "grad_norm": 7.1727213859558105, "learning_rate": 2.2921469187029353e-05, "log_odds_chosen": 11.261905670166016, "log_odds_ratio": -2.215427957708016e-05, "logits/chosen": -0.2568546533584595, "logits/rejected": -0.34491297602653503, "logps/chosen": -6.840811693109572e-05, "logps/rejected": -1.8215889930725098, "loss": 0.5514, "nll_loss": 0.1378553807735443, "rewards/accuracies": 1.0, "rewards/chosen": -6.840811693109572e-06, "rewards/margins": 0.18215206265449524, "rewards/rejected": -0.1821589171886444, "step": 8494 }, { "epoch": 5.874827109266944, "grad_norm": 8.279706954956055, "learning_rate": 2.2917627170739206e-05, "log_odds_chosen": 10.762760162353516, "log_odds_ratio": -0.00012183685612399131, "logits/chosen": -0.5639219284057617, "logits/rejected": -0.5642800331115723, "logps/chosen": -0.00018160277977585793, "logps/rejected": -2.2311253547668457, "loss": 0.6445, "nll_loss": 0.16111090779304504, "rewards/accuracies": 1.0, "rewards/chosen": -1.8160279068979435e-05, "rewards/margins": 0.2230944037437439, "rewards/rejected": -0.223112553358078, "step": 8495 }, { "epoch": 5.875518672199171, "grad_norm": 6.084949970245361, "learning_rate": 2.2913785154449055e-05, "log_odds_chosen": 11.30678939819336, "log_odds_ratio": -2.2296328097581863e-05, "logits/chosen": -0.5212001800537109, "logits/rejected": -0.5491906404495239, "logps/chosen": -0.000233797516557388, "logps/rejected": -2.737969398498535, "loss": 0.5014, "nll_loss": 0.12534253299236298, "rewards/accuracies": 1.0, "rewards/chosen": -2.3379752747132443e-05, "rewards/margins": 0.27377355098724365, "rewards/rejected": -0.2737969160079956, "step": 8496 }, { "epoch": 5.876210235131397, "grad_norm": 7.121036052703857, "learning_rate": 2.2909943138158907e-05, "log_odds_chosen": 10.899856567382812, "log_odds_ratio": -3.422797817620449e-05, "logits/chosen": -0.7306734323501587, "logits/rejected": -0.8067958354949951, "logps/chosen": -0.0001387650117976591, "logps/rejected": -2.044618606567383, "loss": 0.5315, "nll_loss": 0.13288140296936035, "rewards/accuracies": 1.0, "rewards/chosen": -1.3876499906473327e-05, "rewards/margins": 0.20444798469543457, "rewards/rejected": -0.20446187257766724, "step": 8497 }, { "epoch": 5.876901798063624, "grad_norm": 9.996922492980957, "learning_rate": 2.290610112186876e-05, "log_odds_chosen": 10.232194900512695, "log_odds_ratio": -0.0001114396145567298, "logits/chosen": -0.5728435516357422, "logits/rejected": -0.629746675491333, "logps/chosen": -0.0003327956947032362, "logps/rejected": -1.8851649761199951, "loss": 0.8212, "nll_loss": 0.20528879761695862, "rewards/accuracies": 1.0, "rewards/chosen": -3.3279571653110906e-05, "rewards/margins": 0.18848322331905365, "rewards/rejected": -0.1885165125131607, "step": 8498 }, { "epoch": 5.877593360995851, "grad_norm": 7.2498579025268555, "learning_rate": 2.290225910557861e-05, "log_odds_chosen": 10.74060344696045, "log_odds_ratio": -6.614306039409712e-05, "logits/chosen": -0.4745241701602936, "logits/rejected": -0.5490915179252625, "logps/chosen": -0.00032557419035583735, "logps/rejected": -2.3486926555633545, "loss": 0.635, "nll_loss": 0.15873301029205322, "rewards/accuracies": 1.0, "rewards/chosen": -3.255742194596678e-05, "rewards/margins": 0.23483669757843018, "rewards/rejected": -0.23486925661563873, "step": 8499 }, { "epoch": 5.878284923928078, "grad_norm": 5.14827299118042, "learning_rate": 2.289841708928846e-05, "log_odds_chosen": 10.924230575561523, "log_odds_ratio": -2.3871401936048642e-05, "logits/chosen": -0.4564250707626343, "logits/rejected": -0.4732181131839752, "logps/chosen": -0.00027109961956739426, "logps/rejected": -2.2624363899230957, "loss": 0.5444, "nll_loss": 0.13610848784446716, "rewards/accuracies": 1.0, "rewards/chosen": -2.7109967049909756e-05, "rewards/margins": 0.22621655464172363, "rewards/rejected": -0.22624364495277405, "step": 8500 }, { "epoch": 5.878976486860305, "grad_norm": 5.078951835632324, "learning_rate": 2.289457507299831e-05, "log_odds_chosen": 9.571325302124023, "log_odds_ratio": -0.00027377932565286756, "logits/chosen": -0.7607353925704956, "logits/rejected": -0.7960146069526672, "logps/chosen": -0.000566540053114295, "logps/rejected": -1.4324028491973877, "loss": 0.5346, "nll_loss": 0.13362522423267365, "rewards/accuracies": 1.0, "rewards/chosen": -5.665401113219559e-05, "rewards/margins": 0.1431836485862732, "rewards/rejected": -0.143240287899971, "step": 8501 }, { "epoch": 5.8796680497925315, "grad_norm": 5.351574420928955, "learning_rate": 2.289073305670816e-05, "log_odds_chosen": 10.164881706237793, "log_odds_ratio": -9.443954331800342e-05, "logits/chosen": -0.7071594595909119, "logits/rejected": -0.6985030770301819, "logps/chosen": -0.0006836583488620818, "logps/rejected": -2.3781793117523193, "loss": 0.9594, "nll_loss": 0.2398291975259781, "rewards/accuracies": 1.0, "rewards/chosen": -6.836582906544209e-05, "rewards/margins": 0.23774956166744232, "rewards/rejected": -0.2378179430961609, "step": 8502 }, { "epoch": 5.880359612724758, "grad_norm": 7.638421058654785, "learning_rate": 2.288689104041801e-05, "log_odds_chosen": 9.617386817932129, "log_odds_ratio": -0.0010062733199447393, "logits/chosen": -0.7072057127952576, "logits/rejected": -0.7967448234558105, "logps/chosen": -0.0010916423052549362, "logps/rejected": -2.1913113594055176, "loss": 1.4487, "nll_loss": 0.36207252740859985, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010916422615991905, "rewards/margins": 0.2190219610929489, "rewards/rejected": -0.21913112699985504, "step": 8503 }, { "epoch": 5.881051175656985, "grad_norm": 19.64476776123047, "learning_rate": 2.2883049024127864e-05, "log_odds_chosen": 11.248300552368164, "log_odds_ratio": -2.4759872758295387e-05, "logits/chosen": -0.17140576243400574, "logits/rejected": -0.29898712038993835, "logps/chosen": -0.00019525880634319037, "logps/rejected": -2.35072922706604, "loss": 0.8572, "nll_loss": 0.21430210769176483, "rewards/accuracies": 1.0, "rewards/chosen": -1.9525879906723276e-05, "rewards/margins": 0.2350533902645111, "rewards/rejected": -0.23507292568683624, "step": 8504 }, { "epoch": 5.881742738589212, "grad_norm": 5.020126819610596, "learning_rate": 2.2879207007837713e-05, "log_odds_chosen": 11.25761604309082, "log_odds_ratio": -0.00011057972005801275, "logits/chosen": -0.24836036562919617, "logits/rejected": -0.3698427975177765, "logps/chosen": -7.968972204253078e-05, "logps/rejected": -2.058882713317871, "loss": 1.0456, "nll_loss": 0.26139941811561584, "rewards/accuracies": 1.0, "rewards/chosen": -7.968971658556256e-06, "rewards/margins": 0.2058803141117096, "rewards/rejected": -0.2058882713317871, "step": 8505 }, { "epoch": 5.882434301521439, "grad_norm": 9.156305313110352, "learning_rate": 2.2875364991547565e-05, "log_odds_chosen": 11.667762756347656, "log_odds_ratio": -1.598697417648509e-05, "logits/chosen": -0.29898375272750854, "logits/rejected": -0.36718064546585083, "logps/chosen": -0.0001733689132379368, "logps/rejected": -2.839731216430664, "loss": 0.8435, "nll_loss": 0.21087408065795898, "rewards/accuracies": 1.0, "rewards/chosen": -1.7336889868602157e-05, "rewards/margins": 0.28395578265190125, "rewards/rejected": -0.2839730978012085, "step": 8506 }, { "epoch": 5.883125864453666, "grad_norm": 16.775108337402344, "learning_rate": 2.2871522975257418e-05, "log_odds_chosen": 10.418400764465332, "log_odds_ratio": -5.1155384426238015e-05, "logits/chosen": -0.3068312108516693, "logits/rejected": -0.30754444003105164, "logps/chosen": -0.000983749981969595, "logps/rejected": -2.654691457748413, "loss": 0.7028, "nll_loss": 0.17570620775222778, "rewards/accuracies": 1.0, "rewards/chosen": -9.837499965215102e-05, "rewards/margins": 0.26537075638771057, "rewards/rejected": -0.26546913385391235, "step": 8507 }, { "epoch": 5.8838174273858925, "grad_norm": 23.888675689697266, "learning_rate": 2.2867680958967267e-05, "log_odds_chosen": 9.692110061645508, "log_odds_ratio": -0.026897268369793892, "logits/chosen": -0.5336145758628845, "logits/rejected": -0.5951070785522461, "logps/chosen": -0.05457576364278793, "logps/rejected": -2.692565679550171, "loss": 0.6847, "nll_loss": 0.16849052906036377, "rewards/accuracies": 1.0, "rewards/chosen": -0.005457576364278793, "rewards/margins": 0.2637990117073059, "rewards/rejected": -0.2692565619945526, "step": 8508 }, { "epoch": 5.884508990318119, "grad_norm": 3.8887486457824707, "learning_rate": 2.286383894267712e-05, "log_odds_chosen": 10.369190216064453, "log_odds_ratio": -4.303463356336579e-05, "logits/chosen": -0.3863868713378906, "logits/rejected": -0.4776327908039093, "logps/chosen": -0.0003454264369793236, "logps/rejected": -1.9126297235488892, "loss": 0.4677, "nll_loss": 0.11691690981388092, "rewards/accuracies": 1.0, "rewards/chosen": -3.4542645153123885e-05, "rewards/margins": 0.191228449344635, "rewards/rejected": -0.19126299023628235, "step": 8509 }, { "epoch": 5.885200553250346, "grad_norm": 11.780563354492188, "learning_rate": 2.285999692638697e-05, "log_odds_chosen": 10.295709609985352, "log_odds_ratio": -0.00017475405184086412, "logits/chosen": -0.3894461691379547, "logits/rejected": -0.4314771890640259, "logps/chosen": -0.00022790816728956997, "logps/rejected": -2.005774974822998, "loss": 0.8555, "nll_loss": 0.21384881436824799, "rewards/accuracies": 1.0, "rewards/chosen": -2.2790816728956997e-05, "rewards/margins": 0.20055469870567322, "rewards/rejected": -0.2005774825811386, "step": 8510 }, { "epoch": 5.885892116182573, "grad_norm": 8.197593688964844, "learning_rate": 2.285615491009682e-05, "log_odds_chosen": 10.112564086914062, "log_odds_ratio": -0.0003686411655507982, "logits/chosen": -0.5047045946121216, "logits/rejected": -0.4849182963371277, "logps/chosen": -0.0006123175262473524, "logps/rejected": -1.7913522720336914, "loss": 0.8222, "nll_loss": 0.20551498234272003, "rewards/accuracies": 1.0, "rewards/chosen": -6.123175990069285e-05, "rewards/margins": 0.1790739893913269, "rewards/rejected": -0.17913523316383362, "step": 8511 }, { "epoch": 5.8865836791148, "grad_norm": 4.952797889709473, "learning_rate": 2.285231289380667e-05, "log_odds_chosen": 11.156974792480469, "log_odds_ratio": -0.00033142950269393623, "logits/chosen": -0.6084975600242615, "logits/rejected": -0.6908661723136902, "logps/chosen": -0.0005794093594886363, "logps/rejected": -2.6508893966674805, "loss": 0.7902, "nll_loss": 0.197504460811615, "rewards/accuracies": 1.0, "rewards/chosen": -5.7940935221267864e-05, "rewards/margins": 0.26503100991249084, "rewards/rejected": -0.2650889456272125, "step": 8512 }, { "epoch": 5.887275242047027, "grad_norm": 8.416805267333984, "learning_rate": 2.2848470877516522e-05, "log_odds_chosen": 10.48718547821045, "log_odds_ratio": -4.281369183445349e-05, "logits/chosen": -0.2424652874469757, "logits/rejected": -0.38805943727493286, "logps/chosen": -0.0007869750843383372, "logps/rejected": -2.3322014808654785, "loss": 0.767, "nll_loss": 0.19175630807876587, "rewards/accuracies": 1.0, "rewards/chosen": -7.869752153055742e-05, "rewards/margins": 0.2331414371728897, "rewards/rejected": -0.2332201451063156, "step": 8513 }, { "epoch": 5.8879668049792535, "grad_norm": 6.996028423309326, "learning_rate": 2.284462886122637e-05, "log_odds_chosen": 10.321379661560059, "log_odds_ratio": -5.5568940297234803e-05, "logits/chosen": -0.36068078875541687, "logits/rejected": -0.49022531509399414, "logps/chosen": -0.00025417341385036707, "logps/rejected": -1.7573280334472656, "loss": 0.5471, "nll_loss": 0.13676485419273376, "rewards/accuracies": 1.0, "rewards/chosen": -2.5417339202249423e-05, "rewards/margins": 0.1757073849439621, "rewards/rejected": -0.1757328063249588, "step": 8514 }, { "epoch": 5.88865836791148, "grad_norm": 7.647250175476074, "learning_rate": 2.2840786844936224e-05, "log_odds_chosen": 11.126952171325684, "log_odds_ratio": -3.1055500585353e-05, "logits/chosen": 0.01863679103553295, "logits/rejected": -0.0044981446117162704, "logps/chosen": -0.00016973679885268211, "logps/rejected": -2.32952880859375, "loss": 0.9461, "nll_loss": 0.2365235984325409, "rewards/accuracies": 1.0, "rewards/chosen": -1.6973681340459734e-05, "rewards/margins": 0.23293590545654297, "rewards/rejected": -0.23295289278030396, "step": 8515 }, { "epoch": 5.889349930843707, "grad_norm": 7.024383068084717, "learning_rate": 2.2836944828646076e-05, "log_odds_chosen": 10.434898376464844, "log_odds_ratio": -7.665654993616045e-05, "logits/chosen": -0.5486772656440735, "logits/rejected": -0.6262264251708984, "logps/chosen": -0.00029988729511387646, "logps/rejected": -1.8727209568023682, "loss": 0.7843, "nll_loss": 0.1960609257221222, "rewards/accuracies": 1.0, "rewards/chosen": -2.998873060278129e-05, "rewards/margins": 0.18724212050437927, "rewards/rejected": -0.1872721016407013, "step": 8516 }, { "epoch": 5.890041493775934, "grad_norm": 9.35839557647705, "learning_rate": 2.2833102812355925e-05, "log_odds_chosen": 11.336438179016113, "log_odds_ratio": -3.796366945607588e-05, "logits/chosen": -0.04211053624749184, "logits/rejected": -0.026684284210205078, "logps/chosen": -0.00012434335076250136, "logps/rejected": -2.3859617710113525, "loss": 1.1471, "nll_loss": 0.28675931692123413, "rewards/accuracies": 1.0, "rewards/chosen": -1.2434335076250136e-05, "rewards/margins": 0.2385837435722351, "rewards/rejected": -0.23859617114067078, "step": 8517 }, { "epoch": 5.890733056708161, "grad_norm": 5.5423197746276855, "learning_rate": 2.2829260796065778e-05, "log_odds_chosen": 10.303956985473633, "log_odds_ratio": -0.00018886705220211297, "logits/chosen": -0.3222675621509552, "logits/rejected": -0.38316380977630615, "logps/chosen": -0.0010156576754525304, "logps/rejected": -1.6832990646362305, "loss": 0.4919, "nll_loss": 0.12294995039701462, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010156577627640218, "rewards/margins": 0.16822832822799683, "rewards/rejected": -0.16832990944385529, "step": 8518 }, { "epoch": 5.891424619640388, "grad_norm": 5.728291034698486, "learning_rate": 2.282541877977563e-05, "log_odds_chosen": 9.790092468261719, "log_odds_ratio": -0.0003914476837962866, "logits/chosen": -0.2519989013671875, "logits/rejected": -0.24132508039474487, "logps/chosen": -0.0002517211833037436, "logps/rejected": -1.394624948501587, "loss": 0.6545, "nll_loss": 0.16358324885368347, "rewards/accuracies": 1.0, "rewards/chosen": -2.51721176027786e-05, "rewards/margins": 0.13943731784820557, "rewards/rejected": -0.13946250081062317, "step": 8519 }, { "epoch": 5.8921161825726145, "grad_norm": 9.926775932312012, "learning_rate": 2.282157676348548e-05, "log_odds_chosen": 9.664847373962402, "log_odds_ratio": -0.0005159341380931437, "logits/chosen": -0.5023306012153625, "logits/rejected": -0.516615629196167, "logps/chosen": -0.0005643821787089109, "logps/rejected": -1.4718093872070312, "loss": 0.787, "nll_loss": 0.19668985903263092, "rewards/accuracies": 1.0, "rewards/chosen": -5.643822078127414e-05, "rewards/margins": 0.1471245139837265, "rewards/rejected": -0.1471809446811676, "step": 8520 }, { "epoch": 5.892807745504841, "grad_norm": 6.766112327575684, "learning_rate": 2.281773474719533e-05, "log_odds_chosen": 10.125080108642578, "log_odds_ratio": -0.0006498443544842303, "logits/chosen": -0.47156214714050293, "logits/rejected": -0.49221134185791016, "logps/chosen": -0.0007708219927735627, "logps/rejected": -1.8949339389801025, "loss": 0.5459, "nll_loss": 0.13641297817230225, "rewards/accuracies": 1.0, "rewards/chosen": -7.708220073254779e-05, "rewards/margins": 0.1894163191318512, "rewards/rejected": -0.18949341773986816, "step": 8521 }, { "epoch": 5.893499308437068, "grad_norm": 6.54794979095459, "learning_rate": 2.281389273090518e-05, "log_odds_chosen": 10.456282615661621, "log_odds_ratio": -0.00014503316197078675, "logits/chosen": -0.5151886940002441, "logits/rejected": -0.6777184009552002, "logps/chosen": -0.0003617224283516407, "logps/rejected": -2.0949783325195312, "loss": 0.9168, "nll_loss": 0.22918158769607544, "rewards/accuracies": 1.0, "rewards/chosen": -3.6172245017951354e-05, "rewards/margins": 0.20946168899536133, "rewards/rejected": -0.2094978541135788, "step": 8522 }, { "epoch": 5.894190871369295, "grad_norm": 4.836280822753906, "learning_rate": 2.281005071461503e-05, "log_odds_chosen": 10.79955005645752, "log_odds_ratio": -7.796000136295334e-05, "logits/chosen": -0.6844509840011597, "logits/rejected": -0.7414740920066833, "logps/chosen": -0.0005077764508314431, "logps/rejected": -2.5114388465881348, "loss": 0.6021, "nll_loss": 0.1505274474620819, "rewards/accuracies": 1.0, "rewards/chosen": -5.0777649448718876e-05, "rewards/margins": 0.2510930895805359, "rewards/rejected": -0.2511438727378845, "step": 8523 }, { "epoch": 5.894882434301522, "grad_norm": 7.654256820678711, "learning_rate": 2.2806208698324882e-05, "log_odds_chosen": 10.270405769348145, "log_odds_ratio": -0.000742147967685014, "logits/chosen": -0.23735153675079346, "logits/rejected": -0.312753289937973, "logps/chosen": -0.000335223157890141, "logps/rejected": -1.793253779411316, "loss": 0.5804, "nll_loss": 0.14502355456352234, "rewards/accuracies": 1.0, "rewards/chosen": -3.35223157890141e-05, "rewards/margins": 0.17929185926914215, "rewards/rejected": -0.1793254017829895, "step": 8524 }, { "epoch": 5.895573997233749, "grad_norm": 8.170629501342773, "learning_rate": 2.2802366682034735e-05, "log_odds_chosen": 10.913341522216797, "log_odds_ratio": -7.966715202201158e-05, "logits/chosen": -0.7726643681526184, "logits/rejected": -0.8593516945838928, "logps/chosen": -0.0004093981988262385, "logps/rejected": -2.7591519355773926, "loss": 0.9395, "nll_loss": 0.2348717749118805, "rewards/accuracies": 1.0, "rewards/chosen": -4.093982352060266e-05, "rewards/margins": 0.2758742570877075, "rewards/rejected": -0.2759152054786682, "step": 8525 }, { "epoch": 5.8962655601659755, "grad_norm": 5.694158554077148, "learning_rate": 2.2798524665744584e-05, "log_odds_chosen": 10.442919731140137, "log_odds_ratio": -7.781775639159605e-05, "logits/chosen": -0.2508584260940552, "logits/rejected": -0.4683656692504883, "logps/chosen": -0.00016275026428047568, "logps/rejected": -1.475663423538208, "loss": 0.9346, "nll_loss": 0.23365084826946259, "rewards/accuracies": 1.0, "rewards/chosen": -1.627502751944121e-05, "rewards/margins": 0.1475500762462616, "rewards/rejected": -0.14756636321544647, "step": 8526 }, { "epoch": 5.896957123098202, "grad_norm": 6.7789177894592285, "learning_rate": 2.2794682649454436e-05, "log_odds_chosen": 9.294227600097656, "log_odds_ratio": -0.024507587775588036, "logits/chosen": -0.6571269035339355, "logits/rejected": -0.7861886620521545, "logps/chosen": -0.008058538660407066, "logps/rejected": -1.9478745460510254, "loss": 0.8478, "nll_loss": 0.20949037373065948, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008058538078330457, "rewards/margins": 0.1939816027879715, "rewards/rejected": -0.19478744268417358, "step": 8527 }, { "epoch": 5.897648686030429, "grad_norm": 6.08666467666626, "learning_rate": 2.279084063316429e-05, "log_odds_chosen": 10.613787651062012, "log_odds_ratio": -9.840642451308668e-05, "logits/chosen": -0.7829938530921936, "logits/rejected": -0.8247061967849731, "logps/chosen": -0.0008157877600751817, "logps/rejected": -2.3705320358276367, "loss": 0.7853, "nll_loss": 0.19631502032279968, "rewards/accuracies": 1.0, "rewards/chosen": -8.15787716419436e-05, "rewards/margins": 0.2369716465473175, "rewards/rejected": -0.23705321550369263, "step": 8528 }, { "epoch": 5.898340248962656, "grad_norm": 7.659102439880371, "learning_rate": 2.2786998616874138e-05, "log_odds_chosen": 11.211624145507812, "log_odds_ratio": -0.0016679400578141212, "logits/chosen": -0.029983580112457275, "logits/rejected": -0.10546886920928955, "logps/chosen": -0.002418810036033392, "logps/rejected": -2.425703287124634, "loss": 0.994, "nll_loss": 0.2483421117067337, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024188101815525442, "rewards/margins": 0.2423284649848938, "rewards/rejected": -0.24257034063339233, "step": 8529 }, { "epoch": 5.899031811894883, "grad_norm": 8.744182586669922, "learning_rate": 2.2783156600583987e-05, "log_odds_chosen": 10.308116912841797, "log_odds_ratio": -0.00011377524060662836, "logits/chosen": -0.3487590551376343, "logits/rejected": -0.458489328622818, "logps/chosen": -0.0003950122045353055, "logps/rejected": -2.2673113346099854, "loss": 0.6696, "nll_loss": 0.16739189624786377, "rewards/accuracies": 1.0, "rewards/chosen": -3.950122118112631e-05, "rewards/margins": 0.22669163346290588, "rewards/rejected": -0.22673112154006958, "step": 8530 }, { "epoch": 5.89972337482711, "grad_norm": 6.020569324493408, "learning_rate": 2.2779314584293836e-05, "log_odds_chosen": 10.968600273132324, "log_odds_ratio": -3.5992925404571e-05, "logits/chosen": -0.5196901559829712, "logits/rejected": -0.446166068315506, "logps/chosen": -0.0002649162197485566, "logps/rejected": -2.243401050567627, "loss": 0.5834, "nll_loss": 0.14584845304489136, "rewards/accuracies": 1.0, "rewards/chosen": -2.6491623430047184e-05, "rewards/margins": 0.22431360185146332, "rewards/rejected": -0.22434011101722717, "step": 8531 }, { "epoch": 5.9004149377593365, "grad_norm": 6.1547675132751465, "learning_rate": 2.2775472568003688e-05, "log_odds_chosen": 10.011276245117188, "log_odds_ratio": -0.0003909708757419139, "logits/chosen": -0.654976487159729, "logits/rejected": -0.6612088680267334, "logps/chosen": -0.00025521713541820645, "logps/rejected": -1.1476695537567139, "loss": 0.6228, "nll_loss": 0.15565167367458344, "rewards/accuracies": 1.0, "rewards/chosen": -2.5521714633214287e-05, "rewards/margins": 0.11474142968654633, "rewards/rejected": -0.11476695537567139, "step": 8532 }, { "epoch": 5.901106500691563, "grad_norm": 12.199445724487305, "learning_rate": 2.277163055171354e-05, "log_odds_chosen": 9.943747520446777, "log_odds_ratio": -0.0003530299582052976, "logits/chosen": -0.8974602818489075, "logits/rejected": -0.8420943021774292, "logps/chosen": -0.0015661438228562474, "logps/rejected": -2.1415584087371826, "loss": 0.6201, "nll_loss": 0.15499332547187805, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015661439101677388, "rewards/margins": 0.2139992117881775, "rewards/rejected": -0.21415582299232483, "step": 8533 }, { "epoch": 5.90179806362379, "grad_norm": 11.708760261535645, "learning_rate": 2.276778853542339e-05, "log_odds_chosen": 10.785688400268555, "log_odds_ratio": -3.1557647162117064e-05, "logits/chosen": -0.6444445252418518, "logits/rejected": -0.6951552629470825, "logps/chosen": -0.0002593470271676779, "logps/rejected": -2.308858871459961, "loss": 1.0258, "nll_loss": 0.25645360350608826, "rewards/accuracies": 1.0, "rewards/chosen": -2.593470344436355e-05, "rewards/margins": 0.23085996508598328, "rewards/rejected": -0.23088589310646057, "step": 8534 }, { "epoch": 5.902489626556017, "grad_norm": 6.170323848724365, "learning_rate": 2.2763946519133242e-05, "log_odds_chosen": 11.332568168640137, "log_odds_ratio": -0.0004851693520322442, "logits/chosen": -0.5559641122817993, "logits/rejected": -0.5363360047340393, "logps/chosen": -0.0006352619384415448, "logps/rejected": -2.3492929935455322, "loss": 0.6353, "nll_loss": 0.158772274851799, "rewards/accuracies": 1.0, "rewards/chosen": -6.352619675453752e-05, "rewards/margins": 0.23486578464508057, "rewards/rejected": -0.23492930829524994, "step": 8535 }, { "epoch": 5.903181189488244, "grad_norm": 5.993259429931641, "learning_rate": 2.2760104502843095e-05, "log_odds_chosen": 9.582369804382324, "log_odds_ratio": -0.02115216664969921, "logits/chosen": -0.2557261288166046, "logits/rejected": -0.2677662670612335, "logps/chosen": -0.005261305253952742, "logps/rejected": -1.5212640762329102, "loss": 0.9547, "nll_loss": 0.2365698367357254, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005261304322630167, "rewards/margins": 0.15160028636455536, "rewards/rejected": -0.15212641656398773, "step": 8536 }, { "epoch": 5.903872752420471, "grad_norm": 6.829483509063721, "learning_rate": 2.2756262486552944e-05, "log_odds_chosen": 11.096105575561523, "log_odds_ratio": -3.8061079976614565e-05, "logits/chosen": -0.7347974181175232, "logits/rejected": -0.7576143741607666, "logps/chosen": -0.00016978240455500782, "logps/rejected": -2.360802173614502, "loss": 0.739, "nll_loss": 0.18474218249320984, "rewards/accuracies": 1.0, "rewards/chosen": -1.6978241546894424e-05, "rewards/margins": 0.23606324195861816, "rewards/rejected": -0.23608021438121796, "step": 8537 }, { "epoch": 5.904564315352697, "grad_norm": 6.303608417510986, "learning_rate": 2.2752420470262796e-05, "log_odds_chosen": 10.607667922973633, "log_odds_ratio": -0.00042963639134541154, "logits/chosen": -0.5105722546577454, "logits/rejected": -0.6554079651832581, "logps/chosen": -0.0006353295175358653, "logps/rejected": -2.2140278816223145, "loss": 0.5539, "nll_loss": 0.13843461871147156, "rewards/accuracies": 1.0, "rewards/chosen": -6.353294156724587e-05, "rewards/margins": 0.22133925557136536, "rewards/rejected": -0.22140279412269592, "step": 8538 }, { "epoch": 5.905255878284924, "grad_norm": 11.24542236328125, "learning_rate": 2.2748578453972645e-05, "log_odds_chosen": 7.889625549316406, "log_odds_ratio": -0.3360826373100281, "logits/chosen": -0.5862733125686646, "logits/rejected": -0.6579238176345825, "logps/chosen": -0.056968431919813156, "logps/rejected": -2.314674139022827, "loss": 0.9672, "nll_loss": 0.2081890106201172, "rewards/accuracies": 0.875, "rewards/chosen": -0.005696842912584543, "rewards/margins": 0.22577057778835297, "rewards/rejected": -0.23146742582321167, "step": 8539 }, { "epoch": 5.905947441217151, "grad_norm": 8.91585636138916, "learning_rate": 2.2744736437682494e-05, "log_odds_chosen": 9.990187644958496, "log_odds_ratio": -0.0002307224931428209, "logits/chosen": -0.5497514009475708, "logits/rejected": -0.5800761580467224, "logps/chosen": -0.0003133144346065819, "logps/rejected": -1.416618824005127, "loss": 0.7085, "nll_loss": 0.1771116703748703, "rewards/accuracies": 1.0, "rewards/chosen": -3.1331444915849715e-05, "rewards/margins": 0.14163057506084442, "rewards/rejected": -0.1416618973016739, "step": 8540 }, { "epoch": 5.906639004149378, "grad_norm": 16.768951416015625, "learning_rate": 2.2740894421392347e-05, "log_odds_chosen": 11.527608871459961, "log_odds_ratio": -1.8020944480667822e-05, "logits/chosen": -0.24016182124614716, "logits/rejected": -0.2821471095085144, "logps/chosen": -0.00040088381501846015, "logps/rejected": -2.8893532752990723, "loss": 0.6619, "nll_loss": 0.16548146307468414, "rewards/accuracies": 1.0, "rewards/chosen": -4.00883836846333e-05, "rewards/margins": 0.2888951897621155, "rewards/rejected": -0.2889353036880493, "step": 8541 }, { "epoch": 5.907330567081605, "grad_norm": 6.422243118286133, "learning_rate": 2.27370524051022e-05, "log_odds_chosen": 10.793885231018066, "log_odds_ratio": -0.0001332794490735978, "logits/chosen": -0.20492669939994812, "logits/rejected": -0.28338170051574707, "logps/chosen": -0.0005102384602651, "logps/rejected": -2.7783327102661133, "loss": 0.6223, "nll_loss": 0.15556910634040833, "rewards/accuracies": 1.0, "rewards/chosen": -5.102384602651e-05, "rewards/margins": 0.27778226137161255, "rewards/rejected": -0.2778332531452179, "step": 8542 }, { "epoch": 5.908022130013832, "grad_norm": 8.342117309570312, "learning_rate": 2.2733210388812048e-05, "log_odds_chosen": 8.009577751159668, "log_odds_ratio": -0.004809632431715727, "logits/chosen": -0.4651964008808136, "logits/rejected": -0.5181519985198975, "logps/chosen": -0.0025818380527198315, "logps/rejected": -1.4038352966308594, "loss": 0.8466, "nll_loss": 0.21116048097610474, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025818380527198315, "rewards/margins": 0.1401253491640091, "rewards/rejected": -0.1403835266828537, "step": 8543 }, { "epoch": 5.908713692946058, "grad_norm": 7.213745594024658, "learning_rate": 2.27293683725219e-05, "log_odds_chosen": 10.365843772888184, "log_odds_ratio": -0.00014006158744450659, "logits/chosen": -0.24567002058029175, "logits/rejected": -0.297152042388916, "logps/chosen": -0.0009655110188759863, "logps/rejected": -2.6441903114318848, "loss": 0.6859, "nll_loss": 0.17147107422351837, "rewards/accuracies": 1.0, "rewards/chosen": -9.65511062531732e-05, "rewards/margins": 0.26432251930236816, "rewards/rejected": -0.2644190490245819, "step": 8544 }, { "epoch": 5.909405255878285, "grad_norm": 9.711751937866211, "learning_rate": 2.2725526356231753e-05, "log_odds_chosen": 10.276808738708496, "log_odds_ratio": -8.432636968791485e-05, "logits/chosen": -0.6451094150543213, "logits/rejected": -0.7156064510345459, "logps/chosen": -0.0004483590309973806, "logps/rejected": -1.7663764953613281, "loss": 1.1487, "nll_loss": 0.2871660590171814, "rewards/accuracies": 1.0, "rewards/chosen": -4.483590601012111e-05, "rewards/margins": 0.17659281194210052, "rewards/rejected": -0.1766376495361328, "step": 8545 }, { "epoch": 5.910096818810512, "grad_norm": 8.632865905761719, "learning_rate": 2.2721684339941602e-05, "log_odds_chosen": 10.930590629577637, "log_odds_ratio": -0.0001606600999366492, "logits/chosen": -0.5400450825691223, "logits/rejected": -0.6316708326339722, "logps/chosen": -0.00023115398653317243, "logps/rejected": -2.519530773162842, "loss": 0.9548, "nll_loss": 0.23867672681808472, "rewards/accuracies": 1.0, "rewards/chosen": -2.3115399017115124e-05, "rewards/margins": 0.2519299387931824, "rewards/rejected": -0.2519530653953552, "step": 8546 }, { "epoch": 5.910788381742739, "grad_norm": 9.776907920837402, "learning_rate": 2.2717842323651455e-05, "log_odds_chosen": 10.466167449951172, "log_odds_ratio": -0.00017565919551998377, "logits/chosen": -0.25509530305862427, "logits/rejected": -0.4272596538066864, "logps/chosen": -0.0004573424521367997, "logps/rejected": -2.5414772033691406, "loss": 0.9307, "nll_loss": 0.23266099393367767, "rewards/accuracies": 1.0, "rewards/chosen": -4.573424666887149e-05, "rewards/margins": 0.25410202145576477, "rewards/rejected": -0.2541477382183075, "step": 8547 }, { "epoch": 5.911479944674966, "grad_norm": 4.618690490722656, "learning_rate": 2.2714000307361304e-05, "log_odds_chosen": 10.580927848815918, "log_odds_ratio": -5.235819116933271e-05, "logits/chosen": -0.23213829100131989, "logits/rejected": -0.3192077875137329, "logps/chosen": -0.008321152068674564, "logps/rejected": -3.3467907905578613, "loss": 0.6374, "nll_loss": 0.15933555364608765, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008321151835843921, "rewards/margins": 0.33384695649147034, "rewards/rejected": -0.33467909693717957, "step": 8548 }, { "epoch": 5.912171507607193, "grad_norm": 5.7036333084106445, "learning_rate": 2.2710158291071153e-05, "log_odds_chosen": 10.605021476745605, "log_odds_ratio": -4.936326513416134e-05, "logits/chosen": -0.41037696599960327, "logits/rejected": -0.5515607595443726, "logps/chosen": -0.00024102001043502241, "logps/rejected": -2.1665050983428955, "loss": 0.7821, "nll_loss": 0.19552463293075562, "rewards/accuracies": 1.0, "rewards/chosen": -2.4102002498693764e-05, "rewards/margins": 0.21662640571594238, "rewards/rejected": -0.21665051579475403, "step": 8549 }, { "epoch": 5.912863070539419, "grad_norm": 13.772993087768555, "learning_rate": 2.2706316274781005e-05, "log_odds_chosen": 12.158016204833984, "log_odds_ratio": -1.6254996808129363e-05, "logits/chosen": -0.5372356176376343, "logits/rejected": -0.6390924453735352, "logps/chosen": -9.869621135294437e-05, "logps/rejected": -2.9592461585998535, "loss": 0.846, "nll_loss": 0.21148821711540222, "rewards/accuracies": 1.0, "rewards/chosen": -9.869620953395497e-06, "rewards/margins": 0.29591473937034607, "rewards/rejected": -0.2959246039390564, "step": 8550 }, { "epoch": 5.913554633471646, "grad_norm": 8.666646003723145, "learning_rate": 2.2702474258490858e-05, "log_odds_chosen": 10.557194709777832, "log_odds_ratio": -7.307058695005253e-05, "logits/chosen": -0.8025202751159668, "logits/rejected": -0.8761454820632935, "logps/chosen": -0.00041048714774660766, "logps/rejected": -2.3817849159240723, "loss": 0.7585, "nll_loss": 0.1896216869354248, "rewards/accuracies": 1.0, "rewards/chosen": -4.1048719140235335e-05, "rewards/margins": 0.23813745379447937, "rewards/rejected": -0.23817849159240723, "step": 8551 }, { "epoch": 5.914246196403873, "grad_norm": 8.621912956237793, "learning_rate": 2.2698632242200707e-05, "log_odds_chosen": 10.435504913330078, "log_odds_ratio": -0.00020912522450089455, "logits/chosen": -0.304024338722229, "logits/rejected": -0.3682836890220642, "logps/chosen": -0.00018781126709654927, "logps/rejected": -2.1270718574523926, "loss": 0.6206, "nll_loss": 0.15513885021209717, "rewards/accuracies": 1.0, "rewards/chosen": -1.8781129256240092e-05, "rewards/margins": 0.2126884013414383, "rewards/rejected": -0.21270719170570374, "step": 8552 }, { "epoch": 5.9149377593361, "grad_norm": 10.987290382385254, "learning_rate": 2.269479022591056e-05, "log_odds_chosen": 10.077252388000488, "log_odds_ratio": -0.00014401637599803507, "logits/chosen": -0.4053186774253845, "logits/rejected": -0.4016092121601105, "logps/chosen": -0.0008381298393942416, "logps/rejected": -2.265294313430786, "loss": 0.6591, "nll_loss": 0.16474927961826324, "rewards/accuracies": 1.0, "rewards/chosen": -8.381298539461568e-05, "rewards/margins": 0.22644563019275665, "rewards/rejected": -0.22652943432331085, "step": 8553 }, { "epoch": 5.915629322268327, "grad_norm": 9.601272583007812, "learning_rate": 2.269094820962041e-05, "log_odds_chosen": 10.813383102416992, "log_odds_ratio": -4.799844464287162e-05, "logits/chosen": -0.583682656288147, "logits/rejected": -0.6282103657722473, "logps/chosen": -0.00023382306972052902, "logps/rejected": -2.446737289428711, "loss": 0.5298, "nll_loss": 0.13243836164474487, "rewards/accuracies": 1.0, "rewards/chosen": -2.3382304789265618e-05, "rewards/margins": 0.24465036392211914, "rewards/rejected": -0.2446737438440323, "step": 8554 }, { "epoch": 5.9163208852005535, "grad_norm": 7.350333213806152, "learning_rate": 2.268710619333026e-05, "log_odds_chosen": 9.566350936889648, "log_odds_ratio": -0.00035029981518164277, "logits/chosen": 0.14597120881080627, "logits/rejected": -0.05483380705118179, "logps/chosen": -0.0013699035625904799, "logps/rejected": -2.373587131500244, "loss": 0.8311, "nll_loss": 0.20774847269058228, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013699036207981408, "rewards/margins": 0.23722171783447266, "rewards/rejected": -0.2373587191104889, "step": 8555 }, { "epoch": 5.91701244813278, "grad_norm": 17.94904136657715, "learning_rate": 2.2683264177040113e-05, "log_odds_chosen": 10.428934097290039, "log_odds_ratio": -9.59420285653323e-05, "logits/chosen": -0.45303452014923096, "logits/rejected": -0.5170224905014038, "logps/chosen": -0.0003410226199775934, "logps/rejected": -2.2718138694763184, "loss": 0.9655, "nll_loss": 0.24137672781944275, "rewards/accuracies": 1.0, "rewards/chosen": -3.410226054256782e-05, "rewards/margins": 0.22714729607105255, "rewards/rejected": -0.22718140482902527, "step": 8556 }, { "epoch": 5.917704011065007, "grad_norm": 4.4098734855651855, "learning_rate": 2.2679422160749962e-05, "log_odds_chosen": 10.864945411682129, "log_odds_ratio": -3.9907470636535436e-05, "logits/chosen": -0.256070613861084, "logits/rejected": -0.2606242597103119, "logps/chosen": -0.0019068530527874827, "logps/rejected": -2.8022260665893555, "loss": 0.7057, "nll_loss": 0.1764180064201355, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019068529945798218, "rewards/margins": 0.2800319194793701, "rewards/rejected": -0.2802225947380066, "step": 8557 }, { "epoch": 5.918395573997234, "grad_norm": 7.403103351593018, "learning_rate": 2.267558014445981e-05, "log_odds_chosen": 10.689541816711426, "log_odds_ratio": -3.504283449728973e-05, "logits/chosen": -0.4196911156177521, "logits/rejected": -0.4435195028781891, "logps/chosen": -0.00029301928589120507, "logps/rejected": -2.470460891723633, "loss": 0.7607, "nll_loss": 0.19015933573246002, "rewards/accuracies": 1.0, "rewards/chosen": -2.9301927497726865e-05, "rewards/margins": 0.2470167875289917, "rewards/rejected": -0.2470460832118988, "step": 8558 }, { "epoch": 5.919087136929461, "grad_norm": 7.533710479736328, "learning_rate": 2.2671738128169664e-05, "log_odds_chosen": 9.999994277954102, "log_odds_ratio": -0.0002365948457736522, "logits/chosen": -0.36014026403427124, "logits/rejected": -0.3944398760795593, "logps/chosen": -0.0003829544293694198, "logps/rejected": -1.3881903886795044, "loss": 0.7457, "nll_loss": 0.18641208112239838, "rewards/accuracies": 1.0, "rewards/chosen": -3.829544584732503e-05, "rewards/margins": 0.13878074288368225, "rewards/rejected": -0.13881903886795044, "step": 8559 }, { "epoch": 5.919778699861688, "grad_norm": 10.651721000671387, "learning_rate": 2.2667896111879516e-05, "log_odds_chosen": 10.4109525680542, "log_odds_ratio": -0.00021536195708904415, "logits/chosen": -0.20203912258148193, "logits/rejected": -0.3034498989582062, "logps/chosen": -0.00043156338506378233, "logps/rejected": -1.706121802330017, "loss": 0.9793, "nll_loss": 0.244802325963974, "rewards/accuracies": 1.0, "rewards/chosen": -4.3156334868399426e-05, "rewards/margins": 0.1705690175294876, "rewards/rejected": -0.1706121861934662, "step": 8560 }, { "epoch": 5.9204702627939145, "grad_norm": 6.417027473449707, "learning_rate": 2.2664054095589365e-05, "log_odds_chosen": 7.905770778656006, "log_odds_ratio": -0.002830528188496828, "logits/chosen": -0.5475532412528992, "logits/rejected": -0.5872865915298462, "logps/chosen": -0.027408741414546967, "logps/rejected": -1.390294075012207, "loss": 1.1341, "nll_loss": 0.28323599696159363, "rewards/accuracies": 1.0, "rewards/chosen": -0.0027408739551901817, "rewards/margins": 0.13628853857517242, "rewards/rejected": -0.13902941346168518, "step": 8561 }, { "epoch": 5.921161825726141, "grad_norm": 7.8787007331848145, "learning_rate": 2.2660212079299218e-05, "log_odds_chosen": 11.101816177368164, "log_odds_ratio": -0.00010772953828563914, "logits/chosen": -0.5102895498275757, "logits/rejected": -0.5560255646705627, "logps/chosen": -0.00044660046114586294, "logps/rejected": -2.1967179775238037, "loss": 0.6121, "nll_loss": 0.15301674604415894, "rewards/accuracies": 1.0, "rewards/chosen": -4.4660046114586294e-05, "rewards/margins": 0.21962714195251465, "rewards/rejected": -0.2196718156337738, "step": 8562 }, { "epoch": 5.921853388658368, "grad_norm": 7.844832420349121, "learning_rate": 2.265637006300907e-05, "log_odds_chosen": 9.944887161254883, "log_odds_ratio": -9.000200225273147e-05, "logits/chosen": -0.30578577518463135, "logits/rejected": -0.36093151569366455, "logps/chosen": -0.0006033809040673077, "logps/rejected": -1.9198884963989258, "loss": 0.6007, "nll_loss": 0.15015468001365662, "rewards/accuracies": 1.0, "rewards/chosen": -6.0338086768751964e-05, "rewards/margins": 0.19192850589752197, "rewards/rejected": -0.19198885560035706, "step": 8563 }, { "epoch": 5.922544951590595, "grad_norm": 6.397083759307861, "learning_rate": 2.265252804671892e-05, "log_odds_chosen": 11.030065536499023, "log_odds_ratio": -5.464674177346751e-05, "logits/chosen": -0.08422800898551941, "logits/rejected": -0.22156718373298645, "logps/chosen": -0.0006563407951034606, "logps/rejected": -2.902289867401123, "loss": 1.1569, "nll_loss": 0.28922930359840393, "rewards/accuracies": 1.0, "rewards/chosen": -6.563407805515453e-05, "rewards/margins": 0.2901633679866791, "rewards/rejected": -0.2902289927005768, "step": 8564 }, { "epoch": 5.923236514522822, "grad_norm": 5.906310558319092, "learning_rate": 2.264868603042877e-05, "log_odds_chosen": 9.578790664672852, "log_odds_ratio": -0.0021116340067237616, "logits/chosen": -0.46797680854797363, "logits/rejected": -0.4816071391105652, "logps/chosen": -0.0012455761898308992, "logps/rejected": -1.300618052482605, "loss": 0.8921, "nll_loss": 0.22281301021575928, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012455761316232383, "rewards/margins": 0.12993724644184113, "rewards/rejected": -0.1300618052482605, "step": 8565 }, { "epoch": 5.923928077455049, "grad_norm": 17.54301643371582, "learning_rate": 2.264484401413862e-05, "log_odds_chosen": 9.373865127563477, "log_odds_ratio": -0.0004120336670894176, "logits/chosen": -0.37062132358551025, "logits/rejected": -0.48371198773384094, "logps/chosen": -0.0012714089825749397, "logps/rejected": -2.1493399143218994, "loss": 0.6719, "nll_loss": 0.1679382026195526, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012714089825749397, "rewards/margins": 0.21480685472488403, "rewards/rejected": -0.21493399143218994, "step": 8566 }, { "epoch": 5.9246196403872755, "grad_norm": 6.1621222496032715, "learning_rate": 2.264100199784847e-05, "log_odds_chosen": 10.586554527282715, "log_odds_ratio": -0.00022484929650090635, "logits/chosen": -0.6475090980529785, "logits/rejected": -0.742377519607544, "logps/chosen": -0.0005424355622380972, "logps/rejected": -1.4557613134384155, "loss": 1.078, "nll_loss": 0.2694748342037201, "rewards/accuracies": 1.0, "rewards/chosen": -5.424355185823515e-05, "rewards/margins": 0.14552189409732819, "rewards/rejected": -0.14557614922523499, "step": 8567 }, { "epoch": 5.925311203319502, "grad_norm": 10.520523071289062, "learning_rate": 2.2637159981558322e-05, "log_odds_chosen": 11.213796615600586, "log_odds_ratio": -5.063842036179267e-05, "logits/chosen": -0.15347711741924286, "logits/rejected": -0.2412378191947937, "logps/chosen": -0.001006375066936016, "logps/rejected": -3.2527801990509033, "loss": 0.8013, "nll_loss": 0.20032110810279846, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010063750960398465, "rewards/margins": 0.3251773715019226, "rewards/rejected": -0.32527804374694824, "step": 8568 }, { "epoch": 5.926002766251729, "grad_norm": 15.545186042785645, "learning_rate": 2.2633317965268174e-05, "log_odds_chosen": 11.705305099487305, "log_odds_ratio": -0.00015831185737624764, "logits/chosen": -0.3981912434101105, "logits/rejected": -0.46448180079460144, "logps/chosen": -0.00022738243569619954, "logps/rejected": -2.729987621307373, "loss": 0.6592, "nll_loss": 0.16478851437568665, "rewards/accuracies": 1.0, "rewards/chosen": -2.2738244297215715e-05, "rewards/margins": 0.2729760408401489, "rewards/rejected": -0.27299875020980835, "step": 8569 }, { "epoch": 5.926694329183956, "grad_norm": 10.800206184387207, "learning_rate": 2.2629475948978024e-05, "log_odds_chosen": 9.136209487915039, "log_odds_ratio": -0.00038616295205429196, "logits/chosen": -0.8798481225967407, "logits/rejected": -0.9394698143005371, "logps/chosen": -0.0006298995576798916, "logps/rejected": -1.1048842668533325, "loss": 0.5879, "nll_loss": 0.14694133400917053, "rewards/accuracies": 1.0, "rewards/chosen": -6.29899586783722e-05, "rewards/margins": 0.1104254275560379, "rewards/rejected": -0.1104884222149849, "step": 8570 }, { "epoch": 5.927385892116183, "grad_norm": 8.502237319946289, "learning_rate": 2.2625633932687876e-05, "log_odds_chosen": 10.115253448486328, "log_odds_ratio": -0.00011375232134014368, "logits/chosen": -0.6215211749076843, "logits/rejected": -0.6475383639335632, "logps/chosen": -0.00029889732832089067, "logps/rejected": -1.6586815118789673, "loss": 0.6581, "nll_loss": 0.16451016068458557, "rewards/accuracies": 1.0, "rewards/chosen": -2.988973392348271e-05, "rewards/margins": 0.16583827137947083, "rewards/rejected": -0.16586816310882568, "step": 8571 }, { "epoch": 5.92807745504841, "grad_norm": 15.530472755432129, "learning_rate": 2.262179191639773e-05, "log_odds_chosen": 8.37136459350586, "log_odds_ratio": -0.005694496911019087, "logits/chosen": -0.20429344475269318, "logits/rejected": -0.2544511556625366, "logps/chosen": -0.0032351193949580193, "logps/rejected": -1.765426516532898, "loss": 0.6276, "nll_loss": 0.15632155537605286, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003235119511373341, "rewards/margins": 0.1762191355228424, "rewards/rejected": -0.17654263973236084, "step": 8572 }, { "epoch": 5.9287690179806365, "grad_norm": 20.958740234375, "learning_rate": 2.2617949900107577e-05, "log_odds_chosen": 10.967823028564453, "log_odds_ratio": -5.087364843348041e-05, "logits/chosen": -0.7277958989143372, "logits/rejected": -0.8081362247467041, "logps/chosen": -0.0001713030505925417, "logps/rejected": -1.981994867324829, "loss": 0.9123, "nll_loss": 0.22807423770427704, "rewards/accuracies": 1.0, "rewards/chosen": -1.7130303604062647e-05, "rewards/margins": 0.1981823593378067, "rewards/rejected": -0.19819949567317963, "step": 8573 }, { "epoch": 5.929460580912863, "grad_norm": 10.149932861328125, "learning_rate": 2.261410788381743e-05, "log_odds_chosen": 9.296614646911621, "log_odds_ratio": -0.0015210387064144015, "logits/chosen": -0.5005256533622742, "logits/rejected": -0.4993587136268616, "logps/chosen": -0.0005473347846418619, "logps/rejected": -1.6416828632354736, "loss": 1.1707, "nll_loss": 0.29251858592033386, "rewards/accuracies": 1.0, "rewards/chosen": -5.4733485740143806e-05, "rewards/margins": 0.16411355137825012, "rewards/rejected": -0.16416828334331512, "step": 8574 }, { "epoch": 5.93015214384509, "grad_norm": 9.455828666687012, "learning_rate": 2.261026586752728e-05, "log_odds_chosen": 9.926673889160156, "log_odds_ratio": -0.010275032371282578, "logits/chosen": -0.8462824821472168, "logits/rejected": -0.8726486563682556, "logps/chosen": -0.004735386930406094, "logps/rejected": -2.1909990310668945, "loss": 0.6947, "nll_loss": 0.17265570163726807, "rewards/accuracies": 1.0, "rewards/chosen": -0.00047353864647448063, "rewards/margins": 0.21862637996673584, "rewards/rejected": -0.21909990906715393, "step": 8575 }, { "epoch": 5.930843706777317, "grad_norm": 7.079444885253906, "learning_rate": 2.2606423851237128e-05, "log_odds_chosen": 10.440970420837402, "log_odds_ratio": -0.00572149408981204, "logits/chosen": -0.4378744959831238, "logits/rejected": -0.5360420942306519, "logps/chosen": -0.035686738789081573, "logps/rejected": -2.1300199031829834, "loss": 0.5094, "nll_loss": 0.12678956985473633, "rewards/accuracies": 1.0, "rewards/chosen": -0.003568673972040415, "rewards/margins": 0.20943331718444824, "rewards/rejected": -0.21300198137760162, "step": 8576 }, { "epoch": 5.931535269709544, "grad_norm": 8.993355751037598, "learning_rate": 2.260258183494698e-05, "log_odds_chosen": 10.129745483398438, "log_odds_ratio": -0.006981295999139547, "logits/chosen": -0.5487630367279053, "logits/rejected": -0.5710721611976624, "logps/chosen": -0.003039892530068755, "logps/rejected": -2.049140214920044, "loss": 0.5986, "nll_loss": 0.148961141705513, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030398921808227897, "rewards/margins": 0.20461003482341766, "rewards/rejected": -0.20491401851177216, "step": 8577 }, { "epoch": 5.932226832641771, "grad_norm": 5.472675323486328, "learning_rate": 2.2598739818656833e-05, "log_odds_chosen": 9.776227951049805, "log_odds_ratio": -0.00033155985875055194, "logits/chosen": -0.5276475548744202, "logits/rejected": -0.5645096302032471, "logps/chosen": -0.0007022687932476401, "logps/rejected": -1.7329561710357666, "loss": 0.9177, "nll_loss": 0.22938492894172668, "rewards/accuracies": 1.0, "rewards/chosen": -7.022687350399792e-05, "rewards/margins": 0.17322540283203125, "rewards/rejected": -0.17329561710357666, "step": 8578 }, { "epoch": 5.9329183955739975, "grad_norm": 5.772761821746826, "learning_rate": 2.2594897802366682e-05, "log_odds_chosen": 9.596529006958008, "log_odds_ratio": -0.0009786165319383144, "logits/chosen": -0.4244062900543213, "logits/rejected": -0.4867440462112427, "logps/chosen": -0.018765205517411232, "logps/rejected": -2.093818187713623, "loss": 0.8859, "nll_loss": 0.2213878631591797, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018765205750241876, "rewards/margins": 0.20750531554222107, "rewards/rejected": -0.2093818336725235, "step": 8579 }, { "epoch": 5.933609958506224, "grad_norm": 12.897007942199707, "learning_rate": 2.2591055786076534e-05, "log_odds_chosen": 10.668493270874023, "log_odds_ratio": -6.173766450956464e-05, "logits/chosen": -0.47161316871643066, "logits/rejected": -0.5060651302337646, "logps/chosen": -0.00015352212358266115, "logps/rejected": -1.8575775623321533, "loss": 0.5795, "nll_loss": 0.14486020803451538, "rewards/accuracies": 1.0, "rewards/chosen": -1.5352212358266115e-05, "rewards/margins": 0.18574243783950806, "rewards/rejected": -0.18575777113437653, "step": 8580 }, { "epoch": 5.934301521438451, "grad_norm": 8.834507942199707, "learning_rate": 2.2587213769786387e-05, "log_odds_chosen": 10.817688941955566, "log_odds_ratio": -0.00029401585925370455, "logits/chosen": -0.7078501582145691, "logits/rejected": -0.6817142963409424, "logps/chosen": -0.00031072754063643515, "logps/rejected": -2.6172075271606445, "loss": 0.9104, "nll_loss": 0.2275734394788742, "rewards/accuracies": 1.0, "rewards/chosen": -3.107275551883504e-05, "rewards/margins": 0.261689692735672, "rewards/rejected": -0.26172077655792236, "step": 8581 }, { "epoch": 5.934993084370678, "grad_norm": 5.395383358001709, "learning_rate": 2.2583371753496236e-05, "log_odds_chosen": 10.347330093383789, "log_odds_ratio": -0.00010833951091626659, "logits/chosen": -0.9370718002319336, "logits/rejected": -0.9664212465286255, "logps/chosen": -0.00035284709883853793, "logps/rejected": -1.7800090312957764, "loss": 0.6464, "nll_loss": 0.16159787774085999, "rewards/accuracies": 1.0, "rewards/chosen": -3.528471279423684e-05, "rewards/margins": 0.17796562612056732, "rewards/rejected": -0.17800092697143555, "step": 8582 }, { "epoch": 5.935684647302905, "grad_norm": 8.83195972442627, "learning_rate": 2.257952973720609e-05, "log_odds_chosen": 10.438563346862793, "log_odds_ratio": -6.224818935152143e-05, "logits/chosen": -0.510218620300293, "logits/rejected": -0.44954901933670044, "logps/chosen": -0.0002455156354699284, "logps/rejected": -1.6129556894302368, "loss": 0.4977, "nll_loss": 0.12440832704305649, "rewards/accuracies": 1.0, "rewards/chosen": -2.4551565729780123e-05, "rewards/margins": 0.16127102077007294, "rewards/rejected": -0.1612955629825592, "step": 8583 }, { "epoch": 5.936376210235132, "grad_norm": 6.737086772918701, "learning_rate": 2.2575687720915937e-05, "log_odds_chosen": 10.558185577392578, "log_odds_ratio": -0.00012676040933001786, "logits/chosen": -0.4822555184364319, "logits/rejected": -0.48588335514068604, "logps/chosen": -0.00015993253327906132, "logps/rejected": -1.8099101781845093, "loss": 0.5725, "nll_loss": 0.14312425255775452, "rewards/accuracies": 1.0, "rewards/chosen": -1.5993253327906132e-05, "rewards/margins": 0.1809750348329544, "rewards/rejected": -0.1809910237789154, "step": 8584 }, { "epoch": 5.9370677731673585, "grad_norm": 6.972037315368652, "learning_rate": 2.2571845704625786e-05, "log_odds_chosen": 10.596516609191895, "log_odds_ratio": -3.5987286537420005e-05, "logits/chosen": -0.1407221257686615, "logits/rejected": -0.32239216566085815, "logps/chosen": -0.0002259848261019215, "logps/rejected": -1.8885389566421509, "loss": 0.6527, "nll_loss": 0.16316595673561096, "rewards/accuracies": 1.0, "rewards/chosen": -2.2598484065383673e-05, "rewards/margins": 0.18883128464221954, "rewards/rejected": -0.1888538897037506, "step": 8585 }, { "epoch": 5.937759336099585, "grad_norm": 11.993995666503906, "learning_rate": 2.256800368833564e-05, "log_odds_chosen": 8.331764221191406, "log_odds_ratio": -0.4614397883415222, "logits/chosen": -0.4517919421195984, "logits/rejected": -0.5015581846237183, "logps/chosen": -0.04650157317519188, "logps/rejected": -1.8373360633850098, "loss": 0.8105, "nll_loss": 0.15647102892398834, "rewards/accuracies": 0.875, "rewards/chosen": -0.004650157410651445, "rewards/margins": 0.179083451628685, "rewards/rejected": -0.18373361229896545, "step": 8586 }, { "epoch": 5.938450899031812, "grad_norm": 7.184019088745117, "learning_rate": 2.256416167204549e-05, "log_odds_chosen": 9.58578872680664, "log_odds_ratio": -0.0004617223748937249, "logits/chosen": -0.3178104758262634, "logits/rejected": -0.4234858453273773, "logps/chosen": -0.00038188480539247394, "logps/rejected": -1.509856939315796, "loss": 0.58, "nll_loss": 0.14495989680290222, "rewards/accuracies": 1.0, "rewards/chosen": -3.8188481994438916e-05, "rewards/margins": 0.15094749629497528, "rewards/rejected": -0.1509856879711151, "step": 8587 }, { "epoch": 5.939142461964039, "grad_norm": 6.3074140548706055, "learning_rate": 2.256031965575534e-05, "log_odds_chosen": 9.820735931396484, "log_odds_ratio": -0.0016581026138737798, "logits/chosen": -0.4913124144077301, "logits/rejected": -0.3220345377922058, "logps/chosen": -0.0011917630909010768, "logps/rejected": -1.8636376857757568, "loss": 0.5282, "nll_loss": 0.13187173008918762, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011917632218683138, "rewards/margins": 0.18624460697174072, "rewards/rejected": -0.18636377155780792, "step": 8588 }, { "epoch": 5.939834024896266, "grad_norm": 5.304167747497559, "learning_rate": 2.2556477639465193e-05, "log_odds_chosen": 9.788444519042969, "log_odds_ratio": -0.000702496210578829, "logits/chosen": -0.5710271000862122, "logits/rejected": -0.5970733165740967, "logps/chosen": -0.0012307936558499932, "logps/rejected": -2.3701136112213135, "loss": 1.469, "nll_loss": 0.3671877980232239, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012307935685385019, "rewards/margins": 0.23688827455043793, "rewards/rejected": -0.2370113730430603, "step": 8589 }, { "epoch": 5.940525587828493, "grad_norm": 7.113852500915527, "learning_rate": 2.2552635623175045e-05, "log_odds_chosen": 10.384570121765137, "log_odds_ratio": -0.00011506390001159161, "logits/chosen": -0.6606665849685669, "logits/rejected": -0.6632811427116394, "logps/chosen": -0.0007222711574286222, "logps/rejected": -2.103870391845703, "loss": 0.9292, "nll_loss": 0.23229539394378662, "rewards/accuracies": 1.0, "rewards/chosen": -7.222710701171309e-05, "rewards/margins": 0.2103148102760315, "rewards/rejected": -0.21038705110549927, "step": 8590 }, { "epoch": 5.941217150760719, "grad_norm": 11.421980857849121, "learning_rate": 2.2548793606884894e-05, "log_odds_chosen": 10.75039291381836, "log_odds_ratio": -0.00022272029309533536, "logits/chosen": -0.3645836412906647, "logits/rejected": -0.5092456936836243, "logps/chosen": -0.0002936196979135275, "logps/rejected": -2.463463068008423, "loss": 0.6619, "nll_loss": 0.1654607355594635, "rewards/accuracies": 1.0, "rewards/chosen": -2.9361966880969703e-05, "rewards/margins": 0.24631696939468384, "rewards/rejected": -0.24634632468223572, "step": 8591 }, { "epoch": 5.941908713692946, "grad_norm": 5.813602447509766, "learning_rate": 2.2544951590594747e-05, "log_odds_chosen": 11.08498477935791, "log_odds_ratio": -3.317241498734802e-05, "logits/chosen": -0.29458001255989075, "logits/rejected": -0.34045854210853577, "logps/chosen": -0.0003151995479129255, "logps/rejected": -2.317119836807251, "loss": 0.6591, "nll_loss": 0.16476860642433167, "rewards/accuracies": 1.0, "rewards/chosen": -3.151995406369679e-05, "rewards/margins": 0.23168045282363892, "rewards/rejected": -0.2317119836807251, "step": 8592 }, { "epoch": 5.942600276625173, "grad_norm": 5.240687847137451, "learning_rate": 2.2541109574304596e-05, "log_odds_chosen": 10.848184585571289, "log_odds_ratio": -0.0002010673051699996, "logits/chosen": -0.7847503423690796, "logits/rejected": -0.8887568116188049, "logps/chosen": -0.000890504801645875, "logps/rejected": -2.394976854324341, "loss": 1.0431, "nll_loss": 0.2607666254043579, "rewards/accuracies": 1.0, "rewards/chosen": -8.905048161977902e-05, "rewards/margins": 0.23940865695476532, "rewards/rejected": -0.23949770629405975, "step": 8593 }, { "epoch": 5.9432918395574, "grad_norm": 7.4959716796875, "learning_rate": 2.2537267558014445e-05, "log_odds_chosen": 11.396228790283203, "log_odds_ratio": -1.6502690414199606e-05, "logits/chosen": -0.25917848944664, "logits/rejected": -0.3350893259048462, "logps/chosen": -0.0001408563693985343, "logps/rejected": -2.543832540512085, "loss": 0.5539, "nll_loss": 0.13848382234573364, "rewards/accuracies": 1.0, "rewards/chosen": -1.408563730365131e-05, "rewards/margins": 0.25436916947364807, "rewards/rejected": -0.25438326597213745, "step": 8594 }, { "epoch": 5.943983402489627, "grad_norm": 4.75478982925415, "learning_rate": 2.2533425541724297e-05, "log_odds_chosen": 11.02970027923584, "log_odds_ratio": -3.782029671128839e-05, "logits/chosen": -0.7902827262878418, "logits/rejected": -0.83676677942276, "logps/chosen": -0.00020545838924590498, "logps/rejected": -2.285313129425049, "loss": 0.4411, "nll_loss": 0.11028016358613968, "rewards/accuracies": 1.0, "rewards/chosen": -2.0545838196994737e-05, "rewards/margins": 0.2285107672214508, "rewards/rejected": -0.22853130102157593, "step": 8595 }, { "epoch": 5.944674965421854, "grad_norm": 9.864461898803711, "learning_rate": 2.252958352543415e-05, "log_odds_chosen": 11.542867660522461, "log_odds_ratio": -1.895393324957695e-05, "logits/chosen": -0.022642409428954124, "logits/rejected": -0.0558503232896328, "logps/chosen": -0.00012987994705326855, "logps/rejected": -2.4602818489074707, "loss": 0.7878, "nll_loss": 0.1969459503889084, "rewards/accuracies": 1.0, "rewards/chosen": -1.2987994523427915e-05, "rewards/margins": 0.24601522088050842, "rewards/rejected": -0.24602821469306946, "step": 8596 }, { "epoch": 5.94536652835408, "grad_norm": 5.678092002868652, "learning_rate": 2.2525741509144e-05, "log_odds_chosen": 8.817580223083496, "log_odds_ratio": -0.0009618825279176235, "logits/chosen": -0.4064343571662903, "logits/rejected": -0.4465927481651306, "logps/chosen": -0.0013117840280756354, "logps/rejected": -1.1665174961090088, "loss": 0.5906, "nll_loss": 0.1475575864315033, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001311783998971805, "rewards/margins": 0.11652056127786636, "rewards/rejected": -0.1166517361998558, "step": 8597 }, { "epoch": 5.946058091286307, "grad_norm": 8.367502212524414, "learning_rate": 2.252189949285385e-05, "log_odds_chosen": 11.335031509399414, "log_odds_ratio": -3.1897066946839914e-05, "logits/chosen": -0.8433002233505249, "logits/rejected": -0.8570671081542969, "logps/chosen": -0.00025656697107478976, "logps/rejected": -2.491272211074829, "loss": 1.2058, "nll_loss": 0.30144941806793213, "rewards/accuracies": 1.0, "rewards/chosen": -2.5656696379883215e-05, "rewards/margins": 0.24910154938697815, "rewards/rejected": -0.24912720918655396, "step": 8598 }, { "epoch": 5.946749654218534, "grad_norm": 7.0525803565979, "learning_rate": 2.25180574765637e-05, "log_odds_chosen": 9.210527420043945, "log_odds_ratio": -0.0020838002674281597, "logits/chosen": -0.15819773077964783, "logits/rejected": -0.19214797019958496, "logps/chosen": -0.005358480848371983, "logps/rejected": -1.5282800197601318, "loss": 1.1511, "nll_loss": 0.28755632042884827, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005358480848371983, "rewards/margins": 0.1522921621799469, "rewards/rejected": -0.15282802283763885, "step": 8599 }, { "epoch": 5.947441217150761, "grad_norm": 20.070941925048828, "learning_rate": 2.2514215460273553e-05, "log_odds_chosen": 10.128929138183594, "log_odds_ratio": -9.235789184458554e-05, "logits/chosen": -0.43759238719940186, "logits/rejected": -0.4546014964580536, "logps/chosen": -0.0004204876022413373, "logps/rejected": -2.027372121810913, "loss": 0.7217, "nll_loss": 0.18042320013046265, "rewards/accuracies": 1.0, "rewards/chosen": -4.204875949653797e-05, "rewards/margins": 0.20269516110420227, "rewards/rejected": -0.2027372121810913, "step": 8600 }, { "epoch": 5.948132780082988, "grad_norm": 9.916879653930664, "learning_rate": 2.2510373443983405e-05, "log_odds_chosen": 11.070162773132324, "log_odds_ratio": -4.012341742054559e-05, "logits/chosen": -1.012428641319275, "logits/rejected": -1.0315272808074951, "logps/chosen": -0.0001498181081842631, "logps/rejected": -1.9242411851882935, "loss": 0.5297, "nll_loss": 0.13243013620376587, "rewards/accuracies": 1.0, "rewards/chosen": -1.4981809727032669e-05, "rewards/margins": 0.19240912795066833, "rewards/rejected": -0.19242411851882935, "step": 8601 }, { "epoch": 5.948824343015215, "grad_norm": 12.090327262878418, "learning_rate": 2.2506531427693254e-05, "log_odds_chosen": 10.315339088439941, "log_odds_ratio": -0.0001418688625562936, "logits/chosen": -0.34358614683151245, "logits/rejected": -0.4839419722557068, "logps/chosen": -0.0007913429872132838, "logps/rejected": -1.9713385105133057, "loss": 0.6987, "nll_loss": 0.17466667294502258, "rewards/accuracies": 1.0, "rewards/chosen": -7.913430454209447e-05, "rewards/margins": 0.19705472886562347, "rewards/rejected": -0.197133868932724, "step": 8602 }, { "epoch": 5.949515905947441, "grad_norm": 5.3857808113098145, "learning_rate": 2.2502689411403107e-05, "log_odds_chosen": 10.192558288574219, "log_odds_ratio": -0.00026585307205095887, "logits/chosen": -0.8433107733726501, "logits/rejected": -0.9449383020401001, "logps/chosen": -0.0006162350182421505, "logps/rejected": -1.9687180519104004, "loss": 0.4978, "nll_loss": 0.12442828714847565, "rewards/accuracies": 1.0, "rewards/chosen": -6.16235047345981e-05, "rewards/margins": 0.19681017100811005, "rewards/rejected": -0.1968718022108078, "step": 8603 }, { "epoch": 5.950207468879668, "grad_norm": 6.468098163604736, "learning_rate": 2.2498847395112956e-05, "log_odds_chosen": 9.801401138305664, "log_odds_ratio": -0.00025009317323565483, "logits/chosen": -0.6675868034362793, "logits/rejected": -0.6527258157730103, "logps/chosen": -0.0007151628378778696, "logps/rejected": -2.322625160217285, "loss": 0.7434, "nll_loss": 0.18583112955093384, "rewards/accuracies": 1.0, "rewards/chosen": -7.151628960855305e-05, "rewards/margins": 0.23219099640846252, "rewards/rejected": -0.232262521982193, "step": 8604 }, { "epoch": 5.950899031811895, "grad_norm": 5.987737655639648, "learning_rate": 2.2495005378822805e-05, "log_odds_chosen": 10.477818489074707, "log_odds_ratio": -7.430124969687313e-05, "logits/chosen": -0.05224364250898361, "logits/rejected": -0.051796793937683105, "logps/chosen": -0.0001971510355360806, "logps/rejected": -2.1585772037506104, "loss": 1.3965, "nll_loss": 0.3491280972957611, "rewards/accuracies": 1.0, "rewards/chosen": -1.9715105736395344e-05, "rewards/margins": 0.2158380150794983, "rewards/rejected": -0.21585772931575775, "step": 8605 }, { "epoch": 5.951590594744122, "grad_norm": 6.161578178405762, "learning_rate": 2.2491163362532657e-05, "log_odds_chosen": 10.42709732055664, "log_odds_ratio": -6.25356042291969e-05, "logits/chosen": 0.113109290599823, "logits/rejected": 0.06976966559886932, "logps/chosen": -0.0025051303673535585, "logps/rejected": -2.5732247829437256, "loss": 0.6081, "nll_loss": 0.15200835466384888, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025051305419765413, "rewards/margins": 0.25707194209098816, "rewards/rejected": -0.2573224604129791, "step": 8606 }, { "epoch": 5.952282157676349, "grad_norm": 15.835515975952148, "learning_rate": 2.248732134624251e-05, "log_odds_chosen": 9.979249000549316, "log_odds_ratio": -0.0002866145805455744, "logits/chosen": -0.7599179148674011, "logits/rejected": -0.7421848773956299, "logps/chosen": -0.000998812378384173, "logps/rejected": -1.7596694231033325, "loss": 1.1156, "nll_loss": 0.27886295318603516, "rewards/accuracies": 1.0, "rewards/chosen": -9.98812320176512e-05, "rewards/margins": 0.17586706578731537, "rewards/rejected": -0.17596694827079773, "step": 8607 }, { "epoch": 5.9529737206085755, "grad_norm": 6.893677711486816, "learning_rate": 2.248347932995236e-05, "log_odds_chosen": 11.070934295654297, "log_odds_ratio": -0.00021689744608011097, "logits/chosen": -0.5308154821395874, "logits/rejected": -0.5692165493965149, "logps/chosen": -0.0005209331284277141, "logps/rejected": -1.9994919300079346, "loss": 0.6829, "nll_loss": 0.17069843411445618, "rewards/accuracies": 1.0, "rewards/chosen": -5.2093309932388365e-05, "rewards/margins": 0.19989712536334991, "rewards/rejected": -0.1999492198228836, "step": 8608 }, { "epoch": 5.953665283540802, "grad_norm": 9.771283149719238, "learning_rate": 2.247963731366221e-05, "log_odds_chosen": 7.985379695892334, "log_odds_ratio": -0.13277308642864227, "logits/chosen": -0.8190820217132568, "logits/rejected": -0.7509307861328125, "logps/chosen": -0.026225317269563675, "logps/rejected": -1.2286534309387207, "loss": 0.8013, "nll_loss": 0.1870512068271637, "rewards/accuracies": 0.875, "rewards/chosen": -0.002622531494125724, "rewards/margins": 0.12024280428886414, "rewards/rejected": -0.12286533415317535, "step": 8609 }, { "epoch": 5.954356846473029, "grad_norm": 9.570266723632812, "learning_rate": 2.2475795297372064e-05, "log_odds_chosen": 11.255922317504883, "log_odds_ratio": -3.132349957013503e-05, "logits/chosen": -0.6426629424095154, "logits/rejected": -0.7214614152908325, "logps/chosen": -0.00036300989449955523, "logps/rejected": -2.28373646736145, "loss": 0.6506, "nll_loss": 0.16263887286186218, "rewards/accuracies": 1.0, "rewards/chosen": -3.630098944995552e-05, "rewards/margins": 0.228337362408638, "rewards/rejected": -0.22837364673614502, "step": 8610 }, { "epoch": 5.955048409405256, "grad_norm": 4.853519439697266, "learning_rate": 2.2471953281081913e-05, "log_odds_chosen": 9.830463409423828, "log_odds_ratio": -0.00016724743181839585, "logits/chosen": -0.3163721561431885, "logits/rejected": -0.33467578887939453, "logps/chosen": -0.0005570831708610058, "logps/rejected": -1.6250646114349365, "loss": 0.7881, "nll_loss": 0.19701674580574036, "rewards/accuracies": 1.0, "rewards/chosen": -5.57083185412921e-05, "rewards/margins": 0.16245076060295105, "rewards/rejected": -0.16250646114349365, "step": 8611 }, { "epoch": 5.955739972337483, "grad_norm": 7.442193031311035, "learning_rate": 2.2468111264791765e-05, "log_odds_chosen": 10.984609603881836, "log_odds_ratio": -3.6467157769948244e-05, "logits/chosen": -0.8650991916656494, "logits/rejected": -0.8918881416320801, "logps/chosen": -0.0008259970345534384, "logps/rejected": -2.5060737133026123, "loss": 0.8518, "nll_loss": 0.21293659508228302, "rewards/accuracies": 1.0, "rewards/chosen": -8.259969763457775e-05, "rewards/margins": 0.2505247890949249, "rewards/rejected": -0.2506074011325836, "step": 8612 }, { "epoch": 5.95643153526971, "grad_norm": 6.5263543128967285, "learning_rate": 2.2464269248501614e-05, "log_odds_chosen": 10.611418724060059, "log_odds_ratio": -8.558265108149499e-05, "logits/chosen": -0.4117690920829773, "logits/rejected": -0.4803375005722046, "logps/chosen": -0.00030033683287911117, "logps/rejected": -2.3147926330566406, "loss": 0.5673, "nll_loss": 0.14182326197624207, "rewards/accuracies": 1.0, "rewards/chosen": -3.0033686925889924e-05, "rewards/margins": 0.23144923150539398, "rewards/rejected": -0.23147927224636078, "step": 8613 }, { "epoch": 5.9571230982019365, "grad_norm": 5.585168361663818, "learning_rate": 2.2460427232211463e-05, "log_odds_chosen": 10.020788192749023, "log_odds_ratio": -0.0020185827743262053, "logits/chosen": -0.7417982220649719, "logits/rejected": -0.8316002488136292, "logps/chosen": -0.0019587883725762367, "logps/rejected": -1.7383875846862793, "loss": 0.9549, "nll_loss": 0.23852689564228058, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001958788780029863, "rewards/margins": 0.17364290356636047, "rewards/rejected": -0.1738387793302536, "step": 8614 }, { "epoch": 5.957814661134163, "grad_norm": 6.802754878997803, "learning_rate": 2.2456585215921316e-05, "log_odds_chosen": 10.667003631591797, "log_odds_ratio": -9.732814942253754e-05, "logits/chosen": -0.502316951751709, "logits/rejected": -0.5313187837600708, "logps/chosen": -0.0002029087336268276, "logps/rejected": -2.169607639312744, "loss": 0.9616, "nll_loss": 0.24038337171077728, "rewards/accuracies": 1.0, "rewards/chosen": -2.029087409027852e-05, "rewards/margins": 0.2169404774904251, "rewards/rejected": -0.21696075797080994, "step": 8615 }, { "epoch": 5.95850622406639, "grad_norm": 4.8081183433532715, "learning_rate": 2.2452743199631168e-05, "log_odds_chosen": 10.98234748840332, "log_odds_ratio": -0.00042538848356343806, "logits/chosen": -0.437279611825943, "logits/rejected": -0.5226849317550659, "logps/chosen": -0.0008870043675415218, "logps/rejected": -2.479304790496826, "loss": 0.5971, "nll_loss": 0.14922332763671875, "rewards/accuracies": 1.0, "rewards/chosen": -8.870044257491827e-05, "rewards/margins": 0.24784177541732788, "rewards/rejected": -0.24793048202991486, "step": 8616 }, { "epoch": 5.959197786998617, "grad_norm": 4.302028656005859, "learning_rate": 2.2448901183341017e-05, "log_odds_chosen": 9.503775596618652, "log_odds_ratio": -0.00013842491898685694, "logits/chosen": -0.5067854523658752, "logits/rejected": -0.5400323867797852, "logps/chosen": -0.00017005865811370313, "logps/rejected": -1.234816074371338, "loss": 0.5709, "nll_loss": 0.1427188664674759, "rewards/accuracies": 1.0, "rewards/chosen": -1.7005866538966075e-05, "rewards/margins": 0.12346460670232773, "rewards/rejected": -0.12348160147666931, "step": 8617 }, { "epoch": 5.959889349930844, "grad_norm": 9.732542991638184, "learning_rate": 2.244505916705087e-05, "log_odds_chosen": 10.742938041687012, "log_odds_ratio": -0.0018327643629163504, "logits/chosen": -0.1278042197227478, "logits/rejected": -0.18076513707637787, "logps/chosen": -0.0014679968589916825, "logps/rejected": -2.591768264770508, "loss": 0.7851, "nll_loss": 0.19609123468399048, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014679969171993434, "rewards/margins": 0.2590300738811493, "rewards/rejected": -0.2591768503189087, "step": 8618 }, { "epoch": 5.960580912863071, "grad_norm": 6.502620697021484, "learning_rate": 2.2441217150760722e-05, "log_odds_chosen": 10.331536293029785, "log_odds_ratio": -0.00011660806922009215, "logits/chosen": -0.10579995810985565, "logits/rejected": -0.11191463470458984, "logps/chosen": -0.0003946416836697608, "logps/rejected": -1.7601673603057861, "loss": 0.8607, "nll_loss": 0.21515217423439026, "rewards/accuracies": 1.0, "rewards/chosen": -3.946416836697608e-05, "rewards/margins": 0.17597728967666626, "rewards/rejected": -0.17601674795150757, "step": 8619 }, { "epoch": 5.9612724757952975, "grad_norm": 17.422819137573242, "learning_rate": 2.243737513447057e-05, "log_odds_chosen": 11.365373611450195, "log_odds_ratio": -2.036244040937163e-05, "logits/chosen": 0.17082726955413818, "logits/rejected": 0.10050006210803986, "logps/chosen": -0.0002604515175335109, "logps/rejected": -2.61152982711792, "loss": 0.8965, "nll_loss": 0.22412513196468353, "rewards/accuracies": 1.0, "rewards/chosen": -2.6045154299936257e-05, "rewards/margins": 0.26112696528434753, "rewards/rejected": -0.2611530125141144, "step": 8620 }, { "epoch": 5.961964038727524, "grad_norm": 6.855748176574707, "learning_rate": 2.2433533118180424e-05, "log_odds_chosen": 10.539216041564941, "log_odds_ratio": -9.504984336672351e-05, "logits/chosen": -0.2191634476184845, "logits/rejected": -0.2327946424484253, "logps/chosen": -0.00025539161288179457, "logps/rejected": -1.910869836807251, "loss": 0.6591, "nll_loss": 0.16477471590042114, "rewards/accuracies": 1.0, "rewards/chosen": -2.5539160560583696e-05, "rewards/margins": 0.19106145203113556, "rewards/rejected": -0.19108697772026062, "step": 8621 }, { "epoch": 5.962655601659751, "grad_norm": 9.039917945861816, "learning_rate": 2.2429691101890273e-05, "log_odds_chosen": 10.876553535461426, "log_odds_ratio": -4.704829188995063e-05, "logits/chosen": -0.6791223883628845, "logits/rejected": -0.7270498871803284, "logps/chosen": -0.00022510235430672765, "logps/rejected": -2.25089168548584, "loss": 0.7708, "nll_loss": 0.19269677996635437, "rewards/accuracies": 1.0, "rewards/chosen": -2.2510233975481242e-05, "rewards/margins": 0.2250666469335556, "rewards/rejected": -0.2250891625881195, "step": 8622 }, { "epoch": 5.963347164591978, "grad_norm": 16.28369903564453, "learning_rate": 2.2425849085600122e-05, "log_odds_chosen": 11.484891891479492, "log_odds_ratio": -2.6043024263344705e-05, "logits/chosen": -0.1581682711839676, "logits/rejected": -0.216511532664299, "logps/chosen": -0.0003339290269650519, "logps/rejected": -3.420424699783325, "loss": 1.0881, "nll_loss": 0.27203014492988586, "rewards/accuracies": 1.0, "rewards/chosen": -3.3392905606888235e-05, "rewards/margins": 0.3420090675354004, "rewards/rejected": -0.342042475938797, "step": 8623 }, { "epoch": 5.964038727524205, "grad_norm": 7.26689338684082, "learning_rate": 2.2422007069309974e-05, "log_odds_chosen": 10.805521011352539, "log_odds_ratio": -4.978823562851176e-05, "logits/chosen": -0.5056122541427612, "logits/rejected": -0.6053757667541504, "logps/chosen": -0.00015571873518638313, "logps/rejected": -2.0780341625213623, "loss": 0.8315, "nll_loss": 0.2078588306903839, "rewards/accuracies": 1.0, "rewards/chosen": -1.5571875337627716e-05, "rewards/margins": 0.20778784155845642, "rewards/rejected": -0.20780342817306519, "step": 8624 }, { "epoch": 5.964730290456432, "grad_norm": 8.424686431884766, "learning_rate": 2.2418165053019827e-05, "log_odds_chosen": 10.124658584594727, "log_odds_ratio": -0.0005912402411922812, "logits/chosen": -0.28676581382751465, "logits/rejected": -0.39113837480545044, "logps/chosen": -0.006734848488122225, "logps/rejected": -2.4362435340881348, "loss": 0.6915, "nll_loss": 0.17280808091163635, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006734849303029478, "rewards/margins": 0.24295085668563843, "rewards/rejected": -0.24362435936927795, "step": 8625 }, { "epoch": 5.9654218533886585, "grad_norm": 8.816883087158203, "learning_rate": 2.2414323036729676e-05, "log_odds_chosen": 10.74892807006836, "log_odds_ratio": -9.71397093962878e-05, "logits/chosen": -0.7456105947494507, "logits/rejected": -0.7920002937316895, "logps/chosen": -0.0003633729356806725, "logps/rejected": -2.1066083908081055, "loss": 0.4359, "nll_loss": 0.10895287245512009, "rewards/accuracies": 1.0, "rewards/chosen": -3.633729284047149e-05, "rewards/margins": 0.21062450110912323, "rewards/rejected": -0.21066084504127502, "step": 8626 }, { "epoch": 5.966113416320885, "grad_norm": 9.672571182250977, "learning_rate": 2.2410481020439528e-05, "log_odds_chosen": 10.556832313537598, "log_odds_ratio": -0.00010228557221125811, "logits/chosen": -0.39597249031066895, "logits/rejected": -0.4569196105003357, "logps/chosen": -0.0004282575682736933, "logps/rejected": -2.1169612407684326, "loss": 0.648, "nll_loss": 0.1619986593723297, "rewards/accuracies": 1.0, "rewards/chosen": -4.282575537217781e-05, "rewards/margins": 0.21165330708026886, "rewards/rejected": -0.21169611811637878, "step": 8627 }, { "epoch": 5.966804979253112, "grad_norm": 9.692134857177734, "learning_rate": 2.240663900414938e-05, "log_odds_chosen": 10.527505874633789, "log_odds_ratio": -8.708895620657131e-05, "logits/chosen": -0.09428629279136658, "logits/rejected": -0.1848604679107666, "logps/chosen": -0.0017188250785693526, "logps/rejected": -1.9022977352142334, "loss": 0.9876, "nll_loss": 0.24690304696559906, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017188250785693526, "rewards/margins": 0.1900579035282135, "rewards/rejected": -0.19022977352142334, "step": 8628 }, { "epoch": 5.967496542185339, "grad_norm": 7.546328067779541, "learning_rate": 2.240279698785923e-05, "log_odds_chosen": 10.628252983093262, "log_odds_ratio": -6.79682198096998e-05, "logits/chosen": -0.28366631269454956, "logits/rejected": -0.3606613874435425, "logps/chosen": -0.0003197102341800928, "logps/rejected": -1.9171504974365234, "loss": 1.0352, "nll_loss": 0.2587844133377075, "rewards/accuracies": 1.0, "rewards/chosen": -3.197102341800928e-05, "rewards/margins": 0.1916830837726593, "rewards/rejected": -0.1917150467634201, "step": 8629 }, { "epoch": 5.968188105117566, "grad_norm": 8.36005973815918, "learning_rate": 2.2398954971569082e-05, "log_odds_chosen": 9.56314468383789, "log_odds_ratio": -0.0005237284349277616, "logits/chosen": -0.3528299033641815, "logits/rejected": -0.39573559165000916, "logps/chosen": -0.001121058943681419, "logps/rejected": -1.7783137559890747, "loss": 0.9561, "nll_loss": 0.2389650046825409, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011210589582333341, "rewards/margins": 0.17771926522254944, "rewards/rejected": -0.17783138155937195, "step": 8630 }, { "epoch": 5.968879668049793, "grad_norm": 8.814260482788086, "learning_rate": 2.239511295527893e-05, "log_odds_chosen": 10.795074462890625, "log_odds_ratio": -4.484070450416766e-05, "logits/chosen": -0.7871134281158447, "logits/rejected": -0.8398634791374207, "logps/chosen": -0.0004096375487279147, "logps/rejected": -2.22101092338562, "loss": 0.6136, "nll_loss": 0.15338875353336334, "rewards/accuracies": 1.0, "rewards/chosen": -4.096375414519571e-05, "rewards/margins": 0.22206011414527893, "rewards/rejected": -0.22210107743740082, "step": 8631 }, { "epoch": 5.9695712309820195, "grad_norm": 7.860897064208984, "learning_rate": 2.239127093898878e-05, "log_odds_chosen": 11.818281173706055, "log_odds_ratio": -4.5246742956805974e-05, "logits/chosen": -0.0748319998383522, "logits/rejected": -0.21310384571552277, "logps/chosen": -0.00032474740874022245, "logps/rejected": -2.9677517414093018, "loss": 0.7533, "nll_loss": 0.1883193850517273, "rewards/accuracies": 1.0, "rewards/chosen": -3.247474160161801e-05, "rewards/margins": 0.29674267768859863, "rewards/rejected": -0.2967751622200012, "step": 8632 }, { "epoch": 5.970262793914246, "grad_norm": 5.752249240875244, "learning_rate": 2.2387428922698633e-05, "log_odds_chosen": 9.994537353515625, "log_odds_ratio": -0.0003835707320831716, "logits/chosen": -0.5630497932434082, "logits/rejected": -0.6039668917655945, "logps/chosen": -0.0008304682560265064, "logps/rejected": -1.9116523265838623, "loss": 0.6675, "nll_loss": 0.16682936251163483, "rewards/accuracies": 1.0, "rewards/chosen": -8.304682705784217e-05, "rewards/margins": 0.1910821944475174, "rewards/rejected": -0.1911652386188507, "step": 8633 }, { "epoch": 5.970954356846473, "grad_norm": 6.303492546081543, "learning_rate": 2.2383586906408485e-05, "log_odds_chosen": 11.492966651916504, "log_odds_ratio": -2.2081108909333125e-05, "logits/chosen": -0.4860258102416992, "logits/rejected": -0.5048642754554749, "logps/chosen": -0.00012483232421800494, "logps/rejected": -2.422050952911377, "loss": 0.6797, "nll_loss": 0.1699298918247223, "rewards/accuracies": 1.0, "rewards/chosen": -1.2483233149396256e-05, "rewards/margins": 0.2421925961971283, "rewards/rejected": -0.24220508337020874, "step": 8634 }, { "epoch": 5.9716459197787, "grad_norm": 7.249591827392578, "learning_rate": 2.2379744890118334e-05, "log_odds_chosen": 8.761195182800293, "log_odds_ratio": -0.004018676467239857, "logits/chosen": -0.49295467138290405, "logits/rejected": -0.5738300681114197, "logps/chosen": -0.002777382265776396, "logps/rejected": -1.8959558010101318, "loss": 0.6219, "nll_loss": 0.15507788956165314, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027773823239840567, "rewards/margins": 0.18931785225868225, "rewards/rejected": -0.18959558010101318, "step": 8635 }, { "epoch": 5.972337482710927, "grad_norm": 10.255823135375977, "learning_rate": 2.2375902873828187e-05, "log_odds_chosen": 10.179533004760742, "log_odds_ratio": -0.00010110568109666929, "logits/chosen": -0.43774309754371643, "logits/rejected": -0.5146663188934326, "logps/chosen": -0.0007115602493286133, "logps/rejected": -2.2837624549865723, "loss": 1.0529, "nll_loss": 0.26320332288742065, "rewards/accuracies": 1.0, "rewards/chosen": -7.115602056728676e-05, "rewards/margins": 0.22830507159233093, "rewards/rejected": -0.22837623953819275, "step": 8636 }, { "epoch": 5.973029045643154, "grad_norm": 8.049625396728516, "learning_rate": 2.237206085753804e-05, "log_odds_chosen": 10.132574081420898, "log_odds_ratio": -0.00020164766465313733, "logits/chosen": -0.7841938734054565, "logits/rejected": -0.8315725922584534, "logps/chosen": -0.0002748143160715699, "logps/rejected": -1.7135040760040283, "loss": 0.5968, "nll_loss": 0.14917722344398499, "rewards/accuracies": 1.0, "rewards/chosen": -2.7481431970954873e-05, "rewards/margins": 0.17132292687892914, "rewards/rejected": -0.1713503897190094, "step": 8637 }, { "epoch": 5.9737206085753805, "grad_norm": 7.535101413726807, "learning_rate": 2.2368218841247888e-05, "log_odds_chosen": 9.183361053466797, "log_odds_ratio": -0.00047606491716578603, "logits/chosen": -0.4759877622127533, "logits/rejected": -0.47154396772384644, "logps/chosen": -0.0007837703451514244, "logps/rejected": -1.6742212772369385, "loss": 0.6127, "nll_loss": 0.15313656628131866, "rewards/accuracies": 1.0, "rewards/chosen": -7.837703742552549e-05, "rewards/margins": 0.16734375059604645, "rewards/rejected": -0.1674221307039261, "step": 8638 }, { "epoch": 5.974412171507607, "grad_norm": 7.383305072784424, "learning_rate": 2.236437682495774e-05, "log_odds_chosen": 11.191994667053223, "log_odds_ratio": -2.7318410502630286e-05, "logits/chosen": -0.5382693409919739, "logits/rejected": -0.5746403932571411, "logps/chosen": -0.00014897863729856908, "logps/rejected": -2.3866891860961914, "loss": 0.5678, "nll_loss": 0.14194512367248535, "rewards/accuracies": 1.0, "rewards/chosen": -1.4897865185048431e-05, "rewards/margins": 0.23865404725074768, "rewards/rejected": -0.23866893351078033, "step": 8639 }, { "epoch": 5.975103734439834, "grad_norm": 9.034134864807129, "learning_rate": 2.236053480866759e-05, "log_odds_chosen": 9.865920066833496, "log_odds_ratio": -0.05563132092356682, "logits/chosen": -0.5075576305389404, "logits/rejected": -0.5447198152542114, "logps/chosen": -0.01322740875184536, "logps/rejected": -2.043813467025757, "loss": 0.9815, "nll_loss": 0.23980922996997833, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013227409217506647, "rewards/margins": 0.20305860042572021, "rewards/rejected": -0.2043813318014145, "step": 8640 }, { "epoch": 5.975795297372061, "grad_norm": 7.827591896057129, "learning_rate": 2.235669279237744e-05, "log_odds_chosen": 9.875395774841309, "log_odds_ratio": -0.002394024282693863, "logits/chosen": -0.35297128558158875, "logits/rejected": -0.3840131163597107, "logps/chosen": -0.0015253536403179169, "logps/rejected": -1.8439840078353882, "loss": 0.5606, "nll_loss": 0.1399049460887909, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015253537276294082, "rewards/margins": 0.18424585461616516, "rewards/rejected": -0.18439838290214539, "step": 8641 }, { "epoch": 5.976486860304288, "grad_norm": 7.682641983032227, "learning_rate": 2.235285077608729e-05, "log_odds_chosen": 10.980426788330078, "log_odds_ratio": -3.709265365614556e-05, "logits/chosen": -0.13334612548351288, "logits/rejected": -0.23983854055404663, "logps/chosen": -0.0006211751606315374, "logps/rejected": -2.8142642974853516, "loss": 0.8566, "nll_loss": 0.21414557099342346, "rewards/accuracies": 1.0, "rewards/chosen": -6.211752042872831e-05, "rewards/margins": 0.2813643217086792, "rewards/rejected": -0.28142642974853516, "step": 8642 }, { "epoch": 5.977178423236515, "grad_norm": 8.214534759521484, "learning_rate": 2.2349008759797143e-05, "log_odds_chosen": 9.371925354003906, "log_odds_ratio": -0.002760364906862378, "logits/chosen": -0.04601850360631943, "logits/rejected": -0.13053913414478302, "logps/chosen": -0.0032472298480570316, "logps/rejected": -1.5316357612609863, "loss": 1.272, "nll_loss": 0.31772580742836, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003247229615226388, "rewards/margins": 0.15283885598182678, "rewards/rejected": -0.1531635820865631, "step": 8643 }, { "epoch": 5.977869986168741, "grad_norm": 9.129252433776855, "learning_rate": 2.2345166743506993e-05, "log_odds_chosen": 10.828535079956055, "log_odds_ratio": -0.00030270888237282634, "logits/chosen": -0.5784143209457397, "logits/rejected": -0.5499647855758667, "logps/chosen": -0.0003934805281460285, "logps/rejected": -2.299145221710205, "loss": 0.7022, "nll_loss": 0.17550931870937347, "rewards/accuracies": 1.0, "rewards/chosen": -3.934805135941133e-05, "rewards/margins": 0.22987514734268188, "rewards/rejected": -0.22991451621055603, "step": 8644 }, { "epoch": 5.978561549100968, "grad_norm": 7.754681587219238, "learning_rate": 2.2341324727216845e-05, "log_odds_chosen": 10.025257110595703, "log_odds_ratio": -0.00018732018361333758, "logits/chosen": -0.36683574318885803, "logits/rejected": -0.5633817315101624, "logps/chosen": -0.0003875857510138303, "logps/rejected": -1.5009148120880127, "loss": 0.6898, "nll_loss": 0.1724189668893814, "rewards/accuracies": 1.0, "rewards/chosen": -3.875857873936184e-05, "rewards/margins": 0.1500527262687683, "rewards/rejected": -0.1500914990901947, "step": 8645 }, { "epoch": 5.979253112033195, "grad_norm": 9.99053955078125, "learning_rate": 2.2337482710926697e-05, "log_odds_chosen": 10.781888008117676, "log_odds_ratio": -5.4689018725184724e-05, "logits/chosen": -0.21867603063583374, "logits/rejected": -0.24089495837688446, "logps/chosen": -0.00023485012934543192, "logps/rejected": -2.4182658195495605, "loss": 0.7284, "nll_loss": 0.1821037083864212, "rewards/accuracies": 1.0, "rewards/chosen": -2.348501220694743e-05, "rewards/margins": 0.2418031096458435, "rewards/rejected": -0.241826593875885, "step": 8646 }, { "epoch": 5.979944674965422, "grad_norm": 5.023019313812256, "learning_rate": 2.2333640694636546e-05, "log_odds_chosen": 10.844634056091309, "log_odds_ratio": -3.536961230565794e-05, "logits/chosen": -0.45662564039230347, "logits/rejected": -0.47854191064834595, "logps/chosen": -0.00011072782217524946, "logps/rejected": -1.7490148544311523, "loss": 0.9009, "nll_loss": 0.22522106766700745, "rewards/accuracies": 1.0, "rewards/chosen": -1.1072783308918588e-05, "rewards/margins": 0.1748904138803482, "rewards/rejected": -0.17490148544311523, "step": 8647 }, { "epoch": 5.980636237897649, "grad_norm": 6.765304088592529, "learning_rate": 2.23297986783464e-05, "log_odds_chosen": 10.448127746582031, "log_odds_ratio": -0.00030654840520583093, "logits/chosen": -0.7699148058891296, "logits/rejected": -0.8414617776870728, "logps/chosen": -0.0004278847191017121, "logps/rejected": -1.927390217781067, "loss": 1.3203, "nll_loss": 0.3300439715385437, "rewards/accuracies": 1.0, "rewards/chosen": -4.2788473365362734e-05, "rewards/margins": 0.1926962435245514, "rewards/rejected": -0.19273902475833893, "step": 8648 }, { "epoch": 5.981327800829876, "grad_norm": 13.171281814575195, "learning_rate": 2.2325956662056248e-05, "log_odds_chosen": 9.756038665771484, "log_odds_ratio": -0.00038166638114489615, "logits/chosen": -0.5013332962989807, "logits/rejected": -0.5654415488243103, "logps/chosen": -0.0009114966378547251, "logps/rejected": -2.246232032775879, "loss": 1.1029, "nll_loss": 0.27567875385284424, "rewards/accuracies": 1.0, "rewards/chosen": -9.114966815104708e-05, "rewards/margins": 0.22453203797340393, "rewards/rejected": -0.2246231883764267, "step": 8649 }, { "epoch": 5.982019363762102, "grad_norm": 7.266759872436523, "learning_rate": 2.2322114645766097e-05, "log_odds_chosen": 10.222375869750977, "log_odds_ratio": -0.0001875264715636149, "logits/chosen": -0.2591787874698639, "logits/rejected": -0.2960703372955322, "logps/chosen": -0.0010339757427573204, "logps/rejected": -2.102975606918335, "loss": 0.5865, "nll_loss": 0.14661476016044617, "rewards/accuracies": 1.0, "rewards/chosen": -0.000103397571365349, "rewards/margins": 0.2101941555738449, "rewards/rejected": -0.2102975696325302, "step": 8650 }, { "epoch": 5.982710926694329, "grad_norm": 9.63925838470459, "learning_rate": 2.231827262947595e-05, "log_odds_chosen": 10.470054626464844, "log_odds_ratio": -0.00010331722296541557, "logits/chosen": -0.034905824810266495, "logits/rejected": -0.030192043632268906, "logps/chosen": -0.0003535951836965978, "logps/rejected": -1.8167335987091064, "loss": 0.5858, "nll_loss": 0.1464492231607437, "rewards/accuracies": 1.0, "rewards/chosen": -3.5359520552447066e-05, "rewards/margins": 0.18163800239562988, "rewards/rejected": -0.1816733479499817, "step": 8651 }, { "epoch": 5.983402489626556, "grad_norm": 6.006370544433594, "learning_rate": 2.2314430613185802e-05, "log_odds_chosen": 9.1875638961792, "log_odds_ratio": -0.00034047331428155303, "logits/chosen": -0.3792960047721863, "logits/rejected": -0.41480278968811035, "logps/chosen": -0.006437941920012236, "logps/rejected": -2.0599706172943115, "loss": 0.8695, "nll_loss": 0.21734338998794556, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006437941920012236, "rewards/margins": 0.2053532749414444, "rewards/rejected": -0.2059970647096634, "step": 8652 }, { "epoch": 5.984094052558783, "grad_norm": 4.718745708465576, "learning_rate": 2.231058859689565e-05, "log_odds_chosen": 9.810354232788086, "log_odds_ratio": -0.00027896571555174887, "logits/chosen": -0.6763026714324951, "logits/rejected": -0.6438024044036865, "logps/chosen": -0.0017682433826848865, "logps/rejected": -1.700201392173767, "loss": 0.4415, "nll_loss": 0.11035287380218506, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017682435282040387, "rewards/margins": 0.16984331607818604, "rewards/rejected": -0.17002014815807343, "step": 8653 }, { "epoch": 5.98478561549101, "grad_norm": 9.258583068847656, "learning_rate": 2.2306746580605503e-05, "log_odds_chosen": 11.272083282470703, "log_odds_ratio": -2.6522073312662542e-05, "logits/chosen": -0.2892472445964813, "logits/rejected": -0.37034428119659424, "logps/chosen": -9.99235053313896e-05, "logps/rejected": -2.1363444328308105, "loss": 0.7271, "nll_loss": 0.18177568912506104, "rewards/accuracies": 1.0, "rewards/chosen": -9.99234998744214e-06, "rewards/margins": 0.21362446248531342, "rewards/rejected": -0.2136344462633133, "step": 8654 }, { "epoch": 5.985477178423237, "grad_norm": 6.793190956115723, "learning_rate": 2.2302904564315356e-05, "log_odds_chosen": 10.94711971282959, "log_odds_ratio": -0.0002915470104198903, "logits/chosen": 0.11246512830257416, "logits/rejected": -0.04465283453464508, "logps/chosen": -0.0031524840742349625, "logps/rejected": -3.0115573406219482, "loss": 0.7859, "nll_loss": 0.1964433640241623, "rewards/accuracies": 1.0, "rewards/chosen": -0.00031524847145192325, "rewards/margins": 0.30084046721458435, "rewards/rejected": -0.3011557459831238, "step": 8655 }, { "epoch": 5.986168741355463, "grad_norm": 5.1810150146484375, "learning_rate": 2.2299062548025205e-05, "log_odds_chosen": 10.347426414489746, "log_odds_ratio": -0.00036247429670765996, "logits/chosen": -0.21458715200424194, "logits/rejected": -0.24404297769069672, "logps/chosen": -0.00046909027150832117, "logps/rejected": -2.0660440921783447, "loss": 0.6473, "nll_loss": 0.16179302334785461, "rewards/accuracies": 1.0, "rewards/chosen": -4.690902278525755e-05, "rewards/margins": 0.2065575122833252, "rewards/rejected": -0.20660439133644104, "step": 8656 }, { "epoch": 5.98686030428769, "grad_norm": 7.241073131561279, "learning_rate": 2.2295220531735057e-05, "log_odds_chosen": 10.880849838256836, "log_odds_ratio": -0.00020453293109312654, "logits/chosen": -0.7550759315490723, "logits/rejected": -0.7828030586242676, "logps/chosen": -0.00047937879571691155, "logps/rejected": -2.152524709701538, "loss": 0.8083, "nll_loss": 0.20206305384635925, "rewards/accuracies": 1.0, "rewards/chosen": -4.7937879571691155e-05, "rewards/margins": 0.21520453691482544, "rewards/rejected": -0.21525248885154724, "step": 8657 }, { "epoch": 5.987551867219917, "grad_norm": 7.950253009796143, "learning_rate": 2.2291378515444906e-05, "log_odds_chosen": 10.849205017089844, "log_odds_ratio": -4.903499939246103e-05, "logits/chosen": -0.5559477210044861, "logits/rejected": -0.5406090021133423, "logps/chosen": -0.007074021268635988, "logps/rejected": -2.487555503845215, "loss": 1.2176, "nll_loss": 0.3043956458568573, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007074021268635988, "rewards/margins": 0.24804814159870148, "rewards/rejected": -0.248755544424057, "step": 8658 }, { "epoch": 5.988243430152144, "grad_norm": 6.710446357727051, "learning_rate": 2.2287536499154755e-05, "log_odds_chosen": 10.762208938598633, "log_odds_ratio": -0.0006994472933001816, "logits/chosen": -0.3678176999092102, "logits/rejected": -0.47703123092651367, "logps/chosen": -0.0007495337049476802, "logps/rejected": -2.196031332015991, "loss": 1.1679, "nll_loss": 0.291897714138031, "rewards/accuracies": 1.0, "rewards/chosen": -7.495337194995955e-05, "rewards/margins": 0.2195281684398651, "rewards/rejected": -0.21960312128067017, "step": 8659 }, { "epoch": 5.988934993084371, "grad_norm": 7.118763446807861, "learning_rate": 2.2283694482864608e-05, "log_odds_chosen": 11.913274765014648, "log_odds_ratio": -4.843656643060967e-05, "logits/chosen": -0.5528740882873535, "logits/rejected": -0.6186320781707764, "logps/chosen": -0.0005820526275783777, "logps/rejected": -3.5821590423583984, "loss": 0.6879, "nll_loss": 0.17196498811244965, "rewards/accuracies": 1.0, "rewards/chosen": -5.820525984745473e-05, "rewards/margins": 0.3581577241420746, "rewards/rejected": -0.35821592807769775, "step": 8660 }, { "epoch": 5.9896265560165975, "grad_norm": 10.976981163024902, "learning_rate": 2.227985246657446e-05, "log_odds_chosen": 10.413000106811523, "log_odds_ratio": -0.00011384957906557247, "logits/chosen": -0.6883463859558105, "logits/rejected": -0.7387396693229675, "logps/chosen": -0.00041092990431934595, "logps/rejected": -2.2846686840057373, "loss": 1.2261, "nll_loss": 0.3065022826194763, "rewards/accuracies": 1.0, "rewards/chosen": -4.1092989704338834e-05, "rewards/margins": 0.2284257709980011, "rewards/rejected": -0.22846688330173492, "step": 8661 }, { "epoch": 5.990318118948824, "grad_norm": 7.272087574005127, "learning_rate": 2.227601045028431e-05, "log_odds_chosen": 10.620426177978516, "log_odds_ratio": -0.00024088645295705646, "logits/chosen": -0.8818700909614563, "logits/rejected": -0.8350770473480225, "logps/chosen": -0.0008133258670568466, "logps/rejected": -2.7097411155700684, "loss": 0.7731, "nll_loss": 0.1932595819234848, "rewards/accuracies": 1.0, "rewards/chosen": -8.13325823401101e-05, "rewards/margins": 0.27089282870292664, "rewards/rejected": -0.27097412943840027, "step": 8662 }, { "epoch": 5.991009681881051, "grad_norm": 8.038209915161133, "learning_rate": 2.2272168433994162e-05, "log_odds_chosen": 10.276552200317383, "log_odds_ratio": -0.00010404939530417323, "logits/chosen": -0.33884933590888977, "logits/rejected": -0.34993690252304077, "logps/chosen": -0.0004283892922103405, "logps/rejected": -2.4142346382141113, "loss": 0.6032, "nll_loss": 0.15079672634601593, "rewards/accuracies": 1.0, "rewards/chosen": -4.2838932131417096e-05, "rewards/margins": 0.24138060212135315, "rewards/rejected": -0.24142345786094666, "step": 8663 }, { "epoch": 5.991701244813278, "grad_norm": 6.9717698097229, "learning_rate": 2.2268326417704014e-05, "log_odds_chosen": 10.098555564880371, "log_odds_ratio": -0.00010227275197394192, "logits/chosen": -0.3164041042327881, "logits/rejected": -0.28061607480049133, "logps/chosen": -0.00019917808822356164, "logps/rejected": -1.6113238334655762, "loss": 0.6207, "nll_loss": 0.15516464412212372, "rewards/accuracies": 1.0, "rewards/chosen": -1.9917810277547687e-05, "rewards/margins": 0.1611124575138092, "rewards/rejected": -0.16113239526748657, "step": 8664 }, { "epoch": 5.992392807745505, "grad_norm": 7.463654518127441, "learning_rate": 2.2264484401413863e-05, "log_odds_chosen": 11.0711030960083, "log_odds_ratio": -4.571516183204949e-05, "logits/chosen": -0.8618670701980591, "logits/rejected": -0.9495669603347778, "logps/chosen": -0.00015691615408286452, "logps/rejected": -2.068732976913452, "loss": 0.6994, "nll_loss": 0.17484234273433685, "rewards/accuracies": 1.0, "rewards/chosen": -1.5691615772084333e-05, "rewards/margins": 0.2068575918674469, "rewards/rejected": -0.2068733125925064, "step": 8665 }, { "epoch": 5.993084370677732, "grad_norm": 5.811643600463867, "learning_rate": 2.2260642385123716e-05, "log_odds_chosen": 10.059645652770996, "log_odds_ratio": -0.00012689466529991478, "logits/chosen": -0.6054131388664246, "logits/rejected": -0.5192132592201233, "logps/chosen": -0.00026542056002654135, "logps/rejected": -1.6344331502914429, "loss": 1.132, "nll_loss": 0.2829943895339966, "rewards/accuracies": 1.0, "rewards/chosen": -2.6542056730249897e-05, "rewards/margins": 0.1634167730808258, "rewards/rejected": -0.16344329714775085, "step": 8666 }, { "epoch": 5.9937759336099585, "grad_norm": 7.299609661102295, "learning_rate": 2.2256800368833565e-05, "log_odds_chosen": 9.224020004272461, "log_odds_ratio": -0.0012528158258646727, "logits/chosen": -0.5629249811172485, "logits/rejected": -0.6724428534507751, "logps/chosen": -0.0006475116824731231, "logps/rejected": -1.4131289720535278, "loss": 0.8529, "nll_loss": 0.21310065686702728, "rewards/accuracies": 1.0, "rewards/chosen": -6.475116970250383e-05, "rewards/margins": 0.14124815165996552, "rewards/rejected": -0.14131289720535278, "step": 8667 }, { "epoch": 5.994467496542185, "grad_norm": 6.194040775299072, "learning_rate": 2.2252958352543414e-05, "log_odds_chosen": 11.240839004516602, "log_odds_ratio": -3.617586480686441e-05, "logits/chosen": -0.3279075026512146, "logits/rejected": -0.3418920636177063, "logps/chosen": -0.0007275803945958614, "logps/rejected": -2.8612427711486816, "loss": 1.0579, "nll_loss": 0.2644652724266052, "rewards/accuracies": 1.0, "rewards/chosen": -7.275804819073528e-05, "rewards/margins": 0.28605151176452637, "rewards/rejected": -0.2861242890357971, "step": 8668 }, { "epoch": 5.995159059474412, "grad_norm": 6.6501851081848145, "learning_rate": 2.2249116336253266e-05, "log_odds_chosen": 10.770421981811523, "log_odds_ratio": -5.30408215126954e-05, "logits/chosen": -0.2276163250207901, "logits/rejected": -0.3236449062824249, "logps/chosen": -0.00018166887457482517, "logps/rejected": -2.0231595039367676, "loss": 0.4534, "nll_loss": 0.11334690451622009, "rewards/accuracies": 1.0, "rewards/chosen": -1.8166887457482517e-05, "rewards/margins": 0.20229777693748474, "rewards/rejected": -0.20231595635414124, "step": 8669 }, { "epoch": 5.995850622406639, "grad_norm": 13.294677734375, "learning_rate": 2.2245274319963115e-05, "log_odds_chosen": 11.529056549072266, "log_odds_ratio": -1.5711140804341994e-05, "logits/chosen": -0.2797544300556183, "logits/rejected": -0.37466806173324585, "logps/chosen": -0.0009607851970940828, "logps/rejected": -2.869312286376953, "loss": 0.7845, "nll_loss": 0.19612446427345276, "rewards/accuracies": 1.0, "rewards/chosen": -9.607851097825915e-05, "rewards/margins": 0.2868351638317108, "rewards/rejected": -0.28693124651908875, "step": 8670 }, { "epoch": 5.996542185338866, "grad_norm": 9.357288360595703, "learning_rate": 2.2241432303672968e-05, "log_odds_chosen": 10.64914608001709, "log_odds_ratio": -8.049822645261884e-05, "logits/chosen": -0.45057523250579834, "logits/rejected": -0.4166865944862366, "logps/chosen": -0.0009274584008380771, "logps/rejected": -2.585829019546509, "loss": 0.8917, "nll_loss": 0.22291788458824158, "rewards/accuracies": 1.0, "rewards/chosen": -9.27458459045738e-05, "rewards/margins": 0.2584901750087738, "rewards/rejected": -0.2585829198360443, "step": 8671 }, { "epoch": 5.997233748271093, "grad_norm": 8.706218719482422, "learning_rate": 2.223759028738282e-05, "log_odds_chosen": 11.269906997680664, "log_odds_ratio": -1.8488583009457216e-05, "logits/chosen": -0.28220856189727783, "logits/rejected": -0.3012881875038147, "logps/chosen": -0.00011813380115199834, "logps/rejected": -1.8137805461883545, "loss": 0.5725, "nll_loss": 0.14313194155693054, "rewards/accuracies": 1.0, "rewards/chosen": -1.1813381206593476e-05, "rewards/margins": 0.18136624991893768, "rewards/rejected": -0.1813780665397644, "step": 8672 }, { "epoch": 5.9979253112033195, "grad_norm": 11.165410995483398, "learning_rate": 2.223374827109267e-05, "log_odds_chosen": 8.817961692810059, "log_odds_ratio": -0.04084470868110657, "logits/chosen": -0.30266138911247253, "logits/rejected": -0.37050777673721313, "logps/chosen": -0.01773255318403244, "logps/rejected": -1.998016357421875, "loss": 1.1478, "nll_loss": 0.2828754186630249, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017732552951201797, "rewards/margins": 0.19802838563919067, "rewards/rejected": -0.19980162382125854, "step": 8673 }, { "epoch": 5.998616874135546, "grad_norm": 15.032378196716309, "learning_rate": 2.2229906254802522e-05, "log_odds_chosen": 10.327550888061523, "log_odds_ratio": -0.002341375919058919, "logits/chosen": -0.28076595067977905, "logits/rejected": -0.20058351755142212, "logps/chosen": -0.001488571404479444, "logps/rejected": -2.425849437713623, "loss": 1.0028, "nll_loss": 0.25046506524086, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014885715791024268, "rewards/margins": 0.24243608117103577, "rewards/rejected": -0.2425849437713623, "step": 8674 }, { "epoch": 5.999308437067773, "grad_norm": 5.518479347229004, "learning_rate": 2.2226064238512374e-05, "log_odds_chosen": 10.280984878540039, "log_odds_ratio": -0.001351931132376194, "logits/chosen": -0.2739599943161011, "logits/rejected": -0.3183516263961792, "logps/chosen": -0.0012864520540460944, "logps/rejected": -2.3341145515441895, "loss": 0.5981, "nll_loss": 0.14940090477466583, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012864521704614162, "rewards/margins": 0.23328280448913574, "rewards/rejected": -0.23341144621372223, "step": 8675 }, { "epoch": 6.0, "grad_norm": 15.6314697265625, "learning_rate": 2.2222222222222223e-05, "log_odds_chosen": 10.472728729248047, "log_odds_ratio": -6.75845512887463e-05, "logits/chosen": -0.5962976217269897, "logits/rejected": -0.6523298621177673, "logps/chosen": -0.0005146162584424019, "logps/rejected": -2.0234107971191406, "loss": 0.6844, "nll_loss": 0.17108887434005737, "rewards/accuracies": 1.0, "rewards/chosen": -5.1461629482218996e-05, "rewards/margins": 0.2022896260023117, "rewards/rejected": -0.20234109461307526, "step": 8676 }, { "epoch": 6.000691562932227, "grad_norm": 6.434847831726074, "learning_rate": 2.2218380205932072e-05, "log_odds_chosen": 11.39303970336914, "log_odds_ratio": -0.00013875612057745457, "logits/chosen": -0.5068904161453247, "logits/rejected": -0.46730056405067444, "logps/chosen": -0.00022783351596444845, "logps/rejected": -2.217576026916504, "loss": 0.6635, "nll_loss": 0.16585785150527954, "rewards/accuracies": 1.0, "rewards/chosen": -2.2783351596444845e-05, "rewards/margins": 0.22173485159873962, "rewards/rejected": -0.22175762057304382, "step": 8677 }, { "epoch": 6.001383125864454, "grad_norm": 6.79045295715332, "learning_rate": 2.2214538189641925e-05, "log_odds_chosen": 11.089578628540039, "log_odds_ratio": -0.00026866502594202757, "logits/chosen": -0.40105462074279785, "logits/rejected": -0.40405669808387756, "logps/chosen": -0.0005820757942274213, "logps/rejected": -2.961902618408203, "loss": 0.6948, "nll_loss": 0.17367303371429443, "rewards/accuracies": 1.0, "rewards/chosen": -5.820758451591246e-05, "rewards/margins": 0.29613208770751953, "rewards/rejected": -0.2961902618408203, "step": 8678 }, { "epoch": 6.0020746887966805, "grad_norm": 5.017109394073486, "learning_rate": 2.2210696173351774e-05, "log_odds_chosen": 9.893177032470703, "log_odds_ratio": -0.00048073488869704306, "logits/chosen": -0.5683239102363586, "logits/rejected": -0.625013530254364, "logps/chosen": -0.0008165045292116702, "logps/rejected": -2.0497570037841797, "loss": 0.8591, "nll_loss": 0.214732825756073, "rewards/accuracies": 1.0, "rewards/chosen": -8.165045437635854e-05, "rewards/margins": 0.2048940360546112, "rewards/rejected": -0.2049756795167923, "step": 8679 }, { "epoch": 6.002766251728907, "grad_norm": 5.027901649475098, "learning_rate": 2.2206854157061626e-05, "log_odds_chosen": 10.607897758483887, "log_odds_ratio": -0.00017300539184361696, "logits/chosen": -0.35090476274490356, "logits/rejected": -0.4269883334636688, "logps/chosen": -0.0007968850550241768, "logps/rejected": -2.3614661693573, "loss": 0.46, "nll_loss": 0.11498267948627472, "rewards/accuracies": 1.0, "rewards/chosen": -7.968849968165159e-05, "rewards/margins": 0.23606693744659424, "rewards/rejected": -0.23614662885665894, "step": 8680 }, { "epoch": 6.003457814661134, "grad_norm": 8.310800552368164, "learning_rate": 2.220301214077148e-05, "log_odds_chosen": 12.149660110473633, "log_odds_ratio": -1.4518486750603188e-05, "logits/chosen": -0.24523666501045227, "logits/rejected": -0.3247970938682556, "logps/chosen": -0.00015250536671373993, "logps/rejected": -3.321934700012207, "loss": 0.6412, "nll_loss": 0.1602870225906372, "rewards/accuracies": 1.0, "rewards/chosen": -1.5250537217070814e-05, "rewards/margins": 0.3321782350540161, "rewards/rejected": -0.3321934938430786, "step": 8681 }, { "epoch": 6.004149377593361, "grad_norm": 7.237411022186279, "learning_rate": 2.2199170124481328e-05, "log_odds_chosen": 9.43208122253418, "log_odds_ratio": -0.00025389096117578447, "logits/chosen": -0.5209689140319824, "logits/rejected": -0.5669733881950378, "logps/chosen": -0.0007305705803446472, "logps/rejected": -1.4171273708343506, "loss": 0.8169, "nll_loss": 0.2042027860879898, "rewards/accuracies": 1.0, "rewards/chosen": -7.305705366889015e-05, "rewards/margins": 0.14163967967033386, "rewards/rejected": -0.1417127400636673, "step": 8682 }, { "epoch": 6.004840940525588, "grad_norm": 7.599490165710449, "learning_rate": 2.219532810819118e-05, "log_odds_chosen": 10.09867000579834, "log_odds_ratio": -0.0008778284536674619, "logits/chosen": -0.882388174533844, "logits/rejected": -0.7930755615234375, "logps/chosen": -0.0010563363321125507, "logps/rejected": -2.356621742248535, "loss": 0.5612, "nll_loss": 0.14019975066184998, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010563363321125507, "rewards/margins": 0.23555654287338257, "rewards/rejected": -0.23566217720508575, "step": 8683 }, { "epoch": 6.005532503457815, "grad_norm": 7.226995468139648, "learning_rate": 2.2191486091901033e-05, "log_odds_chosen": 10.630027770996094, "log_odds_ratio": -0.00019700817938428372, "logits/chosen": -0.4471282958984375, "logits/rejected": -0.523328959941864, "logps/chosen": -0.0018144343048334122, "logps/rejected": -2.698294162750244, "loss": 0.818, "nll_loss": 0.20448969304561615, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001814434363041073, "rewards/margins": 0.269648015499115, "rewards/rejected": -0.2698294520378113, "step": 8684 }, { "epoch": 6.0062240663900415, "grad_norm": 8.176135063171387, "learning_rate": 2.2187644075610882e-05, "log_odds_chosen": 10.905904769897461, "log_odds_ratio": -3.206159090041183e-05, "logits/chosen": -0.6330730319023132, "logits/rejected": -0.6118956804275513, "logps/chosen": -0.00013914890587329865, "logps/rejected": -1.9043033123016357, "loss": 0.613, "nll_loss": 0.15325427055358887, "rewards/accuracies": 1.0, "rewards/chosen": -1.3914891496824566e-05, "rewards/margins": 0.19041642546653748, "rewards/rejected": -0.19043034315109253, "step": 8685 }, { "epoch": 6.006915629322268, "grad_norm": 6.383642673492432, "learning_rate": 2.218380205932073e-05, "log_odds_chosen": 10.754993438720703, "log_odds_ratio": -0.0014159767888486385, "logits/chosen": -0.33393335342407227, "logits/rejected": -0.4243951141834259, "logps/chosen": -0.0009645427926443517, "logps/rejected": -2.005563259124756, "loss": 0.8443, "nll_loss": 0.2109401524066925, "rewards/accuracies": 1.0, "rewards/chosen": -9.645428508520126e-05, "rewards/margins": 0.20045988261699677, "rewards/rejected": -0.20055633783340454, "step": 8686 }, { "epoch": 6.007607192254495, "grad_norm": 7.030446529388428, "learning_rate": 2.2179960043030583e-05, "log_odds_chosen": 10.016925811767578, "log_odds_ratio": -0.00023798673646524549, "logits/chosen": -0.22667162120342255, "logits/rejected": -0.2796691358089447, "logps/chosen": -0.0003618993505369872, "logps/rejected": -1.9764633178710938, "loss": 0.5404, "nll_loss": 0.13506531715393066, "rewards/accuracies": 1.0, "rewards/chosen": -3.618993650889024e-05, "rewards/margins": 0.19761013984680176, "rewards/rejected": -0.1976463347673416, "step": 8687 }, { "epoch": 6.008298755186722, "grad_norm": 5.065103054046631, "learning_rate": 2.2176118026740432e-05, "log_odds_chosen": 10.794071197509766, "log_odds_ratio": -5.74659243284259e-05, "logits/chosen": -0.3765200674533844, "logits/rejected": -0.446674644947052, "logps/chosen": -0.00041642412543296814, "logps/rejected": -2.614743232727051, "loss": 0.7369, "nll_loss": 0.18422549962997437, "rewards/accuracies": 1.0, "rewards/chosen": -4.164241545367986e-05, "rewards/margins": 0.2614326775074005, "rewards/rejected": -0.2614743113517761, "step": 8688 }, { "epoch": 6.008990318118949, "grad_norm": 4.796487331390381, "learning_rate": 2.2172276010450285e-05, "log_odds_chosen": 10.9295072555542, "log_odds_ratio": -7.72800121922046e-05, "logits/chosen": -0.05144000053405762, "logits/rejected": -0.18354326486587524, "logps/chosen": -0.0022706189192831516, "logps/rejected": -3.0933749675750732, "loss": 0.6238, "nll_loss": 0.1559487283229828, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022706188610754907, "rewards/margins": 0.3091104328632355, "rewards/rejected": -0.3093374967575073, "step": 8689 }, { "epoch": 6.009681881051176, "grad_norm": 6.887716770172119, "learning_rate": 2.2168433994160137e-05, "log_odds_chosen": 10.934017181396484, "log_odds_ratio": -3.159624611726031e-05, "logits/chosen": -0.3636074662208557, "logits/rejected": -0.3056112229824066, "logps/chosen": -9.977620356949046e-05, "logps/rejected": -1.744262933731079, "loss": 0.4337, "nll_loss": 0.10841409862041473, "rewards/accuracies": 1.0, "rewards/chosen": -9.977620720746927e-06, "rewards/margins": 0.17441630363464355, "rewards/rejected": -0.17442628741264343, "step": 8690 }, { "epoch": 6.0103734439834025, "grad_norm": 7.19215202331543, "learning_rate": 2.2164591977869986e-05, "log_odds_chosen": 11.126794815063477, "log_odds_ratio": -4.7908208216540515e-05, "logits/chosen": -0.503757655620575, "logits/rejected": -0.5169774889945984, "logps/chosen": -0.0005810896982438862, "logps/rejected": -2.8171472549438477, "loss": 0.5578, "nll_loss": 0.13944345712661743, "rewards/accuracies": 1.0, "rewards/chosen": -5.810897710034624e-05, "rewards/margins": 0.2816566228866577, "rewards/rejected": -0.2817147374153137, "step": 8691 }, { "epoch": 6.011065006915629, "grad_norm": 5.968571186065674, "learning_rate": 2.216074996157984e-05, "log_odds_chosen": 10.023983001708984, "log_odds_ratio": -0.0001322976895608008, "logits/chosen": -0.5502274036407471, "logits/rejected": -0.5863975286483765, "logps/chosen": -0.00030613134731538594, "logps/rejected": -1.6076347827911377, "loss": 1.0732, "nll_loss": 0.26829004287719727, "rewards/accuracies": 1.0, "rewards/chosen": -3.061313327634707e-05, "rewards/margins": 0.16073286533355713, "rewards/rejected": -0.1607634723186493, "step": 8692 }, { "epoch": 6.011756569847856, "grad_norm": 5.204310894012451, "learning_rate": 2.215690794528969e-05, "log_odds_chosen": 9.940327644348145, "log_odds_ratio": -0.0021992912515997887, "logits/chosen": -0.4144933521747589, "logits/rejected": -0.3945091962814331, "logps/chosen": -0.002976007293909788, "logps/rejected": -1.6367212533950806, "loss": 0.4361, "nll_loss": 0.10879306495189667, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029760069446638227, "rewards/margins": 0.16337452828884125, "rewards/rejected": -0.16367211937904358, "step": 8693 }, { "epoch": 6.012448132780083, "grad_norm": 5.4716644287109375, "learning_rate": 2.215306592899954e-05, "log_odds_chosen": 10.381438255310059, "log_odds_ratio": -0.00014213169924914837, "logits/chosen": -0.2392970472574234, "logits/rejected": -0.22555887699127197, "logps/chosen": -0.00031407614005729556, "logps/rejected": -1.8977861404418945, "loss": 0.4822, "nll_loss": 0.12054598331451416, "rewards/accuracies": 1.0, "rewards/chosen": -3.1407613278133795e-05, "rewards/margins": 0.18974719941616058, "rewards/rejected": -0.1897786259651184, "step": 8694 }, { "epoch": 6.01313969571231, "grad_norm": 9.384466171264648, "learning_rate": 2.214922391270939e-05, "log_odds_chosen": 9.672860145568848, "log_odds_ratio": -0.0006632217555306852, "logits/chosen": -0.7125736474990845, "logits/rejected": -0.7020600438117981, "logps/chosen": -0.002472150605171919, "logps/rejected": -2.1107208728790283, "loss": 0.7561, "nll_loss": 0.188960000872612, "rewards/accuracies": 1.0, "rewards/chosen": -0.000247215066337958, "rewards/margins": 0.2108248770236969, "rewards/rejected": -0.21107208728790283, "step": 8695 }, { "epoch": 6.013831258644537, "grad_norm": 6.288478851318359, "learning_rate": 2.214538189641924e-05, "log_odds_chosen": 11.169961929321289, "log_odds_ratio": -5.973395309410989e-05, "logits/chosen": -0.3781644105911255, "logits/rejected": -0.4019641876220703, "logps/chosen": -0.0001706377079244703, "logps/rejected": -2.1489315032958984, "loss": 0.4614, "nll_loss": 0.11533622443675995, "rewards/accuracies": 1.0, "rewards/chosen": -1.706377042864915e-05, "rewards/margins": 0.21487608551979065, "rewards/rejected": -0.2148931622505188, "step": 8696 }, { "epoch": 6.014522821576763, "grad_norm": 4.725882053375244, "learning_rate": 2.214153988012909e-05, "log_odds_chosen": 9.371036529541016, "log_odds_ratio": -0.0006971032125875354, "logits/chosen": -0.40736350417137146, "logits/rejected": -0.5088552236557007, "logps/chosen": -0.0007990387384779751, "logps/rejected": -1.4437347650527954, "loss": 0.4459, "nll_loss": 0.11140558868646622, "rewards/accuracies": 1.0, "rewards/chosen": -7.990387530298904e-05, "rewards/margins": 0.14429357647895813, "rewards/rejected": -0.14437347650527954, "step": 8697 }, { "epoch": 6.01521438450899, "grad_norm": 4.899136543273926, "learning_rate": 2.2137697863838943e-05, "log_odds_chosen": 11.269777297973633, "log_odds_ratio": -0.0002260785986436531, "logits/chosen": -0.45080870389938354, "logits/rejected": -0.5041922330856323, "logps/chosen": -0.00014511129120364785, "logps/rejected": -2.7046732902526855, "loss": 0.7203, "nll_loss": 0.180058091878891, "rewards/accuracies": 1.0, "rewards/chosen": -1.4511128028971143e-05, "rewards/margins": 0.2704527974128723, "rewards/rejected": -0.2704673111438751, "step": 8698 }, { "epoch": 6.015905947441217, "grad_norm": 8.853839874267578, "learning_rate": 2.2133855847548796e-05, "log_odds_chosen": 10.970938682556152, "log_odds_ratio": -5.5123193305917084e-05, "logits/chosen": -0.610616147518158, "logits/rejected": -0.6522665023803711, "logps/chosen": -0.00042205723002552986, "logps/rejected": -2.1785030364990234, "loss": 0.6792, "nll_loss": 0.16979989409446716, "rewards/accuracies": 1.0, "rewards/chosen": -4.2205727368127555e-05, "rewards/margins": 0.2178080976009369, "rewards/rejected": -0.21785031259059906, "step": 8699 }, { "epoch": 6.016597510373444, "grad_norm": 5.846555709838867, "learning_rate": 2.2130013831258645e-05, "log_odds_chosen": 10.732621192932129, "log_odds_ratio": -0.00025087303947657347, "logits/chosen": -0.06174376606941223, "logits/rejected": -0.09032157808542252, "logps/chosen": -0.014207985252141953, "logps/rejected": -2.8097808361053467, "loss": 0.561, "nll_loss": 0.14022380113601685, "rewards/accuracies": 1.0, "rewards/chosen": -0.0014207985950633883, "rewards/margins": 0.2795572578907013, "rewards/rejected": -0.28097808361053467, "step": 8700 }, { "epoch": 6.017289073305671, "grad_norm": 5.967756748199463, "learning_rate": 2.2126171814968497e-05, "log_odds_chosen": 9.416215896606445, "log_odds_ratio": -0.0009794974466785789, "logits/chosen": -0.6006361842155457, "logits/rejected": -0.6846101880073547, "logps/chosen": -0.007930763997137547, "logps/rejected": -1.8586578369140625, "loss": 0.4852, "nll_loss": 0.12120527774095535, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007930764113552868, "rewards/margins": 0.18507272005081177, "rewards/rejected": -0.18586578965187073, "step": 8701 }, { "epoch": 6.017980636237898, "grad_norm": 6.094478130340576, "learning_rate": 2.212232979867835e-05, "log_odds_chosen": 10.967626571655273, "log_odds_ratio": -5.0005532102659345e-05, "logits/chosen": -0.5818835496902466, "logits/rejected": -0.7001234292984009, "logps/chosen": -0.00010855847358470783, "logps/rejected": -1.7564148902893066, "loss": 0.4468, "nll_loss": 0.11169680953025818, "rewards/accuracies": 1.0, "rewards/chosen": -1.0855848813662305e-05, "rewards/margins": 0.1756306290626526, "rewards/rejected": -0.1756414920091629, "step": 8702 }, { "epoch": 6.018672199170124, "grad_norm": 4.230981349945068, "learning_rate": 2.21184877823882e-05, "log_odds_chosen": 9.438599586486816, "log_odds_ratio": -0.01306835561990738, "logits/chosen": -0.26618629693984985, "logits/rejected": -0.3190756142139435, "logps/chosen": -0.00445225415751338, "logps/rejected": -1.745501160621643, "loss": 0.7029, "nll_loss": 0.1744271218776703, "rewards/accuracies": 1.0, "rewards/chosen": -0.00044522545067593455, "rewards/margins": 0.17410489916801453, "rewards/rejected": -0.1745501160621643, "step": 8703 }, { "epoch": 6.019363762102351, "grad_norm": 6.9316301345825195, "learning_rate": 2.211464576609805e-05, "log_odds_chosen": 10.875043869018555, "log_odds_ratio": -0.00011402039672248065, "logits/chosen": -0.577836275100708, "logits/rejected": -0.5895098447799683, "logps/chosen": -0.0004857161547988653, "logps/rejected": -2.3168389797210693, "loss": 0.5247, "nll_loss": 0.13116057217121124, "rewards/accuracies": 1.0, "rewards/chosen": -4.857161547988653e-05, "rewards/margins": 0.23163533210754395, "rewards/rejected": -0.2316839098930359, "step": 8704 }, { "epoch": 6.020055325034578, "grad_norm": 4.984665870666504, "learning_rate": 2.21108037498079e-05, "log_odds_chosen": 10.290657997131348, "log_odds_ratio": -7.351344538619742e-05, "logits/chosen": -0.3882635235786438, "logits/rejected": -0.42681241035461426, "logps/chosen": -0.00023238833819050342, "logps/rejected": -1.7680834531784058, "loss": 0.5464, "nll_loss": 0.13659122586250305, "rewards/accuracies": 1.0, "rewards/chosen": -2.3238833819050342e-05, "rewards/margins": 0.17678511142730713, "rewards/rejected": -0.17680835723876953, "step": 8705 }, { "epoch": 6.020746887966805, "grad_norm": 4.9300031661987305, "learning_rate": 2.210696173351775e-05, "log_odds_chosen": 11.626920700073242, "log_odds_ratio": -3.644917524070479e-05, "logits/chosen": -0.5183431506156921, "logits/rejected": -0.5448989868164062, "logps/chosen": -6.581601337529719e-05, "logps/rejected": -2.022371292114258, "loss": 0.4968, "nll_loss": 0.1241886168718338, "rewards/accuracies": 1.0, "rewards/chosen": -6.5816011556307785e-06, "rewards/margins": 0.2022305577993393, "rewards/rejected": -0.20223712921142578, "step": 8706 }, { "epoch": 6.021438450899032, "grad_norm": 9.541595458984375, "learning_rate": 2.21031197172276e-05, "log_odds_chosen": 10.2780179977417, "log_odds_ratio": -0.00016216814401559532, "logits/chosen": -0.6901393532752991, "logits/rejected": -0.6745829582214355, "logps/chosen": -0.0006011630175635219, "logps/rejected": -2.878037452697754, "loss": 0.6064, "nll_loss": 0.15159110724925995, "rewards/accuracies": 1.0, "rewards/chosen": -6.0116301028756425e-05, "rewards/margins": 0.28774362802505493, "rewards/rejected": -0.2878037393093109, "step": 8707 }, { "epoch": 6.022130013831259, "grad_norm": 7.746224880218506, "learning_rate": 2.2099277700937454e-05, "log_odds_chosen": 10.702656745910645, "log_odds_ratio": -0.00015088812506292015, "logits/chosen": -0.5035496950149536, "logits/rejected": -0.5546244978904724, "logps/chosen": -0.0008898228988982737, "logps/rejected": -2.4310531616210938, "loss": 0.602, "nll_loss": 0.15048100054264069, "rewards/accuracies": 1.0, "rewards/chosen": -8.89822913450189e-05, "rewards/margins": 0.2430163472890854, "rewards/rejected": -0.24310533702373505, "step": 8708 }, { "epoch": 6.022821576763485, "grad_norm": 8.530742645263672, "learning_rate": 2.2095435684647303e-05, "log_odds_chosen": 12.953808784484863, "log_odds_ratio": -2.9884475225117058e-05, "logits/chosen": -0.26880186796188354, "logits/rejected": -0.374568372964859, "logps/chosen": -0.00034990967833437026, "logps/rejected": -4.05475378036499, "loss": 0.9199, "nll_loss": 0.22996152937412262, "rewards/accuracies": 1.0, "rewards/chosen": -3.4990967833437026e-05, "rewards/margins": 0.40544039011001587, "rewards/rejected": -0.405475378036499, "step": 8709 }, { "epoch": 6.023513139695712, "grad_norm": 5.546627044677734, "learning_rate": 2.2091593668357156e-05, "log_odds_chosen": 10.592414855957031, "log_odds_ratio": -7.655217632418498e-05, "logits/chosen": -0.37299686670303345, "logits/rejected": -0.4346255660057068, "logps/chosen": -0.0001668974873609841, "logps/rejected": -1.760309100151062, "loss": 0.4839, "nll_loss": 0.12096671760082245, "rewards/accuracies": 1.0, "rewards/chosen": -1.668974982749205e-05, "rewards/margins": 0.17601421475410461, "rewards/rejected": -0.17603090405464172, "step": 8710 }, { "epoch": 6.024204702627939, "grad_norm": 5.520246982574463, "learning_rate": 2.2087751652067008e-05, "log_odds_chosen": 10.64315414428711, "log_odds_ratio": -7.172457117121667e-05, "logits/chosen": -0.214762344956398, "logits/rejected": -0.4369995594024658, "logps/chosen": -0.0005153071833774447, "logps/rejected": -2.5201823711395264, "loss": 0.7639, "nll_loss": 0.1909710019826889, "rewards/accuracies": 1.0, "rewards/chosen": -5.153072561370209e-05, "rewards/margins": 0.2519667148590088, "rewards/rejected": -0.2520182430744171, "step": 8711 }, { "epoch": 6.024896265560166, "grad_norm": 6.77101469039917, "learning_rate": 2.2083909635776857e-05, "log_odds_chosen": 10.253266334533691, "log_odds_ratio": -5.588552448898554e-05, "logits/chosen": -0.5703224539756775, "logits/rejected": -0.5802868008613586, "logps/chosen": -0.00020519075042102486, "logps/rejected": -1.7386521100997925, "loss": 0.6399, "nll_loss": 0.1599694937467575, "rewards/accuracies": 1.0, "rewards/chosen": -2.0519073586910963e-05, "rewards/margins": 0.17384469509124756, "rewards/rejected": -0.1738651990890503, "step": 8712 }, { "epoch": 6.025587828492393, "grad_norm": 6.571026802062988, "learning_rate": 2.208006761948671e-05, "log_odds_chosen": 10.851655006408691, "log_odds_ratio": -0.0003473691758699715, "logits/chosen": -0.479988157749176, "logits/rejected": -0.528286337852478, "logps/chosen": -0.00018631343846209347, "logps/rejected": -2.036672830581665, "loss": 0.8061, "nll_loss": 0.20148473978042603, "rewards/accuracies": 1.0, "rewards/chosen": -1.863134457380511e-05, "rewards/margins": 0.2036486715078354, "rewards/rejected": -0.2036673128604889, "step": 8713 }, { "epoch": 6.0262793914246195, "grad_norm": 7.67877197265625, "learning_rate": 2.207622560319656e-05, "log_odds_chosen": 11.414480209350586, "log_odds_ratio": -1.969960067071952e-05, "logits/chosen": -0.6675363183021545, "logits/rejected": -0.6894694566726685, "logps/chosen": -0.00010665694571798667, "logps/rejected": -2.019965171813965, "loss": 0.8034, "nll_loss": 0.20084403455257416, "rewards/accuracies": 1.0, "rewards/chosen": -1.0665693480405025e-05, "rewards/margins": 0.20198586583137512, "rewards/rejected": -0.20199653506278992, "step": 8714 }, { "epoch": 6.026970954356846, "grad_norm": 6.10966682434082, "learning_rate": 2.2072383586906408e-05, "log_odds_chosen": 10.402027130126953, "log_odds_ratio": -6.45701729808934e-05, "logits/chosen": -0.6677394509315491, "logits/rejected": -0.7106789350509644, "logps/chosen": -0.00032101484248414636, "logps/rejected": -2.0806641578674316, "loss": 0.4961, "nll_loss": 0.12400933355093002, "rewards/accuracies": 1.0, "rewards/chosen": -3.21014849760104e-05, "rewards/margins": 0.20803432166576385, "rewards/rejected": -0.2080664187669754, "step": 8715 }, { "epoch": 6.027662517289073, "grad_norm": 6.375890731811523, "learning_rate": 2.206854157061626e-05, "log_odds_chosen": 10.099254608154297, "log_odds_ratio": -0.00010230734187643975, "logits/chosen": -0.8636232018470764, "logits/rejected": -0.8537338376045227, "logps/chosen": -0.0005410996964201331, "logps/rejected": -1.9590046405792236, "loss": 0.5508, "nll_loss": 0.13769523799419403, "rewards/accuracies": 1.0, "rewards/chosen": -5.410997255239636e-05, "rewards/margins": 0.1958463490009308, "rewards/rejected": -0.19590047001838684, "step": 8716 }, { "epoch": 6.0283540802213, "grad_norm": 5.492069244384766, "learning_rate": 2.2064699554326112e-05, "log_odds_chosen": 9.815468788146973, "log_odds_ratio": -0.00042597288847900927, "logits/chosen": -0.47465819120407104, "logits/rejected": -0.614909827709198, "logps/chosen": -0.001235145260579884, "logps/rejected": -2.1695032119750977, "loss": 0.9836, "nll_loss": 0.24586890637874603, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001235145318787545, "rewards/margins": 0.21682682633399963, "rewards/rejected": -0.21695034205913544, "step": 8717 }, { "epoch": 6.029045643153527, "grad_norm": 5.251602649688721, "learning_rate": 2.206085753803596e-05, "log_odds_chosen": 10.349724769592285, "log_odds_ratio": -6.653775926679373e-05, "logits/chosen": -0.5225825905799866, "logits/rejected": -0.4800230860710144, "logps/chosen": -0.0003861216246150434, "logps/rejected": -2.0060806274414062, "loss": 0.441, "nll_loss": 0.11023493111133575, "rewards/accuracies": 1.0, "rewards/chosen": -3.861216100631282e-05, "rewards/margins": 0.20056945085525513, "rewards/rejected": -0.2006080448627472, "step": 8718 }, { "epoch": 6.029737206085754, "grad_norm": 7.304520606994629, "learning_rate": 2.2057015521745814e-05, "log_odds_chosen": 10.213470458984375, "log_odds_ratio": -0.00011162283772137016, "logits/chosen": -0.45237404108047485, "logits/rejected": -0.5312992930412292, "logps/chosen": -0.0010302782757207751, "logps/rejected": -2.3630874156951904, "loss": 0.6137, "nll_loss": 0.1534142792224884, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010302782175131142, "rewards/margins": 0.23620572686195374, "rewards/rejected": -0.236308753490448, "step": 8719 }, { "epoch": 6.0304287690179805, "grad_norm": 4.732924461364746, "learning_rate": 2.2053173505455666e-05, "log_odds_chosen": 11.044392585754395, "log_odds_ratio": -3.0344037440954708e-05, "logits/chosen": -0.6467368006706238, "logits/rejected": -0.6115586757659912, "logps/chosen": -0.0001271823130082339, "logps/rejected": -2.0162088871002197, "loss": 0.5966, "nll_loss": 0.14914806187152863, "rewards/accuracies": 1.0, "rewards/chosen": -1.2718230209429748e-05, "rewards/margins": 0.20160816609859467, "rewards/rejected": -0.20162087678909302, "step": 8720 }, { "epoch": 6.031120331950207, "grad_norm": 8.279495239257812, "learning_rate": 2.2049331489165515e-05, "log_odds_chosen": 10.505151748657227, "log_odds_ratio": -8.098056423477829e-05, "logits/chosen": -0.2747512757778168, "logits/rejected": -0.38079598546028137, "logps/chosen": -0.00027252238942310214, "logps/rejected": -2.2139976024627686, "loss": 0.7138, "nll_loss": 0.17843583226203918, "rewards/accuracies": 1.0, "rewards/chosen": -2.725223748711869e-05, "rewards/margins": 0.22137251496315002, "rewards/rejected": -0.22139976918697357, "step": 8721 }, { "epoch": 6.031811894882434, "grad_norm": 5.9185471534729, "learning_rate": 2.2045489472875368e-05, "log_odds_chosen": 9.572835922241211, "log_odds_ratio": -0.0009964063065126538, "logits/chosen": -0.6413549780845642, "logits/rejected": -0.7216004133224487, "logps/chosen": -0.0012645921669900417, "logps/rejected": -1.7656288146972656, "loss": 0.702, "nll_loss": 0.17540577054023743, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012645922834053636, "rewards/margins": 0.1764364242553711, "rewards/rejected": -0.17656287550926208, "step": 8722 }, { "epoch": 6.032503457814661, "grad_norm": 12.007911682128906, "learning_rate": 2.2041647456585217e-05, "log_odds_chosen": 11.140923500061035, "log_odds_ratio": -5.497312668012455e-05, "logits/chosen": -0.6097493171691895, "logits/rejected": -0.6834452152252197, "logps/chosen": -0.0003111382247880101, "logps/rejected": -2.289703845977783, "loss": 0.6628, "nll_loss": 0.1656874418258667, "rewards/accuracies": 1.0, "rewards/chosen": -3.1113824661588296e-05, "rewards/margins": 0.22893927991390228, "rewards/rejected": -0.22897037863731384, "step": 8723 }, { "epoch": 6.033195020746888, "grad_norm": 6.161701202392578, "learning_rate": 2.2037805440295066e-05, "log_odds_chosen": 10.174674034118652, "log_odds_ratio": -0.0007106483681127429, "logits/chosen": -0.013030372560024261, "logits/rejected": -0.07829655706882477, "logps/chosen": -0.002556081395596266, "logps/rejected": -2.195359706878662, "loss": 1.0238, "nll_loss": 0.2558753192424774, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025560817448422313, "rewards/margins": 0.21928036212921143, "rewards/rejected": -0.2195359766483307, "step": 8724 }, { "epoch": 6.033886583679115, "grad_norm": 12.640791893005371, "learning_rate": 2.203396342400492e-05, "log_odds_chosen": 10.730382919311523, "log_odds_ratio": -4.421206904225983e-05, "logits/chosen": -0.054626476019620895, "logits/rejected": -0.17611365020275116, "logps/chosen": -0.0003003279271069914, "logps/rejected": -2.2857675552368164, "loss": 0.8732, "nll_loss": 0.218306303024292, "rewards/accuracies": 1.0, "rewards/chosen": -3.0032793802092783e-05, "rewards/margins": 0.22854672372341156, "rewards/rejected": -0.22857676446437836, "step": 8725 }, { "epoch": 6.0345781466113415, "grad_norm": 5.11686372756958, "learning_rate": 2.203012140771477e-05, "log_odds_chosen": 11.164871215820312, "log_odds_ratio": -5.090639751870185e-05, "logits/chosen": -0.6329345107078552, "logits/rejected": -0.7006338238716125, "logps/chosen": -0.00026425038231536746, "logps/rejected": -2.590266227722168, "loss": 0.6781, "nll_loss": 0.1695106029510498, "rewards/accuracies": 1.0, "rewards/chosen": -2.6425037503940985e-05, "rewards/margins": 0.25900018215179443, "rewards/rejected": -0.2590266168117523, "step": 8726 }, { "epoch": 6.035269709543568, "grad_norm": 7.614181041717529, "learning_rate": 2.202627939142462e-05, "log_odds_chosen": 10.693385124206543, "log_odds_ratio": -4.799907037522644e-05, "logits/chosen": -0.26990771293640137, "logits/rejected": -0.31676316261291504, "logps/chosen": -0.0005678210873156786, "logps/rejected": -2.929535388946533, "loss": 0.7391, "nll_loss": 0.18476933240890503, "rewards/accuracies": 1.0, "rewards/chosen": -5.6782111641950905e-05, "rewards/margins": 0.29289674758911133, "rewards/rejected": -0.2929535508155823, "step": 8727 }, { "epoch": 6.035961272475795, "grad_norm": 5.707152366638184, "learning_rate": 2.2022437375134472e-05, "log_odds_chosen": 10.58765983581543, "log_odds_ratio": -0.0001340707967756316, "logits/chosen": -0.7809407711029053, "logits/rejected": -0.8220765590667725, "logps/chosen": -0.0005716230371035635, "logps/rejected": -1.914036512374878, "loss": 0.5423, "nll_loss": 0.13555540144443512, "rewards/accuracies": 1.0, "rewards/chosen": -5.716230589314364e-05, "rewards/margins": 0.19134649634361267, "rewards/rejected": -0.19140365719795227, "step": 8728 }, { "epoch": 6.036652835408022, "grad_norm": 4.701399326324463, "learning_rate": 2.2018595358844325e-05, "log_odds_chosen": 10.997509002685547, "log_odds_ratio": -5.641685493174009e-05, "logits/chosen": -0.6777703166007996, "logits/rejected": -0.7699306011199951, "logps/chosen": -0.0004010865814052522, "logps/rejected": -2.258298873901367, "loss": 0.8187, "nll_loss": 0.2046590894460678, "rewards/accuracies": 1.0, "rewards/chosen": -4.0108665416482836e-05, "rewards/margins": 0.22578977048397064, "rewards/rejected": -0.2258298695087433, "step": 8729 }, { "epoch": 6.037344398340249, "grad_norm": 6.020839691162109, "learning_rate": 2.2014753342554174e-05, "log_odds_chosen": 10.484159469604492, "log_odds_ratio": -0.0008418535580858588, "logits/chosen": -0.5254714488983154, "logits/rejected": -0.5732161998748779, "logps/chosen": -0.0012820654083043337, "logps/rejected": -2.5081441402435303, "loss": 0.5102, "nll_loss": 0.12746252119541168, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001282065495615825, "rewards/margins": 0.2506862282752991, "rewards/rejected": -0.25081440806388855, "step": 8730 }, { "epoch": 6.038035961272476, "grad_norm": 6.0337982177734375, "learning_rate": 2.2010911326264026e-05, "log_odds_chosen": 10.774333000183105, "log_odds_ratio": -2.366080298088491e-05, "logits/chosen": -0.4981067180633545, "logits/rejected": -0.5781898498535156, "logps/chosen": -0.00015158558380790055, "logps/rejected": -1.8667584657669067, "loss": 0.3905, "nll_loss": 0.09761685132980347, "rewards/accuracies": 1.0, "rewards/chosen": -1.5158560017880518e-05, "rewards/margins": 0.18666070699691772, "rewards/rejected": -0.18667584657669067, "step": 8731 }, { "epoch": 6.0387275242047025, "grad_norm": 11.3192777633667, "learning_rate": 2.2007069309973875e-05, "log_odds_chosen": 11.054316520690918, "log_odds_ratio": -9.583937207935378e-05, "logits/chosen": -0.5879403352737427, "logits/rejected": -0.6534625291824341, "logps/chosen": -0.00024015655799303204, "logps/rejected": -1.997798204421997, "loss": 0.6967, "nll_loss": 0.1741618514060974, "rewards/accuracies": 1.0, "rewards/chosen": -2.4015656890696846e-05, "rewards/margins": 0.19975581765174866, "rewards/rejected": -0.19977982342243195, "step": 8732 }, { "epoch": 6.039419087136929, "grad_norm": 5.469114303588867, "learning_rate": 2.2003227293683724e-05, "log_odds_chosen": 10.51624584197998, "log_odds_ratio": -4.1717499698279426e-05, "logits/chosen": -0.2299852818250656, "logits/rejected": -0.33670753240585327, "logps/chosen": -0.00031217903597280383, "logps/rejected": -2.0699970722198486, "loss": 0.543, "nll_loss": 0.1357382833957672, "rewards/accuracies": 1.0, "rewards/chosen": -3.121790359728038e-05, "rewards/margins": 0.2069685161113739, "rewards/rejected": -0.20699971914291382, "step": 8733 }, { "epoch": 6.040110650069156, "grad_norm": 6.060972690582275, "learning_rate": 2.1999385277393577e-05, "log_odds_chosen": 10.410504341125488, "log_odds_ratio": -7.131236634450033e-05, "logits/chosen": -0.4340049624443054, "logits/rejected": -0.3281274437904358, "logps/chosen": -0.0002894492354243994, "logps/rejected": -2.016857624053955, "loss": 0.5125, "nll_loss": 0.12811097502708435, "rewards/accuracies": 1.0, "rewards/chosen": -2.8944923542439938e-05, "rewards/margins": 0.20165681838989258, "rewards/rejected": -0.20168575644493103, "step": 8734 }, { "epoch": 6.040802213001383, "grad_norm": 10.327274322509766, "learning_rate": 2.1995543261103426e-05, "log_odds_chosen": 10.888412475585938, "log_odds_ratio": -2.7028379918192513e-05, "logits/chosen": -0.7649399042129517, "logits/rejected": -0.7249663472175598, "logps/chosen": -0.0001005790545605123, "logps/rejected": -1.711350679397583, "loss": 0.533, "nll_loss": 0.13324396312236786, "rewards/accuracies": 1.0, "rewards/chosen": -1.005790545605123e-05, "rewards/margins": 0.17112502455711365, "rewards/rejected": -0.1711350828409195, "step": 8735 }, { "epoch": 6.04149377593361, "grad_norm": 6.163614273071289, "learning_rate": 2.199170124481328e-05, "log_odds_chosen": 11.648406028747559, "log_odds_ratio": -1.5283943866961636e-05, "logits/chosen": -0.784024715423584, "logits/rejected": -0.7108168601989746, "logps/chosen": -0.00010706988541642204, "logps/rejected": -2.4180727005004883, "loss": 0.6191, "nll_loss": 0.15476897358894348, "rewards/accuracies": 1.0, "rewards/chosen": -1.0706989087339025e-05, "rewards/margins": 0.2417965829372406, "rewards/rejected": -0.24180728197097778, "step": 8736 }, { "epoch": 6.042185338865837, "grad_norm": 5.836353302001953, "learning_rate": 2.198785922852313e-05, "log_odds_chosen": 11.407360076904297, "log_odds_ratio": -6.587072857655585e-05, "logits/chosen": -0.5999523401260376, "logits/rejected": -0.6171562075614929, "logps/chosen": -0.0004752510867547244, "logps/rejected": -3.2661917209625244, "loss": 1.0002, "nll_loss": 0.2500498294830322, "rewards/accuracies": 1.0, "rewards/chosen": -4.752511085825972e-05, "rewards/margins": 0.32657167315483093, "rewards/rejected": -0.32661914825439453, "step": 8737 }, { "epoch": 6.0428769017980635, "grad_norm": 6.33279275894165, "learning_rate": 2.198401721223298e-05, "log_odds_chosen": 11.425430297851562, "log_odds_ratio": -1.806885666155722e-05, "logits/chosen": -0.6484169363975525, "logits/rejected": -0.4264671802520752, "logps/chosen": -0.000468467827886343, "logps/rejected": -2.840762138366699, "loss": 0.5571, "nll_loss": 0.13927598297595978, "rewards/accuracies": 1.0, "rewards/chosen": -4.684678788180463e-05, "rewards/margins": 0.28402936458587646, "rewards/rejected": -0.2840762138366699, "step": 8738 }, { "epoch": 6.04356846473029, "grad_norm": 6.053421497344971, "learning_rate": 2.1980175195942832e-05, "log_odds_chosen": 10.433453559875488, "log_odds_ratio": -0.00021078737336210907, "logits/chosen": -0.27353599667549133, "logits/rejected": -0.380341112613678, "logps/chosen": -0.0008415814954787493, "logps/rejected": -2.3961219787597656, "loss": 0.6963, "nll_loss": 0.17405669391155243, "rewards/accuracies": 1.0, "rewards/chosen": -8.415814954787493e-05, "rewards/margins": 0.23952805995941162, "rewards/rejected": -0.23961222171783447, "step": 8739 }, { "epoch": 6.044260027662517, "grad_norm": 5.677506923675537, "learning_rate": 2.1976333179652685e-05, "log_odds_chosen": 11.112959861755371, "log_odds_ratio": -4.309674113756046e-05, "logits/chosen": 0.1854228973388672, "logits/rejected": 0.0214783176779747, "logps/chosen": -0.00015564000932499766, "logps/rejected": -2.153843641281128, "loss": 0.739, "nll_loss": 0.18473802506923676, "rewards/accuracies": 1.0, "rewards/chosen": -1.5564000932499766e-05, "rewards/margins": 0.21536879241466522, "rewards/rejected": -0.2153843641281128, "step": 8740 }, { "epoch": 6.044951590594744, "grad_norm": 10.937490463256836, "learning_rate": 2.1972491163362534e-05, "log_odds_chosen": 9.786539077758789, "log_odds_ratio": -0.00013552032760344446, "logits/chosen": -0.7398344874382019, "logits/rejected": -0.7491805553436279, "logps/chosen": -0.0013153355102986097, "logps/rejected": -1.9373672008514404, "loss": 0.6691, "nll_loss": 0.1672721803188324, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013153356849215925, "rewards/margins": 0.1936051845550537, "rewards/rejected": -0.1937367171049118, "step": 8741 }, { "epoch": 6.045643153526971, "grad_norm": 7.335566997528076, "learning_rate": 2.1968649147072383e-05, "log_odds_chosen": 9.627555847167969, "log_odds_ratio": -0.0001948888530023396, "logits/chosen": -0.3438924551010132, "logits/rejected": -0.501375675201416, "logps/chosen": -0.00024168803065549582, "logps/rejected": -1.2390199899673462, "loss": 0.8358, "nll_loss": 0.20894263684749603, "rewards/accuracies": 1.0, "rewards/chosen": -2.416880124656018e-05, "rewards/margins": 0.12387783080339432, "rewards/rejected": -0.12390200793743134, "step": 8742 }, { "epoch": 6.046334716459198, "grad_norm": 4.566446781158447, "learning_rate": 2.1964807130782235e-05, "log_odds_chosen": 10.265748023986816, "log_odds_ratio": -9.862089063972235e-05, "logits/chosen": -0.625510036945343, "logits/rejected": -0.5897167325019836, "logps/chosen": -0.00043656807974912226, "logps/rejected": -1.9950671195983887, "loss": 1.0323, "nll_loss": 0.2580625116825104, "rewards/accuracies": 1.0, "rewards/chosen": -4.3656807974912226e-05, "rewards/margins": 0.19946306943893433, "rewards/rejected": -0.1995067149400711, "step": 8743 }, { "epoch": 6.0470262793914245, "grad_norm": 5.270570278167725, "learning_rate": 2.1960965114492084e-05, "log_odds_chosen": 11.466952323913574, "log_odds_ratio": -3.146990275126882e-05, "logits/chosen": -0.5958943367004395, "logits/rejected": -0.6558508276939392, "logps/chosen": -0.00024944625329226255, "logps/rejected": -2.6926231384277344, "loss": 0.7491, "nll_loss": 0.18726211786270142, "rewards/accuracies": 1.0, "rewards/chosen": -2.494462751201354e-05, "rewards/margins": 0.26923736929893494, "rewards/rejected": -0.26926231384277344, "step": 8744 }, { "epoch": 6.047717842323651, "grad_norm": 6.721274375915527, "learning_rate": 2.1957123098201937e-05, "log_odds_chosen": 10.474918365478516, "log_odds_ratio": -0.0010723298182711005, "logits/chosen": -0.36522969603538513, "logits/rejected": -0.42422711849212646, "logps/chosen": -0.0009068144718185067, "logps/rejected": -2.2234137058258057, "loss": 0.5801, "nll_loss": 0.14491480588912964, "rewards/accuracies": 1.0, "rewards/chosen": -9.068144572665915e-05, "rewards/margins": 0.22225069999694824, "rewards/rejected": -0.2223413735628128, "step": 8745 }, { "epoch": 6.048409405255878, "grad_norm": 9.751940727233887, "learning_rate": 2.195328108191179e-05, "log_odds_chosen": 10.69310188293457, "log_odds_ratio": -0.00014623063907492906, "logits/chosen": -0.4476546049118042, "logits/rejected": -0.47708776593208313, "logps/chosen": -0.001096154097467661, "logps/rejected": -2.643292188644409, "loss": 0.5479, "nll_loss": 0.1369646042585373, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010961541556753218, "rewards/margins": 0.26421958208084106, "rewards/rejected": -0.2643292248249054, "step": 8746 }, { "epoch": 6.049100968188105, "grad_norm": 4.666476726531982, "learning_rate": 2.194943906562164e-05, "log_odds_chosen": 9.971809387207031, "log_odds_ratio": -0.00015414562949445099, "logits/chosen": -0.5992689728736877, "logits/rejected": -0.6264042258262634, "logps/chosen": -0.0009661355288699269, "logps/rejected": -1.293394684791565, "loss": 0.4689, "nll_loss": 0.11721152067184448, "rewards/accuracies": 1.0, "rewards/chosen": -9.661355579737574e-05, "rewards/margins": 0.12924285233020782, "rewards/rejected": -0.12933948636054993, "step": 8747 }, { "epoch": 6.049792531120332, "grad_norm": 9.599535942077637, "learning_rate": 2.194559704933149e-05, "log_odds_chosen": 11.24213981628418, "log_odds_ratio": -2.586483969935216e-05, "logits/chosen": -0.49608927965164185, "logits/rejected": -0.6723200678825378, "logps/chosen": -0.00033675608574412763, "logps/rejected": -2.715348720550537, "loss": 0.7347, "nll_loss": 0.18367436528205872, "rewards/accuracies": 1.0, "rewards/chosen": -3.3675609302008525e-05, "rewards/margins": 0.27150118350982666, "rewards/rejected": -0.27153486013412476, "step": 8748 }, { "epoch": 6.050484094052559, "grad_norm": 5.931811332702637, "learning_rate": 2.1941755033041343e-05, "log_odds_chosen": 11.314300537109375, "log_odds_ratio": -3.0137358407955617e-05, "logits/chosen": -0.2631280720233917, "logits/rejected": -0.15379023551940918, "logps/chosen": -0.00013942176883574575, "logps/rejected": -2.585228204727173, "loss": 0.6119, "nll_loss": 0.15297843515872955, "rewards/accuracies": 1.0, "rewards/chosen": -1.3942176337877754e-05, "rewards/margins": 0.2585088610649109, "rewards/rejected": -0.25852280855178833, "step": 8749 }, { "epoch": 6.051175656984785, "grad_norm": 6.247043132781982, "learning_rate": 2.1937913016751192e-05, "log_odds_chosen": 10.999902725219727, "log_odds_ratio": -1.8577326045488007e-05, "logits/chosen": -0.564647912979126, "logits/rejected": -0.6719080209732056, "logps/chosen": -6.438637501560152e-05, "logps/rejected": -1.439497709274292, "loss": 0.4286, "nll_loss": 0.10715620219707489, "rewards/accuracies": 1.0, "rewards/chosen": -6.438637683459092e-06, "rewards/margins": 0.14394333958625793, "rewards/rejected": -0.14394977688789368, "step": 8750 }, { "epoch": 6.051867219917012, "grad_norm": 7.51104736328125, "learning_rate": 2.193407100046104e-05, "log_odds_chosen": 9.899858474731445, "log_odds_ratio": -0.00010276660032104701, "logits/chosen": -0.40770965814590454, "logits/rejected": -0.48507434129714966, "logps/chosen": -0.0003978637687396258, "logps/rejected": -1.6586893796920776, "loss": 0.5209, "nll_loss": 0.13021023571491241, "rewards/accuracies": 1.0, "rewards/chosen": -3.978637687396258e-05, "rewards/margins": 0.1658291518688202, "rewards/rejected": -0.16586895287036896, "step": 8751 }, { "epoch": 6.052558782849239, "grad_norm": 11.98574447631836, "learning_rate": 2.1930228984170894e-05, "log_odds_chosen": 10.0965576171875, "log_odds_ratio": -0.0003631242143455893, "logits/chosen": -0.3025035858154297, "logits/rejected": -0.44557201862335205, "logps/chosen": -0.0006092924159020185, "logps/rejected": -1.8992372751235962, "loss": 0.6847, "nll_loss": 0.17114058136940002, "rewards/accuracies": 1.0, "rewards/chosen": -6.092924377298914e-05, "rewards/margins": 0.18986281752586365, "rewards/rejected": -0.1899237334728241, "step": 8752 }, { "epoch": 6.053250345781466, "grad_norm": 6.399974822998047, "learning_rate": 2.1926386967880743e-05, "log_odds_chosen": 11.87247085571289, "log_odds_ratio": -1.9874878489645198e-05, "logits/chosen": -0.4109629988670349, "logits/rejected": -0.34680724143981934, "logps/chosen": -0.000150459905853495, "logps/rejected": -2.954695463180542, "loss": 0.5667, "nll_loss": 0.14166241884231567, "rewards/accuracies": 1.0, "rewards/chosen": -1.5045991858642083e-05, "rewards/margins": 0.2954545021057129, "rewards/rejected": -0.2954695224761963, "step": 8753 }, { "epoch": 6.053941908713693, "grad_norm": 3.7555723190307617, "learning_rate": 2.1922544951590595e-05, "log_odds_chosen": 11.044259071350098, "log_odds_ratio": -0.00011946244194405153, "logits/chosen": -0.8748759627342224, "logits/rejected": -0.8916746973991394, "logps/chosen": -0.00022297601390164346, "logps/rejected": -2.106964349746704, "loss": 0.5291, "nll_loss": 0.13227327167987823, "rewards/accuracies": 1.0, "rewards/chosen": -2.229760320915375e-05, "rewards/margins": 0.21067410707473755, "rewards/rejected": -0.21069641411304474, "step": 8754 }, { "epoch": 6.05463347164592, "grad_norm": 6.178657054901123, "learning_rate": 2.1918702935300448e-05, "log_odds_chosen": 10.054718017578125, "log_odds_ratio": -6.976965232752264e-05, "logits/chosen": -0.617241621017456, "logits/rejected": -0.643979012966156, "logps/chosen": -0.00029691471718251705, "logps/rejected": -1.5716367959976196, "loss": 0.4473, "nll_loss": 0.11182103306055069, "rewards/accuracies": 1.0, "rewards/chosen": -2.9691473173443228e-05, "rewards/margins": 0.15713398158550262, "rewards/rejected": -0.15716367959976196, "step": 8755 }, { "epoch": 6.055325034578146, "grad_norm": 5.386208534240723, "learning_rate": 2.1914860919010297e-05, "log_odds_chosen": 10.863911628723145, "log_odds_ratio": -4.7492121666437015e-05, "logits/chosen": -0.40668195486068726, "logits/rejected": -0.4869404435157776, "logps/chosen": -0.00022119340428616852, "logps/rejected": -2.426424026489258, "loss": 0.536, "nll_loss": 0.13398811221122742, "rewards/accuracies": 1.0, "rewards/chosen": -2.211934042861685e-05, "rewards/margins": 0.2426202893257141, "rewards/rejected": -0.24264241755008698, "step": 8756 }, { "epoch": 6.056016597510373, "grad_norm": 10.927085876464844, "learning_rate": 2.191101890272015e-05, "log_odds_chosen": 10.562910079956055, "log_odds_ratio": -3.968338933191262e-05, "logits/chosen": -0.40782853960990906, "logits/rejected": -0.4667756259441376, "logps/chosen": -0.00017079540702980012, "logps/rejected": -1.7969799041748047, "loss": 0.633, "nll_loss": 0.15824900567531586, "rewards/accuracies": 1.0, "rewards/chosen": -1.7079541066777892e-05, "rewards/margins": 0.17968091368675232, "rewards/rejected": -0.17969800531864166, "step": 8757 }, { "epoch": 6.0567081604426, "grad_norm": 4.373627662658691, "learning_rate": 2.190717688643e-05, "log_odds_chosen": 11.044089317321777, "log_odds_ratio": -2.8408252546796575e-05, "logits/chosen": -0.47975000739097595, "logits/rejected": -0.5330215096473694, "logps/chosen": -0.000502656155731529, "logps/rejected": -2.2571511268615723, "loss": 0.4773, "nll_loss": 0.119330994784832, "rewards/accuracies": 1.0, "rewards/chosen": -5.0265618483535945e-05, "rewards/margins": 0.22566482424736023, "rewards/rejected": -0.22571511566638947, "step": 8758 }, { "epoch": 6.057399723374827, "grad_norm": 5.878119468688965, "learning_rate": 2.190333487013985e-05, "log_odds_chosen": 10.509617805480957, "log_odds_ratio": -0.00012422242434695363, "logits/chosen": -0.5751892328262329, "logits/rejected": -0.7009394764900208, "logps/chosen": -0.00025830554659478366, "logps/rejected": -2.0686752796173096, "loss": 0.5925, "nll_loss": 0.1481102705001831, "rewards/accuracies": 1.0, "rewards/chosen": -2.5830553568084724e-05, "rewards/margins": 0.20684170722961426, "rewards/rejected": -0.2068675458431244, "step": 8759 }, { "epoch": 6.058091286307054, "grad_norm": 6.645744323730469, "learning_rate": 2.18994928538497e-05, "log_odds_chosen": 9.95135498046875, "log_odds_ratio": -0.00012155869626440108, "logits/chosen": -0.23222099244594574, "logits/rejected": -0.3660616874694824, "logps/chosen": -0.0002634202828630805, "logps/rejected": -1.813197135925293, "loss": 0.6914, "nll_loss": 0.17283284664154053, "rewards/accuracies": 1.0, "rewards/chosen": -2.6342029741499573e-05, "rewards/margins": 0.18129336833953857, "rewards/rejected": -0.1813197135925293, "step": 8760 }, { "epoch": 6.058782849239281, "grad_norm": 5.354851245880127, "learning_rate": 2.1895650837559552e-05, "log_odds_chosen": 10.4942626953125, "log_odds_ratio": -0.0005334573797881603, "logits/chosen": -0.8777337074279785, "logits/rejected": -0.8068556785583496, "logps/chosen": -0.00047751181409694254, "logps/rejected": -2.2734134197235107, "loss": 0.5328, "nll_loss": 0.13314048945903778, "rewards/accuracies": 1.0, "rewards/chosen": -4.7751182137290016e-05, "rewards/margins": 0.22729359567165375, "rewards/rejected": -0.22734133899211884, "step": 8761 }, { "epoch": 6.059474412171507, "grad_norm": 5.62773323059082, "learning_rate": 2.18918088212694e-05, "log_odds_chosen": 10.236888885498047, "log_odds_ratio": -0.0005244009662419558, "logits/chosen": -0.4014491140842438, "logits/rejected": -0.46045809984207153, "logps/chosen": -0.00047710942453704774, "logps/rejected": -2.1145997047424316, "loss": 0.8807, "nll_loss": 0.2201147973537445, "rewards/accuracies": 1.0, "rewards/chosen": -4.771094609168358e-05, "rewards/margins": 0.211412250995636, "rewards/rejected": -0.2114599496126175, "step": 8762 }, { "epoch": 6.060165975103734, "grad_norm": 7.158164024353027, "learning_rate": 2.1887966804979254e-05, "log_odds_chosen": 10.382726669311523, "log_odds_ratio": -0.0002478898677509278, "logits/chosen": -0.5366454124450684, "logits/rejected": -0.5365896821022034, "logps/chosen": -0.003330084728077054, "logps/rejected": -2.4704389572143555, "loss": 0.593, "nll_loss": 0.14822663366794586, "rewards/accuracies": 1.0, "rewards/chosen": -0.00033300850191153586, "rewards/margins": 0.2467108964920044, "rewards/rejected": -0.2470439076423645, "step": 8763 }, { "epoch": 6.060857538035961, "grad_norm": 6.579989910125732, "learning_rate": 2.1884124788689106e-05, "log_odds_chosen": 10.012670516967773, "log_odds_ratio": -0.00017603966989554465, "logits/chosen": -0.5483078360557556, "logits/rejected": -0.6120085716247559, "logps/chosen": -0.0004534853796940297, "logps/rejected": -1.733701229095459, "loss": 0.583, "nll_loss": 0.14573873579502106, "rewards/accuracies": 1.0, "rewards/chosen": -4.534853724180721e-05, "rewards/margins": 0.17332476377487183, "rewards/rejected": -0.1733701229095459, "step": 8764 }, { "epoch": 6.061549100968188, "grad_norm": 5.654554843902588, "learning_rate": 2.1880282772398955e-05, "log_odds_chosen": 9.292821884155273, "log_odds_ratio": -0.0005053141503594816, "logits/chosen": -0.6640750765800476, "logits/rejected": -0.6798596978187561, "logps/chosen": -0.0007134718471206725, "logps/rejected": -1.5490200519561768, "loss": 0.6347, "nll_loss": 0.15863104164600372, "rewards/accuracies": 1.0, "rewards/chosen": -7.134718907764181e-05, "rewards/margins": 0.154830664396286, "rewards/rejected": -0.15490201115608215, "step": 8765 }, { "epoch": 6.062240663900415, "grad_norm": 5.678080081939697, "learning_rate": 2.1876440756108808e-05, "log_odds_chosen": 8.85152816772461, "log_odds_ratio": -0.0004262386355549097, "logits/chosen": -0.519461989402771, "logits/rejected": -0.6006239652633667, "logps/chosen": -0.0010829487582668662, "logps/rejected": -1.4057854413986206, "loss": 0.5594, "nll_loss": 0.13980016112327576, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001082948874682188, "rewards/margins": 0.1404702514410019, "rewards/rejected": -0.14057855308055878, "step": 8766 }, { "epoch": 6.0629322268326415, "grad_norm": 9.73702621459961, "learning_rate": 2.187259873981866e-05, "log_odds_chosen": 10.37993049621582, "log_odds_ratio": -4.671388160204515e-05, "logits/chosen": -0.5082290172576904, "logits/rejected": -0.5737982988357544, "logps/chosen": -0.00029075262136757374, "logps/rejected": -2.170318841934204, "loss": 0.6437, "nll_loss": 0.16090813279151917, "rewards/accuracies": 1.0, "rewards/chosen": -2.907525959017221e-05, "rewards/margins": 0.21700282394886017, "rewards/rejected": -0.21703189611434937, "step": 8767 }, { "epoch": 6.063623789764868, "grad_norm": 8.46346664428711, "learning_rate": 2.186875672352851e-05, "log_odds_chosen": 9.255306243896484, "log_odds_ratio": -0.000526092597283423, "logits/chosen": -0.3945017158985138, "logits/rejected": -0.47494441270828247, "logps/chosen": -0.0019078406039625406, "logps/rejected": -1.9639620780944824, "loss": 0.5382, "nll_loss": 0.1345093846321106, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019078404875472188, "rewards/margins": 0.19620543718338013, "rewards/rejected": -0.19639620184898376, "step": 8768 }, { "epoch": 6.064315352697095, "grad_norm": 6.009820938110352, "learning_rate": 2.1864914707238358e-05, "log_odds_chosen": 10.627840042114258, "log_odds_ratio": -0.00033696723403409123, "logits/chosen": -0.2139442265033722, "logits/rejected": -0.12539441883563995, "logps/chosen": -0.0006009953212924302, "logps/rejected": -2.532836675643921, "loss": 0.5323, "nll_loss": 0.1330462396144867, "rewards/accuracies": 1.0, "rewards/chosen": -6.009953358443454e-05, "rewards/margins": 0.25322356820106506, "rewards/rejected": -0.25328364968299866, "step": 8769 }, { "epoch": 6.065006915629322, "grad_norm": 5.412619590759277, "learning_rate": 2.186107269094821e-05, "log_odds_chosen": 10.109893798828125, "log_odds_ratio": -0.00016745369066484272, "logits/chosen": -0.5539190769195557, "logits/rejected": -0.6296762824058533, "logps/chosen": -0.00047056650510057807, "logps/rejected": -1.8397618532180786, "loss": 0.5788, "nll_loss": 0.14467471837997437, "rewards/accuracies": 1.0, "rewards/chosen": -4.705665196524933e-05, "rewards/margins": 0.18392911553382874, "rewards/rejected": -0.1839761734008789, "step": 8770 }, { "epoch": 6.065698478561549, "grad_norm": 6.335086822509766, "learning_rate": 2.185723067465806e-05, "log_odds_chosen": 9.797880172729492, "log_odds_ratio": -0.000998196774162352, "logits/chosen": -0.8057728409767151, "logits/rejected": -0.7389850616455078, "logps/chosen": -0.0006941133178770542, "logps/rejected": -2.1998682022094727, "loss": 0.7095, "nll_loss": 0.17727643251419067, "rewards/accuracies": 1.0, "rewards/chosen": -6.941134051885456e-05, "rewards/margins": 0.2199174016714096, "rewards/rejected": -0.21998681128025055, "step": 8771 }, { "epoch": 6.066390041493776, "grad_norm": 6.116896629333496, "learning_rate": 2.1853388658367912e-05, "log_odds_chosen": 10.272491455078125, "log_odds_ratio": -0.00020923007105011493, "logits/chosen": -0.30253416299819946, "logits/rejected": -0.30119574069976807, "logps/chosen": -0.0007446683594025671, "logps/rejected": -1.7595021724700928, "loss": 0.5707, "nll_loss": 0.14265257120132446, "rewards/accuracies": 1.0, "rewards/chosen": -7.446683594025671e-05, "rewards/margins": 0.17587575316429138, "rewards/rejected": -0.17595022916793823, "step": 8772 }, { "epoch": 6.0670816044260025, "grad_norm": 5.565364360809326, "learning_rate": 2.1849546642077765e-05, "log_odds_chosen": 10.232695579528809, "log_odds_ratio": -0.00010011553240474313, "logits/chosen": -0.6119440793991089, "logits/rejected": -0.6406114101409912, "logps/chosen": -0.00029022886883467436, "logps/rejected": -1.9426348209381104, "loss": 0.7586, "nll_loss": 0.18964678049087524, "rewards/accuracies": 1.0, "rewards/chosen": -2.9022887247265317e-05, "rewards/margins": 0.1942344605922699, "rewards/rejected": -0.1942634880542755, "step": 8773 }, { "epoch": 6.067773167358229, "grad_norm": 4.671210765838623, "learning_rate": 2.1845704625787614e-05, "log_odds_chosen": 10.74302864074707, "log_odds_ratio": -3.786133311223239e-05, "logits/chosen": -0.643653392791748, "logits/rejected": -0.7036978006362915, "logps/chosen": -0.00018377033120486885, "logps/rejected": -2.0579030513763428, "loss": 0.4593, "nll_loss": 0.11483326554298401, "rewards/accuracies": 1.0, "rewards/chosen": -1.8377033484284766e-05, "rewards/margins": 0.20577193796634674, "rewards/rejected": -0.20579031109809875, "step": 8774 }, { "epoch": 6.068464730290456, "grad_norm": 9.357017517089844, "learning_rate": 2.1841862609497466e-05, "log_odds_chosen": 11.394503593444824, "log_odds_ratio": -4.771078238263726e-05, "logits/chosen": -0.53241366147995, "logits/rejected": -0.6427210569381714, "logps/chosen": -0.00017528921307530254, "logps/rejected": -2.24078369140625, "loss": 0.7093, "nll_loss": 0.17733241617679596, "rewards/accuracies": 1.0, "rewards/chosen": -1.7528920579934493e-05, "rewards/margins": 0.2240608185529709, "rewards/rejected": -0.22407834231853485, "step": 8775 }, { "epoch": 6.069156293222683, "grad_norm": 5.8118205070495605, "learning_rate": 2.183802059320732e-05, "log_odds_chosen": 10.930205345153809, "log_odds_ratio": -3.402809306862764e-05, "logits/chosen": -0.2228926718235016, "logits/rejected": -0.3147223889827728, "logps/chosen": -0.00012596958549693227, "logps/rejected": -1.8366320133209229, "loss": 0.6962, "nll_loss": 0.17405246198177338, "rewards/accuracies": 1.0, "rewards/chosen": -1.2596959095390048e-05, "rewards/margins": 0.18365059792995453, "rewards/rejected": -0.18366320431232452, "step": 8776 }, { "epoch": 6.06984785615491, "grad_norm": 7.497174263000488, "learning_rate": 2.1834178576917168e-05, "log_odds_chosen": 10.006776809692383, "log_odds_ratio": -0.0003789706388488412, "logits/chosen": -0.40487217903137207, "logits/rejected": -0.34448304772377014, "logps/chosen": -0.0007712909136898816, "logps/rejected": -1.9380466938018799, "loss": 0.8276, "nll_loss": 0.2068547010421753, "rewards/accuracies": 1.0, "rewards/chosen": -7.71290942793712e-05, "rewards/margins": 0.1937275528907776, "rewards/rejected": -0.19380466639995575, "step": 8777 }, { "epoch": 6.070539419087137, "grad_norm": 6.315959930419922, "learning_rate": 2.1830336560627017e-05, "log_odds_chosen": 11.265578269958496, "log_odds_ratio": -1.984885057026986e-05, "logits/chosen": -0.29897958040237427, "logits/rejected": -0.42221328616142273, "logps/chosen": -0.00015299460210371763, "logps/rejected": -2.132373809814453, "loss": 0.5165, "nll_loss": 0.12912456691265106, "rewards/accuracies": 1.0, "rewards/chosen": -1.5299459846573882e-05, "rewards/margins": 0.21322208642959595, "rewards/rejected": -0.21323740482330322, "step": 8778 }, { "epoch": 6.0712309820193635, "grad_norm": 5.4421539306640625, "learning_rate": 2.182649454433687e-05, "log_odds_chosen": 10.107906341552734, "log_odds_ratio": -7.581239333376288e-05, "logits/chosen": -0.13145671784877777, "logits/rejected": -0.1686045080423355, "logps/chosen": -0.0011641870951279998, "logps/rejected": -1.9716193675994873, "loss": 0.7435, "nll_loss": 0.18586105108261108, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011641871969914064, "rewards/margins": 0.19704553484916687, "rewards/rejected": -0.1971619725227356, "step": 8779 }, { "epoch": 6.07192254495159, "grad_norm": 6.829347610473633, "learning_rate": 2.1822652528046718e-05, "log_odds_chosen": 10.207975387573242, "log_odds_ratio": -0.00010395667050033808, "logits/chosen": -0.7461816072463989, "logits/rejected": -0.9530609250068665, "logps/chosen": -0.0006818310357630253, "logps/rejected": -1.8421964645385742, "loss": 0.5781, "nll_loss": 0.14451363682746887, "rewards/accuracies": 1.0, "rewards/chosen": -6.81831079418771e-05, "rewards/margins": 0.18415147066116333, "rewards/rejected": -0.18421964347362518, "step": 8780 }, { "epoch": 6.072614107883817, "grad_norm": 7.78087043762207, "learning_rate": 2.181881051175657e-05, "log_odds_chosen": 11.349825859069824, "log_odds_ratio": -0.0001022371870931238, "logits/chosen": -0.37964165210723877, "logits/rejected": -0.4040919244289398, "logps/chosen": -0.0001565673155710101, "logps/rejected": -2.2509853839874268, "loss": 0.5442, "nll_loss": 0.1360395848751068, "rewards/accuracies": 1.0, "rewards/chosen": -1.565673119330313e-05, "rewards/margins": 0.2250829041004181, "rewards/rejected": -0.22509855031967163, "step": 8781 }, { "epoch": 6.073305670816044, "grad_norm": 11.672771453857422, "learning_rate": 2.1814968495466423e-05, "log_odds_chosen": 10.513250350952148, "log_odds_ratio": -7.949629798531532e-05, "logits/chosen": -0.1163056492805481, "logits/rejected": -0.2008170783519745, "logps/chosen": -0.00033721962245181203, "logps/rejected": -2.001417636871338, "loss": 0.6344, "nll_loss": 0.15858982503414154, "rewards/accuracies": 1.0, "rewards/chosen": -3.372196078998968e-05, "rewards/margins": 0.20010803639888763, "rewards/rejected": -0.20014174282550812, "step": 8782 }, { "epoch": 6.073997233748271, "grad_norm": 5.708601474761963, "learning_rate": 2.1811126479176272e-05, "log_odds_chosen": 9.723910331726074, "log_odds_ratio": -0.0002251253608847037, "logits/chosen": -0.009398063644766808, "logits/rejected": -0.16576920449733734, "logps/chosen": -0.001370785990729928, "logps/rejected": -2.355374574661255, "loss": 0.9146, "nll_loss": 0.22861604392528534, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001370786048937589, "rewards/margins": 0.23540037870407104, "rewards/rejected": -0.23553743958473206, "step": 8783 }, { "epoch": 6.074688796680498, "grad_norm": 9.264809608459473, "learning_rate": 2.1807284462886125e-05, "log_odds_chosen": 10.570226669311523, "log_odds_ratio": -0.0011507862946018577, "logits/chosen": -0.425912469625473, "logits/rejected": -0.580479621887207, "logps/chosen": -0.005178863648325205, "logps/rejected": -2.450840711593628, "loss": 0.7519, "nll_loss": 0.187865749001503, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005178863066248596, "rewards/margins": 0.2445661872625351, "rewards/rejected": -0.24508407711982727, "step": 8784 }, { "epoch": 6.0753803596127245, "grad_norm": 5.949010848999023, "learning_rate": 2.1803442446595977e-05, "log_odds_chosen": 10.58531379699707, "log_odds_ratio": -4.488856211537495e-05, "logits/chosen": -0.4791228175163269, "logits/rejected": -0.4790971875190735, "logps/chosen": -0.00041575790964998305, "logps/rejected": -2.0088014602661133, "loss": 0.5197, "nll_loss": 0.12990805506706238, "rewards/accuracies": 1.0, "rewards/chosen": -4.157579314778559e-05, "rewards/margins": 0.2008385807275772, "rewards/rejected": -0.20088014006614685, "step": 8785 }, { "epoch": 6.076071922544951, "grad_norm": 8.873502731323242, "learning_rate": 2.1799600430305826e-05, "log_odds_chosen": 9.660669326782227, "log_odds_ratio": -0.000820180110167712, "logits/chosen": -0.17997047305107117, "logits/rejected": -0.2867315411567688, "logps/chosen": -0.0021463148295879364, "logps/rejected": -1.7114827632904053, "loss": 0.5846, "nll_loss": 0.1460685431957245, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021463149460032582, "rewards/margins": 0.17093363404273987, "rewards/rejected": -0.17114827036857605, "step": 8786 }, { "epoch": 6.076763485477178, "grad_norm": 3.9984657764434814, "learning_rate": 2.1795758414015675e-05, "log_odds_chosen": 10.248716354370117, "log_odds_ratio": -0.00011280793842161074, "logits/chosen": -0.40966886281967163, "logits/rejected": -0.4978000819683075, "logps/chosen": -0.0002598642313387245, "logps/rejected": -2.1092262268066406, "loss": 0.4102, "nll_loss": 0.10254928469657898, "rewards/accuracies": 1.0, "rewards/chosen": -2.598642277007457e-05, "rewards/margins": 0.21089664101600647, "rewards/rejected": -0.21092264354228973, "step": 8787 }, { "epoch": 6.077455048409405, "grad_norm": 3.56330943107605, "learning_rate": 2.1791916397725528e-05, "log_odds_chosen": 10.921337127685547, "log_odds_ratio": -0.0005771245341747999, "logits/chosen": 0.020429208874702454, "logits/rejected": -0.06128193438053131, "logps/chosen": -0.0022159842774271965, "logps/rejected": -2.977935314178467, "loss": 0.758, "nll_loss": 0.18944081664085388, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002215984568465501, "rewards/margins": 0.29757189750671387, "rewards/rejected": -0.29779350757598877, "step": 8788 }, { "epoch": 6.078146611341632, "grad_norm": 7.409214973449707, "learning_rate": 2.1788074381435377e-05, "log_odds_chosen": 10.565449714660645, "log_odds_ratio": -0.00011608524073380977, "logits/chosen": -0.3189489543437958, "logits/rejected": -0.2393302619457245, "logps/chosen": -0.00021485527395270765, "logps/rejected": -2.161606788635254, "loss": 0.7909, "nll_loss": 0.19770371913909912, "rewards/accuracies": 1.0, "rewards/chosen": -2.1485526303877123e-05, "rewards/margins": 0.21613919734954834, "rewards/rejected": -0.21616066992282867, "step": 8789 }, { "epoch": 6.078838174273859, "grad_norm": 7.98007869720459, "learning_rate": 2.178423236514523e-05, "log_odds_chosen": 11.489389419555664, "log_odds_ratio": -3.7924979551462457e-05, "logits/chosen": -0.30156710743904114, "logits/rejected": -0.31472352147102356, "logps/chosen": -0.0001984165864996612, "logps/rejected": -2.3415443897247314, "loss": 0.646, "nll_loss": 0.16149157285690308, "rewards/accuracies": 1.0, "rewards/chosen": -1.9841660105157644e-05, "rewards/margins": 0.23413459956645966, "rewards/rejected": -0.23415443301200867, "step": 8790 }, { "epoch": 6.0795297372060855, "grad_norm": 11.55526065826416, "learning_rate": 2.178039034885508e-05, "log_odds_chosen": 10.707229614257812, "log_odds_ratio": -0.000218723711441271, "logits/chosen": -0.49764618277549744, "logits/rejected": -0.5057958960533142, "logps/chosen": -0.0008697055745869875, "logps/rejected": -2.173175096511841, "loss": 0.5207, "nll_loss": 0.13016489148139954, "rewards/accuracies": 1.0, "rewards/chosen": -8.69705545483157e-05, "rewards/margins": 0.21723055839538574, "rewards/rejected": -0.21731753647327423, "step": 8791 }, { "epoch": 6.080221300138312, "grad_norm": 7.144685745239258, "learning_rate": 2.177654833256493e-05, "log_odds_chosen": 10.159027099609375, "log_odds_ratio": -0.0007074850727804005, "logits/chosen": -0.42714568972587585, "logits/rejected": -0.45183366537094116, "logps/chosen": -0.0006984758656471968, "logps/rejected": -2.197754144668579, "loss": 0.5004, "nll_loss": 0.1250331550836563, "rewards/accuracies": 1.0, "rewards/chosen": -6.984759238548577e-05, "rewards/margins": 0.21970558166503906, "rewards/rejected": -0.21977542340755463, "step": 8792 }, { "epoch": 6.080912863070539, "grad_norm": 4.701704502105713, "learning_rate": 2.1772706316274783e-05, "log_odds_chosen": 10.532032012939453, "log_odds_ratio": -4.7948538849595934e-05, "logits/chosen": -0.30077916383743286, "logits/rejected": -0.35092857480049133, "logps/chosen": -0.00040165867540054023, "logps/rejected": -2.3476030826568604, "loss": 0.4936, "nll_loss": 0.12340079247951508, "rewards/accuracies": 1.0, "rewards/chosen": -4.0165868995245546e-05, "rewards/margins": 0.2347201704978943, "rewards/rejected": -0.2347603291273117, "step": 8793 }, { "epoch": 6.081604426002766, "grad_norm": 5.7320051193237305, "learning_rate": 2.1768864299984635e-05, "log_odds_chosen": 10.813372611999512, "log_odds_ratio": -0.00021087832283228636, "logits/chosen": -0.36254051327705383, "logits/rejected": -0.48160022497177124, "logps/chosen": -0.0005631643580272794, "logps/rejected": -2.1880125999450684, "loss": 0.657, "nll_loss": 0.16422507166862488, "rewards/accuracies": 1.0, "rewards/chosen": -5.6316428526770324e-05, "rewards/margins": 0.2187449336051941, "rewards/rejected": -0.21880124509334564, "step": 8794 }, { "epoch": 6.082295988934993, "grad_norm": 9.378345489501953, "learning_rate": 2.1765022283694484e-05, "log_odds_chosen": 10.610244750976562, "log_odds_ratio": -5.6305994803551584e-05, "logits/chosen": -0.02063523232936859, "logits/rejected": -0.09921707957983017, "logps/chosen": -0.0004355312848929316, "logps/rejected": -2.193181037902832, "loss": 0.6431, "nll_loss": 0.16076835989952087, "rewards/accuracies": 1.0, "rewards/chosen": -4.355312557891011e-05, "rewards/margins": 0.21927456557750702, "rewards/rejected": -0.21931812167167664, "step": 8795 }, { "epoch": 6.08298755186722, "grad_norm": 12.209285736083984, "learning_rate": 2.1761180267404337e-05, "log_odds_chosen": 10.893821716308594, "log_odds_ratio": -0.0001389766694046557, "logits/chosen": -0.7187768816947937, "logits/rejected": -0.7721809148788452, "logps/chosen": -0.0013256346574053168, "logps/rejected": -2.5911355018615723, "loss": 0.5633, "nll_loss": 0.14081165194511414, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013256346574053168, "rewards/margins": 0.25898098945617676, "rewards/rejected": -0.2591135501861572, "step": 8796 }, { "epoch": 6.0836791147994465, "grad_norm": 5.005368709564209, "learning_rate": 2.1757338251114186e-05, "log_odds_chosen": 10.6502685546875, "log_odds_ratio": -0.00034274725476279855, "logits/chosen": -0.7452360391616821, "logits/rejected": -0.7825686931610107, "logps/chosen": -0.0019323653541505337, "logps/rejected": -2.6518256664276123, "loss": 0.5366, "nll_loss": 0.13411401212215424, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019323652668390423, "rewards/margins": 0.26498931646347046, "rewards/rejected": -0.2651825547218323, "step": 8797 }, { "epoch": 6.084370677731673, "grad_norm": 7.144959449768066, "learning_rate": 2.1753496234824035e-05, "log_odds_chosen": 9.655604362487793, "log_odds_ratio": -0.000444319739472121, "logits/chosen": -0.45785990357398987, "logits/rejected": -0.474082350730896, "logps/chosen": -0.00023596035316586494, "logps/rejected": -1.6099621057510376, "loss": 0.8545, "nll_loss": 0.2135741412639618, "rewards/accuracies": 1.0, "rewards/chosen": -2.3596036044182256e-05, "rewards/margins": 0.16097262501716614, "rewards/rejected": -0.160996213555336, "step": 8798 }, { "epoch": 6.0850622406639, "grad_norm": 6.444210529327393, "learning_rate": 2.1749654218533887e-05, "log_odds_chosen": 11.038360595703125, "log_odds_ratio": -2.1456851754919626e-05, "logits/chosen": -0.3064062297344208, "logits/rejected": -0.40747886896133423, "logps/chosen": -0.00010762877354864031, "logps/rejected": -1.856105089187622, "loss": 0.5418, "nll_loss": 0.13545329868793488, "rewards/accuracies": 1.0, "rewards/chosen": -1.0762878446257673e-05, "rewards/margins": 0.18559975922107697, "rewards/rejected": -0.18561053276062012, "step": 8799 }, { "epoch": 6.085753803596127, "grad_norm": 7.325039863586426, "learning_rate": 2.174581220224374e-05, "log_odds_chosen": 8.8878173828125, "log_odds_ratio": -0.0006561190239153802, "logits/chosen": -0.16303503513336182, "logits/rejected": -0.23362107574939728, "logps/chosen": -0.0007630744366906583, "logps/rejected": -1.7544726133346558, "loss": 0.6446, "nll_loss": 0.16107672452926636, "rewards/accuracies": 1.0, "rewards/chosen": -7.630744221387431e-05, "rewards/margins": 0.1753709614276886, "rewards/rejected": -0.1754472553730011, "step": 8800 }, { "epoch": 6.086445366528354, "grad_norm": 6.830489635467529, "learning_rate": 2.174197018595359e-05, "log_odds_chosen": 11.936038970947266, "log_odds_ratio": -3.0060902645345777e-05, "logits/chosen": -0.29758238792419434, "logits/rejected": -0.3780736029148102, "logps/chosen": -0.00016458773461636156, "logps/rejected": -2.9928178787231445, "loss": 0.6726, "nll_loss": 0.16814972460269928, "rewards/accuracies": 1.0, "rewards/chosen": -1.6458772734040394e-05, "rewards/margins": 0.2992653250694275, "rewards/rejected": -0.2992818057537079, "step": 8801 }, { "epoch": 6.087136929460581, "grad_norm": 7.6381049156188965, "learning_rate": 2.173812816966344e-05, "log_odds_chosen": 11.096879959106445, "log_odds_ratio": -5.57570529053919e-05, "logits/chosen": -0.19633033871650696, "logits/rejected": -0.25721418857574463, "logps/chosen": -0.00017020636005327106, "logps/rejected": -2.2261905670166016, "loss": 0.5965, "nll_loss": 0.1491204798221588, "rewards/accuracies": 1.0, "rewards/chosen": -1.7020636732922867e-05, "rewards/margins": 0.22260203957557678, "rewards/rejected": -0.22261905670166016, "step": 8802 }, { "epoch": 6.087828492392807, "grad_norm": 14.14664077758789, "learning_rate": 2.173428615337329e-05, "log_odds_chosen": 11.166595458984375, "log_odds_ratio": -0.00020692782709375024, "logits/chosen": -0.11803440749645233, "logits/rejected": -0.23896290361881256, "logps/chosen": -0.0009996149456128478, "logps/rejected": -2.9760303497314453, "loss": 0.7754, "nll_loss": 0.19382460415363312, "rewards/accuracies": 1.0, "rewards/chosen": -9.996150038205087e-05, "rewards/margins": 0.29750311374664307, "rewards/rejected": -0.2976030707359314, "step": 8803 }, { "epoch": 6.088520055325034, "grad_norm": 5.6617207527160645, "learning_rate": 2.1730444137083143e-05, "log_odds_chosen": 10.668686866760254, "log_odds_ratio": -8.660917228553444e-05, "logits/chosen": -0.5297747850418091, "logits/rejected": -0.49632179737091064, "logps/chosen": -0.0001935886830324307, "logps/rejected": -2.0004067420959473, "loss": 0.5958, "nll_loss": 0.14893387258052826, "rewards/accuracies": 1.0, "rewards/chosen": -1.9358869394636713e-05, "rewards/margins": 0.20002131164073944, "rewards/rejected": -0.20004066824913025, "step": 8804 }, { "epoch": 6.089211618257261, "grad_norm": 8.393978118896484, "learning_rate": 2.1726602120792995e-05, "log_odds_chosen": 9.278707504272461, "log_odds_ratio": -0.003602417418733239, "logits/chosen": -0.8583751916885376, "logits/rejected": -0.8760848045349121, "logps/chosen": -0.0023618321865797043, "logps/rejected": -1.3024115562438965, "loss": 0.5269, "nll_loss": 0.1313529908657074, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023618324485141784, "rewards/margins": 0.13000498712062836, "rewards/rejected": -0.13024115562438965, "step": 8805 }, { "epoch": 6.089903181189488, "grad_norm": 6.89979887008667, "learning_rate": 2.1722760104502844e-05, "log_odds_chosen": 11.741314888000488, "log_odds_ratio": -2.223522460553795e-05, "logits/chosen": -0.4017873704433441, "logits/rejected": -0.43786537647247314, "logps/chosen": -0.00020560878328979015, "logps/rejected": -3.0209012031555176, "loss": 0.5789, "nll_loss": 0.14472922682762146, "rewards/accuracies": 1.0, "rewards/chosen": -2.0560877601383254e-05, "rewards/margins": 0.3020695447921753, "rewards/rejected": -0.3020901381969452, "step": 8806 }, { "epoch": 6.090594744121715, "grad_norm": 5.095583915710449, "learning_rate": 2.1718918088212693e-05, "log_odds_chosen": 11.15652847290039, "log_odds_ratio": -1.8469172573531978e-05, "logits/chosen": -0.8193786144256592, "logits/rejected": -0.7967087030410767, "logps/chosen": -7.29345265426673e-05, "logps/rejected": -1.5476975440979004, "loss": 0.3783, "nll_loss": 0.09456189721822739, "rewards/accuracies": 1.0, "rewards/chosen": -7.29345265426673e-06, "rewards/margins": 0.15476244688034058, "rewards/rejected": -0.15476974844932556, "step": 8807 }, { "epoch": 6.091286307053942, "grad_norm": 10.117960929870605, "learning_rate": 2.1715076071922546e-05, "log_odds_chosen": 11.16279411315918, "log_odds_ratio": -2.1533323888434097e-05, "logits/chosen": -0.6959891319274902, "logits/rejected": -0.7757497429847717, "logps/chosen": -0.00027285193209536374, "logps/rejected": -2.445639133453369, "loss": 0.7058, "nll_loss": 0.1764417141675949, "rewards/accuracies": 1.0, "rewards/chosen": -2.728519575612154e-05, "rewards/margins": 0.2445366233587265, "rewards/rejected": -0.24456390738487244, "step": 8808 }, { "epoch": 6.091977869986168, "grad_norm": 8.009075164794922, "learning_rate": 2.1711234055632395e-05, "log_odds_chosen": 10.137691497802734, "log_odds_ratio": -0.00022952201834414154, "logits/chosen": -0.6261448264122009, "logits/rejected": -0.5945342183113098, "logps/chosen": -0.0013284524902701378, "logps/rejected": -2.2858855724334717, "loss": 0.777, "nll_loss": 0.19423788785934448, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013284524902701378, "rewards/margins": 0.22845572233200073, "rewards/rejected": -0.2285885512828827, "step": 8809 }, { "epoch": 6.092669432918395, "grad_norm": 6.640091419219971, "learning_rate": 2.1707392039342247e-05, "log_odds_chosen": 11.143184661865234, "log_odds_ratio": -2.3205455363495275e-05, "logits/chosen": -0.3292912244796753, "logits/rejected": -0.4862833619117737, "logps/chosen": -0.00011166653712280095, "logps/rejected": -1.7538467645645142, "loss": 0.472, "nll_loss": 0.11800672113895416, "rewards/accuracies": 1.0, "rewards/chosen": -1.1166652257088572e-05, "rewards/margins": 0.17537352442741394, "rewards/rejected": -0.17538467049598694, "step": 8810 }, { "epoch": 6.093360995850622, "grad_norm": 10.416664123535156, "learning_rate": 2.17035500230521e-05, "log_odds_chosen": 10.741606712341309, "log_odds_ratio": -0.0006154798902571201, "logits/chosen": -0.605117917060852, "logits/rejected": -0.5834625959396362, "logps/chosen": -0.0005302996141836047, "logps/rejected": -2.474724531173706, "loss": 0.4787, "nll_loss": 0.11962580680847168, "rewards/accuracies": 1.0, "rewards/chosen": -5.302996578393504e-05, "rewards/margins": 0.24741944670677185, "rewards/rejected": -0.24747246503829956, "step": 8811 }, { "epoch": 6.094052558782849, "grad_norm": 10.211325645446777, "learning_rate": 2.169970800676195e-05, "log_odds_chosen": 11.239428520202637, "log_odds_ratio": -9.101382602239028e-05, "logits/chosen": 0.054155007004737854, "logits/rejected": 0.031840741634368896, "logps/chosen": -0.0006743489066138864, "logps/rejected": -3.3643722534179688, "loss": 0.8705, "nll_loss": 0.21762073040008545, "rewards/accuracies": 1.0, "rewards/chosen": -6.743489211658016e-05, "rewards/margins": 0.3363697826862335, "rewards/rejected": -0.3364371955394745, "step": 8812 }, { "epoch": 6.094744121715076, "grad_norm": 6.392689228057861, "learning_rate": 2.16958659904718e-05, "log_odds_chosen": 9.836763381958008, "log_odds_ratio": -0.00022950033599045128, "logits/chosen": -0.05819656699895859, "logits/rejected": -0.19960781931877136, "logps/chosen": -0.0003322213888168335, "logps/rejected": -2.0401153564453125, "loss": 0.8009, "nll_loss": 0.20020034909248352, "rewards/accuracies": 1.0, "rewards/chosen": -3.322213888168335e-05, "rewards/margins": 0.20397831499576569, "rewards/rejected": -0.20401152968406677, "step": 8813 }, { "epoch": 6.095435684647303, "grad_norm": 3.7710330486297607, "learning_rate": 2.1692023974181654e-05, "log_odds_chosen": 9.871589660644531, "log_odds_ratio": -0.00019951784634031355, "logits/chosen": -0.00703035295009613, "logits/rejected": -0.11229774355888367, "logps/chosen": -0.0009015482501126826, "logps/rejected": -1.984623670578003, "loss": 0.796, "nll_loss": 0.19897326827049255, "rewards/accuracies": 1.0, "rewards/chosen": -9.01548337424174e-05, "rewards/margins": 0.19837221503257751, "rewards/rejected": -0.1984623670578003, "step": 8814 }, { "epoch": 6.096127247579529, "grad_norm": 5.55619478225708, "learning_rate": 2.1688181957891503e-05, "log_odds_chosen": 9.764547348022461, "log_odds_ratio": -0.00041928552673198283, "logits/chosen": -0.7127132415771484, "logits/rejected": -0.8955925107002258, "logps/chosen": -0.0008278897730633616, "logps/rejected": -1.8823670148849487, "loss": 0.9079, "nll_loss": 0.22692245244979858, "rewards/accuracies": 1.0, "rewards/chosen": -8.278897439595312e-05, "rewards/margins": 0.1881539225578308, "rewards/rejected": -0.18823671340942383, "step": 8815 }, { "epoch": 6.096818810511756, "grad_norm": 5.305004596710205, "learning_rate": 2.1684339941601352e-05, "log_odds_chosen": 10.194826126098633, "log_odds_ratio": -0.0031981952488422394, "logits/chosen": -0.43302011489868164, "logits/rejected": -0.4673667550086975, "logps/chosen": -0.0015352818882092834, "logps/rejected": -1.9649176597595215, "loss": 0.4772, "nll_loss": 0.11898045241832733, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001535282062832266, "rewards/margins": 0.1963382363319397, "rewards/rejected": -0.19649174809455872, "step": 8816 }, { "epoch": 6.097510373443983, "grad_norm": 6.293792724609375, "learning_rate": 2.1680497925311204e-05, "log_odds_chosen": 11.738176345825195, "log_odds_ratio": -1.4964467482059263e-05, "logits/chosen": -0.21480712294578552, "logits/rejected": -0.2954432964324951, "logps/chosen": -0.00013515262980945408, "logps/rejected": -2.606994390487671, "loss": 0.5252, "nll_loss": 0.13129648566246033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3515262253349647e-05, "rewards/margins": 0.26068592071533203, "rewards/rejected": -0.26069945096969604, "step": 8817 }, { "epoch": 6.09820193637621, "grad_norm": 6.480541706085205, "learning_rate": 2.1676655909021053e-05, "log_odds_chosen": 9.735299110412598, "log_odds_ratio": -0.0001797816512407735, "logits/chosen": -0.051631614565849304, "logits/rejected": -0.10556840896606445, "logps/chosen": -0.0004435731389094144, "logps/rejected": -1.998798131942749, "loss": 0.7467, "nll_loss": 0.18666529655456543, "rewards/accuracies": 1.0, "rewards/chosen": -4.435731170815416e-05, "rewards/margins": 0.19983544945716858, "rewards/rejected": -0.19987979531288147, "step": 8818 }, { "epoch": 6.098893499308437, "grad_norm": 4.939790725708008, "learning_rate": 2.1672813892730906e-05, "log_odds_chosen": 9.56667423248291, "log_odds_ratio": -0.00015915413678158075, "logits/chosen": -0.09113126248121262, "logits/rejected": -0.1315183788537979, "logps/chosen": -0.00048493000213056803, "logps/rejected": -1.30069100856781, "loss": 0.6208, "nll_loss": 0.15519554913043976, "rewards/accuracies": 1.0, "rewards/chosen": -4.849300239584409e-05, "rewards/margins": 0.1300206184387207, "rewards/rejected": -0.13006910681724548, "step": 8819 }, { "epoch": 6.0995850622406635, "grad_norm": 6.470208168029785, "learning_rate": 2.1668971876440758e-05, "log_odds_chosen": 11.104886054992676, "log_odds_ratio": -7.311101944651455e-05, "logits/chosen": -0.5827865600585938, "logits/rejected": -0.6194196343421936, "logps/chosen": -0.0005881582037545741, "logps/rejected": -2.9203414916992188, "loss": 1.1226, "nll_loss": 0.2806398868560791, "rewards/accuracies": 1.0, "rewards/chosen": -5.881581819267012e-05, "rewards/margins": 0.29197531938552856, "rewards/rejected": -0.2920341491699219, "step": 8820 }, { "epoch": 6.10027662517289, "grad_norm": 5.361146450042725, "learning_rate": 2.1665129860150607e-05, "log_odds_chosen": 11.236394882202148, "log_odds_ratio": -0.00015473456005565822, "logits/chosen": -0.7091895341873169, "logits/rejected": -0.7002542018890381, "logps/chosen": -0.000265885260887444, "logps/rejected": -2.7120513916015625, "loss": 0.5708, "nll_loss": 0.1426815241575241, "rewards/accuracies": 1.0, "rewards/chosen": -2.658852463355288e-05, "rewards/margins": 0.27117854356765747, "rewards/rejected": -0.2712051272392273, "step": 8821 }, { "epoch": 6.100968188105117, "grad_norm": 6.503119945526123, "learning_rate": 2.166128784386046e-05, "log_odds_chosen": 11.223774909973145, "log_odds_ratio": -8.994392555905506e-05, "logits/chosen": -0.3564401865005493, "logits/rejected": -0.36347508430480957, "logps/chosen": -0.0007731412770226598, "logps/rejected": -3.339472770690918, "loss": 0.7328, "nll_loss": 0.18319301307201385, "rewards/accuracies": 1.0, "rewards/chosen": -7.731413643341511e-05, "rewards/margins": 0.33386993408203125, "rewards/rejected": -0.3339473009109497, "step": 8822 }, { "epoch": 6.101659751037344, "grad_norm": 7.759904861450195, "learning_rate": 2.1657445827570312e-05, "log_odds_chosen": 9.283439636230469, "log_odds_ratio": -0.000780658156145364, "logits/chosen": -0.0106724314391613, "logits/rejected": 0.050531066954135895, "logps/chosen": -0.0009825469460338354, "logps/rejected": -1.320664882659912, "loss": 0.8748, "nll_loss": 0.2186199277639389, "rewards/accuracies": 1.0, "rewards/chosen": -9.82546916930005e-05, "rewards/margins": 0.13196823000907898, "rewards/rejected": -0.1320664882659912, "step": 8823 }, { "epoch": 6.102351313969571, "grad_norm": 6.8154988288879395, "learning_rate": 2.165360381128016e-05, "log_odds_chosen": 10.798100471496582, "log_odds_ratio": -3.55045085598249e-05, "logits/chosen": -0.27733373641967773, "logits/rejected": -0.40890154242515564, "logps/chosen": -0.00019498051551636308, "logps/rejected": -2.014132022857666, "loss": 0.5225, "nll_loss": 0.13062188029289246, "rewards/accuracies": 1.0, "rewards/chosen": -1.949805300682783e-05, "rewards/margins": 0.201393723487854, "rewards/rejected": -0.20141321420669556, "step": 8824 }, { "epoch": 6.103042876901798, "grad_norm": 9.223984718322754, "learning_rate": 2.164976179499001e-05, "log_odds_chosen": 11.027979850769043, "log_odds_ratio": -0.0001737778220558539, "logits/chosen": -0.4153405427932739, "logits/rejected": -0.494740754365921, "logps/chosen": -0.000494759005960077, "logps/rejected": -3.088369369506836, "loss": 0.5605, "nll_loss": 0.14010044932365417, "rewards/accuracies": 1.0, "rewards/chosen": -4.9475900596007705e-05, "rewards/margins": 0.30878746509552, "rewards/rejected": -0.308836966753006, "step": 8825 }, { "epoch": 6.1037344398340245, "grad_norm": 5.566494464874268, "learning_rate": 2.1645919778699863e-05, "log_odds_chosen": 9.569103240966797, "log_odds_ratio": -0.0007949695573188365, "logits/chosen": -0.2678110599517822, "logits/rejected": -0.33686572313308716, "logps/chosen": -0.001850008382461965, "logps/rejected": -1.621332049369812, "loss": 0.5076, "nll_loss": 0.1268259584903717, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018500084115657955, "rewards/margins": 0.16194820404052734, "rewards/rejected": -0.16213320195674896, "step": 8826 }, { "epoch": 6.104426002766251, "grad_norm": 7.433922290802002, "learning_rate": 2.1642077762409712e-05, "log_odds_chosen": 11.172834396362305, "log_odds_ratio": -5.931320629315451e-05, "logits/chosen": -0.10432031005620956, "logits/rejected": -0.16316169500350952, "logps/chosen": -0.00039796033524908125, "logps/rejected": -2.6650447845458984, "loss": 0.7373, "nll_loss": 0.18432670831680298, "rewards/accuracies": 1.0, "rewards/chosen": -3.97960320697166e-05, "rewards/margins": 0.2664646804332733, "rewards/rejected": -0.2665044963359833, "step": 8827 }, { "epoch": 6.105117565698478, "grad_norm": 12.175658226013184, "learning_rate": 2.1638235746119564e-05, "log_odds_chosen": 11.484321594238281, "log_odds_ratio": -3.6780231312150136e-05, "logits/chosen": -0.14946818351745605, "logits/rejected": -0.25240346789360046, "logps/chosen": -0.0003283719124738127, "logps/rejected": -3.119140386581421, "loss": 0.6463, "nll_loss": 0.1615663468837738, "rewards/accuracies": 1.0, "rewards/chosen": -3.283718979218975e-05, "rewards/margins": 0.3118812143802643, "rewards/rejected": -0.3119140565395355, "step": 8828 }, { "epoch": 6.105809128630705, "grad_norm": 6.868575096130371, "learning_rate": 2.1634393729829417e-05, "log_odds_chosen": 10.50800609588623, "log_odds_ratio": -0.0007316919509321451, "logits/chosen": 0.09018150717020035, "logits/rejected": -0.14269495010375977, "logps/chosen": -0.0008097183890640736, "logps/rejected": -1.760002851486206, "loss": 0.6243, "nll_loss": 0.15601056814193726, "rewards/accuracies": 1.0, "rewards/chosen": -8.097184036159888e-05, "rewards/margins": 0.1759193241596222, "rewards/rejected": -0.17600028216838837, "step": 8829 }, { "epoch": 6.106500691562932, "grad_norm": 3.782043933868408, "learning_rate": 2.1630551713539266e-05, "log_odds_chosen": 10.608339309692383, "log_odds_ratio": -8.497351518599316e-05, "logits/chosen": -0.2754320502281189, "logits/rejected": -0.29661867022514343, "logps/chosen": -0.00019873691780958325, "logps/rejected": -2.0929758548736572, "loss": 0.4986, "nll_loss": 0.12464545667171478, "rewards/accuracies": 1.0, "rewards/chosen": -1.9873692508554086e-05, "rewards/margins": 0.2092777043581009, "rewards/rejected": -0.20929758250713348, "step": 8830 }, { "epoch": 6.107192254495159, "grad_norm": 5.824345588684082, "learning_rate": 2.1626709697249118e-05, "log_odds_chosen": 10.269322395324707, "log_odds_ratio": -6.437697447836399e-05, "logits/chosen": -0.6419390439987183, "logits/rejected": -0.6782790422439575, "logps/chosen": -0.0003181939828209579, "logps/rejected": -1.8846712112426758, "loss": 0.6367, "nll_loss": 0.1591646373271942, "rewards/accuracies": 1.0, "rewards/chosen": -3.181939973728731e-05, "rewards/margins": 0.18843530118465424, "rewards/rejected": -0.1884671151638031, "step": 8831 }, { "epoch": 6.1078838174273855, "grad_norm": 5.414685249328613, "learning_rate": 2.162286768095897e-05, "log_odds_chosen": 10.8403959274292, "log_odds_ratio": -2.654375566635281e-05, "logits/chosen": -0.5415429472923279, "logits/rejected": -0.5043303966522217, "logps/chosen": -0.0001397305604768917, "logps/rejected": -1.906392216682434, "loss": 0.465, "nll_loss": 0.11623533070087433, "rewards/accuracies": 1.0, "rewards/chosen": -1.397305641148705e-05, "rewards/margins": 0.19062525033950806, "rewards/rejected": -0.19063922762870789, "step": 8832 }, { "epoch": 6.108575380359612, "grad_norm": 9.278594970703125, "learning_rate": 2.161902566466882e-05, "log_odds_chosen": 11.402810096740723, "log_odds_ratio": -1.8366235963185318e-05, "logits/chosen": -0.18955518305301666, "logits/rejected": -0.2610347867012024, "logps/chosen": -0.00014559032570105046, "logps/rejected": -2.5812301635742188, "loss": 0.6596, "nll_loss": 0.1648978888988495, "rewards/accuracies": 1.0, "rewards/chosen": -1.4559032933902927e-05, "rewards/margins": 0.2581084668636322, "rewards/rejected": -0.2581230401992798, "step": 8833 }, { "epoch": 6.109266943291839, "grad_norm": 4.596074104309082, "learning_rate": 2.161518364837867e-05, "log_odds_chosen": 11.463423728942871, "log_odds_ratio": -3.30315378960222e-05, "logits/chosen": -0.4086986780166626, "logits/rejected": -0.449398934841156, "logps/chosen": -0.00018414505757391453, "logps/rejected": -2.8123068809509277, "loss": 0.3211, "nll_loss": 0.08027203381061554, "rewards/accuracies": 1.0, "rewards/chosen": -1.841450466599781e-05, "rewards/margins": 0.2812122702598572, "rewards/rejected": -0.2812306880950928, "step": 8834 }, { "epoch": 6.109958506224066, "grad_norm": 4.561694622039795, "learning_rate": 2.161134163208852e-05, "log_odds_chosen": 10.875974655151367, "log_odds_ratio": -6.157255120342597e-05, "logits/chosen": -0.3669860363006592, "logits/rejected": -0.43035003542900085, "logps/chosen": -0.0002957833930850029, "logps/rejected": -2.4708425998687744, "loss": 0.4244, "nll_loss": 0.10609880834817886, "rewards/accuracies": 1.0, "rewards/chosen": -2.957833930850029e-05, "rewards/margins": 0.24705468118190765, "rewards/rejected": -0.24708425998687744, "step": 8835 }, { "epoch": 6.110650069156293, "grad_norm": 5.778451442718506, "learning_rate": 2.160749961579837e-05, "log_odds_chosen": 11.576051712036133, "log_odds_ratio": -6.129697430878878e-05, "logits/chosen": -0.8564388751983643, "logits/rejected": -0.9177031517028809, "logps/chosen": -0.0002084274310618639, "logps/rejected": -2.860732078552246, "loss": 0.8458, "nll_loss": 0.21144109964370728, "rewards/accuracies": 1.0, "rewards/chosen": -2.084274274238851e-05, "rewards/margins": 0.2860523760318756, "rewards/rejected": -0.286073237657547, "step": 8836 }, { "epoch": 6.11134163208852, "grad_norm": 4.546443939208984, "learning_rate": 2.1603657599508223e-05, "log_odds_chosen": 11.850934982299805, "log_odds_ratio": -3.854755777865648e-05, "logits/chosen": -0.09234938025474548, "logits/rejected": -0.14182928204536438, "logps/chosen": -0.0004462672513909638, "logps/rejected": -2.16272234916687, "loss": 0.4516, "nll_loss": 0.11288943886756897, "rewards/accuracies": 1.0, "rewards/chosen": -4.462672950467095e-05, "rewards/margins": 0.21622759103775024, "rewards/rejected": -0.21627222001552582, "step": 8837 }, { "epoch": 6.1120331950207465, "grad_norm": 11.113327026367188, "learning_rate": 2.1599815583218075e-05, "log_odds_chosen": 9.901022911071777, "log_odds_ratio": -0.024493100121617317, "logits/chosen": -0.29056257009506226, "logits/rejected": -0.41285592317581177, "logps/chosen": -0.008077163249254227, "logps/rejected": -2.225210428237915, "loss": 0.97, "nll_loss": 0.24004806578159332, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008077163947746158, "rewards/margins": 0.2217133492231369, "rewards/rejected": -0.22252105176448822, "step": 8838 }, { "epoch": 6.112724757952973, "grad_norm": 9.41685962677002, "learning_rate": 2.1595973566927924e-05, "log_odds_chosen": 11.145782470703125, "log_odds_ratio": -3.246070264140144e-05, "logits/chosen": -0.5988461971282959, "logits/rejected": -0.5849972367286682, "logps/chosen": -0.0006508535007014871, "logps/rejected": -2.710702419281006, "loss": 0.6633, "nll_loss": 0.1658124029636383, "rewards/accuracies": 1.0, "rewards/chosen": -6.508534715976566e-05, "rewards/margins": 0.27100518345832825, "rewards/rejected": -0.271070271730423, "step": 8839 }, { "epoch": 6.1134163208852, "grad_norm": 5.831615924835205, "learning_rate": 2.1592131550637777e-05, "log_odds_chosen": 11.677581787109375, "log_odds_ratio": -2.2064965378376655e-05, "logits/chosen": -0.07563771307468414, "logits/rejected": -0.1285375952720642, "logps/chosen": -0.000272395700449124, "logps/rejected": -3.220499277114868, "loss": 0.7135, "nll_loss": 0.17837318778038025, "rewards/accuracies": 1.0, "rewards/chosen": -2.7239570044912398e-05, "rewards/margins": 0.3220227062702179, "rewards/rejected": -0.32204994559288025, "step": 8840 }, { "epoch": 6.114107883817427, "grad_norm": 6.619877338409424, "learning_rate": 2.158828953434763e-05, "log_odds_chosen": 11.94092845916748, "log_odds_ratio": -1.0732926057244185e-05, "logits/chosen": -0.5719197392463684, "logits/rejected": -0.581974983215332, "logps/chosen": -8.917129889596254e-05, "logps/rejected": -2.4263858795166016, "loss": 0.4402, "nll_loss": 0.11005041003227234, "rewards/accuracies": 1.0, "rewards/chosen": -8.917129889596254e-06, "rewards/margins": 0.24262967705726624, "rewards/rejected": -0.24263860285282135, "step": 8841 }, { "epoch": 6.114799446749654, "grad_norm": 5.583203315734863, "learning_rate": 2.1584447518057478e-05, "log_odds_chosen": 9.578554153442383, "log_odds_ratio": -0.0002196382120018825, "logits/chosen": -0.5063522458076477, "logits/rejected": -0.6370638608932495, "logps/chosen": -0.0006920626619830728, "logps/rejected": -1.7157485485076904, "loss": 0.7513, "nll_loss": 0.18780651688575745, "rewards/accuracies": 1.0, "rewards/chosen": -6.920627492945641e-05, "rewards/margins": 0.1715056449174881, "rewards/rejected": -0.17157486081123352, "step": 8842 }, { "epoch": 6.115491009681881, "grad_norm": 7.913309097290039, "learning_rate": 2.1580605501767327e-05, "log_odds_chosen": 11.05504035949707, "log_odds_ratio": -4.723056190414354e-05, "logits/chosen": -0.6872215270996094, "logits/rejected": -0.7240106463432312, "logps/chosen": -0.00019065033120568842, "logps/rejected": -2.20306658744812, "loss": 0.5783, "nll_loss": 0.1445586234331131, "rewards/accuracies": 1.0, "rewards/chosen": -1.9065033484366722e-05, "rewards/margins": 0.22028759121894836, "rewards/rejected": -0.2203066647052765, "step": 8843 }, { "epoch": 6.1161825726141075, "grad_norm": 4.464817523956299, "learning_rate": 2.157676348547718e-05, "log_odds_chosen": 10.496665954589844, "log_odds_ratio": -6.723314436385408e-05, "logits/chosen": -0.6100011467933655, "logits/rejected": -0.5485386252403259, "logps/chosen": -0.0005118567496538162, "logps/rejected": -1.8350151777267456, "loss": 0.6321, "nll_loss": 0.15802812576293945, "rewards/accuracies": 1.0, "rewards/chosen": -5.118567787576467e-05, "rewards/margins": 0.1834503412246704, "rewards/rejected": -0.18350151181221008, "step": 8844 }, { "epoch": 6.116874135546334, "grad_norm": 7.142628192901611, "learning_rate": 2.157292146918703e-05, "log_odds_chosen": 10.981527328491211, "log_odds_ratio": -0.00034269201569259167, "logits/chosen": -0.34676387906074524, "logits/rejected": -0.35570889711380005, "logps/chosen": -0.0019361183512955904, "logps/rejected": -2.876324415206909, "loss": 0.6613, "nll_loss": 0.1652963012456894, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019361183512955904, "rewards/margins": 0.2874388098716736, "rewards/rejected": -0.28763246536254883, "step": 8845 }, { "epoch": 6.117565698478561, "grad_norm": 4.98019552230835, "learning_rate": 2.156907945289688e-05, "log_odds_chosen": 10.531964302062988, "log_odds_ratio": -4.4222902943147346e-05, "logits/chosen": -0.2760690450668335, "logits/rejected": -0.3222053349018097, "logps/chosen": -0.00015717542555648834, "logps/rejected": -1.7313332557678223, "loss": 0.5804, "nll_loss": 0.1450836956501007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5717543647042476e-05, "rewards/margins": 0.17311760783195496, "rewards/rejected": -0.17313331365585327, "step": 8846 }, { "epoch": 6.118257261410788, "grad_norm": 7.392937660217285, "learning_rate": 2.1565237436606734e-05, "log_odds_chosen": 9.921597480773926, "log_odds_ratio": -0.00011697213631123304, "logits/chosen": -0.24065059423446655, "logits/rejected": -0.28834766149520874, "logps/chosen": -0.0005189216462895274, "logps/rejected": -1.8165524005889893, "loss": 0.5459, "nll_loss": 0.1364704966545105, "rewards/accuracies": 1.0, "rewards/chosen": -5.1892166084144264e-05, "rewards/margins": 0.1816033571958542, "rewards/rejected": -0.18165524303913116, "step": 8847 }, { "epoch": 6.118948824343015, "grad_norm": 11.480717658996582, "learning_rate": 2.1561395420316583e-05, "log_odds_chosen": 10.543256759643555, "log_odds_ratio": -0.00019431406690273434, "logits/chosen": 0.16179290413856506, "logits/rejected": 0.02488519996404648, "logps/chosen": -0.0006095264106988907, "logps/rejected": -3.026254177093506, "loss": 0.6545, "nll_loss": 0.1636168211698532, "rewards/accuracies": 1.0, "rewards/chosen": -6.0952639614697546e-05, "rewards/margins": 0.30256450176239014, "rewards/rejected": -0.302625447511673, "step": 8848 }, { "epoch": 6.119640387275242, "grad_norm": 6.340550422668457, "learning_rate": 2.1557553404026435e-05, "log_odds_chosen": 10.822004318237305, "log_odds_ratio": -5.153457459527999e-05, "logits/chosen": -0.5803370475769043, "logits/rejected": -0.571527898311615, "logps/chosen": -0.0002886800211854279, "logps/rejected": -2.354701519012451, "loss": 0.5035, "nll_loss": 0.1258649379014969, "rewards/accuracies": 1.0, "rewards/chosen": -2.8868005756521598e-05, "rewards/margins": 0.23544129729270935, "rewards/rejected": -0.23547017574310303, "step": 8849 }, { "epoch": 6.1203319502074685, "grad_norm": 7.232970714569092, "learning_rate": 2.1553711387736288e-05, "log_odds_chosen": 9.102911949157715, "log_odds_ratio": -0.00019699697440955788, "logits/chosen": -0.34530875086784363, "logits/rejected": -0.4146209955215454, "logps/chosen": -0.0025119970086961985, "logps/rejected": -1.6312192678451538, "loss": 0.561, "nll_loss": 0.14022156596183777, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025119970086961985, "rewards/margins": 0.16287073493003845, "rewards/rejected": -0.16312193870544434, "step": 8850 }, { "epoch": 6.121023513139695, "grad_norm": 7.151753902435303, "learning_rate": 2.1549869371446137e-05, "log_odds_chosen": 10.662192344665527, "log_odds_ratio": -0.00024196658341679722, "logits/chosen": -0.7194679379463196, "logits/rejected": -0.7703452110290527, "logps/chosen": -0.00021683350496459752, "logps/rejected": -1.6515040397644043, "loss": 0.508, "nll_loss": 0.12698546051979065, "rewards/accuracies": 1.0, "rewards/chosen": -2.1683352315449156e-05, "rewards/margins": 0.1651287078857422, "rewards/rejected": -0.16515041887760162, "step": 8851 }, { "epoch": 6.121715076071922, "grad_norm": 6.731082916259766, "learning_rate": 2.1546027355155986e-05, "log_odds_chosen": 10.459026336669922, "log_odds_ratio": -5.2345916628837585e-05, "logits/chosen": -0.4643927812576294, "logits/rejected": -0.45928698778152466, "logps/chosen": -0.00017969627515412867, "logps/rejected": -1.7573070526123047, "loss": 0.4461, "nll_loss": 0.11151199042797089, "rewards/accuracies": 1.0, "rewards/chosen": -1.796962897060439e-05, "rewards/margins": 0.17571274936199188, "rewards/rejected": -0.17573070526123047, "step": 8852 }, { "epoch": 6.122406639004149, "grad_norm": 9.097981452941895, "learning_rate": 2.1542185338865838e-05, "log_odds_chosen": 11.558393478393555, "log_odds_ratio": -2.5802919481066056e-05, "logits/chosen": -0.31933721899986267, "logits/rejected": -0.3849840462207794, "logps/chosen": -0.00011497936066007242, "logps/rejected": -2.0980782508850098, "loss": 0.5145, "nll_loss": 0.12861578166484833, "rewards/accuracies": 1.0, "rewards/chosen": -1.1497935702209361e-05, "rewards/margins": 0.20979633927345276, "rewards/rejected": -0.2098078429698944, "step": 8853 }, { "epoch": 6.123098201936376, "grad_norm": 5.605348110198975, "learning_rate": 2.1538343322575687e-05, "log_odds_chosen": 10.877555847167969, "log_odds_ratio": -3.869025385938585e-05, "logits/chosen": -0.3823351562023163, "logits/rejected": -0.5414446592330933, "logps/chosen": -0.00019438430899754167, "logps/rejected": -2.1862130165100098, "loss": 0.6517, "nll_loss": 0.16291458904743195, "rewards/accuracies": 1.0, "rewards/chosen": -1.9438430172158405e-05, "rewards/margins": 0.21860186755657196, "rewards/rejected": -0.21862132847309113, "step": 8854 }, { "epoch": 6.123789764868603, "grad_norm": 4.647186756134033, "learning_rate": 2.153450130628554e-05, "log_odds_chosen": 9.800394058227539, "log_odds_ratio": -0.0005086607998237014, "logits/chosen": -0.21515649557113647, "logits/rejected": -0.26546669006347656, "logps/chosen": -0.0007469278643839061, "logps/rejected": -1.615663766860962, "loss": 0.4185, "nll_loss": 0.10456429421901703, "rewards/accuracies": 1.0, "rewards/chosen": -7.469279080396518e-05, "rewards/margins": 0.16149169206619263, "rewards/rejected": -0.16156639158725739, "step": 8855 }, { "epoch": 6.124481327800829, "grad_norm": 4.1204915046691895, "learning_rate": 2.1530659289995392e-05, "log_odds_chosen": 10.329703330993652, "log_odds_ratio": -0.00016737988335080445, "logits/chosen": -0.47069692611694336, "logits/rejected": -0.48261600732803345, "logps/chosen": -0.0007168280426412821, "logps/rejected": -2.0946145057678223, "loss": 0.5077, "nll_loss": 0.12690681219100952, "rewards/accuracies": 1.0, "rewards/chosen": -7.168280717451125e-05, "rewards/margins": 0.209389790892601, "rewards/rejected": -0.20946148037910461, "step": 8856 }, { "epoch": 6.125172890733056, "grad_norm": 9.45824146270752, "learning_rate": 2.152681727370524e-05, "log_odds_chosen": 11.265548706054688, "log_odds_ratio": -2.638051228132099e-05, "logits/chosen": -0.44578057527542114, "logits/rejected": -0.5971443057060242, "logps/chosen": -0.0002542410511523485, "logps/rejected": -2.679877281188965, "loss": 1.5033, "nll_loss": 0.3758128881454468, "rewards/accuracies": 1.0, "rewards/chosen": -2.5424105842830613e-05, "rewards/margins": 0.2679622769355774, "rewards/rejected": -0.2679877281188965, "step": 8857 }, { "epoch": 6.125864453665283, "grad_norm": 11.375960350036621, "learning_rate": 2.1522975257415094e-05, "log_odds_chosen": 10.551385879516602, "log_odds_ratio": -0.006846227683126926, "logits/chosen": -0.25975847244262695, "logits/rejected": -0.37072598934173584, "logps/chosen": -0.04536538943648338, "logps/rejected": -2.7914230823516846, "loss": 0.6618, "nll_loss": 0.16475853323936462, "rewards/accuracies": 1.0, "rewards/chosen": -0.004536538850516081, "rewards/margins": 0.27460581064224243, "rewards/rejected": -0.2791423499584198, "step": 8858 }, { "epoch": 6.12655601659751, "grad_norm": 6.898074150085449, "learning_rate": 2.1519133241124946e-05, "log_odds_chosen": 11.68839168548584, "log_odds_ratio": -2.754458910203539e-05, "logits/chosen": -0.7259312868118286, "logits/rejected": -0.8036313056945801, "logps/chosen": -0.00017395266331732273, "logps/rejected": -2.473479747772217, "loss": 0.7248, "nll_loss": 0.18118774890899658, "rewards/accuracies": 1.0, "rewards/chosen": -1.7395266695530154e-05, "rewards/margins": 0.24733059108257294, "rewards/rejected": -0.24734798073768616, "step": 8859 }, { "epoch": 6.127247579529737, "grad_norm": 7.808147430419922, "learning_rate": 2.1515291224834795e-05, "log_odds_chosen": 9.726302146911621, "log_odds_ratio": -0.00056614656932652, "logits/chosen": -0.267952024936676, "logits/rejected": -0.21720397472381592, "logps/chosen": -0.00065661157714203, "logps/rejected": -1.7407177686691284, "loss": 0.8449, "nll_loss": 0.21116022765636444, "rewards/accuracies": 1.0, "rewards/chosen": -6.566115189343691e-05, "rewards/margins": 0.1740061342716217, "rewards/rejected": -0.1740717738866806, "step": 8860 }, { "epoch": 6.127939142461964, "grad_norm": 6.377797603607178, "learning_rate": 2.1511449208544644e-05, "log_odds_chosen": 10.847086906433105, "log_odds_ratio": -0.0001203061401611194, "logits/chosen": -0.6302061676979065, "logits/rejected": -0.6396503448486328, "logps/chosen": -0.00020166633476037532, "logps/rejected": -1.7068842649459839, "loss": 0.6304, "nll_loss": 0.1575794219970703, "rewards/accuracies": 1.0, "rewards/chosen": -2.016663165704813e-05, "rewards/margins": 0.17066825926303864, "rewards/rejected": -0.1706884205341339, "step": 8861 }, { "epoch": 6.12863070539419, "grad_norm": 9.449673652648926, "learning_rate": 2.1507607192254497e-05, "log_odds_chosen": 10.45321273803711, "log_odds_ratio": -5.564562525250949e-05, "logits/chosen": -0.11017411947250366, "logits/rejected": -0.21915608644485474, "logps/chosen": -0.0007298594573512673, "logps/rejected": -2.18619966506958, "loss": 0.4913, "nll_loss": 0.12282784283161163, "rewards/accuracies": 1.0, "rewards/chosen": -7.298595301108435e-05, "rewards/margins": 0.21854698657989502, "rewards/rejected": -0.21861997246742249, "step": 8862 }, { "epoch": 6.129322268326418, "grad_norm": 6.794439315795898, "learning_rate": 2.1503765175964346e-05, "log_odds_chosen": 9.2752103805542, "log_odds_ratio": -0.007447497453540564, "logits/chosen": -0.42011135816574097, "logits/rejected": -0.4615238606929779, "logps/chosen": -0.003412168473005295, "logps/rejected": -1.340742826461792, "loss": 0.5112, "nll_loss": 0.127059668302536, "rewards/accuracies": 1.0, "rewards/chosen": -0.00034121685894206166, "rewards/margins": 0.13373306393623352, "rewards/rejected": -0.13407427072525024, "step": 8863 }, { "epoch": 6.130013831258645, "grad_norm": 6.756525039672852, "learning_rate": 2.1499923159674198e-05, "log_odds_chosen": 10.04014778137207, "log_odds_ratio": -0.0001670851925155148, "logits/chosen": -0.8022230863571167, "logits/rejected": -0.8228714466094971, "logps/chosen": -0.0008743289508856833, "logps/rejected": -1.7404072284698486, "loss": 0.6535, "nll_loss": 0.16336099803447723, "rewards/accuracies": 1.0, "rewards/chosen": -8.743289799895138e-05, "rewards/margins": 0.17395329475402832, "rewards/rejected": -0.17404071986675262, "step": 8864 }, { "epoch": 6.130705394190872, "grad_norm": 4.477063179016113, "learning_rate": 2.149608114338405e-05, "log_odds_chosen": 10.5889253616333, "log_odds_ratio": -0.00010871638369280845, "logits/chosen": -0.7607325315475464, "logits/rejected": -0.8645247220993042, "logps/chosen": -0.00038288458017632365, "logps/rejected": -2.266031265258789, "loss": 0.5362, "nll_loss": 0.1340354084968567, "rewards/accuracies": 1.0, "rewards/chosen": -3.828846092801541e-05, "rewards/margins": 0.22656482458114624, "rewards/rejected": -0.22660312056541443, "step": 8865 }, { "epoch": 6.131396957123099, "grad_norm": 6.994044303894043, "learning_rate": 2.14922391270939e-05, "log_odds_chosen": 10.54684829711914, "log_odds_ratio": -5.5355423683067784e-05, "logits/chosen": -0.7427743673324585, "logits/rejected": -0.6657612919807434, "logps/chosen": -0.00029647996416315436, "logps/rejected": -1.9989583492279053, "loss": 0.5749, "nll_loss": 0.1437259316444397, "rewards/accuracies": 1.0, "rewards/chosen": -2.964799750770908e-05, "rewards/margins": 0.1998661756515503, "rewards/rejected": -0.19989581406116486, "step": 8866 }, { "epoch": 6.1320885200553255, "grad_norm": 6.7296671867370605, "learning_rate": 2.1488397110803752e-05, "log_odds_chosen": 10.42095947265625, "log_odds_ratio": -8.404959953622892e-05, "logits/chosen": -0.23315714299678802, "logits/rejected": -0.3014877736568451, "logps/chosen": -0.0001869620755314827, "logps/rejected": -1.8798887729644775, "loss": 0.6162, "nll_loss": 0.15404073894023895, "rewards/accuracies": 1.0, "rewards/chosen": -1.8696206097956747e-05, "rewards/margins": 0.18797020614147186, "rewards/rejected": -0.18798887729644775, "step": 8867 }, { "epoch": 6.132780082987552, "grad_norm": 5.581561088562012, "learning_rate": 2.1484555094513604e-05, "log_odds_chosen": 10.633293151855469, "log_odds_ratio": -0.0003565740189515054, "logits/chosen": -0.2947113513946533, "logits/rejected": -0.3295404613018036, "logps/chosen": -0.0009969068923965096, "logps/rejected": -2.78654146194458, "loss": 0.6332, "nll_loss": 0.15827523171901703, "rewards/accuracies": 1.0, "rewards/chosen": -9.969068923965096e-05, "rewards/margins": 0.2785544693470001, "rewards/rejected": -0.2786541283130646, "step": 8868 }, { "epoch": 6.133471645919779, "grad_norm": 9.132095336914062, "learning_rate": 2.1480713078223453e-05, "log_odds_chosen": 10.777402877807617, "log_odds_ratio": -6.423494778573513e-05, "logits/chosen": -0.22323840856552124, "logits/rejected": -0.3039165139198303, "logps/chosen": -0.0002668887027539313, "logps/rejected": -2.220047950744629, "loss": 0.5278, "nll_loss": 0.13193866610527039, "rewards/accuracies": 1.0, "rewards/chosen": -2.668887100298889e-05, "rewards/margins": 0.2219781130552292, "rewards/rejected": -0.22200478613376617, "step": 8869 }, { "epoch": 6.134163208852006, "grad_norm": 7.941714763641357, "learning_rate": 2.1476871061933303e-05, "log_odds_chosen": 10.74931526184082, "log_odds_ratio": -0.00013049867993686348, "logits/chosen": -0.3583360016345978, "logits/rejected": -0.34637540578842163, "logps/chosen": -0.0002213473489973694, "logps/rejected": -2.156782627105713, "loss": 0.5839, "nll_loss": 0.1459662914276123, "rewards/accuracies": 1.0, "rewards/chosen": -2.213473453593906e-05, "rewards/margins": 0.2156561315059662, "rewards/rejected": -0.21567825973033905, "step": 8870 }, { "epoch": 6.134854771784233, "grad_norm": 5.933559894561768, "learning_rate": 2.147302904564315e-05, "log_odds_chosen": 10.360872268676758, "log_odds_ratio": -3.608822953538038e-05, "logits/chosen": -0.39610740542411804, "logits/rejected": -0.4380618929862976, "logps/chosen": -0.00028375934925861657, "logps/rejected": -1.6616911888122559, "loss": 0.4978, "nll_loss": 0.124452605843544, "rewards/accuracies": 1.0, "rewards/chosen": -2.837593638105318e-05, "rewards/margins": 0.16614075005054474, "rewards/rejected": -0.16616912186145782, "step": 8871 }, { "epoch": 6.13554633471646, "grad_norm": 7.322857856750488, "learning_rate": 2.1469187029353004e-05, "log_odds_chosen": 9.55153751373291, "log_odds_ratio": -0.0011159204877912998, "logits/chosen": -0.08747230470180511, "logits/rejected": -0.0702173113822937, "logps/chosen": -0.0011271832045167685, "logps/rejected": -1.8637242317199707, "loss": 0.5021, "nll_loss": 0.12542061507701874, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011271832045167685, "rewards/margins": 0.18625971674919128, "rewards/rejected": -0.18637242913246155, "step": 8872 }, { "epoch": 6.136237897648686, "grad_norm": 3.4511232376098633, "learning_rate": 2.1465345013062856e-05, "log_odds_chosen": 9.75981616973877, "log_odds_ratio": -0.00038340777973644435, "logits/chosen": -0.4628656804561615, "logits/rejected": -0.5771580934524536, "logps/chosen": -0.0003525334468577057, "logps/rejected": -1.7929129600524902, "loss": 0.5708, "nll_loss": 0.14265519380569458, "rewards/accuracies": 1.0, "rewards/chosen": -3.5253346140962094e-05, "rewards/margins": 0.17925603687763214, "rewards/rejected": -0.17929129302501678, "step": 8873 }, { "epoch": 6.136929460580913, "grad_norm": 6.297825336456299, "learning_rate": 2.1461502996772706e-05, "log_odds_chosen": 9.935726165771484, "log_odds_ratio": -0.00023815446184016764, "logits/chosen": -0.35457730293273926, "logits/rejected": -0.4085862338542938, "logps/chosen": -0.0007369728991761804, "logps/rejected": -1.723237156867981, "loss": 0.5132, "nll_loss": 0.12827615439891815, "rewards/accuracies": 1.0, "rewards/chosen": -7.369727973127738e-05, "rewards/margins": 0.17225001752376556, "rewards/rejected": -0.17232371866703033, "step": 8874 }, { "epoch": 6.13762102351314, "grad_norm": 8.161831855773926, "learning_rate": 2.1457660980482558e-05, "log_odds_chosen": 10.419249534606934, "log_odds_ratio": -0.00042370465234853327, "logits/chosen": -0.538821280002594, "logits/rejected": -0.49489909410476685, "logps/chosen": -0.0037653190083801746, "logps/rejected": -2.5501937866210938, "loss": 0.6721, "nll_loss": 0.16797395050525665, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003765319415833801, "rewards/margins": 0.25464287400245667, "rewards/rejected": -0.2550193965435028, "step": 8875 }, { "epoch": 6.138312586445367, "grad_norm": 6.556025505065918, "learning_rate": 2.145381896419241e-05, "log_odds_chosen": 11.062671661376953, "log_odds_ratio": -3.0943039746489376e-05, "logits/chosen": -0.2883804440498352, "logits/rejected": -0.2896476686000824, "logps/chosen": -0.0001753466494847089, "logps/rejected": -2.1570873260498047, "loss": 0.6934, "nll_loss": 0.17334787547588348, "rewards/accuracies": 1.0, "rewards/chosen": -1.753466494847089e-05, "rewards/margins": 0.21569117903709412, "rewards/rejected": -0.21570873260498047, "step": 8876 }, { "epoch": 6.139004149377594, "grad_norm": 7.59773063659668, "learning_rate": 2.144997694790226e-05, "log_odds_chosen": 9.487346649169922, "log_odds_ratio": -0.00012805784353986382, "logits/chosen": -0.7149614691734314, "logits/rejected": -0.7343244552612305, "logps/chosen": -0.0005941563285887241, "logps/rejected": -1.285873532295227, "loss": 0.6787, "nll_loss": 0.1696619689464569, "rewards/accuracies": 1.0, "rewards/chosen": -5.941562994848937e-05, "rewards/margins": 0.1285279393196106, "rewards/rejected": -0.12858736515045166, "step": 8877 }, { "epoch": 6.139695712309821, "grad_norm": 10.843826293945312, "learning_rate": 2.1446134931612112e-05, "log_odds_chosen": 10.153909683227539, "log_odds_ratio": -0.00013622343249153346, "logits/chosen": -0.4070616364479065, "logits/rejected": -0.42122650146484375, "logps/chosen": -0.00043185707181692123, "logps/rejected": -1.9670538902282715, "loss": 0.7594, "nll_loss": 0.18982651829719543, "rewards/accuracies": 1.0, "rewards/chosen": -4.3185707909287885e-05, "rewards/margins": 0.19666221737861633, "rewards/rejected": -0.1967054009437561, "step": 8878 }, { "epoch": 6.140387275242047, "grad_norm": 7.272959232330322, "learning_rate": 2.144229291532196e-05, "log_odds_chosen": 10.480998039245605, "log_odds_ratio": -0.000361002457793802, "logits/chosen": -0.2630500793457031, "logits/rejected": -0.3229910731315613, "logps/chosen": -0.000636959564872086, "logps/rejected": -2.146204948425293, "loss": 0.709, "nll_loss": 0.17721214890480042, "rewards/accuracies": 1.0, "rewards/chosen": -6.369595212163404e-05, "rewards/margins": 0.21455681324005127, "rewards/rejected": -0.21462051570415497, "step": 8879 }, { "epoch": 6.141078838174274, "grad_norm": 10.42378044128418, "learning_rate": 2.1438450899031813e-05, "log_odds_chosen": 10.40891170501709, "log_odds_ratio": -0.00029213528614491224, "logits/chosen": -0.3548663854598999, "logits/rejected": -0.32809650897979736, "logps/chosen": -0.0004571013560052961, "logps/rejected": -1.7706871032714844, "loss": 0.8349, "nll_loss": 0.20870471000671387, "rewards/accuracies": 1.0, "rewards/chosen": -4.5710137783316895e-05, "rewards/margins": 0.1770229935646057, "rewards/rejected": -0.17706872522830963, "step": 8880 }, { "epoch": 6.141770401106501, "grad_norm": 9.024090766906738, "learning_rate": 2.1434608882741662e-05, "log_odds_chosen": 9.728191375732422, "log_odds_ratio": -0.0002653269039001316, "logits/chosen": -0.20413929224014282, "logits/rejected": -0.2840261459350586, "logps/chosen": -0.0007140958332456648, "logps/rejected": -1.912898302078247, "loss": 0.8936, "nll_loss": 0.22338436543941498, "rewards/accuracies": 1.0, "rewards/chosen": -7.140958041418344e-05, "rewards/margins": 0.19121842086315155, "rewards/rejected": -0.19128982722759247, "step": 8881 }, { "epoch": 6.142461964038728, "grad_norm": 6.755743503570557, "learning_rate": 2.1430766866451515e-05, "log_odds_chosen": 10.81404972076416, "log_odds_ratio": -0.0001417073654010892, "logits/chosen": -0.343039870262146, "logits/rejected": -0.28404492139816284, "logps/chosen": -0.0005510105402208865, "logps/rejected": -2.346940517425537, "loss": 0.7738, "nll_loss": 0.19343721866607666, "rewards/accuracies": 1.0, "rewards/chosen": -5.510105620487593e-05, "rewards/margins": 0.23463895916938782, "rewards/rejected": -0.23469404876232147, "step": 8882 }, { "epoch": 6.143153526970955, "grad_norm": 5.670907974243164, "learning_rate": 2.1426924850161364e-05, "log_odds_chosen": 10.498161315917969, "log_odds_ratio": -0.00018444280431140214, "logits/chosen": -0.24533668160438538, "logits/rejected": -0.26434126496315, "logps/chosen": -0.0005119394045323133, "logps/rejected": -2.2618327140808105, "loss": 0.4762, "nll_loss": 0.11902209371328354, "rewards/accuracies": 1.0, "rewards/chosen": -5.119394336361438e-05, "rewards/margins": 0.2261320799589157, "rewards/rejected": -0.22618328034877777, "step": 8883 }, { "epoch": 6.143845089903182, "grad_norm": 4.66843318939209, "learning_rate": 2.1423082833871216e-05, "log_odds_chosen": 10.099947929382324, "log_odds_ratio": -0.00010283004667144269, "logits/chosen": -0.21136608719825745, "logits/rejected": -0.33208179473876953, "logps/chosen": -0.0003027537022717297, "logps/rejected": -1.6289397478103638, "loss": 0.4111, "nll_loss": 0.10277269780635834, "rewards/accuracies": 1.0, "rewards/chosen": -3.0275368771981448e-05, "rewards/margins": 0.16286370158195496, "rewards/rejected": -0.16289398074150085, "step": 8884 }, { "epoch": 6.144536652835408, "grad_norm": 6.934423923492432, "learning_rate": 2.141924081758107e-05, "log_odds_chosen": 10.522510528564453, "log_odds_ratio": -0.00021211769490037113, "logits/chosen": -0.11151409894227982, "logits/rejected": -0.2589746117591858, "logps/chosen": -0.0003004320606123656, "logps/rejected": -2.039461612701416, "loss": 0.6197, "nll_loss": 0.15489232540130615, "rewards/accuracies": 1.0, "rewards/chosen": -3.0043207516428083e-05, "rewards/margins": 0.20391613245010376, "rewards/rejected": -0.20394617319107056, "step": 8885 }, { "epoch": 6.145228215767635, "grad_norm": 6.154320240020752, "learning_rate": 2.1415398801290918e-05, "log_odds_chosen": 10.276860237121582, "log_odds_ratio": -0.00023392810544464737, "logits/chosen": -0.30178678035736084, "logits/rejected": -0.3277810215950012, "logps/chosen": -0.00093194650253281, "logps/rejected": -2.405626058578491, "loss": 0.6904, "nll_loss": 0.1725805401802063, "rewards/accuracies": 1.0, "rewards/chosen": -9.319464879808947e-05, "rewards/margins": 0.24046942591667175, "rewards/rejected": -0.24056261777877808, "step": 8886 }, { "epoch": 6.145919778699862, "grad_norm": 4.306264400482178, "learning_rate": 2.141155678500077e-05, "log_odds_chosen": 10.523026466369629, "log_odds_ratio": -0.00015434774104505777, "logits/chosen": -0.4924680292606354, "logits/rejected": -0.5587255954742432, "logps/chosen": -0.0001923997770063579, "logps/rejected": -1.9562420845031738, "loss": 0.4092, "nll_loss": 0.10229238867759705, "rewards/accuracies": 1.0, "rewards/chosen": -1.9239978428231552e-05, "rewards/margins": 0.19560497999191284, "rewards/rejected": -0.1956242024898529, "step": 8887 }, { "epoch": 6.146611341632089, "grad_norm": 4.622827529907227, "learning_rate": 2.1407714768710623e-05, "log_odds_chosen": 9.643756866455078, "log_odds_ratio": -0.00021520015434361994, "logits/chosen": -0.06347708404064178, "logits/rejected": -0.053321439772844315, "logps/chosen": -0.0004967688000760972, "logps/rejected": -1.9408763647079468, "loss": 0.6166, "nll_loss": 0.15413503348827362, "rewards/accuracies": 1.0, "rewards/chosen": -4.9676880735205486e-05, "rewards/margins": 0.19403798878192902, "rewards/rejected": -0.1940876543521881, "step": 8888 }, { "epoch": 6.147302904564316, "grad_norm": 6.192646026611328, "learning_rate": 2.1403872752420472e-05, "log_odds_chosen": 10.072959899902344, "log_odds_ratio": -0.0002504573785699904, "logits/chosen": -0.3247324824333191, "logits/rejected": -0.39910486340522766, "logps/chosen": -0.0004856810555793345, "logps/rejected": -1.7511827945709229, "loss": 0.5946, "nll_loss": 0.14862856268882751, "rewards/accuracies": 1.0, "rewards/chosen": -4.8568108468316495e-05, "rewards/margins": 0.17506971955299377, "rewards/rejected": -0.17511829733848572, "step": 8889 }, { "epoch": 6.1479944674965425, "grad_norm": 4.0776543617248535, "learning_rate": 2.140003073613032e-05, "log_odds_chosen": 11.533493041992188, "log_odds_ratio": -4.676056050811894e-05, "logits/chosen": -0.2515339255332947, "logits/rejected": -0.274416983127594, "logps/chosen": -0.00019514464656822383, "logps/rejected": -2.8244099617004395, "loss": 0.9146, "nll_loss": 0.2286345511674881, "rewards/accuracies": 1.0, "rewards/chosen": -1.9514465748216026e-05, "rewards/margins": 0.2824214696884155, "rewards/rejected": -0.28244102001190186, "step": 8890 }, { "epoch": 6.148686030428769, "grad_norm": 5.191386699676514, "learning_rate": 2.1396188719840173e-05, "log_odds_chosen": 11.381552696228027, "log_odds_ratio": -1.6261165001196787e-05, "logits/chosen": -0.19675296545028687, "logits/rejected": -0.3034244179725647, "logps/chosen": -0.0002262248599436134, "logps/rejected": -2.7013492584228516, "loss": 0.475, "nll_loss": 0.11874811351299286, "rewards/accuracies": 1.0, "rewards/chosen": -2.262248563056346e-05, "rewards/margins": 0.2701122760772705, "rewards/rejected": -0.27013492584228516, "step": 8891 }, { "epoch": 6.149377593360996, "grad_norm": 5.449078559875488, "learning_rate": 2.1392346703550022e-05, "log_odds_chosen": 12.33166217803955, "log_odds_ratio": -1.2560204595502e-05, "logits/chosen": -0.5773420929908752, "logits/rejected": -0.6253620386123657, "logps/chosen": -0.00010516971087781712, "logps/rejected": -2.8040151596069336, "loss": 0.8042, "nll_loss": 0.20104244351387024, "rewards/accuracies": 1.0, "rewards/chosen": -1.0516971997276414e-05, "rewards/margins": 0.28039100766181946, "rewards/rejected": -0.2804015278816223, "step": 8892 }, { "epoch": 6.150069156293223, "grad_norm": 6.951034069061279, "learning_rate": 2.1388504687259875e-05, "log_odds_chosen": 10.728132247924805, "log_odds_ratio": -5.662855619448237e-05, "logits/chosen": -0.44142085313796997, "logits/rejected": -0.5342628955841064, "logps/chosen": -0.000431107881013304, "logps/rejected": -2.6346278190612793, "loss": 0.5627, "nll_loss": 0.14066919684410095, "rewards/accuracies": 1.0, "rewards/chosen": -4.311078737373464e-05, "rewards/margins": 0.26341962814331055, "rewards/rejected": -0.26346278190612793, "step": 8893 }, { "epoch": 6.15076071922545, "grad_norm": 5.496524810791016, "learning_rate": 2.1384662670969727e-05, "log_odds_chosen": 10.09290599822998, "log_odds_ratio": -0.00015022409206721932, "logits/chosen": -0.41256386041641235, "logits/rejected": -0.4960746169090271, "logps/chosen": -0.00129657459910959, "logps/rejected": -2.2923073768615723, "loss": 0.4069, "nll_loss": 0.1017158180475235, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012965746282134205, "rewards/margins": 0.2291010618209839, "rewards/rejected": -0.22923073172569275, "step": 8894 }, { "epoch": 6.151452282157677, "grad_norm": 4.6340651512146, "learning_rate": 2.1380820654679576e-05, "log_odds_chosen": 9.381574630737305, "log_odds_ratio": -0.00721169076859951, "logits/chosen": -0.3064397871494293, "logits/rejected": -0.21976573765277863, "logps/chosen": -0.004331896547228098, "logps/rejected": -1.9964240789413452, "loss": 0.4484, "nll_loss": 0.11139068752527237, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004331897071097046, "rewards/margins": 0.19920922815799713, "rewards/rejected": -0.19964241981506348, "step": 8895 }, { "epoch": 6.1521438450899035, "grad_norm": 5.038140296936035, "learning_rate": 2.137697863838943e-05, "log_odds_chosen": 10.987846374511719, "log_odds_ratio": -4.962710227118805e-05, "logits/chosen": -0.20362409949302673, "logits/rejected": -0.23671866953372955, "logps/chosen": -0.00026456365594640374, "logps/rejected": -2.0418331623077393, "loss": 0.5224, "nll_loss": 0.13059036433696747, "rewards/accuracies": 1.0, "rewards/chosen": -2.6456365958438255e-05, "rewards/margins": 0.20415686070919037, "rewards/rejected": -0.20418329536914825, "step": 8896 }, { "epoch": 6.15283540802213, "grad_norm": 7.108769416809082, "learning_rate": 2.137313662209928e-05, "log_odds_chosen": 9.597886085510254, "log_odds_ratio": -0.0005740196211263537, "logits/chosen": -0.19212770462036133, "logits/rejected": -0.33647072315216064, "logps/chosen": -0.0010434961877763271, "logps/rejected": -1.7020900249481201, "loss": 0.6459, "nll_loss": 0.16142773628234863, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010434961586724967, "rewards/margins": 0.17010465264320374, "rewards/rejected": -0.17020899057388306, "step": 8897 }, { "epoch": 6.153526970954357, "grad_norm": 5.511011600494385, "learning_rate": 2.136929460580913e-05, "log_odds_chosen": 10.08315658569336, "log_odds_ratio": -0.0014666010392829776, "logits/chosen": -0.0493057519197464, "logits/rejected": -0.06691646575927734, "logps/chosen": -0.0014571938663721085, "logps/rejected": -1.961158037185669, "loss": 0.7142, "nll_loss": 0.17840927839279175, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014571940118912607, "rewards/margins": 0.1959700882434845, "rewards/rejected": -0.19611582159996033, "step": 8898 }, { "epoch": 6.154218533886584, "grad_norm": 7.098496913909912, "learning_rate": 2.136545258951898e-05, "log_odds_chosen": 10.316756248474121, "log_odds_ratio": -0.00021556735737249255, "logits/chosen": -0.46447503566741943, "logits/rejected": -0.5707484483718872, "logps/chosen": -0.00018599169561639428, "logps/rejected": -1.7750988006591797, "loss": 0.8972, "nll_loss": 0.2242662012577057, "rewards/accuracies": 1.0, "rewards/chosen": -1.859917028923519e-05, "rewards/margins": 0.17749127745628357, "rewards/rejected": -0.1775098741054535, "step": 8899 }, { "epoch": 6.154910096818811, "grad_norm": 11.79477596282959, "learning_rate": 2.1361610573228832e-05, "log_odds_chosen": 11.65049934387207, "log_odds_ratio": -1.220466674567433e-05, "logits/chosen": -0.15692803263664246, "logits/rejected": -0.1678299605846405, "logps/chosen": -0.00012023936142213643, "logps/rejected": -2.5337300300598145, "loss": 0.7389, "nll_loss": 0.18471355736255646, "rewards/accuracies": 1.0, "rewards/chosen": -1.2023936506011523e-05, "rewards/margins": 0.2533610165119171, "rewards/rejected": -0.25337302684783936, "step": 8900 }, { "epoch": 6.155601659751038, "grad_norm": 5.2032318115234375, "learning_rate": 2.135776855693868e-05, "log_odds_chosen": 11.28200912475586, "log_odds_ratio": -6.505651253974065e-05, "logits/chosen": -0.3721749484539032, "logits/rejected": -0.4769337773323059, "logps/chosen": -0.0004726629122160375, "logps/rejected": -3.120828151702881, "loss": 0.508, "nll_loss": 0.12698745727539062, "rewards/accuracies": 1.0, "rewards/chosen": -4.72662941319868e-05, "rewards/margins": 0.31203556060791016, "rewards/rejected": -0.31208279728889465, "step": 8901 }, { "epoch": 6.1562932226832645, "grad_norm": 7.075010776519775, "learning_rate": 2.1353926540648533e-05, "log_odds_chosen": 10.750005722045898, "log_odds_ratio": -7.514657045248896e-05, "logits/chosen": -0.44335147738456726, "logits/rejected": -0.45919889211654663, "logps/chosen": -0.0005746853421442211, "logps/rejected": -2.118549346923828, "loss": 0.5021, "nll_loss": 0.125524640083313, "rewards/accuracies": 1.0, "rewards/chosen": -5.74685400351882e-05, "rewards/margins": 0.21179747581481934, "rewards/rejected": -0.211854949593544, "step": 8902 }, { "epoch": 6.156984785615491, "grad_norm": 6.851710796356201, "learning_rate": 2.1350084524358386e-05, "log_odds_chosen": 10.875438690185547, "log_odds_ratio": -7.797985745128244e-05, "logits/chosen": -0.32115495204925537, "logits/rejected": -0.40007078647613525, "logps/chosen": -0.0004854054714087397, "logps/rejected": -2.01396107673645, "loss": 0.5387, "nll_loss": 0.13466191291809082, "rewards/accuracies": 1.0, "rewards/chosen": -4.854054714087397e-05, "rewards/margins": 0.20134755969047546, "rewards/rejected": -0.20139610767364502, "step": 8903 }, { "epoch": 6.157676348547718, "grad_norm": 10.241081237792969, "learning_rate": 2.1346242508068235e-05, "log_odds_chosen": 10.226788520812988, "log_odds_ratio": -0.00022474183060694486, "logits/chosen": -0.7302020192146301, "logits/rejected": -0.7752872109413147, "logps/chosen": -0.00029912887839600444, "logps/rejected": -1.4040298461914062, "loss": 0.4306, "nll_loss": 0.10763468593358994, "rewards/accuracies": 1.0, "rewards/chosen": -2.991289147757925e-05, "rewards/margins": 0.1403730809688568, "rewards/rejected": -0.14040298759937286, "step": 8904 }, { "epoch": 6.158367911479945, "grad_norm": 5.993839263916016, "learning_rate": 2.1342400491778087e-05, "log_odds_chosen": 10.475533485412598, "log_odds_ratio": -7.221288979053497e-05, "logits/chosen": -0.3128795027732849, "logits/rejected": -0.3192360997200012, "logps/chosen": -0.00036443519638851285, "logps/rejected": -2.456841468811035, "loss": 0.4818, "nll_loss": 0.12044843286275864, "rewards/accuracies": 1.0, "rewards/chosen": -3.644351818365976e-05, "rewards/margins": 0.24564771354198456, "rewards/rejected": -0.24568414688110352, "step": 8905 }, { "epoch": 6.159059474412172, "grad_norm": 7.920623302459717, "learning_rate": 2.133855847548794e-05, "log_odds_chosen": 9.961012840270996, "log_odds_ratio": -0.00036133910180069506, "logits/chosen": -0.31349772214889526, "logits/rejected": -0.35793423652648926, "logps/chosen": -0.0008964145672507584, "logps/rejected": -1.9681870937347412, "loss": 0.4843, "nll_loss": 0.12103552371263504, "rewards/accuracies": 1.0, "rewards/chosen": -8.964145672507584e-05, "rewards/margins": 0.19672906398773193, "rewards/rejected": -0.19681870937347412, "step": 8906 }, { "epoch": 6.159751037344399, "grad_norm": 5.189927101135254, "learning_rate": 2.133471645919779e-05, "log_odds_chosen": 10.290582656860352, "log_odds_ratio": -5.6040276831481606e-05, "logits/chosen": -0.4642256498336792, "logits/rejected": -0.5244139432907104, "logps/chosen": -0.0004029185511171818, "logps/rejected": -2.1179988384246826, "loss": 0.4601, "nll_loss": 0.11502990871667862, "rewards/accuracies": 1.0, "rewards/chosen": -4.0291859477292746e-05, "rewards/margins": 0.21175959706306458, "rewards/rejected": -0.21179988980293274, "step": 8907 }, { "epoch": 6.1604426002766255, "grad_norm": 7.469080924987793, "learning_rate": 2.1330874442907638e-05, "log_odds_chosen": 10.109557151794434, "log_odds_ratio": -6.607791874557734e-05, "logits/chosen": -0.637088418006897, "logits/rejected": -0.6275466680526733, "logps/chosen": -0.0007182995905168355, "logps/rejected": -1.944756269454956, "loss": 0.5628, "nll_loss": 0.14069898426532745, "rewards/accuracies": 1.0, "rewards/chosen": -7.18299561413005e-05, "rewards/margins": 0.19440379738807678, "rewards/rejected": -0.19447562098503113, "step": 8908 }, { "epoch": 6.161134163208852, "grad_norm": 5.130368232727051, "learning_rate": 2.132703242661749e-05, "log_odds_chosen": 11.349884033203125, "log_odds_ratio": -2.6829929993255064e-05, "logits/chosen": -0.1395387351512909, "logits/rejected": -0.07001563906669617, "logps/chosen": -0.00030773127218708396, "logps/rejected": -2.8125007152557373, "loss": 1.1286, "nll_loss": 0.28215882182121277, "rewards/accuracies": 1.0, "rewards/chosen": -3.077312794630416e-05, "rewards/margins": 0.2812193036079407, "rewards/rejected": -0.28125008940696716, "step": 8909 }, { "epoch": 6.161825726141079, "grad_norm": 5.060723781585693, "learning_rate": 2.132319041032734e-05, "log_odds_chosen": 10.80126953125, "log_odds_ratio": -3.217908306396566e-05, "logits/chosen": -0.5110337138175964, "logits/rejected": -0.5838327407836914, "logps/chosen": -0.00011997703404631466, "logps/rejected": -1.7408264875411987, "loss": 1.1244, "nll_loss": 0.28110799193382263, "rewards/accuracies": 1.0, "rewards/chosen": -1.1997703040833585e-05, "rewards/margins": 0.17407065629959106, "rewards/rejected": -0.1740826517343521, "step": 8910 }, { "epoch": 6.162517289073306, "grad_norm": 8.353875160217285, "learning_rate": 2.131934839403719e-05, "log_odds_chosen": 10.599270820617676, "log_odds_ratio": -0.0002233553968835622, "logits/chosen": -0.5864130258560181, "logits/rejected": -0.667081356048584, "logps/chosen": -0.00040467121289111674, "logps/rejected": -2.2740707397460938, "loss": 0.737, "nll_loss": 0.18422240018844604, "rewards/accuracies": 1.0, "rewards/chosen": -4.04671227443032e-05, "rewards/margins": 0.2273666113615036, "rewards/rejected": -0.2274070680141449, "step": 8911 }, { "epoch": 6.163208852005533, "grad_norm": 7.310943603515625, "learning_rate": 2.1315506377747044e-05, "log_odds_chosen": 9.501506805419922, "log_odds_ratio": -0.000398534961277619, "logits/chosen": -0.3367373049259186, "logits/rejected": -0.38235363364219666, "logps/chosen": -0.0006585284136235714, "logps/rejected": -1.6440269947052002, "loss": 0.5572, "nll_loss": 0.13925069570541382, "rewards/accuracies": 1.0, "rewards/chosen": -6.585283699678257e-05, "rewards/margins": 0.16433684527873993, "rewards/rejected": -0.16440270841121674, "step": 8912 }, { "epoch": 6.16390041493776, "grad_norm": 5.013652324676514, "learning_rate": 2.1311664361456893e-05, "log_odds_chosen": 11.695579528808594, "log_odds_ratio": -1.577230796101503e-05, "logits/chosen": -0.5814932584762573, "logits/rejected": -0.5627788305282593, "logps/chosen": -0.00018127662769984454, "logps/rejected": -2.960634231567383, "loss": 0.6214, "nll_loss": 0.155350923538208, "rewards/accuracies": 1.0, "rewards/chosen": -1.8127662769984454e-05, "rewards/margins": 0.29604530334472656, "rewards/rejected": -0.29606345295906067, "step": 8913 }, { "epoch": 6.1645919778699865, "grad_norm": 9.031291007995605, "learning_rate": 2.1307822345166746e-05, "log_odds_chosen": 11.528639793395996, "log_odds_ratio": -2.0978211978217587e-05, "logits/chosen": -0.34808099269866943, "logits/rejected": -0.4555736482143402, "logps/chosen": -0.00028040894540026784, "logps/rejected": -2.9485039710998535, "loss": 1.3101, "nll_loss": 0.3275301158428192, "rewards/accuracies": 1.0, "rewards/chosen": -2.804089308483526e-05, "rewards/margins": 0.2948223352432251, "rewards/rejected": -0.2948504090309143, "step": 8914 }, { "epoch": 6.165283540802213, "grad_norm": 5.164880752563477, "learning_rate": 2.1303980328876598e-05, "log_odds_chosen": 10.544919967651367, "log_odds_ratio": -0.00013954663882032037, "logits/chosen": 0.003693707287311554, "logits/rejected": -0.03545793890953064, "logps/chosen": -0.00025245817960239947, "logps/rejected": -2.0628929138183594, "loss": 0.462, "nll_loss": 0.11548123508691788, "rewards/accuracies": 1.0, "rewards/chosen": -2.524581941543147e-05, "rewards/margins": 0.20626406371593475, "rewards/rejected": -0.20628932118415833, "step": 8915 }, { "epoch": 6.16597510373444, "grad_norm": 4.7397942543029785, "learning_rate": 2.1300138312586447e-05, "log_odds_chosen": 10.250405311584473, "log_odds_ratio": -8.911825716495514e-05, "logits/chosen": -0.23642602562904358, "logits/rejected": -0.3013024628162384, "logps/chosen": -0.0003032445383723825, "logps/rejected": -1.7590906620025635, "loss": 0.5796, "nll_loss": 0.14488404989242554, "rewards/accuracies": 1.0, "rewards/chosen": -3.0324456020025536e-05, "rewards/margins": 0.17587874829769135, "rewards/rejected": -0.17590907216072083, "step": 8916 }, { "epoch": 6.166666666666667, "grad_norm": 5.335708141326904, "learning_rate": 2.1296296296296296e-05, "log_odds_chosen": 10.717148780822754, "log_odds_ratio": -7.697472028667107e-05, "logits/chosen": -0.5630709528923035, "logits/rejected": -0.6455743312835693, "logps/chosen": -0.0003123174246866256, "logps/rejected": -2.522401809692383, "loss": 0.587, "nll_loss": 0.14673317968845367, "rewards/accuracies": 1.0, "rewards/chosen": -3.123174246866256e-05, "rewards/margins": 0.252208948135376, "rewards/rejected": -0.2522401809692383, "step": 8917 }, { "epoch": 6.167358229598894, "grad_norm": 6.6418867111206055, "learning_rate": 2.129245428000615e-05, "log_odds_chosen": 10.8656587600708, "log_odds_ratio": -0.00013898345059715211, "logits/chosen": -0.466498464345932, "logits/rejected": -0.48642832040786743, "logps/chosen": -0.0003943238698411733, "logps/rejected": -2.5864686965942383, "loss": 0.8249, "nll_loss": 0.20621107518672943, "rewards/accuracies": 1.0, "rewards/chosen": -3.9432390622096136e-05, "rewards/margins": 0.2586074471473694, "rewards/rejected": -0.2586469054222107, "step": 8918 }, { "epoch": 6.168049792531121, "grad_norm": 7.2308526039123535, "learning_rate": 2.1288612263715998e-05, "log_odds_chosen": 10.071611404418945, "log_odds_ratio": -0.0009543601772747934, "logits/chosen": 0.01656036078929901, "logits/rejected": -0.1201089546084404, "logps/chosen": -0.0030811289325356483, "logps/rejected": -2.486435651779175, "loss": 0.8668, "nll_loss": 0.2165965735912323, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030811288161203265, "rewards/margins": 0.24833545088768005, "rewards/rejected": -0.24864357709884644, "step": 8919 }, { "epoch": 6.1687413554633475, "grad_norm": 4.748799800872803, "learning_rate": 2.128477024742585e-05, "log_odds_chosen": 11.16125202178955, "log_odds_ratio": -5.2723400585819036e-05, "logits/chosen": -0.24413493275642395, "logits/rejected": -0.3363977074623108, "logps/chosen": -0.0002226789656560868, "logps/rejected": -2.519413948059082, "loss": 0.6246, "nll_loss": 0.15613305568695068, "rewards/accuracies": 1.0, "rewards/chosen": -2.226789729320444e-05, "rewards/margins": 0.251919150352478, "rewards/rejected": -0.25194138288497925, "step": 8920 }, { "epoch": 6.169432918395574, "grad_norm": 12.29122257232666, "learning_rate": 2.1280928231135703e-05, "log_odds_chosen": 9.678821563720703, "log_odds_ratio": -6.596092134714127e-05, "logits/chosen": -0.59602952003479, "logits/rejected": -0.5715896487236023, "logps/chosen": -0.00031758646946400404, "logps/rejected": -1.6542768478393555, "loss": 0.4999, "nll_loss": 0.12496354430913925, "rewards/accuracies": 1.0, "rewards/chosen": -3.175864912918769e-05, "rewards/margins": 0.16539591550827026, "rewards/rejected": -0.16542768478393555, "step": 8921 }, { "epoch": 6.170124481327801, "grad_norm": 6.445894718170166, "learning_rate": 2.127708621484555e-05, "log_odds_chosen": 11.29629898071289, "log_odds_ratio": -2.9858012567274272e-05, "logits/chosen": -0.3435760736465454, "logits/rejected": -0.36505529284477234, "logps/chosen": -0.0001979438675334677, "logps/rejected": -2.4094908237457275, "loss": 0.6538, "nll_loss": 0.16345354914665222, "rewards/accuracies": 1.0, "rewards/chosen": -1.9794388208538294e-05, "rewards/margins": 0.24092930555343628, "rewards/rejected": -0.2409490942955017, "step": 8922 }, { "epoch": 6.170816044260028, "grad_norm": 5.647541522979736, "learning_rate": 2.1273244198555404e-05, "log_odds_chosen": 11.45530891418457, "log_odds_ratio": -1.723444256640505e-05, "logits/chosen": -0.5780686736106873, "logits/rejected": -0.6104984283447266, "logps/chosen": -0.0002975270035676658, "logps/rejected": -2.274785280227661, "loss": 0.5488, "nll_loss": 0.13718825578689575, "rewards/accuracies": 1.0, "rewards/chosen": -2.9752698537777178e-05, "rewards/margins": 0.22744877636432648, "rewards/rejected": -0.2274785190820694, "step": 8923 }, { "epoch": 6.171507607192255, "grad_norm": 6.984696388244629, "learning_rate": 2.1269402182265257e-05, "log_odds_chosen": 10.067432403564453, "log_odds_ratio": -0.0020822491496801376, "logits/chosen": -0.58390873670578, "logits/rejected": -0.6317079067230225, "logps/chosen": -0.00180349953006953, "logps/rejected": -1.6590774059295654, "loss": 0.7266, "nll_loss": 0.18145343661308289, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001803499471861869, "rewards/margins": 0.1657274067401886, "rewards/rejected": -0.16590775549411774, "step": 8924 }, { "epoch": 6.172199170124482, "grad_norm": 6.403304576873779, "learning_rate": 2.1265560165975106e-05, "log_odds_chosen": 9.484448432922363, "log_odds_ratio": -0.00029352630372159183, "logits/chosen": -0.7287478446960449, "logits/rejected": -0.7109928131103516, "logps/chosen": -0.00045713261351920664, "logps/rejected": -1.874360203742981, "loss": 1.3442, "nll_loss": 0.3360257148742676, "rewards/accuracies": 1.0, "rewards/chosen": -4.571326280711219e-05, "rewards/margins": 0.1873903125524521, "rewards/rejected": -0.18743601441383362, "step": 8925 }, { "epoch": 6.172890733056708, "grad_norm": 6.790854454040527, "learning_rate": 2.1261718149684955e-05, "log_odds_chosen": 11.074459075927734, "log_odds_ratio": -3.4488293749745935e-05, "logits/chosen": -0.1194690614938736, "logits/rejected": -0.39182788133621216, "logps/chosen": -0.00013691597268916667, "logps/rejected": -2.0365371704101562, "loss": 0.6756, "nll_loss": 0.16889050602912903, "rewards/accuracies": 1.0, "rewards/chosen": -1.369159872410819e-05, "rewards/margins": 0.20364001393318176, "rewards/rejected": -0.20365369319915771, "step": 8926 }, { "epoch": 6.173582295988935, "grad_norm": 5.842673301696777, "learning_rate": 2.1257876133394807e-05, "log_odds_chosen": 11.106877326965332, "log_odds_ratio": -0.00012637543841265142, "logits/chosen": -0.42743033170700073, "logits/rejected": -0.4144943952560425, "logps/chosen": -0.00023457163479179144, "logps/rejected": -2.5342657566070557, "loss": 1.133, "nll_loss": 0.28323131799697876, "rewards/accuracies": 1.0, "rewards/chosen": -2.3457163479179144e-05, "rewards/margins": 0.2534031271934509, "rewards/rejected": -0.25342658162117004, "step": 8927 }, { "epoch": 6.174273858921162, "grad_norm": 5.868600845336914, "learning_rate": 2.1254034117104656e-05, "log_odds_chosen": 10.483613967895508, "log_odds_ratio": -3.841664147330448e-05, "logits/chosen": -0.6506422162055969, "logits/rejected": -0.6948019862174988, "logps/chosen": -0.0002064492437057197, "logps/rejected": -1.6701231002807617, "loss": 0.5622, "nll_loss": 0.14053946733474731, "rewards/accuracies": 1.0, "rewards/chosen": -2.0644925825763494e-05, "rewards/margins": 0.1669916808605194, "rewards/rejected": -0.1670123189687729, "step": 8928 }, { "epoch": 6.174965421853389, "grad_norm": 6.216475009918213, "learning_rate": 2.125019210081451e-05, "log_odds_chosen": 10.1587495803833, "log_odds_ratio": -8.018967491807416e-05, "logits/chosen": -0.1432342678308487, "logits/rejected": -0.23333218693733215, "logps/chosen": -0.000549984397366643, "logps/rejected": -2.5280392169952393, "loss": 0.9891, "nll_loss": 0.24727122485637665, "rewards/accuracies": 1.0, "rewards/chosen": -5.4998439736664295e-05, "rewards/margins": 0.25274893641471863, "rewards/rejected": -0.2528039216995239, "step": 8929 }, { "epoch": 6.175656984785616, "grad_norm": 9.768843650817871, "learning_rate": 2.124635008452436e-05, "log_odds_chosen": 12.131757736206055, "log_odds_ratio": -7.5639613896782976e-06, "logits/chosen": -0.8124226331710815, "logits/rejected": -0.8450635671615601, "logps/chosen": -9.760225657373667e-05, "logps/rejected": -2.8064818382263184, "loss": 0.7664, "nll_loss": 0.19160333275794983, "rewards/accuracies": 1.0, "rewards/chosen": -9.760226021171547e-06, "rewards/margins": 0.2806384265422821, "rewards/rejected": -0.28064820170402527, "step": 8930 }, { "epoch": 6.176348547717843, "grad_norm": 4.7237629890441895, "learning_rate": 2.124250806823421e-05, "log_odds_chosen": 10.60220718383789, "log_odds_ratio": -0.0011569132329896092, "logits/chosen": 0.03953489661216736, "logits/rejected": 0.026873737573623657, "logps/chosen": -0.0003797906683757901, "logps/rejected": -2.4322993755340576, "loss": 0.7953, "nll_loss": 0.19870004057884216, "rewards/accuracies": 1.0, "rewards/chosen": -3.797906538238749e-05, "rewards/margins": 0.24319197237491608, "rewards/rejected": -0.2432299554347992, "step": 8931 }, { "epoch": 6.177040110650069, "grad_norm": 11.197449684143066, "learning_rate": 2.1238666051944063e-05, "log_odds_chosen": 10.218611717224121, "log_odds_ratio": -4.907567927148193e-05, "logits/chosen": -0.0748477429151535, "logits/rejected": -0.015666451305150986, "logps/chosen": -0.0007873535505495965, "logps/rejected": -2.362804889678955, "loss": 0.6111, "nll_loss": 0.1527731865644455, "rewards/accuracies": 1.0, "rewards/chosen": -7.873535651015118e-05, "rewards/margins": 0.23620177805423737, "rewards/rejected": -0.23628050088882446, "step": 8932 }, { "epoch": 6.177731673582296, "grad_norm": 4.892848491668701, "learning_rate": 2.1234824035653915e-05, "log_odds_chosen": 9.225906372070312, "log_odds_ratio": -0.0011509408941492438, "logits/chosen": -0.2720785140991211, "logits/rejected": -0.1349216103553772, "logps/chosen": -0.001067000557668507, "logps/rejected": -1.2327617406845093, "loss": 0.7489, "nll_loss": 0.18709851801395416, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010670005576685071, "rewards/margins": 0.12316948175430298, "rewards/rejected": -0.12327618151903152, "step": 8933 }, { "epoch": 6.178423236514523, "grad_norm": 4.760075569152832, "learning_rate": 2.1230982019363764e-05, "log_odds_chosen": 10.438307762145996, "log_odds_ratio": -0.00014227713108994067, "logits/chosen": -0.4747689962387085, "logits/rejected": -0.5722922086715698, "logps/chosen": -0.0007225346053019166, "logps/rejected": -2.717395067214966, "loss": 0.5699, "nll_loss": 0.14247281849384308, "rewards/accuracies": 1.0, "rewards/chosen": -7.225346780614927e-05, "rewards/margins": 0.2716672420501709, "rewards/rejected": -0.27173948287963867, "step": 8934 }, { "epoch": 6.17911479944675, "grad_norm": 5.34441614151001, "learning_rate": 2.1227140003073613e-05, "log_odds_chosen": 10.103221893310547, "log_odds_ratio": -0.00013670921907760203, "logits/chosen": -0.5896474123001099, "logits/rejected": -0.6704520583152771, "logps/chosen": -0.0009348234161734581, "logps/rejected": -1.964598536491394, "loss": 0.6346, "nll_loss": 0.1586480289697647, "rewards/accuracies": 1.0, "rewards/chosen": -9.34823474381119e-05, "rewards/margins": 0.19636636972427368, "rewards/rejected": -0.19645985960960388, "step": 8935 }, { "epoch": 6.179806362378977, "grad_norm": 7.0526933670043945, "learning_rate": 2.1223297986783462e-05, "log_odds_chosen": 11.831838607788086, "log_odds_ratio": -1.501597034803126e-05, "logits/chosen": -0.7381616830825806, "logits/rejected": -0.796602725982666, "logps/chosen": -0.00020313140703365207, "logps/rejected": -2.7845511436462402, "loss": 0.858, "nll_loss": 0.21448718011379242, "rewards/accuracies": 1.0, "rewards/chosen": -2.031314215855673e-05, "rewards/margins": 0.2784348130226135, "rewards/rejected": -0.27845510840415955, "step": 8936 }, { "epoch": 6.180497925311204, "grad_norm": 10.067313194274902, "learning_rate": 2.1219455970493315e-05, "log_odds_chosen": 10.108768463134766, "log_odds_ratio": -0.00024934698012657464, "logits/chosen": -0.5302776098251343, "logits/rejected": -0.5117073059082031, "logps/chosen": -0.0007634575595147908, "logps/rejected": -1.7113847732543945, "loss": 0.6449, "nll_loss": 0.16119928658008575, "rewards/accuracies": 1.0, "rewards/chosen": -7.634575013071299e-05, "rewards/margins": 0.1710621416568756, "rewards/rejected": -0.1711384654045105, "step": 8937 }, { "epoch": 6.18118948824343, "grad_norm": 9.377707481384277, "learning_rate": 2.1215613954203167e-05, "log_odds_chosen": 9.925009727478027, "log_odds_ratio": -0.0011858758516609669, "logits/chosen": -0.3536968231201172, "logits/rejected": -0.45212236046791077, "logps/chosen": -0.0005519436672329903, "logps/rejected": -2.1006224155426025, "loss": 1.1264, "nll_loss": 0.2814798951148987, "rewards/accuracies": 1.0, "rewards/chosen": -5.519437399925664e-05, "rewards/margins": 0.21000702679157257, "rewards/rejected": -0.21006223559379578, "step": 8938 }, { "epoch": 6.181881051175657, "grad_norm": 7.41147518157959, "learning_rate": 2.1211771937913016e-05, "log_odds_chosen": 10.760738372802734, "log_odds_ratio": -3.743385968846269e-05, "logits/chosen": -0.44082483649253845, "logits/rejected": -0.5385691523551941, "logps/chosen": -0.00021795716020278633, "logps/rejected": -2.0892586708068848, "loss": 0.615, "nll_loss": 0.15374669432640076, "rewards/accuracies": 1.0, "rewards/chosen": -2.1795716747874394e-05, "rewards/margins": 0.20890408754348755, "rewards/rejected": -0.20892588794231415, "step": 8939 }, { "epoch": 6.182572614107884, "grad_norm": 5.886544227600098, "learning_rate": 2.120792992162287e-05, "log_odds_chosen": 9.831256866455078, "log_odds_ratio": -0.000369185465388, "logits/chosen": -0.6895782351493835, "logits/rejected": -0.7876423597335815, "logps/chosen": -0.0003805963206104934, "logps/rejected": -1.545591115951538, "loss": 0.6755, "nll_loss": 0.16883057355880737, "rewards/accuracies": 1.0, "rewards/chosen": -3.805962478509173e-05, "rewards/margins": 0.15452106297016144, "rewards/rejected": -0.15455910563468933, "step": 8940 }, { "epoch": 6.183264177040111, "grad_norm": 7.385894298553467, "learning_rate": 2.120408790533272e-05, "log_odds_chosen": 11.309425354003906, "log_odds_ratio": -2.0229526853654534e-05, "logits/chosen": 0.1541682481765747, "logits/rejected": 0.07405371963977814, "logps/chosen": -0.00015169157995842397, "logps/rejected": -2.2509653568267822, "loss": 1.0316, "nll_loss": 0.25790539383888245, "rewards/accuracies": 1.0, "rewards/chosen": -1.5169158359640278e-05, "rewards/margins": 0.22508138418197632, "rewards/rejected": -0.22509652376174927, "step": 8941 }, { "epoch": 6.183955739972338, "grad_norm": 22.69285774230957, "learning_rate": 2.120024588904257e-05, "log_odds_chosen": 11.588238716125488, "log_odds_ratio": -4.003260255558416e-05, "logits/chosen": -0.0912555530667305, "logits/rejected": -0.20591235160827637, "logps/chosen": -0.00015075062401592731, "logps/rejected": -2.546660900115967, "loss": 0.6722, "nll_loss": 0.16805234551429749, "rewards/accuracies": 1.0, "rewards/chosen": -1.5075062947289553e-05, "rewards/margins": 0.2546510100364685, "rewards/rejected": -0.2546660602092743, "step": 8942 }, { "epoch": 6.1846473029045645, "grad_norm": 5.821830749511719, "learning_rate": 2.1196403872752422e-05, "log_odds_chosen": 11.451372146606445, "log_odds_ratio": -2.8443888368201442e-05, "logits/chosen": -0.44433489441871643, "logits/rejected": -0.5518810153007507, "logps/chosen": -0.00010296277469024062, "logps/rejected": -2.2659056186676025, "loss": 0.6998, "nll_loss": 0.17494189739227295, "rewards/accuracies": 1.0, "rewards/chosen": -1.02962767414283e-05, "rewards/margins": 0.22658026218414307, "rewards/rejected": -0.22659055888652802, "step": 8943 }, { "epoch": 6.185338865836791, "grad_norm": 5.573258876800537, "learning_rate": 2.119256185646227e-05, "log_odds_chosen": 10.505191802978516, "log_odds_ratio": -8.956159581430256e-05, "logits/chosen": -0.5057865381240845, "logits/rejected": -0.5452699661254883, "logps/chosen": -0.00032347580417990685, "logps/rejected": -2.196836471557617, "loss": 0.4975, "nll_loss": 0.12436286360025406, "rewards/accuracies": 1.0, "rewards/chosen": -3.234758332837373e-05, "rewards/margins": 0.21965131163597107, "rewards/rejected": -0.21968364715576172, "step": 8944 }, { "epoch": 6.186030428769018, "grad_norm": 8.397379875183105, "learning_rate": 2.118871984017212e-05, "log_odds_chosen": 10.539834976196289, "log_odds_ratio": -6.860620487714186e-05, "logits/chosen": -0.0445815809071064, "logits/rejected": -0.060250017791986465, "logps/chosen": -0.00033489393536001444, "logps/rejected": -2.3347554206848145, "loss": 0.6105, "nll_loss": 0.15261012315750122, "rewards/accuracies": 1.0, "rewards/chosen": -3.3489399356767535e-05, "rewards/margins": 0.2334420382976532, "rewards/rejected": -0.23347553610801697, "step": 8945 }, { "epoch": 6.186721991701245, "grad_norm": 25.599557876586914, "learning_rate": 2.1184877823881973e-05, "log_odds_chosen": 10.395254135131836, "log_odds_ratio": -7.127891149139032e-05, "logits/chosen": -0.8309057950973511, "logits/rejected": -0.9776581525802612, "logps/chosen": -0.00036339106736704707, "logps/rejected": -1.8712868690490723, "loss": 0.6645, "nll_loss": 0.16611744463443756, "rewards/accuracies": 1.0, "rewards/chosen": -3.633910819189623e-05, "rewards/margins": 0.1870923489332199, "rewards/rejected": -0.1871286928653717, "step": 8946 }, { "epoch": 6.187413554633472, "grad_norm": 8.522847175598145, "learning_rate": 2.1181035807591825e-05, "log_odds_chosen": 11.368314743041992, "log_odds_ratio": -2.3690898160566576e-05, "logits/chosen": -0.7033950090408325, "logits/rejected": -0.5988879203796387, "logps/chosen": -0.00023206671176012605, "logps/rejected": -2.6347384452819824, "loss": 0.6913, "nll_loss": 0.17283479869365692, "rewards/accuracies": 1.0, "rewards/chosen": -2.3206672267406248e-05, "rewards/margins": 0.26345065236091614, "rewards/rejected": -0.26347386837005615, "step": 8947 }, { "epoch": 6.188105117565699, "grad_norm": 6.687798976898193, "learning_rate": 2.1177193791301674e-05, "log_odds_chosen": 10.457347869873047, "log_odds_ratio": -9.949246305041015e-05, "logits/chosen": -0.6181915402412415, "logits/rejected": -0.5568039417266846, "logps/chosen": -0.0002464308054186404, "logps/rejected": -1.782206654548645, "loss": 1.0987, "nll_loss": 0.2746756970882416, "rewards/accuracies": 1.0, "rewards/chosen": -2.4643079086672515e-05, "rewards/margins": 0.1781960129737854, "rewards/rejected": -0.17822065949440002, "step": 8948 }, { "epoch": 6.1887966804979255, "grad_norm": 5.763555526733398, "learning_rate": 2.1173351775011527e-05, "log_odds_chosen": 9.946380615234375, "log_odds_ratio": -7.830550021026284e-05, "logits/chosen": -0.42581579089164734, "logits/rejected": -0.44059014320373535, "logps/chosen": -0.001672828570008278, "logps/rejected": -1.8614200353622437, "loss": 0.7887, "nll_loss": 0.1971677839756012, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001672828511800617, "rewards/margins": 0.18597471714019775, "rewards/rejected": -0.1861419975757599, "step": 8949 }, { "epoch": 6.189488243430152, "grad_norm": 5.339982509613037, "learning_rate": 2.116950975872138e-05, "log_odds_chosen": 10.638383865356445, "log_odds_ratio": -0.0004364659544080496, "logits/chosen": -0.5033034086227417, "logits/rejected": -0.5210574269294739, "logps/chosen": -0.00019398736185394228, "logps/rejected": -2.3503217697143555, "loss": 0.6866, "nll_loss": 0.1716083437204361, "rewards/accuracies": 1.0, "rewards/chosen": -1.9398736185394228e-05, "rewards/margins": 0.23501276969909668, "rewards/rejected": -0.23503217101097107, "step": 8950 }, { "epoch": 6.190179806362379, "grad_norm": 5.419443130493164, "learning_rate": 2.116566774243123e-05, "log_odds_chosen": 11.213875770568848, "log_odds_ratio": -2.873012635973282e-05, "logits/chosen": -1.0778733491897583, "logits/rejected": -1.0934827327728271, "logps/chosen": -0.0009469868382439017, "logps/rejected": -2.755531072616577, "loss": 0.6279, "nll_loss": 0.15698230266571045, "rewards/accuracies": 1.0, "rewards/chosen": -9.469868382439017e-05, "rewards/margins": 0.275458425283432, "rewards/rejected": -0.2755531072616577, "step": 8951 }, { "epoch": 6.190871369294606, "grad_norm": 9.115525245666504, "learning_rate": 2.116182572614108e-05, "log_odds_chosen": 11.05650806427002, "log_odds_ratio": -0.00020289364329073578, "logits/chosen": -0.9041266441345215, "logits/rejected": -0.829599916934967, "logps/chosen": -0.00024038890842348337, "logps/rejected": -2.259377956390381, "loss": 0.847, "nll_loss": 0.21172687411308289, "rewards/accuracies": 1.0, "rewards/chosen": -2.4038890842348337e-05, "rewards/margins": 0.22591374814510345, "rewards/rejected": -0.22593779861927032, "step": 8952 }, { "epoch": 6.191562932226833, "grad_norm": 6.934108257293701, "learning_rate": 2.115798370985093e-05, "log_odds_chosen": 9.981086730957031, "log_odds_ratio": -0.0009031962836161256, "logits/chosen": -0.5584157705307007, "logits/rejected": -0.6889655590057373, "logps/chosen": -0.0022818795405328274, "logps/rejected": -1.761110782623291, "loss": 0.7491, "nll_loss": 0.18718641996383667, "rewards/accuracies": 1.0, "rewards/chosen": -0.000228187971515581, "rewards/margins": 0.17588287591934204, "rewards/rejected": -0.17611107230186462, "step": 8953 }, { "epoch": 6.19225449515906, "grad_norm": 5.375219821929932, "learning_rate": 2.115414169356078e-05, "log_odds_chosen": 10.997356414794922, "log_odds_ratio": -3.314020068501122e-05, "logits/chosen": -0.620721697807312, "logits/rejected": -0.6426808834075928, "logps/chosen": -0.00013396151189226657, "logps/rejected": -1.832876443862915, "loss": 0.4577, "nll_loss": 0.11442224681377411, "rewards/accuracies": 1.0, "rewards/chosen": -1.3396151189226657e-05, "rewards/margins": 0.1832742542028427, "rewards/rejected": -0.1832876354455948, "step": 8954 }, { "epoch": 6.1929460580912865, "grad_norm": 5.435145378112793, "learning_rate": 2.115029967727063e-05, "log_odds_chosen": 10.449785232543945, "log_odds_ratio": -5.887317456654273e-05, "logits/chosen": -0.18803539872169495, "logits/rejected": -0.3863428831100464, "logps/chosen": -0.00021623042994178832, "logps/rejected": -1.649173378944397, "loss": 0.881, "nll_loss": 0.22024330496788025, "rewards/accuracies": 1.0, "rewards/chosen": -2.1623043721774593e-05, "rewards/margins": 0.16489571332931519, "rewards/rejected": -0.16491734981536865, "step": 8955 }, { "epoch": 6.193637621023513, "grad_norm": 7.4757399559021, "learning_rate": 2.1146457660980484e-05, "log_odds_chosen": 11.041566848754883, "log_odds_ratio": -0.00036361149977892637, "logits/chosen": -0.3361101746559143, "logits/rejected": -0.3597390055656433, "logps/chosen": -0.000462493859231472, "logps/rejected": -2.7006239891052246, "loss": 0.6115, "nll_loss": 0.1528266966342926, "rewards/accuracies": 1.0, "rewards/chosen": -4.624938446795568e-05, "rewards/margins": 0.2700161635875702, "rewards/rejected": -0.2700624167919159, "step": 8956 }, { "epoch": 6.19432918395574, "grad_norm": 6.892910957336426, "learning_rate": 2.1142615644690333e-05, "log_odds_chosen": 10.666043281555176, "log_odds_ratio": -9.158872853731737e-05, "logits/chosen": -0.44387686252593994, "logits/rejected": -0.589409589767456, "logps/chosen": -0.0003314261557534337, "logps/rejected": -2.5575907230377197, "loss": 0.8809, "nll_loss": 0.22020521759986877, "rewards/accuracies": 1.0, "rewards/chosen": -3.314261994091794e-05, "rewards/margins": 0.2557259202003479, "rewards/rejected": -0.255759060382843, "step": 8957 }, { "epoch": 6.195020746887967, "grad_norm": 4.018488883972168, "learning_rate": 2.1138773628400185e-05, "log_odds_chosen": 11.348909378051758, "log_odds_ratio": -3.268062573624775e-05, "logits/chosen": -0.5060147643089294, "logits/rejected": -0.6009031534194946, "logps/chosen": -0.00012915278784930706, "logps/rejected": -2.4707422256469727, "loss": 0.336, "nll_loss": 0.08399759232997894, "rewards/accuracies": 1.0, "rewards/chosen": -1.2915278603031766e-05, "rewards/margins": 0.24706131219863892, "rewards/rejected": -0.2470742166042328, "step": 8958 }, { "epoch": 6.195712309820194, "grad_norm": 9.816084861755371, "learning_rate": 2.1134931612110038e-05, "log_odds_chosen": 10.38204574584961, "log_odds_ratio": -0.00014280746108852327, "logits/chosen": -0.5085594058036804, "logits/rejected": -0.5785856246948242, "logps/chosen": -0.0004231697239447385, "logps/rejected": -2.2826638221740723, "loss": 0.8793, "nll_loss": 0.2198125571012497, "rewards/accuracies": 1.0, "rewards/chosen": -4.2316969484090805e-05, "rewards/margins": 0.22822408378124237, "rewards/rejected": -0.2282664030790329, "step": 8959 }, { "epoch": 6.196403872752421, "grad_norm": 6.990175724029541, "learning_rate": 2.1131089595819887e-05, "log_odds_chosen": 10.67991828918457, "log_odds_ratio": -5.843305916641839e-05, "logits/chosen": -0.2339039295911789, "logits/rejected": -0.31213295459747314, "logps/chosen": -0.00019437081937212497, "logps/rejected": -2.1106085777282715, "loss": 0.6733, "nll_loss": 0.16832423210144043, "rewards/accuracies": 1.0, "rewards/chosen": -1.9437082301010378e-05, "rewards/margins": 0.2110414206981659, "rewards/rejected": -0.21106085181236267, "step": 8960 }, { "epoch": 6.1970954356846475, "grad_norm": 5.311736583709717, "learning_rate": 2.112724757952974e-05, "log_odds_chosen": 10.271112442016602, "log_odds_ratio": -0.00021918118000030518, "logits/chosen": -0.5048193335533142, "logits/rejected": -0.5501112937927246, "logps/chosen": -0.00027754349866881967, "logps/rejected": -2.03511643409729, "loss": 0.5808, "nll_loss": 0.14516626298427582, "rewards/accuracies": 1.0, "rewards/chosen": -2.775435132207349e-05, "rewards/margins": 0.20348387956619263, "rewards/rejected": -0.20351164042949677, "step": 8961 }, { "epoch": 6.197786998616874, "grad_norm": 6.911527156829834, "learning_rate": 2.112340556323959e-05, "log_odds_chosen": 10.46664810180664, "log_odds_ratio": -3.2967760489555076e-05, "logits/chosen": -0.678046464920044, "logits/rejected": -0.6764953136444092, "logps/chosen": -0.0002369165886193514, "logps/rejected": -1.7433416843414307, "loss": 0.7735, "nll_loss": 0.19336023926734924, "rewards/accuracies": 1.0, "rewards/chosen": -2.369166031712666e-05, "rewards/margins": 0.17431046068668365, "rewards/rejected": -0.17433416843414307, "step": 8962 }, { "epoch": 6.198478561549101, "grad_norm": 12.984915733337402, "learning_rate": 2.1119563546949437e-05, "log_odds_chosen": 10.639669418334961, "log_odds_ratio": -0.00014092384662944824, "logits/chosen": -0.5126928687095642, "logits/rejected": -0.6322792768478394, "logps/chosen": -0.0002208442019764334, "logps/rejected": -1.886051893234253, "loss": 1.7345, "nll_loss": 0.4336107075214386, "rewards/accuracies": 1.0, "rewards/chosen": -2.2084421289036982e-05, "rewards/margins": 0.188583105802536, "rewards/rejected": -0.1886051893234253, "step": 8963 }, { "epoch": 6.199170124481328, "grad_norm": 5.063332557678223, "learning_rate": 2.111572153065929e-05, "log_odds_chosen": 11.39436149597168, "log_odds_ratio": -5.77733080717735e-05, "logits/chosen": -0.39494332671165466, "logits/rejected": -0.5832082629203796, "logps/chosen": -0.00023869842698331922, "logps/rejected": -2.9476852416992188, "loss": 0.429, "nll_loss": 0.10723817348480225, "rewards/accuracies": 1.0, "rewards/chosen": -2.3869844881119207e-05, "rewards/margins": 0.2947446405887604, "rewards/rejected": -0.2947685122489929, "step": 8964 }, { "epoch": 6.199861687413555, "grad_norm": 8.189932823181152, "learning_rate": 2.1111879514369142e-05, "log_odds_chosen": 10.68580150604248, "log_odds_ratio": -0.00010645938164088875, "logits/chosen": -0.6137702465057373, "logits/rejected": -0.6555781364440918, "logps/chosen": -0.0002459251263644546, "logps/rejected": -2.07014799118042, "loss": 0.4549, "nll_loss": 0.11371321231126785, "rewards/accuracies": 1.0, "rewards/chosen": -2.4592513000243343e-05, "rewards/margins": 0.20699021220207214, "rewards/rejected": -0.207014799118042, "step": 8965 }, { "epoch": 6.200553250345782, "grad_norm": 5.458976745605469, "learning_rate": 2.110803749807899e-05, "log_odds_chosen": 9.840924263000488, "log_odds_ratio": -0.0001311297673964873, "logits/chosen": -0.5520514249801636, "logits/rejected": -0.5865632891654968, "logps/chosen": -0.0007088709389790893, "logps/rejected": -1.6935031414031982, "loss": 0.6038, "nll_loss": 0.15092788636684418, "rewards/accuracies": 1.0, "rewards/chosen": -7.088709389790893e-05, "rewards/margins": 0.16927942633628845, "rewards/rejected": -0.16935031116008759, "step": 8966 }, { "epoch": 6.2012448132780085, "grad_norm": 5.70952033996582, "learning_rate": 2.1104195481788844e-05, "log_odds_chosen": 8.883814811706543, "log_odds_ratio": -0.0009645810350775719, "logits/chosen": -0.1132664605975151, "logits/rejected": -0.18594291806221008, "logps/chosen": -0.0007536731427535415, "logps/rejected": -1.6175205707550049, "loss": 0.6865, "nll_loss": 0.17151714861392975, "rewards/accuracies": 1.0, "rewards/chosen": -7.536730845458806e-05, "rewards/margins": 0.16167670488357544, "rewards/rejected": -0.16175207495689392, "step": 8967 }, { "epoch": 6.201936376210235, "grad_norm": 4.402467250823975, "learning_rate": 2.1100353465498696e-05, "log_odds_chosen": 9.699943542480469, "log_odds_ratio": -0.003861078992486, "logits/chosen": -0.5733868479728699, "logits/rejected": -0.6014716625213623, "logps/chosen": -0.002426671562716365, "logps/rejected": -1.5587421655654907, "loss": 0.5523, "nll_loss": 0.13767722249031067, "rewards/accuracies": 1.0, "rewards/chosen": -0.00024266715627163649, "rewards/margins": 0.15563154220581055, "rewards/rejected": -0.15587422251701355, "step": 8968 }, { "epoch": 6.202627939142462, "grad_norm": 8.179895401000977, "learning_rate": 2.1096511449208545e-05, "log_odds_chosen": 11.282783508300781, "log_odds_ratio": -2.2567839550902136e-05, "logits/chosen": -0.29590824246406555, "logits/rejected": -0.33641281723976135, "logps/chosen": -0.0003832974180113524, "logps/rejected": -2.8411865234375, "loss": 0.5058, "nll_loss": 0.12644313275814056, "rewards/accuracies": 1.0, "rewards/chosen": -3.832974107353948e-05, "rewards/margins": 0.2840803265571594, "rewards/rejected": -0.28411865234375, "step": 8969 }, { "epoch": 6.203319502074689, "grad_norm": 6.13981819152832, "learning_rate": 2.1092669432918398e-05, "log_odds_chosen": 10.735755920410156, "log_odds_ratio": -4.915258614346385e-05, "logits/chosen": -0.25060558319091797, "logits/rejected": -0.3601216673851013, "logps/chosen": -0.000244683149503544, "logps/rejected": -2.069368600845337, "loss": 0.5323, "nll_loss": 0.1330815702676773, "rewards/accuracies": 1.0, "rewards/chosen": -2.446831058477983e-05, "rewards/margins": 0.20691239833831787, "rewards/rejected": -0.20693686604499817, "step": 8970 }, { "epoch": 6.204011065006916, "grad_norm": 5.527590751647949, "learning_rate": 2.1088827416628247e-05, "log_odds_chosen": 10.561314582824707, "log_odds_ratio": -7.101245137164369e-05, "logits/chosen": 0.0811053216457367, "logits/rejected": -0.15598750114440918, "logps/chosen": -0.000390547385904938, "logps/rejected": -2.09092378616333, "loss": 1.0619, "nll_loss": 0.2654751241207123, "rewards/accuracies": 1.0, "rewards/chosen": -3.905473931808956e-05, "rewards/margins": 0.20905332267284393, "rewards/rejected": -0.209092378616333, "step": 8971 }, { "epoch": 6.204702627939143, "grad_norm": 10.170967102050781, "learning_rate": 2.1084985400338096e-05, "log_odds_chosen": 10.451860427856445, "log_odds_ratio": -0.00031936095911078155, "logits/chosen": -0.2732451856136322, "logits/rejected": -0.36096101999282837, "logps/chosen": -0.0005754973972216249, "logps/rejected": -2.381434440612793, "loss": 0.5891, "nll_loss": 0.14723119139671326, "rewards/accuracies": 1.0, "rewards/chosen": -5.7549739722162485e-05, "rewards/margins": 0.23808589577674866, "rewards/rejected": -0.2381434589624405, "step": 8972 }, { "epoch": 6.2053941908713695, "grad_norm": 4.112435817718506, "learning_rate": 2.1081143384047948e-05, "log_odds_chosen": 9.853161811828613, "log_odds_ratio": -0.00031163229141384363, "logits/chosen": -0.5074654221534729, "logits/rejected": -0.43558061122894287, "logps/chosen": -0.0003113880520686507, "logps/rejected": -1.5286929607391357, "loss": 0.6685, "nll_loss": 0.16709870100021362, "rewards/accuracies": 1.0, "rewards/chosen": -3.1138806662056595e-05, "rewards/margins": 0.15283815562725067, "rewards/rejected": -0.15286928415298462, "step": 8973 }, { "epoch": 6.206085753803596, "grad_norm": 4.837614059448242, "learning_rate": 2.10773013677578e-05, "log_odds_chosen": 9.713627815246582, "log_odds_ratio": -0.0007801411557011306, "logits/chosen": -0.37643712759017944, "logits/rejected": -0.4500851631164551, "logps/chosen": -0.00041137493099085987, "logps/rejected": -2.01298189163208, "loss": 0.6282, "nll_loss": 0.15697824954986572, "rewards/accuracies": 1.0, "rewards/chosen": -4.113749309908599e-05, "rewards/margins": 0.20125705003738403, "rewards/rejected": -0.20129819214344025, "step": 8974 }, { "epoch": 6.206777316735823, "grad_norm": 5.911776065826416, "learning_rate": 2.107345935146765e-05, "log_odds_chosen": 10.234687805175781, "log_odds_ratio": -0.0010891073616221547, "logits/chosen": -0.4068930447101593, "logits/rejected": -0.47032034397125244, "logps/chosen": -0.008021952584385872, "logps/rejected": -2.307753562927246, "loss": 0.6201, "nll_loss": 0.15490767359733582, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008021953399293125, "rewards/margins": 0.22997316718101501, "rewards/rejected": -0.2307753562927246, "step": 8975 }, { "epoch": 6.20746887966805, "grad_norm": 9.897012710571289, "learning_rate": 2.1069617335177502e-05, "log_odds_chosen": 10.247870445251465, "log_odds_ratio": -0.00021053629461675882, "logits/chosen": -0.21756838262081146, "logits/rejected": -0.35750874876976013, "logps/chosen": -0.0004833069397136569, "logps/rejected": -2.3169853687286377, "loss": 0.8337, "nll_loss": 0.2084033489227295, "rewards/accuracies": 1.0, "rewards/chosen": -4.833069397136569e-05, "rewards/margins": 0.2316502332687378, "rewards/rejected": -0.23169854283332825, "step": 8976 }, { "epoch": 6.208160442600277, "grad_norm": 6.505767345428467, "learning_rate": 2.1065775318887355e-05, "log_odds_chosen": 10.968399047851562, "log_odds_ratio": -0.00010166892025154084, "logits/chosen": -0.1739257425069809, "logits/rejected": -0.3365812301635742, "logps/chosen": -0.00034486784716136754, "logps/rejected": -2.477846145629883, "loss": 0.4064, "nll_loss": 0.10159460455179214, "rewards/accuracies": 1.0, "rewards/chosen": -3.44867876265198e-05, "rewards/margins": 0.2477501481771469, "rewards/rejected": -0.24778464436531067, "step": 8977 }, { "epoch": 6.208852005532504, "grad_norm": 5.0093674659729, "learning_rate": 2.1061933302597204e-05, "log_odds_chosen": 10.247230529785156, "log_odds_ratio": -0.00017742854834068567, "logits/chosen": -0.27696970105171204, "logits/rejected": -0.384748637676239, "logps/chosen": -0.0005147414631210268, "logps/rejected": -2.2932639122009277, "loss": 0.5645, "nll_loss": 0.14109958708286285, "rewards/accuracies": 1.0, "rewards/chosen": -5.14741477672942e-05, "rewards/margins": 0.2292748987674713, "rewards/rejected": -0.22932636737823486, "step": 8978 }, { "epoch": 6.20954356846473, "grad_norm": 11.902543067932129, "learning_rate": 2.1058091286307056e-05, "log_odds_chosen": 10.680965423583984, "log_odds_ratio": -0.00011109773186035454, "logits/chosen": -0.16623735427856445, "logits/rejected": -0.2329682856798172, "logps/chosen": -0.001161636901088059, "logps/rejected": -3.0028746128082275, "loss": 0.6431, "nll_loss": 0.16075670719146729, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011616369738476351, "rewards/margins": 0.30017128586769104, "rewards/rejected": -0.30028748512268066, "step": 8979 }, { "epoch": 6.210235131396957, "grad_norm": 8.642373085021973, "learning_rate": 2.105424927001691e-05, "log_odds_chosen": 10.992445945739746, "log_odds_ratio": -0.0002544039161875844, "logits/chosen": -0.2466789186000824, "logits/rejected": -0.2625923752784729, "logps/chosen": -0.0005904433783143759, "logps/rejected": -2.307201385498047, "loss": 0.5631, "nll_loss": 0.1407500058412552, "rewards/accuracies": 1.0, "rewards/chosen": -5.904433419345878e-05, "rewards/margins": 0.23066109418869019, "rewards/rejected": -0.2307201474905014, "step": 8980 }, { "epoch": 6.210926694329184, "grad_norm": 5.6907172203063965, "learning_rate": 2.1050407253726758e-05, "log_odds_chosen": 10.472320556640625, "log_odds_ratio": -0.00016914596199057996, "logits/chosen": -0.727494478225708, "logits/rejected": -0.7286410331726074, "logps/chosen": -0.0006767577142454684, "logps/rejected": -2.395904779434204, "loss": 0.6677, "nll_loss": 0.16691061854362488, "rewards/accuracies": 1.0, "rewards/chosen": -6.767576996935531e-05, "rewards/margins": 0.2395228147506714, "rewards/rejected": -0.23959049582481384, "step": 8981 }, { "epoch": 6.211618257261411, "grad_norm": 6.281667232513428, "learning_rate": 2.1046565237436607e-05, "log_odds_chosen": 11.036178588867188, "log_odds_ratio": -5.01195972901769e-05, "logits/chosen": -0.2629799544811249, "logits/rejected": -0.3964104950428009, "logps/chosen": -0.0003912192478310317, "logps/rejected": -2.7341060638427734, "loss": 0.6457, "nll_loss": 0.16142527759075165, "rewards/accuracies": 1.0, "rewards/chosen": -3.912192187272012e-05, "rewards/margins": 0.27337148785591125, "rewards/rejected": -0.2734106183052063, "step": 8982 }, { "epoch": 6.212309820193638, "grad_norm": 7.363375186920166, "learning_rate": 2.104272322114646e-05, "log_odds_chosen": 11.015447616577148, "log_odds_ratio": -3.7999296182533726e-05, "logits/chosen": -0.6886452436447144, "logits/rejected": -0.7008345127105713, "logps/chosen": -0.0001938598434207961, "logps/rejected": -2.3993420600891113, "loss": 0.7438, "nll_loss": 0.18593871593475342, "rewards/accuracies": 1.0, "rewards/chosen": -1.938598506967537e-05, "rewards/margins": 0.23991483449935913, "rewards/rejected": -0.23993422091007233, "step": 8983 }, { "epoch": 6.213001383125865, "grad_norm": 4.0122971534729, "learning_rate": 2.1038881204856308e-05, "log_odds_chosen": 10.397682189941406, "log_odds_ratio": -0.00024126411881297827, "logits/chosen": -0.46353641152381897, "logits/rejected": -0.4780619740486145, "logps/chosen": -0.00030690658604726195, "logps/rejected": -2.0208168029785156, "loss": 0.5212, "nll_loss": 0.13026946783065796, "rewards/accuracies": 1.0, "rewards/chosen": -3.069065496674739e-05, "rewards/margins": 0.20205096900463104, "rewards/rejected": -0.20208168029785156, "step": 8984 }, { "epoch": 6.213692946058091, "grad_norm": 5.429798126220703, "learning_rate": 2.103503918856616e-05, "log_odds_chosen": 11.59724235534668, "log_odds_ratio": -1.5820773114683107e-05, "logits/chosen": -0.38226318359375, "logits/rejected": -0.5154070258140564, "logps/chosen": -0.0003342315030749887, "logps/rejected": -2.710653305053711, "loss": 0.4582, "nll_loss": 0.11453854292631149, "rewards/accuracies": 1.0, "rewards/chosen": -3.3423151762690395e-05, "rewards/margins": 0.2710318863391876, "rewards/rejected": -0.2710653245449066, "step": 8985 }, { "epoch": 6.214384508990318, "grad_norm": 6.813840866088867, "learning_rate": 2.1031197172276013e-05, "log_odds_chosen": 9.728269577026367, "log_odds_ratio": -0.0004124910046812147, "logits/chosen": -0.5749183297157288, "logits/rejected": -0.5533679723739624, "logps/chosen": -0.004407223779708147, "logps/rejected": -1.9411263465881348, "loss": 0.5482, "nll_loss": 0.13700014352798462, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004407223896123469, "rewards/margins": 0.19367189705371857, "rewards/rejected": -0.194112628698349, "step": 8986 }, { "epoch": 6.215076071922545, "grad_norm": 10.740469932556152, "learning_rate": 2.1027355155985862e-05, "log_odds_chosen": 9.602537155151367, "log_odds_ratio": -0.014514979906380177, "logits/chosen": -0.5719044208526611, "logits/rejected": -0.700533390045166, "logps/chosen": -0.010756929405033588, "logps/rejected": -1.8883540630340576, "loss": 1.2422, "nll_loss": 0.309101939201355, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010756929405033588, "rewards/margins": 0.18775972723960876, "rewards/rejected": -0.18883541226387024, "step": 8987 }, { "epoch": 6.215767634854772, "grad_norm": 5.412166118621826, "learning_rate": 2.1023513139695715e-05, "log_odds_chosen": 10.697986602783203, "log_odds_ratio": -4.1972598410211504e-05, "logits/chosen": -0.3933907449245453, "logits/rejected": -0.3781155049800873, "logps/chosen": -0.0002012891782214865, "logps/rejected": -2.0933737754821777, "loss": 0.5938, "nll_loss": 0.14844883978366852, "rewards/accuracies": 1.0, "rewards/chosen": -2.012891673075501e-05, "rewards/margins": 0.20931726694107056, "rewards/rejected": -0.20933738350868225, "step": 8988 }, { "epoch": 6.216459197786999, "grad_norm": 6.379223346710205, "learning_rate": 2.1019671123405567e-05, "log_odds_chosen": 10.370619773864746, "log_odds_ratio": -0.00026047302526421845, "logits/chosen": -0.5298944711685181, "logits/rejected": -0.5091612339019775, "logps/chosen": -0.00016024197975639254, "logps/rejected": -1.7236460447311401, "loss": 0.9999, "nll_loss": 0.24995876848697662, "rewards/accuracies": 1.0, "rewards/chosen": -1.6024197975639254e-05, "rewards/margins": 0.17234858870506287, "rewards/rejected": -0.17236460745334625, "step": 8989 }, { "epoch": 6.217150760719226, "grad_norm": 6.144903659820557, "learning_rate": 2.1015829107115416e-05, "log_odds_chosen": 10.349257469177246, "log_odds_ratio": -0.00021842276328243315, "logits/chosen": -0.8314346075057983, "logits/rejected": -0.7988418936729431, "logps/chosen": -0.000315680488711223, "logps/rejected": -2.2892818450927734, "loss": 0.614, "nll_loss": 0.1534736156463623, "rewards/accuracies": 1.0, "rewards/chosen": -3.1568051781505346e-05, "rewards/margins": 0.22889664769172668, "rewards/rejected": -0.22892819344997406, "step": 8990 }, { "epoch": 6.217842323651452, "grad_norm": 7.017372131347656, "learning_rate": 2.1011987090825265e-05, "log_odds_chosen": 10.174150466918945, "log_odds_ratio": -0.00014968191680964082, "logits/chosen": -0.7089210748672485, "logits/rejected": -0.7370721697807312, "logps/chosen": -0.000289241987047717, "logps/rejected": -1.7138595581054688, "loss": 0.332, "nll_loss": 0.08299598097801208, "rewards/accuracies": 1.0, "rewards/chosen": -2.8924197977175936e-05, "rewards/margins": 0.17135705053806305, "rewards/rejected": -0.1713859736919403, "step": 8991 }, { "epoch": 6.218533886583679, "grad_norm": 6.155914783477783, "learning_rate": 2.1008145074535118e-05, "log_odds_chosen": 10.125147819519043, "log_odds_ratio": -9.33370174607262e-05, "logits/chosen": -0.5350738763809204, "logits/rejected": -0.6198149919509888, "logps/chosen": -0.000489847909193486, "logps/rejected": -1.8168946504592896, "loss": 0.7029, "nll_loss": 0.17571145296096802, "rewards/accuracies": 1.0, "rewards/chosen": -4.898479164694436e-05, "rewards/margins": 0.18164049088954926, "rewards/rejected": -0.18168947100639343, "step": 8992 }, { "epoch": 6.219225449515906, "grad_norm": 8.826537132263184, "learning_rate": 2.1004303058244967e-05, "log_odds_chosen": 10.819112777709961, "log_odds_ratio": -3.925442433683202e-05, "logits/chosen": -0.8494081497192383, "logits/rejected": -0.8767988681793213, "logps/chosen": -0.0004170122556388378, "logps/rejected": -2.2485222816467285, "loss": 0.608, "nll_loss": 0.15199565887451172, "rewards/accuracies": 1.0, "rewards/chosen": -4.1701227019075304e-05, "rewards/margins": 0.22481051087379456, "rewards/rejected": -0.22485221922397614, "step": 8993 }, { "epoch": 6.219917012448133, "grad_norm": 11.54941463470459, "learning_rate": 2.100046104195482e-05, "log_odds_chosen": 10.90658187866211, "log_odds_ratio": -9.246320405509323e-05, "logits/chosen": -0.5866506099700928, "logits/rejected": -0.6891856789588928, "logps/chosen": -0.0002085501328110695, "logps/rejected": -2.233708143234253, "loss": 0.5723, "nll_loss": 0.1430549919605255, "rewards/accuracies": 1.0, "rewards/chosen": -2.0855015463894233e-05, "rewards/margins": 0.22334995865821838, "rewards/rejected": -0.22337080538272858, "step": 8994 }, { "epoch": 6.22060857538036, "grad_norm": 8.5200834274292, "learning_rate": 2.099661902566467e-05, "log_odds_chosen": 11.007604598999023, "log_odds_ratio": -5.738082472817041e-05, "logits/chosen": -0.5865336060523987, "logits/rejected": -0.53379225730896, "logps/chosen": -0.00028456421568989754, "logps/rejected": -2.431556463241577, "loss": 0.9227, "nll_loss": 0.2306625097990036, "rewards/accuracies": 1.0, "rewards/chosen": -2.8456421205191873e-05, "rewards/margins": 0.24312719702720642, "rewards/rejected": -0.24315565824508667, "step": 8995 }, { "epoch": 6.2213001383125865, "grad_norm": 7.333250045776367, "learning_rate": 2.099277700937452e-05, "log_odds_chosen": 10.954893112182617, "log_odds_ratio": -6.844123709015548e-05, "logits/chosen": -0.2322230190038681, "logits/rejected": -0.3255404829978943, "logps/chosen": -0.00023944003623910248, "logps/rejected": -2.2728569507598877, "loss": 0.6644, "nll_loss": 0.16609756648540497, "rewards/accuracies": 1.0, "rewards/chosen": -2.394400507910177e-05, "rewards/margins": 0.2272617518901825, "rewards/rejected": -0.227285698056221, "step": 8996 }, { "epoch": 6.221991701244813, "grad_norm": 7.575857639312744, "learning_rate": 2.0988934993084373e-05, "log_odds_chosen": 11.492612838745117, "log_odds_ratio": -4.526223710854538e-05, "logits/chosen": 0.041964687407016754, "logits/rejected": -0.07933655381202698, "logps/chosen": -0.0002207772631663829, "logps/rejected": -2.7611167430877686, "loss": 0.7763, "nll_loss": 0.19406726956367493, "rewards/accuracies": 1.0, "rewards/chosen": -2.207772558904253e-05, "rewards/margins": 0.2760895788669586, "rewards/rejected": -0.2761116623878479, "step": 8997 }, { "epoch": 6.22268326417704, "grad_norm": 7.768023490905762, "learning_rate": 2.0985092976794226e-05, "log_odds_chosen": 10.354554176330566, "log_odds_ratio": -5.161076478543691e-05, "logits/chosen": -0.37766382098197937, "logits/rejected": -0.38012662529945374, "logps/chosen": -0.00051166454795748, "logps/rejected": -2.2143630981445312, "loss": 0.5399, "nll_loss": 0.13496245443820953, "rewards/accuracies": 1.0, "rewards/chosen": -5.116645115776919e-05, "rewards/margins": 0.2213851511478424, "rewards/rejected": -0.2214363068342209, "step": 8998 }, { "epoch": 6.223374827109267, "grad_norm": 6.647977352142334, "learning_rate": 2.0981250960504075e-05, "log_odds_chosen": 11.430723190307617, "log_odds_ratio": -2.3654045435250737e-05, "logits/chosen": -0.4360831677913666, "logits/rejected": -0.4695732295513153, "logps/chosen": -0.00035060528898611665, "logps/rejected": -2.913233757019043, "loss": 0.6416, "nll_loss": 0.16040118038654327, "rewards/accuracies": 1.0, "rewards/chosen": -3.5060529626207426e-05, "rewards/margins": 0.2912883162498474, "rewards/rejected": -0.29132339358329773, "step": 8999 }, { "epoch": 6.224066390041494, "grad_norm": 7.017353057861328, "learning_rate": 2.0977408944213924e-05, "log_odds_chosen": 11.734591484069824, "log_odds_ratio": -3.7382968002930284e-05, "logits/chosen": -0.06758692860603333, "logits/rejected": -0.17713436484336853, "logps/chosen": -0.0004616577934939414, "logps/rejected": -2.8184022903442383, "loss": 0.4697, "nll_loss": 0.11741077154874802, "rewards/accuracies": 1.0, "rewards/chosen": -4.6165780076989904e-05, "rewards/margins": 0.2817940413951874, "rewards/rejected": -0.2818402051925659, "step": 9000 }, { "epoch": 6.224757952973721, "grad_norm": 3.9933974742889404, "learning_rate": 2.0973566927923776e-05, "log_odds_chosen": 9.483848571777344, "log_odds_ratio": -0.0003084187919739634, "logits/chosen": -0.24222630262374878, "logits/rejected": -0.286294162273407, "logps/chosen": -0.0029962025582790375, "logps/rejected": -2.703573703765869, "loss": 0.5122, "nll_loss": 0.1280231475830078, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002996202383656055, "rewards/margins": 0.2700577676296234, "rewards/rejected": -0.2703574001789093, "step": 9001 }, { "epoch": 6.2254495159059475, "grad_norm": 7.665703773498535, "learning_rate": 2.0969724911633625e-05, "log_odds_chosen": 10.602262496948242, "log_odds_ratio": -0.0001078636123565957, "logits/chosen": -0.24498625099658966, "logits/rejected": -0.3056301176548004, "logps/chosen": -0.0003808321198448539, "logps/rejected": -2.0292539596557617, "loss": 0.6004, "nll_loss": 0.1500934660434723, "rewards/accuracies": 1.0, "rewards/chosen": -3.8083209801698104e-05, "rewards/margins": 0.20288731157779694, "rewards/rejected": -0.2029254138469696, "step": 9002 }, { "epoch": 6.226141078838174, "grad_norm": 8.88949966430664, "learning_rate": 2.0965882895343478e-05, "log_odds_chosen": 10.602922439575195, "log_odds_ratio": -4.7081877710297704e-05, "logits/chosen": -0.28342053294181824, "logits/rejected": -0.29611077904701233, "logps/chosen": -0.00026970237377099693, "logps/rejected": -2.155287504196167, "loss": 0.7502, "nll_loss": 0.18754449486732483, "rewards/accuracies": 1.0, "rewards/chosen": -2.6970237740897574e-05, "rewards/margins": 0.2155017852783203, "rewards/rejected": -0.21552875638008118, "step": 9003 }, { "epoch": 6.226832641770401, "grad_norm": 6.602673530578613, "learning_rate": 2.0962040879053327e-05, "log_odds_chosen": 10.173442840576172, "log_odds_ratio": -0.00119855219963938, "logits/chosen": -0.18686045706272125, "logits/rejected": -0.2736320197582245, "logps/chosen": -0.0023301991168409586, "logps/rejected": -1.9063091278076172, "loss": 0.6251, "nll_loss": 0.15616220235824585, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023301989131141454, "rewards/margins": 0.19039788842201233, "rewards/rejected": -0.19063091278076172, "step": 9004 }, { "epoch": 6.227524204702628, "grad_norm": 10.806591033935547, "learning_rate": 2.095819886276318e-05, "log_odds_chosen": 9.640810012817383, "log_odds_ratio": -0.00026288797380402684, "logits/chosen": -0.46420085430145264, "logits/rejected": -0.5561163425445557, "logps/chosen": -0.0003273919865023345, "logps/rejected": -1.186840534210205, "loss": 0.438, "nll_loss": 0.1094648689031601, "rewards/accuracies": 1.0, "rewards/chosen": -3.2739197195041925e-05, "rewards/margins": 0.11865131556987762, "rewards/rejected": -0.1186840608716011, "step": 9005 }, { "epoch": 6.228215767634855, "grad_norm": 6.025399684906006, "learning_rate": 2.095435684647303e-05, "log_odds_chosen": 9.861456871032715, "log_odds_ratio": -0.0004261066787876189, "logits/chosen": -0.07645013928413391, "logits/rejected": -0.15051329135894775, "logps/chosen": -0.0006179987103678286, "logps/rejected": -2.166393280029297, "loss": 0.5465, "nll_loss": 0.13658836483955383, "rewards/accuracies": 1.0, "rewards/chosen": -6.179986667120829e-05, "rewards/margins": 0.21657755970954895, "rewards/rejected": -0.21663935482501984, "step": 9006 }, { "epoch": 6.228907330567082, "grad_norm": 23.894134521484375, "learning_rate": 2.095051483018288e-05, "log_odds_chosen": 7.401577949523926, "log_odds_ratio": -0.05275609716773033, "logits/chosen": -0.36621835827827454, "logits/rejected": -0.30994993448257446, "logps/chosen": -0.03477298468351364, "logps/rejected": -1.45652437210083, "loss": 0.6965, "nll_loss": 0.16883717477321625, "rewards/accuracies": 1.0, "rewards/chosen": -0.003477298654615879, "rewards/margins": 0.14217513799667358, "rewards/rejected": -0.1456524282693863, "step": 9007 }, { "epoch": 6.2295988934993085, "grad_norm": 5.496852874755859, "learning_rate": 2.0946672813892733e-05, "log_odds_chosen": 10.890434265136719, "log_odds_ratio": -4.913666271022521e-05, "logits/chosen": -0.5789802074432373, "logits/rejected": -0.6446264982223511, "logps/chosen": -0.0001830903347581625, "logps/rejected": -1.9237865209579468, "loss": 0.4705, "nll_loss": 0.11761998385190964, "rewards/accuracies": 1.0, "rewards/chosen": -1.830903420341201e-05, "rewards/margins": 0.19236035645008087, "rewards/rejected": -0.19237865507602692, "step": 9008 }, { "epoch": 6.230290456431535, "grad_norm": 20.32415008544922, "learning_rate": 2.0942830797602582e-05, "log_odds_chosen": 11.691722869873047, "log_odds_ratio": -1.3160077287466265e-05, "logits/chosen": -0.30656173825263977, "logits/rejected": -0.3084481656551361, "logps/chosen": -0.00013962341472506523, "logps/rejected": -2.690067768096924, "loss": 0.685, "nll_loss": 0.1712568998336792, "rewards/accuracies": 1.0, "rewards/chosen": -1.3962340744910762e-05, "rewards/margins": 0.2689928412437439, "rewards/rejected": -0.26900678873062134, "step": 9009 }, { "epoch": 6.230982019363762, "grad_norm": 14.121626853942871, "learning_rate": 2.093898878131243e-05, "log_odds_chosen": 10.256575584411621, "log_odds_ratio": -0.00016809267981443554, "logits/chosen": -0.6957007646560669, "logits/rejected": -0.6951332688331604, "logps/chosen": -0.0032358954194933176, "logps/rejected": -2.291188955307007, "loss": 0.6839, "nll_loss": 0.170955628156662, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003235895128455013, "rewards/margins": 0.22879531979560852, "rewards/rejected": -0.22911891341209412, "step": 9010 }, { "epoch": 6.231673582295989, "grad_norm": 6.468430995941162, "learning_rate": 2.0935146765022284e-05, "log_odds_chosen": 10.054919242858887, "log_odds_ratio": -0.0005386160919442773, "logits/chosen": -0.3988853991031647, "logits/rejected": -0.5644351243972778, "logps/chosen": -0.0008596985717304051, "logps/rejected": -2.17459774017334, "loss": 0.979, "nll_loss": 0.24469514191150665, "rewards/accuracies": 1.0, "rewards/chosen": -8.59698629938066e-05, "rewards/margins": 0.2173738181591034, "rewards/rejected": -0.2174597978591919, "step": 9011 }, { "epoch": 6.232365145228216, "grad_norm": 5.70004415512085, "learning_rate": 2.0931304748732136e-05, "log_odds_chosen": 10.633837699890137, "log_odds_ratio": -5.87763061048463e-05, "logits/chosen": -0.38955414295196533, "logits/rejected": -0.5138880610466003, "logps/chosen": -0.00017273437697440386, "logps/rejected": -1.9499411582946777, "loss": 0.7007, "nll_loss": 0.17516255378723145, "rewards/accuracies": 1.0, "rewards/chosen": -1.727343988022767e-05, "rewards/margins": 0.19497685134410858, "rewards/rejected": -0.19499412178993225, "step": 9012 }, { "epoch": 6.233056708160443, "grad_norm": 5.867102146148682, "learning_rate": 2.0927462732441985e-05, "log_odds_chosen": 9.976856231689453, "log_odds_ratio": -0.0003477151913102716, "logits/chosen": -0.1553748995065689, "logits/rejected": -0.2037595510482788, "logps/chosen": -0.0010640517575666308, "logps/rejected": -2.2231345176696777, "loss": 0.6212, "nll_loss": 0.15527181327342987, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010640516120474786, "rewards/margins": 0.22220705449581146, "rewards/rejected": -0.22231344878673553, "step": 9013 }, { "epoch": 6.2337482710926695, "grad_norm": 6.256630897521973, "learning_rate": 2.0923620716151837e-05, "log_odds_chosen": 11.773627281188965, "log_odds_ratio": -2.0270506865927018e-05, "logits/chosen": -0.41896963119506836, "logits/rejected": -0.6450668573379517, "logps/chosen": -0.00016775909170974046, "logps/rejected": -2.4986515045166016, "loss": 0.5745, "nll_loss": 0.14361341297626495, "rewards/accuracies": 1.0, "rewards/chosen": -1.6775908079580404e-05, "rewards/margins": 0.2498483806848526, "rewards/rejected": -0.24986517429351807, "step": 9014 }, { "epoch": 6.234439834024896, "grad_norm": 12.794068336486816, "learning_rate": 2.091977869986169e-05, "log_odds_chosen": 11.137877464294434, "log_odds_ratio": -2.7645883164950646e-05, "logits/chosen": -0.4125073254108429, "logits/rejected": -0.45649489760398865, "logps/chosen": -0.00030240084743127227, "logps/rejected": -2.5896098613739014, "loss": 0.6423, "nll_loss": 0.16056138277053833, "rewards/accuracies": 1.0, "rewards/chosen": -3.0240084015531465e-05, "rewards/margins": 0.2589307427406311, "rewards/rejected": -0.2589609920978546, "step": 9015 }, { "epoch": 6.235131396957123, "grad_norm": 6.285947799682617, "learning_rate": 2.091593668357154e-05, "log_odds_chosen": 10.62628173828125, "log_odds_ratio": -5.577644697041251e-05, "logits/chosen": -0.3475406765937805, "logits/rejected": -0.41657719016075134, "logps/chosen": -0.000300862651783973, "logps/rejected": -2.3320021629333496, "loss": 0.6059, "nll_loss": 0.15147507190704346, "rewards/accuracies": 1.0, "rewards/chosen": -3.0086270271567628e-05, "rewards/margins": 0.23317012190818787, "rewards/rejected": -0.23320019245147705, "step": 9016 }, { "epoch": 6.23582295988935, "grad_norm": 6.353825092315674, "learning_rate": 2.091209466728139e-05, "log_odds_chosen": 9.51252555847168, "log_odds_ratio": -0.0012284711701795459, "logits/chosen": -0.6180807948112488, "logits/rejected": -0.6229330897331238, "logps/chosen": -0.001876621157862246, "logps/rejected": -2.276731491088867, "loss": 0.438, "nll_loss": 0.10938158631324768, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018766212451737374, "rewards/margins": 0.22748547792434692, "rewards/rejected": -0.22767315804958344, "step": 9017 }, { "epoch": 6.236514522821577, "grad_norm": 7.255704402923584, "learning_rate": 2.090825265099124e-05, "log_odds_chosen": 10.664867401123047, "log_odds_ratio": -3.9949351048562676e-05, "logits/chosen": -0.4201251268386841, "logits/rejected": -0.4479733407497406, "logps/chosen": -0.00033521311706863344, "logps/rejected": -2.4541258811950684, "loss": 1.0342, "nll_loss": 0.2585402727127075, "rewards/accuracies": 1.0, "rewards/chosen": -3.3521311706863344e-05, "rewards/margins": 0.24537906050682068, "rewards/rejected": -0.24541258811950684, "step": 9018 }, { "epoch": 6.237206085753804, "grad_norm": 13.467849731445312, "learning_rate": 2.090441063470109e-05, "log_odds_chosen": 11.327585220336914, "log_odds_ratio": -2.2701206034980714e-05, "logits/chosen": -0.8228195905685425, "logits/rejected": -0.897856593132019, "logps/chosen": -0.00030445802258327603, "logps/rejected": -1.9246413707733154, "loss": 0.5952, "nll_loss": 0.14880092442035675, "rewards/accuracies": 1.0, "rewards/chosen": -3.0445804441114888e-05, "rewards/margins": 0.19243371486663818, "rewards/rejected": -0.19246414303779602, "step": 9019 }, { "epoch": 6.2378976486860305, "grad_norm": 7.673033714294434, "learning_rate": 2.0900568618410942e-05, "log_odds_chosen": 11.0219144821167, "log_odds_ratio": -2.651966133271344e-05, "logits/chosen": -0.431251585483551, "logits/rejected": -0.5808360576629639, "logps/chosen": -0.00018394278595224023, "logps/rejected": -2.3105428218841553, "loss": 0.5675, "nll_loss": 0.14187981188297272, "rewards/accuracies": 1.0, "rewards/chosen": -1.8394279322819784e-05, "rewards/margins": 0.23103588819503784, "rewards/rejected": -0.23105429112911224, "step": 9020 }, { "epoch": 6.238589211618257, "grad_norm": 7.586302280426025, "learning_rate": 2.0896726602120794e-05, "log_odds_chosen": 10.934219360351562, "log_odds_ratio": -5.194777259021066e-05, "logits/chosen": -0.42324450612068176, "logits/rejected": -0.5293049812316895, "logps/chosen": -0.0002458833623677492, "logps/rejected": -2.190702438354492, "loss": 0.6002, "nll_loss": 0.15005594491958618, "rewards/accuracies": 1.0, "rewards/chosen": -2.4588336600572802e-05, "rewards/margins": 0.21904563903808594, "rewards/rejected": -0.21907024085521698, "step": 9021 }, { "epoch": 6.239280774550484, "grad_norm": 7.884757995605469, "learning_rate": 2.0892884585830643e-05, "log_odds_chosen": 11.067387580871582, "log_odds_ratio": -0.00040776049718260765, "logits/chosen": -0.9919091463088989, "logits/rejected": -0.9518996477127075, "logps/chosen": -0.0002284134243382141, "logps/rejected": -1.9842913150787354, "loss": 0.5262, "nll_loss": 0.13151274621486664, "rewards/accuracies": 1.0, "rewards/chosen": -2.284134279761929e-05, "rewards/margins": 0.19840630888938904, "rewards/rejected": -0.198429137468338, "step": 9022 }, { "epoch": 6.239972337482711, "grad_norm": 4.72267484664917, "learning_rate": 2.0889042569540496e-05, "log_odds_chosen": 11.019242286682129, "log_odds_ratio": -0.0008485190337523818, "logits/chosen": 0.0482051819562912, "logits/rejected": -0.14590582251548767, "logps/chosen": -0.0014960195403546095, "logps/rejected": -2.0413918495178223, "loss": 0.4531, "nll_loss": 0.11318106949329376, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014960193948354572, "rewards/margins": 0.20398959517478943, "rewards/rejected": -0.20413920283317566, "step": 9023 }, { "epoch": 6.240663900414938, "grad_norm": 6.891628265380859, "learning_rate": 2.088520055325035e-05, "log_odds_chosen": 10.24162483215332, "log_odds_ratio": -0.00018210129928775132, "logits/chosen": -0.6122728586196899, "logits/rejected": -0.6851514577865601, "logps/chosen": -0.000291764794383198, "logps/rejected": -1.8662919998168945, "loss": 0.589, "nll_loss": 0.14722611010074615, "rewards/accuracies": 1.0, "rewards/chosen": -2.9176480893511325e-05, "rewards/margins": 0.18660002946853638, "rewards/rejected": -0.18662920594215393, "step": 9024 }, { "epoch": 6.241355463347165, "grad_norm": 10.785980224609375, "learning_rate": 2.0881358536960197e-05, "log_odds_chosen": 10.858931541442871, "log_odds_ratio": -0.00032134572393260896, "logits/chosen": -0.2621542811393738, "logits/rejected": -0.37139779329299927, "logps/chosen": -0.0008505442528985441, "logps/rejected": -2.503044605255127, "loss": 0.7347, "nll_loss": 0.1836353838443756, "rewards/accuracies": 1.0, "rewards/chosen": -8.50544311106205e-05, "rewards/margins": 0.2502194046974182, "rewards/rejected": -0.2503044605255127, "step": 9025 }, { "epoch": 6.2420470262793915, "grad_norm": 8.544774055480957, "learning_rate": 2.087751652067005e-05, "log_odds_chosen": 9.95460033416748, "log_odds_ratio": -0.00020327308448031545, "logits/chosen": -0.3229715824127197, "logits/rejected": -0.3416019678115845, "logps/chosen": -0.0009239742066711187, "logps/rejected": -2.1367297172546387, "loss": 0.5462, "nll_loss": 0.13653871417045593, "rewards/accuracies": 1.0, "rewards/chosen": -9.239741484634578e-05, "rewards/margins": 0.21358059346675873, "rewards/rejected": -0.21367299556732178, "step": 9026 }, { "epoch": 6.242738589211618, "grad_norm": 4.830820083618164, "learning_rate": 2.08736745043799e-05, "log_odds_chosen": 10.288642883300781, "log_odds_ratio": -7.357189315371215e-05, "logits/chosen": 0.009236622601747513, "logits/rejected": -0.19707392156124115, "logps/chosen": -0.0003716089413501322, "logps/rejected": -1.879990577697754, "loss": 0.5735, "nll_loss": 0.14336419105529785, "rewards/accuracies": 1.0, "rewards/chosen": -3.7160894862608984e-05, "rewards/margins": 0.1879618912935257, "rewards/rejected": -0.18799905478954315, "step": 9027 }, { "epoch": 6.243430152143845, "grad_norm": 5.889484405517578, "learning_rate": 2.0869832488089748e-05, "log_odds_chosen": 9.802091598510742, "log_odds_ratio": -0.0006548008532263339, "logits/chosen": -0.6355588436126709, "logits/rejected": -0.6946972608566284, "logps/chosen": -0.0002756410976871848, "logps/rejected": -1.7165451049804688, "loss": 0.5623, "nll_loss": 0.1405138224363327, "rewards/accuracies": 1.0, "rewards/chosen": -2.7564110496314242e-05, "rewards/margins": 0.1716269552707672, "rewards/rejected": -0.17165450751781464, "step": 9028 }, { "epoch": 6.244121715076072, "grad_norm": 3.881678819656372, "learning_rate": 2.08659904717996e-05, "log_odds_chosen": 11.143312454223633, "log_odds_ratio": -9.366253652842715e-05, "logits/chosen": -0.4110240340232849, "logits/rejected": -0.4805094301700592, "logps/chosen": -0.00028160365764051676, "logps/rejected": -2.812816619873047, "loss": 0.4031, "nll_loss": 0.10075541585683823, "rewards/accuracies": 1.0, "rewards/chosen": -2.8160364308860153e-05, "rewards/margins": 0.28125351667404175, "rewards/rejected": -0.2812816798686981, "step": 9029 }, { "epoch": 6.244813278008299, "grad_norm": 4.964913845062256, "learning_rate": 2.0862148455509453e-05, "log_odds_chosen": 10.30183219909668, "log_odds_ratio": -0.00011942970013478771, "logits/chosen": -0.4194273352622986, "logits/rejected": -0.4900840222835541, "logps/chosen": -0.0014646524796262383, "logps/rejected": -2.252519369125366, "loss": 0.5851, "nll_loss": 0.14625787734985352, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001464652450522408, "rewards/margins": 0.22510546445846558, "rewards/rejected": -0.2252519428730011, "step": 9030 }, { "epoch": 6.245504840940526, "grad_norm": 5.751460552215576, "learning_rate": 2.0858306439219302e-05, "log_odds_chosen": 9.793046951293945, "log_odds_ratio": -0.00017207200289703906, "logits/chosen": -0.2726150453090668, "logits/rejected": -0.30392637848854065, "logps/chosen": -0.00022600665397476405, "logps/rejected": -1.6625508069992065, "loss": 0.8965, "nll_loss": 0.22410230338573456, "rewards/accuracies": 1.0, "rewards/chosen": -2.2600665033678524e-05, "rewards/margins": 0.16623248159885406, "rewards/rejected": -0.16625507175922394, "step": 9031 }, { "epoch": 6.246196403872752, "grad_norm": 6.082136631011963, "learning_rate": 2.0854464422929154e-05, "log_odds_chosen": 9.919041633605957, "log_odds_ratio": -0.00023411812435369939, "logits/chosen": -0.12235762178897858, "logits/rejected": -0.16585399210453033, "logps/chosen": -0.0003532566479407251, "logps/rejected": -1.8604954481124878, "loss": 0.6567, "nll_loss": 0.16414141654968262, "rewards/accuracies": 1.0, "rewards/chosen": -3.532566552166827e-05, "rewards/margins": 0.18601423501968384, "rewards/rejected": -0.18604956567287445, "step": 9032 }, { "epoch": 6.246887966804979, "grad_norm": 5.677314281463623, "learning_rate": 2.0850622406639007e-05, "log_odds_chosen": 10.706421852111816, "log_odds_ratio": -0.00010353871039114892, "logits/chosen": -0.37744152545928955, "logits/rejected": -0.43936687707901, "logps/chosen": -0.0006696865311823785, "logps/rejected": -2.4172487258911133, "loss": 0.5202, "nll_loss": 0.1300279051065445, "rewards/accuracies": 1.0, "rewards/chosen": -6.69686560286209e-05, "rewards/margins": 0.24165791273117065, "rewards/rejected": -0.241724893450737, "step": 9033 }, { "epoch": 6.247579529737206, "grad_norm": 7.973165512084961, "learning_rate": 2.0846780390348856e-05, "log_odds_chosen": 10.522238731384277, "log_odds_ratio": -7.479259511455894e-05, "logits/chosen": -0.38221296668052673, "logits/rejected": -0.47134849429130554, "logps/chosen": -0.0002770965511444956, "logps/rejected": -1.7832226753234863, "loss": 0.5368, "nll_loss": 0.1341937780380249, "rewards/accuracies": 1.0, "rewards/chosen": -2.770965511444956e-05, "rewards/margins": 0.1782945692539215, "rewards/rejected": -0.17832225561141968, "step": 9034 }, { "epoch": 6.248271092669433, "grad_norm": 8.03611946105957, "learning_rate": 2.0842938374058708e-05, "log_odds_chosen": 11.027082443237305, "log_odds_ratio": -3.489879600238055e-05, "logits/chosen": -0.5230115056037903, "logits/rejected": -0.560234785079956, "logps/chosen": -0.0002563800080679357, "logps/rejected": -2.4159536361694336, "loss": 0.5464, "nll_loss": 0.13658906519412994, "rewards/accuracies": 1.0, "rewards/chosen": -2.5638002625782974e-05, "rewards/margins": 0.24156975746154785, "rewards/rejected": -0.24159538745880127, "step": 9035 }, { "epoch": 6.24896265560166, "grad_norm": 6.003415584564209, "learning_rate": 2.0839096357768557e-05, "log_odds_chosen": 9.94825553894043, "log_odds_ratio": -0.00018064079631585628, "logits/chosen": -0.5426046252250671, "logits/rejected": -0.5991511940956116, "logps/chosen": -0.00014731872943229973, "logps/rejected": -1.313718557357788, "loss": 0.6065, "nll_loss": 0.15160351991653442, "rewards/accuracies": 1.0, "rewards/chosen": -1.4731874216522556e-05, "rewards/margins": 0.1313571333885193, "rewards/rejected": -0.1313718557357788, "step": 9036 }, { "epoch": 6.249654218533887, "grad_norm": 6.042317867279053, "learning_rate": 2.0835254341478406e-05, "log_odds_chosen": 10.225564956665039, "log_odds_ratio": -0.0007171995821408927, "logits/chosen": -0.3908754885196686, "logits/rejected": -0.5180646181106567, "logps/chosen": -0.0009109845268540084, "logps/rejected": -2.3440051078796387, "loss": 0.5335, "nll_loss": 0.13331358134746552, "rewards/accuracies": 1.0, "rewards/chosen": -9.109845996135846e-05, "rewards/margins": 0.23430943489074707, "rewards/rejected": -0.23440054059028625, "step": 9037 }, { "epoch": 6.250345781466113, "grad_norm": 3.844398021697998, "learning_rate": 2.083141232518826e-05, "log_odds_chosen": 11.103232383728027, "log_odds_ratio": -2.063993269985076e-05, "logits/chosen": -0.2962512671947479, "logits/rejected": -0.3584537208080292, "logps/chosen": -0.0001835509028751403, "logps/rejected": -2.4707653522491455, "loss": 0.5642, "nll_loss": 0.14104364812374115, "rewards/accuracies": 1.0, "rewards/chosen": -1.835508919612039e-05, "rewards/margins": 0.2470581829547882, "rewards/rejected": -0.24707652628421783, "step": 9038 }, { "epoch": 6.25103734439834, "grad_norm": 4.758118152618408, "learning_rate": 2.082757030889811e-05, "log_odds_chosen": 10.80452823638916, "log_odds_ratio": -6.069736264180392e-05, "logits/chosen": -0.3704849183559418, "logits/rejected": -0.4546177387237549, "logps/chosen": -0.00019304102170281112, "logps/rejected": -2.074105739593506, "loss": 0.7361, "nll_loss": 0.1840093731880188, "rewards/accuracies": 1.0, "rewards/chosen": -1.930410144268535e-05, "rewards/margins": 0.20739126205444336, "rewards/rejected": -0.20741057395935059, "step": 9039 }, { "epoch": 6.251728907330567, "grad_norm": 8.88686752319336, "learning_rate": 2.082372829260796e-05, "log_odds_chosen": 10.896563529968262, "log_odds_ratio": -0.00014622285380028188, "logits/chosen": -0.20167036354541779, "logits/rejected": -0.2820538878440857, "logps/chosen": -0.001121298992075026, "logps/rejected": -2.7503931522369385, "loss": 0.6227, "nll_loss": 0.15567225217819214, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001121298992075026, "rewards/margins": 0.27492719888687134, "rewards/rejected": -0.27503931522369385, "step": 9040 }, { "epoch": 6.252420470262794, "grad_norm": 8.300743103027344, "learning_rate": 2.0819886276317813e-05, "log_odds_chosen": 10.957125663757324, "log_odds_ratio": -4.0475730202160776e-05, "logits/chosen": -0.3142332434654236, "logits/rejected": -0.33307600021362305, "logps/chosen": -0.0002978050906676799, "logps/rejected": -2.8132143020629883, "loss": 0.727, "nll_loss": 0.1817580759525299, "rewards/accuracies": 1.0, "rewards/chosen": -2.978050906676799e-05, "rewards/margins": 0.281291663646698, "rewards/rejected": -0.2813214361667633, "step": 9041 }, { "epoch": 6.253112033195021, "grad_norm": 4.971611976623535, "learning_rate": 2.0816044260027665e-05, "log_odds_chosen": 9.46718692779541, "log_odds_ratio": -0.000553897931240499, "logits/chosen": -0.3183286190032959, "logits/rejected": -0.47218573093414307, "logps/chosen": -0.0004418599419295788, "logps/rejected": -1.6773815155029297, "loss": 0.737, "nll_loss": 0.18420332670211792, "rewards/accuracies": 1.0, "rewards/chosen": -4.41859956481494e-05, "rewards/margins": 0.16769397258758545, "rewards/rejected": -0.1677381694316864, "step": 9042 }, { "epoch": 6.253803596127248, "grad_norm": 6.985831260681152, "learning_rate": 2.0812202243737514e-05, "log_odds_chosen": 10.669255256652832, "log_odds_ratio": -0.00018928670033346862, "logits/chosen": -0.3893347382545471, "logits/rejected": -0.44403713941574097, "logps/chosen": -0.0007266980828717351, "logps/rejected": -1.9807296991348267, "loss": 0.7293, "nll_loss": 0.18229913711547852, "rewards/accuracies": 1.0, "rewards/chosen": -7.266981265274808e-05, "rewards/margins": 0.19800031185150146, "rewards/rejected": -0.19807296991348267, "step": 9043 }, { "epoch": 6.254495159059474, "grad_norm": 15.985045433044434, "learning_rate": 2.0808360227447367e-05, "log_odds_chosen": 9.64474105834961, "log_odds_ratio": -0.0008536613313481212, "logits/chosen": -0.581167459487915, "logits/rejected": -0.7496652007102966, "logps/chosen": -0.00538310781121254, "logps/rejected": -2.3295609951019287, "loss": 1.2832, "nll_loss": 0.3207254111766815, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005383107927627861, "rewards/margins": 0.23241779208183289, "rewards/rejected": -0.23295611143112183, "step": 9044 }, { "epoch": 6.255186721991701, "grad_norm": 5.278681755065918, "learning_rate": 2.0804518211157216e-05, "log_odds_chosen": 10.54180908203125, "log_odds_ratio": -3.6456309317145497e-05, "logits/chosen": -0.7690801024436951, "logits/rejected": -0.8494816422462463, "logps/chosen": -0.0040186732076108456, "logps/rejected": -2.7842183113098145, "loss": 0.4661, "nll_loss": 0.11652586609125137, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004018672916572541, "rewards/margins": 0.2780199646949768, "rewards/rejected": -0.2784218192100525, "step": 9045 }, { "epoch": 6.255878284923928, "grad_norm": 5.81907320022583, "learning_rate": 2.0800676194867065e-05, "log_odds_chosen": 9.398512840270996, "log_odds_ratio": -0.0007897837203927338, "logits/chosen": -0.864077091217041, "logits/rejected": -0.821776807308197, "logps/chosen": -0.0018377433298155665, "logps/rejected": -1.8497081995010376, "loss": 0.538, "nll_loss": 0.13441544771194458, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018377433298155665, "rewards/margins": 0.18478704988956451, "rewards/rejected": -0.18497082591056824, "step": 9046 }, { "epoch": 6.256569847856155, "grad_norm": 5.225982666015625, "learning_rate": 2.0796834178576917e-05, "log_odds_chosen": 10.886088371276855, "log_odds_ratio": -3.8307931390590966e-05, "logits/chosen": -0.4181956648826599, "logits/rejected": -0.533706545829773, "logps/chosen": -0.0003992409911006689, "logps/rejected": -2.2801594734191895, "loss": 0.4228, "nll_loss": 0.1056981086730957, "rewards/accuracies": 1.0, "rewards/chosen": -3.992409983766265e-05, "rewards/margins": 0.2279760241508484, "rewards/rejected": -0.2280159592628479, "step": 9047 }, { "epoch": 6.257261410788382, "grad_norm": 8.202089309692383, "learning_rate": 2.079299216228677e-05, "log_odds_chosen": 10.310097694396973, "log_odds_ratio": -0.00020973727805539966, "logits/chosen": -0.6218382716178894, "logits/rejected": -0.619733452796936, "logps/chosen": -0.001025262288749218, "logps/rejected": -2.1449999809265137, "loss": 0.6375, "nll_loss": 0.15935611724853516, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010252623178530484, "rewards/margins": 0.21439746022224426, "rewards/rejected": -0.21449998021125793, "step": 9048 }, { "epoch": 6.2579529737206085, "grad_norm": 6.26420259475708, "learning_rate": 2.078915014599662e-05, "log_odds_chosen": 11.018274307250977, "log_odds_ratio": -4.080742655787617e-05, "logits/chosen": -0.4249802231788635, "logits/rejected": -0.5103530883789062, "logps/chosen": -0.00018488478963263333, "logps/rejected": -2.230947971343994, "loss": 0.492, "nll_loss": 0.12300199270248413, "rewards/accuracies": 1.0, "rewards/chosen": -1.8488481146050617e-05, "rewards/margins": 0.22307631373405457, "rewards/rejected": -0.22309479117393494, "step": 9049 }, { "epoch": 6.258644536652835, "grad_norm": 10.108119010925293, "learning_rate": 2.078530812970647e-05, "log_odds_chosen": 11.731668472290039, "log_odds_ratio": -0.00010565257252892479, "logits/chosen": -0.432361364364624, "logits/rejected": -0.5796195268630981, "logps/chosen": -0.0012302513932809234, "logps/rejected": -2.892240285873413, "loss": 0.7714, "nll_loss": 0.1928274929523468, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012302515096962452, "rewards/margins": 0.2891010046005249, "rewards/rejected": -0.2892240285873413, "step": 9050 }, { "epoch": 6.259336099585062, "grad_norm": 5.540128707885742, "learning_rate": 2.0781466113416324e-05, "log_odds_chosen": 10.653034210205078, "log_odds_ratio": -5.03646006109193e-05, "logits/chosen": -0.30922791361808777, "logits/rejected": -0.4006563425064087, "logps/chosen": -0.0004195195506326854, "logps/rejected": -1.9212048053741455, "loss": 0.5047, "nll_loss": 0.12615928053855896, "rewards/accuracies": 1.0, "rewards/chosen": -4.1951956518460065e-05, "rewards/margins": 0.19207853078842163, "rewards/rejected": -0.1921204924583435, "step": 9051 }, { "epoch": 6.260027662517289, "grad_norm": 7.490386486053467, "learning_rate": 2.0777624097126173e-05, "log_odds_chosen": 11.348106384277344, "log_odds_ratio": -4.494922541198321e-05, "logits/chosen": -0.22071166336536407, "logits/rejected": -0.259778767824173, "logps/chosen": -0.00020064400450792164, "logps/rejected": -2.4897258281707764, "loss": 0.4719, "nll_loss": 0.11798227578401566, "rewards/accuracies": 1.0, "rewards/chosen": -2.006439899560064e-05, "rewards/margins": 0.24895252287387848, "rewards/rejected": -0.2489725798368454, "step": 9052 }, { "epoch": 6.260719225449516, "grad_norm": 10.887153625488281, "learning_rate": 2.0773782080836025e-05, "log_odds_chosen": 10.057692527770996, "log_odds_ratio": -0.017915375530719757, "logits/chosen": 0.13184309005737305, "logits/rejected": 0.09195524454116821, "logps/chosen": -0.005825578700751066, "logps/rejected": -2.157179832458496, "loss": 0.9576, "nll_loss": 0.2376118302345276, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005825579282827675, "rewards/margins": 0.21513542532920837, "rewards/rejected": -0.21571798622608185, "step": 9053 }, { "epoch": 6.261410788381743, "grad_norm": 10.072612762451172, "learning_rate": 2.0769940064545874e-05, "log_odds_chosen": 11.511762619018555, "log_odds_ratio": -2.426476748951245e-05, "logits/chosen": -0.4715292453765869, "logits/rejected": -0.5088003277778625, "logps/chosen": -0.00019626517314463854, "logps/rejected": -2.4985780715942383, "loss": 0.4892, "nll_loss": 0.12229645997285843, "rewards/accuracies": 1.0, "rewards/chosen": -1.9626517314463854e-05, "rewards/margins": 0.24983817338943481, "rewards/rejected": -0.2498578131198883, "step": 9054 }, { "epoch": 6.2621023513139695, "grad_norm": 6.188398838043213, "learning_rate": 2.0766098048255723e-05, "log_odds_chosen": 10.952611923217773, "log_odds_ratio": -7.49345199437812e-05, "logits/chosen": -0.25741341710090637, "logits/rejected": -0.28530973196029663, "logps/chosen": -0.00017540823318995535, "logps/rejected": -2.232959270477295, "loss": 0.8443, "nll_loss": 0.2110714316368103, "rewards/accuracies": 1.0, "rewards/chosen": -1.7540824046591297e-05, "rewards/margins": 0.22327838838100433, "rewards/rejected": -0.2232959270477295, "step": 9055 }, { "epoch": 6.262793914246196, "grad_norm": 10.523344039916992, "learning_rate": 2.0762256031965576e-05, "log_odds_chosen": 12.459383964538574, "log_odds_ratio": -1.0062859473691788e-05, "logits/chosen": -0.5501984357833862, "logits/rejected": -0.5856611132621765, "logps/chosen": -0.00019254116341471672, "logps/rejected": -3.8092966079711914, "loss": 0.765, "nll_loss": 0.19125302135944366, "rewards/accuracies": 1.0, "rewards/chosen": -1.9254117432865314e-05, "rewards/margins": 0.3809104561805725, "rewards/rejected": -0.38092970848083496, "step": 9056 }, { "epoch": 6.263485477178423, "grad_norm": 6.1262407302856445, "learning_rate": 2.0758414015675428e-05, "log_odds_chosen": 9.742956161499023, "log_odds_ratio": -0.000668332795612514, "logits/chosen": -0.6014862060546875, "logits/rejected": -0.7945506572723389, "logps/chosen": -0.0018230837304145098, "logps/rejected": -1.510488748550415, "loss": 0.4158, "nll_loss": 0.10387758910655975, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018230837304145098, "rewards/margins": 0.1508665531873703, "rewards/rejected": -0.15104886889457703, "step": 9057 }, { "epoch": 6.26417704011065, "grad_norm": 5.251424312591553, "learning_rate": 2.0754571999385277e-05, "log_odds_chosen": 10.103209495544434, "log_odds_ratio": -0.00021142560581211, "logits/chosen": -0.3135252594947815, "logits/rejected": -0.4640074074268341, "logps/chosen": -0.00029878091299906373, "logps/rejected": -1.7954256534576416, "loss": 0.5794, "nll_loss": 0.14481639862060547, "rewards/accuracies": 1.0, "rewards/chosen": -2.9878092391300015e-05, "rewards/margins": 0.17951269447803497, "rewards/rejected": -0.17954257130622864, "step": 9058 }, { "epoch": 6.264868603042877, "grad_norm": 6.161133766174316, "learning_rate": 2.075072998309513e-05, "log_odds_chosen": 11.259553909301758, "log_odds_ratio": -2.63049550994765e-05, "logits/chosen": -0.7111493349075317, "logits/rejected": -0.7237696051597595, "logps/chosen": -7.550412556156516e-05, "logps/rejected": -1.9565980434417725, "loss": 0.594, "nll_loss": 0.14849528670310974, "rewards/accuracies": 1.0, "rewards/chosen": -7.550413556600688e-06, "rewards/margins": 0.19565224647521973, "rewards/rejected": -0.1956597864627838, "step": 9059 }, { "epoch": 6.265560165975104, "grad_norm": 6.60701847076416, "learning_rate": 2.0746887966804982e-05, "log_odds_chosen": 8.687655448913574, "log_odds_ratio": -0.0004205875447951257, "logits/chosen": 0.09222330898046494, "logits/rejected": 0.01570185273885727, "logps/chosen": -0.002098360098898411, "logps/rejected": -1.8479384183883667, "loss": 0.5536, "nll_loss": 0.1383584439754486, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002098359982483089, "rewards/margins": 0.18458399176597595, "rewards/rejected": -0.18479382991790771, "step": 9060 }, { "epoch": 6.2662517289073305, "grad_norm": 5.517164707183838, "learning_rate": 2.074304595051483e-05, "log_odds_chosen": 10.857470512390137, "log_odds_ratio": -7.395334978355095e-05, "logits/chosen": -0.5717877149581909, "logits/rejected": -0.6270738840103149, "logps/chosen": -0.00035990981268696487, "logps/rejected": -1.8339455127716064, "loss": 0.3562, "nll_loss": 0.08904750645160675, "rewards/accuracies": 1.0, "rewards/chosen": -3.599098272388801e-05, "rewards/margins": 0.1833585500717163, "rewards/rejected": -0.18339455127716064, "step": 9061 }, { "epoch": 6.266943291839557, "grad_norm": 5.447756290435791, "learning_rate": 2.0739203934224684e-05, "log_odds_chosen": 11.803698539733887, "log_odds_ratio": -1.1309020919725299e-05, "logits/chosen": -0.6223872900009155, "logits/rejected": -0.656653106212616, "logps/chosen": -6.747247971361503e-05, "logps/rejected": -1.9564663171768188, "loss": 0.3991, "nll_loss": 0.0997781977057457, "rewards/accuracies": 1.0, "rewards/chosen": -6.747248335159384e-06, "rewards/margins": 0.19563987851142883, "rewards/rejected": -0.19564664363861084, "step": 9062 }, { "epoch": 6.267634854771784, "grad_norm": 9.957139015197754, "learning_rate": 2.0735361917934533e-05, "log_odds_chosen": 11.273309707641602, "log_odds_ratio": -4.331594755058177e-05, "logits/chosen": -0.41265708208084106, "logits/rejected": -0.5016641616821289, "logps/chosen": -0.00020188375492580235, "logps/rejected": -2.586665630340576, "loss": 0.709, "nll_loss": 0.17723584175109863, "rewards/accuracies": 1.0, "rewards/chosen": -2.0188375856378116e-05, "rewards/margins": 0.2586463689804077, "rewards/rejected": -0.2586665749549866, "step": 9063 }, { "epoch": 6.268326417704011, "grad_norm": 4.971700668334961, "learning_rate": 2.0731519901644382e-05, "log_odds_chosen": 10.290696144104004, "log_odds_ratio": -9.44764688028954e-05, "logits/chosen": -0.5032870769500732, "logits/rejected": -0.5671851634979248, "logps/chosen": -0.0004169541352894157, "logps/rejected": -2.007357358932495, "loss": 0.5111, "nll_loss": 0.12775713205337524, "rewards/accuracies": 1.0, "rewards/chosen": -4.169541352894157e-05, "rewards/margins": 0.20069405436515808, "rewards/rejected": -0.20073574781417847, "step": 9064 }, { "epoch": 6.269017980636238, "grad_norm": 6.018120288848877, "learning_rate": 2.0727677885354234e-05, "log_odds_chosen": 9.527848243713379, "log_odds_ratio": -0.0004043597145937383, "logits/chosen": 0.20102456212043762, "logits/rejected": 0.05797000974416733, "logps/chosen": -0.00043211251613684, "logps/rejected": -1.5511095523834229, "loss": 0.9438, "nll_loss": 0.23591835796833038, "rewards/accuracies": 1.0, "rewards/chosen": -4.3211250158492476e-05, "rewards/margins": 0.15506777167320251, "rewards/rejected": -0.15511095523834229, "step": 9065 }, { "epoch": 6.269709543568465, "grad_norm": 7.352861404418945, "learning_rate": 2.0723835869064087e-05, "log_odds_chosen": 10.076114654541016, "log_odds_ratio": -8.22042056825012e-05, "logits/chosen": -0.38600829243659973, "logits/rejected": -0.42129021883010864, "logps/chosen": -0.0007903474033810198, "logps/rejected": -2.3401424884796143, "loss": 0.625, "nll_loss": 0.15625423192977905, "rewards/accuracies": 1.0, "rewards/chosen": -7.903473306214437e-05, "rewards/margins": 0.23393520712852478, "rewards/rejected": -0.23401425778865814, "step": 9066 }, { "epoch": 6.2704011065006915, "grad_norm": 5.614367485046387, "learning_rate": 2.0719993852773936e-05, "log_odds_chosen": 11.346555709838867, "log_odds_ratio": -7.106648263288662e-05, "logits/chosen": -0.4547783136367798, "logits/rejected": -0.5846199989318848, "logps/chosen": -0.00023274804698303342, "logps/rejected": -2.928040027618408, "loss": 0.4747, "nll_loss": 0.11867094784975052, "rewards/accuracies": 1.0, "rewards/chosen": -2.327480433450546e-05, "rewards/margins": 0.29278072714805603, "rewards/rejected": -0.2928040027618408, "step": 9067 }, { "epoch": 6.271092669432918, "grad_norm": 5.819713115692139, "learning_rate": 2.0716151836483788e-05, "log_odds_chosen": 9.828176498413086, "log_odds_ratio": -0.00026798504404723644, "logits/chosen": -0.31800785660743713, "logits/rejected": -0.4112524092197418, "logps/chosen": -0.0003081922768615186, "logps/rejected": -1.351736307144165, "loss": 0.5388, "nll_loss": 0.1346682757139206, "rewards/accuracies": 1.0, "rewards/chosen": -3.0819228413747624e-05, "rewards/margins": 0.13514280319213867, "rewards/rejected": -0.13517361879348755, "step": 9068 }, { "epoch": 6.271784232365145, "grad_norm": 4.693689346313477, "learning_rate": 2.071230982019364e-05, "log_odds_chosen": 10.99893856048584, "log_odds_ratio": -2.8096686946810223e-05, "logits/chosen": -0.7330502867698669, "logits/rejected": -0.7905983924865723, "logps/chosen": -0.0003268049331381917, "logps/rejected": -2.4436917304992676, "loss": 0.5298, "nll_loss": 0.13243672251701355, "rewards/accuracies": 1.0, "rewards/chosen": -3.2680491131031886e-05, "rewards/margins": 0.24433650076389313, "rewards/rejected": -0.24436917901039124, "step": 9069 }, { "epoch": 6.272475795297372, "grad_norm": 7.647608757019043, "learning_rate": 2.070846780390349e-05, "log_odds_chosen": 10.127792358398438, "log_odds_ratio": -0.0002474577631801367, "logits/chosen": -0.1824146807193756, "logits/rejected": -0.25091737508773804, "logps/chosen": -0.0006325167487375438, "logps/rejected": -1.671685814857483, "loss": 0.5367, "nll_loss": 0.13413894176483154, "rewards/accuracies": 1.0, "rewards/chosen": -6.3251682149712e-05, "rewards/margins": 0.1671053171157837, "rewards/rejected": -0.16716857254505157, "step": 9070 }, { "epoch": 6.273167358229599, "grad_norm": 6.014593124389648, "learning_rate": 2.0704625787613342e-05, "log_odds_chosen": 11.303054809570312, "log_odds_ratio": -2.4923643650254235e-05, "logits/chosen": -0.8515036106109619, "logits/rejected": -0.9350264668464661, "logps/chosen": -0.00017572117212694138, "logps/rejected": -2.4927737712860107, "loss": 1.1008, "nll_loss": 0.27519088983535767, "rewards/accuracies": 1.0, "rewards/chosen": -1.75721179402899e-05, "rewards/margins": 0.249259814620018, "rewards/rejected": -0.24927736818790436, "step": 9071 }, { "epoch": 6.273858921161826, "grad_norm": 12.842439651489258, "learning_rate": 2.070078377132319e-05, "log_odds_chosen": 11.64586067199707, "log_odds_ratio": -3.5853641747962683e-05, "logits/chosen": -0.5936583876609802, "logits/rejected": -0.699408769607544, "logps/chosen": -0.00029556750087067485, "logps/rejected": -2.9994378089904785, "loss": 0.6965, "nll_loss": 0.17411650717258453, "rewards/accuracies": 1.0, "rewards/chosen": -2.9556751542259008e-05, "rewards/margins": 0.29991424083709717, "rewards/rejected": -0.29994380474090576, "step": 9072 }, { "epoch": 6.2745504840940525, "grad_norm": 9.507415771484375, "learning_rate": 2.0696941755033044e-05, "log_odds_chosen": 10.774910926818848, "log_odds_ratio": -3.4509495890233666e-05, "logits/chosen": -0.6610577702522278, "logits/rejected": -0.6650505065917969, "logps/chosen": -0.00021303967514541, "logps/rejected": -1.9831280708312988, "loss": 0.5951, "nll_loss": 0.14877526462078094, "rewards/accuracies": 1.0, "rewards/chosen": -2.1303967514541e-05, "rewards/margins": 0.19829149544239044, "rewards/rejected": -0.19831281900405884, "step": 9073 }, { "epoch": 6.275242047026279, "grad_norm": 9.559590339660645, "learning_rate": 2.0693099738742893e-05, "log_odds_chosen": 9.74659252166748, "log_odds_ratio": -0.0001875993621069938, "logits/chosen": -0.45239779353141785, "logits/rejected": -0.4574759602546692, "logps/chosen": -0.0002895795914810151, "logps/rejected": -1.7154991626739502, "loss": 0.4271, "nll_loss": 0.10676049441099167, "rewards/accuracies": 1.0, "rewards/chosen": -2.8957958420505747e-05, "rewards/margins": 0.17152096331119537, "rewards/rejected": -0.17154991626739502, "step": 9074 }, { "epoch": 6.275933609958506, "grad_norm": 5.952230453491211, "learning_rate": 2.068925772245274e-05, "log_odds_chosen": 10.594772338867188, "log_odds_ratio": -0.00013316248077899218, "logits/chosen": -0.35547271370887756, "logits/rejected": -0.37703627347946167, "logps/chosen": -0.00033602563780732453, "logps/rejected": -2.5674431324005127, "loss": 0.7013, "nll_loss": 0.17531204223632812, "rewards/accuracies": 1.0, "rewards/chosen": -3.3602569601498544e-05, "rewards/margins": 0.2567107081413269, "rewards/rejected": -0.2567443251609802, "step": 9075 }, { "epoch": 6.276625172890733, "grad_norm": 4.105951309204102, "learning_rate": 2.0685415706162594e-05, "log_odds_chosen": 10.632652282714844, "log_odds_ratio": -0.00012810016050934792, "logits/chosen": -0.11020754277706146, "logits/rejected": -0.11705412715673447, "logps/chosen": -0.00033791197347454727, "logps/rejected": -2.2698371410369873, "loss": 0.7135, "nll_loss": 0.1783510148525238, "rewards/accuracies": 1.0, "rewards/chosen": -3.379119880264625e-05, "rewards/margins": 0.22694994509220123, "rewards/rejected": -0.22698372602462769, "step": 9076 }, { "epoch": 6.27731673582296, "grad_norm": 14.176226615905762, "learning_rate": 2.0681573689872447e-05, "log_odds_chosen": 10.789567947387695, "log_odds_ratio": -0.0003163648652844131, "logits/chosen": -0.5388079881668091, "logits/rejected": -0.2634154260158539, "logps/chosen": -0.0005803824751637876, "logps/rejected": -2.910543203353882, "loss": 0.4857, "nll_loss": 0.12138887494802475, "rewards/accuracies": 1.0, "rewards/chosen": -5.803824751637876e-05, "rewards/margins": 0.2909962832927704, "rewards/rejected": -0.2910543382167816, "step": 9077 }, { "epoch": 6.278008298755187, "grad_norm": 10.08351993560791, "learning_rate": 2.0677731673582296e-05, "log_odds_chosen": 11.046411514282227, "log_odds_ratio": -3.6873323551844805e-05, "logits/chosen": -0.4525337815284729, "logits/rejected": -0.5173885822296143, "logps/chosen": -0.00015250897558871657, "logps/rejected": -1.7773330211639404, "loss": 0.9478, "nll_loss": 0.2369578629732132, "rewards/accuracies": 1.0, "rewards/chosen": -1.5250896467478015e-05, "rewards/margins": 0.17771805822849274, "rewards/rejected": -0.17773330211639404, "step": 9078 }, { "epoch": 6.2786998616874135, "grad_norm": 11.85649299621582, "learning_rate": 2.0673889657292148e-05, "log_odds_chosen": 11.521341323852539, "log_odds_ratio": -1.7196343833347782e-05, "logits/chosen": -0.2669324278831482, "logits/rejected": -0.2659561336040497, "logps/chosen": -0.00044245910248719156, "logps/rejected": -3.3967370986938477, "loss": 0.6256, "nll_loss": 0.15640604496002197, "rewards/accuracies": 1.0, "rewards/chosen": -4.4245909521123394e-05, "rewards/margins": 0.33962947130203247, "rewards/rejected": -0.3396737277507782, "step": 9079 }, { "epoch": 6.27939142461964, "grad_norm": 7.615908145904541, "learning_rate": 2.0670047641002e-05, "log_odds_chosen": 10.834648132324219, "log_odds_ratio": -4.6995177399367094e-05, "logits/chosen": -0.5375221371650696, "logits/rejected": -0.5115315914154053, "logps/chosen": -0.00023706954380031675, "logps/rejected": -2.054745674133301, "loss": 0.9527, "nll_loss": 0.23815976083278656, "rewards/accuracies": 1.0, "rewards/chosen": -2.3706954380031675e-05, "rewards/margins": 0.2054508626461029, "rewards/rejected": -0.20547455549240112, "step": 9080 }, { "epoch": 6.280082987551867, "grad_norm": 6.639434814453125, "learning_rate": 2.066620562471185e-05, "log_odds_chosen": 10.678813934326172, "log_odds_ratio": -7.374895358225331e-05, "logits/chosen": -0.46182703971862793, "logits/rejected": -0.4474222660064697, "logps/chosen": -0.0002161176089430228, "logps/rejected": -1.9364413022994995, "loss": 0.6428, "nll_loss": 0.16069626808166504, "rewards/accuracies": 1.0, "rewards/chosen": -2.1611760530504398e-05, "rewards/margins": 0.19362254440784454, "rewards/rejected": -0.19364413619041443, "step": 9081 }, { "epoch": 6.280774550484094, "grad_norm": 8.542716026306152, "learning_rate": 2.0662363608421702e-05, "log_odds_chosen": 10.277900695800781, "log_odds_ratio": -0.0008143960149027407, "logits/chosen": -0.46082472801208496, "logits/rejected": -0.46745404601097107, "logps/chosen": -0.0009153565624728799, "logps/rejected": -2.7791833877563477, "loss": 0.6049, "nll_loss": 0.15115474164485931, "rewards/accuracies": 1.0, "rewards/chosen": -9.153566497843713e-05, "rewards/margins": 0.27782678604125977, "rewards/rejected": -0.27791833877563477, "step": 9082 }, { "epoch": 6.281466113416321, "grad_norm": 6.666465759277344, "learning_rate": 2.065852159213155e-05, "log_odds_chosen": 10.795612335205078, "log_odds_ratio": -8.343016088474542e-05, "logits/chosen": -0.242087721824646, "logits/rejected": -0.20174013078212738, "logps/chosen": -0.00022156513296067715, "logps/rejected": -2.262528896331787, "loss": 0.7005, "nll_loss": 0.1751115769147873, "rewards/accuracies": 1.0, "rewards/chosen": -2.2156513296067715e-05, "rewards/margins": 0.22623072564601898, "rewards/rejected": -0.22625288367271423, "step": 9083 }, { "epoch": 6.282157676348548, "grad_norm": 9.905106544494629, "learning_rate": 2.06546795758414e-05, "log_odds_chosen": 10.582621574401855, "log_odds_ratio": -0.000918439356610179, "logits/chosen": -0.24201442301273346, "logits/rejected": -0.2323443591594696, "logps/chosen": -0.004194437526166439, "logps/rejected": -2.3850326538085938, "loss": 0.6521, "nll_loss": 0.16293781995773315, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004194437642581761, "rewards/margins": 0.2380838245153427, "rewards/rejected": -0.23850327730178833, "step": 9084 }, { "epoch": 6.282849239280774, "grad_norm": 5.138657093048096, "learning_rate": 2.0650837559551253e-05, "log_odds_chosen": 11.133842468261719, "log_odds_ratio": -3.982733323937282e-05, "logits/chosen": -0.60860276222229, "logits/rejected": -0.7133729457855225, "logps/chosen": -0.00038923200918361545, "logps/rejected": -2.505032539367676, "loss": 0.6201, "nll_loss": 0.1550285816192627, "rewards/accuracies": 1.0, "rewards/chosen": -3.8923200918361545e-05, "rewards/margins": 0.2504643201828003, "rewards/rejected": -0.2505032420158386, "step": 9085 }, { "epoch": 6.283540802213001, "grad_norm": 4.807248592376709, "learning_rate": 2.0646995543261105e-05, "log_odds_chosen": 10.152233123779297, "log_odds_ratio": -8.358690683962777e-05, "logits/chosen": -0.18029722571372986, "logits/rejected": -0.16884273290634155, "logps/chosen": -0.0004682210856117308, "logps/rejected": -1.9533414840698242, "loss": 0.5167, "nll_loss": 0.12917307019233704, "rewards/accuracies": 1.0, "rewards/chosen": -4.682211147155613e-05, "rewards/margins": 0.1952873170375824, "rewards/rejected": -0.19533413648605347, "step": 9086 }, { "epoch": 6.284232365145228, "grad_norm": 4.057741641998291, "learning_rate": 2.0643153526970954e-05, "log_odds_chosen": 11.659266471862793, "log_odds_ratio": -1.8078741049976088e-05, "logits/chosen": -0.5082545280456543, "logits/rejected": -0.5605330467224121, "logps/chosen": -0.00011240919411648065, "logps/rejected": -2.500460386276245, "loss": 0.4684, "nll_loss": 0.11708788573741913, "rewards/accuracies": 1.0, "rewards/chosen": -1.1240919775445946e-05, "rewards/margins": 0.25003480911254883, "rewards/rejected": -0.250046044588089, "step": 9087 }, { "epoch": 6.284923928077455, "grad_norm": 4.503249168395996, "learning_rate": 2.0639311510680806e-05, "log_odds_chosen": 10.83475399017334, "log_odds_ratio": -2.814342951751314e-05, "logits/chosen": -0.3474327325820923, "logits/rejected": -0.4542396068572998, "logps/chosen": -0.0001295089750783518, "logps/rejected": -1.9213929176330566, "loss": 0.4156, "nll_loss": 0.10390356183052063, "rewards/accuracies": 1.0, "rewards/chosen": -1.2950898053532e-05, "rewards/margins": 0.1921263337135315, "rewards/rejected": -0.19213929772377014, "step": 9088 }, { "epoch": 6.285615491009682, "grad_norm": 5.762747764587402, "learning_rate": 2.063546949439066e-05, "log_odds_chosen": 10.371255874633789, "log_odds_ratio": -0.0003046975180041045, "logits/chosen": -0.40071895718574524, "logits/rejected": -0.4745933711528778, "logps/chosen": -0.00035770676913671196, "logps/rejected": -2.2709102630615234, "loss": 0.7057, "nll_loss": 0.17639602720737457, "rewards/accuracies": 1.0, "rewards/chosen": -3.577067400328815e-05, "rewards/margins": 0.22705526649951935, "rewards/rejected": -0.22709104418754578, "step": 9089 }, { "epoch": 6.286307053941909, "grad_norm": 20.534412384033203, "learning_rate": 2.0631627478100508e-05, "log_odds_chosen": 9.866781234741211, "log_odds_ratio": -0.0009979484602808952, "logits/chosen": -0.6083856225013733, "logits/rejected": -0.5973707437515259, "logps/chosen": -0.00729210302233696, "logps/rejected": -2.3333680629730225, "loss": 0.831, "nll_loss": 0.20765653252601624, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007292103255167603, "rewards/margins": 0.2326076328754425, "rewards/rejected": -0.23333682119846344, "step": 9090 }, { "epoch": 6.286998616874135, "grad_norm": 7.940767288208008, "learning_rate": 2.062778546181036e-05, "log_odds_chosen": 10.434680938720703, "log_odds_ratio": -0.00012016297841910273, "logits/chosen": -0.5466931462287903, "logits/rejected": -0.6142657399177551, "logps/chosen": -0.0004252656945027411, "logps/rejected": -2.3878979682922363, "loss": 0.7342, "nll_loss": 0.18353325128555298, "rewards/accuracies": 1.0, "rewards/chosen": -4.252656799508259e-05, "rewards/margins": 0.23874729871749878, "rewards/rejected": -0.23878982663154602, "step": 9091 }, { "epoch": 6.287690179806362, "grad_norm": 9.114567756652832, "learning_rate": 2.062394344552021e-05, "log_odds_chosen": 11.336699485778809, "log_odds_ratio": -2.9732616894762032e-05, "logits/chosen": -0.5372797846794128, "logits/rejected": -0.5294592380523682, "logps/chosen": -9.94501169770956e-05, "logps/rejected": -2.0726401805877686, "loss": 0.6144, "nll_loss": 0.1535939872264862, "rewards/accuracies": 1.0, "rewards/chosen": -9.94501169770956e-06, "rewards/margins": 0.2072540819644928, "rewards/rejected": -0.2072640359401703, "step": 9092 }, { "epoch": 6.288381742738589, "grad_norm": 5.164366722106934, "learning_rate": 2.062010142923006e-05, "log_odds_chosen": 11.472952842712402, "log_odds_ratio": -6.04154120082967e-05, "logits/chosen": -0.5103493332862854, "logits/rejected": -0.6042656898498535, "logps/chosen": -0.00023936809157021344, "logps/rejected": -3.1101317405700684, "loss": 0.5141, "nll_loss": 0.12852534651756287, "rewards/accuracies": 1.0, "rewards/chosen": -2.3936810976010747e-05, "rewards/margins": 0.31098923087120056, "rewards/rejected": -0.3110131621360779, "step": 9093 }, { "epoch": 6.289073305670816, "grad_norm": 5.8642120361328125, "learning_rate": 2.061625941293991e-05, "log_odds_chosen": 10.50048542022705, "log_odds_ratio": -5.2675630286103114e-05, "logits/chosen": -0.012782931327819824, "logits/rejected": -0.0010571479797363281, "logps/chosen": -0.0002058782265521586, "logps/rejected": -1.895133137702942, "loss": 0.4468, "nll_loss": 0.1116909384727478, "rewards/accuracies": 1.0, "rewards/chosen": -2.058782229141798e-05, "rewards/margins": 0.18949273228645325, "rewards/rejected": -0.18951331079006195, "step": 9094 }, { "epoch": 6.289764868603043, "grad_norm": 4.499237060546875, "learning_rate": 2.0612417396649763e-05, "log_odds_chosen": 11.613418579101562, "log_odds_ratio": -4.268988413969055e-05, "logits/chosen": -0.33803707361221313, "logits/rejected": -0.44322025775909424, "logps/chosen": -0.00025714325602166355, "logps/rejected": -3.2149362564086914, "loss": 0.4654, "nll_loss": 0.11635729670524597, "rewards/accuracies": 1.0, "rewards/chosen": -2.571432378317695e-05, "rewards/margins": 0.32146793603897095, "rewards/rejected": -0.32149362564086914, "step": 9095 }, { "epoch": 6.29045643153527, "grad_norm": 5.892808437347412, "learning_rate": 2.0608575380359612e-05, "log_odds_chosen": 10.895482063293457, "log_odds_ratio": -8.759888442000374e-05, "logits/chosen": -0.38643354177474976, "logits/rejected": -0.507982075214386, "logps/chosen": -0.0005721809575334191, "logps/rejected": -2.772576332092285, "loss": 0.4919, "nll_loss": 0.12295990437269211, "rewards/accuracies": 1.0, "rewards/chosen": -5.721809066017158e-05, "rewards/margins": 0.2772004306316376, "rewards/rejected": -0.27725762128829956, "step": 9096 }, { "epoch": 6.291147994467496, "grad_norm": 4.480569362640381, "learning_rate": 2.0604733364069465e-05, "log_odds_chosen": 11.518074989318848, "log_odds_ratio": -2.6272582545061596e-05, "logits/chosen": -0.15712255239486694, "logits/rejected": -0.21012672781944275, "logps/chosen": -0.00012038549175485969, "logps/rejected": -2.594068765640259, "loss": 0.5891, "nll_loss": 0.14728417992591858, "rewards/accuracies": 1.0, "rewards/chosen": -1.2038549357384909e-05, "rewards/margins": 0.2593948543071747, "rewards/rejected": -0.2594068646430969, "step": 9097 }, { "epoch": 6.291839557399723, "grad_norm": 14.674330711364746, "learning_rate": 2.0600891347779317e-05, "log_odds_chosen": 10.980438232421875, "log_odds_ratio": -5.24193346791435e-05, "logits/chosen": -0.318036288022995, "logits/rejected": -0.34022057056427, "logps/chosen": -0.00012784292630385607, "logps/rejected": -2.103323221206665, "loss": 0.4618, "nll_loss": 0.11543399095535278, "rewards/accuracies": 1.0, "rewards/chosen": -1.2784292266587727e-05, "rewards/margins": 0.21031954884529114, "rewards/rejected": -0.21033233404159546, "step": 9098 }, { "epoch": 6.29253112033195, "grad_norm": 7.994041442871094, "learning_rate": 2.0597049331489166e-05, "log_odds_chosen": 10.096479415893555, "log_odds_ratio": -0.000200098060304299, "logits/chosen": -0.2078363597393036, "logits/rejected": -0.2426210343837738, "logps/chosen": -0.00042861944530159235, "logps/rejected": -1.877027988433838, "loss": 0.5436, "nll_loss": 0.1358749121427536, "rewards/accuracies": 1.0, "rewards/chosen": -4.286194598535076e-05, "rewards/margins": 0.18765994906425476, "rewards/rejected": -0.18770280480384827, "step": 9099 }, { "epoch": 6.293222683264177, "grad_norm": 5.093844413757324, "learning_rate": 2.059320731519902e-05, "log_odds_chosen": 11.225149154663086, "log_odds_ratio": -2.9234739486128092e-05, "logits/chosen": -0.14554114639759064, "logits/rejected": -0.16985180974006653, "logps/chosen": -0.0001269277709070593, "logps/rejected": -2.1995105743408203, "loss": 0.5332, "nll_loss": 0.1332854926586151, "rewards/accuracies": 1.0, "rewards/chosen": -1.269277618121123e-05, "rewards/margins": 0.21993838250637054, "rewards/rejected": -0.2199510633945465, "step": 9100 }, { "epoch": 6.293914246196404, "grad_norm": 7.994125843048096, "learning_rate": 2.0589365298908868e-05, "log_odds_chosen": 11.543903350830078, "log_odds_ratio": -1.968575088540092e-05, "logits/chosen": -0.5534186363220215, "logits/rejected": -0.7465137243270874, "logps/chosen": -6.856806430732831e-05, "logps/rejected": -1.8713579177856445, "loss": 0.8615, "nll_loss": 0.21538425981998444, "rewards/accuracies": 1.0, "rewards/chosen": -6.856806976429652e-06, "rewards/margins": 0.187128946185112, "rewards/rejected": -0.18713578581809998, "step": 9101 }, { "epoch": 6.2946058091286305, "grad_norm": 9.89396858215332, "learning_rate": 2.0585523282618717e-05, "log_odds_chosen": 9.128700256347656, "log_odds_ratio": -0.008632665500044823, "logits/chosen": -0.5671526789665222, "logits/rejected": -0.5252231359481812, "logps/chosen": -0.0046012732200324535, "logps/rejected": -1.1975053548812866, "loss": 0.6559, "nll_loss": 0.16312405467033386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046012733946554363, "rewards/margins": 0.11929042637348175, "rewards/rejected": -0.11975055187940598, "step": 9102 }, { "epoch": 6.295297372060857, "grad_norm": 5.3626933097839355, "learning_rate": 2.058168126632857e-05, "log_odds_chosen": 10.341206550598145, "log_odds_ratio": -6.292940815910697e-05, "logits/chosen": -0.7664812803268433, "logits/rejected": -0.827400267124176, "logps/chosen": -0.0005252526607364416, "logps/rejected": -1.9347258806228638, "loss": 0.4056, "nll_loss": 0.1013893336057663, "rewards/accuracies": 1.0, "rewards/chosen": -5.25252653460484e-05, "rewards/margins": 0.19342006742954254, "rewards/rejected": -0.19347259402275085, "step": 9103 }, { "epoch": 6.295988934993084, "grad_norm": 7.762931823730469, "learning_rate": 2.0577839250038422e-05, "log_odds_chosen": 11.329391479492188, "log_odds_ratio": -0.0001028580591082573, "logits/chosen": -0.2265159785747528, "logits/rejected": -0.3073858320713043, "logps/chosen": -0.0003771249030251056, "logps/rejected": -3.12196683883667, "loss": 0.6758, "nll_loss": 0.16894958913326263, "rewards/accuracies": 1.0, "rewards/chosen": -3.7712492485297844e-05, "rewards/margins": 0.3121589720249176, "rewards/rejected": -0.31219667196273804, "step": 9104 }, { "epoch": 6.296680497925311, "grad_norm": 6.711189270019531, "learning_rate": 2.057399723374827e-05, "log_odds_chosen": 10.471979141235352, "log_odds_ratio": -4.5370179577730596e-05, "logits/chosen": -0.34325867891311646, "logits/rejected": -0.3504777252674103, "logps/chosen": -0.0005373624735511839, "logps/rejected": -2.595447540283203, "loss": 0.5405, "nll_loss": 0.13513071835041046, "rewards/accuracies": 1.0, "rewards/chosen": -5.373624662752263e-05, "rewards/margins": 0.2594910264015198, "rewards/rejected": -0.2595447599887848, "step": 9105 }, { "epoch": 6.297372060857538, "grad_norm": 5.516439914703369, "learning_rate": 2.0570155217458123e-05, "log_odds_chosen": 10.464244842529297, "log_odds_ratio": -0.00011570812057470903, "logits/chosen": -0.48608943819999695, "logits/rejected": -0.6556553244590759, "logps/chosen": -0.0003440856817178428, "logps/rejected": -2.043456554412842, "loss": 0.9232, "nll_loss": 0.23078015446662903, "rewards/accuracies": 1.0, "rewards/chosen": -3.440857108216733e-05, "rewards/margins": 0.20431123673915863, "rewards/rejected": -0.20434564352035522, "step": 9106 }, { "epoch": 6.298063623789765, "grad_norm": 8.259562492370605, "learning_rate": 2.0566313201167976e-05, "log_odds_chosen": 10.183019638061523, "log_odds_ratio": -0.00031526273232884705, "logits/chosen": -0.6100044250488281, "logits/rejected": -0.5913538932800293, "logps/chosen": -0.0004313396639190614, "logps/rejected": -1.5800104141235352, "loss": 0.5025, "nll_loss": 0.12558498978614807, "rewards/accuracies": 1.0, "rewards/chosen": -4.3133968574693426e-05, "rewards/margins": 0.15795791149139404, "rewards/rejected": -0.15800105035305023, "step": 9107 }, { "epoch": 6.2987551867219915, "grad_norm": 5.118277549743652, "learning_rate": 2.0562471184877825e-05, "log_odds_chosen": 9.261262893676758, "log_odds_ratio": -0.001336643472313881, "logits/chosen": -0.7850015163421631, "logits/rejected": -0.7912291288375854, "logps/chosen": -0.002515049185603857, "logps/rejected": -1.6835026741027832, "loss": 0.4578, "nll_loss": 0.11431378126144409, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025150494184345007, "rewards/margins": 0.16809876263141632, "rewards/rejected": -0.16835026443004608, "step": 9108 }, { "epoch": 6.299446749654218, "grad_norm": 10.098360061645508, "learning_rate": 2.0558629168587677e-05, "log_odds_chosen": 10.201295852661133, "log_odds_ratio": -7.733918027952313e-05, "logits/chosen": -0.8221825361251831, "logits/rejected": -0.7927052974700928, "logps/chosen": -0.00042313465382903814, "logps/rejected": -2.2757129669189453, "loss": 1.1279, "nll_loss": 0.2819552421569824, "rewards/accuracies": 1.0, "rewards/chosen": -4.231346247252077e-05, "rewards/margins": 0.22752898931503296, "rewards/rejected": -0.2275713086128235, "step": 9109 }, { "epoch": 6.300138312586445, "grad_norm": 9.635416984558105, "learning_rate": 2.0554787152297526e-05, "log_odds_chosen": 10.860652923583984, "log_odds_ratio": -8.556530519854277e-05, "logits/chosen": -0.5292823910713196, "logits/rejected": -0.528170108795166, "logps/chosen": -0.0007029472617432475, "logps/rejected": -2.466893434524536, "loss": 0.9067, "nll_loss": 0.2266591489315033, "rewards/accuracies": 1.0, "rewards/chosen": -7.02947290847078e-05, "rewards/margins": 0.24661904573440552, "rewards/rejected": -0.2466893196105957, "step": 9110 }, { "epoch": 6.300829875518672, "grad_norm": 11.15628719329834, "learning_rate": 2.0550945136007375e-05, "log_odds_chosen": 9.115913391113281, "log_odds_ratio": -0.2130754590034485, "logits/chosen": -0.5340465307235718, "logits/rejected": -0.5288445353507996, "logps/chosen": -0.02381826378405094, "logps/rejected": -1.7315921783447266, "loss": 0.6328, "nll_loss": 0.1369016170501709, "rewards/accuracies": 0.875, "rewards/chosen": -0.0023818262852728367, "rewards/margins": 0.17077738046646118, "rewards/rejected": -0.17315921187400818, "step": 9111 }, { "epoch": 6.301521438450899, "grad_norm": 8.721664428710938, "learning_rate": 2.0547103119717228e-05, "log_odds_chosen": 10.03692626953125, "log_odds_ratio": -0.0002028129529207945, "logits/chosen": -0.26854532957077026, "logits/rejected": -0.32714080810546875, "logps/chosen": -0.0004822782357223332, "logps/rejected": -1.737931489944458, "loss": 0.521, "nll_loss": 0.130230113863945, "rewards/accuracies": 1.0, "rewards/chosen": -4.822781920665875e-05, "rewards/margins": 0.17374494671821594, "rewards/rejected": -0.17379315197467804, "step": 9112 }, { "epoch": 6.302213001383126, "grad_norm": 7.221090793609619, "learning_rate": 2.054326110342708e-05, "log_odds_chosen": 10.58332633972168, "log_odds_ratio": -3.227575507480651e-05, "logits/chosen": -0.5629743933677673, "logits/rejected": -0.6404491662979126, "logps/chosen": -0.0001708652707748115, "logps/rejected": -1.8281381130218506, "loss": 0.5051, "nll_loss": 0.12627874314785004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7086527805076912e-05, "rewards/margins": 0.18279673159122467, "rewards/rejected": -0.182813823223114, "step": 9113 }, { "epoch": 6.3029045643153525, "grad_norm": 14.374824523925781, "learning_rate": 2.053941908713693e-05, "log_odds_chosen": 11.562841415405273, "log_odds_ratio": -2.0522167687886395e-05, "logits/chosen": -0.5264410972595215, "logits/rejected": -0.5556541681289673, "logps/chosen": -0.00010420165926916525, "logps/rejected": -2.358149290084839, "loss": 0.6013, "nll_loss": 0.15032121539115906, "rewards/accuracies": 1.0, "rewards/chosen": -1.0420166290714405e-05, "rewards/margins": 0.23580452799797058, "rewards/rejected": -0.2358149290084839, "step": 9114 }, { "epoch": 6.303596127247579, "grad_norm": 4.442668437957764, "learning_rate": 2.0535577070846782e-05, "log_odds_chosen": 11.009807586669922, "log_odds_ratio": -0.00015554331184830517, "logits/chosen": -0.40942203998565674, "logits/rejected": -0.4664459228515625, "logps/chosen": -0.00044318806612864137, "logps/rejected": -2.650289535522461, "loss": 0.5712, "nll_loss": 0.14278602600097656, "rewards/accuracies": 1.0, "rewards/chosen": -4.43188073404599e-05, "rewards/margins": 0.264984667301178, "rewards/rejected": -0.2650289535522461, "step": 9115 }, { "epoch": 6.304287690179806, "grad_norm": 7.319894790649414, "learning_rate": 2.0531735054556634e-05, "log_odds_chosen": 11.91440200805664, "log_odds_ratio": -3.091490361839533e-05, "logits/chosen": 0.10696769505739212, "logits/rejected": 0.04083235189318657, "logps/chosen": -0.00016952966689132154, "logps/rejected": -2.9402096271514893, "loss": 0.6421, "nll_loss": 0.16051530838012695, "rewards/accuracies": 1.0, "rewards/chosen": -1.6952966689132154e-05, "rewards/margins": 0.29400402307510376, "rewards/rejected": -0.29402095079421997, "step": 9116 }, { "epoch": 6.304979253112033, "grad_norm": 8.480218887329102, "learning_rate": 2.0527893038266483e-05, "log_odds_chosen": 9.917200088500977, "log_odds_ratio": -0.0001736325357342139, "logits/chosen": -0.692557692527771, "logits/rejected": -0.7411516904830933, "logps/chosen": -0.0005900151445530355, "logps/rejected": -1.5346190929412842, "loss": 0.6832, "nll_loss": 0.1707778424024582, "rewards/accuracies": 1.0, "rewards/chosen": -5.9001511544920504e-05, "rewards/margins": 0.1534029245376587, "rewards/rejected": -0.15346190333366394, "step": 9117 }, { "epoch": 6.30567081604426, "grad_norm": 9.101211547851562, "learning_rate": 2.0524051021976336e-05, "log_odds_chosen": 11.44467544555664, "log_odds_ratio": -1.538614924356807e-05, "logits/chosen": -0.3148750066757202, "logits/rejected": -0.36211660504341125, "logps/chosen": -0.000133889916469343, "logps/rejected": -2.597325325012207, "loss": 0.728, "nll_loss": 0.1820085346698761, "rewards/accuracies": 1.0, "rewards/chosen": -1.33889916469343e-05, "rewards/margins": 0.2597191631793976, "rewards/rejected": -0.25973254442214966, "step": 9118 }, { "epoch": 6.306362378976487, "grad_norm": 8.270613670349121, "learning_rate": 2.0520209005686185e-05, "log_odds_chosen": 11.105756759643555, "log_odds_ratio": -3.122861380688846e-05, "logits/chosen": -0.34265416860580444, "logits/rejected": -0.4220009744167328, "logps/chosen": -0.0003007478080689907, "logps/rejected": -2.6179518699645996, "loss": 0.6988, "nll_loss": 0.17468640208244324, "rewards/accuracies": 1.0, "rewards/chosen": -3.0074781534494832e-05, "rewards/margins": 0.26176509261131287, "rewards/rejected": -0.26179519295692444, "step": 9119 }, { "epoch": 6.3070539419087135, "grad_norm": 5.276710033416748, "learning_rate": 2.0516366989396034e-05, "log_odds_chosen": 9.563478469848633, "log_odds_ratio": -0.0029228893108665943, "logits/chosen": -0.5020996928215027, "logits/rejected": -0.5588923096656799, "logps/chosen": -0.0016042347997426987, "logps/rejected": -1.2131743431091309, "loss": 0.5278, "nll_loss": 0.1316453069448471, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016042348579503596, "rewards/margins": 0.12115702033042908, "rewards/rejected": -0.12131744623184204, "step": 9120 }, { "epoch": 6.30774550484094, "grad_norm": 4.691458225250244, "learning_rate": 2.0512524973105886e-05, "log_odds_chosen": 10.797635078430176, "log_odds_ratio": -0.00011942382843699306, "logits/chosen": -0.8213660717010498, "logits/rejected": -0.8382663726806641, "logps/chosen": -0.00017635921540204436, "logps/rejected": -2.3420279026031494, "loss": 0.3518, "nll_loss": 0.08794598281383514, "rewards/accuracies": 1.0, "rewards/chosen": -1.7635920812608674e-05, "rewards/margins": 0.234185129404068, "rewards/rejected": -0.2342027872800827, "step": 9121 }, { "epoch": 6.308437067773167, "grad_norm": 6.859457969665527, "learning_rate": 2.050868295681574e-05, "log_odds_chosen": 10.081676483154297, "log_odds_ratio": -8.843156683724374e-05, "logits/chosen": -0.27068910002708435, "logits/rejected": -0.39853787422180176, "logps/chosen": -0.0002941065758932382, "logps/rejected": -1.7044429779052734, "loss": 0.6262, "nll_loss": 0.1565401256084442, "rewards/accuracies": 1.0, "rewards/chosen": -2.941065758932382e-05, "rewards/margins": 0.17041489481925964, "rewards/rejected": -0.1704443097114563, "step": 9122 }, { "epoch": 6.309128630705394, "grad_norm": 8.61195182800293, "learning_rate": 2.0504840940525588e-05, "log_odds_chosen": 10.604068756103516, "log_odds_ratio": -4.5928445615572855e-05, "logits/chosen": -0.4395691454410553, "logits/rejected": -0.49882519245147705, "logps/chosen": -0.0006089697126299143, "logps/rejected": -2.2564098834991455, "loss": 0.5378, "nll_loss": 0.13443604111671448, "rewards/accuracies": 1.0, "rewards/chosen": -6.089697126299143e-05, "rewards/margins": 0.225580096244812, "rewards/rejected": -0.22564098238945007, "step": 9123 }, { "epoch": 6.309820193637621, "grad_norm": 8.427109718322754, "learning_rate": 2.050099892423544e-05, "log_odds_chosen": 10.184764862060547, "log_odds_ratio": -0.0012439328711479902, "logits/chosen": -0.15111692249774933, "logits/rejected": -0.2543826997280121, "logps/chosen": -0.0011011157184839249, "logps/rejected": -2.358288288116455, "loss": 0.6526, "nll_loss": 0.16302835941314697, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011011157766915858, "rewards/margins": 0.2357187122106552, "rewards/rejected": -0.23582881689071655, "step": 9124 }, { "epoch": 6.310511756569848, "grad_norm": 8.313289642333984, "learning_rate": 2.0497156907945293e-05, "log_odds_chosen": 11.648088455200195, "log_odds_ratio": -1.4956855011405423e-05, "logits/chosen": -0.41185325384140015, "logits/rejected": -0.4594433903694153, "logps/chosen": -6.538411980727687e-05, "logps/rejected": -1.9124966859817505, "loss": 0.5214, "nll_loss": 0.1303468942642212, "rewards/accuracies": 1.0, "rewards/chosen": -6.5384119807276875e-06, "rewards/margins": 0.19124312698841095, "rewards/rejected": -0.19124966859817505, "step": 9125 }, { "epoch": 6.3112033195020745, "grad_norm": 6.2132039070129395, "learning_rate": 2.0493314891655142e-05, "log_odds_chosen": 10.96221923828125, "log_odds_ratio": -0.00021452337387017906, "logits/chosen": -0.42153963446617126, "logits/rejected": -0.4461123049259186, "logps/chosen": -0.00041328402585349977, "logps/rejected": -2.3031129837036133, "loss": 0.4764, "nll_loss": 0.11907409131526947, "rewards/accuracies": 1.0, "rewards/chosen": -4.1328406950924546e-05, "rewards/margins": 0.23026996850967407, "rewards/rejected": -0.2303113043308258, "step": 9126 }, { "epoch": 6.311894882434301, "grad_norm": 5.806750774383545, "learning_rate": 2.0489472875364994e-05, "log_odds_chosen": 10.953788757324219, "log_odds_ratio": -2.7453637812868692e-05, "logits/chosen": -0.5027472376823425, "logits/rejected": -0.5538997054100037, "logps/chosen": -0.0002139663847628981, "logps/rejected": -2.0388693809509277, "loss": 1.021, "nll_loss": 0.2552356719970703, "rewards/accuracies": 1.0, "rewards/chosen": -2.1396637748694047e-05, "rewards/margins": 0.20386554300785065, "rewards/rejected": -0.203886941075325, "step": 9127 }, { "epoch": 6.312586445366528, "grad_norm": 6.384781837463379, "learning_rate": 2.0485630859074843e-05, "log_odds_chosen": 11.070930480957031, "log_odds_ratio": -2.5265617296099663e-05, "logits/chosen": -0.4062907099723816, "logits/rejected": -0.49823057651519775, "logps/chosen": -0.0001699151616776362, "logps/rejected": -2.390195846557617, "loss": 0.8184, "nll_loss": 0.20460784435272217, "rewards/accuracies": 1.0, "rewards/chosen": -1.69915165315615e-05, "rewards/margins": 0.23900258541107178, "rewards/rejected": -0.23901957273483276, "step": 9128 }, { "epoch": 6.313278008298755, "grad_norm": 9.393410682678223, "learning_rate": 2.0481788842784692e-05, "log_odds_chosen": 10.473282814025879, "log_odds_ratio": -0.005252666771411896, "logits/chosen": -0.12318453937768936, "logits/rejected": -0.1653863489627838, "logps/chosen": -0.03536149486899376, "logps/rejected": -2.7704577445983887, "loss": 0.5438, "nll_loss": 0.13542786240577698, "rewards/accuracies": 1.0, "rewards/chosen": -0.003536149626597762, "rewards/margins": 0.2735096216201782, "rewards/rejected": -0.2770457863807678, "step": 9129 }, { "epoch": 6.313969571230982, "grad_norm": 4.986472129821777, "learning_rate": 2.0477946826494545e-05, "log_odds_chosen": 10.974739074707031, "log_odds_ratio": -6.281452806433663e-05, "logits/chosen": -0.6672524213790894, "logits/rejected": -0.7109086513519287, "logps/chosen": -0.000695232767611742, "logps/rejected": -3.159275531768799, "loss": 0.6801, "nll_loss": 0.17002885043621063, "rewards/accuracies": 1.0, "rewards/chosen": -6.952328112674877e-05, "rewards/margins": 0.31585806608200073, "rewards/rejected": -0.31592756509780884, "step": 9130 }, { "epoch": 6.314661134163209, "grad_norm": 6.2385430335998535, "learning_rate": 2.0474104810204397e-05, "log_odds_chosen": 10.428638458251953, "log_odds_ratio": -9.423011215403676e-05, "logits/chosen": 0.144125834107399, "logits/rejected": 0.09078823775053024, "logps/chosen": -0.0005835729534737766, "logps/rejected": -1.8538960218429565, "loss": 0.5353, "nll_loss": 0.13381007313728333, "rewards/accuracies": 1.0, "rewards/chosen": -5.835729098180309e-05, "rewards/margins": 0.18533125519752502, "rewards/rejected": -0.18538960814476013, "step": 9131 }, { "epoch": 6.3153526970954355, "grad_norm": 5.815805435180664, "learning_rate": 2.0470262793914246e-05, "log_odds_chosen": 10.13248062133789, "log_odds_ratio": -8.74106481205672e-05, "logits/chosen": -0.2938426733016968, "logits/rejected": -0.3387156128883362, "logps/chosen": -0.000201555565581657, "logps/rejected": -1.6588993072509766, "loss": 0.4176, "nll_loss": 0.10439470410346985, "rewards/accuracies": 1.0, "rewards/chosen": -2.0155555830569938e-05, "rewards/margins": 0.1658697873353958, "rewards/rejected": -0.1658899337053299, "step": 9132 }, { "epoch": 6.316044260027662, "grad_norm": 6.086231708526611, "learning_rate": 2.04664207776241e-05, "log_odds_chosen": 10.607781410217285, "log_odds_ratio": -0.0001166929941973649, "logits/chosen": -0.4167684316635132, "logits/rejected": -0.4740995764732361, "logps/chosen": -0.000221387977944687, "logps/rejected": -2.036670207977295, "loss": 0.6779, "nll_loss": 0.16946941614151, "rewards/accuracies": 1.0, "rewards/chosen": -2.2138799977255985e-05, "rewards/margins": 0.20364490151405334, "rewards/rejected": -0.2036670446395874, "step": 9133 }, { "epoch": 6.316735822959889, "grad_norm": 4.986376762390137, "learning_rate": 2.046257876133395e-05, "log_odds_chosen": 11.060983657836914, "log_odds_ratio": -8.120344864437357e-05, "logits/chosen": -0.2317226231098175, "logits/rejected": -0.25166720151901245, "logps/chosen": -0.000272795237833634, "logps/rejected": -2.5201592445373535, "loss": 0.6768, "nll_loss": 0.16920101642608643, "rewards/accuracies": 1.0, "rewards/chosen": -2.7279524147161283e-05, "rewards/margins": 0.25198864936828613, "rewards/rejected": -0.25201594829559326, "step": 9134 }, { "epoch": 6.317427385892116, "grad_norm": 5.949633598327637, "learning_rate": 2.04587367450438e-05, "log_odds_chosen": 10.249530792236328, "log_odds_ratio": -0.00014744520012754947, "logits/chosen": -0.22081822156906128, "logits/rejected": -0.32150161266326904, "logps/chosen": -0.0003919299051631242, "logps/rejected": -1.9968299865722656, "loss": 0.4439, "nll_loss": 0.11095862090587616, "rewards/accuracies": 1.0, "rewards/chosen": -3.919298615073785e-05, "rewards/margins": 0.1996438056230545, "rewards/rejected": -0.19968298077583313, "step": 9135 }, { "epoch": 6.318118948824343, "grad_norm": 7.3173065185546875, "learning_rate": 2.0454894728753653e-05, "log_odds_chosen": 10.903233528137207, "log_odds_ratio": -0.00016012144624255598, "logits/chosen": -0.3240607678890228, "logits/rejected": -0.43824082612991333, "logps/chosen": -0.0012418123660609126, "logps/rejected": -2.7618508338928223, "loss": 0.7438, "nll_loss": 0.18593566119670868, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012418124242685735, "rewards/margins": 0.27606093883514404, "rewards/rejected": -0.2761850953102112, "step": 9136 }, { "epoch": 6.31881051175657, "grad_norm": 5.717310905456543, "learning_rate": 2.04510527124635e-05, "log_odds_chosen": 10.645530700683594, "log_odds_ratio": -4.525161057244986e-05, "logits/chosen": -0.49670839309692383, "logits/rejected": -0.5754987597465515, "logps/chosen": -0.00016813335241749883, "logps/rejected": -1.8609609603881836, "loss": 0.5286, "nll_loss": 0.13215577602386475, "rewards/accuracies": 1.0, "rewards/chosen": -1.681333378655836e-05, "rewards/margins": 0.18607929348945618, "rewards/rejected": -0.18609611690044403, "step": 9137 }, { "epoch": 6.319502074688796, "grad_norm": 3.6851601600646973, "learning_rate": 2.044721069617335e-05, "log_odds_chosen": 11.53646469116211, "log_odds_ratio": -2.4523029424017295e-05, "logits/chosen": -0.35948729515075684, "logits/rejected": -0.32746779918670654, "logps/chosen": -0.00018249072309117764, "logps/rejected": -2.788064479827881, "loss": 0.4728, "nll_loss": 0.11818571388721466, "rewards/accuracies": 1.0, "rewards/chosen": -1.8249073036713526e-05, "rewards/margins": 0.2787882387638092, "rewards/rejected": -0.2788064777851105, "step": 9138 }, { "epoch": 6.320193637621023, "grad_norm": 6.905288219451904, "learning_rate": 2.0443368679883203e-05, "log_odds_chosen": 10.342803001403809, "log_odds_ratio": -5.947341560386121e-05, "logits/chosen": -0.4630863070487976, "logits/rejected": -0.570403516292572, "logps/chosen": -0.0002199176378780976, "logps/rejected": -1.5150375366210938, "loss": 0.5436, "nll_loss": 0.13588204979896545, "rewards/accuracies": 1.0, "rewards/chosen": -2.1991765606799163e-05, "rewards/margins": 0.1514817476272583, "rewards/rejected": -0.15150374174118042, "step": 9139 }, { "epoch": 6.32088520055325, "grad_norm": 7.062165260314941, "learning_rate": 2.0439526663593052e-05, "log_odds_chosen": 10.084284782409668, "log_odds_ratio": -0.000260809320025146, "logits/chosen": -0.19906927645206451, "logits/rejected": -0.24244311451911926, "logps/chosen": -0.00016882145428098738, "logps/rejected": -1.760647177696228, "loss": 0.7813, "nll_loss": 0.19529539346694946, "rewards/accuracies": 1.0, "rewards/chosen": -1.6882144336705096e-05, "rewards/margins": 0.17604786157608032, "rewards/rejected": -0.17606472969055176, "step": 9140 }, { "epoch": 6.321576763485477, "grad_norm": 10.630314826965332, "learning_rate": 2.0435684647302905e-05, "log_odds_chosen": 9.74234676361084, "log_odds_ratio": -0.00018264676327817142, "logits/chosen": -0.09850164502859116, "logits/rejected": -0.19318366050720215, "logps/chosen": -0.0005119048291817307, "logps/rejected": -1.7797728776931763, "loss": 0.5908, "nll_loss": 0.14769017696380615, "rewards/accuracies": 1.0, "rewards/chosen": -5.1190483645768836e-05, "rewards/margins": 0.17792612314224243, "rewards/rejected": -0.1779772937297821, "step": 9141 }, { "epoch": 6.322268326417704, "grad_norm": 5.875476360321045, "learning_rate": 2.0431842631012757e-05, "log_odds_chosen": 10.273632049560547, "log_odds_ratio": -0.0002673549752216786, "logits/chosen": -0.5241735577583313, "logits/rejected": -0.5758167505264282, "logps/chosen": -0.00015015172539278865, "logps/rejected": -1.5837026834487915, "loss": 0.6094, "nll_loss": 0.15232551097869873, "rewards/accuracies": 1.0, "rewards/chosen": -1.5015171811683103e-05, "rewards/margins": 0.15835526585578918, "rewards/rejected": -0.1583702713251114, "step": 9142 }, { "epoch": 6.322959889349931, "grad_norm": 15.639997482299805, "learning_rate": 2.0428000614722606e-05, "log_odds_chosen": 11.281831741333008, "log_odds_ratio": -5.805850014439784e-05, "logits/chosen": -0.7342573404312134, "logits/rejected": -0.7432233095169067, "logps/chosen": -0.00012704191613011062, "logps/rejected": -2.3933351039886475, "loss": 0.6757, "nll_loss": 0.16891874372959137, "rewards/accuracies": 1.0, "rewards/chosen": -1.2704192158707883e-05, "rewards/margins": 0.2393207997083664, "rewards/rejected": -0.23933351039886475, "step": 9143 }, { "epoch": 6.323651452282157, "grad_norm": 6.517669677734375, "learning_rate": 2.042415859843246e-05, "log_odds_chosen": 9.65914249420166, "log_odds_ratio": -0.00022453966084867716, "logits/chosen": -0.19278670847415924, "logits/rejected": -0.03370809555053711, "logps/chosen": -0.00017699523596093059, "logps/rejected": -1.4082324504852295, "loss": 0.9838, "nll_loss": 0.24593743681907654, "rewards/accuracies": 1.0, "rewards/chosen": -1.7699525415082462e-05, "rewards/margins": 0.14080555737018585, "rewards/rejected": -0.14082324504852295, "step": 9144 }, { "epoch": 6.324343015214384, "grad_norm": 9.895349502563477, "learning_rate": 2.042031658214231e-05, "log_odds_chosen": 10.371511459350586, "log_odds_ratio": -0.0004187318554613739, "logits/chosen": -0.939992368221283, "logits/rejected": -0.9398999214172363, "logps/chosen": -0.0003308483865112066, "logps/rejected": -2.229236602783203, "loss": 0.612, "nll_loss": 0.15294599533081055, "rewards/accuracies": 1.0, "rewards/chosen": -3.308484156150371e-05, "rewards/margins": 0.22289058566093445, "rewards/rejected": -0.2229236662387848, "step": 9145 }, { "epoch": 6.325034578146611, "grad_norm": 14.121508598327637, "learning_rate": 2.041647456585216e-05, "log_odds_chosen": 10.612939834594727, "log_odds_ratio": -5.0611692131496966e-05, "logits/chosen": -0.3637595474720001, "logits/rejected": -0.39835870265960693, "logps/chosen": -0.0002669534587766975, "logps/rejected": -2.183626174926758, "loss": 0.6725, "nll_loss": 0.1681094914674759, "rewards/accuracies": 1.0, "rewards/chosen": -2.6695346605265513e-05, "rewards/margins": 0.21833594143390656, "rewards/rejected": -0.21836264431476593, "step": 9146 }, { "epoch": 6.325726141078838, "grad_norm": 6.091559886932373, "learning_rate": 2.041263254956201e-05, "log_odds_chosen": 10.744900703430176, "log_odds_ratio": -4.4846929085906595e-05, "logits/chosen": -0.2999820113182068, "logits/rejected": -0.3527391254901886, "logps/chosen": -0.00013493587903212756, "logps/rejected": -1.8932716846466064, "loss": 0.4416, "nll_loss": 0.11038561910390854, "rewards/accuracies": 1.0, "rewards/chosen": -1.3493588085111696e-05, "rewards/margins": 0.18931367993354797, "rewards/rejected": -0.1893271803855896, "step": 9147 }, { "epoch": 6.326417704011065, "grad_norm": 6.129518985748291, "learning_rate": 2.040879053327186e-05, "log_odds_chosen": 10.612061500549316, "log_odds_ratio": -0.00012798480747733265, "logits/chosen": -0.22341987490653992, "logits/rejected": -0.2803928554058075, "logps/chosen": -0.0003002454759553075, "logps/rejected": -1.9529635906219482, "loss": 0.5286, "nll_loss": 0.13213306665420532, "rewards/accuracies": 1.0, "rewards/chosen": -3.0024551961105317e-05, "rewards/margins": 0.19526633620262146, "rewards/rejected": -0.19529634714126587, "step": 9148 }, { "epoch": 6.327109266943292, "grad_norm": 4.429156303405762, "learning_rate": 2.040494851698171e-05, "log_odds_chosen": 10.688800811767578, "log_odds_ratio": -7.709318015258759e-05, "logits/chosen": -0.19439886510372162, "logits/rejected": -0.2739133834838867, "logps/chosen": -0.00029584753792732954, "logps/rejected": -2.3238232135772705, "loss": 0.6883, "nll_loss": 0.17206960916519165, "rewards/accuracies": 1.0, "rewards/chosen": -2.9584754884126596e-05, "rewards/margins": 0.23235273361206055, "rewards/rejected": -0.23238232731819153, "step": 9149 }, { "epoch": 6.327800829875518, "grad_norm": 5.260585784912109, "learning_rate": 2.0401106500691563e-05, "log_odds_chosen": 10.529271125793457, "log_odds_ratio": -6.538509478559718e-05, "logits/chosen": -0.301781564950943, "logits/rejected": -0.31411588191986084, "logps/chosen": -0.00021967320935800672, "logps/rejected": -2.090834856033325, "loss": 0.5069, "nll_loss": 0.12671947479248047, "rewards/accuracies": 1.0, "rewards/chosen": -2.196732020820491e-05, "rewards/margins": 0.20906151831150055, "rewards/rejected": -0.20908348262310028, "step": 9150 }, { "epoch": 6.328492392807745, "grad_norm": 5.937831401824951, "learning_rate": 2.0397264484401416e-05, "log_odds_chosen": 10.982461929321289, "log_odds_ratio": -4.1540384700056165e-05, "logits/chosen": -0.37165072560310364, "logits/rejected": -0.42236626148223877, "logps/chosen": -0.000152397362398915, "logps/rejected": -1.7052466869354248, "loss": 0.3998, "nll_loss": 0.09994690120220184, "rewards/accuracies": 1.0, "rewards/chosen": -1.5239737876981962e-05, "rewards/margins": 0.1705094277858734, "rewards/rejected": -0.17052467167377472, "step": 9151 }, { "epoch": 6.329183955739972, "grad_norm": 5.322743892669678, "learning_rate": 2.0393422468111265e-05, "log_odds_chosen": 11.560039520263672, "log_odds_ratio": -7.76972301537171e-05, "logits/chosen": -0.15101279318332672, "logits/rejected": -0.20717932283878326, "logps/chosen": -9.647566912462935e-05, "logps/rejected": -2.2673559188842773, "loss": 0.6161, "nll_loss": 0.1540297269821167, "rewards/accuracies": 1.0, "rewards/chosen": -9.647566912462935e-06, "rewards/margins": 0.2267259657382965, "rewards/rejected": -0.22673562169075012, "step": 9152 }, { "epoch": 6.329875518672199, "grad_norm": 9.544047355651855, "learning_rate": 2.0389580451821117e-05, "log_odds_chosen": 10.044012069702148, "log_odds_ratio": -0.00019923794025089592, "logits/chosen": -0.8574331402778625, "logits/rejected": -0.8163395524024963, "logps/chosen": -0.00047073009773157537, "logps/rejected": -2.0109262466430664, "loss": 1.0646, "nll_loss": 0.26612648367881775, "rewards/accuracies": 1.0, "rewards/chosen": -4.7073008317966014e-05, "rewards/margins": 0.20104554295539856, "rewards/rejected": -0.20109263062477112, "step": 9153 }, { "epoch": 6.330567081604426, "grad_norm": 6.393756866455078, "learning_rate": 2.038573843553097e-05, "log_odds_chosen": 9.159208297729492, "log_odds_ratio": -0.00200686976313591, "logits/chosen": -0.35502588748931885, "logits/rejected": -0.3517462909221649, "logps/chosen": -0.0012916001724079251, "logps/rejected": -1.1428102254867554, "loss": 0.6195, "nll_loss": 0.15467888116836548, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001291600347030908, "rewards/margins": 0.11415186524391174, "rewards/rejected": -0.11428102105855942, "step": 9154 }, { "epoch": 6.3312586445366525, "grad_norm": 5.2988972663879395, "learning_rate": 2.038189641924082e-05, "log_odds_chosen": 11.045205116271973, "log_odds_ratio": -3.658358036773279e-05, "logits/chosen": -0.5847592949867249, "logits/rejected": -0.4950445592403412, "logps/chosen": -0.00017093573114834726, "logps/rejected": -2.2229764461517334, "loss": 0.3643, "nll_loss": 0.09106318652629852, "rewards/accuracies": 1.0, "rewards/chosen": -1.7093572751036845e-05, "rewards/margins": 0.2222805619239807, "rewards/rejected": -0.22229765355587006, "step": 9155 }, { "epoch": 6.331950207468879, "grad_norm": 10.425149917602539, "learning_rate": 2.0378054402950668e-05, "log_odds_chosen": 9.705398559570312, "log_odds_ratio": -0.00030759384389966726, "logits/chosen": -0.5570382475852966, "logits/rejected": -0.5905536413192749, "logps/chosen": -0.0006824440788477659, "logps/rejected": -2.1629672050476074, "loss": 0.6773, "nll_loss": 0.16928672790527344, "rewards/accuracies": 1.0, "rewards/chosen": -6.824440788477659e-05, "rewards/margins": 0.21622847020626068, "rewards/rejected": -0.2162967175245285, "step": 9156 }, { "epoch": 6.332641770401106, "grad_norm": 5.680348873138428, "learning_rate": 2.037421238666052e-05, "log_odds_chosen": 10.483183860778809, "log_odds_ratio": -8.02238064352423e-05, "logits/chosen": -0.2764190137386322, "logits/rejected": -0.2961311340332031, "logps/chosen": -0.0003129146352875978, "logps/rejected": -2.1694741249084473, "loss": 0.5383, "nll_loss": 0.1345573365688324, "rewards/accuracies": 1.0, "rewards/chosen": -3.129146352875978e-05, "rewards/margins": 0.21691614389419556, "rewards/rejected": -0.21694743633270264, "step": 9157 }, { "epoch": 6.333333333333333, "grad_norm": 5.355869770050049, "learning_rate": 2.037037037037037e-05, "log_odds_chosen": 11.758176803588867, "log_odds_ratio": -1.2654306374315638e-05, "logits/chosen": -0.28790849447250366, "logits/rejected": -0.3816429376602173, "logps/chosen": -0.0001446415262762457, "logps/rejected": -2.7636444568634033, "loss": 0.7746, "nll_loss": 0.19364896416664124, "rewards/accuracies": 1.0, "rewards/chosen": -1.4464152627624571e-05, "rewards/margins": 0.2763499617576599, "rewards/rejected": -0.27636444568634033, "step": 9158 }, { "epoch": 6.33402489626556, "grad_norm": 7.186736583709717, "learning_rate": 2.036652835408022e-05, "log_odds_chosen": 10.33999252319336, "log_odds_ratio": -0.00015110634558368474, "logits/chosen": -0.6750638484954834, "logits/rejected": -0.5907482504844666, "logps/chosen": -0.0005631681997328997, "logps/rejected": -2.267817497253418, "loss": 0.6841, "nll_loss": 0.17102006077766418, "rewards/accuracies": 1.0, "rewards/chosen": -5.631681779050268e-05, "rewards/margins": 0.22672541439533234, "rewards/rejected": -0.22678174078464508, "step": 9159 }, { "epoch": 6.334716459197787, "grad_norm": 4.402047634124756, "learning_rate": 2.0362686337790074e-05, "log_odds_chosen": 10.460683822631836, "log_odds_ratio": -5.5046300985850394e-05, "logits/chosen": -0.535968542098999, "logits/rejected": -0.5456217527389526, "logps/chosen": -0.0004225453594699502, "logps/rejected": -1.4134366512298584, "loss": 0.5102, "nll_loss": 0.1275371015071869, "rewards/accuracies": 1.0, "rewards/chosen": -4.2254538129782304e-05, "rewards/margins": 0.14130142331123352, "rewards/rejected": -0.14134368300437927, "step": 9160 }, { "epoch": 6.3354080221300135, "grad_norm": 6.969802379608154, "learning_rate": 2.0358844321499923e-05, "log_odds_chosen": 10.595348358154297, "log_odds_ratio": -9.216701437253505e-05, "logits/chosen": -0.33821868896484375, "logits/rejected": -0.4096333980560303, "logps/chosen": -0.0007422784110531211, "logps/rejected": -2.276984691619873, "loss": 0.6899, "nll_loss": 0.17246267199516296, "rewards/accuracies": 1.0, "rewards/chosen": -7.422784256050363e-05, "rewards/margins": 0.22762425243854523, "rewards/rejected": -0.2276984602212906, "step": 9161 }, { "epoch": 6.33609958506224, "grad_norm": 8.498947143554688, "learning_rate": 2.0355002305209775e-05, "log_odds_chosen": 10.396454811096191, "log_odds_ratio": -0.0001644312433199957, "logits/chosen": -0.4233134686946869, "logits/rejected": -0.4007781445980072, "logps/chosen": -0.0004954281030222774, "logps/rejected": -1.3405135869979858, "loss": 1.0076, "nll_loss": 0.25187867879867554, "rewards/accuracies": 1.0, "rewards/chosen": -4.9542810302227736e-05, "rewards/margins": 0.13400182127952576, "rewards/rejected": -0.1340513676404953, "step": 9162 }, { "epoch": 6.336791147994467, "grad_norm": 4.9324631690979, "learning_rate": 2.0351160288919628e-05, "log_odds_chosen": 10.10614013671875, "log_odds_ratio": -0.0005629255319945514, "logits/chosen": -0.5218598246574402, "logits/rejected": -0.5570496916770935, "logps/chosen": -0.000602225074544549, "logps/rejected": -1.9400722980499268, "loss": 0.4529, "nll_loss": 0.11316834390163422, "rewards/accuracies": 1.0, "rewards/chosen": -6.022251182002947e-05, "rewards/margins": 0.19394701719284058, "rewards/rejected": -0.19400723278522491, "step": 9163 }, { "epoch": 6.337482710926694, "grad_norm": 11.972575187683105, "learning_rate": 2.0347318272629477e-05, "log_odds_chosen": 12.207606315612793, "log_odds_ratio": -7.4364847932884e-06, "logits/chosen": -0.28967320919036865, "logits/rejected": -0.2895028591156006, "logps/chosen": -7.918903429526836e-05, "logps/rejected": -2.5067331790924072, "loss": 0.7326, "nll_loss": 0.18314482271671295, "rewards/accuracies": 1.0, "rewards/chosen": -7.918903065728955e-06, "rewards/margins": 0.2506653964519501, "rewards/rejected": -0.2506733238697052, "step": 9164 }, { "epoch": 6.338174273858921, "grad_norm": 6.695245265960693, "learning_rate": 2.034347625633933e-05, "log_odds_chosen": 10.07140827178955, "log_odds_ratio": -0.0003995221049990505, "logits/chosen": -0.7729678153991699, "logits/rejected": -0.728569507598877, "logps/chosen": -0.00022771614021621644, "logps/rejected": -1.8983569145202637, "loss": 0.7834, "nll_loss": 0.19580984115600586, "rewards/accuracies": 1.0, "rewards/chosen": -2.2771615476813167e-05, "rewards/margins": 0.18981292843818665, "rewards/rejected": -0.18983569741249084, "step": 9165 }, { "epoch": 6.338865836791148, "grad_norm": 8.26303482055664, "learning_rate": 2.033963424004918e-05, "log_odds_chosen": 11.618376731872559, "log_odds_ratio": -1.5082228856044821e-05, "logits/chosen": -0.44059500098228455, "logits/rejected": -0.43658512830734253, "logps/chosen": -0.0001126268834923394, "logps/rejected": -2.456397294998169, "loss": 0.6351, "nll_loss": 0.15877383947372437, "rewards/accuracies": 1.0, "rewards/chosen": -1.1262687621638179e-05, "rewards/margins": 0.2456284761428833, "rewards/rejected": -0.24563972651958466, "step": 9166 }, { "epoch": 6.3395573997233745, "grad_norm": 4.643455982208252, "learning_rate": 2.0335792223759028e-05, "log_odds_chosen": 10.716975212097168, "log_odds_ratio": -7.84438379923813e-05, "logits/chosen": -0.6044843792915344, "logits/rejected": -0.6347651481628418, "logps/chosen": -0.000250465702265501, "logps/rejected": -2.1208696365356445, "loss": 0.5771, "nll_loss": 0.14426752924919128, "rewards/accuracies": 1.0, "rewards/chosen": -2.504656913515646e-05, "rewards/margins": 0.21206192672252655, "rewards/rejected": -0.21208696067333221, "step": 9167 }, { "epoch": 6.340248962655601, "grad_norm": 6.384197235107422, "learning_rate": 2.033195020746888e-05, "log_odds_chosen": 10.821781158447266, "log_odds_ratio": -3.1766670872457325e-05, "logits/chosen": -0.47071394324302673, "logits/rejected": -0.5132383704185486, "logps/chosen": -0.00017042181571014225, "logps/rejected": -2.046055555343628, "loss": 0.5178, "nll_loss": 0.12943808734416962, "rewards/accuracies": 1.0, "rewards/chosen": -1.704218448139727e-05, "rewards/margins": 0.20458853244781494, "rewards/rejected": -0.20460554957389832, "step": 9168 }, { "epoch": 6.340940525587828, "grad_norm": 5.672266483306885, "learning_rate": 2.0328108191178732e-05, "log_odds_chosen": 11.661087036132812, "log_odds_ratio": -1.4097817256697454e-05, "logits/chosen": -0.49641531705856323, "logits/rejected": -0.4841935634613037, "logps/chosen": -0.0004052775038871914, "logps/rejected": -2.6994376182556152, "loss": 1.0757, "nll_loss": 0.26892024278640747, "rewards/accuracies": 1.0, "rewards/chosen": -4.0527756937080994e-05, "rewards/margins": 0.2699032425880432, "rewards/rejected": -0.2699437737464905, "step": 9169 }, { "epoch": 6.341632088520055, "grad_norm": 6.265336036682129, "learning_rate": 2.032426617488858e-05, "log_odds_chosen": 11.042598724365234, "log_odds_ratio": -0.0007391467806883156, "logits/chosen": -0.11241171509027481, "logits/rejected": -0.1534719169139862, "logps/chosen": -0.0021816291846334934, "logps/rejected": -2.3804805278778076, "loss": 0.4893, "nll_loss": 0.12224604934453964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021816292428411543, "rewards/margins": 0.23782990872859955, "rewards/rejected": -0.23804807662963867, "step": 9170 }, { "epoch": 6.342323651452282, "grad_norm": 4.920318603515625, "learning_rate": 2.0320424158598434e-05, "log_odds_chosen": 10.784578323364258, "log_odds_ratio": -3.836057658190839e-05, "logits/chosen": -0.7648136615753174, "logits/rejected": -0.8611428737640381, "logps/chosen": -0.00010240989649901167, "logps/rejected": -1.7506695985794067, "loss": 0.429, "nll_loss": 0.10723373293876648, "rewards/accuracies": 1.0, "rewards/chosen": -1.0240990377496928e-05, "rewards/margins": 0.17505672574043274, "rewards/rejected": -0.1750669628381729, "step": 9171 }, { "epoch": 6.343015214384509, "grad_norm": 8.815408706665039, "learning_rate": 2.0316582142308286e-05, "log_odds_chosen": 10.180644989013672, "log_odds_ratio": -9.145465446636081e-05, "logits/chosen": -0.47253626585006714, "logits/rejected": -0.6050565242767334, "logps/chosen": -0.0005305693484842777, "logps/rejected": -2.2179205417633057, "loss": 0.6499, "nll_loss": 0.16246379911899567, "rewards/accuracies": 1.0, "rewards/chosen": -5.305693775881082e-05, "rewards/margins": 0.2217389941215515, "rewards/rejected": -0.2217920422554016, "step": 9172 }, { "epoch": 6.3437067773167355, "grad_norm": 5.342888832092285, "learning_rate": 2.0312740126018135e-05, "log_odds_chosen": 9.948837280273438, "log_odds_ratio": -0.0001233671500813216, "logits/chosen": -0.3362911343574524, "logits/rejected": -0.4858512878417969, "logps/chosen": -0.0003074869164265692, "logps/rejected": -1.777867317199707, "loss": 0.4667, "nll_loss": 0.1166590079665184, "rewards/accuracies": 1.0, "rewards/chosen": -3.074869164265692e-05, "rewards/margins": 0.17775598168373108, "rewards/rejected": -0.177786722779274, "step": 9173 }, { "epoch": 6.344398340248962, "grad_norm": 6.704127788543701, "learning_rate": 2.0308898109727988e-05, "log_odds_chosen": 10.417448043823242, "log_odds_ratio": -0.00019412532856222242, "logits/chosen": -0.227530375123024, "logits/rejected": -0.21911419928073883, "logps/chosen": -0.0005401723901741207, "logps/rejected": -2.1637792587280273, "loss": 0.7081, "nll_loss": 0.1770128607749939, "rewards/accuracies": 1.0, "rewards/chosen": -5.401724047260359e-05, "rewards/margins": 0.21632394194602966, "rewards/rejected": -0.21637794375419617, "step": 9174 }, { "epoch": 6.345089903181189, "grad_norm": 6.890054225921631, "learning_rate": 2.0305056093437837e-05, "log_odds_chosen": 10.487573623657227, "log_odds_ratio": -7.205517613328993e-05, "logits/chosen": -0.1999693661928177, "logits/rejected": -0.12560325860977173, "logps/chosen": -0.00024904205929487944, "logps/rejected": -1.6322691440582275, "loss": 0.4943, "nll_loss": 0.12357941269874573, "rewards/accuracies": 1.0, "rewards/chosen": -2.4904205929487944e-05, "rewards/margins": 0.16320201754570007, "rewards/rejected": -0.1632269322872162, "step": 9175 }, { "epoch": 6.345781466113416, "grad_norm": 11.061262130737305, "learning_rate": 2.0301214077147686e-05, "log_odds_chosen": 10.508772850036621, "log_odds_ratio": -0.003219763981178403, "logits/chosen": -0.7970146536827087, "logits/rejected": -0.8142995238304138, "logps/chosen": -0.001975291408598423, "logps/rejected": -1.51791250705719, "loss": 0.6285, "nll_loss": 0.15680286288261414, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001975291670532897, "rewards/margins": 0.15159371495246887, "rewards/rejected": -0.15179124474525452, "step": 9176 }, { "epoch": 6.346473029045643, "grad_norm": 5.008984565734863, "learning_rate": 2.029737206085754e-05, "log_odds_chosen": 10.216703414916992, "log_odds_ratio": -0.00014600915892515332, "logits/chosen": -0.5505592823028564, "logits/rejected": -0.5319199562072754, "logps/chosen": -0.0003324694116599858, "logps/rejected": -1.6418519020080566, "loss": 0.636, "nll_loss": 0.15897497534751892, "rewards/accuracies": 1.0, "rewards/chosen": -3.32469426211901e-05, "rewards/margins": 0.16415196657180786, "rewards/rejected": -0.16418521106243134, "step": 9177 }, { "epoch": 6.34716459197787, "grad_norm": 10.514545440673828, "learning_rate": 2.029353004456739e-05, "log_odds_chosen": 10.497976303100586, "log_odds_ratio": -0.00018029067723546177, "logits/chosen": -0.2952839732170105, "logits/rejected": -0.33477315306663513, "logps/chosen": -0.0004315444966778159, "logps/rejected": -2.282492160797119, "loss": 0.8506, "nll_loss": 0.21263387799263, "rewards/accuracies": 1.0, "rewards/chosen": -4.315445039537735e-05, "rewards/margins": 0.22820605337619781, "rewards/rejected": -0.2282492071390152, "step": 9178 }, { "epoch": 6.3478561549100965, "grad_norm": 6.681618690490723, "learning_rate": 2.028968802827724e-05, "log_odds_chosen": 10.633491516113281, "log_odds_ratio": -7.278387784026563e-05, "logits/chosen": -0.2575504779815674, "logits/rejected": -0.360831081867218, "logps/chosen": -0.0006361017003655434, "logps/rejected": -2.2641687393188477, "loss": 0.5476, "nll_loss": 0.1369018256664276, "rewards/accuracies": 1.0, "rewards/chosen": -6.361017585732043e-05, "rewards/margins": 0.226353257894516, "rewards/rejected": -0.22641685605049133, "step": 9179 }, { "epoch": 6.348547717842323, "grad_norm": 6.981048583984375, "learning_rate": 2.0285846011987092e-05, "log_odds_chosen": 11.55207633972168, "log_odds_ratio": -2.6088957383763045e-05, "logits/chosen": -0.41310855746269226, "logits/rejected": -0.5048751831054688, "logps/chosen": -0.00016262067947536707, "logps/rejected": -2.607013702392578, "loss": 0.5756, "nll_loss": 0.14389488101005554, "rewards/accuracies": 1.0, "rewards/chosen": -1.6262067219940946e-05, "rewards/margins": 0.2606850862503052, "rewards/rejected": -0.26070135831832886, "step": 9180 }, { "epoch": 6.34923928077455, "grad_norm": 11.618637084960938, "learning_rate": 2.0282003995696945e-05, "log_odds_chosen": 10.954242706298828, "log_odds_ratio": -2.8021946491207927e-05, "logits/chosen": -0.4849565923213959, "logits/rejected": -0.5182377696037292, "logps/chosen": -0.00011028562585124746, "logps/rejected": -1.8828730583190918, "loss": 0.5321, "nll_loss": 0.13302020728588104, "rewards/accuracies": 1.0, "rewards/chosen": -1.1028562767023686e-05, "rewards/margins": 0.1882762908935547, "rewards/rejected": -0.18828731775283813, "step": 9181 }, { "epoch": 6.349930843706777, "grad_norm": 11.160720825195312, "learning_rate": 2.0278161979406794e-05, "log_odds_chosen": 8.96442985534668, "log_odds_ratio": -0.00035631554783321917, "logits/chosen": -0.4377845525741577, "logits/rejected": -0.42905181646347046, "logps/chosen": -0.0005569449858739972, "logps/rejected": -1.361982822418213, "loss": 0.5463, "nll_loss": 0.13653430342674255, "rewards/accuracies": 1.0, "rewards/chosen": -5.569449785980396e-05, "rewards/margins": 0.13614259660243988, "rewards/rejected": -0.1361982822418213, "step": 9182 }, { "epoch": 6.350622406639004, "grad_norm": 10.625052452087402, "learning_rate": 2.0274319963116646e-05, "log_odds_chosen": 10.720477104187012, "log_odds_ratio": -0.00016753214003983885, "logits/chosen": -0.13363853096961975, "logits/rejected": -0.259945273399353, "logps/chosen": -0.0003033954999409616, "logps/rejected": -2.012464761734009, "loss": 0.8445, "nll_loss": 0.21111492812633514, "rewards/accuracies": 1.0, "rewards/chosen": -3.033954999409616e-05, "rewards/margins": 0.20121616125106812, "rewards/rejected": -0.2012465000152588, "step": 9183 }, { "epoch": 6.351313969571231, "grad_norm": 9.830244064331055, "learning_rate": 2.0270477946826495e-05, "log_odds_chosen": 10.320734024047852, "log_odds_ratio": -5.5076357966754586e-05, "logits/chosen": -0.6449425220489502, "logits/rejected": -0.5527397990226746, "logps/chosen": -0.00033088948111981153, "logps/rejected": -1.7124865055084229, "loss": 0.6336, "nll_loss": 0.15838631987571716, "rewards/accuracies": 1.0, "rewards/chosen": -3.308894520159811e-05, "rewards/margins": 0.17121556401252747, "rewards/rejected": -0.171248659491539, "step": 9184 }, { "epoch": 6.3520055325034575, "grad_norm": 7.309013366699219, "learning_rate": 2.0266635930536344e-05, "log_odds_chosen": 11.929250717163086, "log_odds_ratio": -7.019154963927576e-06, "logits/chosen": -0.6183920502662659, "logits/rejected": -0.6302121877670288, "logps/chosen": -0.0001854830770753324, "logps/rejected": -2.6690664291381836, "loss": 1.0344, "nll_loss": 0.2585914433002472, "rewards/accuracies": 1.0, "rewards/chosen": -1.854830770753324e-05, "rewards/margins": 0.2668881118297577, "rewards/rejected": -0.2669066786766052, "step": 9185 }, { "epoch": 6.352697095435684, "grad_norm": 6.724107265472412, "learning_rate": 2.0262793914246197e-05, "log_odds_chosen": 10.042360305786133, "log_odds_ratio": -9.932967077475041e-05, "logits/chosen": -0.7215209007263184, "logits/rejected": -0.7661874294281006, "logps/chosen": -0.00031137201585806906, "logps/rejected": -1.836350917816162, "loss": 0.5111, "nll_loss": 0.12775912880897522, "rewards/accuracies": 1.0, "rewards/chosen": -3.113720231340267e-05, "rewards/margins": 0.1836039423942566, "rewards/rejected": -0.18363508582115173, "step": 9186 }, { "epoch": 6.353388658367911, "grad_norm": 7.169684886932373, "learning_rate": 2.025895189795605e-05, "log_odds_chosen": 11.2716064453125, "log_odds_ratio": -5.317230898072012e-05, "logits/chosen": -0.3582202196121216, "logits/rejected": -0.45097386837005615, "logps/chosen": -0.00018732898752205074, "logps/rejected": -2.5006752014160156, "loss": 0.6799, "nll_loss": 0.16997021436691284, "rewards/accuracies": 1.0, "rewards/chosen": -1.873289693321567e-05, "rewards/margins": 0.25004881620407104, "rewards/rejected": -0.2500675320625305, "step": 9187 }, { "epoch": 6.354080221300138, "grad_norm": 7.111165523529053, "learning_rate": 2.02551098816659e-05, "log_odds_chosen": 11.701675415039062, "log_odds_ratio": -4.208488098811358e-05, "logits/chosen": -0.37285587191581726, "logits/rejected": -0.38051775097846985, "logps/chosen": -0.00016419717576354742, "logps/rejected": -2.967442512512207, "loss": 0.5549, "nll_loss": 0.1387246698141098, "rewards/accuracies": 1.0, "rewards/chosen": -1.641971721255686e-05, "rewards/margins": 0.29672783613204956, "rewards/rejected": -0.2967442572116852, "step": 9188 }, { "epoch": 6.354771784232365, "grad_norm": 8.374202728271484, "learning_rate": 2.025126786537575e-05, "log_odds_chosen": 10.756795883178711, "log_odds_ratio": -0.00011663652549032122, "logits/chosen": -0.39200976490974426, "logits/rejected": -0.3689347803592682, "logps/chosen": -0.00034770870115607977, "logps/rejected": -2.374525308609009, "loss": 0.725, "nll_loss": 0.18124133348464966, "rewards/accuracies": 1.0, "rewards/chosen": -3.477087011560798e-05, "rewards/margins": 0.23741775751113892, "rewards/rejected": -0.23745253682136536, "step": 9189 }, { "epoch": 6.355463347164592, "grad_norm": 5.542569637298584, "learning_rate": 2.0247425849085603e-05, "log_odds_chosen": 10.205241203308105, "log_odds_ratio": -0.000721512536983937, "logits/chosen": -0.2690023183822632, "logits/rejected": -0.26603633165359497, "logps/chosen": -0.0011981608113273978, "logps/rejected": -1.9227674007415771, "loss": 0.4221, "nll_loss": 0.10545966029167175, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001198160825879313, "rewards/margins": 0.19215692579746246, "rewards/rejected": -0.192276731133461, "step": 9190 }, { "epoch": 6.356154910096818, "grad_norm": 7.170056343078613, "learning_rate": 2.0243583832795452e-05, "log_odds_chosen": 10.770509719848633, "log_odds_ratio": -0.00014403241220861673, "logits/chosen": -0.5067330598831177, "logits/rejected": -0.5712544918060303, "logps/chosen": -0.0002003665576921776, "logps/rejected": -1.4868699312210083, "loss": 0.5378, "nll_loss": 0.13442908227443695, "rewards/accuracies": 1.0, "rewards/chosen": -2.0036653950228356e-05, "rewards/margins": 0.14866694808006287, "rewards/rejected": -0.1486869752407074, "step": 9191 }, { "epoch": 6.356846473029045, "grad_norm": 5.789017677307129, "learning_rate": 2.0239741816505305e-05, "log_odds_chosen": 11.209232330322266, "log_odds_ratio": -2.6927336875814945e-05, "logits/chosen": -0.4791033864021301, "logits/rejected": -0.5403072834014893, "logps/chosen": -0.00015609999536536634, "logps/rejected": -2.214787244796753, "loss": 0.5028, "nll_loss": 0.12570922076702118, "rewards/accuracies": 1.0, "rewards/chosen": -1.5609999536536634e-05, "rewards/margins": 0.22146311402320862, "rewards/rejected": -0.22147874534130096, "step": 9192 }, { "epoch": 6.357538035961272, "grad_norm": 10.424960136413574, "learning_rate": 2.0235899800215154e-05, "log_odds_chosen": 10.695180892944336, "log_odds_ratio": -9.039837459567934e-05, "logits/chosen": -0.3451023995876312, "logits/rejected": -0.28836554288864136, "logps/chosen": -0.0003894604742527008, "logps/rejected": -2.282132625579834, "loss": 0.6913, "nll_loss": 0.1728084236383438, "rewards/accuracies": 1.0, "rewards/chosen": -3.894605106324889e-05, "rewards/margins": 0.2281743288040161, "rewards/rejected": -0.22821328043937683, "step": 9193 }, { "epoch": 6.358229598893499, "grad_norm": 5.216132640838623, "learning_rate": 2.0232057783925003e-05, "log_odds_chosen": 11.270370483398438, "log_odds_ratio": -2.522995782783255e-05, "logits/chosen": -0.4266151487827301, "logits/rejected": -0.5151241421699524, "logps/chosen": -0.0001478709455113858, "logps/rejected": -2.335394859313965, "loss": 0.3715, "nll_loss": 0.09286393970251083, "rewards/accuracies": 1.0, "rewards/chosen": -1.4787094187340699e-05, "rewards/margins": 0.23352470993995667, "rewards/rejected": -0.23353949189186096, "step": 9194 }, { "epoch": 6.358921161825726, "grad_norm": 6.978919982910156, "learning_rate": 2.0228215767634855e-05, "log_odds_chosen": 9.261372566223145, "log_odds_ratio": -0.0012194992741569877, "logits/chosen": -0.22732126712799072, "logits/rejected": -0.3630354702472687, "logps/chosen": -0.002600749721750617, "logps/rejected": -1.7258175611495972, "loss": 0.5196, "nll_loss": 0.12978777289390564, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026007500127889216, "rewards/margins": 0.17232167720794678, "rewards/rejected": -0.1725817620754242, "step": 9195 }, { "epoch": 6.359612724757953, "grad_norm": 7.02858304977417, "learning_rate": 2.0224373751344708e-05, "log_odds_chosen": 10.51254653930664, "log_odds_ratio": -0.0002540510904509574, "logits/chosen": -0.5970651507377625, "logits/rejected": -0.6617342829704285, "logps/chosen": -0.00046592182479798794, "logps/rejected": -2.2553892135620117, "loss": 0.4232, "nll_loss": 0.10578116029500961, "rewards/accuracies": 1.0, "rewards/chosen": -4.659218393499032e-05, "rewards/margins": 0.22549235820770264, "rewards/rejected": -0.2255389541387558, "step": 9196 }, { "epoch": 6.360304287690179, "grad_norm": 13.577595710754395, "learning_rate": 2.0220531735054557e-05, "log_odds_chosen": 11.231561660766602, "log_odds_ratio": -0.00012580891780089587, "logits/chosen": -0.44007226824760437, "logits/rejected": -0.46577906608581543, "logps/chosen": -0.00034361236612312496, "logps/rejected": -2.6888885498046875, "loss": 0.5729, "nll_loss": 0.1432117372751236, "rewards/accuracies": 1.0, "rewards/chosen": -3.436123370192945e-05, "rewards/margins": 0.2688544988632202, "rewards/rejected": -0.2688888609409332, "step": 9197 }, { "epoch": 6.360995850622406, "grad_norm": 7.206313133239746, "learning_rate": 2.021668971876441e-05, "log_odds_chosen": 10.88147258758545, "log_odds_ratio": -2.9554386856034398e-05, "logits/chosen": -0.534494161605835, "logits/rejected": -0.661098837852478, "logps/chosen": -0.000369693007087335, "logps/rejected": -2.0248522758483887, "loss": 0.5609, "nll_loss": 0.1402200311422348, "rewards/accuracies": 1.0, "rewards/chosen": -3.6969304346712306e-05, "rewards/margins": 0.2024482786655426, "rewards/rejected": -0.20248523354530334, "step": 9198 }, { "epoch": 6.361687413554633, "grad_norm": 5.518919467926025, "learning_rate": 2.021284770247426e-05, "log_odds_chosen": 10.717257499694824, "log_odds_ratio": -0.0003174339362885803, "logits/chosen": -0.2607053220272064, "logits/rejected": -0.3942747414112091, "logps/chosen": -0.00042165315244346857, "logps/rejected": -2.048736810684204, "loss": 0.583, "nll_loss": 0.14572477340698242, "rewards/accuracies": 1.0, "rewards/chosen": -4.216531669953838e-05, "rewards/margins": 0.20483151078224182, "rewards/rejected": -0.2048736959695816, "step": 9199 }, { "epoch": 6.36237897648686, "grad_norm": 6.506553649902344, "learning_rate": 2.020900568618411e-05, "log_odds_chosen": 10.156195640563965, "log_odds_ratio": -0.0008201644523069263, "logits/chosen": -0.4659947156906128, "logits/rejected": -0.5176660418510437, "logps/chosen": -0.00027816108195111156, "logps/rejected": -1.8388397693634033, "loss": 0.6022, "nll_loss": 0.15047568082809448, "rewards/accuracies": 1.0, "rewards/chosen": -2.7816107831313275e-05, "rewards/margins": 0.18385615944862366, "rewards/rejected": -0.18388396501541138, "step": 9200 }, { "epoch": 6.363070539419087, "grad_norm": 5.314968585968018, "learning_rate": 2.0205163669893963e-05, "log_odds_chosen": 10.760082244873047, "log_odds_ratio": -4.831475598621182e-05, "logits/chosen": -0.20053163170814514, "logits/rejected": -0.2904966473579407, "logps/chosen": -0.00032365991501137614, "logps/rejected": -2.176091194152832, "loss": 0.5382, "nll_loss": 0.13455608487129211, "rewards/accuracies": 1.0, "rewards/chosen": -3.2365991501137614e-05, "rewards/margins": 0.2175767719745636, "rewards/rejected": -0.21760913729667664, "step": 9201 }, { "epoch": 6.363762102351314, "grad_norm": 8.624659538269043, "learning_rate": 2.0201321653603812e-05, "log_odds_chosen": 10.964468002319336, "log_odds_ratio": -3.857814954244532e-05, "logits/chosen": -0.562919020652771, "logits/rejected": -0.7147216796875, "logps/chosen": -0.00018094430561177433, "logps/rejected": -2.261103630065918, "loss": 0.6497, "nll_loss": 0.16241827607154846, "rewards/accuracies": 1.0, "rewards/chosen": -1.809442983358167e-05, "rewards/margins": 0.22609226405620575, "rewards/rejected": -0.22611036896705627, "step": 9202 }, { "epoch": 6.36445366528354, "grad_norm": 5.81415319442749, "learning_rate": 2.019747963731366e-05, "log_odds_chosen": 10.822190284729004, "log_odds_ratio": -4.008920950582251e-05, "logits/chosen": -0.25131142139434814, "logits/rejected": -0.4631497859954834, "logps/chosen": -0.0002011386095546186, "logps/rejected": -2.2005937099456787, "loss": 0.597, "nll_loss": 0.14925579726696014, "rewards/accuracies": 1.0, "rewards/chosen": -2.011386095546186e-05, "rewards/margins": 0.2200392633676529, "rewards/rejected": -0.2200593799352646, "step": 9203 }, { "epoch": 6.365145228215767, "grad_norm": 6.23052453994751, "learning_rate": 2.0193637621023514e-05, "log_odds_chosen": 10.77461051940918, "log_odds_ratio": -0.00010428290261188522, "logits/chosen": -0.029508620500564575, "logits/rejected": -0.035192057490348816, "logps/chosen": -0.00045014353236183524, "logps/rejected": -2.0687994956970215, "loss": 0.4886, "nll_loss": 0.1221313402056694, "rewards/accuracies": 1.0, "rewards/chosen": -4.5014348870608956e-05, "rewards/margins": 0.20683494210243225, "rewards/rejected": -0.20687994360923767, "step": 9204 }, { "epoch": 6.365836791147994, "grad_norm": 10.496476173400879, "learning_rate": 2.0189795604733366e-05, "log_odds_chosen": 12.02440071105957, "log_odds_ratio": -9.049935215443838e-06, "logits/chosen": -0.27575576305389404, "logits/rejected": -0.30641281604766846, "logps/chosen": -9.213421435561031e-05, "logps/rejected": -2.588135242462158, "loss": 0.8979, "nll_loss": 0.22447043657302856, "rewards/accuracies": 1.0, "rewards/chosen": -9.213421435561031e-06, "rewards/margins": 0.2588043212890625, "rewards/rejected": -0.2588135600090027, "step": 9205 }, { "epoch": 6.366528354080221, "grad_norm": 8.798904418945312, "learning_rate": 2.0185953588443215e-05, "log_odds_chosen": 10.987395286560059, "log_odds_ratio": -0.00019550917204469442, "logits/chosen": -0.18301673233509064, "logits/rejected": -0.3637821674346924, "logps/chosen": -0.00034496065927669406, "logps/rejected": -2.5501561164855957, "loss": 0.5876, "nll_loss": 0.1468726396560669, "rewards/accuracies": 1.0, "rewards/chosen": -3.449606447247788e-05, "rewards/margins": 0.2549811005592346, "rewards/rejected": -0.2550155818462372, "step": 9206 }, { "epoch": 6.367219917012449, "grad_norm": 5.261299133300781, "learning_rate": 2.0182111572153068e-05, "log_odds_chosen": 10.68661880493164, "log_odds_ratio": -7.621490658493713e-05, "logits/chosen": -0.5029549598693848, "logits/rejected": -0.5550189018249512, "logps/chosen": -6.601712084375322e-05, "logps/rejected": -1.5257365703582764, "loss": 0.5211, "nll_loss": 0.13025513291358948, "rewards/accuracies": 1.0, "rewards/chosen": -6.601712811971083e-06, "rewards/margins": 0.152567058801651, "rewards/rejected": -0.15257366001605988, "step": 9207 }, { "epoch": 6.367911479944675, "grad_norm": 7.6004228591918945, "learning_rate": 2.0178269555862917e-05, "log_odds_chosen": 9.818439483642578, "log_odds_ratio": -0.00021286829723976552, "logits/chosen": -0.30478382110595703, "logits/rejected": -0.21007993817329407, "logps/chosen": -0.0003476694109849632, "logps/rejected": -1.9411594867706299, "loss": 0.6535, "nll_loss": 0.16334381699562073, "rewards/accuracies": 1.0, "rewards/chosen": -3.476694109849632e-05, "rewards/margins": 0.19408118724822998, "rewards/rejected": -0.19411595165729523, "step": 9208 }, { "epoch": 6.368603042876902, "grad_norm": 8.168708801269531, "learning_rate": 2.017442753957277e-05, "log_odds_chosen": 11.416715621948242, "log_odds_ratio": -1.4642300811829045e-05, "logits/chosen": -0.12115032970905304, "logits/rejected": -0.20901496708393097, "logps/chosen": -0.00016176048666238785, "logps/rejected": -2.281259059906006, "loss": 0.7037, "nll_loss": 0.17593160271644592, "rewards/accuracies": 1.0, "rewards/chosen": -1.6176049030036665e-05, "rewards/margins": 0.2281097173690796, "rewards/rejected": -0.2281259000301361, "step": 9209 }, { "epoch": 6.369294605809129, "grad_norm": 5.403926849365234, "learning_rate": 2.017058552328262e-05, "log_odds_chosen": 10.59703540802002, "log_odds_ratio": -6.367493188008666e-05, "logits/chosen": 0.09212058782577515, "logits/rejected": 0.14779186248779297, "logps/chosen": -0.0002771377330645919, "logps/rejected": -1.9987256526947021, "loss": 0.5871, "nll_loss": 0.14678050577640533, "rewards/accuracies": 1.0, "rewards/chosen": -2.7713776944437996e-05, "rewards/margins": 0.1998448669910431, "rewards/rejected": -0.19987258315086365, "step": 9210 }, { "epoch": 6.369986168741356, "grad_norm": 5.3090620040893555, "learning_rate": 2.016674350699247e-05, "log_odds_chosen": 11.665288925170898, "log_odds_ratio": -1.9512324797688052e-05, "logits/chosen": -0.6499193906784058, "logits/rejected": -0.6336812376976013, "logps/chosen": -0.00016225603758357465, "logps/rejected": -2.2109932899475098, "loss": 0.6673, "nll_loss": 0.16682903468608856, "rewards/accuracies": 1.0, "rewards/chosen": -1.6225603758357465e-05, "rewards/margins": 0.22108310461044312, "rewards/rejected": -0.22109931707382202, "step": 9211 }, { "epoch": 6.370677731673583, "grad_norm": 10.165082931518555, "learning_rate": 2.016290149070232e-05, "log_odds_chosen": 11.195685386657715, "log_odds_ratio": -0.00017299283354077488, "logits/chosen": -0.5278292298316956, "logits/rejected": -0.6007906198501587, "logps/chosen": -0.00046260812086984515, "logps/rejected": -2.265780448913574, "loss": 0.397, "nll_loss": 0.09923581033945084, "rewards/accuracies": 1.0, "rewards/chosen": -4.626081499736756e-05, "rewards/margins": 0.2265317738056183, "rewards/rejected": -0.22657804191112518, "step": 9212 }, { "epoch": 6.37136929460581, "grad_norm": 8.883148193359375, "learning_rate": 2.0159059474412172e-05, "log_odds_chosen": 10.232280731201172, "log_odds_ratio": -0.0003335610090289265, "logits/chosen": -0.48593389987945557, "logits/rejected": -0.5400428175926208, "logps/chosen": -0.0005161626031622291, "logps/rejected": -1.7813667058944702, "loss": 0.969, "nll_loss": 0.2422185242176056, "rewards/accuracies": 1.0, "rewards/chosen": -5.1616254495456815e-05, "rewards/margins": 0.17808504402637482, "rewards/rejected": -0.1781366765499115, "step": 9213 }, { "epoch": 6.372060857538036, "grad_norm": 6.551841735839844, "learning_rate": 2.015521745812202e-05, "log_odds_chosen": 10.956387519836426, "log_odds_ratio": -9.756218059919775e-05, "logits/chosen": -0.11400066316127777, "logits/rejected": -0.21385729312896729, "logps/chosen": -0.00022725429153069854, "logps/rejected": -2.4168527126312256, "loss": 0.4417, "nll_loss": 0.11042511463165283, "rewards/accuracies": 1.0, "rewards/chosen": -2.2725429516867734e-05, "rewards/margins": 0.24166254699230194, "rewards/rejected": -0.24168527126312256, "step": 9214 }, { "epoch": 6.372752420470263, "grad_norm": 7.063676357269287, "learning_rate": 2.0151375441831874e-05, "log_odds_chosen": 10.549491882324219, "log_odds_ratio": -0.00019233435159549117, "logits/chosen": -0.434749960899353, "logits/rejected": -0.47967755794525146, "logps/chosen": -0.00045270402915775776, "logps/rejected": -2.4761176109313965, "loss": 0.8509, "nll_loss": 0.2126937210559845, "rewards/accuracies": 1.0, "rewards/chosen": -4.52704043709673e-05, "rewards/margins": 0.24756652116775513, "rewards/rejected": -0.24761177599430084, "step": 9215 }, { "epoch": 6.37344398340249, "grad_norm": 4.453362464904785, "learning_rate": 2.0147533425541726e-05, "log_odds_chosen": 10.472236633300781, "log_odds_ratio": -6.213154119905084e-05, "logits/chosen": -0.2916863262653351, "logits/rejected": -0.2914811372756958, "logps/chosen": -0.00018990921671502292, "logps/rejected": -1.8327441215515137, "loss": 0.3673, "nll_loss": 0.09180820733308792, "rewards/accuracies": 1.0, "rewards/chosen": -1.899092058010865e-05, "rewards/margins": 0.1832554191350937, "rewards/rejected": -0.18327441811561584, "step": 9216 }, { "epoch": 6.374135546334717, "grad_norm": 5.508984565734863, "learning_rate": 2.0143691409251575e-05, "log_odds_chosen": 11.875307083129883, "log_odds_ratio": -0.00015400855045299977, "logits/chosen": -0.2681117057800293, "logits/rejected": -0.2774713337421417, "logps/chosen": -0.0009705049451440573, "logps/rejected": -3.091235637664795, "loss": 0.5325, "nll_loss": 0.13310199975967407, "rewards/accuracies": 1.0, "rewards/chosen": -9.705049160402268e-05, "rewards/margins": 0.3090265393257141, "rewards/rejected": -0.30912357568740845, "step": 9217 }, { "epoch": 6.374827109266944, "grad_norm": 4.798890113830566, "learning_rate": 2.0139849392961428e-05, "log_odds_chosen": 11.035809516906738, "log_odds_ratio": -0.00011051326873712242, "logits/chosen": -0.5304756760597229, "logits/rejected": -0.607671856880188, "logps/chosen": -0.0002653436386026442, "logps/rejected": -2.285710573196411, "loss": 0.5152, "nll_loss": 0.12877829372882843, "rewards/accuracies": 1.0, "rewards/chosen": -2.65343642240623e-05, "rewards/margins": 0.22854453325271606, "rewards/rejected": -0.2285710722208023, "step": 9218 }, { "epoch": 6.375518672199171, "grad_norm": 7.894879341125488, "learning_rate": 2.013600737667128e-05, "log_odds_chosen": 10.94662857055664, "log_odds_ratio": -0.0001528830180177465, "logits/chosen": -0.5855768918991089, "logits/rejected": -0.5030243396759033, "logps/chosen": -0.000555214995983988, "logps/rejected": -2.32736873626709, "loss": 0.6675, "nll_loss": 0.1668596863746643, "rewards/accuracies": 1.0, "rewards/chosen": -5.552149741561152e-05, "rewards/margins": 0.23268136382102966, "rewards/rejected": -0.23273690044879913, "step": 9219 }, { "epoch": 6.376210235131397, "grad_norm": 6.6997904777526855, "learning_rate": 2.013216536038113e-05, "log_odds_chosen": 10.035709381103516, "log_odds_ratio": -0.0005353145534172654, "logits/chosen": -0.2331252545118332, "logits/rejected": -0.38201838731765747, "logps/chosen": -0.0001682726142462343, "logps/rejected": -1.5411182641983032, "loss": 0.9186, "nll_loss": 0.22959373891353607, "rewards/accuracies": 1.0, "rewards/chosen": -1.682726178842131e-05, "rewards/margins": 0.15409502387046814, "rewards/rejected": -0.154111847281456, "step": 9220 }, { "epoch": 6.376901798063624, "grad_norm": 7.748939514160156, "learning_rate": 2.0128323344090978e-05, "log_odds_chosen": 10.36617660522461, "log_odds_ratio": -0.0001164079294539988, "logits/chosen": -0.572219967842102, "logits/rejected": -0.5345730781555176, "logps/chosen": -0.0003311182663310319, "logps/rejected": -1.959726095199585, "loss": 0.6189, "nll_loss": 0.15470938384532928, "rewards/accuracies": 1.0, "rewards/chosen": -3.3111828088294715e-05, "rewards/margins": 0.19593951106071472, "rewards/rejected": -0.19597262144088745, "step": 9221 }, { "epoch": 6.377593360995851, "grad_norm": 6.893679141998291, "learning_rate": 2.012448132780083e-05, "log_odds_chosen": 9.918173789978027, "log_odds_ratio": -0.0005074600921943784, "logits/chosen": -0.3186028003692627, "logits/rejected": -0.3795122504234314, "logps/chosen": -0.00208657281473279, "logps/rejected": -1.9256505966186523, "loss": 0.726, "nll_loss": 0.1814383566379547, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020865729311481118, "rewards/margins": 0.1923563927412033, "rewards/rejected": -0.19256505370140076, "step": 9222 }, { "epoch": 6.378284923928078, "grad_norm": 9.132357597351074, "learning_rate": 2.012063931151068e-05, "log_odds_chosen": 10.881586074829102, "log_odds_ratio": -5.283685095491819e-05, "logits/chosen": -0.46947699785232544, "logits/rejected": -0.5702405571937561, "logps/chosen": -0.0001630905899219215, "logps/rejected": -2.0741071701049805, "loss": 0.7366, "nll_loss": 0.18414491415023804, "rewards/accuracies": 1.0, "rewards/chosen": -1.630905899219215e-05, "rewards/margins": 0.20739439129829407, "rewards/rejected": -0.20741069316864014, "step": 9223 }, { "epoch": 6.378976486860305, "grad_norm": 7.436960220336914, "learning_rate": 2.0116797295220532e-05, "log_odds_chosen": 12.106593132019043, "log_odds_ratio": -0.0003620930656325072, "logits/chosen": -0.689150333404541, "logits/rejected": -0.7047725915908813, "logps/chosen": -0.0007022612262517214, "logps/rejected": -3.17356538772583, "loss": 0.6803, "nll_loss": 0.1700369417667389, "rewards/accuracies": 1.0, "rewards/chosen": -7.022612408036366e-05, "rewards/margins": 0.31728631258010864, "rewards/rejected": -0.31735655665397644, "step": 9224 }, { "epoch": 6.3796680497925315, "grad_norm": 6.576411724090576, "learning_rate": 2.0112955278930385e-05, "log_odds_chosen": 10.059762954711914, "log_odds_ratio": -0.00010953310993500054, "logits/chosen": -0.7460529208183289, "logits/rejected": -0.7538313269615173, "logps/chosen": -0.0002810961741488427, "logps/rejected": -1.5116372108459473, "loss": 0.9037, "nll_loss": 0.22591376304626465, "rewards/accuracies": 1.0, "rewards/chosen": -2.8109616323490627e-05, "rewards/margins": 0.1511356234550476, "rewards/rejected": -0.1511637270450592, "step": 9225 }, { "epoch": 6.380359612724758, "grad_norm": 6.347103118896484, "learning_rate": 2.0109113262640234e-05, "log_odds_chosen": 9.54544448852539, "log_odds_ratio": -0.0006142269703559577, "logits/chosen": -0.5503570437431335, "logits/rejected": -0.6310123205184937, "logps/chosen": -0.0007548942230641842, "logps/rejected": -1.9970778226852417, "loss": 0.5175, "nll_loss": 0.12930533289909363, "rewards/accuracies": 1.0, "rewards/chosen": -7.548942812718451e-05, "rewards/margins": 0.19963230192661285, "rewards/rejected": -0.1997077912092209, "step": 9226 }, { "epoch": 6.381051175656985, "grad_norm": 5.354732036590576, "learning_rate": 2.0105271246350086e-05, "log_odds_chosen": 10.121562957763672, "log_odds_ratio": -5.865055209142156e-05, "logits/chosen": -0.18324589729309082, "logits/rejected": -0.29151177406311035, "logps/chosen": -0.0003376026579644531, "logps/rejected": -1.604554295539856, "loss": 0.6855, "nll_loss": 0.17136290669441223, "rewards/accuracies": 1.0, "rewards/chosen": -3.3760268706828356e-05, "rewards/margins": 0.160421684384346, "rewards/rejected": -0.16045543551445007, "step": 9227 }, { "epoch": 6.381742738589212, "grad_norm": 7.14629602432251, "learning_rate": 2.010142923005994e-05, "log_odds_chosen": 11.467676162719727, "log_odds_ratio": -8.916402293834835e-05, "logits/chosen": -0.006139256525784731, "logits/rejected": -0.11357221752405167, "logps/chosen": -0.0001775856944732368, "logps/rejected": -2.6482834815979004, "loss": 0.5431, "nll_loss": 0.13577528297901154, "rewards/accuracies": 1.0, "rewards/chosen": -1.7758567992132157e-05, "rewards/margins": 0.26481059193611145, "rewards/rejected": -0.26482832431793213, "step": 9228 }, { "epoch": 6.382434301521439, "grad_norm": 3.6606783866882324, "learning_rate": 2.0097587213769788e-05, "log_odds_chosen": 10.625322341918945, "log_odds_ratio": -0.00024825221044011414, "logits/chosen": -0.7195248603820801, "logits/rejected": -0.762287974357605, "logps/chosen": -0.0004275470564607531, "logps/rejected": -2.5666942596435547, "loss": 0.5686, "nll_loss": 0.1421373337507248, "rewards/accuracies": 1.0, "rewards/chosen": -4.275470564607531e-05, "rewards/margins": 0.256626695394516, "rewards/rejected": -0.25666943192481995, "step": 9229 }, { "epoch": 6.383125864453666, "grad_norm": 6.820446491241455, "learning_rate": 2.0093745197479637e-05, "log_odds_chosen": 10.165421485900879, "log_odds_ratio": -0.0002845745184458792, "logits/chosen": -0.7244423031806946, "logits/rejected": -0.5874755382537842, "logps/chosen": -0.0002369354770053178, "logps/rejected": -1.8407738208770752, "loss": 0.7861, "nll_loss": 0.19650883972644806, "rewards/accuracies": 1.0, "rewards/chosen": -2.3693544790148735e-05, "rewards/margins": 0.1840536892414093, "rewards/rejected": -0.18407738208770752, "step": 9230 }, { "epoch": 6.3838174273858925, "grad_norm": 10.45644760131836, "learning_rate": 2.008990318118949e-05, "log_odds_chosen": 10.038421630859375, "log_odds_ratio": -0.000769000849686563, "logits/chosen": -0.26605021953582764, "logits/rejected": -0.3089340329170227, "logps/chosen": -0.0019191744504496455, "logps/rejected": -2.226240396499634, "loss": 0.7829, "nll_loss": 0.19565162062644958, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001919174537761137, "rewards/margins": 0.22243213653564453, "rewards/rejected": -0.2226240336894989, "step": 9231 }, { "epoch": 6.384508990318119, "grad_norm": 6.580210208892822, "learning_rate": 2.0086061164899338e-05, "log_odds_chosen": 10.458142280578613, "log_odds_ratio": -4.524439282249659e-05, "logits/chosen": -0.7212006449699402, "logits/rejected": -0.7315418720245361, "logps/chosen": -0.00020987285824958235, "logps/rejected": -2.0603229999542236, "loss": 0.4201, "nll_loss": 0.10501430928707123, "rewards/accuracies": 1.0, "rewards/chosen": -2.0987286916351877e-05, "rewards/margins": 0.2060113102197647, "rewards/rejected": -0.20603230595588684, "step": 9232 }, { "epoch": 6.385200553250346, "grad_norm": 9.622176170349121, "learning_rate": 2.008221914860919e-05, "log_odds_chosen": 10.036763191223145, "log_odds_ratio": -0.00010809496598085389, "logits/chosen": -0.5044224858283997, "logits/rejected": -0.501787006855011, "logps/chosen": -0.0005055024521425366, "logps/rejected": -1.873197317123413, "loss": 0.5587, "nll_loss": 0.13966771960258484, "rewards/accuracies": 1.0, "rewards/chosen": -5.055024303146638e-05, "rewards/margins": 0.1872691959142685, "rewards/rejected": -0.18731975555419922, "step": 9233 }, { "epoch": 6.385892116182573, "grad_norm": 7.534361362457275, "learning_rate": 2.0078377132319043e-05, "log_odds_chosen": 10.918198585510254, "log_odds_ratio": -0.0002446550060994923, "logits/chosen": -0.5140203833580017, "logits/rejected": -0.5069654583930969, "logps/chosen": -0.0003034729161299765, "logps/rejected": -2.358905076980591, "loss": 0.5943, "nll_loss": 0.14855839312076569, "rewards/accuracies": 1.0, "rewards/chosen": -3.0347293431987055e-05, "rewards/margins": 0.2358601689338684, "rewards/rejected": -0.23589050769805908, "step": 9234 }, { "epoch": 6.3865836791148, "grad_norm": 7.286149501800537, "learning_rate": 2.0074535116028892e-05, "log_odds_chosen": 10.955660820007324, "log_odds_ratio": -0.001999650150537491, "logits/chosen": -0.2171625941991806, "logits/rejected": -0.2073816955089569, "logps/chosen": -0.0009690782171674073, "logps/rejected": -3.253391742706299, "loss": 0.7628, "nll_loss": 0.19049334526062012, "rewards/accuracies": 1.0, "rewards/chosen": -9.6907839179039e-05, "rewards/margins": 0.325242280960083, "rewards/rejected": -0.3253391981124878, "step": 9235 }, { "epoch": 6.387275242047027, "grad_norm": 4.016501426696777, "learning_rate": 2.0070693099738744e-05, "log_odds_chosen": 10.702842712402344, "log_odds_ratio": -0.00010885380470426753, "logits/chosen": 0.09817785024642944, "logits/rejected": 0.017373614013195038, "logps/chosen": -0.00010006909724324942, "logps/rejected": -1.8319261074066162, "loss": 0.6437, "nll_loss": 0.16091807186603546, "rewards/accuracies": 1.0, "rewards/chosen": -1.0006910088122822e-05, "rewards/margins": 0.18318259716033936, "rewards/rejected": -0.18319261074066162, "step": 9236 }, { "epoch": 6.3879668049792535, "grad_norm": 7.597879886627197, "learning_rate": 2.0066851083448597e-05, "log_odds_chosen": 9.945055961608887, "log_odds_ratio": -0.0001487003028159961, "logits/chosen": -0.3875085115432739, "logits/rejected": -0.47611644864082336, "logps/chosen": -0.00034226285060867667, "logps/rejected": -1.6458823680877686, "loss": 0.5198, "nll_loss": 0.12993381917476654, "rewards/accuracies": 1.0, "rewards/chosen": -3.422628651605919e-05, "rewards/margins": 0.16455401480197906, "rewards/rejected": -0.16458824276924133, "step": 9237 }, { "epoch": 6.38865836791148, "grad_norm": 5.411985397338867, "learning_rate": 2.0063009067158446e-05, "log_odds_chosen": 10.670639038085938, "log_odds_ratio": -8.757206524023786e-05, "logits/chosen": -0.24942684173583984, "logits/rejected": -0.2994964122772217, "logps/chosen": -0.000738327216822654, "logps/rejected": -2.4431498050689697, "loss": 0.6914, "nll_loss": 0.17284391820430756, "rewards/accuracies": 1.0, "rewards/chosen": -7.383272895822302e-05, "rewards/margins": 0.2442411482334137, "rewards/rejected": -0.2443149983882904, "step": 9238 }, { "epoch": 6.389349930843707, "grad_norm": 4.901096820831299, "learning_rate": 2.0059167050868295e-05, "log_odds_chosen": 11.94849967956543, "log_odds_ratio": -9.714612679090351e-05, "logits/chosen": 0.11094458401203156, "logits/rejected": -0.12741082906723022, "logps/chosen": -0.0005755483289249241, "logps/rejected": -3.0402350425720215, "loss": 0.448, "nll_loss": 0.11199948936700821, "rewards/accuracies": 1.0, "rewards/chosen": -5.7554832892492414e-05, "rewards/margins": 0.3039659261703491, "rewards/rejected": -0.30402350425720215, "step": 9239 }, { "epoch": 6.390041493775934, "grad_norm": 9.04263687133789, "learning_rate": 2.0055325034578147e-05, "log_odds_chosen": 10.631397247314453, "log_odds_ratio": -6.708302680635825e-05, "logits/chosen": -0.48490309715270996, "logits/rejected": -0.5478677153587341, "logps/chosen": -0.00012463021266739815, "logps/rejected": -1.730746865272522, "loss": 0.6251, "nll_loss": 0.15627357363700867, "rewards/accuracies": 1.0, "rewards/chosen": -1.2463022358133458e-05, "rewards/margins": 0.17306222021579742, "rewards/rejected": -0.17307469248771667, "step": 9240 }, { "epoch": 6.390733056708161, "grad_norm": 6.076363563537598, "learning_rate": 2.0051483018287997e-05, "log_odds_chosen": 10.133882522583008, "log_odds_ratio": -0.0001550958986626938, "logits/chosen": -0.24662718176841736, "logits/rejected": -0.3362925052642822, "logps/chosen": -0.0001913850283017382, "logps/rejected": -1.8486599922180176, "loss": 0.8633, "nll_loss": 0.21581153571605682, "rewards/accuracies": 1.0, "rewards/chosen": -1.913850246637594e-05, "rewards/margins": 0.1848468780517578, "rewards/rejected": -0.1848660111427307, "step": 9241 }, { "epoch": 6.391424619640388, "grad_norm": 4.970860481262207, "learning_rate": 2.004764100199785e-05, "log_odds_chosen": 10.41044807434082, "log_odds_ratio": -0.0003214046882931143, "logits/chosen": -0.41268280148506165, "logits/rejected": -0.39346709847450256, "logps/chosen": -0.00024544625193811953, "logps/rejected": -1.7044298648834229, "loss": 0.5116, "nll_loss": 0.12785673141479492, "rewards/accuracies": 1.0, "rewards/chosen": -2.454462446621619e-05, "rewards/margins": 0.17041844129562378, "rewards/rejected": -0.17044298350811005, "step": 9242 }, { "epoch": 6.3921161825726145, "grad_norm": 9.744050025939941, "learning_rate": 2.00437989857077e-05, "log_odds_chosen": 9.98155403137207, "log_odds_ratio": -7.244835433084518e-05, "logits/chosen": -0.5653132200241089, "logits/rejected": -0.6456999182701111, "logps/chosen": -0.0005318495677784085, "logps/rejected": -1.479022741317749, "loss": 0.5747, "nll_loss": 0.14367491006851196, "rewards/accuracies": 1.0, "rewards/chosen": -5.3184958233032376e-05, "rewards/margins": 0.14784908294677734, "rewards/rejected": -0.14790228009223938, "step": 9243 }, { "epoch": 6.392807745504841, "grad_norm": 11.613505363464355, "learning_rate": 2.003995696941755e-05, "log_odds_chosen": 10.732831954956055, "log_odds_ratio": -0.0001225114392582327, "logits/chosen": -0.5981311202049255, "logits/rejected": -0.6376551985740662, "logps/chosen": -0.0002539186389185488, "logps/rejected": -2.253077507019043, "loss": 0.4829, "nll_loss": 0.12071920931339264, "rewards/accuracies": 1.0, "rewards/chosen": -2.539186243666336e-05, "rewards/margins": 0.22528235614299774, "rewards/rejected": -0.22530776262283325, "step": 9244 }, { "epoch": 6.393499308437068, "grad_norm": 6.729595184326172, "learning_rate": 2.0036114953127403e-05, "log_odds_chosen": 12.00716495513916, "log_odds_ratio": -3.18633065035101e-05, "logits/chosen": -0.4734930694103241, "logits/rejected": -0.3904078006744385, "logps/chosen": -0.000660967780277133, "logps/rejected": -3.9002439975738525, "loss": 0.7588, "nll_loss": 0.18970799446105957, "rewards/accuracies": 1.0, "rewards/chosen": -6.609678530367091e-05, "rewards/margins": 0.38995829224586487, "rewards/rejected": -0.39002442359924316, "step": 9245 }, { "epoch": 6.394190871369295, "grad_norm": 8.777599334716797, "learning_rate": 2.0032272936837255e-05, "log_odds_chosen": 10.585710525512695, "log_odds_ratio": -6.461291923187673e-05, "logits/chosen": 0.009615451097488403, "logits/rejected": -0.06416276097297668, "logps/chosen": -0.0005693895509466529, "logps/rejected": -2.4563021659851074, "loss": 0.5749, "nll_loss": 0.14370794594287872, "rewards/accuracies": 1.0, "rewards/chosen": -5.693895218428224e-05, "rewards/margins": 0.2455732822418213, "rewards/rejected": -0.24563023447990417, "step": 9246 }, { "epoch": 6.394882434301522, "grad_norm": 5.206984996795654, "learning_rate": 2.0028430920547104e-05, "log_odds_chosen": 10.491327285766602, "log_odds_ratio": -9.465732728131115e-05, "logits/chosen": -0.8148977756500244, "logits/rejected": -0.8432801961898804, "logps/chosen": -0.00031137533369474113, "logps/rejected": -1.946422815322876, "loss": 0.7206, "nll_loss": 0.18014222383499146, "rewards/accuracies": 1.0, "rewards/chosen": -3.113753336947411e-05, "rewards/margins": 0.19461116194725037, "rewards/rejected": -0.19464229047298431, "step": 9247 }, { "epoch": 6.395573997233749, "grad_norm": 6.629668235778809, "learning_rate": 2.0024588904256953e-05, "log_odds_chosen": 10.25268840789795, "log_odds_ratio": -0.00024983941693790257, "logits/chosen": -0.48796379566192627, "logits/rejected": -0.6104187965393066, "logps/chosen": -0.000225244730245322, "logps/rejected": -1.7549493312835693, "loss": 0.5557, "nll_loss": 0.13889212906360626, "rewards/accuracies": 1.0, "rewards/chosen": -2.25244730245322e-05, "rewards/margins": 0.1754724234342575, "rewards/rejected": -0.1754949390888214, "step": 9248 }, { "epoch": 6.3962655601659755, "grad_norm": 9.717963218688965, "learning_rate": 2.0020746887966806e-05, "log_odds_chosen": 10.322273254394531, "log_odds_ratio": -0.0001579874224262312, "logits/chosen": -0.3825463652610779, "logits/rejected": -0.25651872158050537, "logps/chosen": -0.00029214590904302895, "logps/rejected": -1.7719712257385254, "loss": 0.5939, "nll_loss": 0.1484469473361969, "rewards/accuracies": 1.0, "rewards/chosen": -2.9214590540505014e-05, "rewards/margins": 0.17716792225837708, "rewards/rejected": -0.1771971434354782, "step": 9249 }, { "epoch": 6.396957123098202, "grad_norm": 9.360939025878906, "learning_rate": 2.0016904871676655e-05, "log_odds_chosen": 11.585420608520508, "log_odds_ratio": -1.1413669199100696e-05, "logits/chosen": -0.4388987421989441, "logits/rejected": -0.5015350580215454, "logps/chosen": -0.00016019078611861914, "logps/rejected": -2.4639031887054443, "loss": 0.4223, "nll_loss": 0.10557594150304794, "rewards/accuracies": 1.0, "rewards/chosen": -1.6019079339457676e-05, "rewards/margins": 0.24637427926063538, "rewards/rejected": -0.24639031291007996, "step": 9250 }, { "epoch": 6.397648686030429, "grad_norm": 9.76177978515625, "learning_rate": 2.0013062855386507e-05, "log_odds_chosen": 10.462821960449219, "log_odds_ratio": -0.0002514673105906695, "logits/chosen": -0.6369720697402954, "logits/rejected": -0.6475129127502441, "logps/chosen": -0.0008802501251921058, "logps/rejected": -2.4775772094726562, "loss": 0.6221, "nll_loss": 0.155501589179039, "rewards/accuracies": 1.0, "rewards/chosen": -8.802501542959362e-05, "rewards/margins": 0.24766971170902252, "rewards/rejected": -0.24775774776935577, "step": 9251 }, { "epoch": 6.398340248962656, "grad_norm": 7.950948238372803, "learning_rate": 2.000922083909636e-05, "log_odds_chosen": 11.220458984375, "log_odds_ratio": -0.0001685236784396693, "logits/chosen": -0.33944761753082275, "logits/rejected": -0.4309898614883423, "logps/chosen": -0.000930731650441885, "logps/rejected": -3.331479787826538, "loss": 0.9565, "nll_loss": 0.23909664154052734, "rewards/accuracies": 1.0, "rewards/chosen": -9.307316213380545e-05, "rewards/margins": 0.33305490016937256, "rewards/rejected": -0.33314797282218933, "step": 9252 }, { "epoch": 6.399031811894883, "grad_norm": 9.884614944458008, "learning_rate": 2.000537882280621e-05, "log_odds_chosen": 10.504169464111328, "log_odds_ratio": -0.00013753658276982605, "logits/chosen": -0.39223068952560425, "logits/rejected": -0.3681015372276306, "logps/chosen": -0.00023119246179703623, "logps/rejected": -2.302983045578003, "loss": 0.5224, "nll_loss": 0.13057824969291687, "rewards/accuracies": 1.0, "rewards/chosen": -2.3119247998693027e-05, "rewards/margins": 0.23027518391609192, "rewards/rejected": -0.23029831051826477, "step": 9253 }, { "epoch": 6.39972337482711, "grad_norm": 5.766773700714111, "learning_rate": 2.000153680651606e-05, "log_odds_chosen": 10.745979309082031, "log_odds_ratio": -0.00012643210357055068, "logits/chosen": -0.6153202056884766, "logits/rejected": -0.634486198425293, "logps/chosen": -0.0003257495700381696, "logps/rejected": -2.0734894275665283, "loss": 0.4582, "nll_loss": 0.11454488337039948, "rewards/accuracies": 1.0, "rewards/chosen": -3.257495700381696e-05, "rewards/margins": 0.20731636881828308, "rewards/rejected": -0.20734894275665283, "step": 9254 }, { "epoch": 6.4004149377593365, "grad_norm": 36.044620513916016, "learning_rate": 1.9997694790225914e-05, "log_odds_chosen": 10.067339897155762, "log_odds_ratio": -0.00020148635667283088, "logits/chosen": -0.5717883706092834, "logits/rejected": -0.5887860059738159, "logps/chosen": -0.00023326711379922926, "logps/rejected": -1.9287145137786865, "loss": 0.5745, "nll_loss": 0.14359921216964722, "rewards/accuracies": 1.0, "rewards/chosen": -2.332671283511445e-05, "rewards/margins": 0.1928481161594391, "rewards/rejected": -0.19287145137786865, "step": 9255 }, { "epoch": 6.401106500691563, "grad_norm": 5.805153846740723, "learning_rate": 1.9993852773935763e-05, "log_odds_chosen": 11.110633850097656, "log_odds_ratio": -2.4092261810437776e-05, "logits/chosen": -0.10224826633930206, "logits/rejected": -0.2345043420791626, "logps/chosen": -0.0004690833739005029, "logps/rejected": -2.6470563411712646, "loss": 0.5916, "nll_loss": 0.14789032936096191, "rewards/accuracies": 1.0, "rewards/chosen": -4.6908338845241815e-05, "rewards/margins": 0.26465874910354614, "rewards/rejected": -0.2647056579589844, "step": 9256 }, { "epoch": 6.40179806362379, "grad_norm": 8.75613021850586, "learning_rate": 1.9990010757645615e-05, "log_odds_chosen": 10.300285339355469, "log_odds_ratio": -8.09316334198229e-05, "logits/chosen": -0.6939583420753479, "logits/rejected": -0.8330811858177185, "logps/chosen": -0.0003144872607663274, "logps/rejected": -1.9681203365325928, "loss": 0.5557, "nll_loss": 0.1389131098985672, "rewards/accuracies": 1.0, "rewards/chosen": -3.144872607663274e-05, "rewards/margins": 0.19678059220314026, "rewards/rejected": -0.19681203365325928, "step": 9257 }, { "epoch": 6.402489626556017, "grad_norm": 6.8936004638671875, "learning_rate": 1.9986168741355464e-05, "log_odds_chosen": 11.252097129821777, "log_odds_ratio": -3.741459295270033e-05, "logits/chosen": -0.7355087399482727, "logits/rejected": -0.8028329610824585, "logps/chosen": -0.00016158061043825, "logps/rejected": -2.471592426300049, "loss": 0.4441, "nll_loss": 0.11102715134620667, "rewards/accuracies": 1.0, "rewards/chosen": -1.6158061043825e-05, "rewards/margins": 0.24714305996894836, "rewards/rejected": -0.2471592277288437, "step": 9258 }, { "epoch": 6.403181189488244, "grad_norm": 7.777023792266846, "learning_rate": 1.9982326725065313e-05, "log_odds_chosen": 10.736876487731934, "log_odds_ratio": -0.000112594869278837, "logits/chosen": -0.5803585648536682, "logits/rejected": -0.5270214080810547, "logps/chosen": -0.00024174893042072654, "logps/rejected": -2.451173782348633, "loss": 0.6914, "nll_loss": 0.17283624410629272, "rewards/accuracies": 1.0, "rewards/chosen": -2.4174894861062057e-05, "rewards/margins": 0.2450932115316391, "rewards/rejected": -0.24511736631393433, "step": 9259 }, { "epoch": 6.403872752420471, "grad_norm": 7.646719455718994, "learning_rate": 1.9978484708775166e-05, "log_odds_chosen": 11.0280179977417, "log_odds_ratio": -4.2771946027642116e-05, "logits/chosen": -0.3071643114089966, "logits/rejected": -0.3612262010574341, "logps/chosen": -0.00020295185095164925, "logps/rejected": -2.3706865310668945, "loss": 0.5651, "nll_loss": 0.14127621054649353, "rewards/accuracies": 1.0, "rewards/chosen": -2.029518691415433e-05, "rewards/margins": 0.23704838752746582, "rewards/rejected": -0.23706866800785065, "step": 9260 }, { "epoch": 6.404564315352697, "grad_norm": 6.2436723709106445, "learning_rate": 1.9974642692485018e-05, "log_odds_chosen": 10.827913284301758, "log_odds_ratio": -2.2066273231757805e-05, "logits/chosen": -0.5114313960075378, "logits/rejected": -0.5310375690460205, "logps/chosen": -0.0001401654299115762, "logps/rejected": -1.8054132461547852, "loss": 0.4848, "nll_loss": 0.12118765711784363, "rewards/accuracies": 1.0, "rewards/chosen": -1.4016542991157621e-05, "rewards/margins": 0.18052731454372406, "rewards/rejected": -0.18054133653640747, "step": 9261 }, { "epoch": 6.405255878284924, "grad_norm": 5.233295917510986, "learning_rate": 1.9970800676194867e-05, "log_odds_chosen": 11.959897994995117, "log_odds_ratio": -5.2490322559606284e-05, "logits/chosen": -0.33044326305389404, "logits/rejected": -0.49339759349823, "logps/chosen": -0.00012844899902120233, "logps/rejected": -3.1751294136047363, "loss": 0.5452, "nll_loss": 0.1362866759300232, "rewards/accuracies": 1.0, "rewards/chosen": -1.2844900084019173e-05, "rewards/margins": 0.3175000846385956, "rewards/rejected": -0.3175129294395447, "step": 9262 }, { "epoch": 6.405947441217151, "grad_norm": 4.386822700500488, "learning_rate": 1.996695865990472e-05, "log_odds_chosen": 10.40085506439209, "log_odds_ratio": -0.00021573121193796396, "logits/chosen": -0.4487634301185608, "logits/rejected": -0.4953088164329529, "logps/chosen": -0.000500406080391258, "logps/rejected": -2.1370983123779297, "loss": 0.3764, "nll_loss": 0.09408355504274368, "rewards/accuracies": 1.0, "rewards/chosen": -5.004060949431732e-05, "rewards/margins": 0.21365980803966522, "rewards/rejected": -0.21370983123779297, "step": 9263 }, { "epoch": 6.406639004149378, "grad_norm": 12.913554191589355, "learning_rate": 1.9963116643614572e-05, "log_odds_chosen": 11.511303901672363, "log_odds_ratio": -5.697936285287142e-05, "logits/chosen": -0.5148541927337646, "logits/rejected": -0.5149716138839722, "logps/chosen": -0.000354190357029438, "logps/rejected": -3.1875712871551514, "loss": 0.6385, "nll_loss": 0.159611314535141, "rewards/accuracies": 1.0, "rewards/chosen": -3.5419037885731086e-05, "rewards/margins": 0.3187217116355896, "rewards/rejected": -0.31875714659690857, "step": 9264 }, { "epoch": 6.407330567081605, "grad_norm": 5.047789096832275, "learning_rate": 1.995927462732442e-05, "log_odds_chosen": 11.870508193969727, "log_odds_ratio": -1.1209338481421582e-05, "logits/chosen": -0.1910778284072876, "logits/rejected": -0.3116176724433899, "logps/chosen": -0.0001415059232385829, "logps/rejected": -2.6769137382507324, "loss": 0.5289, "nll_loss": 0.13223110139369965, "rewards/accuracies": 1.0, "rewards/chosen": -1.4150593415251933e-05, "rewards/margins": 0.2676772475242615, "rewards/rejected": -0.26769137382507324, "step": 9265 }, { "epoch": 6.408022130013832, "grad_norm": 4.988502025604248, "learning_rate": 1.9955432611034274e-05, "log_odds_chosen": 10.377761840820312, "log_odds_ratio": -4.9067879444919527e-05, "logits/chosen": -0.2553957402706146, "logits/rejected": -0.24770502746105194, "logps/chosen": -0.00022306838945951313, "logps/rejected": -1.7231662273406982, "loss": 0.5945, "nll_loss": 0.14862555265426636, "rewards/accuracies": 1.0, "rewards/chosen": -2.230684185633436e-05, "rewards/margins": 0.17229431867599487, "rewards/rejected": -0.17231664061546326, "step": 9266 }, { "epoch": 6.408713692946058, "grad_norm": 6.566638469696045, "learning_rate": 1.9951590594744123e-05, "log_odds_chosen": 11.166705131530762, "log_odds_ratio": -4.125917257624678e-05, "logits/chosen": -0.2326575368642807, "logits/rejected": -0.1612112820148468, "logps/chosen": -0.00021289799769874662, "logps/rejected": -2.609964370727539, "loss": 0.6125, "nll_loss": 0.15311162173748016, "rewards/accuracies": 1.0, "rewards/chosen": -2.128980304405559e-05, "rewards/margins": 0.2609751522541046, "rewards/rejected": -0.2609964609146118, "step": 9267 }, { "epoch": 6.409405255878285, "grad_norm": 7.873584747314453, "learning_rate": 1.9947748578453972e-05, "log_odds_chosen": 10.430408477783203, "log_odds_ratio": -8.551737118978053e-05, "logits/chosen": -0.7652373313903809, "logits/rejected": -0.8368032574653625, "logps/chosen": -0.00012079392035957426, "logps/rejected": -1.5141304731369019, "loss": 0.6633, "nll_loss": 0.1658088117837906, "rewards/accuracies": 1.0, "rewards/chosen": -1.2079392945452128e-05, "rewards/margins": 0.15140098333358765, "rewards/rejected": -0.15141305327415466, "step": 9268 }, { "epoch": 6.410096818810512, "grad_norm": 8.773344039916992, "learning_rate": 1.9943906562163824e-05, "log_odds_chosen": 10.397517204284668, "log_odds_ratio": -0.00030735606560483575, "logits/chosen": -0.22622643411159515, "logits/rejected": -0.39059799909591675, "logps/chosen": -0.00034772863727994263, "logps/rejected": -2.125950574874878, "loss": 0.5186, "nll_loss": 0.1296141892671585, "rewards/accuracies": 1.0, "rewards/chosen": -3.477286372799426e-05, "rewards/margins": 0.21256029605865479, "rewards/rejected": -0.21259507536888123, "step": 9269 }, { "epoch": 6.410788381742739, "grad_norm": 12.695683479309082, "learning_rate": 1.9940064545873677e-05, "log_odds_chosen": 11.041482925415039, "log_odds_ratio": -0.0006497156573459506, "logits/chosen": 0.3386712968349457, "logits/rejected": 0.1383814960718155, "logps/chosen": -0.0004824312636628747, "logps/rejected": -2.4895670413970947, "loss": 0.8336, "nll_loss": 0.2083456963300705, "rewards/accuracies": 1.0, "rewards/chosen": -4.82431314594578e-05, "rewards/margins": 0.24890847504138947, "rewards/rejected": -0.24895671010017395, "step": 9270 }, { "epoch": 6.411479944674966, "grad_norm": 7.70023250579834, "learning_rate": 1.9936222529583526e-05, "log_odds_chosen": 11.177961349487305, "log_odds_ratio": -9.007647167891264e-05, "logits/chosen": -0.5466927289962769, "logits/rejected": -0.5063419938087463, "logps/chosen": -0.00019104511011391878, "logps/rejected": -2.687896728515625, "loss": 0.75, "nll_loss": 0.18748390674591064, "rewards/accuracies": 1.0, "rewards/chosen": -1.9104511011391878e-05, "rewards/margins": 0.26877057552337646, "rewards/rejected": -0.268789678812027, "step": 9271 }, { "epoch": 6.412171507607193, "grad_norm": 6.924728870391846, "learning_rate": 1.9932380513293378e-05, "log_odds_chosen": 10.738670349121094, "log_odds_ratio": -0.00016212818445637822, "logits/chosen": -0.2753411829471588, "logits/rejected": -0.17702896893024445, "logps/chosen": -0.0005732090794481337, "logps/rejected": -2.4172887802124023, "loss": 0.8917, "nll_loss": 0.22289781272411346, "rewards/accuracies": 1.0, "rewards/chosen": -5.73209035792388e-05, "rewards/margins": 0.24167153239250183, "rewards/rejected": -0.24172884225845337, "step": 9272 }, { "epoch": 6.412863070539419, "grad_norm": 14.814724922180176, "learning_rate": 1.992853849700323e-05, "log_odds_chosen": 11.559272766113281, "log_odds_ratio": -1.548027285025455e-05, "logits/chosen": -0.89446622133255, "logits/rejected": -1.0412811040878296, "logps/chosen": -0.0004175920912530273, "logps/rejected": -2.7191271781921387, "loss": 0.7141, "nll_loss": 0.1785222291946411, "rewards/accuracies": 1.0, "rewards/chosen": -4.1759216401260346e-05, "rewards/margins": 0.2718709707260132, "rewards/rejected": -0.27191272377967834, "step": 9273 }, { "epoch": 6.413554633471646, "grad_norm": 8.348380088806152, "learning_rate": 1.992469648071308e-05, "log_odds_chosen": 10.117218017578125, "log_odds_ratio": -0.00013753073289990425, "logits/chosen": -0.4123270809650421, "logits/rejected": -0.4688197374343872, "logps/chosen": -0.0002495943335816264, "logps/rejected": -1.7303459644317627, "loss": 0.7288, "nll_loss": 0.18217617273330688, "rewards/accuracies": 1.0, "rewards/chosen": -2.4959434085758403e-05, "rewards/margins": 0.17300963401794434, "rewards/rejected": -0.17303459346294403, "step": 9274 }, { "epoch": 6.414246196403873, "grad_norm": 8.611536979675293, "learning_rate": 1.9920854464422932e-05, "log_odds_chosen": 10.415322303771973, "log_odds_ratio": -8.811524457996711e-05, "logits/chosen": -0.3573923707008362, "logits/rejected": -0.4499669671058655, "logps/chosen": -0.000698216026648879, "logps/rejected": -2.2807555198669434, "loss": 0.5595, "nll_loss": 0.1398547887802124, "rewards/accuracies": 1.0, "rewards/chosen": -6.98216026648879e-05, "rewards/margins": 0.22800573706626892, "rewards/rejected": -0.22807557880878448, "step": 9275 }, { "epoch": 6.4149377593361, "grad_norm": 5.704978942871094, "learning_rate": 1.991701244813278e-05, "log_odds_chosen": 11.306905746459961, "log_odds_ratio": -1.6761072402005084e-05, "logits/chosen": 0.09830937534570694, "logits/rejected": 0.004683436825871468, "logps/chosen": -0.00013964908430352807, "logps/rejected": -2.4486541748046875, "loss": 0.5404, "nll_loss": 0.13511033356189728, "rewards/accuracies": 1.0, "rewards/chosen": -1.3964909157948568e-05, "rewards/margins": 0.2448514699935913, "rewards/rejected": -0.24486540257930756, "step": 9276 }, { "epoch": 6.415629322268327, "grad_norm": 6.354818820953369, "learning_rate": 1.991317043184263e-05, "log_odds_chosen": 9.531805992126465, "log_odds_ratio": -0.012731587514281273, "logits/chosen": -0.15848901867866516, "logits/rejected": -0.299731969833374, "logps/chosen": -0.0038687651976943016, "logps/rejected": -1.9498724937438965, "loss": 1.323, "nll_loss": 0.3294808566570282, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038687651976943016, "rewards/margins": 0.1946004033088684, "rewards/rejected": -0.19498726725578308, "step": 9277 }, { "epoch": 6.4163208852005535, "grad_norm": 7.357949256896973, "learning_rate": 1.9909328415552483e-05, "log_odds_chosen": 8.946401596069336, "log_odds_ratio": -0.0028267528396099806, "logits/chosen": -0.25857284665107727, "logits/rejected": -0.4397861361503601, "logps/chosen": -0.0020083796698600054, "logps/rejected": -1.6771559715270996, "loss": 1.0644, "nll_loss": 0.2658079266548157, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002008379524340853, "rewards/margins": 0.16751474142074585, "rewards/rejected": -0.16771559417247772, "step": 9278 }, { "epoch": 6.41701244813278, "grad_norm": 6.2034687995910645, "learning_rate": 1.9905486399262332e-05, "log_odds_chosen": 11.08052921295166, "log_odds_ratio": -3.313586785225198e-05, "logits/chosen": -0.37916818261146545, "logits/rejected": -0.47911304235458374, "logps/chosen": -0.00012295949272811413, "logps/rejected": -2.0425448417663574, "loss": 0.393, "nll_loss": 0.0982578918337822, "rewards/accuracies": 1.0, "rewards/chosen": -1.2295950909901876e-05, "rewards/margins": 0.20424219965934753, "rewards/rejected": -0.20425450801849365, "step": 9279 }, { "epoch": 6.417704011065007, "grad_norm": 7.049955368041992, "learning_rate": 1.9901644382972184e-05, "log_odds_chosen": 11.446515083312988, "log_odds_ratio": -3.128893149551004e-05, "logits/chosen": -0.42859339714050293, "logits/rejected": -0.3936436176300049, "logps/chosen": -0.0002233328705187887, "logps/rejected": -2.4475386142730713, "loss": 0.7558, "nll_loss": 0.18894362449645996, "rewards/accuracies": 1.0, "rewards/chosen": -2.2333286324283108e-05, "rewards/margins": 0.24473154544830322, "rewards/rejected": -0.2447538673877716, "step": 9280 }, { "epoch": 6.418395573997234, "grad_norm": 6.746326923370361, "learning_rate": 1.9897802366682037e-05, "log_odds_chosen": 10.88402271270752, "log_odds_ratio": -3.941710019716993e-05, "logits/chosen": -0.1344437301158905, "logits/rejected": -0.2331140637397766, "logps/chosen": -0.0003557455202098936, "logps/rejected": -2.0126137733459473, "loss": 0.7242, "nll_loss": 0.18103933334350586, "rewards/accuracies": 1.0, "rewards/chosen": -3.5574550565797836e-05, "rewards/margins": 0.20122580230236053, "rewards/rejected": -0.20126137137413025, "step": 9281 }, { "epoch": 6.419087136929461, "grad_norm": 11.59501838684082, "learning_rate": 1.9893960350391886e-05, "log_odds_chosen": 10.503249168395996, "log_odds_ratio": -8.245335629908368e-05, "logits/chosen": -0.44896990060806274, "logits/rejected": -0.6239047646522522, "logps/chosen": -0.0002727765531744808, "logps/rejected": -1.7392630577087402, "loss": 0.4511, "nll_loss": 0.11275888979434967, "rewards/accuracies": 1.0, "rewards/chosen": -2.7277654226054437e-05, "rewards/margins": 0.17389902472496033, "rewards/rejected": -0.17392629384994507, "step": 9282 }, { "epoch": 6.419778699861688, "grad_norm": 7.3511881828308105, "learning_rate": 1.9890118334101738e-05, "log_odds_chosen": 11.662189483642578, "log_odds_ratio": -1.938941204571165e-05, "logits/chosen": -0.4689757823944092, "logits/rejected": -0.5246522426605225, "logps/chosen": -0.000340028025675565, "logps/rejected": -3.2328014373779297, "loss": 1.2258, "nll_loss": 0.30644235014915466, "rewards/accuracies": 1.0, "rewards/chosen": -3.4002805477939546e-05, "rewards/margins": 0.32324618101119995, "rewards/rejected": -0.3232801556587219, "step": 9283 }, { "epoch": 6.4204702627939145, "grad_norm": 11.929098129272461, "learning_rate": 1.988627631781159e-05, "log_odds_chosen": 10.89908504486084, "log_odds_ratio": -9.280510130338371e-05, "logits/chosen": -0.3836508095264435, "logits/rejected": -0.4062102138996124, "logps/chosen": -0.0006549840909428895, "logps/rejected": -2.7214956283569336, "loss": 0.6892, "nll_loss": 0.1723027229309082, "rewards/accuracies": 1.0, "rewards/chosen": -6.549841054948047e-05, "rewards/margins": 0.2720840573310852, "rewards/rejected": -0.27214956283569336, "step": 9284 }, { "epoch": 6.421161825726141, "grad_norm": 6.852628707885742, "learning_rate": 1.988243430152144e-05, "log_odds_chosen": 12.121063232421875, "log_odds_ratio": -8.600990440754686e-06, "logits/chosen": -0.20669779181480408, "logits/rejected": -0.30786818265914917, "logps/chosen": -6.404731539078057e-05, "logps/rejected": -2.255523681640625, "loss": 0.4526, "nll_loss": 0.11315148323774338, "rewards/accuracies": 1.0, "rewards/chosen": -6.404731720976997e-06, "rewards/margins": 0.2255459725856781, "rewards/rejected": -0.22555235028266907, "step": 9285 }, { "epoch": 6.421853388658368, "grad_norm": 5.463398456573486, "learning_rate": 1.987859228523129e-05, "log_odds_chosen": 10.10585880279541, "log_odds_ratio": -0.00014293566346168518, "logits/chosen": -0.45323115587234497, "logits/rejected": -0.5202012658119202, "logps/chosen": -0.0009458367712795734, "logps/rejected": -2.0295233726501465, "loss": 0.4648, "nll_loss": 0.11618015170097351, "rewards/accuracies": 1.0, "rewards/chosen": -9.458368003834039e-05, "rewards/margins": 0.20285777747631073, "rewards/rejected": -0.20295235514640808, "step": 9286 }, { "epoch": 6.422544951590595, "grad_norm": 8.183695793151855, "learning_rate": 1.987475026894114e-05, "log_odds_chosen": 10.048669815063477, "log_odds_ratio": -0.00015383576101157814, "logits/chosen": -0.2582641839981079, "logits/rejected": -0.39592811465263367, "logps/chosen": -0.00028131093131378293, "logps/rejected": -1.7277061939239502, "loss": 0.5308, "nll_loss": 0.132684126496315, "rewards/accuracies": 1.0, "rewards/chosen": -2.8131093131378293e-05, "rewards/margins": 0.17274248600006104, "rewards/rejected": -0.1727706342935562, "step": 9287 }, { "epoch": 6.423236514522822, "grad_norm": 5.832282543182373, "learning_rate": 1.987090825265099e-05, "log_odds_chosen": 11.093450546264648, "log_odds_ratio": -2.3829081328585744e-05, "logits/chosen": -0.7500884532928467, "logits/rejected": -0.7326939702033997, "logps/chosen": -0.0003749387396965176, "logps/rejected": -2.4053335189819336, "loss": 0.5222, "nll_loss": 0.1305372714996338, "rewards/accuracies": 1.0, "rewards/chosen": -3.749387542484328e-05, "rewards/margins": 0.24049586057662964, "rewards/rejected": -0.24053336679935455, "step": 9288 }, { "epoch": 6.423928077455049, "grad_norm": 23.444679260253906, "learning_rate": 1.9867066236360843e-05, "log_odds_chosen": 11.406169891357422, "log_odds_ratio": -2.352497904212214e-05, "logits/chosen": -0.45594874024391174, "logits/rejected": -0.5880993008613586, "logps/chosen": -0.00014266757352743298, "logps/rejected": -2.4971399307250977, "loss": 0.8397, "nll_loss": 0.20991206169128418, "rewards/accuracies": 1.0, "rewards/chosen": -1.4266757716541179e-05, "rewards/margins": 0.24969972670078278, "rewards/rejected": -0.24971400201320648, "step": 9289 }, { "epoch": 6.4246196403872755, "grad_norm": 6.20634651184082, "learning_rate": 1.9863224220070695e-05, "log_odds_chosen": 11.279754638671875, "log_odds_ratio": -0.0001059678616002202, "logits/chosen": -0.9170388579368591, "logits/rejected": -0.9366865158081055, "logps/chosen": -0.00013711173960473388, "logps/rejected": -1.7270501852035522, "loss": 0.5417, "nll_loss": 0.13541099429130554, "rewards/accuracies": 1.0, "rewards/chosen": -1.3711173778574448e-05, "rewards/margins": 0.17269130051136017, "rewards/rejected": -0.1727050244808197, "step": 9290 }, { "epoch": 6.425311203319502, "grad_norm": 6.133997917175293, "learning_rate": 1.9859382203780544e-05, "log_odds_chosen": 9.219642639160156, "log_odds_ratio": -0.000537938205525279, "logits/chosen": -0.45818766951560974, "logits/rejected": -0.48580294847488403, "logps/chosen": -0.010992909781634808, "logps/rejected": -2.0333194732666016, "loss": 0.5356, "nll_loss": 0.13384385406970978, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010992909083142877, "rewards/margins": 0.20223265886306763, "rewards/rejected": -0.20333196222782135, "step": 9291 }, { "epoch": 6.426002766251729, "grad_norm": 7.714879989624023, "learning_rate": 1.9855540187490397e-05, "log_odds_chosen": 11.284984588623047, "log_odds_ratio": -4.448912659427151e-05, "logits/chosen": -0.5899901390075684, "logits/rejected": -0.7108150720596313, "logps/chosen": -0.00018639791232999414, "logps/rejected": -2.3057572841644287, "loss": 0.9004, "nll_loss": 0.22508883476257324, "rewards/accuracies": 1.0, "rewards/chosen": -1.8639791960595176e-05, "rewards/margins": 0.23055709898471832, "rewards/rejected": -0.23057572543621063, "step": 9292 }, { "epoch": 6.426694329183956, "grad_norm": 8.799266815185547, "learning_rate": 1.985169817120025e-05, "log_odds_chosen": 10.905144691467285, "log_odds_ratio": -2.7711570510291494e-05, "logits/chosen": -0.4120444655418396, "logits/rejected": -0.4582875669002533, "logps/chosen": -0.00017896827193908393, "logps/rejected": -1.9567880630493164, "loss": 0.6326, "nll_loss": 0.15815678238868713, "rewards/accuracies": 1.0, "rewards/chosen": -1.7896827557706274e-05, "rewards/margins": 0.19566091895103455, "rewards/rejected": -0.19567880034446716, "step": 9293 }, { "epoch": 6.427385892116183, "grad_norm": 6.370955944061279, "learning_rate": 1.9847856154910098e-05, "log_odds_chosen": 11.097612380981445, "log_odds_ratio": -2.4163698981283233e-05, "logits/chosen": -0.4839726984500885, "logits/rejected": -0.49444663524627686, "logps/chosen": -0.00022879890457261354, "logps/rejected": -2.4734559059143066, "loss": 0.6235, "nll_loss": 0.15587235987186432, "rewards/accuracies": 1.0, "rewards/chosen": -2.2879890821059234e-05, "rewards/margins": 0.24732272326946259, "rewards/rejected": -0.24734559655189514, "step": 9294 }, { "epoch": 6.42807745504841, "grad_norm": 7.625823020935059, "learning_rate": 1.9844014138619947e-05, "log_odds_chosen": 10.773378372192383, "log_odds_ratio": -5.160564978723414e-05, "logits/chosen": -0.4517514407634735, "logits/rejected": -0.41671445965766907, "logps/chosen": -0.00014808705600444227, "logps/rejected": -2.0106725692749023, "loss": 0.8005, "nll_loss": 0.2001313418149948, "rewards/accuracies": 1.0, "rewards/chosen": -1.4808705600444227e-05, "rewards/margins": 0.2010524570941925, "rewards/rejected": -0.2010672688484192, "step": 9295 }, { "epoch": 6.4287690179806365, "grad_norm": 5.913774013519287, "learning_rate": 1.98401721223298e-05, "log_odds_chosen": 11.38464069366455, "log_odds_ratio": -2.319132545380853e-05, "logits/chosen": -0.3373313844203949, "logits/rejected": -0.39826345443725586, "logps/chosen": -0.00011273365089436993, "logps/rejected": -2.226011037826538, "loss": 0.4352, "nll_loss": 0.10878792405128479, "rewards/accuracies": 1.0, "rewards/chosen": -1.1273365089436993e-05, "rewards/margins": 0.22258983552455902, "rewards/rejected": -0.22260110080242157, "step": 9296 }, { "epoch": 6.429460580912863, "grad_norm": 5.648289680480957, "learning_rate": 1.983633010603965e-05, "log_odds_chosen": 10.220199584960938, "log_odds_ratio": -5.696108564734459e-05, "logits/chosen": -0.4742014408111572, "logits/rejected": -0.4896661043167114, "logps/chosen": -0.0002253315324196592, "logps/rejected": -1.7355661392211914, "loss": 0.3925, "nll_loss": 0.09811148792505264, "rewards/accuracies": 1.0, "rewards/chosen": -2.2533155060955323e-05, "rewards/margins": 0.1735340803861618, "rewards/rejected": -0.1735566258430481, "step": 9297 }, { "epoch": 6.43015214384509, "grad_norm": 7.391413688659668, "learning_rate": 1.98324880897495e-05, "log_odds_chosen": 10.479411125183105, "log_odds_ratio": -5.7051620387937874e-05, "logits/chosen": -0.1723538339138031, "logits/rejected": -0.15370287001132965, "logps/chosen": -0.0002791702572721988, "logps/rejected": -1.9361761808395386, "loss": 0.5332, "nll_loss": 0.13329452276229858, "rewards/accuracies": 1.0, "rewards/chosen": -2.7917027182411402e-05, "rewards/margins": 0.1935897171497345, "rewards/rejected": -0.19361764192581177, "step": 9298 }, { "epoch": 6.430843706777317, "grad_norm": 6.326645374298096, "learning_rate": 1.9828646073459354e-05, "log_odds_chosen": 10.112716674804688, "log_odds_ratio": -0.0008755337912589312, "logits/chosen": -0.3616921305656433, "logits/rejected": -0.3587180972099304, "logps/chosen": -0.002188085112720728, "logps/rejected": -1.7334392070770264, "loss": 0.9168, "nll_loss": 0.2291109263896942, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002188085054513067, "rewards/margins": 0.17312513291835785, "rewards/rejected": -0.17334392666816711, "step": 9299 }, { "epoch": 6.431535269709544, "grad_norm": 11.17259693145752, "learning_rate": 1.9824804057169203e-05, "log_odds_chosen": 10.165445327758789, "log_odds_ratio": -0.002701385412365198, "logits/chosen": -0.1986115574836731, "logits/rejected": -0.2049403190612793, "logps/chosen": -0.0016654229257255793, "logps/rejected": -1.7589303255081177, "loss": 0.7622, "nll_loss": 0.19027338922023773, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016654228966217488, "rewards/margins": 0.1757265031337738, "rewards/rejected": -0.17589303851127625, "step": 9300 }, { "epoch": 6.432226832641771, "grad_norm": 5.610093116760254, "learning_rate": 1.9820962040879055e-05, "log_odds_chosen": 11.30485725402832, "log_odds_ratio": -1.869337029347662e-05, "logits/chosen": -0.3075014352798462, "logits/rejected": -0.27221566438674927, "logps/chosen": -5.7261881011072546e-05, "logps/rejected": -1.7437981367111206, "loss": 0.4263, "nll_loss": 0.10656291246414185, "rewards/accuracies": 1.0, "rewards/chosen": -5.726188646804076e-06, "rewards/margins": 0.1743740737438202, "rewards/rejected": -0.17437982559204102, "step": 9301 }, { "epoch": 6.4329183955739975, "grad_norm": 5.6840643882751465, "learning_rate": 1.9817120024588907e-05, "log_odds_chosen": 8.59211540222168, "log_odds_ratio": -0.001855487353168428, "logits/chosen": -0.6209691166877747, "logits/rejected": -0.5917834639549255, "logps/chosen": -0.003198577556759119, "logps/rejected": -1.748227834701538, "loss": 0.6318, "nll_loss": 0.15775543451309204, "rewards/accuracies": 1.0, "rewards/chosen": -0.000319857761496678, "rewards/margins": 0.17450293898582458, "rewards/rejected": -0.17482279241085052, "step": 9302 }, { "epoch": 6.433609958506224, "grad_norm": 7.813873767852783, "learning_rate": 1.9813278008298757e-05, "log_odds_chosen": 10.65418815612793, "log_odds_ratio": -6.0141857829876244e-05, "logits/chosen": -0.4232789874076843, "logits/rejected": -0.4591125249862671, "logps/chosen": -0.00038247957127168775, "logps/rejected": -2.263458251953125, "loss": 0.4639, "nll_loss": 0.11595964431762695, "rewards/accuracies": 1.0, "rewards/chosen": -3.824795567197725e-05, "rewards/margins": 0.22630760073661804, "rewards/rejected": -0.22634583711624146, "step": 9303 }, { "epoch": 6.434301521438451, "grad_norm": 4.04008150100708, "learning_rate": 1.9809435992008606e-05, "log_odds_chosen": 10.580642700195312, "log_odds_ratio": -0.0004853243299294263, "logits/chosen": -0.3635895550251007, "logits/rejected": -0.44146040081977844, "logps/chosen": -0.0014031457249075174, "logps/rejected": -2.3424365520477295, "loss": 0.5206, "nll_loss": 0.13010403513908386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014031457249075174, "rewards/margins": 0.23410335183143616, "rewards/rejected": -0.23424366116523743, "step": 9304 }, { "epoch": 6.434993084370678, "grad_norm": 6.571130752563477, "learning_rate": 1.9805593975718458e-05, "log_odds_chosen": 10.493505477905273, "log_odds_ratio": -6.152471905807033e-05, "logits/chosen": -1.1643874645233154, "logits/rejected": -1.2527637481689453, "logps/chosen": -0.0003062770119868219, "logps/rejected": -2.2799320220947266, "loss": 0.6452, "nll_loss": 0.16130110621452332, "rewards/accuracies": 1.0, "rewards/chosen": -3.0627699743490666e-05, "rewards/margins": 0.22796256840229034, "rewards/rejected": -0.2279932051897049, "step": 9305 }, { "epoch": 6.435684647302905, "grad_norm": 4.19816780090332, "learning_rate": 1.9801751959428307e-05, "log_odds_chosen": 9.588232040405273, "log_odds_ratio": -0.00028488037060014904, "logits/chosen": -0.4710809588432312, "logits/rejected": -0.5234569311141968, "logps/chosen": -0.0009138531750068069, "logps/rejected": -2.053696632385254, "loss": 0.5372, "nll_loss": 0.1342787742614746, "rewards/accuracies": 1.0, "rewards/chosen": -9.138531459029764e-05, "rewards/margins": 0.20527824759483337, "rewards/rejected": -0.20536965131759644, "step": 9306 }, { "epoch": 6.436376210235132, "grad_norm": 6.288425445556641, "learning_rate": 1.979790994313816e-05, "log_odds_chosen": 10.086763381958008, "log_odds_ratio": -0.00023319364117924124, "logits/chosen": -0.4830077886581421, "logits/rejected": -0.5060534477233887, "logps/chosen": -0.0004371547547634691, "logps/rejected": -1.8741427659988403, "loss": 0.5413, "nll_loss": 0.13530413806438446, "rewards/accuracies": 1.0, "rewards/chosen": -4.371547402115539e-05, "rewards/margins": 0.18737053871154785, "rewards/rejected": -0.1874142587184906, "step": 9307 }, { "epoch": 6.4370677731673585, "grad_norm": 6.124855995178223, "learning_rate": 1.9794067926848012e-05, "log_odds_chosen": 10.023574829101562, "log_odds_ratio": -0.00033251961576752365, "logits/chosen": -0.39527204632759094, "logits/rejected": -0.48593708872795105, "logps/chosen": -0.0005362079245969653, "logps/rejected": -1.6085436344146729, "loss": 0.3435, "nll_loss": 0.08583007752895355, "rewards/accuracies": 1.0, "rewards/chosen": -5.362079537007958e-05, "rewards/margins": 0.1608007550239563, "rewards/rejected": -0.16085438430309296, "step": 9308 }, { "epoch": 6.437759336099585, "grad_norm": 5.4500837326049805, "learning_rate": 1.979022591055786e-05, "log_odds_chosen": 10.023755073547363, "log_odds_ratio": -8.400460501434281e-05, "logits/chosen": -0.1788664311170578, "logits/rejected": -0.30472058057785034, "logps/chosen": -0.00044232915388420224, "logps/rejected": -1.5639151334762573, "loss": 0.6181, "nll_loss": 0.1545056849718094, "rewards/accuracies": 1.0, "rewards/chosen": -4.423291466082446e-05, "rewards/margins": 0.15634728968143463, "rewards/rejected": -0.15639153122901917, "step": 9309 }, { "epoch": 6.438450899031812, "grad_norm": 8.140402793884277, "learning_rate": 1.9786383894267713e-05, "log_odds_chosen": 11.199501991271973, "log_odds_ratio": -2.9649016141775064e-05, "logits/chosen": -0.3762897849082947, "logits/rejected": -0.2551382780075073, "logps/chosen": -0.0001679428678471595, "logps/rejected": -2.0896825790405273, "loss": 0.7305, "nll_loss": 0.18261376023292542, "rewards/accuracies": 1.0, "rewards/chosen": -1.6794285329524428e-05, "rewards/margins": 0.2089514434337616, "rewards/rejected": -0.20896823704242706, "step": 9310 }, { "epoch": 6.439142461964039, "grad_norm": 6.742094993591309, "learning_rate": 1.9782541877977566e-05, "log_odds_chosen": 10.582254409790039, "log_odds_ratio": -5.873282498214394e-05, "logits/chosen": 0.10128258913755417, "logits/rejected": 0.13267558813095093, "logps/chosen": -0.0006081910105422139, "logps/rejected": -2.6717841625213623, "loss": 0.6592, "nll_loss": 0.16479700803756714, "rewards/accuracies": 1.0, "rewards/chosen": -6.081910396460444e-05, "rewards/margins": 0.26711761951446533, "rewards/rejected": -0.26717841625213623, "step": 9311 }, { "epoch": 6.439834024896266, "grad_norm": 9.156832695007324, "learning_rate": 1.9778699861687415e-05, "log_odds_chosen": 9.505995750427246, "log_odds_ratio": -0.000228977354709059, "logits/chosen": -0.6701303720474243, "logits/rejected": -0.6045225858688354, "logps/chosen": -0.0007705151801928878, "logps/rejected": -1.7131035327911377, "loss": 0.5438, "nll_loss": 0.13592730462551117, "rewards/accuracies": 1.0, "rewards/chosen": -7.705151801928878e-05, "rewards/margins": 0.17123331129550934, "rewards/rejected": -0.17131036520004272, "step": 9312 }, { "epoch": 6.440525587828493, "grad_norm": 9.81544303894043, "learning_rate": 1.9774857845397264e-05, "log_odds_chosen": 10.630544662475586, "log_odds_ratio": -0.00013499357737600803, "logits/chosen": -0.050855569541454315, "logits/rejected": 0.013263605535030365, "logps/chosen": -0.0007859627366997302, "logps/rejected": -2.815199375152588, "loss": 0.6398, "nll_loss": 0.15994472801685333, "rewards/accuracies": 1.0, "rewards/chosen": -7.859627658035606e-05, "rewards/margins": 0.2814413905143738, "rewards/rejected": -0.28151994943618774, "step": 9313 }, { "epoch": 6.441217150760719, "grad_norm": 9.10059928894043, "learning_rate": 1.9771015829107116e-05, "log_odds_chosen": 11.148942947387695, "log_odds_ratio": -6.803381984354928e-05, "logits/chosen": -0.27442580461502075, "logits/rejected": -0.2540287375450134, "logps/chosen": -0.0002024202112806961, "logps/rejected": -2.5142269134521484, "loss": 1.0249, "nll_loss": 0.2562128007411957, "rewards/accuracies": 1.0, "rewards/chosen": -2.024202149186749e-05, "rewards/margins": 0.25140243768692017, "rewards/rejected": -0.2514226734638214, "step": 9314 }, { "epoch": 6.441908713692946, "grad_norm": 7.348708629608154, "learning_rate": 1.9767173812816966e-05, "log_odds_chosen": 11.32642936706543, "log_odds_ratio": -9.057987335836515e-05, "logits/chosen": -0.20230460166931152, "logits/rejected": -0.28677451610565186, "logps/chosen": -0.00028569228015840054, "logps/rejected": -3.0858020782470703, "loss": 0.6937, "nll_loss": 0.17341278493404388, "rewards/accuracies": 1.0, "rewards/chosen": -2.8569229471031576e-05, "rewards/margins": 0.3085516691207886, "rewards/rejected": -0.3085802495479584, "step": 9315 }, { "epoch": 6.442600276625173, "grad_norm": 8.409920692443848, "learning_rate": 1.9763331796526818e-05, "log_odds_chosen": 10.357051849365234, "log_odds_ratio": -0.00010548779391683638, "logits/chosen": -0.30940353870391846, "logits/rejected": -0.36812877655029297, "logps/chosen": -0.0001265437895199284, "logps/rejected": -1.275539517402649, "loss": 0.8621, "nll_loss": 0.21550799906253815, "rewards/accuracies": 1.0, "rewards/chosen": -1.265437913389178e-05, "rewards/margins": 0.12754130363464355, "rewards/rejected": -0.12755395472049713, "step": 9316 }, { "epoch": 6.4432918395574, "grad_norm": 12.811484336853027, "learning_rate": 1.975948978023667e-05, "log_odds_chosen": 10.652695655822754, "log_odds_ratio": -8.221437747124583e-05, "logits/chosen": -0.3613952398300171, "logits/rejected": -0.400936484336853, "logps/chosen": -0.00026950432220473886, "logps/rejected": -1.9418749809265137, "loss": 0.4245, "nll_loss": 0.10611484199762344, "rewards/accuracies": 1.0, "rewards/chosen": -2.6950432584271766e-05, "rewards/margins": 0.1941605508327484, "rewards/rejected": -0.19418750703334808, "step": 9317 }, { "epoch": 6.443983402489627, "grad_norm": 4.133450031280518, "learning_rate": 1.975564776394652e-05, "log_odds_chosen": 10.288055419921875, "log_odds_ratio": -0.002530732424929738, "logits/chosen": -0.6164582371711731, "logits/rejected": -0.6825376749038696, "logps/chosen": -0.0004605620924849063, "logps/rejected": -1.9580541849136353, "loss": 0.6229, "nll_loss": 0.1554667353630066, "rewards/accuracies": 1.0, "rewards/chosen": -4.6056211431277916e-05, "rewards/margins": 0.1957593709230423, "rewards/rejected": -0.1958054155111313, "step": 9318 }, { "epoch": 6.444674965421854, "grad_norm": 7.042186737060547, "learning_rate": 1.9751805747656372e-05, "log_odds_chosen": 10.167938232421875, "log_odds_ratio": -0.0009934669360518456, "logits/chosen": -0.2549976110458374, "logits/rejected": -0.33388811349868774, "logps/chosen": -0.0003739151870831847, "logps/rejected": -1.7513947486877441, "loss": 0.7465, "nll_loss": 0.1865352988243103, "rewards/accuracies": 1.0, "rewards/chosen": -3.7391520891105756e-05, "rewards/margins": 0.1751020848751068, "rewards/rejected": -0.17513945698738098, "step": 9319 }, { "epoch": 6.44536652835408, "grad_norm": 6.495095252990723, "learning_rate": 1.9747963731366224e-05, "log_odds_chosen": 10.530926704406738, "log_odds_ratio": -5.371103543438949e-05, "logits/chosen": -0.6024811863899231, "logits/rejected": -0.6971926689147949, "logps/chosen": -0.0005244613857939839, "logps/rejected": -2.2407333850860596, "loss": 0.5084, "nll_loss": 0.12709853053092957, "rewards/accuracies": 1.0, "rewards/chosen": -5.244614294497296e-05, "rewards/margins": 0.22402088344097137, "rewards/rejected": -0.2240733504295349, "step": 9320 }, { "epoch": 6.446058091286307, "grad_norm": 6.348737716674805, "learning_rate": 1.9744121715076073e-05, "log_odds_chosen": 10.62696361541748, "log_odds_ratio": -0.000152592605445534, "logits/chosen": -0.4025437831878662, "logits/rejected": -0.4162963032722473, "logps/chosen": -0.00025007472140714526, "logps/rejected": -2.108785629272461, "loss": 0.5057, "nll_loss": 0.12640956044197083, "rewards/accuracies": 1.0, "rewards/chosen": -2.500747359590605e-05, "rewards/margins": 0.21085354685783386, "rewards/rejected": -0.21087853610515594, "step": 9321 }, { "epoch": 6.446749654218534, "grad_norm": 7.778499603271484, "learning_rate": 1.9740279698785922e-05, "log_odds_chosen": 10.604227066040039, "log_odds_ratio": -9.288093860959634e-05, "logits/chosen": -0.2538522183895111, "logits/rejected": -0.3410423994064331, "logps/chosen": -0.0012250157305970788, "logps/rejected": -2.4406208992004395, "loss": 0.7234, "nll_loss": 0.1808387190103531, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012250157305970788, "rewards/margins": 0.2439395785331726, "rewards/rejected": -0.24406209588050842, "step": 9322 }, { "epoch": 6.447441217150761, "grad_norm": 4.946698188781738, "learning_rate": 1.9736437682495775e-05, "log_odds_chosen": 10.961910247802734, "log_odds_ratio": -0.0001718278363114223, "logits/chosen": -0.43663695454597473, "logits/rejected": -0.5011554956436157, "logps/chosen": -0.00027560797752812505, "logps/rejected": -2.781618356704712, "loss": 0.3741, "nll_loss": 0.09350738674402237, "rewards/accuracies": 1.0, "rewards/chosen": -2.7560796297620982e-05, "rewards/margins": 0.2781342566013336, "rewards/rejected": -0.27816182374954224, "step": 9323 }, { "epoch": 6.448132780082988, "grad_norm": 6.42108678817749, "learning_rate": 1.9732595666205624e-05, "log_odds_chosen": 11.063654899597168, "log_odds_ratio": -0.0002686860098037869, "logits/chosen": -0.5942996144294739, "logits/rejected": -0.705431342124939, "logps/chosen": -0.0003295104543212801, "logps/rejected": -2.265841245651245, "loss": 1.2717, "nll_loss": 0.31789982318878174, "rewards/accuracies": 1.0, "rewards/chosen": -3.2951047614915296e-05, "rewards/margins": 0.22655119001865387, "rewards/rejected": -0.22658413648605347, "step": 9324 }, { "epoch": 6.448824343015215, "grad_norm": 4.929689407348633, "learning_rate": 1.9728753649915476e-05, "log_odds_chosen": 11.852518081665039, "log_odds_ratio": -9.962430340237916e-06, "logits/chosen": -0.7141345143318176, "logits/rejected": -0.8319796323776245, "logps/chosen": -0.00038489949656650424, "logps/rejected": -2.632211685180664, "loss": 0.4818, "nll_loss": 0.1204419881105423, "rewards/accuracies": 1.0, "rewards/chosen": -3.8489950384246185e-05, "rewards/margins": 0.263182669878006, "rewards/rejected": -0.2632211744785309, "step": 9325 }, { "epoch": 6.449515905947441, "grad_norm": 5.438591003417969, "learning_rate": 1.972491163362533e-05, "log_odds_chosen": 10.398658752441406, "log_odds_ratio": -0.00011632785754045472, "logits/chosen": -0.5323688983917236, "logits/rejected": -0.6243947744369507, "logps/chosen": -0.0003119676257483661, "logps/rejected": -1.7409591674804688, "loss": 0.7072, "nll_loss": 0.1767822802066803, "rewards/accuracies": 1.0, "rewards/chosen": -3.119676694041118e-05, "rewards/margins": 0.17406471073627472, "rewards/rejected": -0.17409591376781464, "step": 9326 }, { "epoch": 6.450207468879668, "grad_norm": 6.563150882720947, "learning_rate": 1.9721069617335178e-05, "log_odds_chosen": 11.509295463562012, "log_odds_ratio": -3.75781164621003e-05, "logits/chosen": -0.442585289478302, "logits/rejected": -0.5469682216644287, "logps/chosen": -0.00018356960208620876, "logps/rejected": -2.4356777667999268, "loss": 0.6008, "nll_loss": 0.15020179748535156, "rewards/accuracies": 1.0, "rewards/chosen": -1.8356960936216637e-05, "rewards/margins": 0.2435494214296341, "rewards/rejected": -0.24356777966022491, "step": 9327 }, { "epoch": 6.450899031811895, "grad_norm": 17.16939926147461, "learning_rate": 1.971722760104503e-05, "log_odds_chosen": 10.529670715332031, "log_odds_ratio": -6.26058827037923e-05, "logits/chosen": -0.09079033136367798, "logits/rejected": -0.12593227624893188, "logps/chosen": -0.0005906840669922531, "logps/rejected": -2.2363271713256836, "loss": 0.7384, "nll_loss": 0.18458396196365356, "rewards/accuracies": 1.0, "rewards/chosen": -5.906839942326769e-05, "rewards/margins": 0.22357365489006042, "rewards/rejected": -0.22363270819187164, "step": 9328 }, { "epoch": 6.451590594744122, "grad_norm": 6.848952770233154, "learning_rate": 1.9713385584754883e-05, "log_odds_chosen": 10.813990592956543, "log_odds_ratio": -2.499014044587966e-05, "logits/chosen": -0.5543779134750366, "logits/rejected": -0.5834307670593262, "logps/chosen": -0.00018239648488815874, "logps/rejected": -2.1655075550079346, "loss": 0.7097, "nll_loss": 0.17741863429546356, "rewards/accuracies": 1.0, "rewards/chosen": -1.8239650671603158e-05, "rewards/margins": 0.21653252840042114, "rewards/rejected": -0.2165507674217224, "step": 9329 }, { "epoch": 6.452282157676349, "grad_norm": 7.377292156219482, "learning_rate": 1.9709543568464732e-05, "log_odds_chosen": 11.44371509552002, "log_odds_ratio": -3.2429546990897506e-05, "logits/chosen": -0.05530393868684769, "logits/rejected": -0.1549655795097351, "logps/chosen": -0.00017569860210642219, "logps/rejected": -2.239352226257324, "loss": 0.5736, "nll_loss": 0.14338725805282593, "rewards/accuracies": 1.0, "rewards/chosen": -1.75698605744401e-05, "rewards/margins": 0.2239176630973816, "rewards/rejected": -0.22393521666526794, "step": 9330 }, { "epoch": 6.4529737206085755, "grad_norm": 7.323204517364502, "learning_rate": 1.970570155217458e-05, "log_odds_chosen": 10.47035026550293, "log_odds_ratio": -0.000393639609683305, "logits/chosen": -0.6959431767463684, "logits/rejected": -0.6528046131134033, "logps/chosen": -0.0016256331000477076, "logps/rejected": -2.6668670177459717, "loss": 0.8856, "nll_loss": 0.2213638573884964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016256331582553685, "rewards/margins": 0.26652413606643677, "rewards/rejected": -0.26668670773506165, "step": 9331 }, { "epoch": 6.453665283540802, "grad_norm": 6.314630031585693, "learning_rate": 1.9701859535884433e-05, "log_odds_chosen": 11.20853328704834, "log_odds_ratio": -9.939574374584481e-05, "logits/chosen": -0.811857283115387, "logits/rejected": -0.8544670343399048, "logps/chosen": -0.00013169238809496164, "logps/rejected": -2.252102851867676, "loss": 0.5471, "nll_loss": 0.13677413761615753, "rewards/accuracies": 1.0, "rewards/chosen": -1.3169238627597224e-05, "rewards/margins": 0.22519709169864655, "rewards/rejected": -0.2252102643251419, "step": 9332 }, { "epoch": 6.454356846473029, "grad_norm": 8.173171997070312, "learning_rate": 1.9698017519594282e-05, "log_odds_chosen": 11.281229019165039, "log_odds_ratio": -6.0934617067687213e-05, "logits/chosen": -0.24341799318790436, "logits/rejected": -0.3885093927383423, "logps/chosen": -7.761404413031414e-05, "logps/rejected": -2.0577759742736816, "loss": 0.676, "nll_loss": 0.16900460422039032, "rewards/accuracies": 1.0, "rewards/chosen": -7.761404049233533e-06, "rewards/margins": 0.2057698518037796, "rewards/rejected": -0.2057776153087616, "step": 9333 }, { "epoch": 6.455048409405256, "grad_norm": 11.768661499023438, "learning_rate": 1.9694175503304135e-05, "log_odds_chosen": 10.951183319091797, "log_odds_ratio": -0.0001368989615002647, "logits/chosen": -0.5413077473640442, "logits/rejected": -0.564630925655365, "logps/chosen": -0.0001645167067181319, "logps/rejected": -2.0779314041137695, "loss": 0.5338, "nll_loss": 0.13344186544418335, "rewards/accuracies": 1.0, "rewards/chosen": -1.645167139940895e-05, "rewards/margins": 0.20777669548988342, "rewards/rejected": -0.20779314637184143, "step": 9334 }, { "epoch": 6.455739972337483, "grad_norm": 5.604323863983154, "learning_rate": 1.9690333487013987e-05, "log_odds_chosen": 10.265859603881836, "log_odds_ratio": -0.00023694182164035738, "logits/chosen": -0.5120769739151001, "logits/rejected": -0.4896959364414215, "logps/chosen": -0.0002953608054667711, "logps/rejected": -1.6938186883926392, "loss": 0.3457, "nll_loss": 0.08640851825475693, "rewards/accuracies": 1.0, "rewards/chosen": -2.9536076908698305e-05, "rewards/margins": 0.16935233771800995, "rewards/rejected": -0.16938188672065735, "step": 9335 }, { "epoch": 6.45643153526971, "grad_norm": 6.015252590179443, "learning_rate": 1.9686491470723836e-05, "log_odds_chosen": 10.84512710571289, "log_odds_ratio": -7.400707545457408e-05, "logits/chosen": -0.5778646469116211, "logits/rejected": -0.6446388959884644, "logps/chosen": -0.0002638440055307001, "logps/rejected": -2.0140769481658936, "loss": 0.6219, "nll_loss": 0.1554756760597229, "rewards/accuracies": 1.0, "rewards/chosen": -2.6384399461676367e-05, "rewards/margins": 0.20138132572174072, "rewards/rejected": -0.20140770077705383, "step": 9336 }, { "epoch": 6.4571230982019365, "grad_norm": 8.022332191467285, "learning_rate": 1.968264945443369e-05, "log_odds_chosen": 10.207698822021484, "log_odds_ratio": -0.00021755530906375498, "logits/chosen": -0.5963926315307617, "logits/rejected": -0.6795532703399658, "logps/chosen": -0.0004776669084094465, "logps/rejected": -1.8929169178009033, "loss": 0.5339, "nll_loss": 0.13344630599021912, "rewards/accuracies": 1.0, "rewards/chosen": -4.776669084094465e-05, "rewards/margins": 0.18924394249916077, "rewards/rejected": -0.18929171562194824, "step": 9337 }, { "epoch": 6.457814661134163, "grad_norm": 9.413582801818848, "learning_rate": 1.967880743814354e-05, "log_odds_chosen": 10.67624282836914, "log_odds_ratio": -3.9920356357470155e-05, "logits/chosen": -0.6611392498016357, "logits/rejected": -0.6790140867233276, "logps/chosen": -0.0013447502860799432, "logps/rejected": -2.598658561706543, "loss": 0.5386, "nll_loss": 0.1346549391746521, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013447501987684518, "rewards/margins": 0.2597314119338989, "rewards/rejected": -0.2598658800125122, "step": 9338 }, { "epoch": 6.45850622406639, "grad_norm": 6.010379791259766, "learning_rate": 1.967496542185339e-05, "log_odds_chosen": 10.788137435913086, "log_odds_ratio": -0.00010153088805964217, "logits/chosen": -0.3897785544395447, "logits/rejected": -0.4715155363082886, "logps/chosen": -0.00032580961124040186, "logps/rejected": -2.5287859439849854, "loss": 0.7011, "nll_loss": 0.17525643110275269, "rewards/accuracies": 1.0, "rewards/chosen": -3.2580959668848664e-05, "rewards/margins": 0.25284600257873535, "rewards/rejected": -0.2528786063194275, "step": 9339 }, { "epoch": 6.459197786998617, "grad_norm": 12.115055084228516, "learning_rate": 1.967112340556324e-05, "log_odds_chosen": 10.19715404510498, "log_odds_ratio": -0.0005401435191743076, "logits/chosen": -0.33382919430732727, "logits/rejected": -0.3910999894142151, "logps/chosen": -0.00039712939178571105, "logps/rejected": -2.1408450603485107, "loss": 0.7251, "nll_loss": 0.1812172532081604, "rewards/accuracies": 1.0, "rewards/chosen": -3.971294063376263e-05, "rewards/margins": 0.21404479444026947, "rewards/rejected": -0.21408450603485107, "step": 9340 }, { "epoch": 6.459889349930844, "grad_norm": 5.437094211578369, "learning_rate": 1.9667281389273092e-05, "log_odds_chosen": 9.647651672363281, "log_odds_ratio": -0.001371509861201048, "logits/chosen": -0.4421365559101105, "logits/rejected": -0.4803558588027954, "logps/chosen": -0.0007117479108273983, "logps/rejected": -1.8053985834121704, "loss": 0.6026, "nll_loss": 0.15051212906837463, "rewards/accuracies": 1.0, "rewards/chosen": -7.117479981388897e-05, "rewards/margins": 0.18046869337558746, "rewards/rejected": -0.18053987622261047, "step": 9341 }, { "epoch": 6.460580912863071, "grad_norm": 6.155204772949219, "learning_rate": 1.966343937298294e-05, "log_odds_chosen": 9.544366836547852, "log_odds_ratio": -0.0002290259872097522, "logits/chosen": -0.3371240496635437, "logits/rejected": -0.3929316997528076, "logps/chosen": -0.000392138579627499, "logps/rejected": -1.5680603981018066, "loss": 0.6517, "nll_loss": 0.16289177536964417, "rewards/accuracies": 1.0, "rewards/chosen": -3.9213860873132944e-05, "rewards/margins": 0.1567668467760086, "rewards/rejected": -0.15680605173110962, "step": 9342 }, { "epoch": 6.4612724757952975, "grad_norm": 13.597868919372559, "learning_rate": 1.9659597356692793e-05, "log_odds_chosen": 10.36497688293457, "log_odds_ratio": -0.0014595024986192584, "logits/chosen": -0.12537351250648499, "logits/rejected": -0.10785672068595886, "logps/chosen": -0.0014373556477949023, "logps/rejected": -2.0005123615264893, "loss": 0.7737, "nll_loss": 0.19328731298446655, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014373556768987328, "rewards/margins": 0.19990751147270203, "rewards/rejected": -0.20005124807357788, "step": 9343 }, { "epoch": 6.461964038727524, "grad_norm": 13.008829116821289, "learning_rate": 1.9655755340402642e-05, "log_odds_chosen": 11.800149917602539, "log_odds_ratio": -0.0003027294878847897, "logits/chosen": -0.6529538631439209, "logits/rejected": -0.5301782488822937, "logps/chosen": -0.0005336821777746081, "logps/rejected": -2.8983495235443115, "loss": 0.7727, "nll_loss": 0.19314080476760864, "rewards/accuracies": 1.0, "rewards/chosen": -5.336822141543962e-05, "rewards/margins": 0.2897816002368927, "rewards/rejected": -0.2898349463939667, "step": 9344 }, { "epoch": 6.462655601659751, "grad_norm": 5.498860836029053, "learning_rate": 1.9651913324112495e-05, "log_odds_chosen": 10.476800918579102, "log_odds_ratio": -0.0011540587292984128, "logits/chosen": -0.04873857647180557, "logits/rejected": -0.03208974748849869, "logps/chosen": -0.0013205776922404766, "logps/rejected": -2.6151742935180664, "loss": 0.85, "nll_loss": 0.2123776227235794, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013205778668634593, "rewards/margins": 0.26138538122177124, "rewards/rejected": -0.26151740550994873, "step": 9345 }, { "epoch": 6.463347164591978, "grad_norm": 4.314711570739746, "learning_rate": 1.9648071307822347e-05, "log_odds_chosen": 11.41019344329834, "log_odds_ratio": -7.023775106063113e-05, "logits/chosen": -0.20374369621276855, "logits/rejected": -0.33246517181396484, "logps/chosen": -0.00020804136875085533, "logps/rejected": -2.5009703636169434, "loss": 0.6544, "nll_loss": 0.16359777748584747, "rewards/accuracies": 1.0, "rewards/chosen": -2.080413469229825e-05, "rewards/margins": 0.2500762641429901, "rewards/rejected": -0.2500970661640167, "step": 9346 }, { "epoch": 6.464038727524205, "grad_norm": 10.438886642456055, "learning_rate": 1.9644229291532196e-05, "log_odds_chosen": 9.027183532714844, "log_odds_ratio": -0.0007800416206009686, "logits/chosen": -0.5669814944267273, "logits/rejected": -0.6429253816604614, "logps/chosen": -0.0011012261966243386, "logps/rejected": -1.3472520112991333, "loss": 0.8509, "nll_loss": 0.21263641119003296, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011012262257281691, "rewards/margins": 0.13461507856845856, "rewards/rejected": -0.1347251981496811, "step": 9347 }, { "epoch": 6.464730290456432, "grad_norm": 7.92177677154541, "learning_rate": 1.964038727524205e-05, "log_odds_chosen": 10.348886489868164, "log_odds_ratio": -0.00021048627968411893, "logits/chosen": -0.16438069939613342, "logits/rejected": -0.22757890820503235, "logps/chosen": -0.0006242183735594153, "logps/rejected": -2.191136598587036, "loss": 0.7032, "nll_loss": 0.1757863610982895, "rewards/accuracies": 1.0, "rewards/chosen": -6.242184463189915e-05, "rewards/margins": 0.21905125677585602, "rewards/rejected": -0.21911367774009705, "step": 9348 }, { "epoch": 6.4654218533886585, "grad_norm": 6.527889728546143, "learning_rate": 1.96365452589519e-05, "log_odds_chosen": 10.015470504760742, "log_odds_ratio": -0.00015627051470801234, "logits/chosen": -0.504023015499115, "logits/rejected": -0.560677707195282, "logps/chosen": -0.0009324349230155349, "logps/rejected": -1.8942363262176514, "loss": 0.5809, "nll_loss": 0.1452087163925171, "rewards/accuracies": 1.0, "rewards/chosen": -9.324349230155349e-05, "rewards/margins": 0.18933041393756866, "rewards/rejected": -0.18942365050315857, "step": 9349 }, { "epoch": 6.466113416320885, "grad_norm": 5.253231048583984, "learning_rate": 1.963270324266175e-05, "log_odds_chosen": 10.344749450683594, "log_odds_ratio": -0.00010648036550264806, "logits/chosen": -0.07455027103424072, "logits/rejected": -0.14819921553134918, "logps/chosen": -0.00038044259417802095, "logps/rejected": -2.1492841243743896, "loss": 0.583, "nll_loss": 0.14573504030704498, "rewards/accuracies": 1.0, "rewards/chosen": -3.804425796261057e-05, "rewards/margins": 0.21489037573337555, "rewards/rejected": -0.21492841839790344, "step": 9350 }, { "epoch": 6.466804979253112, "grad_norm": 10.674192428588867, "learning_rate": 1.96288612263716e-05, "log_odds_chosen": 9.346158981323242, "log_odds_ratio": -0.0011899136006832123, "logits/chosen": -0.4632045328617096, "logits/rejected": -0.5040472149848938, "logps/chosen": -0.0012183859944343567, "logps/rejected": -1.971890926361084, "loss": 0.715, "nll_loss": 0.17863427102565765, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012183861690573394, "rewards/margins": 0.1970672458410263, "rewards/rejected": -0.1971890926361084, "step": 9351 }, { "epoch": 6.467496542185339, "grad_norm": 8.562958717346191, "learning_rate": 1.9625019210081452e-05, "log_odds_chosen": 11.356171607971191, "log_odds_ratio": -2.4882941943360493e-05, "logits/chosen": 0.02768966555595398, "logits/rejected": 0.0028723329305648804, "logps/chosen": -0.00023115705698728561, "logps/rejected": -2.334925889968872, "loss": 0.659, "nll_loss": 0.1647453010082245, "rewards/accuracies": 1.0, "rewards/chosen": -2.3115706426324323e-05, "rewards/margins": 0.23346947133541107, "rewards/rejected": -0.23349258303642273, "step": 9352 }, { "epoch": 6.468188105117566, "grad_norm": 6.569983959197998, "learning_rate": 1.96211771937913e-05, "log_odds_chosen": 10.749797821044922, "log_odds_ratio": -4.219009861117229e-05, "logits/chosen": -0.03230445086956024, "logits/rejected": -0.04771629720926285, "logps/chosen": -0.00044686076580546796, "logps/rejected": -2.2594237327575684, "loss": 0.5344, "nll_loss": 0.13359317183494568, "rewards/accuracies": 1.0, "rewards/chosen": -4.4686075852951035e-05, "rewards/margins": 0.22589769959449768, "rewards/rejected": -0.22594238817691803, "step": 9353 }, { "epoch": 6.468879668049793, "grad_norm": 8.693273544311523, "learning_rate": 1.9617335177501153e-05, "log_odds_chosen": 10.477245330810547, "log_odds_ratio": -0.00017616937111597508, "logits/chosen": -0.1361008733510971, "logits/rejected": -0.2993399202823639, "logps/chosen": -0.0009238553466275334, "logps/rejected": -2.4320244789123535, "loss": 0.7092, "nll_loss": 0.17728503048419952, "rewards/accuracies": 1.0, "rewards/chosen": -9.238554048351943e-05, "rewards/margins": 0.2431100457906723, "rewards/rejected": -0.24320244789123535, "step": 9354 }, { "epoch": 6.4695712309820195, "grad_norm": 5.629900932312012, "learning_rate": 1.9613493161211006e-05, "log_odds_chosen": 11.165910720825195, "log_odds_ratio": -3.186017056577839e-05, "logits/chosen": -0.2521139979362488, "logits/rejected": -0.2641168236732483, "logps/chosen": -0.0004452554858289659, "logps/rejected": -2.6347713470458984, "loss": 0.6108, "nll_loss": 0.15268629789352417, "rewards/accuracies": 1.0, "rewards/chosen": -4.452555003808811e-05, "rewards/margins": 0.2634325921535492, "rewards/rejected": -0.2634771168231964, "step": 9355 }, { "epoch": 6.470262793914246, "grad_norm": 5.684783458709717, "learning_rate": 1.9609651144920855e-05, "log_odds_chosen": 11.058123588562012, "log_odds_ratio": -0.0007495767204090953, "logits/chosen": -0.4604434370994568, "logits/rejected": -0.5082454681396484, "logps/chosen": -0.0005760700441896915, "logps/rejected": -2.651322364807129, "loss": 0.4217, "nll_loss": 0.10533834248781204, "rewards/accuracies": 1.0, "rewards/chosen": -5.7607005146564916e-05, "rewards/margins": 0.2650746703147888, "rewards/rejected": -0.26513224840164185, "step": 9356 }, { "epoch": 6.470954356846473, "grad_norm": 4.939406394958496, "learning_rate": 1.9605809128630707e-05, "log_odds_chosen": 10.365484237670898, "log_odds_ratio": -0.0010485876118764281, "logits/chosen": 0.26797395944595337, "logits/rejected": 0.213160440325737, "logps/chosen": -0.0005518148536793888, "logps/rejected": -2.0900015830993652, "loss": 0.7742, "nll_loss": 0.1934371292591095, "rewards/accuracies": 1.0, "rewards/chosen": -5.5181484640343115e-05, "rewards/margins": 0.20894496142864227, "rewards/rejected": -0.2090001404285431, "step": 9357 }, { "epoch": 6.4716459197787, "grad_norm": 5.356972694396973, "learning_rate": 1.960196711234056e-05, "log_odds_chosen": 10.236105918884277, "log_odds_ratio": -8.164734754245728e-05, "logits/chosen": -0.3928852379322052, "logits/rejected": -0.440325528383255, "logps/chosen": -0.00031203130492940545, "logps/rejected": -2.0702123641967773, "loss": 0.732, "nll_loss": 0.18299226462841034, "rewards/accuracies": 1.0, "rewards/chosen": -3.1203126127365977e-05, "rewards/margins": 0.206990048289299, "rewards/rejected": -0.20702123641967773, "step": 9358 }, { "epoch": 6.472337482710927, "grad_norm": 3.448922634124756, "learning_rate": 1.959812509605041e-05, "log_odds_chosen": 11.163787841796875, "log_odds_ratio": -2.6103556592715904e-05, "logits/chosen": -0.5693701505661011, "logits/rejected": -0.47740453481674194, "logps/chosen": -0.0026880258228629827, "logps/rejected": -2.838186740875244, "loss": 1.1219, "nll_loss": 0.2804635763168335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002688025706447661, "rewards/margins": 0.2835499048233032, "rewards/rejected": -0.28381872177124023, "step": 9359 }, { "epoch": 6.473029045643154, "grad_norm": 4.64267110824585, "learning_rate": 1.9594283079760258e-05, "log_odds_chosen": 10.282106399536133, "log_odds_ratio": -7.090805593179539e-05, "logits/chosen": -0.46651166677474976, "logits/rejected": -0.5370398759841919, "logps/chosen": -0.0005136644467711449, "logps/rejected": -2.2826156616210938, "loss": 0.5595, "nll_loss": 0.1398591846227646, "rewards/accuracies": 1.0, "rewards/chosen": -5.136644540471025e-05, "rewards/margins": 0.2282102108001709, "rewards/rejected": -0.2282615602016449, "step": 9360 }, { "epoch": 6.4737206085753805, "grad_norm": 10.618483543395996, "learning_rate": 1.959044106347011e-05, "log_odds_chosen": 12.327230453491211, "log_odds_ratio": -1.442717075406108e-05, "logits/chosen": -0.48336470127105713, "logits/rejected": -0.5778825879096985, "logps/chosen": -0.00022113663726486266, "logps/rejected": -3.5289645195007324, "loss": 0.7443, "nll_loss": 0.18606841564178467, "rewards/accuracies": 1.0, "rewards/chosen": -2.2113663362688385e-05, "rewards/margins": 0.3528743088245392, "rewards/rejected": -0.35289645195007324, "step": 9361 }, { "epoch": 6.474412171507607, "grad_norm": 7.489385604858398, "learning_rate": 1.958659904717996e-05, "log_odds_chosen": 10.170774459838867, "log_odds_ratio": -0.00018795541836880147, "logits/chosen": -0.43242692947387695, "logits/rejected": -0.38654130697250366, "logps/chosen": -0.000399279611883685, "logps/rejected": -2.234528064727783, "loss": 0.4405, "nll_loss": 0.11011669039726257, "rewards/accuracies": 1.0, "rewards/chosen": -3.9927959733176976e-05, "rewards/margins": 0.2234128713607788, "rewards/rejected": -0.22345280647277832, "step": 9362 }, { "epoch": 6.475103734439834, "grad_norm": 3.973806142807007, "learning_rate": 1.958275703088981e-05, "log_odds_chosen": 10.255285263061523, "log_odds_ratio": -0.00025675195502117276, "logits/chosen": -0.3827497661113739, "logits/rejected": -0.4834297001361847, "logps/chosen": -0.0008683456690050662, "logps/rejected": -2.241360902786255, "loss": 0.4224, "nll_loss": 0.10557659715414047, "rewards/accuracies": 1.0, "rewards/chosen": -8.683456690050662e-05, "rewards/margins": 0.22404927015304565, "rewards/rejected": -0.224136084318161, "step": 9363 }, { "epoch": 6.475795297372061, "grad_norm": 5.911285877227783, "learning_rate": 1.9578915014599664e-05, "log_odds_chosen": 10.767386436462402, "log_odds_ratio": -0.0002806742559187114, "logits/chosen": -0.35741111636161804, "logits/rejected": -0.3878954350948334, "logps/chosen": -0.00031248465529643, "logps/rejected": -2.3113627433776855, "loss": 0.717, "nll_loss": 0.17922081053256989, "rewards/accuracies": 1.0, "rewards/chosen": -3.124846625723876e-05, "rewards/margins": 0.2311050146818161, "rewards/rejected": -0.2311362475156784, "step": 9364 }, { "epoch": 6.476486860304288, "grad_norm": 5.464684963226318, "learning_rate": 1.9575072998309513e-05, "log_odds_chosen": 11.092345237731934, "log_odds_ratio": -2.9721091777901165e-05, "logits/chosen": -0.005119264125823975, "logits/rejected": -0.08913788199424744, "logps/chosen": -0.0006547888042405248, "logps/rejected": -2.8356149196624756, "loss": 0.7138, "nll_loss": 0.17844641208648682, "rewards/accuracies": 1.0, "rewards/chosen": -6.5478881879244e-05, "rewards/margins": 0.28349605202674866, "rewards/rejected": -0.2835615277290344, "step": 9365 }, { "epoch": 6.477178423236515, "grad_norm": 3.9686710834503174, "learning_rate": 1.9571230982019366e-05, "log_odds_chosen": 10.885116577148438, "log_odds_ratio": -5.042840712121688e-05, "logits/chosen": -0.5279171466827393, "logits/rejected": -0.522693395614624, "logps/chosen": -0.00017958540411200374, "logps/rejected": -2.0214760303497314, "loss": 0.3655, "nll_loss": 0.09135989844799042, "rewards/accuracies": 1.0, "rewards/chosen": -1.7958540411200374e-05, "rewards/margins": 0.20212964713573456, "rewards/rejected": -0.20214760303497314, "step": 9366 }, { "epoch": 6.477869986168741, "grad_norm": 6.191340446472168, "learning_rate": 1.9567388965729218e-05, "log_odds_chosen": 10.428787231445312, "log_odds_ratio": -8.16139072412625e-05, "logits/chosen": -0.4286971092224121, "logits/rejected": -0.415450781583786, "logps/chosen": -0.0001613447384443134, "logps/rejected": -1.654663324356079, "loss": 0.6405, "nll_loss": 0.16010689735412598, "rewards/accuracies": 1.0, "rewards/chosen": -1.613447420822922e-05, "rewards/margins": 0.16545020043849945, "rewards/rejected": -0.1654663383960724, "step": 9367 }, { "epoch": 6.478561549100968, "grad_norm": 9.376286506652832, "learning_rate": 1.9563546949439067e-05, "log_odds_chosen": 9.9938325881958, "log_odds_ratio": -0.00032971659675240517, "logits/chosen": -0.41345369815826416, "logits/rejected": -0.5434488654136658, "logps/chosen": -0.00047324446495622396, "logps/rejected": -1.9914203882217407, "loss": 0.7204, "nll_loss": 0.18007104098796844, "rewards/accuracies": 1.0, "rewards/chosen": -4.732444358523935e-05, "rewards/margins": 0.1990947127342224, "rewards/rejected": -0.19914203882217407, "step": 9368 }, { "epoch": 6.479253112033195, "grad_norm": 4.642989158630371, "learning_rate": 1.9559704933148916e-05, "log_odds_chosen": 10.619248390197754, "log_odds_ratio": -4.108287612325512e-05, "logits/chosen": -0.41739746928215027, "logits/rejected": -0.4319196343421936, "logps/chosen": -0.00022695327061228454, "logps/rejected": -2.1578879356384277, "loss": 0.503, "nll_loss": 0.12573689222335815, "rewards/accuracies": 1.0, "rewards/chosen": -2.2695327061228454e-05, "rewards/margins": 0.2157660871744156, "rewards/rejected": -0.21578878164291382, "step": 9369 }, { "epoch": 6.479944674965422, "grad_norm": 5.525413990020752, "learning_rate": 1.955586291685877e-05, "log_odds_chosen": 11.790472030639648, "log_odds_ratio": -0.00010621760884532705, "logits/chosen": -0.2747637629508972, "logits/rejected": -0.2597922086715698, "logps/chosen": -0.0005810950533486903, "logps/rejected": -3.1645824909210205, "loss": 0.6016, "nll_loss": 0.15038540959358215, "rewards/accuracies": 1.0, "rewards/chosen": -5.810950824525207e-05, "rewards/margins": 0.3164001703262329, "rewards/rejected": -0.31645825505256653, "step": 9370 }, { "epoch": 6.480636237897649, "grad_norm": 8.51606559753418, "learning_rate": 1.9552020900568618e-05, "log_odds_chosen": 10.952482223510742, "log_odds_ratio": -2.4556336938985623e-05, "logits/chosen": -0.4160614311695099, "logits/rejected": -0.5774533748626709, "logps/chosen": -0.00023021356901153922, "logps/rejected": -1.9789564609527588, "loss": 0.6218, "nll_loss": 0.15545280277729034, "rewards/accuracies": 1.0, "rewards/chosen": -2.3021357264951803e-05, "rewards/margins": 0.19787262380123138, "rewards/rejected": -0.19789564609527588, "step": 9371 }, { "epoch": 6.481327800829876, "grad_norm": 11.30921745300293, "learning_rate": 1.954817888427847e-05, "log_odds_chosen": 10.13013744354248, "log_odds_ratio": -0.0004740456060972065, "logits/chosen": -0.18564777076244354, "logits/rejected": -0.2205415815114975, "logps/chosen": -0.0006518846494145691, "logps/rejected": -1.6600679159164429, "loss": 0.6882, "nll_loss": 0.17199313640594482, "rewards/accuracies": 1.0, "rewards/chosen": -6.518846203107387e-05, "rewards/margins": 0.16594161093235016, "rewards/rejected": -0.16600680351257324, "step": 9372 }, { "epoch": 6.482019363762102, "grad_norm": 7.174125671386719, "learning_rate": 1.9544336867988323e-05, "log_odds_chosen": 10.80029296875, "log_odds_ratio": -4.8086159949889407e-05, "logits/chosen": -0.038425326347351074, "logits/rejected": -0.18574784696102142, "logps/chosen": -0.0002708366373553872, "logps/rejected": -2.129744052886963, "loss": 0.6591, "nll_loss": 0.1647716909646988, "rewards/accuracies": 1.0, "rewards/chosen": -2.7083666282123886e-05, "rewards/margins": 0.2129473239183426, "rewards/rejected": -0.212974414229393, "step": 9373 }, { "epoch": 6.482710926694329, "grad_norm": 9.137430191040039, "learning_rate": 1.954049485169817e-05, "log_odds_chosen": 10.96411418914795, "log_odds_ratio": -6.397358811227605e-05, "logits/chosen": -0.07936275750398636, "logits/rejected": -0.16068057715892792, "logps/chosen": -0.00018608587561175227, "logps/rejected": -2.4485511779785156, "loss": 0.5968, "nll_loss": 0.14920562505722046, "rewards/accuracies": 1.0, "rewards/chosen": -1.860858901636675e-05, "rewards/margins": 0.2448364943265915, "rewards/rejected": -0.2448551058769226, "step": 9374 }, { "epoch": 6.483402489626556, "grad_norm": 5.644742488861084, "learning_rate": 1.9536652835408024e-05, "log_odds_chosen": 9.900896072387695, "log_odds_ratio": -9.974578279070556e-05, "logits/chosen": -0.14903931319713593, "logits/rejected": -0.18918846547603607, "logps/chosen": -0.0004961632657796144, "logps/rejected": -1.9804139137268066, "loss": 0.5944, "nll_loss": 0.14859052002429962, "rewards/accuracies": 1.0, "rewards/chosen": -4.961631930200383e-05, "rewards/margins": 0.1979917734861374, "rewards/rejected": -0.1980414092540741, "step": 9375 }, { "epoch": 6.484094052558783, "grad_norm": 7.427826404571533, "learning_rate": 1.9532810819117876e-05, "log_odds_chosen": 9.996077537536621, "log_odds_ratio": -0.00019332885858602822, "logits/chosen": -0.2691127359867096, "logits/rejected": -0.25111067295074463, "logps/chosen": -0.0014359343331307173, "logps/rejected": -2.8224704265594482, "loss": 0.6442, "nll_loss": 0.16103675961494446, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014359343913383782, "rewards/margins": 0.2821034789085388, "rewards/rejected": -0.28224706649780273, "step": 9376 }, { "epoch": 6.48478561549101, "grad_norm": 9.478433609008789, "learning_rate": 1.9528968802827726e-05, "log_odds_chosen": 10.263532638549805, "log_odds_ratio": -7.455523882526904e-05, "logits/chosen": -0.27997761964797974, "logits/rejected": -0.33347785472869873, "logps/chosen": -0.0001137300132540986, "logps/rejected": -1.435097336769104, "loss": 0.623, "nll_loss": 0.15574686229228973, "rewards/accuracies": 1.0, "rewards/chosen": -1.1373001143510919e-05, "rewards/margins": 0.14349837601184845, "rewards/rejected": -0.14350974559783936, "step": 9377 }, { "epoch": 6.485477178423237, "grad_norm": 8.160806655883789, "learning_rate": 1.9525126786537575e-05, "log_odds_chosen": 10.65985107421875, "log_odds_ratio": -5.170962685951963e-05, "logits/chosen": 0.09655636548995972, "logits/rejected": -0.008620738983154297, "logps/chosen": -0.0003284272679593414, "logps/rejected": -2.1207449436187744, "loss": 0.5131, "nll_loss": 0.12827156484127045, "rewards/accuracies": 1.0, "rewards/chosen": -3.284272679593414e-05, "rewards/margins": 0.21204164624214172, "rewards/rejected": -0.21207448840141296, "step": 9378 }, { "epoch": 6.486168741355463, "grad_norm": 7.359895706176758, "learning_rate": 1.9521284770247427e-05, "log_odds_chosen": 10.544219970703125, "log_odds_ratio": -5.43832138646394e-05, "logits/chosen": 0.11446790397167206, "logits/rejected": 0.1329166442155838, "logps/chosen": -0.00029686224297620356, "logps/rejected": -1.8876280784606934, "loss": 0.4447, "nll_loss": 0.11115758121013641, "rewards/accuracies": 1.0, "rewards/chosen": -2.9686227208003402e-05, "rewards/margins": 0.18873311579227448, "rewards/rejected": -0.1887628138065338, "step": 9379 }, { "epoch": 6.48686030428769, "grad_norm": 4.4276204109191895, "learning_rate": 1.9517442753957276e-05, "log_odds_chosen": 10.485124588012695, "log_odds_ratio": -0.00014691927935928106, "logits/chosen": -0.5485713481903076, "logits/rejected": -0.5030081868171692, "logps/chosen": -0.00014963530702516437, "logps/rejected": -2.009016990661621, "loss": 0.4577, "nll_loss": 0.11441591382026672, "rewards/accuracies": 1.0, "rewards/chosen": -1.4963530702516437e-05, "rewards/margins": 0.20088671147823334, "rewards/rejected": -0.20090168714523315, "step": 9380 }, { "epoch": 6.487551867219917, "grad_norm": 13.013057708740234, "learning_rate": 1.951360073766713e-05, "log_odds_chosen": 10.500027656555176, "log_odds_ratio": -0.0002235960419056937, "logits/chosen": -0.01233922690153122, "logits/rejected": -0.05886126682162285, "logps/chosen": -0.00036796220229007304, "logps/rejected": -2.2961668968200684, "loss": 0.7453, "nll_loss": 0.18629077076911926, "rewards/accuracies": 1.0, "rewards/chosen": -3.679622386698611e-05, "rewards/margins": 0.22957991063594818, "rewards/rejected": -0.2296167016029358, "step": 9381 }, { "epoch": 6.488243430152144, "grad_norm": 5.009751319885254, "learning_rate": 1.950975872137698e-05, "log_odds_chosen": 11.303783416748047, "log_odds_ratio": -2.4028908228501678e-05, "logits/chosen": -0.00550035759806633, "logits/rejected": -0.06772229075431824, "logps/chosen": -0.00017578649567440152, "logps/rejected": -2.637251853942871, "loss": 0.5605, "nll_loss": 0.14013239741325378, "rewards/accuracies": 1.0, "rewards/chosen": -1.757864811224863e-05, "rewards/margins": 0.26370760798454285, "rewards/rejected": -0.2637251913547516, "step": 9382 }, { "epoch": 6.488934993084371, "grad_norm": 4.654784679412842, "learning_rate": 1.950591670508683e-05, "log_odds_chosen": 10.375570297241211, "log_odds_ratio": -8.63418317749165e-05, "logits/chosen": -0.049294471740722656, "logits/rejected": -0.10794499516487122, "logps/chosen": -0.000227405660552904, "logps/rejected": -1.9614112377166748, "loss": 0.7354, "nll_loss": 0.18383842706680298, "rewards/accuracies": 1.0, "rewards/chosen": -2.2740567146684043e-05, "rewards/margins": 0.19611838459968567, "rewards/rejected": -0.19614112377166748, "step": 9383 }, { "epoch": 6.4896265560165975, "grad_norm": 6.240286350250244, "learning_rate": 1.9502074688796682e-05, "log_odds_chosen": 11.334842681884766, "log_odds_ratio": -2.722295539570041e-05, "logits/chosen": 0.04571309685707092, "logits/rejected": -0.09449347853660583, "logps/chosen": -0.00015503622125834227, "logps/rejected": -2.295506477355957, "loss": 0.8953, "nll_loss": 0.2238202542066574, "rewards/accuracies": 1.0, "rewards/chosen": -1.5503621398238465e-05, "rewards/margins": 0.22953513264656067, "rewards/rejected": -0.22955065965652466, "step": 9384 }, { "epoch": 6.490318118948824, "grad_norm": 6.103881359100342, "learning_rate": 1.9498232672506535e-05, "log_odds_chosen": 10.914320945739746, "log_odds_ratio": -5.245122156338766e-05, "logits/chosen": -0.478939026594162, "logits/rejected": -0.47022366523742676, "logps/chosen": -0.00027636217419058084, "logps/rejected": -2.0653438568115234, "loss": 0.5338, "nll_loss": 0.13343766331672668, "rewards/accuracies": 1.0, "rewards/chosen": -2.7636218874249607e-05, "rewards/margins": 0.20650672912597656, "rewards/rejected": -0.20653435587882996, "step": 9385 }, { "epoch": 6.491009681881051, "grad_norm": 11.520191192626953, "learning_rate": 1.9494390656216384e-05, "log_odds_chosen": 10.832954406738281, "log_odds_ratio": -6.274733459576964e-05, "logits/chosen": -0.5592326521873474, "logits/rejected": -0.5868933796882629, "logps/chosen": -0.00022971341968514025, "logps/rejected": -2.2540066242218018, "loss": 0.5466, "nll_loss": 0.13664284348487854, "rewards/accuracies": 1.0, "rewards/chosen": -2.2971342332311906e-05, "rewards/margins": 0.22537770867347717, "rewards/rejected": -0.2254006564617157, "step": 9386 }, { "epoch": 6.491701244813278, "grad_norm": 6.2752485275268555, "learning_rate": 1.9490548639926233e-05, "log_odds_chosen": 10.993319511413574, "log_odds_ratio": -0.00043842248851433396, "logits/chosen": -0.27941277623176575, "logits/rejected": -0.2629889249801636, "logps/chosen": -0.00019828768563456833, "logps/rejected": -2.2103896141052246, "loss": 0.9143, "nll_loss": 0.22852730751037598, "rewards/accuracies": 1.0, "rewards/chosen": -1.9828770746244118e-05, "rewards/margins": 0.22101914882659912, "rewards/rejected": -0.22103895246982574, "step": 9387 }, { "epoch": 6.492392807745505, "grad_norm": 7.458618640899658, "learning_rate": 1.9486706623636085e-05, "log_odds_chosen": 11.85312557220459, "log_odds_ratio": -3.386929529369809e-05, "logits/chosen": -0.12793764472007751, "logits/rejected": -0.13494420051574707, "logps/chosen": -9.064783080248162e-05, "logps/rejected": -2.489367961883545, "loss": 0.5406, "nll_loss": 0.13514791429042816, "rewards/accuracies": 1.0, "rewards/chosen": -9.064782716450281e-06, "rewards/margins": 0.2489277422428131, "rewards/rejected": -0.24893681704998016, "step": 9388 }, { "epoch": 6.493084370677732, "grad_norm": 5.261797904968262, "learning_rate": 1.9482864607345934e-05, "log_odds_chosen": 10.775561332702637, "log_odds_ratio": -0.0002782086085062474, "logits/chosen": -0.1607806533575058, "logits/rejected": -0.17884010076522827, "logps/chosen": -0.00014673758414573967, "logps/rejected": -1.7819510698318481, "loss": 0.5429, "nll_loss": 0.13568845391273499, "rewards/accuracies": 1.0, "rewards/chosen": -1.4673758414573967e-05, "rewards/margins": 0.17818044126033783, "rewards/rejected": -0.17819511890411377, "step": 9389 }, { "epoch": 6.4937759336099585, "grad_norm": 4.2047953605651855, "learning_rate": 1.9479022591055787e-05, "log_odds_chosen": 10.238554000854492, "log_odds_ratio": -0.00011502691631903872, "logits/chosen": -0.3183709979057312, "logits/rejected": -0.38084205985069275, "logps/chosen": -0.0003533228882588446, "logps/rejected": -2.0055973529815674, "loss": 1.0841, "nll_loss": 0.2710167467594147, "rewards/accuracies": 1.0, "rewards/chosen": -3.533228664309718e-05, "rewards/margins": 0.20052438974380493, "rewards/rejected": -0.20055972039699554, "step": 9390 }, { "epoch": 6.494467496542185, "grad_norm": 6.484034061431885, "learning_rate": 1.947518057476564e-05, "log_odds_chosen": 9.686558723449707, "log_odds_ratio": -0.00015934224938973784, "logits/chosen": -0.31236618757247925, "logits/rejected": -0.3524632155895233, "logps/chosen": -0.0008042749250307679, "logps/rejected": -2.3469886779785156, "loss": 0.4975, "nll_loss": 0.12434867024421692, "rewards/accuracies": 1.0, "rewards/chosen": -8.042750414460897e-05, "rewards/margins": 0.23461845517158508, "rewards/rejected": -0.23469887673854828, "step": 9391 }, { "epoch": 6.495159059474412, "grad_norm": 4.563853740692139, "learning_rate": 1.947133855847549e-05, "log_odds_chosen": 10.755139350891113, "log_odds_ratio": -7.848611858207732e-05, "logits/chosen": -0.3584415912628174, "logits/rejected": -0.4386383295059204, "logps/chosen": -0.00026066272403113544, "logps/rejected": -2.410828113555908, "loss": 0.5091, "nll_loss": 0.12725502252578735, "rewards/accuracies": 1.0, "rewards/chosen": -2.6066272766911425e-05, "rewards/margins": 0.24105677008628845, "rewards/rejected": -0.2410828173160553, "step": 9392 }, { "epoch": 6.495850622406639, "grad_norm": 7.453660011291504, "learning_rate": 1.946749654218534e-05, "log_odds_chosen": 10.772347450256348, "log_odds_ratio": -9.090732055483386e-05, "logits/chosen": -0.3762636184692383, "logits/rejected": -0.4067800045013428, "logps/chosen": -0.0003499372396618128, "logps/rejected": -2.7631566524505615, "loss": 0.8204, "nll_loss": 0.2051025927066803, "rewards/accuracies": 1.0, "rewards/chosen": -3.49937254213728e-05, "rewards/margins": 0.2762807011604309, "rewards/rejected": -0.27631568908691406, "step": 9393 }, { "epoch": 6.496542185338866, "grad_norm": 5.516212463378906, "learning_rate": 1.9463654525895193e-05, "log_odds_chosen": 12.666479110717773, "log_odds_ratio": -7.51461811887566e-06, "logits/chosen": -0.1613074094057083, "logits/rejected": -0.21167302131652832, "logps/chosen": -0.00015206293028313667, "logps/rejected": -3.4487061500549316, "loss": 0.5949, "nll_loss": 0.1487281173467636, "rewards/accuracies": 1.0, "rewards/chosen": -1.5206292118818965e-05, "rewards/margins": 0.3448554277420044, "rewards/rejected": -0.3448706269264221, "step": 9394 }, { "epoch": 6.497233748271093, "grad_norm": 5.250306606292725, "learning_rate": 1.9459812509605042e-05, "log_odds_chosen": 10.824128150939941, "log_odds_ratio": -3.159879270242527e-05, "logits/chosen": -0.18953561782836914, "logits/rejected": -0.21519282460212708, "logps/chosen": -0.00020199205027893186, "logps/rejected": -2.2640249729156494, "loss": 0.5068, "nll_loss": 0.12670867145061493, "rewards/accuracies": 1.0, "rewards/chosen": -2.0199206119286828e-05, "rewards/margins": 0.226382315158844, "rewards/rejected": -0.22640252113342285, "step": 9395 }, { "epoch": 6.4979253112033195, "grad_norm": 5.880115509033203, "learning_rate": 1.945597049331489e-05, "log_odds_chosen": 10.155384063720703, "log_odds_ratio": -0.00017642477178014815, "logits/chosen": -0.5245303511619568, "logits/rejected": -0.5084295272827148, "logps/chosen": -0.00028953165747225285, "logps/rejected": -1.9637598991394043, "loss": 0.664, "nll_loss": 0.16598252952098846, "rewards/accuracies": 1.0, "rewards/chosen": -2.8953167202416807e-05, "rewards/margins": 0.19634705781936646, "rewards/rejected": -0.1963759958744049, "step": 9396 }, { "epoch": 6.498616874135546, "grad_norm": 12.754213333129883, "learning_rate": 1.9452128477024744e-05, "log_odds_chosen": 10.310648918151855, "log_odds_ratio": -0.0002212212566519156, "logits/chosen": -0.7641613483428955, "logits/rejected": -0.6988776922225952, "logps/chosen": -0.00037628383142873645, "logps/rejected": -1.668961763381958, "loss": 0.5353, "nll_loss": 0.13379371166229248, "rewards/accuracies": 1.0, "rewards/chosen": -3.7628382415277883e-05, "rewards/margins": 0.16685855388641357, "rewards/rejected": -0.16689617931842804, "step": 9397 }, { "epoch": 6.499308437067773, "grad_norm": 7.186704635620117, "learning_rate": 1.9448286460734593e-05, "log_odds_chosen": 10.572660446166992, "log_odds_ratio": -0.00016197163495235145, "logits/chosen": -0.36398768424987793, "logits/rejected": -0.2695331871509552, "logps/chosen": -0.0002569742500782013, "logps/rejected": -2.236602306365967, "loss": 0.741, "nll_loss": 0.18523554503917694, "rewards/accuracies": 1.0, "rewards/chosen": -2.5697427190607414e-05, "rewards/margins": 0.2236345410346985, "rewards/rejected": -0.22366023063659668, "step": 9398 }, { "epoch": 6.5, "grad_norm": 7.115647792816162, "learning_rate": 1.9444444444444445e-05, "log_odds_chosen": 11.01469898223877, "log_odds_ratio": -0.0006828614859841764, "logits/chosen": -0.5176241397857666, "logits/rejected": -0.5681507587432861, "logps/chosen": -0.0010533032473176718, "logps/rejected": -2.80560302734375, "loss": 0.9728, "nll_loss": 0.24311989545822144, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010533032036619261, "rewards/margins": 0.28045496344566345, "rewards/rejected": -0.28056028485298157, "step": 9399 }, { "epoch": 6.500691562932227, "grad_norm": 6.133714199066162, "learning_rate": 1.9440602428154298e-05, "log_odds_chosen": 10.151878356933594, "log_odds_ratio": -0.00010759021097328514, "logits/chosen": -0.2427067905664444, "logits/rejected": -0.35290953516960144, "logps/chosen": -0.000351759692421183, "logps/rejected": -1.7226321697235107, "loss": 0.6078, "nll_loss": 0.15194585919380188, "rewards/accuracies": 1.0, "rewards/chosen": -3.517596996971406e-05, "rewards/margins": 0.17222803831100464, "rewards/rejected": -0.1722632348537445, "step": 9400 }, { "epoch": 6.501383125864454, "grad_norm": 6.098109722137451, "learning_rate": 1.9436760411864147e-05, "log_odds_chosen": 10.773904800415039, "log_odds_ratio": -0.00040848561911843717, "logits/chosen": -0.5251795053482056, "logits/rejected": -0.5544536113739014, "logps/chosen": -0.0026470485609024763, "logps/rejected": -2.45532488822937, "loss": 0.5156, "nll_loss": 0.12887021899223328, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002647048677317798, "rewards/margins": 0.24526777863502502, "rewards/rejected": -0.24553249776363373, "step": 9401 }, { "epoch": 6.5020746887966805, "grad_norm": 6.668719291687012, "learning_rate": 1.9432918395574e-05, "log_odds_chosen": 10.610265731811523, "log_odds_ratio": -5.127752956468612e-05, "logits/chosen": -0.6081045269966125, "logits/rejected": -0.5891537666320801, "logps/chosen": -0.00013224473514128476, "logps/rejected": -1.805846929550171, "loss": 0.5718, "nll_loss": 0.1429474651813507, "rewards/accuracies": 1.0, "rewards/chosen": -1.3224474059825297e-05, "rewards/margins": 0.18057146668434143, "rewards/rejected": -0.18058468401432037, "step": 9402 }, { "epoch": 6.502766251728907, "grad_norm": 6.99521017074585, "learning_rate": 1.9429076379283852e-05, "log_odds_chosen": 10.94999885559082, "log_odds_ratio": -4.3023428588639945e-05, "logits/chosen": -0.1102178692817688, "logits/rejected": -0.10561814159154892, "logps/chosen": -0.00015922827878966928, "logps/rejected": -2.272221088409424, "loss": 0.498, "nll_loss": 0.1244877278804779, "rewards/accuracies": 1.0, "rewards/chosen": -1.5922827515169047e-05, "rewards/margins": 0.22720618546009064, "rewards/rejected": -0.22722211480140686, "step": 9403 }, { "epoch": 6.503457814661134, "grad_norm": 4.8532304763793945, "learning_rate": 1.94252343629937e-05, "log_odds_chosen": 10.020861625671387, "log_odds_ratio": -0.0003055678680539131, "logits/chosen": -0.24106940627098083, "logits/rejected": -0.23849815130233765, "logps/chosen": -0.0009358559618704021, "logps/rejected": -2.470724105834961, "loss": 0.4524, "nll_loss": 0.11306346207857132, "rewards/accuracies": 1.0, "rewards/chosen": -9.358559327665716e-05, "rewards/margins": 0.24697881937026978, "rewards/rejected": -0.24707241356372833, "step": 9404 }, { "epoch": 6.504149377593361, "grad_norm": 6.786164283752441, "learning_rate": 1.942139234670355e-05, "log_odds_chosen": 10.718588829040527, "log_odds_ratio": -9.22972394619137e-05, "logits/chosen": -0.24664977192878723, "logits/rejected": -0.4017777442932129, "logps/chosen": -0.00023182231234386563, "logps/rejected": -2.2096641063690186, "loss": 0.5772, "nll_loss": 0.14429354667663574, "rewards/accuracies": 1.0, "rewards/chosen": -2.3182230506790802e-05, "rewards/margins": 0.22094322741031647, "rewards/rejected": -0.2209664285182953, "step": 9405 }, { "epoch": 6.504840940525588, "grad_norm": 5.937921524047852, "learning_rate": 1.9417550330413402e-05, "log_odds_chosen": 10.956061363220215, "log_odds_ratio": -5.964807860436849e-05, "logits/chosen": -0.31870102882385254, "logits/rejected": -0.4643133878707886, "logps/chosen": -0.0002928538015112281, "logps/rejected": -2.033576011657715, "loss": 0.5851, "nll_loss": 0.14626480638980865, "rewards/accuracies": 1.0, "rewards/chosen": -2.928538015112281e-05, "rewards/margins": 0.20332831144332886, "rewards/rejected": -0.20335760712623596, "step": 9406 }, { "epoch": 6.505532503457815, "grad_norm": 5.6538825035095215, "learning_rate": 1.941370831412325e-05, "log_odds_chosen": 10.884583473205566, "log_odds_ratio": -4.456051465240307e-05, "logits/chosen": -0.44369328022003174, "logits/rejected": -0.49158185720443726, "logps/chosen": -0.0009643149096518755, "logps/rejected": -2.6121954917907715, "loss": 0.8599, "nll_loss": 0.21495816111564636, "rewards/accuracies": 1.0, "rewards/chosen": -9.643149678595364e-05, "rewards/margins": 0.2611231207847595, "rewards/rejected": -0.2612195611000061, "step": 9407 }, { "epoch": 6.5062240663900415, "grad_norm": 5.787775993347168, "learning_rate": 1.9409866297833104e-05, "log_odds_chosen": 10.493619918823242, "log_odds_ratio": -0.00011466229625511914, "logits/chosen": -0.7101765871047974, "logits/rejected": -0.7864276170730591, "logps/chosen": -0.00018912236555479467, "logps/rejected": -2.0413930416107178, "loss": 0.5214, "nll_loss": 0.13034909963607788, "rewards/accuracies": 1.0, "rewards/chosen": -1.8912236555479467e-05, "rewards/margins": 0.20412039756774902, "rewards/rejected": -0.20413930714130402, "step": 9408 }, { "epoch": 6.506915629322268, "grad_norm": 3.5225272178649902, "learning_rate": 1.9406024281542956e-05, "log_odds_chosen": 10.417261123657227, "log_odds_ratio": -0.0001832667039707303, "logits/chosen": -0.47410666942596436, "logits/rejected": -0.449399471282959, "logps/chosen": -0.00038906122790649533, "logps/rejected": -2.056983232498169, "loss": 0.3241, "nll_loss": 0.08101370185613632, "rewards/accuracies": 1.0, "rewards/chosen": -3.8906124245841056e-05, "rewards/margins": 0.205659419298172, "rewards/rejected": -0.20569832623004913, "step": 9409 }, { "epoch": 6.507607192254495, "grad_norm": 5.769867897033691, "learning_rate": 1.9402182265252805e-05, "log_odds_chosen": 10.512954711914062, "log_odds_ratio": -0.00010581602691672742, "logits/chosen": -0.28758078813552856, "logits/rejected": -0.2849484086036682, "logps/chosen": -0.00046525232028216124, "logps/rejected": -2.6369874477386475, "loss": 0.808, "nll_loss": 0.20199567079544067, "rewards/accuracies": 1.0, "rewards/chosen": -4.65252305730246e-05, "rewards/margins": 0.2636522352695465, "rewards/rejected": -0.2636987566947937, "step": 9410 }, { "epoch": 6.508298755186722, "grad_norm": 7.070125579833984, "learning_rate": 1.9398340248962658e-05, "log_odds_chosen": 10.729057312011719, "log_odds_ratio": -6.643655797233805e-05, "logits/chosen": -0.6054902076721191, "logits/rejected": -0.48226070404052734, "logps/chosen": -0.0008427513530477881, "logps/rejected": -2.409019947052002, "loss": 0.4645, "nll_loss": 0.11612001061439514, "rewards/accuracies": 1.0, "rewards/chosen": -8.427513967035338e-05, "rewards/margins": 0.24081772565841675, "rewards/rejected": -0.24090200662612915, "step": 9411 }, { "epoch": 6.508990318118949, "grad_norm": 6.108166217803955, "learning_rate": 1.9394498232672507e-05, "log_odds_chosen": 10.982990264892578, "log_odds_ratio": -3.755984653253108e-05, "logits/chosen": -0.3624812364578247, "logits/rejected": -0.5092224478721619, "logps/chosen": -0.0005596339469775558, "logps/rejected": -2.786410093307495, "loss": 0.5912, "nll_loss": 0.14779676496982574, "rewards/accuracies": 1.0, "rewards/chosen": -5.5963395425351337e-05, "rewards/margins": 0.2785850465297699, "rewards/rejected": -0.2786409854888916, "step": 9412 }, { "epoch": 6.509681881051176, "grad_norm": 10.15151309967041, "learning_rate": 1.939065621638236e-05, "log_odds_chosen": 10.644213676452637, "log_odds_ratio": -6.074633347452618e-05, "logits/chosen": -0.6252480149269104, "logits/rejected": -0.7317550182342529, "logps/chosen": -0.0002546596515458077, "logps/rejected": -2.0850729942321777, "loss": 0.5701, "nll_loss": 0.14251932501792908, "rewards/accuracies": 1.0, "rewards/chosen": -2.546596442698501e-05, "rewards/margins": 0.20848184823989868, "rewards/rejected": -0.20850731432437897, "step": 9413 }, { "epoch": 6.5103734439834025, "grad_norm": 10.627796173095703, "learning_rate": 1.938681420009221e-05, "log_odds_chosen": 11.714271545410156, "log_odds_ratio": -1.4582346921088174e-05, "logits/chosen": -0.08012107014656067, "logits/rejected": -0.19204331934452057, "logps/chosen": -0.00015324132982641459, "logps/rejected": -2.5697147846221924, "loss": 0.7031, "nll_loss": 0.1757819950580597, "rewards/accuracies": 1.0, "rewards/chosen": -1.532413443783298e-05, "rewards/margins": 0.25695616006851196, "rewards/rejected": -0.25697147846221924, "step": 9414 }, { "epoch": 6.511065006915629, "grad_norm": 5.780624866485596, "learning_rate": 1.9382972183802057e-05, "log_odds_chosen": 8.441697120666504, "log_odds_ratio": -0.002654177835211158, "logits/chosen": -0.4831072688102722, "logits/rejected": -0.44240304827690125, "logps/chosen": -0.002396452473476529, "logps/rejected": -1.4833424091339111, "loss": 1.112, "nll_loss": 0.27773788571357727, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023964526189956814, "rewards/margins": 0.14809459447860718, "rewards/rejected": -0.14833424985408783, "step": 9415 }, { "epoch": 6.511756569847856, "grad_norm": 5.6088643074035645, "learning_rate": 1.937913016751191e-05, "log_odds_chosen": 11.039244651794434, "log_odds_ratio": -9.301940008299425e-05, "logits/chosen": -0.23438656330108643, "logits/rejected": -0.32080191373825073, "logps/chosen": -0.00019122361845802516, "logps/rejected": -2.171100616455078, "loss": 0.4477, "nll_loss": 0.11190492659807205, "rewards/accuracies": 1.0, "rewards/chosen": -1.912236439238768e-05, "rewards/margins": 0.2170909196138382, "rewards/rejected": -0.2171100378036499, "step": 9416 }, { "epoch": 6.512448132780083, "grad_norm": 6.845349311828613, "learning_rate": 1.9375288151221762e-05, "log_odds_chosen": 10.662532806396484, "log_odds_ratio": -3.290931636001915e-05, "logits/chosen": -0.48565664887428284, "logits/rejected": -0.6462719440460205, "logps/chosen": -0.00023579117259941995, "logps/rejected": -2.098126173019409, "loss": 0.6561, "nll_loss": 0.16403229534626007, "rewards/accuracies": 1.0, "rewards/chosen": -2.357911944272928e-05, "rewards/margins": 0.20978905260562897, "rewards/rejected": -0.20981262624263763, "step": 9417 }, { "epoch": 6.51313969571231, "grad_norm": 9.37191390991211, "learning_rate": 1.937144613493161e-05, "log_odds_chosen": 9.508430480957031, "log_odds_ratio": -0.003604255151003599, "logits/chosen": -0.48413991928100586, "logits/rejected": -0.5383449792861938, "logps/chosen": -0.01737593300640583, "logps/rejected": -2.10723876953125, "loss": 0.4074, "nll_loss": 0.1014985591173172, "rewards/accuracies": 1.0, "rewards/chosen": -0.0017375932075083256, "rewards/margins": 0.2089862823486328, "rewards/rejected": -0.210723876953125, "step": 9418 }, { "epoch": 6.513831258644537, "grad_norm": 7.972471237182617, "learning_rate": 1.9367604118641464e-05, "log_odds_chosen": 10.649456977844238, "log_odds_ratio": -0.0001401927729602903, "logits/chosen": -0.40515631437301636, "logits/rejected": -0.45684176683425903, "logps/chosen": -0.00021025318710599095, "logps/rejected": -2.0950050354003906, "loss": 0.5264, "nll_loss": 0.13159304857254028, "rewards/accuracies": 1.0, "rewards/chosen": -2.102531652781181e-05, "rewards/margins": 0.2094794660806656, "rewards/rejected": -0.2095005065202713, "step": 9419 }, { "epoch": 6.514522821576763, "grad_norm": 5.934338092803955, "learning_rate": 1.9363762102351316e-05, "log_odds_chosen": 10.455333709716797, "log_odds_ratio": -0.00029340438777580857, "logits/chosen": -0.5644535422325134, "logits/rejected": -0.6517401933670044, "logps/chosen": -0.0014436359051615, "logps/rejected": -2.3486440181732178, "loss": 0.432, "nll_loss": 0.10797516256570816, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001443635846953839, "rewards/margins": 0.23472005128860474, "rewards/rejected": -0.23486441373825073, "step": 9420 }, { "epoch": 6.51521438450899, "grad_norm": 6.147100448608398, "learning_rate": 1.9359920086061165e-05, "log_odds_chosen": 8.786388397216797, "log_odds_ratio": -0.0012116666184738278, "logits/chosen": -0.4684923589229584, "logits/rejected": -0.490852415561676, "logps/chosen": -0.001625780132599175, "logps/rejected": -1.5735567808151245, "loss": 0.7118, "nll_loss": 0.17783603072166443, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001625780132599175, "rewards/margins": 0.15719309449195862, "rewards/rejected": -0.1573556810617447, "step": 9421 }, { "epoch": 6.515905947441217, "grad_norm": 6.892473220825195, "learning_rate": 1.9356078069771018e-05, "log_odds_chosen": 10.530393600463867, "log_odds_ratio": -0.00014122716675046831, "logits/chosen": -0.6844215393066406, "logits/rejected": -0.7224639654159546, "logps/chosen": -0.0002905130968429148, "logps/rejected": -2.489344358444214, "loss": 0.5674, "nll_loss": 0.14183634519577026, "rewards/accuracies": 1.0, "rewards/chosen": -2.905130895669572e-05, "rewards/margins": 0.24890540540218353, "rewards/rejected": -0.24893444776535034, "step": 9422 }, { "epoch": 6.516597510373444, "grad_norm": 4.591324329376221, "learning_rate": 1.9352236053480867e-05, "log_odds_chosen": 11.390593528747559, "log_odds_ratio": -1.5925519619486295e-05, "logits/chosen": -0.5548987984657288, "logits/rejected": -0.5801922678947449, "logps/chosen": -0.0001749470247887075, "logps/rejected": -2.1435866355895996, "loss": 0.4582, "nll_loss": 0.11454764008522034, "rewards/accuracies": 1.0, "rewards/chosen": -1.749470357026439e-05, "rewards/margins": 0.2143411636352539, "rewards/rejected": -0.21435865759849548, "step": 9423 }, { "epoch": 6.517289073305671, "grad_norm": 6.513183116912842, "learning_rate": 1.9348394037190716e-05, "log_odds_chosen": 9.555843353271484, "log_odds_ratio": -0.0003111936675850302, "logits/chosen": -0.4625076949596405, "logits/rejected": -0.4753469228744507, "logps/chosen": -0.000441780430264771, "logps/rejected": -1.761674404144287, "loss": 0.527, "nll_loss": 0.13172942399978638, "rewards/accuracies": 1.0, "rewards/chosen": -4.41780430264771e-05, "rewards/margins": 0.1761232614517212, "rewards/rejected": -0.17616745829582214, "step": 9424 }, { "epoch": 6.517980636237898, "grad_norm": 4.426665782928467, "learning_rate": 1.9344552020900568e-05, "log_odds_chosen": 10.659873962402344, "log_odds_ratio": -4.747790808323771e-05, "logits/chosen": -0.4140737056732178, "logits/rejected": -0.3993862271308899, "logps/chosen": -0.00014404115790966898, "logps/rejected": -1.736601710319519, "loss": 0.6718, "nll_loss": 0.16794097423553467, "rewards/accuracies": 1.0, "rewards/chosen": -1.440411688236054e-05, "rewards/margins": 0.1736457794904709, "rewards/rejected": -0.17366017401218414, "step": 9425 }, { "epoch": 6.518672199170124, "grad_norm": 9.013787269592285, "learning_rate": 1.934071000461042e-05, "log_odds_chosen": 10.727466583251953, "log_odds_ratio": -8.105146116577089e-05, "logits/chosen": -0.25618284940719604, "logits/rejected": -0.2904587984085083, "logps/chosen": -0.0009987832745537162, "logps/rejected": -2.389343738555908, "loss": 0.5929, "nll_loss": 0.1482187956571579, "rewards/accuracies": 1.0, "rewards/chosen": -9.987832891056314e-05, "rewards/margins": 0.23883448541164398, "rewards/rejected": -0.23893436789512634, "step": 9426 }, { "epoch": 6.519363762102351, "grad_norm": 6.031515598297119, "learning_rate": 1.933686798832027e-05, "log_odds_chosen": 11.05801773071289, "log_odds_ratio": -3.0185055948095396e-05, "logits/chosen": -0.46399399638175964, "logits/rejected": -0.48402759432792664, "logps/chosen": -0.0002074514195555821, "logps/rejected": -2.437533378601074, "loss": 0.4859, "nll_loss": 0.12146744132041931, "rewards/accuracies": 1.0, "rewards/chosen": -2.074514122796245e-05, "rewards/margins": 0.24373260140419006, "rewards/rejected": -0.2437533587217331, "step": 9427 }, { "epoch": 6.520055325034578, "grad_norm": 5.211343288421631, "learning_rate": 1.9333025972030122e-05, "log_odds_chosen": 10.822071075439453, "log_odds_ratio": -4.411491681821644e-05, "logits/chosen": -0.7721026539802551, "logits/rejected": -0.7612123489379883, "logps/chosen": -0.00019037112360820174, "logps/rejected": -2.127896785736084, "loss": 0.3862, "nll_loss": 0.09655695408582687, "rewards/accuracies": 1.0, "rewards/chosen": -1.9037111997022294e-05, "rewards/margins": 0.21277067065238953, "rewards/rejected": -0.21278971433639526, "step": 9428 }, { "epoch": 6.520746887966805, "grad_norm": 4.8389105796813965, "learning_rate": 1.9329183955739975e-05, "log_odds_chosen": 10.801448822021484, "log_odds_ratio": -3.792867937590927e-05, "logits/chosen": -0.21949411928653717, "logits/rejected": -0.2931068539619446, "logps/chosen": -0.00027945160400122404, "logps/rejected": -2.0993459224700928, "loss": 0.5517, "nll_loss": 0.13792496919631958, "rewards/accuracies": 1.0, "rewards/chosen": -2.7945161491516046e-05, "rewards/margins": 0.2099066525697708, "rewards/rejected": -0.20993459224700928, "step": 9429 }, { "epoch": 6.521438450899032, "grad_norm": 5.096884727478027, "learning_rate": 1.9325341939449824e-05, "log_odds_chosen": 10.270853996276855, "log_odds_ratio": -0.0004896325990557671, "logits/chosen": -0.4335726201534271, "logits/rejected": -0.4108191728591919, "logps/chosen": -0.0069307005032896996, "logps/rejected": -2.498528480529785, "loss": 0.6719, "nll_loss": 0.16792647540569305, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006930700037628412, "rewards/margins": 0.2491597831249237, "rewards/rejected": -0.24985285103321075, "step": 9430 }, { "epoch": 6.522130013831259, "grad_norm": 7.353761196136475, "learning_rate": 1.9321499923159676e-05, "log_odds_chosen": 10.691258430480957, "log_odds_ratio": -7.414798892568797e-05, "logits/chosen": -0.3759855031967163, "logits/rejected": -0.4204534590244293, "logps/chosen": -0.00023737037554383278, "logps/rejected": -2.425762891769409, "loss": 0.6447, "nll_loss": 0.16116894781589508, "rewards/accuracies": 1.0, "rewards/chosen": -2.3737036826787516e-05, "rewards/margins": 0.24255254864692688, "rewards/rejected": -0.24257630109786987, "step": 9431 }, { "epoch": 6.522821576763485, "grad_norm": 4.431156635284424, "learning_rate": 1.9317657906869525e-05, "log_odds_chosen": 10.560503005981445, "log_odds_ratio": -5.748879993916489e-05, "logits/chosen": -0.09382009506225586, "logits/rejected": -0.1083650067448616, "logps/chosen": -0.0002093408547807485, "logps/rejected": -1.8000552654266357, "loss": 0.4804, "nll_loss": 0.1201024129986763, "rewards/accuracies": 1.0, "rewards/chosen": -2.0934085114276968e-05, "rewards/margins": 0.17998458445072174, "rewards/rejected": -0.1800055205821991, "step": 9432 }, { "epoch": 6.523513139695712, "grad_norm": 8.661432266235352, "learning_rate": 1.9313815890579374e-05, "log_odds_chosen": 10.021512985229492, "log_odds_ratio": -0.00027715275064110756, "logits/chosen": 0.011828139424324036, "logits/rejected": -0.0077832043170928955, "logps/chosen": -0.0018440388375893235, "logps/rejected": -2.6286373138427734, "loss": 0.869, "nll_loss": 0.21721133589744568, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001844038924900815, "rewards/margins": 0.2626793086528778, "rewards/rejected": -0.2628636956214905, "step": 9433 }, { "epoch": 6.524204702627939, "grad_norm": 8.74638557434082, "learning_rate": 1.9309973874289227e-05, "log_odds_chosen": 10.317972183227539, "log_odds_ratio": -0.00020331793348304927, "logits/chosen": -0.11461025476455688, "logits/rejected": -0.2297324389219284, "logps/chosen": -0.0001929646823555231, "logps/rejected": -2.06437349319458, "loss": 0.8159, "nll_loss": 0.20395462214946747, "rewards/accuracies": 1.0, "rewards/chosen": -1.929646714415867e-05, "rewards/margins": 0.20641806721687317, "rewards/rejected": -0.2064373791217804, "step": 9434 }, { "epoch": 6.524896265560166, "grad_norm": 9.663103103637695, "learning_rate": 1.930613185799908e-05, "log_odds_chosen": 10.05333423614502, "log_odds_ratio": -0.00019566371338441968, "logits/chosen": 0.023167580366134644, "logits/rejected": -0.003208555281162262, "logps/chosen": -0.0008226472418755293, "logps/rejected": -1.9805678129196167, "loss": 0.6815, "nll_loss": 0.17034311592578888, "rewards/accuracies": 1.0, "rewards/chosen": -8.226472709793597e-05, "rewards/margins": 0.19797450304031372, "rewards/rejected": -0.19805677235126495, "step": 9435 }, { "epoch": 6.525587828492393, "grad_norm": 5.263288974761963, "learning_rate": 1.9302289841708928e-05, "log_odds_chosen": 10.60208797454834, "log_odds_ratio": -9.669522114563733e-05, "logits/chosen": -0.3702765107154846, "logits/rejected": -0.4385896623134613, "logps/chosen": -0.0004391923430375755, "logps/rejected": -2.460280418395996, "loss": 0.6826, "nll_loss": 0.1706320345401764, "rewards/accuracies": 1.0, "rewards/chosen": -4.391923357616179e-05, "rewards/margins": 0.24598410725593567, "rewards/rejected": -0.24602803587913513, "step": 9436 }, { "epoch": 6.5262793914246195, "grad_norm": 4.888511657714844, "learning_rate": 1.929844782541878e-05, "log_odds_chosen": 10.464603424072266, "log_odds_ratio": -8.840052760206163e-05, "logits/chosen": -0.7394514083862305, "logits/rejected": -0.7927672863006592, "logps/chosen": -0.00026991096092388034, "logps/rejected": -1.7386510372161865, "loss": 0.3981, "nll_loss": 0.09950944036245346, "rewards/accuracies": 1.0, "rewards/chosen": -2.6991097911377437e-05, "rewards/margins": 0.17383810877799988, "rewards/rejected": -0.17386510968208313, "step": 9437 }, { "epoch": 6.526970954356846, "grad_norm": 5.329433441162109, "learning_rate": 1.9294605809128633e-05, "log_odds_chosen": 10.825544357299805, "log_odds_ratio": -6.626916729146615e-05, "logits/chosen": -0.028324007987976074, "logits/rejected": -0.0745202898979187, "logps/chosen": -0.00023314285499509424, "logps/rejected": -2.1176702976226807, "loss": 0.746, "nll_loss": 0.18650339543819427, "rewards/accuracies": 1.0, "rewards/chosen": -2.3314285499509424e-05, "rewards/margins": 0.21174371242523193, "rewards/rejected": -0.2117670178413391, "step": 9438 }, { "epoch": 6.527662517289073, "grad_norm": 6.633513450622559, "learning_rate": 1.9290763792838482e-05, "log_odds_chosen": 10.572284698486328, "log_odds_ratio": -9.137169399764389e-05, "logits/chosen": -0.5419729351997375, "logits/rejected": -0.650861918926239, "logps/chosen": -0.00021510363148991019, "logps/rejected": -2.059680223464966, "loss": 0.7737, "nll_loss": 0.19342711567878723, "rewards/accuracies": 1.0, "rewards/chosen": -2.1510362785193138e-05, "rewards/margins": 0.20594650506973267, "rewards/rejected": -0.20596802234649658, "step": 9439 }, { "epoch": 6.5283540802213, "grad_norm": 8.28564453125, "learning_rate": 1.9286921776548335e-05, "log_odds_chosen": 9.535734176635742, "log_odds_ratio": -0.0002018636732827872, "logits/chosen": -0.6633929014205933, "logits/rejected": -0.6869960427284241, "logps/chosen": -0.0005071749328635633, "logps/rejected": -1.613107442855835, "loss": 0.6712, "nll_loss": 0.16779053211212158, "rewards/accuracies": 1.0, "rewards/chosen": -5.0717495469143614e-05, "rewards/margins": 0.16126003861427307, "rewards/rejected": -0.16131076216697693, "step": 9440 }, { "epoch": 6.529045643153527, "grad_norm": 6.801348686218262, "learning_rate": 1.9283079760258184e-05, "log_odds_chosen": 10.486151695251465, "log_odds_ratio": -6.121492333477363e-05, "logits/chosen": -0.37587589025497437, "logits/rejected": -0.4333217740058899, "logps/chosen": -0.0003721543762367219, "logps/rejected": -2.1706387996673584, "loss": 0.5519, "nll_loss": 0.13796362280845642, "rewards/accuracies": 1.0, "rewards/chosen": -3.721543907886371e-05, "rewards/margins": 0.21702665090560913, "rewards/rejected": -0.21706387400627136, "step": 9441 }, { "epoch": 6.529737206085754, "grad_norm": 10.218798637390137, "learning_rate": 1.9279237743968036e-05, "log_odds_chosen": 11.670851707458496, "log_odds_ratio": -3.392618236830458e-05, "logits/chosen": -0.2155950665473938, "logits/rejected": -0.32742008566856384, "logps/chosen": -0.00012127251829952002, "logps/rejected": -2.728766441345215, "loss": 0.6559, "nll_loss": 0.16396935284137726, "rewards/accuracies": 1.0, "rewards/chosen": -1.2127253285143524e-05, "rewards/margins": 0.27286452054977417, "rewards/rejected": -0.27287665009498596, "step": 9442 }, { "epoch": 6.5304287690179805, "grad_norm": 5.403188705444336, "learning_rate": 1.9275395727677885e-05, "log_odds_chosen": 9.496389389038086, "log_odds_ratio": -0.03377415984869003, "logits/chosen": -0.280944287776947, "logits/rejected": -0.27332478761672974, "logps/chosen": -0.01256662979722023, "logps/rejected": -1.719006061553955, "loss": 0.4708, "nll_loss": 0.11432860046625137, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012566630030050874, "rewards/margins": 0.17064395546913147, "rewards/rejected": -0.17190060019493103, "step": 9443 }, { "epoch": 6.531120331950207, "grad_norm": 4.520147800445557, "learning_rate": 1.9271553711387738e-05, "log_odds_chosen": 10.924385070800781, "log_odds_ratio": -6.160808698041365e-05, "logits/chosen": -0.32104748487472534, "logits/rejected": -0.3863796591758728, "logps/chosen": -0.00016719617997296154, "logps/rejected": -1.7605154514312744, "loss": 0.5161, "nll_loss": 0.12901218235492706, "rewards/accuracies": 1.0, "rewards/chosen": -1.6719619452487677e-05, "rewards/margins": 0.1760348081588745, "rewards/rejected": -0.1760515421628952, "step": 9444 }, { "epoch": 6.531811894882434, "grad_norm": 5.334826946258545, "learning_rate": 1.9267711695097587e-05, "log_odds_chosen": 10.15666389465332, "log_odds_ratio": -0.0001524979597888887, "logits/chosen": -0.2669309973716736, "logits/rejected": -0.278430700302124, "logps/chosen": -0.0004270744975656271, "logps/rejected": -1.9559170007705688, "loss": 0.4716, "nll_loss": 0.1178910881280899, "rewards/accuracies": 1.0, "rewards/chosen": -4.27074555773288e-05, "rewards/margins": 0.19554898142814636, "rewards/rejected": -0.19559170305728912, "step": 9445 }, { "epoch": 6.532503457814661, "grad_norm": 7.522082328796387, "learning_rate": 1.926386967880744e-05, "log_odds_chosen": 12.136481285095215, "log_odds_ratio": -1.9502214854583144e-05, "logits/chosen": -0.6247130036354065, "logits/rejected": -0.572918176651001, "logps/chosen": -7.606636063428596e-05, "logps/rejected": -2.7466282844543457, "loss": 1.3921, "nll_loss": 0.34801381826400757, "rewards/accuracies": 1.0, "rewards/chosen": -7.606636245327536e-06, "rewards/margins": 0.27465522289276123, "rewards/rejected": -0.2746628224849701, "step": 9446 }, { "epoch": 6.533195020746888, "grad_norm": 7.554215431213379, "learning_rate": 1.926002766251729e-05, "log_odds_chosen": 10.835151672363281, "log_odds_ratio": -2.6517705919104628e-05, "logits/chosen": -0.16394104063510895, "logits/rejected": -0.15501868724822998, "logps/chosen": -0.00021345789718907326, "logps/rejected": -1.9866135120391846, "loss": 0.6984, "nll_loss": 0.17458918690681458, "rewards/accuracies": 1.0, "rewards/chosen": -2.1345789718907326e-05, "rewards/margins": 0.19864001870155334, "rewards/rejected": -0.19866134226322174, "step": 9447 }, { "epoch": 6.533886583679115, "grad_norm": 5.6107177734375, "learning_rate": 1.925618564622714e-05, "log_odds_chosen": 10.036823272705078, "log_odds_ratio": -6.762363773304969e-05, "logits/chosen": -0.44453203678131104, "logits/rejected": -0.4903140068054199, "logps/chosen": -0.0006642360240221024, "logps/rejected": -1.758960485458374, "loss": 1.0152, "nll_loss": 0.2537900507450104, "rewards/accuracies": 1.0, "rewards/chosen": -6.64236067677848e-05, "rewards/margins": 0.17582963407039642, "rewards/rejected": -0.1758960485458374, "step": 9448 }, { "epoch": 6.5345781466113415, "grad_norm": 9.110634803771973, "learning_rate": 1.9252343629936993e-05, "log_odds_chosen": 10.609855651855469, "log_odds_ratio": -5.668444646289572e-05, "logits/chosen": -0.32916340231895447, "logits/rejected": -0.376298725605011, "logps/chosen": -0.0001745389890857041, "logps/rejected": -1.83896803855896, "loss": 0.7855, "nll_loss": 0.19636529684066772, "rewards/accuracies": 1.0, "rewards/chosen": -1.7453898180974647e-05, "rewards/margins": 0.18387934565544128, "rewards/rejected": -0.18389680981636047, "step": 9449 }, { "epoch": 6.535269709543568, "grad_norm": 8.429581642150879, "learning_rate": 1.9248501613646845e-05, "log_odds_chosen": 10.936147689819336, "log_odds_ratio": -2.1902931621298194e-05, "logits/chosen": -0.6698485016822815, "logits/rejected": -0.7103371620178223, "logps/chosen": -0.0002495582157280296, "logps/rejected": -2.1973369121551514, "loss": 0.6345, "nll_loss": 0.1586150974035263, "rewards/accuracies": 1.0, "rewards/chosen": -2.495582157280296e-05, "rewards/margins": 0.21970872581005096, "rewards/rejected": -0.21973368525505066, "step": 9450 }, { "epoch": 6.535961272475795, "grad_norm": 12.12063980102539, "learning_rate": 1.9244659597356694e-05, "log_odds_chosen": 9.785110473632812, "log_odds_ratio": -0.0005236170836724341, "logits/chosen": -0.6654300093650818, "logits/rejected": -0.7001447677612305, "logps/chosen": -0.0013286888133734465, "logps/rejected": -2.0298948287963867, "loss": 0.5691, "nll_loss": 0.14221365749835968, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013286888133734465, "rewards/margins": 0.20285660028457642, "rewards/rejected": -0.20298945903778076, "step": 9451 }, { "epoch": 6.536652835408022, "grad_norm": 7.771703243255615, "learning_rate": 1.9240817581066544e-05, "log_odds_chosen": 10.353858947753906, "log_odds_ratio": -8.914186037145555e-05, "logits/chosen": -0.6196666955947876, "logits/rejected": -0.6253817677497864, "logps/chosen": -0.0004547428397927433, "logps/rejected": -2.4246909618377686, "loss": 0.6552, "nll_loss": 0.16377976536750793, "rewards/accuracies": 1.0, "rewards/chosen": -4.5474280341295525e-05, "rewards/margins": 0.24242360889911652, "rewards/rejected": -0.24246907234191895, "step": 9452 }, { "epoch": 6.537344398340249, "grad_norm": 5.618026256561279, "learning_rate": 1.9236975564776396e-05, "log_odds_chosen": 10.78561782836914, "log_odds_ratio": -6.65760671836324e-05, "logits/chosen": -0.4101612865924835, "logits/rejected": -0.3885840177536011, "logps/chosen": -0.0002911283809226006, "logps/rejected": -2.4101407527923584, "loss": 0.6213, "nll_loss": 0.15532714128494263, "rewards/accuracies": 1.0, "rewards/chosen": -2.9112838092260063e-05, "rewards/margins": 0.2409849464893341, "rewards/rejected": -0.24101409316062927, "step": 9453 }, { "epoch": 6.538035961272476, "grad_norm": 8.519519805908203, "learning_rate": 1.9233133548486245e-05, "log_odds_chosen": 10.831727981567383, "log_odds_ratio": -9.052889072336257e-05, "logits/chosen": -0.42768576741218567, "logits/rejected": -0.5330202579498291, "logps/chosen": -0.0004460075870156288, "logps/rejected": -2.201939105987549, "loss": 0.7587, "nll_loss": 0.18967759609222412, "rewards/accuracies": 1.0, "rewards/chosen": -4.460075797396712e-05, "rewards/margins": 0.22014930844306946, "rewards/rejected": -0.22019392251968384, "step": 9454 }, { "epoch": 6.5387275242047025, "grad_norm": 6.629385948181152, "learning_rate": 1.9229291532196097e-05, "log_odds_chosen": 10.440211296081543, "log_odds_ratio": -0.00025720984558574855, "logits/chosen": -0.7466363906860352, "logits/rejected": -0.7641102075576782, "logps/chosen": -0.00040673461626283824, "logps/rejected": -1.9535751342773438, "loss": 0.6623, "nll_loss": 0.1655389368534088, "rewards/accuracies": 1.0, "rewards/chosen": -4.0673461626283824e-05, "rewards/margins": 0.19531682133674622, "rewards/rejected": -0.1953575164079666, "step": 9455 }, { "epoch": 6.539419087136929, "grad_norm": 5.408649921417236, "learning_rate": 1.922544951590595e-05, "log_odds_chosen": 11.0181884765625, "log_odds_ratio": -0.00020642187155317515, "logits/chosen": -0.2848138213157654, "logits/rejected": -0.3254838287830353, "logps/chosen": -0.00031954696169123054, "logps/rejected": -2.2532505989074707, "loss": 0.4727, "nll_loss": 0.11815480887889862, "rewards/accuracies": 1.0, "rewards/chosen": -3.1954696169123054e-05, "rewards/margins": 0.2252930998802185, "rewards/rejected": -0.22532503306865692, "step": 9456 }, { "epoch": 6.540110650069156, "grad_norm": 5.664820671081543, "learning_rate": 1.92216074996158e-05, "log_odds_chosen": 10.822545051574707, "log_odds_ratio": -0.00010074210149468854, "logits/chosen": -0.5778256058692932, "logits/rejected": -0.5243774056434631, "logps/chosen": -0.0004190989420749247, "logps/rejected": -2.700394868850708, "loss": 0.5906, "nll_loss": 0.14764924347400665, "rewards/accuracies": 1.0, "rewards/chosen": -4.190989420749247e-05, "rewards/margins": 0.26999756693840027, "rewards/rejected": -0.27003949880599976, "step": 9457 }, { "epoch": 6.540802213001383, "grad_norm": 9.502604484558105, "learning_rate": 1.921776548332565e-05, "log_odds_chosen": 9.924819946289062, "log_odds_ratio": -0.00013832449621986598, "logits/chosen": -0.5658762454986572, "logits/rejected": -0.6234560012817383, "logps/chosen": -0.0010439768666401505, "logps/rejected": -1.702471137046814, "loss": 0.5704, "nll_loss": 0.1425761580467224, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010439768811920658, "rewards/margins": 0.17014271020889282, "rewards/rejected": -0.1702471226453781, "step": 9458 }, { "epoch": 6.54149377593361, "grad_norm": 4.656129360198975, "learning_rate": 1.9213923467035504e-05, "log_odds_chosen": 9.624756813049316, "log_odds_ratio": -0.00042861714609898627, "logits/chosen": -0.31074270606040955, "logits/rejected": -0.3961406946182251, "logps/chosen": -0.001230962690897286, "logps/rejected": -2.0448856353759766, "loss": 0.6971, "nll_loss": 0.1742362380027771, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012309628073126078, "rewards/margins": 0.20436546206474304, "rewards/rejected": -0.20448856055736542, "step": 9459 }, { "epoch": 6.542185338865837, "grad_norm": 6.805206775665283, "learning_rate": 1.9210081450745353e-05, "log_odds_chosen": 9.775091171264648, "log_odds_ratio": -0.0002657772274687886, "logits/chosen": -0.499624639749527, "logits/rejected": -0.4671819806098938, "logps/chosen": -0.0012333440827205777, "logps/rejected": -2.1008732318878174, "loss": 0.6091, "nll_loss": 0.15224380791187286, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012333440827205777, "rewards/margins": 0.20996397733688354, "rewards/rejected": -0.21008731424808502, "step": 9460 }, { "epoch": 6.5428769017980635, "grad_norm": 6.917591571807861, "learning_rate": 1.9206239434455202e-05, "log_odds_chosen": 10.791444778442383, "log_odds_ratio": -4.337338395998813e-05, "logits/chosen": -0.39784783124923706, "logits/rejected": -0.5065803527832031, "logps/chosen": -0.00024545512860640883, "logps/rejected": -2.4440646171569824, "loss": 0.698, "nll_loss": 0.17449891567230225, "rewards/accuracies": 1.0, "rewards/chosen": -2.454551577102393e-05, "rewards/margins": 0.24438193440437317, "rewards/rejected": -0.24440649151802063, "step": 9461 }, { "epoch": 6.54356846473029, "grad_norm": 4.706018447875977, "learning_rate": 1.9202397418165054e-05, "log_odds_chosen": 11.40475845336914, "log_odds_ratio": -9.582944767316803e-05, "logits/chosen": -0.5012403726577759, "logits/rejected": -0.5878694653511047, "logps/chosen": -0.00046555051812902093, "logps/rejected": -3.4569056034088135, "loss": 0.4919, "nll_loss": 0.12297195196151733, "rewards/accuracies": 1.0, "rewards/chosen": -4.655505108530633e-05, "rewards/margins": 0.3456440269947052, "rewards/rejected": -0.34569060802459717, "step": 9462 }, { "epoch": 6.544260027662517, "grad_norm": 8.319991111755371, "learning_rate": 1.9198555401874903e-05, "log_odds_chosen": 8.73961067199707, "log_odds_ratio": -0.0007463196525350213, "logits/chosen": -0.7648007869720459, "logits/rejected": -0.7282172441482544, "logps/chosen": -0.0007134063635021448, "logps/rejected": -1.2260456085205078, "loss": 0.8386, "nll_loss": 0.2095862329006195, "rewards/accuracies": 1.0, "rewards/chosen": -7.134064071578905e-05, "rewards/margins": 0.12253323197364807, "rewards/rejected": -0.12260457128286362, "step": 9463 }, { "epoch": 6.544951590594744, "grad_norm": 7.8542561531066895, "learning_rate": 1.9194713385584756e-05, "log_odds_chosen": 11.679158210754395, "log_odds_ratio": -2.236912041553296e-05, "logits/chosen": -0.8372731804847717, "logits/rejected": -0.8716490268707275, "logps/chosen": -0.00024520649458281696, "logps/rejected": -2.850311279296875, "loss": 0.7341, "nll_loss": 0.1835135519504547, "rewards/accuracies": 1.0, "rewards/chosen": -2.452065200486686e-05, "rewards/margins": 0.2850066125392914, "rewards/rejected": -0.28503113985061646, "step": 9464 }, { "epoch": 6.545643153526971, "grad_norm": 9.019186019897461, "learning_rate": 1.919087136929461e-05, "log_odds_chosen": 9.300536155700684, "log_odds_ratio": -0.21208545565605164, "logits/chosen": 0.11748534440994263, "logits/rejected": -0.05792899429798126, "logps/chosen": -0.035312023013830185, "logps/rejected": -1.7976481914520264, "loss": 1.0377, "nll_loss": 0.23820726573467255, "rewards/accuracies": 0.875, "rewards/chosen": -0.0035312026739120483, "rewards/margins": 0.17623361945152283, "rewards/rejected": -0.17976480722427368, "step": 9465 }, { "epoch": 6.546334716459198, "grad_norm": 9.906425476074219, "learning_rate": 1.9187029353004457e-05, "log_odds_chosen": 11.869268417358398, "log_odds_ratio": -1.1419995644246228e-05, "logits/chosen": -0.6074370741844177, "logits/rejected": -0.6842989921569824, "logps/chosen": -0.00011990070197498426, "logps/rejected": -2.7005834579467773, "loss": 0.5716, "nll_loss": 0.14290668070316315, "rewards/accuracies": 1.0, "rewards/chosen": -1.1990070561296307e-05, "rewards/margins": 0.2700463831424713, "rewards/rejected": -0.2700583338737488, "step": 9466 }, { "epoch": 6.5470262793914245, "grad_norm": 7.5030317306518555, "learning_rate": 1.918318733671431e-05, "log_odds_chosen": 9.613960266113281, "log_odds_ratio": -0.0002500044647604227, "logits/chosen": -0.09690068662166595, "logits/rejected": -0.12829184532165527, "logps/chosen": -0.00881747156381607, "logps/rejected": -2.3322060108184814, "loss": 0.653, "nll_loss": 0.16322842240333557, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008817471680231392, "rewards/margins": 0.2323388308286667, "rewards/rejected": -0.23322060704231262, "step": 9467 }, { "epoch": 6.547717842323651, "grad_norm": 5.7620744705200195, "learning_rate": 1.9179345320424162e-05, "log_odds_chosen": 11.665501594543457, "log_odds_ratio": -3.687728167278692e-05, "logits/chosen": -0.25433778762817383, "logits/rejected": -0.2974012494087219, "logps/chosen": -0.0004080279322806746, "logps/rejected": -3.097548007965088, "loss": 0.8516, "nll_loss": 0.2128935307264328, "rewards/accuracies": 1.0, "rewards/chosen": -4.080279541085474e-05, "rewards/margins": 0.30971401929855347, "rewards/rejected": -0.30975478887557983, "step": 9468 }, { "epoch": 6.548409405255878, "grad_norm": 8.611531257629395, "learning_rate": 1.917550330413401e-05, "log_odds_chosen": 10.0744047164917, "log_odds_ratio": -0.000632865761872381, "logits/chosen": -0.8037955164909363, "logits/rejected": -0.7918911576271057, "logps/chosen": -0.002187209203839302, "logps/rejected": -2.4003305435180664, "loss": 0.4801, "nll_loss": 0.11996515095233917, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002187209320254624, "rewards/margins": 0.23981432616710663, "rewards/rejected": -0.24003303050994873, "step": 9469 }, { "epoch": 6.549100968188105, "grad_norm": 5.34125280380249, "learning_rate": 1.917166128784386e-05, "log_odds_chosen": 11.037103652954102, "log_odds_ratio": -0.0001046846155077219, "logits/chosen": -0.24765129387378693, "logits/rejected": -0.3548104166984558, "logps/chosen": -0.00016740552382543683, "logps/rejected": -1.8313382863998413, "loss": 0.5318, "nll_loss": 0.13294678926467896, "rewards/accuracies": 1.0, "rewards/chosen": -1.674055056355428e-05, "rewards/margins": 0.1831170916557312, "rewards/rejected": -0.1831338256597519, "step": 9470 }, { "epoch": 6.549792531120332, "grad_norm": 11.186291694641113, "learning_rate": 1.9167819271553713e-05, "log_odds_chosen": 11.0018310546875, "log_odds_ratio": -2.549621058278717e-05, "logits/chosen": -0.14623451232910156, "logits/rejected": -0.2587830424308777, "logps/chosen": -0.00013841589679941535, "logps/rejected": -1.5451520681381226, "loss": 0.7605, "nll_loss": 0.1901187300682068, "rewards/accuracies": 1.0, "rewards/chosen": -1.3841590771335177e-05, "rewards/margins": 0.15450136363506317, "rewards/rejected": -0.15451520681381226, "step": 9471 }, { "epoch": 6.550484094052559, "grad_norm": 6.574780464172363, "learning_rate": 1.9163977255263562e-05, "log_odds_chosen": 11.316503524780273, "log_odds_ratio": -0.00011724029172910377, "logits/chosen": -0.6436998844146729, "logits/rejected": -0.7034087181091309, "logps/chosen": -0.00010371260577812791, "logps/rejected": -2.339920997619629, "loss": 0.9627, "nll_loss": 0.240675687789917, "rewards/accuracies": 1.0, "rewards/chosen": -1.037126003211597e-05, "rewards/margins": 0.23398171365261078, "rewards/rejected": -0.2339920699596405, "step": 9472 }, { "epoch": 6.551175656984785, "grad_norm": 5.006659984588623, "learning_rate": 1.9160135238973414e-05, "log_odds_chosen": 10.22900104522705, "log_odds_ratio": -0.0001384686620440334, "logits/chosen": -0.35459089279174805, "logits/rejected": -0.3786230683326721, "logps/chosen": -0.0006998850731179118, "logps/rejected": -2.1292760372161865, "loss": 0.3758, "nll_loss": 0.09392721951007843, "rewards/accuracies": 1.0, "rewards/chosen": -6.998850585659966e-05, "rewards/margins": 0.21285760402679443, "rewards/rejected": -0.21292759478092194, "step": 9473 }, { "epoch": 6.551867219917012, "grad_norm": 6.996860980987549, "learning_rate": 1.9156293222683267e-05, "log_odds_chosen": 9.235257148742676, "log_odds_ratio": -0.00016873932327143848, "logits/chosen": -0.7287513017654419, "logits/rejected": -0.8030807971954346, "logps/chosen": -0.0012789568863809109, "logps/rejected": -1.5626949071884155, "loss": 0.6777, "nll_loss": 0.16941440105438232, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012789569154847413, "rewards/margins": 0.15614160895347595, "rewards/rejected": -0.15626949071884155, "step": 9474 }, { "epoch": 6.552558782849239, "grad_norm": 7.9105730056762695, "learning_rate": 1.9152451206393116e-05, "log_odds_chosen": 9.815263748168945, "log_odds_ratio": -0.0008661964093334973, "logits/chosen": -0.900322675704956, "logits/rejected": -0.8875852823257446, "logps/chosen": -0.000402643287088722, "logps/rejected": -1.414959192276001, "loss": 1.0646, "nll_loss": 0.26607388257980347, "rewards/accuracies": 1.0, "rewards/chosen": -4.026433089165948e-05, "rewards/margins": 0.14145566523075104, "rewards/rejected": -0.14149592816829681, "step": 9475 }, { "epoch": 6.553250345781466, "grad_norm": 18.255563735961914, "learning_rate": 1.914860919010297e-05, "log_odds_chosen": 12.136200904846191, "log_odds_ratio": -1.486710607423447e-05, "logits/chosen": -0.3935670852661133, "logits/rejected": -0.4665505290031433, "logps/chosen": -9.996606240747496e-05, "logps/rejected": -2.7526142597198486, "loss": 0.6183, "nll_loss": 0.15456387400627136, "rewards/accuracies": 1.0, "rewards/chosen": -9.996606422646437e-06, "rewards/margins": 0.2752514183521271, "rewards/rejected": -0.27526140213012695, "step": 9476 }, { "epoch": 6.553941908713693, "grad_norm": 4.371954441070557, "learning_rate": 1.914476717381282e-05, "log_odds_chosen": 10.711895942687988, "log_odds_ratio": -5.91963944316376e-05, "logits/chosen": -0.6419881582260132, "logits/rejected": -0.6792569756507874, "logps/chosen": -0.00010584262054180726, "logps/rejected": -1.6583061218261719, "loss": 0.7602, "nll_loss": 0.1900506317615509, "rewards/accuracies": 1.0, "rewards/chosen": -1.0584262781776488e-05, "rewards/margins": 0.16582003235816956, "rewards/rejected": -0.1658306121826172, "step": 9477 }, { "epoch": 6.55463347164592, "grad_norm": 5.113374710083008, "learning_rate": 1.914092515752267e-05, "log_odds_chosen": 11.083767890930176, "log_odds_ratio": -8.742226782487705e-05, "logits/chosen": -0.5468670129776001, "logits/rejected": -0.6056516170501709, "logps/chosen": -0.000979842385277152, "logps/rejected": -2.4781551361083984, "loss": 0.4569, "nll_loss": 0.11422540247440338, "rewards/accuracies": 1.0, "rewards/chosen": -9.79842443484813e-05, "rewards/margins": 0.24771751463413239, "rewards/rejected": -0.24781548976898193, "step": 9478 }, { "epoch": 6.555325034578146, "grad_norm": 6.143009185791016, "learning_rate": 1.913708314123252e-05, "log_odds_chosen": 11.41762638092041, "log_odds_ratio": -1.6329569916706532e-05, "logits/chosen": -0.10203643143177032, "logits/rejected": -0.22533483803272247, "logps/chosen": -0.00042999981087632477, "logps/rejected": -2.9852843284606934, "loss": 0.6501, "nll_loss": 0.1625293344259262, "rewards/accuracies": 1.0, "rewards/chosen": -4.2999985453207046e-05, "rewards/margins": 0.2984854280948639, "rewards/rejected": -0.29852843284606934, "step": 9479 }, { "epoch": 6.556016597510373, "grad_norm": 14.926573753356934, "learning_rate": 1.9133241124942368e-05, "log_odds_chosen": 10.724283218383789, "log_odds_ratio": -4.1447154217166826e-05, "logits/chosen": -0.6632795333862305, "logits/rejected": -0.5676432847976685, "logps/chosen": -0.0001383264025207609, "logps/rejected": -1.9790575504302979, "loss": 0.7707, "nll_loss": 0.19267025589942932, "rewards/accuracies": 1.0, "rewards/chosen": -1.3832639524480328e-05, "rewards/margins": 0.1978919506072998, "rewards/rejected": -0.1979057788848877, "step": 9480 }, { "epoch": 6.5567081604426, "grad_norm": 6.726595401763916, "learning_rate": 1.912939910865222e-05, "log_odds_chosen": 9.883056640625, "log_odds_ratio": -9.502652392257005e-05, "logits/chosen": -0.4928602874279022, "logits/rejected": -0.48830845952033997, "logps/chosen": -0.0003097387671004981, "logps/rejected": -1.9427212476730347, "loss": 0.4563, "nll_loss": 0.11406896263360977, "rewards/accuracies": 1.0, "rewards/chosen": -3.097387889283709e-05, "rewards/margins": 0.19424115121364594, "rewards/rejected": -0.19427213072776794, "step": 9481 }, { "epoch": 6.557399723374827, "grad_norm": 6.6166181564331055, "learning_rate": 1.9125557092362073e-05, "log_odds_chosen": 10.360993385314941, "log_odds_ratio": -0.0005267745582386851, "logits/chosen": -0.7267522811889648, "logits/rejected": -0.9201721549034119, "logps/chosen": -0.000601665407884866, "logps/rejected": -1.9809919595718384, "loss": 0.8314, "nll_loss": 0.2078043818473816, "rewards/accuracies": 1.0, "rewards/chosen": -6.0166537878103554e-05, "rewards/margins": 0.19803902506828308, "rewards/rejected": -0.19809919595718384, "step": 9482 }, { "epoch": 6.558091286307054, "grad_norm": 5.971362590789795, "learning_rate": 1.9121715076071922e-05, "log_odds_chosen": 10.730911254882812, "log_odds_ratio": -9.043634054251015e-05, "logits/chosen": -0.5595455169677734, "logits/rejected": -0.5485619306564331, "logps/chosen": -0.0006760087562724948, "logps/rejected": -2.1176257133483887, "loss": 0.4346, "nll_loss": 0.10863366723060608, "rewards/accuracies": 1.0, "rewards/chosen": -6.760087126167491e-05, "rewards/margins": 0.21169498562812805, "rewards/rejected": -0.21176259219646454, "step": 9483 }, { "epoch": 6.558782849239281, "grad_norm": 4.847436904907227, "learning_rate": 1.9117873059781774e-05, "log_odds_chosen": 9.789130210876465, "log_odds_ratio": -0.00028666871367022395, "logits/chosen": -0.652285099029541, "logits/rejected": -0.7685700058937073, "logps/chosen": -0.0002560943830758333, "logps/rejected": -1.5029850006103516, "loss": 0.4915, "nll_loss": 0.1228560283780098, "rewards/accuracies": 1.0, "rewards/chosen": -2.5609435397200286e-05, "rewards/margins": 0.1502728909254074, "rewards/rejected": -0.15029850602149963, "step": 9484 }, { "epoch": 6.559474412171507, "grad_norm": 7.007932662963867, "learning_rate": 1.9114031043491627e-05, "log_odds_chosen": 11.106407165527344, "log_odds_ratio": -3.277970608905889e-05, "logits/chosen": -0.42606693506240845, "logits/rejected": -0.513460636138916, "logps/chosen": -0.00017478324298281223, "logps/rejected": -2.249858856201172, "loss": 0.9326, "nll_loss": 0.23315876722335815, "rewards/accuracies": 1.0, "rewards/chosen": -1.7478325389674865e-05, "rewards/margins": 0.22496840357780457, "rewards/rejected": -0.22498586773872375, "step": 9485 }, { "epoch": 6.560165975103734, "grad_norm": 7.205862045288086, "learning_rate": 1.9110189027201476e-05, "log_odds_chosen": 10.024550437927246, "log_odds_ratio": -0.0002134918759111315, "logits/chosen": -0.5787373185157776, "logits/rejected": -0.6127051115036011, "logps/chosen": -0.002206590957939625, "logps/rejected": -2.553743600845337, "loss": 0.5282, "nll_loss": 0.1320350468158722, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022065910161472857, "rewards/margins": 0.25515371561050415, "rewards/rejected": -0.25537434220314026, "step": 9486 }, { "epoch": 6.560857538035961, "grad_norm": 4.7558417320251465, "learning_rate": 1.9106347010911328e-05, "log_odds_chosen": 10.852564811706543, "log_odds_ratio": -4.599963722284883e-05, "logits/chosen": -0.7739884257316589, "logits/rejected": -0.889204740524292, "logps/chosen": -0.00010290837963111699, "logps/rejected": -1.8536291122436523, "loss": 0.6837, "nll_loss": 0.17092812061309814, "rewards/accuracies": 1.0, "rewards/chosen": -1.0290837963111699e-05, "rewards/margins": 0.1853526085615158, "rewards/rejected": -0.18536292016506195, "step": 9487 }, { "epoch": 6.561549100968188, "grad_norm": 9.699813842773438, "learning_rate": 1.9102504994621177e-05, "log_odds_chosen": 10.22033405303955, "log_odds_ratio": -0.00013328839850146323, "logits/chosen": -0.643222987651825, "logits/rejected": -0.6561555862426758, "logps/chosen": -0.00040478675509802997, "logps/rejected": -1.997079610824585, "loss": 0.5048, "nll_loss": 0.12619677186012268, "rewards/accuracies": 1.0, "rewards/chosen": -4.047867696499452e-05, "rewards/margins": 0.19966749846935272, "rewards/rejected": -0.1997079700231552, "step": 9488 }, { "epoch": 6.562240663900415, "grad_norm": 7.772004127502441, "learning_rate": 1.9098662978331026e-05, "log_odds_chosen": 10.799205780029297, "log_odds_ratio": -0.00011516768427100033, "logits/chosen": -0.6208674907684326, "logits/rejected": -0.6132377982139587, "logps/chosen": -0.0009005725150927901, "logps/rejected": -2.7482409477233887, "loss": 1.4155, "nll_loss": 0.3538641035556793, "rewards/accuracies": 1.0, "rewards/chosen": -9.005725587485358e-05, "rewards/margins": 0.2747340202331543, "rewards/rejected": -0.2748240828514099, "step": 9489 }, { "epoch": 6.5629322268326415, "grad_norm": 5.275304317474365, "learning_rate": 1.909482096204088e-05, "log_odds_chosen": 11.012089729309082, "log_odds_ratio": -4.3458290747366846e-05, "logits/chosen": 0.15669658780097961, "logits/rejected": 0.09660986065864563, "logps/chosen": -0.00021308660507202148, "logps/rejected": -2.4177041053771973, "loss": 0.4676, "nll_loss": 0.11689440906047821, "rewards/accuracies": 1.0, "rewards/chosen": -2.130866050720215e-05, "rewards/margins": 0.241749107837677, "rewards/rejected": -0.2417704164981842, "step": 9490 }, { "epoch": 6.563623789764868, "grad_norm": 5.077573776245117, "learning_rate": 1.909097894575073e-05, "log_odds_chosen": 10.685736656188965, "log_odds_ratio": -7.411053229589015e-05, "logits/chosen": 0.0397484228014946, "logits/rejected": -0.03544532507658005, "logps/chosen": -0.00021711325098294765, "logps/rejected": -2.0057690143585205, "loss": 0.7528, "nll_loss": 0.1881898045539856, "rewards/accuracies": 1.0, "rewards/chosen": -2.1711324734496884e-05, "rewards/margins": 0.20055519044399261, "rewards/rejected": -0.20057690143585205, "step": 9491 }, { "epoch": 6.564315352697095, "grad_norm": 9.68488597869873, "learning_rate": 1.908713692946058e-05, "log_odds_chosen": 11.285282135009766, "log_odds_ratio": -4.871240525972098e-05, "logits/chosen": -0.5057222843170166, "logits/rejected": -0.5519887208938599, "logps/chosen": -0.0002046864974545315, "logps/rejected": -2.48622989654541, "loss": 0.6181, "nll_loss": 0.15451568365097046, "rewards/accuracies": 1.0, "rewards/chosen": -2.0468651200644672e-05, "rewards/margins": 0.2486025094985962, "rewards/rejected": -0.24862298369407654, "step": 9492 }, { "epoch": 6.565006915629322, "grad_norm": 9.92271900177002, "learning_rate": 1.9083294913170433e-05, "log_odds_chosen": 11.072915077209473, "log_odds_ratio": -7.389196252916008e-05, "logits/chosen": -0.047651246190071106, "logits/rejected": -0.0919855609536171, "logps/chosen": -0.0001974912011064589, "logps/rejected": -2.3384013175964355, "loss": 0.6102, "nll_loss": 0.15253698825836182, "rewards/accuracies": 1.0, "rewards/chosen": -1.9749117200262845e-05, "rewards/margins": 0.23382039368152618, "rewards/rejected": -0.23384013772010803, "step": 9493 }, { "epoch": 6.565698478561549, "grad_norm": 6.103567600250244, "learning_rate": 1.9079452896880285e-05, "log_odds_chosen": 10.56631851196289, "log_odds_ratio": -0.0001871968706836924, "logits/chosen": -0.5657141208648682, "logits/rejected": -0.5551555156707764, "logps/chosen": -0.000143907280289568, "logps/rejected": -1.7810739278793335, "loss": 0.7097, "nll_loss": 0.1774117797613144, "rewards/accuracies": 1.0, "rewards/chosen": -1.439072730136104e-05, "rewards/margins": 0.17809300124645233, "rewards/rejected": -0.1781073957681656, "step": 9494 }, { "epoch": 6.566390041493776, "grad_norm": 4.250610828399658, "learning_rate": 1.9075610880590134e-05, "log_odds_chosen": 10.280776977539062, "log_odds_ratio": -5.944541044300422e-05, "logits/chosen": -0.13907837867736816, "logits/rejected": -0.1998562067747116, "logps/chosen": -0.00017767293320503086, "logps/rejected": -1.4102723598480225, "loss": 0.3628, "nll_loss": 0.09070321172475815, "rewards/accuracies": 1.0, "rewards/chosen": -1.7767293684300967e-05, "rewards/margins": 0.14100947976112366, "rewards/rejected": -0.14102724194526672, "step": 9495 }, { "epoch": 6.5670816044260025, "grad_norm": 8.311246871948242, "learning_rate": 1.9071768864299987e-05, "log_odds_chosen": 10.381108283996582, "log_odds_ratio": -0.00028720340924337506, "logits/chosen": -0.03732209652662277, "logits/rejected": -0.08972935378551483, "logps/chosen": -0.0005405236152000725, "logps/rejected": -2.375124454498291, "loss": 0.5801, "nll_loss": 0.14500656723976135, "rewards/accuracies": 1.0, "rewards/chosen": -5.405236152000725e-05, "rewards/margins": 0.23745840787887573, "rewards/rejected": -0.237512469291687, "step": 9496 }, { "epoch": 6.567773167358229, "grad_norm": 4.872702598571777, "learning_rate": 1.9067926848009836e-05, "log_odds_chosen": 10.836227416992188, "log_odds_ratio": -3.483764885459095e-05, "logits/chosen": -0.0781363844871521, "logits/rejected": -0.32934820652008057, "logps/chosen": -0.0002562236040830612, "logps/rejected": -2.0514607429504395, "loss": 0.5159, "nll_loss": 0.12898054718971252, "rewards/accuracies": 1.0, "rewards/chosen": -2.562236477388069e-05, "rewards/margins": 0.20512044429779053, "rewards/rejected": -0.20514605939388275, "step": 9497 }, { "epoch": 6.568464730290456, "grad_norm": 4.309937000274658, "learning_rate": 1.9064084831719685e-05, "log_odds_chosen": 10.821893692016602, "log_odds_ratio": -6.669486901955679e-05, "logits/chosen": -0.4724667966365814, "logits/rejected": -0.48318246006965637, "logps/chosen": -0.0002066226297756657, "logps/rejected": -2.2214860916137695, "loss": 0.3534, "nll_loss": 0.0883367583155632, "rewards/accuracies": 1.0, "rewards/chosen": -2.066226261376869e-05, "rewards/margins": 0.22212794423103333, "rewards/rejected": -0.222148597240448, "step": 9498 }, { "epoch": 6.569156293222683, "grad_norm": 7.079944610595703, "learning_rate": 1.9060242815429537e-05, "log_odds_chosen": 11.52110481262207, "log_odds_ratio": -7.554404146503657e-05, "logits/chosen": -0.8700163960456848, "logits/rejected": -0.9756256341934204, "logps/chosen": -0.0003580303455237299, "logps/rejected": -2.680870294570923, "loss": 0.4434, "nll_loss": 0.11085189133882523, "rewards/accuracies": 1.0, "rewards/chosen": -3.580303382477723e-05, "rewards/margins": 0.26805123686790466, "rewards/rejected": -0.2680870294570923, "step": 9499 }, { "epoch": 6.56984785615491, "grad_norm": 6.172976493835449, "learning_rate": 1.905640079913939e-05, "log_odds_chosen": 10.494388580322266, "log_odds_ratio": -4.967241329723038e-05, "logits/chosen": -0.44258636236190796, "logits/rejected": -0.4766414165496826, "logps/chosen": -0.00012744334526360035, "logps/rejected": -1.7994089126586914, "loss": 0.6341, "nll_loss": 0.15851013362407684, "rewards/accuracies": 1.0, "rewards/chosen": -1.2744334526360035e-05, "rewards/margins": 0.17992815375328064, "rewards/rejected": -0.17994090914726257, "step": 9500 }, { "epoch": 6.570539419087137, "grad_norm": 9.317890167236328, "learning_rate": 1.905255878284924e-05, "log_odds_chosen": 10.344244956970215, "log_odds_ratio": -0.00025330157950520515, "logits/chosen": -0.5354888439178467, "logits/rejected": -0.4884495437145233, "logps/chosen": -0.0006745475111529231, "logps/rejected": -1.889319658279419, "loss": 0.6263, "nll_loss": 0.1565524935722351, "rewards/accuracies": 1.0, "rewards/chosen": -6.745474820490927e-05, "rewards/margins": 0.18886449933052063, "rewards/rejected": -0.18893194198608398, "step": 9501 }, { "epoch": 6.5712309820193635, "grad_norm": 7.175146579742432, "learning_rate": 1.904871676655909e-05, "log_odds_chosen": 11.946460723876953, "log_odds_ratio": -1.0231826308881864e-05, "logits/chosen": -0.3202419579029083, "logits/rejected": -0.3598886728286743, "logps/chosen": -7.545893458882347e-05, "logps/rejected": -2.313094139099121, "loss": 0.682, "nll_loss": 0.17050229012966156, "rewards/accuracies": 1.0, "rewards/chosen": -7.5458938226802275e-06, "rewards/margins": 0.23130187392234802, "rewards/rejected": -0.2313094139099121, "step": 9502 }, { "epoch": 6.57192254495159, "grad_norm": 8.389784812927246, "learning_rate": 1.9044874750268944e-05, "log_odds_chosen": 11.532312393188477, "log_odds_ratio": -1.5520981833105907e-05, "logits/chosen": -0.3472404479980469, "logits/rejected": -0.4375644326210022, "logps/chosen": -0.00013552154996432364, "logps/rejected": -2.4274983406066895, "loss": 0.7095, "nll_loss": 0.17736461758613586, "rewards/accuracies": 1.0, "rewards/chosen": -1.3552154996432364e-05, "rewards/margins": 0.2427363097667694, "rewards/rejected": -0.24274984002113342, "step": 9503 }, { "epoch": 6.572614107883817, "grad_norm": 8.687039375305176, "learning_rate": 1.9041032733978793e-05, "log_odds_chosen": 7.895865440368652, "log_odds_ratio": -0.3241705596446991, "logits/chosen": -0.9922645092010498, "logits/rejected": -0.9834545850753784, "logps/chosen": -0.05756475776433945, "logps/rejected": -1.5002421140670776, "loss": 0.9695, "nll_loss": 0.20995807647705078, "rewards/accuracies": 0.875, "rewards/chosen": -0.005756476428359747, "rewards/margins": 0.144267737865448, "rewards/rejected": -0.15002422034740448, "step": 9504 }, { "epoch": 6.573305670816044, "grad_norm": 5.384150981903076, "learning_rate": 1.9037190717688645e-05, "log_odds_chosen": 10.194380760192871, "log_odds_ratio": -8.458264346700162e-05, "logits/chosen": -0.5537482500076294, "logits/rejected": -0.5107707977294922, "logps/chosen": -0.0008147121407091618, "logps/rejected": -1.905914068222046, "loss": 0.483, "nll_loss": 0.12074509263038635, "rewards/accuracies": 1.0, "rewards/chosen": -8.147121116053313e-05, "rewards/margins": 0.19050993025302887, "rewards/rejected": -0.19059139490127563, "step": 9505 }, { "epoch": 6.573997233748271, "grad_norm": 5.172842979431152, "learning_rate": 1.9033348701398494e-05, "log_odds_chosen": 11.013910293579102, "log_odds_ratio": -9.697994391899556e-05, "logits/chosen": -0.18725240230560303, "logits/rejected": -0.2527714967727661, "logps/chosen": -0.00037460378371179104, "logps/rejected": -2.615182876586914, "loss": 0.3211, "nll_loss": 0.08026767522096634, "rewards/accuracies": 1.0, "rewards/chosen": -3.746037691598758e-05, "rewards/margins": 0.26148083806037903, "rewards/rejected": -0.26151829957962036, "step": 9506 }, { "epoch": 6.574688796680498, "grad_norm": 4.038562774658203, "learning_rate": 1.9029506685108343e-05, "log_odds_chosen": 9.88060188293457, "log_odds_ratio": -0.0001938038767548278, "logits/chosen": -0.2381129413843155, "logits/rejected": -0.20977772772312164, "logps/chosen": -0.0005058772512711585, "logps/rejected": -1.8327600955963135, "loss": 0.561, "nll_loss": 0.14024168252944946, "rewards/accuracies": 1.0, "rewards/chosen": -5.0587725127115846e-05, "rewards/margins": 0.18322542309761047, "rewards/rejected": -0.1832760125398636, "step": 9507 }, { "epoch": 6.5753803596127245, "grad_norm": 5.107179164886475, "learning_rate": 1.9025664668818196e-05, "log_odds_chosen": 11.771180152893066, "log_odds_ratio": -1.0985384506057017e-05, "logits/chosen": -0.7047210335731506, "logits/rejected": -0.7815711498260498, "logps/chosen": -9.567014058120549e-05, "logps/rejected": -2.4914684295654297, "loss": 0.4273, "nll_loss": 0.1068139374256134, "rewards/accuracies": 1.0, "rewards/chosen": -9.56701478571631e-06, "rewards/margins": 0.249137282371521, "rewards/rejected": -0.24914684891700745, "step": 9508 }, { "epoch": 6.576071922544951, "grad_norm": 5.086597919464111, "learning_rate": 1.9021822652528048e-05, "log_odds_chosen": 11.12252426147461, "log_odds_ratio": -2.3953118216013536e-05, "logits/chosen": -0.807060718536377, "logits/rejected": -0.839371383190155, "logps/chosen": -0.00011378790077287704, "logps/rejected": -1.8926666975021362, "loss": 0.5818, "nll_loss": 0.14543595910072327, "rewards/accuracies": 1.0, "rewards/chosen": -1.1378790077287704e-05, "rewards/margins": 0.18925531208515167, "rewards/rejected": -0.18926669657230377, "step": 9509 }, { "epoch": 6.576763485477178, "grad_norm": 8.26449966430664, "learning_rate": 1.9017980636237897e-05, "log_odds_chosen": 10.97335433959961, "log_odds_ratio": -7.209448813227937e-05, "logits/chosen": -0.16519924998283386, "logits/rejected": -0.20669898390769958, "logps/chosen": -0.00023038122162688524, "logps/rejected": -2.1607444286346436, "loss": 0.6227, "nll_loss": 0.15566156804561615, "rewards/accuracies": 1.0, "rewards/chosen": -2.3038122890284285e-05, "rewards/margins": 0.2160513997077942, "rewards/rejected": -0.21607445180416107, "step": 9510 }, { "epoch": 6.577455048409405, "grad_norm": 9.655121803283691, "learning_rate": 1.901413861994775e-05, "log_odds_chosen": 9.956766128540039, "log_odds_ratio": -9.793389472179115e-05, "logits/chosen": -0.218642920255661, "logits/rejected": -0.3094644844532013, "logps/chosen": -0.000380536075681448, "logps/rejected": -1.8787182569503784, "loss": 0.9288, "nll_loss": 0.2321830689907074, "rewards/accuracies": 1.0, "rewards/chosen": -3.80536075681448e-05, "rewards/margins": 0.1878337562084198, "rewards/rejected": -0.1878717988729477, "step": 9511 }, { "epoch": 6.578146611341632, "grad_norm": 10.619027137756348, "learning_rate": 1.9010296603657602e-05, "log_odds_chosen": 10.957955360412598, "log_odds_ratio": -7.806985377101228e-05, "logits/chosen": -0.07476891577243805, "logits/rejected": -0.12704113125801086, "logps/chosen": -0.00032312539406120777, "logps/rejected": -2.5475821495056152, "loss": 0.9755, "nll_loss": 0.24385805428028107, "rewards/accuracies": 1.0, "rewards/chosen": -3.2312538678525016e-05, "rewards/margins": 0.25472593307495117, "rewards/rejected": -0.25475820899009705, "step": 9512 }, { "epoch": 6.578838174273859, "grad_norm": 6.9182820320129395, "learning_rate": 1.900645458736745e-05, "log_odds_chosen": 10.320255279541016, "log_odds_ratio": -0.00038485744153149426, "logits/chosen": -0.44043487310409546, "logits/rejected": -0.43507570028305054, "logps/chosen": -0.000618858146481216, "logps/rejected": -2.230238437652588, "loss": 0.7855, "nll_loss": 0.19633594155311584, "rewards/accuracies": 1.0, "rewards/chosen": -6.188581028254703e-05, "rewards/margins": 0.22296196222305298, "rewards/rejected": -0.22302386164665222, "step": 9513 }, { "epoch": 6.5795297372060855, "grad_norm": 17.059465408325195, "learning_rate": 1.9002612571077304e-05, "log_odds_chosen": 11.300527572631836, "log_odds_ratio": -2.5722471036715433e-05, "logits/chosen": -0.45718055963516235, "logits/rejected": -0.5097206234931946, "logps/chosen": -0.00021330831805244088, "logps/rejected": -2.203810691833496, "loss": 0.6545, "nll_loss": 0.1636224389076233, "rewards/accuracies": 1.0, "rewards/chosen": -2.133083216904197e-05, "rewards/margins": 0.22035974264144897, "rewards/rejected": -0.22038106620311737, "step": 9514 }, { "epoch": 6.580221300138312, "grad_norm": 8.320899963378906, "learning_rate": 1.8998770554787153e-05, "log_odds_chosen": 10.619794845581055, "log_odds_ratio": -7.08003863110207e-05, "logits/chosen": -0.4190692603588104, "logits/rejected": -0.4577012062072754, "logps/chosen": -0.0003057790454477072, "logps/rejected": -2.1481821537017822, "loss": 0.6001, "nll_loss": 0.15001440048217773, "rewards/accuracies": 1.0, "rewards/chosen": -3.0577903089579195e-05, "rewards/margins": 0.21478766202926636, "rewards/rejected": -0.21481823921203613, "step": 9515 }, { "epoch": 6.580912863070539, "grad_norm": 6.303850173950195, "learning_rate": 1.8994928538497e-05, "log_odds_chosen": 9.006599426269531, "log_odds_ratio": -0.0007060145726427436, "logits/chosen": -0.500159740447998, "logits/rejected": -0.7312350273132324, "logps/chosen": -0.0014982303837314248, "logps/rejected": -1.930267572402954, "loss": 0.6199, "nll_loss": 0.1548926830291748, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001498230267316103, "rewards/margins": 0.1928769201040268, "rewards/rejected": -0.19302673637866974, "step": 9516 }, { "epoch": 6.581604426002766, "grad_norm": 4.413415431976318, "learning_rate": 1.8991086522206854e-05, "log_odds_chosen": 10.201871871948242, "log_odds_ratio": -5.679738023900427e-05, "logits/chosen": -0.8834502696990967, "logits/rejected": -0.8898751139640808, "logps/chosen": -0.000270822987658903, "logps/rejected": -1.7805309295654297, "loss": 0.3352, "nll_loss": 0.08379855006933212, "rewards/accuracies": 1.0, "rewards/chosen": -2.7082300221081823e-05, "rewards/margins": 0.1780260056257248, "rewards/rejected": -0.178053081035614, "step": 9517 }, { "epoch": 6.582295988934993, "grad_norm": 10.41358757019043, "learning_rate": 1.8987244505916707e-05, "log_odds_chosen": 11.547701835632324, "log_odds_ratio": -2.4673521693330258e-05, "logits/chosen": -0.5142653584480286, "logits/rejected": -0.41769781708717346, "logps/chosen": -0.00011675099813146517, "logps/rejected": -2.200291156768799, "loss": 0.711, "nll_loss": 0.17774221301078796, "rewards/accuracies": 1.0, "rewards/chosen": -1.1675099813146517e-05, "rewards/margins": 0.2200174480676651, "rewards/rejected": -0.22002913057804108, "step": 9518 }, { "epoch": 6.58298755186722, "grad_norm": 7.81559944152832, "learning_rate": 1.8983402489626556e-05, "log_odds_chosen": 10.948009490966797, "log_odds_ratio": -9.050434164237231e-05, "logits/chosen": -0.33920398354530334, "logits/rejected": -0.3603125512599945, "logps/chosen": -0.0003907711070496589, "logps/rejected": -2.4962830543518066, "loss": 0.6102, "nll_loss": 0.15255282819271088, "rewards/accuracies": 1.0, "rewards/chosen": -3.907710924977437e-05, "rewards/margins": 0.2495892345905304, "rewards/rejected": -0.24962830543518066, "step": 9519 }, { "epoch": 6.5836791147994465, "grad_norm": 6.346920490264893, "learning_rate": 1.8979560473336408e-05, "log_odds_chosen": 8.478560447692871, "log_odds_ratio": -0.006527758669108152, "logits/chosen": -0.3838501572608948, "logits/rejected": -0.5643572807312012, "logps/chosen": -0.004246383905410767, "logps/rejected": -1.2931175231933594, "loss": 0.6578, "nll_loss": 0.16379287838935852, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042463839054107666, "rewards/margins": 0.1288871020078659, "rewards/rejected": -0.12931174039840698, "step": 9520 }, { "epoch": 6.584370677731673, "grad_norm": 5.637845516204834, "learning_rate": 1.897571845704626e-05, "log_odds_chosen": 10.634634017944336, "log_odds_ratio": -0.00018999997701030225, "logits/chosen": -0.8730132579803467, "logits/rejected": -0.9408348798751831, "logps/chosen": -0.00012764170242007822, "logps/rejected": -1.8069618940353394, "loss": 0.7198, "nll_loss": 0.1799338161945343, "rewards/accuracies": 1.0, "rewards/chosen": -1.2764169696311e-05, "rewards/margins": 0.1806834191083908, "rewards/rejected": -0.18069618940353394, "step": 9521 }, { "epoch": 6.5850622406639, "grad_norm": 12.139360427856445, "learning_rate": 1.897187644075611e-05, "log_odds_chosen": 11.475465774536133, "log_odds_ratio": -0.00010113770986208692, "logits/chosen": -0.2640308439731598, "logits/rejected": -0.3200063705444336, "logps/chosen": -0.00028492099954746664, "logps/rejected": -3.306983470916748, "loss": 0.6333, "nll_loss": 0.15832307934761047, "rewards/accuracies": 1.0, "rewards/chosen": -2.8492100682342425e-05, "rewards/margins": 0.3306698799133301, "rewards/rejected": -0.3306983411312103, "step": 9522 }, { "epoch": 6.585753803596127, "grad_norm": 12.55954360961914, "learning_rate": 1.8968034424465962e-05, "log_odds_chosen": 11.508321762084961, "log_odds_ratio": -3.0403138225665316e-05, "logits/chosen": -0.4079541563987732, "logits/rejected": -0.4889557361602783, "logps/chosen": -0.0002845217240974307, "logps/rejected": -3.2464981079101562, "loss": 0.597, "nll_loss": 0.1492396742105484, "rewards/accuracies": 1.0, "rewards/chosen": -2.8452173864934593e-05, "rewards/margins": 0.32462140917778015, "rewards/rejected": -0.324649840593338, "step": 9523 }, { "epoch": 6.586445366528354, "grad_norm": 4.427008152008057, "learning_rate": 1.896419240817581e-05, "log_odds_chosen": 10.566621780395508, "log_odds_ratio": -0.00019363139290362597, "logits/chosen": -0.2653787434101105, "logits/rejected": -0.34483182430267334, "logps/chosen": -0.0005865857820026577, "logps/rejected": -2.4292454719543457, "loss": 0.4088, "nll_loss": 0.1021810993552208, "rewards/accuracies": 1.0, "rewards/chosen": -5.865856655873358e-05, "rewards/margins": 0.2428659051656723, "rewards/rejected": -0.2429245561361313, "step": 9524 }, { "epoch": 6.587136929460581, "grad_norm": 5.990933895111084, "learning_rate": 1.896035039188566e-05, "log_odds_chosen": 9.884780883789062, "log_odds_ratio": -0.0008313123253174126, "logits/chosen": -0.4263268709182739, "logits/rejected": -0.45220786333084106, "logps/chosen": -0.00040180000360123813, "logps/rejected": -1.9751261472702026, "loss": 1.0007, "nll_loss": 0.2500944435596466, "rewards/accuracies": 1.0, "rewards/chosen": -4.017999890493229e-05, "rewards/margins": 0.1974724382162094, "rewards/rejected": -0.19751261174678802, "step": 9525 }, { "epoch": 6.587828492392807, "grad_norm": 6.74733304977417, "learning_rate": 1.8956508375595513e-05, "log_odds_chosen": 10.80959701538086, "log_odds_ratio": -5.695023719454184e-05, "logits/chosen": -0.033346615731716156, "logits/rejected": -0.08503760397434235, "logps/chosen": -0.00012283638352528214, "logps/rejected": -1.6785304546356201, "loss": 0.8599, "nll_loss": 0.21497204899787903, "rewards/accuracies": 1.0, "rewards/chosen": -1.2283639080123976e-05, "rewards/margins": 0.16784076392650604, "rewards/rejected": -0.16785304248332977, "step": 9526 }, { "epoch": 6.588520055325034, "grad_norm": 6.609440326690674, "learning_rate": 1.8952666359305365e-05, "log_odds_chosen": 9.931138038635254, "log_odds_ratio": -0.00016108129057101905, "logits/chosen": 0.06345038115978241, "logits/rejected": -0.01876118779182434, "logps/chosen": -0.00024808067246340215, "logps/rejected": -1.2895182371139526, "loss": 0.591, "nll_loss": 0.14773456752300262, "rewards/accuracies": 1.0, "rewards/chosen": -2.4808066882542334e-05, "rewards/margins": 0.12892702221870422, "rewards/rejected": -0.12895183265209198, "step": 9527 }, { "epoch": 6.589211618257261, "grad_norm": 8.213581085205078, "learning_rate": 1.8948824343015214e-05, "log_odds_chosen": 9.630084991455078, "log_odds_ratio": -0.0004836757725570351, "logits/chosen": -0.3603372275829315, "logits/rejected": -0.4786415100097656, "logps/chosen": -0.0006260552327148616, "logps/rejected": -1.8332273960113525, "loss": 0.6008, "nll_loss": 0.15014447271823883, "rewards/accuracies": 1.0, "rewards/chosen": -6.260553345782682e-05, "rewards/margins": 0.18326014280319214, "rewards/rejected": -0.1833227574825287, "step": 9528 }, { "epoch": 6.589903181189488, "grad_norm": 7.819589614868164, "learning_rate": 1.8944982326725066e-05, "log_odds_chosen": 11.63312816619873, "log_odds_ratio": -1.2704012988251634e-05, "logits/chosen": -0.6833489537239075, "logits/rejected": -0.7324924468994141, "logps/chosen": -0.00019270573102403432, "logps/rejected": -2.559584617614746, "loss": 0.694, "nll_loss": 0.17351114749908447, "rewards/accuracies": 1.0, "rewards/chosen": -1.9270573829999194e-05, "rewards/margins": 0.25593918561935425, "rewards/rejected": -0.2559584379196167, "step": 9529 }, { "epoch": 6.590594744121715, "grad_norm": 7.137159824371338, "learning_rate": 1.894114031043492e-05, "log_odds_chosen": 10.68792724609375, "log_odds_ratio": -9.086474892683327e-05, "logits/chosen": -0.3094750940799713, "logits/rejected": -0.34787717461586, "logps/chosen": -0.0008726265514269471, "logps/rejected": -2.354119300842285, "loss": 0.5016, "nll_loss": 0.12540172040462494, "rewards/accuracies": 1.0, "rewards/chosen": -8.726265514269471e-05, "rewards/margins": 0.2353246808052063, "rewards/rejected": -0.23541194200515747, "step": 9530 }, { "epoch": 6.591286307053942, "grad_norm": 7.465571403503418, "learning_rate": 1.8937298294144768e-05, "log_odds_chosen": 10.607606887817383, "log_odds_ratio": -8.99828301044181e-05, "logits/chosen": -0.06982383877038956, "logits/rejected": -0.20454353094100952, "logps/chosen": -0.0002789056452456862, "logps/rejected": -1.781639575958252, "loss": 0.9311, "nll_loss": 0.23276673257350922, "rewards/accuracies": 1.0, "rewards/chosen": -2.7890564524568617e-05, "rewards/margins": 0.17813608050346375, "rewards/rejected": -0.17816394567489624, "step": 9531 }, { "epoch": 6.591977869986168, "grad_norm": 5.6307692527771, "learning_rate": 1.893345627785462e-05, "log_odds_chosen": 11.764213562011719, "log_odds_ratio": -4.389313835417852e-05, "logits/chosen": -0.11563535034656525, "logits/rejected": -0.09763626754283905, "logps/chosen": -0.00018603801436256617, "logps/rejected": -2.9742701053619385, "loss": 0.4816, "nll_loss": 0.12040294706821442, "rewards/accuracies": 1.0, "rewards/chosen": -1.8603801436256617e-05, "rewards/margins": 0.29740840196609497, "rewards/rejected": -0.2974269986152649, "step": 9532 }, { "epoch": 6.592669432918395, "grad_norm": 6.012737274169922, "learning_rate": 1.892961426156447e-05, "log_odds_chosen": 10.957094192504883, "log_odds_ratio": -0.00011141406139358878, "logits/chosen": -0.31941908597946167, "logits/rejected": -0.41692906618118286, "logps/chosen": -0.0002899272076319903, "logps/rejected": -2.2581934928894043, "loss": 0.7247, "nll_loss": 0.1811733841896057, "rewards/accuracies": 1.0, "rewards/chosen": -2.899271930800751e-05, "rewards/margins": 0.2257903665304184, "rewards/rejected": -0.22581936419010162, "step": 9533 }, { "epoch": 6.593360995850622, "grad_norm": 10.640678405761719, "learning_rate": 1.8925772245274322e-05, "log_odds_chosen": 11.120275497436523, "log_odds_ratio": -5.54912221559789e-05, "logits/chosen": -0.05910344794392586, "logits/rejected": -0.1686576008796692, "logps/chosen": -0.0006790636107325554, "logps/rejected": -2.8853797912597656, "loss": 0.7399, "nll_loss": 0.184981107711792, "rewards/accuracies": 1.0, "rewards/chosen": -6.790635961806402e-05, "rewards/margins": 0.288470059633255, "rewards/rejected": -0.28853797912597656, "step": 9534 }, { "epoch": 6.594052558782849, "grad_norm": 6.17962646484375, "learning_rate": 1.892193022898417e-05, "log_odds_chosen": 10.639046669006348, "log_odds_ratio": -7.220011320896447e-05, "logits/chosen": -0.0466696172952652, "logits/rejected": -0.14394527673721313, "logps/chosen": -0.0006435574614442885, "logps/rejected": -2.312441110610962, "loss": 0.6346, "nll_loss": 0.1586356908082962, "rewards/accuracies": 1.0, "rewards/chosen": -6.43557432340458e-05, "rewards/margins": 0.23117974400520325, "rewards/rejected": -0.23124408721923828, "step": 9535 }, { "epoch": 6.594744121715076, "grad_norm": 8.575480461120605, "learning_rate": 1.8918088212694023e-05, "log_odds_chosen": 11.129705429077148, "log_odds_ratio": -0.00012059323489665985, "logits/chosen": -0.32221323251724243, "logits/rejected": -0.4411635994911194, "logps/chosen": -0.0002790922299027443, "logps/rejected": -2.5678157806396484, "loss": 0.7306, "nll_loss": 0.1826275885105133, "rewards/accuracies": 1.0, "rewards/chosen": -2.7909221898880787e-05, "rewards/margins": 0.25675368309020996, "rewards/rejected": -0.25678160786628723, "step": 9536 }, { "epoch": 6.595435684647303, "grad_norm": 3.7951908111572266, "learning_rate": 1.8914246196403872e-05, "log_odds_chosen": 10.10299015045166, "log_odds_ratio": -0.0005347510450519621, "logits/chosen": -0.35992541909217834, "logits/rejected": -0.4088529050350189, "logps/chosen": -0.00016122058150358498, "logps/rejected": -1.4273490905761719, "loss": 0.4174, "nll_loss": 0.10428406298160553, "rewards/accuracies": 1.0, "rewards/chosen": -1.612205960555002e-05, "rewards/margins": 0.14271880686283112, "rewards/rejected": -0.14273492991924286, "step": 9537 }, { "epoch": 6.596127247579529, "grad_norm": 5.140981674194336, "learning_rate": 1.8910404180113725e-05, "log_odds_chosen": 10.800771713256836, "log_odds_ratio": -2.9589751648018137e-05, "logits/chosen": -0.22679775953292847, "logits/rejected": -0.4273071587085724, "logps/chosen": -0.00040692847687751055, "logps/rejected": -2.4438960552215576, "loss": 0.5973, "nll_loss": 0.1493130475282669, "rewards/accuracies": 1.0, "rewards/chosen": -4.0692848415346816e-05, "rewards/margins": 0.2443489134311676, "rewards/rejected": -0.244389608502388, "step": 9538 }, { "epoch": 6.596818810511756, "grad_norm": 6.820888996124268, "learning_rate": 1.8906562163823577e-05, "log_odds_chosen": 10.571646690368652, "log_odds_ratio": -5.5003725719871e-05, "logits/chosen": -0.3664097189903259, "logits/rejected": -0.406631201505661, "logps/chosen": -0.00030732934828847647, "logps/rejected": -1.8684226274490356, "loss": 0.5273, "nll_loss": 0.13182811439037323, "rewards/accuracies": 1.0, "rewards/chosen": -3.07329319184646e-05, "rewards/margins": 0.18681153655052185, "rewards/rejected": -0.18684226274490356, "step": 9539 }, { "epoch": 6.597510373443983, "grad_norm": 5.106546878814697, "learning_rate": 1.8902720147533426e-05, "log_odds_chosen": 10.965265274047852, "log_odds_ratio": -7.689668564125896e-05, "logits/chosen": -0.21386636793613434, "logits/rejected": -0.3349723517894745, "logps/chosen": -0.00039963488234207034, "logps/rejected": -2.9627490043640137, "loss": 0.6362, "nll_loss": 0.15903426706790924, "rewards/accuracies": 1.0, "rewards/chosen": -3.996348459622823e-05, "rewards/margins": 0.29623496532440186, "rewards/rejected": -0.29627490043640137, "step": 9540 }, { "epoch": 6.59820193637621, "grad_norm": 10.467570304870605, "learning_rate": 1.889887813124328e-05, "log_odds_chosen": 11.801546096801758, "log_odds_ratio": -1.0990625014528632e-05, "logits/chosen": -0.19286254048347473, "logits/rejected": -0.2172335833311081, "logps/chosen": -0.0003137754974886775, "logps/rejected": -2.841444492340088, "loss": 0.8365, "nll_loss": 0.20912466943264008, "rewards/accuracies": 1.0, "rewards/chosen": -3.137754902127199e-05, "rewards/margins": 0.2841130793094635, "rewards/rejected": -0.28414446115493774, "step": 9541 }, { "epoch": 6.598893499308437, "grad_norm": 7.705151557922363, "learning_rate": 1.889503611495313e-05, "log_odds_chosen": 10.635993957519531, "log_odds_ratio": -0.00017419188225176185, "logits/chosen": -0.49404484033584595, "logits/rejected": -0.5072313547134399, "logps/chosen": -0.00041031482396647334, "logps/rejected": -2.464137315750122, "loss": 0.6947, "nll_loss": 0.1736493706703186, "rewards/accuracies": 1.0, "rewards/chosen": -4.1031482396647334e-05, "rewards/margins": 0.24637269973754883, "rewards/rejected": -0.24641373753547668, "step": 9542 }, { "epoch": 6.5995850622406635, "grad_norm": 8.610349655151367, "learning_rate": 1.889119409866298e-05, "log_odds_chosen": 10.723653793334961, "log_odds_ratio": -5.8262728998670354e-05, "logits/chosen": -0.5345897078514099, "logits/rejected": -0.6072478890419006, "logps/chosen": -0.0008024029666557908, "logps/rejected": -2.9265053272247314, "loss": 0.5516, "nll_loss": 0.137889102101326, "rewards/accuracies": 1.0, "rewards/chosen": -8.024030830711126e-05, "rewards/margins": 0.2925702929496765, "rewards/rejected": -0.2926505208015442, "step": 9543 }, { "epoch": 6.60027662517289, "grad_norm": 21.155384063720703, "learning_rate": 1.888735208237283e-05, "log_odds_chosen": 11.516271591186523, "log_odds_ratio": -1.7020091036101803e-05, "logits/chosen": -0.27340206503868103, "logits/rejected": -0.37262627482414246, "logps/chosen": -0.0002646548382472247, "logps/rejected": -2.5741095542907715, "loss": 0.6883, "nll_loss": 0.17208553850650787, "rewards/accuracies": 1.0, "rewards/chosen": -2.646548455231823e-05, "rewards/margins": 0.2573844790458679, "rewards/rejected": -0.2574109435081482, "step": 9544 }, { "epoch": 6.600968188105117, "grad_norm": 9.197043418884277, "learning_rate": 1.8883510066082682e-05, "log_odds_chosen": 11.402222633361816, "log_odds_ratio": -3.696878411574289e-05, "logits/chosen": -0.02247518301010132, "logits/rejected": -0.15677234530448914, "logps/chosen": -0.00015109230298548937, "logps/rejected": -2.3994855880737305, "loss": 0.6276, "nll_loss": 0.1568952351808548, "rewards/accuracies": 1.0, "rewards/chosen": -1.510923175374046e-05, "rewards/margins": 0.23993346095085144, "rewards/rejected": -0.23994854092597961, "step": 9545 }, { "epoch": 6.601659751037344, "grad_norm": 6.208484649658203, "learning_rate": 1.887966804979253e-05, "log_odds_chosen": 9.860437393188477, "log_odds_ratio": -0.00020793148723896593, "logits/chosen": -0.564323365688324, "logits/rejected": -0.5780093669891357, "logps/chosen": -0.00029348081443458796, "logps/rejected": -1.679945707321167, "loss": 0.4888, "nll_loss": 0.1221674308180809, "rewards/accuracies": 1.0, "rewards/chosen": -2.9348082534852438e-05, "rewards/margins": 0.16796523332595825, "rewards/rejected": -0.16799457371234894, "step": 9546 }, { "epoch": 6.602351313969571, "grad_norm": 11.18844223022461, "learning_rate": 1.8875826033502383e-05, "log_odds_chosen": 9.754711151123047, "log_odds_ratio": -0.00019032778800465167, "logits/chosen": -0.47488075494766235, "logits/rejected": -0.5205568075180054, "logps/chosen": -0.0007518458878621459, "logps/rejected": -2.273089647293091, "loss": 0.6455, "nll_loss": 0.16134408116340637, "rewards/accuracies": 1.0, "rewards/chosen": -7.518458733102307e-05, "rewards/margins": 0.22723379731178284, "rewards/rejected": -0.2273089736700058, "step": 9547 }, { "epoch": 6.603042876901798, "grad_norm": 5.4768385887146, "learning_rate": 1.8871984017212232e-05, "log_odds_chosen": 10.853092193603516, "log_odds_ratio": -0.0003788030007854104, "logits/chosen": -0.48384472727775574, "logits/rejected": -0.4700443148612976, "logps/chosen": -0.0005879810778424144, "logps/rejected": -2.374781370162964, "loss": 0.5334, "nll_loss": 0.13330428302288055, "rewards/accuracies": 1.0, "rewards/chosen": -5.879810487385839e-05, "rewards/margins": 0.23741932213306427, "rewards/rejected": -0.237478107213974, "step": 9548 }, { "epoch": 6.6037344398340245, "grad_norm": 6.37504768371582, "learning_rate": 1.8868142000922085e-05, "log_odds_chosen": 10.665268898010254, "log_odds_ratio": -0.00012861876166425645, "logits/chosen": 0.07045517861843109, "logits/rejected": 0.03083537518978119, "logps/chosen": -0.0004432780551724136, "logps/rejected": -2.561220407485962, "loss": 0.6722, "nll_loss": 0.16802969574928284, "rewards/accuracies": 1.0, "rewards/chosen": -4.432780770002864e-05, "rewards/margins": 0.25607770681381226, "rewards/rejected": -0.25612205266952515, "step": 9549 }, { "epoch": 6.604426002766251, "grad_norm": 5.965790748596191, "learning_rate": 1.8864299984631937e-05, "log_odds_chosen": 10.766727447509766, "log_odds_ratio": -0.00010273887164657936, "logits/chosen": -0.38880062103271484, "logits/rejected": -0.4356210231781006, "logps/chosen": -0.0008852147730067372, "logps/rejected": -2.64434814453125, "loss": 0.5739, "nll_loss": 0.1434713900089264, "rewards/accuracies": 1.0, "rewards/chosen": -8.852146856952459e-05, "rewards/margins": 0.26434630155563354, "rewards/rejected": -0.264434814453125, "step": 9550 }, { "epoch": 6.605117565698478, "grad_norm": 9.020346641540527, "learning_rate": 1.8860457968341786e-05, "log_odds_chosen": 9.151586532592773, "log_odds_ratio": -0.0010840434115380049, "logits/chosen": -0.38431838154792786, "logits/rejected": -0.3746948540210724, "logps/chosen": -0.003543839557096362, "logps/rejected": -2.262924909591675, "loss": 0.4518, "nll_loss": 0.11283522844314575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035438398481346667, "rewards/margins": 0.2259381264448166, "rewards/rejected": -0.22629249095916748, "step": 9551 }, { "epoch": 6.605809128630705, "grad_norm": 3.341355562210083, "learning_rate": 1.885661595205164e-05, "log_odds_chosen": 11.200335502624512, "log_odds_ratio": -4.079660357092507e-05, "logits/chosen": -0.4858432710170746, "logits/rejected": -0.571876049041748, "logps/chosen": -7.985975389601663e-05, "logps/rejected": -1.8703134059906006, "loss": 0.4316, "nll_loss": 0.10789884626865387, "rewards/accuracies": 1.0, "rewards/chosen": -7.985974662005901e-06, "rewards/margins": 0.1870233565568924, "rewards/rejected": -0.1870313435792923, "step": 9552 }, { "epoch": 6.606500691562932, "grad_norm": 5.128517150878906, "learning_rate": 1.8852773935761488e-05, "log_odds_chosen": 10.190290451049805, "log_odds_ratio": -0.0012002821313217282, "logits/chosen": -0.6075640916824341, "logits/rejected": -0.617435097694397, "logps/chosen": -0.002807852579280734, "logps/rejected": -2.2782063484191895, "loss": 0.4924, "nll_loss": 0.12298320233821869, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002807852579280734, "rewards/margins": 0.22753985226154327, "rewards/rejected": -0.22782063484191895, "step": 9553 }, { "epoch": 6.607192254495159, "grad_norm": 5.492445945739746, "learning_rate": 1.8848931919471337e-05, "log_odds_chosen": 10.870869636535645, "log_odds_ratio": -4.509523932938464e-05, "logits/chosen": -0.36585742235183716, "logits/rejected": -0.4368995726108551, "logps/chosen": -0.00011223943874938414, "logps/rejected": -2.0290770530700684, "loss": 0.667, "nll_loss": 0.16674567759037018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1223944056837354e-05, "rewards/margins": 0.20289649069309235, "rewards/rejected": -0.2029077261686325, "step": 9554 }, { "epoch": 6.6078838174273855, "grad_norm": 4.166836738586426, "learning_rate": 1.884508990318119e-05, "log_odds_chosen": 10.888010025024414, "log_odds_ratio": -5.706503361579962e-05, "logits/chosen": -0.873898983001709, "logits/rejected": -0.8950672149658203, "logps/chosen": -0.00020168225455563515, "logps/rejected": -2.0599961280822754, "loss": 0.5566, "nll_loss": 0.13913968205451965, "rewards/accuracies": 1.0, "rewards/chosen": -2.0168225091765635e-05, "rewards/margins": 0.20597945153713226, "rewards/rejected": -0.20599961280822754, "step": 9555 }, { "epoch": 6.608575380359612, "grad_norm": 8.395662307739258, "learning_rate": 1.8841247886891042e-05, "log_odds_chosen": 10.506204605102539, "log_odds_ratio": -0.0004264920426066965, "logits/chosen": -0.49800848960876465, "logits/rejected": -0.5854029059410095, "logps/chosen": -0.0007227214518934488, "logps/rejected": -2.0328316688537598, "loss": 0.6594, "nll_loss": 0.16481497883796692, "rewards/accuracies": 1.0, "rewards/chosen": -7.227214518934488e-05, "rewards/margins": 0.20321090519428253, "rewards/rejected": -0.2032831907272339, "step": 9556 }, { "epoch": 6.609266943291839, "grad_norm": 5.947407245635986, "learning_rate": 1.883740587060089e-05, "log_odds_chosen": 11.301834106445312, "log_odds_ratio": -1.8402424757368863e-05, "logits/chosen": -0.4008464217185974, "logits/rejected": -0.39481836557388306, "logps/chosen": -0.0004325605113990605, "logps/rejected": -2.325915575027466, "loss": 0.3629, "nll_loss": 0.09073560684919357, "rewards/accuracies": 1.0, "rewards/chosen": -4.3256048229523e-05, "rewards/margins": 0.2325483113527298, "rewards/rejected": -0.23259153962135315, "step": 9557 }, { "epoch": 6.609958506224066, "grad_norm": 5.509538173675537, "learning_rate": 1.8833563854310743e-05, "log_odds_chosen": 10.087053298950195, "log_odds_ratio": -0.00010945965186692774, "logits/chosen": -0.47453972697257996, "logits/rejected": -0.5440321564674377, "logps/chosen": -0.00029165492742322385, "logps/rejected": -1.6123814582824707, "loss": 0.5475, "nll_loss": 0.13687016069889069, "rewards/accuracies": 1.0, "rewards/chosen": -2.916549601650331e-05, "rewards/margins": 0.16120897233486176, "rewards/rejected": -0.1612381637096405, "step": 9558 }, { "epoch": 6.610650069156293, "grad_norm": 7.089804649353027, "learning_rate": 1.8829721838020596e-05, "log_odds_chosen": 10.465744018554688, "log_odds_ratio": -4.751564847538248e-05, "logits/chosen": -0.36397644877433777, "logits/rejected": -0.42793208360671997, "logps/chosen": -0.0003245154512114823, "logps/rejected": -2.2591567039489746, "loss": 0.6086, "nll_loss": 0.15214122831821442, "rewards/accuracies": 1.0, "rewards/chosen": -3.245154221076518e-05, "rewards/margins": 0.22588323056697845, "rewards/rejected": -0.22591570019721985, "step": 9559 }, { "epoch": 6.61134163208852, "grad_norm": 10.86466121673584, "learning_rate": 1.8825879821730445e-05, "log_odds_chosen": 11.104890823364258, "log_odds_ratio": -9.219466301146895e-05, "logits/chosen": -0.73785400390625, "logits/rejected": -0.804728627204895, "logps/chosen": -0.00013324012979865074, "logps/rejected": -2.2057862281799316, "loss": 0.7755, "nll_loss": 0.19385896623134613, "rewards/accuracies": 1.0, "rewards/chosen": -1.3324013707460836e-05, "rewards/margins": 0.2205653041601181, "rewards/rejected": -0.2205786257982254, "step": 9560 }, { "epoch": 6.6120331950207465, "grad_norm": 5.825887203216553, "learning_rate": 1.8822037805440297e-05, "log_odds_chosen": 10.568598747253418, "log_odds_ratio": -0.000481350754853338, "logits/chosen": -0.2709238827228546, "logits/rejected": -0.3508308529853821, "logps/chosen": -0.0004341882886365056, "logps/rejected": -2.195253610610962, "loss": 0.6048, "nll_loss": 0.15116143226623535, "rewards/accuracies": 1.0, "rewards/chosen": -4.3418825953267515e-05, "rewards/margins": 0.2194819301366806, "rewards/rejected": -0.21952535212039948, "step": 9561 }, { "epoch": 6.612724757952973, "grad_norm": 6.367141246795654, "learning_rate": 1.8818195789150146e-05, "log_odds_chosen": 11.000616073608398, "log_odds_ratio": -2.92911208816804e-05, "logits/chosen": -0.3233502507209778, "logits/rejected": -0.48610731959342957, "logps/chosen": -0.0006418666453100741, "logps/rejected": -2.913724422454834, "loss": 0.8625, "nll_loss": 0.21563003957271576, "rewards/accuracies": 1.0, "rewards/chosen": -6.418666453100741e-05, "rewards/margins": 0.29130828380584717, "rewards/rejected": -0.2913724482059479, "step": 9562 }, { "epoch": 6.6134163208852, "grad_norm": 5.848628044128418, "learning_rate": 1.8814353772859995e-05, "log_odds_chosen": 11.818613052368164, "log_odds_ratio": -1.573657755216118e-05, "logits/chosen": -0.49255573749542236, "logits/rejected": -0.38133054971694946, "logps/chosen": -0.0008059104438871145, "logps/rejected": -3.054670810699463, "loss": 0.4733, "nll_loss": 0.11832495778799057, "rewards/accuracies": 1.0, "rewards/chosen": -8.059104584390298e-05, "rewards/margins": 0.305386483669281, "rewards/rejected": -0.30546706914901733, "step": 9563 }, { "epoch": 6.614107883817427, "grad_norm": 5.037055015563965, "learning_rate": 1.8810511756569848e-05, "log_odds_chosen": 10.745399475097656, "log_odds_ratio": -5.438456355477683e-05, "logits/chosen": -0.537282407283783, "logits/rejected": -0.6539811491966248, "logps/chosen": -0.00031301064882427454, "logps/rejected": -1.985874891281128, "loss": 0.4274, "nll_loss": 0.10683829337358475, "rewards/accuracies": 1.0, "rewards/chosen": -3.13010677928105e-05, "rewards/margins": 0.19855618476867676, "rewards/rejected": -0.19858750700950623, "step": 9564 }, { "epoch": 6.614799446749654, "grad_norm": 4.993642807006836, "learning_rate": 1.88066697402797e-05, "log_odds_chosen": 10.20506477355957, "log_odds_ratio": -0.0003886056365445256, "logits/chosen": -0.46335524320602417, "logits/rejected": -0.4679669141769409, "logps/chosen": -0.0008175184484571218, "logps/rejected": -2.422447681427002, "loss": 0.636, "nll_loss": 0.15896305441856384, "rewards/accuracies": 1.0, "rewards/chosen": -8.175184484571218e-05, "rewards/margins": 0.2421630173921585, "rewards/rejected": -0.24224475026130676, "step": 9565 }, { "epoch": 6.615491009681881, "grad_norm": 5.190500259399414, "learning_rate": 1.880282772398955e-05, "log_odds_chosen": 10.710193634033203, "log_odds_ratio": -6.619399937335402e-05, "logits/chosen": -0.5193922519683838, "logits/rejected": -0.5428926944732666, "logps/chosen": -0.00012502398749347776, "logps/rejected": -1.6223423480987549, "loss": 0.4191, "nll_loss": 0.10477405786514282, "rewards/accuracies": 1.0, "rewards/chosen": -1.2502399840741418e-05, "rewards/margins": 0.1622217297554016, "rewards/rejected": -0.16223423182964325, "step": 9566 }, { "epoch": 6.6161825726141075, "grad_norm": 5.0056538581848145, "learning_rate": 1.8798985707699402e-05, "log_odds_chosen": 10.813407897949219, "log_odds_ratio": -6.554868014063686e-05, "logits/chosen": -0.4899275302886963, "logits/rejected": -0.6252234578132629, "logps/chosen": -0.00014681732864119112, "logps/rejected": -2.1733970642089844, "loss": 0.6276, "nll_loss": 0.1568903625011444, "rewards/accuracies": 1.0, "rewards/chosen": -1.468173195462441e-05, "rewards/margins": 0.21732503175735474, "rewards/rejected": -0.21733970940113068, "step": 9567 }, { "epoch": 6.616874135546334, "grad_norm": 9.58974552154541, "learning_rate": 1.8795143691409254e-05, "log_odds_chosen": 9.627368927001953, "log_odds_ratio": -0.0003191865107510239, "logits/chosen": -0.2289269119501114, "logits/rejected": -0.3812503218650818, "logps/chosen": -0.0007769543444737792, "logps/rejected": -1.7609001398086548, "loss": 0.5846, "nll_loss": 0.14612506330013275, "rewards/accuracies": 1.0, "rewards/chosen": -7.769542571622878e-05, "rewards/margins": 0.176012322306633, "rewards/rejected": -0.1760900318622589, "step": 9568 }, { "epoch": 6.617565698478561, "grad_norm": 6.556011199951172, "learning_rate": 1.8791301675119103e-05, "log_odds_chosen": 9.689128875732422, "log_odds_ratio": -0.0007466164533980191, "logits/chosen": -0.726296603679657, "logits/rejected": -0.7090697884559631, "logps/chosen": -0.0019731023348867893, "logps/rejected": -2.217195749282837, "loss": 0.5579, "nll_loss": 0.13938948512077332, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019731024804059416, "rewards/margins": 0.2215222716331482, "rewards/rejected": -0.22171959280967712, "step": 9569 }, { "epoch": 6.618257261410788, "grad_norm": 5.365951061248779, "learning_rate": 1.8787459658828956e-05, "log_odds_chosen": 10.164534568786621, "log_odds_ratio": -0.00022725044982507825, "logits/chosen": -0.7646428346633911, "logits/rejected": -0.7043318748474121, "logps/chosen": -0.00044344677007757127, "logps/rejected": -1.7538894414901733, "loss": 0.6294, "nll_loss": 0.15733535587787628, "rewards/accuracies": 1.0, "rewards/chosen": -4.434467700775713e-05, "rewards/margins": 0.17534460127353668, "rewards/rejected": -0.17538894712924957, "step": 9570 }, { "epoch": 6.618948824343015, "grad_norm": 8.986666679382324, "learning_rate": 1.8783617642538805e-05, "log_odds_chosen": 10.886392593383789, "log_odds_ratio": -3.449077485129237e-05, "logits/chosen": -0.3698527216911316, "logits/rejected": -0.5766900181770325, "logps/chosen": -0.00017695670248940587, "logps/rejected": -1.903517246246338, "loss": 0.4073, "nll_loss": 0.1018340140581131, "rewards/accuracies": 1.0, "rewards/chosen": -1.769567097653635e-05, "rewards/margins": 0.1903340220451355, "rewards/rejected": -0.1903517246246338, "step": 9571 }, { "epoch": 6.619640387275242, "grad_norm": 5.671159744262695, "learning_rate": 1.8779775626248654e-05, "log_odds_chosen": 9.405364990234375, "log_odds_ratio": -0.0006271099555306137, "logits/chosen": -0.4230155646800995, "logits/rejected": -0.5213783383369446, "logps/chosen": -0.0007497941260226071, "logps/rejected": -1.665784478187561, "loss": 0.7889, "nll_loss": 0.1971660852432251, "rewards/accuracies": 1.0, "rewards/chosen": -7.497941987821832e-05, "rewards/margins": 0.16650345921516418, "rewards/rejected": -0.16657845675945282, "step": 9572 }, { "epoch": 6.6203319502074685, "grad_norm": 7.949342727661133, "learning_rate": 1.8775933609958506e-05, "log_odds_chosen": 11.04636287689209, "log_odds_ratio": -0.002121716970577836, "logits/chosen": -0.5016674995422363, "logits/rejected": -0.6972486972808838, "logps/chosen": -0.001570210326462984, "logps/rejected": -1.935790777206421, "loss": 0.5045, "nll_loss": 0.12590371072292328, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015702102973591536, "rewards/margins": 0.19342204928398132, "rewards/rejected": -0.1935790777206421, "step": 9573 }, { "epoch": 6.621023513139695, "grad_norm": 9.75149154663086, "learning_rate": 1.877209159366836e-05, "log_odds_chosen": 10.175332069396973, "log_odds_ratio": -0.0002040680410573259, "logits/chosen": -0.6536148190498352, "logits/rejected": -0.7172862887382507, "logps/chosen": -0.0005002233083359897, "logps/rejected": -2.021902561187744, "loss": 0.7664, "nll_loss": 0.19158843159675598, "rewards/accuracies": 1.0, "rewards/chosen": -5.002232865081169e-05, "rewards/margins": 0.2021402269601822, "rewards/rejected": -0.20219025015830994, "step": 9574 }, { "epoch": 6.621715076071922, "grad_norm": 7.20743465423584, "learning_rate": 1.8768249577378208e-05, "log_odds_chosen": 10.140237808227539, "log_odds_ratio": -8.70264702825807e-05, "logits/chosen": -0.018910914659500122, "logits/rejected": -0.14361506700515747, "logps/chosen": -0.0007901238277554512, "logps/rejected": -1.93674898147583, "loss": 0.595, "nll_loss": 0.14875128865242004, "rewards/accuracies": 1.0, "rewards/chosen": -7.90123813203536e-05, "rewards/margins": 0.19359590113162994, "rewards/rejected": -0.19367492198944092, "step": 9575 }, { "epoch": 6.622406639004149, "grad_norm": 4.346622943878174, "learning_rate": 1.876440756108806e-05, "log_odds_chosen": 10.109397888183594, "log_odds_ratio": -0.00031347855110652745, "logits/chosen": -0.6578681468963623, "logits/rejected": -0.699524462223053, "logps/chosen": -0.00035372230922803283, "logps/rejected": -1.861891508102417, "loss": 0.6135, "nll_loss": 0.153354674577713, "rewards/accuracies": 1.0, "rewards/chosen": -3.5372231650399044e-05, "rewards/margins": 0.18615376949310303, "rewards/rejected": -0.18618914484977722, "step": 9576 }, { "epoch": 6.623098201936376, "grad_norm": 10.031994819641113, "learning_rate": 1.8760565544797913e-05, "log_odds_chosen": 10.174301147460938, "log_odds_ratio": -0.0006410350324586034, "logits/chosen": -0.09565894305706024, "logits/rejected": -0.13197723031044006, "logps/chosen": -0.0009404458105564117, "logps/rejected": -1.8535264730453491, "loss": 0.4278, "nll_loss": 0.10688328742980957, "rewards/accuracies": 1.0, "rewards/chosen": -9.404457523487508e-05, "rewards/margins": 0.18525859713554382, "rewards/rejected": -0.1853526532649994, "step": 9577 }, { "epoch": 6.623789764868603, "grad_norm": 4.580968856811523, "learning_rate": 1.875672352850776e-05, "log_odds_chosen": 9.780548095703125, "log_odds_ratio": -0.0003369380137883127, "logits/chosen": -0.580957293510437, "logits/rejected": -0.613364577293396, "logps/chosen": -0.0019260908011347055, "logps/rejected": -1.8503471612930298, "loss": 0.6089, "nll_loss": 0.1521856188774109, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019260909175500274, "rewards/margins": 0.18484210968017578, "rewards/rejected": -0.18503473699092865, "step": 9578 }, { "epoch": 6.624481327800829, "grad_norm": 7.827384948730469, "learning_rate": 1.8752881512217614e-05, "log_odds_chosen": 10.557483673095703, "log_odds_ratio": -4.911048381472938e-05, "logits/chosen": -0.20439143478870392, "logits/rejected": -0.24899733066558838, "logps/chosen": -0.0001140248859883286, "logps/rejected": -1.6083905696868896, "loss": 0.6618, "nll_loss": 0.16545367240905762, "rewards/accuracies": 1.0, "rewards/chosen": -1.1402488780731801e-05, "rewards/margins": 0.1608276516199112, "rewards/rejected": -0.1608390510082245, "step": 9579 }, { "epoch": 6.625172890733056, "grad_norm": 10.01291275024414, "learning_rate": 1.8749039495927463e-05, "log_odds_chosen": 10.815433502197266, "log_odds_ratio": -0.00027521009906195104, "logits/chosen": -0.2627377510070801, "logits/rejected": -0.3521021902561188, "logps/chosen": -0.0012004250893369317, "logps/rejected": -2.58683443069458, "loss": 0.7189, "nll_loss": 0.17969083786010742, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001200425103888847, "rewards/margins": 0.25856339931488037, "rewards/rejected": -0.258683443069458, "step": 9580 }, { "epoch": 6.625864453665283, "grad_norm": 8.097025871276855, "learning_rate": 1.8745197479637312e-05, "log_odds_chosen": 10.607389450073242, "log_odds_ratio": -0.00010616899817250669, "logits/chosen": -0.21804478764533997, "logits/rejected": -0.2118377983570099, "logps/chosen": -0.00013465568190440536, "logps/rejected": -1.7565428018569946, "loss": 0.8019, "nll_loss": 0.20045381784439087, "rewards/accuracies": 1.0, "rewards/chosen": -1.3465569281834178e-05, "rewards/margins": 0.17564082145690918, "rewards/rejected": -0.17565427720546722, "step": 9581 }, { "epoch": 6.62655601659751, "grad_norm": 12.105314254760742, "learning_rate": 1.8741355463347165e-05, "log_odds_chosen": 9.97254753112793, "log_odds_ratio": -0.00015050121874082834, "logits/chosen": 0.02145688235759735, "logits/rejected": -0.1352299302816391, "logps/chosen": -0.00031788513297215104, "logps/rejected": -1.7679246664047241, "loss": 0.6162, "nll_loss": 0.1540263444185257, "rewards/accuracies": 1.0, "rewards/chosen": -3.1788513297215104e-05, "rewards/margins": 0.17676067352294922, "rewards/rejected": -0.1767924726009369, "step": 9582 }, { "epoch": 6.627247579529737, "grad_norm": 6.09376859664917, "learning_rate": 1.8737513447057017e-05, "log_odds_chosen": 10.102136611938477, "log_odds_ratio": -0.004398900084197521, "logits/chosen": -0.19770173728466034, "logits/rejected": -0.2749331593513489, "logps/chosen": -0.002805904019623995, "logps/rejected": -2.46071195602417, "loss": 0.9602, "nll_loss": 0.23961874842643738, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002805904077831656, "rewards/margins": 0.24579061567783356, "rewards/rejected": -0.2460712194442749, "step": 9583 }, { "epoch": 6.627939142461964, "grad_norm": 6.021434783935547, "learning_rate": 1.8733671430766866e-05, "log_odds_chosen": 10.092761993408203, "log_odds_ratio": -0.0002814672188833356, "logits/chosen": -0.1907365918159485, "logits/rejected": -0.24558605253696442, "logps/chosen": -0.0009807777823880315, "logps/rejected": -2.1922271251678467, "loss": 0.6207, "nll_loss": 0.15515732765197754, "rewards/accuracies": 1.0, "rewards/chosen": -9.807777678361163e-05, "rewards/margins": 0.21912464499473572, "rewards/rejected": -0.21922272443771362, "step": 9584 }, { "epoch": 6.62863070539419, "grad_norm": 3.9753689765930176, "learning_rate": 1.872982941447672e-05, "log_odds_chosen": 10.512274742126465, "log_odds_ratio": -6.560344627359882e-05, "logits/chosen": -0.3456512987613678, "logits/rejected": -0.3297366797924042, "logps/chosen": -0.0002295379526913166, "logps/rejected": -1.99502694606781, "loss": 0.4358, "nll_loss": 0.1089467853307724, "rewards/accuracies": 1.0, "rewards/chosen": -2.29537945415359e-05, "rewards/margins": 0.19947972893714905, "rewards/rejected": -0.19950269162654877, "step": 9585 }, { "epoch": 6.629322268326417, "grad_norm": 9.102583885192871, "learning_rate": 1.872598739818657e-05, "log_odds_chosen": 9.41545581817627, "log_odds_ratio": -0.00023895951744634658, "logits/chosen": -0.07881233841180801, "logits/rejected": -0.14814022183418274, "logps/chosen": -0.00046351380296982825, "logps/rejected": -1.5590986013412476, "loss": 0.4239, "nll_loss": 0.10595303773880005, "rewards/accuracies": 1.0, "rewards/chosen": -4.63513788417913e-05, "rewards/margins": 0.15586349368095398, "rewards/rejected": -0.15590986609458923, "step": 9586 }, { "epoch": 6.630013831258644, "grad_norm": 6.898933410644531, "learning_rate": 1.872214538189642e-05, "log_odds_chosen": 9.900001525878906, "log_odds_ratio": -0.0005605472251772881, "logits/chosen": -0.5851230025291443, "logits/rejected": -0.6717733144760132, "logps/chosen": -0.005218465346843004, "logps/rejected": -2.2814183235168457, "loss": 0.3372, "nll_loss": 0.08423736691474915, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005218465812504292, "rewards/margins": 0.2276199907064438, "rewards/rejected": -0.22814184427261353, "step": 9587 }, { "epoch": 6.630705394190871, "grad_norm": 3.2694380283355713, "learning_rate": 1.8718303365606273e-05, "log_odds_chosen": 10.500894546508789, "log_odds_ratio": -8.928743045544252e-05, "logits/chosen": -0.04570809006690979, "logits/rejected": -0.1338164210319519, "logps/chosen": -0.0009862055303528905, "logps/rejected": -1.8601700067520142, "loss": 0.4133, "nll_loss": 0.10330963134765625, "rewards/accuracies": 1.0, "rewards/chosen": -9.862056322162971e-05, "rewards/margins": 0.18591837584972382, "rewards/rejected": -0.1860170066356659, "step": 9588 }, { "epoch": 6.631396957123098, "grad_norm": 7.532769680023193, "learning_rate": 1.871446134931612e-05, "log_odds_chosen": 11.600115776062012, "log_odds_ratio": -2.4432218197034672e-05, "logits/chosen": -0.6962848901748657, "logits/rejected": -0.6211987733840942, "logps/chosen": -0.00010943639790639281, "logps/rejected": -2.3903281688690186, "loss": 0.8404, "nll_loss": 0.2100864201784134, "rewards/accuracies": 1.0, "rewards/chosen": -1.0943640518235043e-05, "rewards/margins": 0.23902186751365662, "rewards/rejected": -0.2390328049659729, "step": 9589 }, { "epoch": 6.632088520055325, "grad_norm": 6.394747257232666, "learning_rate": 1.871061933302597e-05, "log_odds_chosen": 11.009204864501953, "log_odds_ratio": -8.54446116136387e-05, "logits/chosen": -0.5556159615516663, "logits/rejected": -0.5460792183876038, "logps/chosen": -0.00013034706353209913, "logps/rejected": -1.7036371231079102, "loss": 0.5307, "nll_loss": 0.1326713263988495, "rewards/accuracies": 1.0, "rewards/chosen": -1.303470526181627e-05, "rewards/margins": 0.17035070061683655, "rewards/rejected": -0.17036372423171997, "step": 9590 }, { "epoch": 6.632780082987551, "grad_norm": 9.66374683380127, "learning_rate": 1.8706777316735823e-05, "log_odds_chosen": 11.150654792785645, "log_odds_ratio": -6.50244255666621e-05, "logits/chosen": -0.1793665587902069, "logits/rejected": -0.26565828919410706, "logps/chosen": -0.0004702771839220077, "logps/rejected": -2.391282320022583, "loss": 0.6228, "nll_loss": 0.15569747984409332, "rewards/accuracies": 1.0, "rewards/chosen": -4.702771911979653e-05, "rewards/margins": 0.23908117413520813, "rewards/rejected": -0.2391282171010971, "step": 9591 }, { "epoch": 6.633471645919778, "grad_norm": 6.515843391418457, "learning_rate": 1.8702935300445676e-05, "log_odds_chosen": 10.865440368652344, "log_odds_ratio": -7.430482219206169e-05, "logits/chosen": -0.01707390695810318, "logits/rejected": 0.12872716784477234, "logps/chosen": -0.00039322333759628236, "logps/rejected": -2.132324695587158, "loss": 0.7089, "nll_loss": 0.17720797657966614, "rewards/accuracies": 1.0, "rewards/chosen": -3.932233084924519e-05, "rewards/margins": 0.2131931483745575, "rewards/rejected": -0.21323247253894806, "step": 9592 }, { "epoch": 6.634163208852005, "grad_norm": 6.349339962005615, "learning_rate": 1.8699093284155525e-05, "log_odds_chosen": 10.240455627441406, "log_odds_ratio": -0.00014411374286282808, "logits/chosen": -0.6429404020309448, "logits/rejected": -0.6957386136054993, "logps/chosen": -0.0006632260046899319, "logps/rejected": -2.199697732925415, "loss": 0.5332, "nll_loss": 0.133294016122818, "rewards/accuracies": 1.0, "rewards/chosen": -6.632260192418471e-05, "rewards/margins": 0.21990343928337097, "rewards/rejected": -0.2199697643518448, "step": 9593 }, { "epoch": 6.634854771784232, "grad_norm": 13.812353134155273, "learning_rate": 1.8695251267865377e-05, "log_odds_chosen": 10.978428840637207, "log_odds_ratio": -3.51098642568104e-05, "logits/chosen": -0.4268570840358734, "logits/rejected": -0.5332842469215393, "logps/chosen": -0.00032298153382726014, "logps/rejected": -2.003493309020996, "loss": 0.4843, "nll_loss": 0.12108378857374191, "rewards/accuracies": 1.0, "rewards/chosen": -3.229815774830058e-05, "rewards/margins": 0.20031705498695374, "rewards/rejected": -0.200349360704422, "step": 9594 }, { "epoch": 6.635546334716459, "grad_norm": 7.508303642272949, "learning_rate": 1.869140925157523e-05, "log_odds_chosen": 10.579375267028809, "log_odds_ratio": -3.901870150002651e-05, "logits/chosen": -0.18920519948005676, "logits/rejected": -0.1860412359237671, "logps/chosen": -0.00022964477830100805, "logps/rejected": -2.117384910583496, "loss": 0.6806, "nll_loss": 0.170146182179451, "rewards/accuracies": 1.0, "rewards/chosen": -2.2964477466302924e-05, "rewards/margins": 0.21171551942825317, "rewards/rejected": -0.2117384970188141, "step": 9595 }, { "epoch": 6.6362378976486855, "grad_norm": 7.161440849304199, "learning_rate": 1.868756723528508e-05, "log_odds_chosen": 11.471312522888184, "log_odds_ratio": -2.5577115593478084e-05, "logits/chosen": -0.3285534381866455, "logits/rejected": -0.4225424826145172, "logps/chosen": -0.00020269016386009753, "logps/rejected": -2.752018928527832, "loss": 0.4149, "nll_loss": 0.103721022605896, "rewards/accuracies": 1.0, "rewards/chosen": -2.0269017113605514e-05, "rewards/margins": 0.2751816511154175, "rewards/rejected": -0.2752019166946411, "step": 9596 }, { "epoch": 6.636929460580912, "grad_norm": 6.152478218078613, "learning_rate": 1.868372521899493e-05, "log_odds_chosen": 11.246030807495117, "log_odds_ratio": -0.00015961957979016006, "logits/chosen": -0.34847715497016907, "logits/rejected": -0.32901671528816223, "logps/chosen": -0.00011934031499549747, "logps/rejected": -2.253852367401123, "loss": 0.4987, "nll_loss": 0.12466421723365784, "rewards/accuracies": 1.0, "rewards/chosen": -1.1934031135751866e-05, "rewards/margins": 0.2253732979297638, "rewards/rejected": -0.22538524866104126, "step": 9597 }, { "epoch": 6.637621023513139, "grad_norm": 5.750330448150635, "learning_rate": 1.867988320270478e-05, "log_odds_chosen": 9.047605514526367, "log_odds_ratio": -0.0013029974652454257, "logits/chosen": -0.5508097410202026, "logits/rejected": -0.6082567572593689, "logps/chosen": -0.0006829822086729109, "logps/rejected": -1.3287931680679321, "loss": 0.5018, "nll_loss": 0.12532344460487366, "rewards/accuracies": 1.0, "rewards/chosen": -6.829822814324871e-05, "rewards/margins": 0.1328110247850418, "rewards/rejected": -0.1328793317079544, "step": 9598 }, { "epoch": 6.638312586445366, "grad_norm": 5.946147918701172, "learning_rate": 1.867604118641463e-05, "log_odds_chosen": 10.778899192810059, "log_odds_ratio": -5.474353383760899e-05, "logits/chosen": -0.5107077360153198, "logits/rejected": -0.5558190941810608, "logps/chosen": -0.0003230084548704326, "logps/rejected": -2.5283544063568115, "loss": 0.4038, "nll_loss": 0.10093516856431961, "rewards/accuracies": 1.0, "rewards/chosen": -3.230084621463902e-05, "rewards/margins": 0.25280314683914185, "rewards/rejected": -0.2528354525566101, "step": 9599 }, { "epoch": 6.639004149377593, "grad_norm": 6.913265705108643, "learning_rate": 1.867219917012448e-05, "log_odds_chosen": 11.897588729858398, "log_odds_ratio": -1.2387119568302296e-05, "logits/chosen": -0.3910364508628845, "logits/rejected": -0.3944079875946045, "logps/chosen": -0.00012312438047956675, "logps/rejected": -2.6750569343566895, "loss": 0.5822, "nll_loss": 0.14555397629737854, "rewards/accuracies": 1.0, "rewards/chosen": -1.2312437320360914e-05, "rewards/margins": 0.2674933969974518, "rewards/rejected": -0.2675057053565979, "step": 9600 }, { "epoch": 6.63969571230982, "grad_norm": 5.178598403930664, "learning_rate": 1.8668357153834334e-05, "log_odds_chosen": 10.858855247497559, "log_odds_ratio": -0.00013011848204769194, "logits/chosen": -0.4819783568382263, "logits/rejected": -0.49420034885406494, "logps/chosen": -0.000334838405251503, "logps/rejected": -3.0064451694488525, "loss": 0.4746, "nll_loss": 0.11863689124584198, "rewards/accuracies": 1.0, "rewards/chosen": -3.34838405251503e-05, "rewards/margins": 0.30061104893684387, "rewards/rejected": -0.30064451694488525, "step": 9601 }, { "epoch": 6.6403872752420465, "grad_norm": 3.6704227924346924, "learning_rate": 1.8664515137544183e-05, "log_odds_chosen": 10.26997184753418, "log_odds_ratio": -7.55906366975978e-05, "logits/chosen": -0.8013444542884827, "logits/rejected": -0.8593555688858032, "logps/chosen": -0.00013731844956055284, "logps/rejected": -1.3853795528411865, "loss": 0.3618, "nll_loss": 0.09045219421386719, "rewards/accuracies": 1.0, "rewards/chosen": -1.3731846593145747e-05, "rewards/margins": 0.13852423429489136, "rewards/rejected": -0.13853797316551208, "step": 9602 }, { "epoch": 6.641078838174274, "grad_norm": 8.884760856628418, "learning_rate": 1.8660673121254035e-05, "log_odds_chosen": 10.43310546875, "log_odds_ratio": -0.00010702509462134913, "logits/chosen": -0.3072403073310852, "logits/rejected": -0.3633434772491455, "logps/chosen": -0.0005991262733004987, "logps/rejected": -1.909914255142212, "loss": 0.4219, "nll_loss": 0.10546346753835678, "rewards/accuracies": 1.0, "rewards/chosen": -5.991263606119901e-05, "rewards/margins": 0.190931499004364, "rewards/rejected": -0.19099141657352448, "step": 9603 }, { "epoch": 6.641770401106501, "grad_norm": 8.687732696533203, "learning_rate": 1.8656831104963888e-05, "log_odds_chosen": 8.809842109680176, "log_odds_ratio": -0.3070237934589386, "logits/chosen": -0.3055609166622162, "logits/rejected": -0.283913791179657, "logps/chosen": -0.050361473113298416, "logps/rejected": -1.5872776508331299, "loss": 0.8183, "nll_loss": 0.17387992143630981, "rewards/accuracies": 0.875, "rewards/chosen": -0.005036147776991129, "rewards/margins": 0.1536916196346283, "rewards/rejected": -0.158727765083313, "step": 9604 }, { "epoch": 6.642461964038728, "grad_norm": 5.080305099487305, "learning_rate": 1.8652989088673737e-05, "log_odds_chosen": 9.537845611572266, "log_odds_ratio": -0.00015548468218185008, "logits/chosen": -0.37062469124794006, "logits/rejected": -0.35777461528778076, "logps/chosen": -0.00035577925154939294, "logps/rejected": -1.4090495109558105, "loss": 0.4689, "nll_loss": 0.1172141507267952, "rewards/accuracies": 1.0, "rewards/chosen": -3.5577930248109624e-05, "rewards/margins": 0.1408693641424179, "rewards/rejected": -0.14090494811534882, "step": 9605 }, { "epoch": 6.643153526970955, "grad_norm": 5.121057033538818, "learning_rate": 1.864914707238359e-05, "log_odds_chosen": 11.087963104248047, "log_odds_ratio": -3.925432247342542e-05, "logits/chosen": -0.663267970085144, "logits/rejected": -0.7178729176521301, "logps/chosen": -0.00014778254262637347, "logps/rejected": -2.0659022331237793, "loss": 0.5862, "nll_loss": 0.146546870470047, "rewards/accuracies": 1.0, "rewards/chosen": -1.4778253898839466e-05, "rewards/margins": 0.2065754383802414, "rewards/rejected": -0.2065902054309845, "step": 9606 }, { "epoch": 6.643845089903182, "grad_norm": 4.4347429275512695, "learning_rate": 1.864530505609344e-05, "log_odds_chosen": 10.788904190063477, "log_odds_ratio": -0.0005048522725701332, "logits/chosen": -0.43864724040031433, "logits/rejected": -0.49911749362945557, "logps/chosen": -0.000164158787811175, "logps/rejected": -1.9060001373291016, "loss": 0.5871, "nll_loss": 0.14673137664794922, "rewards/accuracies": 1.0, "rewards/chosen": -1.641587914491538e-05, "rewards/margins": 0.19058358669281006, "rewards/rejected": -0.19060002267360687, "step": 9607 }, { "epoch": 6.644536652835408, "grad_norm": 9.326899528503418, "learning_rate": 1.8641463039803288e-05, "log_odds_chosen": 10.238898277282715, "log_odds_ratio": -6.96768329362385e-05, "logits/chosen": -0.1614079475402832, "logits/rejected": -0.22287413477897644, "logps/chosen": -0.0003164965892210603, "logps/rejected": -1.8528064489364624, "loss": 0.536, "nll_loss": 0.1339990645647049, "rewards/accuracies": 1.0, "rewards/chosen": -3.164965892210603e-05, "rewards/margins": 0.18524901568889618, "rewards/rejected": -0.18528065085411072, "step": 9608 }, { "epoch": 6.645228215767635, "grad_norm": 13.386348724365234, "learning_rate": 1.863762102351314e-05, "log_odds_chosen": 11.673979759216309, "log_odds_ratio": -9.696490451460704e-06, "logits/chosen": -0.43724504113197327, "logits/rejected": -0.4728170931339264, "logps/chosen": -0.00015469803474843502, "logps/rejected": -2.4809579849243164, "loss": 0.6093, "nll_loss": 0.15233442187309265, "rewards/accuracies": 1.0, "rewards/chosen": -1.546980274724774e-05, "rewards/margins": 0.24808037281036377, "rewards/rejected": -0.2480958253145218, "step": 9609 }, { "epoch": 6.645919778699862, "grad_norm": 8.906185150146484, "learning_rate": 1.8633779007222992e-05, "log_odds_chosen": 11.176610946655273, "log_odds_ratio": -2.3035610865917988e-05, "logits/chosen": -0.41594600677490234, "logits/rejected": -0.4206879734992981, "logps/chosen": -0.00022571772569790483, "logps/rejected": -2.4756767749786377, "loss": 0.6416, "nll_loss": 0.16038845479488373, "rewards/accuracies": 1.0, "rewards/chosen": -2.2571772205992602e-05, "rewards/margins": 0.24754513800144196, "rewards/rejected": -0.24756768345832825, "step": 9610 }, { "epoch": 6.646611341632089, "grad_norm": 4.980371952056885, "learning_rate": 1.862993699093284e-05, "log_odds_chosen": 10.551161766052246, "log_odds_ratio": -5.1158975111320615e-05, "logits/chosen": -0.20379826426506042, "logits/rejected": -0.18743589520454407, "logps/chosen": -0.0030980801675468683, "logps/rejected": -2.539689064025879, "loss": 0.5241, "nll_loss": 0.13103193044662476, "rewards/accuracies": 1.0, "rewards/chosen": -0.00030980800511315465, "rewards/margins": 0.25365912914276123, "rewards/rejected": -0.2539689242839813, "step": 9611 }, { "epoch": 6.647302904564316, "grad_norm": 5.498662948608398, "learning_rate": 1.8626094974642694e-05, "log_odds_chosen": 11.447917938232422, "log_odds_ratio": -2.6658351998776197e-05, "logits/chosen": -0.14285291731357574, "logits/rejected": -0.09949278086423874, "logps/chosen": -0.00011387672566343099, "logps/rejected": -2.4001576900482178, "loss": 0.4981, "nll_loss": 0.12452501058578491, "rewards/accuracies": 1.0, "rewards/chosen": -1.1387672202545218e-05, "rewards/margins": 0.24000439047813416, "rewards/rejected": -0.24001577496528625, "step": 9612 }, { "epoch": 6.6479944674965425, "grad_norm": 4.112555503845215, "learning_rate": 1.8622252958352546e-05, "log_odds_chosen": 11.097431182861328, "log_odds_ratio": -6.698002107441425e-05, "logits/chosen": -0.5834935903549194, "logits/rejected": -0.6480783224105835, "logps/chosen": -0.00037601194344460964, "logps/rejected": -2.660428762435913, "loss": 0.4349, "nll_loss": 0.10872837901115417, "rewards/accuracies": 1.0, "rewards/chosen": -3.760119579965249e-05, "rewards/margins": 0.266005277633667, "rewards/rejected": -0.26604288816452026, "step": 9613 }, { "epoch": 6.648686030428769, "grad_norm": 6.84444522857666, "learning_rate": 1.8618410942062395e-05, "log_odds_chosen": 10.822056770324707, "log_odds_ratio": -5.755308666266501e-05, "logits/chosen": -0.028991512954235077, "logits/rejected": -0.10154448449611664, "logps/chosen": -0.00035526740248315036, "logps/rejected": -2.3193163871765137, "loss": 0.676, "nll_loss": 0.16899527609348297, "rewards/accuracies": 1.0, "rewards/chosen": -3.5526743886293843e-05, "rewards/margins": 0.23189613223075867, "rewards/rejected": -0.2319316416978836, "step": 9614 }, { "epoch": 6.649377593360996, "grad_norm": 3.9444468021392822, "learning_rate": 1.8614568925772248e-05, "log_odds_chosen": 10.631586074829102, "log_odds_ratio": -0.0003470322408247739, "logits/chosen": -0.3811192810535431, "logits/rejected": -0.42284131050109863, "logps/chosen": -0.0011563881998881698, "logps/rejected": -2.0233068466186523, "loss": 0.4872, "nll_loss": 0.12176550179719925, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011563881707843393, "rewards/margins": 0.2022150605916977, "rewards/rejected": -0.20233069360256195, "step": 9615 }, { "epoch": 6.650069156293223, "grad_norm": 6.1867594718933105, "learning_rate": 1.8610726909482097e-05, "log_odds_chosen": 9.69314193725586, "log_odds_ratio": -0.0001658633555052802, "logits/chosen": -0.4153778553009033, "logits/rejected": -0.41827812790870667, "logps/chosen": -0.0004052014264743775, "logps/rejected": -1.5486747026443481, "loss": 0.6721, "nll_loss": 0.16801199316978455, "rewards/accuracies": 1.0, "rewards/chosen": -4.0520149923395365e-05, "rewards/margins": 0.15482693910598755, "rewards/rejected": -0.15486745536327362, "step": 9616 }, { "epoch": 6.65076071922545, "grad_norm": 6.0896382331848145, "learning_rate": 1.8606884893191946e-05, "log_odds_chosen": 10.958621978759766, "log_odds_ratio": -5.041091571911238e-05, "logits/chosen": 0.014917820692062378, "logits/rejected": -0.06887702643871307, "logps/chosen": -0.0004453232977539301, "logps/rejected": -2.456699848175049, "loss": 0.4531, "nll_loss": 0.1132640540599823, "rewards/accuracies": 1.0, "rewards/chosen": -4.453233123058453e-05, "rewards/margins": 0.24562548100948334, "rewards/rejected": -0.24567002058029175, "step": 9617 }, { "epoch": 6.651452282157677, "grad_norm": 27.65871810913086, "learning_rate": 1.86030428769018e-05, "log_odds_chosen": 11.017706871032715, "log_odds_ratio": -2.6596382667776197e-05, "logits/chosen": -0.4998897314071655, "logits/rejected": -0.5836895108222961, "logps/chosen": -0.00018730561714619398, "logps/rejected": -2.200857162475586, "loss": 0.5834, "nll_loss": 0.14584854245185852, "rewards/accuracies": 1.0, "rewards/chosen": -1.873056316981092e-05, "rewards/margins": 0.22006699442863464, "rewards/rejected": -0.2200857400894165, "step": 9618 }, { "epoch": 6.6521438450899035, "grad_norm": 4.076780796051025, "learning_rate": 1.8599200860611647e-05, "log_odds_chosen": 10.832459449768066, "log_odds_ratio": -3.48457797372248e-05, "logits/chosen": -0.10937292873859406, "logits/rejected": -0.10044606029987335, "logps/chosen": -0.0005893828347325325, "logps/rejected": -2.6281380653381348, "loss": 0.4766, "nll_loss": 0.11914488673210144, "rewards/accuracies": 1.0, "rewards/chosen": -5.893828347325325e-05, "rewards/margins": 0.2627548575401306, "rewards/rejected": -0.2628138065338135, "step": 9619 }, { "epoch": 6.65283540802213, "grad_norm": 7.478692531585693, "learning_rate": 1.85953588443215e-05, "log_odds_chosen": 10.989614486694336, "log_odds_ratio": -2.4027020117500797e-05, "logits/chosen": 0.045154161751270294, "logits/rejected": -0.029847905039787292, "logps/chosen": -0.000188558449735865, "logps/rejected": -2.229787826538086, "loss": 0.8103, "nll_loss": 0.20257043838500977, "rewards/accuracies": 1.0, "rewards/chosen": -1.8855846064980142e-05, "rewards/margins": 0.22295993566513062, "rewards/rejected": -0.22297880053520203, "step": 9620 }, { "epoch": 6.653526970954357, "grad_norm": 7.046220302581787, "learning_rate": 1.8591516828031352e-05, "log_odds_chosen": 10.115169525146484, "log_odds_ratio": -9.490567026659846e-05, "logits/chosen": -0.4010132849216461, "logits/rejected": -0.5099731683731079, "logps/chosen": -0.00034400090225972235, "logps/rejected": -2.049590587615967, "loss": 0.5036, "nll_loss": 0.1258976310491562, "rewards/accuracies": 1.0, "rewards/chosen": -3.4400090953568e-05, "rewards/margins": 0.20492467284202576, "rewards/rejected": -0.20495907962322235, "step": 9621 }, { "epoch": 6.654218533886584, "grad_norm": 5.485591411590576, "learning_rate": 1.85876748117412e-05, "log_odds_chosen": 10.569567680358887, "log_odds_ratio": -8.486285514663905e-05, "logits/chosen": -0.37124836444854736, "logits/rejected": -0.304371178150177, "logps/chosen": -0.00017183186719194055, "logps/rejected": -1.9051198959350586, "loss": 0.5917, "nll_loss": 0.14791113138198853, "rewards/accuracies": 1.0, "rewards/chosen": -1.7183185264002532e-05, "rewards/margins": 0.19049480557441711, "rewards/rejected": -0.19051198661327362, "step": 9622 }, { "epoch": 6.654910096818811, "grad_norm": 7.806471347808838, "learning_rate": 1.8583832795451054e-05, "log_odds_chosen": 12.124666213989258, "log_odds_ratio": -1.1755686500691809e-05, "logits/chosen": -0.05929935351014137, "logits/rejected": -0.08076391369104385, "logps/chosen": -0.0001312370295636356, "logps/rejected": -2.980034351348877, "loss": 0.6727, "nll_loss": 0.1681688278913498, "rewards/accuracies": 1.0, "rewards/chosen": -1.3123702956363559e-05, "rewards/margins": 0.2979903221130371, "rewards/rejected": -0.2980034649372101, "step": 9623 }, { "epoch": 6.655601659751038, "grad_norm": 5.634998798370361, "learning_rate": 1.8579990779160906e-05, "log_odds_chosen": 11.782219886779785, "log_odds_ratio": -3.760270192287862e-05, "logits/chosen": -0.06998319923877716, "logits/rejected": 0.05776715278625488, "logps/chosen": -0.0003284990380052477, "logps/rejected": -2.760857105255127, "loss": 0.7855, "nll_loss": 0.19638195633888245, "rewards/accuracies": 1.0, "rewards/chosen": -3.284990452812053e-05, "rewards/margins": 0.276052862405777, "rewards/rejected": -0.2760857045650482, "step": 9624 }, { "epoch": 6.6562932226832645, "grad_norm": 4.896988868713379, "learning_rate": 1.8576148762870755e-05, "log_odds_chosen": 10.359952926635742, "log_odds_ratio": -9.971457620849833e-05, "logits/chosen": -0.534497857093811, "logits/rejected": -0.6003218293190002, "logps/chosen": -0.0005265720537863672, "logps/rejected": -1.8906424045562744, "loss": 0.643, "nll_loss": 0.16074874997138977, "rewards/accuracies": 1.0, "rewards/chosen": -5.2657207561424e-05, "rewards/margins": 0.1890116035938263, "rewards/rejected": -0.18906423449516296, "step": 9625 }, { "epoch": 6.656984785615491, "grad_norm": 4.442220211029053, "learning_rate": 1.8572306746580608e-05, "log_odds_chosen": 10.274168014526367, "log_odds_ratio": -9.561772458255291e-05, "logits/chosen": -0.26732587814331055, "logits/rejected": -0.3629629909992218, "logps/chosen": -0.0002495437511242926, "logps/rejected": -1.8324568271636963, "loss": 0.6342, "nll_loss": 0.15854156017303467, "rewards/accuracies": 1.0, "rewards/chosen": -2.4954375476227142e-05, "rewards/margins": 0.18322071433067322, "rewards/rejected": -0.1832456737756729, "step": 9626 }, { "epoch": 6.657676348547718, "grad_norm": 6.081766128540039, "learning_rate": 1.8568464730290457e-05, "log_odds_chosen": 10.357091903686523, "log_odds_ratio": -8.496503869537264e-05, "logits/chosen": -0.761346161365509, "logits/rejected": -0.7080386877059937, "logps/chosen": -0.00028227429720573127, "logps/rejected": -1.7036181688308716, "loss": 0.4951, "nll_loss": 0.12377134710550308, "rewards/accuracies": 1.0, "rewards/chosen": -2.822743044816889e-05, "rewards/margins": 0.170333594083786, "rewards/rejected": -0.17036181688308716, "step": 9627 }, { "epoch": 6.658367911479945, "grad_norm": 3.2742528915405273, "learning_rate": 1.8564622714000306e-05, "log_odds_chosen": 10.262062072753906, "log_odds_ratio": -7.255500531755388e-05, "logits/chosen": 0.11290848255157471, "logits/rejected": 0.05145927891135216, "logps/chosen": -0.00043970055412501097, "logps/rejected": -1.8753759860992432, "loss": 0.6369, "nll_loss": 0.15922844409942627, "rewards/accuracies": 1.0, "rewards/chosen": -4.397005250211805e-05, "rewards/margins": 0.18749363720417023, "rewards/rejected": -0.18753761053085327, "step": 9628 }, { "epoch": 6.659059474412172, "grad_norm": 6.663225173950195, "learning_rate": 1.856078069771016e-05, "log_odds_chosen": 10.049211502075195, "log_odds_ratio": -0.0004824997449759394, "logits/chosen": -0.3176838159561157, "logits/rejected": -0.33979031443595886, "logps/chosen": -0.00041182507993653417, "logps/rejected": -1.8318284749984741, "loss": 0.7267, "nll_loss": 0.18163245916366577, "rewards/accuracies": 1.0, "rewards/chosen": -4.118250581086613e-05, "rewards/margins": 0.18314167857170105, "rewards/rejected": -0.18318286538124084, "step": 9629 }, { "epoch": 6.659751037344399, "grad_norm": 9.147109031677246, "learning_rate": 1.855693868142001e-05, "log_odds_chosen": 10.695847511291504, "log_odds_ratio": -6.39359132037498e-05, "logits/chosen": -0.25236430764198303, "logits/rejected": -0.3654441833496094, "logps/chosen": -0.00018572970293462276, "logps/rejected": -1.872322916984558, "loss": 0.7521, "nll_loss": 0.1880245804786682, "rewards/accuracies": 1.0, "rewards/chosen": -1.8572969565866515e-05, "rewards/margins": 0.1872137188911438, "rewards/rejected": -0.18723228573799133, "step": 9630 }, { "epoch": 6.6604426002766255, "grad_norm": 8.203865051269531, "learning_rate": 1.855309666512986e-05, "log_odds_chosen": 9.84941291809082, "log_odds_ratio": -0.00018136092694476247, "logits/chosen": -0.41311779618263245, "logits/rejected": -0.5088513493537903, "logps/chosen": -0.0002423153055133298, "logps/rejected": -1.6244256496429443, "loss": 0.4041, "nll_loss": 0.10100964456796646, "rewards/accuracies": 1.0, "rewards/chosen": -2.4231529096141458e-05, "rewards/margins": 0.16241833567619324, "rewards/rejected": -0.16244256496429443, "step": 9631 }, { "epoch": 6.661134163208852, "grad_norm": 4.43738317489624, "learning_rate": 1.8549254648839712e-05, "log_odds_chosen": 9.541839599609375, "log_odds_ratio": -0.0006786661688238382, "logits/chosen": -0.31768670678138733, "logits/rejected": -0.28740179538726807, "logps/chosen": -0.0011072256602346897, "logps/rejected": -1.896850824356079, "loss": 0.7986, "nll_loss": 0.19957174360752106, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011072256893385202, "rewards/margins": 0.18957436084747314, "rewards/rejected": -0.18968507647514343, "step": 9632 }, { "epoch": 6.661825726141079, "grad_norm": 7.632981300354004, "learning_rate": 1.8545412632549565e-05, "log_odds_chosen": 11.051722526550293, "log_odds_ratio": -4.5273809519130737e-05, "logits/chosen": -0.4231196343898773, "logits/rejected": -0.45129501819610596, "logps/chosen": -0.0002486844314262271, "logps/rejected": -2.4345688819885254, "loss": 0.4322, "nll_loss": 0.10804326087236404, "rewards/accuracies": 1.0, "rewards/chosen": -2.4868444597814232e-05, "rewards/margins": 0.24343204498291016, "rewards/rejected": -0.2434569001197815, "step": 9633 }, { "epoch": 6.662517289073306, "grad_norm": 6.208993911743164, "learning_rate": 1.8541570616259414e-05, "log_odds_chosen": 10.302834510803223, "log_odds_ratio": -0.00035480436054058373, "logits/chosen": -0.9572851657867432, "logits/rejected": -0.8913147449493408, "logps/chosen": -0.00021303204994183034, "logps/rejected": -1.6176707744598389, "loss": 0.7972, "nll_loss": 0.19925661385059357, "rewards/accuracies": 1.0, "rewards/chosen": -2.1303205357980914e-05, "rewards/margins": 0.1617458015680313, "rewards/rejected": -0.16176709532737732, "step": 9634 }, { "epoch": 6.663208852005533, "grad_norm": 8.000402450561523, "learning_rate": 1.8537728599969266e-05, "log_odds_chosen": 10.670997619628906, "log_odds_ratio": -9.012306691147387e-05, "logits/chosen": -0.35462328791618347, "logits/rejected": -0.34307336807250977, "logps/chosen": -0.00013257621321827173, "logps/rejected": -1.5429167747497559, "loss": 0.3766, "nll_loss": 0.09414802491664886, "rewards/accuracies": 1.0, "rewards/chosen": -1.3257621503726114e-05, "rewards/margins": 0.15427841246128082, "rewards/rejected": -0.15429167449474335, "step": 9635 }, { "epoch": 6.66390041493776, "grad_norm": 10.191165924072266, "learning_rate": 1.8533886583679115e-05, "log_odds_chosen": 10.87049674987793, "log_odds_ratio": -6.27797853667289e-05, "logits/chosen": -0.2523128092288971, "logits/rejected": -0.27273494005203247, "logps/chosen": -0.0003482589963823557, "logps/rejected": -2.6285324096679688, "loss": 0.5499, "nll_loss": 0.1374695897102356, "rewards/accuracies": 1.0, "rewards/chosen": -3.4825898183044046e-05, "rewards/margins": 0.2628183960914612, "rewards/rejected": -0.2628532350063324, "step": 9636 }, { "epoch": 6.6645919778699865, "grad_norm": 5.827001094818115, "learning_rate": 1.8530044567388964e-05, "log_odds_chosen": 11.037064552307129, "log_odds_ratio": -3.355016087880358e-05, "logits/chosen": -0.17959906160831451, "logits/rejected": -0.25126656889915466, "logps/chosen": -0.0003407234326004982, "logps/rejected": -2.286252021789551, "loss": 0.5514, "nll_loss": 0.13783562183380127, "rewards/accuracies": 1.0, "rewards/chosen": -3.40723418048583e-05, "rewards/margins": 0.22859112918376923, "rewards/rejected": -0.22862519323825836, "step": 9637 }, { "epoch": 6.665283540802213, "grad_norm": 7.157079696655273, "learning_rate": 1.8526202551098817e-05, "log_odds_chosen": 10.269147872924805, "log_odds_ratio": -0.000136653077788651, "logits/chosen": -0.7898240089416504, "logits/rejected": -0.8049187660217285, "logps/chosen": -0.0003843801387120038, "logps/rejected": -1.650019884109497, "loss": 0.6758, "nll_loss": 0.1689254343509674, "rewards/accuracies": 1.0, "rewards/chosen": -3.8438014598796144e-05, "rewards/margins": 0.16496357321739197, "rewards/rejected": -0.1650019884109497, "step": 9638 }, { "epoch": 6.66597510373444, "grad_norm": 6.0989766120910645, "learning_rate": 1.852236053480867e-05, "log_odds_chosen": 10.749070167541504, "log_odds_ratio": -0.00024179847969207913, "logits/chosen": -0.37179988622665405, "logits/rejected": -0.6003690958023071, "logps/chosen": -0.00023933660122565925, "logps/rejected": -2.0018997192382812, "loss": 0.7154, "nll_loss": 0.1788228154182434, "rewards/accuracies": 1.0, "rewards/chosen": -2.393366230535321e-05, "rewards/margins": 0.2001660317182541, "rewards/rejected": -0.2001899629831314, "step": 9639 }, { "epoch": 6.666666666666667, "grad_norm": 4.793224334716797, "learning_rate": 1.8518518518518518e-05, "log_odds_chosen": 10.52408504486084, "log_odds_ratio": -0.0006772517808713019, "logits/chosen": -0.41700923442840576, "logits/rejected": -0.42366236448287964, "logps/chosen": -0.0007978577632457018, "logps/rejected": -2.5072598457336426, "loss": 0.474, "nll_loss": 0.11843764781951904, "rewards/accuracies": 1.0, "rewards/chosen": -7.978577195899561e-05, "rewards/margins": 0.2506462037563324, "rewards/rejected": -0.25072598457336426, "step": 9640 }, { "epoch": 6.667358229598894, "grad_norm": 5.932323455810547, "learning_rate": 1.851467650222837e-05, "log_odds_chosen": 11.987409591674805, "log_odds_ratio": -3.294226189609617e-05, "logits/chosen": -0.29465189576148987, "logits/rejected": -0.4393021762371063, "logps/chosen": -0.00013866080553270876, "logps/rejected": -3.125673770904541, "loss": 0.4178, "nll_loss": 0.1044505387544632, "rewards/accuracies": 1.0, "rewards/chosen": -1.3866080735169817e-05, "rewards/margins": 0.3125535249710083, "rewards/rejected": -0.3125673830509186, "step": 9641 }, { "epoch": 6.668049792531121, "grad_norm": 6.992809295654297, "learning_rate": 1.8510834485938223e-05, "log_odds_chosen": 9.339235305786133, "log_odds_ratio": -0.00024313360336236656, "logits/chosen": -0.6617879867553711, "logits/rejected": -0.6839908361434937, "logps/chosen": -0.00032492296304553747, "logps/rejected": -1.6257578134536743, "loss": 0.6955, "nll_loss": 0.17384573817253113, "rewards/accuracies": 1.0, "rewards/chosen": -3.249229848734103e-05, "rewards/margins": 0.16254329681396484, "rewards/rejected": -0.16257578134536743, "step": 9642 }, { "epoch": 6.6687413554633475, "grad_norm": 5.294729709625244, "learning_rate": 1.8506992469648072e-05, "log_odds_chosen": 10.480348587036133, "log_odds_ratio": -5.3323532483773306e-05, "logits/chosen": -0.5375571250915527, "logits/rejected": -0.5629109144210815, "logps/chosen": -0.0002348765847273171, "logps/rejected": -1.8479783535003662, "loss": 0.525, "nll_loss": 0.13125476241111755, "rewards/accuracies": 1.0, "rewards/chosen": -2.348765883652959e-05, "rewards/margins": 0.18477436900138855, "rewards/rejected": -0.18479785323143005, "step": 9643 }, { "epoch": 6.669432918395574, "grad_norm": 7.490869998931885, "learning_rate": 1.8503150453357925e-05, "log_odds_chosen": 9.760675430297852, "log_odds_ratio": -0.0007779019069857895, "logits/chosen": -0.43486517667770386, "logits/rejected": -0.5689921379089355, "logps/chosen": -0.000742304022423923, "logps/rejected": -2.1124515533447266, "loss": 0.6009, "nll_loss": 0.1501590758562088, "rewards/accuracies": 1.0, "rewards/chosen": -7.423041097354144e-05, "rewards/margins": 0.21117094159126282, "rewards/rejected": -0.21124516427516937, "step": 9644 }, { "epoch": 6.670124481327801, "grad_norm": 7.676600456237793, "learning_rate": 1.8499308437067774e-05, "log_odds_chosen": 11.441679000854492, "log_odds_ratio": -3.095114880125038e-05, "logits/chosen": 0.07396015524864197, "logits/rejected": -0.0116509348154068, "logps/chosen": -0.0002146841725334525, "logps/rejected": -2.4313998222351074, "loss": 0.7564, "nll_loss": 0.18909502029418945, "rewards/accuracies": 1.0, "rewards/chosen": -2.146841688954737e-05, "rewards/margins": 0.2431185245513916, "rewards/rejected": -0.24313999712467194, "step": 9645 }, { "epoch": 6.670816044260028, "grad_norm": 8.344149589538574, "learning_rate": 1.8495466420777623e-05, "log_odds_chosen": 10.729771614074707, "log_odds_ratio": -2.940691410913132e-05, "logits/chosen": -0.11430245637893677, "logits/rejected": -0.15196284651756287, "logps/chosen": -0.00011363202065695077, "logps/rejected": -1.7529911994934082, "loss": 0.588, "nll_loss": 0.14699505269527435, "rewards/accuracies": 1.0, "rewards/chosen": -1.1363201338099316e-05, "rewards/margins": 0.17528776824474335, "rewards/rejected": -0.17529912292957306, "step": 9646 }, { "epoch": 6.671507607192255, "grad_norm": 10.055741310119629, "learning_rate": 1.8491624404487475e-05, "log_odds_chosen": 10.192642211914062, "log_odds_ratio": -0.00016000257164705545, "logits/chosen": -0.41450396180152893, "logits/rejected": -0.4917091429233551, "logps/chosen": -0.0007967498968355358, "logps/rejected": -2.0323781967163086, "loss": 0.3877, "nll_loss": 0.09689746052026749, "rewards/accuracies": 1.0, "rewards/chosen": -7.967498822836205e-05, "rewards/margins": 0.2031581550836563, "rewards/rejected": -0.20323783159255981, "step": 9647 }, { "epoch": 6.672199170124482, "grad_norm": 2.560049533843994, "learning_rate": 1.8487782388197328e-05, "log_odds_chosen": 11.013113975524902, "log_odds_ratio": -0.00025713135255500674, "logits/chosen": -0.3730487525463104, "logits/rejected": -0.4757554531097412, "logps/chosen": -0.0009568912792019546, "logps/rejected": -2.6731841564178467, "loss": 0.4333, "nll_loss": 0.10830046236515045, "rewards/accuracies": 1.0, "rewards/chosen": -9.56891308305785e-05, "rewards/margins": 0.26722273230552673, "rewards/rejected": -0.2673184275627136, "step": 9648 }, { "epoch": 6.672890733056708, "grad_norm": 6.429426670074463, "learning_rate": 1.8483940371907177e-05, "log_odds_chosen": 11.566768646240234, "log_odds_ratio": -1.660044836171437e-05, "logits/chosen": -0.031103745102882385, "logits/rejected": -0.10523568093776703, "logps/chosen": -0.00010068865958601236, "logps/rejected": -2.0547738075256348, "loss": 0.6027, "nll_loss": 0.15067416429519653, "rewards/accuracies": 1.0, "rewards/chosen": -1.0068865776702296e-05, "rewards/margins": 0.2054673284292221, "rewards/rejected": -0.20547738671302795, "step": 9649 }, { "epoch": 6.673582295988935, "grad_norm": 5.067642688751221, "learning_rate": 1.848009835561703e-05, "log_odds_chosen": 11.074803352355957, "log_odds_ratio": -4.086527042090893e-05, "logits/chosen": -0.3686811327934265, "logits/rejected": -0.437101274728775, "logps/chosen": -0.00011494646605569869, "logps/rejected": -2.052640438079834, "loss": 0.5613, "nll_loss": 0.14033235609531403, "rewards/accuracies": 1.0, "rewards/chosen": -1.149464696936775e-05, "rewards/margins": 0.20525255799293518, "rewards/rejected": -0.20526404678821564, "step": 9650 }, { "epoch": 6.674273858921162, "grad_norm": 5.264712810516357, "learning_rate": 1.847625633932688e-05, "log_odds_chosen": 10.861353874206543, "log_odds_ratio": -5.436270294012502e-05, "logits/chosen": -0.3558247983455658, "logits/rejected": -0.3405495584011078, "logps/chosen": -0.0003190129646100104, "logps/rejected": -1.7811611890792847, "loss": 0.5036, "nll_loss": 0.1258881390094757, "rewards/accuracies": 1.0, "rewards/chosen": -3.190129791619256e-05, "rewards/margins": 0.17808422446250916, "rewards/rejected": -0.178116112947464, "step": 9651 }, { "epoch": 6.674965421853389, "grad_norm": 4.947920799255371, "learning_rate": 1.847241432303673e-05, "log_odds_chosen": 9.052240371704102, "log_odds_ratio": -0.00023398856865242124, "logits/chosen": -0.4382186830043793, "logits/rejected": -0.4656783640384674, "logps/chosen": -0.0004539778456091881, "logps/rejected": -1.42952561378479, "loss": 0.5459, "nll_loss": 0.13645029067993164, "rewards/accuracies": 1.0, "rewards/chosen": -4.539778456091881e-05, "rewards/margins": 0.14290717244148254, "rewards/rejected": -0.142952561378479, "step": 9652 }, { "epoch": 6.675656984785616, "grad_norm": 11.757040023803711, "learning_rate": 1.8468572306746583e-05, "log_odds_chosen": 11.986181259155273, "log_odds_ratio": -2.6844723834074102e-05, "logits/chosen": -0.5267226696014404, "logits/rejected": -0.6199319958686829, "logps/chosen": -0.0001811894035199657, "logps/rejected": -3.14363956451416, "loss": 0.5525, "nll_loss": 0.1381237804889679, "rewards/accuracies": 1.0, "rewards/chosen": -1.811894071579445e-05, "rewards/margins": 0.3143458366394043, "rewards/rejected": -0.314363956451416, "step": 9653 }, { "epoch": 6.676348547717843, "grad_norm": 13.25656509399414, "learning_rate": 1.8464730290456432e-05, "log_odds_chosen": 10.751134872436523, "log_odds_ratio": -7.180450484156609e-05, "logits/chosen": -0.4260287582874298, "logits/rejected": -0.6113356351852417, "logps/chosen": -0.00017347175162285566, "logps/rejected": -1.92714524269104, "loss": 0.8281, "nll_loss": 0.20700892806053162, "rewards/accuracies": 1.0, "rewards/chosen": -1.7347174434689805e-05, "rewards/margins": 0.1926971673965454, "rewards/rejected": -0.19271452724933624, "step": 9654 }, { "epoch": 6.677040110650069, "grad_norm": 5.646144866943359, "learning_rate": 1.846088827416628e-05, "log_odds_chosen": 10.25471019744873, "log_odds_ratio": -0.0004184903227724135, "logits/chosen": -0.3453481197357178, "logits/rejected": -0.44830867648124695, "logps/chosen": -0.0012309765443205833, "logps/rejected": -1.7108335494995117, "loss": 0.3654, "nll_loss": 0.09130599349737167, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012309766316320747, "rewards/margins": 0.17096027731895447, "rewards/rejected": -0.17108336091041565, "step": 9655 }, { "epoch": 6.677731673582296, "grad_norm": 6.999812126159668, "learning_rate": 1.8457046257876134e-05, "log_odds_chosen": 10.144024848937988, "log_odds_ratio": -9.134892752626911e-05, "logits/chosen": -0.5624509453773499, "logits/rejected": -0.5673438310623169, "logps/chosen": -0.0011014851043000817, "logps/rejected": -2.104128360748291, "loss": 0.4618, "nll_loss": 0.11543923616409302, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011014851043000817, "rewards/margins": 0.2103026956319809, "rewards/rejected": -0.21041283011436462, "step": 9656 }, { "epoch": 6.678423236514523, "grad_norm": 6.134801864624023, "learning_rate": 1.8453204241585986e-05, "log_odds_chosen": 10.598881721496582, "log_odds_ratio": -6.408988701878116e-05, "logits/chosen": -0.27690455317497253, "logits/rejected": -0.27721768617630005, "logps/chosen": -0.00022582666133530438, "logps/rejected": -1.9214727878570557, "loss": 0.6385, "nll_loss": 0.15961936116218567, "rewards/accuracies": 1.0, "rewards/chosen": -2.2582664314541034e-05, "rewards/margins": 0.19212469458580017, "rewards/rejected": -0.19214728474617004, "step": 9657 }, { "epoch": 6.67911479944675, "grad_norm": 12.72281551361084, "learning_rate": 1.8449362225295835e-05, "log_odds_chosen": 11.956278800964355, "log_odds_ratio": -1.5963418263709173e-05, "logits/chosen": -0.5332290530204773, "logits/rejected": -0.5421327948570251, "logps/chosen": -0.0003582322970032692, "logps/rejected": -3.2543258666992188, "loss": 0.4236, "nll_loss": 0.10589415580034256, "rewards/accuracies": 1.0, "rewards/chosen": -3.5823231883114204e-05, "rewards/margins": 0.3253967761993408, "rewards/rejected": -0.32543259859085083, "step": 9658 }, { "epoch": 6.679806362378977, "grad_norm": 5.651429653167725, "learning_rate": 1.8445520209005688e-05, "log_odds_chosen": 9.974549293518066, "log_odds_ratio": -0.00014403206296265125, "logits/chosen": -0.0029746294021606445, "logits/rejected": 0.003281906247138977, "logps/chosen": -0.001990691991522908, "logps/rejected": -1.8177213668823242, "loss": 0.6811, "nll_loss": 0.17025278508663177, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019906919624190778, "rewards/margins": 0.1815730780363083, "rewards/rejected": -0.1817721426486969, "step": 9659 }, { "epoch": 6.680497925311204, "grad_norm": 5.676749229431152, "learning_rate": 1.844167819271554e-05, "log_odds_chosen": 10.579449653625488, "log_odds_ratio": -5.1365925173740834e-05, "logits/chosen": -0.868455708026886, "logits/rejected": -0.7236647009849548, "logps/chosen": -0.00017911390750668943, "logps/rejected": -1.8537935018539429, "loss": 0.8948, "nll_loss": 0.223703533411026, "rewards/accuracies": 1.0, "rewards/chosen": -1.7911390386871062e-05, "rewards/margins": 0.18536144495010376, "rewards/rejected": -0.18537934124469757, "step": 9660 }, { "epoch": 6.68118948824343, "grad_norm": 7.042651176452637, "learning_rate": 1.843783617642539e-05, "log_odds_chosen": 10.082916259765625, "log_odds_ratio": -0.000205668417038396, "logits/chosen": -0.4950271248817444, "logits/rejected": -0.5898337364196777, "logps/chosen": -0.0004197222297079861, "logps/rejected": -1.7821499109268188, "loss": 0.5259, "nll_loss": 0.13145092129707336, "rewards/accuracies": 1.0, "rewards/chosen": -4.197222733637318e-05, "rewards/margins": 0.1781730204820633, "rewards/rejected": -0.17821499705314636, "step": 9661 }, { "epoch": 6.681881051175657, "grad_norm": 6.74050235748291, "learning_rate": 1.843399416013524e-05, "log_odds_chosen": 10.639854431152344, "log_odds_ratio": -7.393736450467259e-05, "logits/chosen": -0.6220520734786987, "logits/rejected": -0.6198940873146057, "logps/chosen": -0.00022765059839002788, "logps/rejected": -1.93635892868042, "loss": 0.437, "nll_loss": 0.10924112796783447, "rewards/accuracies": 1.0, "rewards/chosen": -2.276506165799219e-05, "rewards/margins": 0.19361314177513123, "rewards/rejected": -0.19363591074943542, "step": 9662 }, { "epoch": 6.682572614107884, "grad_norm": 4.9354329109191895, "learning_rate": 1.843015214384509e-05, "log_odds_chosen": 9.169984817504883, "log_odds_ratio": -0.0003960762987844646, "logits/chosen": -0.4277748167514801, "logits/rejected": -0.4469650089740753, "logps/chosen": -0.0004151844186708331, "logps/rejected": -1.3824002742767334, "loss": 0.4212, "nll_loss": 0.10525692999362946, "rewards/accuracies": 1.0, "rewards/chosen": -4.1518444049870595e-05, "rewards/margins": 0.13819852471351624, "rewards/rejected": -0.1382400393486023, "step": 9663 }, { "epoch": 6.683264177040111, "grad_norm": 4.044813632965088, "learning_rate": 1.842631012755494e-05, "log_odds_chosen": 10.278279304504395, "log_odds_ratio": -0.0003889031650032848, "logits/chosen": -0.4868510365486145, "logits/rejected": -0.5194868445396423, "logps/chosen": -0.0002763352240435779, "logps/rejected": -1.5866440534591675, "loss": 0.4056, "nll_loss": 0.10136575251817703, "rewards/accuracies": 1.0, "rewards/chosen": -2.7633523131953552e-05, "rewards/margins": 0.15863677859306335, "rewards/rejected": -0.15866440534591675, "step": 9664 }, { "epoch": 6.683955739972338, "grad_norm": 5.9982733726501465, "learning_rate": 1.8422468111264792e-05, "log_odds_chosen": 11.98559856414795, "log_odds_ratio": -1.5875868484727107e-05, "logits/chosen": -0.4334144592285156, "logits/rejected": -0.5116381049156189, "logps/chosen": -0.00015297062054742128, "logps/rejected": -2.601205348968506, "loss": 0.3902, "nll_loss": 0.09754490852355957, "rewards/accuracies": 1.0, "rewards/chosen": -1.5297060599550605e-05, "rewards/margins": 0.2601052522659302, "rewards/rejected": -0.26012054085731506, "step": 9665 }, { "epoch": 6.6846473029045645, "grad_norm": 5.988363265991211, "learning_rate": 1.8418626094974645e-05, "log_odds_chosen": 10.546000480651855, "log_odds_ratio": -9.494496771367267e-05, "logits/chosen": -0.5550673007965088, "logits/rejected": -0.6216533184051514, "logps/chosen": -0.0005368262063711882, "logps/rejected": -1.8109443187713623, "loss": 0.5025, "nll_loss": 0.12562590837478638, "rewards/accuracies": 1.0, "rewards/chosen": -5.368262645788491e-05, "rewards/margins": 0.18104076385498047, "rewards/rejected": -0.1810944527387619, "step": 9666 }, { "epoch": 6.685338865836791, "grad_norm": 5.435577869415283, "learning_rate": 1.8414784078684494e-05, "log_odds_chosen": 10.510980606079102, "log_odds_ratio": -0.00018327782163396478, "logits/chosen": -0.2902176082134247, "logits/rejected": -0.42554375529289246, "logps/chosen": -0.0003930656239390373, "logps/rejected": -2.0370352268218994, "loss": 0.5285, "nll_loss": 0.13210366666316986, "rewards/accuracies": 1.0, "rewards/chosen": -3.9306563849095255e-05, "rewards/margins": 0.20366422832012177, "rewards/rejected": -0.20370353758335114, "step": 9667 }, { "epoch": 6.686030428769018, "grad_norm": 10.245564460754395, "learning_rate": 1.8410942062394346e-05, "log_odds_chosen": 10.817817687988281, "log_odds_ratio": -4.786135832546279e-05, "logits/chosen": -0.3151760697364807, "logits/rejected": -0.2521878480911255, "logps/chosen": -0.00020681676687672734, "logps/rejected": -2.006657361984253, "loss": 0.7619, "nll_loss": 0.19047296047210693, "rewards/accuracies": 1.0, "rewards/chosen": -2.0681678506662138e-05, "rewards/margins": 0.2006450593471527, "rewards/rejected": -0.20066574215888977, "step": 9668 }, { "epoch": 6.686721991701245, "grad_norm": 13.401941299438477, "learning_rate": 1.84071000461042e-05, "log_odds_chosen": 10.34614372253418, "log_odds_ratio": -0.00021202873904258013, "logits/chosen": -0.23712968826293945, "logits/rejected": -0.26364678144454956, "logps/chosen": -0.0003467551141511649, "logps/rejected": -1.8067233562469482, "loss": 0.6057, "nll_loss": 0.15139594674110413, "rewards/accuracies": 1.0, "rewards/chosen": -3.467550777713768e-05, "rewards/margins": 0.1806376576423645, "rewards/rejected": -0.18067234754562378, "step": 9669 }, { "epoch": 6.687413554633472, "grad_norm": 5.781926155090332, "learning_rate": 1.8403258029814048e-05, "log_odds_chosen": 11.405348777770996, "log_odds_ratio": -0.0004130418528802693, "logits/chosen": -0.27188578248023987, "logits/rejected": -0.3051159977912903, "logps/chosen": -0.0005597221315838397, "logps/rejected": -2.6814310550689697, "loss": 0.5772, "nll_loss": 0.1442687064409256, "rewards/accuracies": 1.0, "rewards/chosen": -5.597221388597973e-05, "rewards/margins": 0.26808711886405945, "rewards/rejected": -0.2681431174278259, "step": 9670 }, { "epoch": 6.688105117565699, "grad_norm": 10.355371475219727, "learning_rate": 1.83994160135239e-05, "log_odds_chosen": 9.402498245239258, "log_odds_ratio": -0.00031604303512722254, "logits/chosen": -0.7214362621307373, "logits/rejected": -0.8196225166320801, "logps/chosen": -0.0010002891067415476, "logps/rejected": -1.4246968030929565, "loss": 0.6271, "nll_loss": 0.15675143897533417, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001000289194053039, "rewards/margins": 0.14236965775489807, "rewards/rejected": -0.14246967434883118, "step": 9671 }, { "epoch": 6.6887966804979255, "grad_norm": 4.973082542419434, "learning_rate": 1.839557399723375e-05, "log_odds_chosen": 10.832221984863281, "log_odds_ratio": -4.954859832650982e-05, "logits/chosen": -0.6315814256668091, "logits/rejected": -0.6772823333740234, "logps/chosen": -0.0002705375081859529, "logps/rejected": -2.3966708183288574, "loss": 0.501, "nll_loss": 0.12524764239788055, "rewards/accuracies": 1.0, "rewards/chosen": -2.705375118239317e-05, "rewards/margins": 0.2396400272846222, "rewards/rejected": -0.23966708779335022, "step": 9672 }, { "epoch": 6.689488243430152, "grad_norm": 5.241406440734863, "learning_rate": 1.8391731980943598e-05, "log_odds_chosen": 10.412388801574707, "log_odds_ratio": -0.0001051185536198318, "logits/chosen": -0.6095589995384216, "logits/rejected": -0.5799497365951538, "logps/chosen": -0.0005825125845149159, "logps/rejected": -1.8753221035003662, "loss": 0.4108, "nll_loss": 0.10269524902105331, "rewards/accuracies": 1.0, "rewards/chosen": -5.825126572744921e-05, "rewards/margins": 0.18747395277023315, "rewards/rejected": -0.1875322163105011, "step": 9673 }, { "epoch": 6.690179806362379, "grad_norm": 7.175179958343506, "learning_rate": 1.838788996465345e-05, "log_odds_chosen": 9.725004196166992, "log_odds_ratio": -0.00036285031819716096, "logits/chosen": -0.5444189310073853, "logits/rejected": -0.5555685758590698, "logps/chosen": -0.001297255977988243, "logps/rejected": -1.8658530712127686, "loss": 0.6532, "nll_loss": 0.16325727105140686, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012972559488844126, "rewards/margins": 0.18645557761192322, "rewards/rejected": -0.18658530712127686, "step": 9674 }, { "epoch": 6.690871369294606, "grad_norm": 10.508170127868652, "learning_rate": 1.8384047948363303e-05, "log_odds_chosen": 11.734052658081055, "log_odds_ratio": -1.1746539712476078e-05, "logits/chosen": -0.3542497456073761, "logits/rejected": -0.44388705492019653, "logps/chosen": -7.884105434641242e-05, "logps/rejected": -2.326277494430542, "loss": 0.6263, "nll_loss": 0.1565825343132019, "rewards/accuracies": 1.0, "rewards/chosen": -7.884104888944421e-06, "rewards/margins": 0.23261988162994385, "rewards/rejected": -0.2326277494430542, "step": 9675 }, { "epoch": 6.691562932226833, "grad_norm": 11.687735557556152, "learning_rate": 1.8380205932073152e-05, "log_odds_chosen": 10.39188289642334, "log_odds_ratio": -9.057446732185781e-05, "logits/chosen": -0.1264955699443817, "logits/rejected": -0.32701337337493896, "logps/chosen": -0.0002603328903205693, "logps/rejected": -1.843508005142212, "loss": 0.6477, "nll_loss": 0.1619156301021576, "rewards/accuracies": 1.0, "rewards/chosen": -2.603329085104633e-05, "rewards/margins": 0.18432477116584778, "rewards/rejected": -0.18435078859329224, "step": 9676 }, { "epoch": 6.69225449515906, "grad_norm": 6.140292167663574, "learning_rate": 1.8376363915783004e-05, "log_odds_chosen": 10.866915702819824, "log_odds_ratio": -7.089345308486372e-05, "logits/chosen": -0.295518696308136, "logits/rejected": -0.42706912755966187, "logps/chosen": -0.00022815105330664665, "logps/rejected": -2.186887264251709, "loss": 0.4853, "nll_loss": 0.12131841480731964, "rewards/accuracies": 1.0, "rewards/chosen": -2.281510387547314e-05, "rewards/margins": 0.21866591274738312, "rewards/rejected": -0.2186887264251709, "step": 9677 }, { "epoch": 6.6929460580912865, "grad_norm": 4.763021469116211, "learning_rate": 1.8372521899492857e-05, "log_odds_chosen": 11.162002563476562, "log_odds_ratio": -7.944177195895463e-05, "logits/chosen": -0.5970376133918762, "logits/rejected": -0.6784569621086121, "logps/chosen": -0.0002177559508709237, "logps/rejected": -2.3210833072662354, "loss": 0.4976, "nll_loss": 0.12439815700054169, "rewards/accuracies": 1.0, "rewards/chosen": -2.177559508709237e-05, "rewards/margins": 0.23208656907081604, "rewards/rejected": -0.23210833966732025, "step": 9678 }, { "epoch": 6.693637621023513, "grad_norm": 7.053638935089111, "learning_rate": 1.8368679883202706e-05, "log_odds_chosen": 10.3631591796875, "log_odds_ratio": -0.004077597986906767, "logits/chosen": 0.01545802503824234, "logits/rejected": -0.013516820967197418, "logps/chosen": -0.0012991420226171613, "logps/rejected": -2.3810198307037354, "loss": 1.1509, "nll_loss": 0.28730881214141846, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012991421681363136, "rewards/margins": 0.23797208070755005, "rewards/rejected": -0.238101989030838, "step": 9679 }, { "epoch": 6.69432918395574, "grad_norm": 4.983798503875732, "learning_rate": 1.836483786691256e-05, "log_odds_chosen": 10.06801986694336, "log_odds_ratio": -0.0001003733414108865, "logits/chosen": -0.10567940026521683, "logits/rejected": -0.22135068476200104, "logps/chosen": -0.00038139085518196225, "logps/rejected": -1.6341947317123413, "loss": 0.5966, "nll_loss": 0.1491512656211853, "rewards/accuracies": 1.0, "rewards/chosen": -3.813908188021742e-05, "rewards/margins": 0.16338133811950684, "rewards/rejected": -0.1634194552898407, "step": 9680 }, { "epoch": 6.695020746887967, "grad_norm": 5.632517337799072, "learning_rate": 1.8360995850622407e-05, "log_odds_chosen": 10.997325897216797, "log_odds_ratio": -5.951305865892209e-05, "logits/chosen": -0.24782253801822662, "logits/rejected": -0.27457350492477417, "logps/chosen": -0.0007469297270290554, "logps/rejected": -2.7543869018554688, "loss": 0.8587, "nll_loss": 0.21465857326984406, "rewards/accuracies": 1.0, "rewards/chosen": -7.469296542694792e-05, "rewards/margins": 0.2753640115261078, "rewards/rejected": -0.27543869614601135, "step": 9681 }, { "epoch": 6.695712309820194, "grad_norm": 6.578545093536377, "learning_rate": 1.8357153834332257e-05, "log_odds_chosen": 10.860370635986328, "log_odds_ratio": -9.661043441155925e-05, "logits/chosen": -0.47947752475738525, "logits/rejected": -0.4665643274784088, "logps/chosen": -0.00026218523271381855, "logps/rejected": -2.3484091758728027, "loss": 0.5463, "nll_loss": 0.13655605912208557, "rewards/accuracies": 1.0, "rewards/chosen": -2.621852581796702e-05, "rewards/margins": 0.23481470346450806, "rewards/rejected": -0.23484092950820923, "step": 9682 }, { "epoch": 6.696403872752421, "grad_norm": 3.9766275882720947, "learning_rate": 1.835331181804211e-05, "log_odds_chosen": 11.27324390411377, "log_odds_ratio": -0.0010711115319281816, "logits/chosen": -0.12841787934303284, "logits/rejected": -0.22983276844024658, "logps/chosen": -0.001020747353322804, "logps/rejected": -2.9668526649475098, "loss": 0.4289, "nll_loss": 0.10712532699108124, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001020747295115143, "rewards/margins": 0.2965832054615021, "rewards/rejected": -0.29668527841567993, "step": 9683 }, { "epoch": 6.6970954356846475, "grad_norm": 5.182412147521973, "learning_rate": 1.8349469801751958e-05, "log_odds_chosen": 10.569929122924805, "log_odds_ratio": -0.00021206194651313126, "logits/chosen": -0.38122642040252686, "logits/rejected": -0.48790067434310913, "logps/chosen": -0.000496427237521857, "logps/rejected": -2.254833936691284, "loss": 0.7962, "nll_loss": 0.19904069602489471, "rewards/accuracies": 1.0, "rewards/chosen": -4.96427237521857e-05, "rewards/margins": 0.22543376684188843, "rewards/rejected": -0.22548341751098633, "step": 9684 }, { "epoch": 6.697786998616874, "grad_norm": 4.416690349578857, "learning_rate": 1.834562778546181e-05, "log_odds_chosen": 10.155586242675781, "log_odds_ratio": -6.861994916107506e-05, "logits/chosen": -0.21608369052410126, "logits/rejected": -0.2599760890007019, "logps/chosen": -0.00011967943282797933, "logps/rejected": -1.2984521389007568, "loss": 0.3751, "nll_loss": 0.09377313405275345, "rewards/accuracies": 1.0, "rewards/chosen": -1.1967944374191575e-05, "rewards/margins": 0.12983325123786926, "rewards/rejected": -0.12984521687030792, "step": 9685 }, { "epoch": 6.698478561549101, "grad_norm": 8.809900283813477, "learning_rate": 1.8341785769171663e-05, "log_odds_chosen": 11.358308792114258, "log_odds_ratio": -3.5189397749491036e-05, "logits/chosen": -0.5408127307891846, "logits/rejected": -0.5565197467803955, "logps/chosen": -0.0002709012187551707, "logps/rejected": -2.950636625289917, "loss": 0.5811, "nll_loss": 0.14526110887527466, "rewards/accuracies": 1.0, "rewards/chosen": -2.7090123694506474e-05, "rewards/margins": 0.29503658413887024, "rewards/rejected": -0.29506367444992065, "step": 9686 }, { "epoch": 6.699170124481328, "grad_norm": 4.948386192321777, "learning_rate": 1.8337943752881512e-05, "log_odds_chosen": 10.860937118530273, "log_odds_ratio": -2.90093357762089e-05, "logits/chosen": -0.7190711498260498, "logits/rejected": -0.7402241230010986, "logps/chosen": -0.00014132638170849532, "logps/rejected": -1.7408219575881958, "loss": 0.5168, "nll_loss": 0.1291975975036621, "rewards/accuracies": 1.0, "rewards/chosen": -1.413263726135483e-05, "rewards/margins": 0.17406806349754333, "rewards/rejected": -0.1740821897983551, "step": 9687 }, { "epoch": 6.699861687413555, "grad_norm": 12.373002052307129, "learning_rate": 1.8334101736591364e-05, "log_odds_chosen": 9.979798316955566, "log_odds_ratio": -0.0001685578899923712, "logits/chosen": -0.5494964122772217, "logits/rejected": -0.6202380657196045, "logps/chosen": -0.0005979561246931553, "logps/rejected": -1.987154483795166, "loss": 0.4654, "nll_loss": 0.11633975803852081, "rewards/accuracies": 1.0, "rewards/chosen": -5.979560955893248e-05, "rewards/margins": 0.1986556500196457, "rewards/rejected": -0.1987154632806778, "step": 9688 }, { "epoch": 6.700553250345782, "grad_norm": 9.903979301452637, "learning_rate": 1.8330259720301217e-05, "log_odds_chosen": 11.164133071899414, "log_odds_ratio": -0.0001408745301887393, "logits/chosen": -0.4057766795158386, "logits/rejected": -0.4818309545516968, "logps/chosen": -0.00046957345330156386, "logps/rejected": -2.297665596008301, "loss": 0.72, "nll_loss": 0.1799810528755188, "rewards/accuracies": 1.0, "rewards/chosen": -4.695734241977334e-05, "rewards/margins": 0.22971957921981812, "rewards/rejected": -0.22976654767990112, "step": 9689 }, { "epoch": 6.7012448132780085, "grad_norm": 5.544249534606934, "learning_rate": 1.8326417704011066e-05, "log_odds_chosen": 10.337028503417969, "log_odds_ratio": -7.314958929782733e-05, "logits/chosen": -0.5487796068191528, "logits/rejected": -0.6383087635040283, "logps/chosen": -0.0005572509253397584, "logps/rejected": -2.5427985191345215, "loss": 0.5788, "nll_loss": 0.14469903707504272, "rewards/accuracies": 1.0, "rewards/chosen": -5.572508234763518e-05, "rewards/margins": 0.25422412157058716, "rewards/rejected": -0.25427988171577454, "step": 9690 }, { "epoch": 6.701936376210235, "grad_norm": 3.8700461387634277, "learning_rate": 1.8322575687720915e-05, "log_odds_chosen": 10.711881637573242, "log_odds_ratio": -5.9012105339206755e-05, "logits/chosen": -0.5717523694038391, "logits/rejected": -0.5392295122146606, "logps/chosen": -0.0002523858565837145, "logps/rejected": -2.2547340393066406, "loss": 0.5199, "nll_loss": 0.12995873391628265, "rewards/accuracies": 1.0, "rewards/chosen": -2.523858711356297e-05, "rewards/margins": 0.22544819116592407, "rewards/rejected": -0.22547343373298645, "step": 9691 }, { "epoch": 6.702627939142462, "grad_norm": 6.470229148864746, "learning_rate": 1.8318733671430767e-05, "log_odds_chosen": 11.414847373962402, "log_odds_ratio": -4.2619489249773324e-05, "logits/chosen": -0.591145396232605, "logits/rejected": -0.6597984433174133, "logps/chosen": -0.00014959601685404778, "logps/rejected": -2.2852067947387695, "loss": 0.3878, "nll_loss": 0.09693406522274017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4959600775910076e-05, "rewards/margins": 0.22850573062896729, "rewards/rejected": -0.2285206913948059, "step": 9692 }, { "epoch": 6.703319502074689, "grad_norm": 5.7831950187683105, "learning_rate": 1.8314891655140616e-05, "log_odds_chosen": 9.764205932617188, "log_odds_ratio": -0.00062859698664397, "logits/chosen": -0.5247955918312073, "logits/rejected": -0.4551923871040344, "logps/chosen": -0.001059068599715829, "logps/rejected": -1.63079035282135, "loss": 0.6319, "nll_loss": 0.15792065858840942, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010590685269562528, "rewards/margins": 0.16297312080860138, "rewards/rejected": -0.16307902336120605, "step": 9693 }, { "epoch": 6.704011065006916, "grad_norm": 5.505740642547607, "learning_rate": 1.831104963885047e-05, "log_odds_chosen": 10.843637466430664, "log_odds_ratio": -8.815214096102864e-05, "logits/chosen": -0.569132924079895, "logits/rejected": -0.7134794592857361, "logps/chosen": -0.00018152498523704708, "logps/rejected": -2.1765732765197754, "loss": 0.8629, "nll_loss": 0.21571555733680725, "rewards/accuracies": 1.0, "rewards/chosen": -1.815249925130047e-05, "rewards/margins": 0.21763917803764343, "rewards/rejected": -0.21765734255313873, "step": 9694 }, { "epoch": 6.704702627939143, "grad_norm": 4.550797939300537, "learning_rate": 1.830720762256032e-05, "log_odds_chosen": 10.1773681640625, "log_odds_ratio": -0.00012865892495028675, "logits/chosen": -0.46741408109664917, "logits/rejected": -0.44873249530792236, "logps/chosen": -0.000390806351788342, "logps/rejected": -1.9166791439056396, "loss": 0.6391, "nll_loss": 0.15975472331047058, "rewards/accuracies": 1.0, "rewards/chosen": -3.908063445123844e-05, "rewards/margins": 0.1916288435459137, "rewards/rejected": -0.19166792929172516, "step": 9695 }, { "epoch": 6.7053941908713695, "grad_norm": 6.874147891998291, "learning_rate": 1.830336560627017e-05, "log_odds_chosen": 9.434051513671875, "log_odds_ratio": -0.0004302481247577816, "logits/chosen": -0.34089720249176025, "logits/rejected": -0.3749096989631653, "logps/chosen": -0.0005576977273449302, "logps/rejected": -1.6736546754837036, "loss": 0.6974, "nll_loss": 0.17429561913013458, "rewards/accuracies": 1.0, "rewards/chosen": -5.5769771279301494e-05, "rewards/margins": 0.1673096865415573, "rewards/rejected": -0.1673654466867447, "step": 9696 }, { "epoch": 6.706085753803596, "grad_norm": 5.495350360870361, "learning_rate": 1.8299523589980023e-05, "log_odds_chosen": 10.23301887512207, "log_odds_ratio": -0.00018941161397378892, "logits/chosen": 0.054965417832136154, "logits/rejected": -0.0001494288444519043, "logps/chosen": -0.0002769737329799682, "logps/rejected": -1.891705870628357, "loss": 0.7117, "nll_loss": 0.17790867388248444, "rewards/accuracies": 1.0, "rewards/chosen": -2.769737329799682e-05, "rewards/margins": 0.18914291262626648, "rewards/rejected": -0.18917059898376465, "step": 9697 }, { "epoch": 6.706777316735823, "grad_norm": 6.182698726654053, "learning_rate": 1.8295681573689875e-05, "log_odds_chosen": 10.451370239257812, "log_odds_ratio": -0.0012464377796277404, "logits/chosen": -0.6633809804916382, "logits/rejected": -0.69439297914505, "logps/chosen": -0.001036527450196445, "logps/rejected": -2.798448085784912, "loss": 0.8142, "nll_loss": 0.20343345403671265, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010365273919887841, "rewards/margins": 0.27974116802215576, "rewards/rejected": -0.27984482049942017, "step": 9698 }, { "epoch": 6.70746887966805, "grad_norm": 6.854715824127197, "learning_rate": 1.8291839557399724e-05, "log_odds_chosen": 10.501351356506348, "log_odds_ratio": -9.871691872831434e-05, "logits/chosen": -0.4957367777824402, "logits/rejected": -0.5315590500831604, "logps/chosen": -0.0001666514144744724, "logps/rejected": -1.935477614402771, "loss": 0.7584, "nll_loss": 0.1895884871482849, "rewards/accuracies": 1.0, "rewards/chosen": -1.666514071985148e-05, "rewards/margins": 0.1935310959815979, "rewards/rejected": -0.19354775547981262, "step": 9699 }, { "epoch": 6.708160442600277, "grad_norm": 6.463924407958984, "learning_rate": 1.8287997541109573e-05, "log_odds_chosen": 10.318857192993164, "log_odds_ratio": -0.0005355229368433356, "logits/chosen": -0.5634887218475342, "logits/rejected": -0.6088401079177856, "logps/chosen": -0.0005697443266399205, "logps/rejected": -2.081566572189331, "loss": 0.5238, "nll_loss": 0.13088880479335785, "rewards/accuracies": 1.0, "rewards/chosen": -5.6974429753609e-05, "rewards/margins": 0.20809967815876007, "rewards/rejected": -0.20815666019916534, "step": 9700 }, { "epoch": 6.708852005532504, "grad_norm": 4.38247537612915, "learning_rate": 1.8284155524819426e-05, "log_odds_chosen": 10.187275886535645, "log_odds_ratio": -0.00047190545592457056, "logits/chosen": -0.6247849464416504, "logits/rejected": -0.6580078601837158, "logps/chosen": -0.0009922175668179989, "logps/rejected": -1.968461513519287, "loss": 0.5271, "nll_loss": 0.13173647224903107, "rewards/accuracies": 1.0, "rewards/chosen": -9.922177559928969e-05, "rewards/margins": 0.19674694538116455, "rewards/rejected": -0.1968461573123932, "step": 9701 }, { "epoch": 6.70954356846473, "grad_norm": 11.628179550170898, "learning_rate": 1.8280313508529275e-05, "log_odds_chosen": 10.961841583251953, "log_odds_ratio": -0.00017139659030362964, "logits/chosen": -0.5121583938598633, "logits/rejected": -0.5763285160064697, "logps/chosen": -0.00012023936869809404, "logps/rejected": -2.096275568008423, "loss": 0.6286, "nll_loss": 0.15714293718338013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2023936506011523e-05, "rewards/margins": 0.20961549878120422, "rewards/rejected": -0.20962753891944885, "step": 9702 }, { "epoch": 6.710235131396957, "grad_norm": 5.260577201843262, "learning_rate": 1.8276471492239127e-05, "log_odds_chosen": 9.537765502929688, "log_odds_ratio": -0.00026477783103473485, "logits/chosen": -0.22354461252689362, "logits/rejected": -0.23986151814460754, "logps/chosen": -0.0009489476797170937, "logps/rejected": -1.7701373100280762, "loss": 0.6928, "nll_loss": 0.17317166924476624, "rewards/accuracies": 1.0, "rewards/chosen": -9.489477088209242e-05, "rewards/margins": 0.17691883444786072, "rewards/rejected": -0.17701373994350433, "step": 9703 }, { "epoch": 6.710926694329184, "grad_norm": 6.721794605255127, "learning_rate": 1.827262947594898e-05, "log_odds_chosen": 10.314403533935547, "log_odds_ratio": -0.00021236721659079194, "logits/chosen": -0.23889127373695374, "logits/rejected": -0.44527435302734375, "logps/chosen": -0.0008492742199450731, "logps/rejected": -2.0169315338134766, "loss": 0.4235, "nll_loss": 0.1058477908372879, "rewards/accuracies": 1.0, "rewards/chosen": -8.492742927046493e-05, "rewards/margins": 0.20160824060440063, "rewards/rejected": -0.20169317722320557, "step": 9704 }, { "epoch": 6.711618257261411, "grad_norm": 4.631369113922119, "learning_rate": 1.826878745965883e-05, "log_odds_chosen": 11.75677490234375, "log_odds_ratio": -5.7359426136827096e-05, "logits/chosen": -0.39702045917510986, "logits/rejected": -0.4525296092033386, "logps/chosen": -0.0002216920693172142, "logps/rejected": -2.8050849437713623, "loss": 0.6856, "nll_loss": 0.17140543460845947, "rewards/accuracies": 1.0, "rewards/chosen": -2.216920802311506e-05, "rewards/margins": 0.2804863452911377, "rewards/rejected": -0.28050851821899414, "step": 9705 }, { "epoch": 6.712309820193638, "grad_norm": 6.538344383239746, "learning_rate": 1.826494544336868e-05, "log_odds_chosen": 10.887186050415039, "log_odds_ratio": -3.279655720689334e-05, "logits/chosen": 0.09093475341796875, "logits/rejected": -0.08036897331476212, "logps/chosen": -0.0012253046734258533, "logps/rejected": -2.9093241691589355, "loss": 0.7538, "nll_loss": 0.18844729661941528, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012253047316335142, "rewards/margins": 0.29080986976623535, "rewards/rejected": -0.29093241691589355, "step": 9706 }, { "epoch": 6.713001383125865, "grad_norm": 6.628562927246094, "learning_rate": 1.8261103427078534e-05, "log_odds_chosen": 10.301244735717773, "log_odds_ratio": -0.00011587039625737816, "logits/chosen": -0.19004501402378082, "logits/rejected": -0.1924629509449005, "logps/chosen": -0.0005146628245711327, "logps/rejected": -2.369720458984375, "loss": 0.7877, "nll_loss": 0.1969122737646103, "rewards/accuracies": 1.0, "rewards/chosen": -5.1466282457113266e-05, "rewards/margins": 0.23692059516906738, "rewards/rejected": -0.23697204887866974, "step": 9707 }, { "epoch": 6.713692946058091, "grad_norm": 5.522481918334961, "learning_rate": 1.8257261410788383e-05, "log_odds_chosen": 11.290914535522461, "log_odds_ratio": -1.9673158021760173e-05, "logits/chosen": -0.5909338593482971, "logits/rejected": -0.6225830912590027, "logps/chosen": -0.00017350060807075351, "logps/rejected": -2.613370180130005, "loss": 0.8707, "nll_loss": 0.21768240630626678, "rewards/accuracies": 1.0, "rewards/chosen": -1.7350061170873232e-05, "rewards/margins": 0.26131969690322876, "rewards/rejected": -0.2613370418548584, "step": 9708 }, { "epoch": 6.714384508990318, "grad_norm": 5.048619747161865, "learning_rate": 1.8253419394498232e-05, "log_odds_chosen": 11.536866188049316, "log_odds_ratio": -8.987126784631982e-05, "logits/chosen": -0.37755972146987915, "logits/rejected": -0.4132126271724701, "logps/chosen": -0.00022529861598741263, "logps/rejected": -3.207831382751465, "loss": 0.64, "nll_loss": 0.15998563170433044, "rewards/accuracies": 1.0, "rewards/chosen": -2.2529860871145502e-05, "rewards/margins": 0.3207606077194214, "rewards/rejected": -0.3207831382751465, "step": 9709 }, { "epoch": 6.715076071922545, "grad_norm": 15.114882469177246, "learning_rate": 1.8249577378208084e-05, "log_odds_chosen": 11.053642272949219, "log_odds_ratio": -3.2246993214357644e-05, "logits/chosen": -0.8833234310150146, "logits/rejected": -0.9113695025444031, "logps/chosen": -7.948539132485166e-05, "logps/rejected": -1.6513739824295044, "loss": 0.49, "nll_loss": 0.12250618636608124, "rewards/accuracies": 1.0, "rewards/chosen": -7.948539860080928e-06, "rewards/margins": 0.16512946784496307, "rewards/rejected": -0.1651374101638794, "step": 9710 }, { "epoch": 6.715767634854772, "grad_norm": 20.863941192626953, "learning_rate": 1.8245735361917933e-05, "log_odds_chosen": 10.994382858276367, "log_odds_ratio": -0.00030419373069889843, "logits/chosen": -0.337488055229187, "logits/rejected": -0.3565511703491211, "logps/chosen": -0.0006342109409160912, "logps/rejected": -2.678623676300049, "loss": 0.614, "nll_loss": 0.15347182750701904, "rewards/accuracies": 1.0, "rewards/chosen": -6.342109554680064e-05, "rewards/margins": 0.2677989602088928, "rewards/rejected": -0.26786237955093384, "step": 9711 }, { "epoch": 6.716459197786999, "grad_norm": 11.249113082885742, "learning_rate": 1.8241893345627786e-05, "log_odds_chosen": 11.143560409545898, "log_odds_ratio": -5.740381675423123e-05, "logits/chosen": -0.40606385469436646, "logits/rejected": -0.4958600699901581, "logps/chosen": -0.00015600050392095, "logps/rejected": -2.114410400390625, "loss": 0.4894, "nll_loss": 0.12235406786203384, "rewards/accuracies": 1.0, "rewards/chosen": -1.560005148348864e-05, "rewards/margins": 0.21142543852329254, "rewards/rejected": -0.2114410549402237, "step": 9712 }, { "epoch": 6.717150760719226, "grad_norm": 7.461971759796143, "learning_rate": 1.8238051329337638e-05, "log_odds_chosen": 10.837980270385742, "log_odds_ratio": -2.7545340344659053e-05, "logits/chosen": -0.575896143913269, "logits/rejected": -0.6402074098587036, "logps/chosen": -0.00017790490528568625, "logps/rejected": -1.9333128929138184, "loss": 0.6003, "nll_loss": 0.1500811129808426, "rewards/accuracies": 1.0, "rewards/chosen": -1.7790491256164387e-05, "rewards/margins": 0.19331350922584534, "rewards/rejected": -0.1933312863111496, "step": 9713 }, { "epoch": 6.717842323651452, "grad_norm": 10.962932586669922, "learning_rate": 1.8234209313047487e-05, "log_odds_chosen": 11.474613189697266, "log_odds_ratio": -0.0001845585647970438, "logits/chosen": -0.4879600405693054, "logits/rejected": -0.56697678565979, "logps/chosen": -0.0002608491631690413, "logps/rejected": -2.9621644020080566, "loss": 0.8872, "nll_loss": 0.221793994307518, "rewards/accuracies": 1.0, "rewards/chosen": -2.608491740829777e-05, "rewards/margins": 0.2961903512477875, "rewards/rejected": -0.2962164282798767, "step": 9714 }, { "epoch": 6.718533886583679, "grad_norm": 4.909445762634277, "learning_rate": 1.823036729675734e-05, "log_odds_chosen": 10.88532829284668, "log_odds_ratio": -4.1851682908600196e-05, "logits/chosen": -0.5104326605796814, "logits/rejected": -0.5674954652786255, "logps/chosen": -0.0001578840019647032, "logps/rejected": -1.880581021308899, "loss": 0.3437, "nll_loss": 0.08592512458562851, "rewards/accuracies": 1.0, "rewards/chosen": -1.5788402379257604e-05, "rewards/margins": 0.1880423128604889, "rewards/rejected": -0.18805810809135437, "step": 9715 }, { "epoch": 6.719225449515906, "grad_norm": 10.080972671508789, "learning_rate": 1.8226525280467192e-05, "log_odds_chosen": 9.879682540893555, "log_odds_ratio": -0.00017235639097634703, "logits/chosen": -0.4462816119194031, "logits/rejected": -0.4516682028770447, "logps/chosen": -0.00025914120487868786, "logps/rejected": -1.6137008666992188, "loss": 0.4635, "nll_loss": 0.1158699095249176, "rewards/accuracies": 1.0, "rewards/chosen": -2.5914119760273024e-05, "rewards/margins": 0.16134417057037354, "rewards/rejected": -0.16137008368968964, "step": 9716 }, { "epoch": 6.719917012448133, "grad_norm": 13.479330062866211, "learning_rate": 1.822268326417704e-05, "log_odds_chosen": 10.896804809570312, "log_odds_ratio": -6.16292527411133e-05, "logits/chosen": 0.06766664981842041, "logits/rejected": 0.15634863078594208, "logps/chosen": -0.00036590383388102055, "logps/rejected": -2.6821343898773193, "loss": 0.5224, "nll_loss": 0.13058429956436157, "rewards/accuracies": 1.0, "rewards/chosen": -3.6590383388102055e-05, "rewards/margins": 0.2681768536567688, "rewards/rejected": -0.2682134509086609, "step": 9717 }, { "epoch": 6.72060857538036, "grad_norm": 5.043457984924316, "learning_rate": 1.821884124788689e-05, "log_odds_chosen": 10.161759376525879, "log_odds_ratio": -0.00023662808234803379, "logits/chosen": -0.6527903079986572, "logits/rejected": -0.6864253282546997, "logps/chosen": -0.00034481301554478705, "logps/rejected": -2.12319278717041, "loss": 0.6527, "nll_loss": 0.16315683722496033, "rewards/accuracies": 1.0, "rewards/chosen": -3.4481301554478705e-05, "rewards/margins": 0.21228480339050293, "rewards/rejected": -0.2123192846775055, "step": 9718 }, { "epoch": 6.7213001383125865, "grad_norm": 4.991131782531738, "learning_rate": 1.8214999231596743e-05, "log_odds_chosen": 9.911870956420898, "log_odds_ratio": -8.921151311369613e-05, "logits/chosen": -0.22037310898303986, "logits/rejected": -0.2324230670928955, "logps/chosen": -0.00027700295322574675, "logps/rejected": -1.6150810718536377, "loss": 0.5001, "nll_loss": 0.12501946091651917, "rewards/accuracies": 1.0, "rewards/chosen": -2.7700294594978914e-05, "rewards/margins": 0.16148041188716888, "rewards/rejected": -0.16150811314582825, "step": 9719 }, { "epoch": 6.721991701244813, "grad_norm": 7.188221454620361, "learning_rate": 1.8211157215306592e-05, "log_odds_chosen": 10.476211547851562, "log_odds_ratio": -0.00016788275388535112, "logits/chosen": -0.6707624197006226, "logits/rejected": -0.7707037925720215, "logps/chosen": -0.0008173736860044301, "logps/rejected": -1.7757651805877686, "loss": 0.4814, "nll_loss": 0.12034104019403458, "rewards/accuracies": 1.0, "rewards/chosen": -8.173738024197519e-05, "rewards/margins": 0.17749479413032532, "rewards/rejected": -0.17757654190063477, "step": 9720 }, { "epoch": 6.72268326417704, "grad_norm": 5.455285549163818, "learning_rate": 1.8207315199016444e-05, "log_odds_chosen": 10.982564926147461, "log_odds_ratio": -5.847503780387342e-05, "logits/chosen": -0.566404402256012, "logits/rejected": -0.5938459038734436, "logps/chosen": -0.0002679351018741727, "logps/rejected": -2.2465574741363525, "loss": 0.8184, "nll_loss": 0.20458795130252838, "rewards/accuracies": 1.0, "rewards/chosen": -2.6793512006406672e-05, "rewards/margins": 0.22462892532348633, "rewards/rejected": -0.22465574741363525, "step": 9721 }, { "epoch": 6.723374827109267, "grad_norm": 6.680959701538086, "learning_rate": 1.8203473182726297e-05, "log_odds_chosen": 10.745830535888672, "log_odds_ratio": -0.0002788332349155098, "logits/chosen": -0.7466229200363159, "logits/rejected": -0.8177922964096069, "logps/chosen": -0.00017833249876275659, "logps/rejected": -1.9720370769500732, "loss": 0.6547, "nll_loss": 0.16363570094108582, "rewards/accuracies": 1.0, "rewards/chosen": -1.7833252059062943e-05, "rewards/margins": 0.19718587398529053, "rewards/rejected": -0.19720371067523956, "step": 9722 }, { "epoch": 6.724066390041494, "grad_norm": 6.327624797821045, "learning_rate": 1.8199631166436146e-05, "log_odds_chosen": 11.032835960388184, "log_odds_ratio": -6.335016951197758e-05, "logits/chosen": -0.3551556169986725, "logits/rejected": -0.3900320529937744, "logps/chosen": -0.000181854484253563, "logps/rejected": -2.182952404022217, "loss": 0.4887, "nll_loss": 0.12215951085090637, "rewards/accuracies": 1.0, "rewards/chosen": -1.81854484253563e-05, "rewards/margins": 0.21827706694602966, "rewards/rejected": -0.21829524636268616, "step": 9723 }, { "epoch": 6.724757952973721, "grad_norm": 4.812091827392578, "learning_rate": 1.8195789150145998e-05, "log_odds_chosen": 11.22108268737793, "log_odds_ratio": -8.43647649162449e-05, "logits/chosen": -0.6499758958816528, "logits/rejected": -0.6795446872711182, "logps/chosen": -0.00011281066690571606, "logps/rejected": -2.3831028938293457, "loss": 0.4783, "nll_loss": 0.11957649886608124, "rewards/accuracies": 1.0, "rewards/chosen": -1.1281066690571606e-05, "rewards/margins": 0.23829901218414307, "rewards/rejected": -0.2383102923631668, "step": 9724 }, { "epoch": 6.7254495159059475, "grad_norm": 7.116838455200195, "learning_rate": 1.819194713385585e-05, "log_odds_chosen": 10.212159156799316, "log_odds_ratio": -0.00012075810082023963, "logits/chosen": -0.8690159320831299, "logits/rejected": -0.9198919534683228, "logps/chosen": -0.0005652224645018578, "logps/rejected": -2.057105302810669, "loss": 0.5324, "nll_loss": 0.13308003544807434, "rewards/accuracies": 1.0, "rewards/chosen": -5.652225445373915e-05, "rewards/margins": 0.20565402507781982, "rewards/rejected": -0.2057105451822281, "step": 9725 }, { "epoch": 6.726141078838174, "grad_norm": 7.264196872711182, "learning_rate": 1.81881051175657e-05, "log_odds_chosen": 11.783138275146484, "log_odds_ratio": -1.351820810668869e-05, "logits/chosen": -0.6626642942428589, "logits/rejected": -0.6600688099861145, "logps/chosen": -0.00021941386512480676, "logps/rejected": -2.530977249145508, "loss": 0.4854, "nll_loss": 0.12134195864200592, "rewards/accuracies": 1.0, "rewards/chosen": -2.194138869526796e-05, "rewards/margins": 0.25307580828666687, "rewards/rejected": -0.2530977427959442, "step": 9726 }, { "epoch": 6.726832641770401, "grad_norm": 5.8319411277771, "learning_rate": 1.8184263101275552e-05, "log_odds_chosen": 10.988485336303711, "log_odds_ratio": -6.073584154364653e-05, "logits/chosen": -0.42071330547332764, "logits/rejected": -0.4699041247367859, "logps/chosen": -0.0001319625589530915, "logps/rejected": -1.9698193073272705, "loss": 0.5171, "nll_loss": 0.12927477061748505, "rewards/accuracies": 1.0, "rewards/chosen": -1.3196255167713389e-05, "rewards/margins": 0.19696873426437378, "rewards/rejected": -0.19698193669319153, "step": 9727 }, { "epoch": 6.727524204702628, "grad_norm": 10.939681053161621, "learning_rate": 1.81804210849854e-05, "log_odds_chosen": 11.293813705444336, "log_odds_ratio": -5.135497121955268e-05, "logits/chosen": -0.7324642539024353, "logits/rejected": -0.7711427807807922, "logps/chosen": -0.00021397329692263156, "logps/rejected": -2.492666721343994, "loss": 0.7633, "nll_loss": 0.19083064794540405, "rewards/accuracies": 1.0, "rewards/chosen": -2.1397330783656798e-05, "rewards/margins": 0.249245285987854, "rewards/rejected": -0.24926666915416718, "step": 9728 }, { "epoch": 6.728215767634855, "grad_norm": 4.356656074523926, "learning_rate": 1.817657906869525e-05, "log_odds_chosen": 11.521858215332031, "log_odds_ratio": -3.647685662144795e-05, "logits/chosen": -0.21235541999340057, "logits/rejected": -0.17017611861228943, "logps/chosen": -0.0003204144013579935, "logps/rejected": -3.0372743606567383, "loss": 0.6446, "nll_loss": 0.16115355491638184, "rewards/accuracies": 1.0, "rewards/chosen": -3.2041443773778155e-05, "rewards/margins": 0.3036953806877136, "rewards/rejected": -0.3037274479866028, "step": 9729 }, { "epoch": 6.728907330567082, "grad_norm": 4.123113632202148, "learning_rate": 1.8172737052405103e-05, "log_odds_chosen": 11.573395729064941, "log_odds_ratio": -1.4847018974251114e-05, "logits/chosen": -0.36827945709228516, "logits/rejected": -0.4537869691848755, "logps/chosen": -0.00014661697787232697, "logps/rejected": -2.7357630729675293, "loss": 0.5253, "nll_loss": 0.13132527470588684, "rewards/accuracies": 1.0, "rewards/chosen": -1.466169942432316e-05, "rewards/margins": 0.27356165647506714, "rewards/rejected": -0.2735763192176819, "step": 9730 }, { "epoch": 6.7295988934993085, "grad_norm": 5.720463275909424, "learning_rate": 1.8168895036114955e-05, "log_odds_chosen": 11.32773208618164, "log_odds_ratio": -3.699558510561474e-05, "logits/chosen": -0.2998761832714081, "logits/rejected": -0.35165315866470337, "logps/chosen": -0.00017662344907876104, "logps/rejected": -2.2534048557281494, "loss": 0.9829, "nll_loss": 0.2457246333360672, "rewards/accuracies": 1.0, "rewards/chosen": -1.766234345268458e-05, "rewards/margins": 0.22532284259796143, "rewards/rejected": -0.22534050047397614, "step": 9731 }, { "epoch": 6.730290456431535, "grad_norm": 18.147850036621094, "learning_rate": 1.8165053019824804e-05, "log_odds_chosen": 11.302204132080078, "log_odds_ratio": -1.858202631410677e-05, "logits/chosen": -0.5007428526878357, "logits/rejected": -0.5436948537826538, "logps/chosen": -0.00026709146914072335, "logps/rejected": -2.660487174987793, "loss": 0.6066, "nll_loss": 0.15165534615516663, "rewards/accuracies": 1.0, "rewards/chosen": -2.6709147277870215e-05, "rewards/margins": 0.2660219967365265, "rewards/rejected": -0.26604872941970825, "step": 9732 }, { "epoch": 6.730982019363762, "grad_norm": 5.787598133087158, "learning_rate": 1.8161211003534657e-05, "log_odds_chosen": 9.566076278686523, "log_odds_ratio": -0.00015092222020030022, "logits/chosen": -0.6181987524032593, "logits/rejected": -0.660263180732727, "logps/chosen": -0.0011174660176038742, "logps/rejected": -1.9777193069458008, "loss": 0.4916, "nll_loss": 0.12287604808807373, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001117465944844298, "rewards/margins": 0.1976601779460907, "rewards/rejected": -0.19777193665504456, "step": 9733 }, { "epoch": 6.731673582295989, "grad_norm": 11.69165325164795, "learning_rate": 1.815736898724451e-05, "log_odds_chosen": 10.284931182861328, "log_odds_ratio": -6.89550070092082e-05, "logits/chosen": -0.6240053772926331, "logits/rejected": -0.6646280288696289, "logps/chosen": -0.00039611352258361876, "logps/rejected": -2.2645621299743652, "loss": 0.5858, "nll_loss": 0.1464376002550125, "rewards/accuracies": 1.0, "rewards/chosen": -3.96113537135534e-05, "rewards/margins": 0.22641661763191223, "rewards/rejected": -0.22645621001720428, "step": 9734 }, { "epoch": 6.732365145228216, "grad_norm": 5.843806266784668, "learning_rate": 1.8153526970954358e-05, "log_odds_chosen": 10.615920066833496, "log_odds_ratio": -9.333140042144805e-05, "logits/chosen": -0.599016010761261, "logits/rejected": -0.6555784940719604, "logps/chosen": -0.0005432904581539333, "logps/rejected": -2.101348876953125, "loss": 0.4896, "nll_loss": 0.12239242345094681, "rewards/accuracies": 1.0, "rewards/chosen": -5.4329044360201806e-05, "rewards/margins": 0.2100805640220642, "rewards/rejected": -0.21013489365577698, "step": 9735 }, { "epoch": 6.733056708160443, "grad_norm": 6.959444999694824, "learning_rate": 1.814968495466421e-05, "log_odds_chosen": 11.002592086791992, "log_odds_ratio": -2.5873685444821604e-05, "logits/chosen": -0.22370731830596924, "logits/rejected": -0.30544498562812805, "logps/chosen": -0.0002484057913534343, "logps/rejected": -2.4761853218078613, "loss": 0.4903, "nll_loss": 0.12257062643766403, "rewards/accuracies": 1.0, "rewards/chosen": -2.4840579499141313e-05, "rewards/margins": 0.24759367108345032, "rewards/rejected": -0.24761851131916046, "step": 9736 }, { "epoch": 6.7337482710926695, "grad_norm": 7.223566055297852, "learning_rate": 1.814584293837406e-05, "log_odds_chosen": 10.804994583129883, "log_odds_ratio": -0.0002193464752053842, "logits/chosen": -0.6460748910903931, "logits/rejected": -0.659801185131073, "logps/chosen": -0.0003987317904829979, "logps/rejected": -2.281238555908203, "loss": 0.58, "nll_loss": 0.14497052133083344, "rewards/accuracies": 1.0, "rewards/chosen": -3.9873182686278597e-05, "rewards/margins": 0.22808398306369781, "rewards/rejected": -0.22812385857105255, "step": 9737 }, { "epoch": 6.734439834024896, "grad_norm": 4.332970142364502, "learning_rate": 1.814200092208391e-05, "log_odds_chosen": 11.209919929504395, "log_odds_ratio": -3.504233609419316e-05, "logits/chosen": -0.3805273473262787, "logits/rejected": -0.42599645256996155, "logps/chosen": -0.00011535930389072746, "logps/rejected": -1.9247658252716064, "loss": 0.4257, "nll_loss": 0.1064300462603569, "rewards/accuracies": 1.0, "rewards/chosen": -1.1535931662365329e-05, "rewards/margins": 0.19246505200862885, "rewards/rejected": -0.19247660040855408, "step": 9738 }, { "epoch": 6.735131396957123, "grad_norm": 6.551239967346191, "learning_rate": 1.813815890579376e-05, "log_odds_chosen": 9.787984848022461, "log_odds_ratio": -0.0002358718920731917, "logits/chosen": -0.6269487142562866, "logits/rejected": -0.6987356543540955, "logps/chosen": -0.0004186548467259854, "logps/rejected": -1.6685190200805664, "loss": 0.5725, "nll_loss": 0.14309853315353394, "rewards/accuracies": 1.0, "rewards/chosen": -4.18654854001943e-05, "rewards/margins": 0.1668100506067276, "rewards/rejected": -0.16685190796852112, "step": 9739 }, { "epoch": 6.73582295988935, "grad_norm": 5.712563991546631, "learning_rate": 1.8134316889503614e-05, "log_odds_chosen": 9.63579273223877, "log_odds_ratio": -0.0010889896657317877, "logits/chosen": -0.6010291576385498, "logits/rejected": -0.6723165512084961, "logps/chosen": -0.0009074000408872962, "logps/rejected": -2.5607223510742188, "loss": 0.7981, "nll_loss": 0.1994112730026245, "rewards/accuracies": 1.0, "rewards/chosen": -9.074000990949571e-05, "rewards/margins": 0.2559815049171448, "rewards/rejected": -0.2560722529888153, "step": 9740 }, { "epoch": 6.736514522821577, "grad_norm": 15.393342018127441, "learning_rate": 1.8130474873213463e-05, "log_odds_chosen": 10.707435607910156, "log_odds_ratio": -0.00014981115236878395, "logits/chosen": -0.5892646312713623, "logits/rejected": -0.6897189617156982, "logps/chosen": -0.0004563984111882746, "logps/rejected": -2.320040702819824, "loss": 0.879, "nll_loss": 0.2197228968143463, "rewards/accuracies": 1.0, "rewards/chosen": -4.563984475680627e-05, "rewards/margins": 0.23195841908454895, "rewards/rejected": -0.2320040464401245, "step": 9741 }, { "epoch": 6.737206085753804, "grad_norm": 5.6039252281188965, "learning_rate": 1.8126632856923315e-05, "log_odds_chosen": 11.154106140136719, "log_odds_ratio": -3.386279786354862e-05, "logits/chosen": -0.3528594374656677, "logits/rejected": -0.38136905431747437, "logps/chosen": -0.00043841905426234007, "logps/rejected": -2.813148021697998, "loss": 0.58, "nll_loss": 0.1449882984161377, "rewards/accuracies": 1.0, "rewards/chosen": -4.384190833661705e-05, "rewards/margins": 0.28127095103263855, "rewards/rejected": -0.28131479024887085, "step": 9742 }, { "epoch": 6.7378976486860305, "grad_norm": 4.884521484375, "learning_rate": 1.8122790840633167e-05, "log_odds_chosen": 11.476736068725586, "log_odds_ratio": -6.344960274873301e-05, "logits/chosen": -0.42241236567497253, "logits/rejected": -0.46878641843795776, "logps/chosen": -0.00015263850218616426, "logps/rejected": -2.290283679962158, "loss": 0.4595, "nll_loss": 0.11485810577869415, "rewards/accuracies": 1.0, "rewards/chosen": -1.526385312899947e-05, "rewards/margins": 0.2290130853652954, "rewards/rejected": -0.2290283441543579, "step": 9743 }, { "epoch": 6.738589211618257, "grad_norm": 5.994597911834717, "learning_rate": 1.8118948824343017e-05, "log_odds_chosen": 11.044092178344727, "log_odds_ratio": -0.0002066854212898761, "logits/chosen": -0.5602484941482544, "logits/rejected": -0.6336889266967773, "logps/chosen": -0.0002837642969097942, "logps/rejected": -2.173689126968384, "loss": 0.543, "nll_loss": 0.13572733104228973, "rewards/accuracies": 1.0, "rewards/chosen": -2.837642932718154e-05, "rewards/margins": 0.21734054386615753, "rewards/rejected": -0.21736891567707062, "step": 9744 }, { "epoch": 6.739280774550484, "grad_norm": 4.356839179992676, "learning_rate": 1.811510680805287e-05, "log_odds_chosen": 9.351144790649414, "log_odds_ratio": -0.0005068988539278507, "logits/chosen": -0.31054121255874634, "logits/rejected": -0.3710728585720062, "logps/chosen": -0.0007687251782044768, "logps/rejected": -1.5605571269989014, "loss": 0.5891, "nll_loss": 0.14723043143749237, "rewards/accuracies": 1.0, "rewards/chosen": -7.687251491006464e-05, "rewards/margins": 0.15597884356975555, "rewards/rejected": -0.15605571866035461, "step": 9745 }, { "epoch": 6.739972337482711, "grad_norm": 6.220463275909424, "learning_rate": 1.8111264791762718e-05, "log_odds_chosen": 11.119985580444336, "log_odds_ratio": -2.354292882955633e-05, "logits/chosen": 0.07925686240196228, "logits/rejected": 0.08532209694385529, "logps/chosen": -0.00018369583995081484, "logps/rejected": -2.416872978210449, "loss": 0.7845, "nll_loss": 0.1961258053779602, "rewards/accuracies": 1.0, "rewards/chosen": -1.8369584722677246e-05, "rewards/margins": 0.2416689395904541, "rewards/rejected": -0.24168729782104492, "step": 9746 }, { "epoch": 6.740663900414938, "grad_norm": 5.230076313018799, "learning_rate": 1.8107422775472567e-05, "log_odds_chosen": 10.40478229522705, "log_odds_ratio": -0.000254276383202523, "logits/chosen": -0.46239978075027466, "logits/rejected": -0.516649603843689, "logps/chosen": -0.0008920235559344292, "logps/rejected": -2.0291428565979004, "loss": 0.671, "nll_loss": 0.16771256923675537, "rewards/accuracies": 1.0, "rewards/chosen": -8.920235268305987e-05, "rewards/margins": 0.20282509922981262, "rewards/rejected": -0.202914297580719, "step": 9747 }, { "epoch": 6.741355463347165, "grad_norm": 4.9943528175354, "learning_rate": 1.810358075918242e-05, "log_odds_chosen": 11.097500801086426, "log_odds_ratio": -2.419178417767398e-05, "logits/chosen": -0.6011475324630737, "logits/rejected": -0.8357247114181519, "logps/chosen": -0.00037277143565006554, "logps/rejected": -2.7854795455932617, "loss": 0.4733, "nll_loss": 0.11833024770021439, "rewards/accuracies": 1.0, "rewards/chosen": -3.727714283741079e-05, "rewards/margins": 0.2785106599330902, "rewards/rejected": -0.2785479724407196, "step": 9748 }, { "epoch": 6.7420470262793915, "grad_norm": 5.590667724609375, "learning_rate": 1.8099738742892272e-05, "log_odds_chosen": 10.696863174438477, "log_odds_ratio": -3.9032405766192824e-05, "logits/chosen": -0.5773473978042603, "logits/rejected": -0.624853253364563, "logps/chosen": -0.00022468066890724003, "logps/rejected": -2.161128282546997, "loss": 0.7687, "nll_loss": 0.1921810805797577, "rewards/accuracies": 1.0, "rewards/chosen": -2.2468066163128242e-05, "rewards/margins": 0.2160903513431549, "rewards/rejected": -0.21611282229423523, "step": 9749 }, { "epoch": 6.742738589211618, "grad_norm": 7.014741897583008, "learning_rate": 1.809589672660212e-05, "log_odds_chosen": 11.61632251739502, "log_odds_ratio": -6.345485599013045e-05, "logits/chosen": -0.0924762487411499, "logits/rejected": -0.1658192276954651, "logps/chosen": -0.00028687884332612157, "logps/rejected": -2.877070188522339, "loss": 0.6043, "nll_loss": 0.15107500553131104, "rewards/accuracies": 1.0, "rewards/chosen": -2.868788578780368e-05, "rewards/margins": 0.2876783311367035, "rewards/rejected": -0.28770703077316284, "step": 9750 }, { "epoch": 6.743430152143845, "grad_norm": 6.5519890785217285, "learning_rate": 1.8092054710311973e-05, "log_odds_chosen": 10.81260871887207, "log_odds_ratio": -4.300369619159028e-05, "logits/chosen": -0.16680464148521423, "logits/rejected": -0.21911774575710297, "logps/chosen": -0.0001687578478595242, "logps/rejected": -1.9256123304367065, "loss": 0.3878, "nll_loss": 0.09693576395511627, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875783330760896e-05, "rewards/margins": 0.1925443410873413, "rewards/rejected": -0.19256123900413513, "step": 9751 }, { "epoch": 6.744121715076072, "grad_norm": 4.346395015716553, "learning_rate": 1.8088212694021823e-05, "log_odds_chosen": 10.728569030761719, "log_odds_ratio": -4.3420415750006214e-05, "logits/chosen": -0.4695202708244324, "logits/rejected": -0.5058833360671997, "logps/chosen": -0.00018572999397292733, "logps/rejected": -1.950321912765503, "loss": 1.0065, "nll_loss": 0.25160858035087585, "rewards/accuracies": 1.0, "rewards/chosen": -1.857300230767578e-05, "rewards/margins": 0.1950136125087738, "rewards/rejected": -0.19503219425678253, "step": 9752 }, { "epoch": 6.744813278008299, "grad_norm": 7.713948726654053, "learning_rate": 1.8084370677731675e-05, "log_odds_chosen": 10.131799697875977, "log_odds_ratio": -0.00040340382838621736, "logits/chosen": -0.7229098677635193, "logits/rejected": -0.624686062335968, "logps/chosen": -0.00037127750692889094, "logps/rejected": -1.967607855796814, "loss": 0.703, "nll_loss": 0.1756991744041443, "rewards/accuracies": 1.0, "rewards/chosen": -3.712774923769757e-05, "rewards/margins": 0.19672366976737976, "rewards/rejected": -0.19676080346107483, "step": 9753 }, { "epoch": 6.745504840940526, "grad_norm": 6.567865371704102, "learning_rate": 1.8080528661441527e-05, "log_odds_chosen": 9.464198112487793, "log_odds_ratio": -0.00013312677037902176, "logits/chosen": -0.4827428162097931, "logits/rejected": -0.5091724991798401, "logps/chosen": -0.0005617655115202069, "logps/rejected": -1.3640573024749756, "loss": 0.5455, "nll_loss": 0.13636085391044617, "rewards/accuracies": 1.0, "rewards/chosen": -5.617655551759526e-05, "rewards/margins": 0.13634954392910004, "rewards/rejected": -0.13640573620796204, "step": 9754 }, { "epoch": 6.746196403872752, "grad_norm": 6.985016822814941, "learning_rate": 1.8076686645151376e-05, "log_odds_chosen": 9.059370040893555, "log_odds_ratio": -0.007898088544607162, "logits/chosen": -0.5644688606262207, "logits/rejected": -0.6181748509407043, "logps/chosen": -0.003136302111670375, "logps/rejected": -1.3507148027420044, "loss": 0.7475, "nll_loss": 0.18608833849430084, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003136302693746984, "rewards/margins": 0.134757861495018, "rewards/rejected": -0.13507148623466492, "step": 9755 }, { "epoch": 6.746887966804979, "grad_norm": 6.688884258270264, "learning_rate": 1.8072844628861226e-05, "log_odds_chosen": 10.308743476867676, "log_odds_ratio": -7.386624929495156e-05, "logits/chosen": -0.3142358660697937, "logits/rejected": -0.3920065462589264, "logps/chosen": -0.00013700808631256223, "logps/rejected": -1.6693358421325684, "loss": 0.7536, "nll_loss": 0.1883903443813324, "rewards/accuracies": 1.0, "rewards/chosen": -1.3700810086447746e-05, "rewards/margins": 0.16691987216472626, "rewards/rejected": -0.1669335663318634, "step": 9756 }, { "epoch": 6.747579529737206, "grad_norm": 7.96720552444458, "learning_rate": 1.8069002612571078e-05, "log_odds_chosen": 11.322125434875488, "log_odds_ratio": -0.00012096359569113702, "logits/chosen": -0.20771357417106628, "logits/rejected": -0.20665308833122253, "logps/chosen": -0.00015821008128114045, "logps/rejected": -2.490894317626953, "loss": 0.7956, "nll_loss": 0.19889280200004578, "rewards/accuracies": 1.0, "rewards/chosen": -1.5821007764316164e-05, "rewards/margins": 0.24907363951206207, "rewards/rejected": -0.24908944964408875, "step": 9757 }, { "epoch": 6.748271092669433, "grad_norm": 7.730781078338623, "learning_rate": 1.8065160596280927e-05, "log_odds_chosen": 12.252197265625, "log_odds_ratio": -3.502556137391366e-05, "logits/chosen": -0.6151853203773499, "logits/rejected": -0.6173470616340637, "logps/chosen": -0.00014207100321073085, "logps/rejected": -3.05303955078125, "loss": 0.5453, "nll_loss": 0.13632476329803467, "rewards/accuracies": 1.0, "rewards/chosen": -1.4207101230567787e-05, "rewards/margins": 0.30528974533081055, "rewards/rejected": -0.30530399084091187, "step": 9758 }, { "epoch": 6.74896265560166, "grad_norm": 3.3932158946990967, "learning_rate": 1.806131857999078e-05, "log_odds_chosen": 10.762428283691406, "log_odds_ratio": -0.00037677347427234054, "logits/chosen": -0.2462678849697113, "logits/rejected": -0.2845362424850464, "logps/chosen": -0.0009380167466588318, "logps/rejected": -1.9752521514892578, "loss": 0.4398, "nll_loss": 0.10991780459880829, "rewards/accuracies": 1.0, "rewards/chosen": -9.38016819418408e-05, "rewards/margins": 0.19743141531944275, "rewards/rejected": -0.19752521812915802, "step": 9759 }, { "epoch": 6.749654218533887, "grad_norm": 37.30306625366211, "learning_rate": 1.8057476563700632e-05, "log_odds_chosen": 9.855682373046875, "log_odds_ratio": -0.0002588354400359094, "logits/chosen": -0.47376930713653564, "logits/rejected": -0.5016792416572571, "logps/chosen": -0.0004103544051758945, "logps/rejected": -1.791809320449829, "loss": 0.5705, "nll_loss": 0.14260096848011017, "rewards/accuracies": 1.0, "rewards/chosen": -4.103544051758945e-05, "rewards/margins": 0.1791399121284485, "rewards/rejected": -0.17918093502521515, "step": 9760 }, { "epoch": 6.750345781466113, "grad_norm": 9.75047779083252, "learning_rate": 1.805363454741048e-05, "log_odds_chosen": 10.098825454711914, "log_odds_ratio": -0.0006348791648633778, "logits/chosen": -0.4901638627052307, "logits/rejected": -0.46601709723472595, "logps/chosen": -0.0002679343451745808, "logps/rejected": -1.9539092779159546, "loss": 0.5457, "nll_loss": 0.13636308908462524, "rewards/accuracies": 1.0, "rewards/chosen": -2.679343378986232e-05, "rewards/margins": 0.19536414742469788, "rewards/rejected": -0.19539092481136322, "step": 9761 }, { "epoch": 6.75103734439834, "grad_norm": 4.300050735473633, "learning_rate": 1.8049792531120333e-05, "log_odds_chosen": 10.523996353149414, "log_odds_ratio": -3.409176861168817e-05, "logits/chosen": -0.4422757625579834, "logits/rejected": -0.513489842414856, "logps/chosen": -0.00012512198009062558, "logps/rejected": -1.5583089590072632, "loss": 0.395, "nll_loss": 0.09874957799911499, "rewards/accuracies": 1.0, "rewards/chosen": -1.2512197827163618e-05, "rewards/margins": 0.1558183878660202, "rewards/rejected": -0.15583088994026184, "step": 9762 }, { "epoch": 6.751728907330567, "grad_norm": 4.803183555603027, "learning_rate": 1.8045950514830186e-05, "log_odds_chosen": 9.5350341796875, "log_odds_ratio": -0.0017939151730388403, "logits/chosen": -0.3713359832763672, "logits/rejected": -0.5139442086219788, "logps/chosen": -0.0018062122398987412, "logps/rejected": -2.1577553749084473, "loss": 0.5588, "nll_loss": 0.13950826227664948, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018062123854178935, "rewards/margins": 0.21559491753578186, "rewards/rejected": -0.21577554941177368, "step": 9763 }, { "epoch": 6.752420470262794, "grad_norm": 7.823028564453125, "learning_rate": 1.8042108498540035e-05, "log_odds_chosen": 11.513077735900879, "log_odds_ratio": -2.8428832592908293e-05, "logits/chosen": -0.1352018564939499, "logits/rejected": -0.24655655026435852, "logps/chosen": -0.00032958845258690417, "logps/rejected": -3.1851940155029297, "loss": 0.5832, "nll_loss": 0.14579999446868896, "rewards/accuracies": 1.0, "rewards/chosen": -3.29588474414777e-05, "rewards/margins": 0.31848645210266113, "rewards/rejected": -0.3185194134712219, "step": 9764 }, { "epoch": 6.753112033195021, "grad_norm": 7.303538799285889, "learning_rate": 1.8038266482249884e-05, "log_odds_chosen": 10.897727966308594, "log_odds_ratio": -0.0002780243812594563, "logits/chosen": -0.24842569231987, "logits/rejected": -0.3929397463798523, "logps/chosen": -0.0001870131236501038, "logps/rejected": -1.847274661064148, "loss": 0.5453, "nll_loss": 0.13629432022571564, "rewards/accuracies": 1.0, "rewards/chosen": -1.8701313820201904e-05, "rewards/margins": 0.18470877408981323, "rewards/rejected": -0.1847274750471115, "step": 9765 }, { "epoch": 6.753803596127248, "grad_norm": 8.309207916259766, "learning_rate": 1.8034424465959736e-05, "log_odds_chosen": 11.285194396972656, "log_odds_ratio": -7.014050788711756e-05, "logits/chosen": -0.5131502151489258, "logits/rejected": -0.5522158145904541, "logps/chosen": -0.0004909878480248153, "logps/rejected": -2.363844394683838, "loss": 0.9031, "nll_loss": 0.2257768213748932, "rewards/accuracies": 1.0, "rewards/chosen": -4.9098791350843385e-05, "rewards/margins": 0.23633533716201782, "rewards/rejected": -0.23638442158699036, "step": 9766 }, { "epoch": 6.754495159059474, "grad_norm": 5.041337013244629, "learning_rate": 1.8030582449669585e-05, "log_odds_chosen": 10.607772827148438, "log_odds_ratio": -7.603943231515586e-05, "logits/chosen": 0.008264736272394657, "logits/rejected": -0.07102751731872559, "logps/chosen": -0.00019035846344195306, "logps/rejected": -2.222433567047119, "loss": 0.4561, "nll_loss": 0.1140083372592926, "rewards/accuracies": 1.0, "rewards/chosen": -1.903584779938683e-05, "rewards/margins": 0.22222432494163513, "rewards/rejected": -0.22224338352680206, "step": 9767 }, { "epoch": 6.755186721991701, "grad_norm": 7.952573776245117, "learning_rate": 1.8026740433379438e-05, "log_odds_chosen": 10.102436065673828, "log_odds_ratio": -7.308281783480197e-05, "logits/chosen": -0.2653353810310364, "logits/rejected": -0.30025357007980347, "logps/chosen": -0.0003470160299912095, "logps/rejected": -1.922732949256897, "loss": 0.957, "nll_loss": 0.2392345666885376, "rewards/accuracies": 1.0, "rewards/chosen": -3.470160299912095e-05, "rewards/margins": 0.19223859906196594, "rewards/rejected": -0.1922733038663864, "step": 9768 }, { "epoch": 6.755878284923928, "grad_norm": 5.763984680175781, "learning_rate": 1.802289841708929e-05, "log_odds_chosen": 11.691999435424805, "log_odds_ratio": -5.55334409000352e-05, "logits/chosen": -0.6231212615966797, "logits/rejected": -0.7192140221595764, "logps/chosen": -0.0001963993563549593, "logps/rejected": -2.478055238723755, "loss": 0.4841, "nll_loss": 0.12102922052145004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9639934180304408e-05, "rewards/margins": 0.24778589606285095, "rewards/rejected": -0.24780553579330444, "step": 9769 }, { "epoch": 6.756569847856155, "grad_norm": 6.567784786224365, "learning_rate": 1.801905640079914e-05, "log_odds_chosen": 11.372642517089844, "log_odds_ratio": -5.993260128889233e-05, "logits/chosen": -0.2392624318599701, "logits/rejected": -0.3215843439102173, "logps/chosen": -0.0003123276983387768, "logps/rejected": -2.6074600219726562, "loss": 0.53, "nll_loss": 0.1324916034936905, "rewards/accuracies": 1.0, "rewards/chosen": -3.123277201666497e-05, "rewards/margins": 0.2607147693634033, "rewards/rejected": -0.2607460021972656, "step": 9770 }, { "epoch": 6.757261410788382, "grad_norm": 4.615629196166992, "learning_rate": 1.8015214384508992e-05, "log_odds_chosen": 10.940568923950195, "log_odds_ratio": -6.288071745075285e-05, "logits/chosen": -0.20645862817764282, "logits/rejected": -0.4057818055152893, "logps/chosen": -0.0003123377973679453, "logps/rejected": -1.8413560390472412, "loss": 0.3406, "nll_loss": 0.0851416140794754, "rewards/accuracies": 1.0, "rewards/chosen": -3.1233776098815724e-05, "rewards/margins": 0.18410436809062958, "rewards/rejected": -0.18413560092449188, "step": 9771 }, { "epoch": 6.7579529737206085, "grad_norm": 7.441464424133301, "learning_rate": 1.8011372368218844e-05, "log_odds_chosen": 10.164894104003906, "log_odds_ratio": -0.0005850759916938841, "logits/chosen": 0.008430100977420807, "logits/rejected": -0.18595150113105774, "logps/chosen": -0.0007026636158116162, "logps/rejected": -2.359257221221924, "loss": 0.6796, "nll_loss": 0.169847771525383, "rewards/accuracies": 1.0, "rewards/chosen": -7.026636740192771e-05, "rewards/margins": 0.23585546016693115, "rewards/rejected": -0.23592573404312134, "step": 9772 }, { "epoch": 6.758644536652835, "grad_norm": 6.9098310470581055, "learning_rate": 1.8007530351928693e-05, "log_odds_chosen": 10.903366088867188, "log_odds_ratio": -3.1184124964056537e-05, "logits/chosen": -0.11911733448505402, "logits/rejected": -0.18479466438293457, "logps/chosen": -0.00035307067446410656, "logps/rejected": -2.7793569564819336, "loss": 0.4868, "nll_loss": 0.12170146405696869, "rewards/accuracies": 1.0, "rewards/chosen": -3.530706817400642e-05, "rewards/margins": 0.2779003977775574, "rewards/rejected": -0.2779357135295868, "step": 9773 }, { "epoch": 6.759336099585062, "grad_norm": 7.216391086578369, "learning_rate": 1.8003688335638542e-05, "log_odds_chosen": 9.6553373336792, "log_odds_ratio": -0.0013119232608005404, "logits/chosen": -0.9779117107391357, "logits/rejected": -0.9227054119110107, "logps/chosen": -0.0010267137549817562, "logps/rejected": -1.4132969379425049, "loss": 0.608, "nll_loss": 0.15186795592308044, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001026713871397078, "rewards/margins": 0.14122702181339264, "rewards/rejected": -0.14132969081401825, "step": 9774 }, { "epoch": 6.760027662517289, "grad_norm": 4.36707067489624, "learning_rate": 1.7999846319348395e-05, "log_odds_chosen": 10.784286499023438, "log_odds_ratio": -4.411122063174844e-05, "logits/chosen": -0.5529571771621704, "logits/rejected": -0.5241627097129822, "logps/chosen": -0.0001336929271928966, "logps/rejected": -1.6788662672042847, "loss": 0.2466, "nll_loss": 0.061649952083826065, "rewards/accuracies": 1.0, "rewards/chosen": -1.3369293810683303e-05, "rewards/margins": 0.16787324845790863, "rewards/rejected": -0.1678866297006607, "step": 9775 }, { "epoch": 6.760719225449516, "grad_norm": 5.725052833557129, "learning_rate": 1.7996004303058244e-05, "log_odds_chosen": 10.938990592956543, "log_odds_ratio": -0.0004287226765882224, "logits/chosen": -0.285929411649704, "logits/rejected": -0.36556243896484375, "logps/chosen": -0.0005001586396247149, "logps/rejected": -2.315542697906494, "loss": 0.6023, "nll_loss": 0.1505252718925476, "rewards/accuracies": 1.0, "rewards/chosen": -5.001586032449268e-05, "rewards/margins": 0.23150423169136047, "rewards/rejected": -0.23155425488948822, "step": 9776 }, { "epoch": 6.761410788381743, "grad_norm": 7.261622428894043, "learning_rate": 1.7992162286768096e-05, "log_odds_chosen": 9.896255493164062, "log_odds_ratio": -0.00038533323095180094, "logits/chosen": -0.1569298803806305, "logits/rejected": -0.22647440433502197, "logps/chosen": -0.0006728830048814416, "logps/rejected": -2.288208484649658, "loss": 0.7874, "nll_loss": 0.1968180537223816, "rewards/accuracies": 1.0, "rewards/chosen": -6.72883033985272e-05, "rewards/margins": 0.22875356674194336, "rewards/rejected": -0.22882086038589478, "step": 9777 }, { "epoch": 6.7621023513139695, "grad_norm": 5.106160640716553, "learning_rate": 1.798832027047795e-05, "log_odds_chosen": 10.562328338623047, "log_odds_ratio": -0.00016904577205423266, "logits/chosen": -0.25822728872299194, "logits/rejected": -0.3803952932357788, "logps/chosen": -0.0005223815096542239, "logps/rejected": -2.2937982082366943, "loss": 0.5959, "nll_loss": 0.1489495187997818, "rewards/accuracies": 1.0, "rewards/chosen": -5.2238152420613915e-05, "rewards/margins": 0.22932758927345276, "rewards/rejected": -0.2293798327445984, "step": 9778 }, { "epoch": 6.762793914246196, "grad_norm": 9.766763687133789, "learning_rate": 1.7984478254187798e-05, "log_odds_chosen": 9.80665397644043, "log_odds_ratio": -0.00023903910187073052, "logits/chosen": -0.6199896335601807, "logits/rejected": -0.6468402147293091, "logps/chosen": -0.0003940025926567614, "logps/rejected": -1.1876604557037354, "loss": 0.4167, "nll_loss": 0.1041593849658966, "rewards/accuracies": 1.0, "rewards/chosen": -3.9400256355293095e-05, "rewards/margins": 0.11872664093971252, "rewards/rejected": -0.11876604706048965, "step": 9779 }, { "epoch": 6.763485477178423, "grad_norm": 5.44463586807251, "learning_rate": 1.798063623789765e-05, "log_odds_chosen": 9.514091491699219, "log_odds_ratio": -0.00021153160196263343, "logits/chosen": -0.4803120791912079, "logits/rejected": -0.5570478439331055, "logps/chosen": -0.0007655763765797019, "logps/rejected": -2.086155891418457, "loss": 0.8381, "nll_loss": 0.20949587225914001, "rewards/accuracies": 1.0, "rewards/chosen": -7.655764056835324e-05, "rewards/margins": 0.20853903889656067, "rewards/rejected": -0.20861557126045227, "step": 9780 }, { "epoch": 6.76417704011065, "grad_norm": 8.110411643981934, "learning_rate": 1.7976794221607503e-05, "log_odds_chosen": 10.88216781616211, "log_odds_ratio": -6.38460842310451e-05, "logits/chosen": -0.09918743371963501, "logits/rejected": -0.13283143937587738, "logps/chosen": -0.0002480775583535433, "logps/rejected": -2.291240692138672, "loss": 0.7315, "nll_loss": 0.1828564554452896, "rewards/accuracies": 1.0, "rewards/chosen": -2.4807755835354328e-05, "rewards/margins": 0.22909927368164062, "rewards/rejected": -0.22912408411502838, "step": 9781 }, { "epoch": 6.764868603042877, "grad_norm": 4.628012657165527, "learning_rate": 1.7972952205317352e-05, "log_odds_chosen": 10.212989807128906, "log_odds_ratio": -6.754022615496069e-05, "logits/chosen": -0.4950890839099884, "logits/rejected": -0.5523540377616882, "logps/chosen": -0.00018165886285714805, "logps/rejected": -1.4461414813995361, "loss": 0.4622, "nll_loss": 0.1155511885881424, "rewards/accuracies": 1.0, "rewards/chosen": -1.8165887013310567e-05, "rewards/margins": 0.14459598064422607, "rewards/rejected": -0.14461416006088257, "step": 9782 }, { "epoch": 6.765560165975104, "grad_norm": 6.216654300689697, "learning_rate": 1.79691101890272e-05, "log_odds_chosen": 11.898232460021973, "log_odds_ratio": -1.2801835509890225e-05, "logits/chosen": -0.22902534902095795, "logits/rejected": -0.3668055534362793, "logps/chosen": -0.00017746177036315203, "logps/rejected": -3.097951889038086, "loss": 0.668, "nll_loss": 0.16699756681919098, "rewards/accuracies": 1.0, "rewards/chosen": -1.77461752173258e-05, "rewards/margins": 0.30977746844291687, "rewards/rejected": -0.30979520082473755, "step": 9783 }, { "epoch": 6.7662517289073305, "grad_norm": 4.292337417602539, "learning_rate": 1.7965268172737053e-05, "log_odds_chosen": 12.043815612792969, "log_odds_ratio": -8.21490084490506e-06, "logits/chosen": -0.5467915534973145, "logits/rejected": -0.5668094158172607, "logps/chosen": -0.00013092358130961657, "logps/rejected": -2.8281164169311523, "loss": 0.4635, "nll_loss": 0.11587396264076233, "rewards/accuracies": 1.0, "rewards/chosen": -1.3092359040456358e-05, "rewards/margins": 0.28279852867126465, "rewards/rejected": -0.28281164169311523, "step": 9784 }, { "epoch": 6.766943291839557, "grad_norm": 6.525885581970215, "learning_rate": 1.7961426156446902e-05, "log_odds_chosen": 11.126798629760742, "log_odds_ratio": -5.15446845383849e-05, "logits/chosen": -0.13213799893856049, "logits/rejected": -0.13622528314590454, "logps/chosen": -0.00017841748194769025, "logps/rejected": -2.4514002799987793, "loss": 0.5827, "nll_loss": 0.145666241645813, "rewards/accuracies": 1.0, "rewards/chosen": -1.7841746739577502e-05, "rewards/margins": 0.24512219429016113, "rewards/rejected": -0.24514003098011017, "step": 9785 }, { "epoch": 6.767634854771784, "grad_norm": 6.094909191131592, "learning_rate": 1.7957584140156755e-05, "log_odds_chosen": 11.047687530517578, "log_odds_ratio": -3.2891894079511985e-05, "logits/chosen": -0.5635808110237122, "logits/rejected": -0.533679723739624, "logps/chosen": -0.0001351062091998756, "logps/rejected": -2.1552696228027344, "loss": 0.3764, "nll_loss": 0.09409818053245544, "rewards/accuracies": 1.0, "rewards/chosen": -1.35106211018865e-05, "rewards/margins": 0.2155134528875351, "rewards/rejected": -0.21552695333957672, "step": 9786 }, { "epoch": 6.768326417704011, "grad_norm": 4.369130611419678, "learning_rate": 1.7953742123866607e-05, "log_odds_chosen": 10.320816993713379, "log_odds_ratio": -6.77104399073869e-05, "logits/chosen": -0.5601058006286621, "logits/rejected": -0.6135123372077942, "logps/chosen": -0.00018023433221969754, "logps/rejected": -1.5359973907470703, "loss": 0.3982, "nll_loss": 0.09954757988452911, "rewards/accuracies": 1.0, "rewards/chosen": -1.8023432858171873e-05, "rewards/margins": 0.15358170866966248, "rewards/rejected": -0.15359973907470703, "step": 9787 }, { "epoch": 6.769017980636238, "grad_norm": 6.298321723937988, "learning_rate": 1.7949900107576456e-05, "log_odds_chosen": 10.307516098022461, "log_odds_ratio": -0.00014691260003019124, "logits/chosen": -0.3807259500026703, "logits/rejected": -0.4523041248321533, "logps/chosen": -0.000674390175845474, "logps/rejected": -2.2402751445770264, "loss": 0.4117, "nll_loss": 0.1029200330376625, "rewards/accuracies": 1.0, "rewards/chosen": -6.743902486050501e-05, "rewards/margins": 0.22396008670330048, "rewards/rejected": -0.22402751445770264, "step": 9788 }, { "epoch": 6.769709543568465, "grad_norm": 15.953858375549316, "learning_rate": 1.794605809128631e-05, "log_odds_chosen": 11.054325103759766, "log_odds_ratio": -4.443394936970435e-05, "logits/chosen": -0.3734176456928253, "logits/rejected": -0.36586546897888184, "logps/chosen": -0.0002746654790826142, "logps/rejected": -2.239257335662842, "loss": 0.4929, "nll_loss": 0.12322264909744263, "rewards/accuracies": 1.0, "rewards/chosen": -2.7466550818644464e-05, "rewards/margins": 0.2238982617855072, "rewards/rejected": -0.22392573952674866, "step": 9789 }, { "epoch": 6.7704011065006915, "grad_norm": 5.373976707458496, "learning_rate": 1.794221607499616e-05, "log_odds_chosen": 10.213509559631348, "log_odds_ratio": -0.00034472710103727877, "logits/chosen": -0.10134115070104599, "logits/rejected": -0.12651267647743225, "logps/chosen": -0.00033351409365423024, "logps/rejected": -2.3881418704986572, "loss": 0.8267, "nll_loss": 0.2066420614719391, "rewards/accuracies": 1.0, "rewards/chosen": -3.3351410820614547e-05, "rewards/margins": 0.23878082633018494, "rewards/rejected": -0.23881417512893677, "step": 9790 }, { "epoch": 6.771092669432918, "grad_norm": 7.270600318908691, "learning_rate": 1.793837405870601e-05, "log_odds_chosen": 10.21059799194336, "log_odds_ratio": -0.0002538753324188292, "logits/chosen": -0.5336205363273621, "logits/rejected": -0.5699590444564819, "logps/chosen": -0.0002814323815982789, "logps/rejected": -1.7851035594940186, "loss": 0.6582, "nll_loss": 0.16453300416469574, "rewards/accuracies": 1.0, "rewards/chosen": -2.8143236704636365e-05, "rewards/margins": 0.17848220467567444, "rewards/rejected": -0.17851035296916962, "step": 9791 }, { "epoch": 6.771784232365145, "grad_norm": 6.971714019775391, "learning_rate": 1.793453204241586e-05, "log_odds_chosen": 10.254987716674805, "log_odds_ratio": -0.0001944910327438265, "logits/chosen": -0.4201323688030243, "logits/rejected": -0.5163047909736633, "logps/chosen": -0.0008295466541312635, "logps/rejected": -1.9266215562820435, "loss": 0.7876, "nll_loss": 0.1968696117401123, "rewards/accuracies": 1.0, "rewards/chosen": -8.29546625027433e-05, "rewards/margins": 0.19257919490337372, "rewards/rejected": -0.19266214966773987, "step": 9792 }, { "epoch": 6.772475795297372, "grad_norm": 4.725278377532959, "learning_rate": 1.7930690026125712e-05, "log_odds_chosen": 9.815740585327148, "log_odds_ratio": -0.0001148254523286596, "logits/chosen": -0.10961273312568665, "logits/rejected": -0.21037688851356506, "logps/chosen": -0.0002081097918562591, "logps/rejected": -1.2197327613830566, "loss": 0.4787, "nll_loss": 0.11967021226882935, "rewards/accuracies": 1.0, "rewards/chosen": -2.081097954942379e-05, "rewards/margins": 0.12195246666669846, "rewards/rejected": -0.12197329103946686, "step": 9793 }, { "epoch": 6.773167358229599, "grad_norm": 10.813013076782227, "learning_rate": 1.792684800983556e-05, "log_odds_chosen": 10.976402282714844, "log_odds_ratio": -4.5033513742964715e-05, "logits/chosen": -0.6192176342010498, "logits/rejected": -0.8035860657691956, "logps/chosen": -0.00024157708685379475, "logps/rejected": -2.56021785736084, "loss": 0.6271, "nll_loss": 0.1567803919315338, "rewards/accuracies": 1.0, "rewards/chosen": -2.4157707230187953e-05, "rewards/margins": 0.2559976279735565, "rewards/rejected": -0.25602179765701294, "step": 9794 }, { "epoch": 6.773858921161826, "grad_norm": 5.538814544677734, "learning_rate": 1.7923005993545413e-05, "log_odds_chosen": 10.855127334594727, "log_odds_ratio": -3.1160849175648764e-05, "logits/chosen": -0.7526274919509888, "logits/rejected": -0.7859461903572083, "logps/chosen": -0.00010621797264320776, "logps/rejected": -1.528001070022583, "loss": 0.4439, "nll_loss": 0.11098209023475647, "rewards/accuracies": 1.0, "rewards/chosen": -1.0621797628118657e-05, "rewards/margins": 0.15278948843479156, "rewards/rejected": -0.15280009806156158, "step": 9795 }, { "epoch": 6.7745504840940525, "grad_norm": 5.904871463775635, "learning_rate": 1.7919163977255266e-05, "log_odds_chosen": 9.980600357055664, "log_odds_ratio": -0.00027487872284837067, "logits/chosen": -0.48689329624176025, "logits/rejected": -0.42207589745521545, "logps/chosen": -0.0005123027949593961, "logps/rejected": -1.6725163459777832, "loss": 0.6245, "nll_loss": 0.15609855949878693, "rewards/accuracies": 1.0, "rewards/chosen": -5.1230275857960805e-05, "rewards/margins": 0.16720040142536163, "rewards/rejected": -0.16725163161754608, "step": 9796 }, { "epoch": 6.775242047026279, "grad_norm": 9.97628116607666, "learning_rate": 1.7915321960965115e-05, "log_odds_chosen": 11.540143966674805, "log_odds_ratio": -2.4471966753480956e-05, "logits/chosen": -0.5073536038398743, "logits/rejected": -0.6288943290710449, "logps/chosen": -0.00042590327211655676, "logps/rejected": -2.9150373935699463, "loss": 0.4392, "nll_loss": 0.10978897660970688, "rewards/accuracies": 1.0, "rewards/chosen": -4.2590330849634483e-05, "rewards/margins": 0.29146116971969604, "rewards/rejected": -0.29150375723838806, "step": 9797 }, { "epoch": 6.775933609958506, "grad_norm": 5.156939506530762, "learning_rate": 1.7911479944674967e-05, "log_odds_chosen": 9.476211547851562, "log_odds_ratio": -0.00015527091454714537, "logits/chosen": -0.17368683218955994, "logits/rejected": -0.3279064893722534, "logps/chosen": -0.000656230200547725, "logps/rejected": -2.053809881210327, "loss": 0.5374, "nll_loss": 0.13434290885925293, "rewards/accuracies": 1.0, "rewards/chosen": -6.562301859958097e-05, "rewards/margins": 0.20531539618968964, "rewards/rejected": -0.20538100600242615, "step": 9798 }, { "epoch": 6.776625172890733, "grad_norm": 4.439167022705078, "learning_rate": 1.790763792838482e-05, "log_odds_chosen": 10.210926055908203, "log_odds_ratio": -0.0001035605018842034, "logits/chosen": -0.5380820631980896, "logits/rejected": -0.5222083330154419, "logps/chosen": -0.0005264157080091536, "logps/rejected": -1.592585802078247, "loss": 0.5461, "nll_loss": 0.1365230679512024, "rewards/accuracies": 1.0, "rewards/chosen": -5.264157152851112e-05, "rewards/margins": 0.15920594334602356, "rewards/rejected": -0.15925857424736023, "step": 9799 }, { "epoch": 6.77731673582296, "grad_norm": 6.291228771209717, "learning_rate": 1.790379591209467e-05, "log_odds_chosen": 10.743213653564453, "log_odds_ratio": -0.0011980285635218024, "logits/chosen": -0.4152224063873291, "logits/rejected": -0.40008097887039185, "logps/chosen": -0.000732260406948626, "logps/rejected": -2.3373422622680664, "loss": 0.7109, "nll_loss": 0.1776098906993866, "rewards/accuracies": 1.0, "rewards/chosen": -7.322603778447956e-05, "rewards/margins": 0.2336609959602356, "rewards/rejected": -0.23373422026634216, "step": 9800 }, { "epoch": 6.778008298755187, "grad_norm": 10.946796417236328, "learning_rate": 1.7899953895804518e-05, "log_odds_chosen": 9.949090957641602, "log_odds_ratio": -0.00010966783884214237, "logits/chosen": -0.6908112168312073, "logits/rejected": -0.7527514696121216, "logps/chosen": -0.00016632323968224227, "logps/rejected": -1.3433506488800049, "loss": 1.3295, "nll_loss": 0.3323700428009033, "rewards/accuracies": 1.0, "rewards/chosen": -1.6632324332022108e-05, "rewards/margins": 0.13431844115257263, "rewards/rejected": -0.13433507084846497, "step": 9801 }, { "epoch": 6.7786998616874135, "grad_norm": 10.57618522644043, "learning_rate": 1.789611187951437e-05, "log_odds_chosen": 11.234428405761719, "log_odds_ratio": -2.9399394406937063e-05, "logits/chosen": -0.23678916692733765, "logits/rejected": -0.2845824360847473, "logps/chosen": -0.00042127168853767216, "logps/rejected": -2.484922170639038, "loss": 0.6881, "nll_loss": 0.17201298475265503, "rewards/accuracies": 1.0, "rewards/chosen": -4.212717612972483e-05, "rewards/margins": 0.24845010042190552, "rewards/rejected": -0.24849221110343933, "step": 9802 }, { "epoch": 6.77939142461964, "grad_norm": 4.66774845123291, "learning_rate": 1.789226986322422e-05, "log_odds_chosen": 10.529937744140625, "log_odds_ratio": -0.00010805519559653476, "logits/chosen": -0.5381174683570862, "logits/rejected": -0.5654388666152954, "logps/chosen": -0.0003065310011152178, "logps/rejected": -2.255186080932617, "loss": 0.6306, "nll_loss": 0.15763989090919495, "rewards/accuracies": 1.0, "rewards/chosen": -3.065310011152178e-05, "rewards/margins": 0.22548796236515045, "rewards/rejected": -0.2255186140537262, "step": 9803 }, { "epoch": 6.780082987551867, "grad_norm": 7.76448917388916, "learning_rate": 1.788842784693407e-05, "log_odds_chosen": 10.768404960632324, "log_odds_ratio": -3.6421581171453e-05, "logits/chosen": -0.45603862404823303, "logits/rejected": -0.5468306541442871, "logps/chosen": -0.00010872337588807568, "logps/rejected": -1.5940682888031006, "loss": 0.4035, "nll_loss": 0.10087916254997253, "rewards/accuracies": 1.0, "rewards/chosen": -1.0872337952605449e-05, "rewards/margins": 0.1593959629535675, "rewards/rejected": -0.159406840801239, "step": 9804 }, { "epoch": 6.780774550484094, "grad_norm": 4.876262664794922, "learning_rate": 1.7884585830643924e-05, "log_odds_chosen": 11.440156936645508, "log_odds_ratio": -4.563136462820694e-05, "logits/chosen": -0.6698880195617676, "logits/rejected": -0.6494791507720947, "logps/chosen": -0.00046217741328291595, "logps/rejected": -2.974748373031616, "loss": 0.3709, "nll_loss": 0.09273214638233185, "rewards/accuracies": 1.0, "rewards/chosen": -4.62177449662704e-05, "rewards/margins": 0.29742860794067383, "rewards/rejected": -0.29747486114501953, "step": 9805 }, { "epoch": 6.781466113416321, "grad_norm": 6.153312683105469, "learning_rate": 1.7880743814353773e-05, "log_odds_chosen": 10.653793334960938, "log_odds_ratio": -7.533111056545749e-05, "logits/chosen": -0.46784037351608276, "logits/rejected": -0.522236704826355, "logps/chosen": -0.00014894736523274332, "logps/rejected": -1.5492587089538574, "loss": 0.8613, "nll_loss": 0.21532396972179413, "rewards/accuracies": 1.0, "rewards/chosen": -1.4894736523274332e-05, "rewards/margins": 0.15491099655628204, "rewards/rejected": -0.1549258828163147, "step": 9806 }, { "epoch": 6.782157676348548, "grad_norm": 7.5202317237854, "learning_rate": 1.7876901798063626e-05, "log_odds_chosen": 10.069587707519531, "log_odds_ratio": -9.036479605128989e-05, "logits/chosen": -0.14121729135513306, "logits/rejected": -0.23690003156661987, "logps/chosen": -0.00048229345702566206, "logps/rejected": -2.1249752044677734, "loss": 0.4922, "nll_loss": 0.12303020805120468, "rewards/accuracies": 1.0, "rewards/chosen": -4.822934715775773e-05, "rewards/margins": 0.21244929730892181, "rewards/rejected": -0.2124975323677063, "step": 9807 }, { "epoch": 6.782849239280774, "grad_norm": 5.495734214782715, "learning_rate": 1.7873059781773478e-05, "log_odds_chosen": 11.376768112182617, "log_odds_ratio": -5.383559982874431e-05, "logits/chosen": -0.3966137766838074, "logits/rejected": -0.49125686287879944, "logps/chosen": -0.00015656169853173196, "logps/rejected": -1.8736109733581543, "loss": 0.5997, "nll_loss": 0.1499200463294983, "rewards/accuracies": 1.0, "rewards/chosen": -1.5656172763556242e-05, "rewards/margins": 0.1873454451560974, "rewards/rejected": -0.18736110627651215, "step": 9808 }, { "epoch": 6.783540802213001, "grad_norm": 4.648586273193359, "learning_rate": 1.7869217765483327e-05, "log_odds_chosen": 10.439985275268555, "log_odds_ratio": -0.00021521994494833052, "logits/chosen": -0.5451334118843079, "logits/rejected": -0.4855062961578369, "logps/chosen": -0.0002363657986279577, "logps/rejected": -1.9440194368362427, "loss": 0.2804, "nll_loss": 0.07008212804794312, "rewards/accuracies": 1.0, "rewards/chosen": -2.363657949899789e-05, "rewards/margins": 0.19437828660011292, "rewards/rejected": -0.19440194964408875, "step": 9809 }, { "epoch": 6.784232365145228, "grad_norm": 6.852433681488037, "learning_rate": 1.7865375749193176e-05, "log_odds_chosen": 10.628756523132324, "log_odds_ratio": -5.7108372857328504e-05, "logits/chosen": -0.18069210648536682, "logits/rejected": -0.1497456133365631, "logps/chosen": -0.0002983055601362139, "logps/rejected": -2.0635130405426025, "loss": 0.6769, "nll_loss": 0.16920895874500275, "rewards/accuracies": 1.0, "rewards/chosen": -2.983055674121715e-05, "rewards/margins": 0.20632147789001465, "rewards/rejected": -0.20635131001472473, "step": 9810 }, { "epoch": 6.784923928077455, "grad_norm": 7.898940563201904, "learning_rate": 1.786153373290303e-05, "log_odds_chosen": 9.900899887084961, "log_odds_ratio": -8.019372762646526e-05, "logits/chosen": -0.499287873506546, "logits/rejected": -0.6086374521255493, "logps/chosen": -0.0004171186883468181, "logps/rejected": -1.8263578414916992, "loss": 0.8046, "nll_loss": 0.20114830136299133, "rewards/accuracies": 1.0, "rewards/chosen": -4.171186810708605e-05, "rewards/margins": 0.18259406089782715, "rewards/rejected": -0.18263578414916992, "step": 9811 }, { "epoch": 6.785615491009682, "grad_norm": 14.485651016235352, "learning_rate": 1.7857691716612878e-05, "log_odds_chosen": 10.63548469543457, "log_odds_ratio": -4.96716565976385e-05, "logits/chosen": -0.5968855619430542, "logits/rejected": -0.548457682132721, "logps/chosen": -0.0004951007431373, "logps/rejected": -2.177253246307373, "loss": 0.5719, "nll_loss": 0.1429726630449295, "rewards/accuracies": 1.0, "rewards/chosen": -4.9510075768921524e-05, "rewards/margins": 0.2176758348941803, "rewards/rejected": -0.21772533655166626, "step": 9812 }, { "epoch": 6.786307053941909, "grad_norm": 7.115420818328857, "learning_rate": 1.785384970032273e-05, "log_odds_chosen": 11.811935424804688, "log_odds_ratio": -0.00014342159556690603, "logits/chosen": -0.2895018458366394, "logits/rejected": -0.3131714463233948, "logps/chosen": -0.00030310984584502876, "logps/rejected": -3.217440128326416, "loss": 0.7929, "nll_loss": 0.19821369647979736, "rewards/accuracies": 1.0, "rewards/chosen": -3.0310988222481683e-05, "rewards/margins": 0.3217136859893799, "rewards/rejected": -0.3217439651489258, "step": 9813 }, { "epoch": 6.786998616874135, "grad_norm": 8.736668586730957, "learning_rate": 1.7850007684032583e-05, "log_odds_chosen": 11.872312545776367, "log_odds_ratio": -1.9381395759410225e-05, "logits/chosen": -0.7531395554542542, "logits/rejected": -0.8392912745475769, "logps/chosen": -9.647270053392276e-05, "logps/rejected": -2.450575351715088, "loss": 0.5367, "nll_loss": 0.13417330384254456, "rewards/accuracies": 1.0, "rewards/chosen": -9.647269507695455e-06, "rewards/margins": 0.2450478971004486, "rewards/rejected": -0.24505755305290222, "step": 9814 }, { "epoch": 6.787690179806362, "grad_norm": 5.51792049407959, "learning_rate": 1.784616566774243e-05, "log_odds_chosen": 9.964544296264648, "log_odds_ratio": -8.551901555620134e-05, "logits/chosen": -0.19018104672431946, "logits/rejected": -0.35868245363235474, "logps/chosen": -0.00032383183133788407, "logps/rejected": -1.7374075651168823, "loss": 0.607, "nll_loss": 0.15173201262950897, "rewards/accuracies": 1.0, "rewards/chosen": -3.238318458897993e-05, "rewards/margins": 0.17370837926864624, "rewards/rejected": -0.17374074459075928, "step": 9815 }, { "epoch": 6.788381742738589, "grad_norm": 4.665560722351074, "learning_rate": 1.7842323651452284e-05, "log_odds_chosen": 9.851408004760742, "log_odds_ratio": -0.0008542541763745248, "logits/chosen": -0.43791818618774414, "logits/rejected": -0.44907528162002563, "logps/chosen": -0.0005597654380835593, "logps/rejected": -1.7041947841644287, "loss": 0.5862, "nll_loss": 0.14647680521011353, "rewards/accuracies": 1.0, "rewards/chosen": -5.597654671873897e-05, "rewards/margins": 0.17036350071430206, "rewards/rejected": -0.17041948437690735, "step": 9816 }, { "epoch": 6.789073305670816, "grad_norm": 8.863218307495117, "learning_rate": 1.7838481635162133e-05, "log_odds_chosen": 10.806852340698242, "log_odds_ratio": -7.586018909933046e-05, "logits/chosen": -0.3525431454181671, "logits/rejected": -0.44882509112358093, "logps/chosen": -0.0006463914178311825, "logps/rejected": -2.4669196605682373, "loss": 0.5583, "nll_loss": 0.1395639330148697, "rewards/accuracies": 1.0, "rewards/chosen": -6.463914178311825e-05, "rewards/margins": 0.2466273456811905, "rewards/rejected": -0.2466919869184494, "step": 9817 }, { "epoch": 6.789764868603043, "grad_norm": 4.653459072113037, "learning_rate": 1.7834639618871986e-05, "log_odds_chosen": 11.221488952636719, "log_odds_ratio": -2.0016068447148427e-05, "logits/chosen": -0.535835862159729, "logits/rejected": -0.5027376413345337, "logps/chosen": -0.0002073202922474593, "logps/rejected": -2.5066559314727783, "loss": 0.4301, "nll_loss": 0.10751183331012726, "rewards/accuracies": 1.0, "rewards/chosen": -2.0732028133352287e-05, "rewards/margins": 0.25064486265182495, "rewards/rejected": -0.2506656050682068, "step": 9818 }, { "epoch": 6.79045643153527, "grad_norm": 4.887798309326172, "learning_rate": 1.7830797602581838e-05, "log_odds_chosen": 12.314960479736328, "log_odds_ratio": -7.485728565370664e-05, "logits/chosen": -0.49125027656555176, "logits/rejected": -0.5477063655853271, "logps/chosen": -0.0001772197283571586, "logps/rejected": -2.8946125507354736, "loss": 0.5249, "nll_loss": 0.1312086582183838, "rewards/accuracies": 1.0, "rewards/chosen": -1.7721975382301025e-05, "rewards/margins": 0.2894435226917267, "rewards/rejected": -0.289461225271225, "step": 9819 }, { "epoch": 6.791147994467496, "grad_norm": 4.644054889678955, "learning_rate": 1.7826955586291687e-05, "log_odds_chosen": 10.332023620605469, "log_odds_ratio": -0.00021751302119810134, "logits/chosen": -0.3514784574508667, "logits/rejected": -0.3961045742034912, "logps/chosen": -0.0003058880683965981, "logps/rejected": -2.000765562057495, "loss": 0.6306, "nll_loss": 0.15762130916118622, "rewards/accuracies": 1.0, "rewards/chosen": -3.0588809750042856e-05, "rewards/margins": 0.20004597306251526, "rewards/rejected": -0.20007656514644623, "step": 9820 }, { "epoch": 6.791839557399723, "grad_norm": 5.434691429138184, "learning_rate": 1.7823113570001536e-05, "log_odds_chosen": 10.024774551391602, "log_odds_ratio": -0.00034345826134085655, "logits/chosen": -0.4995710551738739, "logits/rejected": -0.48516565561294556, "logps/chosen": -0.000309604627545923, "logps/rejected": -1.9717507362365723, "loss": 1.0495, "nll_loss": 0.26234033703804016, "rewards/accuracies": 1.0, "rewards/chosen": -3.0960465664975345e-05, "rewards/margins": 0.19714412093162537, "rewards/rejected": -0.19717508554458618, "step": 9821 }, { "epoch": 6.79253112033195, "grad_norm": 6.43671178817749, "learning_rate": 1.781927155371139e-05, "log_odds_chosen": 11.061049461364746, "log_odds_ratio": -5.2428375056479126e-05, "logits/chosen": -0.0359373539686203, "logits/rejected": -0.08879576623439789, "logps/chosen": -0.0005665082135237753, "logps/rejected": -2.881434917449951, "loss": 0.6287, "nll_loss": 0.15717333555221558, "rewards/accuracies": 1.0, "rewards/chosen": -5.665082062478177e-05, "rewards/margins": 0.288086861371994, "rewards/rejected": -0.288143515586853, "step": 9822 }, { "epoch": 6.793222683264177, "grad_norm": 4.820457935333252, "learning_rate": 1.7815429537421238e-05, "log_odds_chosen": 12.112467765808105, "log_odds_ratio": -1.8505757907405496e-05, "logits/chosen": -0.6378794312477112, "logits/rejected": -0.6656926870346069, "logps/chosen": -0.00015574654389638454, "logps/rejected": -2.9929399490356445, "loss": 0.5178, "nll_loss": 0.12945497035980225, "rewards/accuracies": 1.0, "rewards/chosen": -1.5574656572425738e-05, "rewards/margins": 0.2992784082889557, "rewards/rejected": -0.29929399490356445, "step": 9823 }, { "epoch": 6.793914246196404, "grad_norm": 7.671065330505371, "learning_rate": 1.781158752113109e-05, "log_odds_chosen": 11.70947551727295, "log_odds_ratio": -1.660875932429917e-05, "logits/chosen": -0.1562579721212387, "logits/rejected": -0.2405303716659546, "logps/chosen": -8.214052650146186e-05, "logps/rejected": -2.337221384048462, "loss": 1.1624, "nll_loss": 0.29060617089271545, "rewards/accuracies": 1.0, "rewards/chosen": -8.214052286348306e-06, "rewards/margins": 0.23371393978595734, "rewards/rejected": -0.23372213542461395, "step": 9824 }, { "epoch": 6.7946058091286305, "grad_norm": 5.846847057342529, "learning_rate": 1.7807745504840942e-05, "log_odds_chosen": 10.920866966247559, "log_odds_ratio": -0.00011350302520440891, "logits/chosen": -0.21679610013961792, "logits/rejected": -0.20923295617103577, "logps/chosen": -0.00021579388703685254, "logps/rejected": -1.7754294872283936, "loss": 0.8206, "nll_loss": 0.20514589548110962, "rewards/accuracies": 1.0, "rewards/chosen": -2.1579389795078896e-05, "rewards/margins": 0.17752137780189514, "rewards/rejected": -0.17754295468330383, "step": 9825 }, { "epoch": 6.795297372060857, "grad_norm": 9.214680671691895, "learning_rate": 1.780390348855079e-05, "log_odds_chosen": 10.451539993286133, "log_odds_ratio": -0.0005183625034987926, "logits/chosen": -0.4053252041339874, "logits/rejected": -0.2930617928504944, "logps/chosen": -0.0009055271511897445, "logps/rejected": -2.129746437072754, "loss": 0.4832, "nll_loss": 0.12075144052505493, "rewards/accuracies": 1.0, "rewards/chosen": -9.055271948454902e-05, "rewards/margins": 0.2128840684890747, "rewards/rejected": -0.2129746377468109, "step": 9826 }, { "epoch": 6.795988934993084, "grad_norm": 7.412352085113525, "learning_rate": 1.7800061472260644e-05, "log_odds_chosen": 11.123895645141602, "log_odds_ratio": -3.581685814424418e-05, "logits/chosen": 0.04853195697069168, "logits/rejected": -0.1060643196105957, "logps/chosen": -0.0003896902489941567, "logps/rejected": -1.9042718410491943, "loss": 0.5712, "nll_loss": 0.14278869330883026, "rewards/accuracies": 1.0, "rewards/chosen": -3.896902489941567e-05, "rewards/margins": 0.19038820266723633, "rewards/rejected": -0.19042718410491943, "step": 9827 }, { "epoch": 6.796680497925311, "grad_norm": 6.623965263366699, "learning_rate": 1.7796219455970496e-05, "log_odds_chosen": 10.686233520507812, "log_odds_ratio": -9.329444583272561e-05, "logits/chosen": -0.33430057764053345, "logits/rejected": -0.3654334247112274, "logps/chosen": -0.0001892134314402938, "logps/rejected": -1.8002233505249023, "loss": 0.579, "nll_loss": 0.14473643898963928, "rewards/accuracies": 1.0, "rewards/chosen": -1.8921346054412425e-05, "rewards/margins": 0.18000340461730957, "rewards/rejected": -0.18002232909202576, "step": 9828 }, { "epoch": 6.797372060857538, "grad_norm": 5.111135482788086, "learning_rate": 1.7792377439680345e-05, "log_odds_chosen": 11.351503372192383, "log_odds_ratio": -2.5285688025178388e-05, "logits/chosen": -0.5366664528846741, "logits/rejected": -0.6100755929946899, "logps/chosen": -0.0002965263556689024, "logps/rejected": -2.0993173122406006, "loss": 0.6221, "nll_loss": 0.15552878379821777, "rewards/accuracies": 1.0, "rewards/chosen": -2.9652637749677524e-05, "rewards/margins": 0.2099020779132843, "rewards/rejected": -0.20993171632289886, "step": 9829 }, { "epoch": 6.798063623789765, "grad_norm": 8.023554801940918, "learning_rate": 1.7788535423390194e-05, "log_odds_chosen": 11.423932075500488, "log_odds_ratio": -7.935289613669738e-05, "logits/chosen": -0.13450314104557037, "logits/rejected": -0.3230983018875122, "logps/chosen": -0.0002636217977851629, "logps/rejected": -2.6877059936523438, "loss": 0.4252, "nll_loss": 0.10628889501094818, "rewards/accuracies": 1.0, "rewards/chosen": -2.636217868712265e-05, "rewards/margins": 0.26874423027038574, "rewards/rejected": -0.26877060532569885, "step": 9830 }, { "epoch": 6.7987551867219915, "grad_norm": 7.384605884552002, "learning_rate": 1.7784693407100047e-05, "log_odds_chosen": 10.56057357788086, "log_odds_ratio": -0.00026500289095565677, "logits/chosen": -0.6557856202125549, "logits/rejected": -0.6926838159561157, "logps/chosen": -0.0006864126771688461, "logps/rejected": -2.5699915885925293, "loss": 0.509, "nll_loss": 0.12722210586071014, "rewards/accuracies": 1.0, "rewards/chosen": -6.864126771688461e-05, "rewards/margins": 0.25693053007125854, "rewards/rejected": -0.2569991648197174, "step": 9831 }, { "epoch": 6.799446749654218, "grad_norm": 4.883238792419434, "learning_rate": 1.7780851390809896e-05, "log_odds_chosen": 11.451370239257812, "log_odds_ratio": -2.6813057047547773e-05, "logits/chosen": -0.5282101035118103, "logits/rejected": -0.6761583089828491, "logps/chosen": -0.0001240254787262529, "logps/rejected": -2.217674493789673, "loss": 0.5614, "nll_loss": 0.14034461975097656, "rewards/accuracies": 1.0, "rewards/chosen": -1.2402548236423172e-05, "rewards/margins": 0.22175507247447968, "rewards/rejected": -0.22176745533943176, "step": 9832 }, { "epoch": 6.800138312586445, "grad_norm": 8.146604537963867, "learning_rate": 1.777700937451975e-05, "log_odds_chosen": 9.993793487548828, "log_odds_ratio": -0.0001493952004238963, "logits/chosen": -0.7147838473320007, "logits/rejected": -0.7217827439308167, "logps/chosen": -0.00039108877535909414, "logps/rejected": -1.7512335777282715, "loss": 0.9134, "nll_loss": 0.22834616899490356, "rewards/accuracies": 1.0, "rewards/chosen": -3.910887608071789e-05, "rewards/margins": 0.17508423328399658, "rewards/rejected": -0.17512336373329163, "step": 9833 }, { "epoch": 6.800829875518672, "grad_norm": 7.877200603485107, "learning_rate": 1.77731673582296e-05, "log_odds_chosen": 10.837315559387207, "log_odds_ratio": -4.8736881581135094e-05, "logits/chosen": -0.28594496846199036, "logits/rejected": -0.2990525960922241, "logps/chosen": -0.0001039270282490179, "logps/rejected": -1.6275827884674072, "loss": 0.3505, "nll_loss": 0.08761301636695862, "rewards/accuracies": 1.0, "rewards/chosen": -1.039270318869967e-05, "rewards/margins": 0.16274788975715637, "rewards/rejected": -0.16275827586650848, "step": 9834 }, { "epoch": 6.801521438450899, "grad_norm": 8.999431610107422, "learning_rate": 1.776932534193945e-05, "log_odds_chosen": 11.797481536865234, "log_odds_ratio": -9.626210157875903e-06, "logits/chosen": -0.5511617064476013, "logits/rejected": -0.5186449885368347, "logps/chosen": -0.0001594604691490531, "logps/rejected": -2.653761863708496, "loss": 0.6771, "nll_loss": 0.16927213966846466, "rewards/accuracies": 1.0, "rewards/chosen": -1.5946045095915906e-05, "rewards/margins": 0.2653602361679077, "rewards/rejected": -0.26537618041038513, "step": 9835 }, { "epoch": 6.802213001383126, "grad_norm": 6.913736820220947, "learning_rate": 1.7765483325649302e-05, "log_odds_chosen": 10.623117446899414, "log_odds_ratio": -0.00021301039669197053, "logits/chosen": -0.3210628926753998, "logits/rejected": -0.3580225110054016, "logps/chosen": -0.00019910847186110914, "logps/rejected": -1.8200393915176392, "loss": 0.6378, "nll_loss": 0.15943297743797302, "rewards/accuracies": 1.0, "rewards/chosen": -1.9910847186110914e-05, "rewards/margins": 0.1819840371608734, "rewards/rejected": -0.1820039451122284, "step": 9836 }, { "epoch": 6.8029045643153525, "grad_norm": 11.309036254882812, "learning_rate": 1.7761641309359155e-05, "log_odds_chosen": 10.597155570983887, "log_odds_ratio": -4.918476042803377e-05, "logits/chosen": -0.39929133653640747, "logits/rejected": -0.5000516176223755, "logps/chosen": -0.00024138286244124174, "logps/rejected": -1.8075019121170044, "loss": 0.5687, "nll_loss": 0.14216798543930054, "rewards/accuracies": 1.0, "rewards/chosen": -2.4138287699315697e-05, "rewards/margins": 0.1807260662317276, "rewards/rejected": -0.18075020611286163, "step": 9837 }, { "epoch": 6.803596127247579, "grad_norm": 6.357234954833984, "learning_rate": 1.7757799293069004e-05, "log_odds_chosen": 10.066715240478516, "log_odds_ratio": -0.0007350252708420157, "logits/chosen": -0.39687326550483704, "logits/rejected": -0.40346938371658325, "logps/chosen": -0.0013349910732358694, "logps/rejected": -1.9573590755462646, "loss": 0.4703, "nll_loss": 0.11749234050512314, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013349908113013953, "rewards/margins": 0.1956024020910263, "rewards/rejected": -0.195735901594162, "step": 9838 }, { "epoch": 6.804287690179806, "grad_norm": 9.22315788269043, "learning_rate": 1.7753957276778853e-05, "log_odds_chosen": 10.840571403503418, "log_odds_ratio": -5.220840102992952e-05, "logits/chosen": -0.40534543991088867, "logits/rejected": -0.466486394405365, "logps/chosen": -0.00023522484116256237, "logps/rejected": -1.9446091651916504, "loss": 0.3722, "nll_loss": 0.0930478423833847, "rewards/accuracies": 1.0, "rewards/chosen": -2.352248520764988e-05, "rewards/margins": 0.19443738460540771, "rewards/rejected": -0.1944609135389328, "step": 9839 }, { "epoch": 6.804979253112033, "grad_norm": 5.794712066650391, "learning_rate": 1.7750115260488705e-05, "log_odds_chosen": 11.39574146270752, "log_odds_ratio": -2.222778130089864e-05, "logits/chosen": -0.6708822250366211, "logits/rejected": -0.7222901582717896, "logps/chosen": -0.00011192337842658162, "logps/rejected": -1.9927271604537964, "loss": 0.4359, "nll_loss": 0.10897647589445114, "rewards/accuracies": 1.0, "rewards/chosen": -1.1192338206456043e-05, "rewards/margins": 0.19926151633262634, "rewards/rejected": -0.19927272200584412, "step": 9840 }, { "epoch": 6.80567081604426, "grad_norm": 8.983512878417969, "learning_rate": 1.7746273244198554e-05, "log_odds_chosen": 10.862753868103027, "log_odds_ratio": -5.702022826881148e-05, "logits/chosen": -0.12994690239429474, "logits/rejected": -0.24351057410240173, "logps/chosen": -0.00021701918740291148, "logps/rejected": -1.955878496170044, "loss": 0.4519, "nll_loss": 0.11296963691711426, "rewards/accuracies": 1.0, "rewards/chosen": -2.1701916921301745e-05, "rewards/margins": 0.19556616246700287, "rewards/rejected": -0.1955878734588623, "step": 9841 }, { "epoch": 6.806362378976487, "grad_norm": 7.3522114753723145, "learning_rate": 1.7742431227908407e-05, "log_odds_chosen": 12.072206497192383, "log_odds_ratio": -1.040917504724348e-05, "logits/chosen": -0.44298988580703735, "logits/rejected": -0.4893496036529541, "logps/chosen": -0.00012864437303505838, "logps/rejected": -2.958308696746826, "loss": 0.6231, "nll_loss": 0.15577593445777893, "rewards/accuracies": 1.0, "rewards/chosen": -1.2864436939707957e-05, "rewards/margins": 0.295818030834198, "rewards/rejected": -0.2958308756351471, "step": 9842 }, { "epoch": 6.8070539419087135, "grad_norm": 4.939804553985596, "learning_rate": 1.773858921161826e-05, "log_odds_chosen": 10.557042121887207, "log_odds_ratio": -0.00011877430370077491, "logits/chosen": -0.2231357991695404, "logits/rejected": -0.3202582895755768, "logps/chosen": -0.0001812062255339697, "logps/rejected": -1.729665994644165, "loss": 0.8258, "nll_loss": 0.20642876625061035, "rewards/accuracies": 1.0, "rewards/chosen": -1.812062328099273e-05, "rewards/margins": 0.17294849455356598, "rewards/rejected": -0.1729666143655777, "step": 9843 }, { "epoch": 6.80774550484094, "grad_norm": 4.72721529006958, "learning_rate": 1.773474719532811e-05, "log_odds_chosen": 10.538349151611328, "log_odds_ratio": -7.440797344315797e-05, "logits/chosen": -0.3844439387321472, "logits/rejected": -0.457133024930954, "logps/chosen": -0.00031418955768458545, "logps/rejected": -1.9789464473724365, "loss": 0.3445, "nll_loss": 0.08611032366752625, "rewards/accuracies": 1.0, "rewards/chosen": -3.141895649605431e-05, "rewards/margins": 0.19786323606967926, "rewards/rejected": -0.19789466261863708, "step": 9844 }, { "epoch": 6.808437067773167, "grad_norm": 20.236831665039062, "learning_rate": 1.773090517903796e-05, "log_odds_chosen": 11.023157119750977, "log_odds_ratio": -2.067050627374556e-05, "logits/chosen": -0.7416025400161743, "logits/rejected": -0.808810830116272, "logps/chosen": -0.00022165890550240874, "logps/rejected": -2.2892677783966064, "loss": 0.7423, "nll_loss": 0.18556413054466248, "rewards/accuracies": 1.0, "rewards/chosen": -2.2165892005432397e-05, "rewards/margins": 0.2289046347141266, "rewards/rejected": -0.22892676293849945, "step": 9845 }, { "epoch": 6.809128630705394, "grad_norm": 6.052879810333252, "learning_rate": 1.7727063162747813e-05, "log_odds_chosen": 11.059163093566895, "log_odds_ratio": -0.0001862043864093721, "logits/chosen": -0.7816653251647949, "logits/rejected": -0.7609491348266602, "logps/chosen": -0.00040568297845311463, "logps/rejected": -2.6221461296081543, "loss": 0.4911, "nll_loss": 0.12275606393814087, "rewards/accuracies": 1.0, "rewards/chosen": -4.056830221088603e-05, "rewards/margins": 0.2621740400791168, "rewards/rejected": -0.2622146010398865, "step": 9846 }, { "epoch": 6.809820193637621, "grad_norm": 11.615273475646973, "learning_rate": 1.7723221146457662e-05, "log_odds_chosen": 11.36082649230957, "log_odds_ratio": -2.826635136443656e-05, "logits/chosen": -0.3724120855331421, "logits/rejected": -0.4282306432723999, "logps/chosen": -0.0002107978070853278, "logps/rejected": -2.5040338039398193, "loss": 0.618, "nll_loss": 0.1544925719499588, "rewards/accuracies": 1.0, "rewards/chosen": -2.107978070853278e-05, "rewards/margins": 0.25038230419158936, "rewards/rejected": -0.25040340423583984, "step": 9847 }, { "epoch": 6.810511756569848, "grad_norm": 5.582864284515381, "learning_rate": 1.771937913016751e-05, "log_odds_chosen": 10.69006061553955, "log_odds_ratio": -0.00013318503624759614, "logits/chosen": -0.09825585782527924, "logits/rejected": -0.11967509239912033, "logps/chosen": -0.0003724147391039878, "logps/rejected": -2.130486488342285, "loss": 0.442, "nll_loss": 0.11048276722431183, "rewards/accuracies": 1.0, "rewards/chosen": -3.7241476093186066e-05, "rewards/margins": 0.21301141381263733, "rewards/rejected": -0.21304863691329956, "step": 9848 }, { "epoch": 6.8112033195020745, "grad_norm": 9.837779998779297, "learning_rate": 1.7715537113877364e-05, "log_odds_chosen": 10.194995880126953, "log_odds_ratio": -0.000401560275349766, "logits/chosen": -0.651020884513855, "logits/rejected": -0.6786574721336365, "logps/chosen": -0.0005286269588395953, "logps/rejected": -1.7069580554962158, "loss": 0.5149, "nll_loss": 0.12868283689022064, "rewards/accuracies": 1.0, "rewards/chosen": -5.286269879434258e-05, "rewards/margins": 0.1706429421901703, "rewards/rejected": -0.17069579660892487, "step": 9849 }, { "epoch": 6.811894882434301, "grad_norm": 6.8782639503479, "learning_rate": 1.7711695097587213e-05, "log_odds_chosen": 10.906352996826172, "log_odds_ratio": -3.27380039379932e-05, "logits/chosen": -0.1589619219303131, "logits/rejected": -0.1439802497625351, "logps/chosen": -0.0005701867048628628, "logps/rejected": -2.554690361022949, "loss": 0.8303, "nll_loss": 0.20757944881916046, "rewards/accuracies": 1.0, "rewards/chosen": -5.7018671213882044e-05, "rewards/margins": 0.2554119825363159, "rewards/rejected": -0.25546902418136597, "step": 9850 }, { "epoch": 6.812586445366528, "grad_norm": 4.783719539642334, "learning_rate": 1.7707853081297065e-05, "log_odds_chosen": 10.361427307128906, "log_odds_ratio": -8.810401050141081e-05, "logits/chosen": -0.48291251063346863, "logits/rejected": -0.6110714077949524, "logps/chosen": -0.00023719553428236395, "logps/rejected": -1.6287189722061157, "loss": 0.5322, "nll_loss": 0.1330510973930359, "rewards/accuracies": 1.0, "rewards/chosen": -2.3719554519630037e-05, "rewards/margins": 0.16284817457199097, "rewards/rejected": -0.16287189722061157, "step": 9851 }, { "epoch": 6.813278008298755, "grad_norm": 4.849263668060303, "learning_rate": 1.7704011065006918e-05, "log_odds_chosen": 10.773094177246094, "log_odds_ratio": -4.350581002654508e-05, "logits/chosen": -0.041710179299116135, "logits/rejected": -0.09449899941682816, "logps/chosen": -0.0002241919719381258, "logps/rejected": -2.37168025970459, "loss": 0.5471, "nll_loss": 0.1367701292037964, "rewards/accuracies": 1.0, "rewards/chosen": -2.241919719381258e-05, "rewards/margins": 0.2371455878019333, "rewards/rejected": -0.23716801404953003, "step": 9852 }, { "epoch": 6.813969571230982, "grad_norm": 6.0411505699157715, "learning_rate": 1.7700169048716767e-05, "log_odds_chosen": 10.641590118408203, "log_odds_ratio": -3.7356308894231915e-05, "logits/chosen": -0.5324625372886658, "logits/rejected": -0.6158381700515747, "logps/chosen": -0.00016988397692330182, "logps/rejected": -1.6251307725906372, "loss": 0.402, "nll_loss": 0.10049900412559509, "rewards/accuracies": 1.0, "rewards/chosen": -1.698839696473442e-05, "rewards/margins": 0.16249608993530273, "rewards/rejected": -0.16251307725906372, "step": 9853 }, { "epoch": 6.814661134163209, "grad_norm": 6.3942646980285645, "learning_rate": 1.769632703242662e-05, "log_odds_chosen": 10.55441665649414, "log_odds_ratio": -5.743156361859292e-05, "logits/chosen": -0.16442380845546722, "logits/rejected": -0.19287921488285065, "logps/chosen": -0.00024357457004953176, "logps/rejected": -2.0037691593170166, "loss": 0.5275, "nll_loss": 0.1318666785955429, "rewards/accuracies": 1.0, "rewards/chosen": -2.435745955153834e-05, "rewards/margins": 0.20035257935523987, "rewards/rejected": -0.20037691295146942, "step": 9854 }, { "epoch": 6.8153526970954355, "grad_norm": 8.117294311523438, "learning_rate": 1.7692485016136472e-05, "log_odds_chosen": 10.78452205657959, "log_odds_ratio": -4.109352812520228e-05, "logits/chosen": -0.45345538854599, "logits/rejected": -0.5758625268936157, "logps/chosen": -0.00014015237684361637, "logps/rejected": -1.9844865798950195, "loss": 0.5024, "nll_loss": 0.1255882978439331, "rewards/accuracies": 1.0, "rewards/chosen": -1.4015236956765875e-05, "rewards/margins": 0.19843465089797974, "rewards/rejected": -0.19844867289066315, "step": 9855 }, { "epoch": 6.816044260027662, "grad_norm": 8.956670761108398, "learning_rate": 1.768864299984632e-05, "log_odds_chosen": 10.297235488891602, "log_odds_ratio": -0.0001858493487816304, "logits/chosen": -0.4566280245780945, "logits/rejected": -0.49013885855674744, "logps/chosen": -0.0009861596627160907, "logps/rejected": -1.7377392053604126, "loss": 0.754, "nll_loss": 0.18847453594207764, "rewards/accuracies": 1.0, "rewards/chosen": -9.861597209237516e-05, "rewards/margins": 0.1736753135919571, "rewards/rejected": -0.17377394437789917, "step": 9856 }, { "epoch": 6.816735822959889, "grad_norm": 3.7923271656036377, "learning_rate": 1.768480098355617e-05, "log_odds_chosen": 11.093573570251465, "log_odds_ratio": -8.753328438615426e-05, "logits/chosen": -0.3417430818080902, "logits/rejected": -0.33952879905700684, "logps/chosen": -0.00043928029481321573, "logps/rejected": -2.047579765319824, "loss": 0.2997, "nll_loss": 0.0749170333147049, "rewards/accuracies": 1.0, "rewards/chosen": -4.392802657093853e-05, "rewards/margins": 0.20471405982971191, "rewards/rejected": -0.20475798845291138, "step": 9857 }, { "epoch": 6.817427385892116, "grad_norm": 4.085455894470215, "learning_rate": 1.7680958967266022e-05, "log_odds_chosen": 11.034650802612305, "log_odds_ratio": -5.613011308014393e-05, "logits/chosen": -0.36551499366760254, "logits/rejected": -0.37129878997802734, "logps/chosen": -0.00019917613826692104, "logps/rejected": -2.1950621604919434, "loss": 0.4682, "nll_loss": 0.11703965067863464, "rewards/accuracies": 1.0, "rewards/chosen": -1.9917613826692104e-05, "rewards/margins": 0.2194863259792328, "rewards/rejected": -0.21950623393058777, "step": 9858 }, { "epoch": 6.818118948824343, "grad_norm": 4.8493218421936035, "learning_rate": 1.767711695097587e-05, "log_odds_chosen": 11.219084739685059, "log_odds_ratio": -2.1206218661973253e-05, "logits/chosen": -0.6236893534660339, "logits/rejected": -0.6588972806930542, "logps/chosen": -0.0002190757222706452, "logps/rejected": -2.3729987144470215, "loss": 0.4707, "nll_loss": 0.11766964197158813, "rewards/accuracies": 1.0, "rewards/chosen": -2.190757186326664e-05, "rewards/margins": 0.23727793991565704, "rewards/rejected": -0.2372998595237732, "step": 9859 }, { "epoch": 6.81881051175657, "grad_norm": 5.451807975769043, "learning_rate": 1.7673274934685724e-05, "log_odds_chosen": 10.989908218383789, "log_odds_ratio": -2.3420301658916287e-05, "logits/chosen": 0.03291507437825203, "logits/rejected": -0.09146300703287125, "logps/chosen": -0.00030668292311020195, "logps/rejected": -2.314227342605591, "loss": 0.7565, "nll_loss": 0.18912914395332336, "rewards/accuracies": 1.0, "rewards/chosen": -3.0668292311020195e-05, "rewards/margins": 0.2313920557498932, "rewards/rejected": -0.23142272233963013, "step": 9860 }, { "epoch": 6.819502074688796, "grad_norm": 9.567743301391602, "learning_rate": 1.7669432918395576e-05, "log_odds_chosen": 12.856329917907715, "log_odds_ratio": -5.586166480497923e-06, "logits/chosen": -0.06611060351133347, "logits/rejected": -0.3514470160007477, "logps/chosen": -8.464483835268766e-05, "logps/rejected": -3.357191324234009, "loss": 0.7163, "nll_loss": 0.1790757179260254, "rewards/accuracies": 1.0, "rewards/chosen": -8.464484380965587e-06, "rewards/margins": 0.33571070432662964, "rewards/rejected": -0.33571916818618774, "step": 9861 }, { "epoch": 6.820193637621023, "grad_norm": 6.616390228271484, "learning_rate": 1.7665590902105425e-05, "log_odds_chosen": 11.775555610656738, "log_odds_ratio": -2.4208513423218392e-05, "logits/chosen": -0.362802118062973, "logits/rejected": -0.44630053639411926, "logps/chosen": -0.00015943124890327454, "logps/rejected": -2.8059206008911133, "loss": 0.4418, "nll_loss": 0.1104571521282196, "rewards/accuracies": 1.0, "rewards/chosen": -1.5943125617923215e-05, "rewards/margins": 0.2805761396884918, "rewards/rejected": -0.28059208393096924, "step": 9862 }, { "epoch": 6.82088520055325, "grad_norm": 5.2259345054626465, "learning_rate": 1.7661748885815278e-05, "log_odds_chosen": 10.420143127441406, "log_odds_ratio": -0.00035389253753237426, "logits/chosen": -0.21969397366046906, "logits/rejected": -0.3286452889442444, "logps/chosen": -0.00023069702729117125, "logps/rejected": -2.0163958072662354, "loss": 0.6497, "nll_loss": 0.1623815894126892, "rewards/accuracies": 1.0, "rewards/chosen": -2.3069704184308648e-05, "rewards/margins": 0.20161652565002441, "rewards/rejected": -0.2016395926475525, "step": 9863 }, { "epoch": 6.821576763485477, "grad_norm": 6.109011173248291, "learning_rate": 1.765790686952513e-05, "log_odds_chosen": 10.854312896728516, "log_odds_ratio": -6.197398761287332e-05, "logits/chosen": -0.7911338806152344, "logits/rejected": -0.7502856254577637, "logps/chosen": -0.00017530944023746997, "logps/rejected": -1.947382926940918, "loss": 0.3902, "nll_loss": 0.09753170609474182, "rewards/accuracies": 1.0, "rewards/chosen": -1.7530943296151236e-05, "rewards/margins": 0.1947207748889923, "rewards/rejected": -0.19473831355571747, "step": 9864 }, { "epoch": 6.822268326417704, "grad_norm": 7.206421852111816, "learning_rate": 1.765406485323498e-05, "log_odds_chosen": 11.161964416503906, "log_odds_ratio": -4.741992961498909e-05, "logits/chosen": -0.5804327726364136, "logits/rejected": -0.677083432674408, "logps/chosen": -0.0005471400218084455, "logps/rejected": -2.5652215480804443, "loss": 0.5785, "nll_loss": 0.14463266730308533, "rewards/accuracies": 1.0, "rewards/chosen": -5.471400072565302e-05, "rewards/margins": 0.25646743178367615, "rewards/rejected": -0.25652214884757996, "step": 9865 }, { "epoch": 6.822959889349931, "grad_norm": 5.182133197784424, "learning_rate": 1.7650222836944828e-05, "log_odds_chosen": 10.873984336853027, "log_odds_ratio": -4.0695875213714316e-05, "logits/chosen": -0.9530771374702454, "logits/rejected": -0.6770225763320923, "logps/chosen": -0.00030901507125236094, "logps/rejected": -2.1355810165405273, "loss": 0.4847, "nll_loss": 0.1211652159690857, "rewards/accuracies": 1.0, "rewards/chosen": -3.090150858042762e-05, "rewards/margins": 0.21352717280387878, "rewards/rejected": -0.21355809271335602, "step": 9866 }, { "epoch": 6.823651452282157, "grad_norm": 6.041674613952637, "learning_rate": 1.764638082065468e-05, "log_odds_chosen": 10.147492408752441, "log_odds_ratio": -0.00024508449132554233, "logits/chosen": -0.27030548453330994, "logits/rejected": -0.3851109743118286, "logps/chosen": -0.0005953733925707638, "logps/rejected": -2.3240535259246826, "loss": 0.5796, "nll_loss": 0.14487367868423462, "rewards/accuracies": 1.0, "rewards/chosen": -5.953734216745943e-05, "rewards/margins": 0.2323458194732666, "rewards/rejected": -0.2324053794145584, "step": 9867 }, { "epoch": 6.824343015214384, "grad_norm": 6.266101837158203, "learning_rate": 1.764253880436453e-05, "log_odds_chosen": 11.187174797058105, "log_odds_ratio": -3.098931483691558e-05, "logits/chosen": -0.15763667225837708, "logits/rejected": -0.20958489179611206, "logps/chosen": -8.974019146990031e-05, "logps/rejected": -1.9341108798980713, "loss": 0.4785, "nll_loss": 0.11961343139410019, "rewards/accuracies": 1.0, "rewards/chosen": -8.97401878319215e-06, "rewards/margins": 0.19340214133262634, "rewards/rejected": -0.19341108202934265, "step": 9868 }, { "epoch": 6.825034578146611, "grad_norm": 4.985950946807861, "learning_rate": 1.7638696788074382e-05, "log_odds_chosen": 10.878445625305176, "log_odds_ratio": -4.755319969262928e-05, "logits/chosen": -0.18200913071632385, "logits/rejected": -0.36122196912765503, "logps/chosen": -0.0004627583548426628, "logps/rejected": -2.711005210876465, "loss": 1.1427, "nll_loss": 0.28566545248031616, "rewards/accuracies": 1.0, "rewards/chosen": -4.627583621186204e-05, "rewards/margins": 0.2710542678833008, "rewards/rejected": -0.2711005210876465, "step": 9869 }, { "epoch": 6.825726141078838, "grad_norm": 5.5031418800354, "learning_rate": 1.7634854771784235e-05, "log_odds_chosen": 11.222797393798828, "log_odds_ratio": -2.3735010472591966e-05, "logits/chosen": -0.200686514377594, "logits/rejected": -0.21383801102638245, "logps/chosen": -0.00019569243886508048, "logps/rejected": -2.272225856781006, "loss": 0.5644, "nll_loss": 0.1411091387271881, "rewards/accuracies": 1.0, "rewards/chosen": -1.9569246433093213e-05, "rewards/margins": 0.22720301151275635, "rewards/rejected": -0.22722259163856506, "step": 9870 }, { "epoch": 6.826417704011065, "grad_norm": 15.147745132446289, "learning_rate": 1.7631012755494084e-05, "log_odds_chosen": 11.56512451171875, "log_odds_ratio": -2.786301774904132e-05, "logits/chosen": -0.873394250869751, "logits/rejected": -0.8722903728485107, "logps/chosen": -0.00020476612553466111, "logps/rejected": -2.6685361862182617, "loss": 0.7514, "nll_loss": 0.18784187734127045, "rewards/accuracies": 1.0, "rewards/chosen": -2.0476612917263992e-05, "rewards/margins": 0.2668331265449524, "rewards/rejected": -0.26685360074043274, "step": 9871 }, { "epoch": 6.827109266943292, "grad_norm": 15.383719444274902, "learning_rate": 1.7627170739203936e-05, "log_odds_chosen": 10.781224250793457, "log_odds_ratio": -0.00011232474935241044, "logits/chosen": -0.5298612117767334, "logits/rejected": -0.44690805673599243, "logps/chosen": -0.0005620787269435823, "logps/rejected": -2.7007107734680176, "loss": 0.8028, "nll_loss": 0.20069590210914612, "rewards/accuracies": 1.0, "rewards/chosen": -5.620787123916671e-05, "rewards/margins": 0.2700148820877075, "rewards/rejected": -0.2700711190700531, "step": 9872 }, { "epoch": 6.827800829875518, "grad_norm": 7.158430576324463, "learning_rate": 1.762332872291379e-05, "log_odds_chosen": 9.014758110046387, "log_odds_ratio": -0.0035419976338744164, "logits/chosen": -0.386219322681427, "logits/rejected": -0.1570458859205246, "logps/chosen": -0.0029637387488037348, "logps/rejected": -1.7540522813796997, "loss": 0.7435, "nll_loss": 0.1855314075946808, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029637388070113957, "rewards/margins": 0.17510885000228882, "rewards/rejected": -0.17540523409843445, "step": 9873 }, { "epoch": 6.828492392807745, "grad_norm": 5.015327453613281, "learning_rate": 1.7619486706623638e-05, "log_odds_chosen": 10.229984283447266, "log_odds_ratio": -0.00017448162543587387, "logits/chosen": -0.364147424697876, "logits/rejected": -0.4106100797653198, "logps/chosen": -0.0010063423542305827, "logps/rejected": -1.9840210676193237, "loss": 0.5084, "nll_loss": 0.12709316611289978, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010063423542305827, "rewards/margins": 0.19830146431922913, "rewards/rejected": -0.19840210676193237, "step": 9874 }, { "epoch": 6.829183955739972, "grad_norm": 8.063447952270508, "learning_rate": 1.7615644690333487e-05, "log_odds_chosen": 9.436758995056152, "log_odds_ratio": -0.002550853881984949, "logits/chosen": -0.4666481912136078, "logits/rejected": -0.5314819812774658, "logps/chosen": -0.004965066909790039, "logps/rejected": -2.0318586826324463, "loss": 0.528, "nll_loss": 0.13174238801002502, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004965066909790039, "rewards/margins": 0.20268937945365906, "rewards/rejected": -0.20318587124347687, "step": 9875 }, { "epoch": 6.829875518672199, "grad_norm": 7.094956398010254, "learning_rate": 1.761180267404334e-05, "log_odds_chosen": 10.774164199829102, "log_odds_ratio": -4.405742947710678e-05, "logits/chosen": -0.8490189909934998, "logits/rejected": -0.9273943901062012, "logps/chosen": -0.00021111921523697674, "logps/rejected": -1.9229071140289307, "loss": 0.4135, "nll_loss": 0.10335968434810638, "rewards/accuracies": 1.0, "rewards/chosen": -2.1111922251293436e-05, "rewards/margins": 0.19226960837841034, "rewards/rejected": -0.19229072332382202, "step": 9876 }, { "epoch": 6.830567081604426, "grad_norm": 3.5385513305664062, "learning_rate": 1.7607960657753188e-05, "log_odds_chosen": 11.042720794677734, "log_odds_ratio": -7.377246947726235e-05, "logits/chosen": -0.4456542730331421, "logits/rejected": -0.48598921298980713, "logps/chosen": -0.0011691419640555978, "logps/rejected": -2.453537702560425, "loss": 0.3032, "nll_loss": 0.07579068839550018, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011691421241266653, "rewards/margins": 0.24523687362670898, "rewards/rejected": -0.2453537881374359, "step": 9877 }, { "epoch": 6.8312586445366525, "grad_norm": 6.717398166656494, "learning_rate": 1.760411864146304e-05, "log_odds_chosen": 11.788796424865723, "log_odds_ratio": -3.9101567381294444e-05, "logits/chosen": -0.32864150404930115, "logits/rejected": -0.4035564064979553, "logps/chosen": -0.00012067243369529024, "logps/rejected": -2.3607280254364014, "loss": 0.7146, "nll_loss": 0.17865239083766937, "rewards/accuracies": 1.0, "rewards/chosen": -1.2067244824720547e-05, "rewards/margins": 0.2360607385635376, "rewards/rejected": -0.23607280850410461, "step": 9878 }, { "epoch": 6.831950207468879, "grad_norm": 6.486326217651367, "learning_rate": 1.7600276625172893e-05, "log_odds_chosen": 10.430749893188477, "log_odds_ratio": -0.00010449629189679399, "logits/chosen": -0.5080342292785645, "logits/rejected": -0.5649960041046143, "logps/chosen": -0.00015258403436746448, "logps/rejected": -1.7463585138320923, "loss": 0.4466, "nll_loss": 0.11164919286966324, "rewards/accuracies": 1.0, "rewards/chosen": -1.5258403436746448e-05, "rewards/margins": 0.1746205985546112, "rewards/rejected": -0.1746358573436737, "step": 9879 }, { "epoch": 6.832641770401106, "grad_norm": 4.887731552124023, "learning_rate": 1.7596434608882742e-05, "log_odds_chosen": 10.540019035339355, "log_odds_ratio": -9.059869626071304e-05, "logits/chosen": -0.37279269099235535, "logits/rejected": -0.41054567694664, "logps/chosen": -0.0002726602542679757, "logps/rejected": -1.7142304182052612, "loss": 0.6504, "nll_loss": 0.16259139776229858, "rewards/accuracies": 1.0, "rewards/chosen": -2.7266023607808165e-05, "rewards/margins": 0.17139577865600586, "rewards/rejected": -0.1714230477809906, "step": 9880 }, { "epoch": 6.833333333333333, "grad_norm": 7.472131729125977, "learning_rate": 1.7592592592592595e-05, "log_odds_chosen": 10.181229591369629, "log_odds_ratio": -0.0009723737603053451, "logits/chosen": -0.6690306067466736, "logits/rejected": -0.6618920564651489, "logps/chosen": -0.0008291637059301138, "logps/rejected": -2.0386102199554443, "loss": 0.6148, "nll_loss": 0.15359333157539368, "rewards/accuracies": 1.0, "rewards/chosen": -8.291636913781986e-05, "rewards/margins": 0.20377810299396515, "rewards/rejected": -0.20386101305484772, "step": 9881 }, { "epoch": 6.83402489626556, "grad_norm": 5.243133068084717, "learning_rate": 1.7588750576302447e-05, "log_odds_chosen": 11.20293140411377, "log_odds_ratio": -6.37612902210094e-05, "logits/chosen": -0.4984665513038635, "logits/rejected": -0.5271502733230591, "logps/chosen": -0.0004649769398383796, "logps/rejected": -2.8009819984436035, "loss": 0.7215, "nll_loss": 0.18037012219429016, "rewards/accuracies": 1.0, "rewards/chosen": -4.649769834941253e-05, "rewards/margins": 0.28005170822143555, "rewards/rejected": -0.28009819984436035, "step": 9882 }, { "epoch": 6.834716459197787, "grad_norm": 8.580633163452148, "learning_rate": 1.7584908560012296e-05, "log_odds_chosen": 11.648361206054688, "log_odds_ratio": -5.3410247346619144e-05, "logits/chosen": -0.5283809900283813, "logits/rejected": -0.5525239109992981, "logps/chosen": -0.0001951494487002492, "logps/rejected": -2.4789934158325195, "loss": 0.7252, "nll_loss": 0.18128308653831482, "rewards/accuracies": 1.0, "rewards/chosen": -1.9514944142429158e-05, "rewards/margins": 0.24787981808185577, "rewards/rejected": -0.24789933860301971, "step": 9883 }, { "epoch": 6.8354080221300135, "grad_norm": 6.00348424911499, "learning_rate": 1.7581066543722145e-05, "log_odds_chosen": 10.09213638305664, "log_odds_ratio": -0.001151418313384056, "logits/chosen": -0.7593430280685425, "logits/rejected": -0.7525742650032043, "logps/chosen": -0.0006862524314783514, "logps/rejected": -1.8011060953140259, "loss": 0.7752, "nll_loss": 0.19369091093540192, "rewards/accuracies": 1.0, "rewards/chosen": -6.862523878226057e-05, "rewards/margins": 0.18004199862480164, "rewards/rejected": -0.1801106035709381, "step": 9884 }, { "epoch": 6.83609958506224, "grad_norm": 4.2488179206848145, "learning_rate": 1.7577224527431994e-05, "log_odds_chosen": 10.401985168457031, "log_odds_ratio": -5.8780875406228006e-05, "logits/chosen": -0.4401627480983734, "logits/rejected": -0.5692065954208374, "logps/chosen": -0.00022449388052336872, "logps/rejected": -1.7061302661895752, "loss": 0.4483, "nll_loss": 0.1120588630437851, "rewards/accuracies": 1.0, "rewards/chosen": -2.2449388779932633e-05, "rewards/margins": 0.1705905795097351, "rewards/rejected": -0.17061302065849304, "step": 9885 }, { "epoch": 6.836791147994467, "grad_norm": 5.48073148727417, "learning_rate": 1.7573382511141847e-05, "log_odds_chosen": 11.850058555603027, "log_odds_ratio": -6.48322602501139e-05, "logits/chosen": -0.0961153507232666, "logits/rejected": -0.2069375216960907, "logps/chosen": -0.00020627223420888186, "logps/rejected": -2.962965965270996, "loss": 0.5813, "nll_loss": 0.14532530307769775, "rewards/accuracies": 1.0, "rewards/chosen": -2.0627223420888186e-05, "rewards/margins": 0.2962760031223297, "rewards/rejected": -0.296296626329422, "step": 9886 }, { "epoch": 6.837482710926694, "grad_norm": 6.750162124633789, "learning_rate": 1.75695404948517e-05, "log_odds_chosen": 11.15122127532959, "log_odds_ratio": -2.2114192688604817e-05, "logits/chosen": -0.5288415551185608, "logits/rejected": -0.5522376894950867, "logps/chosen": -0.00011381346121197566, "logps/rejected": -1.9328322410583496, "loss": 0.519, "nll_loss": 0.12975947558879852, "rewards/accuracies": 1.0, "rewards/chosen": -1.1381346666894387e-05, "rewards/margins": 0.19327184557914734, "rewards/rejected": -0.19328321516513824, "step": 9887 }, { "epoch": 6.838174273858921, "grad_norm": 5.236433029174805, "learning_rate": 1.7565698478561548e-05, "log_odds_chosen": 11.429391860961914, "log_odds_ratio": -1.779601734597236e-05, "logits/chosen": -0.3569970726966858, "logits/rejected": -0.42728957533836365, "logps/chosen": -0.0001754456607159227, "logps/rejected": -2.4692459106445312, "loss": 0.595, "nll_loss": 0.1487591713666916, "rewards/accuracies": 1.0, "rewards/chosen": -1.7544567526783794e-05, "rewards/margins": 0.2469070553779602, "rewards/rejected": -0.24692460894584656, "step": 9888 }, { "epoch": 6.838865836791148, "grad_norm": 7.0818772315979, "learning_rate": 1.75618564622714e-05, "log_odds_chosen": 10.782758712768555, "log_odds_ratio": -3.2990428735502064e-05, "logits/chosen": -0.7973594069480896, "logits/rejected": -0.8417778611183167, "logps/chosen": -0.00016431367839686573, "logps/rejected": -1.7458895444869995, "loss": 0.3121, "nll_loss": 0.07801300287246704, "rewards/accuracies": 1.0, "rewards/chosen": -1.6431367839686573e-05, "rewards/margins": 0.17457252740859985, "rewards/rejected": -0.17458894848823547, "step": 9889 }, { "epoch": 6.8395573997233745, "grad_norm": 7.803895473480225, "learning_rate": 1.7558014445981253e-05, "log_odds_chosen": 9.956562995910645, "log_odds_ratio": -0.00033388679730705917, "logits/chosen": -0.541756272315979, "logits/rejected": -0.5874733924865723, "logps/chosen": -0.0005440291715785861, "logps/rejected": -1.9587575197219849, "loss": 0.6511, "nll_loss": 0.1627349555492401, "rewards/accuracies": 1.0, "rewards/chosen": -5.440291715785861e-05, "rewards/margins": 0.1958213448524475, "rewards/rejected": -0.19587576389312744, "step": 9890 }, { "epoch": 6.840248962655601, "grad_norm": 9.624287605285645, "learning_rate": 1.7554172429691102e-05, "log_odds_chosen": 11.180758476257324, "log_odds_ratio": -0.00014281031326390803, "logits/chosen": -0.6653587222099304, "logits/rejected": -0.7742767930030823, "logps/chosen": -0.0012785769067704678, "logps/rejected": -3.0060606002807617, "loss": 0.6241, "nll_loss": 0.1560094654560089, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012785769649781287, "rewards/margins": 0.3004781901836395, "rewards/rejected": -0.30060604214668274, "step": 9891 }, { "epoch": 6.840940525587828, "grad_norm": 8.010558128356934, "learning_rate": 1.7550330413400954e-05, "log_odds_chosen": 9.888006210327148, "log_odds_ratio": -7.842005288694054e-05, "logits/chosen": -0.14711233973503113, "logits/rejected": -0.025510773062705994, "logps/chosen": -0.00022618676302954555, "logps/rejected": -1.6812245845794678, "loss": 0.7951, "nll_loss": 0.19875632226467133, "rewards/accuracies": 1.0, "rewards/chosen": -2.2618674847763032e-05, "rewards/margins": 0.16809983551502228, "rewards/rejected": -0.16812245547771454, "step": 9892 }, { "epoch": 6.841632088520055, "grad_norm": 5.166340351104736, "learning_rate": 1.7546488397110804e-05, "log_odds_chosen": 11.100112915039062, "log_odds_ratio": -0.00014072639169171453, "logits/chosen": -0.5190063714981079, "logits/rejected": -0.5869351029396057, "logps/chosen": -0.00019831795361824334, "logps/rejected": -2.311744213104248, "loss": 0.8563, "nll_loss": 0.21406801044940948, "rewards/accuracies": 1.0, "rewards/chosen": -1.983179390663281e-05, "rewards/margins": 0.23115460574626923, "rewards/rejected": -0.23117442429065704, "step": 9893 }, { "epoch": 6.842323651452282, "grad_norm": 7.673447132110596, "learning_rate": 1.7542646380820653e-05, "log_odds_chosen": 10.821599960327148, "log_odds_ratio": -7.789761002641171e-05, "logits/chosen": -0.5115397572517395, "logits/rejected": -0.5017709732055664, "logps/chosen": -0.00034118720213882625, "logps/rejected": -2.4737889766693115, "loss": 0.6559, "nll_loss": 0.1639774739742279, "rewards/accuracies": 1.0, "rewards/chosen": -3.411872239666991e-05, "rewards/margins": 0.24734479188919067, "rewards/rejected": -0.2473789006471634, "step": 9894 }, { "epoch": 6.843015214384509, "grad_norm": 4.814943790435791, "learning_rate": 1.7538804364530505e-05, "log_odds_chosen": 10.7841796875, "log_odds_ratio": -0.00010694364755181596, "logits/chosen": 0.014818176627159119, "logits/rejected": -0.06501185148954391, "logps/chosen": -0.00022933971195016056, "logps/rejected": -2.209062337875366, "loss": 0.5032, "nll_loss": 0.12579664587974548, "rewards/accuracies": 1.0, "rewards/chosen": -2.2933971195016056e-05, "rewards/margins": 0.220883309841156, "rewards/rejected": -0.22090624272823334, "step": 9895 }, { "epoch": 6.8437067773167355, "grad_norm": 4.384773254394531, "learning_rate": 1.7534962348240357e-05, "log_odds_chosen": 10.018265724182129, "log_odds_ratio": -7.031872519291937e-05, "logits/chosen": -0.062746062874794, "logits/rejected": -0.10227086395025253, "logps/chosen": -0.00020715226128231734, "logps/rejected": -1.2999534606933594, "loss": 0.6993, "nll_loss": 0.1748102903366089, "rewards/accuracies": 1.0, "rewards/chosen": -2.0715227947221138e-05, "rewards/margins": 0.1299746334552765, "rewards/rejected": -0.12999534606933594, "step": 9896 }, { "epoch": 6.844398340248962, "grad_norm": 7.973667621612549, "learning_rate": 1.7531120331950207e-05, "log_odds_chosen": 10.680398941040039, "log_odds_ratio": -7.031670247670263e-05, "logits/chosen": -0.6380209922790527, "logits/rejected": -0.640826404094696, "logps/chosen": -0.000123622827231884, "logps/rejected": -1.6378414630889893, "loss": 0.6583, "nll_loss": 0.16457867622375488, "rewards/accuracies": 1.0, "rewards/chosen": -1.2362283086986281e-05, "rewards/margins": 0.16377176344394684, "rewards/rejected": -0.16378413140773773, "step": 9897 }, { "epoch": 6.845089903181189, "grad_norm": 6.426230430603027, "learning_rate": 1.752727831566006e-05, "log_odds_chosen": 10.495553970336914, "log_odds_ratio": -6.285425479291007e-05, "logits/chosen": -0.44061386585235596, "logits/rejected": -0.5182846188545227, "logps/chosen": -0.00020382177899591625, "logps/rejected": -1.9904173612594604, "loss": 0.4765, "nll_loss": 0.11910919845104218, "rewards/accuracies": 1.0, "rewards/chosen": -2.0382178263389505e-05, "rewards/margins": 0.1990213543176651, "rewards/rejected": -0.19904175400733948, "step": 9898 }, { "epoch": 6.845781466113416, "grad_norm": 4.594810962677002, "learning_rate": 1.752343629936991e-05, "log_odds_chosen": 11.263570785522461, "log_odds_ratio": -4.390040703583509e-05, "logits/chosen": -0.26155662536621094, "logits/rejected": -0.4153081178665161, "logps/chosen": -0.00017742003547027707, "logps/rejected": -2.175046920776367, "loss": 0.4413, "nll_loss": 0.11031512916088104, "rewards/accuracies": 1.0, "rewards/chosen": -1.774200427462347e-05, "rewards/margins": 0.21748696267604828, "rewards/rejected": -0.21750468015670776, "step": 9899 }, { "epoch": 6.846473029045643, "grad_norm": 6.1752028465271, "learning_rate": 1.751959428307976e-05, "log_odds_chosen": 9.894766807556152, "log_odds_ratio": -0.0013582361862063408, "logits/chosen": -0.18945205211639404, "logits/rejected": -0.2020440697669983, "logps/chosen": -0.0010447344975546002, "logps/rejected": -2.0190021991729736, "loss": 0.5525, "nll_loss": 0.13797858357429504, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001044734672177583, "rewards/margins": 0.20179572701454163, "rewards/rejected": -0.20190021395683289, "step": 9900 }, { "epoch": 6.84716459197787, "grad_norm": 5.831141948699951, "learning_rate": 1.7515752266789613e-05, "log_odds_chosen": 11.049789428710938, "log_odds_ratio": -3.43750070896931e-05, "logits/chosen": -0.5529294610023499, "logits/rejected": -0.5563083291053772, "logps/chosen": -0.00016891643463168293, "logps/rejected": -2.304448127746582, "loss": 0.3747, "nll_loss": 0.09367722272872925, "rewards/accuracies": 1.0, "rewards/chosen": -1.6891644918359816e-05, "rewards/margins": 0.23042795062065125, "rewards/rejected": -0.23044481873512268, "step": 9901 }, { "epoch": 6.8478561549100965, "grad_norm": 9.253786087036133, "learning_rate": 1.7511910250499462e-05, "log_odds_chosen": 12.252043724060059, "log_odds_ratio": -6.59788429402397e-06, "logits/chosen": -0.09550817310810089, "logits/rejected": -0.1413390040397644, "logps/chosen": -5.3725496400147676e-05, "logps/rejected": -2.3383841514587402, "loss": 0.633, "nll_loss": 0.15825356543064117, "rewards/accuracies": 1.0, "rewards/chosen": -5.372550276661059e-06, "rewards/margins": 0.23383302986621857, "rewards/rejected": -0.23383840918540955, "step": 9902 }, { "epoch": 6.848547717842323, "grad_norm": 6.2306694984436035, "learning_rate": 1.7508068234209314e-05, "log_odds_chosen": 11.20789909362793, "log_odds_ratio": -8.882944530341774e-05, "logits/chosen": -0.6397537589073181, "logits/rejected": -0.6996241807937622, "logps/chosen": -0.0002480761322658509, "logps/rejected": -2.258899688720703, "loss": 0.4622, "nll_loss": 0.11554432660341263, "rewards/accuracies": 1.0, "rewards/chosen": -2.480761395418085e-05, "rewards/margins": 0.2258651703596115, "rewards/rejected": -0.22588998079299927, "step": 9903 }, { "epoch": 6.84923928077455, "grad_norm": 7.639112949371338, "learning_rate": 1.7504226217919163e-05, "log_odds_chosen": 10.243135452270508, "log_odds_ratio": -0.00010048186231870204, "logits/chosen": -0.50126051902771, "logits/rejected": -0.4946579039096832, "logps/chosen": -0.0003781873674597591, "logps/rejected": -1.7362889051437378, "loss": 0.7682, "nll_loss": 0.19203275442123413, "rewards/accuracies": 1.0, "rewards/chosen": -3.781873601838015e-05, "rewards/margins": 0.17359109222888947, "rewards/rejected": -0.17362891137599945, "step": 9904 }, { "epoch": 6.849930843706777, "grad_norm": 7.1962785720825195, "learning_rate": 1.7500384201629016e-05, "log_odds_chosen": 9.414996147155762, "log_odds_ratio": -0.000451650150353089, "logits/chosen": -0.937263548374176, "logits/rejected": -0.9588169455528259, "logps/chosen": -0.0006807852769270539, "logps/rejected": -1.8451372385025024, "loss": 0.8361, "nll_loss": 0.2089846134185791, "rewards/accuracies": 1.0, "rewards/chosen": -6.807853060308844e-05, "rewards/margins": 0.18444564938545227, "rewards/rejected": -0.18451373279094696, "step": 9905 }, { "epoch": 6.850622406639004, "grad_norm": 5.980663776397705, "learning_rate": 1.7496542185338865e-05, "log_odds_chosen": 10.780477523803711, "log_odds_ratio": -0.00016776591655798256, "logits/chosen": -0.6011804938316345, "logits/rejected": -0.6044209599494934, "logps/chosen": -0.0005937922396697104, "logps/rejected": -2.3112282752990723, "loss": 0.5201, "nll_loss": 0.12999917566776276, "rewards/accuracies": 1.0, "rewards/chosen": -5.93792246945668e-05, "rewards/margins": 0.23106345534324646, "rewards/rejected": -0.23112282156944275, "step": 9906 }, { "epoch": 6.851313969571231, "grad_norm": 4.122068881988525, "learning_rate": 1.7492700169048717e-05, "log_odds_chosen": 10.263906478881836, "log_odds_ratio": -9.153223072644323e-05, "logits/chosen": -0.19530266523361206, "logits/rejected": -0.17187093198299408, "logps/chosen": -0.0003283666155766696, "logps/rejected": -1.8774287700653076, "loss": 0.4826, "nll_loss": 0.12064440548419952, "rewards/accuracies": 1.0, "rewards/chosen": -3.283666228526272e-05, "rewards/margins": 0.18771006166934967, "rewards/rejected": -0.18774288892745972, "step": 9907 }, { "epoch": 6.8520055325034575, "grad_norm": 4.443989276885986, "learning_rate": 1.748885815275857e-05, "log_odds_chosen": 11.554838180541992, "log_odds_ratio": -1.3808636140311137e-05, "logits/chosen": -0.20856159925460815, "logits/rejected": -0.22478443384170532, "logps/chosen": -0.00012917797721456736, "logps/rejected": -2.4405529499053955, "loss": 0.5185, "nll_loss": 0.12961144745349884, "rewards/accuracies": 1.0, "rewards/chosen": -1.2917797903355677e-05, "rewards/margins": 0.24404236674308777, "rewards/rejected": -0.24405530095100403, "step": 9908 }, { "epoch": 6.852697095435684, "grad_norm": 10.797835350036621, "learning_rate": 1.748501613646842e-05, "log_odds_chosen": 11.389801025390625, "log_odds_ratio": -3.008267594850622e-05, "logits/chosen": -0.08736234903335571, "logits/rejected": -0.3351304233074188, "logps/chosen": -8.788306877249852e-05, "logps/rejected": -1.9150742292404175, "loss": 0.8423, "nll_loss": 0.21057824790477753, "rewards/accuracies": 1.0, "rewards/chosen": -8.788307241047733e-06, "rewards/margins": 0.19149863719940186, "rewards/rejected": -0.19150742888450623, "step": 9909 }, { "epoch": 6.853388658367911, "grad_norm": 8.192545890808105, "learning_rate": 1.748117412017827e-05, "log_odds_chosen": 10.926861763000488, "log_odds_ratio": -2.7011970814783126e-05, "logits/chosen": -0.3003247380256653, "logits/rejected": -0.39332881569862366, "logps/chosen": -0.0004899668274447322, "logps/rejected": -1.9362943172454834, "loss": 0.5292, "nll_loss": 0.13229210674762726, "rewards/accuracies": 1.0, "rewards/chosen": -4.899668419966474e-05, "rewards/margins": 0.19358044862747192, "rewards/rejected": -0.1936294436454773, "step": 9910 }, { "epoch": 6.854080221300138, "grad_norm": 5.7841997146606445, "learning_rate": 1.7477332103888124e-05, "log_odds_chosen": 10.92652702331543, "log_odds_ratio": -0.00018881492724176496, "logits/chosen": -0.6957674026489258, "logits/rejected": -0.7153472900390625, "logps/chosen": -0.000338401849148795, "logps/rejected": -2.6009016036987305, "loss": 0.4916, "nll_loss": 0.12288433313369751, "rewards/accuracies": 1.0, "rewards/chosen": -3.384018418728374e-05, "rewards/margins": 0.2600563168525696, "rewards/rejected": -0.260090172290802, "step": 9911 }, { "epoch": 6.854771784232365, "grad_norm": 6.821191787719727, "learning_rate": 1.7473490087597973e-05, "log_odds_chosen": 11.141485214233398, "log_odds_ratio": -2.456578476994764e-05, "logits/chosen": -0.656318187713623, "logits/rejected": -0.7465082406997681, "logps/chosen": -0.00015865606837905943, "logps/rejected": -1.8773607015609741, "loss": 0.3443, "nll_loss": 0.0860750824213028, "rewards/accuracies": 1.0, "rewards/chosen": -1.5865607565501705e-05, "rewards/margins": 0.18772020936012268, "rewards/rejected": -0.18773606419563293, "step": 9912 }, { "epoch": 6.855463347164592, "grad_norm": 5.875421524047852, "learning_rate": 1.7469648071307822e-05, "log_odds_chosen": 11.702245712280273, "log_odds_ratio": -1.5685051039326936e-05, "logits/chosen": -0.09253035485744476, "logits/rejected": -0.146541029214859, "logps/chosen": -0.00031491348636336625, "logps/rejected": -2.6832847595214844, "loss": 0.7997, "nll_loss": 0.19991399347782135, "rewards/accuracies": 1.0, "rewards/chosen": -3.1491348636336625e-05, "rewards/margins": 0.2682969868183136, "rewards/rejected": -0.2683284878730774, "step": 9913 }, { "epoch": 6.856154910096818, "grad_norm": 9.347213745117188, "learning_rate": 1.7465806055017674e-05, "log_odds_chosen": 11.504549026489258, "log_odds_ratio": -2.4479886633343995e-05, "logits/chosen": -0.6678361892700195, "logits/rejected": -0.6643664836883545, "logps/chosen": -0.00017276719154324383, "logps/rejected": -2.7045693397521973, "loss": 0.7489, "nll_loss": 0.18721558153629303, "rewards/accuracies": 1.0, "rewards/chosen": -1.7276719518122263e-05, "rewards/margins": 0.27043965458869934, "rewards/rejected": -0.2704569101333618, "step": 9914 }, { "epoch": 6.856846473029045, "grad_norm": 7.882079124450684, "learning_rate": 1.7461964038727523e-05, "log_odds_chosen": 11.102792739868164, "log_odds_ratio": -5.0582086259964854e-05, "logits/chosen": -0.3423008322715759, "logits/rejected": -0.3178647756576538, "logps/chosen": -0.00030722690280526876, "logps/rejected": -2.448429822921753, "loss": 0.8287, "nll_loss": 0.2071687877178192, "rewards/accuracies": 1.0, "rewards/chosen": -3.0722694646101445e-05, "rewards/margins": 0.2448122799396515, "rewards/rejected": -0.244842991232872, "step": 9915 }, { "epoch": 6.857538035961272, "grad_norm": 6.01251220703125, "learning_rate": 1.7458122022437376e-05, "log_odds_chosen": 10.909671783447266, "log_odds_ratio": -0.00014612148515880108, "logits/chosen": -0.47818073630332947, "logits/rejected": -0.45567870140075684, "logps/chosen": -0.00018920523871202022, "logps/rejected": -2.08219051361084, "loss": 0.723, "nll_loss": 0.18072611093521118, "rewards/accuracies": 1.0, "rewards/chosen": -1.8920525690191425e-05, "rewards/margins": 0.208200141787529, "rewards/rejected": -0.20821905136108398, "step": 9916 }, { "epoch": 6.858229598893499, "grad_norm": 9.785633087158203, "learning_rate": 1.745428000614723e-05, "log_odds_chosen": 11.04234504699707, "log_odds_ratio": -2.0666961063398048e-05, "logits/chosen": -0.45309072732925415, "logits/rejected": -0.507732093334198, "logps/chosen": -0.0002194387634517625, "logps/rejected": -2.370762825012207, "loss": 0.6212, "nll_loss": 0.15530359745025635, "rewards/accuracies": 1.0, "rewards/chosen": -2.194387707277201e-05, "rewards/margins": 0.23705436289310455, "rewards/rejected": -0.2370762974023819, "step": 9917 }, { "epoch": 6.858921161825726, "grad_norm": 6.593029975891113, "learning_rate": 1.7450437989857077e-05, "log_odds_chosen": 10.200165748596191, "log_odds_ratio": -0.000459955568658188, "logits/chosen": -0.35304728150367737, "logits/rejected": -0.35141220688819885, "logps/chosen": -0.00028261399711482227, "logps/rejected": -2.053372383117676, "loss": 0.7283, "nll_loss": 0.18202868103981018, "rewards/accuracies": 1.0, "rewards/chosen": -2.8261400075280108e-05, "rewards/margins": 0.20530900359153748, "rewards/rejected": -0.205337256193161, "step": 9918 }, { "epoch": 6.8596127247579535, "grad_norm": 4.91062593460083, "learning_rate": 1.744659597356693e-05, "log_odds_chosen": 11.299886703491211, "log_odds_ratio": -0.0001566058926982805, "logits/chosen": -0.7404186725616455, "logits/rejected": -0.7922763824462891, "logps/chosen": -0.00026429325225763023, "logps/rejected": -2.357874870300293, "loss": 0.3988, "nll_loss": 0.09967993944883347, "rewards/accuracies": 1.0, "rewards/chosen": -2.6429324861965142e-05, "rewards/margins": 0.23576104640960693, "rewards/rejected": -0.23578748106956482, "step": 9919 }, { "epoch": 6.86030428769018, "grad_norm": 4.724360466003418, "learning_rate": 1.7442753957276782e-05, "log_odds_chosen": 9.903718948364258, "log_odds_ratio": -0.000160827228683047, "logits/chosen": -0.5334427952766418, "logits/rejected": -0.5078845620155334, "logps/chosen": -0.0003555277071427554, "logps/rejected": -1.58981454372406, "loss": 0.5266, "nll_loss": 0.13162297010421753, "rewards/accuracies": 1.0, "rewards/chosen": -3.555276998667978e-05, "rewards/margins": 0.15894590318202972, "rewards/rejected": -0.15898147225379944, "step": 9920 }, { "epoch": 6.860995850622407, "grad_norm": 6.36701774597168, "learning_rate": 1.743891194098663e-05, "log_odds_chosen": 11.064401626586914, "log_odds_ratio": -7.795902638463303e-05, "logits/chosen": -0.48767662048339844, "logits/rejected": -0.48847290873527527, "logps/chosen": -0.0005439437227323651, "logps/rejected": -2.7804341316223145, "loss": 1.2125, "nll_loss": 0.30312567949295044, "rewards/accuracies": 1.0, "rewards/chosen": -5.439437518361956e-05, "rewards/margins": 0.2779890298843384, "rewards/rejected": -0.27804338932037354, "step": 9921 }, { "epoch": 6.861687413554634, "grad_norm": 5.585921287536621, "learning_rate": 1.743506992469648e-05, "log_odds_chosen": 10.440255165100098, "log_odds_ratio": -4.734244794235565e-05, "logits/chosen": -0.3502916693687439, "logits/rejected": -0.4361574351787567, "logps/chosen": -0.00027647442766465247, "logps/rejected": -1.9319242238998413, "loss": 0.8023, "nll_loss": 0.20056825876235962, "rewards/accuracies": 1.0, "rewards/chosen": -2.7647445676848292e-05, "rewards/margins": 0.19316478073596954, "rewards/rejected": -0.19319242238998413, "step": 9922 }, { "epoch": 6.862378976486861, "grad_norm": 5.664307117462158, "learning_rate": 1.7431227908406333e-05, "log_odds_chosen": 10.078949928283691, "log_odds_ratio": -0.0002160519507015124, "logits/chosen": -0.47924864292144775, "logits/rejected": -0.5029646158218384, "logps/chosen": -0.00029257647111080587, "logps/rejected": -1.7399296760559082, "loss": 0.568, "nll_loss": 0.14198613166809082, "rewards/accuracies": 1.0, "rewards/chosen": -2.925764783867635e-05, "rewards/margins": 0.173963725566864, "rewards/rejected": -0.17399299144744873, "step": 9923 }, { "epoch": 6.863070539419088, "grad_norm": 6.095013618469238, "learning_rate": 1.7427385892116182e-05, "log_odds_chosen": 11.45406436920166, "log_odds_ratio": -2.605345616757404e-05, "logits/chosen": -0.6417930126190186, "logits/rejected": -0.6419544816017151, "logps/chosen": -0.0002008125593420118, "logps/rejected": -2.647881507873535, "loss": 0.4199, "nll_loss": 0.1049807220697403, "rewards/accuracies": 1.0, "rewards/chosen": -2.0081257389392704e-05, "rewards/margins": 0.2647680938243866, "rewards/rejected": -0.2647881805896759, "step": 9924 }, { "epoch": 6.8637621023513145, "grad_norm": 4.876924514770508, "learning_rate": 1.7423543875826034e-05, "log_odds_chosen": 9.685707092285156, "log_odds_ratio": -0.00021258770721033216, "logits/chosen": -0.515690267086029, "logits/rejected": -0.5359160900115967, "logps/chosen": -0.0005392287275753915, "logps/rejected": -1.847927212715149, "loss": 1.1985, "nll_loss": 0.2995995879173279, "rewards/accuracies": 1.0, "rewards/chosen": -5.392287130234763e-05, "rewards/margins": 0.18473881483078003, "rewards/rejected": -0.18479272723197937, "step": 9925 }, { "epoch": 6.864453665283541, "grad_norm": 5.993760585784912, "learning_rate": 1.7419701859535887e-05, "log_odds_chosen": 10.626228332519531, "log_odds_ratio": -0.00015337899094447494, "logits/chosen": -0.24197693169116974, "logits/rejected": -0.2716585099697113, "logps/chosen": -0.0003822005819529295, "logps/rejected": -2.1401491165161133, "loss": 0.589, "nll_loss": 0.14723332226276398, "rewards/accuracies": 1.0, "rewards/chosen": -3.8220056012505665e-05, "rewards/margins": 0.2139766812324524, "rewards/rejected": -0.2140149176120758, "step": 9926 }, { "epoch": 6.865145228215768, "grad_norm": 5.754234313964844, "learning_rate": 1.7415859843245736e-05, "log_odds_chosen": 11.140161514282227, "log_odds_ratio": -9.091119864024222e-05, "logits/chosen": -0.26153460144996643, "logits/rejected": -0.45184725522994995, "logps/chosen": -0.00045796221820637584, "logps/rejected": -2.747653007507324, "loss": 0.4756, "nll_loss": 0.11889111995697021, "rewards/accuracies": 1.0, "rewards/chosen": -4.5796223275829107e-05, "rewards/margins": 0.2747195065021515, "rewards/rejected": -0.274765282869339, "step": 9927 }, { "epoch": 6.865836791147995, "grad_norm": 5.773352146148682, "learning_rate": 1.7412017826955588e-05, "log_odds_chosen": 10.411229133605957, "log_odds_ratio": -0.0001067575067281723, "logits/chosen": -0.21838048100471497, "logits/rejected": -0.21249642968177795, "logps/chosen": -0.00023341068299487233, "logps/rejected": -1.8828068971633911, "loss": 0.5262, "nll_loss": 0.13154050707817078, "rewards/accuracies": 1.0, "rewards/chosen": -2.3341068299487233e-05, "rewards/margins": 0.1882573664188385, "rewards/rejected": -0.18828070163726807, "step": 9928 }, { "epoch": 6.866528354080222, "grad_norm": 5.741930961608887, "learning_rate": 1.740817581066544e-05, "log_odds_chosen": 11.804953575134277, "log_odds_ratio": -1.0474625014467165e-05, "logits/chosen": -0.3105663061141968, "logits/rejected": -0.2862730026245117, "logps/chosen": -0.00014494526840280741, "logps/rejected": -2.5858964920043945, "loss": 0.5473, "nll_loss": 0.13682931661605835, "rewards/accuracies": 1.0, "rewards/chosen": -1.4494527931674384e-05, "rewards/margins": 0.2585751712322235, "rewards/rejected": -0.25858965516090393, "step": 9929 }, { "epoch": 6.867219917012449, "grad_norm": 8.72846794128418, "learning_rate": 1.740433379437529e-05, "log_odds_chosen": 11.901812553405762, "log_odds_ratio": -1.6944477465585805e-05, "logits/chosen": -0.7704805135726929, "logits/rejected": -0.7657447457313538, "logps/chosen": -0.00020099672838114202, "logps/rejected": -2.4580609798431396, "loss": 0.6436, "nll_loss": 0.1609020233154297, "rewards/accuracies": 1.0, "rewards/chosen": -2.0099672838114202e-05, "rewards/margins": 0.24578601121902466, "rewards/rejected": -0.24580609798431396, "step": 9930 }, { "epoch": 6.867911479944675, "grad_norm": 8.556121826171875, "learning_rate": 1.740049177808514e-05, "log_odds_chosen": 9.893619537353516, "log_odds_ratio": -0.00011758245818782598, "logits/chosen": -0.35947567224502563, "logits/rejected": -0.31796538829803467, "logps/chosen": -0.0008837583591230214, "logps/rejected": -2.0111024379730225, "loss": 0.3324, "nll_loss": 0.0830918699502945, "rewards/accuracies": 1.0, "rewards/chosen": -8.83758402778767e-05, "rewards/margins": 0.20102186501026154, "rewards/rejected": -0.20111024379730225, "step": 9931 }, { "epoch": 6.868603042876902, "grad_norm": 6.5274786949157715, "learning_rate": 1.739664976179499e-05, "log_odds_chosen": 11.198671340942383, "log_odds_ratio": -3.070286402362399e-05, "logits/chosen": -0.6600465178489685, "logits/rejected": -0.6094987392425537, "logps/chosen": -0.0002148185740225017, "logps/rejected": -2.463357925415039, "loss": 0.5131, "nll_loss": 0.12826129794120789, "rewards/accuracies": 1.0, "rewards/chosen": -2.148185740225017e-05, "rewards/margins": 0.24631434679031372, "rewards/rejected": -0.24633580446243286, "step": 9932 }, { "epoch": 6.869294605809129, "grad_norm": 4.538366794586182, "learning_rate": 1.739280774550484e-05, "log_odds_chosen": 10.437494277954102, "log_odds_ratio": -5.404383409768343e-05, "logits/chosen": -0.17082995176315308, "logits/rejected": -0.29417771100997925, "logps/chosen": -0.00018732089665718377, "logps/rejected": -1.973541259765625, "loss": 0.5179, "nll_loss": 0.12947946786880493, "rewards/accuracies": 1.0, "rewards/chosen": -1.87320911209099e-05, "rewards/margins": 0.1973353922367096, "rewards/rejected": -0.19735412299633026, "step": 9933 }, { "epoch": 6.869986168741356, "grad_norm": 6.1261091232299805, "learning_rate": 1.7388965729214693e-05, "log_odds_chosen": 10.655526161193848, "log_odds_ratio": -0.000254765065619722, "logits/chosen": -0.7289286851882935, "logits/rejected": -0.755789041519165, "logps/chosen": -0.00013674799993168563, "logps/rejected": -1.394344449043274, "loss": 0.5419, "nll_loss": 0.13544505834579468, "rewards/accuracies": 1.0, "rewards/chosen": -1.3674801266461145e-05, "rewards/margins": 0.1394207626581192, "rewards/rejected": -0.13943444192409515, "step": 9934 }, { "epoch": 6.870677731673583, "grad_norm": 6.334026336669922, "learning_rate": 1.7385123712924545e-05, "log_odds_chosen": 11.238309860229492, "log_odds_ratio": -4.077638004673645e-05, "logits/chosen": -0.6347928047180176, "logits/rejected": -0.6307282447814941, "logps/chosen": -0.00015452434308826923, "logps/rejected": -2.1024088859558105, "loss": 0.5892, "nll_loss": 0.1472892463207245, "rewards/accuracies": 1.0, "rewards/chosen": -1.545243321743328e-05, "rewards/margins": 0.2102254331111908, "rewards/rejected": -0.21024090051651, "step": 9935 }, { "epoch": 6.87136929460581, "grad_norm": 12.584394454956055, "learning_rate": 1.7381281696634394e-05, "log_odds_chosen": 10.109475135803223, "log_odds_ratio": -0.00010246929014101624, "logits/chosen": -0.3030821681022644, "logits/rejected": -0.35970088839530945, "logps/chosen": -0.0004246834432706237, "logps/rejected": -1.9912558794021606, "loss": 0.6852, "nll_loss": 0.1712900549173355, "rewards/accuracies": 1.0, "rewards/chosen": -4.246834578225389e-05, "rewards/margins": 0.1990831345319748, "rewards/rejected": -0.19912561774253845, "step": 9936 }, { "epoch": 6.872060857538036, "grad_norm": 8.13845443725586, "learning_rate": 1.7377439680344247e-05, "log_odds_chosen": 9.459121704101562, "log_odds_ratio": -0.00015860966232139617, "logits/chosen": -0.5512504577636719, "logits/rejected": -0.6040525436401367, "logps/chosen": -0.00028108907281421125, "logps/rejected": -1.1141246557235718, "loss": 0.4211, "nll_loss": 0.10525853931903839, "rewards/accuracies": 1.0, "rewards/chosen": -2.8108906917623244e-05, "rewards/margins": 0.11138436198234558, "rewards/rejected": -0.11141246557235718, "step": 9937 }, { "epoch": 6.872752420470263, "grad_norm": 7.232450485229492, "learning_rate": 1.73735976640541e-05, "log_odds_chosen": 9.538267135620117, "log_odds_ratio": -0.00014865670527797192, "logits/chosen": -0.4153312146663666, "logits/rejected": -0.4232088327407837, "logps/chosen": -0.000883034139405936, "logps/rejected": -1.9747530221939087, "loss": 0.392, "nll_loss": 0.09798521548509598, "rewards/accuracies": 1.0, "rewards/chosen": -8.830341539578512e-05, "rewards/margins": 0.1973869949579239, "rewards/rejected": -0.19747528433799744, "step": 9938 }, { "epoch": 6.87344398340249, "grad_norm": 7.225497245788574, "learning_rate": 1.7369755647763948e-05, "log_odds_chosen": 10.864669799804688, "log_odds_ratio": -3.936921712011099e-05, "logits/chosen": -0.6600432395935059, "logits/rejected": -0.6265806555747986, "logps/chosen": -0.00037797007826156914, "logps/rejected": -2.2492332458496094, "loss": 0.6035, "nll_loss": 0.15087053179740906, "rewards/accuracies": 1.0, "rewards/chosen": -3.77970100089442e-05, "rewards/margins": 0.22488552331924438, "rewards/rejected": -0.22492331266403198, "step": 9939 }, { "epoch": 6.874135546334717, "grad_norm": 8.900500297546387, "learning_rate": 1.7365913631473797e-05, "log_odds_chosen": 10.664848327636719, "log_odds_ratio": -0.00032811707933433354, "logits/chosen": -0.3347414433956146, "logits/rejected": -0.35119128227233887, "logps/chosen": -0.0006118988967500627, "logps/rejected": -2.225257158279419, "loss": 0.4027, "nll_loss": 0.10063539445400238, "rewards/accuracies": 1.0, "rewards/chosen": -6.118989404058084e-05, "rewards/margins": 0.22246450185775757, "rewards/rejected": -0.2225257158279419, "step": 9940 }, { "epoch": 6.874827109266944, "grad_norm": 5.114599704742432, "learning_rate": 1.736207161518365e-05, "log_odds_chosen": 10.928939819335938, "log_odds_ratio": -3.058044967474416e-05, "logits/chosen": -0.3283681869506836, "logits/rejected": -0.41022035479545593, "logps/chosen": -0.0001671072095632553, "logps/rejected": -2.0779647827148438, "loss": 0.5119, "nll_loss": 0.12795957922935486, "rewards/accuracies": 1.0, "rewards/chosen": -1.671072095632553e-05, "rewards/margins": 0.20777978003025055, "rewards/rejected": -0.20779648423194885, "step": 9941 }, { "epoch": 6.875518672199171, "grad_norm": 10.98104476928711, "learning_rate": 1.73582295988935e-05, "log_odds_chosen": 10.98994255065918, "log_odds_ratio": -3.9570215449202806e-05, "logits/chosen": -0.7664468288421631, "logits/rejected": -0.778846263885498, "logps/chosen": -0.0005870637251064181, "logps/rejected": -2.516409158706665, "loss": 0.9717, "nll_loss": 0.24291543662548065, "rewards/accuracies": 1.0, "rewards/chosen": -5.870637687621638e-05, "rewards/margins": 0.25158220529556274, "rewards/rejected": -0.2516409158706665, "step": 9942 }, { "epoch": 6.876210235131397, "grad_norm": 4.401638984680176, "learning_rate": 1.735438758260335e-05, "log_odds_chosen": 9.971277236938477, "log_odds_ratio": -0.00013970036525279284, "logits/chosen": -0.6718819737434387, "logits/rejected": -0.7286103367805481, "logps/chosen": -0.0002767588885035366, "logps/rejected": -1.503734827041626, "loss": 0.863, "nll_loss": 0.2157265543937683, "rewards/accuracies": 1.0, "rewards/chosen": -2.7675887395162135e-05, "rewards/margins": 0.1503458023071289, "rewards/rejected": -0.15037348866462708, "step": 9943 }, { "epoch": 6.876901798063624, "grad_norm": 5.169467926025391, "learning_rate": 1.7350545566313204e-05, "log_odds_chosen": 9.853658676147461, "log_odds_ratio": -0.0007788334041833878, "logits/chosen": -0.5393489599227905, "logits/rejected": -0.4997929334640503, "logps/chosen": -0.0005267321248538792, "logps/rejected": -1.7126424312591553, "loss": 0.6214, "nll_loss": 0.15526898205280304, "rewards/accuracies": 1.0, "rewards/chosen": -5.26732110301964e-05, "rewards/margins": 0.17121157050132751, "rewards/rejected": -0.17126423120498657, "step": 9944 }, { "epoch": 6.877593360995851, "grad_norm": 12.462244987487793, "learning_rate": 1.7346703550023053e-05, "log_odds_chosen": 9.403864860534668, "log_odds_ratio": -0.0005906783044338226, "logits/chosen": -0.40958431363105774, "logits/rejected": -0.45778223872184753, "logps/chosen": -0.0010209481697529554, "logps/rejected": -1.3862988948822021, "loss": 0.7853, "nll_loss": 0.19627533853054047, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010209481843048707, "rewards/margins": 0.1385277956724167, "rewards/rejected": -0.13862988352775574, "step": 9945 }, { "epoch": 6.878284923928078, "grad_norm": 6.601505756378174, "learning_rate": 1.7342861533732905e-05, "log_odds_chosen": 11.283668518066406, "log_odds_ratio": -5.514323856914416e-05, "logits/chosen": -0.7354806065559387, "logits/rejected": -0.7971148490905762, "logps/chosen": -0.00020214321557432413, "logps/rejected": -2.620887517929077, "loss": 0.4874, "nll_loss": 0.12183713912963867, "rewards/accuracies": 1.0, "rewards/chosen": -2.021432010224089e-05, "rewards/margins": 0.2620685398578644, "rewards/rejected": -0.2620887756347656, "step": 9946 }, { "epoch": 6.878976486860305, "grad_norm": 5.732128620147705, "learning_rate": 1.7339019517442758e-05, "log_odds_chosen": 10.169954299926758, "log_odds_ratio": -7.686160097364336e-05, "logits/chosen": -0.5683677196502686, "logits/rejected": -0.5826584696769714, "logps/chosen": -0.0004312100063543767, "logps/rejected": -2.1739308834075928, "loss": 0.4796, "nll_loss": 0.11989589035511017, "rewards/accuracies": 1.0, "rewards/chosen": -4.312100645620376e-05, "rewards/margins": 0.21734994649887085, "rewards/rejected": -0.21739307045936584, "step": 9947 }, { "epoch": 6.8796680497925315, "grad_norm": 7.44845724105835, "learning_rate": 1.7335177501152607e-05, "log_odds_chosen": 10.656408309936523, "log_odds_ratio": -0.00015237029583659023, "logits/chosen": -0.6948117017745972, "logits/rejected": -0.6771795749664307, "logps/chosen": -0.0001807966036722064, "logps/rejected": -2.0261623859405518, "loss": 0.5481, "nll_loss": 0.13700607419013977, "rewards/accuracies": 1.0, "rewards/chosen": -1.807965963962488e-05, "rewards/margins": 0.20259815454483032, "rewards/rejected": -0.20261624455451965, "step": 9948 }, { "epoch": 6.880359612724758, "grad_norm": 7.517022132873535, "learning_rate": 1.7331335484862456e-05, "log_odds_chosen": 9.013700485229492, "log_odds_ratio": -0.0011961768614128232, "logits/chosen": -0.6705461740493774, "logits/rejected": -0.6669026613235474, "logps/chosen": -0.002910541370511055, "logps/rejected": -2.0945003032684326, "loss": 0.542, "nll_loss": 0.13536912202835083, "rewards/accuracies": 1.0, "rewards/chosen": -0.00029105416615493596, "rewards/margins": 0.2091589719057083, "rewards/rejected": -0.20945002138614655, "step": 9949 }, { "epoch": 6.881051175656985, "grad_norm": 4.460395812988281, "learning_rate": 1.7327493468572308e-05, "log_odds_chosen": 9.430727005004883, "log_odds_ratio": -0.0002010946482187137, "logits/chosen": -0.5580604076385498, "logits/rejected": -0.5696870684623718, "logps/chosen": -0.0006297902436926961, "logps/rejected": -1.6898581981658936, "loss": 0.4955, "nll_loss": 0.12386074662208557, "rewards/accuracies": 1.0, "rewards/chosen": -6.297902291407809e-05, "rewards/margins": 0.16892285645008087, "rewards/rejected": -0.16898582875728607, "step": 9950 }, { "epoch": 6.881742738589212, "grad_norm": 6.6989593505859375, "learning_rate": 1.7323651452282157e-05, "log_odds_chosen": 11.214866638183594, "log_odds_ratio": -3.179534905939363e-05, "logits/chosen": -0.7736462950706482, "logits/rejected": -0.8380985260009766, "logps/chosen": -0.00015404712758027017, "logps/rejected": -2.2978692054748535, "loss": 0.4214, "nll_loss": 0.10534738004207611, "rewards/accuracies": 1.0, "rewards/chosen": -1.5404712030431256e-05, "rewards/margins": 0.22977152466773987, "rewards/rejected": -0.2297869324684143, "step": 9951 }, { "epoch": 6.882434301521439, "grad_norm": 6.151716232299805, "learning_rate": 1.731980943599201e-05, "log_odds_chosen": 10.018352508544922, "log_odds_ratio": -0.00014328441466204822, "logits/chosen": -0.40556055307388306, "logits/rejected": -0.3207288980484009, "logps/chosen": -0.00018548393563833088, "logps/rejected": -1.5287439823150635, "loss": 0.8473, "nll_loss": 0.211818128824234, "rewards/accuracies": 1.0, "rewards/chosen": -1.8548393200035207e-05, "rewards/margins": 0.15285584330558777, "rewards/rejected": -0.15287438035011292, "step": 9952 }, { "epoch": 6.883125864453666, "grad_norm": 5.060670852661133, "learning_rate": 1.731596741970186e-05, "log_odds_chosen": 10.694990158081055, "log_odds_ratio": -0.00023410924768541008, "logits/chosen": -0.3882830739021301, "logits/rejected": -0.4243199825286865, "logps/chosen": -0.0010852471459656954, "logps/rejected": -1.9446629285812378, "loss": 0.5633, "nll_loss": 0.1408044546842575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010852471314137802, "rewards/margins": 0.1943577527999878, "rewards/rejected": -0.19446627795696259, "step": 9953 }, { "epoch": 6.8838174273858925, "grad_norm": 7.240312099456787, "learning_rate": 1.731212540341171e-05, "log_odds_chosen": 11.345335960388184, "log_odds_ratio": -2.520248199289199e-05, "logits/chosen": -0.3495720326900482, "logits/rejected": -0.25986605882644653, "logps/chosen": -0.0001573432091390714, "logps/rejected": -2.135495185852051, "loss": 0.6904, "nll_loss": 0.17259274423122406, "rewards/accuracies": 1.0, "rewards/chosen": -1.573432018631138e-05, "rewards/margins": 0.21353378891944885, "rewards/rejected": -0.21354952454566956, "step": 9954 }, { "epoch": 6.884508990318119, "grad_norm": 6.1964335441589355, "learning_rate": 1.7308283387121564e-05, "log_odds_chosen": 11.313817024230957, "log_odds_ratio": -4.6309156459756196e-05, "logits/chosen": -0.5913910269737244, "logits/rejected": -0.6920463442802429, "logps/chosen": -0.00011107635509688407, "logps/rejected": -2.270355224609375, "loss": 0.8023, "nll_loss": 0.20057925581932068, "rewards/accuracies": 1.0, "rewards/chosen": -1.1107635145890526e-05, "rewards/margins": 0.2270244061946869, "rewards/rejected": -0.2270355373620987, "step": 9955 }, { "epoch": 6.885200553250346, "grad_norm": 6.067237854003906, "learning_rate": 1.7304441370831413e-05, "log_odds_chosen": 9.834778785705566, "log_odds_ratio": -0.0003098523011431098, "logits/chosen": -0.9029225707054138, "logits/rejected": -0.8073776960372925, "logps/chosen": -0.0003418096457608044, "logps/rejected": -1.4671462774276733, "loss": 0.4298, "nll_loss": 0.10742116719484329, "rewards/accuracies": 1.0, "rewards/chosen": -3.4180960938101634e-05, "rewards/margins": 0.14668045938014984, "rewards/rejected": -0.14671462774276733, "step": 9956 }, { "epoch": 6.885892116182573, "grad_norm": 4.893023490905762, "learning_rate": 1.7300599354541265e-05, "log_odds_chosen": 12.138407707214355, "log_odds_ratio": -1.6137224520207383e-05, "logits/chosen": -0.27513575553894043, "logits/rejected": -0.3699111044406891, "logps/chosen": -0.00010701712017180398, "logps/rejected": -2.995022773742676, "loss": 0.7685, "nll_loss": 0.1921170949935913, "rewards/accuracies": 1.0, "rewards/chosen": -1.0701711289584637e-05, "rewards/margins": 0.2994915843009949, "rewards/rejected": -0.29950231313705444, "step": 9957 }, { "epoch": 6.8865836791148, "grad_norm": 4.358221530914307, "learning_rate": 1.7296757338251114e-05, "log_odds_chosen": 11.058510780334473, "log_odds_ratio": -6.035809565219097e-05, "logits/chosen": -0.3187516927719116, "logits/rejected": -0.34987926483154297, "logps/chosen": -0.00014822027878835797, "logps/rejected": -2.2254271507263184, "loss": 0.4032, "nll_loss": 0.1007840484380722, "rewards/accuracies": 1.0, "rewards/chosen": -1.4822028788330499e-05, "rewards/margins": 0.2225278615951538, "rewards/rejected": -0.22254270315170288, "step": 9958 }, { "epoch": 6.887275242047027, "grad_norm": 3.851203203201294, "learning_rate": 1.7292915321960963e-05, "log_odds_chosen": 10.868274688720703, "log_odds_ratio": -3.078898225794546e-05, "logits/chosen": -0.3462313711643219, "logits/rejected": -0.386076956987381, "logps/chosen": -0.00013465769006870687, "logps/rejected": -1.9355796575546265, "loss": 0.3889, "nll_loss": 0.09722109138965607, "rewards/accuracies": 1.0, "rewards/chosen": -1.3465767551679164e-05, "rewards/margins": 0.19354449212551117, "rewards/rejected": -0.1935579478740692, "step": 9959 }, { "epoch": 6.8879668049792535, "grad_norm": 8.651869773864746, "learning_rate": 1.7289073305670816e-05, "log_odds_chosen": 10.275079727172852, "log_odds_ratio": -5.3268369811121374e-05, "logits/chosen": -0.2315186709165573, "logits/rejected": -0.30275315046310425, "logps/chosen": -0.0004828626988455653, "logps/rejected": -1.9132792949676514, "loss": 0.5085, "nll_loss": 0.12712129950523376, "rewards/accuracies": 1.0, "rewards/chosen": -4.82862742501311e-05, "rewards/margins": 0.19127964973449707, "rewards/rejected": -0.19132792949676514, "step": 9960 }, { "epoch": 6.88865836791148, "grad_norm": 5.152824401855469, "learning_rate": 1.7285231289380668e-05, "log_odds_chosen": 11.55789566040039, "log_odds_ratio": -3.1101779313758016e-05, "logits/chosen": -0.11422102153301239, "logits/rejected": -0.1345907300710678, "logps/chosen": -0.00011851730960188434, "logps/rejected": -2.35556697845459, "loss": 0.5654, "nll_loss": 0.14134716987609863, "rewards/accuracies": 1.0, "rewards/chosen": -1.1851730960188434e-05, "rewards/margins": 0.2355448603630066, "rewards/rejected": -0.2355567067861557, "step": 9961 }, { "epoch": 6.889349930843707, "grad_norm": 5.693244934082031, "learning_rate": 1.7281389273090517e-05, "log_odds_chosen": 11.959161758422852, "log_odds_ratio": -1.573568988533225e-05, "logits/chosen": -0.5982638001441956, "logits/rejected": -0.4819360375404358, "logps/chosen": -0.0001248091139132157, "logps/rejected": -2.7712881565093994, "loss": 0.538, "nll_loss": 0.13448816537857056, "rewards/accuracies": 1.0, "rewards/chosen": -1.248091120942263e-05, "rewards/margins": 0.2771163582801819, "rewards/rejected": -0.27712881565093994, "step": 9962 }, { "epoch": 6.890041493775934, "grad_norm": 3.995441436767578, "learning_rate": 1.727754725680037e-05, "log_odds_chosen": 9.996007919311523, "log_odds_ratio": -9.138335008174181e-05, "logits/chosen": -0.12948307394981384, "logits/rejected": -0.19393359124660492, "logps/chosen": -0.00023820166825316846, "logps/rejected": -1.4062923192977905, "loss": 0.2848, "nll_loss": 0.07120097428560257, "rewards/accuracies": 1.0, "rewards/chosen": -2.3820166461518966e-05, "rewards/margins": 0.14060541987419128, "rewards/rejected": -0.14062923192977905, "step": 9963 }, { "epoch": 6.890733056708161, "grad_norm": 4.375192165374756, "learning_rate": 1.7273705240510222e-05, "log_odds_chosen": 10.583505630493164, "log_odds_ratio": -5.76963102503214e-05, "logits/chosen": -0.3995498716831207, "logits/rejected": -0.488243043422699, "logps/chosen": -0.0001608713937457651, "logps/rejected": -1.938847303390503, "loss": 0.4457, "nll_loss": 0.11142930388450623, "rewards/accuracies": 1.0, "rewards/chosen": -1.608714046597015e-05, "rewards/margins": 0.19386863708496094, "rewards/rejected": -0.1938847303390503, "step": 9964 }, { "epoch": 6.891424619640388, "grad_norm": 7.30011510848999, "learning_rate": 1.726986322422007e-05, "log_odds_chosen": 11.248298645019531, "log_odds_ratio": -7.639620889676735e-05, "logits/chosen": -0.05031472072005272, "logits/rejected": -0.1647230088710785, "logps/chosen": -0.0002004953712457791, "logps/rejected": -2.555237293243408, "loss": 0.6183, "nll_loss": 0.15456579625606537, "rewards/accuracies": 1.0, "rewards/chosen": -2.004953785217367e-05, "rewards/margins": 0.25550365447998047, "rewards/rejected": -0.2555237114429474, "step": 9965 }, { "epoch": 6.8921161825726145, "grad_norm": 5.279568195343018, "learning_rate": 1.7266021207929923e-05, "log_odds_chosen": 9.024934768676758, "log_odds_ratio": -0.0004643774009309709, "logits/chosen": -0.3420089781284332, "logits/rejected": -0.3719962537288666, "logps/chosen": -0.0006390767521224916, "logps/rejected": -1.4473562240600586, "loss": 0.5353, "nll_loss": 0.13377240300178528, "rewards/accuracies": 1.0, "rewards/chosen": -6.390767521224916e-05, "rewards/margins": 0.1446717232465744, "rewards/rejected": -0.14473563432693481, "step": 9966 }, { "epoch": 6.892807745504841, "grad_norm": 5.274577617645264, "learning_rate": 1.7262179191639773e-05, "log_odds_chosen": 11.9353666305542, "log_odds_ratio": -1.3555643818108365e-05, "logits/chosen": -0.20525482296943665, "logits/rejected": -0.33735471963882446, "logps/chosen": -0.00010454172297613695, "logps/rejected": -2.7081387042999268, "loss": 0.6671, "nll_loss": 0.1667725145816803, "rewards/accuracies": 1.0, "rewards/chosen": -1.0454172297613695e-05, "rewards/margins": 0.27080339193344116, "rewards/rejected": -0.27081388235092163, "step": 9967 }, { "epoch": 6.893499308437068, "grad_norm": 6.057780742645264, "learning_rate": 1.725833717534962e-05, "log_odds_chosen": 12.293357849121094, "log_odds_ratio": -1.7128662875620648e-05, "logits/chosen": -0.2888219952583313, "logits/rejected": -0.35230833292007446, "logps/chosen": -0.00016165403940249234, "logps/rejected": -3.4829816818237305, "loss": 0.607, "nll_loss": 0.15174441039562225, "rewards/accuracies": 1.0, "rewards/chosen": -1.6165404304047115e-05, "rewards/margins": 0.3482820391654968, "rewards/rejected": -0.34829822182655334, "step": 9968 }, { "epoch": 6.894190871369295, "grad_norm": 5.762960433959961, "learning_rate": 1.7254495159059474e-05, "log_odds_chosen": 11.103206634521484, "log_odds_ratio": -5.622408934868872e-05, "logits/chosen": -0.33061569929122925, "logits/rejected": -0.4054297208786011, "logps/chosen": -0.00026888775755651295, "logps/rejected": -2.325014591217041, "loss": 0.5813, "nll_loss": 0.1453164964914322, "rewards/accuracies": 1.0, "rewards/chosen": -2.688877793843858e-05, "rewards/margins": 0.23247459530830383, "rewards/rejected": -0.23250147700309753, "step": 9969 }, { "epoch": 6.894882434301522, "grad_norm": 4.574897289276123, "learning_rate": 1.7250653142769326e-05, "log_odds_chosen": 10.430425643920898, "log_odds_ratio": -0.000246243056608364, "logits/chosen": 0.02395036816596985, "logits/rejected": -0.009510427713394165, "logps/chosen": -0.00040431745583191514, "logps/rejected": -2.0692477226257324, "loss": 0.5412, "nll_loss": 0.135265052318573, "rewards/accuracies": 1.0, "rewards/chosen": -4.043174703838304e-05, "rewards/margins": 0.20688433945178986, "rewards/rejected": -0.20692478120326996, "step": 9970 }, { "epoch": 6.895573997233749, "grad_norm": 7.976735591888428, "learning_rate": 1.7246811126479176e-05, "log_odds_chosen": 11.322787284851074, "log_odds_ratio": -4.459191404748708e-05, "logits/chosen": -0.5531570911407471, "logits/rejected": -0.6223657131195068, "logps/chosen": -0.00028584557003341615, "logps/rejected": -2.5278143882751465, "loss": 0.7538, "nll_loss": 0.1884533315896988, "rewards/accuracies": 1.0, "rewards/chosen": -2.8584558094735257e-05, "rewards/margins": 0.2527528405189514, "rewards/rejected": -0.2527814209461212, "step": 9971 }, { "epoch": 6.8962655601659755, "grad_norm": 6.4360456466674805, "learning_rate": 1.7242969110189028e-05, "log_odds_chosen": 11.322450637817383, "log_odds_ratio": -0.0002915957011282444, "logits/chosen": -0.18822026252746582, "logits/rejected": -0.21931561827659607, "logps/chosen": -0.00036068481858819723, "logps/rejected": -3.0156097412109375, "loss": 0.522, "nll_loss": 0.1304597705602646, "rewards/accuracies": 1.0, "rewards/chosen": -3.6068482586415485e-05, "rewards/margins": 0.3015248775482178, "rewards/rejected": -0.3015609681606293, "step": 9972 }, { "epoch": 6.896957123098202, "grad_norm": 6.496629238128662, "learning_rate": 1.723912709389888e-05, "log_odds_chosen": 10.186212539672852, "log_odds_ratio": -0.00041088840225711465, "logits/chosen": -0.16055399179458618, "logits/rejected": -0.13317851722240448, "logps/chosen": -0.0008482407429255545, "logps/rejected": -2.466451406478882, "loss": 0.5271, "nll_loss": 0.13174614310264587, "rewards/accuracies": 1.0, "rewards/chosen": -8.482407429255545e-05, "rewards/margins": 0.24656033515930176, "rewards/rejected": -0.24664515256881714, "step": 9973 }, { "epoch": 6.897648686030429, "grad_norm": 7.392096042633057, "learning_rate": 1.723528507760873e-05, "log_odds_chosen": 9.93017864227295, "log_odds_ratio": -0.0005902046104893088, "logits/chosen": -0.4856283664703369, "logits/rejected": -0.4615434408187866, "logps/chosen": -0.0007256892276927829, "logps/rejected": -1.9430298805236816, "loss": 0.5449, "nll_loss": 0.1361699402332306, "rewards/accuracies": 1.0, "rewards/chosen": -7.25689169485122e-05, "rewards/margins": 0.19423040747642517, "rewards/rejected": -0.1943029761314392, "step": 9974 }, { "epoch": 6.898340248962656, "grad_norm": 4.620207786560059, "learning_rate": 1.7231443061318582e-05, "log_odds_chosen": 11.213531494140625, "log_odds_ratio": -1.967877324204892e-05, "logits/chosen": -0.3341743052005768, "logits/rejected": -0.4299090504646301, "logps/chosen": -0.00012599513866007328, "logps/rejected": -2.0738210678100586, "loss": 0.4984, "nll_loss": 0.12459343671798706, "rewards/accuracies": 1.0, "rewards/chosen": -1.2599512956512626e-05, "rewards/margins": 0.20736950635910034, "rewards/rejected": -0.20738211274147034, "step": 9975 }, { "epoch": 6.899031811894883, "grad_norm": 6.554383754730225, "learning_rate": 1.722760104502843e-05, "log_odds_chosen": 10.371408462524414, "log_odds_ratio": -0.00014440737140830606, "logits/chosen": 0.0777406394481659, "logits/rejected": -0.02058453857898712, "logps/chosen": -0.00027885413146577775, "logps/rejected": -1.7560479640960693, "loss": 0.6283, "nll_loss": 0.1570620834827423, "rewards/accuracies": 1.0, "rewards/chosen": -2.7885413146577775e-05, "rewards/margins": 0.17557692527770996, "rewards/rejected": -0.17560480535030365, "step": 9976 }, { "epoch": 6.89972337482711, "grad_norm": 6.355048656463623, "learning_rate": 1.722375902873828e-05, "log_odds_chosen": 10.970148086547852, "log_odds_ratio": -4.067463305545971e-05, "logits/chosen": -0.38521891832351685, "logits/rejected": -0.4572032392024994, "logps/chosen": -0.0004533581086434424, "logps/rejected": -2.48785662651062, "loss": 0.7192, "nll_loss": 0.17979201674461365, "rewards/accuracies": 1.0, "rewards/chosen": -4.533581522991881e-05, "rewards/margins": 0.24874034523963928, "rewards/rejected": -0.24878567457199097, "step": 9977 }, { "epoch": 6.9004149377593365, "grad_norm": 14.030281066894531, "learning_rate": 1.7219917012448132e-05, "log_odds_chosen": 9.614143371582031, "log_odds_ratio": -0.0008283082861453295, "logits/chosen": -0.685629665851593, "logits/rejected": -0.6637234687805176, "logps/chosen": -0.0012888973578810692, "logps/rejected": -1.804660439491272, "loss": 1.246, "nll_loss": 0.31140968203544617, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001288897474296391, "rewards/margins": 0.18033716082572937, "rewards/rejected": -0.18046605587005615, "step": 9978 }, { "epoch": 6.901106500691563, "grad_norm": 4.408642292022705, "learning_rate": 1.7216074996157985e-05, "log_odds_chosen": 11.826573371887207, "log_odds_ratio": -6.028627103660256e-05, "logits/chosen": -0.07609276473522186, "logits/rejected": -0.14199811220169067, "logps/chosen": -0.0012418505502864718, "logps/rejected": -3.6499717235565186, "loss": 0.535, "nll_loss": 0.1337420642375946, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012418505502864718, "rewards/margins": 0.3648729920387268, "rewards/rejected": -0.36499714851379395, "step": 9979 }, { "epoch": 6.90179806362379, "grad_norm": 12.403252601623535, "learning_rate": 1.7212232979867834e-05, "log_odds_chosen": 10.558736801147461, "log_odds_ratio": -7.954631291795522e-05, "logits/chosen": -0.8147011399269104, "logits/rejected": -0.8264572024345398, "logps/chosen": -0.00023240508744493127, "logps/rejected": -1.7422142028808594, "loss": 0.4863, "nll_loss": 0.12156790494918823, "rewards/accuracies": 1.0, "rewards/chosen": -2.3240507289301604e-05, "rewards/margins": 0.1741981953382492, "rewards/rejected": -0.17422142624855042, "step": 9980 }, { "epoch": 6.902489626556017, "grad_norm": 5.796401023864746, "learning_rate": 1.7208390963577686e-05, "log_odds_chosen": 11.614713668823242, "log_odds_ratio": -1.2603211871464737e-05, "logits/chosen": -0.24464285373687744, "logits/rejected": -0.20899458229541779, "logps/chosen": -0.00011055903451051563, "logps/rejected": -2.407068967819214, "loss": 0.5689, "nll_loss": 0.1422237902879715, "rewards/accuracies": 1.0, "rewards/chosen": -1.1055903087253682e-05, "rewards/margins": 0.24069584906101227, "rewards/rejected": -0.2407069057226181, "step": 9981 }, { "epoch": 6.903181189488244, "grad_norm": 11.680974006652832, "learning_rate": 1.720454894728754e-05, "log_odds_chosen": 10.755126953125, "log_odds_ratio": -4.615750731318258e-05, "logits/chosen": -1.014672040939331, "logits/rejected": -0.9060235023498535, "logps/chosen": -9.19853919185698e-05, "logps/rejected": -1.668900728225708, "loss": 0.5539, "nll_loss": 0.13847382366657257, "rewards/accuracies": 1.0, "rewards/chosen": -9.198538464261219e-06, "rewards/margins": 0.16688087582588196, "rewards/rejected": -0.16689005494117737, "step": 9982 }, { "epoch": 6.903872752420471, "grad_norm": 5.80061674118042, "learning_rate": 1.7200706930997388e-05, "log_odds_chosen": 11.222034454345703, "log_odds_ratio": -2.8924485377501696e-05, "logits/chosen": -0.3152433931827545, "logits/rejected": -0.4137364327907562, "logps/chosen": -0.0001420917978975922, "logps/rejected": -2.2330398559570312, "loss": 0.7179, "nll_loss": 0.17946086823940277, "rewards/accuracies": 1.0, "rewards/chosen": -1.4209179425961338e-05, "rewards/margins": 0.22328978776931763, "rewards/rejected": -0.22330397367477417, "step": 9983 }, { "epoch": 6.904564315352697, "grad_norm": 5.845762252807617, "learning_rate": 1.719686491470724e-05, "log_odds_chosen": 10.686575889587402, "log_odds_ratio": -0.00021342271065805107, "logits/chosen": -0.11986368149518967, "logits/rejected": -0.23188787698745728, "logps/chosen": -0.002099713310599327, "logps/rejected": -2.823957920074463, "loss": 0.6112, "nll_loss": 0.1527710258960724, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020997134561184794, "rewards/margins": 0.282185822725296, "rewards/rejected": -0.28239578008651733, "step": 9984 }, { "epoch": 6.905255878284924, "grad_norm": 4.402422904968262, "learning_rate": 1.719302289841709e-05, "log_odds_chosen": 11.305420875549316, "log_odds_ratio": -3.5889526770915836e-05, "logits/chosen": -0.47906628251075745, "logits/rejected": -0.5477668046951294, "logps/chosen": -0.0002497454406693578, "logps/rejected": -2.85650634765625, "loss": 0.5407, "nll_loss": 0.13516737520694733, "rewards/accuracies": 1.0, "rewards/chosen": -2.4974546249723062e-05, "rewards/margins": 0.2856256663799286, "rewards/rejected": -0.2856506407260895, "step": 9985 }, { "epoch": 6.905947441217151, "grad_norm": 6.135840892791748, "learning_rate": 1.718918088212694e-05, "log_odds_chosen": 11.341968536376953, "log_odds_ratio": -0.00011196234845556319, "logits/chosen": -0.059741728007793427, "logits/rejected": -0.2262853980064392, "logps/chosen": -0.0001801389007596299, "logps/rejected": -2.3228774070739746, "loss": 0.5875, "nll_loss": 0.1468682587146759, "rewards/accuracies": 1.0, "rewards/chosen": -1.801389043976087e-05, "rewards/margins": 0.23226971924304962, "rewards/rejected": -0.2322877198457718, "step": 9986 }, { "epoch": 6.906639004149378, "grad_norm": 6.067322254180908, "learning_rate": 1.718533886583679e-05, "log_odds_chosen": 9.752814292907715, "log_odds_ratio": -0.00010807962098624557, "logits/chosen": -0.9131343364715576, "logits/rejected": -0.964361846446991, "logps/chosen": -0.0003965977521147579, "logps/rejected": -1.637006163597107, "loss": 0.6769, "nll_loss": 0.1692250370979309, "rewards/accuracies": 1.0, "rewards/chosen": -3.965977521147579e-05, "rewards/margins": 0.1636609435081482, "rewards/rejected": -0.16370061039924622, "step": 9987 }, { "epoch": 6.907330567081605, "grad_norm": 6.261120319366455, "learning_rate": 1.7181496849546643e-05, "log_odds_chosen": 11.65019416809082, "log_odds_ratio": -1.2441886610758957e-05, "logits/chosen": -0.14381486177444458, "logits/rejected": -0.283840149641037, "logps/chosen": -0.0005752052529715002, "logps/rejected": -2.8144447803497314, "loss": 0.6714, "nll_loss": 0.16783645749092102, "rewards/accuracies": 1.0, "rewards/chosen": -5.752052675234154e-05, "rewards/margins": 0.28138697147369385, "rewards/rejected": -0.2814444899559021, "step": 9988 }, { "epoch": 6.908022130013832, "grad_norm": 6.250824451446533, "learning_rate": 1.7177654833256492e-05, "log_odds_chosen": 11.515530586242676, "log_odds_ratio": -7.775715494062752e-05, "logits/chosen": -0.605188250541687, "logits/rejected": -0.6803003549575806, "logps/chosen": -0.00018032471416518092, "logps/rejected": -2.609130859375, "loss": 0.7296, "nll_loss": 0.1823856383562088, "rewards/accuracies": 1.0, "rewards/chosen": -1.8032471416518092e-05, "rewards/margins": 0.2608950436115265, "rewards/rejected": -0.26091307401657104, "step": 9989 }, { "epoch": 6.908713692946058, "grad_norm": 8.184794425964355, "learning_rate": 1.7173812816966345e-05, "log_odds_chosen": 10.37601089477539, "log_odds_ratio": -0.00011470400204416364, "logits/chosen": -0.6234085559844971, "logits/rejected": -0.6225950717926025, "logps/chosen": -0.0002568865311332047, "logps/rejected": -1.7968766689300537, "loss": 0.4098, "nll_loss": 0.10243692249059677, "rewards/accuracies": 1.0, "rewards/chosen": -2.5688654204714112e-05, "rewards/margins": 0.17966195940971375, "rewards/rejected": -0.17968766391277313, "step": 9990 }, { "epoch": 6.909405255878285, "grad_norm": 6.626237869262695, "learning_rate": 1.7169970800676197e-05, "log_odds_chosen": 11.185545921325684, "log_odds_ratio": -2.2582265955861658e-05, "logits/chosen": -0.44990795850753784, "logits/rejected": -0.45199233293533325, "logps/chosen": -0.00030346005223691463, "logps/rejected": -2.503441333770752, "loss": 0.4825, "nll_loss": 0.12061312049627304, "rewards/accuracies": 1.0, "rewards/chosen": -3.0346007406478748e-05, "rewards/margins": 0.25031381845474243, "rewards/rejected": -0.2503441572189331, "step": 9991 }, { "epoch": 6.910096818810512, "grad_norm": 4.987192153930664, "learning_rate": 1.7166128784386046e-05, "log_odds_chosen": 10.907398223876953, "log_odds_ratio": -3.8101232348708436e-05, "logits/chosen": -0.447499543428421, "logits/rejected": -0.5684572458267212, "logps/chosen": -0.00017325104272458702, "logps/rejected": -2.0211973190307617, "loss": 0.4658, "nll_loss": 0.11645621061325073, "rewards/accuracies": 1.0, "rewards/chosen": -1.7325104636256583e-05, "rewards/margins": 0.202102392911911, "rewards/rejected": -0.20211973786354065, "step": 9992 }, { "epoch": 6.910788381742739, "grad_norm": 4.203260898590088, "learning_rate": 1.71622867680959e-05, "log_odds_chosen": 11.14862060546875, "log_odds_ratio": -2.3845985197112896e-05, "logits/chosen": -0.45166918635368347, "logits/rejected": -0.48994314670562744, "logps/chosen": -0.00017078101518563926, "logps/rejected": -2.381739854812622, "loss": 0.5594, "nll_loss": 0.13983705639839172, "rewards/accuracies": 1.0, "rewards/chosen": -1.707810406514909e-05, "rewards/margins": 0.23815689980983734, "rewards/rejected": -0.23817399144172668, "step": 9993 }, { "epoch": 6.911479944674966, "grad_norm": 6.142948627471924, "learning_rate": 1.7158444751805748e-05, "log_odds_chosen": 12.051002502441406, "log_odds_ratio": -4.126852945773862e-05, "logits/chosen": -0.41811603307724, "logits/rejected": -0.5299392342567444, "logps/chosen": -0.0003040796145796776, "logps/rejected": -3.4569079875946045, "loss": 0.8762, "nll_loss": 0.21903914213180542, "rewards/accuracies": 1.0, "rewards/chosen": -3.040796218556352e-05, "rewards/margins": 0.34566038846969604, "rewards/rejected": -0.3456908166408539, "step": 9994 }, { "epoch": 6.912171507607193, "grad_norm": 4.349850654602051, "learning_rate": 1.7154602735515597e-05, "log_odds_chosen": 10.87544059753418, "log_odds_ratio": -3.4084103390341625e-05, "logits/chosen": -0.6788896322250366, "logits/rejected": -0.6680184006690979, "logps/chosen": -0.00010783715697471052, "logps/rejected": -1.7640870809555054, "loss": 0.5287, "nll_loss": 0.13216936588287354, "rewards/accuracies": 1.0, "rewards/chosen": -1.0783716788864695e-05, "rewards/margins": 0.17639793455600739, "rewards/rejected": -0.17640872299671173, "step": 9995 }, { "epoch": 6.912863070539419, "grad_norm": 7.14302921295166, "learning_rate": 1.715076071922545e-05, "log_odds_chosen": 11.023346900939941, "log_odds_ratio": -0.00014540627307724208, "logits/chosen": -0.6619070172309875, "logits/rejected": -0.6815442442893982, "logps/chosen": -0.0004409942775964737, "logps/rejected": -2.748929500579834, "loss": 0.6391, "nll_loss": 0.15976391732692719, "rewards/accuracies": 1.0, "rewards/chosen": -4.4099429942434654e-05, "rewards/margins": 0.2748488783836365, "rewards/rejected": -0.27489298582077026, "step": 9996 }, { "epoch": 6.913554633471646, "grad_norm": 5.94216775894165, "learning_rate": 1.7146918702935302e-05, "log_odds_chosen": 9.212047576904297, "log_odds_ratio": -0.0026079691015183926, "logits/chosen": -0.22626805305480957, "logits/rejected": -0.21808166801929474, "logps/chosen": -0.0020967067684978247, "logps/rejected": -1.5887069702148438, "loss": 0.4767, "nll_loss": 0.1189127266407013, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002096706593874842, "rewards/margins": 0.15866103768348694, "rewards/rejected": -0.15887069702148438, "step": 9997 }, { "epoch": 6.914246196403873, "grad_norm": 5.059571266174316, "learning_rate": 1.714307668664515e-05, "log_odds_chosen": 9.903790473937988, "log_odds_ratio": -0.00019789970247074962, "logits/chosen": -0.3731571435928345, "logits/rejected": -0.4034712612628937, "logps/chosen": -0.0004390809335745871, "logps/rejected": -1.7462046146392822, "loss": 0.7642, "nll_loss": 0.19103066623210907, "rewards/accuracies": 1.0, "rewards/chosen": -4.390809408505447e-05, "rewards/margins": 0.1745765507221222, "rewards/rejected": -0.17462044954299927, "step": 9998 }, { "epoch": 6.9149377593361, "grad_norm": 6.209487438201904, "learning_rate": 1.7139234670355003e-05, "log_odds_chosen": 10.59821891784668, "log_odds_ratio": -3.986993760918267e-05, "logits/chosen": -0.17233705520629883, "logits/rejected": -0.20161035656929016, "logps/chosen": -0.0008766738465055823, "logps/rejected": -2.2711052894592285, "loss": 0.8454, "nll_loss": 0.21134933829307556, "rewards/accuracies": 1.0, "rewards/chosen": -8.766739483689889e-05, "rewards/margins": 0.22702288627624512, "rewards/rejected": -0.22711056470870972, "step": 9999 }, { "epoch": 6.915629322268327, "grad_norm": 5.539126396179199, "learning_rate": 1.7135392654064856e-05, "log_odds_chosen": 11.38946533203125, "log_odds_ratio": -4.335786070441827e-05, "logits/chosen": -0.4548533856868744, "logits/rejected": -0.46245914697647095, "logps/chosen": -0.00011622466263361275, "logps/rejected": -2.4456939697265625, "loss": 0.5107, "nll_loss": 0.1276700645685196, "rewards/accuracies": 1.0, "rewards/chosen": -1.1622466445260216e-05, "rewards/margins": 0.24455779790878296, "rewards/rejected": -0.24456939101219177, "step": 10000 }, { "epoch": 6.9163208852005535, "grad_norm": 5.396164894104004, "learning_rate": 1.7131550637774705e-05, "log_odds_chosen": 10.901666641235352, "log_odds_ratio": -5.471762779052369e-05, "logits/chosen": -0.3494221568107605, "logits/rejected": -0.4387245774269104, "logps/chosen": -0.00018875315436162055, "logps/rejected": -2.1393706798553467, "loss": 0.4547, "nll_loss": 0.11367647349834442, "rewards/accuracies": 1.0, "rewards/chosen": -1.8875314708566293e-05, "rewards/margins": 0.21391819417476654, "rewards/rejected": -0.21393707394599915, "step": 10001 }, { "epoch": 6.91701244813278, "grad_norm": 5.637955188751221, "learning_rate": 1.7127708621484557e-05, "log_odds_chosen": 11.170769691467285, "log_odds_ratio": -0.0003723877598531544, "logits/chosen": -0.5806461572647095, "logits/rejected": -0.6152359247207642, "logps/chosen": -0.001203806372359395, "logps/rejected": -2.139021635055542, "loss": 0.5255, "nll_loss": 0.1313486397266388, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012038064596708864, "rewards/margins": 0.21378177404403687, "rewards/rejected": -0.21390217542648315, "step": 10002 }, { "epoch": 6.917704011065007, "grad_norm": 4.686069965362549, "learning_rate": 1.712386660519441e-05, "log_odds_chosen": 8.970919609069824, "log_odds_ratio": -0.0009532291442155838, "logits/chosen": -0.3284967839717865, "logits/rejected": -0.36006975173950195, "logps/chosen": -0.001138596679084003, "logps/rejected": -1.734472393989563, "loss": 0.3506, "nll_loss": 0.08756309747695923, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011385966354282573, "rewards/margins": 0.17333339154720306, "rewards/rejected": -0.17344725131988525, "step": 10003 }, { "epoch": 6.918395573997234, "grad_norm": 5.611600875854492, "learning_rate": 1.712002458890426e-05, "log_odds_chosen": 9.66145133972168, "log_odds_ratio": -0.0003402447036933154, "logits/chosen": -0.2682963013648987, "logits/rejected": -0.2505141794681549, "logps/chosen": -0.0002948015171568841, "logps/rejected": -1.4168219566345215, "loss": 0.6874, "nll_loss": 0.17181649804115295, "rewards/accuracies": 1.0, "rewards/chosen": -2.948015389847569e-05, "rewards/margins": 0.14165271818637848, "rewards/rejected": -0.1416821926832199, "step": 10004 }, { "epoch": 6.919087136929461, "grad_norm": 7.419260025024414, "learning_rate": 1.7116182572614108e-05, "log_odds_chosen": 10.889808654785156, "log_odds_ratio": -5.775997487944551e-05, "logits/chosen": -0.14147552847862244, "logits/rejected": -0.18376371264457703, "logps/chosen": -0.000382953614462167, "logps/rejected": -2.3106813430786133, "loss": 0.6622, "nll_loss": 0.1655469834804535, "rewards/accuracies": 1.0, "rewards/chosen": -3.829535853583366e-05, "rewards/margins": 0.23102985322475433, "rewards/rejected": -0.23106813430786133, "step": 10005 }, { "epoch": 6.919778699861688, "grad_norm": 6.319573879241943, "learning_rate": 1.711234055632396e-05, "log_odds_chosen": 10.299653053283691, "log_odds_ratio": -0.0001510842703282833, "logits/chosen": -0.26969629526138306, "logits/rejected": -0.48525863885879517, "logps/chosen": -0.0004234910593368113, "logps/rejected": -1.9849330186843872, "loss": 0.6292, "nll_loss": 0.15729010105133057, "rewards/accuracies": 1.0, "rewards/chosen": -4.234910738887265e-05, "rewards/margins": 0.198450967669487, "rewards/rejected": -0.1984933316707611, "step": 10006 }, { "epoch": 6.9204702627939145, "grad_norm": 8.048715591430664, "learning_rate": 1.710849854003381e-05, "log_odds_chosen": 11.729087829589844, "log_odds_ratio": -2.784031858027447e-05, "logits/chosen": -0.1425468623638153, "logits/rejected": -0.20963522791862488, "logps/chosen": -0.00016317141125909984, "logps/rejected": -2.7022130489349365, "loss": 0.6601, "nll_loss": 0.165022611618042, "rewards/accuracies": 1.0, "rewards/chosen": -1.6317142581101507e-05, "rewards/margins": 0.27020499110221863, "rewards/rejected": -0.2702212929725647, "step": 10007 }, { "epoch": 6.921161825726141, "grad_norm": 7.835314750671387, "learning_rate": 1.7104656523743662e-05, "log_odds_chosen": 9.968704223632812, "log_odds_ratio": -0.0001407539821229875, "logits/chosen": -0.6228182315826416, "logits/rejected": -0.5100666284561157, "logps/chosen": -0.00018801394617184997, "logps/rejected": -1.6318118572235107, "loss": 0.6759, "nll_loss": 0.16896140575408936, "rewards/accuracies": 1.0, "rewards/chosen": -1.8801394617184997e-05, "rewards/margins": 0.16316238045692444, "rewards/rejected": -0.16318118572235107, "step": 10008 }, { "epoch": 6.921853388658368, "grad_norm": 15.812283515930176, "learning_rate": 1.7100814507453514e-05, "log_odds_chosen": 10.900727272033691, "log_odds_ratio": -4.0191374864662066e-05, "logits/chosen": -0.17795416712760925, "logits/rejected": -0.2042202651500702, "logps/chosen": -0.00029529494349844754, "logps/rejected": -2.5482981204986572, "loss": 0.8083, "nll_loss": 0.20208188891410828, "rewards/accuracies": 1.0, "rewards/chosen": -2.9529493986046873e-05, "rewards/margins": 0.25480031967163086, "rewards/rejected": -0.2548298239707947, "step": 10009 }, { "epoch": 6.922544951590595, "grad_norm": 7.8032450675964355, "learning_rate": 1.7096972491163363e-05, "log_odds_chosen": 11.113170623779297, "log_odds_ratio": -0.0002255002618767321, "logits/chosen": -0.14331859350204468, "logits/rejected": -0.16391247510910034, "logps/chosen": -0.0001260231510968879, "logps/rejected": -2.212287425994873, "loss": 0.884, "nll_loss": 0.22097471356391907, "rewards/accuracies": 1.0, "rewards/chosen": -1.2602315109688789e-05, "rewards/margins": 0.2212161421775818, "rewards/rejected": -0.22122874855995178, "step": 10010 }, { "epoch": 6.923236514522822, "grad_norm": 5.020081043243408, "learning_rate": 1.7093130474873216e-05, "log_odds_chosen": 10.566305160522461, "log_odds_ratio": -9.605865488993004e-05, "logits/chosen": -0.05223175138235092, "logits/rejected": -0.11202915012836456, "logps/chosen": -0.0012542939512059093, "logps/rejected": -2.5465216636657715, "loss": 0.4485, "nll_loss": 0.1121145635843277, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012542940385174006, "rewards/margins": 0.2545267343521118, "rewards/rejected": -0.2546521723270416, "step": 10011 }, { "epoch": 6.923928077455049, "grad_norm": 7.946537017822266, "learning_rate": 1.7089288458583068e-05, "log_odds_chosen": 11.411028861999512, "log_odds_ratio": -3.746839865925722e-05, "logits/chosen": -0.3238312005996704, "logits/rejected": -0.3977673649787903, "logps/chosen": -0.00012408751354087144, "logps/rejected": -2.3582587242126465, "loss": 0.4168, "nll_loss": 0.10418720543384552, "rewards/accuracies": 1.0, "rewards/chosen": -1.2408750990289263e-05, "rewards/margins": 0.2358134537935257, "rewards/rejected": -0.23582588136196136, "step": 10012 }, { "epoch": 6.9246196403872755, "grad_norm": 5.735523223876953, "learning_rate": 1.7085446442292917e-05, "log_odds_chosen": 10.758692741394043, "log_odds_ratio": -0.00016033755673561245, "logits/chosen": -0.42282581329345703, "logits/rejected": -0.3419947922229767, "logps/chosen": -0.00011035312491003424, "logps/rejected": -1.7250794172286987, "loss": 0.7007, "nll_loss": 0.17514723539352417, "rewards/accuracies": 1.0, "rewards/chosen": -1.1035313036700245e-05, "rewards/margins": 0.17249691486358643, "rewards/rejected": -0.17250794172286987, "step": 10013 }, { "epoch": 6.925311203319502, "grad_norm": 5.317147254943848, "learning_rate": 1.7081604426002766e-05, "log_odds_chosen": 9.747225761413574, "log_odds_ratio": -0.00041577807860448956, "logits/chosen": -0.8432607650756836, "logits/rejected": -0.8438321948051453, "logps/chosen": -0.0006377776153385639, "logps/rejected": -1.7046442031860352, "loss": 0.474, "nll_loss": 0.1184675395488739, "rewards/accuracies": 1.0, "rewards/chosen": -6.377776298904791e-05, "rewards/margins": 0.17040064930915833, "rewards/rejected": -0.170464426279068, "step": 10014 }, { "epoch": 6.926002766251729, "grad_norm": 5.1862711906433105, "learning_rate": 1.707776240971262e-05, "log_odds_chosen": 10.295071601867676, "log_odds_ratio": -0.00014044718409422785, "logits/chosen": -0.23374953866004944, "logits/rejected": -0.22666972875595093, "logps/chosen": -0.0014169241767376661, "logps/rejected": -2.477402687072754, "loss": 0.5488, "nll_loss": 0.13718783855438232, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014169240603223443, "rewards/margins": 0.2475985735654831, "rewards/rejected": -0.24774028360843658, "step": 10015 }, { "epoch": 6.926694329183956, "grad_norm": 4.504415988922119, "learning_rate": 1.7073920393422468e-05, "log_odds_chosen": 10.581575393676758, "log_odds_ratio": -4.251056452631019e-05, "logits/chosen": -0.5650668144226074, "logits/rejected": -0.6096177101135254, "logps/chosen": -0.00013295267126522958, "logps/rejected": -1.5700645446777344, "loss": 0.2816, "nll_loss": 0.07039927691221237, "rewards/accuracies": 1.0, "rewards/chosen": -1.3295267308421899e-05, "rewards/margins": 0.15699316561222076, "rewards/rejected": -0.15700644254684448, "step": 10016 }, { "epoch": 6.927385892116183, "grad_norm": 4.376059055328369, "learning_rate": 1.707007837713232e-05, "log_odds_chosen": 10.04145622253418, "log_odds_ratio": -0.00011801601795013994, "logits/chosen": -0.15814641118049622, "logits/rejected": -0.174251988530159, "logps/chosen": -0.0004231779312249273, "logps/rejected": -1.86943781375885, "loss": 0.4622, "nll_loss": 0.11552843451499939, "rewards/accuracies": 1.0, "rewards/chosen": -4.231779166730121e-05, "rewards/margins": 0.18690146505832672, "rewards/rejected": -0.18694376945495605, "step": 10017 }, { "epoch": 6.92807745504841, "grad_norm": 5.4749836921691895, "learning_rate": 1.7066236360842173e-05, "log_odds_chosen": 10.348876953125, "log_odds_ratio": -5.838269862579182e-05, "logits/chosen": -0.050103262066841125, "logits/rejected": -0.13943883776664734, "logps/chosen": -0.0003169150440953672, "logps/rejected": -2.1400392055511475, "loss": 0.5455, "nll_loss": 0.1363808512687683, "rewards/accuracies": 1.0, "rewards/chosen": -3.1691506592324004e-05, "rewards/margins": 0.21397224068641663, "rewards/rejected": -0.21400392055511475, "step": 10018 }, { "epoch": 6.9287690179806365, "grad_norm": 5.5527496337890625, "learning_rate": 1.706239434455202e-05, "log_odds_chosen": 11.428709983825684, "log_odds_ratio": -2.8784088499378413e-05, "logits/chosen": -0.4300426244735718, "logits/rejected": -0.44284480810165405, "logps/chosen": -0.00032426012330688536, "logps/rejected": -2.8826076984405518, "loss": 0.6097, "nll_loss": 0.1524135172367096, "rewards/accuracies": 1.0, "rewards/chosen": -3.242601451347582e-05, "rewards/margins": 0.2882283329963684, "rewards/rejected": -0.2882607579231262, "step": 10019 }, { "epoch": 6.929460580912863, "grad_norm": 5.034395694732666, "learning_rate": 1.7058552328261874e-05, "log_odds_chosen": 11.60173225402832, "log_odds_ratio": -5.259798854240216e-05, "logits/chosen": -0.3198222219944, "logits/rejected": -0.42940211296081543, "logps/chosen": -0.00019729719497263432, "logps/rejected": -3.0343990325927734, "loss": 0.8083, "nll_loss": 0.2020743191242218, "rewards/accuracies": 1.0, "rewards/chosen": -1.9729721316252835e-05, "rewards/margins": 0.30342015624046326, "rewards/rejected": -0.3034399151802063, "step": 10020 }, { "epoch": 6.93015214384509, "grad_norm": 7.2849555015563965, "learning_rate": 1.7054710311971723e-05, "log_odds_chosen": 11.120197296142578, "log_odds_ratio": -3.993926657130942e-05, "logits/chosen": -0.46323466300964355, "logits/rejected": -0.38564297556877136, "logps/chosen": -0.0003112137783318758, "logps/rejected": -2.552988290786743, "loss": 0.6821, "nll_loss": 0.1705092489719391, "rewards/accuracies": 1.0, "rewards/chosen": -3.1121380743570626e-05, "rewards/margins": 0.2552677094936371, "rewards/rejected": -0.25529882311820984, "step": 10021 }, { "epoch": 6.930843706777317, "grad_norm": 9.017226219177246, "learning_rate": 1.7050868295681576e-05, "log_odds_chosen": 11.707319259643555, "log_odds_ratio": -7.7094002335798e-05, "logits/chosen": -0.22343973815441132, "logits/rejected": -0.31163230538368225, "logps/chosen": -0.00026503336266614497, "logps/rejected": -2.8297653198242188, "loss": 0.4342, "nll_loss": 0.10853277146816254, "rewards/accuracies": 1.0, "rewards/chosen": -2.6503335902816616e-05, "rewards/margins": 0.2829500436782837, "rewards/rejected": -0.28297656774520874, "step": 10022 }, { "epoch": 6.931535269709544, "grad_norm": 4.966097831726074, "learning_rate": 1.7047026279391425e-05, "log_odds_chosen": 9.993123054504395, "log_odds_ratio": -0.00042920681880787015, "logits/chosen": -0.06338300555944443, "logits/rejected": -0.0930364578962326, "logps/chosen": -0.0007117825443856418, "logps/rejected": -1.797957181930542, "loss": 0.3926, "nll_loss": 0.09811747819185257, "rewards/accuracies": 1.0, "rewards/chosen": -7.117826316971332e-05, "rewards/margins": 0.1797245442867279, "rewards/rejected": -0.17979572713375092, "step": 10023 }, { "epoch": 6.932226832641771, "grad_norm": 8.300261497497559, "learning_rate": 1.7043184263101274e-05, "log_odds_chosen": 11.617762565612793, "log_odds_ratio": -2.416789902781602e-05, "logits/chosen": -0.48339706659317017, "logits/rejected": -0.5057979822158813, "logps/chosen": -0.0001829541870392859, "logps/rejected": -2.64070463180542, "loss": 0.4426, "nll_loss": 0.11063584685325623, "rewards/accuracies": 1.0, "rewards/chosen": -1.829541906772647e-05, "rewards/margins": 0.264052152633667, "rewards/rejected": -0.26407045125961304, "step": 10024 }, { "epoch": 6.9329183955739975, "grad_norm": 4.101850986480713, "learning_rate": 1.7039342246811126e-05, "log_odds_chosen": 11.531643867492676, "log_odds_ratio": -1.3064412996754982e-05, "logits/chosen": -0.33082935214042664, "logits/rejected": -0.37834471464157104, "logps/chosen": -0.00018180490587837994, "logps/rejected": -2.5283150672912598, "loss": 0.3357, "nll_loss": 0.0839347094297409, "rewards/accuracies": 1.0, "rewards/chosen": -1.8180489860242233e-05, "rewards/margins": 0.25281333923339844, "rewards/rejected": -0.25283151865005493, "step": 10025 }, { "epoch": 6.933609958506224, "grad_norm": 8.496269226074219, "learning_rate": 1.703550023052098e-05, "log_odds_chosen": 11.259842872619629, "log_odds_ratio": -2.703062455111649e-05, "logits/chosen": -0.5049582719802856, "logits/rejected": -0.4686523377895355, "logps/chosen": -0.00018693049787543714, "logps/rejected": -2.6714820861816406, "loss": 0.9831, "nll_loss": 0.24576500058174133, "rewards/accuracies": 1.0, "rewards/chosen": -1.8693050151341595e-05, "rewards/margins": 0.267129510641098, "rewards/rejected": -0.2671481966972351, "step": 10026 }, { "epoch": 6.934301521438451, "grad_norm": 6.2408223152160645, "learning_rate": 1.7031658214230828e-05, "log_odds_chosen": 11.580894470214844, "log_odds_ratio": -1.513993993285112e-05, "logits/chosen": -0.4473365247249603, "logits/rejected": -0.402547687292099, "logps/chosen": -0.0002027477603405714, "logps/rejected": -2.6497583389282227, "loss": 1.268, "nll_loss": 0.31700828671455383, "rewards/accuracies": 1.0, "rewards/chosen": -2.027477603405714e-05, "rewards/margins": 0.2649555802345276, "rewards/rejected": -0.2649758458137512, "step": 10027 }, { "epoch": 6.934993084370678, "grad_norm": 4.515257358551025, "learning_rate": 1.702781619794068e-05, "log_odds_chosen": 9.019081115722656, "log_odds_ratio": -0.0010447325184941292, "logits/chosen": -0.32567375898361206, "logits/rejected": -0.3403196930885315, "logps/chosen": -0.0007029250264167786, "logps/rejected": -1.4702303409576416, "loss": 0.6056, "nll_loss": 0.15129372477531433, "rewards/accuracies": 1.0, "rewards/chosen": -7.029250991763547e-05, "rewards/margins": 0.14695274829864502, "rewards/rejected": -0.1470230221748352, "step": 10028 }, { "epoch": 6.935684647302905, "grad_norm": 5.426224231719971, "learning_rate": 1.7023974181650533e-05, "log_odds_chosen": 10.142989158630371, "log_odds_ratio": -0.0001359380839858204, "logits/chosen": -0.6201168298721313, "logits/rejected": -0.624829888343811, "logps/chosen": -0.00029378788894973695, "logps/rejected": -2.0749104022979736, "loss": 0.5595, "nll_loss": 0.13986918330192566, "rewards/accuracies": 1.0, "rewards/chosen": -2.9378788894973695e-05, "rewards/margins": 0.2074616551399231, "rewards/rejected": -0.20749104022979736, "step": 10029 }, { "epoch": 6.936376210235132, "grad_norm": 6.71488618850708, "learning_rate": 1.702013216536038e-05, "log_odds_chosen": 9.412338256835938, "log_odds_ratio": -0.0008309513796120882, "logits/chosen": -0.765308141708374, "logits/rejected": -0.7624964714050293, "logps/chosen": -0.0015413952060043812, "logps/rejected": -1.7514090538024902, "loss": 0.3268, "nll_loss": 0.08161558210849762, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015413951769005507, "rewards/margins": 0.17498676478862762, "rewards/rejected": -0.17514090240001678, "step": 10030 }, { "epoch": 6.9370677731673585, "grad_norm": 4.619319438934326, "learning_rate": 1.7016290149070234e-05, "log_odds_chosen": 11.157760620117188, "log_odds_ratio": -8.181616431102157e-05, "logits/chosen": -0.22801737487316132, "logits/rejected": -0.4726010859012604, "logps/chosen": -0.0005426481948234141, "logps/rejected": -2.3585970401763916, "loss": 0.6443, "nll_loss": 0.16105622053146362, "rewards/accuracies": 1.0, "rewards/chosen": -5.426481948234141e-05, "rewards/margins": 0.2358054369688034, "rewards/rejected": -0.2358597218990326, "step": 10031 }, { "epoch": 6.937759336099585, "grad_norm": 8.809869766235352, "learning_rate": 1.7012448132780083e-05, "log_odds_chosen": 11.043230056762695, "log_odds_ratio": -5.463225534185767e-05, "logits/chosen": -0.5463330745697021, "logits/rejected": -0.6079367399215698, "logps/chosen": -0.00029846368124708533, "logps/rejected": -2.803921937942505, "loss": 0.649, "nll_loss": 0.1622409075498581, "rewards/accuracies": 1.0, "rewards/chosen": -2.9846369216102175e-05, "rewards/margins": 0.2803623378276825, "rewards/rejected": -0.28039219975471497, "step": 10032 }, { "epoch": 6.938450899031812, "grad_norm": 5.242898464202881, "learning_rate": 1.7008606116489932e-05, "log_odds_chosen": 10.950862884521484, "log_odds_ratio": -6.454918184317648e-05, "logits/chosen": -0.547526478767395, "logits/rejected": -0.5974233150482178, "logps/chosen": -0.00021357613150030375, "logps/rejected": -2.4515743255615234, "loss": 0.5296, "nll_loss": 0.13239407539367676, "rewards/accuracies": 1.0, "rewards/chosen": -2.1357613150030375e-05, "rewards/margins": 0.2451360672712326, "rewards/rejected": -0.2451574206352234, "step": 10033 }, { "epoch": 6.939142461964039, "grad_norm": 5.736464023590088, "learning_rate": 1.7004764100199785e-05, "log_odds_chosen": 9.807477951049805, "log_odds_ratio": -0.0001616263180039823, "logits/chosen": -0.6837047338485718, "logits/rejected": -0.7070760726928711, "logps/chosen": -0.0004362165054772049, "logps/rejected": -1.9144620895385742, "loss": 0.558, "nll_loss": 0.13947302103042603, "rewards/accuracies": 1.0, "rewards/chosen": -4.362165054772049e-05, "rewards/margins": 0.1914026141166687, "rewards/rejected": -0.1914462298154831, "step": 10034 }, { "epoch": 6.939834024896266, "grad_norm": 7.313083648681641, "learning_rate": 1.7000922083909637e-05, "log_odds_chosen": 11.469751358032227, "log_odds_ratio": -1.8743656255537644e-05, "logits/chosen": -0.35865500569343567, "logits/rejected": -0.46712803840637207, "logps/chosen": -0.000137790892040357, "logps/rejected": -2.088139533996582, "loss": 0.6775, "nll_loss": 0.1693638563156128, "rewards/accuracies": 1.0, "rewards/chosen": -1.377908938593464e-05, "rewards/margins": 0.20880015194416046, "rewards/rejected": -0.20881393551826477, "step": 10035 }, { "epoch": 6.940525587828493, "grad_norm": 11.915936470031738, "learning_rate": 1.6997080067619486e-05, "log_odds_chosen": 10.631695747375488, "log_odds_ratio": -5.5690161389065906e-05, "logits/chosen": -0.1930725872516632, "logits/rejected": -0.26618435978889465, "logps/chosen": -0.0003762371779885143, "logps/rejected": -2.718092918395996, "loss": 0.535, "nll_loss": 0.1337505578994751, "rewards/accuracies": 1.0, "rewards/chosen": -3.762371852644719e-05, "rewards/margins": 0.2717716693878174, "rewards/rejected": -0.27180930972099304, "step": 10036 }, { "epoch": 6.941217150760719, "grad_norm": 7.780851364135742, "learning_rate": 1.699323805132934e-05, "log_odds_chosen": 11.143234252929688, "log_odds_ratio": -0.0001096235791919753, "logits/chosen": -0.598465085029602, "logits/rejected": -0.6562224626541138, "logps/chosen": -0.00016340528964065015, "logps/rejected": -2.1264867782592773, "loss": 0.5632, "nll_loss": 0.14078933000564575, "rewards/accuracies": 1.0, "rewards/chosen": -1.6340527508873492e-05, "rewards/margins": 0.21263231337070465, "rewards/rejected": -0.2126486748456955, "step": 10037 }, { "epoch": 6.941908713692946, "grad_norm": 7.58236837387085, "learning_rate": 1.698939603503919e-05, "log_odds_chosen": 11.076812744140625, "log_odds_ratio": -0.00014322507195174694, "logits/chosen": -0.20436197519302368, "logits/rejected": -0.21737417578697205, "logps/chosen": -0.0005082335555925965, "logps/rejected": -3.1508872509002686, "loss": 0.8379, "nll_loss": 0.20945268869400024, "rewards/accuracies": 1.0, "rewards/chosen": -5.0823357014451176e-05, "rewards/margins": 0.31503787636756897, "rewards/rejected": -0.31508874893188477, "step": 10038 }, { "epoch": 6.942600276625173, "grad_norm": 5.023095607757568, "learning_rate": 1.698555401874904e-05, "log_odds_chosen": 10.241018295288086, "log_odds_ratio": -0.0002668978413566947, "logits/chosen": -0.1428799033164978, "logits/rejected": -0.18114995956420898, "logps/chosen": -0.00031494133872911334, "logps/rejected": -1.6014385223388672, "loss": 0.4756, "nll_loss": 0.11888079345226288, "rewards/accuracies": 1.0, "rewards/chosen": -3.149413532810286e-05, "rewards/margins": 0.16011235117912292, "rewards/rejected": -0.1601438671350479, "step": 10039 }, { "epoch": 6.9432918395574, "grad_norm": 7.06999397277832, "learning_rate": 1.6981712002458892e-05, "log_odds_chosen": 9.103922843933105, "log_odds_ratio": -0.00024017822579480708, "logits/chosen": -0.24257206916809082, "logits/rejected": -0.24254654347896576, "logps/chosen": -0.0010004019131883979, "logps/rejected": -1.7234010696411133, "loss": 0.5883, "nll_loss": 0.14705629646778107, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010004018986364827, "rewards/margins": 0.17224006354808807, "rewards/rejected": -0.17234010994434357, "step": 10040 }, { "epoch": 6.943983402489627, "grad_norm": 5.003592491149902, "learning_rate": 1.697786998616874e-05, "log_odds_chosen": 10.527713775634766, "log_odds_ratio": -8.007455471670255e-05, "logits/chosen": -0.3677080273628235, "logits/rejected": -0.2869381904602051, "logps/chosen": -0.000419251446146518, "logps/rejected": -2.2190418243408203, "loss": 0.5501, "nll_loss": 0.13752399384975433, "rewards/accuracies": 1.0, "rewards/chosen": -4.19251446146518e-05, "rewards/margins": 0.22186227142810822, "rewards/rejected": -0.2219042032957077, "step": 10041 }, { "epoch": 6.944674965421854, "grad_norm": 11.091449737548828, "learning_rate": 1.697402796987859e-05, "log_odds_chosen": 12.04973030090332, "log_odds_ratio": -1.0531531188462395e-05, "logits/chosen": 0.24184373021125793, "logits/rejected": 0.1998082995414734, "logps/chosen": -0.00014928578457329422, "logps/rejected": -3.006049871444702, "loss": 1.1853, "nll_loss": 0.2963164448738098, "rewards/accuracies": 1.0, "rewards/chosen": -1.4928579730622005e-05, "rewards/margins": 0.30059006810188293, "rewards/rejected": -0.30060499906539917, "step": 10042 }, { "epoch": 6.94536652835408, "grad_norm": 5.952218055725098, "learning_rate": 1.6970185953588443e-05, "log_odds_chosen": 9.409114837646484, "log_odds_ratio": -0.00029461667872965336, "logits/chosen": -0.48497089743614197, "logits/rejected": -0.4794895052909851, "logps/chosen": -0.0006209624698385596, "logps/rejected": -1.867590308189392, "loss": 0.5388, "nll_loss": 0.13466249406337738, "rewards/accuracies": 1.0, "rewards/chosen": -6.209625280462205e-05, "rewards/margins": 0.18669693171977997, "rewards/rejected": -0.18675902485847473, "step": 10043 }, { "epoch": 6.946058091286307, "grad_norm": 6.226381301879883, "learning_rate": 1.6966343937298295e-05, "log_odds_chosen": 11.209342956542969, "log_odds_ratio": -2.317272446816787e-05, "logits/chosen": -0.49453210830688477, "logits/rejected": -0.4661957621574402, "logps/chosen": -0.0004177080118097365, "logps/rejected": -2.362186908721924, "loss": 0.4943, "nll_loss": 0.12356767803430557, "rewards/accuracies": 1.0, "rewards/chosen": -4.177080700173974e-05, "rewards/margins": 0.23617693781852722, "rewards/rejected": -0.23621870577335358, "step": 10044 }, { "epoch": 6.946749654218534, "grad_norm": 6.473517417907715, "learning_rate": 1.6962501921008145e-05, "log_odds_chosen": 10.863648414611816, "log_odds_ratio": -2.934444637503475e-05, "logits/chosen": -0.4041895866394043, "logits/rejected": -0.44918423891067505, "logps/chosen": -0.00010348320938646793, "logps/rejected": -1.6501774787902832, "loss": 0.4861, "nll_loss": 0.12151362746953964, "rewards/accuracies": 1.0, "rewards/chosen": -1.0348320756747853e-05, "rewards/margins": 0.16500739753246307, "rewards/rejected": -0.1650177538394928, "step": 10045 }, { "epoch": 6.947441217150761, "grad_norm": 8.3028564453125, "learning_rate": 1.6958659904717997e-05, "log_odds_chosen": 10.222135543823242, "log_odds_ratio": -5.702318958356045e-05, "logits/chosen": 0.084259532392025, "logits/rejected": 0.013848934322595596, "logps/chosen": -0.0004253485822118819, "logps/rejected": -1.8415899276733398, "loss": 0.5054, "nll_loss": 0.12634600698947906, "rewards/accuracies": 1.0, "rewards/chosen": -4.253485894878395e-05, "rewards/margins": 0.18411648273468018, "rewards/rejected": -0.18415901064872742, "step": 10046 }, { "epoch": 6.948132780082988, "grad_norm": 5.550440311431885, "learning_rate": 1.695481788842785e-05, "log_odds_chosen": 10.537508964538574, "log_odds_ratio": -0.00010150601156055927, "logits/chosen": -0.6768600344657898, "logits/rejected": -0.6871519088745117, "logps/chosen": -0.0002947281172964722, "logps/rejected": -2.1085243225097656, "loss": 0.5136, "nll_loss": 0.12838245928287506, "rewards/accuracies": 1.0, "rewards/chosen": -2.947281245724298e-05, "rewards/margins": 0.2108229696750641, "rewards/rejected": -0.21085244417190552, "step": 10047 }, { "epoch": 6.948824343015215, "grad_norm": 4.045454978942871, "learning_rate": 1.69509758721377e-05, "log_odds_chosen": 9.82114028930664, "log_odds_ratio": -0.00017208814097102731, "logits/chosen": -0.4040992259979248, "logits/rejected": -0.5197563171386719, "logps/chosen": -0.00045935483649373055, "logps/rejected": -1.638375997543335, "loss": 0.3924, "nll_loss": 0.09808853268623352, "rewards/accuracies": 1.0, "rewards/chosen": -4.593547782860696e-05, "rewards/margins": 0.16379165649414062, "rewards/rejected": -0.16383758187294006, "step": 10048 }, { "epoch": 6.949515905947441, "grad_norm": 6.038547515869141, "learning_rate": 1.694713385584755e-05, "log_odds_chosen": 11.265363693237305, "log_odds_ratio": -3.956862565246411e-05, "logits/chosen": -0.2933599352836609, "logits/rejected": -0.4178452789783478, "logps/chosen": -0.00016355025582015514, "logps/rejected": -2.243529796600342, "loss": 0.9845, "nll_loss": 0.24611742794513702, "rewards/accuracies": 1.0, "rewards/chosen": -1.6355026673409157e-05, "rewards/margins": 0.224336639046669, "rewards/rejected": -0.22435298562049866, "step": 10049 }, { "epoch": 6.950207468879668, "grad_norm": 6.056111812591553, "learning_rate": 1.69432918395574e-05, "log_odds_chosen": 11.285694122314453, "log_odds_ratio": -3.228627974749543e-05, "logits/chosen": 0.26198089122772217, "logits/rejected": 0.2688486576080322, "logps/chosen": -0.0002590077347122133, "logps/rejected": -2.4489030838012695, "loss": 0.8032, "nll_loss": 0.20079305768013, "rewards/accuracies": 1.0, "rewards/chosen": -2.5900772016029805e-05, "rewards/margins": 0.24486443400382996, "rewards/rejected": -0.24489031732082367, "step": 10050 }, { "epoch": 6.950899031811895, "grad_norm": 7.738328456878662, "learning_rate": 1.693944982326725e-05, "log_odds_chosen": 12.397817611694336, "log_odds_ratio": -1.8989923773915507e-05, "logits/chosen": -0.41316238045692444, "logits/rejected": -0.4161011278629303, "logps/chosen": -0.0001298328279517591, "logps/rejected": -3.1106667518615723, "loss": 0.6079, "nll_loss": 0.1519773155450821, "rewards/accuracies": 1.0, "rewards/chosen": -1.298328243137803e-05, "rewards/margins": 0.31105366349220276, "rewards/rejected": -0.3110666573047638, "step": 10051 }, { "epoch": 6.951590594744122, "grad_norm": 6.873595714569092, "learning_rate": 1.69356078069771e-05, "log_odds_chosen": 9.874351501464844, "log_odds_ratio": -0.00029799286858178675, "logits/chosen": -0.18772411346435547, "logits/rejected": -0.17775577306747437, "logps/chosen": -0.0006274134502746165, "logps/rejected": -2.138361930847168, "loss": 1.1283, "nll_loss": 0.28204089403152466, "rewards/accuracies": 1.0, "rewards/chosen": -6.274134648265317e-05, "rewards/margins": 0.2137734293937683, "rewards/rejected": -0.2138361781835556, "step": 10052 }, { "epoch": 6.952282157676349, "grad_norm": 3.5054914951324463, "learning_rate": 1.6931765790686954e-05, "log_odds_chosen": 11.076932907104492, "log_odds_ratio": -8.66325935930945e-05, "logits/chosen": 0.15985512733459473, "logits/rejected": 0.1510522961616516, "logps/chosen": -0.0002725853701122105, "logps/rejected": -2.564537286758423, "loss": 0.7468, "nll_loss": 0.18668492138385773, "rewards/accuracies": 1.0, "rewards/chosen": -2.7258538466412574e-05, "rewards/margins": 0.25642648339271545, "rewards/rejected": -0.2564537525177002, "step": 10053 }, { "epoch": 6.9529737206085755, "grad_norm": 8.001506805419922, "learning_rate": 1.6927923774396803e-05, "log_odds_chosen": 11.1409912109375, "log_odds_ratio": -2.999947901116684e-05, "logits/chosen": -0.5246400237083435, "logits/rejected": -0.5274342894554138, "logps/chosen": -0.00032147939782589674, "logps/rejected": -2.5922746658325195, "loss": 0.6247, "nll_loss": 0.15617889165878296, "rewards/accuracies": 1.0, "rewards/chosen": -3.214793832739815e-05, "rewards/margins": 0.25919532775878906, "rewards/rejected": -0.259227454662323, "step": 10054 }, { "epoch": 6.953665283540802, "grad_norm": 5.166790962219238, "learning_rate": 1.6924081758106655e-05, "log_odds_chosen": 9.752410888671875, "log_odds_ratio": -0.00018502950842957944, "logits/chosen": -0.11179126054048538, "logits/rejected": -0.06541785597801208, "logps/chosen": -0.0007403604686260223, "logps/rejected": -1.681820273399353, "loss": 0.3964, "nll_loss": 0.0990731418132782, "rewards/accuracies": 1.0, "rewards/chosen": -7.403604104183614e-05, "rewards/margins": 0.1681079864501953, "rewards/rejected": -0.16818203032016754, "step": 10055 }, { "epoch": 6.954356846473029, "grad_norm": 6.174735069274902, "learning_rate": 1.6920239741816508e-05, "log_odds_chosen": 10.701284408569336, "log_odds_ratio": -0.0003483596374280751, "logits/chosen": -0.37049442529678345, "logits/rejected": -0.44730401039123535, "logps/chosen": -0.00043324686703272164, "logps/rejected": -2.10621976852417, "loss": 0.7395, "nll_loss": 0.1848517507314682, "rewards/accuracies": 1.0, "rewards/chosen": -4.33246859756764e-05, "rewards/margins": 0.21057865023612976, "rewards/rejected": -0.21062195301055908, "step": 10056 }, { "epoch": 6.955048409405256, "grad_norm": 3.980576753616333, "learning_rate": 1.6916397725526357e-05, "log_odds_chosen": 9.119756698608398, "log_odds_ratio": -0.0003259534714743495, "logits/chosen": -0.7593079805374146, "logits/rejected": -0.7276464104652405, "logps/chosen": -0.0003143846115563065, "logps/rejected": -1.3028907775878906, "loss": 0.4762, "nll_loss": 0.11900606751441956, "rewards/accuracies": 1.0, "rewards/chosen": -3.143846333841793e-05, "rewards/margins": 0.13025765120983124, "rewards/rejected": -0.13028909265995026, "step": 10057 }, { "epoch": 6.955739972337483, "grad_norm": 6.947946071624756, "learning_rate": 1.691255570923621e-05, "log_odds_chosen": 10.878345489501953, "log_odds_ratio": -6.959411257412285e-05, "logits/chosen": -0.43439775705337524, "logits/rejected": -0.48729392886161804, "logps/chosen": -0.0004633645003195852, "logps/rejected": -2.565565586090088, "loss": 0.6566, "nll_loss": 0.1641518920660019, "rewards/accuracies": 1.0, "rewards/chosen": -4.6336448576767e-05, "rewards/margins": 0.25651025772094727, "rewards/rejected": -0.25655657052993774, "step": 10058 }, { "epoch": 6.95643153526971, "grad_norm": 2.9592649936676025, "learning_rate": 1.690871369294606e-05, "log_odds_chosen": 10.088553428649902, "log_odds_ratio": -0.00013400233001448214, "logits/chosen": -0.2962646186351776, "logits/rejected": -0.3382180333137512, "logps/chosen": -0.0004178662784397602, "logps/rejected": -2.0320756435394287, "loss": 0.4795, "nll_loss": 0.11986919492483139, "rewards/accuracies": 1.0, "rewards/chosen": -4.178662857157178e-05, "rewards/margins": 0.20316576957702637, "rewards/rejected": -0.20320755243301392, "step": 10059 }, { "epoch": 6.9571230982019365, "grad_norm": 3.8344273567199707, "learning_rate": 1.6904871676655907e-05, "log_odds_chosen": 10.093937873840332, "log_odds_ratio": -0.00031608311110176146, "logits/chosen": -0.22365032136440277, "logits/rejected": -0.47377943992614746, "logps/chosen": -0.0006390301277860999, "logps/rejected": -2.3930559158325195, "loss": 0.4028, "nll_loss": 0.10067568719387054, "rewards/accuracies": 1.0, "rewards/chosen": -6.390301132341847e-05, "rewards/margins": 0.23924173414707184, "rewards/rejected": -0.23930561542510986, "step": 10060 }, { "epoch": 6.957814661134163, "grad_norm": 8.053756713867188, "learning_rate": 1.690102966036576e-05, "log_odds_chosen": 10.189164161682129, "log_odds_ratio": -0.0001274641981581226, "logits/chosen": 0.14860232174396515, "logits/rejected": 0.004969865083694458, "logps/chosen": -0.0002809629950206727, "logps/rejected": -1.9008369445800781, "loss": 0.7519, "nll_loss": 0.18795260787010193, "rewards/accuracies": 1.0, "rewards/chosen": -2.8096299502067268e-05, "rewards/margins": 0.19005560874938965, "rewards/rejected": -0.19008369743824005, "step": 10061 }, { "epoch": 6.95850622406639, "grad_norm": 5.632915019989014, "learning_rate": 1.6897187644075612e-05, "log_odds_chosen": 11.19343376159668, "log_odds_ratio": -7.442128844559193e-05, "logits/chosen": -0.1333615630865097, "logits/rejected": -0.2802311182022095, "logps/chosen": -0.00037782572326250374, "logps/rejected": -2.7057318687438965, "loss": 0.5091, "nll_loss": 0.12727561593055725, "rewards/accuracies": 1.0, "rewards/chosen": -3.778257087105885e-05, "rewards/margins": 0.270535409450531, "rewards/rejected": -0.2705731987953186, "step": 10062 }, { "epoch": 6.959197786998617, "grad_norm": 4.994719505310059, "learning_rate": 1.689334562778546e-05, "log_odds_chosen": 10.285367965698242, "log_odds_ratio": -5.1024078857153654e-05, "logits/chosen": -0.4173157811164856, "logits/rejected": -0.36329323053359985, "logps/chosen": -0.0005839330260641873, "logps/rejected": -2.3260738849639893, "loss": 0.4606, "nll_loss": 0.11514244973659515, "rewards/accuracies": 1.0, "rewards/chosen": -5.839329969603568e-05, "rewards/margins": 0.2325490117073059, "rewards/rejected": -0.2326073944568634, "step": 10063 }, { "epoch": 6.959889349930844, "grad_norm": 5.117969989776611, "learning_rate": 1.6889503611495314e-05, "log_odds_chosen": 10.772396087646484, "log_odds_ratio": -6.265474075917155e-05, "logits/chosen": -0.29701873660087585, "logits/rejected": -0.32611095905303955, "logps/chosen": -0.00026552981580607593, "logps/rejected": -2.4106943607330322, "loss": 0.5858, "nll_loss": 0.1464328020811081, "rewards/accuracies": 1.0, "rewards/chosen": -2.6552981580607593e-05, "rewards/margins": 0.2410428822040558, "rewards/rejected": -0.24106942117214203, "step": 10064 }, { "epoch": 6.960580912863071, "grad_norm": 6.523797512054443, "learning_rate": 1.6885661595205166e-05, "log_odds_chosen": 10.308741569519043, "log_odds_ratio": -7.950417784741148e-05, "logits/chosen": -0.590244472026825, "logits/rejected": -0.44279173016548157, "logps/chosen": -0.00033462955616414547, "logps/rejected": -1.7902774810791016, "loss": 0.527, "nll_loss": 0.13175415992736816, "rewards/accuracies": 1.0, "rewards/chosen": -3.346295852679759e-05, "rewards/margins": 0.178994283080101, "rewards/rejected": -0.1790277510881424, "step": 10065 }, { "epoch": 6.9612724757952975, "grad_norm": 4.181301116943359, "learning_rate": 1.6881819578915015e-05, "log_odds_chosen": 10.008706092834473, "log_odds_ratio": -0.00012131897528888658, "logits/chosen": -0.2146802544593811, "logits/rejected": -0.37969833612442017, "logps/chosen": -0.0010558163048699498, "logps/rejected": -1.8638464212417603, "loss": 0.4958, "nll_loss": 0.12394772469997406, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001055816319421865, "rewards/margins": 0.1862790733575821, "rewards/rejected": -0.1863846480846405, "step": 10066 }, { "epoch": 6.961964038727524, "grad_norm": 5.340941429138184, "learning_rate": 1.6877977562624868e-05, "log_odds_chosen": 10.816205978393555, "log_odds_ratio": -0.00027935803518630564, "logits/chosen": 0.5234330892562866, "logits/rejected": 0.419673889875412, "logps/chosen": -0.0005839740042574704, "logps/rejected": -2.2605462074279785, "loss": 0.6266, "nll_loss": 0.15661442279815674, "rewards/accuracies": 1.0, "rewards/chosen": -5.8397399698151276e-05, "rewards/margins": 0.2259962111711502, "rewards/rejected": -0.2260546237230301, "step": 10067 }, { "epoch": 6.962655601659751, "grad_norm": 6.287302494049072, "learning_rate": 1.6874135546334717e-05, "log_odds_chosen": 10.874994277954102, "log_odds_ratio": -0.00023676344426348805, "logits/chosen": -0.2309369295835495, "logits/rejected": -0.3357173800468445, "logps/chosen": -0.00018939608708024025, "logps/rejected": -2.4723055362701416, "loss": 1.0141, "nll_loss": 0.25348982214927673, "rewards/accuracies": 1.0, "rewards/chosen": -1.8939608708024025e-05, "rewards/margins": 0.24721162021160126, "rewards/rejected": -0.24723055958747864, "step": 10068 }, { "epoch": 6.963347164591978, "grad_norm": 7.557071685791016, "learning_rate": 1.6870293530044566e-05, "log_odds_chosen": 11.015689849853516, "log_odds_ratio": -2.897938793466892e-05, "logits/chosen": -0.3422916531562805, "logits/rejected": -0.27559277415275574, "logps/chosen": -0.0001115235936595127, "logps/rejected": -1.888420581817627, "loss": 0.4493, "nll_loss": 0.1123114749789238, "rewards/accuracies": 1.0, "rewards/chosen": -1.115235954785021e-05, "rewards/margins": 0.1888309121131897, "rewards/rejected": -0.1888420581817627, "step": 10069 }, { "epoch": 6.964038727524205, "grad_norm": 5.31990385055542, "learning_rate": 1.686645151375442e-05, "log_odds_chosen": 10.67734146118164, "log_odds_ratio": -6.908691284479573e-05, "logits/chosen": -0.11804617941379547, "logits/rejected": -0.43072015047073364, "logps/chosen": -0.00025707035092636943, "logps/rejected": -2.150763988494873, "loss": 0.7585, "nll_loss": 0.18962499499320984, "rewards/accuracies": 1.0, "rewards/chosen": -2.5707035092636943e-05, "rewards/margins": 0.21505066752433777, "rewards/rejected": -0.21507638692855835, "step": 10070 }, { "epoch": 6.964730290456432, "grad_norm": 4.534579753875732, "learning_rate": 1.686260949746427e-05, "log_odds_chosen": 11.683751106262207, "log_odds_ratio": -2.2830068701296113e-05, "logits/chosen": -0.5978186130523682, "logits/rejected": -0.578332781791687, "logps/chosen": -9.108192170970142e-05, "logps/rejected": -2.3900818824768066, "loss": 0.4515, "nll_loss": 0.11287988722324371, "rewards/accuracies": 1.0, "rewards/chosen": -9.108191989071202e-06, "rewards/margins": 0.23899908363819122, "rewards/rejected": -0.23900818824768066, "step": 10071 }, { "epoch": 6.9654218533886585, "grad_norm": 6.28562068939209, "learning_rate": 1.685876748117412e-05, "log_odds_chosen": 11.651711463928223, "log_odds_ratio": -1.8427983377478085e-05, "logits/chosen": -0.5274204015731812, "logits/rejected": -0.525452196598053, "logps/chosen": -0.0001939109934028238, "logps/rejected": -2.805175542831421, "loss": 0.5491, "nll_loss": 0.13727723062038422, "rewards/accuracies": 1.0, "rewards/chosen": -1.939109824888874e-05, "rewards/margins": 0.2804981470108032, "rewards/rejected": -0.2805175483226776, "step": 10072 }, { "epoch": 6.966113416320885, "grad_norm": 12.471409797668457, "learning_rate": 1.6854925464883972e-05, "log_odds_chosen": 10.029308319091797, "log_odds_ratio": -8.028361480683088e-05, "logits/chosen": -0.35297563672065735, "logits/rejected": -0.4903620481491089, "logps/chosen": -0.0004890944110229611, "logps/rejected": -2.2122983932495117, "loss": 0.5236, "nll_loss": 0.13088931143283844, "rewards/accuracies": 1.0, "rewards/chosen": -4.890943819191307e-05, "rewards/margins": 0.22118094563484192, "rewards/rejected": -0.22122985124588013, "step": 10073 }, { "epoch": 6.966804979253112, "grad_norm": 13.626130104064941, "learning_rate": 1.6851083448593825e-05, "log_odds_chosen": 10.919111251831055, "log_odds_ratio": -5.1238042942713946e-05, "logits/chosen": -0.6567732691764832, "logits/rejected": -0.6847638487815857, "logps/chosen": -0.00015020312275737524, "logps/rejected": -2.0090315341949463, "loss": 0.4309, "nll_loss": 0.10771311819553375, "rewards/accuracies": 1.0, "rewards/chosen": -1.5020313185232226e-05, "rewards/margins": 0.20088812708854675, "rewards/rejected": -0.20090316236019135, "step": 10074 }, { "epoch": 6.967496542185339, "grad_norm": 6.219060897827148, "learning_rate": 1.6847241432303674e-05, "log_odds_chosen": 10.488255500793457, "log_odds_ratio": -4.7013538278406486e-05, "logits/chosen": 0.17941254377365112, "logits/rejected": -0.006448574364185333, "logps/chosen": -0.00015892702504061162, "logps/rejected": -1.7068005800247192, "loss": 0.4064, "nll_loss": 0.10159176588058472, "rewards/accuracies": 1.0, "rewards/chosen": -1.589270141266752e-05, "rewards/margins": 0.17066416144371033, "rewards/rejected": -0.17068007588386536, "step": 10075 }, { "epoch": 6.968188105117566, "grad_norm": 6.882790565490723, "learning_rate": 1.6843399416013526e-05, "log_odds_chosen": 10.748775482177734, "log_odds_ratio": -3.974856008426286e-05, "logits/chosen": -0.5364277362823486, "logits/rejected": -0.6157901287078857, "logps/chosen": -0.0003205241519026458, "logps/rejected": -2.2799644470214844, "loss": 0.5032, "nll_loss": 0.12580451369285583, "rewards/accuracies": 1.0, "rewards/chosen": -3.2052415917860344e-05, "rewards/margins": 0.2279644012451172, "rewards/rejected": -0.22799643874168396, "step": 10076 }, { "epoch": 6.968879668049793, "grad_norm": 8.474969863891602, "learning_rate": 1.6839557399723375e-05, "log_odds_chosen": 11.371885299682617, "log_odds_ratio": -3.51688067894429e-05, "logits/chosen": -0.451709508895874, "logits/rejected": -0.5114191770553589, "logps/chosen": -0.00017953138740267605, "logps/rejected": -2.330127239227295, "loss": 0.4933, "nll_loss": 0.12332789599895477, "rewards/accuracies": 1.0, "rewards/chosen": -1.7953139831661247e-05, "rewards/margins": 0.23299476504325867, "rewards/rejected": -0.23301272094249725, "step": 10077 }, { "epoch": 6.9695712309820195, "grad_norm": 4.735933780670166, "learning_rate": 1.6835715383433224e-05, "log_odds_chosen": 11.441869735717773, "log_odds_ratio": -7.0574002165813e-05, "logits/chosen": -0.39923131465911865, "logits/rejected": -0.4018963575363159, "logps/chosen": -0.0002980951394420117, "logps/rejected": -2.8218677043914795, "loss": 1.1981, "nll_loss": 0.2995148003101349, "rewards/accuracies": 1.0, "rewards/chosen": -2.9809514671796933e-05, "rewards/margins": 0.28215694427490234, "rewards/rejected": -0.28218674659729004, "step": 10078 }, { "epoch": 6.970262793914246, "grad_norm": 7.076903820037842, "learning_rate": 1.6831873367143077e-05, "log_odds_chosen": 10.777301788330078, "log_odds_ratio": -3.476179335848428e-05, "logits/chosen": -0.6153938174247742, "logits/rejected": -0.6085329651832581, "logps/chosen": -0.0001969627192011103, "logps/rejected": -2.048153877258301, "loss": 0.6916, "nll_loss": 0.1728864461183548, "rewards/accuracies": 1.0, "rewards/chosen": -1.969627192011103e-05, "rewards/margins": 0.20479567348957062, "rewards/rejected": -0.20481537282466888, "step": 10079 }, { "epoch": 6.970954356846473, "grad_norm": 4.815555572509766, "learning_rate": 1.682803135085293e-05, "log_odds_chosen": 9.752076148986816, "log_odds_ratio": -0.0008670328534208238, "logits/chosen": -0.5131319761276245, "logits/rejected": -0.5436765551567078, "logps/chosen": -0.001340696937404573, "logps/rejected": -1.7009871006011963, "loss": 0.4662, "nll_loss": 0.11645921319723129, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013406967627815902, "rewards/margins": 0.16996464133262634, "rewards/rejected": -0.1700986921787262, "step": 10080 }, { "epoch": 6.9716459197787, "grad_norm": 5.930548667907715, "learning_rate": 1.6824189334562778e-05, "log_odds_chosen": 10.698500633239746, "log_odds_ratio": -3.7088189856149256e-05, "logits/chosen": -0.7052226662635803, "logits/rejected": -0.7362067699432373, "logps/chosen": -0.00012290122685953975, "logps/rejected": -1.7287907600402832, "loss": 0.6351, "nll_loss": 0.15876275300979614, "rewards/accuracies": 1.0, "rewards/chosen": -1.2290122867852915e-05, "rewards/margins": 0.1728667914867401, "rewards/rejected": -0.17287908494472504, "step": 10081 }, { "epoch": 6.972337482710927, "grad_norm": 5.158050060272217, "learning_rate": 1.682034731827263e-05, "log_odds_chosen": 10.191957473754883, "log_odds_ratio": -0.0007288920460268855, "logits/chosen": -0.7901021838188171, "logits/rejected": -0.7060337066650391, "logps/chosen": -0.0007514750468544662, "logps/rejected": -2.1284823417663574, "loss": 0.5622, "nll_loss": 0.1404794603586197, "rewards/accuracies": 1.0, "rewards/chosen": -7.514750177506357e-05, "rewards/margins": 0.21277309954166412, "rewards/rejected": -0.2128482460975647, "step": 10082 }, { "epoch": 6.973029045643154, "grad_norm": 4.94846248626709, "learning_rate": 1.6816505301982483e-05, "log_odds_chosen": 11.54309368133545, "log_odds_ratio": -1.1242198524996638e-05, "logits/chosen": -0.23920166492462158, "logits/rejected": -0.3178488314151764, "logps/chosen": -0.00024875771487131715, "logps/rejected": -2.5369608402252197, "loss": 0.5647, "nll_loss": 0.14116451144218445, "rewards/accuracies": 1.0, "rewards/chosen": -2.4875771487131715e-05, "rewards/margins": 0.25367119908332825, "rewards/rejected": -0.253696084022522, "step": 10083 }, { "epoch": 6.9737206085753805, "grad_norm": 7.300533771514893, "learning_rate": 1.6812663285692332e-05, "log_odds_chosen": 10.943312644958496, "log_odds_ratio": -0.0008375818142667413, "logits/chosen": -0.8108684420585632, "logits/rejected": -0.7640953063964844, "logps/chosen": -0.0003348039463162422, "logps/rejected": -2.059131145477295, "loss": 0.6199, "nll_loss": 0.15488187968730927, "rewards/accuracies": 1.0, "rewards/chosen": -3.348039535921998e-05, "rewards/margins": 0.20587962865829468, "rewards/rejected": -0.20591309666633606, "step": 10084 }, { "epoch": 6.974412171507607, "grad_norm": 5.671638011932373, "learning_rate": 1.6808821269402185e-05, "log_odds_chosen": 10.110841751098633, "log_odds_ratio": -8.413447358179837e-05, "logits/chosen": -0.6961773633956909, "logits/rejected": -0.6649831533432007, "logps/chosen": -0.0002696591254789382, "logps/rejected": -2.1173558235168457, "loss": 0.8262, "nll_loss": 0.20654813945293427, "rewards/accuracies": 1.0, "rewards/chosen": -2.696591218409594e-05, "rewards/margins": 0.21170863509178162, "rewards/rejected": -0.2117355912923813, "step": 10085 }, { "epoch": 6.975103734439834, "grad_norm": 13.913922309875488, "learning_rate": 1.6804979253112034e-05, "log_odds_chosen": 11.766149520874023, "log_odds_ratio": -2.2054537112126127e-05, "logits/chosen": -0.4592455327510834, "logits/rejected": -0.5087196230888367, "logps/chosen": -0.00016856536967679858, "logps/rejected": -2.561805486679077, "loss": 0.6583, "nll_loss": 0.1645684689283371, "rewards/accuracies": 1.0, "rewards/chosen": -1.685653842287138e-05, "rewards/margins": 0.25616368651390076, "rewards/rejected": -0.2561805546283722, "step": 10086 }, { "epoch": 6.975795297372061, "grad_norm": 9.301910400390625, "learning_rate": 1.6801137236821883e-05, "log_odds_chosen": 11.14494514465332, "log_odds_ratio": -2.6727680960902944e-05, "logits/chosen": -0.43499940633773804, "logits/rejected": -0.4686170816421509, "logps/chosen": -0.00015866890316829085, "logps/rejected": -2.0248191356658936, "loss": 0.504, "nll_loss": 0.12599007785320282, "rewards/accuracies": 1.0, "rewards/chosen": -1.5866889953031205e-05, "rewards/margins": 0.20246604084968567, "rewards/rejected": -0.20248191058635712, "step": 10087 }, { "epoch": 6.976486860304288, "grad_norm": 6.525985240936279, "learning_rate": 1.6797295220531735e-05, "log_odds_chosen": 10.123964309692383, "log_odds_ratio": -0.00037600661744363606, "logits/chosen": -0.38632524013519287, "logits/rejected": -0.4179686903953552, "logps/chosen": -0.001652559032663703, "logps/rejected": -2.3613505363464355, "loss": 0.4669, "nll_loss": 0.11669015139341354, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016525591490790248, "rewards/margins": 0.23596976697444916, "rewards/rejected": -0.23613503575325012, "step": 10088 }, { "epoch": 6.977178423236515, "grad_norm": 4.867033958435059, "learning_rate": 1.6793453204241584e-05, "log_odds_chosen": 12.31155776977539, "log_odds_ratio": -9.306555512011983e-06, "logits/chosen": -0.505553662776947, "logits/rejected": -0.6083633899688721, "logps/chosen": -0.00012676091864705086, "logps/rejected": -3.2197482585906982, "loss": 0.441, "nll_loss": 0.11025519669055939, "rewards/accuracies": 1.0, "rewards/chosen": -1.2676091500907205e-05, "rewards/margins": 0.3219621777534485, "rewards/rejected": -0.32197481393814087, "step": 10089 }, { "epoch": 6.977869986168741, "grad_norm": 5.310248851776123, "learning_rate": 1.6789611187951437e-05, "log_odds_chosen": 11.04545783996582, "log_odds_ratio": -5.797618723590858e-05, "logits/chosen": -0.42729341983795166, "logits/rejected": -0.5321473479270935, "logps/chosen": -0.00029431338771246374, "logps/rejected": -2.51184344291687, "loss": 0.5545, "nll_loss": 0.1386297345161438, "rewards/accuracies": 1.0, "rewards/chosen": -2.9431339498842135e-05, "rewards/margins": 0.25115492939949036, "rewards/rejected": -0.2511843740940094, "step": 10090 }, { "epoch": 6.978561549100968, "grad_norm": 5.998685836791992, "learning_rate": 1.678576917166129e-05, "log_odds_chosen": 10.200556755065918, "log_odds_ratio": -0.0004204717988613993, "logits/chosen": -0.19246073067188263, "logits/rejected": -0.3010517358779907, "logps/chosen": -0.0006728671723976731, "logps/rejected": -2.1594655513763428, "loss": 0.5987, "nll_loss": 0.14964529871940613, "rewards/accuracies": 1.0, "rewards/chosen": -6.728671723976731e-05, "rewards/margins": 0.21587930619716644, "rewards/rejected": -0.21594657003879547, "step": 10091 }, { "epoch": 6.979253112033195, "grad_norm": 6.10350227355957, "learning_rate": 1.6781927155371138e-05, "log_odds_chosen": 10.90713882446289, "log_odds_ratio": -0.0001578339870320633, "logits/chosen": -0.31386104226112366, "logits/rejected": -0.2914910316467285, "logps/chosen": -0.00024432651116512716, "logps/rejected": -2.0333194732666016, "loss": 0.5143, "nll_loss": 0.1285688579082489, "rewards/accuracies": 1.0, "rewards/chosen": -2.4432651116512716e-05, "rewards/margins": 0.20330752432346344, "rewards/rejected": -0.20333194732666016, "step": 10092 }, { "epoch": 6.979944674965422, "grad_norm": 3.2862350940704346, "learning_rate": 1.677808513908099e-05, "log_odds_chosen": 11.088020324707031, "log_odds_ratio": -2.1633699361700565e-05, "logits/chosen": -0.22667689621448517, "logits/rejected": -0.2776218354701996, "logps/chosen": -0.0002498264075256884, "logps/rejected": -2.5119142532348633, "loss": 0.5233, "nll_loss": 0.1308109015226364, "rewards/accuracies": 1.0, "rewards/chosen": -2.4982638933579437e-05, "rewards/margins": 0.2511664628982544, "rewards/rejected": -0.2511914372444153, "step": 10093 }, { "epoch": 6.980636237897649, "grad_norm": 6.681600093841553, "learning_rate": 1.6774243122790843e-05, "log_odds_chosen": 11.789108276367188, "log_odds_ratio": -0.00010184692655457184, "logits/chosen": -0.5825809836387634, "logits/rejected": -0.6333975195884705, "logps/chosen": -0.0009490898228250444, "logps/rejected": -3.6755330562591553, "loss": 0.6252, "nll_loss": 0.15628907084465027, "rewards/accuracies": 1.0, "rewards/chosen": -9.490898810327053e-05, "rewards/margins": 0.36745840311050415, "rewards/rejected": -0.36755332350730896, "step": 10094 }, { "epoch": 6.981327800829876, "grad_norm": 4.172235488891602, "learning_rate": 1.6770401106500692e-05, "log_odds_chosen": 10.5609130859375, "log_odds_ratio": -9.2441332526505e-05, "logits/chosen": -0.17121471464633942, "logits/rejected": -0.2200719118118286, "logps/chosen": -0.0002370928123127669, "logps/rejected": -2.3008334636688232, "loss": 0.417, "nll_loss": 0.10424383729696274, "rewards/accuracies": 1.0, "rewards/chosen": -2.3709282686468214e-05, "rewards/margins": 0.23005962371826172, "rewards/rejected": -0.23008334636688232, "step": 10095 }, { "epoch": 6.982019363762102, "grad_norm": 8.53943157196045, "learning_rate": 1.6766559090210545e-05, "log_odds_chosen": 10.216669082641602, "log_odds_ratio": -0.00012501122546382248, "logits/chosen": -0.24906474351882935, "logits/rejected": -0.3450174033641815, "logps/chosen": -0.000745933095458895, "logps/rejected": -2.12357234954834, "loss": 0.5926, "nll_loss": 0.14813847839832306, "rewards/accuracies": 1.0, "rewards/chosen": -7.459330663550645e-05, "rewards/margins": 0.21228265762329102, "rewards/rejected": -0.21235725283622742, "step": 10096 }, { "epoch": 6.982710926694329, "grad_norm": 7.537756443023682, "learning_rate": 1.6762717073920394e-05, "log_odds_chosen": 9.626380920410156, "log_odds_ratio": -0.0007371928659267724, "logits/chosen": -0.4513899087905884, "logits/rejected": -0.5055891275405884, "logps/chosen": -0.0005784498644061387, "logps/rejected": -1.1691521406173706, "loss": 0.5523, "nll_loss": 0.1379946917295456, "rewards/accuracies": 1.0, "rewards/chosen": -5.7844990806188434e-05, "rewards/margins": 0.1168573722243309, "rewards/rejected": -0.11691521108150482, "step": 10097 }, { "epoch": 6.983402489626556, "grad_norm": 3.16633677482605, "learning_rate": 1.6758875057630243e-05, "log_odds_chosen": 11.536641120910645, "log_odds_ratio": -2.3040020096232183e-05, "logits/chosen": -0.5513945817947388, "logits/rejected": -0.6492319107055664, "logps/chosen": -0.0001172884221887216, "logps/rejected": -2.33725643157959, "loss": 0.588, "nll_loss": 0.14700275659561157, "rewards/accuracies": 1.0, "rewards/chosen": -1.1728841855074279e-05, "rewards/margins": 0.23371392488479614, "rewards/rejected": -0.2337256669998169, "step": 10098 }, { "epoch": 6.984094052558783, "grad_norm": 10.358414649963379, "learning_rate": 1.6755033041340095e-05, "log_odds_chosen": 10.274566650390625, "log_odds_ratio": -0.00012472286471165717, "logits/chosen": -0.2928815484046936, "logits/rejected": -0.3074096441268921, "logps/chosen": -0.0005019558011554182, "logps/rejected": -2.5053319931030273, "loss": 0.7442, "nll_loss": 0.1860423982143402, "rewards/accuracies": 1.0, "rewards/chosen": -5.0195580115541816e-05, "rewards/margins": 0.2504830062389374, "rewards/rejected": -0.25053322315216064, "step": 10099 }, { "epoch": 6.98478561549101, "grad_norm": 5.400953769683838, "learning_rate": 1.6751191025049948e-05, "log_odds_chosen": 9.803630828857422, "log_odds_ratio": -0.0005115721723996103, "logits/chosen": -0.3757714629173279, "logits/rejected": -0.33992066979408264, "logps/chosen": -0.00046948320232331753, "logps/rejected": -2.0363478660583496, "loss": 0.654, "nll_loss": 0.16345909237861633, "rewards/accuracies": 1.0, "rewards/chosen": -4.694832387031056e-05, "rewards/margins": 0.2035878300666809, "rewards/rejected": -0.20363479852676392, "step": 10100 }, { "epoch": 6.985477178423237, "grad_norm": 6.6279449462890625, "learning_rate": 1.6747349008759797e-05, "log_odds_chosen": 11.03076171875, "log_odds_ratio": -3.322376142023131e-05, "logits/chosen": -0.3243277072906494, "logits/rejected": -0.3870410621166229, "logps/chosen": -0.00025668181478977203, "logps/rejected": -2.2549896240234375, "loss": 0.5772, "nll_loss": 0.1443031281232834, "rewards/accuracies": 1.0, "rewards/chosen": -2.5668183297966607e-05, "rewards/margins": 0.2254732996225357, "rewards/rejected": -0.2254989743232727, "step": 10101 }, { "epoch": 6.986168741355463, "grad_norm": 10.471397399902344, "learning_rate": 1.674350699246965e-05, "log_odds_chosen": 10.673310279846191, "log_odds_ratio": -4.33100140071474e-05, "logits/chosen": -0.1118420735001564, "logits/rejected": -0.27916595339775085, "logps/chosen": -0.0003742701665032655, "logps/rejected": -2.4076638221740723, "loss": 0.559, "nll_loss": 0.13975459337234497, "rewards/accuracies": 1.0, "rewards/chosen": -3.742701665032655e-05, "rewards/margins": 0.240728959441185, "rewards/rejected": -0.24076639115810394, "step": 10102 }, { "epoch": 6.98686030428769, "grad_norm": 5.764482021331787, "learning_rate": 1.67396649761795e-05, "log_odds_chosen": 10.548346519470215, "log_odds_ratio": -0.00014787615509703755, "logits/chosen": -0.6814590096473694, "logits/rejected": -0.6488746404647827, "logps/chosen": -0.00026747508672997355, "logps/rejected": -2.2368221282958984, "loss": 0.5517, "nll_loss": 0.1379205286502838, "rewards/accuracies": 1.0, "rewards/chosen": -2.6747507945401594e-05, "rewards/margins": 0.22365549206733704, "rewards/rejected": -0.22368223965168, "step": 10103 }, { "epoch": 6.987551867219917, "grad_norm": 5.606224060058594, "learning_rate": 1.673582295988935e-05, "log_odds_chosen": 10.144465446472168, "log_odds_ratio": -0.0001444382796762511, "logits/chosen": -0.7282345294952393, "logits/rejected": -0.8093679547309875, "logps/chosen": -0.00044351324322633445, "logps/rejected": -1.7967113256454468, "loss": 0.3948, "nll_loss": 0.09869752824306488, "rewards/accuracies": 1.0, "rewards/chosen": -4.435132723301649e-05, "rewards/margins": 0.17962679266929626, "rewards/rejected": -0.17967115342617035, "step": 10104 }, { "epoch": 6.988243430152144, "grad_norm": 4.325505256652832, "learning_rate": 1.6731980943599203e-05, "log_odds_chosen": 10.875144958496094, "log_odds_ratio": -0.0001441028289264068, "logits/chosen": -0.22643186151981354, "logits/rejected": -0.352799654006958, "logps/chosen": -0.00034628575667738914, "logps/rejected": -2.2413947582244873, "loss": 0.602, "nll_loss": 0.1504793018102646, "rewards/accuracies": 1.0, "rewards/chosen": -3.46285778505262e-05, "rewards/margins": 0.2241048514842987, "rewards/rejected": -0.2241394817829132, "step": 10105 }, { "epoch": 6.988934993084371, "grad_norm": 8.308793067932129, "learning_rate": 1.6728138927309052e-05, "log_odds_chosen": 11.82182502746582, "log_odds_ratio": -1.4797966287005693e-05, "logits/chosen": -0.6414090991020203, "logits/rejected": -0.7486789226531982, "logps/chosen": -8.142340811900795e-05, "logps/rejected": -2.3995790481567383, "loss": 0.6312, "nll_loss": 0.1577981412410736, "rewards/accuracies": 1.0, "rewards/chosen": -8.142340448102914e-06, "rewards/margins": 0.23994974792003632, "rewards/rejected": -0.23995789885520935, "step": 10106 }, { "epoch": 6.9896265560165975, "grad_norm": 5.863566875457764, "learning_rate": 1.67242969110189e-05, "log_odds_chosen": 10.61973762512207, "log_odds_ratio": -5.890395550522953e-05, "logits/chosen": -0.8222779035568237, "logits/rejected": -0.8633177280426025, "logps/chosen": -0.00021483330056071281, "logps/rejected": -1.5272918939590454, "loss": 0.5585, "nll_loss": 0.13962499797344208, "rewards/accuracies": 1.0, "rewards/chosen": -2.1483330783667043e-05, "rewards/margins": 0.15270771086215973, "rewards/rejected": -0.15272918343544006, "step": 10107 }, { "epoch": 6.990318118948824, "grad_norm": 3.0240771770477295, "learning_rate": 1.6720454894728754e-05, "log_odds_chosen": 9.746898651123047, "log_odds_ratio": -0.00038893611053936183, "logits/chosen": 0.04509582743048668, "logits/rejected": -0.1614600419998169, "logps/chosen": -0.0014492695918306708, "logps/rejected": -1.977993130683899, "loss": 0.4254, "nll_loss": 0.10631842911243439, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014492697664536536, "rewards/margins": 0.19765439629554749, "rewards/rejected": -0.19779931008815765, "step": 10108 }, { "epoch": 6.991009681881051, "grad_norm": 4.6803998947143555, "learning_rate": 1.6716612878438606e-05, "log_odds_chosen": 11.34988021850586, "log_odds_ratio": -2.1349296730477363e-05, "logits/chosen": -0.09972164034843445, "logits/rejected": -0.21515415608882904, "logps/chosen": -8.137220720527694e-05, "logps/rejected": -1.995793104171753, "loss": 0.4244, "nll_loss": 0.10609244555234909, "rewards/accuracies": 1.0, "rewards/chosen": -8.137220902426634e-06, "rewards/margins": 0.1995711773633957, "rewards/rejected": -0.19957931339740753, "step": 10109 }, { "epoch": 6.991701244813278, "grad_norm": 4.614258289337158, "learning_rate": 1.6712770862148455e-05, "log_odds_chosen": 10.589548110961914, "log_odds_ratio": -0.00012725955457426608, "logits/chosen": -0.5055170059204102, "logits/rejected": -0.6075488328933716, "logps/chosen": -0.0002730110427364707, "logps/rejected": -1.8980847597122192, "loss": 0.3892, "nll_loss": 0.09728061407804489, "rewards/accuracies": 1.0, "rewards/chosen": -2.7301106456434354e-05, "rewards/margins": 0.18978118896484375, "rewards/rejected": -0.18980847299098969, "step": 10110 }, { "epoch": 6.992392807745505, "grad_norm": 5.7431182861328125, "learning_rate": 1.6708928845858308e-05, "log_odds_chosen": 10.315574645996094, "log_odds_ratio": -7.668719626963139e-05, "logits/chosen": -0.27294373512268066, "logits/rejected": -0.38001275062561035, "logps/chosen": -0.0002155811234842986, "logps/rejected": -1.6959208250045776, "loss": 0.494, "nll_loss": 0.12349948287010193, "rewards/accuracies": 1.0, "rewards/chosen": -2.1558114895015024e-05, "rewards/margins": 0.16957053542137146, "rewards/rejected": -0.16959208250045776, "step": 10111 }, { "epoch": 6.993084370677732, "grad_norm": 5.39589262008667, "learning_rate": 1.670508682956816e-05, "log_odds_chosen": 11.265823364257812, "log_odds_ratio": -2.7276233595330268e-05, "logits/chosen": -0.3681911528110504, "logits/rejected": -0.30419978499412537, "logps/chosen": -0.00024840107653290033, "logps/rejected": -2.618328809738159, "loss": 0.5388, "nll_loss": 0.13470235466957092, "rewards/accuracies": 1.0, "rewards/chosen": -2.4840108380885795e-05, "rewards/margins": 0.26180803775787354, "rewards/rejected": -0.2618328630924225, "step": 10112 }, { "epoch": 6.9937759336099585, "grad_norm": 6.419252395629883, "learning_rate": 1.670124481327801e-05, "log_odds_chosen": 10.902593612670898, "log_odds_ratio": -8.501038246322423e-05, "logits/chosen": -0.43653345108032227, "logits/rejected": -0.4556824564933777, "logps/chosen": -0.00019020687614101917, "logps/rejected": -2.1433281898498535, "loss": 0.4661, "nll_loss": 0.11650720238685608, "rewards/accuracies": 1.0, "rewards/chosen": -1.9020688341697678e-05, "rewards/margins": 0.214313805103302, "rewards/rejected": -0.21433281898498535, "step": 10113 }, { "epoch": 6.994467496542185, "grad_norm": 6.108236312866211, "learning_rate": 1.669740279698786e-05, "log_odds_chosen": 10.553049087524414, "log_odds_ratio": -0.00028243346605449915, "logits/chosen": -0.3672410249710083, "logits/rejected": -0.38557636737823486, "logps/chosen": -0.00033305544639006257, "logps/rejected": -1.8382352590560913, "loss": 0.5158, "nll_loss": 0.12892918288707733, "rewards/accuracies": 1.0, "rewards/chosen": -3.3305543183814734e-05, "rewards/margins": 0.18379022181034088, "rewards/rejected": -0.18382352590560913, "step": 10114 }, { "epoch": 6.995159059474412, "grad_norm": 7.925759792327881, "learning_rate": 1.669356078069771e-05, "log_odds_chosen": 10.422781944274902, "log_odds_ratio": -6.591706187464297e-05, "logits/chosen": -0.33322617411613464, "logits/rejected": -0.3572065234184265, "logps/chosen": -0.005260770209133625, "logps/rejected": -2.2561864852905273, "loss": 0.5688, "nll_loss": 0.14219242334365845, "rewards/accuracies": 1.0, "rewards/chosen": -0.000526077055837959, "rewards/margins": 0.2250925600528717, "rewards/rejected": -0.2256186306476593, "step": 10115 }, { "epoch": 6.995850622406639, "grad_norm": 5.784368515014648, "learning_rate": 1.668971876440756e-05, "log_odds_chosen": 11.080724716186523, "log_odds_ratio": -5.246032742434181e-05, "logits/chosen": -0.4725242853164673, "logits/rejected": -0.45921579003334045, "logps/chosen": -0.00017178738198708743, "logps/rejected": -2.235011577606201, "loss": 0.3307, "nll_loss": 0.08267956972122192, "rewards/accuracies": 1.0, "rewards/chosen": -1.7178737834910862e-05, "rewards/margins": 0.22348397970199585, "rewards/rejected": -0.22350117564201355, "step": 10116 }, { "epoch": 6.996542185338866, "grad_norm": 6.753635406494141, "learning_rate": 1.6685876748117412e-05, "log_odds_chosen": 10.79682731628418, "log_odds_ratio": -0.0006323234993033111, "logits/chosen": -0.6484255194664001, "logits/rejected": -0.6901388764381409, "logps/chosen": -0.0002220661408500746, "logps/rejected": -1.7270817756652832, "loss": 0.5553, "nll_loss": 0.13875499367713928, "rewards/accuracies": 1.0, "rewards/chosen": -2.2206615540198982e-05, "rewards/margins": 0.17268598079681396, "rewards/rejected": -0.1727081835269928, "step": 10117 }, { "epoch": 6.997233748271093, "grad_norm": 12.498651504516602, "learning_rate": 1.6682034731827264e-05, "log_odds_chosen": 11.772920608520508, "log_odds_ratio": -1.1675167115754448e-05, "logits/chosen": -0.6263840794563293, "logits/rejected": -0.7192596793174744, "logps/chosen": -0.00013208006566856056, "logps/rejected": -2.688951015472412, "loss": 0.522, "nll_loss": 0.13049596548080444, "rewards/accuracies": 1.0, "rewards/chosen": -1.3208007658249699e-05, "rewards/margins": 0.2688818871974945, "rewards/rejected": -0.26889508962631226, "step": 10118 }, { "epoch": 6.9979253112033195, "grad_norm": 4.978712558746338, "learning_rate": 1.6678192715537114e-05, "log_odds_chosen": 10.6630220413208, "log_odds_ratio": -0.00014852010644972324, "logits/chosen": -0.35795727372169495, "logits/rejected": -0.4703786373138428, "logps/chosen": -0.0005530283669941127, "logps/rejected": -2.2193567752838135, "loss": 0.6832, "nll_loss": 0.1707746386528015, "rewards/accuracies": 1.0, "rewards/chosen": -5.530284033739008e-05, "rewards/margins": 0.22188037633895874, "rewards/rejected": -0.2219356894493103, "step": 10119 }, { "epoch": 6.998616874135546, "grad_norm": 5.544008255004883, "learning_rate": 1.6674350699246966e-05, "log_odds_chosen": 9.86840534210205, "log_odds_ratio": -0.00033382399124093354, "logits/chosen": -0.2054920494556427, "logits/rejected": -0.13383245468139648, "logps/chosen": -0.0002967912005260587, "logps/rejected": -1.6687836647033691, "loss": 0.6584, "nll_loss": 0.1645655333995819, "rewards/accuracies": 1.0, "rewards/chosen": -2.967912223539315e-05, "rewards/margins": 0.16684868931770325, "rewards/rejected": -0.1668783724308014, "step": 10120 }, { "epoch": 6.999308437067773, "grad_norm": 5.937603950500488, "learning_rate": 1.667050868295682e-05, "log_odds_chosen": 9.719598770141602, "log_odds_ratio": -0.002333612646907568, "logits/chosen": -0.6707288026809692, "logits/rejected": -0.6329488754272461, "logps/chosen": -0.0016681014094501734, "logps/rejected": -1.8420779705047607, "loss": 0.6239, "nll_loss": 0.1557316780090332, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016681014676578343, "rewards/margins": 0.18404099345207214, "rewards/rejected": -0.18420778214931488, "step": 10121 }, { "epoch": 7.0, "grad_norm": 5.16898250579834, "learning_rate": 1.6666666666666667e-05, "log_odds_chosen": 10.842208862304688, "log_odds_ratio": -3.623691372922622e-05, "logits/chosen": -0.29508236050605774, "logits/rejected": -0.20642954111099243, "logps/chosen": -0.00010137717617908493, "logps/rejected": -1.595341444015503, "loss": 0.4601, "nll_loss": 0.11501652002334595, "rewards/accuracies": 1.0, "rewards/chosen": -1.0137718163605314e-05, "rewards/margins": 0.15952400863170624, "rewards/rejected": -0.15953415632247925, "step": 10122 }, { "epoch": 7.000691562932227, "grad_norm": 4.177090167999268, "learning_rate": 1.666282465037652e-05, "log_odds_chosen": 10.67172622680664, "log_odds_ratio": -5.4966905736364424e-05, "logits/chosen": -0.14299771189689636, "logits/rejected": -0.20266348123550415, "logps/chosen": -0.0003145110094919801, "logps/rejected": -2.3612658977508545, "loss": 0.4718, "nll_loss": 0.11795040965080261, "rewards/accuracies": 1.0, "rewards/chosen": -3.145109803881496e-05, "rewards/margins": 0.236095130443573, "rewards/rejected": -0.2361265867948532, "step": 10123 }, { "epoch": 7.001383125864454, "grad_norm": 3.383732795715332, "learning_rate": 1.665898263408637e-05, "log_odds_chosen": 9.578885078430176, "log_odds_ratio": -0.00014664589252788574, "logits/chosen": -0.5051281452178955, "logits/rejected": -0.5166344046592712, "logps/chosen": -0.0004757488495670259, "logps/rejected": -1.6275553703308105, "loss": 0.4183, "nll_loss": 0.10457170009613037, "rewards/accuracies": 1.0, "rewards/chosen": -4.7574882046319544e-05, "rewards/margins": 0.16270795464515686, "rewards/rejected": -0.16275553405284882, "step": 10124 }, { "epoch": 7.0020746887966805, "grad_norm": 5.989767074584961, "learning_rate": 1.6655140617796218e-05, "log_odds_chosen": 10.526287078857422, "log_odds_ratio": -0.00011244660709053278, "logits/chosen": -0.414249449968338, "logits/rejected": -0.5609310865402222, "logps/chosen": -0.0006711309542879462, "logps/rejected": -2.302802085876465, "loss": 0.448, "nll_loss": 0.1119890883564949, "rewards/accuracies": 1.0, "rewards/chosen": -6.711309833917767e-05, "rewards/margins": 0.23021312057971954, "rewards/rejected": -0.23028022050857544, "step": 10125 }, { "epoch": 7.002766251728907, "grad_norm": 4.631224632263184, "learning_rate": 1.665129860150607e-05, "log_odds_chosen": 9.21665096282959, "log_odds_ratio": -0.00032175323576666415, "logits/chosen": -0.3028196096420288, "logits/rejected": -0.4589107036590576, "logps/chosen": -0.0006924690096639097, "logps/rejected": -1.6965086460113525, "loss": 0.4529, "nll_loss": 0.11318409442901611, "rewards/accuracies": 1.0, "rewards/chosen": -6.924690387677401e-05, "rewards/margins": 0.1695816069841385, "rewards/rejected": -0.1696508675813675, "step": 10126 }, { "epoch": 7.003457814661134, "grad_norm": 2.4340360164642334, "learning_rate": 1.6647456585215923e-05, "log_odds_chosen": 11.157234191894531, "log_odds_ratio": -2.9356588129303418e-05, "logits/chosen": -0.6620774269104004, "logits/rejected": -0.5918096303939819, "logps/chosen": -0.00017746233788784593, "logps/rejected": -2.0909321308135986, "loss": 0.3734, "nll_loss": 0.09334038197994232, "rewards/accuracies": 1.0, "rewards/chosen": -1.7746233424986713e-05, "rewards/margins": 0.209075465798378, "rewards/rejected": -0.20909321308135986, "step": 10127 }, { "epoch": 7.004149377593361, "grad_norm": 7.020127773284912, "learning_rate": 1.6643614568925772e-05, "log_odds_chosen": 11.063360214233398, "log_odds_ratio": -9.526523353997618e-05, "logits/chosen": -0.21672368049621582, "logits/rejected": -0.2962769865989685, "logps/chosen": -0.00026356359012424946, "logps/rejected": -2.7616658210754395, "loss": 0.3796, "nll_loss": 0.0948876291513443, "rewards/accuracies": 1.0, "rewards/chosen": -2.6356357921031304e-05, "rewards/margins": 0.2761402428150177, "rewards/rejected": -0.2761665880680084, "step": 10128 }, { "epoch": 7.004840940525588, "grad_norm": 4.720909595489502, "learning_rate": 1.6639772552635624e-05, "log_odds_chosen": 10.636579513549805, "log_odds_ratio": -0.0001630079059395939, "logits/chosen": -0.37148964405059814, "logits/rejected": -0.47465845942497253, "logps/chosen": -0.0003292100736871362, "logps/rejected": -2.3341360092163086, "loss": 0.4692, "nll_loss": 0.1172887533903122, "rewards/accuracies": 1.0, "rewards/chosen": -3.292100882390514e-05, "rewards/margins": 0.23338070511817932, "rewards/rejected": -0.23341360688209534, "step": 10129 }, { "epoch": 7.005532503457815, "grad_norm": 3.466585397720337, "learning_rate": 1.6635930536345477e-05, "log_odds_chosen": 12.286417007446289, "log_odds_ratio": -1.0827205187524669e-05, "logits/chosen": -0.21593183279037476, "logits/rejected": -0.2443038523197174, "logps/chosen": -0.00011290085240034387, "logps/rejected": -3.0278398990631104, "loss": 0.4981, "nll_loss": 0.12452976405620575, "rewards/accuracies": 1.0, "rewards/chosen": -1.1290085240034387e-05, "rewards/margins": 0.3027727007865906, "rewards/rejected": -0.3027840256690979, "step": 10130 }, { "epoch": 7.0062240663900415, "grad_norm": 5.393481254577637, "learning_rate": 1.6632088520055326e-05, "log_odds_chosen": 11.34892463684082, "log_odds_ratio": -1.9979619537480175e-05, "logits/chosen": -0.10481264442205429, "logits/rejected": -0.23816201090812683, "logps/chosen": -0.00012460086145438254, "logps/rejected": -2.261373996734619, "loss": 0.527, "nll_loss": 0.13175800442695618, "rewards/accuracies": 1.0, "rewards/chosen": -1.2460084690246731e-05, "rewards/margins": 0.22612492740154266, "rewards/rejected": -0.22613739967346191, "step": 10131 }, { "epoch": 7.006915629322268, "grad_norm": 7.147092819213867, "learning_rate": 1.662824650376518e-05, "log_odds_chosen": 9.98300552368164, "log_odds_ratio": -0.000987243838608265, "logits/chosen": -0.5050815343856812, "logits/rejected": -0.5195842981338501, "logps/chosen": -0.0005599947762675583, "logps/rejected": -1.8902868032455444, "loss": 0.6455, "nll_loss": 0.16128447651863098, "rewards/accuracies": 1.0, "rewards/chosen": -5.599947689916007e-05, "rewards/margins": 0.18897268176078796, "rewards/rejected": -0.18902869522571564, "step": 10132 }, { "epoch": 7.007607192254495, "grad_norm": 3.509237766265869, "learning_rate": 1.6624404487475027e-05, "log_odds_chosen": 10.463541984558105, "log_odds_ratio": -5.5751308536855504e-05, "logits/chosen": -0.6657548546791077, "logits/rejected": -0.661270022392273, "logps/chosen": -0.00012757029617205262, "logps/rejected": -1.5192625522613525, "loss": 0.3719, "nll_loss": 0.09297018498182297, "rewards/accuracies": 1.0, "rewards/chosen": -1.2757031072396785e-05, "rewards/margins": 0.15191349387168884, "rewards/rejected": -0.15192626416683197, "step": 10133 }, { "epoch": 7.008298755186722, "grad_norm": 4.898674011230469, "learning_rate": 1.6620562471184876e-05, "log_odds_chosen": 9.497284889221191, "log_odds_ratio": -0.00023474835325032473, "logits/chosen": -0.6238462924957275, "logits/rejected": -0.657147228717804, "logps/chosen": -0.0002542249276302755, "logps/rejected": -1.2084856033325195, "loss": 0.4356, "nll_loss": 0.1088782474398613, "rewards/accuracies": 1.0, "rewards/chosen": -2.542249421821907e-05, "rewards/margins": 0.12082314491271973, "rewards/rejected": -0.12084857374429703, "step": 10134 }, { "epoch": 7.008990318118949, "grad_norm": 7.74991512298584, "learning_rate": 1.661672045489473e-05, "log_odds_chosen": 12.126110076904297, "log_odds_ratio": -1.4810936590947676e-05, "logits/chosen": -0.5204528570175171, "logits/rejected": -0.5668099522590637, "logps/chosen": -0.00012032059021294117, "logps/rejected": -2.9467427730560303, "loss": 0.4082, "nll_loss": 0.10204964876174927, "rewards/accuracies": 1.0, "rewards/chosen": -1.2032059203193057e-05, "rewards/margins": 0.2946622669696808, "rewards/rejected": -0.294674277305603, "step": 10135 }, { "epoch": 7.009681881051176, "grad_norm": 8.93749713897705, "learning_rate": 1.661287843860458e-05, "log_odds_chosen": 10.70609188079834, "log_odds_ratio": -0.00022324280871544033, "logits/chosen": -0.43974289298057556, "logits/rejected": -0.5134708881378174, "logps/chosen": -0.00028700660914182663, "logps/rejected": -2.1391687393188477, "loss": 0.4658, "nll_loss": 0.11643637716770172, "rewards/accuracies": 1.0, "rewards/chosen": -2.870065873139538e-05, "rewards/margins": 0.21388816833496094, "rewards/rejected": -0.2139168679714203, "step": 10136 }, { "epoch": 7.0103734439834025, "grad_norm": 5.301570892333984, "learning_rate": 1.660903642231443e-05, "log_odds_chosen": 11.49659252166748, "log_odds_ratio": -9.302143735112622e-05, "logits/chosen": -0.3102811872959137, "logits/rejected": -0.444336861371994, "logps/chosen": -0.0011759212939068675, "logps/rejected": -3.2168827056884766, "loss": 0.3944, "nll_loss": 0.09858773648738861, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011759212065953761, "rewards/margins": 0.3215706944465637, "rewards/rejected": -0.32168829441070557, "step": 10137 }, { "epoch": 7.011065006915629, "grad_norm": 4.520849227905273, "learning_rate": 1.6605194406024283e-05, "log_odds_chosen": 9.959612846374512, "log_odds_ratio": -0.0002180114242946729, "logits/chosen": -0.8905350565910339, "logits/rejected": -1.0491600036621094, "logps/chosen": -0.00024582387413829565, "logps/rejected": -1.7666348218917847, "loss": 0.3302, "nll_loss": 0.0825221985578537, "rewards/accuracies": 1.0, "rewards/chosen": -2.45823848672444e-05, "rewards/margins": 0.1766389012336731, "rewards/rejected": -0.17666348814964294, "step": 10138 }, { "epoch": 7.011756569847856, "grad_norm": 3.800283670425415, "learning_rate": 1.6601352389734135e-05, "log_odds_chosen": 10.321309089660645, "log_odds_ratio": -0.00023361285275314003, "logits/chosen": -0.3291170001029968, "logits/rejected": -0.36998191475868225, "logps/chosen": -0.00021534046391025186, "logps/rejected": -1.4043457508087158, "loss": 0.5506, "nll_loss": 0.1376248598098755, "rewards/accuracies": 1.0, "rewards/chosen": -2.1534046027227305e-05, "rewards/margins": 0.1404130458831787, "rewards/rejected": -0.14043457806110382, "step": 10139 }, { "epoch": 7.012448132780083, "grad_norm": 3.9852750301361084, "learning_rate": 1.6597510373443984e-05, "log_odds_chosen": 11.834362983703613, "log_odds_ratio": -4.900186468148604e-05, "logits/chosen": -0.27264270186424255, "logits/rejected": -0.405239999294281, "logps/chosen": -0.0005634097033180296, "logps/rejected": -2.438737392425537, "loss": 0.5053, "nll_loss": 0.12631599605083466, "rewards/accuracies": 1.0, "rewards/chosen": -5.634097396978177e-05, "rewards/margins": 0.24381740391254425, "rewards/rejected": -0.24387376010417938, "step": 10140 }, { "epoch": 7.01313969571231, "grad_norm": 9.13525676727295, "learning_rate": 1.6593668357153837e-05, "log_odds_chosen": 10.613750457763672, "log_odds_ratio": -7.9997735156212e-05, "logits/chosen": -0.16509748995304108, "logits/rejected": -0.236328586935997, "logps/chosen": -0.0001578353112563491, "logps/rejected": -1.8619117736816406, "loss": 0.5854, "nll_loss": 0.1463315635919571, "rewards/accuracies": 1.0, "rewards/chosen": -1.578353112563491e-05, "rewards/margins": 0.1861753910779953, "rewards/rejected": -0.18619118630886078, "step": 10141 }, { "epoch": 7.013831258644537, "grad_norm": 6.098309516906738, "learning_rate": 1.6589826340863686e-05, "log_odds_chosen": 10.384798049926758, "log_odds_ratio": -0.00012338526721578091, "logits/chosen": -0.11653617024421692, "logits/rejected": -0.29346024990081787, "logps/chosen": -0.001884337398223579, "logps/rejected": -2.397714376449585, "loss": 0.5334, "nll_loss": 0.13334126770496368, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018843376892618835, "rewards/margins": 0.23958301544189453, "rewards/rejected": -0.23977145552635193, "step": 10142 }, { "epoch": 7.014522821576763, "grad_norm": 4.79433536529541, "learning_rate": 1.6585984324573535e-05, "log_odds_chosen": 11.148327827453613, "log_odds_ratio": -4.3775267840828747e-05, "logits/chosen": -0.7096115946769714, "logits/rejected": -0.7051650285720825, "logps/chosen": -0.00015493936371058226, "logps/rejected": -2.150099277496338, "loss": 0.4802, "nll_loss": 0.12003987282514572, "rewards/accuracies": 1.0, "rewards/chosen": -1.5493937098653987e-05, "rewards/margins": 0.2149944305419922, "rewards/rejected": -0.2150099277496338, "step": 10143 }, { "epoch": 7.01521438450899, "grad_norm": 3.9534661769866943, "learning_rate": 1.6582142308283387e-05, "log_odds_chosen": 10.364572525024414, "log_odds_ratio": -0.0018095355480909348, "logits/chosen": -0.19156116247177124, "logits/rejected": -0.20004430413246155, "logps/chosen": -0.001704665133729577, "logps/rejected": -2.612130641937256, "loss": 0.4095, "nll_loss": 0.1021956205368042, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001704665191937238, "rewards/margins": 0.26104259490966797, "rewards/rejected": -0.2612130641937256, "step": 10144 }, { "epoch": 7.015905947441217, "grad_norm": 3.5418949127197266, "learning_rate": 1.657830029199324e-05, "log_odds_chosen": 10.750520706176758, "log_odds_ratio": -0.00015969673404470086, "logits/chosen": -0.18375131487846375, "logits/rejected": -0.19293418526649475, "logps/chosen": -0.0001509210269432515, "logps/rejected": -1.8935903310775757, "loss": 0.4629, "nll_loss": 0.11571593582630157, "rewards/accuracies": 1.0, "rewards/chosen": -1.509210324002197e-05, "rewards/margins": 0.18934394419193268, "rewards/rejected": -0.18935903906822205, "step": 10145 }, { "epoch": 7.016597510373444, "grad_norm": 5.542314529418945, "learning_rate": 1.657445827570309e-05, "log_odds_chosen": 9.751424789428711, "log_odds_ratio": -0.00025462431949563324, "logits/chosen": -0.1339586228132248, "logits/rejected": -0.1502704620361328, "logps/chosen": -0.0007169965538196266, "logps/rejected": -1.936805009841919, "loss": 0.7262, "nll_loss": 0.18153619766235352, "rewards/accuracies": 1.0, "rewards/chosen": -7.169965101638809e-05, "rewards/margins": 0.19360879063606262, "rewards/rejected": -0.19368049502372742, "step": 10146 }, { "epoch": 7.017289073305671, "grad_norm": 4.211172103881836, "learning_rate": 1.657061625941294e-05, "log_odds_chosen": 11.636990547180176, "log_odds_ratio": -1.1203040230611805e-05, "logits/chosen": -0.209406316280365, "logits/rejected": -0.2680383324623108, "logps/chosen": -0.00018335843924432993, "logps/rejected": -2.7831931114196777, "loss": 0.4178, "nll_loss": 0.10444985330104828, "rewards/accuracies": 1.0, "rewards/chosen": -1.8335844288230874e-05, "rewards/margins": 0.2783010005950928, "rewards/rejected": -0.2783193290233612, "step": 10147 }, { "epoch": 7.017980636237898, "grad_norm": 4.356594562530518, "learning_rate": 1.6566774243122794e-05, "log_odds_chosen": 10.074660301208496, "log_odds_ratio": -0.0001061324801412411, "logits/chosen": -0.42399752140045166, "logits/rejected": -0.429721474647522, "logps/chosen": -0.0004705238970927894, "logps/rejected": -2.0024328231811523, "loss": 0.2835, "nll_loss": 0.07086857408285141, "rewards/accuracies": 1.0, "rewards/chosen": -4.705238825408742e-05, "rewards/margins": 0.20019622147083282, "rewards/rejected": -0.200243279337883, "step": 10148 }, { "epoch": 7.018672199170124, "grad_norm": 5.776265621185303, "learning_rate": 1.6562932226832643e-05, "log_odds_chosen": 10.335941314697266, "log_odds_ratio": -0.00023757074086461216, "logits/chosen": -0.38004934787750244, "logits/rejected": -0.43100476264953613, "logps/chosen": -0.0011662193574011326, "logps/rejected": -2.4946436882019043, "loss": 0.4329, "nll_loss": 0.10819514095783234, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011662192991934717, "rewards/margins": 0.2493477761745453, "rewards/rejected": -0.24946439266204834, "step": 10149 }, { "epoch": 7.019363762102351, "grad_norm": 4.387164115905762, "learning_rate": 1.6559090210542495e-05, "log_odds_chosen": 9.481719970703125, "log_odds_ratio": -0.0002926454762928188, "logits/chosen": -0.2788187265396118, "logits/rejected": -0.28796088695526123, "logps/chosen": -0.0004147875006310642, "logps/rejected": -1.7780773639678955, "loss": 0.4905, "nll_loss": 0.12258856743574142, "rewards/accuracies": 1.0, "rewards/chosen": -4.1478753701085225e-05, "rewards/margins": 0.17776626348495483, "rewards/rejected": -0.1778077483177185, "step": 10150 }, { "epoch": 7.020055325034578, "grad_norm": 5.8968658447265625, "learning_rate": 1.6555248194252344e-05, "log_odds_chosen": 12.193097114562988, "log_odds_ratio": -9.320355275121983e-06, "logits/chosen": -0.3670240640640259, "logits/rejected": -0.5074326395988464, "logps/chosen": -0.00020143986330367625, "logps/rejected": -3.347984790802002, "loss": 0.6501, "nll_loss": 0.16253016889095306, "rewards/accuracies": 1.0, "rewards/chosen": -2.0143987057963386e-05, "rewards/margins": 0.334778368473053, "rewards/rejected": -0.33479851484298706, "step": 10151 }, { "epoch": 7.020746887966805, "grad_norm": 7.897648334503174, "learning_rate": 1.6551406177962193e-05, "log_odds_chosen": 10.744218826293945, "log_odds_ratio": -8.096903911791742e-05, "logits/chosen": -0.6842846870422363, "logits/rejected": -0.6260226368904114, "logps/chosen": -0.00022721345885656774, "logps/rejected": -2.1028618812561035, "loss": 0.425, "nll_loss": 0.10624252259731293, "rewards/accuracies": 1.0, "rewards/chosen": -2.2721345885656774e-05, "rewards/margins": 0.2102634608745575, "rewards/rejected": -0.2102862000465393, "step": 10152 }, { "epoch": 7.021438450899032, "grad_norm": 4.6264495849609375, "learning_rate": 1.6547564161672046e-05, "log_odds_chosen": 11.312272071838379, "log_odds_ratio": -2.633406074892264e-05, "logits/chosen": -0.2902446389198303, "logits/rejected": -0.3588501513004303, "logps/chosen": -0.00012992211850360036, "logps/rejected": -2.3609724044799805, "loss": 0.3794, "nll_loss": 0.09485436975955963, "rewards/accuracies": 1.0, "rewards/chosen": -1.299221366934944e-05, "rewards/margins": 0.2360842525959015, "rewards/rejected": -0.23609723150730133, "step": 10153 }, { "epoch": 7.022130013831259, "grad_norm": 4.990711212158203, "learning_rate": 1.6543722145381898e-05, "log_odds_chosen": 10.290438652038574, "log_odds_ratio": -0.00021796667715534568, "logits/chosen": -0.057533517479896545, "logits/rejected": -0.3147449493408203, "logps/chosen": -0.0010070588905364275, "logps/rejected": -2.5475285053253174, "loss": 0.6871, "nll_loss": 0.17174169421195984, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010070588177768514, "rewards/margins": 0.25465214252471924, "rewards/rejected": -0.25475287437438965, "step": 10154 }, { "epoch": 7.022821576763485, "grad_norm": 5.92970609664917, "learning_rate": 1.6539880129091747e-05, "log_odds_chosen": 11.233606338500977, "log_odds_ratio": -2.7870761186932214e-05, "logits/chosen": -0.32939082384109497, "logits/rejected": -0.3940170407295227, "logps/chosen": -8.571715443395078e-05, "logps/rejected": -1.9228782653808594, "loss": 0.4735, "nll_loss": 0.11837649345397949, "rewards/accuracies": 1.0, "rewards/chosen": -8.571715625294019e-06, "rewards/margins": 0.19227927923202515, "rewards/rejected": -0.1922878473997116, "step": 10155 }, { "epoch": 7.023513139695712, "grad_norm": 3.8882079124450684, "learning_rate": 1.65360381128016e-05, "log_odds_chosen": 10.108419418334961, "log_odds_ratio": -8.935236110119149e-05, "logits/chosen": -0.8104405403137207, "logits/rejected": -0.8440303802490234, "logps/chosen": -0.0004185454163234681, "logps/rejected": -1.7598785161972046, "loss": 0.3911, "nll_loss": 0.09776659309864044, "rewards/accuracies": 1.0, "rewards/chosen": -4.185454599792138e-05, "rewards/margins": 0.17594601213932037, "rewards/rejected": -0.1759878545999527, "step": 10156 }, { "epoch": 7.024204702627939, "grad_norm": 5.826569557189941, "learning_rate": 1.653219609651145e-05, "log_odds_chosen": 10.434246063232422, "log_odds_ratio": -0.00024922305601648986, "logits/chosen": -0.7025120258331299, "logits/rejected": -0.6248111724853516, "logps/chosen": -0.0001936000626301393, "logps/rejected": -1.761898398399353, "loss": 0.6959, "nll_loss": 0.17395778000354767, "rewards/accuracies": 1.0, "rewards/chosen": -1.936000626301393e-05, "rewards/margins": 0.1761704683303833, "rewards/rejected": -0.1761898398399353, "step": 10157 }, { "epoch": 7.024896265560166, "grad_norm": 5.452138423919678, "learning_rate": 1.65283540802213e-05, "log_odds_chosen": 11.768486022949219, "log_odds_ratio": -1.5557510778307915e-05, "logits/chosen": -0.6322643160820007, "logits/rejected": -0.7022844552993774, "logps/chosen": -0.0001267803891096264, "logps/rejected": -2.44952130317688, "loss": 0.4294, "nll_loss": 0.10734250396490097, "rewards/accuracies": 1.0, "rewards/chosen": -1.2678039638558403e-05, "rewards/margins": 0.24493944644927979, "rewards/rejected": -0.24495212733745575, "step": 10158 }, { "epoch": 7.025587828492393, "grad_norm": 5.500707149505615, "learning_rate": 1.6524512063931154e-05, "log_odds_chosen": 11.4202241897583, "log_odds_ratio": -0.0006046928465366364, "logits/chosen": -0.01992719992995262, "logits/rejected": -0.10526986420154572, "logps/chosen": -0.0008786749094724655, "logps/rejected": -2.987894058227539, "loss": 0.6546, "nll_loss": 0.16358627378940582, "rewards/accuracies": 1.0, "rewards/chosen": -8.786749822320417e-05, "rewards/margins": 0.29870152473449707, "rewards/rejected": -0.298789381980896, "step": 10159 }, { "epoch": 7.0262793914246195, "grad_norm": 5.260455131530762, "learning_rate": 1.6520670047641003e-05, "log_odds_chosen": 10.34775447845459, "log_odds_ratio": -0.00031731827766634524, "logits/chosen": 0.4068028926849365, "logits/rejected": 0.27036112546920776, "logps/chosen": -0.0009474047692492604, "logps/rejected": -2.5685877799987793, "loss": 0.5361, "nll_loss": 0.13399837911128998, "rewards/accuracies": 1.0, "rewards/chosen": -9.474047692492604e-05, "rewards/margins": 0.2567640542984009, "rewards/rejected": -0.25685879588127136, "step": 10160 }, { "epoch": 7.026970954356846, "grad_norm": 5.471103191375732, "learning_rate": 1.6516828031350852e-05, "log_odds_chosen": 11.179512023925781, "log_odds_ratio": -1.992461329791695e-05, "logits/chosen": -0.5953192710876465, "logits/rejected": -0.6373271942138672, "logps/chosen": -0.000205492353416048, "logps/rejected": -2.2310597896575928, "loss": 0.6108, "nll_loss": 0.1526985764503479, "rewards/accuracies": 1.0, "rewards/chosen": -2.054923606920056e-05, "rewards/margins": 0.2230854332447052, "rewards/rejected": -0.22310596704483032, "step": 10161 }, { "epoch": 7.027662517289073, "grad_norm": 13.870356559753418, "learning_rate": 1.6512986015060704e-05, "log_odds_chosen": 10.364818572998047, "log_odds_ratio": -0.0002144112513633445, "logits/chosen": -0.7196736335754395, "logits/rejected": -0.7011851668357849, "logps/chosen": -0.00027963684988208115, "logps/rejected": -1.7333106994628906, "loss": 0.4818, "nll_loss": 0.12043341994285583, "rewards/accuracies": 1.0, "rewards/chosen": -2.7963686079601757e-05, "rewards/margins": 0.17330311238765717, "rewards/rejected": -0.17333108186721802, "step": 10162 }, { "epoch": 7.0283540802213, "grad_norm": 6.397958278656006, "learning_rate": 1.6509143998770553e-05, "log_odds_chosen": 9.366116523742676, "log_odds_ratio": -0.0018373996717855334, "logits/chosen": -0.5085399150848389, "logits/rejected": -0.570478081703186, "logps/chosen": -0.0013225064612925053, "logps/rejected": -1.975780963897705, "loss": 0.5771, "nll_loss": 0.1440969705581665, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013225062866695225, "rewards/margins": 0.1974458396434784, "rewards/rejected": -0.1975780874490738, "step": 10163 }, { "epoch": 7.029045643153527, "grad_norm": 6.637556076049805, "learning_rate": 1.6505301982480406e-05, "log_odds_chosen": 11.188152313232422, "log_odds_ratio": -5.743116344092414e-05, "logits/chosen": -0.5558980107307434, "logits/rejected": -0.5013357400894165, "logps/chosen": -0.00044322869507595897, "logps/rejected": -2.2768421173095703, "loss": 0.4811, "nll_loss": 0.12027549743652344, "rewards/accuracies": 1.0, "rewards/chosen": -4.432286732480861e-05, "rewards/margins": 0.22763991355895996, "rewards/rejected": -0.22768422961235046, "step": 10164 }, { "epoch": 7.029737206085754, "grad_norm": 10.173316955566406, "learning_rate": 1.6501459966190258e-05, "log_odds_chosen": 11.47596549987793, "log_odds_ratio": -2.377521559537854e-05, "logits/chosen": -0.5182502269744873, "logits/rejected": -0.5592266917228699, "logps/chosen": -0.0001406124501954764, "logps/rejected": -2.4441890716552734, "loss": 0.5262, "nll_loss": 0.13155286014080048, "rewards/accuracies": 1.0, "rewards/chosen": -1.406124465574976e-05, "rewards/margins": 0.2444048523902893, "rewards/rejected": -0.2444189041852951, "step": 10165 }, { "epoch": 7.0304287690179805, "grad_norm": 6.493875026702881, "learning_rate": 1.6497617949900107e-05, "log_odds_chosen": 11.056800842285156, "log_odds_ratio": -3.62358296115417e-05, "logits/chosen": -0.4721360504627228, "logits/rejected": -0.522377073764801, "logps/chosen": -0.0004437095485627651, "logps/rejected": -2.6853585243225098, "loss": 0.551, "nll_loss": 0.1377374231815338, "rewards/accuracies": 1.0, "rewards/chosen": -4.437095776665956e-05, "rewards/margins": 0.2684914767742157, "rewards/rejected": -0.268535852432251, "step": 10166 }, { "epoch": 7.031120331950207, "grad_norm": 7.352128982543945, "learning_rate": 1.649377593360996e-05, "log_odds_chosen": 9.795044898986816, "log_odds_ratio": -0.0001043759984895587, "logits/chosen": -0.6661955118179321, "logits/rejected": -0.7603518962860107, "logps/chosen": -0.00048251228872686625, "logps/rejected": -1.7106971740722656, "loss": 1.0816, "nll_loss": 0.2703953981399536, "rewards/accuracies": 1.0, "rewards/chosen": -4.8251229600282386e-05, "rewards/margins": 0.1710214763879776, "rewards/rejected": -0.17106971144676208, "step": 10167 }, { "epoch": 7.031811894882434, "grad_norm": 9.762201309204102, "learning_rate": 1.6489933917319812e-05, "log_odds_chosen": 10.23452377319336, "log_odds_ratio": -0.00016816816059872508, "logits/chosen": -0.28022176027297974, "logits/rejected": -0.31643277406692505, "logps/chosen": -0.0002240131434518844, "logps/rejected": -1.9264167547225952, "loss": 0.3028, "nll_loss": 0.07567442953586578, "rewards/accuracies": 1.0, "rewards/chosen": -2.240131470898632e-05, "rewards/margins": 0.19261927902698517, "rewards/rejected": -0.19264167547225952, "step": 10168 }, { "epoch": 7.032503457814661, "grad_norm": 4.275112152099609, "learning_rate": 1.648609190102966e-05, "log_odds_chosen": 10.421937942504883, "log_odds_ratio": -0.00016295308887492865, "logits/chosen": -0.29650211334228516, "logits/rejected": -0.4259142577648163, "logps/chosen": -0.0003613826702348888, "logps/rejected": -1.915174961090088, "loss": 0.4729, "nll_loss": 0.11820431053638458, "rewards/accuracies": 1.0, "rewards/chosen": -3.613826629589312e-05, "rewards/margins": 0.191481351852417, "rewards/rejected": -0.19151750206947327, "step": 10169 }, { "epoch": 7.033195020746888, "grad_norm": 5.346506118774414, "learning_rate": 1.648224988473951e-05, "log_odds_chosen": 10.069296836853027, "log_odds_ratio": -9.543122723698616e-05, "logits/chosen": -0.3640506863594055, "logits/rejected": -0.4045189917087555, "logps/chosen": -0.0005423121619969606, "logps/rejected": -1.622582197189331, "loss": 0.7224, "nll_loss": 0.18058858811855316, "rewards/accuracies": 1.0, "rewards/chosen": -5.42312154721003e-05, "rewards/margins": 0.16220398247241974, "rewards/rejected": -0.16225820779800415, "step": 10170 }, { "epoch": 7.033886583679115, "grad_norm": 6.405359745025635, "learning_rate": 1.6478407868449363e-05, "log_odds_chosen": 10.256173133850098, "log_odds_ratio": -0.0005121605936437845, "logits/chosen": -0.609381914138794, "logits/rejected": -0.6050175428390503, "logps/chosen": -0.0007455676095560193, "logps/rejected": -2.217386484146118, "loss": 0.4848, "nll_loss": 0.12116114050149918, "rewards/accuracies": 1.0, "rewards/chosen": -7.455676677636802e-05, "rewards/margins": 0.22166410088539124, "rewards/rejected": -0.22173866629600525, "step": 10171 }, { "epoch": 7.0345781466113415, "grad_norm": 5.032310962677002, "learning_rate": 1.6474565852159212e-05, "log_odds_chosen": 10.876982688903809, "log_odds_ratio": -5.83749933866784e-05, "logits/chosen": 0.012026078999042511, "logits/rejected": -0.10096706449985504, "logps/chosen": -0.0006185060483403504, "logps/rejected": -2.8352396488189697, "loss": 0.5413, "nll_loss": 0.1353198140859604, "rewards/accuracies": 1.0, "rewards/chosen": -6.1850601923652e-05, "rewards/margins": 0.2834621071815491, "rewards/rejected": -0.28352394700050354, "step": 10172 }, { "epoch": 7.035269709543568, "grad_norm": 5.299025058746338, "learning_rate": 1.6470723835869064e-05, "log_odds_chosen": 10.806124687194824, "log_odds_ratio": -7.67287565395236e-05, "logits/chosen": -0.44665342569351196, "logits/rejected": -0.4744335114955902, "logps/chosen": -0.0004359095182735473, "logps/rejected": -2.548654556274414, "loss": 0.4792, "nll_loss": 0.11978526413440704, "rewards/accuracies": 1.0, "rewards/chosen": -4.3590949644567445e-05, "rewards/margins": 0.25482189655303955, "rewards/rejected": -0.25486546754837036, "step": 10173 }, { "epoch": 7.035961272475795, "grad_norm": 5.840847015380859, "learning_rate": 1.6466881819578917e-05, "log_odds_chosen": 10.712270736694336, "log_odds_ratio": -4.2316467443015426e-05, "logits/chosen": -0.6601865291595459, "logits/rejected": -0.8111155033111572, "logps/chosen": -0.0002043562853941694, "logps/rejected": -1.841064214706421, "loss": 0.524, "nll_loss": 0.13099926710128784, "rewards/accuracies": 1.0, "rewards/chosen": -2.04356292670127e-05, "rewards/margins": 0.18408598005771637, "rewards/rejected": -0.18410643935203552, "step": 10174 }, { "epoch": 7.036652835408022, "grad_norm": 3.828860282897949, "learning_rate": 1.6463039803288766e-05, "log_odds_chosen": 10.708616256713867, "log_odds_ratio": -0.0003266993153374642, "logits/chosen": -0.48677772283554077, "logits/rejected": -0.5339542031288147, "logps/chosen": -0.00128236785531044, "logps/rejected": -2.570948600769043, "loss": 0.5406, "nll_loss": 0.13511976599693298, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012823677388951182, "rewards/margins": 0.25696662068367004, "rewards/rejected": -0.2570948600769043, "step": 10175 }, { "epoch": 7.037344398340249, "grad_norm": 5.5663065910339355, "learning_rate": 1.6459197786998618e-05, "log_odds_chosen": 11.677118301391602, "log_odds_ratio": -1.1773870028264355e-05, "logits/chosen": -0.16936692595481873, "logits/rejected": -0.15656155347824097, "logps/chosen": -8.260917093139142e-05, "logps/rejected": -2.1766366958618164, "loss": 1.1645, "nll_loss": 0.29112347960472107, "rewards/accuracies": 1.0, "rewards/chosen": -8.260917638835963e-06, "rewards/margins": 0.21765542030334473, "rewards/rejected": -0.21766367554664612, "step": 10176 }, { "epoch": 7.038035961272476, "grad_norm": 5.215190887451172, "learning_rate": 1.645535577070847e-05, "log_odds_chosen": 10.833396911621094, "log_odds_ratio": -4.95106796734035e-05, "logits/chosen": -0.0681900605559349, "logits/rejected": -0.17159458994865417, "logps/chosen": -0.0002625317720230669, "logps/rejected": -1.9759297370910645, "loss": 0.6218, "nll_loss": 0.15545254945755005, "rewards/accuracies": 1.0, "rewards/chosen": -2.6253175747115165e-05, "rewards/margins": 0.19756671786308289, "rewards/rejected": -0.19759297370910645, "step": 10177 }, { "epoch": 7.0387275242047025, "grad_norm": 3.8841283321380615, "learning_rate": 1.645151375441832e-05, "log_odds_chosen": 10.032281875610352, "log_odds_ratio": -0.00029725898639298975, "logits/chosen": 0.27079981565475464, "logits/rejected": 0.08898486196994781, "logps/chosen": -0.0007310167420655489, "logps/rejected": -1.6904047727584839, "loss": 0.3667, "nll_loss": 0.09165007621049881, "rewards/accuracies": 1.0, "rewards/chosen": -7.310167711693794e-05, "rewards/margins": 0.1689673662185669, "rewards/rejected": -0.1690404713153839, "step": 10178 }, { "epoch": 7.039419087136929, "grad_norm": 6.194324970245361, "learning_rate": 1.644767173812817e-05, "log_odds_chosen": 10.229728698730469, "log_odds_ratio": -0.00041000815690495074, "logits/chosen": -0.050195012241601944, "logits/rejected": -0.04101834073662758, "logps/chosen": -0.0002562832087278366, "logps/rejected": -1.5868102312088013, "loss": 0.7313, "nll_loss": 0.18277321755886078, "rewards/accuracies": 1.0, "rewards/chosen": -2.56283201451879e-05, "rewards/margins": 0.15865539014339447, "rewards/rejected": -0.1586810201406479, "step": 10179 }, { "epoch": 7.040110650069156, "grad_norm": 9.337830543518066, "learning_rate": 1.644382972183802e-05, "log_odds_chosen": 10.965694427490234, "log_odds_ratio": -3.5539087548386306e-05, "logits/chosen": -0.4667804539203644, "logits/rejected": -0.5348884463310242, "logps/chosen": -0.0005278678145259619, "logps/rejected": -2.2736480236053467, "loss": 0.4805, "nll_loss": 0.12011906504631042, "rewards/accuracies": 1.0, "rewards/chosen": -5.278678145259619e-05, "rewards/margins": 0.22731202840805054, "rewards/rejected": -0.22736480832099915, "step": 10180 }, { "epoch": 7.040802213001383, "grad_norm": 6.488518714904785, "learning_rate": 1.643998770554787e-05, "log_odds_chosen": 11.179366111755371, "log_odds_ratio": -0.00010157489305129275, "logits/chosen": -0.4254930317401886, "logits/rejected": -0.45213884115219116, "logps/chosen": -0.0007714617531746626, "logps/rejected": -3.35191011428833, "loss": 0.4736, "nll_loss": 0.11839261651039124, "rewards/accuracies": 1.0, "rewards/chosen": -7.71461782278493e-05, "rewards/margins": 0.33511388301849365, "rewards/rejected": -0.335191011428833, "step": 10181 }, { "epoch": 7.04149377593361, "grad_norm": 5.294634819030762, "learning_rate": 1.6436145689257723e-05, "log_odds_chosen": 12.10110092163086, "log_odds_ratio": -1.710414289846085e-05, "logits/chosen": -0.1707492619752884, "logits/rejected": -0.2344096601009369, "logps/chosen": -7.648633618373424e-05, "logps/rejected": -2.405935049057007, "loss": 0.366, "nll_loss": 0.09150756150484085, "rewards/accuracies": 1.0, "rewards/chosen": -7.648633982171305e-06, "rewards/margins": 0.2405858337879181, "rewards/rejected": -0.24059350788593292, "step": 10182 }, { "epoch": 7.042185338865837, "grad_norm": 3.243549108505249, "learning_rate": 1.6432303672967575e-05, "log_odds_chosen": 11.382166862487793, "log_odds_ratio": -2.876598409784492e-05, "logits/chosen": -0.34457629919052124, "logits/rejected": -0.40560418367385864, "logps/chosen": -0.00015357055235654116, "logps/rejected": -2.4820568561553955, "loss": 0.3721, "nll_loss": 0.09302125871181488, "rewards/accuracies": 1.0, "rewards/chosen": -1.5357054508058354e-05, "rewards/margins": 0.24819032847881317, "rewards/rejected": -0.24820569157600403, "step": 10183 }, { "epoch": 7.0428769017980635, "grad_norm": 5.427514553070068, "learning_rate": 1.6428461656677424e-05, "log_odds_chosen": 11.672807693481445, "log_odds_ratio": -3.7301670090528205e-05, "logits/chosen": -0.5080470442771912, "logits/rejected": -0.5370722413063049, "logps/chosen": -0.0006107304943725467, "logps/rejected": -2.9019529819488525, "loss": 0.4418, "nll_loss": 0.11045674979686737, "rewards/accuracies": 1.0, "rewards/chosen": -6.107304943725467e-05, "rewards/margins": 0.2901342511177063, "rewards/rejected": -0.2901953160762787, "step": 10184 }, { "epoch": 7.04356846473029, "grad_norm": 16.298444747924805, "learning_rate": 1.6424619640387277e-05, "log_odds_chosen": 11.56513786315918, "log_odds_ratio": -1.4087101590121165e-05, "logits/chosen": -0.9115073680877686, "logits/rejected": -0.917153000831604, "logps/chosen": -0.000252393918344751, "logps/rejected": -2.4125871658325195, "loss": 0.3834, "nll_loss": 0.09584072232246399, "rewards/accuracies": 1.0, "rewards/chosen": -2.5239394744858146e-05, "rewards/margins": 0.2412334680557251, "rewards/rejected": -0.24125871062278748, "step": 10185 }, { "epoch": 7.044260027662517, "grad_norm": 3.273277759552002, "learning_rate": 1.642077762409713e-05, "log_odds_chosen": 10.584732055664062, "log_odds_ratio": -0.00015055287803988904, "logits/chosen": -0.2822999656200409, "logits/rejected": -0.29767704010009766, "logps/chosen": -0.00020575344387907535, "logps/rejected": -2.1561877727508545, "loss": 0.4178, "nll_loss": 0.10444684326648712, "rewards/accuracies": 1.0, "rewards/chosen": -2.0575344024109654e-05, "rewards/margins": 0.2155982106924057, "rewards/rejected": -0.2156187891960144, "step": 10186 }, { "epoch": 7.044951590594744, "grad_norm": 3.3376383781433105, "learning_rate": 1.6416935607806978e-05, "log_odds_chosen": 10.354433059692383, "log_odds_ratio": -7.408359670080245e-05, "logits/chosen": -0.4512789845466614, "logits/rejected": -0.47543883323669434, "logps/chosen": -0.00016649517056066543, "logps/rejected": -1.6402223110198975, "loss": 0.4424, "nll_loss": 0.11058825254440308, "rewards/accuracies": 1.0, "rewards/chosen": -1.664951560087502e-05, "rewards/margins": 0.1640055924654007, "rewards/rejected": -0.16402223706245422, "step": 10187 }, { "epoch": 7.045643153526971, "grad_norm": 6.444431304931641, "learning_rate": 1.641309359151683e-05, "log_odds_chosen": 11.011929512023926, "log_odds_ratio": -6.39606369077228e-05, "logits/chosen": -0.19220955669879913, "logits/rejected": -0.30211764574050903, "logps/chosen": -0.00017778460460249335, "logps/rejected": -2.086287498474121, "loss": 0.4065, "nll_loss": 0.10161326825618744, "rewards/accuracies": 1.0, "rewards/chosen": -1.7778460460249335e-05, "rewards/margins": 0.20861098170280457, "rewards/rejected": -0.20862877368927002, "step": 10188 }, { "epoch": 7.046334716459198, "grad_norm": 6.527580261230469, "learning_rate": 1.640925157522668e-05, "log_odds_chosen": 11.151822090148926, "log_odds_ratio": -4.187340528005734e-05, "logits/chosen": -0.0439603328704834, "logits/rejected": -0.05829164385795593, "logps/chosen": -0.00031163141829892993, "logps/rejected": -2.4885408878326416, "loss": 0.8264, "nll_loss": 0.20658345520496368, "rewards/accuracies": 1.0, "rewards/chosen": -3.116314474027604e-05, "rewards/margins": 0.24882292747497559, "rewards/rejected": -0.2488541156053543, "step": 10189 }, { "epoch": 7.0470262793914245, "grad_norm": 4.903499603271484, "learning_rate": 1.640540955893653e-05, "log_odds_chosen": 12.155038833618164, "log_odds_ratio": -1.0776170711324085e-05, "logits/chosen": -0.21331267058849335, "logits/rejected": -0.24484039843082428, "logps/chosen": -0.00034775433596223593, "logps/rejected": -3.4819412231445312, "loss": 0.6091, "nll_loss": 0.1522829681634903, "rewards/accuracies": 1.0, "rewards/chosen": -3.477543577901088e-05, "rewards/margins": 0.3481593430042267, "rewards/rejected": -0.3481941223144531, "step": 10190 }, { "epoch": 7.047717842323651, "grad_norm": 4.194298267364502, "learning_rate": 1.640156754264638e-05, "log_odds_chosen": 10.863759994506836, "log_odds_ratio": -4.0062346670310944e-05, "logits/chosen": -0.3379928767681122, "logits/rejected": -0.4385606646537781, "logps/chosen": -7.709265628363937e-05, "logps/rejected": -1.4500436782836914, "loss": 0.3493, "nll_loss": 0.0873255506157875, "rewards/accuracies": 1.0, "rewards/chosen": -7.709265446464997e-06, "rewards/margins": 0.14499665796756744, "rewards/rejected": -0.14500436186790466, "step": 10191 }, { "epoch": 7.048409405255878, "grad_norm": 6.124948024749756, "learning_rate": 1.6397725526356233e-05, "log_odds_chosen": 10.520427703857422, "log_odds_ratio": -4.4489057472674176e-05, "logits/chosen": -0.28054559230804443, "logits/rejected": -0.371385395526886, "logps/chosen": -0.00035703781759366393, "logps/rejected": -2.016751289367676, "loss": 0.6267, "nll_loss": 0.1566665768623352, "rewards/accuracies": 1.0, "rewards/chosen": -3.570377884898335e-05, "rewards/margins": 0.20163944363594055, "rewards/rejected": -0.201675146818161, "step": 10192 }, { "epoch": 7.049100968188105, "grad_norm": 11.454859733581543, "learning_rate": 1.6393883510066083e-05, "log_odds_chosen": 11.12508773803711, "log_odds_ratio": -5.2118764870101586e-05, "logits/chosen": -0.2627827525138855, "logits/rejected": -0.270622193813324, "logps/chosen": -0.0005954367807134986, "logps/rejected": -2.7131738662719727, "loss": 1.0948, "nll_loss": 0.2737029492855072, "rewards/accuracies": 1.0, "rewards/chosen": -5.954367952654138e-05, "rewards/margins": 0.2712578773498535, "rewards/rejected": -0.27131742238998413, "step": 10193 }, { "epoch": 7.049792531120332, "grad_norm": 3.717924118041992, "learning_rate": 1.6390041493775935e-05, "log_odds_chosen": 10.924869537353516, "log_odds_ratio": -0.00014482364349532872, "logits/chosen": -0.5940892696380615, "logits/rejected": -0.574979305267334, "logps/chosen": -0.0002499162219464779, "logps/rejected": -2.2350001335144043, "loss": 0.3341, "nll_loss": 0.08351359516382217, "rewards/accuracies": 1.0, "rewards/chosen": -2.4991619284264743e-05, "rewards/margins": 0.22347500920295715, "rewards/rejected": -0.22350001335144043, "step": 10194 }, { "epoch": 7.050484094052559, "grad_norm": 3.923851490020752, "learning_rate": 1.6386199477485787e-05, "log_odds_chosen": 11.278277397155762, "log_odds_ratio": -4.831477417610586e-05, "logits/chosen": -0.6000710129737854, "logits/rejected": -0.6552660465240479, "logps/chosen": -0.00019233408966101706, "logps/rejected": -2.471733570098877, "loss": 0.4294, "nll_loss": 0.10733603686094284, "rewards/accuracies": 1.0, "rewards/chosen": -1.9233411876484752e-05, "rewards/margins": 0.2471541315317154, "rewards/rejected": -0.24717335402965546, "step": 10195 }, { "epoch": 7.051175656984785, "grad_norm": 6.779111862182617, "learning_rate": 1.6382357461195636e-05, "log_odds_chosen": 11.04921817779541, "log_odds_ratio": -2.1953759642201476e-05, "logits/chosen": -0.08587629348039627, "logits/rejected": -0.17471766471862793, "logps/chosen": -0.00020453102479223162, "logps/rejected": -2.310608148574829, "loss": 0.5807, "nll_loss": 0.14518429338932037, "rewards/accuracies": 1.0, "rewards/chosen": -2.0453102479223162e-05, "rewards/margins": 0.231040358543396, "rewards/rejected": -0.23106080293655396, "step": 10196 }, { "epoch": 7.051867219917012, "grad_norm": 3.915616035461426, "learning_rate": 1.637851544490549e-05, "log_odds_chosen": 10.468697547912598, "log_odds_ratio": -0.0005837790085934103, "logits/chosen": -0.5680649280548096, "logits/rejected": -0.6020484566688538, "logps/chosen": -0.0018268902786076069, "logps/rejected": -2.0773544311523438, "loss": 0.4922, "nll_loss": 0.12299355119466782, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001826890220399946, "rewards/margins": 0.20755276083946228, "rewards/rejected": -0.20773544907569885, "step": 10197 }, { "epoch": 7.052558782849239, "grad_norm": 7.033474922180176, "learning_rate": 1.6374673428615338e-05, "log_odds_chosen": 11.299308776855469, "log_odds_ratio": -9.374999353894964e-05, "logits/chosen": -0.5625525712966919, "logits/rejected": -0.5659523606300354, "logps/chosen": -0.00023602109286002815, "logps/rejected": -2.5319390296936035, "loss": 0.4682, "nll_loss": 0.11702945828437805, "rewards/accuracies": 1.0, "rewards/chosen": -2.3602109649800695e-05, "rewards/margins": 0.25317031145095825, "rewards/rejected": -0.2531939148902893, "step": 10198 }, { "epoch": 7.053250345781466, "grad_norm": 7.322216510772705, "learning_rate": 1.6370831412325187e-05, "log_odds_chosen": 10.925352096557617, "log_odds_ratio": -7.212365017039701e-05, "logits/chosen": -0.8103114366531372, "logits/rejected": -0.8264681696891785, "logps/chosen": -0.00025325504248030484, "logps/rejected": -1.6794970035552979, "loss": 0.428, "nll_loss": 0.10699731111526489, "rewards/accuracies": 1.0, "rewards/chosen": -2.5325503884232603e-05, "rewards/margins": 0.16792437434196472, "rewards/rejected": -0.16794970631599426, "step": 10199 }, { "epoch": 7.053941908713693, "grad_norm": 4.7112956047058105, "learning_rate": 1.636698939603504e-05, "log_odds_chosen": 11.207176208496094, "log_odds_ratio": -5.083268115413375e-05, "logits/chosen": -0.6839872002601624, "logits/rejected": -0.7153400778770447, "logps/chosen": -0.00024315901100635529, "logps/rejected": -2.7372074127197266, "loss": 0.3759, "nll_loss": 0.09395814687013626, "rewards/accuracies": 1.0, "rewards/chosen": -2.4315902919624932e-05, "rewards/margins": 0.2736964523792267, "rewards/rejected": -0.27372077107429504, "step": 10200 }, { "epoch": 7.05463347164592, "grad_norm": 5.1407904624938965, "learning_rate": 1.6363147379744892e-05, "log_odds_chosen": 11.010181427001953, "log_odds_ratio": -3.795043448917568e-05, "logits/chosen": -0.2906171381473541, "logits/rejected": -0.34959375858306885, "logps/chosen": -0.00033321738010272384, "logps/rejected": -2.463109016418457, "loss": 0.3027, "nll_loss": 0.07567539811134338, "rewards/accuracies": 1.0, "rewards/chosen": -3.3321739465463907e-05, "rewards/margins": 0.2462776005268097, "rewards/rejected": -0.24631091952323914, "step": 10201 }, { "epoch": 7.055325034578146, "grad_norm": 4.411509990692139, "learning_rate": 1.635930536345474e-05, "log_odds_chosen": 12.155475616455078, "log_odds_ratio": -5.8786867157323286e-05, "logits/chosen": -0.5480877161026001, "logits/rejected": -0.6062612533569336, "logps/chosen": -0.00023595246602781117, "logps/rejected": -3.229804515838623, "loss": 0.5152, "nll_loss": 0.12880592048168182, "rewards/accuracies": 1.0, "rewards/chosen": -2.3595246602781117e-05, "rewards/margins": 0.3229568302631378, "rewards/rejected": -0.32298046350479126, "step": 10202 }, { "epoch": 7.056016597510373, "grad_norm": 4.577481746673584, "learning_rate": 1.6355463347164593e-05, "log_odds_chosen": 10.382850646972656, "log_odds_ratio": -7.819625170668587e-05, "logits/chosen": 0.20893914997577667, "logits/rejected": 0.13939368724822998, "logps/chosen": -0.0005484464927576482, "logps/rejected": -2.282536506652832, "loss": 0.4111, "nll_loss": 0.10276637226343155, "rewards/accuracies": 1.0, "rewards/chosen": -5.48446478205733e-05, "rewards/margins": 0.2281987965106964, "rewards/rejected": -0.22825364768505096, "step": 10203 }, { "epoch": 7.0567081604426, "grad_norm": 5.125157356262207, "learning_rate": 1.6351621330874446e-05, "log_odds_chosen": 10.655238151550293, "log_odds_ratio": -8.667867223266512e-05, "logits/chosen": -0.31201982498168945, "logits/rejected": -0.3651972711086273, "logps/chosen": -0.00029079418163746595, "logps/rejected": -2.151477813720703, "loss": 0.3703, "nll_loss": 0.09255577623844147, "rewards/accuracies": 1.0, "rewards/chosen": -2.9079419618938118e-05, "rewards/margins": 0.21511872112751007, "rewards/rejected": -0.21514779329299927, "step": 10204 }, { "epoch": 7.057399723374827, "grad_norm": 6.1740217208862305, "learning_rate": 1.6347779314584295e-05, "log_odds_chosen": 10.545979499816895, "log_odds_ratio": -6.465271872002631e-05, "logits/chosen": -0.45721954107284546, "logits/rejected": -0.4858323931694031, "logps/chosen": -0.00011708165402524173, "logps/rejected": -1.588803768157959, "loss": 0.3377, "nll_loss": 0.08442966639995575, "rewards/accuracies": 1.0, "rewards/chosen": -1.1708165402524173e-05, "rewards/margins": 0.158868670463562, "rewards/rejected": -0.15888039767742157, "step": 10205 }, { "epoch": 7.058091286307054, "grad_norm": 4.65233039855957, "learning_rate": 1.6343937298294147e-05, "log_odds_chosen": 10.411567687988281, "log_odds_ratio": -8.02720314823091e-05, "logits/chosen": -0.1786220222711563, "logits/rejected": -0.19122397899627686, "logps/chosen": -0.00019446434453129768, "logps/rejected": -1.4409358501434326, "loss": 0.3758, "nll_loss": 0.09393958747386932, "rewards/accuracies": 1.0, "rewards/chosen": -1.9446433725534007e-05, "rewards/margins": 0.14407414197921753, "rewards/rejected": -0.1440935879945755, "step": 10206 }, { "epoch": 7.058782849239281, "grad_norm": 5.362807273864746, "learning_rate": 1.6340095282003996e-05, "log_odds_chosen": 9.287012100219727, "log_odds_ratio": -0.0006890307413414121, "logits/chosen": -0.3114354610443115, "logits/rejected": -0.3563784956932068, "logps/chosen": -0.0005471562035381794, "logps/rejected": -1.4092155694961548, "loss": 0.7202, "nll_loss": 0.17999312281608582, "rewards/accuracies": 1.0, "rewards/chosen": -5.471562690217979e-05, "rewards/margins": 0.14086684584617615, "rewards/rejected": -0.14092156291007996, "step": 10207 }, { "epoch": 7.059474412171507, "grad_norm": 23.543014526367188, "learning_rate": 1.6336253265713845e-05, "log_odds_chosen": 10.889178276062012, "log_odds_ratio": -5.638160655507818e-05, "logits/chosen": -0.07049977779388428, "logits/rejected": 0.04252389073371887, "logps/chosen": -0.0003565900551620871, "logps/rejected": -2.31201171875, "loss": 0.4739, "nll_loss": 0.11845926195383072, "rewards/accuracies": 1.0, "rewards/chosen": -3.565900988178328e-05, "rewards/margins": 0.23116551339626312, "rewards/rejected": -0.2312011569738388, "step": 10208 }, { "epoch": 7.060165975103734, "grad_norm": 8.616909980773926, "learning_rate": 1.6332411249423698e-05, "log_odds_chosen": 10.404594421386719, "log_odds_ratio": -0.00043424172326922417, "logits/chosen": -0.3541746139526367, "logits/rejected": -0.3857997953891754, "logps/chosen": -0.0004474206070881337, "logps/rejected": -2.0740795135498047, "loss": 0.3256, "nll_loss": 0.08135127276182175, "rewards/accuracies": 1.0, "rewards/chosen": -4.4742064346792176e-05, "rewards/margins": 0.20736320316791534, "rewards/rejected": -0.20740795135498047, "step": 10209 }, { "epoch": 7.060857538035961, "grad_norm": 6.20536994934082, "learning_rate": 1.632856923313355e-05, "log_odds_chosen": 11.783140182495117, "log_odds_ratio": -2.0506131477304734e-05, "logits/chosen": -0.2444104254245758, "logits/rejected": -0.2824934124946594, "logps/chosen": -0.0010264317970722914, "logps/rejected": -3.3801109790802, "loss": 0.7628, "nll_loss": 0.19070187211036682, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010264317825203761, "rewards/margins": 0.33790841698646545, "rewards/rejected": -0.33801108598709106, "step": 10210 }, { "epoch": 7.061549100968188, "grad_norm": 5.094874858856201, "learning_rate": 1.63247272168434e-05, "log_odds_chosen": 11.312137603759766, "log_odds_ratio": -7.084964454406872e-05, "logits/chosen": -0.21776999533176422, "logits/rejected": -0.26254236698150635, "logps/chosen": -0.00019212032202631235, "logps/rejected": -2.4793243408203125, "loss": 0.6099, "nll_loss": 0.1524657905101776, "rewards/accuracies": 1.0, "rewards/chosen": -1.9212033294024877e-05, "rewards/margins": 0.24791322648525238, "rewards/rejected": -0.24793241918087006, "step": 10211 }, { "epoch": 7.062240663900415, "grad_norm": 5.680364608764648, "learning_rate": 1.6320885200553252e-05, "log_odds_chosen": 10.518997192382812, "log_odds_ratio": -5.35045110154897e-05, "logits/chosen": -0.6529715061187744, "logits/rejected": -0.7397947907447815, "logps/chosen": -0.0001638684479985386, "logps/rejected": -1.8137354850769043, "loss": 0.6617, "nll_loss": 0.16541114449501038, "rewards/accuracies": 1.0, "rewards/chosen": -1.6386846255045384e-05, "rewards/margins": 0.18135717511177063, "rewards/rejected": -0.18137355148792267, "step": 10212 }, { "epoch": 7.0629322268326415, "grad_norm": 8.546245574951172, "learning_rate": 1.6317043184263104e-05, "log_odds_chosen": 10.472393035888672, "log_odds_ratio": -6.55086332699284e-05, "logits/chosen": -0.2394629418849945, "logits/rejected": -0.44569700956344604, "logps/chosen": -0.0004339215811342001, "logps/rejected": -2.0617246627807617, "loss": 0.5088, "nll_loss": 0.12718401849269867, "rewards/accuracies": 1.0, "rewards/chosen": -4.339216320659034e-05, "rewards/margins": 0.2061290442943573, "rewards/rejected": -0.20617243647575378, "step": 10213 }, { "epoch": 7.063623789764868, "grad_norm": 15.67087459564209, "learning_rate": 1.6313201167972953e-05, "log_odds_chosen": 10.652711868286133, "log_odds_ratio": -0.00014126955647952855, "logits/chosen": -0.45673927664756775, "logits/rejected": -0.45390135049819946, "logps/chosen": -0.0003136818122584373, "logps/rejected": -2.313603162765503, "loss": 0.5092, "nll_loss": 0.12729515135288239, "rewards/accuracies": 1.0, "rewards/chosen": -3.136818122584373e-05, "rewards/margins": 0.23132893443107605, "rewards/rejected": -0.2313603013753891, "step": 10214 }, { "epoch": 7.064315352697095, "grad_norm": 7.342169284820557, "learning_rate": 1.6309359151682806e-05, "log_odds_chosen": 10.016858100891113, "log_odds_ratio": -0.00016397902800235897, "logits/chosen": -0.3828228712081909, "logits/rejected": -0.45859494805336, "logps/chosen": -0.0007054362213239074, "logps/rejected": -2.5958151817321777, "loss": 0.5443, "nll_loss": 0.13604670763015747, "rewards/accuracies": 1.0, "rewards/chosen": -7.054361776681617e-05, "rewards/margins": 0.25951096415519714, "rewards/rejected": -0.2595815062522888, "step": 10215 }, { "epoch": 7.065006915629322, "grad_norm": 5.9624528884887695, "learning_rate": 1.6305517135392655e-05, "log_odds_chosen": 11.168617248535156, "log_odds_ratio": -4.607412120094523e-05, "logits/chosen": -0.8698675632476807, "logits/rejected": -0.9820318222045898, "logps/chosen": -0.0002429347368888557, "logps/rejected": -2.4946818351745605, "loss": 0.3995, "nll_loss": 0.09987377375364304, "rewards/accuracies": 1.0, "rewards/chosen": -2.4293474780279212e-05, "rewards/margins": 0.2494438886642456, "rewards/rejected": -0.24946817755699158, "step": 10216 }, { "epoch": 7.065698478561549, "grad_norm": 3.372307062149048, "learning_rate": 1.6301675119102504e-05, "log_odds_chosen": 10.415140151977539, "log_odds_ratio": -6.47974229650572e-05, "logits/chosen": -0.6113623976707458, "logits/rejected": -0.6171808838844299, "logps/chosen": -0.0001965291448868811, "logps/rejected": -1.7590970993041992, "loss": 0.2584, "nll_loss": 0.06460408866405487, "rewards/accuracies": 1.0, "rewards/chosen": -1.965291448868811e-05, "rewards/margins": 0.17589005827903748, "rewards/rejected": -0.17590972781181335, "step": 10217 }, { "epoch": 7.066390041493776, "grad_norm": 2.9455246925354004, "learning_rate": 1.6297833102812356e-05, "log_odds_chosen": 10.359132766723633, "log_odds_ratio": -8.454386261291802e-05, "logits/chosen": -0.9020794034004211, "logits/rejected": -0.8879674673080444, "logps/chosen": -0.000216637272387743, "logps/rejected": -1.8610130548477173, "loss": 0.2557, "nll_loss": 0.06390724331140518, "rewards/accuracies": 1.0, "rewards/chosen": -2.16637272387743e-05, "rewards/margins": 0.18607963621616364, "rewards/rejected": -0.1861013025045395, "step": 10218 }, { "epoch": 7.0670816044260025, "grad_norm": 8.130203247070312, "learning_rate": 1.629399108652221e-05, "log_odds_chosen": 9.983545303344727, "log_odds_ratio": -0.00011885909043485299, "logits/chosen": -0.5236350297927856, "logits/rejected": -0.5692715644836426, "logps/chosen": -0.0002489006146788597, "logps/rejected": -1.7812128067016602, "loss": 0.5579, "nll_loss": 0.1394590139389038, "rewards/accuracies": 1.0, "rewards/chosen": -2.4890059648896568e-05, "rewards/margins": 0.17809638381004333, "rewards/rejected": -0.17812128365039825, "step": 10219 }, { "epoch": 7.067773167358229, "grad_norm": 6.38986873626709, "learning_rate": 1.6290149070232058e-05, "log_odds_chosen": 9.96983814239502, "log_odds_ratio": -0.0007384002674371004, "logits/chosen": -0.3837193548679352, "logits/rejected": -0.4665347933769226, "logps/chosen": -0.0025215353816747665, "logps/rejected": -2.4805798530578613, "loss": 0.5105, "nll_loss": 0.12754540145397186, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025215354980900884, "rewards/margins": 0.24780580401420593, "rewards/rejected": -0.24805796146392822, "step": 10220 }, { "epoch": 7.068464730290456, "grad_norm": 5.260932922363281, "learning_rate": 1.628630705394191e-05, "log_odds_chosen": 10.382405281066895, "log_odds_ratio": -4.916860780213028e-05, "logits/chosen": -0.5273264050483704, "logits/rejected": -0.4013931453227997, "logps/chosen": -0.00019109931599814445, "logps/rejected": -1.818406581878662, "loss": 0.562, "nll_loss": 0.1404985934495926, "rewards/accuracies": 1.0, "rewards/chosen": -1.9109931599814445e-05, "rewards/margins": 0.1818215399980545, "rewards/rejected": -0.1818406581878662, "step": 10221 }, { "epoch": 7.069156293222683, "grad_norm": 5.364531993865967, "learning_rate": 1.6282465037651763e-05, "log_odds_chosen": 10.817171096801758, "log_odds_ratio": -5.73900033487007e-05, "logits/chosen": -0.4159190058708191, "logits/rejected": -0.45596784353256226, "logps/chosen": -0.00012160463666077703, "logps/rejected": -1.9424967765808105, "loss": 0.4322, "nll_loss": 0.10803662240505219, "rewards/accuracies": 1.0, "rewards/chosen": -1.2160464393673465e-05, "rewards/margins": 0.19423751533031464, "rewards/rejected": -0.19424967467784882, "step": 10222 }, { "epoch": 7.06984785615491, "grad_norm": 5.511767387390137, "learning_rate": 1.6278623021361612e-05, "log_odds_chosen": 10.276840209960938, "log_odds_ratio": -0.00043214912875555456, "logits/chosen": -0.16192559897899628, "logits/rejected": -0.21732264757156372, "logps/chosen": -0.0008462379919365048, "logps/rejected": -2.0555167198181152, "loss": 0.6557, "nll_loss": 0.16387483477592468, "rewards/accuracies": 1.0, "rewards/chosen": -8.462379628326744e-05, "rewards/margins": 0.20546706020832062, "rewards/rejected": -0.20555168390274048, "step": 10223 }, { "epoch": 7.070539419087137, "grad_norm": 11.26865291595459, "learning_rate": 1.6274781005071464e-05, "log_odds_chosen": 10.140848159790039, "log_odds_ratio": -0.0001493426680099219, "logits/chosen": -0.12188369035720825, "logits/rejected": -0.08648045361042023, "logps/chosen": -0.0004011866985820234, "logps/rejected": -1.6165556907653809, "loss": 0.5129, "nll_loss": 0.12821908295154572, "rewards/accuracies": 1.0, "rewards/chosen": -4.011866985820234e-05, "rewards/margins": 0.16161544620990753, "rewards/rejected": -0.16165557503700256, "step": 10224 }, { "epoch": 7.0712309820193635, "grad_norm": 4.674397945404053, "learning_rate": 1.6270938988781313e-05, "log_odds_chosen": 11.605939865112305, "log_odds_ratio": -1.6314776075887494e-05, "logits/chosen": -0.18150296807289124, "logits/rejected": -0.30134832859039307, "logps/chosen": -0.00014632599777542055, "logps/rejected": -2.1314470767974854, "loss": 0.4547, "nll_loss": 0.11368532478809357, "rewards/accuracies": 1.0, "rewards/chosen": -1.4632600141339935e-05, "rewards/margins": 0.21313008666038513, "rewards/rejected": -0.2131447196006775, "step": 10225 }, { "epoch": 7.07192254495159, "grad_norm": 7.563007831573486, "learning_rate": 1.6267096972491162e-05, "log_odds_chosen": 11.143383979797363, "log_odds_ratio": -6.769195169908926e-05, "logits/chosen": -0.3891788125038147, "logits/rejected": -0.6019364595413208, "logps/chosen": -0.0005590927321463823, "logps/rejected": -2.7319889068603516, "loss": 0.5802, "nll_loss": 0.14505285024642944, "rewards/accuracies": 1.0, "rewards/chosen": -5.5909269576659426e-05, "rewards/margins": 0.2731429934501648, "rewards/rejected": -0.2731989026069641, "step": 10226 }, { "epoch": 7.072614107883817, "grad_norm": 4.823071002960205, "learning_rate": 1.6263254956201015e-05, "log_odds_chosen": 10.227241516113281, "log_odds_ratio": -9.763806156115606e-05, "logits/chosen": -0.6507863402366638, "logits/rejected": -0.7303443551063538, "logps/chosen": -0.00030584761407226324, "logps/rejected": -1.9578144550323486, "loss": 0.2998, "nll_loss": 0.07494589686393738, "rewards/accuracies": 1.0, "rewards/chosen": -3.058476067963056e-05, "rewards/margins": 0.19575083255767822, "rewards/rejected": -0.19578143954277039, "step": 10227 }, { "epoch": 7.073305670816044, "grad_norm": 3.057354688644409, "learning_rate": 1.6259412939910864e-05, "log_odds_chosen": 10.67620849609375, "log_odds_ratio": -0.00018128998635802418, "logits/chosen": -0.650242030620575, "logits/rejected": -0.6747527718544006, "logps/chosen": -0.00023936937213875353, "logps/rejected": -1.8818947076797485, "loss": 0.3277, "nll_loss": 0.08190518617630005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3936938305268995e-05, "rewards/margins": 0.188165545463562, "rewards/rejected": -0.18818946182727814, "step": 10228 }, { "epoch": 7.073997233748271, "grad_norm": 6.393499851226807, "learning_rate": 1.6255570923620716e-05, "log_odds_chosen": 11.460598945617676, "log_odds_ratio": -1.495017932029441e-05, "logits/chosen": -0.4063599407672882, "logits/rejected": -0.4028100371360779, "logps/chosen": -0.00011225016351090744, "logps/rejected": -2.349792957305908, "loss": 0.4413, "nll_loss": 0.11032424122095108, "rewards/accuracies": 1.0, "rewards/chosen": -1.1225016351090744e-05, "rewards/margins": 0.23496806621551514, "rewards/rejected": -0.2349792867898941, "step": 10229 }, { "epoch": 7.074688796680498, "grad_norm": 6.394151210784912, "learning_rate": 1.625172890733057e-05, "log_odds_chosen": 11.290897369384766, "log_odds_ratio": -2.093686634907499e-05, "logits/chosen": -0.2111247032880783, "logits/rejected": -0.30278387665748596, "logps/chosen": -0.00013798041618429124, "logps/rejected": -2.391096591949463, "loss": 0.5643, "nll_loss": 0.1410730928182602, "rewards/accuracies": 1.0, "rewards/chosen": -1.3798041436530184e-05, "rewards/margins": 0.23909586668014526, "rewards/rejected": -0.23910966515541077, "step": 10230 }, { "epoch": 7.0753803596127245, "grad_norm": 5.258058071136475, "learning_rate": 1.6247886891040418e-05, "log_odds_chosen": 10.287449836730957, "log_odds_ratio": -0.00016187971050385386, "logits/chosen": -0.5530920028686523, "logits/rejected": -0.6217649579048157, "logps/chosen": -0.00041097920620813966, "logps/rejected": -1.967341423034668, "loss": 0.5306, "nll_loss": 0.1326388567686081, "rewards/accuracies": 1.0, "rewards/chosen": -4.109792644158006e-05, "rewards/margins": 0.1966930329799652, "rewards/rejected": -0.19673413038253784, "step": 10231 }, { "epoch": 7.076071922544951, "grad_norm": 7.700748920440674, "learning_rate": 1.624404487475027e-05, "log_odds_chosen": 10.730502128601074, "log_odds_ratio": -5.2122355555184186e-05, "logits/chosen": -0.06191644072532654, "logits/rejected": -0.14686648547649384, "logps/chosen": -0.00042249378748238087, "logps/rejected": -1.9075627326965332, "loss": 0.3847, "nll_loss": 0.09616245329380035, "rewards/accuracies": 1.0, "rewards/chosen": -4.224937947583385e-05, "rewards/margins": 0.1907140165567398, "rewards/rejected": -0.19075626134872437, "step": 10232 }, { "epoch": 7.076763485477178, "grad_norm": 6.837832927703857, "learning_rate": 1.6240202858460123e-05, "log_odds_chosen": 11.609643936157227, "log_odds_ratio": -0.00018670190183911473, "logits/chosen": 0.05244845151901245, "logits/rejected": -0.07849866151809692, "logps/chosen": -0.00014949383330531418, "logps/rejected": -2.7908411026000977, "loss": 0.8373, "nll_loss": 0.20931746065616608, "rewards/accuracies": 1.0, "rewards/chosen": -1.494938442192506e-05, "rewards/margins": 0.279069185256958, "rewards/rejected": -0.27908408641815186, "step": 10233 }, { "epoch": 7.077455048409405, "grad_norm": 3.711728096008301, "learning_rate": 1.6236360842169972e-05, "log_odds_chosen": 11.082402229309082, "log_odds_ratio": -3.26655208482407e-05, "logits/chosen": -0.3518868684768677, "logits/rejected": -0.37547898292541504, "logps/chosen": -0.0003602092619985342, "logps/rejected": -2.4345664978027344, "loss": 0.6388, "nll_loss": 0.15970079600811005, "rewards/accuracies": 1.0, "rewards/chosen": -3.602092692744918e-05, "rewards/margins": 0.24342063069343567, "rewards/rejected": -0.24345663189888, "step": 10234 }, { "epoch": 7.078146611341632, "grad_norm": 7.973918914794922, "learning_rate": 1.623251882587982e-05, "log_odds_chosen": 10.20424747467041, "log_odds_ratio": -0.00019141007214784622, "logits/chosen": -0.6154760122299194, "logits/rejected": -0.7376799583435059, "logps/chosen": -0.0003875193651765585, "logps/rejected": -1.942379355430603, "loss": 0.5139, "nll_loss": 0.1284589171409607, "rewards/accuracies": 1.0, "rewards/chosen": -3.8751939428038895e-05, "rewards/margins": 0.19419917464256287, "rewards/rejected": -0.19423794746398926, "step": 10235 }, { "epoch": 7.078838174273859, "grad_norm": 4.550065040588379, "learning_rate": 1.6228676809589673e-05, "log_odds_chosen": 10.399216651916504, "log_odds_ratio": -9.611922723706812e-05, "logits/chosen": -0.11642065644264221, "logits/rejected": -0.13028909265995026, "logps/chosen": -0.0006603579386137426, "logps/rejected": -2.431800603866577, "loss": 0.4395, "nll_loss": 0.10985739529132843, "rewards/accuracies": 1.0, "rewards/chosen": -6.603579822694883e-05, "rewards/margins": 0.24311403930187225, "rewards/rejected": -0.2431800663471222, "step": 10236 }, { "epoch": 7.0795297372060855, "grad_norm": 5.748769760131836, "learning_rate": 1.6224834793299522e-05, "log_odds_chosen": 10.851175308227539, "log_odds_ratio": -0.00013820805179420859, "logits/chosen": -0.08196417987346649, "logits/rejected": -0.1969267725944519, "logps/chosen": -0.0003377099637873471, "logps/rejected": -2.160815715789795, "loss": 0.4784, "nll_loss": 0.11958113312721252, "rewards/accuracies": 1.0, "rewards/chosen": -3.3771000744309276e-05, "rewards/margins": 0.21604779362678528, "rewards/rejected": -0.21608155965805054, "step": 10237 }, { "epoch": 7.080221300138312, "grad_norm": 4.433885097503662, "learning_rate": 1.6220992777009375e-05, "log_odds_chosen": 10.613290786743164, "log_odds_ratio": -7.50662584323436e-05, "logits/chosen": -0.6019273400306702, "logits/rejected": -0.6181836128234863, "logps/chosen": -0.0005927207530476153, "logps/rejected": -2.4414610862731934, "loss": 0.5776, "nll_loss": 0.14439508318901062, "rewards/accuracies": 1.0, "rewards/chosen": -5.927207166678272e-05, "rewards/margins": 0.2440868616104126, "rewards/rejected": -0.24414612352848053, "step": 10238 }, { "epoch": 7.080912863070539, "grad_norm": 5.122983932495117, "learning_rate": 1.6217150760719227e-05, "log_odds_chosen": 10.881866455078125, "log_odds_ratio": -6.110264075687155e-05, "logits/chosen": -0.3635219931602478, "logits/rejected": -0.38984811305999756, "logps/chosen": -0.00017556434613652527, "logps/rejected": -2.0348191261291504, "loss": 0.4104, "nll_loss": 0.10259927809238434, "rewards/accuracies": 1.0, "rewards/chosen": -1.7556434613652527e-05, "rewards/margins": 0.2034643590450287, "rewards/rejected": -0.20348191261291504, "step": 10239 }, { "epoch": 7.081604426002766, "grad_norm": 3.281977891921997, "learning_rate": 1.6213308744429076e-05, "log_odds_chosen": 10.762151718139648, "log_odds_ratio": -0.00011343157530063763, "logits/chosen": -0.8903244733810425, "logits/rejected": -0.8314093947410583, "logps/chosen": -0.00030741217778995633, "logps/rejected": -1.9653372764587402, "loss": 0.5867, "nll_loss": 0.14665445685386658, "rewards/accuracies": 1.0, "rewards/chosen": -3.074121559620835e-05, "rewards/margins": 0.19650298357009888, "rewards/rejected": -0.1965337097644806, "step": 10240 }, { "epoch": 7.082295988934993, "grad_norm": 5.201610088348389, "learning_rate": 1.620946672813893e-05, "log_odds_chosen": 11.167607307434082, "log_odds_ratio": -4.033373625134118e-05, "logits/chosen": -0.05866801738739014, "logits/rejected": -0.07460634410381317, "logps/chosen": -0.0003725805436260998, "logps/rejected": -2.66947078704834, "loss": 0.7783, "nll_loss": 0.19457535445690155, "rewards/accuracies": 1.0, "rewards/chosen": -3.725805436260998e-05, "rewards/margins": 0.26690980792045593, "rewards/rejected": -0.26694709062576294, "step": 10241 }, { "epoch": 7.08298755186722, "grad_norm": 7.919643402099609, "learning_rate": 1.620562471184878e-05, "log_odds_chosen": 9.751909255981445, "log_odds_ratio": -0.0007580799865536392, "logits/chosen": -0.5962412357330322, "logits/rejected": -0.565430223941803, "logps/chosen": -0.0007312754751183093, "logps/rejected": -1.5626683235168457, "loss": 0.7561, "nll_loss": 0.18895158171653748, "rewards/accuracies": 1.0, "rewards/chosen": -7.312755042221397e-05, "rewards/margins": 0.15619370341300964, "rewards/rejected": -0.15626683831214905, "step": 10242 }, { "epoch": 7.0836791147994465, "grad_norm": 5.059696197509766, "learning_rate": 1.620178269555863e-05, "log_odds_chosen": 10.337515830993652, "log_odds_ratio": -0.00017638430290389806, "logits/chosen": 0.14084503054618835, "logits/rejected": 0.10576988756656647, "logps/chosen": -0.0007075028261169791, "logps/rejected": -2.468397617340088, "loss": 0.6239, "nll_loss": 0.155946746468544, "rewards/accuracies": 1.0, "rewards/chosen": -7.075029134284705e-05, "rewards/margins": 0.2467690110206604, "rewards/rejected": -0.24683977663516998, "step": 10243 }, { "epoch": 7.084370677731673, "grad_norm": 5.1300177574157715, "learning_rate": 1.619794067926848e-05, "log_odds_chosen": 12.081521034240723, "log_odds_ratio": -1.1685681784001645e-05, "logits/chosen": -0.21191151440143585, "logits/rejected": -0.2935439348220825, "logps/chosen": -8.348415576620027e-05, "logps/rejected": -2.5798864364624023, "loss": 0.5743, "nll_loss": 0.14358538389205933, "rewards/accuracies": 1.0, "rewards/chosen": -8.34841648611473e-06, "rewards/margins": 0.2579803168773651, "rewards/rejected": -0.25798869132995605, "step": 10244 }, { "epoch": 7.0850622406639, "grad_norm": 5.458332538604736, "learning_rate": 1.619409866297833e-05, "log_odds_chosen": 10.951864242553711, "log_odds_ratio": -2.5700946935103275e-05, "logits/chosen": -0.5814685821533203, "logits/rejected": -0.651351273059845, "logps/chosen": -0.00015072792302817106, "logps/rejected": -2.1194839477539062, "loss": 0.4206, "nll_loss": 0.10515782982110977, "rewards/accuracies": 1.0, "rewards/chosen": -1.5072793758008629e-05, "rewards/margins": 0.21193332970142365, "rewards/rejected": -0.21194839477539062, "step": 10245 }, { "epoch": 7.085753803596127, "grad_norm": 4.95285177230835, "learning_rate": 1.619025664668818e-05, "log_odds_chosen": 10.801020622253418, "log_odds_ratio": -0.00013282234431244433, "logits/chosen": -0.41535109281539917, "logits/rejected": -0.4948478937149048, "logps/chosen": -0.00011315855226712301, "logps/rejected": -2.1074440479278564, "loss": 0.6761, "nll_loss": 0.16900669038295746, "rewards/accuracies": 1.0, "rewards/chosen": -1.131585486291442e-05, "rewards/margins": 0.2107330858707428, "rewards/rejected": -0.21074441075325012, "step": 10246 }, { "epoch": 7.086445366528354, "grad_norm": 5.93438720703125, "learning_rate": 1.6186414630398033e-05, "log_odds_chosen": 10.71384048461914, "log_odds_ratio": -0.0002663441700860858, "logits/chosen": -0.3704487085342407, "logits/rejected": -0.44287770986557007, "logps/chosen": -0.000291989475954324, "logps/rejected": -2.676551580429077, "loss": 0.4612, "nll_loss": 0.11527866125106812, "rewards/accuracies": 1.0, "rewards/chosen": -2.9198949050623924e-05, "rewards/margins": 0.26762595772743225, "rewards/rejected": -0.2676551640033722, "step": 10247 }, { "epoch": 7.087136929460581, "grad_norm": 4.31528902053833, "learning_rate": 1.6182572614107886e-05, "log_odds_chosen": 11.076179504394531, "log_odds_ratio": -7.725445175310597e-05, "logits/chosen": -0.18510644137859344, "logits/rejected": -0.2398597002029419, "logps/chosen": -0.00028446520445868373, "logps/rejected": -2.576986312866211, "loss": 0.4202, "nll_loss": 0.1050301194190979, "rewards/accuracies": 1.0, "rewards/chosen": -2.844651862687897e-05, "rewards/margins": 0.25767016410827637, "rewards/rejected": -0.2576986253261566, "step": 10248 }, { "epoch": 7.087828492392807, "grad_norm": 6.1143951416015625, "learning_rate": 1.6178730597817735e-05, "log_odds_chosen": 9.519444465637207, "log_odds_ratio": -0.0008869217708706856, "logits/chosen": -0.04826436936855316, "logits/rejected": -0.23044337332248688, "logps/chosen": -0.0009210369898937643, "logps/rejected": -1.7885702848434448, "loss": 0.6443, "nll_loss": 0.16099432110786438, "rewards/accuracies": 1.0, "rewards/chosen": -9.210369898937643e-05, "rewards/margins": 0.1787649393081665, "rewards/rejected": -0.17885704338550568, "step": 10249 }, { "epoch": 7.088520055325034, "grad_norm": 5.239924430847168, "learning_rate": 1.6174888581527587e-05, "log_odds_chosen": 10.471399307250977, "log_odds_ratio": -0.00048443872947245836, "logits/chosen": -0.21016259491443634, "logits/rejected": -0.25805291533470154, "logps/chosen": -0.0004966690903529525, "logps/rejected": -2.707772970199585, "loss": 0.6466, "nll_loss": 0.16160735487937927, "rewards/accuracies": 1.0, "rewards/chosen": -4.966690903529525e-05, "rewards/margins": 0.27072763442993164, "rewards/rejected": -0.27077731490135193, "step": 10250 }, { "epoch": 7.089211618257261, "grad_norm": 5.758553981781006, "learning_rate": 1.617104656523744e-05, "log_odds_chosen": 10.279219627380371, "log_odds_ratio": -7.061962969601154e-05, "logits/chosen": -0.4701883792877197, "logits/rejected": -0.5953389406204224, "logps/chosen": -0.0011390014551579952, "logps/rejected": -2.49090838432312, "loss": 0.6064, "nll_loss": 0.15158718824386597, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011390014697099105, "rewards/margins": 0.2489769607782364, "rewards/rejected": -0.24909085035324097, "step": 10251 }, { "epoch": 7.089903181189488, "grad_norm": 6.174829483032227, "learning_rate": 1.616720454894729e-05, "log_odds_chosen": 11.544928550720215, "log_odds_ratio": -2.1656065655406564e-05, "logits/chosen": -0.46109312772750854, "logits/rejected": -0.3749909996986389, "logps/chosen": -0.0003066678764298558, "logps/rejected": -2.6392576694488525, "loss": 0.4445, "nll_loss": 0.11111783236265182, "rewards/accuracies": 1.0, "rewards/chosen": -3.066678618779406e-05, "rewards/margins": 0.26389509439468384, "rewards/rejected": -0.2639257609844208, "step": 10252 }, { "epoch": 7.090594744121715, "grad_norm": 5.161226272583008, "learning_rate": 1.6163362532657138e-05, "log_odds_chosen": 11.50408935546875, "log_odds_ratio": -2.141688673873432e-05, "logits/chosen": -0.36267930269241333, "logits/rejected": -0.41441887617111206, "logps/chosen": -9.26033389987424e-05, "logps/rejected": -2.279778003692627, "loss": 0.4567, "nll_loss": 0.11417672038078308, "rewards/accuracies": 1.0, "rewards/chosen": -9.260334991267882e-06, "rewards/margins": 0.22796852886676788, "rewards/rejected": -0.22797778248786926, "step": 10253 }, { "epoch": 7.091286307053942, "grad_norm": 6.588757038116455, "learning_rate": 1.615952051636699e-05, "log_odds_chosen": 9.77501106262207, "log_odds_ratio": -0.012199745513498783, "logits/chosen": -0.7907540798187256, "logits/rejected": -0.9560619592666626, "logps/chosen": -0.006054680794477463, "logps/rejected": -2.5418083667755127, "loss": 0.9669, "nll_loss": 0.24049539864063263, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006054680561646819, "rewards/margins": 0.2535753846168518, "rewards/rejected": -0.2541808485984802, "step": 10254 }, { "epoch": 7.091977869986168, "grad_norm": 8.896109580993652, "learning_rate": 1.615567850007684e-05, "log_odds_chosen": 10.15814208984375, "log_odds_ratio": -0.00021658647165168077, "logits/chosen": -0.1415342092514038, "logits/rejected": -0.12889420986175537, "logps/chosen": -0.0009950262028723955, "logps/rejected": -2.5493836402893066, "loss": 0.6642, "nll_loss": 0.16603264212608337, "rewards/accuracies": 1.0, "rewards/chosen": -9.950262028723955e-05, "rewards/margins": 0.25483888387680054, "rewards/rejected": -0.25493836402893066, "step": 10255 }, { "epoch": 7.092669432918395, "grad_norm": 5.284745693206787, "learning_rate": 1.615183648378669e-05, "log_odds_chosen": 10.789627075195312, "log_odds_ratio": -5.579138814937323e-05, "logits/chosen": -0.7399893999099731, "logits/rejected": -0.7633627653121948, "logps/chosen": -0.00013292254880070686, "logps/rejected": -1.8104476928710938, "loss": 0.4407, "nll_loss": 0.11017127335071564, "rewards/accuracies": 1.0, "rewards/chosen": -1.3292254152474925e-05, "rewards/margins": 0.1810314953327179, "rewards/rejected": -0.1810447871685028, "step": 10256 }, { "epoch": 7.093360995850622, "grad_norm": 5.26345682144165, "learning_rate": 1.6147994467496544e-05, "log_odds_chosen": 12.348129272460938, "log_odds_ratio": -5.9028197938459925e-06, "logits/chosen": -0.5816521644592285, "logits/rejected": -0.6002523899078369, "logps/chosen": -8.477355731884018e-05, "logps/rejected": -2.757575273513794, "loss": 0.5918, "nll_loss": 0.1479586660861969, "rewards/accuracies": 1.0, "rewards/chosen": -8.477356459479779e-06, "rewards/margins": 0.2757490575313568, "rewards/rejected": -0.2757575511932373, "step": 10257 }, { "epoch": 7.094052558782849, "grad_norm": 4.62961483001709, "learning_rate": 1.6144152451206393e-05, "log_odds_chosen": 10.03465461730957, "log_odds_ratio": -0.00032837854814715683, "logits/chosen": -0.6499353647232056, "logits/rejected": -0.5975565910339355, "logps/chosen": -0.0003332508495077491, "logps/rejected": -1.9092382192611694, "loss": 0.4114, "nll_loss": 0.10280890017747879, "rewards/accuracies": 1.0, "rewards/chosen": -3.332508640596643e-05, "rewards/margins": 0.19089049100875854, "rewards/rejected": -0.190923810005188, "step": 10258 }, { "epoch": 7.094744121715076, "grad_norm": 8.160360336303711, "learning_rate": 1.6140310434916246e-05, "log_odds_chosen": 11.132489204406738, "log_odds_ratio": -4.48873033747077e-05, "logits/chosen": -0.7189033627510071, "logits/rejected": -0.7392956614494324, "logps/chosen": -0.0008609866490587592, "logps/rejected": -3.053264617919922, "loss": 0.6704, "nll_loss": 0.16760316491127014, "rewards/accuracies": 1.0, "rewards/chosen": -8.609866927145049e-05, "rewards/margins": 0.30524036288261414, "rewards/rejected": -0.3053264617919922, "step": 10259 }, { "epoch": 7.095435684647303, "grad_norm": 6.795961380004883, "learning_rate": 1.6136468418626098e-05, "log_odds_chosen": 11.607210159301758, "log_odds_ratio": -1.796493597794324e-05, "logits/chosen": -0.5604426860809326, "logits/rejected": -0.5590736865997314, "logps/chosen": -0.00020991811470594257, "logps/rejected": -2.8993279933929443, "loss": 0.5176, "nll_loss": 0.12940140068531036, "rewards/accuracies": 1.0, "rewards/chosen": -2.0991810742998496e-05, "rewards/margins": 0.28991180658340454, "rewards/rejected": -0.28993281722068787, "step": 10260 }, { "epoch": 7.096127247579529, "grad_norm": 6.2075910568237305, "learning_rate": 1.6132626402335947e-05, "log_odds_chosen": 9.902588844299316, "log_odds_ratio": -0.0001481300569139421, "logits/chosen": -0.29406067728996277, "logits/rejected": -0.4131001830101013, "logps/chosen": -0.000532010046299547, "logps/rejected": -1.9056155681610107, "loss": 0.5059, "nll_loss": 0.1264542192220688, "rewards/accuracies": 1.0, "rewards/chosen": -5.320100535755046e-05, "rewards/margins": 0.19050836563110352, "rewards/rejected": -0.19056154787540436, "step": 10261 }, { "epoch": 7.096818810511756, "grad_norm": 4.122228145599365, "learning_rate": 1.6128784386045796e-05, "log_odds_chosen": 10.819419860839844, "log_odds_ratio": -8.17267646198161e-05, "logits/chosen": -0.08426457643508911, "logits/rejected": -0.1672859489917755, "logps/chosen": -0.0007713919621892273, "logps/rejected": -2.8938684463500977, "loss": 0.6276, "nll_loss": 0.1568855345249176, "rewards/accuracies": 1.0, "rewards/chosen": -7.713919330853969e-05, "rewards/margins": 0.28930971026420593, "rewards/rejected": -0.2893868684768677, "step": 10262 }, { "epoch": 7.097510373443983, "grad_norm": 3.4906275272369385, "learning_rate": 1.612494236975565e-05, "log_odds_chosen": 10.980330467224121, "log_odds_ratio": -0.0008700335747562349, "logits/chosen": -0.34543806314468384, "logits/rejected": -0.43487393856048584, "logps/chosen": -0.004367163870483637, "logps/rejected": -2.638749361038208, "loss": 0.4312, "nll_loss": 0.10771296173334122, "rewards/accuracies": 1.0, "rewards/chosen": -0.00043671642197296023, "rewards/margins": 0.26343822479248047, "rewards/rejected": -0.26387494802474976, "step": 10263 }, { "epoch": 7.09820193637621, "grad_norm": 8.13571834564209, "learning_rate": 1.6121100353465498e-05, "log_odds_chosen": 10.953519821166992, "log_odds_ratio": -2.4463508452754468e-05, "logits/chosen": -0.7077839970588684, "logits/rejected": -0.7161340713500977, "logps/chosen": -0.00047470693243667483, "logps/rejected": -2.5533576011657715, "loss": 0.9703, "nll_loss": 0.24257071316242218, "rewards/accuracies": 1.0, "rewards/chosen": -4.747069760924205e-05, "rewards/margins": 0.255288302898407, "rewards/rejected": -0.2553357481956482, "step": 10264 }, { "epoch": 7.098893499308437, "grad_norm": 4.885742664337158, "learning_rate": 1.611725833717535e-05, "log_odds_chosen": 11.693151473999023, "log_odds_ratio": -2.9904567782068625e-05, "logits/chosen": -0.14424534142017365, "logits/rejected": -0.17319512367248535, "logps/chosen": -0.00031677918741479516, "logps/rejected": -3.179150342941284, "loss": 0.4999, "nll_loss": 0.12497483193874359, "rewards/accuracies": 1.0, "rewards/chosen": -3.1677918741479516e-05, "rewards/margins": 0.31788334250450134, "rewards/rejected": -0.31791502237319946, "step": 10265 }, { "epoch": 7.0995850622406635, "grad_norm": 5.787229537963867, "learning_rate": 1.6113416320885202e-05, "log_odds_chosen": 10.765002250671387, "log_odds_ratio": -0.00011930213076993823, "logits/chosen": -0.38844630122184753, "logits/rejected": -0.4450450837612152, "logps/chosen": -0.00026077215443365276, "logps/rejected": -1.7682405710220337, "loss": 0.5353, "nll_loss": 0.13381041586399078, "rewards/accuracies": 1.0, "rewards/chosen": -2.607721762615256e-05, "rewards/margins": 0.17679797112941742, "rewards/rejected": -0.17682406306266785, "step": 10266 }, { "epoch": 7.10027662517289, "grad_norm": 6.018062591552734, "learning_rate": 1.610957430459505e-05, "log_odds_chosen": 10.322380065917969, "log_odds_ratio": -0.00014119291154202074, "logits/chosen": -0.3337664008140564, "logits/rejected": -0.29941171407699585, "logps/chosen": -0.00019943459483329207, "logps/rejected": -1.98537015914917, "loss": 0.6841, "nll_loss": 0.17101384699344635, "rewards/accuracies": 1.0, "rewards/chosen": -1.9943458028137684e-05, "rewards/margins": 0.1985170841217041, "rewards/rejected": -0.19853702187538147, "step": 10267 }, { "epoch": 7.100968188105117, "grad_norm": 3.84755277633667, "learning_rate": 1.6105732288304904e-05, "log_odds_chosen": 10.52993392944336, "log_odds_ratio": -3.607830876717344e-05, "logits/chosen": -0.5763347744941711, "logits/rejected": -0.6093184351921082, "logps/chosen": -0.00024122398463077843, "logps/rejected": -2.0790586471557617, "loss": 0.458, "nll_loss": 0.11449619382619858, "rewards/accuracies": 1.0, "rewards/chosen": -2.412239700788632e-05, "rewards/margins": 0.20788174867630005, "rewards/rejected": -0.2079058736562729, "step": 10268 }, { "epoch": 7.101659751037344, "grad_norm": 5.161709308624268, "learning_rate": 1.6101890272014756e-05, "log_odds_chosen": 11.676347732543945, "log_odds_ratio": -1.4311031918623485e-05, "logits/chosen": -0.5673502683639526, "logits/rejected": -0.6082361936569214, "logps/chosen": -8.296072337543592e-05, "logps/rejected": -2.3757548332214355, "loss": 0.5151, "nll_loss": 0.12878523766994476, "rewards/accuracies": 1.0, "rewards/chosen": -8.296072337543592e-06, "rewards/margins": 0.23756720125675201, "rewards/rejected": -0.23757551610469818, "step": 10269 }, { "epoch": 7.102351313969571, "grad_norm": 3.7552056312561035, "learning_rate": 1.6098048255724605e-05, "log_odds_chosen": 11.910984992980957, "log_odds_ratio": -8.620838343631476e-06, "logits/chosen": -0.28908026218414307, "logits/rejected": -0.4366893470287323, "logps/chosen": -0.0001288650673814118, "logps/rejected": -2.667642116546631, "loss": 0.4266, "nll_loss": 0.10665756464004517, "rewards/accuracies": 1.0, "rewards/chosen": -1.2886507647635881e-05, "rewards/margins": 0.2667512893676758, "rewards/rejected": -0.26676419377326965, "step": 10270 }, { "epoch": 7.103042876901798, "grad_norm": 6.64044713973999, "learning_rate": 1.6094206239434455e-05, "log_odds_chosen": 10.219522476196289, "log_odds_ratio": -0.00020672775281127542, "logits/chosen": -0.31991302967071533, "logits/rejected": -0.3143084943294525, "logps/chosen": -0.0004434236034285277, "logps/rejected": -2.1775803565979004, "loss": 0.5875, "nll_loss": 0.1468459665775299, "rewards/accuracies": 1.0, "rewards/chosen": -4.434235961525701e-05, "rewards/margins": 0.21771368384361267, "rewards/rejected": -0.21775802969932556, "step": 10271 }, { "epoch": 7.1037344398340245, "grad_norm": 5.8512678146362305, "learning_rate": 1.6090364223144307e-05, "log_odds_chosen": 11.630446434020996, "log_odds_ratio": -1.6054920706665143e-05, "logits/chosen": -0.19076719880104065, "logits/rejected": -0.22760090231895447, "logps/chosen": -0.00014438344805967063, "logps/rejected": -2.5034735202789307, "loss": 0.5637, "nll_loss": 0.1409282684326172, "rewards/accuracies": 1.0, "rewards/chosen": -1.4438344805967063e-05, "rewards/margins": 0.25033292174339294, "rewards/rejected": -0.2503473460674286, "step": 10272 }, { "epoch": 7.104426002766251, "grad_norm": 4.392897605895996, "learning_rate": 1.6086522206854156e-05, "log_odds_chosen": 9.212032318115234, "log_odds_ratio": -0.0010728597408160567, "logits/chosen": -0.24878910183906555, "logits/rejected": -0.2609459459781647, "logps/chosen": -0.0008410682203248143, "logps/rejected": -1.4358794689178467, "loss": 0.4094, "nll_loss": 0.10223326086997986, "rewards/accuracies": 1.0, "rewards/chosen": -8.410682494286448e-05, "rewards/margins": 0.1435038298368454, "rewards/rejected": -0.14358794689178467, "step": 10273 }, { "epoch": 7.105117565698478, "grad_norm": 3.730031967163086, "learning_rate": 1.608268019056401e-05, "log_odds_chosen": 10.332513809204102, "log_odds_ratio": -5.71914242755156e-05, "logits/chosen": -0.1447673738002777, "logits/rejected": -0.2496945858001709, "logps/chosen": -0.0003094303538091481, "logps/rejected": -2.1362357139587402, "loss": 0.3658, "nll_loss": 0.0914420485496521, "rewards/accuracies": 1.0, "rewards/chosen": -3.094303610851057e-05, "rewards/margins": 0.21359263360500336, "rewards/rejected": -0.21362358331680298, "step": 10274 }, { "epoch": 7.105809128630705, "grad_norm": 6.209634780883789, "learning_rate": 1.607883817427386e-05, "log_odds_chosen": 10.855377197265625, "log_odds_ratio": -5.840754238306545e-05, "logits/chosen": -0.11033168435096741, "logits/rejected": -0.09126077592372894, "logps/chosen": -0.0002168384671676904, "logps/rejected": -1.9451351165771484, "loss": 0.5188, "nll_loss": 0.1296835094690323, "rewards/accuracies": 1.0, "rewards/chosen": -2.1683845261577517e-05, "rewards/margins": 0.19449183344841003, "rewards/rejected": -0.1945134997367859, "step": 10275 }, { "epoch": 7.106500691562932, "grad_norm": 4.6433916091918945, "learning_rate": 1.607499615798371e-05, "log_odds_chosen": 12.053773880004883, "log_odds_ratio": -1.450142099201912e-05, "logits/chosen": -0.31673458218574524, "logits/rejected": -0.35910022258758545, "logps/chosen": -0.00022834296396467835, "logps/rejected": -3.324519395828247, "loss": 0.4187, "nll_loss": 0.10467317700386047, "rewards/accuracies": 1.0, "rewards/chosen": -2.2834297851659358e-05, "rewards/margins": 0.33242911100387573, "rewards/rejected": -0.3324519097805023, "step": 10276 }, { "epoch": 7.107192254495159, "grad_norm": 5.220605373382568, "learning_rate": 1.6071154141693562e-05, "log_odds_chosen": 10.488154411315918, "log_odds_ratio": -0.000241068220930174, "logits/chosen": -0.4683865010738373, "logits/rejected": -0.3266890048980713, "logps/chosen": -0.00121038977522403, "logps/rejected": -2.3827860355377197, "loss": 0.339, "nll_loss": 0.08471442759037018, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012103898916393518, "rewards/margins": 0.23815757036209106, "rewards/rejected": -0.2382786124944687, "step": 10277 }, { "epoch": 7.1078838174273855, "grad_norm": 4.905571460723877, "learning_rate": 1.6067312125403415e-05, "log_odds_chosen": 12.048296928405762, "log_odds_ratio": -9.217043407261372e-05, "logits/chosen": -0.3623006045818329, "logits/rejected": -0.492048054933548, "logps/chosen": -0.0002966909669339657, "logps/rejected": -3.2547688484191895, "loss": 0.9058, "nll_loss": 0.22643068432807922, "rewards/accuracies": 1.0, "rewards/chosen": -2.9669095965800807e-05, "rewards/margins": 0.3254472017288208, "rewards/rejected": -0.32547685503959656, "step": 10278 }, { "epoch": 7.108575380359612, "grad_norm": 5.757862091064453, "learning_rate": 1.6063470109113264e-05, "log_odds_chosen": 10.922185897827148, "log_odds_ratio": -3.313596243970096e-05, "logits/chosen": -0.6016491055488586, "logits/rejected": -0.6293889284133911, "logps/chosen": -0.00030946702463552356, "logps/rejected": -2.4500162601470947, "loss": 0.5111, "nll_loss": 0.12777014076709747, "rewards/accuracies": 1.0, "rewards/chosen": -3.094670319114812e-05, "rewards/margins": 0.24497069418430328, "rewards/rejected": -0.2450016438961029, "step": 10279 }, { "epoch": 7.109266943291839, "grad_norm": 4.9893317222595215, "learning_rate": 1.6059628092823116e-05, "log_odds_chosen": 10.57332992553711, "log_odds_ratio": -6.0701986512867734e-05, "logits/chosen": -0.4107089638710022, "logits/rejected": -0.475097119808197, "logps/chosen": -0.00036204716889187694, "logps/rejected": -2.0633039474487305, "loss": 0.4285, "nll_loss": 0.10711251944303513, "rewards/accuracies": 1.0, "rewards/chosen": -3.620472125476226e-05, "rewards/margins": 0.2062942087650299, "rewards/rejected": -0.20633040368556976, "step": 10280 }, { "epoch": 7.109958506224066, "grad_norm": 5.906787395477295, "learning_rate": 1.6055786076532965e-05, "log_odds_chosen": 11.538268089294434, "log_odds_ratio": -1.9969585991930217e-05, "logits/chosen": -0.601881742477417, "logits/rejected": -0.6438528895378113, "logps/chosen": -0.0001609336177352816, "logps/rejected": -2.604485511779785, "loss": 0.5312, "nll_loss": 0.13279756903648376, "rewards/accuracies": 1.0, "rewards/chosen": -1.609336322871968e-05, "rewards/margins": 0.26043248176574707, "rewards/rejected": -0.2604485750198364, "step": 10281 }, { "epoch": 7.110650069156293, "grad_norm": 5.751000881195068, "learning_rate": 1.6051944060242814e-05, "log_odds_chosen": 11.557856559753418, "log_odds_ratio": -2.7186484658159316e-05, "logits/chosen": -0.4481320381164551, "logits/rejected": -0.5386602282524109, "logps/chosen": -0.00025518867187201977, "logps/rejected": -2.9913394451141357, "loss": 0.55, "nll_loss": 0.13750889897346497, "rewards/accuracies": 1.0, "rewards/chosen": -2.5518867914797738e-05, "rewards/margins": 0.29910844564437866, "rewards/rejected": -0.29913395643234253, "step": 10282 }, { "epoch": 7.11134163208852, "grad_norm": 5.170167446136475, "learning_rate": 1.6048102043952667e-05, "log_odds_chosen": 10.355810165405273, "log_odds_ratio": -0.0001789717498468235, "logits/chosen": -0.5239746570587158, "logits/rejected": -0.5420616269111633, "logps/chosen": -0.00022885017096996307, "logps/rejected": -1.945081114768982, "loss": 0.82, "nll_loss": 0.20499102771282196, "rewards/accuracies": 1.0, "rewards/chosen": -2.288501855218783e-05, "rewards/margins": 0.19448521733283997, "rewards/rejected": -0.19450810551643372, "step": 10283 }, { "epoch": 7.1120331950207465, "grad_norm": 4.432968616485596, "learning_rate": 1.604426002766252e-05, "log_odds_chosen": 9.272736549377441, "log_odds_ratio": -0.00031625264091417193, "logits/chosen": -0.7568486332893372, "logits/rejected": -0.714116632938385, "logps/chosen": -0.0004361242463346571, "logps/rejected": -1.4550877809524536, "loss": 0.3678, "nll_loss": 0.09192757308483124, "rewards/accuracies": 1.0, "rewards/chosen": -4.3612428271444514e-05, "rewards/margins": 0.1454651653766632, "rewards/rejected": -0.1455087810754776, "step": 10284 }, { "epoch": 7.112724757952973, "grad_norm": 4.4855780601501465, "learning_rate": 1.604041801137237e-05, "log_odds_chosen": 10.794032096862793, "log_odds_ratio": -5.445224087452516e-05, "logits/chosen": -0.3360009789466858, "logits/rejected": -0.3737177848815918, "logps/chosen": -0.0001795999560272321, "logps/rejected": -2.0326905250549316, "loss": 0.4603, "nll_loss": 0.1150742843747139, "rewards/accuracies": 1.0, "rewards/chosen": -1.795999560272321e-05, "rewards/margins": 0.20325109362602234, "rewards/rejected": -0.20326903462409973, "step": 10285 }, { "epoch": 7.1134163208852, "grad_norm": 3.667229413986206, "learning_rate": 1.603657599508222e-05, "log_odds_chosen": 11.386724472045898, "log_odds_ratio": -3.736492362804711e-05, "logits/chosen": -0.4324737787246704, "logits/rejected": -0.5414674282073975, "logps/chosen": -0.0001199167309096083, "logps/rejected": -1.8028628826141357, "loss": 0.3823, "nll_loss": 0.09556451439857483, "rewards/accuracies": 1.0, "rewards/chosen": -1.1991674909950234e-05, "rewards/margins": 0.18027429282665253, "rewards/rejected": -0.18028628826141357, "step": 10286 }, { "epoch": 7.114107883817427, "grad_norm": 5.564022064208984, "learning_rate": 1.6032733978792073e-05, "log_odds_chosen": 10.44112491607666, "log_odds_ratio": -0.0002731723652686924, "logits/chosen": -0.8399361371994019, "logits/rejected": -0.8326637744903564, "logps/chosen": -0.0007418487221002579, "logps/rejected": -2.643864631652832, "loss": 0.8054, "nll_loss": 0.20133022964000702, "rewards/accuracies": 1.0, "rewards/chosen": -7.418487075483426e-05, "rewards/margins": 0.2643122673034668, "rewards/rejected": -0.26438647508621216, "step": 10287 }, { "epoch": 7.114799446749654, "grad_norm": 3.6635069847106934, "learning_rate": 1.6028891962501922e-05, "log_odds_chosen": 10.54667854309082, "log_odds_ratio": -0.0004861929046455771, "logits/chosen": -0.4404884874820709, "logits/rejected": -0.3213157057762146, "logps/chosen": -0.0008983593434095383, "logps/rejected": -2.263770818710327, "loss": 0.5492, "nll_loss": 0.13726304471492767, "rewards/accuracies": 1.0, "rewards/chosen": -8.983593579614535e-05, "rewards/margins": 0.22628724575042725, "rewards/rejected": -0.22637708485126495, "step": 10288 }, { "epoch": 7.115491009681881, "grad_norm": 5.660526275634766, "learning_rate": 1.6025049946211775e-05, "log_odds_chosen": 10.557701110839844, "log_odds_ratio": -3.345799632370472e-05, "logits/chosen": -0.3213052749633789, "logits/rejected": -0.21575619280338287, "logps/chosen": -0.0002533650549594313, "logps/rejected": -2.217205047607422, "loss": 0.6923, "nll_loss": 0.17308278381824493, "rewards/accuracies": 1.0, "rewards/chosen": -2.533650513214525e-05, "rewards/margins": 0.2216951549053192, "rewards/rejected": -0.22172048687934875, "step": 10289 }, { "epoch": 7.1161825726141075, "grad_norm": 4.457588195800781, "learning_rate": 1.6021207929921624e-05, "log_odds_chosen": 11.590757369995117, "log_odds_ratio": -2.638949081301689e-05, "logits/chosen": -0.11472570896148682, "logits/rejected": -0.2207712084054947, "logps/chosen": -0.0001405734510626644, "logps/rejected": -2.4990475177764893, "loss": 0.5276, "nll_loss": 0.1318967044353485, "rewards/accuracies": 1.0, "rewards/chosen": -1.4057346561457962e-05, "rewards/margins": 0.24989071488380432, "rewards/rejected": -0.24990476667881012, "step": 10290 }, { "epoch": 7.116874135546334, "grad_norm": 3.319014549255371, "learning_rate": 1.6017365913631473e-05, "log_odds_chosen": 11.62279987335205, "log_odds_ratio": -5.7544584706192836e-05, "logits/chosen": -0.7007091045379639, "logits/rejected": -0.7249524593353271, "logps/chosen": -0.00015849883493501693, "logps/rejected": -2.691718101501465, "loss": 0.3129, "nll_loss": 0.07820774614810944, "rewards/accuracies": 1.0, "rewards/chosen": -1.584988240210805e-05, "rewards/margins": 0.26915597915649414, "rewards/rejected": -0.269171804189682, "step": 10291 }, { "epoch": 7.117565698478561, "grad_norm": 4.47081184387207, "learning_rate": 1.6013523897341325e-05, "log_odds_chosen": 11.697754859924316, "log_odds_ratio": -7.944389653857797e-05, "logits/chosen": -0.3712863326072693, "logits/rejected": -0.46870869398117065, "logps/chosen": -0.0001674926606938243, "logps/rejected": -2.9090423583984375, "loss": 0.4161, "nll_loss": 0.10401224344968796, "rewards/accuracies": 1.0, "rewards/chosen": -1.6749265341786668e-05, "rewards/margins": 0.2908874750137329, "rewards/rejected": -0.2909042239189148, "step": 10292 }, { "epoch": 7.118257261410788, "grad_norm": 6.830644607543945, "learning_rate": 1.6009681881051174e-05, "log_odds_chosen": 11.319398880004883, "log_odds_ratio": -5.532346403924748e-05, "logits/chosen": 0.03190721571445465, "logits/rejected": -0.02680405229330063, "logps/chosen": -0.0006970899412408471, "logps/rejected": -3.399998188018799, "loss": 0.6686, "nll_loss": 0.16714029014110565, "rewards/accuracies": 1.0, "rewards/chosen": -6.970899266889319e-05, "rewards/margins": 0.33993011713027954, "rewards/rejected": -0.339999794960022, "step": 10293 }, { "epoch": 7.118948824343015, "grad_norm": 6.158021450042725, "learning_rate": 1.6005839864761027e-05, "log_odds_chosen": 10.816229820251465, "log_odds_ratio": -6.230578583199531e-05, "logits/chosen": 0.07101868093013763, "logits/rejected": -0.0015200749039649963, "logps/chosen": -0.0005694905994459987, "logps/rejected": -2.118673801422119, "loss": 0.4078, "nll_loss": 0.10194122791290283, "rewards/accuracies": 1.0, "rewards/chosen": -5.694906576536596e-05, "rewards/margins": 0.2118104249238968, "rewards/rejected": -0.21186739206314087, "step": 10294 }, { "epoch": 7.119640387275242, "grad_norm": 3.2136785984039307, "learning_rate": 1.600199784847088e-05, "log_odds_chosen": 10.907499313354492, "log_odds_ratio": -0.00022041947522666305, "logits/chosen": -0.7990951538085938, "logits/rejected": -0.8849613666534424, "logps/chosen": -9.993871935876086e-05, "logps/rejected": -2.044731378555298, "loss": 0.6505, "nll_loss": 0.16259539127349854, "rewards/accuracies": 1.0, "rewards/chosen": -9.993871572078206e-06, "rewards/margins": 0.2044631540775299, "rewards/rejected": -0.20447313785552979, "step": 10295 }, { "epoch": 7.1203319502074685, "grad_norm": 4.888383388519287, "learning_rate": 1.599815583218073e-05, "log_odds_chosen": 10.031580924987793, "log_odds_ratio": -0.0007512776064686477, "logits/chosen": -0.46712109446525574, "logits/rejected": -0.6181036829948425, "logps/chosen": -0.0013126321136951447, "logps/rejected": -2.666856288909912, "loss": 0.9919, "nll_loss": 0.2479080706834793, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013126322301104665, "rewards/margins": 0.2665543854236603, "rewards/rejected": -0.2666856348514557, "step": 10296 }, { "epoch": 7.121023513139695, "grad_norm": 5.787823677062988, "learning_rate": 1.599431381589058e-05, "log_odds_chosen": 11.687092781066895, "log_odds_ratio": -9.246945410268381e-06, "logits/chosen": -0.2057281881570816, "logits/rejected": -0.18899966776371002, "logps/chosen": -9.951591346180066e-05, "logps/rejected": -2.4338736534118652, "loss": 0.5648, "nll_loss": 0.1412111520767212, "rewards/accuracies": 1.0, "rewards/chosen": -9.951590982382186e-06, "rewards/margins": 0.2433774322271347, "rewards/rejected": -0.243387371301651, "step": 10297 }, { "epoch": 7.121715076071922, "grad_norm": 5.652944564819336, "learning_rate": 1.5990471799600433e-05, "log_odds_chosen": 10.583305358886719, "log_odds_ratio": -0.0001832281268434599, "logits/chosen": -0.5304673314094543, "logits/rejected": -0.4824105203151703, "logps/chosen": -0.00040059618186205626, "logps/rejected": -2.164419174194336, "loss": 0.5758, "nll_loss": 0.143941730260849, "rewards/accuracies": 1.0, "rewards/chosen": -4.0059618186205626e-05, "rewards/margins": 0.2164018452167511, "rewards/rejected": -0.21644189953804016, "step": 10298 }, { "epoch": 7.122406639004149, "grad_norm": 14.913167953491211, "learning_rate": 1.5986629783310282e-05, "log_odds_chosen": 11.30325984954834, "log_odds_ratio": -0.00014173545059747994, "logits/chosen": -0.09313886612653732, "logits/rejected": -0.17511555552482605, "logps/chosen": -0.00034270616015419364, "logps/rejected": -2.881847620010376, "loss": 0.5357, "nll_loss": 0.13390406966209412, "rewards/accuracies": 1.0, "rewards/chosen": -3.42706152878236e-05, "rewards/margins": 0.28815048933029175, "rewards/rejected": -0.2881847620010376, "step": 10299 }, { "epoch": 7.123098201936376, "grad_norm": 3.5610148906707764, "learning_rate": 1.598278776702013e-05, "log_odds_chosen": 10.192537307739258, "log_odds_ratio": -0.00013249927724245936, "logits/chosen": -0.8154253959655762, "logits/rejected": -0.7605419158935547, "logps/chosen": -0.0002521907154005021, "logps/rejected": -1.9220904111862183, "loss": 0.3911, "nll_loss": 0.0977524071931839, "rewards/accuracies": 1.0, "rewards/chosen": -2.521907299524173e-05, "rewards/margins": 0.19218380749225616, "rewards/rejected": -0.19220903515815735, "step": 10300 }, { "epoch": 7.123789764868603, "grad_norm": 10.971964836120605, "learning_rate": 1.5978945750729984e-05, "log_odds_chosen": 10.807840347290039, "log_odds_ratio": -0.0001476934558013454, "logits/chosen": -0.6921945810317993, "logits/rejected": -0.7799077033996582, "logps/chosen": -0.0018123986665159464, "logps/rejected": -2.6817121505737305, "loss": 0.7212, "nll_loss": 0.18029022216796875, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001812398841138929, "rewards/margins": 0.26798999309539795, "rewards/rejected": -0.2681712210178375, "step": 10301 }, { "epoch": 7.124481327800829, "grad_norm": 4.785852909088135, "learning_rate": 1.5975103734439833e-05, "log_odds_chosen": 12.113910675048828, "log_odds_ratio": -2.486845369276125e-05, "logits/chosen": -0.6913008093833923, "logits/rejected": -0.7212645411491394, "logps/chosen": -0.0002334258460905403, "logps/rejected": -2.9632599353790283, "loss": 0.5422, "nll_loss": 0.13555222749710083, "rewards/accuracies": 1.0, "rewards/chosen": -2.3342583517660387e-05, "rewards/margins": 0.2963026762008667, "rewards/rejected": -0.29632601141929626, "step": 10302 }, { "epoch": 7.125172890733056, "grad_norm": 6.334150314331055, "learning_rate": 1.5971261718149685e-05, "log_odds_chosen": 10.088814735412598, "log_odds_ratio": -0.00018660105706658214, "logits/chosen": -0.2547610104084015, "logits/rejected": -0.330096960067749, "logps/chosen": -0.00034176313783973455, "logps/rejected": -2.0302841663360596, "loss": 0.5072, "nll_loss": 0.1267801821231842, "rewards/accuracies": 1.0, "rewards/chosen": -3.417631523916498e-05, "rewards/margins": 0.2029942274093628, "rewards/rejected": -0.20302842557430267, "step": 10303 }, { "epoch": 7.125864453665283, "grad_norm": 13.262726783752441, "learning_rate": 1.5967419701859538e-05, "log_odds_chosen": 10.863768577575684, "log_odds_ratio": -3.665126860141754e-05, "logits/chosen": -0.33151307702064514, "logits/rejected": -0.3889313042163849, "logps/chosen": -0.00023698012228123844, "logps/rejected": -1.9521350860595703, "loss": 0.7971, "nll_loss": 0.19927959144115448, "rewards/accuracies": 1.0, "rewards/chosen": -2.369801040913444e-05, "rewards/margins": 0.19518983364105225, "rewards/rejected": -0.19521352648735046, "step": 10304 }, { "epoch": 7.12655601659751, "grad_norm": 5.214549541473389, "learning_rate": 1.5963577685569387e-05, "log_odds_chosen": 11.219606399536133, "log_odds_ratio": -3.0332190362969413e-05, "logits/chosen": -0.4732060432434082, "logits/rejected": -0.5152763724327087, "logps/chosen": -0.0002018004743149504, "logps/rejected": -2.71527099609375, "loss": 0.4732, "nll_loss": 0.11829044669866562, "rewards/accuracies": 1.0, "rewards/chosen": -2.0180048522888683e-05, "rewards/margins": 0.2715069353580475, "rewards/rejected": -0.27152711153030396, "step": 10305 }, { "epoch": 7.127247579529737, "grad_norm": 7.065393924713135, "learning_rate": 1.595973566927924e-05, "log_odds_chosen": 9.460151672363281, "log_odds_ratio": -0.00042650566319935024, "logits/chosen": -0.312082976102829, "logits/rejected": -0.3087159991264343, "logps/chosen": -0.0018372769700363278, "logps/rejected": -1.5538609027862549, "loss": 0.4244, "nll_loss": 0.10606159269809723, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018372769409324974, "rewards/margins": 0.15520235896110535, "rewards/rejected": -0.1553860902786255, "step": 10306 }, { "epoch": 7.127939142461964, "grad_norm": 7.4752936363220215, "learning_rate": 1.595589365298909e-05, "log_odds_chosen": 11.011021614074707, "log_odds_ratio": -5.5785545555409044e-05, "logits/chosen": -0.34302428364753723, "logits/rejected": -0.4065769910812378, "logps/chosen": -0.00016706316091585904, "logps/rejected": -1.9993393421173096, "loss": 0.4911, "nll_loss": 0.12276819348335266, "rewards/accuracies": 1.0, "rewards/chosen": -1.6706317182979546e-05, "rewards/margins": 0.19991722702980042, "rewards/rejected": -0.1999339461326599, "step": 10307 }, { "epoch": 7.12863070539419, "grad_norm": 6.967989921569824, "learning_rate": 1.595205163669894e-05, "log_odds_chosen": 10.802072525024414, "log_odds_ratio": -7.645039295312017e-05, "logits/chosen": -0.4464913308620453, "logits/rejected": -0.402243435382843, "logps/chosen": -0.00021770509192720056, "logps/rejected": -2.035642385482788, "loss": 0.3735, "nll_loss": 0.09336201846599579, "rewards/accuracies": 1.0, "rewards/chosen": -2.1770507373730652e-05, "rewards/margins": 0.20354247093200684, "rewards/rejected": -0.20356424152851105, "step": 10308 }, { "epoch": 7.129322268326418, "grad_norm": 5.5367751121521, "learning_rate": 1.594820962040879e-05, "log_odds_chosen": 10.833320617675781, "log_odds_ratio": -6.608067633351311e-05, "logits/chosen": -0.42877280712127686, "logits/rejected": -0.40322345495224, "logps/chosen": -0.00040209069265984, "logps/rejected": -2.2934837341308594, "loss": 0.4413, "nll_loss": 0.11032700538635254, "rewards/accuracies": 1.0, "rewards/chosen": -4.020907363155857e-05, "rewards/margins": 0.22930817306041718, "rewards/rejected": -0.22934837639331818, "step": 10309 }, { "epoch": 7.130013831258645, "grad_norm": 4.261756420135498, "learning_rate": 1.5944367604118642e-05, "log_odds_chosen": 10.189852714538574, "log_odds_ratio": -0.00017108373867813498, "logits/chosen": -0.4927106201648712, "logits/rejected": -0.5972291231155396, "logps/chosen": -0.00038456491893157363, "logps/rejected": -1.7242774963378906, "loss": 0.4285, "nll_loss": 0.10710548609495163, "rewards/accuracies": 1.0, "rewards/chosen": -3.845649189315736e-05, "rewards/margins": 0.17238929867744446, "rewards/rejected": -0.17242774367332458, "step": 10310 }, { "epoch": 7.130705394190872, "grad_norm": 6.028716564178467, "learning_rate": 1.594052558782849e-05, "log_odds_chosen": 10.36422348022461, "log_odds_ratio": -0.00011556592653505504, "logits/chosen": 0.011183492839336395, "logits/rejected": 0.023617416620254517, "logps/chosen": -0.005435407627373934, "logps/rejected": -2.5519256591796875, "loss": 0.6331, "nll_loss": 0.1582653671503067, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005435406928882003, "rewards/margins": 0.2546490430831909, "rewards/rejected": -0.2551925480365753, "step": 10311 }, { "epoch": 7.131396957123099, "grad_norm": 4.063992500305176, "learning_rate": 1.5936683571538344e-05, "log_odds_chosen": 10.359947204589844, "log_odds_ratio": -4.255682142684236e-05, "logits/chosen": -0.08217864483594894, "logits/rejected": -0.14698439836502075, "logps/chosen": -0.00041026753024198115, "logps/rejected": -1.7657188177108765, "loss": 0.3317, "nll_loss": 0.08291476964950562, "rewards/accuracies": 1.0, "rewards/chosen": -4.102674938621931e-05, "rewards/margins": 0.1765308529138565, "rewards/rejected": -0.17657186090946198, "step": 10312 }, { "epoch": 7.1320885200553255, "grad_norm": 5.988481044769287, "learning_rate": 1.5932841555248196e-05, "log_odds_chosen": 10.727018356323242, "log_odds_ratio": -7.427345553878695e-05, "logits/chosen": -0.011402279138565063, "logits/rejected": -0.15633204579353333, "logps/chosen": -0.0004745680489577353, "logps/rejected": -2.16426944732666, "loss": 0.5971, "nll_loss": 0.14926651120185852, "rewards/accuracies": 1.0, "rewards/chosen": -4.7456807806156576e-05, "rewards/margins": 0.21637949347496033, "rewards/rejected": -0.21642693877220154, "step": 10313 }, { "epoch": 7.132780082987552, "grad_norm": 3.8203372955322266, "learning_rate": 1.5928999538958045e-05, "log_odds_chosen": 9.118133544921875, "log_odds_ratio": -0.0004423003119882196, "logits/chosen": -0.334600031375885, "logits/rejected": -0.35691797733306885, "logps/chosen": -0.0002994161914102733, "logps/rejected": -0.8771684169769287, "loss": 0.4588, "nll_loss": 0.11465974897146225, "rewards/accuracies": 1.0, "rewards/chosen": -2.994161877722945e-05, "rewards/margins": 0.08768689632415771, "rewards/rejected": -0.08771683275699615, "step": 10314 }, { "epoch": 7.133471645919779, "grad_norm": 4.283903121948242, "learning_rate": 1.5925157522667898e-05, "log_odds_chosen": 10.53400993347168, "log_odds_ratio": -0.0004266462055966258, "logits/chosen": 0.029743246734142303, "logits/rejected": 0.004639193415641785, "logps/chosen": -0.0015510256635025144, "logps/rejected": -2.293443441390991, "loss": 0.5751, "nll_loss": 0.1437395066022873, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001551025634398684, "rewards/margins": 0.22918926179409027, "rewards/rejected": -0.22934435307979584, "step": 10315 }, { "epoch": 7.134163208852006, "grad_norm": 3.7874436378479004, "learning_rate": 1.592131550637775e-05, "log_odds_chosen": 10.79223346710205, "log_odds_ratio": -0.0005515572265721858, "logits/chosen": 0.022121310234069824, "logits/rejected": -0.11769037693738937, "logps/chosen": -0.0011315718293190002, "logps/rejected": -2.303229808807373, "loss": 0.3958, "nll_loss": 0.09889844059944153, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011315719166304916, "rewards/margins": 0.2302098125219345, "rewards/rejected": -0.2303229719400406, "step": 10316 }, { "epoch": 7.134854771784233, "grad_norm": 5.332455635070801, "learning_rate": 1.59174734900876e-05, "log_odds_chosen": 11.952607154846191, "log_odds_ratio": -1.06203833638574e-05, "logits/chosen": -0.041954405605793, "logits/rejected": -0.03453304246068001, "logps/chosen": -9.113582200370729e-05, "logps/rejected": -2.6752333641052246, "loss": 0.5962, "nll_loss": 0.14904029667377472, "rewards/accuracies": 1.0, "rewards/chosen": -9.113581654673908e-06, "rewards/margins": 0.2675142288208008, "rewards/rejected": -0.2675233483314514, "step": 10317 }, { "epoch": 7.13554633471646, "grad_norm": 4.75240421295166, "learning_rate": 1.5913631473797448e-05, "log_odds_chosen": 10.119691848754883, "log_odds_ratio": -0.00010590528836473823, "logits/chosen": -0.41260311007499695, "logits/rejected": -0.41198790073394775, "logps/chosen": -0.0002040062245214358, "logps/rejected": -1.4755959510803223, "loss": 0.4137, "nll_loss": 0.10340912640094757, "rewards/accuracies": 1.0, "rewards/chosen": -2.0400624634930864e-05, "rewards/margins": 0.1475391983985901, "rewards/rejected": -0.14755958318710327, "step": 10318 }, { "epoch": 7.136237897648686, "grad_norm": 14.519569396972656, "learning_rate": 1.59097894575073e-05, "log_odds_chosen": 9.824604034423828, "log_odds_ratio": -0.00032375051523558795, "logits/chosen": -0.025557734072208405, "logits/rejected": -0.07144501060247421, "logps/chosen": -0.0012760567478835583, "logps/rejected": -2.2134809494018555, "loss": 0.7126, "nll_loss": 0.1781070977449417, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012760567187797278, "rewards/margins": 0.2212204784154892, "rewards/rejected": -0.22134807705879211, "step": 10319 }, { "epoch": 7.136929460580913, "grad_norm": 5.569480895996094, "learning_rate": 1.590594744121715e-05, "log_odds_chosen": 10.440262794494629, "log_odds_ratio": -0.00048077639075927436, "logits/chosen": -0.16568979620933533, "logits/rejected": -0.1728629171848297, "logps/chosen": -0.000473564286949113, "logps/rejected": -1.734022855758667, "loss": 0.4548, "nll_loss": 0.11366012692451477, "rewards/accuracies": 1.0, "rewards/chosen": -4.735643233289011e-05, "rewards/margins": 0.17335493862628937, "rewards/rejected": -0.17340229451656342, "step": 10320 }, { "epoch": 7.13762102351314, "grad_norm": 11.692529678344727, "learning_rate": 1.5902105424927002e-05, "log_odds_chosen": 10.846525192260742, "log_odds_ratio": -9.60138495429419e-05, "logits/chosen": -0.25794142484664917, "logits/rejected": -0.28674668073654175, "logps/chosen": -0.0003197805490344763, "logps/rejected": -1.4928094148635864, "loss": 0.2902, "nll_loss": 0.07254131138324738, "rewards/accuracies": 1.0, "rewards/chosen": -3.197805563104339e-05, "rewards/margins": 0.14924898743629456, "rewards/rejected": -0.14928095042705536, "step": 10321 }, { "epoch": 7.138312586445367, "grad_norm": 5.813745021820068, "learning_rate": 1.5898263408636855e-05, "log_odds_chosen": 11.401609420776367, "log_odds_ratio": -1.9921841158065945e-05, "logits/chosen": -0.3643776774406433, "logits/rejected": -0.4122941493988037, "logps/chosen": -0.00016441484331153333, "logps/rejected": -2.542574167251587, "loss": 0.4553, "nll_loss": 0.11381081491708755, "rewards/accuracies": 1.0, "rewards/chosen": -1.644148323975969e-05, "rewards/margins": 0.2542409896850586, "rewards/rejected": -0.2542574107646942, "step": 10322 }, { "epoch": 7.139004149377594, "grad_norm": 6.33383846282959, "learning_rate": 1.5894421392346704e-05, "log_odds_chosen": 10.774675369262695, "log_odds_ratio": -5.105520540382713e-05, "logits/chosen": -0.5525538921356201, "logits/rejected": -0.6430279612541199, "logps/chosen": -7.996350177563727e-05, "logps/rejected": -1.6820995807647705, "loss": 0.3662, "nll_loss": 0.091536745429039, "rewards/accuracies": 1.0, "rewards/chosen": -7.996350177563727e-06, "rewards/margins": 0.1682019829750061, "rewards/rejected": -0.168209969997406, "step": 10323 }, { "epoch": 7.139695712309821, "grad_norm": 12.23645305633545, "learning_rate": 1.5890579376056556e-05, "log_odds_chosen": 11.197294235229492, "log_odds_ratio": -0.0001597453810973093, "logits/chosen": -0.5753327012062073, "logits/rejected": -0.5307286977767944, "logps/chosen": -0.000334289507009089, "logps/rejected": -2.763484001159668, "loss": 0.4819, "nll_loss": 0.12045233696699142, "rewards/accuracies": 1.0, "rewards/chosen": -3.34289507009089e-05, "rewards/margins": 0.27631497383117676, "rewards/rejected": -0.27634841203689575, "step": 10324 }, { "epoch": 7.140387275242047, "grad_norm": 4.146999359130859, "learning_rate": 1.588673735976641e-05, "log_odds_chosen": 11.96800422668457, "log_odds_ratio": -8.39845415612217e-06, "logits/chosen": -0.3274807035923004, "logits/rejected": -0.3796335756778717, "logps/chosen": -0.00016001489711925387, "logps/rejected": -2.7158827781677246, "loss": 0.5385, "nll_loss": 0.13462276756763458, "rewards/accuracies": 1.0, "rewards/chosen": -1.600149153091479e-05, "rewards/margins": 0.2715722620487213, "rewards/rejected": -0.2715882658958435, "step": 10325 }, { "epoch": 7.141078838174274, "grad_norm": 4.407366752624512, "learning_rate": 1.5882895343476258e-05, "log_odds_chosen": 10.556142807006836, "log_odds_ratio": -0.00011441886454122141, "logits/chosen": -0.592565655708313, "logits/rejected": -0.5965545773506165, "logps/chosen": -0.0001350560487480834, "logps/rejected": -1.7966774702072144, "loss": 0.5097, "nll_loss": 0.12740643322467804, "rewards/accuracies": 1.0, "rewards/chosen": -1.3505605238606222e-05, "rewards/margins": 0.17965424060821533, "rewards/rejected": -0.17966774106025696, "step": 10326 }, { "epoch": 7.141770401106501, "grad_norm": 13.011528968811035, "learning_rate": 1.5879053327186107e-05, "log_odds_chosen": 10.92127799987793, "log_odds_ratio": -4.325023473938927e-05, "logits/chosen": -0.7639882564544678, "logits/rejected": -0.845071017742157, "logps/chosen": -0.00012478558346629143, "logps/rejected": -1.907325029373169, "loss": 0.4904, "nll_loss": 0.12258665263652802, "rewards/accuracies": 1.0, "rewards/chosen": -1.2478559256123845e-05, "rewards/margins": 0.19072003662586212, "rewards/rejected": -0.19073250889778137, "step": 10327 }, { "epoch": 7.142461964038728, "grad_norm": 3.8526790142059326, "learning_rate": 1.587521131089596e-05, "log_odds_chosen": 11.049694061279297, "log_odds_ratio": -7.972231833264232e-05, "logits/chosen": -0.5567727088928223, "logits/rejected": -0.6450908780097961, "logps/chosen": -0.00020438033971004188, "logps/rejected": -2.268096446990967, "loss": 0.369, "nll_loss": 0.0922321155667305, "rewards/accuracies": 1.0, "rewards/chosen": -2.043803578999359e-05, "rewards/margins": 0.226789191365242, "rewards/rejected": -0.22680962085723877, "step": 10328 }, { "epoch": 7.143153526970955, "grad_norm": 3.450875997543335, "learning_rate": 1.5871369294605808e-05, "log_odds_chosen": 11.664745330810547, "log_odds_ratio": -1.371507823932916e-05, "logits/chosen": -0.36245304346084595, "logits/rejected": -0.3094356060028076, "logps/chosen": -0.00013537345512304455, "logps/rejected": -2.370499849319458, "loss": 0.4018, "nll_loss": 0.1004365012049675, "rewards/accuracies": 1.0, "rewards/chosen": -1.3537345694203395e-05, "rewards/margins": 0.23703645169734955, "rewards/rejected": -0.23704996705055237, "step": 10329 }, { "epoch": 7.143845089903182, "grad_norm": 8.461017608642578, "learning_rate": 1.586752727831566e-05, "log_odds_chosen": 11.80744743347168, "log_odds_ratio": -4.3963693315163255e-05, "logits/chosen": -0.02379719167947769, "logits/rejected": -0.04182916879653931, "logps/chosen": -0.0002002610854106024, "logps/rejected": -2.7238898277282715, "loss": 0.5228, "nll_loss": 0.1306859254837036, "rewards/accuracies": 1.0, "rewards/chosen": -2.0026109268656e-05, "rewards/margins": 0.2723689675331116, "rewards/rejected": -0.2723889648914337, "step": 10330 }, { "epoch": 7.144536652835408, "grad_norm": 7.45040225982666, "learning_rate": 1.5863685262025513e-05, "log_odds_chosen": 10.639909744262695, "log_odds_ratio": -0.00013313893578015268, "logits/chosen": -0.27051666378974915, "logits/rejected": -0.4297066628932953, "logps/chosen": -0.00021471580839715898, "logps/rejected": -2.028541088104248, "loss": 0.835, "nll_loss": 0.20874357223510742, "rewards/accuracies": 1.0, "rewards/chosen": -2.1471580112120137e-05, "rewards/margins": 0.2028326392173767, "rewards/rejected": -0.20285411179065704, "step": 10331 }, { "epoch": 7.145228215767635, "grad_norm": 4.30554723739624, "learning_rate": 1.5859843245735362e-05, "log_odds_chosen": 11.11909008026123, "log_odds_ratio": -3.873251625918783e-05, "logits/chosen": -0.6348779201507568, "logits/rejected": -0.6722233891487122, "logps/chosen": -0.00025545392418280244, "logps/rejected": -2.312321186065674, "loss": 0.3027, "nll_loss": 0.075670525431633, "rewards/accuracies": 1.0, "rewards/chosen": -2.5545392418280244e-05, "rewards/margins": 0.23120658099651337, "rewards/rejected": -0.23123212158679962, "step": 10332 }, { "epoch": 7.145919778699862, "grad_norm": 10.649109840393066, "learning_rate": 1.5856001229445215e-05, "log_odds_chosen": 10.8757905960083, "log_odds_ratio": -3.358142203069292e-05, "logits/chosen": -0.7790817022323608, "logits/rejected": -0.7131824493408203, "logps/chosen": -0.00020699258311651647, "logps/rejected": -2.3343687057495117, "loss": 0.4288, "nll_loss": 0.10719355940818787, "rewards/accuracies": 1.0, "rewards/chosen": -2.069925903924741e-05, "rewards/margins": 0.23341616988182068, "rewards/rejected": -0.23343686759471893, "step": 10333 }, { "epoch": 7.146611341632089, "grad_norm": 4.358206748962402, "learning_rate": 1.5852159213155067e-05, "log_odds_chosen": 10.780325889587402, "log_odds_ratio": -0.00010636411025188863, "logits/chosen": -0.373515248298645, "logits/rejected": -0.3184809386730194, "logps/chosen": -0.00040083954809233546, "logps/rejected": -2.1831705570220947, "loss": 0.4458, "nll_loss": 0.11144562065601349, "rewards/accuracies": 1.0, "rewards/chosen": -4.008395626442507e-05, "rewards/margins": 0.2182769775390625, "rewards/rejected": -0.21831706166267395, "step": 10334 }, { "epoch": 7.147302904564316, "grad_norm": 7.291419982910156, "learning_rate": 1.5848317196864916e-05, "log_odds_chosen": 10.505504608154297, "log_odds_ratio": -0.0003409209894016385, "logits/chosen": -0.6493133306503296, "logits/rejected": -0.6722878217697144, "logps/chosen": -0.0006983771454542875, "logps/rejected": -2.949965000152588, "loss": 0.6972, "nll_loss": 0.1742691695690155, "rewards/accuracies": 1.0, "rewards/chosen": -6.983771163504571e-05, "rewards/margins": 0.2949266731739044, "rewards/rejected": -0.2949965000152588, "step": 10335 }, { "epoch": 7.1479944674965425, "grad_norm": 4.698943614959717, "learning_rate": 1.5844475180574765e-05, "log_odds_chosen": 10.346242904663086, "log_odds_ratio": -0.00013529814896173775, "logits/chosen": -0.5392444133758545, "logits/rejected": -0.6504898071289062, "logps/chosen": -0.00023310561664402485, "logps/rejected": -1.646261215209961, "loss": 0.5007, "nll_loss": 0.12517225742340088, "rewards/accuracies": 1.0, "rewards/chosen": -2.331056384718977e-05, "rewards/margins": 0.16460281610488892, "rewards/rejected": -0.1646261215209961, "step": 10336 }, { "epoch": 7.148686030428769, "grad_norm": 4.3376688957214355, "learning_rate": 1.5840633164284617e-05, "log_odds_chosen": 9.543367385864258, "log_odds_ratio": -0.00017217869753949344, "logits/chosen": -0.21007820963859558, "logits/rejected": -0.23069339990615845, "logps/chosen": -0.00039433487108908594, "logps/rejected": -1.671665906906128, "loss": 0.8096, "nll_loss": 0.20237885415554047, "rewards/accuracies": 1.0, "rewards/chosen": -3.943348565371707e-05, "rewards/margins": 0.16712714731693268, "rewards/rejected": -0.1671665757894516, "step": 10337 }, { "epoch": 7.149377593360996, "grad_norm": 5.843807220458984, "learning_rate": 1.5836791147994467e-05, "log_odds_chosen": 11.126989364624023, "log_odds_ratio": -2.9735761927440763e-05, "logits/chosen": -0.7239865660667419, "logits/rejected": -0.7809531688690186, "logps/chosen": -0.00020430656149983406, "logps/rejected": -2.331878423690796, "loss": 0.4957, "nll_loss": 0.12391365319490433, "rewards/accuracies": 1.0, "rewards/chosen": -2.0430656149983406e-05, "rewards/margins": 0.23316740989685059, "rewards/rejected": -0.23318785429000854, "step": 10338 }, { "epoch": 7.150069156293223, "grad_norm": 6.848789215087891, "learning_rate": 1.583294913170432e-05, "log_odds_chosen": 10.448689460754395, "log_odds_ratio": -7.409107638522983e-05, "logits/chosen": -0.3858215808868408, "logits/rejected": -0.5062464475631714, "logps/chosen": -0.00018067244673147798, "logps/rejected": -1.631630539894104, "loss": 0.4478, "nll_loss": 0.11194244772195816, "rewards/accuracies": 1.0, "rewards/chosen": -1.8067245036945678e-05, "rewards/margins": 0.1631450057029724, "rewards/rejected": -0.16316306591033936, "step": 10339 }, { "epoch": 7.15076071922545, "grad_norm": 4.909574031829834, "learning_rate": 1.582910711541417e-05, "log_odds_chosen": 9.875921249389648, "log_odds_ratio": -0.00047237356193363667, "logits/chosen": -0.16309067606925964, "logits/rejected": -0.14102210104465485, "logps/chosen": -0.0003869852516800165, "logps/rejected": -1.511296272277832, "loss": 0.4299, "nll_loss": 0.10743023455142975, "rewards/accuracies": 1.0, "rewards/chosen": -3.8698526623193175e-05, "rewards/margins": 0.15109093487262726, "rewards/rejected": -0.15112963318824768, "step": 10340 }, { "epoch": 7.151452282157677, "grad_norm": 4.363992691040039, "learning_rate": 1.582526509912402e-05, "log_odds_chosen": 10.739774703979492, "log_odds_ratio": -0.00010132478200830519, "logits/chosen": -0.3702893853187561, "logits/rejected": -0.39425715804100037, "logps/chosen": -0.0003061680472455919, "logps/rejected": -2.24922776222229, "loss": 0.4192, "nll_loss": 0.10479110479354858, "rewards/accuracies": 1.0, "rewards/chosen": -3.0616803996963426e-05, "rewards/margins": 0.22489216923713684, "rewards/rejected": -0.2249227911233902, "step": 10341 }, { "epoch": 7.1521438450899035, "grad_norm": 6.391323566436768, "learning_rate": 1.5821423082833873e-05, "log_odds_chosen": 10.767119407653809, "log_odds_ratio": -5.2425250032683834e-05, "logits/chosen": -0.312757283449173, "logits/rejected": -0.3822609782218933, "logps/chosen": -0.00017840521468315274, "logps/rejected": -1.8881677389144897, "loss": 0.516, "nll_loss": 0.1289961338043213, "rewards/accuracies": 1.0, "rewards/chosen": -1.7840520740719512e-05, "rewards/margins": 0.1887989342212677, "rewards/rejected": -0.18881677091121674, "step": 10342 }, { "epoch": 7.15283540802213, "grad_norm": 7.592628002166748, "learning_rate": 1.5817581066543725e-05, "log_odds_chosen": 11.997628211975098, "log_odds_ratio": -1.2196329407743178e-05, "logits/chosen": -0.21336635947227478, "logits/rejected": -0.20982897281646729, "logps/chosen": -0.00013047002721577883, "logps/rejected": -2.9994723796844482, "loss": 0.5806, "nll_loss": 0.14516031742095947, "rewards/accuracies": 1.0, "rewards/chosen": -1.3047003449173644e-05, "rewards/margins": 0.29993414878845215, "rewards/rejected": -0.29994723200798035, "step": 10343 }, { "epoch": 7.153526970954357, "grad_norm": 4.593406677246094, "learning_rate": 1.5813739050253574e-05, "log_odds_chosen": 10.125865936279297, "log_odds_ratio": -0.0003525286738295108, "logits/chosen": -0.13861218094825745, "logits/rejected": -0.08278912305831909, "logps/chosen": -0.00023694118135608733, "logps/rejected": -1.7349421977996826, "loss": 0.645, "nll_loss": 0.16122090816497803, "rewards/accuracies": 1.0, "rewards/chosen": -2.3694119590800256e-05, "rewards/margins": 0.17347052693367004, "rewards/rejected": -0.17349421977996826, "step": 10344 }, { "epoch": 7.154218533886584, "grad_norm": 5.003382205963135, "learning_rate": 1.5809897033963423e-05, "log_odds_chosen": 10.470653533935547, "log_odds_ratio": -0.00014362734509631991, "logits/chosen": 0.15004980564117432, "logits/rejected": 0.009387247264385223, "logps/chosen": -0.0001740518637234345, "logps/rejected": -2.018296241760254, "loss": 0.5705, "nll_loss": 0.1426095813512802, "rewards/accuracies": 1.0, "rewards/chosen": -1.7405187463737093e-05, "rewards/margins": 0.20181220769882202, "rewards/rejected": -0.20182962715625763, "step": 10345 }, { "epoch": 7.154910096818811, "grad_norm": 5.691940784454346, "learning_rate": 1.5806055017673276e-05, "log_odds_chosen": 10.687768936157227, "log_odds_ratio": -0.0005019446252845228, "logits/chosen": 0.009011678397655487, "logits/rejected": -0.029282599687576294, "logps/chosen": -0.00031394592951983213, "logps/rejected": -2.420288562774658, "loss": 0.5861, "nll_loss": 0.14647966623306274, "rewards/accuracies": 1.0, "rewards/chosen": -3.1394592951983213e-05, "rewards/margins": 0.24199745059013367, "rewards/rejected": -0.2420288622379303, "step": 10346 }, { "epoch": 7.155601659751038, "grad_norm": 5.003124713897705, "learning_rate": 1.5802213001383125e-05, "log_odds_chosen": 10.676345825195312, "log_odds_ratio": -9.651888103689998e-05, "logits/chosen": -0.3004814684391022, "logits/rejected": -0.3764978349208832, "logps/chosen": -9.466239862376824e-05, "logps/rejected": -1.6661527156829834, "loss": 0.7067, "nll_loss": 0.1766619235277176, "rewards/accuracies": 1.0, "rewards/chosen": -9.466239134781063e-06, "rewards/margins": 0.16660580039024353, "rewards/rejected": -0.16661526262760162, "step": 10347 }, { "epoch": 7.1562932226832645, "grad_norm": 5.460468292236328, "learning_rate": 1.5798370985092977e-05, "log_odds_chosen": 10.289358139038086, "log_odds_ratio": -0.00028913281857967377, "logits/chosen": -0.4978940486907959, "logits/rejected": -0.45821619033813477, "logps/chosen": -0.0014770093839615583, "logps/rejected": -2.626573324203491, "loss": 0.6286, "nll_loss": 0.1571165919303894, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014770095003768802, "rewards/margins": 0.26250964403152466, "rewards/rejected": -0.2626573443412781, "step": 10348 }, { "epoch": 7.156984785615491, "grad_norm": 3.845489501953125, "learning_rate": 1.579452896880283e-05, "log_odds_chosen": 10.857307434082031, "log_odds_ratio": -3.6831996112596244e-05, "logits/chosen": -0.703261137008667, "logits/rejected": -0.7102363705635071, "logps/chosen": -0.00014799721247982234, "logps/rejected": -1.7273801565170288, "loss": 0.4403, "nll_loss": 0.11005987226963043, "rewards/accuracies": 1.0, "rewards/chosen": -1.4799722521274816e-05, "rewards/margins": 0.17272323369979858, "rewards/rejected": -0.17273801565170288, "step": 10349 }, { "epoch": 7.157676348547718, "grad_norm": 4.47117280960083, "learning_rate": 1.579068695251268e-05, "log_odds_chosen": 11.732563018798828, "log_odds_ratio": -1.1336490388202947e-05, "logits/chosen": -0.5973580479621887, "logits/rejected": -0.7038625478744507, "logps/chosen": -0.00014137805555947125, "logps/rejected": -2.862057685852051, "loss": 0.4003, "nll_loss": 0.1000852882862091, "rewards/accuracies": 1.0, "rewards/chosen": -1.4137805919745006e-05, "rewards/margins": 0.2861916422843933, "rewards/rejected": -0.2862057387828827, "step": 10350 }, { "epoch": 7.158367911479945, "grad_norm": 4.15538215637207, "learning_rate": 1.578684493622253e-05, "log_odds_chosen": 9.860397338867188, "log_odds_ratio": -0.0005921100964769721, "logits/chosen": -0.6259291768074036, "logits/rejected": -0.635421097278595, "logps/chosen": -0.00032356291194446385, "logps/rejected": -1.5026354789733887, "loss": 0.4756, "nll_loss": 0.11884228140115738, "rewards/accuracies": 1.0, "rewards/chosen": -3.235629264963791e-05, "rewards/margins": 0.15023118257522583, "rewards/rejected": -0.15026354789733887, "step": 10351 }, { "epoch": 7.159059474412172, "grad_norm": 9.987689018249512, "learning_rate": 1.5783002919932384e-05, "log_odds_chosen": 11.829431533813477, "log_odds_ratio": -1.2193728252896108e-05, "logits/chosen": -0.5235463380813599, "logits/rejected": -0.5612502098083496, "logps/chosen": -0.00024450334603898227, "logps/rejected": -3.104759454727173, "loss": 0.5833, "nll_loss": 0.14583048224449158, "rewards/accuracies": 1.0, "rewards/chosen": -2.445033533149399e-05, "rewards/margins": 0.3104515075683594, "rewards/rejected": -0.3104759454727173, "step": 10352 }, { "epoch": 7.159751037344399, "grad_norm": 5.015040874481201, "learning_rate": 1.5779160903642233e-05, "log_odds_chosen": 11.361230850219727, "log_odds_ratio": -6.172016583150253e-05, "logits/chosen": -0.24815750122070312, "logits/rejected": -0.398996502161026, "logps/chosen": -0.00015217142936307937, "logps/rejected": -2.3462064266204834, "loss": 0.5022, "nll_loss": 0.12555190920829773, "rewards/accuracies": 1.0, "rewards/chosen": -1.5217143300105818e-05, "rewards/margins": 0.23460541665554047, "rewards/rejected": -0.23462063074111938, "step": 10353 }, { "epoch": 7.1604426002766255, "grad_norm": 4.724827766418457, "learning_rate": 1.5775318887352082e-05, "log_odds_chosen": 10.288719177246094, "log_odds_ratio": -0.00014790150453336537, "logits/chosen": -0.09138578176498413, "logits/rejected": -0.10876794159412384, "logps/chosen": -0.000556323619093746, "logps/rejected": -1.8416610956192017, "loss": 0.4881, "nll_loss": 0.12201963365077972, "rewards/accuracies": 1.0, "rewards/chosen": -5.563236481975764e-05, "rewards/margins": 0.18411049246788025, "rewards/rejected": -0.1841661036014557, "step": 10354 }, { "epoch": 7.161134163208852, "grad_norm": 4.384098052978516, "learning_rate": 1.5771476871061934e-05, "log_odds_chosen": 10.974483489990234, "log_odds_ratio": -8.987160981632769e-05, "logits/chosen": -0.27367737889289856, "logits/rejected": -0.344980925321579, "logps/chosen": -0.0001740015286486596, "logps/rejected": -2.03933048248291, "loss": 0.4814, "nll_loss": 0.12034108489751816, "rewards/accuracies": 1.0, "rewards/chosen": -1.740015432005748e-05, "rewards/margins": 0.20391567051410675, "rewards/rejected": -0.20393304526805878, "step": 10355 }, { "epoch": 7.161825726141079, "grad_norm": 5.302286148071289, "learning_rate": 1.5767634854771783e-05, "log_odds_chosen": 10.35113525390625, "log_odds_ratio": -4.7902078222250566e-05, "logits/chosen": -0.45522600412368774, "logits/rejected": -0.543906569480896, "logps/chosen": -0.00014997663674876094, "logps/rejected": -1.7539184093475342, "loss": 0.3004, "nll_loss": 0.07508346438407898, "rewards/accuracies": 1.0, "rewards/chosen": -1.4997664038673975e-05, "rewards/margins": 0.17537686228752136, "rewards/rejected": -0.17539185285568237, "step": 10356 }, { "epoch": 7.162517289073306, "grad_norm": 5.789268493652344, "learning_rate": 1.5763792838481636e-05, "log_odds_chosen": 10.186277389526367, "log_odds_ratio": -0.00013727105397265404, "logits/chosen": 0.14934031665325165, "logits/rejected": 0.11088190972805023, "logps/chosen": -0.00018403568537905812, "logps/rejected": -1.712958574295044, "loss": 0.7635, "nll_loss": 0.19086456298828125, "rewards/accuracies": 1.0, "rewards/chosen": -1.8403568901703693e-05, "rewards/margins": 0.1712774634361267, "rewards/rejected": -0.1712958663702011, "step": 10357 }, { "epoch": 7.163208852005533, "grad_norm": 7.448746681213379, "learning_rate": 1.575995082219149e-05, "log_odds_chosen": 11.716529846191406, "log_odds_ratio": -2.580987347755581e-05, "logits/chosen": -0.25747519731521606, "logits/rejected": -0.3709869384765625, "logps/chosen": -0.00016249314649030566, "logps/rejected": -2.8506031036376953, "loss": 0.5447, "nll_loss": 0.1361800581216812, "rewards/accuracies": 1.0, "rewards/chosen": -1.6249314285232686e-05, "rewards/margins": 0.2850440442562103, "rewards/rejected": -0.2850602865219116, "step": 10358 }, { "epoch": 7.16390041493776, "grad_norm": 5.051116466522217, "learning_rate": 1.5756108805901337e-05, "log_odds_chosen": 10.604525566101074, "log_odds_ratio": -9.764420974534005e-05, "logits/chosen": -0.3789137005805969, "logits/rejected": -0.3770774304866791, "logps/chosen": -0.0001220260382979177, "logps/rejected": -1.644858479499817, "loss": 0.72, "nll_loss": 0.1799805909395218, "rewards/accuracies": 1.0, "rewards/chosen": -1.2202603102196008e-05, "rewards/margins": 0.16447363793849945, "rewards/rejected": -0.1644858419895172, "step": 10359 }, { "epoch": 7.1645919778699865, "grad_norm": 3.8639204502105713, "learning_rate": 1.575226678961119e-05, "log_odds_chosen": 10.401793479919434, "log_odds_ratio": -5.208807488088496e-05, "logits/chosen": -0.5287341475486755, "logits/rejected": -0.491424024105072, "logps/chosen": -0.00016572093591094017, "logps/rejected": -1.6025166511535645, "loss": 0.3203, "nll_loss": 0.08007211983203888, "rewards/accuracies": 1.0, "rewards/chosen": -1.6572093954891898e-05, "rewards/margins": 0.16023510694503784, "rewards/rejected": -0.1602516770362854, "step": 10360 }, { "epoch": 7.165283540802213, "grad_norm": 5.283153533935547, "learning_rate": 1.574842477332104e-05, "log_odds_chosen": 11.058725357055664, "log_odds_ratio": -2.314174889761489e-05, "logits/chosen": -0.41812098026275635, "logits/rejected": -0.4501090347766876, "logps/chosen": -0.00016006288933567703, "logps/rejected": -1.8618499040603638, "loss": 0.4602, "nll_loss": 0.11504913866519928, "rewards/accuracies": 1.0, "rewards/chosen": -1.600628820597194e-05, "rewards/margins": 0.18616899847984314, "rewards/rejected": -0.18618498742580414, "step": 10361 }, { "epoch": 7.16597510373444, "grad_norm": 5.949680328369141, "learning_rate": 1.574458275703089e-05, "log_odds_chosen": 10.830810546875, "log_odds_ratio": -6.881457375129685e-05, "logits/chosen": 0.00372517853975296, "logits/rejected": -0.08768212795257568, "logps/chosen": -0.00016697445244062692, "logps/rejected": -1.9055685997009277, "loss": 0.4789, "nll_loss": 0.11972109973430634, "rewards/accuracies": 1.0, "rewards/chosen": -1.669744415266905e-05, "rewards/margins": 0.19054014980793, "rewards/rejected": -0.1905568540096283, "step": 10362 }, { "epoch": 7.166666666666667, "grad_norm": 5.766731262207031, "learning_rate": 1.574074074074074e-05, "log_odds_chosen": 10.797150611877441, "log_odds_ratio": -3.587778701330535e-05, "logits/chosen": -0.25513169169425964, "logits/rejected": -0.2806079089641571, "logps/chosen": -0.0002537602267693728, "logps/rejected": -2.0755133628845215, "loss": 0.6524, "nll_loss": 0.16309869289398193, "rewards/accuracies": 1.0, "rewards/chosen": -2.5376022676937282e-05, "rewards/margins": 0.20752598345279694, "rewards/rejected": -0.20755136013031006, "step": 10363 }, { "epoch": 7.167358229598894, "grad_norm": 6.955835342407227, "learning_rate": 1.573689872445059e-05, "log_odds_chosen": 10.524138450622559, "log_odds_ratio": -0.00010134144395124167, "logits/chosen": -0.4832562804222107, "logits/rejected": -0.5262367725372314, "logps/chosen": -0.0003315797948744148, "logps/rejected": -2.264157772064209, "loss": 0.6328, "nll_loss": 0.1581859588623047, "rewards/accuracies": 1.0, "rewards/chosen": -3.315797948744148e-05, "rewards/margins": 0.22638264298439026, "rewards/rejected": -0.22641579806804657, "step": 10364 }, { "epoch": 7.168049792531121, "grad_norm": 3.8119096755981445, "learning_rate": 1.5733056708160442e-05, "log_odds_chosen": 10.417471885681152, "log_odds_ratio": -0.00031710093026049435, "logits/chosen": -1.077115535736084, "logits/rejected": -1.0785775184631348, "logps/chosen": -0.00048406756832264364, "logps/rejected": -2.097248077392578, "loss": 0.427, "nll_loss": 0.10671207308769226, "rewards/accuracies": 1.0, "rewards/chosen": -4.8406756832264364e-05, "rewards/margins": 0.20967641472816467, "rewards/rejected": -0.2097248136997223, "step": 10365 }, { "epoch": 7.1687413554633475, "grad_norm": 4.384820938110352, "learning_rate": 1.5729214691870294e-05, "log_odds_chosen": 10.294703483581543, "log_odds_ratio": -0.0010348277864977717, "logits/chosen": -0.2706519663333893, "logits/rejected": -0.32500049471855164, "logps/chosen": -0.0025665496941655874, "logps/rejected": -2.180053472518921, "loss": 0.3983, "nll_loss": 0.09947191178798676, "rewards/accuracies": 1.0, "rewards/chosen": -0.00025665495195426047, "rewards/margins": 0.2177487015724182, "rewards/rejected": -0.21800535917282104, "step": 10366 }, { "epoch": 7.169432918395574, "grad_norm": 5.960926532745361, "learning_rate": 1.5725372675580143e-05, "log_odds_chosen": 10.927475929260254, "log_odds_ratio": -6.083076732465997e-05, "logits/chosen": -0.5122660994529724, "logits/rejected": -0.5442514419555664, "logps/chosen": -0.0004127591964788735, "logps/rejected": -2.9259157180786133, "loss": 0.4699, "nll_loss": 0.11747057735919952, "rewards/accuracies": 1.0, "rewards/chosen": -4.127592546865344e-05, "rewards/margins": 0.29255032539367676, "rewards/rejected": -0.2925916016101837, "step": 10367 }, { "epoch": 7.170124481327801, "grad_norm": 7.345365524291992, "learning_rate": 1.5721530659289996e-05, "log_odds_chosen": 10.399859428405762, "log_odds_ratio": -8.993582014227286e-05, "logits/chosen": -0.5601933598518372, "logits/rejected": -0.7083494663238525, "logps/chosen": -0.00028123060474172235, "logps/rejected": -1.9739731550216675, "loss": 0.5057, "nll_loss": 0.12640774250030518, "rewards/accuracies": 1.0, "rewards/chosen": -2.8123060474172235e-05, "rewards/margins": 0.19736920297145844, "rewards/rejected": -0.19739732146263123, "step": 10368 }, { "epoch": 7.170816044260028, "grad_norm": 4.803614139556885, "learning_rate": 1.5717688642999848e-05, "log_odds_chosen": 11.357755661010742, "log_odds_ratio": -5.818425051984377e-05, "logits/chosen": -0.0957411378622055, "logits/rejected": -0.19544725120067596, "logps/chosen": -0.0002561407454777509, "logps/rejected": -2.88908052444458, "loss": 0.6493, "nll_loss": 0.16231924295425415, "rewards/accuracies": 1.0, "rewards/chosen": -2.561407382017933e-05, "rewards/margins": 0.28888243436813354, "rewards/rejected": -0.28890806436538696, "step": 10369 }, { "epoch": 7.171507607192255, "grad_norm": 4.037209987640381, "learning_rate": 1.5713846626709697e-05, "log_odds_chosen": 11.223855972290039, "log_odds_ratio": -2.7396872610552236e-05, "logits/chosen": -0.37989529967308044, "logits/rejected": -0.42906731367111206, "logps/chosen": -0.00016098878404591233, "logps/rejected": -2.571352958679199, "loss": 0.4347, "nll_loss": 0.10866893827915192, "rewards/accuracies": 1.0, "rewards/chosen": -1.6098878404591233e-05, "rewards/margins": 0.25711923837661743, "rewards/rejected": -0.2571353316307068, "step": 10370 }, { "epoch": 7.172199170124482, "grad_norm": 5.679206371307373, "learning_rate": 1.571000461041955e-05, "log_odds_chosen": 10.426536560058594, "log_odds_ratio": -0.00013442272029351443, "logits/chosen": -0.4217427968978882, "logits/rejected": -0.4483773112297058, "logps/chosen": -0.00038492606836371124, "logps/rejected": -1.9264945983886719, "loss": 0.5683, "nll_loss": 0.14207184314727783, "rewards/accuracies": 1.0, "rewards/chosen": -3.849260610877536e-05, "rewards/margins": 0.1926109790802002, "rewards/rejected": -0.1926494687795639, "step": 10371 }, { "epoch": 7.172890733056708, "grad_norm": 5.536522388458252, "learning_rate": 1.5706162594129402e-05, "log_odds_chosen": 11.396886825561523, "log_odds_ratio": -6.0072481574025005e-05, "logits/chosen": -0.3048505187034607, "logits/rejected": -0.38790225982666016, "logps/chosen": -0.0001912859734147787, "logps/rejected": -2.801403045654297, "loss": 0.5448, "nll_loss": 0.13619258999824524, "rewards/accuracies": 1.0, "rewards/chosen": -1.9128598069073632e-05, "rewards/margins": 0.28012117743492126, "rewards/rejected": -0.28014034032821655, "step": 10372 }, { "epoch": 7.173582295988935, "grad_norm": 5.496601104736328, "learning_rate": 1.570232057783925e-05, "log_odds_chosen": 11.184768676757812, "log_odds_ratio": -2.8792088414775208e-05, "logits/chosen": -0.6190522909164429, "logits/rejected": -0.6823225617408752, "logps/chosen": -0.00015820973203517497, "logps/rejected": -2.4082930088043213, "loss": 0.4024, "nll_loss": 0.10058543086051941, "rewards/accuracies": 1.0, "rewards/chosen": -1.5820973203517497e-05, "rewards/margins": 0.2408134937286377, "rewards/rejected": -0.24082930386066437, "step": 10373 }, { "epoch": 7.174273858921162, "grad_norm": 3.437378406524658, "learning_rate": 1.56984785615491e-05, "log_odds_chosen": 11.272420883178711, "log_odds_ratio": -0.00011576030374271795, "logits/chosen": -0.6390014886856079, "logits/rejected": -0.6240395307540894, "logps/chosen": -0.00013980553194414824, "logps/rejected": -2.6278982162475586, "loss": 0.7808, "nll_loss": 0.1951945722103119, "rewards/accuracies": 1.0, "rewards/chosen": -1.3980553376313765e-05, "rewards/margins": 0.26277586817741394, "rewards/rejected": -0.26278984546661377, "step": 10374 }, { "epoch": 7.174965421853389, "grad_norm": 11.342272758483887, "learning_rate": 1.5694636545258953e-05, "log_odds_chosen": 11.142118453979492, "log_odds_ratio": -2.4900431526475586e-05, "logits/chosen": -0.61419677734375, "logits/rejected": -0.7007442116737366, "logps/chosen": -0.00015536102000623941, "logps/rejected": -1.8198778629302979, "loss": 0.4692, "nll_loss": 0.11729123443365097, "rewards/accuracies": 1.0, "rewards/chosen": -1.5536103092017584e-05, "rewards/margins": 0.18197225034236908, "rewards/rejected": -0.18198780715465546, "step": 10375 }, { "epoch": 7.175656984785616, "grad_norm": 4.347101211547852, "learning_rate": 1.5690794528968802e-05, "log_odds_chosen": 11.29008960723877, "log_odds_ratio": -3.775333607336506e-05, "logits/chosen": -0.6615291833877563, "logits/rejected": -0.6524258255958557, "logps/chosen": -0.0004550123994704336, "logps/rejected": -3.183990001678467, "loss": 0.3407, "nll_loss": 0.08517622947692871, "rewards/accuracies": 1.0, "rewards/chosen": -4.550124140223488e-05, "rewards/margins": 0.31835347414016724, "rewards/rejected": -0.31839898228645325, "step": 10376 }, { "epoch": 7.176348547717843, "grad_norm": 5.331522464752197, "learning_rate": 1.5686952512678654e-05, "log_odds_chosen": 10.315485000610352, "log_odds_ratio": -0.000406495324568823, "logits/chosen": -0.0037413835525512695, "logits/rejected": -0.07865045964717865, "logps/chosen": -0.0003331214829813689, "logps/rejected": -1.8584060668945312, "loss": 0.8735, "nll_loss": 0.2183304727077484, "rewards/accuracies": 1.0, "rewards/chosen": -3.331214975332841e-05, "rewards/margins": 0.18580730259418488, "rewards/rejected": -0.18584060668945312, "step": 10377 }, { "epoch": 7.177040110650069, "grad_norm": 4.766586780548096, "learning_rate": 1.5683110496388507e-05, "log_odds_chosen": 10.226814270019531, "log_odds_ratio": -0.00011353972513461486, "logits/chosen": -0.40596237778663635, "logits/rejected": -0.3591066598892212, "logps/chosen": -0.0002922900894191116, "logps/rejected": -1.915623664855957, "loss": 0.4101, "nll_loss": 0.10252165794372559, "rewards/accuracies": 1.0, "rewards/chosen": -2.9229006031528115e-05, "rewards/margins": 0.1915331482887268, "rewards/rejected": -0.19156238436698914, "step": 10378 }, { "epoch": 7.177731673582296, "grad_norm": 4.257755756378174, "learning_rate": 1.5679268480098356e-05, "log_odds_chosen": 9.591224670410156, "log_odds_ratio": -0.00026978107052855194, "logits/chosen": -0.28969606757164, "logits/rejected": -0.3585425615310669, "logps/chosen": -0.0007068602135404944, "logps/rejected": -1.5515594482421875, "loss": 0.5163, "nll_loss": 0.1290472447872162, "rewards/accuracies": 1.0, "rewards/chosen": -7.068601553328335e-05, "rewards/margins": 0.1550852507352829, "rewards/rejected": -0.1551559418439865, "step": 10379 }, { "epoch": 7.178423236514523, "grad_norm": 5.376257419586182, "learning_rate": 1.5675426463808208e-05, "log_odds_chosen": 11.736917495727539, "log_odds_ratio": -1.4793120499234647e-05, "logits/chosen": -0.34879764914512634, "logits/rejected": -0.40188610553741455, "logps/chosen": -0.00011175702093169093, "logps/rejected": -2.322336435317993, "loss": 0.5005, "nll_loss": 0.12511463463306427, "rewards/accuracies": 1.0, "rewards/chosen": -1.1175701729371212e-05, "rewards/margins": 0.23222248256206512, "rewards/rejected": -0.23223364353179932, "step": 10380 }, { "epoch": 7.17911479944675, "grad_norm": 4.578193187713623, "learning_rate": 1.567158444751806e-05, "log_odds_chosen": 10.820470809936523, "log_odds_ratio": -0.00020237785065546632, "logits/chosen": -0.958088755607605, "logits/rejected": -0.8855130672454834, "logps/chosen": -0.00032699486473575234, "logps/rejected": -2.252181053161621, "loss": 0.7879, "nll_loss": 0.19696089625358582, "rewards/accuracies": 1.0, "rewards/chosen": -3.269948501838371e-05, "rewards/margins": 0.22518542408943176, "rewards/rejected": -0.22521811723709106, "step": 10381 }, { "epoch": 7.179806362378977, "grad_norm": 5.071114540100098, "learning_rate": 1.566774243122791e-05, "log_odds_chosen": 10.823832511901855, "log_odds_ratio": -7.1186208515428e-05, "logits/chosen": -0.4531767964363098, "logits/rejected": -0.5184823870658875, "logps/chosen": -0.00017408700659871101, "logps/rejected": -2.2006890773773193, "loss": 0.49, "nll_loss": 0.12250152230262756, "rewards/accuracies": 1.0, "rewards/chosen": -1.740869993227534e-05, "rewards/margins": 0.22005151212215424, "rewards/rejected": -0.22006891667842865, "step": 10382 }, { "epoch": 7.180497925311204, "grad_norm": 8.42878246307373, "learning_rate": 1.566390041493776e-05, "log_odds_chosen": 11.13003158569336, "log_odds_ratio": -2.9363169232965447e-05, "logits/chosen": -0.412482351064682, "logits/rejected": -0.33322468400001526, "logps/chosen": -0.0001294870162382722, "logps/rejected": -2.097151756286621, "loss": 0.4352, "nll_loss": 0.10879058390855789, "rewards/accuracies": 1.0, "rewards/chosen": -1.2948700714332517e-05, "rewards/margins": 0.20970222353935242, "rewards/rejected": -0.20971518754959106, "step": 10383 }, { "epoch": 7.18118948824343, "grad_norm": 4.142075061798096, "learning_rate": 1.566005839864761e-05, "log_odds_chosen": 11.121615409851074, "log_odds_ratio": -9.572529961587861e-05, "logits/chosen": -0.3162578046321869, "logits/rejected": -0.45748594403266907, "logps/chosen": -0.00015045034524518996, "logps/rejected": -2.277163505554199, "loss": 0.6452, "nll_loss": 0.16129402816295624, "rewards/accuracies": 1.0, "rewards/chosen": -1.5045035070215818e-05, "rewards/margins": 0.2277013063430786, "rewards/rejected": -0.2277163565158844, "step": 10384 }, { "epoch": 7.181881051175657, "grad_norm": 4.8519511222839355, "learning_rate": 1.565621638235746e-05, "log_odds_chosen": 9.380922317504883, "log_odds_ratio": -0.0005602799355983734, "logits/chosen": -0.10082048177719116, "logits/rejected": -0.11572250723838806, "logps/chosen": -0.0007273274823091924, "logps/rejected": -1.6986327171325684, "loss": 0.5084, "nll_loss": 0.12703612446784973, "rewards/accuracies": 1.0, "rewards/chosen": -7.273274968611076e-05, "rewards/margins": 0.16979053616523743, "rewards/rejected": -0.1698632836341858, "step": 10385 }, { "epoch": 7.182572614107884, "grad_norm": 11.137154579162598, "learning_rate": 1.5652374366067313e-05, "log_odds_chosen": 10.050169944763184, "log_odds_ratio": -8.446585707133636e-05, "logits/chosen": -0.49709948897361755, "logits/rejected": -0.5779053568840027, "logps/chosen": -0.00021436612587422132, "logps/rejected": -1.611759066581726, "loss": 0.4521, "nll_loss": 0.11301799863576889, "rewards/accuracies": 1.0, "rewards/chosen": -2.143661185982637e-05, "rewards/margins": 0.16115447878837585, "rewards/rejected": -0.1611759066581726, "step": 10386 }, { "epoch": 7.183264177040111, "grad_norm": 5.8118672370910645, "learning_rate": 1.5648532349777165e-05, "log_odds_chosen": 10.831192016601562, "log_odds_ratio": -0.001313655637204647, "logits/chosen": -0.3823273777961731, "logits/rejected": -0.407784640789032, "logps/chosen": -0.0011425204575061798, "logps/rejected": -2.535809278488159, "loss": 0.4633, "nll_loss": 0.11569167673587799, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011425205593695864, "rewards/margins": 0.2534666657447815, "rewards/rejected": -0.2535809278488159, "step": 10387 }, { "epoch": 7.183955739972338, "grad_norm": 6.439311981201172, "learning_rate": 1.5644690333487014e-05, "log_odds_chosen": 10.70012378692627, "log_odds_ratio": -5.9435871662572026e-05, "logits/chosen": -0.28330811858177185, "logits/rejected": -0.2336798906326294, "logps/chosen": -0.00048739041085354984, "logps/rejected": -2.6013941764831543, "loss": 0.4894, "nll_loss": 0.12234983593225479, "rewards/accuracies": 1.0, "rewards/chosen": -4.873904254054651e-05, "rewards/margins": 0.2600906789302826, "rewards/rejected": -0.2601394057273865, "step": 10388 }, { "epoch": 7.1846473029045645, "grad_norm": 5.669109344482422, "learning_rate": 1.5640848317196867e-05, "log_odds_chosen": 12.277332305908203, "log_odds_ratio": -1.0741958249127492e-05, "logits/chosen": -0.3358321785926819, "logits/rejected": -0.4964350461959839, "logps/chosen": -0.00014862377429381013, "logps/rejected": -3.1850128173828125, "loss": 0.535, "nll_loss": 0.1337580531835556, "rewards/accuracies": 1.0, "rewards/chosen": -1.4862376701785251e-05, "rewards/margins": 0.31848645210266113, "rewards/rejected": -0.3185012936592102, "step": 10389 }, { "epoch": 7.185338865836791, "grad_norm": 6.9274821281433105, "learning_rate": 1.563700630090672e-05, "log_odds_chosen": 11.21712875366211, "log_odds_ratio": -3.1848030630499125e-05, "logits/chosen": -0.15592873096466064, "logits/rejected": -0.17688466608524323, "logps/chosen": -0.0002714493020903319, "logps/rejected": -2.7164435386657715, "loss": 0.6537, "nll_loss": 0.16341017186641693, "rewards/accuracies": 1.0, "rewards/chosen": -2.7144931664224714e-05, "rewards/margins": 0.27161717414855957, "rewards/rejected": -0.27164432406425476, "step": 10390 }, { "epoch": 7.186030428769018, "grad_norm": 3.708771228790283, "learning_rate": 1.5633164284616568e-05, "log_odds_chosen": 11.14272403717041, "log_odds_ratio": -1.6521320503670722e-05, "logits/chosen": -0.04514652490615845, "logits/rejected": -0.1443396508693695, "logps/chosen": -0.00014625716721639037, "logps/rejected": -2.0405805110931396, "loss": 0.3545, "nll_loss": 0.08861503005027771, "rewards/accuracies": 1.0, "rewards/chosen": -1.462571799493162e-05, "rewards/margins": 0.2040434181690216, "rewards/rejected": -0.20405805110931396, "step": 10391 }, { "epoch": 7.186721991701245, "grad_norm": 8.980436325073242, "learning_rate": 1.5629322268326417e-05, "log_odds_chosen": 10.469853401184082, "log_odds_ratio": -0.0005530455382540822, "logits/chosen": -0.5228238701820374, "logits/rejected": -0.544980525970459, "logps/chosen": -0.001911295112222433, "logps/rejected": -2.6337761878967285, "loss": 0.381, "nll_loss": 0.09519396722316742, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019112952577415854, "rewards/margins": 0.263186514377594, "rewards/rejected": -0.2633776068687439, "step": 10392 }, { "epoch": 7.187413554633472, "grad_norm": 5.94824743270874, "learning_rate": 1.562548025203627e-05, "log_odds_chosen": 10.073355674743652, "log_odds_ratio": -0.0001066073018591851, "logits/chosen": -0.34966346621513367, "logits/rejected": -0.329208642244339, "logps/chosen": -0.0004409438115544617, "logps/rejected": -2.026257276535034, "loss": 0.5787, "nll_loss": 0.14465567469596863, "rewards/accuracies": 1.0, "rewards/chosen": -4.40943767898716e-05, "rewards/margins": 0.20258162915706635, "rewards/rejected": -0.20262573659420013, "step": 10393 }, { "epoch": 7.188105117565699, "grad_norm": 4.363674163818359, "learning_rate": 1.562163823574612e-05, "log_odds_chosen": 10.5360107421875, "log_odds_ratio": -0.00013599703379441053, "logits/chosen": -0.5808853507041931, "logits/rejected": -0.5344756841659546, "logps/chosen": -0.000584651657845825, "logps/rejected": -2.1063883304595947, "loss": 0.3505, "nll_loss": 0.08760587871074677, "rewards/accuracies": 1.0, "rewards/chosen": -5.846516432939097e-05, "rewards/margins": 0.21058037877082825, "rewards/rejected": -0.21063882112503052, "step": 10394 }, { "epoch": 7.1887966804979255, "grad_norm": 5.332204341888428, "learning_rate": 1.561779621945597e-05, "log_odds_chosen": 10.541728019714355, "log_odds_ratio": -8.822587551549077e-05, "logits/chosen": -0.31964346766471863, "logits/rejected": -0.37257277965545654, "logps/chosen": -0.0002514015359338373, "logps/rejected": -2.098186731338501, "loss": 0.7035, "nll_loss": 0.17586232721805573, "rewards/accuracies": 1.0, "rewards/chosen": -2.514015432097949e-05, "rewards/margins": 0.20979352295398712, "rewards/rejected": -0.20981867611408234, "step": 10395 }, { "epoch": 7.189488243430152, "grad_norm": 4.773373126983643, "learning_rate": 1.5613954203165824e-05, "log_odds_chosen": 10.850213050842285, "log_odds_ratio": -0.0005813997704535723, "logits/chosen": -0.8597350716590881, "logits/rejected": -0.7824587821960449, "logps/chosen": -0.0006822537397965789, "logps/rejected": -1.9705058336257935, "loss": 0.523, "nll_loss": 0.1307014524936676, "rewards/accuracies": 1.0, "rewards/chosen": -6.822537397965789e-05, "rewards/margins": 0.19698236882686615, "rewards/rejected": -0.19705060124397278, "step": 10396 }, { "epoch": 7.190179806362379, "grad_norm": 7.614468097686768, "learning_rate": 1.5610112186875673e-05, "log_odds_chosen": 11.094710350036621, "log_odds_ratio": -7.629027822986245e-05, "logits/chosen": -0.6720897555351257, "logits/rejected": -0.7558448314666748, "logps/chosen": -0.0002666703367140144, "logps/rejected": -2.537464141845703, "loss": 0.472, "nll_loss": 0.11800040304660797, "rewards/accuracies": 1.0, "rewards/chosen": -2.6667034035199322e-05, "rewards/margins": 0.2537197172641754, "rewards/rejected": -0.2537463903427124, "step": 10397 }, { "epoch": 7.190871369294606, "grad_norm": 3.3446168899536133, "learning_rate": 1.5606270170585525e-05, "log_odds_chosen": 11.072732925415039, "log_odds_ratio": -2.1762225514976308e-05, "logits/chosen": -0.5741645097732544, "logits/rejected": -0.5488527417182922, "logps/chosen": -0.00014538533287122846, "logps/rejected": -2.0284476280212402, "loss": 0.3267, "nll_loss": 0.08168265223503113, "rewards/accuracies": 1.0, "rewards/chosen": -1.4538533832819667e-05, "rewards/margins": 0.2028302252292633, "rewards/rejected": -0.2028447538614273, "step": 10398 }, { "epoch": 7.191562932226833, "grad_norm": 4.859591007232666, "learning_rate": 1.5602428154295378e-05, "log_odds_chosen": 11.008413314819336, "log_odds_ratio": -0.00010048101103166118, "logits/chosen": 0.43324506282806396, "logits/rejected": 0.308527410030365, "logps/chosen": -0.0002522010472603142, "logps/rejected": -2.1987271308898926, "loss": 0.4725, "nll_loss": 0.11810819059610367, "rewards/accuracies": 1.0, "rewards/chosen": -2.522010800021235e-05, "rewards/margins": 0.21984750032424927, "rewards/rejected": -0.21987271308898926, "step": 10399 }, { "epoch": 7.19225449515906, "grad_norm": 3.9196529388427734, "learning_rate": 1.5598586138005227e-05, "log_odds_chosen": 11.893542289733887, "log_odds_ratio": -1.7006164853228256e-05, "logits/chosen": -0.622429609298706, "logits/rejected": -0.6667870283126831, "logps/chosen": -0.00010225811274722219, "logps/rejected": -2.4955382347106934, "loss": 0.365, "nll_loss": 0.09124172478914261, "rewards/accuracies": 1.0, "rewards/chosen": -1.0225810910924338e-05, "rewards/margins": 0.24954360723495483, "rewards/rejected": -0.249553844332695, "step": 10400 }, { "epoch": 7.1929460580912865, "grad_norm": 6.067159652709961, "learning_rate": 1.5594744121715076e-05, "log_odds_chosen": 9.888992309570312, "log_odds_ratio": -0.000312354473862797, "logits/chosen": -0.39751923084259033, "logits/rejected": -0.48524197936058044, "logps/chosen": -0.0005245020147413015, "logps/rejected": -2.020956516265869, "loss": 0.5966, "nll_loss": 0.1491076946258545, "rewards/accuracies": 1.0, "rewards/chosen": -5.245019929134287e-05, "rewards/margins": 0.20204317569732666, "rewards/rejected": -0.202095627784729, "step": 10401 }, { "epoch": 7.193637621023513, "grad_norm": 3.609178066253662, "learning_rate": 1.5590902105424928e-05, "log_odds_chosen": 9.639986038208008, "log_odds_ratio": -0.00017861949163489044, "logits/chosen": -0.4248259365558624, "logits/rejected": -0.5246451497077942, "logps/chosen": -0.0002483507269062102, "logps/rejected": -1.2811558246612549, "loss": 0.398, "nll_loss": 0.09947745501995087, "rewards/accuracies": 1.0, "rewards/chosen": -2.4835069780237973e-05, "rewards/margins": 0.1280907392501831, "rewards/rejected": -0.12811557948589325, "step": 10402 }, { "epoch": 7.19432918395574, "grad_norm": 11.75823974609375, "learning_rate": 1.5587060089134777e-05, "log_odds_chosen": 8.741238594055176, "log_odds_ratio": -0.12879516184329987, "logits/chosen": -0.30707478523254395, "logits/rejected": -0.43609267473220825, "logps/chosen": -0.028354782611131668, "logps/rejected": -1.7946550846099854, "loss": 0.5618, "nll_loss": 0.12756292521953583, "rewards/accuracies": 0.875, "rewards/chosen": -0.002835478400811553, "rewards/margins": 0.17663003504276276, "rewards/rejected": -0.17946550250053406, "step": 10403 }, { "epoch": 7.195020746887967, "grad_norm": 10.215047836303711, "learning_rate": 1.558321807284463e-05, "log_odds_chosen": 11.406917572021484, "log_odds_ratio": -3.1501491321250796e-05, "logits/chosen": 0.07894551753997803, "logits/rejected": 0.08648325502872467, "logps/chosen": -0.00029575484222732484, "logps/rejected": -2.8551900386810303, "loss": 0.4392, "nll_loss": 0.10979416966438293, "rewards/accuracies": 1.0, "rewards/chosen": -2.957548713311553e-05, "rewards/margins": 0.28548943996429443, "rewards/rejected": -0.285519003868103, "step": 10404 }, { "epoch": 7.195712309820194, "grad_norm": 4.584980487823486, "learning_rate": 1.5579376056554482e-05, "log_odds_chosen": 10.541440963745117, "log_odds_ratio": -9.464387403568253e-05, "logits/chosen": -0.547250509262085, "logits/rejected": -0.49824681878089905, "logps/chosen": -0.00014894589548930526, "logps/rejected": -1.8729922771453857, "loss": 0.6072, "nll_loss": 0.151779904961586, "rewards/accuracies": 1.0, "rewards/chosen": -1.4894590094627347e-05, "rewards/margins": 0.18728432059288025, "rewards/rejected": -0.1872992366552353, "step": 10405 }, { "epoch": 7.196403872752421, "grad_norm": 10.538939476013184, "learning_rate": 1.557553404026433e-05, "log_odds_chosen": 10.94846248626709, "log_odds_ratio": -2.3089738533599302e-05, "logits/chosen": -0.2529940903186798, "logits/rejected": -0.31514447927474976, "logps/chosen": -0.0001458583283238113, "logps/rejected": -2.1446704864501953, "loss": 0.4062, "nll_loss": 0.10155414044857025, "rewards/accuracies": 1.0, "rewards/chosen": -1.4585832104785368e-05, "rewards/margins": 0.21445247530937195, "rewards/rejected": -0.21446704864501953, "step": 10406 }, { "epoch": 7.1970954356846475, "grad_norm": 3.172614574432373, "learning_rate": 1.5571692023974183e-05, "log_odds_chosen": 10.082971572875977, "log_odds_ratio": -0.0005615535192191601, "logits/chosen": -0.08390333503484726, "logits/rejected": -0.12408564984798431, "logps/chosen": -0.0015450555365532637, "logps/rejected": -2.3086283206939697, "loss": 0.3739, "nll_loss": 0.09341214597225189, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015450554201379418, "rewards/margins": 0.2307083010673523, "rewards/rejected": -0.2308628261089325, "step": 10407 }, { "epoch": 7.197786998616874, "grad_norm": 5.035568714141846, "learning_rate": 1.5567850007684036e-05, "log_odds_chosen": 11.911405563354492, "log_odds_ratio": -9.349205356556922e-06, "logits/chosen": -0.5108979344367981, "logits/rejected": -0.5400675535202026, "logps/chosen": -8.778244955465198e-05, "logps/rejected": -2.3340892791748047, "loss": 0.7396, "nll_loss": 0.18490070104599, "rewards/accuracies": 1.0, "rewards/chosen": -8.77824641065672e-06, "rewards/margins": 0.2334001660346985, "rewards/rejected": -0.23340894281864166, "step": 10408 }, { "epoch": 7.198478561549101, "grad_norm": 3.9095423221588135, "learning_rate": 1.5564007991393885e-05, "log_odds_chosen": 11.930374145507812, "log_odds_ratio": -1.5022533261799254e-05, "logits/chosen": -0.36563628911972046, "logits/rejected": -0.3588956296443939, "logps/chosen": -0.00010352416575187817, "logps/rejected": -2.5348033905029297, "loss": 0.3587, "nll_loss": 0.08966990560293198, "rewards/accuracies": 1.0, "rewards/chosen": -1.0352417120884638e-05, "rewards/margins": 0.2534700036048889, "rewards/rejected": -0.25348034501075745, "step": 10409 }, { "epoch": 7.199170124481328, "grad_norm": 3.7800188064575195, "learning_rate": 1.5560165975103734e-05, "log_odds_chosen": 11.446325302124023, "log_odds_ratio": -2.7550799131859094e-05, "logits/chosen": -0.5708428621292114, "logits/rejected": -0.7656696438789368, "logps/chosen": -9.46579675655812e-05, "logps/rejected": -1.9892699718475342, "loss": 0.4408, "nll_loss": 0.11019250750541687, "rewards/accuracies": 1.0, "rewards/chosen": -9.465797120356001e-06, "rewards/margins": 0.19891753792762756, "rewards/rejected": -0.19892701506614685, "step": 10410 }, { "epoch": 7.199861687413555, "grad_norm": 5.089803695678711, "learning_rate": 1.5556323958813586e-05, "log_odds_chosen": 9.740339279174805, "log_odds_ratio": -0.00019847344083245844, "logits/chosen": -0.26678839325904846, "logits/rejected": -0.392799973487854, "logps/chosen": -0.000566477479878813, "logps/rejected": -1.7141238451004028, "loss": 0.6536, "nll_loss": 0.16337737441062927, "rewards/accuracies": 1.0, "rewards/chosen": -5.6647753808647394e-05, "rewards/margins": 0.17135575413703918, "rewards/rejected": -0.171412393450737, "step": 10411 }, { "epoch": 7.200553250345782, "grad_norm": 6.466180801391602, "learning_rate": 1.5552481942523436e-05, "log_odds_chosen": 11.755807876586914, "log_odds_ratio": -2.5303113943664357e-05, "logits/chosen": -0.3360484838485718, "logits/rejected": -0.27045273780822754, "logps/chosen": -0.00012604852963704616, "logps/rejected": -2.7367024421691895, "loss": 0.8546, "nll_loss": 0.21365918219089508, "rewards/accuracies": 1.0, "rewards/chosen": -1.2604852599906735e-05, "rewards/margins": 0.2736576497554779, "rewards/rejected": -0.2736702263355255, "step": 10412 }, { "epoch": 7.2012448132780085, "grad_norm": 4.593533992767334, "learning_rate": 1.5548639926233288e-05, "log_odds_chosen": 11.521254539489746, "log_odds_ratio": -3.387085234862752e-05, "logits/chosen": -0.7450588345527649, "logits/rejected": -0.8180086016654968, "logps/chosen": -0.00029448719578795135, "logps/rejected": -2.2137787342071533, "loss": 0.434, "nll_loss": 0.1085037887096405, "rewards/accuracies": 1.0, "rewards/chosen": -2.9448719942593016e-05, "rewards/margins": 0.22134841978549957, "rewards/rejected": -0.2213778793811798, "step": 10413 }, { "epoch": 7.201936376210235, "grad_norm": 4.276266574859619, "learning_rate": 1.554479790994314e-05, "log_odds_chosen": 10.161270141601562, "log_odds_ratio": -0.0001938036148203537, "logits/chosen": -0.7717043161392212, "logits/rejected": -0.7686448693275452, "logps/chosen": -0.0001649046753300354, "logps/rejected": -1.5598740577697754, "loss": 0.6407, "nll_loss": 0.16016465425491333, "rewards/accuracies": 1.0, "rewards/chosen": -1.6490466805407777e-05, "rewards/margins": 0.15597090125083923, "rewards/rejected": -0.15598741173744202, "step": 10414 }, { "epoch": 7.202627939142462, "grad_norm": 3.8587746620178223, "learning_rate": 1.554095589365299e-05, "log_odds_chosen": 9.642004013061523, "log_odds_ratio": -0.00021795628708787262, "logits/chosen": 0.03109053522348404, "logits/rejected": -0.1165243536233902, "logps/chosen": -0.0006174801965244114, "logps/rejected": -2.0108673572540283, "loss": 0.4171, "nll_loss": 0.10424423217773438, "rewards/accuracies": 1.0, "rewards/chosen": -6.174801819724962e-05, "rewards/margins": 0.20102497935295105, "rewards/rejected": -0.20108672976493835, "step": 10415 }, { "epoch": 7.203319502074689, "grad_norm": 3.5054314136505127, "learning_rate": 1.5537113877362842e-05, "log_odds_chosen": 10.877426147460938, "log_odds_ratio": -3.7110381526872516e-05, "logits/chosen": -0.1429760605096817, "logits/rejected": -0.24752220511436462, "logps/chosen": -0.0001301420124946162, "logps/rejected": -1.6698437929153442, "loss": 0.4203, "nll_loss": 0.10507805645465851, "rewards/accuracies": 1.0, "rewards/chosen": -1.3014201613259502e-05, "rewards/margins": 0.166971355676651, "rewards/rejected": -0.16698436439037323, "step": 10416 }, { "epoch": 7.204011065006916, "grad_norm": 7.614464282989502, "learning_rate": 1.5533271861072694e-05, "log_odds_chosen": 10.811773300170898, "log_odds_ratio": -5.097378743812442e-05, "logits/chosen": -0.8475204706192017, "logits/rejected": -0.8343116641044617, "logps/chosen": -8.274411811726168e-05, "logps/rejected": -1.541874885559082, "loss": 0.5566, "nll_loss": 0.13913320004940033, "rewards/accuracies": 1.0, "rewards/chosen": -8.274411811726168e-06, "rewards/margins": 0.15417921543121338, "rewards/rejected": -0.15418748557567596, "step": 10417 }, { "epoch": 7.204702627939143, "grad_norm": 5.6126389503479, "learning_rate": 1.5529429844782543e-05, "log_odds_chosen": 11.205718994140625, "log_odds_ratio": -2.211224455095362e-05, "logits/chosen": -0.6076329350471497, "logits/rejected": -0.6502000689506531, "logps/chosen": -0.00023064023116603494, "logps/rejected": -2.5637574195861816, "loss": 0.6841, "nll_loss": 0.17101337015628815, "rewards/accuracies": 1.0, "rewards/chosen": -2.3064025299390778e-05, "rewards/margins": 0.2563526928424835, "rewards/rejected": -0.2563757598400116, "step": 10418 }, { "epoch": 7.2053941908713695, "grad_norm": 4.8593525886535645, "learning_rate": 1.5525587828492392e-05, "log_odds_chosen": 10.118298530578613, "log_odds_ratio": -8.633873221697286e-05, "logits/chosen": -0.492728590965271, "logits/rejected": -0.41921842098236084, "logps/chosen": -0.0003420605498831719, "logps/rejected": -1.433894395828247, "loss": 0.6214, "nll_loss": 0.15532976388931274, "rewards/accuracies": 1.0, "rewards/chosen": -3.420605935389176e-05, "rewards/margins": 0.14335523545742035, "rewards/rejected": -0.14338943362236023, "step": 10419 }, { "epoch": 7.206085753803596, "grad_norm": 5.551113128662109, "learning_rate": 1.5521745812202245e-05, "log_odds_chosen": 10.857733726501465, "log_odds_ratio": -4.6031469537410885e-05, "logits/chosen": -0.7595330476760864, "logits/rejected": -0.7728185653686523, "logps/chosen": -0.0003562311176210642, "logps/rejected": -2.313002109527588, "loss": 0.7617, "nll_loss": 0.19041162729263306, "rewards/accuracies": 1.0, "rewards/chosen": -3.5623110306914896e-05, "rewards/margins": 0.23126459121704102, "rewards/rejected": -0.2313002198934555, "step": 10420 }, { "epoch": 7.206777316735823, "grad_norm": 3.6761534214019775, "learning_rate": 1.5517903795912094e-05, "log_odds_chosen": 11.04926872253418, "log_odds_ratio": -0.0001277085393667221, "logits/chosen": -0.43931564688682556, "logits/rejected": -0.463930606842041, "logps/chosen": -0.00013612773909699172, "logps/rejected": -2.1665964126586914, "loss": 0.3314, "nll_loss": 0.08284401893615723, "rewards/accuracies": 1.0, "rewards/chosen": -1.3612774637294933e-05, "rewards/margins": 0.21664604544639587, "rewards/rejected": -0.21665966510772705, "step": 10421 }, { "epoch": 7.20746887966805, "grad_norm": 6.475729465484619, "learning_rate": 1.5514061779621946e-05, "log_odds_chosen": 10.414512634277344, "log_odds_ratio": -0.00010397224832559004, "logits/chosen": -0.8556047081947327, "logits/rejected": -0.8216189742088318, "logps/chosen": -0.000805202464107424, "logps/rejected": -2.3161447048187256, "loss": 0.465, "nll_loss": 0.11623618751764297, "rewards/accuracies": 1.0, "rewards/chosen": -8.052025077631697e-05, "rewards/margins": 0.231533944606781, "rewards/rejected": -0.23161447048187256, "step": 10422 }, { "epoch": 7.208160442600277, "grad_norm": 3.659374713897705, "learning_rate": 1.55102197633318e-05, "log_odds_chosen": 10.58485221862793, "log_odds_ratio": -5.1850009185727686e-05, "logits/chosen": -0.5555611252784729, "logits/rejected": -0.6782649159431458, "logps/chosen": -0.00020528820459730923, "logps/rejected": -1.8091728687286377, "loss": 0.5134, "nll_loss": 0.12833338975906372, "rewards/accuracies": 1.0, "rewards/chosen": -2.0528819732135162e-05, "rewards/margins": 0.18089677393436432, "rewards/rejected": -0.18091730773448944, "step": 10423 }, { "epoch": 7.208852005532504, "grad_norm": 5.045117378234863, "learning_rate": 1.5506377747041648e-05, "log_odds_chosen": 10.868586540222168, "log_odds_ratio": -0.00010529413702897727, "logits/chosen": -0.31450480222702026, "logits/rejected": -0.3769742548465729, "logps/chosen": -0.0010240180417895317, "logps/rejected": -3.2506918907165527, "loss": 0.5598, "nll_loss": 0.1399279683828354, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001024018056341447, "rewards/margins": 0.32496681809425354, "rewards/rejected": -0.3250691592693329, "step": 10424 }, { "epoch": 7.20954356846473, "grad_norm": 4.592897415161133, "learning_rate": 1.55025357307515e-05, "log_odds_chosen": 10.890989303588867, "log_odds_ratio": -2.9873521270928904e-05, "logits/chosen": -0.2958086133003235, "logits/rejected": -0.3364552855491638, "logps/chosen": -0.00012136924488004297, "logps/rejected": -1.817213535308838, "loss": 0.4928, "nll_loss": 0.12318695336580276, "rewards/accuracies": 1.0, "rewards/chosen": -1.2136924851802178e-05, "rewards/margins": 0.18170922994613647, "rewards/rejected": -0.18172135949134827, "step": 10425 }, { "epoch": 7.210235131396957, "grad_norm": 5.55319356918335, "learning_rate": 1.5498693714461353e-05, "log_odds_chosen": 10.696649551391602, "log_odds_ratio": -0.00012903663446195424, "logits/chosen": -0.5565292239189148, "logits/rejected": -0.6371341347694397, "logps/chosen": -0.0002119752753060311, "logps/rejected": -2.162536382675171, "loss": 0.3924, "nll_loss": 0.09807487577199936, "rewards/accuracies": 1.0, "rewards/chosen": -2.1197525711613707e-05, "rewards/margins": 0.21623243391513824, "rewards/rejected": -0.2162536382675171, "step": 10426 }, { "epoch": 7.210926694329184, "grad_norm": 5.8774495124816895, "learning_rate": 1.5494851698171202e-05, "log_odds_chosen": 11.74765396118164, "log_odds_ratio": -1.3546211448556278e-05, "logits/chosen": -0.4228131175041199, "logits/rejected": -0.4262281656265259, "logps/chosen": -0.0002533498336561024, "logps/rejected": -3.003474235534668, "loss": 0.5258, "nll_loss": 0.13145418465137482, "rewards/accuracies": 1.0, "rewards/chosen": -2.533498263801448e-05, "rewards/margins": 0.3003220856189728, "rewards/rejected": -0.3003474175930023, "step": 10427 }, { "epoch": 7.211618257261411, "grad_norm": 7.68532133102417, "learning_rate": 1.549100968188105e-05, "log_odds_chosen": 10.173051834106445, "log_odds_ratio": -8.042113040573895e-05, "logits/chosen": -0.2979806661605835, "logits/rejected": -0.38347023725509644, "logps/chosen": -0.0008097730460576713, "logps/rejected": -1.8612622022628784, "loss": 0.4417, "nll_loss": 0.11042722314596176, "rewards/accuracies": 1.0, "rewards/chosen": -8.097730460576713e-05, "rewards/margins": 0.18604524433612823, "rewards/rejected": -0.1861262172460556, "step": 10428 }, { "epoch": 7.212309820193638, "grad_norm": 6.552196502685547, "learning_rate": 1.54871676655909e-05, "log_odds_chosen": 11.38538932800293, "log_odds_ratio": -0.0006448305794037879, "logits/chosen": -0.4479593336582184, "logits/rejected": -0.5196192264556885, "logps/chosen": -0.0005789645947515965, "logps/rejected": -2.6832289695739746, "loss": 0.5465, "nll_loss": 0.13654915988445282, "rewards/accuracies": 1.0, "rewards/chosen": -5.789645729237236e-05, "rewards/margins": 0.2682650089263916, "rewards/rejected": -0.2683229148387909, "step": 10429 }, { "epoch": 7.213001383125865, "grad_norm": 11.669718742370605, "learning_rate": 1.5483325649300752e-05, "log_odds_chosen": 10.768035888671875, "log_odds_ratio": -9.076563583221287e-05, "logits/chosen": -0.5250604748725891, "logits/rejected": -0.5071054697036743, "logps/chosen": -0.00046957319136708975, "logps/rejected": -2.5636401176452637, "loss": 0.4089, "nll_loss": 0.10220938175916672, "rewards/accuracies": 1.0, "rewards/chosen": -4.695731695392169e-05, "rewards/margins": 0.2563170790672302, "rewards/rejected": -0.25636401772499084, "step": 10430 }, { "epoch": 7.213692946058091, "grad_norm": 3.9259867668151855, "learning_rate": 1.5479483633010605e-05, "log_odds_chosen": 9.859259605407715, "log_odds_ratio": -0.00022302698926068842, "logits/chosen": -0.8533114790916443, "logits/rejected": -0.878596842288971, "logps/chosen": -0.0004920702776871622, "logps/rejected": -1.479432225227356, "loss": 0.6729, "nll_loss": 0.16821011900901794, "rewards/accuracies": 1.0, "rewards/chosen": -4.9207032134290785e-05, "rewards/margins": 0.1478940099477768, "rewards/rejected": -0.14794321358203888, "step": 10431 }, { "epoch": 7.214384508990318, "grad_norm": 6.803659439086914, "learning_rate": 1.5475641616720454e-05, "log_odds_chosen": 10.224798202514648, "log_odds_ratio": -0.00013334013056010008, "logits/chosen": -0.16153977811336517, "logits/rejected": 0.06601040065288544, "logps/chosen": -0.0002593988610897213, "logps/rejected": -1.8549165725708008, "loss": 0.4982, "nll_loss": 0.12453596293926239, "rewards/accuracies": 1.0, "rewards/chosen": -2.5939887564163655e-05, "rewards/margins": 0.1854657083749771, "rewards/rejected": -0.1854916512966156, "step": 10432 }, { "epoch": 7.215076071922545, "grad_norm": 5.034233570098877, "learning_rate": 1.5471799600430306e-05, "log_odds_chosen": 9.249288558959961, "log_odds_ratio": -0.00036163858021609485, "logits/chosen": -0.1475447118282318, "logits/rejected": -0.1759490966796875, "logps/chosen": -0.0008161815349012613, "logps/rejected": -1.8197362422943115, "loss": 0.5334, "nll_loss": 0.1333133578300476, "rewards/accuracies": 1.0, "rewards/chosen": -8.161815640050918e-05, "rewards/margins": 0.1818920075893402, "rewards/rejected": -0.1819736361503601, "step": 10433 }, { "epoch": 7.215767634854772, "grad_norm": 3.4301741123199463, "learning_rate": 1.546795758414016e-05, "log_odds_chosen": 11.221672058105469, "log_odds_ratio": -3.974717401433736e-05, "logits/chosen": -0.35873931646347046, "logits/rejected": -0.3598625659942627, "logps/chosen": -0.0005683759809471667, "logps/rejected": -2.6728668212890625, "loss": 0.4431, "nll_loss": 0.11076383292675018, "rewards/accuracies": 1.0, "rewards/chosen": -5.683759445673786e-05, "rewards/margins": 0.2672298848628998, "rewards/rejected": -0.2672867178916931, "step": 10434 }, { "epoch": 7.216459197786999, "grad_norm": 12.01319408416748, "learning_rate": 1.5464115567850008e-05, "log_odds_chosen": 11.553258895874023, "log_odds_ratio": -9.56603471422568e-05, "logits/chosen": -0.7321569919586182, "logits/rejected": -0.7380499243736267, "logps/chosen": -8.728139073355123e-05, "logps/rejected": -2.136077404022217, "loss": 0.4899, "nll_loss": 0.12245947122573853, "rewards/accuracies": 1.0, "rewards/chosen": -8.728139619051944e-06, "rewards/margins": 0.21359901130199432, "rewards/rejected": -0.21360774338245392, "step": 10435 }, { "epoch": 7.217150760719226, "grad_norm": 4.647044658660889, "learning_rate": 1.546027355155986e-05, "log_odds_chosen": 11.165498733520508, "log_odds_ratio": -3.597480463213287e-05, "logits/chosen": -0.29245832562446594, "logits/rejected": -0.24228429794311523, "logps/chosen": -0.00013531606236938387, "logps/rejected": -2.185344696044922, "loss": 0.4234, "nll_loss": 0.10585423558950424, "rewards/accuracies": 1.0, "rewards/chosen": -1.3531605873140506e-05, "rewards/margins": 0.2185209095478058, "rewards/rejected": -0.2185344696044922, "step": 10436 }, { "epoch": 7.217842323651452, "grad_norm": 8.116806030273438, "learning_rate": 1.545643153526971e-05, "log_odds_chosen": 11.70106029510498, "log_odds_ratio": -2.180870433221571e-05, "logits/chosen": -0.2894930839538574, "logits/rejected": -0.29923558235168457, "logps/chosen": -0.0001487794506829232, "logps/rejected": -2.7362220287323, "loss": 0.6568, "nll_loss": 0.16420918703079224, "rewards/accuracies": 1.0, "rewards/chosen": -1.4877944522595499e-05, "rewards/margins": 0.2736073136329651, "rewards/rejected": -0.27362215518951416, "step": 10437 }, { "epoch": 7.218533886583679, "grad_norm": 4.495815753936768, "learning_rate": 1.545258951897956e-05, "log_odds_chosen": 9.812202453613281, "log_odds_ratio": -0.00014063966227695346, "logits/chosen": -0.3846893310546875, "logits/rejected": -0.39387011528015137, "logps/chosen": -0.0003531720722094178, "logps/rejected": -1.6779232025146484, "loss": 0.4573, "nll_loss": 0.11430048942565918, "rewards/accuracies": 1.0, "rewards/chosen": -3.531720722094178e-05, "rewards/margins": 0.16775700449943542, "rewards/rejected": -0.16779232025146484, "step": 10438 }, { "epoch": 7.219225449515906, "grad_norm": 4.9323272705078125, "learning_rate": 1.544874750268941e-05, "log_odds_chosen": 10.920612335205078, "log_odds_ratio": -5.197681821300648e-05, "logits/chosen": -0.4074556231498718, "logits/rejected": -0.4035705327987671, "logps/chosen": -0.00017860863590613008, "logps/rejected": -1.8755065202713013, "loss": 0.3981, "nll_loss": 0.09951049834489822, "rewards/accuracies": 1.0, "rewards/chosen": -1.786086431820877e-05, "rewards/margins": 0.18753278255462646, "rewards/rejected": -0.1875506490468979, "step": 10439 }, { "epoch": 7.219917012448133, "grad_norm": 6.232209205627441, "learning_rate": 1.5444905486399263e-05, "log_odds_chosen": 11.688590049743652, "log_odds_ratio": -3.540895340847783e-05, "logits/chosen": -0.594054102897644, "logits/rejected": -0.7286227345466614, "logps/chosen": -7.078055932652205e-05, "logps/rejected": -2.228940486907959, "loss": 0.3724, "nll_loss": 0.09309396147727966, "rewards/accuracies": 1.0, "rewards/chosen": -7.078055659803795e-06, "rewards/margins": 0.22288696467876434, "rewards/rejected": -0.22289404273033142, "step": 10440 }, { "epoch": 7.22060857538036, "grad_norm": 4.872360706329346, "learning_rate": 1.5441063470109112e-05, "log_odds_chosen": 10.40176010131836, "log_odds_ratio": -0.00017088992171920836, "logits/chosen": 0.11200764775276184, "logits/rejected": 0.05680667608976364, "logps/chosen": -0.00016381520254071802, "logps/rejected": -1.9157912731170654, "loss": 0.5962, "nll_loss": 0.14902549982070923, "rewards/accuracies": 1.0, "rewards/chosen": -1.6381520254071802e-05, "rewards/margins": 0.1915627419948578, "rewards/rejected": -0.19157913327217102, "step": 10441 }, { "epoch": 7.2213001383125865, "grad_norm": 4.740699291229248, "learning_rate": 1.5437221453818965e-05, "log_odds_chosen": 11.286370277404785, "log_odds_ratio": -0.00013748435594607145, "logits/chosen": -0.6109752058982849, "logits/rejected": -0.6121599078178406, "logps/chosen": -0.00045225946814753115, "logps/rejected": -2.360914945602417, "loss": 0.4395, "nll_loss": 0.10985865443944931, "rewards/accuracies": 1.0, "rewards/chosen": -4.522595554590225e-05, "rewards/margins": 0.23604625463485718, "rewards/rejected": -0.2360914945602417, "step": 10442 }, { "epoch": 7.221991701244813, "grad_norm": 4.2852396965026855, "learning_rate": 1.5433379437528817e-05, "log_odds_chosen": 10.645816802978516, "log_odds_ratio": -0.0001721422595437616, "logits/chosen": -0.34702369570732117, "logits/rejected": -0.3822890520095825, "logps/chosen": -0.0005197233404032886, "logps/rejected": -2.4108142852783203, "loss": 0.4523, "nll_loss": 0.11306465417146683, "rewards/accuracies": 1.0, "rewards/chosen": -5.1972336223116145e-05, "rewards/margins": 0.24102944135665894, "rewards/rejected": -0.24108143150806427, "step": 10443 }, { "epoch": 7.22268326417704, "grad_norm": 14.121091842651367, "learning_rate": 1.5429537421238666e-05, "log_odds_chosen": 10.955095291137695, "log_odds_ratio": -9.669965220382437e-05, "logits/chosen": -0.3955569267272949, "logits/rejected": -0.48575782775878906, "logps/chosen": -0.00027399149257689714, "logps/rejected": -2.102013111114502, "loss": 0.5726, "nll_loss": 0.14315034449100494, "rewards/accuracies": 1.0, "rewards/chosen": -2.7399149985285476e-05, "rewards/margins": 0.21017390489578247, "rewards/rejected": -0.21020129323005676, "step": 10444 }, { "epoch": 7.223374827109267, "grad_norm": 5.549518585205078, "learning_rate": 1.542569540494852e-05, "log_odds_chosen": 10.822440147399902, "log_odds_ratio": -0.00012936044367961586, "logits/chosen": -0.2175404578447342, "logits/rejected": -0.3416159152984619, "logps/chosen": -0.000349888316122815, "logps/rejected": -2.3769421577453613, "loss": 0.6474, "nll_loss": 0.16182616353034973, "rewards/accuracies": 1.0, "rewards/chosen": -3.498883233987726e-05, "rewards/margins": 0.2376592457294464, "rewards/rejected": -0.23769423365592957, "step": 10445 }, { "epoch": 7.224066390041494, "grad_norm": 5.736955642700195, "learning_rate": 1.5421853388658368e-05, "log_odds_chosen": 11.646930694580078, "log_odds_ratio": -2.430060521874111e-05, "logits/chosen": 0.10752647370100021, "logits/rejected": -0.04080040007829666, "logps/chosen": -0.00039383795228786767, "logps/rejected": -2.7490639686584473, "loss": 0.4966, "nll_loss": 0.12413991987705231, "rewards/accuracies": 1.0, "rewards/chosen": -3.9383794501191005e-05, "rewards/margins": 0.2748669981956482, "rewards/rejected": -0.2749063968658447, "step": 10446 }, { "epoch": 7.224757952973721, "grad_norm": 5.097479820251465, "learning_rate": 1.5418011372368217e-05, "log_odds_chosen": 10.945337295532227, "log_odds_ratio": -4.302536399336532e-05, "logits/chosen": 0.09947680681943893, "logits/rejected": 0.12206941843032837, "logps/chosen": -0.00015883771993685514, "logps/rejected": -2.1310343742370605, "loss": 0.5825, "nll_loss": 0.14561805129051208, "rewards/accuracies": 1.0, "rewards/chosen": -1.5883773812674917e-05, "rewards/margins": 0.2130875587463379, "rewards/rejected": -0.21310344338417053, "step": 10447 }, { "epoch": 7.2254495159059475, "grad_norm": 5.522941589355469, "learning_rate": 1.541416935607807e-05, "log_odds_chosen": 10.210217475891113, "log_odds_ratio": -0.0001231904316227883, "logits/chosen": -0.11590823531150818, "logits/rejected": -0.18832921981811523, "logps/chosen": -0.0001725100155454129, "logps/rejected": -1.6004002094268799, "loss": 0.638, "nll_loss": 0.1594873070716858, "rewards/accuracies": 1.0, "rewards/chosen": -1.7251002645934932e-05, "rewards/margins": 0.1600227653980255, "rewards/rejected": -0.160040020942688, "step": 10448 }, { "epoch": 7.226141078838174, "grad_norm": 4.131917476654053, "learning_rate": 1.5410327339787922e-05, "log_odds_chosen": 10.818761825561523, "log_odds_ratio": -6.041261804057285e-05, "logits/chosen": 0.06681928038597107, "logits/rejected": 0.07110019773244858, "logps/chosen": -0.0004300489672459662, "logps/rejected": -2.5556554794311523, "loss": 1.0391, "nll_loss": 0.2597717344760895, "rewards/accuracies": 1.0, "rewards/chosen": -4.300489672459662e-05, "rewards/margins": 0.25552254915237427, "rewards/rejected": -0.2555655539035797, "step": 10449 }, { "epoch": 7.226832641770401, "grad_norm": 6.010919094085693, "learning_rate": 1.540648532349777e-05, "log_odds_chosen": 10.075103759765625, "log_odds_ratio": -0.0001629930775379762, "logits/chosen": -0.41976091265678406, "logits/rejected": -0.37371984124183655, "logps/chosen": -0.0004160638782195747, "logps/rejected": -1.5253503322601318, "loss": 0.4134, "nll_loss": 0.1033235490322113, "rewards/accuracies": 1.0, "rewards/chosen": -4.160638491157442e-05, "rewards/margins": 0.152493417263031, "rewards/rejected": -0.15253503620624542, "step": 10450 }, { "epoch": 7.227524204702628, "grad_norm": 5.272099494934082, "learning_rate": 1.5402643307207623e-05, "log_odds_chosen": 11.40852165222168, "log_odds_ratio": -2.994909846165683e-05, "logits/chosen": -0.2771112322807312, "logits/rejected": -0.3340737819671631, "logps/chosen": -0.00013135781046003103, "logps/rejected": -1.9381992816925049, "loss": 0.3929, "nll_loss": 0.09821852296590805, "rewards/accuracies": 1.0, "rewards/chosen": -1.3135780136508401e-05, "rewards/margins": 0.19380679726600647, "rewards/rejected": -0.19381992518901825, "step": 10451 }, { "epoch": 7.228215767634855, "grad_norm": 4.739782333374023, "learning_rate": 1.5398801290917476e-05, "log_odds_chosen": 10.67758560180664, "log_odds_ratio": -8.847292338032275e-05, "logits/chosen": -0.15526941418647766, "logits/rejected": -0.2297494113445282, "logps/chosen": -0.0002579109859652817, "logps/rejected": -2.387845039367676, "loss": 0.5134, "nll_loss": 0.12834352254867554, "rewards/accuracies": 1.0, "rewards/chosen": -2.5791099687921815e-05, "rewards/margins": 0.23875871300697327, "rewards/rejected": -0.23878450691699982, "step": 10452 }, { "epoch": 7.228907330567082, "grad_norm": 7.194368839263916, "learning_rate": 1.5394959274627325e-05, "log_odds_chosen": 10.149903297424316, "log_odds_ratio": -0.00012134911230532452, "logits/chosen": -0.2545188367366791, "logits/rejected": -0.28976038098335266, "logps/chosen": -0.0004408220120240003, "logps/rejected": -2.34104585647583, "loss": 0.3147, "nll_loss": 0.0786544531583786, "rewards/accuracies": 1.0, "rewards/chosen": -4.4082204112783074e-05, "rewards/margins": 0.23406049609184265, "rewards/rejected": -0.23410458862781525, "step": 10453 }, { "epoch": 7.2295988934993085, "grad_norm": 4.988805294036865, "learning_rate": 1.5391117258337177e-05, "log_odds_chosen": 11.244706153869629, "log_odds_ratio": -6.236710760276765e-05, "logits/chosen": -0.35564902424812317, "logits/rejected": -0.4153813123703003, "logps/chosen": -9.814107761485502e-05, "logps/rejected": -1.9542515277862549, "loss": 0.3629, "nll_loss": 0.09071110188961029, "rewards/accuracies": 1.0, "rewards/chosen": -9.814107215788681e-06, "rewards/margins": 0.19541534781455994, "rewards/rejected": -0.1954251527786255, "step": 10454 }, { "epoch": 7.230290456431535, "grad_norm": 4.210373878479004, "learning_rate": 1.5387275242047026e-05, "log_odds_chosen": 10.481241226196289, "log_odds_ratio": -5.844100087415427e-05, "logits/chosen": -0.6087998151779175, "logits/rejected": -0.8502819538116455, "logps/chosen": -0.0001757505815476179, "logps/rejected": -1.623529076576233, "loss": 0.5721, "nll_loss": 0.14301741123199463, "rewards/accuracies": 1.0, "rewards/chosen": -1.7575059246155433e-05, "rewards/margins": 0.1623353511095047, "rewards/rejected": -0.16235291957855225, "step": 10455 }, { "epoch": 7.230982019363762, "grad_norm": 2.9738030433654785, "learning_rate": 1.5383433225756875e-05, "log_odds_chosen": 11.003986358642578, "log_odds_ratio": -3.15298602799885e-05, "logits/chosen": -0.4080902338027954, "logits/rejected": -0.5776853561401367, "logps/chosen": -0.000146781763760373, "logps/rejected": -1.7721868753433228, "loss": 0.3357, "nll_loss": 0.08393213152885437, "rewards/accuracies": 1.0, "rewards/chosen": -1.467817673983518e-05, "rewards/margins": 0.17720401287078857, "rewards/rejected": -0.17721869051456451, "step": 10456 }, { "epoch": 7.231673582295989, "grad_norm": 6.296378135681152, "learning_rate": 1.5379591209466728e-05, "log_odds_chosen": 10.507634162902832, "log_odds_ratio": -9.133493585977703e-05, "logits/chosen": -0.7208746671676636, "logits/rejected": -0.7388486862182617, "logps/chosen": -0.00014481779362540692, "logps/rejected": -1.6705474853515625, "loss": 0.4038, "nll_loss": 0.10094369947910309, "rewards/accuracies": 1.0, "rewards/chosen": -1.4481780453934334e-05, "rewards/margins": 0.16704027354717255, "rewards/rejected": -0.16705477237701416, "step": 10457 }, { "epoch": 7.232365145228216, "grad_norm": 5.793529033660889, "learning_rate": 1.537574919317658e-05, "log_odds_chosen": 11.204610824584961, "log_odds_ratio": -0.0009517016005702317, "logits/chosen": -0.3365629315376282, "logits/rejected": -0.429565966129303, "logps/chosen": -0.0028755757957696915, "logps/rejected": -2.9955363273620605, "loss": 0.4887, "nll_loss": 0.1220720037817955, "rewards/accuracies": 1.0, "rewards/chosen": -0.00028755763196386397, "rewards/margins": 0.29926609992980957, "rewards/rejected": -0.29955363273620605, "step": 10458 }, { "epoch": 7.233056708160443, "grad_norm": 5.168619155883789, "learning_rate": 1.537190717688643e-05, "log_odds_chosen": 9.76969051361084, "log_odds_ratio": -0.00230429507791996, "logits/chosen": -0.21838542819023132, "logits/rejected": -0.32139796018600464, "logps/chosen": -0.001311866450123489, "logps/rejected": -1.8827883005142212, "loss": 1.0604, "nll_loss": 0.2648812532424927, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013118665083311498, "rewards/margins": 0.1881476491689682, "rewards/rejected": -0.18827882409095764, "step": 10459 }, { "epoch": 7.2337482710926695, "grad_norm": 13.341404914855957, "learning_rate": 1.536806516059628e-05, "log_odds_chosen": 10.69163703918457, "log_odds_ratio": -4.546945638139732e-05, "logits/chosen": -0.3286779820919037, "logits/rejected": -0.35568225383758545, "logps/chosen": -0.0001345030905213207, "logps/rejected": -1.6156198978424072, "loss": 0.5042, "nll_loss": 0.1260339319705963, "rewards/accuracies": 1.0, "rewards/chosen": -1.3450309779727831e-05, "rewards/margins": 0.16154852509498596, "rewards/rejected": -0.1615619957447052, "step": 10460 }, { "epoch": 7.234439834024896, "grad_norm": 4.455132484436035, "learning_rate": 1.5364223144306134e-05, "log_odds_chosen": 11.63259506225586, "log_odds_ratio": -2.2229838577914052e-05, "logits/chosen": -0.44185471534729004, "logits/rejected": -0.5651582479476929, "logps/chosen": -0.0002310091513209045, "logps/rejected": -2.6355438232421875, "loss": 0.4199, "nll_loss": 0.10497577488422394, "rewards/accuracies": 1.0, "rewards/chosen": -2.310091622348409e-05, "rewards/margins": 0.26353126764297485, "rewards/rejected": -0.2635543942451477, "step": 10461 }, { "epoch": 7.235131396957123, "grad_norm": 16.563955307006836, "learning_rate": 1.5360381128015983e-05, "log_odds_chosen": 11.065947532653809, "log_odds_ratio": -6.139009929029271e-05, "logits/chosen": -0.4617714583873749, "logits/rejected": -0.40665292739868164, "logps/chosen": -0.00022873807756695896, "logps/rejected": -2.4485700130462646, "loss": 0.4601, "nll_loss": 0.11501865088939667, "rewards/accuracies": 1.0, "rewards/chosen": -2.287380993948318e-05, "rewards/margins": 0.24483412504196167, "rewards/rejected": -0.24485701322555542, "step": 10462 }, { "epoch": 7.23582295988935, "grad_norm": 3.3635497093200684, "learning_rate": 1.5356539111725836e-05, "log_odds_chosen": 11.090447425842285, "log_odds_ratio": -0.00017930346075445414, "logits/chosen": -0.838020920753479, "logits/rejected": -0.7805964946746826, "logps/chosen": -0.00020828915876336396, "logps/rejected": -2.308910846710205, "loss": 0.4053, "nll_loss": 0.10130521655082703, "rewards/accuracies": 1.0, "rewards/chosen": -2.0828916603932157e-05, "rewards/margins": 0.23087026178836823, "rewards/rejected": -0.23089109361171722, "step": 10463 }, { "epoch": 7.236514522821577, "grad_norm": 6.092136383056641, "learning_rate": 1.5352697095435685e-05, "log_odds_chosen": 11.38381576538086, "log_odds_ratio": -4.869890835834667e-05, "logits/chosen": -0.4995681941509247, "logits/rejected": -0.4679161310195923, "logps/chosen": -0.00020764560031238943, "logps/rejected": -2.872692584991455, "loss": 0.4099, "nll_loss": 0.10247980803251266, "rewards/accuracies": 1.0, "rewards/chosen": -2.0764560758834705e-05, "rewards/margins": 0.28724849224090576, "rewards/rejected": -0.28726926445961, "step": 10464 }, { "epoch": 7.237206085753804, "grad_norm": 4.699680805206299, "learning_rate": 1.5348855079145537e-05, "log_odds_chosen": 11.292537689208984, "log_odds_ratio": -1.5159775102802087e-05, "logits/chosen": -0.16589859127998352, "logits/rejected": -0.3281225562095642, "logps/chosen": -0.00021162032498978078, "logps/rejected": -2.7325103282928467, "loss": 0.6188, "nll_loss": 0.15469348430633545, "rewards/accuracies": 1.0, "rewards/chosen": -2.116203359037172e-05, "rewards/margins": 0.2732298970222473, "rewards/rejected": -0.2732510566711426, "step": 10465 }, { "epoch": 7.2378976486860305, "grad_norm": 7.134780406951904, "learning_rate": 1.5345013062855386e-05, "log_odds_chosen": 11.710221290588379, "log_odds_ratio": -1.791205068002455e-05, "logits/chosen": -0.27478334307670593, "logits/rejected": -0.3175143003463745, "logps/chosen": -0.00016071861318778247, "logps/rejected": -2.8377864360809326, "loss": 0.436, "nll_loss": 0.10900319367647171, "rewards/accuracies": 1.0, "rewards/chosen": -1.6071860954980366e-05, "rewards/margins": 0.2837625741958618, "rewards/rejected": -0.2837786376476288, "step": 10466 }, { "epoch": 7.238589211618257, "grad_norm": 3.875941753387451, "learning_rate": 1.534117104656524e-05, "log_odds_chosen": 11.938088417053223, "log_odds_ratio": -1.886241625470575e-05, "logits/chosen": -0.29315173625946045, "logits/rejected": -0.31250953674316406, "logps/chosen": -0.0001133958576247096, "logps/rejected": -2.6138150691986084, "loss": 0.4277, "nll_loss": 0.10692528635263443, "rewards/accuracies": 1.0, "rewards/chosen": -1.133958539867308e-05, "rewards/margins": 0.2613701820373535, "rewards/rejected": -0.26138150691986084, "step": 10467 }, { "epoch": 7.239280774550484, "grad_norm": 4.762474060058594, "learning_rate": 1.5337329030275088e-05, "log_odds_chosen": 11.086641311645508, "log_odds_ratio": -3.06558795273304e-05, "logits/chosen": -0.21213717758655548, "logits/rejected": -0.4254568815231323, "logps/chosen": -0.00031671352917328477, "logps/rejected": -2.2601804733276367, "loss": 0.431, "nll_loss": 0.10773838311433792, "rewards/accuracies": 1.0, "rewards/chosen": -3.1671352189732715e-05, "rewards/margins": 0.22598636150360107, "rewards/rejected": -0.2260180562734604, "step": 10468 }, { "epoch": 7.239972337482711, "grad_norm": 7.895349979400635, "learning_rate": 1.533348701398494e-05, "log_odds_chosen": 10.315384864807129, "log_odds_ratio": -0.002297512488439679, "logits/chosen": -0.5239083766937256, "logits/rejected": -0.5624470710754395, "logps/chosen": -0.0013429216342046857, "logps/rejected": -2.5408456325531006, "loss": 0.5067, "nll_loss": 0.12644195556640625, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013429216051008552, "rewards/margins": 0.253950297832489, "rewards/rejected": -0.25408458709716797, "step": 10469 }, { "epoch": 7.240663900414938, "grad_norm": 5.463350296020508, "learning_rate": 1.5329644997694793e-05, "log_odds_chosen": 10.608156204223633, "log_odds_ratio": -0.00014082054258324206, "logits/chosen": -0.29368531703948975, "logits/rejected": -0.4282805919647217, "logps/chosen": -0.0004581251123454422, "logps/rejected": -2.505800724029541, "loss": 0.5768, "nll_loss": 0.14418385922908783, "rewards/accuracies": 1.0, "rewards/chosen": -4.5812510506948456e-05, "rewards/margins": 0.2505342662334442, "rewards/rejected": -0.2505800724029541, "step": 10470 }, { "epoch": 7.241355463347165, "grad_norm": 9.05333137512207, "learning_rate": 1.532580298140464e-05, "log_odds_chosen": 10.077249526977539, "log_odds_ratio": -0.00021240493515506387, "logits/chosen": -0.28883975744247437, "logits/rejected": -0.37934327125549316, "logps/chosen": -0.0005463613197207451, "logps/rejected": -2.040273666381836, "loss": 0.8706, "nll_loss": 0.2176218032836914, "rewards/accuracies": 1.0, "rewards/chosen": -5.463613342726603e-05, "rewards/margins": 0.203972727060318, "rewards/rejected": -0.20402735471725464, "step": 10471 }, { "epoch": 7.2420470262793915, "grad_norm": 6.9070143699646, "learning_rate": 1.5321960965114494e-05, "log_odds_chosen": 11.244699478149414, "log_odds_ratio": -2.8428865334717557e-05, "logits/chosen": -0.5642852783203125, "logits/rejected": -0.6891046166419983, "logps/chosen": -0.0001826708175940439, "logps/rejected": -2.136019706726074, "loss": 0.5984, "nll_loss": 0.14959505200386047, "rewards/accuracies": 1.0, "rewards/chosen": -1.826708103180863e-05, "rewards/margins": 0.21358370780944824, "rewards/rejected": -0.2136019766330719, "step": 10472 }, { "epoch": 7.242738589211618, "grad_norm": 4.121352672576904, "learning_rate": 1.5318118948824346e-05, "log_odds_chosen": 11.082643508911133, "log_odds_ratio": -3.54972762579564e-05, "logits/chosen": -0.3325987458229065, "logits/rejected": -0.3006635904312134, "logps/chosen": -0.00017654309340287, "logps/rejected": -2.044565200805664, "loss": 0.5264, "nll_loss": 0.131588876247406, "rewards/accuracies": 1.0, "rewards/chosen": -1.765430897648912e-05, "rewards/margins": 0.20443886518478394, "rewards/rejected": -0.20445653796195984, "step": 10473 }, { "epoch": 7.243430152143845, "grad_norm": 4.520073890686035, "learning_rate": 1.5314276932534196e-05, "log_odds_chosen": 10.439888000488281, "log_odds_ratio": -0.0002236421569250524, "logits/chosen": 0.012142367660999298, "logits/rejected": -0.011481313034892082, "logps/chosen": -0.0012220973148941994, "logps/rejected": -2.2247068881988525, "loss": 0.5939, "nll_loss": 0.1484624147415161, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012220973439980298, "rewards/margins": 0.22234848141670227, "rewards/rejected": -0.2224707007408142, "step": 10474 }, { "epoch": 7.244121715076072, "grad_norm": 6.4784932136535645, "learning_rate": 1.5310434916244045e-05, "log_odds_chosen": 9.859674453735352, "log_odds_ratio": -0.00020394177408888936, "logits/chosen": -0.17349502444267273, "logits/rejected": -0.2399659901857376, "logps/chosen": -0.000377682619728148, "logps/rejected": -1.8223941326141357, "loss": 0.4168, "nll_loss": 0.10418784618377686, "rewards/accuracies": 1.0, "rewards/chosen": -3.776826270041056e-05, "rewards/margins": 0.18220163881778717, "rewards/rejected": -0.18223941326141357, "step": 10475 }, { "epoch": 7.244813278008299, "grad_norm": 19.220417022705078, "learning_rate": 1.5306592899953897e-05, "log_odds_chosen": 11.271221160888672, "log_odds_ratio": -3.5153836506651714e-05, "logits/chosen": 0.07185645401477814, "logits/rejected": 0.009163126349449158, "logps/chosen": -0.0005185899208299816, "logps/rejected": -2.475466012954712, "loss": 0.6362, "nll_loss": 0.15904828906059265, "rewards/accuracies": 1.0, "rewards/chosen": -5.1858991355402395e-05, "rewards/margins": 0.24749475717544556, "rewards/rejected": -0.24754659831523895, "step": 10476 }, { "epoch": 7.245504840940526, "grad_norm": 6.962161540985107, "learning_rate": 1.5302750883663746e-05, "log_odds_chosen": 10.97325611114502, "log_odds_ratio": -6.409084744518623e-05, "logits/chosen": -0.58690345287323, "logits/rejected": -0.6452951431274414, "logps/chosen": -0.0001535381597932428, "logps/rejected": -1.999860167503357, "loss": 0.7868, "nll_loss": 0.1966904252767563, "rewards/accuracies": 1.0, "rewards/chosen": -1.5353816706920043e-05, "rewards/margins": 0.19997066259384155, "rewards/rejected": -0.1999860256910324, "step": 10477 }, { "epoch": 7.246196403872752, "grad_norm": 11.236804962158203, "learning_rate": 1.52989088673736e-05, "log_odds_chosen": 10.579336166381836, "log_odds_ratio": -9.932727698469535e-05, "logits/chosen": -0.5695323348045349, "logits/rejected": -0.42794185876846313, "logps/chosen": -0.0003462682943791151, "logps/rejected": -2.0027308464050293, "loss": 0.503, "nll_loss": 0.12573137879371643, "rewards/accuracies": 1.0, "rewards/chosen": -3.462682798271999e-05, "rewards/margins": 0.20023846626281738, "rewards/rejected": -0.20027309656143188, "step": 10478 }, { "epoch": 7.246887966804979, "grad_norm": 4.607297897338867, "learning_rate": 1.529506685108345e-05, "log_odds_chosen": 11.616013526916504, "log_odds_ratio": -3.5801112971967086e-05, "logits/chosen": -0.08392917364835739, "logits/rejected": -0.20223979651927948, "logps/chosen": -0.0002970542118418962, "logps/rejected": -2.897155284881592, "loss": 0.8331, "nll_loss": 0.2082727998495102, "rewards/accuracies": 1.0, "rewards/chosen": -2.9705421184189618e-05, "rewards/margins": 0.28968581557273865, "rewards/rejected": -0.2897155284881592, "step": 10479 }, { "epoch": 7.247579529737206, "grad_norm": 8.609513282775879, "learning_rate": 1.52912248347933e-05, "log_odds_chosen": 11.287090301513672, "log_odds_ratio": -0.0002003059780690819, "logits/chosen": -0.4144344925880432, "logits/rejected": -0.5291805267333984, "logps/chosen": -0.0007323965546675026, "logps/rejected": -2.5458168983459473, "loss": 1.0414, "nll_loss": 0.2603290379047394, "rewards/accuracies": 1.0, "rewards/chosen": -7.323966565309092e-05, "rewards/margins": 0.25450846552848816, "rewards/rejected": -0.2545816898345947, "step": 10480 }, { "epoch": 7.248271092669433, "grad_norm": 4.7390265464782715, "learning_rate": 1.5287382818503152e-05, "log_odds_chosen": 12.203584671020508, "log_odds_ratio": -5.612453242065385e-05, "logits/chosen": 0.10204656422138214, "logits/rejected": 0.06456445157527924, "logps/chosen": -0.00033078165142796934, "logps/rejected": -3.4384679794311523, "loss": 0.5263, "nll_loss": 0.13156820833683014, "rewards/accuracies": 1.0, "rewards/chosen": -3.3078165870392695e-05, "rewards/margins": 0.3438137173652649, "rewards/rejected": -0.34384679794311523, "step": 10481 }, { "epoch": 7.24896265560166, "grad_norm": 5.3882293701171875, "learning_rate": 1.5283540802213005e-05, "log_odds_chosen": 11.932504653930664, "log_odds_ratio": -9.521067113382742e-06, "logits/chosen": 0.12056075036525726, "logits/rejected": 0.016441553831100464, "logps/chosen": -0.00011636118142632768, "logps/rejected": -2.58870267868042, "loss": 0.5869, "nll_loss": 0.14673256874084473, "rewards/accuracies": 1.0, "rewards/chosen": -1.1636117960733827e-05, "rewards/margins": 0.2588586211204529, "rewards/rejected": -0.2588702440261841, "step": 10482 }, { "epoch": 7.249654218533887, "grad_norm": 5.149341583251953, "learning_rate": 1.5279698785922854e-05, "log_odds_chosen": 11.435346603393555, "log_odds_ratio": -1.5145962606766261e-05, "logits/chosen": -0.11249607056379318, "logits/rejected": -0.2753619849681854, "logps/chosen": -0.00019512552535161376, "logps/rejected": -2.530555248260498, "loss": 0.5121, "nll_loss": 0.1280229389667511, "rewards/accuracies": 1.0, "rewards/chosen": -1.95125539903529e-05, "rewards/margins": 0.2530360221862793, "rewards/rejected": -0.25305551290512085, "step": 10483 }, { "epoch": 7.250345781466113, "grad_norm": 4.259470462799072, "learning_rate": 1.5275856769632703e-05, "log_odds_chosen": 11.782159805297852, "log_odds_ratio": -1.5393632565974258e-05, "logits/chosen": -0.25803565979003906, "logits/rejected": -0.2909121811389923, "logps/chosen": -0.00011526994785526767, "logps/rejected": -2.5579113960266113, "loss": 0.563, "nll_loss": 0.14076074957847595, "rewards/accuracies": 1.0, "rewards/chosen": -1.1526994057931006e-05, "rewards/margins": 0.2557796239852905, "rewards/rejected": -0.25579115748405457, "step": 10484 }, { "epoch": 7.25103734439834, "grad_norm": 4.723344802856445, "learning_rate": 1.5272014753342555e-05, "log_odds_chosen": 12.08088493347168, "log_odds_ratio": -1.5762172552058473e-05, "logits/chosen": -0.12385115772485733, "logits/rejected": -0.1922972947359085, "logps/chosen": -0.00016547783161513507, "logps/rejected": -3.349936008453369, "loss": 0.4411, "nll_loss": 0.11026619374752045, "rewards/accuracies": 1.0, "rewards/chosen": -1.6547783161513507e-05, "rewards/margins": 0.33497709035873413, "rewards/rejected": -0.3349936306476593, "step": 10485 }, { "epoch": 7.251728907330567, "grad_norm": 5.340590000152588, "learning_rate": 1.5268172737052405e-05, "log_odds_chosen": 11.269617080688477, "log_odds_ratio": -3.2030522561399266e-05, "logits/chosen": -0.3441758155822754, "logits/rejected": -0.45444193482398987, "logps/chosen": -8.907563460525125e-05, "logps/rejected": -1.851801872253418, "loss": 0.5742, "nll_loss": 0.14355482161045074, "rewards/accuracies": 1.0, "rewards/chosen": -8.907563824323006e-06, "rewards/margins": 0.18517126142978668, "rewards/rejected": -0.1851801872253418, "step": 10486 }, { "epoch": 7.252420470262794, "grad_norm": 3.9872963428497314, "learning_rate": 1.5264330720762257e-05, "log_odds_chosen": 10.931245803833008, "log_odds_ratio": -3.074966662097722e-05, "logits/chosen": -0.290107786655426, "logits/rejected": -0.27709782123565674, "logps/chosen": -0.00021908615599386394, "logps/rejected": -2.241764545440674, "loss": 0.5079, "nll_loss": 0.12695977091789246, "rewards/accuracies": 1.0, "rewards/chosen": -2.1908617782173678e-05, "rewards/margins": 0.22415456175804138, "rewards/rejected": -0.22417645156383514, "step": 10487 }, { "epoch": 7.253112033195021, "grad_norm": 5.664169788360596, "learning_rate": 1.526048870447211e-05, "log_odds_chosen": 10.62841510772705, "log_odds_ratio": -0.00015170712140388787, "logits/chosen": -0.543666422367096, "logits/rejected": -0.5873074531555176, "logps/chosen": -0.0002989826025441289, "logps/rejected": -1.8184638023376465, "loss": 0.5249, "nll_loss": 0.13120988011360168, "rewards/accuracies": 1.0, "rewards/chosen": -2.9898259526817128e-05, "rewards/margins": 0.1818164885044098, "rewards/rejected": -0.18184638023376465, "step": 10488 }, { "epoch": 7.253803596127248, "grad_norm": 9.838672637939453, "learning_rate": 1.5256646688181958e-05, "log_odds_chosen": 10.980676651000977, "log_odds_ratio": -5.6112156016752124e-05, "logits/chosen": -0.6420059204101562, "logits/rejected": -0.6900030374526978, "logps/chosen": -0.00023247089120559394, "logps/rejected": -2.4337525367736816, "loss": 1.0151, "nll_loss": 0.2537762522697449, "rewards/accuracies": 1.0, "rewards/chosen": -2.324709203094244e-05, "rewards/margins": 0.243352010846138, "rewards/rejected": -0.2433752715587616, "step": 10489 }, { "epoch": 7.254495159059474, "grad_norm": 5.095361709594727, "learning_rate": 1.525280467189181e-05, "log_odds_chosen": 10.622602462768555, "log_odds_ratio": -6.363199645420536e-05, "logits/chosen": -0.27274662256240845, "logits/rejected": -0.2679747939109802, "logps/chosen": -0.00013507320545613766, "logps/rejected": -1.8644970655441284, "loss": 0.4659, "nll_loss": 0.1164785847067833, "rewards/accuracies": 1.0, "rewards/chosen": -1.3507320545613766e-05, "rewards/margins": 0.18643620610237122, "rewards/rejected": -0.18644970655441284, "step": 10490 }, { "epoch": 7.255186721991701, "grad_norm": 5.322432041168213, "learning_rate": 1.5248962655601662e-05, "log_odds_chosen": 11.213184356689453, "log_odds_ratio": -6.134158320492133e-05, "logits/chosen": -0.17560338973999023, "logits/rejected": -0.2747710943222046, "logps/chosen": -0.00028960593044757843, "logps/rejected": -2.5317702293395996, "loss": 0.67, "nll_loss": 0.167500302195549, "rewards/accuracies": 1.0, "rewards/chosen": -2.8960590498172678e-05, "rewards/margins": 0.25314807891845703, "rewards/rejected": -0.25317704677581787, "step": 10491 }, { "epoch": 7.255878284923928, "grad_norm": 3.760510206222534, "learning_rate": 1.524512063931151e-05, "log_odds_chosen": 10.296430587768555, "log_odds_ratio": -0.0005208022193983197, "logits/chosen": -0.9254645109176636, "logits/rejected": -0.9133840799331665, "logps/chosen": -0.00017214936087839305, "logps/rejected": -1.6661992073059082, "loss": 0.4374, "nll_loss": 0.10929840058088303, "rewards/accuracies": 1.0, "rewards/chosen": -1.7214935724041425e-05, "rewards/margins": 0.1666027009487152, "rewards/rejected": -0.1666199266910553, "step": 10492 }, { "epoch": 7.256569847856155, "grad_norm": 10.162874221801758, "learning_rate": 1.5241278623021363e-05, "log_odds_chosen": 10.922866821289062, "log_odds_ratio": -0.0003495032142382115, "logits/chosen": -0.776273250579834, "logits/rejected": -0.8349599838256836, "logps/chosen": -0.00038618946564383805, "logps/rejected": -2.118861675262451, "loss": 0.5614, "nll_loss": 0.14032500982284546, "rewards/accuracies": 1.0, "rewards/chosen": -3.861894947476685e-05, "rewards/margins": 0.21184757351875305, "rewards/rejected": -0.21188616752624512, "step": 10493 }, { "epoch": 7.257261410788382, "grad_norm": 4.913707256317139, "learning_rate": 1.5237436606731214e-05, "log_odds_chosen": 10.938773155212402, "log_odds_ratio": -5.736919047194533e-05, "logits/chosen": -0.5350974798202515, "logits/rejected": -0.47295793890953064, "logps/chosen": -0.00023437832714989781, "logps/rejected": -2.0633440017700195, "loss": 0.4701, "nll_loss": 0.1175263375043869, "rewards/accuracies": 1.0, "rewards/chosen": -2.3437833078787662e-05, "rewards/margins": 0.20631097257137299, "rewards/rejected": -0.2063344120979309, "step": 10494 }, { "epoch": 7.2579529737206085, "grad_norm": 4.5175347328186035, "learning_rate": 1.5233594590441063e-05, "log_odds_chosen": 10.719084739685059, "log_odds_ratio": -8.76936610438861e-05, "logits/chosen": -0.2563233971595764, "logits/rejected": -0.3417225778102875, "logps/chosen": -0.0004405237559694797, "logps/rejected": -2.187819242477417, "loss": 0.5478, "nll_loss": 0.13694101572036743, "rewards/accuracies": 1.0, "rewards/chosen": -4.405237268656492e-05, "rewards/margins": 0.2187378704547882, "rewards/rejected": -0.21878191828727722, "step": 10495 }, { "epoch": 7.258644536652835, "grad_norm": 5.704215049743652, "learning_rate": 1.5229752574150915e-05, "log_odds_chosen": 11.154399871826172, "log_odds_ratio": -2.9882006856496446e-05, "logits/chosen": -0.5726063251495361, "logits/rejected": -0.5620173811912537, "logps/chosen": -0.0001177690428448841, "logps/rejected": -2.1058263778686523, "loss": 0.5194, "nll_loss": 0.12984028458595276, "rewards/accuracies": 1.0, "rewards/chosen": -1.1776905012084171e-05, "rewards/margins": 0.21057087182998657, "rewards/rejected": -0.2105826437473297, "step": 10496 }, { "epoch": 7.259336099585062, "grad_norm": 3.9949491024017334, "learning_rate": 1.5225910557860764e-05, "log_odds_chosen": 11.0726318359375, "log_odds_ratio": -2.9816317692166194e-05, "logits/chosen": -0.23565047979354858, "logits/rejected": -0.2983737885951996, "logps/chosen": -0.00022566900588572025, "logps/rejected": -2.5271129608154297, "loss": 0.509, "nll_loss": 0.12725451588630676, "rewards/accuracies": 1.0, "rewards/chosen": -2.2566900952369906e-05, "rewards/margins": 0.2526887357234955, "rewards/rejected": -0.25271129608154297, "step": 10497 }, { "epoch": 7.260027662517289, "grad_norm": 5.695704460144043, "learning_rate": 1.5222068541570617e-05, "log_odds_chosen": 10.686333656311035, "log_odds_ratio": -0.0001333652762696147, "logits/chosen": -0.23131543397903442, "logits/rejected": -0.2706787884235382, "logps/chosen": -0.00022404955234378576, "logps/rejected": -1.6549627780914307, "loss": 0.5849, "nll_loss": 0.14620620012283325, "rewards/accuracies": 1.0, "rewards/chosen": -2.2404954506782815e-05, "rewards/margins": 0.16547387838363647, "rewards/rejected": -0.16549627482891083, "step": 10498 }, { "epoch": 7.260719225449516, "grad_norm": 4.58342981338501, "learning_rate": 1.5218226525280468e-05, "log_odds_chosen": 11.475317001342773, "log_odds_ratio": -1.632480234547984e-05, "logits/chosen": -0.41076338291168213, "logits/rejected": -0.3890606760978699, "logps/chosen": -8.86881971382536e-05, "logps/rejected": -2.0317766666412354, "loss": 0.3491, "nll_loss": 0.08727437257766724, "rewards/accuracies": 1.0, "rewards/chosen": -8.868820259522181e-06, "rewards/margins": 0.2031688094139099, "rewards/rejected": -0.20317767560482025, "step": 10499 }, { "epoch": 7.261410788381743, "grad_norm": 6.624739170074463, "learning_rate": 1.5214384508990317e-05, "log_odds_chosen": 11.000941276550293, "log_odds_ratio": -0.00010065599053632468, "logits/chosen": -0.5458360314369202, "logits/rejected": -0.49721595644950867, "logps/chosen": -0.00041760189924389124, "logps/rejected": -2.0315442085266113, "loss": 0.345, "nll_loss": 0.08623616397380829, "rewards/accuracies": 1.0, "rewards/chosen": -4.176019137958065e-05, "rewards/margins": 0.20311269164085388, "rewards/rejected": -0.20315444469451904, "step": 10500 }, { "epoch": 7.2621023513139695, "grad_norm": 3.732039451599121, "learning_rate": 1.521054249270017e-05, "log_odds_chosen": 10.946099281311035, "log_odds_ratio": -9.804220462683588e-05, "logits/chosen": -0.41156861186027527, "logits/rejected": -0.5109293460845947, "logps/chosen": -0.00017111182387452573, "logps/rejected": -2.3970518112182617, "loss": 0.4056, "nll_loss": 0.10140102356672287, "rewards/accuracies": 1.0, "rewards/chosen": -1.7111182387452573e-05, "rewards/margins": 0.23968809843063354, "rewards/rejected": -0.23970520496368408, "step": 10501 }, { "epoch": 7.262793914246196, "grad_norm": 8.999588012695312, "learning_rate": 1.5206700476410022e-05, "log_odds_chosen": 9.33828353881836, "log_odds_ratio": -0.0001583740668138489, "logits/chosen": -0.19364869594573975, "logits/rejected": -0.2710861563682556, "logps/chosen": -0.0010852512205019593, "logps/rejected": -1.7289215326309204, "loss": 0.7447, "nll_loss": 0.18614733219146729, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010852512787096202, "rewards/margins": 0.17278361320495605, "rewards/rejected": -0.17289213836193085, "step": 10502 }, { "epoch": 7.263485477178423, "grad_norm": 6.641200542449951, "learning_rate": 1.520285846011987e-05, "log_odds_chosen": 10.233026504516602, "log_odds_ratio": -6.110913818702102e-05, "logits/chosen": 0.1292034536600113, "logits/rejected": 0.021028682589530945, "logps/chosen": -0.0006737616495229304, "logps/rejected": -2.5986855030059814, "loss": 1.0676, "nll_loss": 0.26689720153808594, "rewards/accuracies": 1.0, "rewards/chosen": -6.737616786267608e-05, "rewards/margins": 0.2598011791706085, "rewards/rejected": -0.2598685622215271, "step": 10503 }, { "epoch": 7.26417704011065, "grad_norm": 10.292189598083496, "learning_rate": 1.5199016443829721e-05, "log_odds_chosen": 10.901208877563477, "log_odds_ratio": -0.00015122054901439697, "logits/chosen": -0.44021257758140564, "logits/rejected": -0.42914289236068726, "logps/chosen": -0.0005100009148009121, "logps/rejected": -2.387458324432373, "loss": 0.4385, "nll_loss": 0.10961472988128662, "rewards/accuracies": 1.0, "rewards/chosen": -5.100009002489969e-05, "rewards/margins": 0.23869486153125763, "rewards/rejected": -0.23874585330486298, "step": 10504 }, { "epoch": 7.264868603042877, "grad_norm": 11.866669654846191, "learning_rate": 1.5195174427539574e-05, "log_odds_chosen": 11.281862258911133, "log_odds_ratio": -1.964960028999485e-05, "logits/chosen": -0.24127569794654846, "logits/rejected": -0.247794970870018, "logps/chosen": -0.0001290692889597267, "logps/rejected": -2.2144856452941895, "loss": 0.4925, "nll_loss": 0.12313254177570343, "rewards/accuracies": 1.0, "rewards/chosen": -1.2906928532174788e-05, "rewards/margins": 0.22143566608428955, "rewards/rejected": -0.22144857048988342, "step": 10505 }, { "epoch": 7.265560165975104, "grad_norm": 5.618996620178223, "learning_rate": 1.5191332411249423e-05, "log_odds_chosen": 11.821170806884766, "log_odds_ratio": -3.0051140129216947e-05, "logits/chosen": 0.040791213512420654, "logits/rejected": 0.0787191092967987, "logps/chosen": -0.00021927639318164438, "logps/rejected": -2.905123233795166, "loss": 0.469, "nll_loss": 0.1172391027212143, "rewards/accuracies": 1.0, "rewards/chosen": -2.192764077335596e-05, "rewards/margins": 0.29049041867256165, "rewards/rejected": -0.2905123233795166, "step": 10506 }, { "epoch": 7.2662517289073305, "grad_norm": 5.204725742340088, "learning_rate": 1.5187490394959275e-05, "log_odds_chosen": 10.920637130737305, "log_odds_ratio": -7.086249388521537e-05, "logits/chosen": 0.2980443835258484, "logits/rejected": 0.26738932728767395, "logps/chosen": -0.00040924924542196095, "logps/rejected": -2.965292453765869, "loss": 0.597, "nll_loss": 0.14924757182598114, "rewards/accuracies": 1.0, "rewards/chosen": -4.0924929635366425e-05, "rewards/margins": 0.29648828506469727, "rewards/rejected": -0.29652923345565796, "step": 10507 }, { "epoch": 7.266943291839557, "grad_norm": 5.361124515533447, "learning_rate": 1.5183648378669126e-05, "log_odds_chosen": 10.659852981567383, "log_odds_ratio": -6.801797280786559e-05, "logits/chosen": -0.5049962997436523, "logits/rejected": -0.5538925528526306, "logps/chosen": -0.0001713060773909092, "logps/rejected": -1.7066850662231445, "loss": 0.3816, "nll_loss": 0.09539598226547241, "rewards/accuracies": 1.0, "rewards/chosen": -1.713060737529304e-05, "rewards/margins": 0.1706513613462448, "rewards/rejected": -0.17066851258277893, "step": 10508 }, { "epoch": 7.267634854771784, "grad_norm": 6.787872314453125, "learning_rate": 1.5179806362378977e-05, "log_odds_chosen": 11.107709884643555, "log_odds_ratio": -5.516607780009508e-05, "logits/chosen": -0.046600092202425, "logits/rejected": -0.07911308854818344, "logps/chosen": -0.0001958270586328581, "logps/rejected": -2.434133529663086, "loss": 0.4259, "nll_loss": 0.10647544264793396, "rewards/accuracies": 1.0, "rewards/chosen": -1.9582706954679452e-05, "rewards/margins": 0.24339377880096436, "rewards/rejected": -0.24341335892677307, "step": 10509 }, { "epoch": 7.268326417704011, "grad_norm": 5.733765125274658, "learning_rate": 1.5175964346088828e-05, "log_odds_chosen": 11.248653411865234, "log_odds_ratio": -6.074179327697493e-05, "logits/chosen": -0.26920583844184875, "logits/rejected": -0.3057137429714203, "logps/chosen": -0.00018125417409464717, "logps/rejected": -2.5340723991394043, "loss": 0.5202, "nll_loss": 0.13004840910434723, "rewards/accuracies": 1.0, "rewards/chosen": -1.812541813706048e-05, "rewards/margins": 0.2533891201019287, "rewards/rejected": -0.25340723991394043, "step": 10510 }, { "epoch": 7.269017980636238, "grad_norm": 3.1367990970611572, "learning_rate": 1.517212232979868e-05, "log_odds_chosen": 10.54247760772705, "log_odds_ratio": -6.820161797804758e-05, "logits/chosen": -0.06475108861923218, "logits/rejected": -0.1413227915763855, "logps/chosen": -0.00020324383513070643, "logps/rejected": -1.9654295444488525, "loss": 0.6948, "nll_loss": 0.1737029105424881, "rewards/accuracies": 1.0, "rewards/chosen": -2.0324383513070643e-05, "rewards/margins": 0.19652262330055237, "rewards/rejected": -0.19654296338558197, "step": 10511 }, { "epoch": 7.269709543568465, "grad_norm": 2.6129097938537598, "learning_rate": 1.5168280313508529e-05, "log_odds_chosen": 10.622481346130371, "log_odds_ratio": -0.00013337060227058828, "logits/chosen": -0.3024970293045044, "logits/rejected": -0.3406152129173279, "logps/chosen": -0.00026366510428488255, "logps/rejected": -1.9767258167266846, "loss": 0.232, "nll_loss": 0.05797753483057022, "rewards/accuracies": 1.0, "rewards/chosen": -2.63665133388713e-05, "rewards/margins": 0.19764623045921326, "rewards/rejected": -0.19767259061336517, "step": 10512 }, { "epoch": 7.2704011065006915, "grad_norm": 5.719183444976807, "learning_rate": 1.5164438297218382e-05, "log_odds_chosen": 11.262063026428223, "log_odds_ratio": -4.1249000787502155e-05, "logits/chosen": -0.06983301043510437, "logits/rejected": 0.006098955869674683, "logps/chosen": -0.0002281243505422026, "logps/rejected": -2.2929091453552246, "loss": 0.5736, "nll_loss": 0.14339330792427063, "rewards/accuracies": 1.0, "rewards/chosen": -2.2812433599028736e-05, "rewards/margins": 0.2292681187391281, "rewards/rejected": -0.2292909324169159, "step": 10513 }, { "epoch": 7.271092669432918, "grad_norm": 4.460439205169678, "learning_rate": 1.5160596280928232e-05, "log_odds_chosen": 11.027078628540039, "log_odds_ratio": -6.842397124273703e-05, "logits/chosen": -0.41829079389572144, "logits/rejected": -0.5007650852203369, "logps/chosen": -0.00018773046031128615, "logps/rejected": -2.3771162033081055, "loss": 0.5184, "nll_loss": 0.1295936107635498, "rewards/accuracies": 1.0, "rewards/chosen": -1.8773047486320138e-05, "rewards/margins": 0.23769287765026093, "rewards/rejected": -0.23771163821220398, "step": 10514 }, { "epoch": 7.271784232365145, "grad_norm": 12.794469833374023, "learning_rate": 1.5156754264638081e-05, "log_odds_chosen": 10.991874694824219, "log_odds_ratio": -4.0517210436519235e-05, "logits/chosen": -0.9412696957588196, "logits/rejected": -1.0342063903808594, "logps/chosen": -0.00036933840601705015, "logps/rejected": -2.2551140785217285, "loss": 0.5949, "nll_loss": 0.14871898293495178, "rewards/accuracies": 1.0, "rewards/chosen": -3.6933841329300776e-05, "rewards/margins": 0.22547447681427002, "rewards/rejected": -0.22551141679286957, "step": 10515 }, { "epoch": 7.272475795297372, "grad_norm": 5.832699298858643, "learning_rate": 1.5152912248347934e-05, "log_odds_chosen": 10.880440711975098, "log_odds_ratio": -4.107558561372571e-05, "logits/chosen": -0.5598329901695251, "logits/rejected": -0.466844379901886, "logps/chosen": -0.00018433656077831984, "logps/rejected": -2.386929750442505, "loss": 0.341, "nll_loss": 0.08524862676858902, "rewards/accuracies": 1.0, "rewards/chosen": -1.8433656805427745e-05, "rewards/margins": 0.23867452144622803, "rewards/rejected": -0.238692969083786, "step": 10516 }, { "epoch": 7.273167358229599, "grad_norm": 12.49798583984375, "learning_rate": 1.5149070232057786e-05, "log_odds_chosen": 10.499706268310547, "log_odds_ratio": -0.0005382996168918908, "logits/chosen": -0.7100361585617065, "logits/rejected": -0.7095794677734375, "logps/chosen": -0.003856180002912879, "logps/rejected": -2.9049072265625, "loss": 0.8193, "nll_loss": 0.20477049052715302, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003856180119328201, "rewards/margins": 0.29010510444641113, "rewards/rejected": -0.2904907166957855, "step": 10517 }, { "epoch": 7.273858921161826, "grad_norm": 5.052276611328125, "learning_rate": 1.5145228215767635e-05, "log_odds_chosen": 11.47298812866211, "log_odds_ratio": -4.129732042201795e-05, "logits/chosen": 0.3368604779243469, "logits/rejected": 0.18388760089874268, "logps/chosen": -0.00019135003094561398, "logps/rejected": -2.2947165966033936, "loss": 0.4334, "nll_loss": 0.10833790153265, "rewards/accuracies": 1.0, "rewards/chosen": -1.913500454975292e-05, "rewards/margins": 0.22945252060890198, "rewards/rejected": -0.22947168350219727, "step": 10518 }, { "epoch": 7.2745504840940525, "grad_norm": 3.533688545227051, "learning_rate": 1.5141386199477486e-05, "log_odds_chosen": 11.837002754211426, "log_odds_ratio": -1.9109105778625235e-05, "logits/chosen": -0.4327254295349121, "logits/rejected": -0.5700982809066772, "logps/chosen": -8.316531602758914e-05, "logps/rejected": -2.10473370552063, "loss": 0.4574, "nll_loss": 0.11434361338615417, "rewards/accuracies": 1.0, "rewards/chosen": -8.316531420859974e-06, "rewards/margins": 0.21046505868434906, "rewards/rejected": -0.21047338843345642, "step": 10519 }, { "epoch": 7.275242047026279, "grad_norm": 7.7128777503967285, "learning_rate": 1.5137544183187338e-05, "log_odds_chosen": 11.128162384033203, "log_odds_ratio": -4.5072018110658973e-05, "logits/chosen": -0.04674968123435974, "logits/rejected": -0.19485291838645935, "logps/chosen": -0.00018412625649943948, "logps/rejected": -2.3100948333740234, "loss": 0.6638, "nll_loss": 0.16594311594963074, "rewards/accuracies": 1.0, "rewards/chosen": -1.8412625649943948e-05, "rewards/margins": 0.23099108040332794, "rewards/rejected": -0.23100949823856354, "step": 10520 }, { "epoch": 7.275933609958506, "grad_norm": 5.247791767120361, "learning_rate": 1.5133702166897188e-05, "log_odds_chosen": 11.184106826782227, "log_odds_ratio": -0.0001805050706025213, "logits/chosen": -0.3185228705406189, "logits/rejected": -0.2991114556789398, "logps/chosen": -0.000440732081187889, "logps/rejected": -3.0187864303588867, "loss": 0.4653, "nll_loss": 0.11631738394498825, "rewards/accuracies": 1.0, "rewards/chosen": -4.4073207391193137e-05, "rewards/margins": 0.3018345832824707, "rewards/rejected": -0.3018786907196045, "step": 10521 }, { "epoch": 7.276625172890733, "grad_norm": 3.7616748809814453, "learning_rate": 1.512986015060704e-05, "log_odds_chosen": 10.252514839172363, "log_odds_ratio": -0.00017552207282278687, "logits/chosen": -0.2603698968887329, "logits/rejected": -0.37053489685058594, "logps/chosen": -0.0002522366994526237, "logps/rejected": -1.5280945301055908, "loss": 0.3446, "nll_loss": 0.08612124621868134, "rewards/accuracies": 1.0, "rewards/chosen": -2.52236732194433e-05, "rewards/margins": 0.15278422832489014, "rewards/rejected": -0.15280945599079132, "step": 10522 }, { "epoch": 7.27731673582296, "grad_norm": 5.105588436126709, "learning_rate": 1.512601813431689e-05, "log_odds_chosen": 10.866778373718262, "log_odds_ratio": -5.1680850447155535e-05, "logits/chosen": -0.3906993865966797, "logits/rejected": -0.3765483498573303, "logps/chosen": -0.00013929187844041735, "logps/rejected": -2.2346510887145996, "loss": 0.3493, "nll_loss": 0.0873219296336174, "rewards/accuracies": 1.0, "rewards/chosen": -1.3929187844041735e-05, "rewards/margins": 0.22345119714736938, "rewards/rejected": -0.22346511483192444, "step": 10523 }, { "epoch": 7.278008298755187, "grad_norm": 5.647239685058594, "learning_rate": 1.512217611802674e-05, "log_odds_chosen": 11.40186882019043, "log_odds_ratio": -3.86638603231404e-05, "logits/chosen": -0.25338584184646606, "logits/rejected": -0.3367050588130951, "logps/chosen": -0.00011416596680646762, "logps/rejected": -2.170494318008423, "loss": 0.4294, "nll_loss": 0.10734062641859055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1416597772040404e-05, "rewards/margins": 0.21703803539276123, "rewards/rejected": -0.21704944968223572, "step": 10524 }, { "epoch": 7.2786998616874135, "grad_norm": 5.01591682434082, "learning_rate": 1.5118334101736592e-05, "log_odds_chosen": 11.633666038513184, "log_odds_ratio": -1.4664670743513852e-05, "logits/chosen": -0.44926968216896057, "logits/rejected": -0.561745285987854, "logps/chosen": -0.00010955406469292939, "logps/rejected": -2.233703136444092, "loss": 0.4516, "nll_loss": 0.11290092021226883, "rewards/accuracies": 1.0, "rewards/chosen": -1.0955405741697177e-05, "rewards/margins": 0.2233593761920929, "rewards/rejected": -0.22337032854557037, "step": 10525 }, { "epoch": 7.27939142461964, "grad_norm": 3.686668872833252, "learning_rate": 1.5114492085446445e-05, "log_odds_chosen": 10.510477066040039, "log_odds_ratio": -7.26221696822904e-05, "logits/chosen": -0.1770956963300705, "logits/rejected": -0.18479953706264496, "logps/chosen": -0.00028913721325807273, "logps/rejected": -1.9379236698150635, "loss": 0.661, "nll_loss": 0.16525492072105408, "rewards/accuracies": 1.0, "rewards/chosen": -2.8913722417200916e-05, "rewards/margins": 0.19376343488693237, "rewards/rejected": -0.19379234313964844, "step": 10526 }, { "epoch": 7.280082987551867, "grad_norm": 7.128070831298828, "learning_rate": 1.5110650069156294e-05, "log_odds_chosen": 10.678375244140625, "log_odds_ratio": -0.00012848350161220878, "logits/chosen": -0.1631132960319519, "logits/rejected": -0.17706161737442017, "logps/chosen": -0.0004405647632665932, "logps/rejected": -2.2365002632141113, "loss": 0.4725, "nll_loss": 0.11811183393001556, "rewards/accuracies": 1.0, "rewards/chosen": -4.405647632665932e-05, "rewards/margins": 0.2236059606075287, "rewards/rejected": -0.2236500382423401, "step": 10527 }, { "epoch": 7.280774550484094, "grad_norm": 5.585725784301758, "learning_rate": 1.5106808052866144e-05, "log_odds_chosen": 11.369913101196289, "log_odds_ratio": -3.4828488423954695e-05, "logits/chosen": -0.02725176513195038, "logits/rejected": -0.12657952308654785, "logps/chosen": -0.00021642667707055807, "logps/rejected": -2.4218692779541016, "loss": 0.595, "nll_loss": 0.14873793721199036, "rewards/accuracies": 1.0, "rewards/chosen": -2.164266879844945e-05, "rewards/margins": 0.24216529726982117, "rewards/rejected": -0.24218693375587463, "step": 10528 }, { "epoch": 7.281466113416321, "grad_norm": 5.846073627471924, "learning_rate": 1.5102966036575997e-05, "log_odds_chosen": 11.152416229248047, "log_odds_ratio": -0.00011589920904953033, "logits/chosen": -0.1479973942041397, "logits/rejected": -0.24840444326400757, "logps/chosen": -0.00025501666823402047, "logps/rejected": -2.243656635284424, "loss": 0.5941, "nll_loss": 0.14852215349674225, "rewards/accuracies": 1.0, "rewards/chosen": -2.5501665732008405e-05, "rewards/margins": 0.22434015572071075, "rewards/rejected": -0.22436565160751343, "step": 10529 }, { "epoch": 7.282157676348548, "grad_norm": 5.144644737243652, "learning_rate": 1.5099124020285846e-05, "log_odds_chosen": 9.871131896972656, "log_odds_ratio": -0.0003432652447372675, "logits/chosen": -0.4323476254940033, "logits/rejected": -0.432583749294281, "logps/chosen": -0.0003030068473890424, "logps/rejected": -1.5213305950164795, "loss": 0.4964, "nll_loss": 0.12406061589717865, "rewards/accuracies": 1.0, "rewards/chosen": -3.0300687285489403e-05, "rewards/margins": 0.1521027386188507, "rewards/rejected": -0.152133047580719, "step": 10530 }, { "epoch": 7.282849239280774, "grad_norm": 3.782452344894409, "learning_rate": 1.5095282003995698e-05, "log_odds_chosen": 11.423676490783691, "log_odds_ratio": -5.270875044516288e-05, "logits/chosen": 0.17047113180160522, "logits/rejected": 0.1552511751651764, "logps/chosen": -0.00021822653070557863, "logps/rejected": -2.7009034156799316, "loss": 0.4165, "nll_loss": 0.10411202162504196, "rewards/accuracies": 1.0, "rewards/chosen": -2.1822654161951505e-05, "rewards/margins": 0.27006852626800537, "rewards/rejected": -0.27009034156799316, "step": 10531 }, { "epoch": 7.283540802213001, "grad_norm": 7.957913875579834, "learning_rate": 1.509143998770555e-05, "log_odds_chosen": 10.679388999938965, "log_odds_ratio": -5.0163958803750575e-05, "logits/chosen": -0.2738475203514099, "logits/rejected": -0.2702501714229584, "logps/chosen": -0.0001700377179076895, "logps/rejected": -1.7659885883331299, "loss": 0.582, "nll_loss": 0.14549194276332855, "rewards/accuracies": 1.0, "rewards/chosen": -1.7003772882162593e-05, "rewards/margins": 0.17658185958862305, "rewards/rejected": -0.17659887671470642, "step": 10532 }, { "epoch": 7.284232365145228, "grad_norm": 5.639610290527344, "learning_rate": 1.5087597971415398e-05, "log_odds_chosen": 10.85079288482666, "log_odds_ratio": -5.8406731113791466e-05, "logits/chosen": -0.4756738245487213, "logits/rejected": -0.5125161409378052, "logps/chosen": -0.00016177997167687863, "logps/rejected": -1.9398754835128784, "loss": 0.5036, "nll_loss": 0.12588562071323395, "rewards/accuracies": 1.0, "rewards/chosen": -1.6177997167687863e-05, "rewards/margins": 0.19397136569023132, "rewards/rejected": -0.19398756325244904, "step": 10533 }, { "epoch": 7.284923928077455, "grad_norm": 4.74798583984375, "learning_rate": 1.508375595512525e-05, "log_odds_chosen": 11.382217407226562, "log_odds_ratio": -0.00015084307233337313, "logits/chosen": -0.2914195656776428, "logits/rejected": -0.2960367798805237, "logps/chosen": -0.00023178594710770994, "logps/rejected": -2.4456207752227783, "loss": 0.4181, "nll_loss": 0.10451382398605347, "rewards/accuracies": 1.0, "rewards/chosen": -2.3178594346973114e-05, "rewards/margins": 0.24453890323638916, "rewards/rejected": -0.2445620745420456, "step": 10534 }, { "epoch": 7.285615491009682, "grad_norm": 6.910724639892578, "learning_rate": 1.5079913938835103e-05, "log_odds_chosen": 11.61507797241211, "log_odds_ratio": -2.5585079129086807e-05, "logits/chosen": -0.07478839159011841, "logits/rejected": -0.11948782950639725, "logps/chosen": -0.000139494746690616, "logps/rejected": -2.8178374767303467, "loss": 0.4718, "nll_loss": 0.1179506704211235, "rewards/accuracies": 1.0, "rewards/chosen": -1.394947412336478e-05, "rewards/margins": 0.2817698121070862, "rewards/rejected": -0.2817837595939636, "step": 10535 }, { "epoch": 7.286307053941909, "grad_norm": 7.961095809936523, "learning_rate": 1.5076071922544952e-05, "log_odds_chosen": 10.0950345993042, "log_odds_ratio": -0.00036910828202962875, "logits/chosen": -0.15306870639324188, "logits/rejected": -0.12899163365364075, "logps/chosen": -0.0006089457892812788, "logps/rejected": -1.624821424484253, "loss": 0.4165, "nll_loss": 0.10408499836921692, "rewards/accuracies": 1.0, "rewards/chosen": -6.089458111091517e-05, "rewards/margins": 0.16242125630378723, "rewards/rejected": -0.1624821424484253, "step": 10536 }, { "epoch": 7.286998616874135, "grad_norm": 3.866593837738037, "learning_rate": 1.5072229906254803e-05, "log_odds_chosen": 11.116514205932617, "log_odds_ratio": -0.00020502627012319863, "logits/chosen": -0.7989804744720459, "logits/rejected": -0.7939543724060059, "logps/chosen": -0.0004685473977588117, "logps/rejected": -2.6888623237609863, "loss": 0.4112, "nll_loss": 0.10278967022895813, "rewards/accuracies": 1.0, "rewards/chosen": -4.685474050347693e-05, "rewards/margins": 0.26883938908576965, "rewards/rejected": -0.2688862383365631, "step": 10537 }, { "epoch": 7.287690179806362, "grad_norm": 3.782735586166382, "learning_rate": 1.5068387889964655e-05, "log_odds_chosen": 10.658288955688477, "log_odds_ratio": -0.0001427416573278606, "logits/chosen": -0.2742254436016083, "logits/rejected": -0.25285041332244873, "logps/chosen": -0.0005352857406251132, "logps/rejected": -2.232027053833008, "loss": 0.486, "nll_loss": 0.12149648368358612, "rewards/accuracies": 1.0, "rewards/chosen": -5.35285726073198e-05, "rewards/margins": 0.22314918041229248, "rewards/rejected": -0.22320270538330078, "step": 10538 }, { "epoch": 7.288381742738589, "grad_norm": 10.33079719543457, "learning_rate": 1.5064545873674504e-05, "log_odds_chosen": 11.432767868041992, "log_odds_ratio": -1.866390448412858e-05, "logits/chosen": -0.4904142916202545, "logits/rejected": -0.46854597330093384, "logps/chosen": -9.947362559614703e-05, "logps/rejected": -2.114985466003418, "loss": 0.5173, "nll_loss": 0.1293186992406845, "rewards/accuracies": 1.0, "rewards/chosen": -9.947362741513643e-06, "rewards/margins": 0.21148860454559326, "rewards/rejected": -0.21149854362010956, "step": 10539 }, { "epoch": 7.289073305670816, "grad_norm": 6.884215831756592, "learning_rate": 1.5060703857384357e-05, "log_odds_chosen": 11.117326736450195, "log_odds_ratio": -2.461438998579979e-05, "logits/chosen": -0.20820686221122742, "logits/rejected": -0.2118438482284546, "logps/chosen": -0.0001501316437497735, "logps/rejected": -2.3482232093811035, "loss": 0.6159, "nll_loss": 0.1539812982082367, "rewards/accuracies": 1.0, "rewards/chosen": -1.5013165466370992e-05, "rewards/margins": 0.2348073124885559, "rewards/rejected": -0.2348223179578781, "step": 10540 }, { "epoch": 7.289764868603043, "grad_norm": 5.243474960327148, "learning_rate": 1.5056861841094208e-05, "log_odds_chosen": 10.785823822021484, "log_odds_ratio": -6.247861165320501e-05, "logits/chosen": -0.42967790365219116, "logits/rejected": -0.3784506618976593, "logps/chosen": -0.0003760996332857758, "logps/rejected": -2.1447207927703857, "loss": 0.4956, "nll_loss": 0.12390033900737762, "rewards/accuracies": 1.0, "rewards/chosen": -3.760996332857758e-05, "rewards/margins": 0.21443447470664978, "rewards/rejected": -0.21447208523750305, "step": 10541 }, { "epoch": 7.29045643153527, "grad_norm": 3.6636292934417725, "learning_rate": 1.5053019824804057e-05, "log_odds_chosen": 10.340880393981934, "log_odds_ratio": -0.00022311796783469617, "logits/chosen": -0.2471560537815094, "logits/rejected": -0.27003979682922363, "logps/chosen": -0.0003549442917574197, "logps/rejected": -2.0278170108795166, "loss": 0.5427, "nll_loss": 0.13564293086528778, "rewards/accuracies": 1.0, "rewards/chosen": -3.549442772055045e-05, "rewards/margins": 0.20274618268013, "rewards/rejected": -0.20278167724609375, "step": 10542 }, { "epoch": 7.291147994467496, "grad_norm": 4.385778427124023, "learning_rate": 1.5049177808513909e-05, "log_odds_chosen": 10.510884284973145, "log_odds_ratio": -0.0002553297090344131, "logits/chosen": -0.03860250860452652, "logits/rejected": -0.07355962693691254, "logps/chosen": -0.0007388624362647533, "logps/rejected": -2.116535186767578, "loss": 0.5062, "nll_loss": 0.12653449177742004, "rewards/accuracies": 1.0, "rewards/chosen": -7.388624362647533e-05, "rewards/margins": 0.21157963573932648, "rewards/rejected": -0.21165351569652557, "step": 10543 }, { "epoch": 7.291839557399723, "grad_norm": 3.9589853286743164, "learning_rate": 1.5045335792223762e-05, "log_odds_chosen": 11.206121444702148, "log_odds_ratio": -8.039772365009412e-05, "logits/chosen": -0.4851464033126831, "logits/rejected": -0.4680294394493103, "logps/chosen": -0.0015334711642935872, "logps/rejected": -2.6300063133239746, "loss": 0.4228, "nll_loss": 0.10569702833890915, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001533471222501248, "rewards/margins": 0.26284730434417725, "rewards/rejected": -0.26300063729286194, "step": 10544 }, { "epoch": 7.29253112033195, "grad_norm": 8.450545310974121, "learning_rate": 1.504149377593361e-05, "log_odds_chosen": 10.956088066101074, "log_odds_ratio": -3.171690696035512e-05, "logits/chosen": -0.48458877205848694, "logits/rejected": -0.5116901397705078, "logps/chosen": -0.0001754190307110548, "logps/rejected": -2.214137077331543, "loss": 0.5659, "nll_loss": 0.14148221909999847, "rewards/accuracies": 1.0, "rewards/chosen": -1.7541900888318196e-05, "rewards/margins": 0.22139616310596466, "rewards/rejected": -0.22141368687152863, "step": 10545 }, { "epoch": 7.293222683264177, "grad_norm": 3.563842296600342, "learning_rate": 1.5037651759643461e-05, "log_odds_chosen": 10.58224868774414, "log_odds_ratio": -2.9876087864977308e-05, "logits/chosen": -0.21687503159046173, "logits/rejected": -0.3506883680820465, "logps/chosen": -0.00013978789502289146, "logps/rejected": -1.6868329048156738, "loss": 0.4411, "nll_loss": 0.11027465760707855, "rewards/accuracies": 1.0, "rewards/chosen": -1.3978789866087027e-05, "rewards/margins": 0.16866931319236755, "rewards/rejected": -0.16868329048156738, "step": 10546 }, { "epoch": 7.293914246196404, "grad_norm": 7.864605903625488, "learning_rate": 1.5033809743353314e-05, "log_odds_chosen": 10.986456871032715, "log_odds_ratio": -3.460807056399062e-05, "logits/chosen": -0.27556610107421875, "logits/rejected": -0.277159720659256, "logps/chosen": -0.0001903708907775581, "logps/rejected": -2.378696918487549, "loss": 0.4942, "nll_loss": 0.1235458254814148, "rewards/accuracies": 1.0, "rewards/chosen": -1.9037088350160047e-05, "rewards/margins": 0.23785068094730377, "rewards/rejected": -0.23786970973014832, "step": 10547 }, { "epoch": 7.2946058091286305, "grad_norm": 6.782561779022217, "learning_rate": 1.5029967727063163e-05, "log_odds_chosen": 10.94840145111084, "log_odds_ratio": -2.8345846658339724e-05, "logits/chosen": -0.2652517557144165, "logits/rejected": -0.3147188425064087, "logps/chosen": -0.001249704509973526, "logps/rejected": -2.646296501159668, "loss": 0.3847, "nll_loss": 0.09618037939071655, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001249704509973526, "rewards/margins": 0.26450470089912415, "rewards/rejected": -0.26462966203689575, "step": 10548 }, { "epoch": 7.295297372060857, "grad_norm": 12.487119674682617, "learning_rate": 1.5026125710773015e-05, "log_odds_chosen": 11.011866569519043, "log_odds_ratio": -3.6507648474071175e-05, "logits/chosen": -0.6930927038192749, "logits/rejected": -0.7526741623878479, "logps/chosen": -0.00017669968656264246, "logps/rejected": -2.299051284790039, "loss": 0.4072, "nll_loss": 0.10179228335618973, "rewards/accuracies": 1.0, "rewards/chosen": -1.7669968656264246e-05, "rewards/margins": 0.229887455701828, "rewards/rejected": -0.2299051433801651, "step": 10549 }, { "epoch": 7.295988934993084, "grad_norm": 6.350553035736084, "learning_rate": 1.5022283694482866e-05, "log_odds_chosen": 11.08919620513916, "log_odds_ratio": -5.873236295883544e-05, "logits/chosen": -0.28959664702415466, "logits/rejected": -0.30707934498786926, "logps/chosen": -0.0004931816947646439, "logps/rejected": -2.704843759536743, "loss": 0.4858, "nll_loss": 0.12144720554351807, "rewards/accuracies": 1.0, "rewards/chosen": -4.9318172386847436e-05, "rewards/margins": 0.27043506503105164, "rewards/rejected": -0.27048438787460327, "step": 10550 }, { "epoch": 7.296680497925311, "grad_norm": 5.712388038635254, "learning_rate": 1.5018441678192715e-05, "log_odds_chosen": 11.027812957763672, "log_odds_ratio": -6.843291339464486e-05, "logits/chosen": -0.6185637712478638, "logits/rejected": -0.6799907684326172, "logps/chosen": -0.0002997862466145307, "logps/rejected": -2.2454919815063477, "loss": 0.5096, "nll_loss": 0.12738478183746338, "rewards/accuracies": 1.0, "rewards/chosen": -2.997862611664459e-05, "rewards/margins": 0.22451920807361603, "rewards/rejected": -0.22454921901226044, "step": 10551 }, { "epoch": 7.297372060857538, "grad_norm": 5.3984222412109375, "learning_rate": 1.5014599661902568e-05, "log_odds_chosen": 10.676874160766602, "log_odds_ratio": -7.660678238607943e-05, "logits/chosen": -0.5980682373046875, "logits/rejected": -0.7237865924835205, "logps/chosen": -0.0003695039777085185, "logps/rejected": -2.3640284538269043, "loss": 0.424, "nll_loss": 0.10600431263446808, "rewards/accuracies": 1.0, "rewards/chosen": -3.695040140883066e-05, "rewards/margins": 0.2363658845424652, "rewards/rejected": -0.23640283942222595, "step": 10552 }, { "epoch": 7.298063623789765, "grad_norm": 5.493939399719238, "learning_rate": 1.501075764561242e-05, "log_odds_chosen": 10.982223510742188, "log_odds_ratio": -5.2999013860244304e-05, "logits/chosen": -0.49189019203186035, "logits/rejected": -0.5520755648612976, "logps/chosen": -0.000196084423805587, "logps/rejected": -2.34517765045166, "loss": 1.0095, "nll_loss": 0.2523600459098816, "rewards/accuracies": 1.0, "rewards/chosen": -1.9608443835750222e-05, "rewards/margins": 0.23449814319610596, "rewards/rejected": -0.23451778292655945, "step": 10553 }, { "epoch": 7.2987551867219915, "grad_norm": 8.220337867736816, "learning_rate": 1.5006915629322269e-05, "log_odds_chosen": 9.907423973083496, "log_odds_ratio": -0.00023567386961076409, "logits/chosen": -0.43401268124580383, "logits/rejected": -0.4864483177661896, "logps/chosen": -0.0004938674974255264, "logps/rejected": -2.051173686981201, "loss": 0.7212, "nll_loss": 0.1802859604358673, "rewards/accuracies": 1.0, "rewards/chosen": -4.9386748287361115e-05, "rewards/margins": 0.2050679624080658, "rewards/rejected": -0.2051173746585846, "step": 10554 }, { "epoch": 7.299446749654218, "grad_norm": 8.015789031982422, "learning_rate": 1.500307361303212e-05, "log_odds_chosen": 10.712055206298828, "log_odds_ratio": -3.786015076912008e-05, "logits/chosen": -0.363398015499115, "logits/rejected": -0.5355995893478394, "logps/chosen": -0.0001271214132430032, "logps/rejected": -1.8779047727584839, "loss": 0.9029, "nll_loss": 0.22571158409118652, "rewards/accuracies": 1.0, "rewards/chosen": -1.2712141142401379e-05, "rewards/margins": 0.1877777874469757, "rewards/rejected": -0.18779048323631287, "step": 10555 }, { "epoch": 7.300138312586445, "grad_norm": 5.6773905754089355, "learning_rate": 1.4999231596741972e-05, "log_odds_chosen": 12.072305679321289, "log_odds_ratio": -9.910212611430325e-06, "logits/chosen": -0.5058807730674744, "logits/rejected": -0.5540425777435303, "logps/chosen": -0.0003676057094708085, "logps/rejected": -3.347212791442871, "loss": 0.5019, "nll_loss": 0.12547825276851654, "rewards/accuracies": 1.0, "rewards/chosen": -3.6760568036697805e-05, "rewards/margins": 0.33468449115753174, "rewards/rejected": -0.33472126722335815, "step": 10556 }, { "epoch": 7.300829875518672, "grad_norm": 5.691203594207764, "learning_rate": 1.4995389580451821e-05, "log_odds_chosen": 9.261990547180176, "log_odds_ratio": -0.1274312138557434, "logits/chosen": -0.5108420848846436, "logits/rejected": -0.5427403450012207, "logps/chosen": -0.01616716757416725, "logps/rejected": -1.4746990203857422, "loss": 0.4642, "nll_loss": 0.10331307351589203, "rewards/accuracies": 0.875, "rewards/chosen": -0.001616716617718339, "rewards/margins": 0.145853191614151, "rewards/rejected": -0.1474699079990387, "step": 10557 }, { "epoch": 7.301521438450899, "grad_norm": 6.426586627960205, "learning_rate": 1.4991547564161674e-05, "log_odds_chosen": 9.163455963134766, "log_odds_ratio": -0.0005875678616575897, "logits/chosen": -0.4957526922225952, "logits/rejected": -0.4884418547153473, "logps/chosen": -0.000682390877045691, "logps/rejected": -1.4966734647750854, "loss": 0.2752, "nll_loss": 0.0687483549118042, "rewards/accuracies": 1.0, "rewards/chosen": -6.823908188380301e-05, "rewards/margins": 0.1495991051197052, "rewards/rejected": -0.14966735243797302, "step": 10558 }, { "epoch": 7.302213001383126, "grad_norm": 6.857354640960693, "learning_rate": 1.4987705547871524e-05, "log_odds_chosen": 10.375746726989746, "log_odds_ratio": -9.528575174044818e-05, "logits/chosen": -0.32422560453414917, "logits/rejected": -0.4233476221561432, "logps/chosen": -0.00015979795716702938, "logps/rejected": -1.7596766948699951, "loss": 0.4444, "nll_loss": 0.11109989881515503, "rewards/accuracies": 1.0, "rewards/chosen": -1.5979794625309296e-05, "rewards/margins": 0.17595168948173523, "rewards/rejected": -0.17596766352653503, "step": 10559 }, { "epoch": 7.3029045643153525, "grad_norm": 5.945572376251221, "learning_rate": 1.4983863531581374e-05, "log_odds_chosen": 11.158388137817383, "log_odds_ratio": -5.777521801064722e-05, "logits/chosen": 0.023258313536643982, "logits/rejected": 0.0119294673204422, "logps/chosen": -0.0002473669301252812, "logps/rejected": -2.618809700012207, "loss": 0.5432, "nll_loss": 0.13579916954040527, "rewards/accuracies": 1.0, "rewards/chosen": -2.47366915573366e-05, "rewards/margins": 0.26185622811317444, "rewards/rejected": -0.2618809640407562, "step": 10560 }, { "epoch": 7.303596127247579, "grad_norm": 13.444534301757812, "learning_rate": 1.4980021515291226e-05, "log_odds_chosen": 10.43269157409668, "log_odds_ratio": -0.00010532377928029746, "logits/chosen": -0.7283393144607544, "logits/rejected": -0.8246185779571533, "logps/chosen": -0.00027138530276715755, "logps/rejected": -2.0439107418060303, "loss": 0.3743, "nll_loss": 0.09355422109365463, "rewards/accuracies": 1.0, "rewards/chosen": -2.7138530640513636e-05, "rewards/margins": 0.20436394214630127, "rewards/rejected": -0.20439106225967407, "step": 10561 }, { "epoch": 7.304287690179806, "grad_norm": 3.034716844558716, "learning_rate": 1.4976179499001078e-05, "log_odds_chosen": 11.042951583862305, "log_odds_ratio": -4.987595821148716e-05, "logits/chosen": -0.3208976089954376, "logits/rejected": -0.383137583732605, "logps/chosen": -0.000426338316174224, "logps/rejected": -2.371851682662964, "loss": 0.44, "nll_loss": 0.10998556017875671, "rewards/accuracies": 1.0, "rewards/chosen": -4.2633833800209686e-05, "rewards/margins": 0.23714253306388855, "rewards/rejected": -0.23718518018722534, "step": 10562 }, { "epoch": 7.304979253112033, "grad_norm": 5.60660457611084, "learning_rate": 1.4972337482710927e-05, "log_odds_chosen": 11.074758529663086, "log_odds_ratio": -7.649294275324792e-05, "logits/chosen": -0.26657915115356445, "logits/rejected": -0.3863256275653839, "logps/chosen": -0.0003114904393441975, "logps/rejected": -2.4336323738098145, "loss": 0.509, "nll_loss": 0.12723150849342346, "rewards/accuracies": 1.0, "rewards/chosen": -3.114904757239856e-05, "rewards/margins": 0.24333205819129944, "rewards/rejected": -0.24336321651935577, "step": 10563 }, { "epoch": 7.30567081604426, "grad_norm": 6.80715274810791, "learning_rate": 1.4968495466420778e-05, "log_odds_chosen": 10.559022903442383, "log_odds_ratio": -0.00011177535634487867, "logits/chosen": -0.3276813328266144, "logits/rejected": -0.44247764348983765, "logps/chosen": -0.00020129492622800171, "logps/rejected": -1.7909127473831177, "loss": 0.3557, "nll_loss": 0.08892100304365158, "rewards/accuracies": 1.0, "rewards/chosen": -2.0129493350395933e-05, "rewards/margins": 0.17907115817070007, "rewards/rejected": -0.17909128963947296, "step": 10564 }, { "epoch": 7.306362378976487, "grad_norm": 4.869068622589111, "learning_rate": 1.4964653450130627e-05, "log_odds_chosen": 11.06036376953125, "log_odds_ratio": -0.00016165415581781417, "logits/chosen": -0.48836928606033325, "logits/rejected": -0.5202836990356445, "logps/chosen": -0.0002001393004320562, "logps/rejected": -2.154864549636841, "loss": 0.5012, "nll_loss": 0.12527495622634888, "rewards/accuracies": 1.0, "rewards/chosen": -2.0013932953588665e-05, "rewards/margins": 0.2154664397239685, "rewards/rejected": -0.21548645198345184, "step": 10565 }, { "epoch": 7.3070539419087135, "grad_norm": 9.951133728027344, "learning_rate": 1.496081143384048e-05, "log_odds_chosen": 12.023983001708984, "log_odds_ratio": -6.627372204093263e-05, "logits/chosen": -0.19025403261184692, "logits/rejected": -0.2676059603691101, "logps/chosen": -0.0003929885569959879, "logps/rejected": -3.4180500507354736, "loss": 0.6028, "nll_loss": 0.1506935954093933, "rewards/accuracies": 1.0, "rewards/chosen": -3.9298858609981835e-05, "rewards/margins": 0.34176570177078247, "rewards/rejected": -0.34180501103401184, "step": 10566 }, { "epoch": 7.30774550484094, "grad_norm": 7.036557674407959, "learning_rate": 1.4956969417550332e-05, "log_odds_chosen": 10.718587875366211, "log_odds_ratio": -9.550920367473736e-05, "logits/chosen": -0.44108590483665466, "logits/rejected": -0.37370753288269043, "logps/chosen": -0.00017977158131543547, "logps/rejected": -1.9299838542938232, "loss": 0.7186, "nll_loss": 0.17963513731956482, "rewards/accuracies": 1.0, "rewards/chosen": -1.7977157767745666e-05, "rewards/margins": 0.19298040866851807, "rewards/rejected": -0.19299837946891785, "step": 10567 }, { "epoch": 7.308437067773167, "grad_norm": 8.872588157653809, "learning_rate": 1.4953127401260181e-05, "log_odds_chosen": 11.814168930053711, "log_odds_ratio": -2.9991559131303802e-05, "logits/chosen": 0.08540153503417969, "logits/rejected": -0.05404900014400482, "logps/chosen": -0.00012598311877809465, "logps/rejected": -2.4559972286224365, "loss": 0.6095, "nll_loss": 0.15238115191459656, "rewards/accuracies": 1.0, "rewards/chosen": -1.2598312423506286e-05, "rewards/margins": 0.24558714032173157, "rewards/rejected": -0.24559973180294037, "step": 10568 }, { "epoch": 7.309128630705394, "grad_norm": 4.762966632843018, "learning_rate": 1.4949285384970032e-05, "log_odds_chosen": 11.357812881469727, "log_odds_ratio": -5.139112181495875e-05, "logits/chosen": 0.04269764572381973, "logits/rejected": -0.12028162181377411, "logps/chosen": -0.00012316771608311683, "logps/rejected": -2.1897315979003906, "loss": 0.4882, "nll_loss": 0.12203666567802429, "rewards/accuracies": 1.0, "rewards/chosen": -1.2316771062614862e-05, "rewards/margins": 0.21896082162857056, "rewards/rejected": -0.21897317469120026, "step": 10569 }, { "epoch": 7.309820193637621, "grad_norm": 2.450059652328491, "learning_rate": 1.4945443368679884e-05, "log_odds_chosen": 10.953805923461914, "log_odds_ratio": -4.3371266656322405e-05, "logits/chosen": -0.635241687297821, "logits/rejected": -0.5566953420639038, "logps/chosen": -0.00017698638839647174, "logps/rejected": -2.116021156311035, "loss": 0.3111, "nll_loss": 0.07777298986911774, "rewards/accuracies": 1.0, "rewards/chosen": -1.7698639567242935e-05, "rewards/margins": 0.2115844190120697, "rewards/rejected": -0.21160215139389038, "step": 10570 }, { "epoch": 7.310511756569848, "grad_norm": 3.795652389526367, "learning_rate": 1.4941601352389733e-05, "log_odds_chosen": 10.717495918273926, "log_odds_ratio": -0.0009428179473616183, "logits/chosen": -0.165732741355896, "logits/rejected": -0.22573032975196838, "logps/chosen": -0.0009718775982037187, "logps/rejected": -2.1593799591064453, "loss": 0.3906, "nll_loss": 0.09756524860858917, "rewards/accuracies": 1.0, "rewards/chosen": -9.718775982037187e-05, "rewards/margins": 0.21584078669548035, "rewards/rejected": -0.21593798696994781, "step": 10571 }, { "epoch": 7.3112033195020745, "grad_norm": 4.54644250869751, "learning_rate": 1.4937759336099586e-05, "log_odds_chosen": 11.126632690429688, "log_odds_ratio": -3.421502697165124e-05, "logits/chosen": -0.4148055911064148, "logits/rejected": -0.5459097623825073, "logps/chosen": -0.00022523957886733115, "logps/rejected": -2.3449649810791016, "loss": 0.3978, "nll_loss": 0.0994362086057663, "rewards/accuracies": 1.0, "rewards/chosen": -2.25239600695204e-05, "rewards/margins": 0.23447395861148834, "rewards/rejected": -0.23449648916721344, "step": 10572 }, { "epoch": 7.311894882434301, "grad_norm": 4.444427967071533, "learning_rate": 1.4933917319809437e-05, "log_odds_chosen": 10.908353805541992, "log_odds_ratio": -2.464960743964184e-05, "logits/chosen": -0.10387469083070755, "logits/rejected": -0.16769036650657654, "logps/chosen": -0.00013196248619351536, "logps/rejected": -1.9175297021865845, "loss": 0.4857, "nll_loss": 0.12142340838909149, "rewards/accuracies": 1.0, "rewards/chosen": -1.3196249710745178e-05, "rewards/margins": 0.19173979759216309, "rewards/rejected": -0.19175297021865845, "step": 10573 }, { "epoch": 7.312586445366528, "grad_norm": 4.917417049407959, "learning_rate": 1.4930075303519286e-05, "log_odds_chosen": 10.515276908874512, "log_odds_ratio": -0.00024476443650200963, "logits/chosen": -0.580511748790741, "logits/rejected": -0.525976300239563, "logps/chosen": -0.001163422828540206, "logps/rejected": -2.875986099243164, "loss": 0.5104, "nll_loss": 0.12757620215415955, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011634228576440364, "rewards/margins": 0.28748229146003723, "rewards/rejected": -0.2875986099243164, "step": 10574 }, { "epoch": 7.313278008298755, "grad_norm": 5.778778553009033, "learning_rate": 1.4926233287229138e-05, "log_odds_chosen": 11.418848037719727, "log_odds_ratio": -2.969609704450704e-05, "logits/chosen": -0.08569745719432831, "logits/rejected": -0.1920081079006195, "logps/chosen": -0.00034481758484616876, "logps/rejected": -3.314943313598633, "loss": 0.5071, "nll_loss": 0.12677092850208282, "rewards/accuracies": 1.0, "rewards/chosen": -3.44817599398084e-05, "rewards/margins": 0.3314598798751831, "rewards/rejected": -0.33149436116218567, "step": 10575 }, { "epoch": 7.313969571230982, "grad_norm": 5.823296070098877, "learning_rate": 1.492239127093899e-05, "log_odds_chosen": 11.454782485961914, "log_odds_ratio": -2.6344980142312124e-05, "logits/chosen": -0.16251061856746674, "logits/rejected": -0.06658054888248444, "logps/chosen": -0.00016875607252586633, "logps/rejected": -2.35142183303833, "loss": 0.5564, "nll_loss": 0.13910400867462158, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875606888788752e-05, "rewards/margins": 0.23512530326843262, "rewards/rejected": -0.23514218628406525, "step": 10576 }, { "epoch": 7.314661134163209, "grad_norm": 3.9457826614379883, "learning_rate": 1.491854925464884e-05, "log_odds_chosen": 9.852853775024414, "log_odds_ratio": -0.00029724909109063447, "logits/chosen": -0.303438663482666, "logits/rejected": -0.3021944761276245, "logps/chosen": -0.0003416346153244376, "logps/rejected": -2.00277042388916, "loss": 0.6604, "nll_loss": 0.16506357491016388, "rewards/accuracies": 1.0, "rewards/chosen": -3.416346226003952e-05, "rewards/margins": 0.20024290680885315, "rewards/rejected": -0.20027706027030945, "step": 10577 }, { "epoch": 7.3153526970954355, "grad_norm": 10.15293025970459, "learning_rate": 1.491470723835869e-05, "log_odds_chosen": 9.349213600158691, "log_odds_ratio": -0.0002637461293488741, "logits/chosen": -0.38515281677246094, "logits/rejected": -0.3794807195663452, "logps/chosen": -0.0004498792113736272, "logps/rejected": -1.5653575658798218, "loss": 0.5365, "nll_loss": 0.13410302996635437, "rewards/accuracies": 1.0, "rewards/chosen": -4.498792259255424e-05, "rewards/margins": 0.1564907729625702, "rewards/rejected": -0.1565357744693756, "step": 10578 }, { "epoch": 7.316044260027662, "grad_norm": 3.717820167541504, "learning_rate": 1.4910865222068543e-05, "log_odds_chosen": 10.480566024780273, "log_odds_ratio": -9.45752690313384e-05, "logits/chosen": -0.3336294889450073, "logits/rejected": -0.38219499588012695, "logps/chosen": -0.0002480067778378725, "logps/rejected": -1.8441063165664673, "loss": 0.4326, "nll_loss": 0.10814318060874939, "rewards/accuracies": 1.0, "rewards/chosen": -2.480068178556394e-05, "rewards/margins": 0.18438583612442017, "rewards/rejected": -0.18441063165664673, "step": 10579 }, { "epoch": 7.316735822959889, "grad_norm": 4.929460048675537, "learning_rate": 1.4907023205778392e-05, "log_odds_chosen": 9.904447555541992, "log_odds_ratio": -0.0005250414833426476, "logits/chosen": -0.24390114843845367, "logits/rejected": -0.2871720492839813, "logps/chosen": -0.0018281986704096198, "logps/rejected": -1.694327473640442, "loss": 0.401, "nll_loss": 0.10020306706428528, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001828198874136433, "rewards/margins": 0.16924992203712463, "rewards/rejected": -0.16943272948265076, "step": 10580 }, { "epoch": 7.317427385892116, "grad_norm": 4.24871301651001, "learning_rate": 1.4903181189488244e-05, "log_odds_chosen": 9.435662269592285, "log_odds_ratio": -0.00046115496661514044, "logits/chosen": -0.2529515027999878, "logits/rejected": -0.2180534303188324, "logps/chosen": -0.0009722278919070959, "logps/rejected": -1.756400227546692, "loss": 0.5971, "nll_loss": 0.14923420548439026, "rewards/accuracies": 1.0, "rewards/chosen": -9.722278628032655e-05, "rewards/margins": 0.17554278671741486, "rewards/rejected": -0.1756400167942047, "step": 10581 }, { "epoch": 7.318118948824343, "grad_norm": 12.383216857910156, "learning_rate": 1.4899339173198095e-05, "log_odds_chosen": 11.341604232788086, "log_odds_ratio": -9.020642755785957e-05, "logits/chosen": -0.24451106786727905, "logits/rejected": -0.37076500058174133, "logps/chosen": -0.000519920198712498, "logps/rejected": -2.194770097732544, "loss": 0.576, "nll_loss": 0.1440034657716751, "rewards/accuracies": 1.0, "rewards/chosen": -5.199202132644132e-05, "rewards/margins": 0.2194250226020813, "rewards/rejected": -0.21947701275348663, "step": 10582 }, { "epoch": 7.31881051175657, "grad_norm": 5.260334014892578, "learning_rate": 1.4895497156907944e-05, "log_odds_chosen": 10.190016746520996, "log_odds_ratio": -0.00013451275299303234, "logits/chosen": -0.4095885157585144, "logits/rejected": -0.5194642543792725, "logps/chosen": -0.0005643427721224725, "logps/rejected": -1.9316439628601074, "loss": 0.3941, "nll_loss": 0.09850853681564331, "rewards/accuracies": 1.0, "rewards/chosen": -5.643428085022606e-05, "rewards/margins": 0.1931079626083374, "rewards/rejected": -0.1931644082069397, "step": 10583 }, { "epoch": 7.319502074688796, "grad_norm": 3.2225735187530518, "learning_rate": 1.4891655140617797e-05, "log_odds_chosen": 10.30554485321045, "log_odds_ratio": -0.0003724767011590302, "logits/chosen": -0.6669608950614929, "logits/rejected": -0.5772339105606079, "logps/chosen": -0.00026067826547659934, "logps/rejected": -1.8772404193878174, "loss": 0.6247, "nll_loss": 0.15613077580928802, "rewards/accuracies": 1.0, "rewards/chosen": -2.6067828002851456e-05, "rewards/margins": 0.18769796192646027, "rewards/rejected": -0.1877240240573883, "step": 10584 }, { "epoch": 7.320193637621023, "grad_norm": 5.20518159866333, "learning_rate": 1.4887813124327649e-05, "log_odds_chosen": 10.33845329284668, "log_odds_ratio": -6.268893775995821e-05, "logits/chosen": -0.2971196174621582, "logits/rejected": -0.38388875126838684, "logps/chosen": -0.000514318177010864, "logps/rejected": -2.25443959236145, "loss": 0.4866, "nll_loss": 0.12163745611906052, "rewards/accuracies": 1.0, "rewards/chosen": -5.143181624589488e-05, "rewards/margins": 0.22539252042770386, "rewards/rejected": -0.2254439741373062, "step": 10585 }, { "epoch": 7.32088520055325, "grad_norm": 7.644906044006348, "learning_rate": 1.4883971108037498e-05, "log_odds_chosen": 11.65134334564209, "log_odds_ratio": -0.00015778436500113457, "logits/chosen": -0.14399686455726624, "logits/rejected": -0.23336483538150787, "logps/chosen": -0.00014643429312855005, "logps/rejected": -2.5161826610565186, "loss": 0.4515, "nll_loss": 0.11286191642284393, "rewards/accuracies": 1.0, "rewards/chosen": -1.4643430404248647e-05, "rewards/margins": 0.2516036331653595, "rewards/rejected": -0.25161826610565186, "step": 10586 }, { "epoch": 7.321576763485477, "grad_norm": 5.68562650680542, "learning_rate": 1.4880129091747349e-05, "log_odds_chosen": 10.962752342224121, "log_odds_ratio": -9.493598918197677e-05, "logits/chosen": -0.562821626663208, "logits/rejected": -0.5519170165061951, "logps/chosen": -0.0001463395165046677, "logps/rejected": -2.0623018741607666, "loss": 0.4406, "nll_loss": 0.11013630032539368, "rewards/accuracies": 1.0, "rewards/chosen": -1.463395165046677e-05, "rewards/margins": 0.20621556043624878, "rewards/rejected": -0.20623019337654114, "step": 10587 }, { "epoch": 7.322268326417704, "grad_norm": 8.166015625, "learning_rate": 1.4876287075457201e-05, "log_odds_chosen": 11.878768920898438, "log_odds_ratio": -1.8437514881952666e-05, "logits/chosen": -0.4738050103187561, "logits/rejected": -0.4197632670402527, "logps/chosen": -0.00016072619473561645, "logps/rejected": -2.418221950531006, "loss": 0.4483, "nll_loss": 0.11207035183906555, "rewards/accuracies": 1.0, "rewards/chosen": -1.6072621292551048e-05, "rewards/margins": 0.24180611968040466, "rewards/rejected": -0.24182221293449402, "step": 10588 }, { "epoch": 7.322959889349931, "grad_norm": 3.789522171020508, "learning_rate": 1.487244505916705e-05, "log_odds_chosen": 11.342962265014648, "log_odds_ratio": -0.00010516175098018721, "logits/chosen": -0.014583747833967209, "logits/rejected": 0.013038881123065948, "logps/chosen": -0.00019708505715243518, "logps/rejected": -2.7194290161132812, "loss": 0.5038, "nll_loss": 0.1259286105632782, "rewards/accuracies": 1.0, "rewards/chosen": -1.970850644283928e-05, "rewards/margins": 0.2719232141971588, "rewards/rejected": -0.2719429135322571, "step": 10589 }, { "epoch": 7.323651452282157, "grad_norm": 7.632386207580566, "learning_rate": 1.4868603042876903e-05, "log_odds_chosen": 11.190752029418945, "log_odds_ratio": -4.420655750436708e-05, "logits/chosen": -0.17970700562000275, "logits/rejected": -0.13008803129196167, "logps/chosen": -0.0006894480320625007, "logps/rejected": -2.4720520973205566, "loss": 0.7858, "nll_loss": 0.19643478095531464, "rewards/accuracies": 1.0, "rewards/chosen": -6.894480611663312e-05, "rewards/margins": 0.24713626503944397, "rewards/rejected": -0.2472052276134491, "step": 10590 }, { "epoch": 7.324343015214384, "grad_norm": 4.327857494354248, "learning_rate": 1.4864761026586754e-05, "log_odds_chosen": 11.098254203796387, "log_odds_ratio": -4.739519135910086e-05, "logits/chosen": -0.4026983976364136, "logits/rejected": -0.36631515622138977, "logps/chosen": -0.0002123128215316683, "logps/rejected": -2.294590473175049, "loss": 0.4672, "nll_loss": 0.11679333448410034, "rewards/accuracies": 1.0, "rewards/chosen": -2.1231280697975308e-05, "rewards/margins": 0.22943781316280365, "rewards/rejected": -0.22945904731750488, "step": 10591 }, { "epoch": 7.325034578146611, "grad_norm": 4.4742255210876465, "learning_rate": 1.4860919010296603e-05, "log_odds_chosen": 11.094287872314453, "log_odds_ratio": -3.746439324459061e-05, "logits/chosen": 0.13466286659240723, "logits/rejected": 0.18415427207946777, "logps/chosen": -0.00044445990351960063, "logps/rejected": -2.663757801055908, "loss": 0.4439, "nll_loss": 0.11096180230379105, "rewards/accuracies": 1.0, "rewards/chosen": -4.444599471753463e-05, "rewards/margins": 0.26633134484291077, "rewards/rejected": -0.2663758099079132, "step": 10592 }, { "epoch": 7.325726141078838, "grad_norm": 10.97510814666748, "learning_rate": 1.4857076994006455e-05, "log_odds_chosen": 10.561748504638672, "log_odds_ratio": -6.68507200316526e-05, "logits/chosen": -0.11066879332065582, "logits/rejected": -0.17471560835838318, "logps/chosen": -0.00010785304039018229, "logps/rejected": -1.5898044109344482, "loss": 0.5916, "nll_loss": 0.14789864420890808, "rewards/accuracies": 1.0, "rewards/chosen": -1.0785303857119288e-05, "rewards/margins": 0.15896965563297272, "rewards/rejected": -0.15898045897483826, "step": 10593 }, { "epoch": 7.326417704011065, "grad_norm": 15.438594818115234, "learning_rate": 1.4853234977716307e-05, "log_odds_chosen": 10.610740661621094, "log_odds_ratio": -9.984053031075746e-05, "logits/chosen": -0.08416008949279785, "logits/rejected": -0.13556626439094543, "logps/chosen": -0.0005519052501767874, "logps/rejected": -2.215517044067383, "loss": 0.5154, "nll_loss": 0.12885132431983948, "rewards/accuracies": 1.0, "rewards/chosen": -5.519052501767874e-05, "rewards/margins": 0.22149652242660522, "rewards/rejected": -0.22155171632766724, "step": 10594 }, { "epoch": 7.327109266943292, "grad_norm": 5.175950527191162, "learning_rate": 1.4849392961426157e-05, "log_odds_chosen": 10.522294044494629, "log_odds_ratio": -6.946315988898277e-05, "logits/chosen": -0.042398273944854736, "logits/rejected": -0.09602394700050354, "logps/chosen": -0.00017671106616035104, "logps/rejected": -1.894153356552124, "loss": 0.7866, "nll_loss": 0.19665397703647614, "rewards/accuracies": 1.0, "rewards/chosen": -1.7671107343630865e-05, "rewards/margins": 0.1893976628780365, "rewards/rejected": -0.1894153356552124, "step": 10595 }, { "epoch": 7.327800829875518, "grad_norm": 7.015600204467773, "learning_rate": 1.4845550945136007e-05, "log_odds_chosen": 10.751202583312988, "log_odds_ratio": -0.0008175495895557106, "logits/chosen": -0.42600420117378235, "logits/rejected": -0.48659491539001465, "logps/chosen": -0.00036460478440858424, "logps/rejected": -1.832169532775879, "loss": 0.5799, "nll_loss": 0.14488884806632996, "rewards/accuracies": 1.0, "rewards/chosen": -3.6460478440858424e-05, "rewards/margins": 0.18318049609661102, "rewards/rejected": -0.18321695923805237, "step": 10596 }, { "epoch": 7.328492392807745, "grad_norm": 6.0024566650390625, "learning_rate": 1.484170892884586e-05, "log_odds_chosen": 11.78825569152832, "log_odds_ratio": -1.578919000166934e-05, "logits/chosen": -0.17290112376213074, "logits/rejected": -0.2185828983783722, "logps/chosen": -0.00018440670100972056, "logps/rejected": -2.6440727710723877, "loss": 0.4376, "nll_loss": 0.10940653085708618, "rewards/accuracies": 1.0, "rewards/chosen": -1.8440670828567818e-05, "rewards/margins": 0.2643888294696808, "rewards/rejected": -0.26440727710723877, "step": 10597 }, { "epoch": 7.329183955739972, "grad_norm": 4.82000207901001, "learning_rate": 1.4837866912555709e-05, "log_odds_chosen": 10.55043888092041, "log_odds_ratio": -0.000115828966954723, "logits/chosen": -0.004259809851646423, "logits/rejected": -0.05958893150091171, "logps/chosen": -0.0005333904991857708, "logps/rejected": -2.640122413635254, "loss": 0.4736, "nll_loss": 0.11838662624359131, "rewards/accuracies": 1.0, "rewards/chosen": -5.333905210136436e-05, "rewards/margins": 0.2639588713645935, "rewards/rejected": -0.2640122175216675, "step": 10598 }, { "epoch": 7.329875518672199, "grad_norm": 5.208899021148682, "learning_rate": 1.4834024896265561e-05, "log_odds_chosen": 11.910272598266602, "log_odds_ratio": -1.2390030860842671e-05, "logits/chosen": -0.34170472621917725, "logits/rejected": -0.3993207812309265, "logps/chosen": -0.000160014649736695, "logps/rejected": -2.8986268043518066, "loss": 0.5649, "nll_loss": 0.14122623205184937, "rewards/accuracies": 1.0, "rewards/chosen": -1.6001464246073738e-05, "rewards/margins": 0.2898466885089874, "rewards/rejected": -0.2898626923561096, "step": 10599 }, { "epoch": 7.330567081604426, "grad_norm": 4.279346466064453, "learning_rate": 1.4830182879975412e-05, "log_odds_chosen": 11.154391288757324, "log_odds_ratio": -3.705886047100648e-05, "logits/chosen": 0.053722962737083435, "logits/rejected": -0.03543657436966896, "logps/chosen": -0.00023111490008886904, "logps/rejected": -2.4170563220977783, "loss": 0.3872, "nll_loss": 0.09679973125457764, "rewards/accuracies": 1.0, "rewards/chosen": -2.31114881898975e-05, "rewards/margins": 0.2416825294494629, "rewards/rejected": -0.24170565605163574, "step": 10600 }, { "epoch": 7.3312586445366525, "grad_norm": 6.050769805908203, "learning_rate": 1.4826340863685263e-05, "log_odds_chosen": 10.852587699890137, "log_odds_ratio": -5.3520489018410444e-05, "logits/chosen": -0.14165736734867096, "logits/rejected": -0.25913006067276, "logps/chosen": -0.00013877694436814636, "logps/rejected": -1.9309008121490479, "loss": 0.4902, "nll_loss": 0.12254425883293152, "rewards/accuracies": 1.0, "rewards/chosen": -1.3877694982511457e-05, "rewards/margins": 0.1930762082338333, "rewards/rejected": -0.19309008121490479, "step": 10601 }, { "epoch": 7.331950207468879, "grad_norm": 3.462172031402588, "learning_rate": 1.4822498847395113e-05, "log_odds_chosen": 11.225442886352539, "log_odds_ratio": -4.436544986674562e-05, "logits/chosen": -0.42905306816101074, "logits/rejected": -0.5086164474487305, "logps/chosen": -7.468480907846242e-05, "logps/rejected": -1.767258644104004, "loss": 0.2998, "nll_loss": 0.07493775337934494, "rewards/accuracies": 1.0, "rewards/chosen": -7.468481726391474e-06, "rewards/margins": 0.17671838402748108, "rewards/rejected": -0.1767258644104004, "step": 10602 }, { "epoch": 7.332641770401106, "grad_norm": 5.805577754974365, "learning_rate": 1.4818656831104966e-05, "log_odds_chosen": 11.116992950439453, "log_odds_ratio": -3.2158764952328056e-05, "logits/chosen": -0.07801266014575958, "logits/rejected": -0.24215415120124817, "logps/chosen": -0.0002241364272776991, "logps/rejected": -2.7057204246520996, "loss": 0.4149, "nll_loss": 0.10372462868690491, "rewards/accuracies": 1.0, "rewards/chosen": -2.241364200017415e-05, "rewards/margins": 0.27054962515830994, "rewards/rejected": -0.2705720365047455, "step": 10603 }, { "epoch": 7.333333333333333, "grad_norm": 6.583235740661621, "learning_rate": 1.4814814814814815e-05, "log_odds_chosen": 10.986959457397461, "log_odds_ratio": -5.965959280729294e-05, "logits/chosen": -0.23258370161056519, "logits/rejected": -0.25661396980285645, "logps/chosen": -0.00028355675749480724, "logps/rejected": -2.7544238567352295, "loss": 0.5009, "nll_loss": 0.12522797286510468, "rewards/accuracies": 1.0, "rewards/chosen": -2.8355676477076486e-05, "rewards/margins": 0.27541404962539673, "rewards/rejected": -0.2754423916339874, "step": 10604 }, { "epoch": 7.33402489626556, "grad_norm": 4.308938026428223, "learning_rate": 1.4810972798524667e-05, "log_odds_chosen": 11.198945999145508, "log_odds_ratio": -1.4784338418394327e-05, "logits/chosen": -0.344827800989151, "logits/rejected": -0.36173003911972046, "logps/chosen": -0.00011824148532468826, "logps/rejected": -2.052685260772705, "loss": 0.3203, "nll_loss": 0.08006532490253448, "rewards/accuracies": 1.0, "rewards/chosen": -1.1824149623862468e-05, "rewards/margins": 0.20525671541690826, "rewards/rejected": -0.20526854693889618, "step": 10605 }, { "epoch": 7.334716459197787, "grad_norm": 5.213698863983154, "learning_rate": 1.4807130782234518e-05, "log_odds_chosen": 11.838775634765625, "log_odds_ratio": -1.4372966688824818e-05, "logits/chosen": -0.27771133184432983, "logits/rejected": -0.17700433731079102, "logps/chosen": -5.760313069913536e-05, "logps/rejected": -2.0721049308776855, "loss": 0.4284, "nll_loss": 0.10709850490093231, "rewards/accuracies": 1.0, "rewards/chosen": -5.760313342761947e-06, "rewards/margins": 0.20720471441745758, "rewards/rejected": -0.2072104811668396, "step": 10606 }, { "epoch": 7.3354080221300135, "grad_norm": 3.2967588901519775, "learning_rate": 1.4803288765944367e-05, "log_odds_chosen": 10.962372779846191, "log_odds_ratio": -4.8951711505651474e-05, "logits/chosen": -0.4963054358959198, "logits/rejected": -0.5957614779472351, "logps/chosen": -0.00023823344963602722, "logps/rejected": -2.5089364051818848, "loss": 0.3532, "nll_loss": 0.08828841149806976, "rewards/accuracies": 1.0, "rewards/chosen": -2.3823346054996364e-05, "rewards/margins": 0.2508698105812073, "rewards/rejected": -0.25089362263679504, "step": 10607 }, { "epoch": 7.33609958506224, "grad_norm": 6.884736061096191, "learning_rate": 1.479944674965422e-05, "log_odds_chosen": 11.086265563964844, "log_odds_ratio": -2.916203811764717e-05, "logits/chosen": -0.3722105622291565, "logits/rejected": -0.4484604299068451, "logps/chosen": -0.00019026699010282755, "logps/rejected": -2.3791327476501465, "loss": 0.3414, "nll_loss": 0.08533491939306259, "rewards/accuracies": 1.0, "rewards/chosen": -1.9026698282686993e-05, "rewards/margins": 0.23789425194263458, "rewards/rejected": -0.23791325092315674, "step": 10608 }, { "epoch": 7.336791147994467, "grad_norm": 5.710556507110596, "learning_rate": 1.4795604733364072e-05, "log_odds_chosen": 11.4507474899292, "log_odds_ratio": -1.2639248780033085e-05, "logits/chosen": -0.3838127851486206, "logits/rejected": -0.45596855878829956, "logps/chosen": -0.00017679229495115578, "logps/rejected": -2.476207971572876, "loss": 0.5747, "nll_loss": 0.14368417859077454, "rewards/accuracies": 1.0, "rewards/chosen": -1.7679229131317697e-05, "rewards/margins": 0.24760311841964722, "rewards/rejected": -0.24762079119682312, "step": 10609 }, { "epoch": 7.337482710926694, "grad_norm": 4.773012161254883, "learning_rate": 1.4791762717073921e-05, "log_odds_chosen": 9.811444282531738, "log_odds_ratio": -0.00034897771547548473, "logits/chosen": -0.5299882888793945, "logits/rejected": -0.422236829996109, "logps/chosen": -0.0005181976011954248, "logps/rejected": -2.17285418510437, "loss": 0.8478, "nll_loss": 0.21192626655101776, "rewards/accuracies": 1.0, "rewards/chosen": -5.181975575396791e-05, "rewards/margins": 0.2172335833311081, "rewards/rejected": -0.2172854095697403, "step": 10610 }, { "epoch": 7.338174273858921, "grad_norm": 5.867862224578857, "learning_rate": 1.4787920700783772e-05, "log_odds_chosen": 10.030328750610352, "log_odds_ratio": -0.0003582726640161127, "logits/chosen": -0.4114347994327545, "logits/rejected": -0.4240753650665283, "logps/chosen": -0.007311244960874319, "logps/rejected": -2.227712631225586, "loss": 0.3881, "nll_loss": 0.09698623418807983, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007311245426535606, "rewards/margins": 0.22204013168811798, "rewards/rejected": -0.22277125716209412, "step": 10611 }, { "epoch": 7.338865836791148, "grad_norm": 3.5677428245544434, "learning_rate": 1.4784078684493624e-05, "log_odds_chosen": 10.60622787475586, "log_odds_ratio": -0.0007465876988135278, "logits/chosen": -0.2800699472427368, "logits/rejected": -0.4186505675315857, "logps/chosen": -0.0007694175001233816, "logps/rejected": -2.591726303100586, "loss": 0.4788, "nll_loss": 0.11962364614009857, "rewards/accuracies": 1.0, "rewards/chosen": -7.694175292272121e-05, "rewards/margins": 0.259095698595047, "rewards/rejected": -0.25917261838912964, "step": 10612 }, { "epoch": 7.3395573997233745, "grad_norm": 6.79379940032959, "learning_rate": 1.4780236668203473e-05, "log_odds_chosen": 11.99535846710205, "log_odds_ratio": -0.0001469114940846339, "logits/chosen": -0.6883875131607056, "logits/rejected": -0.5342060923576355, "logps/chosen": -0.00039757287595421076, "logps/rejected": -3.7388129234313965, "loss": 0.7219, "nll_loss": 0.18045930564403534, "rewards/accuracies": 1.0, "rewards/chosen": -3.975728395744227e-05, "rewards/margins": 0.3738415539264679, "rewards/rejected": -0.3738812804222107, "step": 10613 }, { "epoch": 7.340248962655601, "grad_norm": 3.342613458633423, "learning_rate": 1.4776394651913326e-05, "log_odds_chosen": 10.904277801513672, "log_odds_ratio": -7.465697126463056e-05, "logits/chosen": -0.211563378572464, "logits/rejected": -0.4272039532661438, "logps/chosen": -0.0015133580891415477, "logps/rejected": -2.0817818641662598, "loss": 0.403, "nll_loss": 0.10073212534189224, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015133581473492086, "rewards/margins": 0.20802685618400574, "rewards/rejected": -0.20817819237709045, "step": 10614 }, { "epoch": 7.340940525587828, "grad_norm": 5.032479763031006, "learning_rate": 1.4772552635623177e-05, "log_odds_chosen": 11.533479690551758, "log_odds_ratio": -4.141679892200045e-05, "logits/chosen": -0.4578152298927307, "logits/rejected": -0.40243053436279297, "logps/chosen": -0.0002884402929339558, "logps/rejected": -2.7242417335510254, "loss": 0.369, "nll_loss": 0.09225247800350189, "rewards/accuracies": 1.0, "rewards/chosen": -2.8844027838204056e-05, "rewards/margins": 0.2723953425884247, "rewards/rejected": -0.2724241614341736, "step": 10615 }, { "epoch": 7.341632088520055, "grad_norm": 3.96189284324646, "learning_rate": 1.4768710619333026e-05, "log_odds_chosen": 10.917028427124023, "log_odds_ratio": -5.428310396382585e-05, "logits/chosen": -0.5717241168022156, "logits/rejected": -0.5869755148887634, "logps/chosen": -0.00023707385116722435, "logps/rejected": -2.5564537048339844, "loss": 0.5278, "nll_loss": 0.1319570243358612, "rewards/accuracies": 1.0, "rewards/chosen": -2.370738729950972e-05, "rewards/margins": 0.25562167167663574, "rewards/rejected": -0.25564536452293396, "step": 10616 }, { "epoch": 7.342323651452282, "grad_norm": 10.240484237670898, "learning_rate": 1.4764868603042878e-05, "log_odds_chosen": 11.378911018371582, "log_odds_ratio": -9.107735240831971e-05, "logits/chosen": -0.3321758508682251, "logits/rejected": -0.4863980710506439, "logps/chosen": -0.0003882453893311322, "logps/rejected": -3.289459705352783, "loss": 0.3806, "nll_loss": 0.09513388574123383, "rewards/accuracies": 1.0, "rewards/chosen": -3.882453529513441e-05, "rewards/margins": 0.3289071321487427, "rewards/rejected": -0.32894593477249146, "step": 10617 }, { "epoch": 7.343015214384509, "grad_norm": 5.46720552444458, "learning_rate": 1.476102658675273e-05, "log_odds_chosen": 10.650997161865234, "log_odds_ratio": -6.125005165813491e-05, "logits/chosen": -0.47321465611457825, "logits/rejected": -0.5267034769058228, "logps/chosen": -0.00102140917442739, "logps/rejected": -3.0506556034088135, "loss": 0.5083, "nll_loss": 0.12706559896469116, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010214091162197292, "rewards/margins": 0.30496343970298767, "rewards/rejected": -0.3050655722618103, "step": 10618 }, { "epoch": 7.3437067773167355, "grad_norm": 16.179311752319336, "learning_rate": 1.475718457046258e-05, "log_odds_chosen": 11.190753936767578, "log_odds_ratio": -4.702592923422344e-05, "logits/chosen": -0.4904909133911133, "logits/rejected": -0.6124447584152222, "logps/chosen": -0.00019545605755411088, "logps/rejected": -2.2012088298797607, "loss": 0.6981, "nll_loss": 0.17451709508895874, "rewards/accuracies": 1.0, "rewards/chosen": -1.9545605027815327e-05, "rewards/margins": 0.22010135650634766, "rewards/rejected": -0.22012090682983398, "step": 10619 }, { "epoch": 7.344398340248962, "grad_norm": 4.674870014190674, "learning_rate": 1.475334255417243e-05, "log_odds_chosen": 10.288872718811035, "log_odds_ratio": -0.0033015012741088867, "logits/chosen": -0.5148146152496338, "logits/rejected": -0.5312724709510803, "logps/chosen": -0.0028533488512039185, "logps/rejected": -1.5259571075439453, "loss": 0.4448, "nll_loss": 0.11086127907037735, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002853349142242223, "rewards/margins": 0.15231037139892578, "rewards/rejected": -0.15259572863578796, "step": 10620 }, { "epoch": 7.345089903181189, "grad_norm": 5.621874809265137, "learning_rate": 1.4749500537882283e-05, "log_odds_chosen": 10.4603271484375, "log_odds_ratio": -0.00011067378363804892, "logits/chosen": -0.10758718848228455, "logits/rejected": -0.14322113990783691, "logps/chosen": -0.00019782486197073013, "logps/rejected": -1.6435022354125977, "loss": 0.5255, "nll_loss": 0.13135552406311035, "rewards/accuracies": 1.0, "rewards/chosen": -1.9782486560870893e-05, "rewards/margins": 0.16433045268058777, "rewards/rejected": -0.1643502414226532, "step": 10621 }, { "epoch": 7.345781466113416, "grad_norm": 4.4091997146606445, "learning_rate": 1.4745658521592132e-05, "log_odds_chosen": 10.637389183044434, "log_odds_ratio": -0.00015043109306134284, "logits/chosen": -0.32385605573654175, "logits/rejected": -0.42906877398490906, "logps/chosen": -0.0002690694236662239, "logps/rejected": -2.022911787033081, "loss": 0.3128, "nll_loss": 0.07819736003875732, "rewards/accuracies": 1.0, "rewards/chosen": -2.690694236662239e-05, "rewards/margins": 0.20226429402828217, "rewards/rejected": -0.20229119062423706, "step": 10622 }, { "epoch": 7.346473029045643, "grad_norm": 4.546496391296387, "learning_rate": 1.4741816505301984e-05, "log_odds_chosen": 11.610639572143555, "log_odds_ratio": -1.3535655853047501e-05, "logits/chosen": -0.6121603846549988, "logits/rejected": -0.6175299882888794, "logps/chosen": -8.269608224509284e-05, "logps/rejected": -1.9675970077514648, "loss": 0.387, "nll_loss": 0.09674078971147537, "rewards/accuracies": 1.0, "rewards/chosen": -8.269607860711403e-06, "rewards/margins": 0.1967514455318451, "rewards/rejected": -0.19675971567630768, "step": 10623 }, { "epoch": 7.34716459197787, "grad_norm": 4.866008758544922, "learning_rate": 1.4737974489011835e-05, "log_odds_chosen": 10.987707138061523, "log_odds_ratio": -4.2686409869929776e-05, "logits/chosen": -0.5667479038238525, "logits/rejected": -0.5982543230056763, "logps/chosen": -0.00047450707643292844, "logps/rejected": -2.640371561050415, "loss": 0.5077, "nll_loss": 0.12692023813724518, "rewards/accuracies": 1.0, "rewards/chosen": -4.745071055367589e-05, "rewards/margins": 0.26398971676826477, "rewards/rejected": -0.264037162065506, "step": 10624 }, { "epoch": 7.3478561549100965, "grad_norm": 4.262731075286865, "learning_rate": 1.4734132472721684e-05, "log_odds_chosen": 11.199050903320312, "log_odds_ratio": -3.262510654167272e-05, "logits/chosen": 0.013080950826406479, "logits/rejected": -0.06952833384275436, "logps/chosen": -0.00019538719789125025, "logps/rejected": -2.5833044052124023, "loss": 0.466, "nll_loss": 0.11649253219366074, "rewards/accuracies": 1.0, "rewards/chosen": -1.953872197191231e-05, "rewards/margins": 0.25831088423728943, "rewards/rejected": -0.25833040475845337, "step": 10625 }, { "epoch": 7.348547717842323, "grad_norm": 3.8600428104400635, "learning_rate": 1.4730290456431537e-05, "log_odds_chosen": 11.01201057434082, "log_odds_ratio": -3.2962183468043804e-05, "logits/chosen": -0.48151469230651855, "logits/rejected": -0.6132542490959167, "logps/chosen": -0.00011318025644868612, "logps/rejected": -1.6544891595840454, "loss": 0.284, "nll_loss": 0.07098495215177536, "rewards/accuracies": 1.0, "rewards/chosen": -1.1318026736262254e-05, "rewards/margins": 0.16543760895729065, "rewards/rejected": -0.16544891893863678, "step": 10626 }, { "epoch": 7.34923928077455, "grad_norm": 7.540706634521484, "learning_rate": 1.4726448440141389e-05, "log_odds_chosen": 10.292908668518066, "log_odds_ratio": -0.004162487108260393, "logits/chosen": -0.06213077902793884, "logits/rejected": -0.1171325147151947, "logps/chosen": -0.02855098620057106, "logps/rejected": -2.2707366943359375, "loss": 0.4042, "nll_loss": 0.10062611103057861, "rewards/accuracies": 1.0, "rewards/chosen": -0.002855098806321621, "rewards/margins": 0.22421857714653015, "rewards/rejected": -0.22707366943359375, "step": 10627 }, { "epoch": 7.349930843706777, "grad_norm": 5.197941780090332, "learning_rate": 1.4722606423851238e-05, "log_odds_chosen": 11.520332336425781, "log_odds_ratio": -2.6367244572611526e-05, "logits/chosen": -0.22637757658958435, "logits/rejected": -0.36071324348449707, "logps/chosen": -0.00021008508338127285, "logps/rejected": -2.8108935356140137, "loss": 0.4636, "nll_loss": 0.1158994808793068, "rewards/accuracies": 1.0, "rewards/chosen": -2.1008509065723047e-05, "rewards/margins": 0.2810683250427246, "rewards/rejected": -0.2810893654823303, "step": 10628 }, { "epoch": 7.350622406639004, "grad_norm": 11.5546293258667, "learning_rate": 1.4718764407561089e-05, "log_odds_chosen": 11.054574966430664, "log_odds_ratio": -0.00022649082529824227, "logits/chosen": -0.5302231907844543, "logits/rejected": -0.6536000967025757, "logps/chosen": -0.0014594955136999488, "logps/rejected": -2.953653335571289, "loss": 1.0388, "nll_loss": 0.2596677839756012, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014594956883229315, "rewards/margins": 0.29521939158439636, "rewards/rejected": -0.2953653335571289, "step": 10629 }, { "epoch": 7.351313969571231, "grad_norm": 7.829750061035156, "learning_rate": 1.4714922391270941e-05, "log_odds_chosen": 9.29830551147461, "log_odds_ratio": -0.0005126740434207022, "logits/chosen": -0.11748141795396805, "logits/rejected": -0.10664454102516174, "logps/chosen": -0.0015445123426616192, "logps/rejected": -2.1555533409118652, "loss": 1.0364, "nll_loss": 0.25904539227485657, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015445123426616192, "rewards/margins": 0.21540087461471558, "rewards/rejected": -0.215555340051651, "step": 10630 }, { "epoch": 7.3520055325034575, "grad_norm": 6.214385032653809, "learning_rate": 1.471108037498079e-05, "log_odds_chosen": 11.047991752624512, "log_odds_ratio": -0.00016679904365446419, "logits/chosen": 0.3329809010028839, "logits/rejected": 0.18723444640636444, "logps/chosen": -0.00027217259048484266, "logps/rejected": -2.4213221073150635, "loss": 0.8566, "nll_loss": 0.21412542462348938, "rewards/accuracies": 1.0, "rewards/chosen": -2.7217258320888504e-05, "rewards/margins": 0.24210500717163086, "rewards/rejected": -0.24213220179080963, "step": 10631 }, { "epoch": 7.352697095435684, "grad_norm": 5.405941486358643, "learning_rate": 1.4707238358690643e-05, "log_odds_chosen": 11.97227668762207, "log_odds_ratio": -1.698249252513051e-05, "logits/chosen": -0.6586976051330566, "logits/rejected": -0.6436535716056824, "logps/chosen": -8.172958041541278e-05, "logps/rejected": -2.3857898712158203, "loss": 0.3021, "nll_loss": 0.07551433145999908, "rewards/accuracies": 1.0, "rewards/chosen": -8.172957677743398e-06, "rewards/margins": 0.23857080936431885, "rewards/rejected": -0.23857899010181427, "step": 10632 }, { "epoch": 7.353388658367911, "grad_norm": 6.927206993103027, "learning_rate": 1.4703396342400492e-05, "log_odds_chosen": 10.949483871459961, "log_odds_ratio": -4.4107095163781196e-05, "logits/chosen": -0.00477069616317749, "logits/rejected": -0.0485563725233078, "logps/chosen": -0.00016855084686540067, "logps/rejected": -2.265960454940796, "loss": 0.5283, "nll_loss": 0.132061168551445, "rewards/accuracies": 1.0, "rewards/chosen": -1.6855085050337948e-05, "rewards/margins": 0.2265791893005371, "rewards/rejected": -0.22659605741500854, "step": 10633 }, { "epoch": 7.354080221300138, "grad_norm": 3.2986459732055664, "learning_rate": 1.4699554326110343e-05, "log_odds_chosen": 11.19710922241211, "log_odds_ratio": -3.278831354691647e-05, "logits/chosen": -0.7175881266593933, "logits/rejected": -0.6673378348350525, "logps/chosen": -0.00038059664075262845, "logps/rejected": -2.5626344680786133, "loss": 0.3126, "nll_loss": 0.07814760506153107, "rewards/accuracies": 1.0, "rewards/chosen": -3.80596611648798e-05, "rewards/margins": 0.2562254071235657, "rewards/rejected": -0.25626346468925476, "step": 10634 }, { "epoch": 7.354771784232365, "grad_norm": 4.856668472290039, "learning_rate": 1.4695712309820195e-05, "log_odds_chosen": 10.294366836547852, "log_odds_ratio": -0.00012593253632076085, "logits/chosen": -0.3165508806705475, "logits/rejected": -0.36833885312080383, "logps/chosen": -0.0007413438288494945, "logps/rejected": -1.5169241428375244, "loss": 0.5096, "nll_loss": 0.127391055226326, "rewards/accuracies": 1.0, "rewards/chosen": -7.413439016090706e-05, "rewards/margins": 0.15161828696727753, "rewards/rejected": -0.15169242024421692, "step": 10635 }, { "epoch": 7.355463347164592, "grad_norm": 6.945402145385742, "learning_rate": 1.4691870293530044e-05, "log_odds_chosen": 11.395181655883789, "log_odds_ratio": -4.5541368308477104e-05, "logits/chosen": -0.2549218535423279, "logits/rejected": -0.2680109739303589, "logps/chosen": -0.00023953057825565338, "logps/rejected": -2.7777349948883057, "loss": 0.4051, "nll_loss": 0.10126994550228119, "rewards/accuracies": 1.0, "rewards/chosen": -2.395305818936322e-05, "rewards/margins": 0.27774956822395325, "rewards/rejected": -0.27777349948883057, "step": 10636 }, { "epoch": 7.356154910096818, "grad_norm": 6.0253777503967285, "learning_rate": 1.4688028277239896e-05, "log_odds_chosen": 10.734546661376953, "log_odds_ratio": -3.223002204322256e-05, "logits/chosen": -0.4334542155265808, "logits/rejected": -0.3397785425186157, "logps/chosen": -9.362496348330751e-05, "logps/rejected": -1.2691165208816528, "loss": 0.5793, "nll_loss": 0.14482782781124115, "rewards/accuracies": 1.0, "rewards/chosen": -9.36249580263393e-06, "rewards/margins": 0.1269022822380066, "rewards/rejected": -0.12691165506839752, "step": 10637 }, { "epoch": 7.356846473029045, "grad_norm": 3.829284906387329, "learning_rate": 1.4684186260949747e-05, "log_odds_chosen": 10.96677017211914, "log_odds_ratio": -4.322976747062057e-05, "logits/chosen": -0.5060489177703857, "logits/rejected": -0.5655438899993896, "logps/chosen": -0.00012070621596649289, "logps/rejected": -1.8876540660858154, "loss": 0.3506, "nll_loss": 0.08765744417905807, "rewards/accuracies": 1.0, "rewards/chosen": -1.2070622688042931e-05, "rewards/margins": 0.18875333666801453, "rewards/rejected": -0.18876540660858154, "step": 10638 }, { "epoch": 7.357538035961272, "grad_norm": 7.1783246994018555, "learning_rate": 1.4680344244659596e-05, "log_odds_chosen": 10.930610656738281, "log_odds_ratio": -3.301469041616656e-05, "logits/chosen": -0.04722614586353302, "logits/rejected": -0.11504199355840683, "logps/chosen": -0.000139197101816535, "logps/rejected": -2.0308399200439453, "loss": 0.6415, "nll_loss": 0.16035979986190796, "rewards/accuracies": 1.0, "rewards/chosen": -1.3919711818743963e-05, "rewards/margins": 0.20307007431983948, "rewards/rejected": -0.20308397710323334, "step": 10639 }, { "epoch": 7.358229598893499, "grad_norm": 4.304840564727783, "learning_rate": 1.4676502228369449e-05, "log_odds_chosen": 11.903948783874512, "log_odds_ratio": -2.864694397430867e-05, "logits/chosen": -0.3215031325817108, "logits/rejected": -0.42134568095207214, "logps/chosen": -0.00019372413225937635, "logps/rejected": -2.997770309448242, "loss": 0.4164, "nll_loss": 0.10409659147262573, "rewards/accuracies": 1.0, "rewards/chosen": -1.937241540872492e-05, "rewards/margins": 0.2997576892375946, "rewards/rejected": -0.2997770309448242, "step": 10640 }, { "epoch": 7.358921161825726, "grad_norm": 5.644372463226318, "learning_rate": 1.4672660212079301e-05, "log_odds_chosen": 10.269853591918945, "log_odds_ratio": -0.00014747031673323363, "logits/chosen": -0.737433910369873, "logits/rejected": -0.835425853729248, "logps/chosen": -0.0012461732840165496, "logps/rejected": -1.93202543258667, "loss": 0.3985, "nll_loss": 0.099620521068573, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012461733422242105, "rewards/margins": 0.1930779218673706, "rewards/rejected": -0.19320255517959595, "step": 10641 }, { "epoch": 7.359612724757953, "grad_norm": 5.395466327667236, "learning_rate": 1.466881819578915e-05, "log_odds_chosen": 11.738531112670898, "log_odds_ratio": -8.553595762350596e-06, "logits/chosen": -0.5634970664978027, "logits/rejected": -0.6411577463150024, "logps/chosen": -0.00012023108138237149, "logps/rejected": -2.624508857727051, "loss": 0.4429, "nll_loss": 0.11071419715881348, "rewards/accuracies": 1.0, "rewards/chosen": -1.202310886583291e-05, "rewards/margins": 0.26243889331817627, "rewards/rejected": -0.2624509036540985, "step": 10642 }, { "epoch": 7.360304287690179, "grad_norm": 4.805103778839111, "learning_rate": 1.4664976179499001e-05, "log_odds_chosen": 10.992596626281738, "log_odds_ratio": -0.00036997467395849526, "logits/chosen": -0.48275116086006165, "logits/rejected": -0.691816508769989, "logps/chosen": -0.0003715948842000216, "logps/rejected": -2.5263161659240723, "loss": 0.4734, "nll_loss": 0.11830729246139526, "rewards/accuracies": 1.0, "rewards/chosen": -3.7159494240768254e-05, "rewards/margins": 0.2525944411754608, "rewards/rejected": -0.25263160467147827, "step": 10643 }, { "epoch": 7.360995850622406, "grad_norm": 8.46743106842041, "learning_rate": 1.4661134163208853e-05, "log_odds_chosen": 10.13895320892334, "log_odds_ratio": -0.0003543172206263989, "logits/chosen": -0.6756718158721924, "logits/rejected": -0.7190816402435303, "logps/chosen": -0.00043742440175265074, "logps/rejected": -1.8771418333053589, "loss": 0.3552, "nll_loss": 0.08875347673892975, "rewards/accuracies": 1.0, "rewards/chosen": -4.374244235805236e-05, "rewards/margins": 0.18767043948173523, "rewards/rejected": -0.18771417438983917, "step": 10644 }, { "epoch": 7.361687413554633, "grad_norm": 3.5724453926086426, "learning_rate": 1.4657292146918702e-05, "log_odds_chosen": 10.295007705688477, "log_odds_ratio": -0.00011511320917634293, "logits/chosen": 0.1354466676712036, "logits/rejected": 0.08701753616333008, "logps/chosen": -0.0006204830133356154, "logps/rejected": -1.82797372341156, "loss": 0.4692, "nll_loss": 0.11728240549564362, "rewards/accuracies": 1.0, "rewards/chosen": -6.204830424394459e-05, "rewards/margins": 0.18273532390594482, "rewards/rejected": -0.182797372341156, "step": 10645 }, { "epoch": 7.36237897648686, "grad_norm": 8.771824836730957, "learning_rate": 1.4653450130628555e-05, "log_odds_chosen": 11.154577255249023, "log_odds_ratio": -7.893896690802649e-05, "logits/chosen": -0.17044945061206818, "logits/rejected": -0.1967611312866211, "logps/chosen": -0.0003008460334967822, "logps/rejected": -2.7240962982177734, "loss": 0.3921, "nll_loss": 0.09801331907510757, "rewards/accuracies": 1.0, "rewards/chosen": -3.008460407727398e-05, "rewards/margins": 0.2723795771598816, "rewards/rejected": -0.2724096477031708, "step": 10646 }, { "epoch": 7.363070539419087, "grad_norm": 3.3796043395996094, "learning_rate": 1.4649608114338406e-05, "log_odds_chosen": 11.514384269714355, "log_odds_ratio": -1.3234783182269894e-05, "logits/chosen": -0.28235557675361633, "logits/rejected": -0.3105676770210266, "logps/chosen": -7.076616020640358e-05, "logps/rejected": -1.9891992807388306, "loss": 0.3257, "nll_loss": 0.08141130954027176, "rewards/accuracies": 1.0, "rewards/chosen": -7.076616384438239e-06, "rewards/margins": 0.1989128440618515, "rewards/rejected": -0.19891992211341858, "step": 10647 }, { "epoch": 7.363762102351314, "grad_norm": 5.169199466705322, "learning_rate": 1.4645766098048255e-05, "log_odds_chosen": 9.638940811157227, "log_odds_ratio": -0.005037724506109953, "logits/chosen": -0.41315174102783203, "logits/rejected": -0.427206814289093, "logps/chosen": -0.0020048036240041256, "logps/rejected": -1.8959660530090332, "loss": 0.4813, "nll_loss": 0.11983361840248108, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020048035366926342, "rewards/margins": 0.18939611315727234, "rewards/rejected": -0.18959660828113556, "step": 10648 }, { "epoch": 7.36445366528354, "grad_norm": 4.702418804168701, "learning_rate": 1.4641924081758107e-05, "log_odds_chosen": 10.577019691467285, "log_odds_ratio": -8.794783207122236e-05, "logits/chosen": -0.32490867376327515, "logits/rejected": -0.3228122591972351, "logps/chosen": -0.000967757951002568, "logps/rejected": -2.048372745513916, "loss": 0.8805, "nll_loss": 0.22010564804077148, "rewards/accuracies": 1.0, "rewards/chosen": -9.677580237621441e-05, "rewards/margins": 0.2047404944896698, "rewards/rejected": -0.20483727753162384, "step": 10649 }, { "epoch": 7.365145228215767, "grad_norm": 7.433712959289551, "learning_rate": 1.463808206546796e-05, "log_odds_chosen": 10.622085571289062, "log_odds_ratio": -0.00020646367920562625, "logits/chosen": -0.3272697627544403, "logits/rejected": -0.395301878452301, "logps/chosen": -0.0006920951418578625, "logps/rejected": -2.6321024894714355, "loss": 0.5902, "nll_loss": 0.14752425253391266, "rewards/accuracies": 1.0, "rewards/chosen": -6.920951273059472e-05, "rewards/margins": 0.26314103603363037, "rewards/rejected": -0.263210266828537, "step": 10650 }, { "epoch": 7.365836791147994, "grad_norm": 3.3372693061828613, "learning_rate": 1.4634240049177809e-05, "log_odds_chosen": 11.259846687316895, "log_odds_ratio": -7.804886990925297e-05, "logits/chosen": -0.1679503619670868, "logits/rejected": -0.21933269500732422, "logps/chosen": -0.0026375320740044117, "logps/rejected": -3.6971280574798584, "loss": 0.4456, "nll_loss": 0.11138937622308731, "rewards/accuracies": 1.0, "rewards/chosen": -0.00026375323068350554, "rewards/margins": 0.36944907903671265, "rewards/rejected": -0.36971279978752136, "step": 10651 }, { "epoch": 7.366528354080221, "grad_norm": 5.466868877410889, "learning_rate": 1.463039803288766e-05, "log_odds_chosen": 10.425373077392578, "log_odds_ratio": -7.754612306598574e-05, "logits/chosen": -0.004369847476482391, "logits/rejected": -0.012086287140846252, "logps/chosen": -0.00018663016089703888, "logps/rejected": -1.868985652923584, "loss": 0.5786, "nll_loss": 0.14463818073272705, "rewards/accuracies": 1.0, "rewards/chosen": -1.866301681729965e-05, "rewards/margins": 0.18687991797924042, "rewards/rejected": -0.18689857423305511, "step": 10652 }, { "epoch": 7.367219917012449, "grad_norm": 9.621696472167969, "learning_rate": 1.4626556016597512e-05, "log_odds_chosen": 10.301619529724121, "log_odds_ratio": -0.0031991673167794943, "logits/chosen": -0.39241892099380493, "logits/rejected": -0.4045332670211792, "logps/chosen": -0.001698142383247614, "logps/rejected": -2.017453670501709, "loss": 0.9325, "nll_loss": 0.23280005156993866, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016981424414552748, "rewards/margins": 0.20157556235790253, "rewards/rejected": -0.20174537599086761, "step": 10653 }, { "epoch": 7.367911479944675, "grad_norm": 4.06018590927124, "learning_rate": 1.4622714000307361e-05, "log_odds_chosen": 11.616990089416504, "log_odds_ratio": -3.6053897929377854e-05, "logits/chosen": -0.2114473134279251, "logits/rejected": -0.26343247294425964, "logps/chosen": -0.0017658036667853594, "logps/rejected": -2.960357666015625, "loss": 0.4912, "nll_loss": 0.12278671562671661, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017658036085776985, "rewards/margins": 0.2958591878414154, "rewards/rejected": -0.2960357666015625, "step": 10654 }, { "epoch": 7.368603042876902, "grad_norm": 5.952072620391846, "learning_rate": 1.4618871984017213e-05, "log_odds_chosen": 10.786083221435547, "log_odds_ratio": -7.878676115069538e-05, "logits/chosen": -0.572536826133728, "logits/rejected": -0.5953267216682434, "logps/chosen": -0.0003121356130577624, "logps/rejected": -2.293868064880371, "loss": 0.482, "nll_loss": 0.1204964742064476, "rewards/accuracies": 1.0, "rewards/chosen": -3.1213559850584716e-05, "rewards/margins": 0.229355588555336, "rewards/rejected": -0.2293868064880371, "step": 10655 }, { "epoch": 7.369294605809129, "grad_norm": 4.473649978637695, "learning_rate": 1.4615029967727064e-05, "log_odds_chosen": 11.468165397644043, "log_odds_ratio": -2.576730548753403e-05, "logits/chosen": -0.5516068935394287, "logits/rejected": -0.5789634585380554, "logps/chosen": -0.00020674789266195148, "logps/rejected": -2.422849655151367, "loss": 0.3864, "nll_loss": 0.09660729020833969, "rewards/accuracies": 1.0, "rewards/chosen": -2.067478999379091e-05, "rewards/margins": 0.2422642707824707, "rewards/rejected": -0.24228495359420776, "step": 10656 }, { "epoch": 7.369986168741356, "grad_norm": 6.674118518829346, "learning_rate": 1.4611187951436913e-05, "log_odds_chosen": 10.78459644317627, "log_odds_ratio": -6.154303264338523e-05, "logits/chosen": -0.10129731893539429, "logits/rejected": -0.38078394532203674, "logps/chosen": -0.0018489633221179247, "logps/rejected": -3.1722657680511475, "loss": 1.0619, "nll_loss": 0.2654639184474945, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018489634385332465, "rewards/margins": 0.31704169511795044, "rewards/rejected": -0.3172265887260437, "step": 10657 }, { "epoch": 7.370677731673583, "grad_norm": 8.016694068908691, "learning_rate": 1.4607345935146766e-05, "log_odds_chosen": 11.63819408416748, "log_odds_ratio": -3.645301330834627e-05, "logits/chosen": -0.41492342948913574, "logits/rejected": -0.4904578924179077, "logps/chosen": -0.0002599305589683354, "logps/rejected": -2.92301344871521, "loss": 0.5225, "nll_loss": 0.13063208758831024, "rewards/accuracies": 1.0, "rewards/chosen": -2.59930566244293e-05, "rewards/margins": 0.2922753691673279, "rewards/rejected": -0.29230135679244995, "step": 10658 }, { "epoch": 7.37136929460581, "grad_norm": 4.327447891235352, "learning_rate": 1.4603503918856618e-05, "log_odds_chosen": 10.701872825622559, "log_odds_ratio": -2.96252310363343e-05, "logits/chosen": -0.5056709051132202, "logits/rejected": -0.5226324796676636, "logps/chosen": -8.454695489490405e-05, "logps/rejected": -1.362694501876831, "loss": 0.2557, "nll_loss": 0.06392902135848999, "rewards/accuracies": 1.0, "rewards/chosen": -8.454695489490405e-06, "rewards/margins": 0.1362610161304474, "rewards/rejected": -0.1362694650888443, "step": 10659 }, { "epoch": 7.372060857538036, "grad_norm": 8.658827781677246, "learning_rate": 1.4599661902566467e-05, "log_odds_chosen": 11.036666870117188, "log_odds_ratio": -0.00022471771808341146, "logits/chosen": -0.4739881455898285, "logits/rejected": -0.47643283009529114, "logps/chosen": -0.0003724672715179622, "logps/rejected": -3.089184284210205, "loss": 0.6262, "nll_loss": 0.15653277933597565, "rewards/accuracies": 1.0, "rewards/chosen": -3.72467256966047e-05, "rewards/margins": 0.30888116359710693, "rewards/rejected": -0.30891841650009155, "step": 10660 }, { "epoch": 7.372752420470263, "grad_norm": 4.330894947052002, "learning_rate": 1.4595819886276318e-05, "log_odds_chosen": 10.98950481414795, "log_odds_ratio": -0.0005101492861285806, "logits/chosen": 0.15454961359500885, "logits/rejected": 0.1601768136024475, "logps/chosen": -0.00024295755429193377, "logps/rejected": -2.2143144607543945, "loss": 0.336, "nll_loss": 0.0839565247297287, "rewards/accuracies": 1.0, "rewards/chosen": -2.4295755792991258e-05, "rewards/margins": 0.22140714526176453, "rewards/rejected": -0.2214314490556717, "step": 10661 }, { "epoch": 7.37344398340249, "grad_norm": 3.248152256011963, "learning_rate": 1.459197786998617e-05, "log_odds_chosen": 10.0687894821167, "log_odds_ratio": -0.0001112874160753563, "logits/chosen": -0.29376327991485596, "logits/rejected": -0.3159409463405609, "logps/chosen": -0.0002920984697993845, "logps/rejected": -1.9017329216003418, "loss": 0.4019, "nll_loss": 0.10047246515750885, "rewards/accuracies": 1.0, "rewards/chosen": -2.920984843512997e-05, "rewards/margins": 0.19014407694339752, "rewards/rejected": -0.19017328321933746, "step": 10662 }, { "epoch": 7.374135546334717, "grad_norm": 4.734434127807617, "learning_rate": 1.458813585369602e-05, "log_odds_chosen": 10.095927238464355, "log_odds_ratio": -0.00015906621410977095, "logits/chosen": -0.4993319511413574, "logits/rejected": -0.6769671440124512, "logps/chosen": -0.0004607696318998933, "logps/rejected": -1.7483266592025757, "loss": 0.5519, "nll_loss": 0.137965127825737, "rewards/accuracies": 1.0, "rewards/chosen": -4.607695882441476e-05, "rewards/margins": 0.17478659749031067, "rewards/rejected": -0.17483268678188324, "step": 10663 }, { "epoch": 7.374827109266944, "grad_norm": 10.651551246643066, "learning_rate": 1.4584293837405872e-05, "log_odds_chosen": 10.96095085144043, "log_odds_ratio": -3.493850090308115e-05, "logits/chosen": -0.25099456310272217, "logits/rejected": -0.2105286717414856, "logps/chosen": -0.00018202661885879934, "logps/rejected": -2.0909557342529297, "loss": 0.5512, "nll_loss": 0.1378040462732315, "rewards/accuracies": 1.0, "rewards/chosen": -1.8202661522082053e-05, "rewards/margins": 0.209077388048172, "rewards/rejected": -0.20909559726715088, "step": 10664 }, { "epoch": 7.375518672199171, "grad_norm": 6.378670692443848, "learning_rate": 1.4580451821115723e-05, "log_odds_chosen": 12.079700469970703, "log_odds_ratio": -1.1181369700352661e-05, "logits/chosen": -0.5731353163719177, "logits/rejected": -0.5929599404335022, "logps/chosen": -0.00041094704647548497, "logps/rejected": -3.2692391872406006, "loss": 0.5437, "nll_loss": 0.13593482971191406, "rewards/accuracies": 1.0, "rewards/chosen": -4.1094703192356974e-05, "rewards/margins": 0.32688283920288086, "rewards/rejected": -0.3269239068031311, "step": 10665 }, { "epoch": 7.376210235131397, "grad_norm": 5.395590782165527, "learning_rate": 1.4576609804825572e-05, "log_odds_chosen": 10.98073959350586, "log_odds_ratio": -6.951152317924425e-05, "logits/chosen": -0.6794606447219849, "logits/rejected": -0.727249264717102, "logps/chosen": -0.00024431568454019725, "logps/rejected": -2.1115775108337402, "loss": 0.6299, "nll_loss": 0.1574762612581253, "rewards/accuracies": 1.0, "rewards/chosen": -2.443157063680701e-05, "rewards/margins": 0.21113333106040955, "rewards/rejected": -0.21115775406360626, "step": 10666 }, { "epoch": 7.376901798063624, "grad_norm": 4.982635021209717, "learning_rate": 1.4572767788535424e-05, "log_odds_chosen": 10.567577362060547, "log_odds_ratio": -0.0005886530270799994, "logits/chosen": -0.3521302342414856, "logits/rejected": -0.36093980073928833, "logps/chosen": -0.0007477524923160672, "logps/rejected": -2.0216317176818848, "loss": 1.0992, "nll_loss": 0.2747451066970825, "rewards/accuracies": 1.0, "rewards/chosen": -7.477525650756434e-05, "rewards/margins": 0.2020883858203888, "rewards/rejected": -0.20216315984725952, "step": 10667 }, { "epoch": 7.377593360995851, "grad_norm": 17.154781341552734, "learning_rate": 1.4568925772245276e-05, "log_odds_chosen": 9.469273567199707, "log_odds_ratio": -0.00030858165700919926, "logits/chosen": -0.5006073117256165, "logits/rejected": -0.5103213787078857, "logps/chosen": -0.0004589389427565038, "logps/rejected": -1.5602257251739502, "loss": 0.4696, "nll_loss": 0.11737901717424393, "rewards/accuracies": 1.0, "rewards/chosen": -4.589389936882071e-05, "rewards/margins": 0.15597668290138245, "rewards/rejected": -0.1560225784778595, "step": 10668 }, { "epoch": 7.378284923928078, "grad_norm": 4.346867561340332, "learning_rate": 1.4565083755955126e-05, "log_odds_chosen": 11.488432884216309, "log_odds_ratio": -2.4716002371860668e-05, "logits/chosen": -0.002739326097071171, "logits/rejected": -0.030549317598342896, "logps/chosen": -0.00011103839642601088, "logps/rejected": -2.4439241886138916, "loss": 0.4689, "nll_loss": 0.11722592264413834, "rewards/accuracies": 1.0, "rewards/chosen": -1.1103839824500028e-05, "rewards/margins": 0.24438130855560303, "rewards/rejected": -0.24439239501953125, "step": 10669 }, { "epoch": 7.378976486860305, "grad_norm": 4.403764724731445, "learning_rate": 1.4561241739664976e-05, "log_odds_chosen": 11.146065711975098, "log_odds_ratio": -5.7201199524570256e-05, "logits/chosen": -0.27991095185279846, "logits/rejected": -0.315477192401886, "logps/chosen": -0.0001651171623962, "logps/rejected": -2.3870275020599365, "loss": 0.437, "nll_loss": 0.10924191772937775, "rewards/accuracies": 1.0, "rewards/chosen": -1.651171623962e-05, "rewards/margins": 0.2386862337589264, "rewards/rejected": -0.23870275914669037, "step": 10670 }, { "epoch": 7.3796680497925315, "grad_norm": 9.351802825927734, "learning_rate": 1.4557399723374829e-05, "log_odds_chosen": 11.575432777404785, "log_odds_ratio": -6.787521851947531e-05, "logits/chosen": -0.34020090103149414, "logits/rejected": -0.3319574296474457, "logps/chosen": -0.00018382327107246965, "logps/rejected": -2.6232800483703613, "loss": 0.469, "nll_loss": 0.11724460124969482, "rewards/accuracies": 1.0, "rewards/chosen": -1.8382328562438488e-05, "rewards/margins": 0.26230961084365845, "rewards/rejected": -0.26232796907424927, "step": 10671 }, { "epoch": 7.380359612724758, "grad_norm": 6.590975761413574, "learning_rate": 1.4553557707084678e-05, "log_odds_chosen": 11.52649974822998, "log_odds_ratio": -2.1129832020960748e-05, "logits/chosen": -0.33116066455841064, "logits/rejected": -0.43521445989608765, "logps/chosen": -0.00017208060307893902, "logps/rejected": -2.7834463119506836, "loss": 0.6478, "nll_loss": 0.16194315254688263, "rewards/accuracies": 1.0, "rewards/chosen": -1.720805994409602e-05, "rewards/margins": 0.27832740545272827, "rewards/rejected": -0.27834460139274597, "step": 10672 }, { "epoch": 7.381051175656985, "grad_norm": 4.628101348876953, "learning_rate": 1.454971569079453e-05, "log_odds_chosen": 9.457018852233887, "log_odds_ratio": -0.001599702751263976, "logits/chosen": -0.5300707817077637, "logits/rejected": -0.6058658957481384, "logps/chosen": -0.00043648615246638656, "logps/rejected": -1.410576343536377, "loss": 0.4858, "nll_loss": 0.12128002196550369, "rewards/accuracies": 1.0, "rewards/chosen": -4.3648615246638656e-05, "rewards/margins": 0.1410140097141266, "rewards/rejected": -0.14105764031410217, "step": 10673 }, { "epoch": 7.381742738589212, "grad_norm": 3.3888509273529053, "learning_rate": 1.4545873674504381e-05, "log_odds_chosen": 11.028520584106445, "log_odds_ratio": -4.537054701359011e-05, "logits/chosen": -0.11969764530658722, "logits/rejected": -0.1497419774532318, "logps/chosen": -0.000272990990197286, "logps/rejected": -2.4929189682006836, "loss": 0.4646, "nll_loss": 0.11614756286144257, "rewards/accuracies": 1.0, "rewards/chosen": -2.729909829213284e-05, "rewards/margins": 0.24926459789276123, "rewards/rejected": -0.24929189682006836, "step": 10674 }, { "epoch": 7.382434301521439, "grad_norm": 5.96130895614624, "learning_rate": 1.454203165821423e-05, "log_odds_chosen": 11.075370788574219, "log_odds_ratio": -0.0001282305602217093, "logits/chosen": -0.048389844596385956, "logits/rejected": -0.16936539113521576, "logps/chosen": -0.00021596168517135084, "logps/rejected": -2.4580938816070557, "loss": 0.6159, "nll_loss": 0.1539560854434967, "rewards/accuracies": 1.0, "rewards/chosen": -2.1596169972326607e-05, "rewards/margins": 0.24578779935836792, "rewards/rejected": -0.2458093911409378, "step": 10675 }, { "epoch": 7.383125864453666, "grad_norm": 5.834570407867432, "learning_rate": 1.4538189641924082e-05, "log_odds_chosen": 12.218822479248047, "log_odds_ratio": -8.590914148953743e-06, "logits/chosen": -0.06374844163656235, "logits/rejected": -0.18003803491592407, "logps/chosen": -0.00014865616685710847, "logps/rejected": -3.2421627044677734, "loss": 0.613, "nll_loss": 0.15326063334941864, "rewards/accuracies": 1.0, "rewards/chosen": -1.4865616321912967e-05, "rewards/margins": 0.32420143485069275, "rewards/rejected": -0.3242163062095642, "step": 10676 }, { "epoch": 7.3838174273858925, "grad_norm": 4.939897537231445, "learning_rate": 1.4534347625633935e-05, "log_odds_chosen": 11.44198989868164, "log_odds_ratio": -1.698485721135512e-05, "logits/chosen": -0.11121652275323868, "logits/rejected": -0.08655675500631332, "logps/chosen": -0.00016818266885820776, "logps/rejected": -2.6434073448181152, "loss": 0.5616, "nll_loss": 0.14038707315921783, "rewards/accuracies": 1.0, "rewards/chosen": -1.6818266885820776e-05, "rewards/margins": 0.2643239498138428, "rewards/rejected": -0.26434075832366943, "step": 10677 }, { "epoch": 7.384508990318119, "grad_norm": 3.912889003753662, "learning_rate": 1.4530505609343784e-05, "log_odds_chosen": 10.676713943481445, "log_odds_ratio": -6.23077794443816e-05, "logits/chosen": -0.3503372073173523, "logits/rejected": -0.41021400690078735, "logps/chosen": -0.00020047812722623348, "logps/rejected": -1.4013879299163818, "loss": 0.4033, "nll_loss": 0.10082890838384628, "rewards/accuracies": 1.0, "rewards/chosen": -2.0047811631229706e-05, "rewards/margins": 0.14011874794960022, "rewards/rejected": -0.14013880491256714, "step": 10678 }, { "epoch": 7.385200553250346, "grad_norm": 4.871994972229004, "learning_rate": 1.4526663593053635e-05, "log_odds_chosen": 11.72120189666748, "log_odds_ratio": -1.770121707522776e-05, "logits/chosen": -0.2013327181339264, "logits/rejected": -0.28244835138320923, "logps/chosen": -0.0002974059898406267, "logps/rejected": -3.1454806327819824, "loss": 0.6354, "nll_loss": 0.15885329246520996, "rewards/accuracies": 1.0, "rewards/chosen": -2.974059862026479e-05, "rewards/margins": 0.3145183324813843, "rewards/rejected": -0.3145480751991272, "step": 10679 }, { "epoch": 7.385892116182573, "grad_norm": 6.55807638168335, "learning_rate": 1.4522821576763487e-05, "log_odds_chosen": 10.452030181884766, "log_odds_ratio": -7.187146547948942e-05, "logits/chosen": -0.46629953384399414, "logits/rejected": -0.43954288959503174, "logps/chosen": -0.0008648043731227517, "logps/rejected": -2.166017770767212, "loss": 0.482, "nll_loss": 0.12049096822738647, "rewards/accuracies": 1.0, "rewards/chosen": -8.648043149150908e-05, "rewards/margins": 0.21651530265808105, "rewards/rejected": -0.21660177409648895, "step": 10680 }, { "epoch": 7.3865836791148, "grad_norm": 4.796786308288574, "learning_rate": 1.4518979560473336e-05, "log_odds_chosen": 11.206933975219727, "log_odds_ratio": -3.39964208251331e-05, "logits/chosen": -0.19763581454753876, "logits/rejected": -0.24650007486343384, "logps/chosen": -0.00019862827321048826, "logps/rejected": -2.145627498626709, "loss": 0.4517, "nll_loss": 0.11292488127946854, "rewards/accuracies": 1.0, "rewards/chosen": -1.986282950383611e-05, "rewards/margins": 0.21454286575317383, "rewards/rejected": -0.21456272900104523, "step": 10681 }, { "epoch": 7.387275242047027, "grad_norm": 4.821454048156738, "learning_rate": 1.4515137544183189e-05, "log_odds_chosen": 11.62645435333252, "log_odds_ratio": -3.971250043832697e-05, "logits/chosen": -0.6977535486221313, "logits/rejected": -0.7093822956085205, "logps/chosen": -0.0003523613850120455, "logps/rejected": -2.8389506340026855, "loss": 0.6053, "nll_loss": 0.15132945775985718, "rewards/accuracies": 1.0, "rewards/chosen": -3.523613850120455e-05, "rewards/margins": 0.2838597893714905, "rewards/rejected": -0.2838950455188751, "step": 10682 }, { "epoch": 7.3879668049792535, "grad_norm": 3.829512596130371, "learning_rate": 1.451129552789304e-05, "log_odds_chosen": 11.04928970336914, "log_odds_ratio": -4.361994069768116e-05, "logits/chosen": -0.3144991397857666, "logits/rejected": -0.2946760654449463, "logps/chosen": -0.00020116717496421188, "logps/rejected": -2.3325746059417725, "loss": 0.3145, "nll_loss": 0.07862447202205658, "rewards/accuracies": 1.0, "rewards/chosen": -2.0116716768825427e-05, "rewards/margins": 0.2332373410463333, "rewards/rejected": -0.2332574725151062, "step": 10683 }, { "epoch": 7.38865836791148, "grad_norm": 8.505328178405762, "learning_rate": 1.4507453511602888e-05, "log_odds_chosen": 11.85031509399414, "log_odds_ratio": -1.9721686840057373e-05, "logits/chosen": -0.48058003187179565, "logits/rejected": -0.4414098560810089, "logps/chosen": -0.00022078091569710523, "logps/rejected": -2.5994250774383545, "loss": 0.4388, "nll_loss": 0.10970518738031387, "rewards/accuracies": 1.0, "rewards/chosen": -2.2078091205912642e-05, "rewards/margins": 0.2599204480648041, "rewards/rejected": -0.25994253158569336, "step": 10684 }, { "epoch": 7.389349930843707, "grad_norm": 3.579763174057007, "learning_rate": 1.4503611495312741e-05, "log_odds_chosen": 11.160737991333008, "log_odds_ratio": -4.9370610213372856e-05, "logits/chosen": -0.12265770137310028, "logits/rejected": -0.16684816777706146, "logps/chosen": -0.0004806347715202719, "logps/rejected": -3.001997709274292, "loss": 0.5963, "nll_loss": 0.14908070862293243, "rewards/accuracies": 1.0, "rewards/chosen": -4.806347351404838e-05, "rewards/margins": 0.3001517057418823, "rewards/rejected": -0.30019980669021606, "step": 10685 }, { "epoch": 7.390041493775934, "grad_norm": 9.121960639953613, "learning_rate": 1.4499769479022593e-05, "log_odds_chosen": 10.080194473266602, "log_odds_ratio": -0.00014840658695902675, "logits/chosen": -0.17311474680900574, "logits/rejected": -0.22056277096271515, "logps/chosen": -0.0003925380588043481, "logps/rejected": -2.1007535457611084, "loss": 0.3642, "nll_loss": 0.09103476256132126, "rewards/accuracies": 1.0, "rewards/chosen": -3.925380951841362e-05, "rewards/margins": 0.21003609895706177, "rewards/rejected": -0.21007534861564636, "step": 10686 }, { "epoch": 7.390733056708161, "grad_norm": 7.154950141906738, "learning_rate": 1.4495927462732442e-05, "log_odds_chosen": 10.706562042236328, "log_odds_ratio": -0.0001363321061944589, "logits/chosen": -0.3177839517593384, "logits/rejected": -0.38389331102371216, "logps/chosen": -0.0002176029229303822, "logps/rejected": -2.251680850982666, "loss": 0.683, "nll_loss": 0.1707375943660736, "rewards/accuracies": 1.0, "rewards/chosen": -2.176029192924034e-05, "rewards/margins": 0.2251463383436203, "rewards/rejected": -0.2251681089401245, "step": 10687 }, { "epoch": 7.391424619640388, "grad_norm": 7.543350696563721, "learning_rate": 1.4492085446442293e-05, "log_odds_chosen": 11.313833236694336, "log_odds_ratio": -2.9455553885782138e-05, "logits/chosen": -0.09866035729646683, "logits/rejected": -0.17171455919742584, "logps/chosen": -0.00020820967620238662, "logps/rejected": -2.6313819885253906, "loss": 0.3626, "nll_loss": 0.09065688401460648, "rewards/accuracies": 1.0, "rewards/chosen": -2.0820967620238662e-05, "rewards/margins": 0.26311737298965454, "rewards/rejected": -0.26313820481300354, "step": 10688 }, { "epoch": 7.3921161825726145, "grad_norm": 4.683642387390137, "learning_rate": 1.4488243430152146e-05, "log_odds_chosen": 10.479778289794922, "log_odds_ratio": -5.9483056247700006e-05, "logits/chosen": -0.30255410075187683, "logits/rejected": -0.3816949725151062, "logps/chosen": -0.00015024725871626288, "logps/rejected": -1.8282932043075562, "loss": 0.4385, "nll_loss": 0.10963056236505508, "rewards/accuracies": 1.0, "rewards/chosen": -1.5024726053525228e-05, "rewards/margins": 0.18281430006027222, "rewards/rejected": -0.18282932043075562, "step": 10689 }, { "epoch": 7.392807745504841, "grad_norm": 7.751023769378662, "learning_rate": 1.4484401413861995e-05, "log_odds_chosen": 9.871282577514648, "log_odds_ratio": -0.00014603903400711715, "logits/chosen": -0.2963367700576782, "logits/rejected": -0.2837250828742981, "logps/chosen": -0.00045877366210334003, "logps/rejected": -1.9336518049240112, "loss": 0.4378, "nll_loss": 0.10944164544343948, "rewards/accuracies": 1.0, "rewards/chosen": -4.587736475514248e-05, "rewards/margins": 0.19331932067871094, "rewards/rejected": -0.1933651864528656, "step": 10690 }, { "epoch": 7.393499308437068, "grad_norm": 8.917428970336914, "learning_rate": 1.4480559397571847e-05, "log_odds_chosen": 11.292522430419922, "log_odds_ratio": -5.915127621847205e-05, "logits/chosen": -0.12618744373321533, "logits/rejected": -0.11810366809368134, "logps/chosen": -0.0004943721578456461, "logps/rejected": -3.004287004470825, "loss": 0.7274, "nll_loss": 0.18185493350028992, "rewards/accuracies": 1.0, "rewards/chosen": -4.94372179673519e-05, "rewards/margins": 0.30037927627563477, "rewards/rejected": -0.30042868852615356, "step": 10691 }, { "epoch": 7.394190871369295, "grad_norm": 3.69183349609375, "learning_rate": 1.4476717381281698e-05, "log_odds_chosen": 10.84074592590332, "log_odds_ratio": -0.0003451247466728091, "logits/chosen": -0.23937007784843445, "logits/rejected": -0.26529383659362793, "logps/chosen": -0.0006957940058782697, "logps/rejected": -2.5660195350646973, "loss": 0.4154, "nll_loss": 0.10382203757762909, "rewards/accuracies": 1.0, "rewards/chosen": -6.957939331186935e-05, "rewards/margins": 0.25653237104415894, "rewards/rejected": -0.2566019594669342, "step": 10692 }, { "epoch": 7.394882434301522, "grad_norm": 4.8974432945251465, "learning_rate": 1.4472875364991547e-05, "log_odds_chosen": 9.895940780639648, "log_odds_ratio": -0.0010130507871508598, "logits/chosen": -0.4183223247528076, "logits/rejected": -0.24061670899391174, "logps/chosen": -0.0012401751009747386, "logps/rejected": -2.0234375, "loss": 0.4782, "nll_loss": 0.11944713443517685, "rewards/accuracies": 1.0, "rewards/chosen": -0.000124017518828623, "rewards/margins": 0.20221972465515137, "rewards/rejected": -0.20234374701976776, "step": 10693 }, { "epoch": 7.395573997233749, "grad_norm": 4.895662784576416, "learning_rate": 1.44690333487014e-05, "log_odds_chosen": 11.957036018371582, "log_odds_ratio": -0.00012197183968964964, "logits/chosen": -0.42090246081352234, "logits/rejected": -0.45159658789634705, "logps/chosen": -0.00027564779156818986, "logps/rejected": -3.303597927093506, "loss": 0.4743, "nll_loss": 0.1185615286231041, "rewards/accuracies": 1.0, "rewards/chosen": -2.7564779884414747e-05, "rewards/margins": 0.3303322196006775, "rewards/rejected": -0.3303597867488861, "step": 10694 }, { "epoch": 7.3962655601659755, "grad_norm": 3.9130725860595703, "learning_rate": 1.4465191332411252e-05, "log_odds_chosen": 10.67100715637207, "log_odds_ratio": -8.581003203289583e-05, "logits/chosen": -0.26079773902893066, "logits/rejected": -0.24093040823936462, "logps/chosen": -0.0005644162301905453, "logps/rejected": -2.086552619934082, "loss": 0.5113, "nll_loss": 0.12781992554664612, "rewards/accuracies": 1.0, "rewards/chosen": -5.644162592943758e-05, "rewards/margins": 0.20859882235527039, "rewards/rejected": -0.20865526795387268, "step": 10695 }, { "epoch": 7.396957123098202, "grad_norm": 4.183816909790039, "learning_rate": 1.44613493161211e-05, "log_odds_chosen": 9.546540260314941, "log_odds_ratio": -0.00034563770168460906, "logits/chosen": -0.0491783544421196, "logits/rejected": -0.02445707842707634, "logps/chosen": -0.0010862099006772041, "logps/rejected": -1.8530609607696533, "loss": 0.3779, "nll_loss": 0.09445017576217651, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010862098861252889, "rewards/margins": 0.18519748747348785, "rewards/rejected": -0.1853061020374298, "step": 10696 }, { "epoch": 7.397648686030429, "grad_norm": 4.751798152923584, "learning_rate": 1.4457507299830953e-05, "log_odds_chosen": 10.333335876464844, "log_odds_ratio": -0.00028667665901593864, "logits/chosen": -0.012272864580154419, "logits/rejected": -0.2014240324497223, "logps/chosen": -0.0004716809489764273, "logps/rejected": -1.5080033540725708, "loss": 1.0929, "nll_loss": 0.2731882929801941, "rewards/accuracies": 1.0, "rewards/chosen": -4.716809780802578e-05, "rewards/margins": 0.1507531702518463, "rewards/rejected": -0.15080034732818604, "step": 10697 }, { "epoch": 7.398340248962656, "grad_norm": 4.033143997192383, "learning_rate": 1.4453665283540804e-05, "log_odds_chosen": 10.501607894897461, "log_odds_ratio": -0.0003531145630404353, "logits/chosen": -0.45851123332977295, "logits/rejected": -0.6169005632400513, "logps/chosen": -0.00027281188522465527, "logps/rejected": -1.9819741249084473, "loss": 0.6676, "nll_loss": 0.16687491536140442, "rewards/accuracies": 1.0, "rewards/chosen": -2.728119034145493e-05, "rewards/margins": 0.19817012548446655, "rewards/rejected": -0.1981974095106125, "step": 10698 }, { "epoch": 7.399031811894883, "grad_norm": 3.8497109413146973, "learning_rate": 1.4449823267250653e-05, "log_odds_chosen": 10.944068908691406, "log_odds_ratio": -0.00011575493408599868, "logits/chosen": -0.21114255487918854, "logits/rejected": -0.27162325382232666, "logps/chosen": -0.000330332200974226, "logps/rejected": -1.948697805404663, "loss": 0.4342, "nll_loss": 0.10853491723537445, "rewards/accuracies": 1.0, "rewards/chosen": -3.3033222280209884e-05, "rewards/margins": 0.19483675062656403, "rewards/rejected": -0.1948697865009308, "step": 10699 }, { "epoch": 7.39972337482711, "grad_norm": 11.0430269241333, "learning_rate": 1.4445981250960506e-05, "log_odds_chosen": 12.51168155670166, "log_odds_ratio": -7.633711902599316e-06, "logits/chosen": 0.12339423596858978, "logits/rejected": -0.01906883716583252, "logps/chosen": -0.00014900990936439484, "logps/rejected": -3.635141372680664, "loss": 0.6599, "nll_loss": 0.1649717390537262, "rewards/accuracies": 1.0, "rewards/chosen": -1.4900991118338425e-05, "rewards/margins": 0.3634992241859436, "rewards/rejected": -0.36351412534713745, "step": 10700 }, { "epoch": 7.4004149377593365, "grad_norm": 4.974323272705078, "learning_rate": 1.4442139234670355e-05, "log_odds_chosen": 11.252674102783203, "log_odds_ratio": -4.285808972781524e-05, "logits/chosen": -0.09175758063793182, "logits/rejected": -0.17847055196762085, "logps/chosen": -0.00027339643565937877, "logps/rejected": -2.2855420112609863, "loss": 0.3829, "nll_loss": 0.0957113727927208, "rewards/accuracies": 1.0, "rewards/chosen": -2.733964538492728e-05, "rewards/margins": 0.22852687537670135, "rewards/rejected": -0.22855421900749207, "step": 10701 }, { "epoch": 7.401106500691563, "grad_norm": 6.626379489898682, "learning_rate": 1.4438297218380207e-05, "log_odds_chosen": 10.925520896911621, "log_odds_ratio": -0.00019308443006593734, "logits/chosen": -0.40792328119277954, "logits/rejected": -0.46591705083847046, "logps/chosen": -0.0005892232875339687, "logps/rejected": -2.2404043674468994, "loss": 0.7062, "nll_loss": 0.1765323281288147, "rewards/accuracies": 1.0, "rewards/chosen": -5.892233457416296e-05, "rewards/margins": 0.22398152947425842, "rewards/rejected": -0.2240404486656189, "step": 10702 }, { "epoch": 7.40179806362379, "grad_norm": 14.179370880126953, "learning_rate": 1.4434455202090058e-05, "log_odds_chosen": 11.303972244262695, "log_odds_ratio": -0.00011472676851553842, "logits/chosen": -0.22151263058185577, "logits/rejected": -0.3165542185306549, "logps/chosen": -0.00018160900799557567, "logps/rejected": -2.4688503742218018, "loss": 0.6201, "nll_loss": 0.15501704812049866, "rewards/accuracies": 1.0, "rewards/chosen": -1.816090298234485e-05, "rewards/margins": 0.2468668520450592, "rewards/rejected": -0.2468850165605545, "step": 10703 }, { "epoch": 7.402489626556017, "grad_norm": 6.767401218414307, "learning_rate": 1.4430613185799907e-05, "log_odds_chosen": 12.096673965454102, "log_odds_ratio": -1.661527858232148e-05, "logits/chosen": -0.36304378509521484, "logits/rejected": -0.34841278195381165, "logps/chosen": -0.00022185189300216734, "logps/rejected": -2.969815969467163, "loss": 0.4502, "nll_loss": 0.1125505343079567, "rewards/accuracies": 1.0, "rewards/chosen": -2.2185189664014615e-05, "rewards/margins": 0.29695940017700195, "rewards/rejected": -0.2969816029071808, "step": 10704 }, { "epoch": 7.403181189488244, "grad_norm": 4.77768611907959, "learning_rate": 1.442677116950976e-05, "log_odds_chosen": 10.152460098266602, "log_odds_ratio": -0.0015317605575546622, "logits/chosen": -0.10069093108177185, "logits/rejected": -0.29805952310562134, "logps/chosen": -0.000922149105463177, "logps/rejected": -1.9319921731948853, "loss": 0.4613, "nll_loss": 0.11516660451889038, "rewards/accuracies": 1.0, "rewards/chosen": -9.221491200150922e-05, "rewards/margins": 0.193107008934021, "rewards/rejected": -0.19319921731948853, "step": 10705 }, { "epoch": 7.403872752420471, "grad_norm": 10.977682113647461, "learning_rate": 1.4422929153219612e-05, "log_odds_chosen": 12.02888011932373, "log_odds_ratio": -1.1932907909795176e-05, "logits/chosen": -0.28198131918907166, "logits/rejected": -0.30092811584472656, "logps/chosen": -0.00018142201588489115, "logps/rejected": -3.0927035808563232, "loss": 1.0384, "nll_loss": 0.25958961248397827, "rewards/accuracies": 1.0, "rewards/chosen": -1.8142201952286996e-05, "rewards/margins": 0.30925223231315613, "rewards/rejected": -0.30927035212516785, "step": 10706 }, { "epoch": 7.404564315352697, "grad_norm": 5.967758655548096, "learning_rate": 1.441908713692946e-05, "log_odds_chosen": 10.04418659210205, "log_odds_ratio": -0.00025337719125673175, "logits/chosen": -0.2274404764175415, "logits/rejected": -0.20164667069911957, "logps/chosen": -0.007281972095370293, "logps/rejected": -2.104808807373047, "loss": 0.4879, "nll_loss": 0.12194767594337463, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007281972211785614, "rewards/margins": 0.20975270867347717, "rewards/rejected": -0.21048089861869812, "step": 10707 }, { "epoch": 7.405255878284924, "grad_norm": 4.674463272094727, "learning_rate": 1.4415245120639312e-05, "log_odds_chosen": 10.830794334411621, "log_odds_ratio": -0.00020141026470810175, "logits/chosen": -0.346174031496048, "logits/rejected": -0.36087295413017273, "logps/chosen": -0.0022484012879431248, "logps/rejected": -3.0499613285064697, "loss": 0.8594, "nll_loss": 0.21483436226844788, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022484012879431248, "rewards/margins": 0.3047713041305542, "rewards/rejected": -0.304996132850647, "step": 10708 }, { "epoch": 7.405947441217151, "grad_norm": 8.939460754394531, "learning_rate": 1.4411403104349164e-05, "log_odds_chosen": 10.59695816040039, "log_odds_ratio": -5.151807999936864e-05, "logits/chosen": 0.13660788536071777, "logits/rejected": 0.08717255294322968, "logps/chosen": -0.0004426073282957077, "logps/rejected": -2.0530447959899902, "loss": 0.4217, "nll_loss": 0.10542188584804535, "rewards/accuracies": 1.0, "rewards/chosen": -4.42607379227411e-05, "rewards/margins": 0.20526021718978882, "rewards/rejected": -0.20530450344085693, "step": 10709 }, { "epoch": 7.406639004149378, "grad_norm": 5.197713851928711, "learning_rate": 1.4407561088059013e-05, "log_odds_chosen": 11.313644409179688, "log_odds_ratio": -2.7155860152561218e-05, "logits/chosen": -0.37392541766166687, "logits/rejected": -0.37939178943634033, "logps/chosen": -0.0004172790504526347, "logps/rejected": -3.008484363555908, "loss": 0.5289, "nll_loss": 0.132216677069664, "rewards/accuracies": 1.0, "rewards/chosen": -4.1727907955646515e-05, "rewards/margins": 0.3008067309856415, "rewards/rejected": -0.30084845423698425, "step": 10710 }, { "epoch": 7.407330567081605, "grad_norm": 5.047555446624756, "learning_rate": 1.4403719071768865e-05, "log_odds_chosen": 10.623932838439941, "log_odds_ratio": -0.0003137765161227435, "logits/chosen": -0.21119244396686554, "logits/rejected": -0.2765723466873169, "logps/chosen": -0.00016566917474847287, "logps/rejected": -2.1351804733276367, "loss": 0.6784, "nll_loss": 0.16956068575382233, "rewards/accuracies": 1.0, "rewards/chosen": -1.6566917111049406e-05, "rewards/margins": 0.2135014683008194, "rewards/rejected": -0.21351803839206696, "step": 10711 }, { "epoch": 7.408022130013832, "grad_norm": 14.978245735168457, "learning_rate": 1.4399877055478716e-05, "log_odds_chosen": 11.25374698638916, "log_odds_ratio": -5.259553654468618e-05, "logits/chosen": -0.16523447632789612, "logits/rejected": -0.2930530905723572, "logps/chosen": -0.00020616357505787164, "logps/rejected": -2.6449317932128906, "loss": 0.447, "nll_loss": 0.11174537241458893, "rewards/accuracies": 1.0, "rewards/chosen": -2.0616356778191403e-05, "rewards/margins": 0.2644725441932678, "rewards/rejected": -0.2644931674003601, "step": 10712 }, { "epoch": 7.408713692946058, "grad_norm": 8.17994213104248, "learning_rate": 1.4396035039188565e-05, "log_odds_chosen": 10.682540893554688, "log_odds_ratio": -0.0001834592258092016, "logits/chosen": -0.19058746099472046, "logits/rejected": -0.3210287392139435, "logps/chosen": -0.0003053398395422846, "logps/rejected": -2.055088996887207, "loss": 0.5244, "nll_loss": 0.13107192516326904, "rewards/accuracies": 1.0, "rewards/chosen": -3.0533985409419984e-05, "rewards/margins": 0.20547837018966675, "rewards/rejected": -0.20550891757011414, "step": 10713 }, { "epoch": 7.409405255878285, "grad_norm": 5.7820353507995605, "learning_rate": 1.4392193022898418e-05, "log_odds_chosen": 11.297080993652344, "log_odds_ratio": -0.00011493961210362613, "logits/chosen": -0.6253798007965088, "logits/rejected": -0.6458042860031128, "logps/chosen": -0.00022130817524157465, "logps/rejected": -2.653463840484619, "loss": 0.3948, "nll_loss": 0.0986841544508934, "rewards/accuracies": 1.0, "rewards/chosen": -2.2130816432763822e-05, "rewards/margins": 0.26532429456710815, "rewards/rejected": -0.2653464078903198, "step": 10714 }, { "epoch": 7.410096818810512, "grad_norm": 4.849417209625244, "learning_rate": 1.438835100660827e-05, "log_odds_chosen": 11.000490188598633, "log_odds_ratio": -2.988562118844129e-05, "logits/chosen": -0.1645212173461914, "logits/rejected": -0.20662593841552734, "logps/chosen": -0.00014986046880949289, "logps/rejected": -2.0731770992279053, "loss": 0.3641, "nll_loss": 0.09103177487850189, "rewards/accuracies": 1.0, "rewards/chosen": -1.4986046153353527e-05, "rewards/margins": 0.20730271935462952, "rewards/rejected": -0.20731770992279053, "step": 10715 }, { "epoch": 7.410788381742739, "grad_norm": 8.789484024047852, "learning_rate": 1.438450899031812e-05, "log_odds_chosen": 9.726442337036133, "log_odds_ratio": -0.21301937103271484, "logits/chosen": -0.6469697952270508, "logits/rejected": -0.6698107123374939, "logps/chosen": -0.02360660955309868, "logps/rejected": -1.9374034404754639, "loss": 0.451, "nll_loss": 0.09143715351819992, "rewards/accuracies": 0.875, "rewards/chosen": -0.0023606610484421253, "rewards/margins": 0.19137969613075256, "rewards/rejected": -0.1937403529882431, "step": 10716 }, { "epoch": 7.411479944674966, "grad_norm": 3.8728785514831543, "learning_rate": 1.438066697402797e-05, "log_odds_chosen": 9.455845832824707, "log_odds_ratio": -0.001326136291027069, "logits/chosen": -0.06192685291171074, "logits/rejected": -0.1491842418909073, "logps/chosen": -0.0022150897420942783, "logps/rejected": -1.7510604858398438, "loss": 0.4582, "nll_loss": 0.11441729962825775, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002215089916717261, "rewards/margins": 0.17488452792167664, "rewards/rejected": -0.17510606348514557, "step": 10717 }, { "epoch": 7.412171507607193, "grad_norm": 19.989530563354492, "learning_rate": 1.4376824957737822e-05, "log_odds_chosen": 10.227559089660645, "log_odds_ratio": -0.0007945247925817966, "logits/chosen": -0.47161751985549927, "logits/rejected": -0.5490267276763916, "logps/chosen": -0.0002686060615815222, "logps/rejected": -2.269040584564209, "loss": 0.7494, "nll_loss": 0.18727454543113708, "rewards/accuracies": 1.0, "rewards/chosen": -2.6860607249545865e-05, "rewards/margins": 0.22687721252441406, "rewards/rejected": -0.22690407931804657, "step": 10718 }, { "epoch": 7.412863070539419, "grad_norm": 5.017844200134277, "learning_rate": 1.4372982941447671e-05, "log_odds_chosen": 10.590387344360352, "log_odds_ratio": -6.486523489002138e-05, "logits/chosen": -0.4550924301147461, "logits/rejected": -0.4405045509338379, "logps/chosen": -0.0001809865643735975, "logps/rejected": -1.7180522680282593, "loss": 0.5406, "nll_loss": 0.135142520070076, "rewards/accuracies": 1.0, "rewards/chosen": -1.809865716495551e-05, "rewards/margins": 0.1717871129512787, "rewards/rejected": -0.1718052327632904, "step": 10719 }, { "epoch": 7.413554633471646, "grad_norm": 5.447579860687256, "learning_rate": 1.4369140925157524e-05, "log_odds_chosen": 10.651297569274902, "log_odds_ratio": -8.183491445379332e-05, "logits/chosen": -0.21291130781173706, "logits/rejected": -0.34738385677337646, "logps/chosen": -0.00040813308442011476, "logps/rejected": -2.471925735473633, "loss": 0.5637, "nll_loss": 0.14090707898139954, "rewards/accuracies": 1.0, "rewards/chosen": -4.0813312807586044e-05, "rewards/margins": 0.24715176224708557, "rewards/rejected": -0.2471925914287567, "step": 10720 }, { "epoch": 7.414246196403873, "grad_norm": 7.02000617980957, "learning_rate": 1.4365298908867375e-05, "log_odds_chosen": 10.658409118652344, "log_odds_ratio": -6.082295294618234e-05, "logits/chosen": 0.12181103974580765, "logits/rejected": 0.011016082018613815, "logps/chosen": -0.0004951843875460327, "logps/rejected": -1.7555874586105347, "loss": 0.5197, "nll_loss": 0.1299232691526413, "rewards/accuracies": 1.0, "rewards/chosen": -4.951843948219903e-05, "rewards/margins": 0.1755092293024063, "rewards/rejected": -0.17555874586105347, "step": 10721 }, { "epoch": 7.4149377593361, "grad_norm": 3.4330828189849854, "learning_rate": 1.4361456892577224e-05, "log_odds_chosen": 11.125907897949219, "log_odds_ratio": -4.971097951056436e-05, "logits/chosen": -0.6454315185546875, "logits/rejected": -0.6421571969985962, "logps/chosen": -0.0001896456378744915, "logps/rejected": -2.39558744430542, "loss": 0.677, "nll_loss": 0.1692388504743576, "rewards/accuracies": 1.0, "rewards/chosen": -1.8964565242640674e-05, "rewards/margins": 0.23953978717327118, "rewards/rejected": -0.23955872654914856, "step": 10722 }, { "epoch": 7.415629322268327, "grad_norm": 4.539621353149414, "learning_rate": 1.4357614876287076e-05, "log_odds_chosen": 11.152408599853516, "log_odds_ratio": -4.087133129360154e-05, "logits/chosen": -0.2305634319782257, "logits/rejected": -0.3199521005153656, "logps/chosen": -0.00014302245108410716, "logps/rejected": -2.1154966354370117, "loss": 0.5057, "nll_loss": 0.12642784416675568, "rewards/accuracies": 1.0, "rewards/chosen": -1.4302244380814955e-05, "rewards/margins": 0.21153536438941956, "rewards/rejected": -0.21154966950416565, "step": 10723 }, { "epoch": 7.4163208852005535, "grad_norm": 3.9620726108551025, "learning_rate": 1.4353772859996929e-05, "log_odds_chosen": 11.445850372314453, "log_odds_ratio": -0.000533333863131702, "logits/chosen": -0.19320987164974213, "logits/rejected": -0.2019619345664978, "logps/chosen": -0.0006852270453236997, "logps/rejected": -2.631791591644287, "loss": 0.4167, "nll_loss": 0.10412156581878662, "rewards/accuracies": 1.0, "rewards/chosen": -6.852269871160388e-05, "rewards/margins": 0.2631106674671173, "rewards/rejected": -0.2631791830062866, "step": 10724 }, { "epoch": 7.41701244813278, "grad_norm": 4.122745990753174, "learning_rate": 1.4349930843706778e-05, "log_odds_chosen": 9.989114761352539, "log_odds_ratio": -0.00020754087017849088, "logits/chosen": -0.23228757083415985, "logits/rejected": -0.26432672142982483, "logps/chosen": -0.004630462732166052, "logps/rejected": -2.2054288387298584, "loss": 0.5699, "nll_loss": 0.14244690537452698, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004630462499335408, "rewards/margins": 0.22007985413074493, "rewards/rejected": -0.22054289281368256, "step": 10725 }, { "epoch": 7.417704011065007, "grad_norm": 4.885697364807129, "learning_rate": 1.4346088827416628e-05, "log_odds_chosen": 10.486754417419434, "log_odds_ratio": -7.210951298475266e-05, "logits/chosen": -0.21432363986968994, "logits/rejected": -0.28481611609458923, "logps/chosen": -0.0004409044631756842, "logps/rejected": -2.3748602867126465, "loss": 0.4142, "nll_loss": 0.10355227440595627, "rewards/accuracies": 1.0, "rewards/chosen": -4.409044413478114e-05, "rewards/margins": 0.23744192719459534, "rewards/rejected": -0.23748603463172913, "step": 10726 }, { "epoch": 7.418395573997234, "grad_norm": 5.20670223236084, "learning_rate": 1.434224681112648e-05, "log_odds_chosen": 10.297154426574707, "log_odds_ratio": -0.000343418592819944, "logits/chosen": -0.43424534797668457, "logits/rejected": -0.27497410774230957, "logps/chosen": -0.00046478534932248294, "logps/rejected": -1.8652817010879517, "loss": 0.3707, "nll_loss": 0.09263065457344055, "rewards/accuracies": 1.0, "rewards/chosen": -4.647853711503558e-05, "rewards/margins": 0.18648171424865723, "rewards/rejected": -0.18652817606925964, "step": 10727 }, { "epoch": 7.419087136929461, "grad_norm": 7.701286792755127, "learning_rate": 1.433840479483633e-05, "log_odds_chosen": 10.816944122314453, "log_odds_ratio": -0.00010325042239855975, "logits/chosen": -0.21323725581169128, "logits/rejected": -0.2841075360774994, "logps/chosen": -0.00025166134582832456, "logps/rejected": -2.0154638290405273, "loss": 0.543, "nll_loss": 0.13573779165744781, "rewards/accuracies": 1.0, "rewards/chosen": -2.5166134946630336e-05, "rewards/margins": 0.20152121782302856, "rewards/rejected": -0.20154638588428497, "step": 10728 }, { "epoch": 7.419778699861688, "grad_norm": 3.741018056869507, "learning_rate": 1.4334562778546182e-05, "log_odds_chosen": 9.553108215332031, "log_odds_ratio": -0.00026011423324234784, "logits/chosen": -0.8217746019363403, "logits/rejected": -0.849798321723938, "logps/chosen": -0.0003743913257494569, "logps/rejected": -1.1073468923568726, "loss": 0.358, "nll_loss": 0.08946304023265839, "rewards/accuracies": 1.0, "rewards/chosen": -3.7439131119754165e-05, "rewards/margins": 0.11069725453853607, "rewards/rejected": -0.11073470115661621, "step": 10729 }, { "epoch": 7.4204702627939145, "grad_norm": 9.998017311096191, "learning_rate": 1.4330720762256033e-05, "log_odds_chosen": 9.961030006408691, "log_odds_ratio": -0.0007552761235274374, "logits/chosen": -0.31879276037216187, "logits/rejected": -0.3525419235229492, "logps/chosen": -0.0007340236334130168, "logps/rejected": -1.8016279935836792, "loss": 0.5934, "nll_loss": 0.1482788324356079, "rewards/accuracies": 1.0, "rewards/chosen": -7.340236334130168e-05, "rewards/margins": 0.18008939921855927, "rewards/rejected": -0.18016280233860016, "step": 10730 }, { "epoch": 7.421161825726141, "grad_norm": 4.061387062072754, "learning_rate": 1.4326878745965882e-05, "log_odds_chosen": 11.667825698852539, "log_odds_ratio": -4.544300463749096e-05, "logits/chosen": -0.3991694152355194, "logits/rejected": -0.4634754955768585, "logps/chosen": -0.00041911107837222517, "logps/rejected": -3.1630825996398926, "loss": 0.5193, "nll_loss": 0.12982602417469025, "rewards/accuracies": 1.0, "rewards/chosen": -4.191110929241404e-05, "rewards/margins": 0.31626635789871216, "rewards/rejected": -0.31630825996398926, "step": 10731 }, { "epoch": 7.421853388658368, "grad_norm": 3.5463762283325195, "learning_rate": 1.4323036729675735e-05, "log_odds_chosen": 10.553950309753418, "log_odds_ratio": -0.0005513601936399937, "logits/chosen": -0.9708388447761536, "logits/rejected": -0.9920282363891602, "logps/chosen": -0.00023983963183127344, "logps/rejected": -2.1186563968658447, "loss": 0.5151, "nll_loss": 0.12870892882347107, "rewards/accuracies": 1.0, "rewards/chosen": -2.3983962819329463e-05, "rewards/margins": 0.2118416726589203, "rewards/rejected": -0.21186566352844238, "step": 10732 }, { "epoch": 7.422544951590595, "grad_norm": 4.914392948150635, "learning_rate": 1.4319194713385587e-05, "log_odds_chosen": 10.95601749420166, "log_odds_ratio": -6.64880353724584e-05, "logits/chosen": -0.5185230374336243, "logits/rejected": -0.5552864670753479, "logps/chosen": -0.0002651938411872834, "logps/rejected": -2.116021156311035, "loss": 0.487, "nll_loss": 0.12174984812736511, "rewards/accuracies": 1.0, "rewards/chosen": -2.65193848463241e-05, "rewards/margins": 0.21157559752464294, "rewards/rejected": -0.2116020917892456, "step": 10733 }, { "epoch": 7.423236514522822, "grad_norm": 8.798486709594727, "learning_rate": 1.4315352697095436e-05, "log_odds_chosen": 11.923349380493164, "log_odds_ratio": -4.4096173951402307e-05, "logits/chosen": -0.5265907049179077, "logits/rejected": -0.5732027888298035, "logps/chosen": -0.0003586837265174836, "logps/rejected": -3.3685059547424316, "loss": 0.4845, "nll_loss": 0.12111400067806244, "rewards/accuracies": 1.0, "rewards/chosen": -3.58683719241526e-05, "rewards/margins": 0.3368147313594818, "rewards/rejected": -0.3368505835533142, "step": 10734 }, { "epoch": 7.423928077455049, "grad_norm": 4.281439781188965, "learning_rate": 1.4311510680805287e-05, "log_odds_chosen": 10.40571117401123, "log_odds_ratio": -0.00017208530334755778, "logits/chosen": -0.22984299063682556, "logits/rejected": -0.34365904331207275, "logps/chosen": -0.0007090799626894295, "logps/rejected": -1.9753313064575195, "loss": 0.4389, "nll_loss": 0.10970845073461533, "rewards/accuracies": 1.0, "rewards/chosen": -7.090799772413447e-05, "rewards/margins": 0.19746221601963043, "rewards/rejected": -0.19753314554691315, "step": 10735 }, { "epoch": 7.4246196403872755, "grad_norm": 4.984375953674316, "learning_rate": 1.430766866451514e-05, "log_odds_chosen": 10.356589317321777, "log_odds_ratio": -0.00015899700520094484, "logits/chosen": -0.630196213722229, "logits/rejected": -0.5708923935890198, "logps/chosen": -0.0002872415934689343, "logps/rejected": -2.1628894805908203, "loss": 0.4995, "nll_loss": 0.1248672604560852, "rewards/accuracies": 1.0, "rewards/chosen": -2.8724161893478595e-05, "rewards/margins": 0.21626022458076477, "rewards/rejected": -0.2162889540195465, "step": 10736 }, { "epoch": 7.425311203319502, "grad_norm": 4.451498508453369, "learning_rate": 1.4303826648224988e-05, "log_odds_chosen": 10.957164764404297, "log_odds_ratio": -2.3646662157261744e-05, "logits/chosen": -0.6510589122772217, "logits/rejected": -0.6703109741210938, "logps/chosen": -6.0195921832928434e-05, "logps/rejected": -1.4261058568954468, "loss": 0.3418, "nll_loss": 0.08545950800180435, "rewards/accuracies": 1.0, "rewards/chosen": -6.019592547090724e-06, "rewards/margins": 0.14260455965995789, "rewards/rejected": -0.1426105797290802, "step": 10737 }, { "epoch": 7.426002766251729, "grad_norm": 5.354230880737305, "learning_rate": 1.429998463193484e-05, "log_odds_chosen": 10.506896018981934, "log_odds_ratio": -0.0001476502511650324, "logits/chosen": -0.5877447128295898, "logits/rejected": -0.6587046384811401, "logps/chosen": -0.00017094443319365382, "logps/rejected": -1.8655098676681519, "loss": 0.6879, "nll_loss": 0.17195531725883484, "rewards/accuracies": 1.0, "rewards/chosen": -1.7094444046961144e-05, "rewards/margins": 0.18653389811515808, "rewards/rejected": -0.18655097484588623, "step": 10738 }, { "epoch": 7.426694329183956, "grad_norm": 4.813597679138184, "learning_rate": 1.4296142615644692e-05, "log_odds_chosen": 11.009190559387207, "log_odds_ratio": -4.4412015995476395e-05, "logits/chosen": -0.5427803993225098, "logits/rejected": -0.5382024049758911, "logps/chosen": -0.00028435117565095425, "logps/rejected": -2.667729377746582, "loss": 0.4004, "nll_loss": 0.10008691251277924, "rewards/accuracies": 1.0, "rewards/chosen": -2.843511538230814e-05, "rewards/margins": 0.2667444944381714, "rewards/rejected": -0.26677295565605164, "step": 10739 }, { "epoch": 7.427385892116183, "grad_norm": 4.3468403816223145, "learning_rate": 1.429230059935454e-05, "log_odds_chosen": 12.126304626464844, "log_odds_ratio": -1.4597450899600517e-05, "logits/chosen": -0.11912352591753006, "logits/rejected": -0.07812384516000748, "logps/chosen": -8.256173168774694e-05, "logps/rejected": -2.619260311126709, "loss": 0.3048, "nll_loss": 0.07620352506637573, "rewards/accuracies": 1.0, "rewards/chosen": -8.256173714471515e-06, "rewards/margins": 0.26191776990890503, "rewards/rejected": -0.2619260251522064, "step": 10740 }, { "epoch": 7.42807745504841, "grad_norm": 5.906902313232422, "learning_rate": 1.4288458583064393e-05, "log_odds_chosen": 10.210342407226562, "log_odds_ratio": -0.00011116905079688877, "logits/chosen": -0.35095638036727905, "logits/rejected": -0.18661539256572723, "logps/chosen": -0.0003797630197368562, "logps/rejected": -1.7496932744979858, "loss": 0.5041, "nll_loss": 0.12601202726364136, "rewards/accuracies": 1.0, "rewards/chosen": -3.7976304156472906e-05, "rewards/margins": 0.1749313473701477, "rewards/rejected": -0.17496933043003082, "step": 10741 }, { "epoch": 7.4287690179806365, "grad_norm": 5.483175754547119, "learning_rate": 1.4284616566774245e-05, "log_odds_chosen": 10.167135238647461, "log_odds_ratio": -0.00012993926065973938, "logits/chosen": -0.37157195806503296, "logits/rejected": -0.44876325130462646, "logps/chosen": -0.0008369782008230686, "logps/rejected": -2.453321933746338, "loss": 0.3618, "nll_loss": 0.09044036269187927, "rewards/accuracies": 1.0, "rewards/chosen": -8.369782153749838e-05, "rewards/margins": 0.24524849653244019, "rewards/rejected": -0.24533218145370483, "step": 10742 }, { "epoch": 7.429460580912863, "grad_norm": 5.415278434753418, "learning_rate": 1.4280774550484095e-05, "log_odds_chosen": 9.506936073303223, "log_odds_ratio": -0.0006739329546689987, "logits/chosen": -0.33719977736473083, "logits/rejected": -0.34382933378219604, "logps/chosen": -0.0023659905418753624, "logps/rejected": -2.140268325805664, "loss": 0.5052, "nll_loss": 0.12623867392539978, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002365990512771532, "rewards/margins": 0.21379022300243378, "rewards/rejected": -0.21402683854103088, "step": 10743 }, { "epoch": 7.43015214384509, "grad_norm": 4.979397773742676, "learning_rate": 1.4276932534193945e-05, "log_odds_chosen": 11.472922325134277, "log_odds_ratio": -3.206637848052196e-05, "logits/chosen": 0.04949849843978882, "logits/rejected": -0.01601281762123108, "logps/chosen": -0.0002056766825262457, "logps/rejected": -2.569906234741211, "loss": 0.8592, "nll_loss": 0.2147943079471588, "rewards/accuracies": 1.0, "rewards/chosen": -2.0567669707816094e-05, "rewards/margins": 0.25697004795074463, "rewards/rejected": -0.25699061155319214, "step": 10744 }, { "epoch": 7.430843706777317, "grad_norm": 5.264523029327393, "learning_rate": 1.4273090517903798e-05, "log_odds_chosen": 11.385479927062988, "log_odds_ratio": -7.055592141114175e-05, "logits/chosen": -0.5030226111412048, "logits/rejected": -0.5155960321426392, "logps/chosen": -0.00022816695854999125, "logps/rejected": -2.8762707710266113, "loss": 0.5931, "nll_loss": 0.14825564622879028, "rewards/accuracies": 1.0, "rewards/chosen": -2.2816697310190648e-05, "rewards/margins": 0.28760427236557007, "rewards/rejected": -0.28762707114219666, "step": 10745 }, { "epoch": 7.431535269709544, "grad_norm": 8.076372146606445, "learning_rate": 1.4269248501613647e-05, "log_odds_chosen": 10.986825942993164, "log_odds_ratio": -5.322953438735567e-05, "logits/chosen": -0.3730906844139099, "logits/rejected": -0.4914498031139374, "logps/chosen": -0.00014477164950221777, "logps/rejected": -1.9321510791778564, "loss": 0.4905, "nll_loss": 0.12262947112321854, "rewards/accuracies": 1.0, "rewards/chosen": -1.4477164768322837e-05, "rewards/margins": 0.19320064783096313, "rewards/rejected": -0.19321510195732117, "step": 10746 }, { "epoch": 7.432226832641771, "grad_norm": 3.97635817527771, "learning_rate": 1.42654064853235e-05, "log_odds_chosen": 10.292720794677734, "log_odds_ratio": -0.00020925466378685087, "logits/chosen": -0.19483008980751038, "logits/rejected": -0.10350719839334488, "logps/chosen": -0.00033111387165263295, "logps/rejected": -2.2941386699676514, "loss": 0.423, "nll_loss": 0.10573115944862366, "rewards/accuracies": 1.0, "rewards/chosen": -3.3111387892859057e-05, "rewards/margins": 0.2293807864189148, "rewards/rejected": -0.22941389679908752, "step": 10747 }, { "epoch": 7.4329183955739975, "grad_norm": 14.447975158691406, "learning_rate": 1.426156446903335e-05, "log_odds_chosen": 12.107383728027344, "log_odds_ratio": -4.643085048883222e-05, "logits/chosen": -0.14821778237819672, "logits/rejected": -0.15492330491542816, "logps/chosen": -0.00020316088921390474, "logps/rejected": -3.0534801483154297, "loss": 0.6349, "nll_loss": 0.15871471166610718, "rewards/accuracies": 1.0, "rewards/chosen": -2.0316088921390474e-05, "rewards/margins": 0.30532771348953247, "rewards/rejected": -0.3053480088710785, "step": 10748 }, { "epoch": 7.433609958506224, "grad_norm": 4.383937358856201, "learning_rate": 1.4257722452743199e-05, "log_odds_chosen": 11.975631713867188, "log_odds_ratio": -1.2384831279632635e-05, "logits/chosen": -0.04449837654829025, "logits/rejected": -0.04871883988380432, "logps/chosen": -0.00011500051186885685, "logps/rejected": -2.476550579071045, "loss": 0.5421, "nll_loss": 0.13553060591220856, "rewards/accuracies": 1.0, "rewards/chosen": -1.1500053005875088e-05, "rewards/margins": 0.2476436048746109, "rewards/rejected": -0.24765509366989136, "step": 10749 }, { "epoch": 7.434301521438451, "grad_norm": 8.04423713684082, "learning_rate": 1.4253880436453051e-05, "log_odds_chosen": 12.123249053955078, "log_odds_ratio": -0.0006074186530895531, "logits/chosen": -0.17462217807769775, "logits/rejected": -0.218583881855011, "logps/chosen": -0.0002956288226414472, "logps/rejected": -3.7079389095306396, "loss": 0.8922, "nll_loss": 0.2229778915643692, "rewards/accuracies": 1.0, "rewards/chosen": -2.956288335553836e-05, "rewards/margins": 0.3707643151283264, "rewards/rejected": -0.3707938492298126, "step": 10750 }, { "epoch": 7.434993084370678, "grad_norm": 4.193935394287109, "learning_rate": 1.4250038420162904e-05, "log_odds_chosen": 11.465576171875, "log_odds_ratio": -1.6591426174272783e-05, "logits/chosen": -0.18547575175762177, "logits/rejected": -0.2769421339035034, "logps/chosen": -8.868890290614218e-05, "logps/rejected": -2.083763599395752, "loss": 0.5466, "nll_loss": 0.13663995265960693, "rewards/accuracies": 1.0, "rewards/chosen": -8.868890290614218e-06, "rewards/margins": 0.2083674967288971, "rewards/rejected": -0.20837636291980743, "step": 10751 }, { "epoch": 7.435684647302905, "grad_norm": 5.5280351638793945, "learning_rate": 1.4246196403872753e-05, "log_odds_chosen": 10.965143203735352, "log_odds_ratio": -0.00019943565712310374, "logits/chosen": -0.5253455638885498, "logits/rejected": -0.5380521416664124, "logps/chosen": -0.00028030495741404593, "logps/rejected": -2.5139219760894775, "loss": 0.2926, "nll_loss": 0.07312865555286407, "rewards/accuracies": 1.0, "rewards/chosen": -2.8030497560393997e-05, "rewards/margins": 0.25136417150497437, "rewards/rejected": -0.2513922154903412, "step": 10752 }, { "epoch": 7.436376210235132, "grad_norm": 10.10204792022705, "learning_rate": 1.4242354387582604e-05, "log_odds_chosen": 10.661382675170898, "log_odds_ratio": -5.7524346630088985e-05, "logits/chosen": -0.3062681555747986, "logits/rejected": -0.31217724084854126, "logps/chosen": -0.0003130334662273526, "logps/rejected": -2.2218077182769775, "loss": 0.5308, "nll_loss": 0.13269490003585815, "rewards/accuracies": 1.0, "rewards/chosen": -3.1303348805522546e-05, "rewards/margins": 0.22214949131011963, "rewards/rejected": -0.2221807837486267, "step": 10753 }, { "epoch": 7.4370677731673585, "grad_norm": 6.339082717895508, "learning_rate": 1.4238512371292456e-05, "log_odds_chosen": 10.925583839416504, "log_odds_ratio": -0.00032584331347607076, "logits/chosen": -0.11192497611045837, "logits/rejected": -0.17509740591049194, "logps/chosen": -0.00034168068668805063, "logps/rejected": -2.2192599773406982, "loss": 0.6147, "nll_loss": 0.1536414623260498, "rewards/accuracies": 1.0, "rewards/chosen": -3.41680679412093e-05, "rewards/margins": 0.22189182043075562, "rewards/rejected": -0.2219260036945343, "step": 10754 }, { "epoch": 7.437759336099585, "grad_norm": 6.157551288604736, "learning_rate": 1.4234670355002305e-05, "log_odds_chosen": 10.910655975341797, "log_odds_ratio": -5.358006092137657e-05, "logits/chosen": -0.39808034896850586, "logits/rejected": -0.46443721652030945, "logps/chosen": -0.000200995389604941, "logps/rejected": -2.153860569000244, "loss": 0.4001, "nll_loss": 0.10002223402261734, "rewards/accuracies": 1.0, "rewards/chosen": -2.0099540051887743e-05, "rewards/margins": 0.21536597609519958, "rewards/rejected": -0.21538607776165009, "step": 10755 }, { "epoch": 7.438450899031812, "grad_norm": 4.591843128204346, "learning_rate": 1.4230828338712158e-05, "log_odds_chosen": 10.28937816619873, "log_odds_ratio": -5.2966952353017405e-05, "logits/chosen": -0.49192312359809875, "logits/rejected": -0.5401813983917236, "logps/chosen": -0.00017644742911215872, "logps/rejected": -1.429124116897583, "loss": 0.4016, "nll_loss": 0.10039389133453369, "rewards/accuracies": 1.0, "rewards/chosen": -1.764474291121587e-05, "rewards/margins": 0.14289477467536926, "rewards/rejected": -0.14291241765022278, "step": 10756 }, { "epoch": 7.439142461964039, "grad_norm": 6.4378180503845215, "learning_rate": 1.4226986322422008e-05, "log_odds_chosen": 9.888177871704102, "log_odds_ratio": -0.0002807261480484158, "logits/chosen": -0.4602343440055847, "logits/rejected": -0.5490994453430176, "logps/chosen": -0.0010474890004843473, "logps/rejected": -1.6598628759384155, "loss": 0.4625, "nll_loss": 0.11559551954269409, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010474889859324321, "rewards/margins": 0.16588152945041656, "rewards/rejected": -0.16598628461360931, "step": 10757 }, { "epoch": 7.439834024896266, "grad_norm": 7.4068284034729, "learning_rate": 1.4223144306131857e-05, "log_odds_chosen": 10.624796867370605, "log_odds_ratio": -5.686835356755182e-05, "logits/chosen": -0.44092679023742676, "logits/rejected": -0.5704661011695862, "logps/chosen": -0.00023139704717323184, "logps/rejected": -2.2564005851745605, "loss": 0.7921, "nll_loss": 0.19801200926303864, "rewards/accuracies": 1.0, "rewards/chosen": -2.3139706172514707e-05, "rewards/margins": 0.225616917014122, "rewards/rejected": -0.22564005851745605, "step": 10758 }, { "epoch": 7.440525587828493, "grad_norm": 5.997343063354492, "learning_rate": 1.421930228984171e-05, "log_odds_chosen": 11.066978454589844, "log_odds_ratio": -4.089455251232721e-05, "logits/chosen": -0.14664515852928162, "logits/rejected": -0.24715624749660492, "logps/chosen": -0.0006729392916895449, "logps/rejected": -2.328310251235962, "loss": 0.5871, "nll_loss": 0.1467694789171219, "rewards/accuracies": 1.0, "rewards/chosen": -6.729392771376297e-05, "rewards/margins": 0.23276372253894806, "rewards/rejected": -0.23283101618289948, "step": 10759 }, { "epoch": 7.441217150760719, "grad_norm": 5.868139743804932, "learning_rate": 1.4215460273551562e-05, "log_odds_chosen": 11.640571594238281, "log_odds_ratio": -2.4290455257869326e-05, "logits/chosen": -0.3725976049900055, "logits/rejected": -0.4877762794494629, "logps/chosen": -0.00015499522851314396, "logps/rejected": -2.7272706031799316, "loss": 0.715, "nll_loss": 0.17873850464820862, "rewards/accuracies": 1.0, "rewards/chosen": -1.5499521396122873e-05, "rewards/margins": 0.2727115750312805, "rewards/rejected": -0.2727270722389221, "step": 10760 }, { "epoch": 7.441908713692946, "grad_norm": 5.459692478179932, "learning_rate": 1.4211618257261411e-05, "log_odds_chosen": 10.97966194152832, "log_odds_ratio": -0.00010686190944397822, "logits/chosen": -0.6309956312179565, "logits/rejected": -0.679790735244751, "logps/chosen": -0.00012509649968706071, "logps/rejected": -1.7729841470718384, "loss": 0.3926, "nll_loss": 0.09814368188381195, "rewards/accuracies": 1.0, "rewards/chosen": -1.250964942300925e-05, "rewards/margins": 0.17728590965270996, "rewards/rejected": -0.1772984266281128, "step": 10761 }, { "epoch": 7.442600276625173, "grad_norm": 10.462207794189453, "learning_rate": 1.4207776240971262e-05, "log_odds_chosen": 10.301549911499023, "log_odds_ratio": -0.00023399626661557704, "logits/chosen": -0.9923213720321655, "logits/rejected": -1.0673259496688843, "logps/chosen": -0.0002473454223945737, "logps/rejected": -2.0343191623687744, "loss": 0.3953, "nll_loss": 0.09880739450454712, "rewards/accuracies": 1.0, "rewards/chosen": -2.4734541511861607e-05, "rewards/margins": 0.2034071832895279, "rewards/rejected": -0.20343193411827087, "step": 10762 }, { "epoch": 7.4432918395574, "grad_norm": 3.7132325172424316, "learning_rate": 1.4203934224681115e-05, "log_odds_chosen": 11.021075248718262, "log_odds_ratio": -2.973015944007784e-05, "logits/chosen": -0.6955537796020508, "logits/rejected": -0.7272615432739258, "logps/chosen": -0.00016529949789401144, "logps/rejected": -2.10345458984375, "loss": 0.4654, "nll_loss": 0.1163576990365982, "rewards/accuracies": 1.0, "rewards/chosen": -1.652994797041174e-05, "rewards/margins": 0.21032892167568207, "rewards/rejected": -0.21034544706344604, "step": 10763 }, { "epoch": 7.443983402489627, "grad_norm": 12.608946800231934, "learning_rate": 1.4200092208390964e-05, "log_odds_chosen": 11.590502738952637, "log_odds_ratio": -1.641233393456787e-05, "logits/chosen": -0.09940451383590698, "logits/rejected": -0.19625744223594666, "logps/chosen": -0.00038619639235548675, "logps/rejected": -2.473310708999634, "loss": 0.6055, "nll_loss": 0.15136292576789856, "rewards/accuracies": 1.0, "rewards/chosen": -3.861963705276139e-05, "rewards/margins": 0.24729245901107788, "rewards/rejected": -0.24733106791973114, "step": 10764 }, { "epoch": 7.444674965421854, "grad_norm": 11.930988311767578, "learning_rate": 1.4196250192100816e-05, "log_odds_chosen": 11.384069442749023, "log_odds_ratio": -1.1792275472544134e-05, "logits/chosen": -0.3510861098766327, "logits/rejected": -0.3697136342525482, "logps/chosen": -0.00010838945308933035, "logps/rejected": -2.1518192291259766, "loss": 0.4241, "nll_loss": 0.10602325946092606, "rewards/accuracies": 1.0, "rewards/chosen": -1.0838944945135154e-05, "rewards/margins": 0.21517106890678406, "rewards/rejected": -0.21518190205097198, "step": 10765 }, { "epoch": 7.44536652835408, "grad_norm": 6.5892815589904785, "learning_rate": 1.4192408175810665e-05, "log_odds_chosen": 11.18331527709961, "log_odds_ratio": -3.321813346701674e-05, "logits/chosen": -0.7267346978187561, "logits/rejected": -0.6547719836235046, "logps/chosen": -0.00011852014722535387, "logps/rejected": -1.9096920490264893, "loss": 0.3928, "nll_loss": 0.09818601608276367, "rewards/accuracies": 1.0, "rewards/chosen": -1.1852014722535387e-05, "rewards/margins": 0.19095736742019653, "rewards/rejected": -0.19096921384334564, "step": 10766 }, { "epoch": 7.446058091286307, "grad_norm": 4.197224140167236, "learning_rate": 1.4188566159520516e-05, "log_odds_chosen": 11.295459747314453, "log_odds_ratio": -2.2514745069202036e-05, "logits/chosen": -0.046457454562187195, "logits/rejected": -0.06667326390743256, "logps/chosen": -0.00047718797577545047, "logps/rejected": -2.5375447273254395, "loss": 0.5637, "nll_loss": 0.14091680943965912, "rewards/accuracies": 1.0, "rewards/chosen": -4.7718796849949285e-05, "rewards/margins": 0.25370678305625916, "rewards/rejected": -0.25375446677207947, "step": 10767 }, { "epoch": 7.446749654218534, "grad_norm": 3.695483684539795, "learning_rate": 1.4184724143230368e-05, "log_odds_chosen": 10.953871726989746, "log_odds_ratio": -0.00022770927171222866, "logits/chosen": -0.1730370819568634, "logits/rejected": -0.16847573220729828, "logps/chosen": -0.00038342594052664936, "logps/rejected": -1.9761154651641846, "loss": 0.6385, "nll_loss": 0.1596061736345291, "rewards/accuracies": 1.0, "rewards/chosen": -3.8342594052664936e-05, "rewards/margins": 0.1975732147693634, "rewards/rejected": -0.19761154055595398, "step": 10768 }, { "epoch": 7.447441217150761, "grad_norm": 4.151753902435303, "learning_rate": 1.4180882126940217e-05, "log_odds_chosen": 10.379537582397461, "log_odds_ratio": -0.0001264579186681658, "logits/chosen": -0.23954395949840546, "logits/rejected": -0.39786607027053833, "logps/chosen": -0.0002555370156187564, "logps/rejected": -1.9802124500274658, "loss": 0.4602, "nll_loss": 0.11504577100276947, "rewards/accuracies": 1.0, "rewards/chosen": -2.5553703380865045e-05, "rewards/margins": 0.19799569249153137, "rewards/rejected": -0.19802124798297882, "step": 10769 }, { "epoch": 7.448132780082988, "grad_norm": 4.965488433837891, "learning_rate": 1.417704011065007e-05, "log_odds_chosen": 10.943584442138672, "log_odds_ratio": -4.247497781761922e-05, "logits/chosen": -0.40447431802749634, "logits/rejected": -0.41507893800735474, "logps/chosen": -0.00021655272576026618, "logps/rejected": -1.846780776977539, "loss": 0.5371, "nll_loss": 0.13427071273326874, "rewards/accuracies": 1.0, "rewards/chosen": -2.165527257602662e-05, "rewards/margins": 0.18465642631053925, "rewards/rejected": -0.1846780925989151, "step": 10770 }, { "epoch": 7.448824343015215, "grad_norm": 5.554091930389404, "learning_rate": 1.417319809435992e-05, "log_odds_chosen": 10.870664596557617, "log_odds_ratio": -3.63989602192305e-05, "logits/chosen": -0.4929801821708679, "logits/rejected": -0.4868937134742737, "logps/chosen": -0.00014760282647330314, "logps/rejected": -1.8903640508651733, "loss": 0.4212, "nll_loss": 0.10529060661792755, "rewards/accuracies": 1.0, "rewards/chosen": -1.4760284102521837e-05, "rewards/margins": 0.18902164697647095, "rewards/rejected": -0.18903639912605286, "step": 10771 }, { "epoch": 7.449515905947441, "grad_norm": 3.917309284210205, "learning_rate": 1.416935607806977e-05, "log_odds_chosen": 10.563467025756836, "log_odds_ratio": -4.412873022374697e-05, "logits/chosen": -0.17110557854175568, "logits/rejected": -0.023961514234542847, "logps/chosen": -0.0002084320003632456, "logps/rejected": -1.9825282096862793, "loss": 0.381, "nll_loss": 0.09524839371442795, "rewards/accuracies": 1.0, "rewards/chosen": -2.08431993087288e-05, "rewards/margins": 0.19823198020458221, "rewards/rejected": -0.1982528269290924, "step": 10772 }, { "epoch": 7.450207468879668, "grad_norm": 20.687332153320312, "learning_rate": 1.4165514061779622e-05, "log_odds_chosen": 10.863073348999023, "log_odds_ratio": -5.997123662382364e-05, "logits/chosen": -0.7228338718414307, "logits/rejected": -0.7402847409248352, "logps/chosen": -0.00035104190465062857, "logps/rejected": -2.2573041915893555, "loss": 0.4832, "nll_loss": 0.12080300599336624, "rewards/accuracies": 1.0, "rewards/chosen": -3.5104189009871334e-05, "rewards/margins": 0.22569534182548523, "rewards/rejected": -0.22573043406009674, "step": 10773 }, { "epoch": 7.450899031811895, "grad_norm": 13.213074684143066, "learning_rate": 1.4161672045489475e-05, "log_odds_chosen": 11.305316925048828, "log_odds_ratio": -2.8845432098023593e-05, "logits/chosen": -0.43465813994407654, "logits/rejected": -0.469184011220932, "logps/chosen": -0.00020464364206418395, "logps/rejected": -2.64137864112854, "loss": 0.4263, "nll_loss": 0.10656726360321045, "rewards/accuracies": 1.0, "rewards/chosen": -2.0464365661609918e-05, "rewards/margins": 0.26411741971969604, "rewards/rejected": -0.264137864112854, "step": 10774 }, { "epoch": 7.451590594744122, "grad_norm": 5.760486602783203, "learning_rate": 1.4157830029199324e-05, "log_odds_chosen": 11.941006660461426, "log_odds_ratio": -1.6288035112665966e-05, "logits/chosen": -0.22816580533981323, "logits/rejected": -0.323006808757782, "logps/chosen": -0.0001421998895239085, "logps/rejected": -3.1332175731658936, "loss": 0.5396, "nll_loss": 0.13490593433380127, "rewards/accuracies": 1.0, "rewards/chosen": -1.4219987860997207e-05, "rewards/margins": 0.313307523727417, "rewards/rejected": -0.3133217394351959, "step": 10775 }, { "epoch": 7.452282157676349, "grad_norm": 4.831750869750977, "learning_rate": 1.4153988012909174e-05, "log_odds_chosen": 10.773515701293945, "log_odds_ratio": -8.02901922725141e-05, "logits/chosen": -0.43861153721809387, "logits/rejected": -0.4492945075035095, "logps/chosen": -0.00017901469254866242, "logps/rejected": -2.006941318511963, "loss": 0.4554, "nll_loss": 0.11384207755327225, "rewards/accuracies": 1.0, "rewards/chosen": -1.7901469618664123e-05, "rewards/margins": 0.20067623257637024, "rewards/rejected": -0.20069414377212524, "step": 10776 }, { "epoch": 7.4529737206085755, "grad_norm": 6.30268669128418, "learning_rate": 1.4150145996619027e-05, "log_odds_chosen": 11.054546356201172, "log_odds_ratio": -0.00010250294872093946, "logits/chosen": -0.25811412930488586, "logits/rejected": -0.3270358145236969, "logps/chosen": -0.0002650795504450798, "logps/rejected": -2.357173442840576, "loss": 0.4343, "nll_loss": 0.10856799781322479, "rewards/accuracies": 1.0, "rewards/chosen": -2.6507957954891026e-05, "rewards/margins": 0.2356908619403839, "rewards/rejected": -0.23571735620498657, "step": 10777 }, { "epoch": 7.453665283540802, "grad_norm": 4.888160705566406, "learning_rate": 1.4146303980328876e-05, "log_odds_chosen": 10.101419448852539, "log_odds_ratio": -0.0003310735628474504, "logits/chosen": -0.5871963500976562, "logits/rejected": -0.5617615580558777, "logps/chosen": -0.0007257298566401005, "logps/rejected": -1.4469420909881592, "loss": 0.3538, "nll_loss": 0.08840520679950714, "rewards/accuracies": 1.0, "rewards/chosen": -7.257298420881853e-05, "rewards/margins": 0.14462164044380188, "rewards/rejected": -0.14469420909881592, "step": 10778 }, { "epoch": 7.454356846473029, "grad_norm": 5.455580234527588, "learning_rate": 1.4142461964038728e-05, "log_odds_chosen": 10.255231857299805, "log_odds_ratio": -9.311466419603676e-05, "logits/chosen": -0.4162459373474121, "logits/rejected": -0.4459051191806793, "logps/chosen": -0.0005949109909124672, "logps/rejected": -2.145630359649658, "loss": 0.8744, "nll_loss": 0.21859851479530334, "rewards/accuracies": 1.0, "rewards/chosen": -5.9491099818842486e-05, "rewards/margins": 0.21450357139110565, "rewards/rejected": -0.2145630568265915, "step": 10779 }, { "epoch": 7.455048409405256, "grad_norm": 4.634705543518066, "learning_rate": 1.4138619947748579e-05, "log_odds_chosen": 12.55921745300293, "log_odds_ratio": -7.130900485208258e-06, "logits/chosen": -0.43504297733306885, "logits/rejected": -0.39104989171028137, "logps/chosen": -0.00015669013373553753, "logps/rejected": -3.515052080154419, "loss": 0.3879, "nll_loss": 0.09698067605495453, "rewards/accuracies": 1.0, "rewards/chosen": -1.5669013009755872e-05, "rewards/margins": 0.3514895439147949, "rewards/rejected": -0.35150521993637085, "step": 10780 }, { "epoch": 7.455739972337483, "grad_norm": 8.080259323120117, "learning_rate": 1.4134777931458428e-05, "log_odds_chosen": 11.038820266723633, "log_odds_ratio": -3.259784716647118e-05, "logits/chosen": -0.9450827836990356, "logits/rejected": -0.9674822092056274, "logps/chosen": -8.301252091769129e-05, "logps/rejected": -1.79734206199646, "loss": 0.4615, "nll_loss": 0.11537669599056244, "rewards/accuracies": 1.0, "rewards/chosen": -8.30125281936489e-06, "rewards/margins": 0.17972591519355774, "rewards/rejected": -0.17973420023918152, "step": 10781 }, { "epoch": 7.45643153526971, "grad_norm": 6.411631107330322, "learning_rate": 1.413093591516828e-05, "log_odds_chosen": 11.170829772949219, "log_odds_ratio": -2.751310239546001e-05, "logits/chosen": -0.18959954380989075, "logits/rejected": -0.21842291951179504, "logps/chosen": -0.00018193494179286063, "logps/rejected": -2.25022554397583, "loss": 0.3511, "nll_loss": 0.08777038007974625, "rewards/accuracies": 1.0, "rewards/chosen": -1.819349199649878e-05, "rewards/margins": 0.2250043749809265, "rewards/rejected": -0.2250225692987442, "step": 10782 }, { "epoch": 7.4571230982019365, "grad_norm": 4.9749369621276855, "learning_rate": 1.4127093898878133e-05, "log_odds_chosen": 11.735504150390625, "log_odds_ratio": -1.129702286561951e-05, "logits/chosen": -0.031236648559570312, "logits/rejected": -0.15600450336933136, "logps/chosen": -0.00017883992404676974, "logps/rejected": -2.7734787464141846, "loss": 0.7117, "nll_loss": 0.1779349446296692, "rewards/accuracies": 1.0, "rewards/chosen": -1.7883992768474855e-05, "rewards/margins": 0.2773299813270569, "rewards/rejected": -0.2773478627204895, "step": 10783 }, { "epoch": 7.457814661134163, "grad_norm": 7.109059810638428, "learning_rate": 1.4123251882587982e-05, "log_odds_chosen": 10.662548065185547, "log_odds_ratio": -0.00014169953647069633, "logits/chosen": -0.4513861835002899, "logits/rejected": -0.4910764992237091, "logps/chosen": -0.0001652487990213558, "logps/rejected": -2.0241336822509766, "loss": 0.3626, "nll_loss": 0.09062620252370834, "rewards/accuracies": 1.0, "rewards/chosen": -1.652488026593346e-05, "rewards/margins": 0.20239682495594025, "rewards/rejected": -0.20241336524486542, "step": 10784 }, { "epoch": 7.45850622406639, "grad_norm": 4.813906192779541, "learning_rate": 1.4119409866297833e-05, "log_odds_chosen": 11.187372207641602, "log_odds_ratio": -1.9936051103286445e-05, "logits/chosen": -0.2327749878168106, "logits/rejected": -0.30488571524620056, "logps/chosen": -0.0004442424396984279, "logps/rejected": -2.9384379386901855, "loss": 0.5334, "nll_loss": 0.1333410143852234, "rewards/accuracies": 1.0, "rewards/chosen": -4.442424324224703e-05, "rewards/margins": 0.29379940032958984, "rewards/rejected": -0.2938438057899475, "step": 10785 }, { "epoch": 7.459197786998617, "grad_norm": 10.139615058898926, "learning_rate": 1.4115567850007685e-05, "log_odds_chosen": 10.874614715576172, "log_odds_ratio": -2.8667487640632316e-05, "logits/chosen": -0.11820893734693527, "logits/rejected": -0.16890212893486023, "logps/chosen": -0.00012665041140280664, "logps/rejected": -1.8323454856872559, "loss": 0.6004, "nll_loss": 0.15009675920009613, "rewards/accuracies": 1.0, "rewards/chosen": -1.2665040230785962e-05, "rewards/margins": 0.18322189152240753, "rewards/rejected": -0.1832345724105835, "step": 10786 }, { "epoch": 7.459889349930844, "grad_norm": 8.36119556427002, "learning_rate": 1.4111725833717534e-05, "log_odds_chosen": 10.627643585205078, "log_odds_ratio": -5.008725565858185e-05, "logits/chosen": -0.37607264518737793, "logits/rejected": -0.35164517164230347, "logps/chosen": -0.000426368264015764, "logps/rejected": -2.356189727783203, "loss": 0.3696, "nll_loss": 0.09239155799150467, "rewards/accuracies": 1.0, "rewards/chosen": -4.2636824218789116e-05, "rewards/margins": 0.235576331615448, "rewards/rejected": -0.2356189787387848, "step": 10787 }, { "epoch": 7.460580912863071, "grad_norm": 5.333108901977539, "learning_rate": 1.4107883817427387e-05, "log_odds_chosen": 11.991171836853027, "log_odds_ratio": -9.418118679604959e-06, "logits/chosen": -0.29220810532569885, "logits/rejected": -0.32383784651756287, "logps/chosen": -8.977008110377938e-05, "logps/rejected": -2.6561379432678223, "loss": 0.5265, "nll_loss": 0.13163018226623535, "rewards/accuracies": 1.0, "rewards/chosen": -8.977008292276878e-06, "rewards/margins": 0.26560482382774353, "rewards/rejected": -0.2656137943267822, "step": 10788 }, { "epoch": 7.4612724757952975, "grad_norm": 4.949190139770508, "learning_rate": 1.4104041801137237e-05, "log_odds_chosen": 11.44765853881836, "log_odds_ratio": -4.3102198105771095e-05, "logits/chosen": -0.0698050707578659, "logits/rejected": -0.061611108481884, "logps/chosen": -0.00015239565982483327, "logps/rejected": -2.726309299468994, "loss": 0.5276, "nll_loss": 0.13188467919826508, "rewards/accuracies": 1.0, "rewards/chosen": -1.5239564163493924e-05, "rewards/margins": 0.2726157307624817, "rewards/rejected": -0.2726309299468994, "step": 10789 }, { "epoch": 7.461964038727524, "grad_norm": 3.6364712715148926, "learning_rate": 1.4100199784847088e-05, "log_odds_chosen": 9.9117431640625, "log_odds_ratio": -0.000305481516988948, "logits/chosen": -0.4107625186443329, "logits/rejected": -0.4288431704044342, "logps/chosen": -0.0005071141058579087, "logps/rejected": -2.180227756500244, "loss": 0.429, "nll_loss": 0.10721185803413391, "rewards/accuracies": 1.0, "rewards/chosen": -5.071140549262054e-05, "rewards/margins": 0.2179720252752304, "rewards/rejected": -0.21802276372909546, "step": 10790 }, { "epoch": 7.462655601659751, "grad_norm": 3.9746110439300537, "learning_rate": 1.4096357768556939e-05, "log_odds_chosen": 10.442115783691406, "log_odds_ratio": -0.0003979535831604153, "logits/chosen": -0.2429000735282898, "logits/rejected": -0.4284162223339081, "logps/chosen": -0.0005142191657796502, "logps/rejected": -1.6921067237854004, "loss": 0.3517, "nll_loss": 0.08788755536079407, "rewards/accuracies": 1.0, "rewards/chosen": -5.142191730556078e-05, "rewards/margins": 0.16915924847126007, "rewards/rejected": -0.16921067237854004, "step": 10791 }, { "epoch": 7.463347164591978, "grad_norm": 3.416618824005127, "learning_rate": 1.4092515752266791e-05, "log_odds_chosen": 10.313505172729492, "log_odds_ratio": -6.649178976658732e-05, "logits/chosen": -0.601507842540741, "logits/rejected": -0.7110568284988403, "logps/chosen": -0.00023971637710928917, "logps/rejected": -1.7452600002288818, "loss": 0.382, "nll_loss": 0.09550052881240845, "rewards/accuracies": 1.0, "rewards/chosen": -2.397163916612044e-05, "rewards/margins": 0.17450203001499176, "rewards/rejected": -0.17452600598335266, "step": 10792 }, { "epoch": 7.464038727524205, "grad_norm": 4.253975868225098, "learning_rate": 1.408867373597664e-05, "log_odds_chosen": 11.393698692321777, "log_odds_ratio": -2.1312720491550863e-05, "logits/chosen": -0.03397101163864136, "logits/rejected": -0.07794360816478729, "logps/chosen": -0.0002627108769956976, "logps/rejected": -2.492248058319092, "loss": 0.5482, "nll_loss": 0.13705724477767944, "rewards/accuracies": 1.0, "rewards/chosen": -2.6271089154761285e-05, "rewards/margins": 0.2491985410451889, "rewards/rejected": -0.24922481179237366, "step": 10793 }, { "epoch": 7.464730290456432, "grad_norm": 21.331689834594727, "learning_rate": 1.4084831719686493e-05, "log_odds_chosen": 10.241486549377441, "log_odds_ratio": -0.0001373636769130826, "logits/chosen": -0.186618834733963, "logits/rejected": -0.10759405046701431, "logps/chosen": -0.00029816178721375763, "logps/rejected": -1.744796633720398, "loss": 0.4677, "nll_loss": 0.11691103875637054, "rewards/accuracies": 1.0, "rewards/chosen": -2.981617762998212e-05, "rewards/margins": 0.1744498461484909, "rewards/rejected": -0.1744796633720398, "step": 10794 }, { "epoch": 7.4654218533886585, "grad_norm": 2.5432212352752686, "learning_rate": 1.4080989703396344e-05, "log_odds_chosen": 10.715089797973633, "log_odds_ratio": -0.00037892625550739467, "logits/chosen": -0.19213101267814636, "logits/rejected": -0.18021385371685028, "logps/chosen": -0.0008219567826017737, "logps/rejected": -2.4543795585632324, "loss": 0.3218, "nll_loss": 0.08040856570005417, "rewards/accuracies": 1.0, "rewards/chosen": -8.219568553613499e-05, "rewards/margins": 0.2453557401895523, "rewards/rejected": -0.24543793499469757, "step": 10795 }, { "epoch": 7.466113416320885, "grad_norm": 8.289212226867676, "learning_rate": 1.4077147687106193e-05, "log_odds_chosen": 10.710524559020996, "log_odds_ratio": -0.00010787302016979083, "logits/chosen": -0.8265199661254883, "logits/rejected": -0.8303812742233276, "logps/chosen": -0.00028980334172956645, "logps/rejected": -2.316956043243408, "loss": 0.4287, "nll_loss": 0.1071544662117958, "rewards/accuracies": 1.0, "rewards/chosen": -2.898033562814817e-05, "rewards/margins": 0.23166662454605103, "rewards/rejected": -0.23169559240341187, "step": 10796 }, { "epoch": 7.466804979253112, "grad_norm": 4.027835369110107, "learning_rate": 1.4073305670816045e-05, "log_odds_chosen": 10.93043327331543, "log_odds_ratio": -3.0192390113370493e-05, "logits/chosen": -0.41126716136932373, "logits/rejected": -0.4459652304649353, "logps/chosen": -0.00023423923994414508, "logps/rejected": -2.1818385124206543, "loss": 0.4229, "nll_loss": 0.10572469234466553, "rewards/accuracies": 1.0, "rewards/chosen": -2.342392508580815e-05, "rewards/margins": 0.21816042065620422, "rewards/rejected": -0.21818384528160095, "step": 10797 }, { "epoch": 7.467496542185339, "grad_norm": 6.063636302947998, "learning_rate": 1.4069463654525898e-05, "log_odds_chosen": 11.581496238708496, "log_odds_ratio": -2.447320002829656e-05, "logits/chosen": 0.05990840494632721, "logits/rejected": -0.13078993558883667, "logps/chosen": -0.00015943936887197196, "logps/rejected": -2.7461681365966797, "loss": 0.5508, "nll_loss": 0.13769984245300293, "rewards/accuracies": 1.0, "rewards/chosen": -1.5943936887197196e-05, "rewards/margins": 0.2746008634567261, "rewards/rejected": -0.2746168076992035, "step": 10798 }, { "epoch": 7.468188105117566, "grad_norm": 4.830626010894775, "learning_rate": 1.4065621638235747e-05, "log_odds_chosen": 9.754347801208496, "log_odds_ratio": -0.0012114491546526551, "logits/chosen": -0.5032768249511719, "logits/rejected": -0.48865121603012085, "logps/chosen": -0.0003861731383949518, "logps/rejected": -1.4373276233673096, "loss": 0.6617, "nll_loss": 0.1653095781803131, "rewards/accuracies": 1.0, "rewards/chosen": -3.8617316022282466e-05, "rewards/margins": 0.14369414746761322, "rewards/rejected": -0.14373275637626648, "step": 10799 }, { "epoch": 7.468879668049793, "grad_norm": 7.109471797943115, "learning_rate": 1.4061779621945597e-05, "log_odds_chosen": 10.44162368774414, "log_odds_ratio": -6.263345130719244e-05, "logits/chosen": -0.3996507525444031, "logits/rejected": -0.43611565232276917, "logps/chosen": -0.00012694389442913234, "logps/rejected": -1.5626509189605713, "loss": 0.4976, "nll_loss": 0.12438362836837769, "rewards/accuracies": 1.0, "rewards/chosen": -1.2694389624812175e-05, "rewards/margins": 0.1562523990869522, "rewards/rejected": -0.15626509487628937, "step": 10800 }, { "epoch": 7.4695712309820195, "grad_norm": 10.683833122253418, "learning_rate": 1.405793760565545e-05, "log_odds_chosen": 9.845418930053711, "log_odds_ratio": -0.0006182001088745892, "logits/chosen": 0.009782552719116211, "logits/rejected": -0.12515202164649963, "logps/chosen": -0.0007117694476619363, "logps/rejected": -1.846452236175537, "loss": 0.4558, "nll_loss": 0.11387798190116882, "rewards/accuracies": 1.0, "rewards/chosen": -7.117693894542754e-05, "rewards/margins": 0.18457405269145966, "rewards/rejected": -0.18464523553848267, "step": 10801 }, { "epoch": 7.470262793914246, "grad_norm": 8.428908348083496, "learning_rate": 1.4054095589365299e-05, "log_odds_chosen": 11.426127433776855, "log_odds_ratio": -2.4880095224943943e-05, "logits/chosen": -0.08515352755784988, "logits/rejected": -0.13209059834480286, "logps/chosen": -0.00029765418730676174, "logps/rejected": -2.629096746444702, "loss": 0.5988, "nll_loss": 0.14969633519649506, "rewards/accuracies": 1.0, "rewards/chosen": -2.9765420549665578e-05, "rewards/margins": 0.2628799080848694, "rewards/rejected": -0.2629096508026123, "step": 10802 }, { "epoch": 7.470954356846473, "grad_norm": 6.119007587432861, "learning_rate": 1.4050253573075151e-05, "log_odds_chosen": 11.615740776062012, "log_odds_ratio": -8.431605965597555e-05, "logits/chosen": -0.24642956256866455, "logits/rejected": -0.33345574140548706, "logps/chosen": -0.0002293281868332997, "logps/rejected": -2.501028060913086, "loss": 0.5353, "nll_loss": 0.13381671905517578, "rewards/accuracies": 1.0, "rewards/chosen": -2.2932819774723612e-05, "rewards/margins": 0.250079870223999, "rewards/rejected": -0.25010281801223755, "step": 10803 }, { "epoch": 7.4716459197787, "grad_norm": 3.944324016571045, "learning_rate": 1.4046411556785002e-05, "log_odds_chosen": 9.476999282836914, "log_odds_ratio": -0.0001978773798327893, "logits/chosen": -0.2502937316894531, "logits/rejected": -0.33076730370521545, "logps/chosen": -0.0002558291016612202, "logps/rejected": -1.2312133312225342, "loss": 0.4154, "nll_loss": 0.10383543372154236, "rewards/accuracies": 1.0, "rewards/chosen": -2.558291089371778e-05, "rewards/margins": 0.12309575825929642, "rewards/rejected": -0.12312135100364685, "step": 10804 }, { "epoch": 7.472337482710927, "grad_norm": 5.033731937408447, "learning_rate": 1.4042569540494851e-05, "log_odds_chosen": 10.716177940368652, "log_odds_ratio": -7.970893057063222e-05, "logits/chosen": -0.2932302951812744, "logits/rejected": -0.336398184299469, "logps/chosen": -0.0003216788754798472, "logps/rejected": -2.5918960571289062, "loss": 0.4384, "nll_loss": 0.1095963567495346, "rewards/accuracies": 1.0, "rewards/chosen": -3.216788900317624e-05, "rewards/margins": 0.2591574490070343, "rewards/rejected": -0.259189635515213, "step": 10805 }, { "epoch": 7.473029045643154, "grad_norm": 10.533803939819336, "learning_rate": 1.4038727524204704e-05, "log_odds_chosen": 12.302633285522461, "log_odds_ratio": -1.1996409739367664e-05, "logits/chosen": -0.2196783572435379, "logits/rejected": -0.2457887828350067, "logps/chosen": -0.00010751017543952912, "logps/rejected": -3.1479859352111816, "loss": 0.5097, "nll_loss": 0.12743321061134338, "rewards/accuracies": 1.0, "rewards/chosen": -1.0751018635346554e-05, "rewards/margins": 0.3147878348827362, "rewards/rejected": -0.3147985637187958, "step": 10806 }, { "epoch": 7.4737206085753805, "grad_norm": 4.923978805541992, "learning_rate": 1.4034885507914556e-05, "log_odds_chosen": 10.04902172088623, "log_odds_ratio": -6.729160668328404e-05, "logits/chosen": -0.08962735533714294, "logits/rejected": -0.1747085452079773, "logps/chosen": -0.00033856008667498827, "logps/rejected": -1.9707258939743042, "loss": 0.4495, "nll_loss": 0.11237649619579315, "rewards/accuracies": 1.0, "rewards/chosen": -3.385600575711578e-05, "rewards/margins": 0.19703873991966248, "rewards/rejected": -0.1970725953578949, "step": 10807 }, { "epoch": 7.474412171507607, "grad_norm": 4.651909828186035, "learning_rate": 1.4031043491624405e-05, "log_odds_chosen": 10.471010208129883, "log_odds_ratio": -0.00011994114174740389, "logits/chosen": -0.32653313875198364, "logits/rejected": -0.3821827471256256, "logps/chosen": -0.0005166777409613132, "logps/rejected": -2.1474270820617676, "loss": 0.6188, "nll_loss": 0.15467827022075653, "rewards/accuracies": 1.0, "rewards/chosen": -5.166777191334404e-05, "rewards/margins": 0.21469107270240784, "rewards/rejected": -0.2147427350282669, "step": 10808 }, { "epoch": 7.475103734439834, "grad_norm": 5.474374771118164, "learning_rate": 1.4027201475334256e-05, "log_odds_chosen": 10.904947280883789, "log_odds_ratio": -3.5007004044018686e-05, "logits/chosen": -0.4522826075553894, "logits/rejected": -0.5444246530532837, "logps/chosen": -0.00017778566689230502, "logps/rejected": -2.036471128463745, "loss": 0.6515, "nll_loss": 0.16286161541938782, "rewards/accuracies": 1.0, "rewards/chosen": -1.7778567780624144e-05, "rewards/margins": 0.20362932980060577, "rewards/rejected": -0.20364712178707123, "step": 10809 }, { "epoch": 7.475795297372061, "grad_norm": 4.451005935668945, "learning_rate": 1.4023359459044108e-05, "log_odds_chosen": 11.30253791809082, "log_odds_ratio": -2.957035758299753e-05, "logits/chosen": 0.049805790185928345, "logits/rejected": 0.07589408755302429, "logps/chosen": -0.00016353554383385926, "logps/rejected": -2.659938097000122, "loss": 0.4698, "nll_loss": 0.11744727194309235, "rewards/accuracies": 1.0, "rewards/chosen": -1.6353555110981688e-05, "rewards/margins": 0.2659774422645569, "rewards/rejected": -0.26599380373954773, "step": 10810 }, { "epoch": 7.476486860304288, "grad_norm": 4.779135704040527, "learning_rate": 1.4019517442753957e-05, "log_odds_chosen": 11.290592193603516, "log_odds_ratio": -6.531582039315253e-05, "logits/chosen": -0.4310583472251892, "logits/rejected": -0.4172248840332031, "logps/chosen": -0.00018636384629644454, "logps/rejected": -2.7107105255126953, "loss": 0.5556, "nll_loss": 0.13890208303928375, "rewards/accuracies": 1.0, "rewards/chosen": -1.8636386812431738e-05, "rewards/margins": 0.27105242013931274, "rewards/rejected": -0.27107107639312744, "step": 10811 }, { "epoch": 7.477178423236515, "grad_norm": 4.228855133056641, "learning_rate": 1.401567542646381e-05, "log_odds_chosen": 11.1829833984375, "log_odds_ratio": -0.00010237680544378236, "logits/chosen": -0.6038228273391724, "logits/rejected": -0.6043468713760376, "logps/chosen": -0.00033634959254413843, "logps/rejected": -2.578450918197632, "loss": 0.4377, "nll_loss": 0.1094059944152832, "rewards/accuracies": 1.0, "rewards/chosen": -3.363495852681808e-05, "rewards/margins": 0.25781145691871643, "rewards/rejected": -0.25784510374069214, "step": 10812 }, { "epoch": 7.477869986168741, "grad_norm": 5.022854804992676, "learning_rate": 1.401183341017366e-05, "log_odds_chosen": 10.278348922729492, "log_odds_ratio": -7.624067075084895e-05, "logits/chosen": 0.1413256675004959, "logits/rejected": 0.09014451503753662, "logps/chosen": -0.00034738524118438363, "logps/rejected": -2.108718156814575, "loss": 0.455, "nll_loss": 0.11374906450510025, "rewards/accuracies": 1.0, "rewards/chosen": -3.4738524846034124e-05, "rewards/margins": 0.21083708107471466, "rewards/rejected": -0.21087180078029633, "step": 10813 }, { "epoch": 7.478561549100968, "grad_norm": 7.37864875793457, "learning_rate": 1.400799139388351e-05, "log_odds_chosen": 11.088615417480469, "log_odds_ratio": -5.135659739607945e-05, "logits/chosen": -0.5195218324661255, "logits/rejected": -0.534462571144104, "logps/chosen": -0.00026968546444550157, "logps/rejected": -2.4104957580566406, "loss": 0.5126, "nll_loss": 0.12813904881477356, "rewards/accuracies": 1.0, "rewards/chosen": -2.696854789974168e-05, "rewards/margins": 0.24102260172367096, "rewards/rejected": -0.24104955792427063, "step": 10814 }, { "epoch": 7.479253112033195, "grad_norm": 3.966606378555298, "learning_rate": 1.4004149377593362e-05, "log_odds_chosen": 10.707971572875977, "log_odds_ratio": -9.568365203449503e-05, "logits/chosen": -0.5049077272415161, "logits/rejected": -0.4820142984390259, "logps/chosen": -0.00014141273277346045, "logps/rejected": -1.723301887512207, "loss": 0.3993, "nll_loss": 0.09980343282222748, "rewards/accuracies": 1.0, "rewards/chosen": -1.4141274732537568e-05, "rewards/margins": 0.1723160743713379, "rewards/rejected": -0.17233020067214966, "step": 10815 }, { "epoch": 7.479944674965422, "grad_norm": 5.684383392333984, "learning_rate": 1.4000307361303214e-05, "log_odds_chosen": 11.650728225708008, "log_odds_ratio": -1.511611117166467e-05, "logits/chosen": -0.2656325101852417, "logits/rejected": -0.32714197039604187, "logps/chosen": -0.0003647196863312274, "logps/rejected": -3.0555384159088135, "loss": 0.5385, "nll_loss": 0.13461153209209442, "rewards/accuracies": 1.0, "rewards/chosen": -3.647196717793122e-05, "rewards/margins": 0.30551737546920776, "rewards/rejected": -0.3055538535118103, "step": 10816 }, { "epoch": 7.480636237897649, "grad_norm": 24.105382919311523, "learning_rate": 1.3996465345013063e-05, "log_odds_chosen": 10.815409660339355, "log_odds_ratio": -4.043741864734329e-05, "logits/chosen": -0.6106455326080322, "logits/rejected": -0.7042526602745056, "logps/chosen": -0.000373400078387931, "logps/rejected": -1.910614252090454, "loss": 0.8274, "nll_loss": 0.20684409141540527, "rewards/accuracies": 1.0, "rewards/chosen": -3.7340010749176145e-05, "rewards/margins": 0.19102409482002258, "rewards/rejected": -0.19106143712997437, "step": 10817 }, { "epoch": 7.481327800829876, "grad_norm": 4.480190753936768, "learning_rate": 1.3992623328722914e-05, "log_odds_chosen": 10.819806098937988, "log_odds_ratio": -9.418633999302983e-05, "logits/chosen": -0.22619548439979553, "logits/rejected": -0.3512183427810669, "logps/chosen": -0.00013654318172484636, "logps/rejected": -1.9781020879745483, "loss": 0.4419, "nll_loss": 0.11045687645673752, "rewards/accuracies": 1.0, "rewards/chosen": -1.3654316717293113e-05, "rewards/margins": 0.1977965533733368, "rewards/rejected": -0.19781020283699036, "step": 10818 }, { "epoch": 7.482019363762102, "grad_norm": 6.823324203491211, "learning_rate": 1.3988781312432767e-05, "log_odds_chosen": 11.323492050170898, "log_odds_ratio": -2.871198557841126e-05, "logits/chosen": -0.5868136286735535, "logits/rejected": -0.6236948370933533, "logps/chosen": -0.00010528555139899254, "logps/rejected": -2.055727481842041, "loss": 0.3748, "nll_loss": 0.09368917346000671, "rewards/accuracies": 1.0, "rewards/chosen": -1.0528554412303492e-05, "rewards/margins": 0.20556223392486572, "rewards/rejected": -0.20557278394699097, "step": 10819 }, { "epoch": 7.482710926694329, "grad_norm": 10.466057777404785, "learning_rate": 1.3984939296142616e-05, "log_odds_chosen": 11.126243591308594, "log_odds_ratio": -0.0005590688670054078, "logits/chosen": -0.436701238155365, "logits/rejected": -0.4432668387889862, "logps/chosen": -0.00015593662101309747, "logps/rejected": -1.8980236053466797, "loss": 0.591, "nll_loss": 0.14768701791763306, "rewards/accuracies": 1.0, "rewards/chosen": -1.5593661373713985e-05, "rewards/margins": 0.18978677690029144, "rewards/rejected": -0.1898023635149002, "step": 10820 }, { "epoch": 7.483402489626556, "grad_norm": 5.3855671882629395, "learning_rate": 1.3981097279852468e-05, "log_odds_chosen": 11.250757217407227, "log_odds_ratio": -2.490894439688418e-05, "logits/chosen": -0.3038305640220642, "logits/rejected": -0.2972073256969452, "logps/chosen": -0.0006267963908612728, "logps/rejected": -2.738424301147461, "loss": 0.5656, "nll_loss": 0.1413910835981369, "rewards/accuracies": 1.0, "rewards/chosen": -6.267963908612728e-05, "rewards/margins": 0.2737797498703003, "rewards/rejected": -0.273842453956604, "step": 10821 }, { "epoch": 7.484094052558783, "grad_norm": 4.8172526359558105, "learning_rate": 1.3977255263562319e-05, "log_odds_chosen": 10.236217498779297, "log_odds_ratio": -0.00011173608800163493, "logits/chosen": -0.2512364685535431, "logits/rejected": -0.2988510727882385, "logps/chosen": -0.00020601501455530524, "logps/rejected": -1.6551272869110107, "loss": 0.4242, "nll_loss": 0.10604405403137207, "rewards/accuracies": 1.0, "rewards/chosen": -2.0601502910722047e-05, "rewards/margins": 0.16549211740493774, "rewards/rejected": -0.16551271080970764, "step": 10822 }, { "epoch": 7.48478561549101, "grad_norm": 11.536404609680176, "learning_rate": 1.3973413247272168e-05, "log_odds_chosen": 11.51347827911377, "log_odds_ratio": -4.374596392153762e-05, "logits/chosen": 0.03762510418891907, "logits/rejected": -0.09024792909622192, "logps/chosen": -0.00028255494544282556, "logps/rejected": -2.5662331581115723, "loss": 0.512, "nll_loss": 0.12798868119716644, "rewards/accuracies": 1.0, "rewards/chosen": -2.82554974546656e-05, "rewards/margins": 0.25659507513046265, "rewards/rejected": -0.2566233277320862, "step": 10823 }, { "epoch": 7.485477178423237, "grad_norm": 3.3092715740203857, "learning_rate": 1.396957123098202e-05, "log_odds_chosen": 11.188426971435547, "log_odds_ratio": -4.85469754494261e-05, "logits/chosen": -0.43238580226898193, "logits/rejected": -0.4718017578125, "logps/chosen": -0.0002774116874206811, "logps/rejected": -1.945969820022583, "loss": 0.3066, "nll_loss": 0.07664395868778229, "rewards/accuracies": 1.0, "rewards/chosen": -2.774116728687659e-05, "rewards/margins": 0.19456923007965088, "rewards/rejected": -0.1945970058441162, "step": 10824 }, { "epoch": 7.486168741355463, "grad_norm": 3.8885912895202637, "learning_rate": 1.3965729214691873e-05, "log_odds_chosen": 10.345460891723633, "log_odds_ratio": -0.00019396745483390987, "logits/chosen": -0.20144695043563843, "logits/rejected": -0.27505654096603394, "logps/chosen": -0.0007674898370169103, "logps/rejected": -2.2125778198242188, "loss": 0.3127, "nll_loss": 0.07814507186412811, "rewards/accuracies": 1.0, "rewards/chosen": -7.674898370169103e-05, "rewards/margins": 0.22118106484413147, "rewards/rejected": -0.2212577909231186, "step": 10825 }, { "epoch": 7.48686030428769, "grad_norm": 5.219362258911133, "learning_rate": 1.3961887198401722e-05, "log_odds_chosen": 11.4567289352417, "log_odds_ratio": -2.5133645976893604e-05, "logits/chosen": -0.30604687333106995, "logits/rejected": -0.3041151762008667, "logps/chosen": -7.92650316725485e-05, "logps/rejected": -2.015554904937744, "loss": 0.5405, "nll_loss": 0.13513174653053284, "rewards/accuracies": 1.0, "rewards/chosen": -7.926503712951671e-06, "rewards/margins": 0.2015475630760193, "rewards/rejected": -0.20155547559261322, "step": 10826 }, { "epoch": 7.487551867219917, "grad_norm": 4.860392093658447, "learning_rate": 1.3958045182111573e-05, "log_odds_chosen": 10.375247955322266, "log_odds_ratio": -8.097510726656765e-05, "logits/chosen": -0.26754456758499146, "logits/rejected": -0.21948489546775818, "logps/chosen": -0.0003514794516377151, "logps/rejected": -2.076695203781128, "loss": 0.5953, "nll_loss": 0.14881163835525513, "rewards/accuracies": 1.0, "rewards/chosen": -3.5147942980984226e-05, "rewards/margins": 0.2076343595981598, "rewards/rejected": -0.20766952633857727, "step": 10827 }, { "epoch": 7.488243430152144, "grad_norm": 4.747002124786377, "learning_rate": 1.3954203165821425e-05, "log_odds_chosen": 10.324663162231445, "log_odds_ratio": -0.0005005595157854259, "logits/chosen": -0.6674417853355408, "logits/rejected": -0.6481961011886597, "logps/chosen": -0.0006220329669304192, "logps/rejected": -1.9338624477386475, "loss": 0.3806, "nll_loss": 0.09510613977909088, "rewards/accuracies": 1.0, "rewards/chosen": -6.220330396899953e-05, "rewards/margins": 0.1933240294456482, "rewards/rejected": -0.1933862417936325, "step": 10828 }, { "epoch": 7.488934993084371, "grad_norm": 5.115079879760742, "learning_rate": 1.3950361149531274e-05, "log_odds_chosen": 10.410758018493652, "log_odds_ratio": -0.00015181548951659352, "logits/chosen": -0.34289151430130005, "logits/rejected": -0.4306577742099762, "logps/chosen": -0.00022027691011317074, "logps/rejected": -1.804958701133728, "loss": 0.6822, "nll_loss": 0.17052656412124634, "rewards/accuracies": 1.0, "rewards/chosen": -2.2027692466508597e-05, "rewards/margins": 0.18047384917736053, "rewards/rejected": -0.18049587309360504, "step": 10829 }, { "epoch": 7.4896265560165975, "grad_norm": 5.388112545013428, "learning_rate": 1.3946519133241127e-05, "log_odds_chosen": 10.932295799255371, "log_odds_ratio": -0.00010340958397137001, "logits/chosen": -0.5505223274230957, "logits/rejected": -0.4584423005580902, "logps/chosen": -0.0001718951971270144, "logps/rejected": -1.8500322103500366, "loss": 0.4229, "nll_loss": 0.10570985078811646, "rewards/accuracies": 1.0, "rewards/chosen": -1.718951898510568e-05, "rewards/margins": 0.18498602509498596, "rewards/rejected": -0.18500322103500366, "step": 10830 }, { "epoch": 7.490318118948824, "grad_norm": 3.090034246444702, "learning_rate": 1.3942677116950977e-05, "log_odds_chosen": 12.040966033935547, "log_odds_ratio": -1.1882757462444715e-05, "logits/chosen": -0.24035069346427917, "logits/rejected": -0.2807389497756958, "logps/chosen": -0.00018961979367304593, "logps/rejected": -3.0832576751708984, "loss": 0.3444, "nll_loss": 0.08610430359840393, "rewards/accuracies": 1.0, "rewards/chosen": -1.8961978639708832e-05, "rewards/margins": 0.30830681324005127, "rewards/rejected": -0.30832576751708984, "step": 10831 }, { "epoch": 7.491009681881051, "grad_norm": 5.554166793823242, "learning_rate": 1.3938835100660826e-05, "log_odds_chosen": 11.567726135253906, "log_odds_ratio": -7.94117950135842e-05, "logits/chosen": 0.07390864193439484, "logits/rejected": 0.0015699323266744614, "logps/chosen": -0.0004241417336743325, "logps/rejected": -3.301921844482422, "loss": 0.5749, "nll_loss": 0.1437089443206787, "rewards/accuracies": 1.0, "rewards/chosen": -4.2414176277816296e-05, "rewards/margins": 0.33014976978302, "rewards/rejected": -0.3301921784877777, "step": 10832 }, { "epoch": 7.491701244813278, "grad_norm": 13.624567031860352, "learning_rate": 1.3934993084370679e-05, "log_odds_chosen": 11.913568496704102, "log_odds_ratio": -1.140839322033571e-05, "logits/chosen": -0.3410708010196686, "logits/rejected": -0.37177151441574097, "logps/chosen": -0.00010055984603241086, "logps/rejected": -2.67598557472229, "loss": 0.5215, "nll_loss": 0.13038001954555511, "rewards/accuracies": 1.0, "rewards/chosen": -1.0055984603241086e-05, "rewards/margins": 0.2675884962081909, "rewards/rejected": -0.26759853959083557, "step": 10833 }, { "epoch": 7.492392807745505, "grad_norm": 8.092597007751465, "learning_rate": 1.3931151068080528e-05, "log_odds_chosen": 10.475318908691406, "log_odds_ratio": -8.045632421271876e-05, "logits/chosen": -0.1439984142780304, "logits/rejected": -0.18829496204853058, "logps/chosen": -0.00023990156478248537, "logps/rejected": -1.957043170928955, "loss": 0.5433, "nll_loss": 0.13581514358520508, "rewards/accuracies": 1.0, "rewards/chosen": -2.399015829723794e-05, "rewards/margins": 0.19568033516407013, "rewards/rejected": -0.19570431113243103, "step": 10834 }, { "epoch": 7.493084370677732, "grad_norm": 5.48667049407959, "learning_rate": 1.392730905179038e-05, "log_odds_chosen": 9.939897537231445, "log_odds_ratio": -0.0004207099264021963, "logits/chosen": -0.3046836853027344, "logits/rejected": -0.29632100462913513, "logps/chosen": -0.0005699221510440111, "logps/rejected": -1.9147216081619263, "loss": 0.4148, "nll_loss": 0.10365672409534454, "rewards/accuracies": 1.0, "rewards/chosen": -5.699221583199687e-05, "rewards/margins": 0.19141516089439392, "rewards/rejected": -0.19147217273712158, "step": 10835 }, { "epoch": 7.4937759336099585, "grad_norm": 5.311141490936279, "learning_rate": 1.3923467035500231e-05, "log_odds_chosen": 11.650004386901855, "log_odds_ratio": -7.711305806878954e-05, "logits/chosen": -0.2858215570449829, "logits/rejected": -0.29272037744522095, "logps/chosen": -0.00040270035970024765, "logps/rejected": -3.0382204055786133, "loss": 0.5492, "nll_loss": 0.13729213178157806, "rewards/accuracies": 1.0, "rewards/chosen": -4.0270035242429e-05, "rewards/margins": 0.30378180742263794, "rewards/rejected": -0.30382204055786133, "step": 10836 }, { "epoch": 7.494467496542185, "grad_norm": 6.042548179626465, "learning_rate": 1.391962501921008e-05, "log_odds_chosen": 11.16757583618164, "log_odds_ratio": -6.1027007177472115e-05, "logits/chosen": -0.06862695515155792, "logits/rejected": -0.1478588879108429, "logps/chosen": -0.00038854603189975023, "logps/rejected": -2.471419334411621, "loss": 0.4313, "nll_loss": 0.10782044380903244, "rewards/accuracies": 1.0, "rewards/chosen": -3.885460318997502e-05, "rewards/margins": 0.24710306525230408, "rewards/rejected": -0.24714189767837524, "step": 10837 }, { "epoch": 7.495159059474412, "grad_norm": 4.193350315093994, "learning_rate": 1.3915783002919933e-05, "log_odds_chosen": 11.357162475585938, "log_odds_ratio": -6.056849815649912e-05, "logits/chosen": 0.1128070279955864, "logits/rejected": 0.034058213233947754, "logps/chosen": -0.00016933982260525227, "logps/rejected": -2.145360231399536, "loss": 0.4616, "nll_loss": 0.11538281291723251, "rewards/accuracies": 1.0, "rewards/chosen": -1.693398371571675e-05, "rewards/margins": 0.21451905369758606, "rewards/rejected": -0.21453601121902466, "step": 10838 }, { "epoch": 7.495850622406639, "grad_norm": 5.410037517547607, "learning_rate": 1.3911940986629785e-05, "log_odds_chosen": 11.573707580566406, "log_odds_ratio": -0.00010516634938539937, "logits/chosen": -0.06613228470087051, "logits/rejected": -0.07556043565273285, "logps/chosen": -0.0012632563011720777, "logps/rejected": -3.0463085174560547, "loss": 0.4794, "nll_loss": 0.1198422759771347, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012632562720682472, "rewards/margins": 0.30450454354286194, "rewards/rejected": -0.3046308755874634, "step": 10839 }, { "epoch": 7.496542185338866, "grad_norm": 6.1775312423706055, "learning_rate": 1.3908098970339634e-05, "log_odds_chosen": 10.662359237670898, "log_odds_ratio": -0.00016371881065424532, "logits/chosen": -0.5707687139511108, "logits/rejected": -0.6249455213546753, "logps/chosen": -0.0003949836827814579, "logps/rejected": -2.2343854904174805, "loss": 0.5969, "nll_loss": 0.14921541512012482, "rewards/accuracies": 1.0, "rewards/chosen": -3.949836900574155e-05, "rewards/margins": 0.22339904308319092, "rewards/rejected": -0.2234385758638382, "step": 10840 }, { "epoch": 7.497233748271093, "grad_norm": 4.109016418457031, "learning_rate": 1.3904256954049485e-05, "log_odds_chosen": 11.424544334411621, "log_odds_ratio": -9.455587132833898e-05, "logits/chosen": -0.13782545924186707, "logits/rejected": -0.13644850254058838, "logps/chosen": -0.00015705320402048528, "logps/rejected": -2.8745219707489014, "loss": 0.4233, "nll_loss": 0.1058032214641571, "rewards/accuracies": 1.0, "rewards/chosen": -1.5705320038250647e-05, "rewards/margins": 0.28743648529052734, "rewards/rejected": -0.28745219111442566, "step": 10841 }, { "epoch": 7.4979253112033195, "grad_norm": 6.663751125335693, "learning_rate": 1.3900414937759337e-05, "log_odds_chosen": 11.010114669799805, "log_odds_ratio": -5.1399500080151483e-05, "logits/chosen": -0.6667817831039429, "logits/rejected": -0.705284059047699, "logps/chosen": -0.00030787562718614936, "logps/rejected": -2.436732053756714, "loss": 0.5613, "nll_loss": 0.1403152048587799, "rewards/accuracies": 1.0, "rewards/chosen": -3.078755980823189e-05, "rewards/margins": 0.2436424046754837, "rewards/rejected": -0.2436732053756714, "step": 10842 }, { "epoch": 7.498616874135546, "grad_norm": 6.209922790527344, "learning_rate": 1.3896572921469186e-05, "log_odds_chosen": 10.507733345031738, "log_odds_ratio": -0.00028826348716393113, "logits/chosen": 0.04832683503627777, "logits/rejected": 0.12393030524253845, "logps/chosen": -0.000690105021931231, "logps/rejected": -2.3561508655548096, "loss": 0.5515, "nll_loss": 0.13785235583782196, "rewards/accuracies": 1.0, "rewards/chosen": -6.901050073793158e-05, "rewards/margins": 0.2355460822582245, "rewards/rejected": -0.235615074634552, "step": 10843 }, { "epoch": 7.499308437067773, "grad_norm": 3.0588796138763428, "learning_rate": 1.3892730905179039e-05, "log_odds_chosen": 11.098876953125, "log_odds_ratio": -4.758929935633205e-05, "logits/chosen": -0.18588104844093323, "logits/rejected": -0.24176223576068878, "logps/chosen": -0.00041968681034632027, "logps/rejected": -2.5732569694519043, "loss": 0.346, "nll_loss": 0.08649633824825287, "rewards/accuracies": 1.0, "rewards/chosen": -4.196868394501507e-05, "rewards/margins": 0.2572837471961975, "rewards/rejected": -0.2573257088661194, "step": 10844 }, { "epoch": 7.5, "grad_norm": 5.975062847137451, "learning_rate": 1.388888888888889e-05, "log_odds_chosen": 10.546667098999023, "log_odds_ratio": -0.00010005592775996774, "logits/chosen": -0.2629231810569763, "logits/rejected": -0.28820547461509705, "logps/chosen": -0.00022396890562959015, "logps/rejected": -2.047438859939575, "loss": 0.5123, "nll_loss": 0.1280604898929596, "rewards/accuracies": 1.0, "rewards/chosen": -2.2396889107767493e-05, "rewards/margins": 0.20472149550914764, "rewards/rejected": -0.204743891954422, "step": 10845 }, { "epoch": 7.500691562932227, "grad_norm": 5.633182048797607, "learning_rate": 1.3885046872598739e-05, "log_odds_chosen": 11.560811996459961, "log_odds_ratio": -2.4324574042111635e-05, "logits/chosen": -0.2091791182756424, "logits/rejected": -0.3963402211666107, "logps/chosen": -0.00012010188947897404, "logps/rejected": -2.2216544151306152, "loss": 0.4798, "nll_loss": 0.11993524432182312, "rewards/accuracies": 1.0, "rewards/chosen": -1.2010188584099524e-05, "rewards/margins": 0.2221534252166748, "rewards/rejected": -0.22216545045375824, "step": 10846 }, { "epoch": 7.501383125864454, "grad_norm": 4.486021995544434, "learning_rate": 1.3881204856308591e-05, "log_odds_chosen": 10.803471565246582, "log_odds_ratio": -0.0002556285762693733, "logits/chosen": -0.42428725957870483, "logits/rejected": -0.4375617802143097, "logps/chosen": -0.0005502038984559476, "logps/rejected": -2.2866053581237793, "loss": 0.6085, "nll_loss": 0.1520923227071762, "rewards/accuracies": 1.0, "rewards/chosen": -5.5020391300786287e-05, "rewards/margins": 0.22860552370548248, "rewards/rejected": -0.22866055369377136, "step": 10847 }, { "epoch": 7.5020746887966805, "grad_norm": 6.069352626800537, "learning_rate": 1.3877362840018443e-05, "log_odds_chosen": 11.59472370147705, "log_odds_ratio": -1.1047510270145722e-05, "logits/chosen": -0.1648513525724411, "logits/rejected": -0.3039112389087677, "logps/chosen": -7.49052778701298e-05, "logps/rejected": -1.9297611713409424, "loss": 0.4073, "nll_loss": 0.10182895511388779, "rewards/accuracies": 1.0, "rewards/chosen": -7.490527423215099e-06, "rewards/margins": 0.19296863675117493, "rewards/rejected": -0.19297611713409424, "step": 10848 }, { "epoch": 7.502766251728907, "grad_norm": 6.025568962097168, "learning_rate": 1.3873520823728293e-05, "log_odds_chosen": 10.147893905639648, "log_odds_ratio": -0.00010455265874043107, "logits/chosen": -0.4919975996017456, "logits/rejected": -0.5295048952102661, "logps/chosen": -0.00031061304616741836, "logps/rejected": -1.9053452014923096, "loss": 0.3951, "nll_loss": 0.09877252578735352, "rewards/accuracies": 1.0, "rewards/chosen": -3.106130679952912e-05, "rewards/margins": 0.19050344824790955, "rewards/rejected": -0.19053450226783752, "step": 10849 }, { "epoch": 7.503457814661134, "grad_norm": 4.921655654907227, "learning_rate": 1.3869678807438143e-05, "log_odds_chosen": 10.672442436218262, "log_odds_ratio": -0.00011154203821206465, "logits/chosen": -0.2638598382472992, "logits/rejected": -0.33946990966796875, "logps/chosen": -0.00035740650491788983, "logps/rejected": -2.011767864227295, "loss": 0.5176, "nll_loss": 0.12939995527267456, "rewards/accuracies": 1.0, "rewards/chosen": -3.574065340217203e-05, "rewards/margins": 0.20114102959632874, "rewards/rejected": -0.20117677748203278, "step": 10850 }, { "epoch": 7.504149377593361, "grad_norm": 6.706781387329102, "learning_rate": 1.3865836791147996e-05, "log_odds_chosen": 10.745321273803711, "log_odds_ratio": -4.268326301826164e-05, "logits/chosen": -0.3959668278694153, "logits/rejected": -0.4123864769935608, "logps/chosen": -0.00017690425738692284, "logps/rejected": -2.003281354904175, "loss": 0.5213, "nll_loss": 0.13032077252864838, "rewards/accuracies": 1.0, "rewards/chosen": -1.7690425011096522e-05, "rewards/margins": 0.20031043887138367, "rewards/rejected": -0.20032814145088196, "step": 10851 }, { "epoch": 7.504840940525588, "grad_norm": 4.583414077758789, "learning_rate": 1.3861994774857845e-05, "log_odds_chosen": 9.970561027526855, "log_odds_ratio": -0.0002485551231075078, "logits/chosen": -0.5081609487533569, "logits/rejected": -0.5775808095932007, "logps/chosen": -0.000991704175248742, "logps/rejected": -1.5721248388290405, "loss": 0.4789, "nll_loss": 0.11970222741365433, "rewards/accuracies": 1.0, "rewards/chosen": -9.917042189044878e-05, "rewards/margins": 0.15711332857608795, "rewards/rejected": -0.1572125107049942, "step": 10852 }, { "epoch": 7.505532503457815, "grad_norm": 5.421633720397949, "learning_rate": 1.3858152758567697e-05, "log_odds_chosen": 10.60433578491211, "log_odds_ratio": -4.82356917927973e-05, "logits/chosen": 0.1491568386554718, "logits/rejected": 0.11569232493638992, "logps/chosen": -0.00021837849635630846, "logps/rejected": -2.0376646518707275, "loss": 0.4481, "nll_loss": 0.112018883228302, "rewards/accuracies": 1.0, "rewards/chosen": -2.1837848180439323e-05, "rewards/margins": 0.20374462008476257, "rewards/rejected": -0.20376646518707275, "step": 10853 }, { "epoch": 7.5062240663900415, "grad_norm": 4.001258373260498, "learning_rate": 1.3854310742277548e-05, "log_odds_chosen": 10.59055233001709, "log_odds_ratio": -6.709634908474982e-05, "logits/chosen": -0.5928189158439636, "logits/rejected": -0.6791818737983704, "logps/chosen": -0.0002604667388368398, "logps/rejected": -1.9976965188980103, "loss": 0.457, "nll_loss": 0.11424599587917328, "rewards/accuracies": 1.0, "rewards/chosen": -2.6046673156088218e-05, "rewards/margins": 0.1997436136007309, "rewards/rejected": -0.19976966083049774, "step": 10854 }, { "epoch": 7.506915629322268, "grad_norm": 4.610876560211182, "learning_rate": 1.3850468725987397e-05, "log_odds_chosen": 11.297388076782227, "log_odds_ratio": -2.201269489887636e-05, "logits/chosen": -0.5671547055244446, "logits/rejected": -0.6654840111732483, "logps/chosen": -0.0001199392499984242, "logps/rejected": -2.3370468616485596, "loss": 0.3404, "nll_loss": 0.08509953320026398, "rewards/accuracies": 1.0, "rewards/chosen": -1.199392499984242e-05, "rewards/margins": 0.2336927056312561, "rewards/rejected": -0.23370468616485596, "step": 10855 }, { "epoch": 7.507607192254495, "grad_norm": 3.907001256942749, "learning_rate": 1.384662670969725e-05, "log_odds_chosen": 9.279020309448242, "log_odds_ratio": -0.00034641113597899675, "logits/chosen": -0.48437485098838806, "logits/rejected": -0.5484408140182495, "logps/chosen": -0.0015225738752633333, "logps/rejected": -1.8507013320922852, "loss": 0.4286, "nll_loss": 0.10712503641843796, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015225740207824856, "rewards/margins": 0.1849178671836853, "rewards/rejected": -0.18507012724876404, "step": 10856 }, { "epoch": 7.508298755186722, "grad_norm": 9.251437187194824, "learning_rate": 1.3842784693407102e-05, "log_odds_chosen": 11.04911994934082, "log_odds_ratio": -2.3998569304239936e-05, "logits/chosen": -0.401960551738739, "logits/rejected": -0.45356813073158264, "logps/chosen": -0.0001485783141106367, "logps/rejected": -2.1403989791870117, "loss": 0.4878, "nll_loss": 0.12194465100765228, "rewards/accuracies": 1.0, "rewards/chosen": -1.485783104726579e-05, "rewards/margins": 0.21402505040168762, "rewards/rejected": -0.2140398919582367, "step": 10857 }, { "epoch": 7.508990318118949, "grad_norm": 4.904819488525391, "learning_rate": 1.3838942677116951e-05, "log_odds_chosen": 10.50926399230957, "log_odds_ratio": -0.0006038338178768754, "logits/chosen": -0.7038325071334839, "logits/rejected": -0.7800592184066772, "logps/chosen": -0.0006285731215029955, "logps/rejected": -1.9317022562026978, "loss": 0.556, "nll_loss": 0.13892945647239685, "rewards/accuracies": 1.0, "rewards/chosen": -6.285731069510803e-05, "rewards/margins": 0.19310736656188965, "rewards/rejected": -0.1931702196598053, "step": 10858 }, { "epoch": 7.509681881051176, "grad_norm": 5.4442219734191895, "learning_rate": 1.3835100660826802e-05, "log_odds_chosen": 11.821813583374023, "log_odds_ratio": -0.00018751317111309618, "logits/chosen": -0.22810126841068268, "logits/rejected": -0.2429996132850647, "logps/chosen": -0.00044313608668744564, "logps/rejected": -2.8103132247924805, "loss": 0.5328, "nll_loss": 0.13319182395935059, "rewards/accuracies": 1.0, "rewards/chosen": -4.431361230672337e-05, "rewards/margins": 0.280987024307251, "rewards/rejected": -0.2810313105583191, "step": 10859 }, { "epoch": 7.5103734439834025, "grad_norm": 7.400333881378174, "learning_rate": 1.3831258644536654e-05, "log_odds_chosen": 11.880918502807617, "log_odds_ratio": -1.1990883649559692e-05, "logits/chosen": -0.29229143261909485, "logits/rejected": -0.37813225388526917, "logps/chosen": -0.00014303525676950812, "logps/rejected": -2.716463088989258, "loss": 0.4499, "nll_loss": 0.11248502880334854, "rewards/accuracies": 1.0, "rewards/chosen": -1.4303526768344454e-05, "rewards/margins": 0.27163201570510864, "rewards/rejected": -0.27164632081985474, "step": 10860 }, { "epoch": 7.511065006915629, "grad_norm": 4.424728870391846, "learning_rate": 1.3827416628246503e-05, "log_odds_chosen": 11.932880401611328, "log_odds_ratio": -2.0810561181860976e-05, "logits/chosen": -0.48462021350860596, "logits/rejected": -0.565332293510437, "logps/chosen": -0.00025298818945884705, "logps/rejected": -2.961071729660034, "loss": 0.3484, "nll_loss": 0.08709091693162918, "rewards/accuracies": 1.0, "rewards/chosen": -2.5298817490693182e-05, "rewards/margins": 0.29608190059661865, "rewards/rejected": -0.2961071729660034, "step": 10861 }, { "epoch": 7.511756569847856, "grad_norm": 13.965356826782227, "learning_rate": 1.3823574611956356e-05, "log_odds_chosen": 11.458879470825195, "log_odds_ratio": -5.086950841359794e-05, "logits/chosen": -0.34427160024642944, "logits/rejected": -0.39023107290267944, "logps/chosen": -0.00023432400485035032, "logps/rejected": -2.773988962173462, "loss": 0.4941, "nll_loss": 0.12353166192770004, "rewards/accuracies": 1.0, "rewards/chosen": -2.3432401576428674e-05, "rewards/margins": 0.2773754596710205, "rewards/rejected": -0.2773989140987396, "step": 10862 }, { "epoch": 7.512448132780083, "grad_norm": 4.900034427642822, "learning_rate": 1.3819732595666206e-05, "log_odds_chosen": 10.443470001220703, "log_odds_ratio": -7.572891627205536e-05, "logits/chosen": -0.350027859210968, "logits/rejected": -0.39866653084754944, "logps/chosen": -0.00017401771037839353, "logps/rejected": -1.6754381656646729, "loss": 0.4745, "nll_loss": 0.11862252652645111, "rewards/accuracies": 1.0, "rewards/chosen": -1.740176958264783e-05, "rewards/margins": 0.16752642393112183, "rewards/rejected": -0.16754382848739624, "step": 10863 }, { "epoch": 7.51313969571231, "grad_norm": 5.489154815673828, "learning_rate": 1.3815890579376055e-05, "log_odds_chosen": 9.630025863647461, "log_odds_ratio": -0.0019220231333747506, "logits/chosen": -0.5830115079879761, "logits/rejected": -0.6449246406555176, "logps/chosen": -0.013586835004389286, "logps/rejected": -1.557092308998108, "loss": 0.4387, "nll_loss": 0.1094917505979538, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013586835702881217, "rewards/margins": 0.15435056388378143, "rewards/rejected": -0.15570923686027527, "step": 10864 }, { "epoch": 7.513831258644537, "grad_norm": 5.110268592834473, "learning_rate": 1.3812048563085908e-05, "log_odds_chosen": 11.101775169372559, "log_odds_ratio": -2.5323784939246252e-05, "logits/chosen": -0.5525895357131958, "logits/rejected": -0.4849681258201599, "logps/chosen": -0.0002422073157504201, "logps/rejected": -2.35410213470459, "loss": 0.4673, "nll_loss": 0.11682500690221786, "rewards/accuracies": 1.0, "rewards/chosen": -2.422073157504201e-05, "rewards/margins": 0.2353859841823578, "rewards/rejected": -0.23541021347045898, "step": 10865 }, { "epoch": 7.514522821576763, "grad_norm": 4.0105109214782715, "learning_rate": 1.380820654679576e-05, "log_odds_chosen": 10.789510726928711, "log_odds_ratio": -4.4687138142762706e-05, "logits/chosen": -0.22874239087104797, "logits/rejected": -0.27216294407844543, "logps/chosen": -0.00013487483374774456, "logps/rejected": -1.885083556175232, "loss": 0.5365, "nll_loss": 0.13411131501197815, "rewards/accuracies": 1.0, "rewards/chosen": -1.3487482647178695e-05, "rewards/margins": 0.18849486112594604, "rewards/rejected": -0.18850834667682648, "step": 10866 }, { "epoch": 7.51521438450899, "grad_norm": 8.645621299743652, "learning_rate": 1.380436453050561e-05, "log_odds_chosen": 10.929075241088867, "log_odds_ratio": -2.9114125936757773e-05, "logits/chosen": -0.23884816467761993, "logits/rejected": -0.2688317894935608, "logps/chosen": -0.0002449355670250952, "logps/rejected": -2.4290623664855957, "loss": 0.5502, "nll_loss": 0.13755536079406738, "rewards/accuracies": 1.0, "rewards/chosen": -2.4493558157701045e-05, "rewards/margins": 0.24288174510002136, "rewards/rejected": -0.24290621280670166, "step": 10867 }, { "epoch": 7.515905947441217, "grad_norm": 3.153947591781616, "learning_rate": 1.380052251421546e-05, "log_odds_chosen": 9.717711448669434, "log_odds_ratio": -0.0009161827620118856, "logits/chosen": -0.1779230833053589, "logits/rejected": -0.19890283048152924, "logps/chosen": -0.0008492513443343341, "logps/rejected": -2.064303159713745, "loss": 0.3649, "nll_loss": 0.09113053977489471, "rewards/accuracies": 1.0, "rewards/chosen": -8.492513734381646e-05, "rewards/margins": 0.20634539425373077, "rewards/rejected": -0.2064303308725357, "step": 10868 }, { "epoch": 7.516597510373444, "grad_norm": 4.896413803100586, "learning_rate": 1.3796680497925313e-05, "log_odds_chosen": 10.881827354431152, "log_odds_ratio": -8.696097938809544e-05, "logits/chosen": -0.43188685178756714, "logits/rejected": -0.6426720023155212, "logps/chosen": -0.00026656663976609707, "logps/rejected": -2.365948438644409, "loss": 0.5023, "nll_loss": 0.12557311356067657, "rewards/accuracies": 1.0, "rewards/chosen": -2.665666579559911e-05, "rewards/margins": 0.23656819760799408, "rewards/rejected": -0.23659485578536987, "step": 10869 }, { "epoch": 7.517289073305671, "grad_norm": 4.01028299331665, "learning_rate": 1.3792838481635162e-05, "log_odds_chosen": 10.271018981933594, "log_odds_ratio": -0.0006215503090061247, "logits/chosen": -0.5212066173553467, "logits/rejected": -0.5539983510971069, "logps/chosen": -0.0008918846724554896, "logps/rejected": -2.137946605682373, "loss": 0.3695, "nll_loss": 0.09231384098529816, "rewards/accuracies": 1.0, "rewards/chosen": -8.918847015593201e-05, "rewards/margins": 0.21370546519756317, "rewards/rejected": -0.21379466354846954, "step": 10870 }, { "epoch": 7.517980636237898, "grad_norm": 4.348485469818115, "learning_rate": 1.3788996465345014e-05, "log_odds_chosen": 10.320483207702637, "log_odds_ratio": -0.0003907751524820924, "logits/chosen": -0.7220146656036377, "logits/rejected": -0.757490336894989, "logps/chosen": -0.0001988973090192303, "logps/rejected": -1.8074790239334106, "loss": 0.454, "nll_loss": 0.11345857381820679, "rewards/accuracies": 1.0, "rewards/chosen": -1.9889732357114553e-05, "rewards/margins": 0.180728018283844, "rewards/rejected": -0.18074792623519897, "step": 10871 }, { "epoch": 7.518672199170124, "grad_norm": 5.348752975463867, "learning_rate": 1.3785154449054865e-05, "log_odds_chosen": 10.597746849060059, "log_odds_ratio": -0.00013086672697681934, "logits/chosen": -0.4913485646247864, "logits/rejected": -0.5052577257156372, "logps/chosen": -0.000320828752592206, "logps/rejected": -1.856242060661316, "loss": 0.4562, "nll_loss": 0.11404484510421753, "rewards/accuracies": 1.0, "rewards/chosen": -3.208287671441212e-05, "rewards/margins": 0.18559211492538452, "rewards/rejected": -0.18562419712543488, "step": 10872 }, { "epoch": 7.519363762102351, "grad_norm": 6.101108551025391, "learning_rate": 1.3781312432764714e-05, "log_odds_chosen": 11.56706428527832, "log_odds_ratio": -1.9296106984256767e-05, "logits/chosen": -0.42387184500694275, "logits/rejected": -0.4706951975822449, "logps/chosen": -0.00025341319269500673, "logps/rejected": -2.820647716522217, "loss": 0.4645, "nll_loss": 0.11612551659345627, "rewards/accuracies": 1.0, "rewards/chosen": -2.534131817810703e-05, "rewards/margins": 0.28203946352005005, "rewards/rejected": -0.2820647954940796, "step": 10873 }, { "epoch": 7.520055325034578, "grad_norm": 4.491504192352295, "learning_rate": 1.3777470416474566e-05, "log_odds_chosen": 11.704853057861328, "log_odds_ratio": -6.659721111645922e-05, "logits/chosen": -0.7244716286659241, "logits/rejected": -0.687706470489502, "logps/chosen": -0.00029626733157783747, "logps/rejected": -2.5468931198120117, "loss": 0.4924, "nll_loss": 0.1230815127491951, "rewards/accuracies": 1.0, "rewards/chosen": -2.9626738978549838e-05, "rewards/margins": 0.2546597123146057, "rewards/rejected": -0.2546893060207367, "step": 10874 }, { "epoch": 7.520746887966805, "grad_norm": 6.189776420593262, "learning_rate": 1.3773628400184419e-05, "log_odds_chosen": 9.723426818847656, "log_odds_ratio": -0.00011284545325906947, "logits/chosen": 0.09950869530439377, "logits/rejected": 0.02981482446193695, "logps/chosen": -0.0002519416739232838, "logps/rejected": -1.3983118534088135, "loss": 0.4422, "nll_loss": 0.1105465441942215, "rewards/accuracies": 1.0, "rewards/chosen": -2.5194169211317785e-05, "rewards/margins": 0.13980598747730255, "rewards/rejected": -0.13983118534088135, "step": 10875 }, { "epoch": 7.521438450899032, "grad_norm": 4.315496921539307, "learning_rate": 1.3769786383894268e-05, "log_odds_chosen": 9.566936492919922, "log_odds_ratio": -0.00027100040460936725, "logits/chosen": -0.5626333951950073, "logits/rejected": -0.5699162483215332, "logps/chosen": -0.00043434806866571307, "logps/rejected": -1.7643786668777466, "loss": 0.4788, "nll_loss": 0.11968132853507996, "rewards/accuracies": 1.0, "rewards/chosen": -4.3434811232145876e-05, "rewards/margins": 0.17639443278312683, "rewards/rejected": -0.1764378547668457, "step": 10876 }, { "epoch": 7.522130013831259, "grad_norm": 5.777177333831787, "learning_rate": 1.3765944367604119e-05, "log_odds_chosen": 11.527708053588867, "log_odds_ratio": -4.7561734390910715e-05, "logits/chosen": -0.4485383629798889, "logits/rejected": -0.49741944670677185, "logps/chosen": -0.0002845055714715272, "logps/rejected": -3.065023899078369, "loss": 0.486, "nll_loss": 0.12149247527122498, "rewards/accuracies": 1.0, "rewards/chosen": -2.845055678335484e-05, "rewards/margins": 0.3064739406108856, "rewards/rejected": -0.30650240182876587, "step": 10877 }, { "epoch": 7.522821576763485, "grad_norm": 7.339626789093018, "learning_rate": 1.3762102351313971e-05, "log_odds_chosen": 12.749917984008789, "log_odds_ratio": -5.649513695971109e-06, "logits/chosen": -0.6558374166488647, "logits/rejected": -0.6490030288696289, "logps/chosen": -9.840505663305521e-05, "logps/rejected": -3.400402069091797, "loss": 0.3943, "nll_loss": 0.09857062995433807, "rewards/accuracies": 1.0, "rewards/chosen": -9.840506209002342e-06, "rewards/margins": 0.34003040194511414, "rewards/rejected": -0.3400402367115021, "step": 10878 }, { "epoch": 7.523513139695712, "grad_norm": 3.726856231689453, "learning_rate": 1.375826033502382e-05, "log_odds_chosen": 11.200907707214355, "log_odds_ratio": -6.0507183661684394e-05, "logits/chosen": 0.2244858741760254, "logits/rejected": 0.21234449744224548, "logps/chosen": -0.00015088150394149125, "logps/rejected": -2.1337900161743164, "loss": 0.4039, "nll_loss": 0.10097833722829819, "rewards/accuracies": 1.0, "rewards/chosen": -1.5088150576048065e-05, "rewards/margins": 0.213363915681839, "rewards/rejected": -0.21337899565696716, "step": 10879 }, { "epoch": 7.524204702627939, "grad_norm": 4.425390720367432, "learning_rate": 1.3754418318733673e-05, "log_odds_chosen": 10.128186225891113, "log_odds_ratio": -0.00025991900474764407, "logits/chosen": -0.5495592355728149, "logits/rejected": -0.6602213978767395, "logps/chosen": -0.0003403293085284531, "logps/rejected": -1.90929114818573, "loss": 0.4687, "nll_loss": 0.11716103553771973, "rewards/accuracies": 1.0, "rewards/chosen": -3.403293521841988e-05, "rewards/margins": 0.19089508056640625, "rewards/rejected": -0.190929114818573, "step": 10880 }, { "epoch": 7.524896265560166, "grad_norm": 9.592628479003906, "learning_rate": 1.3750576302443523e-05, "log_odds_chosen": 10.581683158874512, "log_odds_ratio": -9.209234121954069e-05, "logits/chosen": -0.42138999700546265, "logits/rejected": -0.36162620782852173, "logps/chosen": -0.00041084305848926306, "logps/rejected": -2.096816301345825, "loss": 0.8981, "nll_loss": 0.22452345490455627, "rewards/accuracies": 1.0, "rewards/chosen": -4.108430948690511e-05, "rewards/margins": 0.2096405327320099, "rewards/rejected": -0.20968163013458252, "step": 10881 }, { "epoch": 7.525587828492393, "grad_norm": 5.487520217895508, "learning_rate": 1.3746734286153374e-05, "log_odds_chosen": 9.746979713439941, "log_odds_ratio": -0.0008402118692174554, "logits/chosen": 0.11258521676063538, "logits/rejected": -0.005723059177398682, "logps/chosen": -0.0005987212061882019, "logps/rejected": -1.7435500621795654, "loss": 0.432, "nll_loss": 0.10791030526161194, "rewards/accuracies": 1.0, "rewards/chosen": -5.987211989122443e-05, "rewards/margins": 0.17429512739181519, "rewards/rejected": -0.17435501515865326, "step": 10882 }, { "epoch": 7.5262793914246195, "grad_norm": 3.8324203491210938, "learning_rate": 1.3742892269863225e-05, "log_odds_chosen": 11.629316329956055, "log_odds_ratio": -2.1213923901086673e-05, "logits/chosen": 0.08265161514282227, "logits/rejected": -0.0059318579733371735, "logps/chosen": -0.00020495994249358773, "logps/rejected": -2.784921169281006, "loss": 0.4584, "nll_loss": 0.11459926515817642, "rewards/accuracies": 1.0, "rewards/chosen": -2.0495994249358773e-05, "rewards/margins": 0.2784716486930847, "rewards/rejected": -0.27849212288856506, "step": 10883 }, { "epoch": 7.526970954356846, "grad_norm": 4.976540565490723, "learning_rate": 1.3739050253573077e-05, "log_odds_chosen": 10.63410758972168, "log_odds_ratio": -0.00015587112284265459, "logits/chosen": -0.23880663514137268, "logits/rejected": -0.28580334782600403, "logps/chosen": -0.0003356942906975746, "logps/rejected": -2.1784019470214844, "loss": 0.5595, "nll_loss": 0.1398695409297943, "rewards/accuracies": 1.0, "rewards/chosen": -3.356942761456594e-05, "rewards/margins": 0.2178066074848175, "rewards/rejected": -0.21784019470214844, "step": 10884 }, { "epoch": 7.527662517289073, "grad_norm": 5.252691745758057, "learning_rate": 1.3735208237282926e-05, "log_odds_chosen": 10.884276390075684, "log_odds_ratio": -0.00032903405372053385, "logits/chosen": -0.4288550019264221, "logits/rejected": -0.4972131550312042, "logps/chosen": -0.00042299655615352094, "logps/rejected": -2.275272846221924, "loss": 0.3326, "nll_loss": 0.0831230953335762, "rewards/accuracies": 1.0, "rewards/chosen": -4.229965998092666e-05, "rewards/margins": 0.22748498618602753, "rewards/rejected": -0.22752729058265686, "step": 10885 }, { "epoch": 7.5283540802213, "grad_norm": 4.7539896965026855, "learning_rate": 1.3731366220992779e-05, "log_odds_chosen": 12.380012512207031, "log_odds_ratio": -1.0317680789739825e-05, "logits/chosen": -0.6371591091156006, "logits/rejected": -0.5750231742858887, "logps/chosen": -0.00010312439553672448, "logps/rejected": -2.810936450958252, "loss": 0.5902, "nll_loss": 0.14755801856517792, "rewards/accuracies": 1.0, "rewards/chosen": -1.0312439371773507e-05, "rewards/margins": 0.281083345413208, "rewards/rejected": -0.28109365701675415, "step": 10886 }, { "epoch": 7.529045643153527, "grad_norm": 4.949764251708984, "learning_rate": 1.372752420470263e-05, "log_odds_chosen": 9.83175277709961, "log_odds_ratio": -8.935166988521814e-05, "logits/chosen": -0.7508769631385803, "logits/rejected": -0.7804704904556274, "logps/chosen": -0.0005445395363494754, "logps/rejected": -2.1102652549743652, "loss": 0.4111, "nll_loss": 0.10276912897825241, "rewards/accuracies": 1.0, "rewards/chosen": -5.445395072456449e-05, "rewards/margins": 0.21097204089164734, "rewards/rejected": -0.21102651953697205, "step": 10887 }, { "epoch": 7.529737206085754, "grad_norm": 8.927925109863281, "learning_rate": 1.3723682188412479e-05, "log_odds_chosen": 11.731058120727539, "log_odds_ratio": -2.9501774406526238e-05, "logits/chosen": -0.3342224657535553, "logits/rejected": -0.4032493829727173, "logps/chosen": -0.0001932134327944368, "logps/rejected": -2.837803363800049, "loss": 0.5749, "nll_loss": 0.14372438192367554, "rewards/accuracies": 1.0, "rewards/chosen": -1.932134182425216e-05, "rewards/margins": 0.28376102447509766, "rewards/rejected": -0.2837803363800049, "step": 10888 }, { "epoch": 7.5304287690179805, "grad_norm": 4.814039707183838, "learning_rate": 1.3719840172122331e-05, "log_odds_chosen": 11.000864028930664, "log_odds_ratio": -3.4564662200864404e-05, "logits/chosen": -0.37925636768341064, "logits/rejected": -0.5058153867721558, "logps/chosen": -0.00013589671289082617, "logps/rejected": -1.8084535598754883, "loss": 0.3503, "nll_loss": 0.08757337927818298, "rewards/accuracies": 1.0, "rewards/chosen": -1.3589671652880497e-05, "rewards/margins": 0.18083176016807556, "rewards/rejected": -0.18084535002708435, "step": 10889 }, { "epoch": 7.531120331950207, "grad_norm": 6.472630500793457, "learning_rate": 1.3715998155832183e-05, "log_odds_chosen": 11.88841438293457, "log_odds_ratio": -1.0142521205125377e-05, "logits/chosen": -0.33784234523773193, "logits/rejected": -0.48194387555122375, "logps/chosen": -0.00014380461652763188, "logps/rejected": -2.7480721473693848, "loss": 0.4279, "nll_loss": 0.10696800798177719, "rewards/accuracies": 1.0, "rewards/chosen": -1.4380460925167426e-05, "rewards/margins": 0.2747928500175476, "rewards/rejected": -0.2748072147369385, "step": 10890 }, { "epoch": 7.531811894882434, "grad_norm": 3.6994967460632324, "learning_rate": 1.3712156139542032e-05, "log_odds_chosen": 11.189812660217285, "log_odds_ratio": -5.345175668480806e-05, "logits/chosen": -0.1665852963924408, "logits/rejected": -0.2894322872161865, "logps/chosen": -0.0002716692688409239, "logps/rejected": -2.516805648803711, "loss": 0.9048, "nll_loss": 0.22619935870170593, "rewards/accuracies": 1.0, "rewards/chosen": -2.716692688409239e-05, "rewards/margins": 0.25165340304374695, "rewards/rejected": -0.25168055295944214, "step": 10891 }, { "epoch": 7.532503457814661, "grad_norm": 5.318807125091553, "learning_rate": 1.3708314123251883e-05, "log_odds_chosen": 10.00977897644043, "log_odds_ratio": -0.00037104147486388683, "logits/chosen": -0.8708664178848267, "logits/rejected": -0.8931385278701782, "logps/chosen": -0.00028637779178097844, "logps/rejected": -1.5222827196121216, "loss": 0.6229, "nll_loss": 0.155700221657753, "rewards/accuracies": 1.0, "rewards/chosen": -2.86377762677148e-05, "rewards/margins": 0.1521996408700943, "rewards/rejected": -0.15222826600074768, "step": 10892 }, { "epoch": 7.533195020746888, "grad_norm": 5.508212089538574, "learning_rate": 1.3704472106961736e-05, "log_odds_chosen": 10.706792831420898, "log_odds_ratio": -4.5154974941397086e-05, "logits/chosen": -0.543492317199707, "logits/rejected": -0.5513139367103577, "logps/chosen": -0.00043358042603358626, "logps/rejected": -1.8944940567016602, "loss": 0.5116, "nll_loss": 0.1279076486825943, "rewards/accuracies": 1.0, "rewards/chosen": -4.3358046241337433e-05, "rewards/margins": 0.18940606713294983, "rewards/rejected": -0.18944941461086273, "step": 10893 }, { "epoch": 7.533886583679115, "grad_norm": 4.529955863952637, "learning_rate": 1.3700630090671585e-05, "log_odds_chosen": 11.382387161254883, "log_odds_ratio": -3.067726720473729e-05, "logits/chosen": -0.7753385305404663, "logits/rejected": -0.8173359632492065, "logps/chosen": -8.34188685985282e-05, "logps/rejected": -2.0420587062835693, "loss": 0.4156, "nll_loss": 0.10389338433742523, "rewards/accuracies": 1.0, "rewards/chosen": -8.341887223650701e-06, "rewards/margins": 0.20419752597808838, "rewards/rejected": -0.20420587062835693, "step": 10894 }, { "epoch": 7.5345781466113415, "grad_norm": 4.1907854080200195, "learning_rate": 1.3696788074381437e-05, "log_odds_chosen": 9.962699890136719, "log_odds_ratio": -8.843952673487365e-05, "logits/chosen": -0.5206528902053833, "logits/rejected": -0.6122863292694092, "logps/chosen": -0.0005318694747984409, "logps/rejected": -1.814341425895691, "loss": 0.4367, "nll_loss": 0.10916710644960403, "rewards/accuracies": 1.0, "rewards/chosen": -5.318695184541866e-05, "rewards/margins": 0.181380957365036, "rewards/rejected": -0.18143412470817566, "step": 10895 }, { "epoch": 7.535269709543568, "grad_norm": 6.572483062744141, "learning_rate": 1.3692946058091288e-05, "log_odds_chosen": 10.755268096923828, "log_odds_ratio": -6.7830944317393e-05, "logits/chosen": -0.4748067855834961, "logits/rejected": -0.5554353594779968, "logps/chosen": -0.0005805790424346924, "logps/rejected": -2.811382293701172, "loss": 0.4749, "nll_loss": 0.11872723698616028, "rewards/accuracies": 1.0, "rewards/chosen": -5.805790351587348e-05, "rewards/margins": 0.2810802161693573, "rewards/rejected": -0.28113824129104614, "step": 10896 }, { "epoch": 7.535961272475795, "grad_norm": 4.007783889770508, "learning_rate": 1.3689104041801137e-05, "log_odds_chosen": 11.30660343170166, "log_odds_ratio": -0.0003542294434737414, "logits/chosen": -0.5386285185813904, "logits/rejected": -0.5860005021095276, "logps/chosen": -9.375077934237197e-05, "logps/rejected": -2.324409008026123, "loss": 0.575, "nll_loss": 0.14370985329151154, "rewards/accuracies": 1.0, "rewards/chosen": -9.375078661832958e-06, "rewards/margins": 0.2324315309524536, "rewards/rejected": -0.23244090378284454, "step": 10897 }, { "epoch": 7.536652835408022, "grad_norm": 5.47639274597168, "learning_rate": 1.368526202551099e-05, "log_odds_chosen": 11.627883911132812, "log_odds_ratio": -2.6631163564161398e-05, "logits/chosen": -0.3915488123893738, "logits/rejected": -0.4371181130409241, "logps/chosen": -0.00011329659901093692, "logps/rejected": -2.546294927597046, "loss": 0.298, "nll_loss": 0.07449459284543991, "rewards/accuracies": 1.0, "rewards/chosen": -1.1329660082992632e-05, "rewards/margins": 0.25461816787719727, "rewards/rejected": -0.2546294927597046, "step": 10898 }, { "epoch": 7.537344398340249, "grad_norm": 13.84184741973877, "learning_rate": 1.3681420009220842e-05, "log_odds_chosen": 10.913511276245117, "log_odds_ratio": -3.6600089515559375e-05, "logits/chosen": -0.50941401720047, "logits/rejected": -0.592617392539978, "logps/chosen": -0.00012552604312077165, "logps/rejected": -2.05088210105896, "loss": 0.4461, "nll_loss": 0.11151237785816193, "rewards/accuracies": 1.0, "rewards/chosen": -1.2552604857773986e-05, "rewards/margins": 0.2050756812095642, "rewards/rejected": -0.20508822798728943, "step": 10899 }, { "epoch": 7.538035961272476, "grad_norm": 8.043490409851074, "learning_rate": 1.3677577992930691e-05, "log_odds_chosen": 10.945327758789062, "log_odds_ratio": -4.287495539756492e-05, "logits/chosen": -0.2591250538825989, "logits/rejected": -0.3053608536720276, "logps/chosen": -0.0002915628720074892, "logps/rejected": -2.4187121391296387, "loss": 0.3408, "nll_loss": 0.08519420772790909, "rewards/accuracies": 1.0, "rewards/chosen": -2.9156288292142563e-05, "rewards/margins": 0.24184203147888184, "rewards/rejected": -0.241871178150177, "step": 10900 }, { "epoch": 7.5387275242047025, "grad_norm": 6.757442951202393, "learning_rate": 1.3673735976640542e-05, "log_odds_chosen": 10.361787796020508, "log_odds_ratio": -9.385471639689058e-05, "logits/chosen": -0.5704642534255981, "logits/rejected": -0.6144750118255615, "logps/chosen": -0.00023506373690906912, "logps/rejected": -1.5697286128997803, "loss": 0.4696, "nll_loss": 0.11740292608737946, "rewards/accuracies": 1.0, "rewards/chosen": -2.3506374418502674e-05, "rewards/margins": 0.15694934129714966, "rewards/rejected": -0.15697285532951355, "step": 10901 }, { "epoch": 7.539419087136929, "grad_norm": 4.761688232421875, "learning_rate": 1.366989396035039e-05, "log_odds_chosen": 11.750556945800781, "log_odds_ratio": -1.5190888007055037e-05, "logits/chosen": -0.6291791796684265, "logits/rejected": -0.6748467683792114, "logps/chosen": -8.288262324640527e-05, "logps/rejected": -2.036034107208252, "loss": 0.399, "nll_loss": 0.09974059462547302, "rewards/accuracies": 1.0, "rewards/chosen": -8.288262506539468e-06, "rewards/margins": 0.2035951316356659, "rewards/rejected": -0.20360340178012848, "step": 10902 }, { "epoch": 7.540110650069156, "grad_norm": 4.356700897216797, "learning_rate": 1.3666051944060243e-05, "log_odds_chosen": 9.920604705810547, "log_odds_ratio": -0.00011185869516339153, "logits/chosen": -0.24763020873069763, "logits/rejected": -0.22162744402885437, "logps/chosen": -0.00019569398136809468, "logps/rejected": -1.5281915664672852, "loss": 0.4347, "nll_loss": 0.10866691172122955, "rewards/accuracies": 1.0, "rewards/chosen": -1.9569397409213707e-05, "rewards/margins": 0.152799591422081, "rewards/rejected": -0.1528191715478897, "step": 10903 }, { "epoch": 7.540802213001383, "grad_norm": 5.018890380859375, "learning_rate": 1.3662209927770096e-05, "log_odds_chosen": 11.4712553024292, "log_odds_ratio": -1.9771054212469608e-05, "logits/chosen": -0.3403857350349426, "logits/rejected": -0.3579222559928894, "logps/chosen": -0.00012209927081130445, "logps/rejected": -2.3484385013580322, "loss": 0.869, "nll_loss": 0.2172483205795288, "rewards/accuracies": 1.0, "rewards/chosen": -1.2209927263029385e-05, "rewards/margins": 0.23483163118362427, "rewards/rejected": -0.23484385013580322, "step": 10904 }, { "epoch": 7.54149377593361, "grad_norm": 4.402528762817383, "learning_rate": 1.3658367911479945e-05, "log_odds_chosen": 11.565021514892578, "log_odds_ratio": -3.308330997242592e-05, "logits/chosen": -0.5835554003715515, "logits/rejected": -0.7503261566162109, "logps/chosen": -0.0005653960397467017, "logps/rejected": -3.1881885528564453, "loss": 0.6669, "nll_loss": 0.1667259931564331, "rewards/accuracies": 1.0, "rewards/chosen": -5.653960397467017e-05, "rewards/margins": 0.31876230239868164, "rewards/rejected": -0.3188188672065735, "step": 10905 }, { "epoch": 7.542185338865837, "grad_norm": 5.141797065734863, "learning_rate": 1.3654525895189795e-05, "log_odds_chosen": 10.716361999511719, "log_odds_ratio": -0.0001025652454700321, "logits/chosen": -0.10361681878566742, "logits/rejected": -0.13290606439113617, "logps/chosen": -0.000345752079738304, "logps/rejected": -2.291757822036743, "loss": 0.6552, "nll_loss": 0.16379579901695251, "rewards/accuracies": 1.0, "rewards/chosen": -3.457520870142616e-05, "rewards/margins": 0.2291412204504013, "rewards/rejected": -0.22917580604553223, "step": 10906 }, { "epoch": 7.5428769017980635, "grad_norm": 5.401771068572998, "learning_rate": 1.3650683878899648e-05, "log_odds_chosen": 11.184661865234375, "log_odds_ratio": -0.00032188615296036005, "logits/chosen": -0.28486594557762146, "logits/rejected": -0.28327369689941406, "logps/chosen": -0.0005774404271505773, "logps/rejected": -2.8740031719207764, "loss": 0.5407, "nll_loss": 0.1351354569196701, "rewards/accuracies": 1.0, "rewards/chosen": -5.774404780822806e-05, "rewards/margins": 0.2873425781726837, "rewards/rejected": -0.2874003052711487, "step": 10907 }, { "epoch": 7.54356846473029, "grad_norm": 5.230515956878662, "learning_rate": 1.3646841862609497e-05, "log_odds_chosen": 10.53951358795166, "log_odds_ratio": -0.00010120034858118743, "logits/chosen": 0.010187406092882156, "logits/rejected": -0.06117922440171242, "logps/chosen": -0.00033147024805657566, "logps/rejected": -2.029526472091675, "loss": 0.519, "nll_loss": 0.12973934412002563, "rewards/accuracies": 1.0, "rewards/chosen": -3.314702189527452e-05, "rewards/margins": 0.20291949808597565, "rewards/rejected": -0.20295265316963196, "step": 10908 }, { "epoch": 7.544260027662517, "grad_norm": 7.434238433837891, "learning_rate": 1.364299984631935e-05, "log_odds_chosen": 11.641447067260742, "log_odds_ratio": -3.3890286431415007e-05, "logits/chosen": -0.2585147023200989, "logits/rejected": -0.19221074879169464, "logps/chosen": -0.000364738138159737, "logps/rejected": -3.0308213233947754, "loss": 0.9996, "nll_loss": 0.24988438189029694, "rewards/accuracies": 1.0, "rewards/chosen": -3.647381527116522e-05, "rewards/margins": 0.3030456304550171, "rewards/rejected": -0.30308210849761963, "step": 10909 }, { "epoch": 7.544951590594744, "grad_norm": 11.842061996459961, "learning_rate": 1.36391578300292e-05, "log_odds_chosen": 10.118453025817871, "log_odds_ratio": -0.0001638751127757132, "logits/chosen": -0.2586163580417633, "logits/rejected": -0.3308694362640381, "logps/chosen": -0.0005985454190522432, "logps/rejected": -1.85396146774292, "loss": 0.4477, "nll_loss": 0.11192052066326141, "rewards/accuracies": 1.0, "rewards/chosen": -5.985454481560737e-05, "rewards/margins": 0.18533629179000854, "rewards/rejected": -0.18539613485336304, "step": 10910 }, { "epoch": 7.545643153526971, "grad_norm": 4.961551189422607, "learning_rate": 1.363531581373905e-05, "log_odds_chosen": 10.454790115356445, "log_odds_ratio": -0.00013295510143507272, "logits/chosen": -0.42641448974609375, "logits/rejected": -0.42252567410469055, "logps/chosen": -0.00023922814580146223, "logps/rejected": -1.8349452018737793, "loss": 0.3928, "nll_loss": 0.0981857031583786, "rewards/accuracies": 1.0, "rewards/chosen": -2.3922815671539865e-05, "rewards/margins": 0.18347060680389404, "rewards/rejected": -0.18349452316761017, "step": 10911 }, { "epoch": 7.546334716459198, "grad_norm": 3.9336705207824707, "learning_rate": 1.3631473797448902e-05, "log_odds_chosen": 10.669046401977539, "log_odds_ratio": -7.399608148261905e-05, "logits/chosen": -0.16355778276920319, "logits/rejected": -0.23712387681007385, "logps/chosen": -0.0004548661527223885, "logps/rejected": -2.373354911804199, "loss": 0.4048, "nll_loss": 0.10118485987186432, "rewards/accuracies": 1.0, "rewards/chosen": -4.5486616727430373e-05, "rewards/margins": 0.23728998005390167, "rewards/rejected": -0.2373354732990265, "step": 10912 }, { "epoch": 7.5470262793914245, "grad_norm": 3.3881542682647705, "learning_rate": 1.3627631781158754e-05, "log_odds_chosen": 9.861526489257812, "log_odds_ratio": -0.0007383470074273646, "logits/chosen": -0.34626615047454834, "logits/rejected": -0.34497809410095215, "logps/chosen": -0.0006527705118060112, "logps/rejected": -1.7543696165084839, "loss": 0.3492, "nll_loss": 0.08722208440303802, "rewards/accuracies": 1.0, "rewards/chosen": -6.527705409098417e-05, "rewards/margins": 0.1753716766834259, "rewards/rejected": -0.17543694376945496, "step": 10913 }, { "epoch": 7.547717842323651, "grad_norm": 3.727787971496582, "learning_rate": 1.3623789764868603e-05, "log_odds_chosen": 9.567840576171875, "log_odds_ratio": -0.0009155230945907533, "logits/chosen": -0.030373331159353256, "logits/rejected": -0.031623005867004395, "logps/chosen": -0.0018961422611027956, "logps/rejected": -1.626967430114746, "loss": 0.7449, "nll_loss": 0.18613487482070923, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018961422028951347, "rewards/margins": 0.16250713169574738, "rewards/rejected": -0.1626967489719391, "step": 10914 }, { "epoch": 7.548409405255878, "grad_norm": 5.287073612213135, "learning_rate": 1.3619947748578454e-05, "log_odds_chosen": 10.789902687072754, "log_odds_ratio": -5.0830250984290615e-05, "logits/chosen": -0.9028030037879944, "logits/rejected": -0.9219222664833069, "logps/chosen": -0.00033289406565018, "logps/rejected": -2.100735664367676, "loss": 0.3585, "nll_loss": 0.08961069583892822, "rewards/accuracies": 1.0, "rewards/chosen": -3.328940874780528e-05, "rewards/margins": 0.21004027128219604, "rewards/rejected": -0.2100735753774643, "step": 10915 }, { "epoch": 7.549100968188105, "grad_norm": 4.347574234008789, "learning_rate": 1.3616105732288306e-05, "log_odds_chosen": 10.415060043334961, "log_odds_ratio": -9.381695417687297e-05, "logits/chosen": -0.31361645460128784, "logits/rejected": -0.4492868483066559, "logps/chosen": -0.00033570057712495327, "logps/rejected": -2.3112096786499023, "loss": 0.2271, "nll_loss": 0.05677155405282974, "rewards/accuracies": 1.0, "rewards/chosen": -3.357006062287837e-05, "rewards/margins": 0.23108740150928497, "rewards/rejected": -0.2311209738254547, "step": 10916 }, { "epoch": 7.549792531120332, "grad_norm": 4.294283390045166, "learning_rate": 1.3612263715998155e-05, "log_odds_chosen": 10.835281372070312, "log_odds_ratio": -3.1858988222666085e-05, "logits/chosen": -0.05257886275649071, "logits/rejected": -0.055259451270103455, "logps/chosen": -0.00035562628181651235, "logps/rejected": -2.4179494380950928, "loss": 0.4152, "nll_loss": 0.10379417985677719, "rewards/accuracies": 1.0, "rewards/chosen": -3.5562628909246996e-05, "rewards/margins": 0.24175938963890076, "rewards/rejected": -0.24179495871067047, "step": 10917 }, { "epoch": 7.550484094052559, "grad_norm": 8.6405668258667, "learning_rate": 1.3608421699708008e-05, "log_odds_chosen": 10.642109870910645, "log_odds_ratio": -0.00011460207315394655, "logits/chosen": -0.4259280562400818, "logits/rejected": -0.4998432993888855, "logps/chosen": -0.00035607305471785367, "logps/rejected": -2.274139165878296, "loss": 0.4877, "nll_loss": 0.12190458178520203, "rewards/accuracies": 1.0, "rewards/chosen": -3.560730692697689e-05, "rewards/margins": 0.22737830877304077, "rewards/rejected": -0.22741392254829407, "step": 10918 }, { "epoch": 7.551175656984785, "grad_norm": 4.222203731536865, "learning_rate": 1.3604579683417859e-05, "log_odds_chosen": 10.999160766601562, "log_odds_ratio": -3.665951226139441e-05, "logits/chosen": -0.5962536334991455, "logits/rejected": -0.6591715812683105, "logps/chosen": -0.00010887366079259664, "logps/rejected": -1.5783509016036987, "loss": 0.3694, "nll_loss": 0.09234155714511871, "rewards/accuracies": 1.0, "rewards/chosen": -1.0887366443057545e-05, "rewards/margins": 0.15782421827316284, "rewards/rejected": -0.15783509612083435, "step": 10919 }, { "epoch": 7.551867219917012, "grad_norm": 4.335033416748047, "learning_rate": 1.3600737667127708e-05, "log_odds_chosen": 10.389474868774414, "log_odds_ratio": -6.485909398179501e-05, "logits/chosen": -0.1939990371465683, "logits/rejected": -0.242635577917099, "logps/chosen": -0.00023443308600690216, "logps/rejected": -2.0197837352752686, "loss": 0.3774, "nll_loss": 0.09434625506401062, "rewards/accuracies": 1.0, "rewards/chosen": -2.3443308236892335e-05, "rewards/margins": 0.2019549310207367, "rewards/rejected": -0.20197835564613342, "step": 10920 }, { "epoch": 7.552558782849239, "grad_norm": 4.163060665130615, "learning_rate": 1.359689565083756e-05, "log_odds_chosen": 10.459320068359375, "log_odds_ratio": -0.0014625128824263811, "logits/chosen": -0.4592801034450531, "logits/rejected": -0.5197211503982544, "logps/chosen": -0.0027722991071641445, "logps/rejected": -2.700474500656128, "loss": 0.4314, "nll_loss": 0.10769951343536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027722990489564836, "rewards/margins": 0.26977020502090454, "rewards/rejected": -0.27004745602607727, "step": 10921 }, { "epoch": 7.553250345781466, "grad_norm": 7.067339897155762, "learning_rate": 1.3593053634547412e-05, "log_odds_chosen": 11.163808822631836, "log_odds_ratio": -9.274063631892204e-05, "logits/chosen": -0.5344827771186829, "logits/rejected": -0.5484803915023804, "logps/chosen": -7.391178223770112e-05, "logps/rejected": -2.0709049701690674, "loss": 0.8812, "nll_loss": 0.22027882933616638, "rewards/accuracies": 1.0, "rewards/chosen": -7.391178769466933e-06, "rewards/margins": 0.20708312094211578, "rewards/rejected": -0.20709051191806793, "step": 10922 }, { "epoch": 7.553941908713693, "grad_norm": 4.202138423919678, "learning_rate": 1.3589211618257262e-05, "log_odds_chosen": 10.711816787719727, "log_odds_ratio": -0.0009630117565393448, "logits/chosen": -0.5583059191703796, "logits/rejected": -0.6031702756881714, "logps/chosen": -0.0003722950932569802, "logps/rejected": -1.7346935272216797, "loss": 0.5121, "nll_loss": 0.12792344391345978, "rewards/accuracies": 1.0, "rewards/chosen": -3.7229507142910734e-05, "rewards/margins": 0.1734321117401123, "rewards/rejected": -0.17346934974193573, "step": 10923 }, { "epoch": 7.55463347164592, "grad_norm": 4.490311622619629, "learning_rate": 1.3585369601967112e-05, "log_odds_chosen": 10.83390998840332, "log_odds_ratio": -5.0170037866337225e-05, "logits/chosen": -0.5973794460296631, "logits/rejected": -0.6883317232131958, "logps/chosen": -0.0004492515290621668, "logps/rejected": -2.4969382286071777, "loss": 0.457, "nll_loss": 0.11424939334392548, "rewards/accuracies": 1.0, "rewards/chosen": -4.492515290621668e-05, "rewards/margins": 0.24964889883995056, "rewards/rejected": -0.24969382584095, "step": 10924 }, { "epoch": 7.555325034578146, "grad_norm": 3.2777633666992188, "learning_rate": 1.3581527585676965e-05, "log_odds_chosen": 11.443975448608398, "log_odds_ratio": -3.585993545129895e-05, "logits/chosen": -0.6728773713111877, "logits/rejected": -0.8710072040557861, "logps/chosen": -0.00010490731074241921, "logps/rejected": -2.018962860107422, "loss": 0.251, "nll_loss": 0.062735915184021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0490731256140862e-05, "rewards/margins": 0.20188578963279724, "rewards/rejected": -0.2018962949514389, "step": 10925 }, { "epoch": 7.556016597510373, "grad_norm": 3.8097290992736816, "learning_rate": 1.3577685569386814e-05, "log_odds_chosen": 11.300907135009766, "log_odds_ratio": -9.683187818154693e-05, "logits/chosen": -0.43710342049598694, "logits/rejected": -0.3327723741531372, "logps/chosen": -0.00046393644879572093, "logps/rejected": -2.8096423149108887, "loss": 0.3756, "nll_loss": 0.09388810396194458, "rewards/accuracies": 1.0, "rewards/chosen": -4.6393644879572093e-05, "rewards/margins": 0.28091785311698914, "rewards/rejected": -0.2809642255306244, "step": 10926 }, { "epoch": 7.5567081604426, "grad_norm": 6.978212833404541, "learning_rate": 1.3573843553096666e-05, "log_odds_chosen": 11.489490509033203, "log_odds_ratio": -1.5987618098733947e-05, "logits/chosen": -0.2056851089000702, "logits/rejected": -0.2149638533592224, "logps/chosen": -0.0001905599783640355, "logps/rejected": -2.3923375606536865, "loss": 0.5754, "nll_loss": 0.14384540915489197, "rewards/accuracies": 1.0, "rewards/chosen": -1.905599856399931e-05, "rewards/margins": 0.239214688539505, "rewards/rejected": -0.23923374712467194, "step": 10927 }, { "epoch": 7.557399723374827, "grad_norm": 10.007233619689941, "learning_rate": 1.3570001536806517e-05, "log_odds_chosen": 11.136518478393555, "log_odds_ratio": -2.728265644691419e-05, "logits/chosen": -0.21735602617263794, "logits/rejected": -0.17750519514083862, "logps/chosen": -0.0002487509627826512, "logps/rejected": -2.4564075469970703, "loss": 0.474, "nll_loss": 0.1185031458735466, "rewards/accuracies": 1.0, "rewards/chosen": -2.4875096642063e-05, "rewards/margins": 0.2456158846616745, "rewards/rejected": -0.24564076960086823, "step": 10928 }, { "epoch": 7.558091286307054, "grad_norm": 3.546488046646118, "learning_rate": 1.3566159520516366e-05, "log_odds_chosen": 10.552568435668945, "log_odds_ratio": -5.7265235227532685e-05, "logits/chosen": -0.5594015121459961, "logits/rejected": -0.616118311882019, "logps/chosen": -0.00035102260881103575, "logps/rejected": -1.9318368434906006, "loss": 0.4211, "nll_loss": 0.1052689179778099, "rewards/accuracies": 1.0, "rewards/chosen": -3.5102260881103575e-05, "rewards/margins": 0.19314856827259064, "rewards/rejected": -0.19318366050720215, "step": 10929 }, { "epoch": 7.558782849239281, "grad_norm": 3.83551287651062, "learning_rate": 1.3562317504226218e-05, "log_odds_chosen": 11.369571685791016, "log_odds_ratio": -0.00012164629151811823, "logits/chosen": -0.3469897210597992, "logits/rejected": -0.39686405658721924, "logps/chosen": -0.0004319645231589675, "logps/rejected": -2.764150619506836, "loss": 0.4561, "nll_loss": 0.1140192300081253, "rewards/accuracies": 1.0, "rewards/chosen": -4.319645086070523e-05, "rewards/margins": 0.27637186646461487, "rewards/rejected": -0.276415079832077, "step": 10930 }, { "epoch": 7.559474412171507, "grad_norm": 4.2708845138549805, "learning_rate": 1.3558475487936071e-05, "log_odds_chosen": 10.607118606567383, "log_odds_ratio": -6.914585537742823e-05, "logits/chosen": -0.4425663948059082, "logits/rejected": -0.491806298494339, "logps/chosen": -0.00018268710118718445, "logps/rejected": -1.8966383934020996, "loss": 0.6051, "nll_loss": 0.1512637436389923, "rewards/accuracies": 1.0, "rewards/chosen": -1.8268709027324803e-05, "rewards/margins": 0.18964557349681854, "rewards/rejected": -0.1896638423204422, "step": 10931 }, { "epoch": 7.560165975103734, "grad_norm": 5.706757068634033, "learning_rate": 1.355463347164592e-05, "log_odds_chosen": 10.261611938476562, "log_odds_ratio": -0.0001751371455611661, "logits/chosen": -0.23027479648590088, "logits/rejected": -0.2460954487323761, "logps/chosen": -0.0038980338722467422, "logps/rejected": -2.111262798309326, "loss": 0.3182, "nll_loss": 0.07953331619501114, "rewards/accuracies": 1.0, "rewards/chosen": -0.00038980337558314204, "rewards/margins": 0.210736483335495, "rewards/rejected": -0.21112629771232605, "step": 10932 }, { "epoch": 7.560857538035961, "grad_norm": 4.881253242492676, "learning_rate": 1.355079145535577e-05, "log_odds_chosen": 11.820080757141113, "log_odds_ratio": -0.0004292959056328982, "logits/chosen": -0.5062116980552673, "logits/rejected": -0.4614921510219574, "logps/chosen": -0.00029794700094498694, "logps/rejected": -3.201214551925659, "loss": 0.3764, "nll_loss": 0.09405495971441269, "rewards/accuracies": 1.0, "rewards/chosen": -2.9794700822094455e-05, "rewards/margins": 0.3200916647911072, "rewards/rejected": -0.3201214671134949, "step": 10933 }, { "epoch": 7.561549100968188, "grad_norm": 6.380796432495117, "learning_rate": 1.3546949439065623e-05, "log_odds_chosen": 10.26038646697998, "log_odds_ratio": -6.220456270966679e-05, "logits/chosen": -0.5859804153442383, "logits/rejected": -0.6496269702911377, "logps/chosen": -0.0006967331864871085, "logps/rejected": -1.7505897283554077, "loss": 0.5233, "nll_loss": 0.13082881271839142, "rewards/accuracies": 1.0, "rewards/chosen": -6.967331864871085e-05, "rewards/margins": 0.17498929798603058, "rewards/rejected": -0.175058975815773, "step": 10934 }, { "epoch": 7.562240663900415, "grad_norm": 4.680851459503174, "learning_rate": 1.3543107422775472e-05, "log_odds_chosen": 10.964227676391602, "log_odds_ratio": -2.9342954803723842e-05, "logits/chosen": -0.40084123611450195, "logits/rejected": -0.4273257255554199, "logps/chosen": -0.00015311934112105519, "logps/rejected": -1.6325464248657227, "loss": 0.3898, "nll_loss": 0.0974491536617279, "rewards/accuracies": 1.0, "rewards/chosen": -1.53119344759034e-05, "rewards/margins": 0.16323933005332947, "rewards/rejected": -0.16325464844703674, "step": 10935 }, { "epoch": 7.5629322268326415, "grad_norm": 4.604579925537109, "learning_rate": 1.3539265406485325e-05, "log_odds_chosen": 11.361536026000977, "log_odds_ratio": -2.4415654479525983e-05, "logits/chosen": -0.4994909167289734, "logits/rejected": -0.45419901609420776, "logps/chosen": -7.685035961912945e-05, "logps/rejected": -1.7258703708648682, "loss": 0.4156, "nll_loss": 0.10389953851699829, "rewards/accuracies": 1.0, "rewards/chosen": -7.685036507609766e-06, "rewards/margins": 0.1725793480873108, "rewards/rejected": -0.172587051987648, "step": 10936 }, { "epoch": 7.563623789764868, "grad_norm": 6.140023231506348, "learning_rate": 1.3535423390195175e-05, "log_odds_chosen": 10.658794403076172, "log_odds_ratio": -0.0001274331589229405, "logits/chosen": -0.35980963706970215, "logits/rejected": -0.44661083817481995, "logps/chosen": -0.0009565124055370688, "logps/rejected": -2.4567627906799316, "loss": 0.531, "nll_loss": 0.13273148238658905, "rewards/accuracies": 1.0, "rewards/chosen": -9.565124491928145e-05, "rewards/margins": 0.24558061361312866, "rewards/rejected": -0.24567627906799316, "step": 10937 }, { "epoch": 7.564315352697095, "grad_norm": 6.799753189086914, "learning_rate": 1.3531581373905024e-05, "log_odds_chosen": 11.992451667785645, "log_odds_ratio": -9.671902262198273e-06, "logits/chosen": -0.6842149496078491, "logits/rejected": -0.5291277170181274, "logps/chosen": -7.34966088202782e-05, "logps/rejected": -1.9616817235946655, "loss": 0.4859, "nll_loss": 0.12148353457450867, "rewards/accuracies": 1.0, "rewards/chosen": -7.3496612458257005e-06, "rewards/margins": 0.19616082310676575, "rewards/rejected": -0.1961681842803955, "step": 10938 }, { "epoch": 7.565006915629322, "grad_norm": 4.647584915161133, "learning_rate": 1.3527739357614877e-05, "log_odds_chosen": 10.580001831054688, "log_odds_ratio": -0.00015425118908751756, "logits/chosen": -0.34404534101486206, "logits/rejected": -0.4109252095222473, "logps/chosen": -0.00034372409572824836, "logps/rejected": -2.354987144470215, "loss": 0.3574, "nll_loss": 0.08934677392244339, "rewards/accuracies": 1.0, "rewards/chosen": -3.4372409572824836e-05, "rewards/margins": 0.23546436429023743, "rewards/rejected": -0.23549872636795044, "step": 10939 }, { "epoch": 7.565698478561549, "grad_norm": 5.138919353485107, "learning_rate": 1.352389734132473e-05, "log_odds_chosen": 10.619292259216309, "log_odds_ratio": -6.635507452301681e-05, "logits/chosen": -0.6313656568527222, "logits/rejected": -0.664108395576477, "logps/chosen": -0.0003709258744493127, "logps/rejected": -2.564089298248291, "loss": 0.5316, "nll_loss": 0.13289271295070648, "rewards/accuracies": 1.0, "rewards/chosen": -3.709258817252703e-05, "rewards/margins": 0.2563718259334564, "rewards/rejected": -0.2564089298248291, "step": 10940 }, { "epoch": 7.566390041493776, "grad_norm": 5.1360955238342285, "learning_rate": 1.3520055325034578e-05, "log_odds_chosen": 10.733930587768555, "log_odds_ratio": -6.731198664056137e-05, "logits/chosen": -0.3754677176475525, "logits/rejected": -0.3669604957103729, "logps/chosen": -0.0004016646998934448, "logps/rejected": -2.154148578643799, "loss": 0.4801, "nll_loss": 0.12000709027051926, "rewards/accuracies": 1.0, "rewards/chosen": -4.0166469261748716e-05, "rewards/margins": 0.2153746783733368, "rewards/rejected": -0.2154148519039154, "step": 10941 }, { "epoch": 7.5670816044260025, "grad_norm": 5.6666059494018555, "learning_rate": 1.351621330874443e-05, "log_odds_chosen": 10.722338676452637, "log_odds_ratio": -9.273626346839592e-05, "logits/chosen": -0.11671411991119385, "logits/rejected": -0.16290025413036346, "logps/chosen": -0.0001824825449148193, "logps/rejected": -1.8317897319793701, "loss": 0.5547, "nll_loss": 0.1386728286743164, "rewards/accuracies": 1.0, "rewards/chosen": -1.8248256310471334e-05, "rewards/margins": 0.1831607222557068, "rewards/rejected": -0.18317899107933044, "step": 10942 }, { "epoch": 7.567773167358229, "grad_norm": 6.869671821594238, "learning_rate": 1.3512371292454282e-05, "log_odds_chosen": 11.702990531921387, "log_odds_ratio": -1.1985999663011171e-05, "logits/chosen": -0.3330806791782379, "logits/rejected": -0.39222127199172974, "logps/chosen": -5.937376045039855e-05, "logps/rejected": -2.041858673095703, "loss": 0.3399, "nll_loss": 0.08497916907072067, "rewards/accuracies": 1.0, "rewards/chosen": -5.937376045039855e-06, "rewards/margins": 0.20417992770671844, "rewards/rejected": -0.2041858732700348, "step": 10943 }, { "epoch": 7.568464730290456, "grad_norm": 4.465538024902344, "learning_rate": 1.350852927616413e-05, "log_odds_chosen": 9.980588912963867, "log_odds_ratio": -0.0002866295399144292, "logits/chosen": -0.20742267370224, "logits/rejected": -0.17144420742988586, "logps/chosen": -0.0009605524828657508, "logps/rejected": -2.3056931495666504, "loss": 0.3348, "nll_loss": 0.08365992456674576, "rewards/accuracies": 1.0, "rewards/chosen": -9.605525701772422e-05, "rewards/margins": 0.23047326505184174, "rewards/rejected": -0.23056930303573608, "step": 10944 }, { "epoch": 7.569156293222683, "grad_norm": 6.102631092071533, "learning_rate": 1.3504687259873983e-05, "log_odds_chosen": 10.743348121643066, "log_odds_ratio": -4.211036139167845e-05, "logits/chosen": -0.45897427201271057, "logits/rejected": -0.37194669246673584, "logps/chosen": -0.00015622461796738207, "logps/rejected": -1.906165361404419, "loss": 0.758, "nll_loss": 0.18948470056056976, "rewards/accuracies": 1.0, "rewards/chosen": -1.5622459613950923e-05, "rewards/margins": 0.1906009316444397, "rewards/rejected": -0.19061654806137085, "step": 10945 }, { "epoch": 7.56984785615491, "grad_norm": 4.20671272277832, "learning_rate": 1.3500845243583834e-05, "log_odds_chosen": 10.227163314819336, "log_odds_ratio": -0.0010573863983154297, "logits/chosen": -0.5030470490455627, "logits/rejected": -0.5151537656784058, "logps/chosen": -0.001639746013097465, "logps/rejected": -2.151639223098755, "loss": 0.3991, "nll_loss": 0.09966427087783813, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016397460422012955, "rewards/margins": 0.2149999439716339, "rewards/rejected": -0.21516390144824982, "step": 10946 }, { "epoch": 7.570539419087137, "grad_norm": 5.223161697387695, "learning_rate": 1.3497003227293683e-05, "log_odds_chosen": 10.072952270507812, "log_odds_ratio": -0.00028690227190963924, "logits/chosen": -0.4013603627681732, "logits/rejected": -0.553269624710083, "logps/chosen": -0.0002936360251624137, "logps/rejected": -1.9826910495758057, "loss": 0.6725, "nll_loss": 0.1680985689163208, "rewards/accuracies": 1.0, "rewards/chosen": -2.936360215244349e-05, "rewards/margins": 0.1982397586107254, "rewards/rejected": -0.19826912879943848, "step": 10947 }, { "epoch": 7.5712309820193635, "grad_norm": 5.351415157318115, "learning_rate": 1.3493161211003535e-05, "log_odds_chosen": 10.563679695129395, "log_odds_ratio": -5.192552271182649e-05, "logits/chosen": -0.21586225926876068, "logits/rejected": -0.19644798338413239, "logps/chosen": -0.0003393357910681516, "logps/rejected": -1.8586252927780151, "loss": 0.5359, "nll_loss": 0.1339617669582367, "rewards/accuracies": 1.0, "rewards/chosen": -3.39335783792194e-05, "rewards/margins": 0.18582861125469208, "rewards/rejected": -0.18586254119873047, "step": 10948 }, { "epoch": 7.57192254495159, "grad_norm": 4.629922389984131, "learning_rate": 1.3489319194713388e-05, "log_odds_chosen": 11.77937126159668, "log_odds_ratio": -3.537982047419064e-05, "logits/chosen": 0.03612430393695831, "logits/rejected": 0.025129958987236023, "logps/chosen": -0.0004390345420688391, "logps/rejected": -3.6106865406036377, "loss": 0.5558, "nll_loss": 0.1389411985874176, "rewards/accuracies": 1.0, "rewards/chosen": -4.390345566207543e-05, "rewards/margins": 0.36102473735809326, "rewards/rejected": -0.3610686659812927, "step": 10949 }, { "epoch": 7.572614107883817, "grad_norm": 6.359925270080566, "learning_rate": 1.3485477178423237e-05, "log_odds_chosen": 11.694245338439941, "log_odds_ratio": -0.0001377922744723037, "logits/chosen": 0.01395311951637268, "logits/rejected": -0.0705404207110405, "logps/chosen": -0.00012315658386796713, "logps/rejected": -2.751325845718384, "loss": 0.6301, "nll_loss": 0.15750938653945923, "rewards/accuracies": 1.0, "rewards/chosen": -1.2315658750594594e-05, "rewards/margins": 0.27512025833129883, "rewards/rejected": -0.27513253688812256, "step": 10950 }, { "epoch": 7.573305670816044, "grad_norm": 5.494706153869629, "learning_rate": 1.3481635162133088e-05, "log_odds_chosen": 11.016411781311035, "log_odds_ratio": -0.0002136414113920182, "logits/chosen": -0.6450036764144897, "logits/rejected": -0.6881355047225952, "logps/chosen": -0.000511638296302408, "logps/rejected": -1.8842029571533203, "loss": 0.4141, "nll_loss": 0.10351468622684479, "rewards/accuracies": 1.0, "rewards/chosen": -5.1163828175049275e-05, "rewards/margins": 0.18836914002895355, "rewards/rejected": -0.18842029571533203, "step": 10951 }, { "epoch": 7.573997233748271, "grad_norm": 4.768087387084961, "learning_rate": 1.347779314584294e-05, "log_odds_chosen": 10.597572326660156, "log_odds_ratio": -7.884033402660862e-05, "logits/chosen": -0.0407976359128952, "logits/rejected": -0.14931830763816833, "logps/chosen": -0.0009158989414572716, "logps/rejected": -2.176492214202881, "loss": 0.539, "nll_loss": 0.13473784923553467, "rewards/accuracies": 1.0, "rewards/chosen": -9.158989996649325e-05, "rewards/margins": 0.2175576388835907, "rewards/rejected": -0.21764922142028809, "step": 10952 }, { "epoch": 7.574688796680498, "grad_norm": 4.516747951507568, "learning_rate": 1.3473951129552789e-05, "log_odds_chosen": 10.323025703430176, "log_odds_ratio": -0.0001632234634598717, "logits/chosen": 0.14617076516151428, "logits/rejected": 0.029506176710128784, "logps/chosen": -0.0003115920699201524, "logps/rejected": -1.745704174041748, "loss": 0.4054, "nll_loss": 0.10133402049541473, "rewards/accuracies": 1.0, "rewards/chosen": -3.115920480922796e-05, "rewards/margins": 0.17453926801681519, "rewards/rejected": -0.17457042634487152, "step": 10953 }, { "epoch": 7.5753803596127245, "grad_norm": 18.594852447509766, "learning_rate": 1.3470109113262642e-05, "log_odds_chosen": 11.122175216674805, "log_odds_ratio": -1.9262806745246053e-05, "logits/chosen": -0.2828685939311981, "logits/rejected": -0.3240869641304016, "logps/chosen": -0.000400659249862656, "logps/rejected": -2.6810555458068848, "loss": 0.3611, "nll_loss": 0.09027761220932007, "rewards/accuracies": 1.0, "rewards/chosen": -4.006593007943593e-05, "rewards/margins": 0.26806551218032837, "rewards/rejected": -0.2681055963039398, "step": 10954 }, { "epoch": 7.576071922544951, "grad_norm": 6.125179290771484, "learning_rate": 1.3466267096972492e-05, "log_odds_chosen": 9.519245147705078, "log_odds_ratio": -0.0002525094896554947, "logits/chosen": -0.2528844475746155, "logits/rejected": -0.3225175738334656, "logps/chosen": -0.0008995598182082176, "logps/rejected": -1.8533120155334473, "loss": 0.4051, "nll_loss": 0.10125729441642761, "rewards/accuracies": 1.0, "rewards/chosen": -8.995598909677938e-05, "rewards/margins": 0.185241237282753, "rewards/rejected": -0.18533121049404144, "step": 10955 }, { "epoch": 7.576763485477178, "grad_norm": 6.245513916015625, "learning_rate": 1.3462425080682341e-05, "log_odds_chosen": 9.875443458557129, "log_odds_ratio": -0.001092126127332449, "logits/chosen": -0.346031129360199, "logits/rejected": -0.39958661794662476, "logps/chosen": -0.0008469183230772614, "logps/rejected": -1.874719500541687, "loss": 0.612, "nll_loss": 0.1528850495815277, "rewards/accuracies": 1.0, "rewards/chosen": -8.469182648696005e-05, "rewards/margins": 0.18738725781440735, "rewards/rejected": -0.18747195601463318, "step": 10956 }, { "epoch": 7.577455048409405, "grad_norm": 5.480423450469971, "learning_rate": 1.3458583064392194e-05, "log_odds_chosen": 10.91093635559082, "log_odds_ratio": -0.0004052415315527469, "logits/chosen": -0.14419147372245789, "logits/rejected": -0.27634382247924805, "logps/chosen": -0.0007029871921986341, "logps/rejected": -2.4353132247924805, "loss": 0.6362, "nll_loss": 0.15900415182113647, "rewards/accuracies": 1.0, "rewards/chosen": -7.029872358543798e-05, "rewards/margins": 0.24346104264259338, "rewards/rejected": -0.24353134632110596, "step": 10957 }, { "epoch": 7.578146611341632, "grad_norm": 9.778234481811523, "learning_rate": 1.3454741048102046e-05, "log_odds_chosen": 11.435365676879883, "log_odds_ratio": -4.242352588335052e-05, "logits/chosen": -0.5769628286361694, "logits/rejected": -0.63201904296875, "logps/chosen": -0.00024153337290044874, "logps/rejected": -2.636207103729248, "loss": 0.3735, "nll_loss": 0.09337200224399567, "rewards/accuracies": 1.0, "rewards/chosen": -2.4153336198651232e-05, "rewards/margins": 0.2635965943336487, "rewards/rejected": -0.2636207044124603, "step": 10958 }, { "epoch": 7.578838174273859, "grad_norm": 6.235721111297607, "learning_rate": 1.3450899031811895e-05, "log_odds_chosen": 12.401895523071289, "log_odds_ratio": -2.3549444449599832e-05, "logits/chosen": -0.3957058787345886, "logits/rejected": -0.5027981996536255, "logps/chosen": -0.00010508876584935933, "logps/rejected": -3.0551514625549316, "loss": 0.3612, "nll_loss": 0.09028894454240799, "rewards/accuracies": 1.0, "rewards/chosen": -1.0508876584935933e-05, "rewards/margins": 0.3055046498775482, "rewards/rejected": -0.3055151700973511, "step": 10959 }, { "epoch": 7.5795297372060855, "grad_norm": 6.023120403289795, "learning_rate": 1.3447057015521746e-05, "log_odds_chosen": 9.714308738708496, "log_odds_ratio": -0.00028595273033715785, "logits/chosen": -0.4025363326072693, "logits/rejected": -0.6282363533973694, "logps/chosen": -0.00048416247591376305, "logps/rejected": -1.7566821575164795, "loss": 0.4718, "nll_loss": 0.11792824417352676, "rewards/accuracies": 1.0, "rewards/chosen": -4.8416248318972066e-05, "rewards/margins": 0.17561981081962585, "rewards/rejected": -0.17566822469234467, "step": 10960 }, { "epoch": 7.580221300138312, "grad_norm": 6.062592506408691, "learning_rate": 1.3443214999231598e-05, "log_odds_chosen": 11.775699615478516, "log_odds_ratio": -1.3323345228855032e-05, "logits/chosen": -0.692188560962677, "logits/rejected": -0.8705258965492249, "logps/chosen": -0.00046975412988103926, "logps/rejected": -2.715114116668701, "loss": 0.5378, "nll_loss": 0.13445189595222473, "rewards/accuracies": 1.0, "rewards/chosen": -4.6975412260508165e-05, "rewards/margins": 0.271464467048645, "rewards/rejected": -0.271511435508728, "step": 10961 }, { "epoch": 7.580912863070539, "grad_norm": 3.96543550491333, "learning_rate": 1.3439372982941448e-05, "log_odds_chosen": 10.873712539672852, "log_odds_ratio": -0.00017372961156070232, "logits/chosen": -0.15156838297843933, "logits/rejected": -0.14810311794281006, "logps/chosen": -0.00023373104340862483, "logps/rejected": -2.0172066688537598, "loss": 0.476, "nll_loss": 0.11897880584001541, "rewards/accuracies": 1.0, "rewards/chosen": -2.3373106159851886e-05, "rewards/margins": 0.20169728994369507, "rewards/rejected": -0.20172066986560822, "step": 10962 }, { "epoch": 7.581604426002766, "grad_norm": 10.002387046813965, "learning_rate": 1.34355309666513e-05, "log_odds_chosen": 11.76556396484375, "log_odds_ratio": -3.788443427765742e-05, "logits/chosen": -0.5352721214294434, "logits/rejected": -0.49158841371536255, "logps/chosen": -0.000397785275708884, "logps/rejected": -3.296494483947754, "loss": 0.5677, "nll_loss": 0.1419147551059723, "rewards/accuracies": 1.0, "rewards/chosen": -3.977853339165449e-05, "rewards/margins": 0.3296097218990326, "rewards/rejected": -0.3296494781970978, "step": 10963 }, { "epoch": 7.582295988934993, "grad_norm": 6.157203674316406, "learning_rate": 1.343168895036115e-05, "log_odds_chosen": 10.94287109375, "log_odds_ratio": -2.8982145522604696e-05, "logits/chosen": -0.9206770062446594, "logits/rejected": -0.8118359446525574, "logps/chosen": -0.00016417729784734547, "logps/rejected": -1.7131142616271973, "loss": 0.3055, "nll_loss": 0.07637281715869904, "rewards/accuracies": 1.0, "rewards/chosen": -1.6417729057138786e-05, "rewards/margins": 0.17129501700401306, "rewards/rejected": -0.17131143808364868, "step": 10964 }, { "epoch": 7.58298755186722, "grad_norm": 4.484544277191162, "learning_rate": 1.3427846934071e-05, "log_odds_chosen": 9.924369812011719, "log_odds_ratio": -0.00012947487994097173, "logits/chosen": 0.10280543565750122, "logits/rejected": 0.010313667356967926, "logps/chosen": -0.0008380117360502481, "logps/rejected": -2.1853108406066895, "loss": 0.5956, "nll_loss": 0.14889571070671082, "rewards/accuracies": 1.0, "rewards/chosen": -8.380118379136547e-05, "rewards/margins": 0.21844729781150818, "rewards/rejected": -0.21853110194206238, "step": 10965 }, { "epoch": 7.5836791147994465, "grad_norm": 5.453164100646973, "learning_rate": 1.3424004917780852e-05, "log_odds_chosen": 11.101842880249023, "log_odds_ratio": -5.8247616834705696e-05, "logits/chosen": -0.3038039803504944, "logits/rejected": -0.29949885606765747, "logps/chosen": -0.0001985014823731035, "logps/rejected": -2.3401663303375244, "loss": 0.425, "nll_loss": 0.10624787211418152, "rewards/accuracies": 1.0, "rewards/chosen": -1.985014750971459e-05, "rewards/margins": 0.23399679362773895, "rewards/rejected": -0.23401664197444916, "step": 10966 }, { "epoch": 7.584370677731673, "grad_norm": 4.547872543334961, "learning_rate": 1.3420162901490705e-05, "log_odds_chosen": 9.808728218078613, "log_odds_ratio": -0.00034682743716984987, "logits/chosen": -0.5433746576309204, "logits/rejected": -0.5797238349914551, "logps/chosen": -0.0007145186536945403, "logps/rejected": -1.7367208003997803, "loss": 0.6369, "nll_loss": 0.1591930389404297, "rewards/accuracies": 1.0, "rewards/chosen": -7.145186100387946e-05, "rewards/margins": 0.17360062897205353, "rewards/rejected": -0.17367208003997803, "step": 10967 }, { "epoch": 7.5850622406639, "grad_norm": 5.118727207183838, "learning_rate": 1.3416320885200554e-05, "log_odds_chosen": 9.705362319946289, "log_odds_ratio": -0.004164530895650387, "logits/chosen": -0.5493965148925781, "logits/rejected": -0.5997754335403442, "logps/chosen": -0.02874472737312317, "logps/rejected": -1.9699627161026, "loss": 0.4065, "nll_loss": 0.10120692849159241, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028744726441800594, "rewards/margins": 0.1941218078136444, "rewards/rejected": -0.1969962865114212, "step": 10968 }, { "epoch": 7.585753803596127, "grad_norm": 5.127748966217041, "learning_rate": 1.3412478868910404e-05, "log_odds_chosen": 11.015447616577148, "log_odds_ratio": -0.00034674344351515174, "logits/chosen": -0.395458459854126, "logits/rejected": -0.5069045424461365, "logps/chosen": -0.0021178291644901037, "logps/rejected": -2.4018898010253906, "loss": 0.4879, "nll_loss": 0.12193099409341812, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002117828989867121, "rewards/margins": 0.2399771809577942, "rewards/rejected": -0.24018898606300354, "step": 10969 }, { "epoch": 7.586445366528354, "grad_norm": 3.3442649841308594, "learning_rate": 1.3408636852620254e-05, "log_odds_chosen": 11.893460273742676, "log_odds_ratio": -1.2278281246835832e-05, "logits/chosen": -0.3384820818901062, "logits/rejected": -0.3775098919868469, "logps/chosen": -8.853011240717024e-05, "logps/rejected": -2.3381340503692627, "loss": 0.4465, "nll_loss": 0.11162815988063812, "rewards/accuracies": 1.0, "rewards/chosen": -8.853011422615964e-06, "rewards/margins": 0.23380455374717712, "rewards/rejected": -0.23381341993808746, "step": 10970 }, { "epoch": 7.587136929460581, "grad_norm": 3.977128505706787, "learning_rate": 1.3404794836330106e-05, "log_odds_chosen": 11.14720344543457, "log_odds_ratio": -3.100410322076641e-05, "logits/chosen": -0.4686664044857025, "logits/rejected": -0.5176323652267456, "logps/chosen": -0.00023064535344019532, "logps/rejected": -2.558053493499756, "loss": 0.3092, "nll_loss": 0.07729601860046387, "rewards/accuracies": 1.0, "rewards/chosen": -2.306453461642377e-05, "rewards/margins": 0.2557823061943054, "rewards/rejected": -0.2558053731918335, "step": 10971 }, { "epoch": 7.587828492392807, "grad_norm": 5.078945636749268, "learning_rate": 1.3400952820039958e-05, "log_odds_chosen": 10.111377716064453, "log_odds_ratio": -0.00019062630599364638, "logits/chosen": -0.5156462788581848, "logits/rejected": -0.569412887096405, "logps/chosen": -0.0004553595499601215, "logps/rejected": -2.0580437183380127, "loss": 0.4682, "nll_loss": 0.11703924834728241, "rewards/accuracies": 1.0, "rewards/chosen": -4.553595499601215e-05, "rewards/margins": 0.20575882494449615, "rewards/rejected": -0.20580437779426575, "step": 10972 }, { "epoch": 7.588520055325034, "grad_norm": 3.717108964920044, "learning_rate": 1.3397110803749807e-05, "log_odds_chosen": 12.030096054077148, "log_odds_ratio": -5.3260264394339174e-05, "logits/chosen": -0.05415065586566925, "logits/rejected": -0.14219613373279572, "logps/chosen": -0.00016555978800170124, "logps/rejected": -2.8418140411376953, "loss": 0.4215, "nll_loss": 0.10536690801382065, "rewards/accuracies": 1.0, "rewards/chosen": -1.6555979527765885e-05, "rewards/margins": 0.2841648459434509, "rewards/rejected": -0.2841814160346985, "step": 10973 }, { "epoch": 7.589211618257261, "grad_norm": 2.715785503387451, "learning_rate": 1.339326878745966e-05, "log_odds_chosen": 10.593321800231934, "log_odds_ratio": -0.0017579298000782728, "logits/chosen": -0.6318291425704956, "logits/rejected": -0.6820675134658813, "logps/chosen": -0.008687568828463554, "logps/rejected": -2.200286626815796, "loss": 0.3041, "nll_loss": 0.07583903521299362, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008687569061294198, "rewards/margins": 0.21915991604328156, "rewards/rejected": -0.22002866864204407, "step": 10974 }, { "epoch": 7.589903181189488, "grad_norm": 15.399463653564453, "learning_rate": 1.338942677116951e-05, "log_odds_chosen": 9.43349838256836, "log_odds_ratio": -0.049315690994262695, "logits/chosen": 0.13571880757808685, "logits/rejected": 0.015549729578197002, "logps/chosen": -0.013712975196540356, "logps/rejected": -1.929039716720581, "loss": 0.4323, "nll_loss": 0.10313201695680618, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013712975196540356, "rewards/margins": 0.1915326714515686, "rewards/rejected": -0.19290399551391602, "step": 10975 }, { "epoch": 7.590594744121715, "grad_norm": 2.948235511779785, "learning_rate": 1.338558475487936e-05, "log_odds_chosen": 11.358328819274902, "log_odds_ratio": -2.3135744413593784e-05, "logits/chosen": -0.5567000508308411, "logits/rejected": -0.6210377216339111, "logps/chosen": -0.0002084262960124761, "logps/rejected": -2.1898272037506104, "loss": 0.283, "nll_loss": 0.07073809206485748, "rewards/accuracies": 1.0, "rewards/chosen": -2.084262996504549e-05, "rewards/margins": 0.21896187961101532, "rewards/rejected": -0.2189827263355255, "step": 10976 }, { "epoch": 7.591286307053942, "grad_norm": 4.210711479187012, "learning_rate": 1.3381742738589212e-05, "log_odds_chosen": 10.899679183959961, "log_odds_ratio": -3.151923374389298e-05, "logits/chosen": -0.7823795080184937, "logits/rejected": -0.7703849077224731, "logps/chosen": -6.220992509042844e-05, "logps/rejected": -1.3939049243927002, "loss": 0.3605, "nll_loss": 0.09011554718017578, "rewards/accuracies": 1.0, "rewards/chosen": -6.220993327588076e-06, "rewards/margins": 0.13938426971435547, "rewards/rejected": -0.1393904983997345, "step": 10977 }, { "epoch": 7.591977869986168, "grad_norm": 3.5889976024627686, "learning_rate": 1.3377900722299065e-05, "log_odds_chosen": 11.090879440307617, "log_odds_ratio": -2.8063863283023238e-05, "logits/chosen": -0.5752851963043213, "logits/rejected": -0.5773409605026245, "logps/chosen": -0.00026858376804739237, "logps/rejected": -2.355632781982422, "loss": 0.3274, "nll_loss": 0.08185110986232758, "rewards/accuracies": 1.0, "rewards/chosen": -2.685837898752652e-05, "rewards/margins": 0.23553641140460968, "rewards/rejected": -0.2355632781982422, "step": 10978 }, { "epoch": 7.592669432918395, "grad_norm": 4.86741828918457, "learning_rate": 1.3374058706008914e-05, "log_odds_chosen": 11.348465919494629, "log_odds_ratio": -2.008001865760889e-05, "logits/chosen": -0.5246957540512085, "logits/rejected": -0.5292121767997742, "logps/chosen": -0.00023641643929295242, "logps/rejected": -2.6028027534484863, "loss": 0.6461, "nll_loss": 0.16152501106262207, "rewards/accuracies": 1.0, "rewards/chosen": -2.364164356549736e-05, "rewards/margins": 0.26025664806365967, "rewards/rejected": -0.2602802515029907, "step": 10979 }, { "epoch": 7.593360995850622, "grad_norm": 5.084012985229492, "learning_rate": 1.3370216689718764e-05, "log_odds_chosen": 9.90985107421875, "log_odds_ratio": -0.0005122892325744033, "logits/chosen": -0.5847611427307129, "logits/rejected": -0.6524494886398315, "logps/chosen": -0.0005478012026287615, "logps/rejected": -1.9832638502120972, "loss": 0.6648, "nll_loss": 0.16615362465381622, "rewards/accuracies": 1.0, "rewards/chosen": -5.4780120990471914e-05, "rewards/margins": 0.19827160239219666, "rewards/rejected": -0.19832637906074524, "step": 10980 }, { "epoch": 7.594052558782849, "grad_norm": 7.956467628479004, "learning_rate": 1.3366374673428617e-05, "log_odds_chosen": 10.32036018371582, "log_odds_ratio": -0.000159708913997747, "logits/chosen": -0.22464311122894287, "logits/rejected": -0.23039782047271729, "logps/chosen": -0.0003303846169728786, "logps/rejected": -2.0702874660491943, "loss": 0.5042, "nll_loss": 0.12603464722633362, "rewards/accuracies": 1.0, "rewards/chosen": -3.303846824564971e-05, "rewards/margins": 0.20699569582939148, "rewards/rejected": -0.20702874660491943, "step": 10981 }, { "epoch": 7.594744121715076, "grad_norm": 9.162469863891602, "learning_rate": 1.3362532657138466e-05, "log_odds_chosen": 10.577921867370605, "log_odds_ratio": -9.873359522316605e-05, "logits/chosen": -0.6851182579994202, "logits/rejected": -0.5594465732574463, "logps/chosen": -0.0005685054929926991, "logps/rejected": -2.600642204284668, "loss": 0.5216, "nll_loss": 0.13038098812103271, "rewards/accuracies": 1.0, "rewards/chosen": -5.6850549299269915e-05, "rewards/margins": 0.260007381439209, "rewards/rejected": -0.2600642442703247, "step": 10982 }, { "epoch": 7.595435684647303, "grad_norm": 6.030345916748047, "learning_rate": 1.3358690640848318e-05, "log_odds_chosen": 11.665300369262695, "log_odds_ratio": -2.2962234652368352e-05, "logits/chosen": -0.31740760803222656, "logits/rejected": -0.39805299043655396, "logps/chosen": -0.0002136161783710122, "logps/rejected": -2.858198642730713, "loss": 0.4233, "nll_loss": 0.10583190619945526, "rewards/accuracies": 1.0, "rewards/chosen": -2.1361618564696983e-05, "rewards/margins": 0.28579849004745483, "rewards/rejected": -0.2858198583126068, "step": 10983 }, { "epoch": 7.596127247579529, "grad_norm": 9.487578392028809, "learning_rate": 1.3354848624558169e-05, "log_odds_chosen": 11.256978034973145, "log_odds_ratio": -2.319498162250966e-05, "logits/chosen": -0.5560814738273621, "logits/rejected": -0.5887335538864136, "logps/chosen": -0.00013760194997303188, "logps/rejected": -2.3657851219177246, "loss": 0.6228, "nll_loss": 0.1557023823261261, "rewards/accuracies": 1.0, "rewards/chosen": -1.376019645249471e-05, "rewards/margins": 0.2365647554397583, "rewards/rejected": -0.23657852411270142, "step": 10984 }, { "epoch": 7.596818810511756, "grad_norm": 4.475757122039795, "learning_rate": 1.3351006608268018e-05, "log_odds_chosen": 11.180810928344727, "log_odds_ratio": -7.700147398281842e-05, "logits/chosen": -0.20350779592990875, "logits/rejected": -0.23505568504333496, "logps/chosen": -0.00022611429449170828, "logps/rejected": -2.398804187774658, "loss": 0.4366, "nll_loss": 0.10914000123739243, "rewards/accuracies": 1.0, "rewards/chosen": -2.2611427993979305e-05, "rewards/margins": 0.23985780775547028, "rewards/rejected": -0.23988041281700134, "step": 10985 }, { "epoch": 7.597510373443983, "grad_norm": 3.7865943908691406, "learning_rate": 1.334716459197787e-05, "log_odds_chosen": 11.013235092163086, "log_odds_ratio": -3.90688655897975e-05, "logits/chosen": -0.33913129568099976, "logits/rejected": -0.37505990266799927, "logps/chosen": -0.0002632609975989908, "logps/rejected": -2.480992317199707, "loss": 0.2838, "nll_loss": 0.07094120979309082, "rewards/accuracies": 1.0, "rewards/chosen": -2.6326102670282125e-05, "rewards/margins": 0.24807292222976685, "rewards/rejected": -0.24809923768043518, "step": 10986 }, { "epoch": 7.59820193637621, "grad_norm": 4.870147228240967, "learning_rate": 1.3343322575687723e-05, "log_odds_chosen": 11.334236145019531, "log_odds_ratio": -2.096058233291842e-05, "logits/chosen": -0.3121829032897949, "logits/rejected": -0.403335303068161, "logps/chosen": -0.0002347916306462139, "logps/rejected": -2.9569520950317383, "loss": 0.5128, "nll_loss": 0.128209188580513, "rewards/accuracies": 1.0, "rewards/chosen": -2.347916415601503e-05, "rewards/margins": 0.2956717610359192, "rewards/rejected": -0.2956952154636383, "step": 10987 }, { "epoch": 7.598893499308437, "grad_norm": 7.494716167449951, "learning_rate": 1.3339480559397572e-05, "log_odds_chosen": 11.476689338684082, "log_odds_ratio": -2.559039057814516e-05, "logits/chosen": -0.4330449104309082, "logits/rejected": -0.4117595851421356, "logps/chosen": -0.00018414655642118305, "logps/rejected": -2.5021536350250244, "loss": 0.4617, "nll_loss": 0.11543288826942444, "rewards/accuracies": 1.0, "rewards/chosen": -1.8414655642118305e-05, "rewards/margins": 0.2501969337463379, "rewards/rejected": -0.2502153515815735, "step": 10988 }, { "epoch": 7.5995850622406635, "grad_norm": 4.136565208435059, "learning_rate": 1.3335638543107423e-05, "log_odds_chosen": 10.985734939575195, "log_odds_ratio": -0.00016389289521612227, "logits/chosen": -0.43322715163230896, "logits/rejected": -0.4590873718261719, "logps/chosen": -0.0002824410330504179, "logps/rejected": -2.6134121417999268, "loss": 0.3672, "nll_loss": 0.09179577976465225, "rewards/accuracies": 1.0, "rewards/chosen": -2.824410330504179e-05, "rewards/margins": 0.26131296157836914, "rewards/rejected": -0.2613412141799927, "step": 10989 }, { "epoch": 7.60027662517289, "grad_norm": 4.833652019500732, "learning_rate": 1.3331796526817275e-05, "log_odds_chosen": 11.522449493408203, "log_odds_ratio": -2.0415656763361767e-05, "logits/chosen": 0.11912795156240463, "logits/rejected": 0.1455882340669632, "logps/chosen": -0.00014867138816043735, "logps/rejected": -2.4429879188537598, "loss": 0.6078, "nll_loss": 0.15194672346115112, "rewards/accuracies": 1.0, "rewards/chosen": -1.4867138816043735e-05, "rewards/margins": 0.24428394436836243, "rewards/rejected": -0.2442988008260727, "step": 10990 }, { "epoch": 7.600968188105117, "grad_norm": 7.191924571990967, "learning_rate": 1.3327954510527124e-05, "log_odds_chosen": 11.404149055480957, "log_odds_ratio": -5.720463377656415e-05, "logits/chosen": -0.12018120288848877, "logits/rejected": -0.19981467723846436, "logps/chosen": -0.00022083328804001212, "logps/rejected": -2.782083034515381, "loss": 0.4762, "nll_loss": 0.11905424296855927, "rewards/accuracies": 1.0, "rewards/chosen": -2.2083329895394854e-05, "rewards/margins": 0.27818623185157776, "rewards/rejected": -0.27820831537246704, "step": 10991 }, { "epoch": 7.601659751037344, "grad_norm": 5.22706413269043, "learning_rate": 1.3324112494236977e-05, "log_odds_chosen": 10.684869766235352, "log_odds_ratio": -0.0003298583615105599, "logits/chosen": -0.1452624648809433, "logits/rejected": -0.24476586282253265, "logps/chosen": -0.00021354752243496478, "logps/rejected": -1.7574265003204346, "loss": 0.5013, "nll_loss": 0.12530261278152466, "rewards/accuracies": 1.0, "rewards/chosen": -2.1354751879698597e-05, "rewards/margins": 0.17572128772735596, "rewards/rejected": -0.17574265599250793, "step": 10992 }, { "epoch": 7.602351313969571, "grad_norm": 5.045536518096924, "learning_rate": 1.3320270477946828e-05, "log_odds_chosen": 10.604886054992676, "log_odds_ratio": -4.5088541810400784e-05, "logits/chosen": 0.04422904551029205, "logits/rejected": -0.07325298339128494, "logps/chosen": -0.000652353628538549, "logps/rejected": -2.1631197929382324, "loss": 0.5821, "nll_loss": 0.14552420377731323, "rewards/accuracies": 1.0, "rewards/chosen": -6.52353628538549e-05, "rewards/margins": 0.21624672412872314, "rewards/rejected": -0.2163119614124298, "step": 10993 }, { "epoch": 7.603042876901798, "grad_norm": 3.8175852298736572, "learning_rate": 1.3316428461656677e-05, "log_odds_chosen": 11.407567977905273, "log_odds_ratio": -2.908537135226652e-05, "logits/chosen": -0.21130746603012085, "logits/rejected": -0.28915801644325256, "logps/chosen": -0.00012252983287908137, "logps/rejected": -1.775246500968933, "loss": 0.3452, "nll_loss": 0.08629661798477173, "rewards/accuracies": 1.0, "rewards/chosen": -1.2252983651706018e-05, "rewards/margins": 0.17751239240169525, "rewards/rejected": -0.1775246560573578, "step": 10994 }, { "epoch": 7.6037344398340245, "grad_norm": 5.9048261642456055, "learning_rate": 1.3312586445366529e-05, "log_odds_chosen": 11.353921890258789, "log_odds_ratio": -9.299576777266338e-05, "logits/chosen": -0.4369305372238159, "logits/rejected": -0.5150572657585144, "logps/chosen": -0.00022164465917740017, "logps/rejected": -2.1506056785583496, "loss": 0.4776, "nll_loss": 0.11938893049955368, "rewards/accuracies": 1.0, "rewards/chosen": -2.2164465917740017e-05, "rewards/margins": 0.21503840386867523, "rewards/rejected": -0.21506056189537048, "step": 10995 }, { "epoch": 7.604426002766251, "grad_norm": 6.622079849243164, "learning_rate": 1.3308744429076381e-05, "log_odds_chosen": 10.349295616149902, "log_odds_ratio": -7.134541374398395e-05, "logits/chosen": -0.5894879698753357, "logits/rejected": -0.5946841835975647, "logps/chosen": -0.00031164148822426796, "logps/rejected": -1.9511890411376953, "loss": 0.3949, "nll_loss": 0.09870664030313492, "rewards/accuracies": 1.0, "rewards/chosen": -3.116414518444799e-05, "rewards/margins": 0.195087730884552, "rewards/rejected": -0.19511890411376953, "step": 10996 }, { "epoch": 7.605117565698478, "grad_norm": 3.938234329223633, "learning_rate": 1.330490241278623e-05, "log_odds_chosen": 10.668981552124023, "log_odds_ratio": -7.578918302897364e-05, "logits/chosen": -0.1792973130941391, "logits/rejected": -0.2401539832353592, "logps/chosen": -0.0008044381975196302, "logps/rejected": -2.882570505142212, "loss": 0.3754, "nll_loss": 0.09384553134441376, "rewards/accuracies": 1.0, "rewards/chosen": -8.044381684157997e-05, "rewards/margins": 0.28817659616470337, "rewards/rejected": -0.28825703263282776, "step": 10997 }, { "epoch": 7.605809128630705, "grad_norm": 6.447854995727539, "learning_rate": 1.3301060396496081e-05, "log_odds_chosen": 9.736894607543945, "log_odds_ratio": -0.0004386040091048926, "logits/chosen": -0.4050842225551605, "logits/rejected": -0.40405207872390747, "logps/chosen": -0.00036903645377606153, "logps/rejected": -1.3877718448638916, "loss": 0.3477, "nll_loss": 0.08688089996576309, "rewards/accuracies": 1.0, "rewards/chosen": -3.690364246722311e-05, "rewards/margins": 0.13874028623104095, "rewards/rejected": -0.13877719640731812, "step": 10998 }, { "epoch": 7.606500691562932, "grad_norm": 6.765289306640625, "learning_rate": 1.3297218380205934e-05, "log_odds_chosen": 10.077953338623047, "log_odds_ratio": -0.000245953124249354, "logits/chosen": 0.016869522631168365, "logits/rejected": -0.13176044821739197, "logps/chosen": -0.000427676597610116, "logps/rejected": -1.8634501695632935, "loss": 0.5673, "nll_loss": 0.1417999118566513, "rewards/accuracies": 1.0, "rewards/chosen": -4.2767656850628555e-05, "rewards/margins": 0.18630225956439972, "rewards/rejected": -0.18634501099586487, "step": 10999 }, { "epoch": 7.607192254495159, "grad_norm": 5.110645771026611, "learning_rate": 1.3293376363915783e-05, "log_odds_chosen": 11.453241348266602, "log_odds_ratio": -1.5676314433221705e-05, "logits/chosen": -0.6428710222244263, "logits/rejected": -0.628454327583313, "logps/chosen": -0.00034174363827332854, "logps/rejected": -2.8143365383148193, "loss": 0.4801, "nll_loss": 0.12001337110996246, "rewards/accuracies": 1.0, "rewards/chosen": -3.417436528252438e-05, "rewards/margins": 0.2813994884490967, "rewards/rejected": -0.281433641910553, "step": 11000 }, { "epoch": 7.6078838174273855, "grad_norm": 4.963628768920898, "learning_rate": 1.3289534347625635e-05, "log_odds_chosen": 10.312638282775879, "log_odds_ratio": -0.00037841207813471556, "logits/chosen": -0.15952733159065247, "logits/rejected": -0.047689080238342285, "logps/chosen": -0.000430744286859408, "logps/rejected": -1.889266848564148, "loss": 0.5159, "nll_loss": 0.12893825769424438, "rewards/accuracies": 1.0, "rewards/chosen": -4.3074429413536564e-05, "rewards/margins": 0.18888360261917114, "rewards/rejected": -0.18892668187618256, "step": 11001 }, { "epoch": 7.608575380359612, "grad_norm": 4.401406288146973, "learning_rate": 1.3285692331335486e-05, "log_odds_chosen": 11.755447387695312, "log_odds_ratio": -3.4304284781683236e-05, "logits/chosen": -0.5623564720153809, "logits/rejected": -0.5761032104492188, "logps/chosen": -0.00023698658333159983, "logps/rejected": -2.986236095428467, "loss": 0.4556, "nll_loss": 0.11389949917793274, "rewards/accuracies": 1.0, "rewards/chosen": -2.3698659788351506e-05, "rewards/margins": 0.29859989881515503, "rewards/rejected": -0.29862362146377563, "step": 11002 }, { "epoch": 7.609266943291839, "grad_norm": 4.843077182769775, "learning_rate": 1.3281850315045335e-05, "log_odds_chosen": 10.616469383239746, "log_odds_ratio": -6.649021815974265e-05, "logits/chosen": -0.30027931928634644, "logits/rejected": -0.256147176027298, "logps/chosen": -0.00033794311457313597, "logps/rejected": -2.2427988052368164, "loss": 0.4738, "nll_loss": 0.11844848841428757, "rewards/accuracies": 1.0, "rewards/chosen": -3.379431291250512e-05, "rewards/margins": 0.224246084690094, "rewards/rejected": -0.22427986562252045, "step": 11003 }, { "epoch": 7.609958506224066, "grad_norm": 16.88494873046875, "learning_rate": 1.3278008298755187e-05, "log_odds_chosen": 10.293458938598633, "log_odds_ratio": -0.001284759258851409, "logits/chosen": -0.34886935353279114, "logits/rejected": -0.31973719596862793, "logps/chosen": -0.0020102402195334435, "logps/rejected": -2.528747797012329, "loss": 0.3658, "nll_loss": 0.09131994843482971, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020102402777411044, "rewards/margins": 0.25267377495765686, "rewards/rejected": -0.25287479162216187, "step": 11004 }, { "epoch": 7.610650069156293, "grad_norm": 4.845985412597656, "learning_rate": 1.327416628246504e-05, "log_odds_chosen": 10.976385116577148, "log_odds_ratio": -8.727479143999517e-05, "logits/chosen": -0.379669189453125, "logits/rejected": -0.5029025077819824, "logps/chosen": -0.0001920202048495412, "logps/rejected": -1.9614152908325195, "loss": 0.3782, "nll_loss": 0.09454361349344254, "rewards/accuracies": 1.0, "rewards/chosen": -1.9202019757358357e-05, "rewards/margins": 0.19612233340740204, "rewards/rejected": -0.1961415410041809, "step": 11005 }, { "epoch": 7.61134163208852, "grad_norm": 3.453927993774414, "learning_rate": 1.3270324266174889e-05, "log_odds_chosen": 11.28010368347168, "log_odds_ratio": -4.166722283116542e-05, "logits/chosen": -0.3787435293197632, "logits/rejected": -0.456486314535141, "logps/chosen": -0.0001853770372690633, "logps/rejected": -2.5516510009765625, "loss": 0.2239, "nll_loss": 0.05597200244665146, "rewards/accuracies": 1.0, "rewards/chosen": -1.853770481829997e-05, "rewards/margins": 0.2551465630531311, "rewards/rejected": -0.25516510009765625, "step": 11006 }, { "epoch": 7.6120331950207465, "grad_norm": 4.172607421875, "learning_rate": 1.326648224988474e-05, "log_odds_chosen": 10.891128540039062, "log_odds_ratio": -2.8951366402907297e-05, "logits/chosen": -0.4312208294868469, "logits/rejected": -0.48598796129226685, "logps/chosen": -0.00013583191321231425, "logps/rejected": -1.9276304244995117, "loss": 0.4674, "nll_loss": 0.11685236543416977, "rewards/accuracies": 1.0, "rewards/chosen": -1.3583192412625067e-05, "rewards/margins": 0.19274947047233582, "rewards/rejected": -0.1927630603313446, "step": 11007 }, { "epoch": 7.612724757952973, "grad_norm": 4.176784515380859, "learning_rate": 1.3262640233594592e-05, "log_odds_chosen": 10.976896286010742, "log_odds_ratio": -0.00014842335076536983, "logits/chosen": -0.16801583766937256, "logits/rejected": -0.2282327264547348, "logps/chosen": -0.004101084545254707, "logps/rejected": -2.81337833404541, "loss": 0.461, "nll_loss": 0.11524462699890137, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004101084778085351, "rewards/margins": 0.28092771768569946, "rewards/rejected": -0.28133782744407654, "step": 11008 }, { "epoch": 7.6134163208852, "grad_norm": 6.124029636383057, "learning_rate": 1.3258798217304441e-05, "log_odds_chosen": 10.99689769744873, "log_odds_ratio": -3.287247818661854e-05, "logits/chosen": -0.6622257232666016, "logits/rejected": -0.7392969131469727, "logps/chosen": -0.00018039315182249993, "logps/rejected": -1.7692515850067139, "loss": 0.3868, "nll_loss": 0.09669550508260727, "rewards/accuracies": 1.0, "rewards/chosen": -1.8039316273643635e-05, "rewards/margins": 0.17690712213516235, "rewards/rejected": -0.1769251525402069, "step": 11009 }, { "epoch": 7.614107883817427, "grad_norm": 5.059366703033447, "learning_rate": 1.3254956201014294e-05, "log_odds_chosen": 12.686284065246582, "log_odds_ratio": -7.265820840984816e-06, "logits/chosen": -0.4957124888896942, "logits/rejected": -0.509590744972229, "logps/chosen": -9.553524432703853e-05, "logps/rejected": -3.3620762825012207, "loss": 0.4987, "nll_loss": 0.12466945499181747, "rewards/accuracies": 1.0, "rewards/chosen": -9.553524250804912e-06, "rewards/margins": 0.3361980617046356, "rewards/rejected": -0.33620762825012207, "step": 11010 }, { "epoch": 7.614799446749654, "grad_norm": 4.135800361633301, "learning_rate": 1.3251114184724144e-05, "log_odds_chosen": 9.902254104614258, "log_odds_ratio": -0.0010262223659083247, "logits/chosen": -0.37849074602127075, "logits/rejected": -0.48255205154418945, "logps/chosen": -0.0004565394192468375, "logps/rejected": -1.282253623008728, "loss": 0.5531, "nll_loss": 0.1381729543209076, "rewards/accuracies": 1.0, "rewards/chosen": -4.565394192468375e-05, "rewards/margins": 0.12817969918251038, "rewards/rejected": -0.12822535634040833, "step": 11011 }, { "epoch": 7.615491009681881, "grad_norm": 6.363430976867676, "learning_rate": 1.3247272168433993e-05, "log_odds_chosen": 10.299551010131836, "log_odds_ratio": -5.6483760999981314e-05, "logits/chosen": -0.4059584438800812, "logits/rejected": -0.4429929852485657, "logps/chosen": -0.00020562093413900584, "logps/rejected": -1.7810602188110352, "loss": 0.6852, "nll_loss": 0.1712915003299713, "rewards/accuracies": 1.0, "rewards/chosen": -2.0562092686304823e-05, "rewards/margins": 0.17808546125888824, "rewards/rejected": -0.17810603976249695, "step": 11012 }, { "epoch": 7.6161825726141075, "grad_norm": 7.91430139541626, "learning_rate": 1.3243430152143846e-05, "log_odds_chosen": 11.475375175476074, "log_odds_ratio": -3.780444239964709e-05, "logits/chosen": -0.5700872540473938, "logits/rejected": -0.6604025363922119, "logps/chosen": -0.0003129754331894219, "logps/rejected": -3.1353888511657715, "loss": 0.3652, "nll_loss": 0.09129648655653, "rewards/accuracies": 1.0, "rewards/chosen": -3.129754259134643e-05, "rewards/margins": 0.3135075569152832, "rewards/rejected": -0.31353887915611267, "step": 11013 }, { "epoch": 7.616874135546334, "grad_norm": 5.952118396759033, "learning_rate": 1.3239588135853698e-05, "log_odds_chosen": 9.803913116455078, "log_odds_ratio": -0.00028944603400304914, "logits/chosen": -0.25628095865249634, "logits/rejected": -0.29409071803092957, "logps/chosen": -0.00030227593379095197, "logps/rejected": -1.7432351112365723, "loss": 0.792, "nll_loss": 0.19796809554100037, "rewards/accuracies": 1.0, "rewards/chosen": -3.022759483428672e-05, "rewards/margins": 0.17429327964782715, "rewards/rejected": -0.17432349920272827, "step": 11014 }, { "epoch": 7.617565698478561, "grad_norm": 8.945135116577148, "learning_rate": 1.3235746119563547e-05, "log_odds_chosen": 11.400367736816406, "log_odds_ratio": -1.497354060120415e-05, "logits/chosen": -0.6030562520027161, "logits/rejected": -0.6528980731964111, "logps/chosen": -0.0002069780748570338, "logps/rejected": -2.4284608364105225, "loss": 0.5021, "nll_loss": 0.12551532685756683, "rewards/accuracies": 1.0, "rewards/chosen": -2.069780748570338e-05, "rewards/margins": 0.24282538890838623, "rewards/rejected": -0.24284610152244568, "step": 11015 }, { "epoch": 7.618257261410788, "grad_norm": 5.686136722564697, "learning_rate": 1.3231904103273398e-05, "log_odds_chosen": 11.649585723876953, "log_odds_ratio": -2.029037023021374e-05, "logits/chosen": -0.46048489212989807, "logits/rejected": -0.606641411781311, "logps/chosen": -6.944908091099933e-05, "logps/rejected": -2.059354543685913, "loss": 0.3509, "nll_loss": 0.08772450685501099, "rewards/accuracies": 1.0, "rewards/chosen": -6.9449079092009924e-06, "rewards/margins": 0.2059285193681717, "rewards/rejected": -0.20593544840812683, "step": 11016 }, { "epoch": 7.618948824343015, "grad_norm": 4.732438564300537, "learning_rate": 1.322806208698325e-05, "log_odds_chosen": 9.011289596557617, "log_odds_ratio": -0.11389197409152985, "logits/chosen": -0.37712395191192627, "logits/rejected": -0.46562129259109497, "logps/chosen": -0.02596820518374443, "logps/rejected": -1.6240522861480713, "loss": 0.7094, "nll_loss": 0.16597187519073486, "rewards/accuracies": 0.875, "rewards/chosen": -0.0025968204718083143, "rewards/margins": 0.15980841219425201, "rewards/rejected": -0.16240522265434265, "step": 11017 }, { "epoch": 7.619640387275242, "grad_norm": 5.688709259033203, "learning_rate": 1.32242200706931e-05, "log_odds_chosen": 11.936830520629883, "log_odds_ratio": -1.1859597179864068e-05, "logits/chosen": -0.36379367113113403, "logits/rejected": -0.5231572389602661, "logps/chosen": -8.398642967222258e-05, "logps/rejected": -2.399197578430176, "loss": 0.6543, "nll_loss": 0.16357439756393433, "rewards/accuracies": 1.0, "rewards/chosen": -8.39864333102014e-06, "rewards/margins": 0.23991134762763977, "rewards/rejected": -0.2399197369813919, "step": 11018 }, { "epoch": 7.6203319502074685, "grad_norm": 7.055410385131836, "learning_rate": 1.3220378054402952e-05, "log_odds_chosen": 11.101515769958496, "log_odds_ratio": -0.00024681005743332207, "logits/chosen": -0.38099271059036255, "logits/rejected": -0.40887144207954407, "logps/chosen": -0.00039470737101510167, "logps/rejected": -3.0094618797302246, "loss": 0.597, "nll_loss": 0.1492285132408142, "rewards/accuracies": 1.0, "rewards/chosen": -3.947073855670169e-05, "rewards/margins": 0.3009067475795746, "rewards/rejected": -0.3009462058544159, "step": 11019 }, { "epoch": 7.621023513139695, "grad_norm": 4.223811626434326, "learning_rate": 1.3216536038112803e-05, "log_odds_chosen": 11.240891456604004, "log_odds_ratio": -0.00010240564733976498, "logits/chosen": -0.3982436954975128, "logits/rejected": -0.43361055850982666, "logps/chosen": -0.00044147035805508494, "logps/rejected": -2.726694107055664, "loss": 0.5172, "nll_loss": 0.1292930543422699, "rewards/accuracies": 1.0, "rewards/chosen": -4.4147036533104256e-05, "rewards/margins": 0.27262526750564575, "rewards/rejected": -0.2726694345474243, "step": 11020 }, { "epoch": 7.621715076071922, "grad_norm": 4.663049697875977, "learning_rate": 1.3212694021822652e-05, "log_odds_chosen": 10.073478698730469, "log_odds_ratio": -0.0008136788383126259, "logits/chosen": -0.33111122250556946, "logits/rejected": -0.4370899498462677, "logps/chosen": -0.0008942785789258778, "logps/rejected": -1.933433175086975, "loss": 0.9654, "nll_loss": 0.24126268923282623, "rewards/accuracies": 1.0, "rewards/chosen": -8.942785643739626e-05, "rewards/margins": 0.19325390458106995, "rewards/rejected": -0.19334332644939423, "step": 11021 }, { "epoch": 7.622406639004149, "grad_norm": 3.3118700981140137, "learning_rate": 1.3208852005532504e-05, "log_odds_chosen": 10.24764633178711, "log_odds_ratio": -0.0003374480293132365, "logits/chosen": -0.3461621105670929, "logits/rejected": -0.29326191544532776, "logps/chosen": -0.0007046432583592832, "logps/rejected": -1.8110865354537964, "loss": 0.3827, "nll_loss": 0.09563969075679779, "rewards/accuracies": 1.0, "rewards/chosen": -7.04643243807368e-05, "rewards/margins": 0.18103818595409393, "rewards/rejected": -0.18110865354537964, "step": 11022 }, { "epoch": 7.623098201936376, "grad_norm": 32.8052864074707, "learning_rate": 1.3205009989242357e-05, "log_odds_chosen": 11.21299934387207, "log_odds_ratio": -2.9290804377524182e-05, "logits/chosen": 0.08305463194847107, "logits/rejected": -0.015703324228525162, "logps/chosen": -0.00022970230202190578, "logps/rejected": -2.0762534141540527, "loss": 0.6082, "nll_loss": 0.15203939378261566, "rewards/accuracies": 1.0, "rewards/chosen": -2.297023092978634e-05, "rewards/margins": 0.207602396607399, "rewards/rejected": -0.2076253592967987, "step": 11023 }, { "epoch": 7.623789764868603, "grad_norm": 4.772336959838867, "learning_rate": 1.3201167972952206e-05, "log_odds_chosen": 11.075516700744629, "log_odds_ratio": -0.00023821770446375012, "logits/chosen": -0.2510223090648651, "logits/rejected": -0.28794288635253906, "logps/chosen": -0.00033794849878177047, "logps/rejected": -2.5776126384735107, "loss": 0.5069, "nll_loss": 0.1267106682062149, "rewards/accuracies": 1.0, "rewards/chosen": -3.379485133336857e-05, "rewards/margins": 0.2577274441719055, "rewards/rejected": -0.25776124000549316, "step": 11024 }, { "epoch": 7.624481327800829, "grad_norm": 7.352637767791748, "learning_rate": 1.3197325956662057e-05, "log_odds_chosen": 10.048306465148926, "log_odds_ratio": -0.0002940360573120415, "logits/chosen": -0.5171242952346802, "logits/rejected": -0.6400086283683777, "logps/chosen": -0.0003537190204951912, "logps/rejected": -1.8638750314712524, "loss": 0.6937, "nll_loss": 0.1733952760696411, "rewards/accuracies": 1.0, "rewards/chosen": -3.5371904232306406e-05, "rewards/margins": 0.18635213375091553, "rewards/rejected": -0.18638749420642853, "step": 11025 }, { "epoch": 7.625172890733056, "grad_norm": 5.062249660491943, "learning_rate": 1.3193483940371909e-05, "log_odds_chosen": 11.70677375793457, "log_odds_ratio": -2.8659145755227655e-05, "logits/chosen": -0.5007099509239197, "logits/rejected": -0.5252422094345093, "logps/chosen": -0.00022100911883171648, "logps/rejected": -2.3927669525146484, "loss": 0.458, "nll_loss": 0.1145017147064209, "rewards/accuracies": 1.0, "rewards/chosen": -2.2100914065958932e-05, "rewards/margins": 0.23925460875034332, "rewards/rejected": -0.2392767071723938, "step": 11026 }, { "epoch": 7.625864453665283, "grad_norm": 3.744673252105713, "learning_rate": 1.3189641924081758e-05, "log_odds_chosen": 10.837970733642578, "log_odds_ratio": -0.00011957847164012492, "logits/chosen": -0.12889793515205383, "logits/rejected": -0.14431166648864746, "logps/chosen": -0.00019565450202208012, "logps/rejected": -1.9157960414886475, "loss": 0.3511, "nll_loss": 0.08775661885738373, "rewards/accuracies": 1.0, "rewards/chosen": -1.956544838321861e-05, "rewards/margins": 0.1915600299835205, "rewards/rejected": -0.19157959520816803, "step": 11027 }, { "epoch": 7.62655601659751, "grad_norm": 5.839942455291748, "learning_rate": 1.318579990779161e-05, "log_odds_chosen": 11.359109878540039, "log_odds_ratio": -3.5803877835860476e-05, "logits/chosen": -0.36645859479904175, "logits/rejected": -0.45863598585128784, "logps/chosen": -0.00030411925399675965, "logps/rejected": -2.276093006134033, "loss": 0.3209, "nll_loss": 0.0802203118801117, "rewards/accuracies": 1.0, "rewards/chosen": -3.0411923944484442e-05, "rewards/margins": 0.22757890820503235, "rewards/rejected": -0.2276093065738678, "step": 11028 }, { "epoch": 7.627247579529737, "grad_norm": 6.054105281829834, "learning_rate": 1.3181957891501461e-05, "log_odds_chosen": 9.636726379394531, "log_odds_ratio": -0.0002913882490247488, "logits/chosen": -0.4848182201385498, "logits/rejected": -0.537946343421936, "logps/chosen": -0.0012282358948141336, "logps/rejected": -1.7565040588378906, "loss": 0.5026, "nll_loss": 0.1256217062473297, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001228235923917964, "rewards/margins": 0.17552758753299713, "rewards/rejected": -0.17565041780471802, "step": 11029 }, { "epoch": 7.627939142461964, "grad_norm": 3.5452024936676025, "learning_rate": 1.317811587521131e-05, "log_odds_chosen": 10.0420560836792, "log_odds_ratio": -0.000112081368570216, "logits/chosen": -0.17948752641677856, "logits/rejected": -0.3174286484718323, "logps/chosen": -0.00016764186148066074, "logps/rejected": -1.5102574825286865, "loss": 0.3552, "nll_loss": 0.08878683298826218, "rewards/accuracies": 1.0, "rewards/chosen": -1.6764186511863954e-05, "rewards/margins": 0.1510089933872223, "rewards/rejected": -0.15102574229240417, "step": 11030 }, { "epoch": 7.62863070539419, "grad_norm": 6.366281986236572, "learning_rate": 1.3174273858921163e-05, "log_odds_chosen": 10.957046508789062, "log_odds_ratio": -9.888163913274184e-05, "logits/chosen": -0.3846125602722168, "logits/rejected": -0.5001208186149597, "logps/chosen": -0.0010962296510115266, "logps/rejected": -2.7141101360321045, "loss": 0.463, "nll_loss": 0.11573092639446259, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001096229680115357, "rewards/margins": 0.27130138874053955, "rewards/rejected": -0.2714110314846039, "step": 11031 }, { "epoch": 7.629322268326417, "grad_norm": 3.164262533187866, "learning_rate": 1.3170431842631015e-05, "log_odds_chosen": 10.120061874389648, "log_odds_ratio": -5.873154805158265e-05, "logits/chosen": -0.38629233837127686, "logits/rejected": -0.45555785298347473, "logps/chosen": -0.0002465298166498542, "logps/rejected": -1.6912938356399536, "loss": 0.3558, "nll_loss": 0.0889514684677124, "rewards/accuracies": 1.0, "rewards/chosen": -2.465298166498542e-05, "rewards/margins": 0.16910472512245178, "rewards/rejected": -0.1691294014453888, "step": 11032 }, { "epoch": 7.630013831258644, "grad_norm": 5.1167497634887695, "learning_rate": 1.3166589826340864e-05, "log_odds_chosen": 10.183965682983398, "log_odds_ratio": -0.0002524867304600775, "logits/chosen": -0.28899770975112915, "logits/rejected": -0.23264774680137634, "logps/chosen": -0.0006701169768348336, "logps/rejected": -2.2900922298431396, "loss": 0.3546, "nll_loss": 0.08861719071865082, "rewards/accuracies": 1.0, "rewards/chosen": -6.701170059386641e-05, "rewards/margins": 0.22894223034381866, "rewards/rejected": -0.2290092408657074, "step": 11033 }, { "epoch": 7.630705394190871, "grad_norm": 6.881916046142578, "learning_rate": 1.3162747810050715e-05, "log_odds_chosen": 11.365351676940918, "log_odds_ratio": -0.0002513904182706028, "logits/chosen": -0.2015291452407837, "logits/rejected": -0.4687212109565735, "logps/chosen": -0.000261218985542655, "logps/rejected": -2.440117835998535, "loss": 0.4145, "nll_loss": 0.10359037667512894, "rewards/accuracies": 1.0, "rewards/chosen": -2.612189928186126e-05, "rewards/margins": 0.24398568272590637, "rewards/rejected": -0.2440118044614792, "step": 11034 }, { "epoch": 7.631396957123098, "grad_norm": 3.9592254161834717, "learning_rate": 1.3158905793760567e-05, "log_odds_chosen": 11.326955795288086, "log_odds_ratio": -0.00016150146257132292, "logits/chosen": -0.5032748579978943, "logits/rejected": -0.6475515961647034, "logps/chosen": -0.00020860202494077384, "logps/rejected": -2.3688547611236572, "loss": 0.3881, "nll_loss": 0.09700487554073334, "rewards/accuracies": 1.0, "rewards/chosen": -2.0860203221673146e-05, "rewards/margins": 0.2368645817041397, "rewards/rejected": -0.2368854582309723, "step": 11035 }, { "epoch": 7.632088520055325, "grad_norm": 33.969703674316406, "learning_rate": 1.3155063777470417e-05, "log_odds_chosen": 11.514963150024414, "log_odds_ratio": -0.00013620522804558277, "logits/chosen": -0.34811991453170776, "logits/rejected": -0.2632172405719757, "logps/chosen": -0.0005448471638374031, "logps/rejected": -2.7535440921783447, "loss": 0.4057, "nll_loss": 0.10140715539455414, "rewards/accuracies": 1.0, "rewards/chosen": -5.4484720749314874e-05, "rewards/margins": 0.27529993653297424, "rewards/rejected": -0.27535441517829895, "step": 11036 }, { "epoch": 7.632780082987551, "grad_norm": 4.854983329772949, "learning_rate": 1.3151221761180269e-05, "log_odds_chosen": 9.725920677185059, "log_odds_ratio": -0.0005855665076524019, "logits/chosen": -0.6156142950057983, "logits/rejected": -0.7673656940460205, "logps/chosen": -0.0009282439714297652, "logps/rejected": -1.8330867290496826, "loss": 0.5263, "nll_loss": 0.13151204586029053, "rewards/accuracies": 1.0, "rewards/chosen": -9.282439714297652e-05, "rewards/margins": 0.18321585655212402, "rewards/rejected": -0.1833086907863617, "step": 11037 }, { "epoch": 7.633471645919778, "grad_norm": 4.63347864151001, "learning_rate": 1.3147379744890118e-05, "log_odds_chosen": 10.164543151855469, "log_odds_ratio": -0.00015178456669673324, "logits/chosen": -0.4672108292579651, "logits/rejected": -0.455607533454895, "logps/chosen": -0.0006099496386013925, "logps/rejected": -1.7959169149398804, "loss": 0.6339, "nll_loss": 0.1584549993276596, "rewards/accuracies": 1.0, "rewards/chosen": -6.099496386013925e-05, "rewards/margins": 0.17953070998191833, "rewards/rejected": -0.17959168553352356, "step": 11038 }, { "epoch": 7.634163208852005, "grad_norm": 3.911747455596924, "learning_rate": 1.3143537728599969e-05, "log_odds_chosen": 11.236038208007812, "log_odds_ratio": -2.346294786548242e-05, "logits/chosen": -0.409637987613678, "logits/rejected": -0.37441956996917725, "logps/chosen": -0.000681709498167038, "logps/rejected": -2.959486961364746, "loss": 0.5466, "nll_loss": 0.13664905726909637, "rewards/accuracies": 1.0, "rewards/chosen": -6.81709498167038e-05, "rewards/margins": 0.2958804965019226, "rewards/rejected": -0.29594868421554565, "step": 11039 }, { "epoch": 7.634854771784232, "grad_norm": 3.748584747314453, "learning_rate": 1.3139695712309821e-05, "log_odds_chosen": 11.168519020080566, "log_odds_ratio": -8.183128375094384e-05, "logits/chosen": -0.16099804639816284, "logits/rejected": -0.1140536293387413, "logps/chosen": -0.000173651977092959, "logps/rejected": -2.3219733238220215, "loss": 0.3957, "nll_loss": 0.09892675280570984, "rewards/accuracies": 1.0, "rewards/chosen": -1.736519880068954e-05, "rewards/margins": 0.23217995464801788, "rewards/rejected": -0.2321973294019699, "step": 11040 }, { "epoch": 7.635546334716459, "grad_norm": 8.52940559387207, "learning_rate": 1.313585369601967e-05, "log_odds_chosen": 9.946934700012207, "log_odds_ratio": -0.00021104965708218515, "logits/chosen": -0.5072532892227173, "logits/rejected": -0.5420174598693848, "logps/chosen": -0.0002429535670671612, "logps/rejected": -1.3531861305236816, "loss": 0.4053, "nll_loss": 0.10131174325942993, "rewards/accuracies": 1.0, "rewards/chosen": -2.4295355615322478e-05, "rewards/margins": 0.13529431819915771, "rewards/rejected": -0.1353186070919037, "step": 11041 }, { "epoch": 7.6362378976486855, "grad_norm": 4.192775249481201, "learning_rate": 1.3132011679729523e-05, "log_odds_chosen": 9.611391067504883, "log_odds_ratio": -0.0006527138175442815, "logits/chosen": -0.23275458812713623, "logits/rejected": -0.29491573572158813, "logps/chosen": -0.0010348953073844314, "logps/rejected": -1.9139440059661865, "loss": 0.4315, "nll_loss": 0.10780539363622665, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010348952491767704, "rewards/margins": 0.19129091501235962, "rewards/rejected": -0.1913943886756897, "step": 11042 }, { "epoch": 7.636929460580912, "grad_norm": 4.465821266174316, "learning_rate": 1.3128169663439373e-05, "log_odds_chosen": 9.13964557647705, "log_odds_ratio": -0.00025269301841035485, "logits/chosen": -0.5438176989555359, "logits/rejected": -0.5296689867973328, "logps/chosen": -0.0004898920306004584, "logps/rejected": -1.6209301948547363, "loss": 0.484, "nll_loss": 0.12096859514713287, "rewards/accuracies": 1.0, "rewards/chosen": -4.898920451523736e-05, "rewards/margins": 0.16204401850700378, "rewards/rejected": -0.16209301352500916, "step": 11043 }, { "epoch": 7.637621023513139, "grad_norm": 3.129265546798706, "learning_rate": 1.3124327647149223e-05, "log_odds_chosen": 11.348709106445312, "log_odds_ratio": -1.561789758852683e-05, "logits/chosen": -0.549349308013916, "logits/rejected": -0.5514668226242065, "logps/chosen": -8.251456165453419e-05, "logps/rejected": -1.7832891941070557, "loss": 0.3419, "nll_loss": 0.08547525107860565, "rewards/accuracies": 1.0, "rewards/chosen": -8.251457074948121e-06, "rewards/margins": 0.17832067608833313, "rewards/rejected": -0.17832891643047333, "step": 11044 }, { "epoch": 7.638312586445366, "grad_norm": 6.4049224853515625, "learning_rate": 1.3120485630859075e-05, "log_odds_chosen": 10.522080421447754, "log_odds_ratio": -9.593094728188589e-05, "logits/chosen": -0.32245880365371704, "logits/rejected": -0.2920140326023102, "logps/chosen": -0.0004965736879967153, "logps/rejected": -1.7941436767578125, "loss": 0.527, "nll_loss": 0.13173790276050568, "rewards/accuracies": 1.0, "rewards/chosen": -4.9657366616884246e-05, "rewards/margins": 0.17936471104621887, "rewards/rejected": -0.17941437661647797, "step": 11045 }, { "epoch": 7.639004149377593, "grad_norm": 4.091948509216309, "learning_rate": 1.3116643614568927e-05, "log_odds_chosen": 9.834325790405273, "log_odds_ratio": -0.0001687437470536679, "logits/chosen": -0.7652751207351685, "logits/rejected": -0.6992076635360718, "logps/chosen": -0.0004149047308601439, "logps/rejected": -1.7303555011749268, "loss": 0.6693, "nll_loss": 0.16730302572250366, "rewards/accuracies": 1.0, "rewards/chosen": -4.149047163082287e-05, "rewards/margins": 0.17299406230449677, "rewards/rejected": -0.17303556203842163, "step": 11046 }, { "epoch": 7.63969571230982, "grad_norm": 5.325662612915039, "learning_rate": 1.3112801598278776e-05, "log_odds_chosen": 11.176193237304688, "log_odds_ratio": -0.00010870936966966838, "logits/chosen": -0.8514454960823059, "logits/rejected": -0.8901273012161255, "logps/chosen": -0.0001286338228965178, "logps/rejected": -2.1061313152313232, "loss": 0.6168, "nll_loss": 0.15418866276741028, "rewards/accuracies": 1.0, "rewards/chosen": -1.28633819258539e-05, "rewards/margins": 0.21060027182102203, "rewards/rejected": -0.21061314642429352, "step": 11047 }, { "epoch": 7.6403872752420465, "grad_norm": 5.81463098526001, "learning_rate": 1.3108959581988627e-05, "log_odds_chosen": 11.00114917755127, "log_odds_ratio": -3.155127342324704e-05, "logits/chosen": -0.3544766306877136, "logits/rejected": -0.32276397943496704, "logps/chosen": -0.0002679823955986649, "logps/rejected": -2.583901882171631, "loss": 0.4699, "nll_loss": 0.11747168749570847, "rewards/accuracies": 1.0, "rewards/chosen": -2.6798239559866488e-05, "rewards/margins": 0.25836339592933655, "rewards/rejected": -0.2583901882171631, "step": 11048 }, { "epoch": 7.641078838174274, "grad_norm": 5.353766441345215, "learning_rate": 1.310511756569848e-05, "log_odds_chosen": 11.442070007324219, "log_odds_ratio": -1.775973942130804e-05, "logits/chosen": -0.6793317794799805, "logits/rejected": -0.642096996307373, "logps/chosen": -0.0002000771783059463, "logps/rejected": -2.514169692993164, "loss": 0.5048, "nll_loss": 0.12619104981422424, "rewards/accuracies": 1.0, "rewards/chosen": -2.000771746679675e-05, "rewards/margins": 0.25139695405960083, "rewards/rejected": -0.251416951417923, "step": 11049 }, { "epoch": 7.641770401106501, "grad_norm": 4.452378273010254, "learning_rate": 1.3101275549408329e-05, "log_odds_chosen": 10.53568172454834, "log_odds_ratio": -6.396832759492099e-05, "logits/chosen": 0.1380060613155365, "logits/rejected": 0.048368752002716064, "logps/chosen": -0.00042815087363123894, "logps/rejected": -2.1110527515411377, "loss": 0.5971, "nll_loss": 0.14927981793880463, "rewards/accuracies": 1.0, "rewards/chosen": -4.281508518033661e-05, "rewards/margins": 0.21106243133544922, "rewards/rejected": -0.21110525727272034, "step": 11050 }, { "epoch": 7.642461964038728, "grad_norm": 5.398094654083252, "learning_rate": 1.3097433533118181e-05, "log_odds_chosen": 10.258386611938477, "log_odds_ratio": -0.00016224366845563054, "logits/chosen": -0.8140449523925781, "logits/rejected": -0.8388763070106506, "logps/chosen": -0.00026844540843740106, "logps/rejected": -2.0129921436309814, "loss": 0.5928, "nll_loss": 0.1481805443763733, "rewards/accuracies": 1.0, "rewards/chosen": -2.684454375412315e-05, "rewards/margins": 0.2012723684310913, "rewards/rejected": -0.20129922032356262, "step": 11051 }, { "epoch": 7.643153526970955, "grad_norm": 4.131388187408447, "learning_rate": 1.3093591516828032e-05, "log_odds_chosen": 11.63953971862793, "log_odds_ratio": -2.8737176762660965e-05, "logits/chosen": -0.29496562480926514, "logits/rejected": -0.39437806606292725, "logps/chosen": -0.00011601299775065854, "logps/rejected": -2.398297071456909, "loss": 0.7148, "nll_loss": 0.17868834733963013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1601299775065854e-05, "rewards/margins": 0.23981809616088867, "rewards/rejected": -0.23982968926429749, "step": 11052 }, { "epoch": 7.643845089903182, "grad_norm": 3.8487603664398193, "learning_rate": 1.3089749500537881e-05, "log_odds_chosen": 10.986795425415039, "log_odds_ratio": -0.00010452770220581442, "logits/chosen": -0.11752845346927643, "logits/rejected": -0.21908220648765564, "logps/chosen": -0.0001764098706189543, "logps/rejected": -2.1109352111816406, "loss": 0.4098, "nll_loss": 0.10244373977184296, "rewards/accuracies": 1.0, "rewards/chosen": -1.764098669809755e-05, "rewards/margins": 0.21107587218284607, "rewards/rejected": -0.21109351515769958, "step": 11053 }, { "epoch": 7.644536652835408, "grad_norm": 5.135563850402832, "learning_rate": 1.3085907484247733e-05, "log_odds_chosen": 10.207877159118652, "log_odds_ratio": -0.0003905233461409807, "logits/chosen": -0.8292889595031738, "logits/rejected": -0.7769272327423096, "logps/chosen": -0.0012153888819739223, "logps/rejected": -2.044013261795044, "loss": 0.4407, "nll_loss": 0.1101444885134697, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012153889110777527, "rewards/margins": 0.20427978038787842, "rewards/rejected": -0.20440131425857544, "step": 11054 }, { "epoch": 7.645228215767635, "grad_norm": 3.8269355297088623, "learning_rate": 1.3082065467957586e-05, "log_odds_chosen": 11.03348159790039, "log_odds_ratio": -3.061819006688893e-05, "logits/chosen": -0.3585564196109772, "logits/rejected": -0.18128502368927002, "logps/chosen": -0.00017354279407300055, "logps/rejected": -1.9331927299499512, "loss": 0.4084, "nll_loss": 0.10210368037223816, "rewards/accuracies": 1.0, "rewards/chosen": -1.7354279407300055e-05, "rewards/margins": 0.19330193102359772, "rewards/rejected": -0.19331926107406616, "step": 11055 }, { "epoch": 7.645919778699862, "grad_norm": 3.101496696472168, "learning_rate": 1.3078223451667435e-05, "log_odds_chosen": 9.86187744140625, "log_odds_ratio": -0.0008957652025856078, "logits/chosen": -0.471457839012146, "logits/rejected": -0.4430035650730133, "logps/chosen": -0.0008445671992376447, "logps/rejected": -1.6268866062164307, "loss": 0.4819, "nll_loss": 0.12039318680763245, "rewards/accuracies": 1.0, "rewards/chosen": -8.445671846857294e-05, "rewards/margins": 0.1626042127609253, "rewards/rejected": -0.16268867254257202, "step": 11056 }, { "epoch": 7.646611341632089, "grad_norm": 5.060888290405273, "learning_rate": 1.3074381435377286e-05, "log_odds_chosen": 10.043825149536133, "log_odds_ratio": -0.00010040355118690059, "logits/chosen": -0.6101137399673462, "logits/rejected": -0.6584270000457764, "logps/chosen": -0.0004174050991423428, "logps/rejected": -1.608933687210083, "loss": 0.2704, "nll_loss": 0.06759215891361237, "rewards/accuracies": 1.0, "rewards/chosen": -4.174050991423428e-05, "rewards/margins": 0.1608516275882721, "rewards/rejected": -0.16089335083961487, "step": 11057 }, { "epoch": 7.647302904564316, "grad_norm": 4.368715763092041, "learning_rate": 1.3070539419087138e-05, "log_odds_chosen": 10.287227630615234, "log_odds_ratio": -4.808422090718523e-05, "logits/chosen": -0.42402833700180054, "logits/rejected": -0.5325243473052979, "logps/chosen": -0.00013529349234886467, "logps/rejected": -1.3623861074447632, "loss": 0.2842, "nll_loss": 0.0710451528429985, "rewards/accuracies": 1.0, "rewards/chosen": -1.3529348507290706e-05, "rewards/margins": 0.13622508943080902, "rewards/rejected": -0.13623861968517303, "step": 11058 }, { "epoch": 7.6479944674965425, "grad_norm": 3.9085097312927246, "learning_rate": 1.3066697402796987e-05, "log_odds_chosen": 11.880556106567383, "log_odds_ratio": -8.965845154307317e-06, "logits/chosen": -0.4448636770248413, "logits/rejected": -0.3722899854183197, "logps/chosen": -8.700077160028741e-05, "logps/rejected": -2.3147661685943604, "loss": 0.4813, "nll_loss": 0.1203346773982048, "rewards/accuracies": 1.0, "rewards/chosen": -8.70007716002874e-06, "rewards/margins": 0.23146793246269226, "rewards/rejected": -0.23147661983966827, "step": 11059 }, { "epoch": 7.648686030428769, "grad_norm": 5.653106212615967, "learning_rate": 1.306285538650684e-05, "log_odds_chosen": 11.682040214538574, "log_odds_ratio": -4.3620006181299686e-05, "logits/chosen": -0.41115790605545044, "logits/rejected": -0.5715847015380859, "logps/chosen": -0.00011102095595560968, "logps/rejected": -2.4288294315338135, "loss": 0.5793, "nll_loss": 0.14481747150421143, "rewards/accuracies": 1.0, "rewards/chosen": -1.110209632315673e-05, "rewards/margins": 0.24287183582782745, "rewards/rejected": -0.24288293719291687, "step": 11060 }, { "epoch": 7.649377593360996, "grad_norm": 4.428610324859619, "learning_rate": 1.305901337021669e-05, "log_odds_chosen": 12.014104843139648, "log_odds_ratio": -3.248677967349067e-05, "logits/chosen": -0.6964359879493713, "logits/rejected": -0.754778265953064, "logps/chosen": -0.00016804641927592456, "logps/rejected": -2.9685726165771484, "loss": 0.4959, "nll_loss": 0.12398052215576172, "rewards/accuracies": 1.0, "rewards/chosen": -1.6804642655188218e-05, "rewards/margins": 0.2968404293060303, "rewards/rejected": -0.29685723781585693, "step": 11061 }, { "epoch": 7.650069156293223, "grad_norm": 6.771653652191162, "learning_rate": 1.305517135392654e-05, "log_odds_chosen": 11.188127517700195, "log_odds_ratio": -2.3848771888879128e-05, "logits/chosen": -0.6413849592208862, "logits/rejected": -0.7468559145927429, "logps/chosen": -0.00015722739044576883, "logps/rejected": -2.141380548477173, "loss": 0.5106, "nll_loss": 0.1276474893093109, "rewards/accuracies": 1.0, "rewards/chosen": -1.5722740499768406e-05, "rewards/margins": 0.21412234008312225, "rewards/rejected": -0.21413806080818176, "step": 11062 }, { "epoch": 7.65076071922545, "grad_norm": 3.573374032974243, "learning_rate": 1.3051329337636392e-05, "log_odds_chosen": 11.263134002685547, "log_odds_ratio": -4.986676140106283e-05, "logits/chosen": -0.48395946621894836, "logits/rejected": -0.5471813082695007, "logps/chosen": -0.0003003604360856116, "logps/rejected": -2.517976999282837, "loss": 0.388, "nll_loss": 0.09699208289384842, "rewards/accuracies": 1.0, "rewards/chosen": -3.003604433615692e-05, "rewards/margins": 0.25176769495010376, "rewards/rejected": -0.25179770588874817, "step": 11063 }, { "epoch": 7.651452282157677, "grad_norm": 3.1509644985198975, "learning_rate": 1.3047487321346244e-05, "log_odds_chosen": 11.123980522155762, "log_odds_ratio": -0.00010064549860544503, "logits/chosen": -0.35980379581451416, "logits/rejected": -0.4639556109905243, "logps/chosen": -0.00015493386308662593, "logps/rejected": -2.0041680335998535, "loss": 0.3588, "nll_loss": 0.08968466520309448, "rewards/accuracies": 1.0, "rewards/chosen": -1.5493385944864713e-05, "rewards/margins": 0.20040130615234375, "rewards/rejected": -0.20041680335998535, "step": 11064 }, { "epoch": 7.6521438450899035, "grad_norm": 5.125320911407471, "learning_rate": 1.3043645305056093e-05, "log_odds_chosen": 10.726048469543457, "log_odds_ratio": -0.00013175973435863853, "logits/chosen": -0.6750519275665283, "logits/rejected": -0.722963809967041, "logps/chosen": -0.000154820314492099, "logps/rejected": -1.9031356573104858, "loss": 0.5442, "nll_loss": 0.13604173064231873, "rewards/accuracies": 1.0, "rewards/chosen": -1.5482029994018376e-05, "rewards/margins": 0.19029808044433594, "rewards/rejected": -0.19031357765197754, "step": 11065 }, { "epoch": 7.65283540802213, "grad_norm": 3.7792458534240723, "learning_rate": 1.3039803288765944e-05, "log_odds_chosen": 11.097878456115723, "log_odds_ratio": -6.332351040327922e-05, "logits/chosen": -0.40088146924972534, "logits/rejected": -0.414165735244751, "logps/chosen": -0.0004706221807282418, "logps/rejected": -2.7524054050445557, "loss": 0.3347, "nll_loss": 0.08366944640874863, "rewards/accuracies": 1.0, "rewards/chosen": -4.706221807282418e-05, "rewards/margins": 0.2751935124397278, "rewards/rejected": -0.27524057030677795, "step": 11066 }, { "epoch": 7.653526970954357, "grad_norm": 4.1204376220703125, "learning_rate": 1.3035961272475797e-05, "log_odds_chosen": 10.075401306152344, "log_odds_ratio": -0.00044693853124044836, "logits/chosen": -0.20077644288539886, "logits/rejected": -0.24853789806365967, "logps/chosen": -0.0005817624041810632, "logps/rejected": -1.96353018283844, "loss": 0.5577, "nll_loss": 0.13937382400035858, "rewards/accuracies": 1.0, "rewards/chosen": -5.817624332848936e-05, "rewards/margins": 0.1962948441505432, "rewards/rejected": -0.196353018283844, "step": 11067 }, { "epoch": 7.654218533886584, "grad_norm": 3.545658588409424, "learning_rate": 1.3032119256185646e-05, "log_odds_chosen": 10.331792831420898, "log_odds_ratio": -0.000348773377481848, "logits/chosen": -0.47433212399482727, "logits/rejected": -0.4328223168849945, "logps/chosen": -0.00035468151327222586, "logps/rejected": -1.8747044801712036, "loss": 0.4037, "nll_loss": 0.10088086873292923, "rewards/accuracies": 1.0, "rewards/chosen": -3.546815423760563e-05, "rewards/margins": 0.18743497133255005, "rewards/rejected": -0.1874704360961914, "step": 11068 }, { "epoch": 7.654910096818811, "grad_norm": 3.4151241779327393, "learning_rate": 1.3028277239895498e-05, "log_odds_chosen": 10.321552276611328, "log_odds_ratio": -0.00017826601106207818, "logits/chosen": -0.28782233595848083, "logits/rejected": -0.38986390829086304, "logps/chosen": -0.00032893777824938297, "logps/rejected": -1.685686469078064, "loss": 0.4947, "nll_loss": 0.12365522235631943, "rewards/accuracies": 1.0, "rewards/chosen": -3.289377855253406e-05, "rewards/margins": 0.1685357540845871, "rewards/rejected": -0.16856864094734192, "step": 11069 }, { "epoch": 7.655601659751038, "grad_norm": 5.626875400543213, "learning_rate": 1.302443522360535e-05, "log_odds_chosen": 11.525062561035156, "log_odds_ratio": -1.7547064999234863e-05, "logits/chosen": -0.4266539216041565, "logits/rejected": -0.5539487600326538, "logps/chosen": -0.0002981769503094256, "logps/rejected": -2.565019130706787, "loss": 0.5214, "nll_loss": 0.13034754991531372, "rewards/accuracies": 1.0, "rewards/chosen": -2.981769466714468e-05, "rewards/margins": 0.256472110748291, "rewards/rejected": -0.2565019130706787, "step": 11070 }, { "epoch": 7.6562932226832645, "grad_norm": 5.510021209716797, "learning_rate": 1.30205932073152e-05, "log_odds_chosen": 10.636519432067871, "log_odds_ratio": -0.0009921689052134752, "logits/chosen": -0.39235857129096985, "logits/rejected": -0.566108226776123, "logps/chosen": -0.00031541811767965555, "logps/rejected": -2.1545658111572266, "loss": 0.559, "nll_loss": 0.13966050744056702, "rewards/accuracies": 1.0, "rewards/chosen": -3.15418146783486e-05, "rewards/margins": 0.215425044298172, "rewards/rejected": -0.21545659005641937, "step": 11071 }, { "epoch": 7.656984785615491, "grad_norm": 5.544671058654785, "learning_rate": 1.301675119102505e-05, "log_odds_chosen": 11.99682903289795, "log_odds_ratio": -9.31865088205086e-06, "logits/chosen": -0.5022668838500977, "logits/rejected": -0.5518388152122498, "logps/chosen": -0.0001445444650016725, "logps/rejected": -2.7820653915405273, "loss": 0.5069, "nll_loss": 0.1267344355583191, "rewards/accuracies": 1.0, "rewards/chosen": -1.4454448319156654e-05, "rewards/margins": 0.27819210290908813, "rewards/rejected": -0.2782065272331238, "step": 11072 }, { "epoch": 7.657676348547718, "grad_norm": 5.128425598144531, "learning_rate": 1.3012909174734903e-05, "log_odds_chosen": 10.186700820922852, "log_odds_ratio": -8.977761899586767e-05, "logits/chosen": -0.5508114099502563, "logits/rejected": -0.5424075126647949, "logps/chosen": -0.00023308811069000512, "logps/rejected": -1.5030337572097778, "loss": 0.3516, "nll_loss": 0.08789639919996262, "rewards/accuracies": 1.0, "rewards/chosen": -2.330881034140475e-05, "rewards/margins": 0.15028007328510284, "rewards/rejected": -0.15030337870121002, "step": 11073 }, { "epoch": 7.658367911479945, "grad_norm": 3.445617198944092, "learning_rate": 1.3009067158444752e-05, "log_odds_chosen": 10.100408554077148, "log_odds_ratio": -6.970556569285691e-05, "logits/chosen": -0.5728808641433716, "logits/rejected": -0.5881326794624329, "logps/chosen": -0.00023653008975088596, "logps/rejected": -1.698958396911621, "loss": 0.3007, "nll_loss": 0.07517505437135696, "rewards/accuracies": 1.0, "rewards/chosen": -2.3653008611290716e-05, "rewards/margins": 0.16987217962741852, "rewards/rejected": -0.16989585757255554, "step": 11074 }, { "epoch": 7.659059474412172, "grad_norm": 7.81683874130249, "learning_rate": 1.3005225142154604e-05, "log_odds_chosen": 10.81536865234375, "log_odds_ratio": -0.00025127388653345406, "logits/chosen": -0.18729661405086517, "logits/rejected": -0.21124188601970673, "logps/chosen": -0.0003586372477002442, "logps/rejected": -2.481471538543701, "loss": 0.4688, "nll_loss": 0.11716914176940918, "rewards/accuracies": 1.0, "rewards/chosen": -3.586372622521594e-05, "rewards/margins": 0.2481113076210022, "rewards/rejected": -0.2481471598148346, "step": 11075 }, { "epoch": 7.659751037344399, "grad_norm": 6.080760955810547, "learning_rate": 1.3001383125864455e-05, "log_odds_chosen": 10.210432052612305, "log_odds_ratio": -0.00011322993668727577, "logits/chosen": -0.2846037745475769, "logits/rejected": -0.33245670795440674, "logps/chosen": -0.0004584690905176103, "logps/rejected": -1.9872753620147705, "loss": 0.4108, "nll_loss": 0.10269831866025925, "rewards/accuracies": 1.0, "rewards/chosen": -4.5846911234548315e-05, "rewards/margins": 0.19868171215057373, "rewards/rejected": -0.1987275630235672, "step": 11076 }, { "epoch": 7.6604426002766255, "grad_norm": 6.209712028503418, "learning_rate": 1.2997541109574304e-05, "log_odds_chosen": 10.95954704284668, "log_odds_ratio": -2.849074371624738e-05, "logits/chosen": -0.3062824010848999, "logits/rejected": -0.248677596449852, "logps/chosen": -0.000221608963329345, "logps/rejected": -1.973862886428833, "loss": 0.5438, "nll_loss": 0.1359541416168213, "rewards/accuracies": 1.0, "rewards/chosen": -2.2160895241540857e-05, "rewards/margins": 0.19736410677433014, "rewards/rejected": -0.19738629460334778, "step": 11077 }, { "epoch": 7.661134163208852, "grad_norm": 6.562619209289551, "learning_rate": 1.2993699093284156e-05, "log_odds_chosen": 10.852315902709961, "log_odds_ratio": -0.0002974254311993718, "logits/chosen": -0.46977511048316956, "logits/rejected": -0.5326958894729614, "logps/chosen": -0.0002460549003444612, "logps/rejected": -2.452684164047241, "loss": 0.4886, "nll_loss": 0.12212768197059631, "rewards/accuracies": 1.0, "rewards/chosen": -2.4605491489637643e-05, "rewards/margins": 0.24524381756782532, "rewards/rejected": -0.24526841938495636, "step": 11078 }, { "epoch": 7.661825726141079, "grad_norm": 8.187552452087402, "learning_rate": 1.2989857076994009e-05, "log_odds_chosen": 11.422736167907715, "log_odds_ratio": -0.00018802558770403266, "logits/chosen": -0.5413578748703003, "logits/rejected": -0.5794011354446411, "logps/chosen": -0.000185214274097234, "logps/rejected": -2.6838579177856445, "loss": 0.4113, "nll_loss": 0.10279948264360428, "rewards/accuracies": 1.0, "rewards/chosen": -1.852142668212764e-05, "rewards/margins": 0.26836729049682617, "rewards/rejected": -0.26838579773902893, "step": 11079 }, { "epoch": 7.662517289073306, "grad_norm": 5.590038776397705, "learning_rate": 1.2986015060703858e-05, "log_odds_chosen": 10.133560180664062, "log_odds_ratio": -0.0005444634589366615, "logits/chosen": -0.4628927409648895, "logits/rejected": -0.5485700368881226, "logps/chosen": -0.001269020838662982, "logps/rejected": -1.8179410696029663, "loss": 0.4836, "nll_loss": 0.12085415422916412, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012690208677668124, "rewards/margins": 0.18166720867156982, "rewards/rejected": -0.18179410696029663, "step": 11080 }, { "epoch": 7.663208852005533, "grad_norm": 3.306156635284424, "learning_rate": 1.2982173044413709e-05, "log_odds_chosen": 10.308717727661133, "log_odds_ratio": -0.00011060374527005479, "logits/chosen": -0.660594642162323, "logits/rejected": -0.6330130100250244, "logps/chosen": -0.00020389862766023725, "logps/rejected": -1.6218162775039673, "loss": 0.304, "nll_loss": 0.07598458230495453, "rewards/accuracies": 1.0, "rewards/chosen": -2.0389863493619487e-05, "rewards/margins": 0.1621612161397934, "rewards/rejected": -0.16218163073062897, "step": 11081 }, { "epoch": 7.66390041493776, "grad_norm": 5.0822272300720215, "learning_rate": 1.2978331028123561e-05, "log_odds_chosen": 10.214324951171875, "log_odds_ratio": -0.00010539717914070934, "logits/chosen": -0.35386863350868225, "logits/rejected": -0.3555700182914734, "logps/chosen": -0.0003553010756149888, "logps/rejected": -1.734631061553955, "loss": 0.5304, "nll_loss": 0.13260126113891602, "rewards/accuracies": 1.0, "rewards/chosen": -3.55301090166904e-05, "rewards/margins": 0.17342758178710938, "rewards/rejected": -0.1734631210565567, "step": 11082 }, { "epoch": 7.6645919778699865, "grad_norm": 7.021419048309326, "learning_rate": 1.297448901183341e-05, "log_odds_chosen": 10.682586669921875, "log_odds_ratio": -5.314256122801453e-05, "logits/chosen": -0.4502115249633789, "logits/rejected": -0.33163556456565857, "logps/chosen": -0.0002302555221831426, "logps/rejected": -2.1205430030822754, "loss": 0.437, "nll_loss": 0.10923456400632858, "rewards/accuracies": 1.0, "rewards/chosen": -2.3025553673505783e-05, "rewards/margins": 0.2120312750339508, "rewards/rejected": -0.2120543122291565, "step": 11083 }, { "epoch": 7.665283540802213, "grad_norm": 3.6946628093719482, "learning_rate": 1.2970646995543263e-05, "log_odds_chosen": 11.259334564208984, "log_odds_ratio": -2.1235533495200798e-05, "logits/chosen": -0.5278284549713135, "logits/rejected": -0.6166843175888062, "logps/chosen": -7.506051770178601e-05, "logps/rejected": -1.926891803741455, "loss": 0.3498, "nll_loss": 0.08743873238563538, "rewards/accuracies": 1.0, "rewards/chosen": -7.506051588279661e-06, "rewards/margins": 0.192681685090065, "rewards/rejected": -0.1926891952753067, "step": 11084 }, { "epoch": 7.66597510373444, "grad_norm": 3.6382389068603516, "learning_rate": 1.2966804979253113e-05, "log_odds_chosen": 11.002378463745117, "log_odds_ratio": -4.40895528299734e-05, "logits/chosen": -0.4844134449958801, "logits/rejected": -0.40494978427886963, "logps/chosen": -0.00016172064351849258, "logps/rejected": -1.9656169414520264, "loss": 0.3551, "nll_loss": 0.08876403421163559, "rewards/accuracies": 1.0, "rewards/chosen": -1.6172063624253497e-05, "rewards/margins": 0.1965455263853073, "rewards/rejected": -0.19656170904636383, "step": 11085 }, { "epoch": 7.666666666666667, "grad_norm": 8.224577903747559, "learning_rate": 1.2962962962962962e-05, "log_odds_chosen": 11.308886528015137, "log_odds_ratio": -4.0616108890390024e-05, "logits/chosen": -0.5384029746055603, "logits/rejected": -0.6056035161018372, "logps/chosen": -0.00028480388573370874, "logps/rejected": -2.533832311630249, "loss": 0.5159, "nll_loss": 0.1289687156677246, "rewards/accuracies": 1.0, "rewards/chosen": -2.8480388209572993e-05, "rewards/margins": 0.2533547282218933, "rewards/rejected": -0.25338321924209595, "step": 11086 }, { "epoch": 7.667358229598894, "grad_norm": 14.134316444396973, "learning_rate": 1.2959120946672815e-05, "log_odds_chosen": 11.041220664978027, "log_odds_ratio": -2.8689417376881465e-05, "logits/chosen": -0.821334719657898, "logits/rejected": -0.8351207375526428, "logps/chosen": -8.886594150681049e-05, "logps/rejected": -1.7878015041351318, "loss": 0.5122, "nll_loss": 0.12804894149303436, "rewards/accuracies": 1.0, "rewards/chosen": -8.88659451447893e-06, "rewards/margins": 0.1787712723016739, "rewards/rejected": -0.17878015339374542, "step": 11087 }, { "epoch": 7.668049792531121, "grad_norm": 4.176999092102051, "learning_rate": 1.2955278930382667e-05, "log_odds_chosen": 10.662544250488281, "log_odds_ratio": -3.5465975088300183e-05, "logits/chosen": -0.5635327696800232, "logits/rejected": -0.7098397016525269, "logps/chosen": -0.0001393976272083819, "logps/rejected": -1.786032795906067, "loss": 0.4045, "nll_loss": 0.10113196074962616, "rewards/accuracies": 1.0, "rewards/chosen": -1.393976344843395e-05, "rewards/margins": 0.1785893440246582, "rewards/rejected": -0.17860327661037445, "step": 11088 }, { "epoch": 7.6687413554633475, "grad_norm": 3.8286681175231934, "learning_rate": 1.2951436914092516e-05, "log_odds_chosen": 10.819299697875977, "log_odds_ratio": -5.4313921282300726e-05, "logits/chosen": -0.44108110666275024, "logits/rejected": -0.5384078621864319, "logps/chosen": -0.0001469150447519496, "logps/rejected": -1.7243348360061646, "loss": 0.3425, "nll_loss": 0.08562694489955902, "rewards/accuracies": 1.0, "rewards/chosen": -1.469150447519496e-05, "rewards/margins": 0.17241880297660828, "rewards/rejected": -0.1724334955215454, "step": 11089 }, { "epoch": 7.669432918395574, "grad_norm": 4.699414253234863, "learning_rate": 1.2947594897802367e-05, "log_odds_chosen": 10.169301986694336, "log_odds_ratio": -0.00018610645201988518, "logits/chosen": -0.6933047771453857, "logits/rejected": -0.7240607738494873, "logps/chosen": -0.0007905750535428524, "logps/rejected": -2.3316659927368164, "loss": 0.7884, "nll_loss": 0.19707630574703217, "rewards/accuracies": 1.0, "rewards/chosen": -7.905750680947676e-05, "rewards/margins": 0.23308753967285156, "rewards/rejected": -0.23316660523414612, "step": 11090 }, { "epoch": 7.670124481327801, "grad_norm": 7.806046962738037, "learning_rate": 1.294375288151222e-05, "log_odds_chosen": 10.760869979858398, "log_odds_ratio": -0.00026300305034965277, "logits/chosen": -0.2349396049976349, "logits/rejected": -0.4316999316215515, "logps/chosen": -0.00018964064656756818, "logps/rejected": -2.3453969955444336, "loss": 0.7376, "nll_loss": 0.1843789964914322, "rewards/accuracies": 1.0, "rewards/chosen": -1.89640650205547e-05, "rewards/margins": 0.23452074825763702, "rewards/rejected": -0.2345397025346756, "step": 11091 }, { "epoch": 7.670816044260028, "grad_norm": 3.226038694381714, "learning_rate": 1.2939910865222069e-05, "log_odds_chosen": 10.356670379638672, "log_odds_ratio": -7.697167893638834e-05, "logits/chosen": -0.4536958932876587, "logits/rejected": -0.35600998997688293, "logps/chosen": -0.00035774571006186306, "logps/rejected": -1.8557066917419434, "loss": 0.3579, "nll_loss": 0.08946461975574493, "rewards/accuracies": 1.0, "rewards/chosen": -3.5774570278590545e-05, "rewards/margins": 0.18553489446640015, "rewards/rejected": -0.18557068705558777, "step": 11092 }, { "epoch": 7.671507607192255, "grad_norm": 12.186654090881348, "learning_rate": 1.2936068848931921e-05, "log_odds_chosen": 11.077951431274414, "log_odds_ratio": -0.00010484673111932352, "logits/chosen": -0.623490571975708, "logits/rejected": -0.6357483863830566, "logps/chosen": -0.00024245721579063684, "logps/rejected": -2.1002511978149414, "loss": 0.3713, "nll_loss": 0.09281662106513977, "rewards/accuracies": 1.0, "rewards/chosen": -2.4245720851467922e-05, "rewards/margins": 0.2100008726119995, "rewards/rejected": -0.2100251317024231, "step": 11093 }, { "epoch": 7.672199170124482, "grad_norm": 3.616793394088745, "learning_rate": 1.2932226832641772e-05, "log_odds_chosen": 10.724013328552246, "log_odds_ratio": -3.4062111808452755e-05, "logits/chosen": -0.3245074152946472, "logits/rejected": -0.2890125811100006, "logps/chosen": -0.00021971345995552838, "logps/rejected": -2.0852818489074707, "loss": 0.3126, "nll_loss": 0.07814963161945343, "rewards/accuracies": 1.0, "rewards/chosen": -2.1971345631754957e-05, "rewards/margins": 0.20850621163845062, "rewards/rejected": -0.20852817595005035, "step": 11094 }, { "epoch": 7.672890733056708, "grad_norm": 4.2207112312316895, "learning_rate": 1.2928384816351621e-05, "log_odds_chosen": 10.779802322387695, "log_odds_ratio": -0.00010899059270741418, "logits/chosen": -0.4756370484828949, "logits/rejected": -0.522205114364624, "logps/chosen": -0.00020636826229747385, "logps/rejected": -2.0602314472198486, "loss": 0.4028, "nll_loss": 0.10067801177501678, "rewards/accuracies": 1.0, "rewards/chosen": -2.0636827684938908e-05, "rewards/margins": 0.20600253343582153, "rewards/rejected": -0.20602315664291382, "step": 11095 }, { "epoch": 7.673582295988935, "grad_norm": 6.0017781257629395, "learning_rate": 1.2924542800061473e-05, "log_odds_chosen": 11.695032119750977, "log_odds_ratio": -1.26361783259199e-05, "logits/chosen": -0.21643948554992676, "logits/rejected": -0.38103434443473816, "logps/chosen": -0.0001555204507894814, "logps/rejected": -2.811371326446533, "loss": 0.4623, "nll_loss": 0.11556336283683777, "rewards/accuracies": 1.0, "rewards/chosen": -1.5552042896160856e-05, "rewards/margins": 0.28112155199050903, "rewards/rejected": -0.2811371088027954, "step": 11096 }, { "epoch": 7.674273858921162, "grad_norm": 3.479611873626709, "learning_rate": 1.2920700783771326e-05, "log_odds_chosen": 11.381592750549316, "log_odds_ratio": -4.066659676027484e-05, "logits/chosen": -0.38168367743492126, "logits/rejected": -0.37484219670295715, "logps/chosen": -0.00011951103806495667, "logps/rejected": -1.8855031728744507, "loss": 0.4033, "nll_loss": 0.10081565380096436, "rewards/accuracies": 1.0, "rewards/chosen": -1.1951105079788249e-05, "rewards/margins": 0.1885383576154709, "rewards/rejected": -0.18855032324790955, "step": 11097 }, { "epoch": 7.674965421853389, "grad_norm": 4.836270332336426, "learning_rate": 1.2916858767481175e-05, "log_odds_chosen": 12.316877365112305, "log_odds_ratio": -4.044532761326991e-05, "logits/chosen": -0.6205891966819763, "logits/rejected": -0.6618015766143799, "logps/chosen": -0.00032085011480376124, "logps/rejected": -3.1996188163757324, "loss": 0.3858, "nll_loss": 0.09643936157226562, "rewards/accuracies": 1.0, "rewards/chosen": -3.208500856999308e-05, "rewards/margins": 0.3199298083782196, "rewards/rejected": -0.31996190547943115, "step": 11098 }, { "epoch": 7.675656984785616, "grad_norm": 4.804100513458252, "learning_rate": 1.2913016751191026e-05, "log_odds_chosen": 11.746265411376953, "log_odds_ratio": -2.240795765828807e-05, "logits/chosen": -0.36457559466362, "logits/rejected": -0.4602331221103668, "logps/chosen": -0.00010922572982963175, "logps/rejected": -2.3472108840942383, "loss": 0.4049, "nll_loss": 0.101227305829525, "rewards/accuracies": 1.0, "rewards/chosen": -1.0922573892457876e-05, "rewards/margins": 0.23471017181873322, "rewards/rejected": -0.2347211092710495, "step": 11099 }, { "epoch": 7.676348547717843, "grad_norm": 5.350192546844482, "learning_rate": 1.2909174734900878e-05, "log_odds_chosen": 10.736270904541016, "log_odds_ratio": -3.7787984183523804e-05, "logits/chosen": -0.39044010639190674, "logits/rejected": -0.5081877708435059, "logps/chosen": -0.00017108801694121212, "logps/rejected": -1.6307861804962158, "loss": 0.5719, "nll_loss": 0.14297832548618317, "rewards/accuracies": 1.0, "rewards/chosen": -1.7108803149312735e-05, "rewards/margins": 0.16306151449680328, "rewards/rejected": -0.16307863593101501, "step": 11100 }, { "epoch": 7.677040110650069, "grad_norm": 6.268930435180664, "learning_rate": 1.2905332718610727e-05, "log_odds_chosen": 11.114761352539062, "log_odds_ratio": -0.00010535813635215163, "logits/chosen": -0.4555742144584656, "logits/rejected": -0.5609944462776184, "logps/chosen": -0.0002988710184581578, "logps/rejected": -2.241150140762329, "loss": 0.3602, "nll_loss": 0.09004272520542145, "rewards/accuracies": 1.0, "rewards/chosen": -2.9887100026826374e-05, "rewards/margins": 0.22408512234687805, "rewards/rejected": -0.22411498427391052, "step": 11101 }, { "epoch": 7.677731673582296, "grad_norm": 4.027389049530029, "learning_rate": 1.290149070232058e-05, "log_odds_chosen": 10.439371109008789, "log_odds_ratio": -0.00016615752247162163, "logits/chosen": -0.40644049644470215, "logits/rejected": -0.4407414197921753, "logps/chosen": -0.00029875640757381916, "logps/rejected": -1.6533267498016357, "loss": 0.2944, "nll_loss": 0.07359431684017181, "rewards/accuracies": 1.0, "rewards/chosen": -2.9875640393584035e-05, "rewards/margins": 0.1653028130531311, "rewards/rejected": -0.16533267498016357, "step": 11102 }, { "epoch": 7.678423236514523, "grad_norm": 3.1208536624908447, "learning_rate": 1.289764868603043e-05, "log_odds_chosen": 9.625261306762695, "log_odds_ratio": -0.000835335929878056, "logits/chosen": -0.658169686794281, "logits/rejected": -0.645994246006012, "logps/chosen": -0.00024093702086247504, "logps/rejected": -1.2727044820785522, "loss": 0.4887, "nll_loss": 0.12207914888858795, "rewards/accuracies": 1.0, "rewards/chosen": -2.4093702450045384e-05, "rewards/margins": 0.12724635004997253, "rewards/rejected": -0.12727044522762299, "step": 11103 }, { "epoch": 7.67911479944675, "grad_norm": 4.69044303894043, "learning_rate": 1.289380666974028e-05, "log_odds_chosen": 11.716411590576172, "log_odds_ratio": -1.2548175618576352e-05, "logits/chosen": -0.4701429605484009, "logits/rejected": -0.46198830008506775, "logps/chosen": -0.00010991649469360709, "logps/rejected": -2.313502311706543, "loss": 0.4781, "nll_loss": 0.11953376978635788, "rewards/accuracies": 1.0, "rewards/chosen": -1.0991649105562828e-05, "rewards/margins": 0.2313392460346222, "rewards/rejected": -0.23135024309158325, "step": 11104 }, { "epoch": 7.679806362378977, "grad_norm": 5.351927280426025, "learning_rate": 1.2889964653450132e-05, "log_odds_chosen": 10.438713073730469, "log_odds_ratio": -0.00010361884051235393, "logits/chosen": -0.4354976415634155, "logits/rejected": -0.6020984649658203, "logps/chosen": -0.0008680078317411244, "logps/rejected": -2.54000186920166, "loss": 0.3392, "nll_loss": 0.08479855209589005, "rewards/accuracies": 1.0, "rewards/chosen": -8.680079190526158e-05, "rewards/margins": 0.25391340255737305, "rewards/rejected": -0.254000186920166, "step": 11105 }, { "epoch": 7.680497925311204, "grad_norm": 5.8652143478393555, "learning_rate": 1.288612263715998e-05, "log_odds_chosen": 10.755338668823242, "log_odds_ratio": -0.00010338029096601531, "logits/chosen": -0.24784396588802338, "logits/rejected": -0.2557956874370575, "logps/chosen": -0.0004714262904599309, "logps/rejected": -2.3482601642608643, "loss": 0.8675, "nll_loss": 0.2168610394001007, "rewards/accuracies": 1.0, "rewards/chosen": -4.7142631956376135e-05, "rewards/margins": 0.23477886617183685, "rewards/rejected": -0.234825998544693, "step": 11106 }, { "epoch": 7.68118948824343, "grad_norm": 4.124483108520508, "learning_rate": 1.2882280620869833e-05, "log_odds_chosen": 9.291773796081543, "log_odds_ratio": -0.0010523165110498667, "logits/chosen": -0.38579070568084717, "logits/rejected": -0.15841074287891388, "logps/chosen": -0.0006801905110478401, "logps/rejected": -1.8890366554260254, "loss": 0.404, "nll_loss": 0.10088435560464859, "rewards/accuracies": 1.0, "rewards/chosen": -6.80190569255501e-05, "rewards/margins": 0.18883565068244934, "rewards/rejected": -0.18890367448329926, "step": 11107 }, { "epoch": 7.681881051175657, "grad_norm": 5.067246913909912, "learning_rate": 1.2878438604579684e-05, "log_odds_chosen": 10.63860034942627, "log_odds_ratio": -3.794751319219358e-05, "logits/chosen": -0.3715812861919403, "logits/rejected": -0.4050114154815674, "logps/chosen": -0.0002055682853097096, "logps/rejected": -1.8258544206619263, "loss": 0.3734, "nll_loss": 0.09333580732345581, "rewards/accuracies": 1.0, "rewards/chosen": -2.055682853097096e-05, "rewards/margins": 0.1825648993253708, "rewards/rejected": -0.1825854480266571, "step": 11108 }, { "epoch": 7.682572614107884, "grad_norm": 4.823143005371094, "learning_rate": 1.2874596588289533e-05, "log_odds_chosen": 11.639387130737305, "log_odds_ratio": -1.680310560914222e-05, "logits/chosen": -0.630001425743103, "logits/rejected": -0.719383180141449, "logps/chosen": -0.0002806360134854913, "logps/rejected": -2.560408592224121, "loss": 0.4805, "nll_loss": 0.12012257426977158, "rewards/accuracies": 1.0, "rewards/chosen": -2.8063604986527935e-05, "rewards/margins": 0.25601279735565186, "rewards/rejected": -0.25604087114334106, "step": 11109 }, { "epoch": 7.683264177040111, "grad_norm": 3.9779834747314453, "learning_rate": 1.2870754571999386e-05, "log_odds_chosen": 11.51318359375, "log_odds_ratio": -2.5370054572704248e-05, "logits/chosen": -0.1353602111339569, "logits/rejected": -0.055926613509655, "logps/chosen": -9.17123252293095e-05, "logps/rejected": -2.1241979598999023, "loss": 0.3467, "nll_loss": 0.08666293323040009, "rewards/accuracies": 1.0, "rewards/chosen": -9.17123270482989e-06, "rewards/margins": 0.21241062879562378, "rewards/rejected": -0.2124198079109192, "step": 11110 }, { "epoch": 7.683955739972338, "grad_norm": 4.638302326202393, "learning_rate": 1.2866912555709238e-05, "log_odds_chosen": 10.735479354858398, "log_odds_ratio": -3.148660107399337e-05, "logits/chosen": -0.18519659340381622, "logits/rejected": -0.28615376353263855, "logps/chosen": -0.0001478224148740992, "logps/rejected": -1.9038336277008057, "loss": 0.3745, "nll_loss": 0.09363032132387161, "rewards/accuracies": 1.0, "rewards/chosen": -1.478224203310674e-05, "rewards/margins": 0.19036859273910522, "rewards/rejected": -0.19038337469100952, "step": 11111 }, { "epoch": 7.6846473029045645, "grad_norm": 4.754729270935059, "learning_rate": 1.2863070539419087e-05, "log_odds_chosen": 11.607973098754883, "log_odds_ratio": -0.000153713597683236, "logits/chosen": -0.2973116338253021, "logits/rejected": -0.28141382336616516, "logps/chosen": -0.00012407683243509382, "logps/rejected": -2.377037525177002, "loss": 0.5049, "nll_loss": 0.12619741261005402, "rewards/accuracies": 1.0, "rewards/chosen": -1.2407683243509382e-05, "rewards/margins": 0.23769137263298035, "rewards/rejected": -0.23770377039909363, "step": 11112 }, { "epoch": 7.685338865836791, "grad_norm": 5.249823570251465, "learning_rate": 1.2859228523128938e-05, "log_odds_chosen": 11.334756851196289, "log_odds_ratio": -0.00019641799735836685, "logits/chosen": -0.28146034479141235, "logits/rejected": -0.31021493673324585, "logps/chosen": -0.0004445326921995729, "logps/rejected": -3.2133309841156006, "loss": 0.4754, "nll_loss": 0.1188407689332962, "rewards/accuracies": 1.0, "rewards/chosen": -4.445326703717001e-05, "rewards/margins": 0.3212886452674866, "rewards/rejected": -0.321333110332489, "step": 11113 }, { "epoch": 7.686030428769018, "grad_norm": 4.652561187744141, "learning_rate": 1.285538650683879e-05, "log_odds_chosen": 11.96465015411377, "log_odds_ratio": -1.159081693913322e-05, "logits/chosen": -0.2252419888973236, "logits/rejected": -0.304015189409256, "logps/chosen": -0.00014822710363660008, "logps/rejected": -3.0069327354431152, "loss": 0.4671, "nll_loss": 0.11676928400993347, "rewards/accuracies": 1.0, "rewards/chosen": -1.4822710909356829e-05, "rewards/margins": 0.30067846179008484, "rewards/rejected": -0.3006933033466339, "step": 11114 }, { "epoch": 7.686721991701245, "grad_norm": 5.863364219665527, "learning_rate": 1.285154449054864e-05, "log_odds_chosen": 9.848235130310059, "log_odds_ratio": -9.699568181531504e-05, "logits/chosen": -0.31627774238586426, "logits/rejected": -0.4065108895301819, "logps/chosen": -0.0008459068485535681, "logps/rejected": -2.004502296447754, "loss": 0.4704, "nll_loss": 0.11758262664079666, "rewards/accuracies": 1.0, "rewards/chosen": -8.459068340016529e-05, "rewards/margins": 0.20036561787128448, "rewards/rejected": -0.20045022666454315, "step": 11115 }, { "epoch": 7.687413554633472, "grad_norm": 8.533553123474121, "learning_rate": 1.2847702474258492e-05, "log_odds_chosen": 9.963769912719727, "log_odds_ratio": -0.0006467354251071811, "logits/chosen": -0.8026405572891235, "logits/rejected": -0.7909748554229736, "logps/chosen": -0.000525271927472204, "logps/rejected": -1.9408668279647827, "loss": 0.4588, "nll_loss": 0.11463183909654617, "rewards/accuracies": 1.0, "rewards/chosen": -5.252718983683735e-05, "rewards/margins": 0.1940341591835022, "rewards/rejected": -0.1940866857767105, "step": 11116 }, { "epoch": 7.688105117565699, "grad_norm": 4.827835559844971, "learning_rate": 1.2843860457968342e-05, "log_odds_chosen": 10.789627075195312, "log_odds_ratio": -0.00013693736400455236, "logits/chosen": -0.22920483350753784, "logits/rejected": -0.24551275372505188, "logps/chosen": -0.0010796966962516308, "logps/rejected": -2.63169527053833, "loss": 0.3298, "nll_loss": 0.08242494612932205, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010796968126669526, "rewards/margins": 0.2630615532398224, "rewards/rejected": -0.263169527053833, "step": 11117 }, { "epoch": 7.6887966804979255, "grad_norm": 5.003745079040527, "learning_rate": 1.2840018441678192e-05, "log_odds_chosen": 10.771403312683105, "log_odds_ratio": -6.530247628688812e-05, "logits/chosen": -0.29149118065834045, "logits/rejected": -0.2357296198606491, "logps/chosen": -0.00015494701801799238, "logps/rejected": -1.824528455734253, "loss": 0.3593, "nll_loss": 0.089822918176651, "rewards/accuracies": 1.0, "rewards/chosen": -1.549470289319288e-05, "rewards/margins": 0.18243736028671265, "rewards/rejected": -0.18245285749435425, "step": 11118 }, { "epoch": 7.689488243430152, "grad_norm": 3.796119451522827, "learning_rate": 1.2836176425388044e-05, "log_odds_chosen": 10.518040657043457, "log_odds_ratio": -0.0004800974566023797, "logits/chosen": 0.30365198850631714, "logits/rejected": 0.26311445236206055, "logps/chosen": -0.0006324481219053268, "logps/rejected": -2.0838565826416016, "loss": 0.4678, "nll_loss": 0.11689867079257965, "rewards/accuracies": 1.0, "rewards/chosen": -6.324481364572421e-05, "rewards/margins": 0.2083224356174469, "rewards/rejected": -0.2083856612443924, "step": 11119 }, { "epoch": 7.690179806362379, "grad_norm": 5.890296936035156, "learning_rate": 1.2832334409097896e-05, "log_odds_chosen": 11.040729522705078, "log_odds_ratio": -9.762219269759953e-05, "logits/chosen": -0.4285397529602051, "logits/rejected": -0.23988042771816254, "logps/chosen": -0.00014194345567375422, "logps/rejected": -2.098386526107788, "loss": 0.3864, "nll_loss": 0.09659399092197418, "rewards/accuracies": 1.0, "rewards/chosen": -1.4194343748386018e-05, "rewards/margins": 0.20982447266578674, "rewards/rejected": -0.2098386585712433, "step": 11120 }, { "epoch": 7.690871369294606, "grad_norm": 5.301235198974609, "learning_rate": 1.2828492392807745e-05, "log_odds_chosen": 11.336024284362793, "log_odds_ratio": -2.032413613051176e-05, "logits/chosen": -0.1643558144569397, "logits/rejected": -0.24725359678268433, "logps/chosen": -0.00471664872020483, "logps/rejected": -3.218682050704956, "loss": 0.5756, "nll_loss": 0.14389663934707642, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004716648254543543, "rewards/margins": 0.3213965594768524, "rewards/rejected": -0.32186824083328247, "step": 11121 }, { "epoch": 7.691562932226833, "grad_norm": 5.216907024383545, "learning_rate": 1.2824650376517596e-05, "log_odds_chosen": 11.451807022094727, "log_odds_ratio": -2.9015925974817947e-05, "logits/chosen": -0.4482243061065674, "logits/rejected": -0.46607935428619385, "logps/chosen": -0.0003059771261177957, "logps/rejected": -2.219949722290039, "loss": 0.3293, "nll_loss": 0.08232185244560242, "rewards/accuracies": 1.0, "rewards/chosen": -3.0597715522162616e-05, "rewards/margins": 0.22196437418460846, "rewards/rejected": -0.22199496626853943, "step": 11122 }, { "epoch": 7.69225449515906, "grad_norm": 4.615692615509033, "learning_rate": 1.2820808360227449e-05, "log_odds_chosen": 11.06494426727295, "log_odds_ratio": -4.2912073695333675e-05, "logits/chosen": -0.5642020106315613, "logits/rejected": -0.5679250955581665, "logps/chosen": -0.00022454469581134617, "logps/rejected": -2.2882659435272217, "loss": 0.5167, "nll_loss": 0.12918058037757874, "rewards/accuracies": 1.0, "rewards/chosen": -2.2454469217336737e-05, "rewards/margins": 0.22880414128303528, "rewards/rejected": -0.2288265973329544, "step": 11123 }, { "epoch": 7.6929460580912865, "grad_norm": 7.286032676696777, "learning_rate": 1.2816966343937298e-05, "log_odds_chosen": 11.843324661254883, "log_odds_ratio": -1.1874160918523557e-05, "logits/chosen": -0.2214704006910324, "logits/rejected": -0.12504197657108307, "logps/chosen": -0.00011633805115707219, "logps/rejected": -2.239959239959717, "loss": 0.4741, "nll_loss": 0.11852260679006577, "rewards/accuracies": 1.0, "rewards/chosen": -1.1633805115707219e-05, "rewards/margins": 0.2239842712879181, "rewards/rejected": -0.22399590909481049, "step": 11124 }, { "epoch": 7.693637621023513, "grad_norm": 4.300328254699707, "learning_rate": 1.281312432764715e-05, "log_odds_chosen": 10.524087905883789, "log_odds_ratio": -7.253968942677602e-05, "logits/chosen": 0.20716895163059235, "logits/rejected": 0.08800049871206284, "logps/chosen": -0.00034770212369039655, "logps/rejected": -2.085852861404419, "loss": 0.5194, "nll_loss": 0.12983600795269012, "rewards/accuracies": 1.0, "rewards/chosen": -3.477021164144389e-05, "rewards/margins": 0.20855051279067993, "rewards/rejected": -0.20858529210090637, "step": 11125 }, { "epoch": 7.69432918395574, "grad_norm": 7.918651103973389, "learning_rate": 1.2809282311357001e-05, "log_odds_chosen": 9.899200439453125, "log_odds_ratio": -0.0010072446893900633, "logits/chosen": -0.49368736147880554, "logits/rejected": -0.596975564956665, "logps/chosen": -0.0014353056903928518, "logps/rejected": -1.908512830734253, "loss": 1.0489, "nll_loss": 0.2621241807937622, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014353056030813605, "rewards/margins": 0.1907077431678772, "rewards/rejected": -0.19085128605365753, "step": 11126 }, { "epoch": 7.695020746887967, "grad_norm": 5.294893264770508, "learning_rate": 1.280544029506685e-05, "log_odds_chosen": 9.854940414428711, "log_odds_ratio": -0.0003655731270555407, "logits/chosen": -0.22233858704566956, "logits/rejected": -0.26600658893585205, "logps/chosen": -0.00037876679562032223, "logps/rejected": -1.5811405181884766, "loss": 0.389, "nll_loss": 0.0972183495759964, "rewards/accuracies": 1.0, "rewards/chosen": -3.787668174481951e-05, "rewards/margins": 0.1580761820077896, "rewards/rejected": -0.15811406075954437, "step": 11127 }, { "epoch": 7.695712309820194, "grad_norm": 3.5334877967834473, "learning_rate": 1.2801598278776702e-05, "log_odds_chosen": 11.761754989624023, "log_odds_ratio": -1.1992600775556639e-05, "logits/chosen": -0.7332363724708557, "logits/rejected": -0.7346802949905396, "logps/chosen": -0.00010219802788924426, "logps/rejected": -2.2925033569335938, "loss": 0.4746, "nll_loss": 0.11864231526851654, "rewards/accuracies": 1.0, "rewards/chosen": -1.0219801879429724e-05, "rewards/margins": 0.22924014925956726, "rewards/rejected": -0.22925037145614624, "step": 11128 }, { "epoch": 7.696403872752421, "grad_norm": 3.8847203254699707, "learning_rate": 1.2797756262486555e-05, "log_odds_chosen": 10.920198440551758, "log_odds_ratio": -3.487545109237544e-05, "logits/chosen": -0.09859403967857361, "logits/rejected": -0.21252302825450897, "logps/chosen": -0.00011548738984856755, "logps/rejected": -1.8839833736419678, "loss": 0.5111, "nll_loss": 0.1277817338705063, "rewards/accuracies": 1.0, "rewards/chosen": -1.1548739166755695e-05, "rewards/margins": 0.18838679790496826, "rewards/rejected": -0.1883983463048935, "step": 11129 }, { "epoch": 7.6970954356846475, "grad_norm": 7.35653018951416, "learning_rate": 1.2793914246196404e-05, "log_odds_chosen": 11.698114395141602, "log_odds_ratio": -4.405621439218521e-05, "logits/chosen": -0.4789350628852844, "logits/rejected": -0.6115681529045105, "logps/chosen": -0.00026024412363767624, "logps/rejected": -3.242624521255493, "loss": 0.4186, "nll_loss": 0.10464924573898315, "rewards/accuracies": 1.0, "rewards/chosen": -2.602441054477822e-05, "rewards/margins": 0.3242364227771759, "rewards/rejected": -0.324262410402298, "step": 11130 }, { "epoch": 7.697786998616874, "grad_norm": 5.486740589141846, "learning_rate": 1.2790072229906255e-05, "log_odds_chosen": 11.674176216125488, "log_odds_ratio": -2.3742195480735973e-05, "logits/chosen": -0.4114059507846832, "logits/rejected": -0.4603744149208069, "logps/chosen": -0.0005180458538234234, "logps/rejected": -2.970984697341919, "loss": 0.3862, "nll_loss": 0.09653568267822266, "rewards/accuracies": 1.0, "rewards/chosen": -5.1804589020321146e-05, "rewards/margins": 0.2970466613769531, "rewards/rejected": -0.29709845781326294, "step": 11131 }, { "epoch": 7.698478561549101, "grad_norm": 5.388491630554199, "learning_rate": 1.2786230213616107e-05, "log_odds_chosen": 11.102253913879395, "log_odds_ratio": -5.318888361216523e-05, "logits/chosen": -0.4546557068824768, "logits/rejected": -0.453656405210495, "logps/chosen": -0.00015464294119738042, "logps/rejected": -2.2363927364349365, "loss": 0.4861, "nll_loss": 0.12153112143278122, "rewards/accuracies": 1.0, "rewards/chosen": -1.5464294847333804e-05, "rewards/margins": 0.22362381219863892, "rewards/rejected": -0.22363927960395813, "step": 11132 }, { "epoch": 7.699170124481328, "grad_norm": 3.3381705284118652, "learning_rate": 1.2782388197325956e-05, "log_odds_chosen": 10.243101119995117, "log_odds_ratio": -0.00012456311378628016, "logits/chosen": -0.18851953744888306, "logits/rejected": -0.20321039855480194, "logps/chosen": -0.0005285230581648648, "logps/rejected": -1.5706794261932373, "loss": 0.3261, "nll_loss": 0.08151256293058395, "rewards/accuracies": 1.0, "rewards/chosen": -5.2852308726869524e-05, "rewards/margins": 0.1570151001214981, "rewards/rejected": -0.15706795454025269, "step": 11133 }, { "epoch": 7.699861687413555, "grad_norm": 4.073915481567383, "learning_rate": 1.2778546181035809e-05, "log_odds_chosen": 10.793458938598633, "log_odds_ratio": -0.00012205556413391605, "logits/chosen": -0.5016660690307617, "logits/rejected": -0.5064582824707031, "logps/chosen": -0.0006956355064176023, "logps/rejected": -2.2256133556365967, "loss": 0.4542, "nll_loss": 0.11353007704019547, "rewards/accuracies": 1.0, "rewards/chosen": -6.956355355214328e-05, "rewards/margins": 0.2224917858839035, "rewards/rejected": -0.2225613296031952, "step": 11134 }, { "epoch": 7.700553250345782, "grad_norm": 5.965721130371094, "learning_rate": 1.277470416474566e-05, "log_odds_chosen": 9.991838455200195, "log_odds_ratio": -0.00011569932394195348, "logits/chosen": -0.2887195944786072, "logits/rejected": -0.35821449756622314, "logps/chosen": -0.0003183086810167879, "logps/rejected": -1.6685514450073242, "loss": 0.616, "nll_loss": 0.15399618446826935, "rewards/accuracies": 1.0, "rewards/chosen": -3.1830866646487266e-05, "rewards/margins": 0.16682332754135132, "rewards/rejected": -0.16685515642166138, "step": 11135 }, { "epoch": 7.7012448132780085, "grad_norm": 4.846920967102051, "learning_rate": 1.2770862148455508e-05, "log_odds_chosen": 11.548954010009766, "log_odds_ratio": -1.586158396094106e-05, "logits/chosen": -0.5817251801490784, "logits/rejected": -0.6089695692062378, "logps/chosen": -9.639248310122639e-05, "logps/rejected": -2.2452001571655273, "loss": 0.614, "nll_loss": 0.1535080224275589, "rewards/accuracies": 1.0, "rewards/chosen": -9.63924867392052e-06, "rewards/margins": 0.22451037168502808, "rewards/rejected": -0.2245200127363205, "step": 11136 }, { "epoch": 7.701936376210235, "grad_norm": 5.040010929107666, "learning_rate": 1.276702013216536e-05, "log_odds_chosen": 11.656959533691406, "log_odds_ratio": -3.1027902878122404e-05, "logits/chosen": 0.11758328974246979, "logits/rejected": 0.0003274455666542053, "logps/chosen": -0.0001720060536172241, "logps/rejected": -2.435650587081909, "loss": 0.6889, "nll_loss": 0.17223119735717773, "rewards/accuracies": 1.0, "rewards/chosen": -1.720060572552029e-05, "rewards/margins": 0.24354785680770874, "rewards/rejected": -0.24356506764888763, "step": 11137 }, { "epoch": 7.702627939142462, "grad_norm": 7.25405740737915, "learning_rate": 1.2763178115875213e-05, "log_odds_chosen": 9.189305305480957, "log_odds_ratio": -0.0007634533103555441, "logits/chosen": -0.340925931930542, "logits/rejected": -0.4222091734409332, "logps/chosen": -0.0002917966339737177, "logps/rejected": -1.4182566404342651, "loss": 0.7082, "nll_loss": 0.17698562145233154, "rewards/accuracies": 1.0, "rewards/chosen": -2.9179662305978127e-05, "rewards/margins": 0.14179648458957672, "rewards/rejected": -0.14182567596435547, "step": 11138 }, { "epoch": 7.703319502074689, "grad_norm": 4.661027908325195, "learning_rate": 1.2759336099585062e-05, "log_odds_chosen": 11.074902534484863, "log_odds_ratio": -0.00014685062342323363, "logits/chosen": -0.5922839641571045, "logits/rejected": -0.5710658431053162, "logps/chosen": -0.000186232035048306, "logps/rejected": -2.023669481277466, "loss": 0.4383, "nll_loss": 0.10955186188220978, "rewards/accuracies": 1.0, "rewards/chosen": -1.8623202777234837e-05, "rewards/margins": 0.20234830677509308, "rewards/rejected": -0.2023669332265854, "step": 11139 }, { "epoch": 7.704011065006916, "grad_norm": 4.876995086669922, "learning_rate": 1.2755494083294913e-05, "log_odds_chosen": 10.508922576904297, "log_odds_ratio": -0.00020858444622717798, "logits/chosen": -0.3271537721157074, "logits/rejected": -0.34680402278900146, "logps/chosen": -0.00028275157092139125, "logps/rejected": -2.260883331298828, "loss": 0.6873, "nll_loss": 0.1718018352985382, "rewards/accuracies": 1.0, "rewards/chosen": -2.8275157092139125e-05, "rewards/margins": 0.22606006264686584, "rewards/rejected": -0.22608834505081177, "step": 11140 }, { "epoch": 7.704702627939143, "grad_norm": 4.962493896484375, "learning_rate": 1.2751652067004766e-05, "log_odds_chosen": 10.637468338012695, "log_odds_ratio": -0.00045328630949370563, "logits/chosen": -0.7488378286361694, "logits/rejected": -0.7832546234130859, "logps/chosen": -0.0004857642634306103, "logps/rejected": -2.3516077995300293, "loss": 0.3865, "nll_loss": 0.09657375514507294, "rewards/accuracies": 1.0, "rewards/chosen": -4.8576428525848314e-05, "rewards/margins": 0.23511220514774323, "rewards/rejected": -0.23516079783439636, "step": 11141 }, { "epoch": 7.7053941908713695, "grad_norm": 4.3264617919921875, "learning_rate": 1.2747810050714615e-05, "log_odds_chosen": 11.630284309387207, "log_odds_ratio": -0.00012610270641744137, "logits/chosen": -0.39398303627967834, "logits/rejected": -0.5046098828315735, "logps/chosen": -0.00018928886856883764, "logps/rejected": -2.340277671813965, "loss": 0.4792, "nll_loss": 0.1197928637266159, "rewards/accuracies": 1.0, "rewards/chosen": -1.8928887584479526e-05, "rewards/margins": 0.2340088188648224, "rewards/rejected": -0.23402777314186096, "step": 11142 }, { "epoch": 7.706085753803596, "grad_norm": 4.560120582580566, "learning_rate": 1.2743968034424467e-05, "log_odds_chosen": 10.950681686401367, "log_odds_ratio": -0.00018634075240697712, "logits/chosen": -0.6208133101463318, "logits/rejected": -0.6838634610176086, "logps/chosen": -0.0005703361239284277, "logps/rejected": -1.7782117128372192, "loss": 0.4036, "nll_loss": 0.10088105499744415, "rewards/accuracies": 1.0, "rewards/chosen": -5.703361239284277e-05, "rewards/margins": 0.1777641475200653, "rewards/rejected": -0.17782118916511536, "step": 11143 }, { "epoch": 7.706777316735823, "grad_norm": 5.527631759643555, "learning_rate": 1.2740126018134318e-05, "log_odds_chosen": 10.481454849243164, "log_odds_ratio": -0.00026709839585237205, "logits/chosen": -0.10641849040985107, "logits/rejected": -0.11019318550825119, "logps/chosen": -0.0004817845765501261, "logps/rejected": -2.248533248901367, "loss": 0.5156, "nll_loss": 0.12888376414775848, "rewards/accuracies": 1.0, "rewards/chosen": -4.817845911020413e-05, "rewards/margins": 0.22480514645576477, "rewards/rejected": -0.22485333681106567, "step": 11144 }, { "epoch": 7.70746887966805, "grad_norm": 5.158650875091553, "learning_rate": 1.2736284001844167e-05, "log_odds_chosen": 10.584527969360352, "log_odds_ratio": -5.264970241114497e-05, "logits/chosen": -0.32706624269485474, "logits/rejected": -0.3761626183986664, "logps/chosen": -0.00027035269886255264, "logps/rejected": -1.951050043106079, "loss": 0.3769, "nll_loss": 0.09421977400779724, "rewards/accuracies": 1.0, "rewards/chosen": -2.7035270250053145e-05, "rewards/margins": 0.19507798552513123, "rewards/rejected": -0.19510501623153687, "step": 11145 }, { "epoch": 7.708160442600277, "grad_norm": 4.094021320343018, "learning_rate": 1.273244198555402e-05, "log_odds_chosen": 10.128005981445312, "log_odds_ratio": -0.0001861144119175151, "logits/chosen": -0.32210487127304077, "logits/rejected": -0.3130614161491394, "logps/chosen": -0.0003607625840231776, "logps/rejected": -1.6804444789886475, "loss": 0.5249, "nll_loss": 0.1311992108821869, "rewards/accuracies": 1.0, "rewards/chosen": -3.607625694712624e-05, "rewards/margins": 0.16800838708877563, "rewards/rejected": -0.16804444789886475, "step": 11146 }, { "epoch": 7.708852005532504, "grad_norm": 3.946537971496582, "learning_rate": 1.2728599969263872e-05, "log_odds_chosen": 11.047607421875, "log_odds_ratio": -2.5542063667671755e-05, "logits/chosen": -0.4898287057876587, "logits/rejected": -0.4658747613430023, "logps/chosen": -0.00014541887503582984, "logps/rejected": -1.984635353088379, "loss": 0.378, "nll_loss": 0.09449951350688934, "rewards/accuracies": 1.0, "rewards/chosen": -1.4541888958774507e-05, "rewards/margins": 0.1984490007162094, "rewards/rejected": -0.1984635442495346, "step": 11147 }, { "epoch": 7.70954356846473, "grad_norm": 4.478142738342285, "learning_rate": 1.272475795297372e-05, "log_odds_chosen": 11.520525932312012, "log_odds_ratio": -2.9978171369293705e-05, "logits/chosen": -0.514273464679718, "logits/rejected": -0.3573247194290161, "logps/chosen": -0.00017810847202781588, "logps/rejected": -2.5793113708496094, "loss": 0.393, "nll_loss": 0.09824325144290924, "rewards/accuracies": 1.0, "rewards/chosen": -1.7810845747590065e-05, "rewards/margins": 0.25791335105895996, "rewards/rejected": -0.2579311430454254, "step": 11148 }, { "epoch": 7.710235131396957, "grad_norm": 3.2450404167175293, "learning_rate": 1.2720915936683572e-05, "log_odds_chosen": 11.055948257446289, "log_odds_ratio": -2.0638492060243152e-05, "logits/chosen": -0.30314433574676514, "logits/rejected": -0.24709904193878174, "logps/chosen": -0.00018884678138419986, "logps/rejected": -2.2960658073425293, "loss": 0.331, "nll_loss": 0.08274277299642563, "rewards/accuracies": 1.0, "rewards/chosen": -1.8884678866015747e-05, "rewards/margins": 0.22958768904209137, "rewards/rejected": -0.22960656881332397, "step": 11149 }, { "epoch": 7.710926694329184, "grad_norm": 3.7017691135406494, "learning_rate": 1.2717073920393424e-05, "log_odds_chosen": 10.632525444030762, "log_odds_ratio": -0.0001720143627608195, "logits/chosen": -0.27402323484420776, "logits/rejected": -0.34319669008255005, "logps/chosen": -0.0004001731285825372, "logps/rejected": -2.192150115966797, "loss": 0.4616, "nll_loss": 0.11538498103618622, "rewards/accuracies": 1.0, "rewards/chosen": -4.001730849267915e-05, "rewards/margins": 0.21917499601840973, "rewards/rejected": -0.2192150205373764, "step": 11150 }, { "epoch": 7.711618257261411, "grad_norm": 7.314864158630371, "learning_rate": 1.2713231904103273e-05, "log_odds_chosen": 10.405158996582031, "log_odds_ratio": -0.0001515313924755901, "logits/chosen": -0.35661429166793823, "logits/rejected": -0.48725754022598267, "logps/chosen": -0.0005308876279741526, "logps/rejected": -2.6299867630004883, "loss": 0.6595, "nll_loss": 0.1648668348789215, "rewards/accuracies": 1.0, "rewards/chosen": -5.3088762797415257e-05, "rewards/margins": 0.2629455626010895, "rewards/rejected": -0.26299867033958435, "step": 11151 }, { "epoch": 7.712309820193638, "grad_norm": 4.250590801239014, "learning_rate": 1.2709389887813125e-05, "log_odds_chosen": 10.082304954528809, "log_odds_ratio": -0.00037102343048900366, "logits/chosen": -0.08294828236103058, "logits/rejected": -0.015783540904521942, "logps/chosen": -0.0008273039711639285, "logps/rejected": -1.81986665725708, "loss": 0.4346, "nll_loss": 0.10861542075872421, "rewards/accuracies": 1.0, "rewards/chosen": -8.273040293715894e-05, "rewards/margins": 0.18190395832061768, "rewards/rejected": -0.18198668956756592, "step": 11152 }, { "epoch": 7.713001383125865, "grad_norm": 3.800506591796875, "learning_rate": 1.2705547871522976e-05, "log_odds_chosen": 11.379698753356934, "log_odds_ratio": -3.8342433981597424e-05, "logits/chosen": -0.014530891552567482, "logits/rejected": -0.01644635759294033, "logps/chosen": -0.0003199596831109375, "logps/rejected": -2.4525840282440186, "loss": 0.3931, "nll_loss": 0.09826381504535675, "rewards/accuracies": 1.0, "rewards/chosen": -3.19959654007107e-05, "rewards/margins": 0.2452264130115509, "rewards/rejected": -0.2452584058046341, "step": 11153 }, { "epoch": 7.713692946058091, "grad_norm": 3.971003293991089, "learning_rate": 1.2701705855232825e-05, "log_odds_chosen": 10.103784561157227, "log_odds_ratio": -0.00028593253227882087, "logits/chosen": -0.8284880518913269, "logits/rejected": -0.8635731935501099, "logps/chosen": -0.0005365914548747241, "logps/rejected": -2.1351566314697266, "loss": 0.4528, "nll_loss": 0.11316827684640884, "rewards/accuracies": 1.0, "rewards/chosen": -5.365914330468513e-05, "rewards/margins": 0.2134619951248169, "rewards/rejected": -0.21351563930511475, "step": 11154 }, { "epoch": 7.714384508990318, "grad_norm": 5.799152851104736, "learning_rate": 1.2697863838942678e-05, "log_odds_chosen": 10.579777717590332, "log_odds_ratio": -0.0002642400795593858, "logits/chosen": -0.16273128986358643, "logits/rejected": -0.1782502830028534, "logps/chosen": -0.00022879459720570594, "logps/rejected": -2.074464797973633, "loss": 0.7465, "nll_loss": 0.18660762906074524, "rewards/accuracies": 1.0, "rewards/chosen": -2.2879459720570594e-05, "rewards/margins": 0.2074236273765564, "rewards/rejected": -0.20744650065898895, "step": 11155 }, { "epoch": 7.715076071922545, "grad_norm": 5.766885280609131, "learning_rate": 1.269402182265253e-05, "log_odds_chosen": 10.02370834350586, "log_odds_ratio": -9.30255264393054e-05, "logits/chosen": -0.023411564528942108, "logits/rejected": -0.11966176331043243, "logps/chosen": -0.0001804605417419225, "logps/rejected": -1.4501714706420898, "loss": 0.5821, "nll_loss": 0.14550897479057312, "rewards/accuracies": 1.0, "rewards/chosen": -1.8046055629383773e-05, "rewards/margins": 0.14499910175800323, "rewards/rejected": -0.14501714706420898, "step": 11156 }, { "epoch": 7.715767634854772, "grad_norm": 4.2179341316223145, "learning_rate": 1.269017980636238e-05, "log_odds_chosen": 9.422050476074219, "log_odds_ratio": -0.0002549506607465446, "logits/chosen": 0.053544774651527405, "logits/rejected": -0.015079125761985779, "logps/chosen": -0.0005149506032466888, "logps/rejected": -1.647516131401062, "loss": 0.4013, "nll_loss": 0.10029100626707077, "rewards/accuracies": 1.0, "rewards/chosen": -5.149505886947736e-05, "rewards/margins": 0.16470013558864594, "rewards/rejected": -0.16475161910057068, "step": 11157 }, { "epoch": 7.716459197786999, "grad_norm": 5.201953887939453, "learning_rate": 1.268633779007223e-05, "log_odds_chosen": 10.34267520904541, "log_odds_ratio": -0.001971145858988166, "logits/chosen": -0.20927292108535767, "logits/rejected": -0.18938323855400085, "logps/chosen": -0.014904233627021313, "logps/rejected": -1.9391664266586304, "loss": 0.4861, "nll_loss": 0.12132315337657928, "rewards/accuracies": 1.0, "rewards/chosen": -0.001490423339419067, "rewards/margins": 0.19242621958255768, "rewards/rejected": -0.19391664862632751, "step": 11158 }, { "epoch": 7.717150760719226, "grad_norm": 4.524053573608398, "learning_rate": 1.2682495773782082e-05, "log_odds_chosen": 11.030694961547852, "log_odds_ratio": -3.949225720134564e-05, "logits/chosen": -0.36548861861228943, "logits/rejected": -0.45517057180404663, "logps/chosen": -0.0001286904443986714, "logps/rejected": -1.940189003944397, "loss": 0.4174, "nll_loss": 0.1043517142534256, "rewards/accuracies": 1.0, "rewards/chosen": -1.2869046258856542e-05, "rewards/margins": 0.1940060406923294, "rewards/rejected": -0.1940189003944397, "step": 11159 }, { "epoch": 7.717842323651452, "grad_norm": 6.148046493530273, "learning_rate": 1.2678653757491931e-05, "log_odds_chosen": 11.965886116027832, "log_odds_ratio": -1.712049197521992e-05, "logits/chosen": -0.7160577774047852, "logits/rejected": -0.739776611328125, "logps/chosen": -0.00015731708845123649, "logps/rejected": -2.9925074577331543, "loss": 0.653, "nll_loss": 0.16325743496418, "rewards/accuracies": 1.0, "rewards/chosen": -1.573170993651729e-05, "rewards/margins": 0.2992349863052368, "rewards/rejected": -0.2992507517337799, "step": 11160 }, { "epoch": 7.718533886583679, "grad_norm": 12.926697731018066, "learning_rate": 1.2674811741201784e-05, "log_odds_chosen": 11.440237998962402, "log_odds_ratio": -2.1183654098422267e-05, "logits/chosen": -0.7331098318099976, "logits/rejected": -0.609634518623352, "logps/chosen": -0.00011901859397767112, "logps/rejected": -2.155032157897949, "loss": 0.4618, "nll_loss": 0.11544632911682129, "rewards/accuracies": 1.0, "rewards/chosen": -1.1901860489160754e-05, "rewards/margins": 0.21549132466316223, "rewards/rejected": -0.21550323069095612, "step": 11161 }, { "epoch": 7.719225449515906, "grad_norm": 3.8279616832733154, "learning_rate": 1.2670969724911635e-05, "log_odds_chosen": 11.816350936889648, "log_odds_ratio": -3.3580814488232136e-05, "logits/chosen": -0.4197129011154175, "logits/rejected": -0.5066426992416382, "logps/chosen": -0.00012585737567860633, "logps/rejected": -2.701681613922119, "loss": 0.7284, "nll_loss": 0.1820848286151886, "rewards/accuracies": 1.0, "rewards/chosen": -1.2585736840264872e-05, "rewards/margins": 0.27015554904937744, "rewards/rejected": -0.27016815543174744, "step": 11162 }, { "epoch": 7.719917012448133, "grad_norm": 4.701117038726807, "learning_rate": 1.2667127708621485e-05, "log_odds_chosen": 10.379667282104492, "log_odds_ratio": -0.00025896576698869467, "logits/chosen": -0.5930665731430054, "logits/rejected": -0.6023825407028198, "logps/chosen": -0.00022300105774775147, "logps/rejected": -1.7857420444488525, "loss": 0.5595, "nll_loss": 0.13983792066574097, "rewards/accuracies": 1.0, "rewards/chosen": -2.2300104319583625e-05, "rewards/margins": 0.17855191230773926, "rewards/rejected": -0.17857420444488525, "step": 11163 }, { "epoch": 7.72060857538036, "grad_norm": 4.207928657531738, "learning_rate": 1.2663285692331336e-05, "log_odds_chosen": 11.770381927490234, "log_odds_ratio": -5.4319327318808064e-05, "logits/chosen": -0.654198944568634, "logits/rejected": -0.6638980507850647, "logps/chosen": -0.0002516515669412911, "logps/rejected": -2.8353614807128906, "loss": 0.6246, "nll_loss": 0.1561528444290161, "rewards/accuracies": 1.0, "rewards/chosen": -2.516515633033123e-05, "rewards/margins": 0.2835109829902649, "rewards/rejected": -0.2835361361503601, "step": 11164 }, { "epoch": 7.7213001383125865, "grad_norm": 7.018038272857666, "learning_rate": 1.2659443676041189e-05, "log_odds_chosen": 11.889036178588867, "log_odds_ratio": -4.769517545355484e-05, "logits/chosen": -0.2306787520647049, "logits/rejected": -0.31250622868537903, "logps/chosen": -0.0002063388383248821, "logps/rejected": -3.314582347869873, "loss": 0.6666, "nll_loss": 0.1666460931301117, "rewards/accuracies": 1.0, "rewards/chosen": -2.063388456008397e-05, "rewards/margins": 0.3314375877380371, "rewards/rejected": -0.3314582109451294, "step": 11165 }, { "epoch": 7.721991701244813, "grad_norm": 4.5476765632629395, "learning_rate": 1.2655601659751038e-05, "log_odds_chosen": 11.020938873291016, "log_odds_ratio": -3.345979712321423e-05, "logits/chosen": -0.14058321714401245, "logits/rejected": -0.1784745305776596, "logps/chosen": -0.00021612163982354105, "logps/rejected": -2.4194188117980957, "loss": 0.5841, "nll_loss": 0.14602458477020264, "rewards/accuracies": 1.0, "rewards/chosen": -2.161216252716258e-05, "rewards/margins": 0.24192027747631073, "rewards/rejected": -0.241941899061203, "step": 11166 }, { "epoch": 7.72268326417704, "grad_norm": 4.855893611907959, "learning_rate": 1.265175964346089e-05, "log_odds_chosen": 13.27073860168457, "log_odds_ratio": -4.107092536287382e-06, "logits/chosen": -0.27976834774017334, "logits/rejected": -0.25163838267326355, "logps/chosen": -0.0001417580060660839, "logps/rejected": -4.15740966796875, "loss": 0.5532, "nll_loss": 0.13828860223293304, "rewards/accuracies": 1.0, "rewards/chosen": -1.4175801879900973e-05, "rewards/margins": 0.41572678089141846, "rewards/rejected": -0.415740966796875, "step": 11167 }, { "epoch": 7.723374827109267, "grad_norm": 4.087342262268066, "learning_rate": 1.264791762717074e-05, "log_odds_chosen": 10.767053604125977, "log_odds_ratio": -0.00036937909317202866, "logits/chosen": -0.4580538272857666, "logits/rejected": -0.4867129623889923, "logps/chosen": -0.0004119900113437325, "logps/rejected": -2.1865885257720947, "loss": 0.3387, "nll_loss": 0.08462625741958618, "rewards/accuracies": 1.0, "rewards/chosen": -4.119900404475629e-05, "rewards/margins": 0.21861764788627625, "rewards/rejected": -0.21865884959697723, "step": 11168 }, { "epoch": 7.724066390041494, "grad_norm": 3.855748176574707, "learning_rate": 1.264407561088059e-05, "log_odds_chosen": 11.13068962097168, "log_odds_ratio": -1.747292117215693e-05, "logits/chosen": -0.3536403179168701, "logits/rejected": -0.4293423295021057, "logps/chosen": -0.00011429526784922928, "logps/rejected": -2.155888795852661, "loss": 0.3741, "nll_loss": 0.09352263063192368, "rewards/accuracies": 1.0, "rewards/chosen": -1.142952805821551e-05, "rewards/margins": 0.21557745337486267, "rewards/rejected": -0.21558886766433716, "step": 11169 }, { "epoch": 7.724757952973721, "grad_norm": 3.3689465522766113, "learning_rate": 1.2640233594590442e-05, "log_odds_chosen": 11.25421142578125, "log_odds_ratio": -3.45587613992393e-05, "logits/chosen": -0.6251857280731201, "logits/rejected": -0.6823061108589172, "logps/chosen": -0.0001753601391101256, "logps/rejected": -2.045698404312134, "loss": 0.3463, "nll_loss": 0.08658169209957123, "rewards/accuracies": 1.0, "rewards/chosen": -1.7536012819618918e-05, "rewards/margins": 0.2045523226261139, "rewards/rejected": -0.20456984639167786, "step": 11170 }, { "epoch": 7.7254495159059475, "grad_norm": 5.983028411865234, "learning_rate": 1.2636391578300295e-05, "log_odds_chosen": 10.396875381469727, "log_odds_ratio": -0.00011761592759285122, "logits/chosen": -0.6582087278366089, "logits/rejected": -0.7967391610145569, "logps/chosen": -0.00023359763144981116, "logps/rejected": -1.4850988388061523, "loss": 0.2688, "nll_loss": 0.06719997525215149, "rewards/accuracies": 1.0, "rewards/chosen": -2.3359763872576877e-05, "rewards/margins": 0.14848652482032776, "rewards/rejected": -0.1485099047422409, "step": 11171 }, { "epoch": 7.726141078838174, "grad_norm": 3.940302848815918, "learning_rate": 1.2632549562010144e-05, "log_odds_chosen": 11.063858032226562, "log_odds_ratio": -8.894936763681471e-05, "logits/chosen": -0.6144142746925354, "logits/rejected": -0.7118813991546631, "logps/chosen": -0.000178087706444785, "logps/rejected": -2.3514490127563477, "loss": 0.4841, "nll_loss": 0.12101202458143234, "rewards/accuracies": 1.0, "rewards/chosen": -1.7808772099670023e-05, "rewards/margins": 0.23512712121009827, "rewards/rejected": -0.23514492809772491, "step": 11172 }, { "epoch": 7.726832641770401, "grad_norm": 3.3931593894958496, "learning_rate": 1.2628707545719995e-05, "log_odds_chosen": 10.330915451049805, "log_odds_ratio": -0.00016846258949954063, "logits/chosen": -0.22229152917861938, "logits/rejected": -0.3545496463775635, "logps/chosen": -0.0005119829438626766, "logps/rejected": -2.021285057067871, "loss": 0.3727, "nll_loss": 0.0931679829955101, "rewards/accuracies": 1.0, "rewards/chosen": -5.119829802424647e-05, "rewards/margins": 0.20207729935646057, "rewards/rejected": -0.20212849974632263, "step": 11173 }, { "epoch": 7.727524204702628, "grad_norm": 8.75682258605957, "learning_rate": 1.2624865529429844e-05, "log_odds_chosen": 11.253244400024414, "log_odds_ratio": -3.685813135234639e-05, "logits/chosen": -0.41329488158226013, "logits/rejected": -0.5813735127449036, "logps/chosen": -0.00014242672477848828, "logps/rejected": -2.198551654815674, "loss": 0.4275, "nll_loss": 0.10687272250652313, "rewards/accuracies": 1.0, "rewards/chosen": -1.4242672477848828e-05, "rewards/margins": 0.21984094381332397, "rewards/rejected": -0.2198551744222641, "step": 11174 }, { "epoch": 7.728215767634855, "grad_norm": 4.5937981605529785, "learning_rate": 1.2621023513139696e-05, "log_odds_chosen": 10.833547592163086, "log_odds_ratio": -0.0001014374356600456, "logits/chosen": -0.4467146098613739, "logits/rejected": -0.4794977903366089, "logps/chosen": -0.0005390796577557921, "logps/rejected": -2.4630837440490723, "loss": 0.3927, "nll_loss": 0.09816577285528183, "rewards/accuracies": 1.0, "rewards/chosen": -5.390797014115378e-05, "rewards/margins": 0.24625447392463684, "rewards/rejected": -0.24630838632583618, "step": 11175 }, { "epoch": 7.728907330567082, "grad_norm": 11.93628978729248, "learning_rate": 1.2617181496849549e-05, "log_odds_chosen": 9.881686210632324, "log_odds_ratio": -0.0003544997307471931, "logits/chosen": -0.7389744520187378, "logits/rejected": -0.7917199730873108, "logps/chosen": -0.0008742506033740938, "logps/rejected": -1.5302459001541138, "loss": 0.5525, "nll_loss": 0.13810065388679504, "rewards/accuracies": 1.0, "rewards/chosen": -8.742506906855851e-05, "rewards/margins": 0.1529371738433838, "rewards/rejected": -0.1530245989561081, "step": 11176 }, { "epoch": 7.7295988934993085, "grad_norm": 4.479918479919434, "learning_rate": 1.2613339480559398e-05, "log_odds_chosen": 10.797189712524414, "log_odds_ratio": -2.9541673939093016e-05, "logits/chosen": -0.1515422761440277, "logits/rejected": -0.14504599571228027, "logps/chosen": -0.0001397371233906597, "logps/rejected": -1.710416316986084, "loss": 0.3542, "nll_loss": 0.08855848014354706, "rewards/accuracies": 1.0, "rewards/chosen": -1.397371306666173e-05, "rewards/margins": 0.17102766036987305, "rewards/rejected": -0.17104163765907288, "step": 11177 }, { "epoch": 7.730290456431535, "grad_norm": 4.554428577423096, "learning_rate": 1.2609497464269248e-05, "log_odds_chosen": 11.5568265914917, "log_odds_ratio": -1.1084370271419175e-05, "logits/chosen": -0.6276842355728149, "logits/rejected": -0.6418471336364746, "logps/chosen": -0.00015286514826584607, "logps/rejected": -2.5270235538482666, "loss": 0.6737, "nll_loss": 0.16842985153198242, "rewards/accuracies": 1.0, "rewards/chosen": -1.5286514098988846e-05, "rewards/margins": 0.2526870667934418, "rewards/rejected": -0.25270235538482666, "step": 11178 }, { "epoch": 7.730982019363762, "grad_norm": 7.3763251304626465, "learning_rate": 1.26056554479791e-05, "log_odds_chosen": 11.22471809387207, "log_odds_ratio": -4.1468985727988183e-05, "logits/chosen": -0.4443528652191162, "logits/rejected": -0.47654908895492554, "logps/chosen": -0.00032382557401433587, "logps/rejected": -2.4713592529296875, "loss": 0.6009, "nll_loss": 0.1502087265253067, "rewards/accuracies": 1.0, "rewards/chosen": -3.238255885662511e-05, "rewards/margins": 0.24710355699062347, "rewards/rejected": -0.2471359521150589, "step": 11179 }, { "epoch": 7.731673582295989, "grad_norm": 7.088914394378662, "learning_rate": 1.260181343168895e-05, "log_odds_chosen": 9.736190795898438, "log_odds_ratio": -0.000677534204442054, "logits/chosen": -0.3276589512825012, "logits/rejected": -0.4400746822357178, "logps/chosen": -0.0006125522195361555, "logps/rejected": -1.2078614234924316, "loss": 0.5129, "nll_loss": 0.12815217673778534, "rewards/accuracies": 1.0, "rewards/chosen": -6.125521758804098e-05, "rewards/margins": 0.1207248866558075, "rewards/rejected": -0.12078613042831421, "step": 11180 }, { "epoch": 7.732365145228216, "grad_norm": 4.531628131866455, "learning_rate": 1.2597971415398802e-05, "log_odds_chosen": 11.409263610839844, "log_odds_ratio": -3.7987174437148497e-05, "logits/chosen": -0.3952420949935913, "logits/rejected": -0.45311498641967773, "logps/chosen": -0.00011559738049982116, "logps/rejected": -2.317403554916382, "loss": 0.3281, "nll_loss": 0.08202047646045685, "rewards/accuracies": 1.0, "rewards/chosen": -1.1559739505173638e-05, "rewards/margins": 0.23172880709171295, "rewards/rejected": -0.23174037039279938, "step": 11181 }, { "epoch": 7.733056708160443, "grad_norm": 3.2501275539398193, "learning_rate": 1.2594129399108653e-05, "log_odds_chosen": 11.311736106872559, "log_odds_ratio": -2.3325386791839264e-05, "logits/chosen": -0.3043041229248047, "logits/rejected": -0.3732486665248871, "logps/chosen": -0.000279276049695909, "logps/rejected": -2.639509677886963, "loss": 0.4197, "nll_loss": 0.10492255538702011, "rewards/accuracies": 1.0, "rewards/chosen": -2.7927606424782425e-05, "rewards/margins": 0.2639230787754059, "rewards/rejected": -0.26395100355148315, "step": 11182 }, { "epoch": 7.7337482710926695, "grad_norm": 5.095405578613281, "learning_rate": 1.2590287382818502e-05, "log_odds_chosen": 11.507871627807617, "log_odds_ratio": -5.955259985057637e-05, "logits/chosen": -0.4925724267959595, "logits/rejected": -0.5230408906936646, "logps/chosen": -9.127370140049607e-05, "logps/rejected": -2.333247661590576, "loss": 0.6057, "nll_loss": 0.1514206826686859, "rewards/accuracies": 1.0, "rewards/chosen": -9.127370503847487e-06, "rewards/margins": 0.2333156317472458, "rewards/rejected": -0.23332476615905762, "step": 11183 }, { "epoch": 7.734439834024896, "grad_norm": 5.569227695465088, "learning_rate": 1.2586445366528355e-05, "log_odds_chosen": 11.26315689086914, "log_odds_ratio": -3.517435470712371e-05, "logits/chosen": 0.6779569387435913, "logits/rejected": 0.5444155931472778, "logps/chosen": -0.0002390643785474822, "logps/rejected": -2.7062904834747314, "loss": 0.5392, "nll_loss": 0.13479849696159363, "rewards/accuracies": 1.0, "rewards/chosen": -2.3906435671960935e-05, "rewards/margins": 0.2706051468849182, "rewards/rejected": -0.27062904834747314, "step": 11184 }, { "epoch": 7.735131396957123, "grad_norm": 4.753345489501953, "learning_rate": 1.2582603350238207e-05, "log_odds_chosen": 10.940960884094238, "log_odds_ratio": -0.0001814025454223156, "logits/chosen": -0.47536489367485046, "logits/rejected": -0.42041611671447754, "logps/chosen": -0.0004682550788857043, "logps/rejected": -2.2718544006347656, "loss": 0.5859, "nll_loss": 0.14645753800868988, "rewards/accuracies": 1.0, "rewards/chosen": -4.682551298174076e-05, "rewards/margins": 0.22713860869407654, "rewards/rejected": -0.2271854281425476, "step": 11185 }, { "epoch": 7.73582295988935, "grad_norm": 5.558916091918945, "learning_rate": 1.2578761333948056e-05, "log_odds_chosen": 10.662283897399902, "log_odds_ratio": -0.0003959111636504531, "logits/chosen": -0.6355177164077759, "logits/rejected": -0.7223750352859497, "logps/chosen": -0.0004100181395187974, "logps/rejected": -2.0126688480377197, "loss": 0.5465, "nll_loss": 0.13657298684120178, "rewards/accuracies": 1.0, "rewards/chosen": -4.100181831745431e-05, "rewards/margins": 0.2012258768081665, "rewards/rejected": -0.20126686990261078, "step": 11186 }, { "epoch": 7.736514522821577, "grad_norm": 6.592702865600586, "learning_rate": 1.2574919317657907e-05, "log_odds_chosen": 11.67574405670166, "log_odds_ratio": -3.718010339071043e-05, "logits/chosen": -0.4506959617137909, "logits/rejected": -0.5245539546012878, "logps/chosen": -5.4668325901729986e-05, "logps/rejected": -2.1702640056610107, "loss": 0.4492, "nll_loss": 0.11230722069740295, "rewards/accuracies": 1.0, "rewards/chosen": -5.466832590172999e-06, "rewards/margins": 0.2170209288597107, "rewards/rejected": -0.21702641248703003, "step": 11187 }, { "epoch": 7.737206085753804, "grad_norm": 5.0268330574035645, "learning_rate": 1.257107730136776e-05, "log_odds_chosen": 12.745853424072266, "log_odds_ratio": -5.975254225631943e-06, "logits/chosen": -0.32701608538627625, "logits/rejected": -0.4010179340839386, "logps/chosen": -8.823679672786966e-05, "logps/rejected": -3.32920503616333, "loss": 0.4433, "nll_loss": 0.11081268638372421, "rewards/accuracies": 1.0, "rewards/chosen": -8.823679308989085e-06, "rewards/margins": 0.3329116702079773, "rewards/rejected": -0.33292049169540405, "step": 11188 }, { "epoch": 7.7378976486860305, "grad_norm": 5.755482196807861, "learning_rate": 1.2567235285077608e-05, "log_odds_chosen": 10.414119720458984, "log_odds_ratio": -0.00011008892033714801, "logits/chosen": -0.3587581515312195, "logits/rejected": -0.3946435749530792, "logps/chosen": -0.00015682679077144712, "logps/rejected": -1.538665533065796, "loss": 0.4158, "nll_loss": 0.10394784063100815, "rewards/accuracies": 1.0, "rewards/chosen": -1.5682679077144712e-05, "rewards/margins": 0.15385086834430695, "rewards/rejected": -0.15386654436588287, "step": 11189 }, { "epoch": 7.738589211618257, "grad_norm": 4.126373291015625, "learning_rate": 1.256339326878746e-05, "log_odds_chosen": 9.167192459106445, "log_odds_ratio": -0.0003323358250781894, "logits/chosen": -0.3083547055721283, "logits/rejected": -0.71805739402771, "logps/chosen": -0.0006533075356855989, "logps/rejected": -1.5606307983398438, "loss": 0.3303, "nll_loss": 0.08255146443843842, "rewards/accuracies": 1.0, "rewards/chosen": -6.533075065817684e-05, "rewards/margins": 0.15599775314331055, "rewards/rejected": -0.15606307983398438, "step": 11190 }, { "epoch": 7.739280774550484, "grad_norm": 5.740198135375977, "learning_rate": 1.2559551252497311e-05, "log_odds_chosen": 12.055227279663086, "log_odds_ratio": -1.5766985598020256e-05, "logits/chosen": -0.1922997236251831, "logits/rejected": -0.23225519061088562, "logps/chosen": -0.00014473784540314227, "logps/rejected": -2.7764201164245605, "loss": 0.5185, "nll_loss": 0.12963469326496124, "rewards/accuracies": 1.0, "rewards/chosen": -1.4473785086011048e-05, "rewards/margins": 0.277627557516098, "rewards/rejected": -0.27764201164245605, "step": 11191 }, { "epoch": 7.739972337482711, "grad_norm": 4.969605445861816, "learning_rate": 1.255570923620716e-05, "log_odds_chosen": 11.182586669921875, "log_odds_ratio": -4.7119465307332575e-05, "logits/chosen": -0.3878333866596222, "logits/rejected": -0.44411617517471313, "logps/chosen": -0.00016241194680333138, "logps/rejected": -2.5389904975891113, "loss": 0.4358, "nll_loss": 0.10894952714443207, "rewards/accuracies": 1.0, "rewards/chosen": -1.6241194316535257e-05, "rewards/margins": 0.25388282537460327, "rewards/rejected": -0.2538990378379822, "step": 11192 }, { "epoch": 7.740663900414938, "grad_norm": 9.156740188598633, "learning_rate": 1.2551867219917013e-05, "log_odds_chosen": 12.061928749084473, "log_odds_ratio": -4.9793354264693335e-05, "logits/chosen": -0.2080545723438263, "logits/rejected": -0.23290136456489563, "logps/chosen": -0.0007853920687921345, "logps/rejected": -3.5089850425720215, "loss": 0.5568, "nll_loss": 0.13918514549732208, "rewards/accuracies": 1.0, "rewards/chosen": -7.85392039688304e-05, "rewards/margins": 0.3508199453353882, "rewards/rejected": -0.35089850425720215, "step": 11193 }, { "epoch": 7.741355463347165, "grad_norm": 4.986725330352783, "learning_rate": 1.2548025203626865e-05, "log_odds_chosen": 11.318840026855469, "log_odds_ratio": -2.896355726988986e-05, "logits/chosen": -0.5200918316841125, "logits/rejected": -0.587061882019043, "logps/chosen": -0.00019649678142741323, "logps/rejected": -2.0499584674835205, "loss": 0.4781, "nll_loss": 0.11951858550310135, "rewards/accuracies": 1.0, "rewards/chosen": -1.9649680325528607e-05, "rewards/margins": 0.20497620105743408, "rewards/rejected": -0.20499587059020996, "step": 11194 }, { "epoch": 7.7420470262793915, "grad_norm": 7.151978015899658, "learning_rate": 1.2544183187336714e-05, "log_odds_chosen": 10.956339836120605, "log_odds_ratio": -0.00020657852292060852, "logits/chosen": -0.5801454782485962, "logits/rejected": -0.6348856687545776, "logps/chosen": -0.00025836541317403316, "logps/rejected": -2.077286720275879, "loss": 0.4545, "nll_loss": 0.11360951513051987, "rewards/accuracies": 1.0, "rewards/chosen": -2.58365435001906e-05, "rewards/margins": 0.20770283043384552, "rewards/rejected": -0.20772868394851685, "step": 11195 }, { "epoch": 7.742738589211618, "grad_norm": 6.095242500305176, "learning_rate": 1.2540341171046565e-05, "log_odds_chosen": 11.290716171264648, "log_odds_ratio": -4.969790097675286e-05, "logits/chosen": -0.2075842320919037, "logits/rejected": -0.27315694093704224, "logps/chosen": -0.0009419232374057174, "logps/rejected": -2.3505849838256836, "loss": 0.4756, "nll_loss": 0.1188892275094986, "rewards/accuracies": 1.0, "rewards/chosen": -9.419232083018869e-05, "rewards/margins": 0.2349643111228943, "rewards/rejected": -0.2350585013628006, "step": 11196 }, { "epoch": 7.743430152143845, "grad_norm": 4.93026876449585, "learning_rate": 1.2536499154756418e-05, "log_odds_chosen": 12.610750198364258, "log_odds_ratio": -1.0907778232649434e-05, "logits/chosen": -0.2769096791744232, "logits/rejected": -0.36176517605781555, "logps/chosen": -0.00010222066339338198, "logps/rejected": -3.3806655406951904, "loss": 0.3054, "nll_loss": 0.07633821666240692, "rewards/accuracies": 1.0, "rewards/chosen": -1.022206743073184e-05, "rewards/margins": 0.3380563259124756, "rewards/rejected": -0.33806657791137695, "step": 11197 }, { "epoch": 7.744121715076072, "grad_norm": 5.709609508514404, "learning_rate": 1.2532657138466267e-05, "log_odds_chosen": 10.945411682128906, "log_odds_ratio": -5.631058229482733e-05, "logits/chosen": -0.5118638873100281, "logits/rejected": -0.532324492931366, "logps/chosen": -0.0004599790263455361, "logps/rejected": -2.5661821365356445, "loss": 0.5729, "nll_loss": 0.1432260125875473, "rewards/accuracies": 1.0, "rewards/chosen": -4.599790554493666e-05, "rewards/margins": 0.2565721869468689, "rewards/rejected": -0.2566182017326355, "step": 11198 }, { "epoch": 7.744813278008299, "grad_norm": 3.967132329940796, "learning_rate": 1.2528815122176119e-05, "log_odds_chosen": 10.008167266845703, "log_odds_ratio": -0.00038261100417003036, "logits/chosen": -0.37153691053390503, "logits/rejected": -0.3605520725250244, "logps/chosen": -0.0008499649120494723, "logps/rejected": -1.7307593822479248, "loss": 0.3355, "nll_loss": 0.08382457494735718, "rewards/accuracies": 1.0, "rewards/chosen": -8.499649266013876e-05, "rewards/margins": 0.17299094796180725, "rewards/rejected": -0.17307592928409576, "step": 11199 }, { "epoch": 7.745504840940526, "grad_norm": 7.0854926109313965, "learning_rate": 1.252497310588597e-05, "log_odds_chosen": 11.453516006469727, "log_odds_ratio": -2.7779607989941724e-05, "logits/chosen": -0.5036885738372803, "logits/rejected": -0.6609827280044556, "logps/chosen": -0.00011579034617170691, "logps/rejected": -2.313054084777832, "loss": 0.4022, "nll_loss": 0.1005503386259079, "rewards/accuracies": 1.0, "rewards/chosen": -1.1579034435271751e-05, "rewards/margins": 0.23129382729530334, "rewards/rejected": -0.23130539059638977, "step": 11200 }, { "epoch": 7.746196403872752, "grad_norm": 5.6303324699401855, "learning_rate": 1.2521131089595819e-05, "log_odds_chosen": 11.321676254272461, "log_odds_ratio": -0.0001658283144934103, "logits/chosen": -0.40062427520751953, "logits/rejected": -0.42020952701568604, "logps/chosen": -0.0008212847169488668, "logps/rejected": -2.997318983078003, "loss": 0.4117, "nll_loss": 0.10292016714811325, "rewards/accuracies": 1.0, "rewards/chosen": -8.212847023969516e-05, "rewards/margins": 0.29964977502822876, "rewards/rejected": -0.29973188042640686, "step": 11201 }, { "epoch": 7.746887966804979, "grad_norm": 6.021998882293701, "learning_rate": 1.2517289073305671e-05, "log_odds_chosen": 10.015493392944336, "log_odds_ratio": -0.000374117458704859, "logits/chosen": -0.20265470445156097, "logits/rejected": -0.1699371337890625, "logps/chosen": -0.000535747327376157, "logps/rejected": -1.8978259563446045, "loss": 0.4803, "nll_loss": 0.12004905939102173, "rewards/accuracies": 1.0, "rewards/chosen": -5.357473128242418e-05, "rewards/margins": 0.1897290199995041, "rewards/rejected": -0.18978260457515717, "step": 11202 }, { "epoch": 7.747579529737206, "grad_norm": 5.411603927612305, "learning_rate": 1.2513447057015524e-05, "log_odds_chosen": 9.879266738891602, "log_odds_ratio": -0.00016255848458968103, "logits/chosen": -0.39416828751564026, "logits/rejected": -0.46854713559150696, "logps/chosen": -0.0005647170473821461, "logps/rejected": -2.0203137397766113, "loss": 0.4652, "nll_loss": 0.11628206074237823, "rewards/accuracies": 1.0, "rewards/chosen": -5.647170473821461e-05, "rewards/margins": 0.20197489857673645, "rewards/rejected": -0.20203135907649994, "step": 11203 }, { "epoch": 7.748271092669433, "grad_norm": 7.1708760261535645, "learning_rate": 1.2509605040725373e-05, "log_odds_chosen": 9.07894229888916, "log_odds_ratio": -0.000547174655366689, "logits/chosen": -0.12856854498386383, "logits/rejected": -0.23630261421203613, "logps/chosen": -0.001496141660027206, "logps/rejected": -1.2579138278961182, "loss": 0.2841, "nll_loss": 0.07098029553890228, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014961417764425278, "rewards/margins": 0.12564176321029663, "rewards/rejected": -0.12579138576984406, "step": 11204 }, { "epoch": 7.74896265560166, "grad_norm": 3.752485990524292, "learning_rate": 1.2505763024435224e-05, "log_odds_chosen": 11.626092910766602, "log_odds_ratio": -3.133751670247875e-05, "logits/chosen": -0.34028932452201843, "logits/rejected": -0.46844393014907837, "logps/chosen": -0.0001301189768128097, "logps/rejected": -2.595146656036377, "loss": 0.3965, "nll_loss": 0.09911525249481201, "rewards/accuracies": 1.0, "rewards/chosen": -1.3011898772674613e-05, "rewards/margins": 0.2595016658306122, "rewards/rejected": -0.2595146894454956, "step": 11205 }, { "epoch": 7.749654218533887, "grad_norm": 4.600625038146973, "learning_rate": 1.2501921008145076e-05, "log_odds_chosen": 12.2091646194458, "log_odds_ratio": -1.5390061889775097e-05, "logits/chosen": -0.6013935804367065, "logits/rejected": -0.6590204238891602, "logps/chosen": -0.00014516216469928622, "logps/rejected": -2.883413791656494, "loss": 0.382, "nll_loss": 0.0955069437623024, "rewards/accuracies": 1.0, "rewards/chosen": -1.4516216651827563e-05, "rewards/margins": 0.28832685947418213, "rewards/rejected": -0.2883414030075073, "step": 11206 }, { "epoch": 7.750345781466113, "grad_norm": 6.549100875854492, "learning_rate": 1.2498078991854927e-05, "log_odds_chosen": 10.933587074279785, "log_odds_ratio": -4.656921373680234e-05, "logits/chosen": -0.5206800699234009, "logits/rejected": -0.6020963191986084, "logps/chosen": -0.0008355957688763738, "logps/rejected": -2.7682807445526123, "loss": 0.528, "nll_loss": 0.1319846659898758, "rewards/accuracies": 1.0, "rewards/chosen": -8.355958561878651e-05, "rewards/margins": 0.2767445147037506, "rewards/rejected": -0.2768280804157257, "step": 11207 }, { "epoch": 7.75103734439834, "grad_norm": 3.6843156814575195, "learning_rate": 1.2494236975564778e-05, "log_odds_chosen": 11.547121047973633, "log_odds_ratio": -0.00010025502706412226, "logits/chosen": -0.6024690270423889, "logits/rejected": -0.6342105865478516, "logps/chosen": -0.00016567941929679364, "logps/rejected": -2.5956060886383057, "loss": 0.5138, "nll_loss": 0.1284426748752594, "rewards/accuracies": 1.0, "rewards/chosen": -1.6567943021073006e-05, "rewards/margins": 0.2595440447330475, "rewards/rejected": -0.25956061482429504, "step": 11208 }, { "epoch": 7.751728907330567, "grad_norm": 8.264753341674805, "learning_rate": 1.2490394959274628e-05, "log_odds_chosen": 10.360306739807129, "log_odds_ratio": -6.233261956367642e-05, "logits/chosen": -0.6511682271957397, "logits/rejected": -0.6333962082862854, "logps/chosen": -0.00028001246391795576, "logps/rejected": -1.442769169807434, "loss": 0.3138, "nll_loss": 0.07843394577503204, "rewards/accuracies": 1.0, "rewards/chosen": -2.8001246391795576e-05, "rewards/margins": 0.14424893260002136, "rewards/rejected": -0.1442769169807434, "step": 11209 }, { "epoch": 7.752420470262794, "grad_norm": 3.439617872238159, "learning_rate": 1.2486552942984479e-05, "log_odds_chosen": 10.704526901245117, "log_odds_ratio": -0.0003997480671387166, "logits/chosen": -0.41600725054740906, "logits/rejected": -0.4207799434661865, "logps/chosen": -0.0009102488402277231, "logps/rejected": -2.3439385890960693, "loss": 0.4589, "nll_loss": 0.11469479650259018, "rewards/accuracies": 1.0, "rewards/chosen": -9.102488547796384e-05, "rewards/margins": 0.2343028485774994, "rewards/rejected": -0.2343938648700714, "step": 11210 }, { "epoch": 7.753112033195021, "grad_norm": 6.369625568389893, "learning_rate": 1.248271092669433e-05, "log_odds_chosen": 12.472749710083008, "log_odds_ratio": -1.946235897776205e-05, "logits/chosen": -0.30656394362449646, "logits/rejected": -0.3388577699661255, "logps/chosen": -0.00020343753567431122, "logps/rejected": -3.5933454036712646, "loss": 0.6374, "nll_loss": 0.15935854613780975, "rewards/accuracies": 1.0, "rewards/chosen": -2.0343755750218406e-05, "rewards/margins": 0.3593142032623291, "rewards/rejected": -0.3593345284461975, "step": 11211 }, { "epoch": 7.753803596127248, "grad_norm": 6.118873596191406, "learning_rate": 1.247886891040418e-05, "log_odds_chosen": 11.584425926208496, "log_odds_ratio": -7.112725143088028e-05, "logits/chosen": -0.2803153097629547, "logits/rejected": -0.3472374677658081, "logps/chosen": -0.00031616457272320986, "logps/rejected": -3.152381181716919, "loss": 0.2362, "nll_loss": 0.05905373767018318, "rewards/accuracies": 1.0, "rewards/chosen": -3.161645872751251e-05, "rewards/margins": 0.31520652770996094, "rewards/rejected": -0.3152381181716919, "step": 11212 }, { "epoch": 7.754495159059474, "grad_norm": 6.023653984069824, "learning_rate": 1.2475026894114033e-05, "log_odds_chosen": 11.131653785705566, "log_odds_ratio": -4.222708594170399e-05, "logits/chosen": -0.8086551427841187, "logits/rejected": -0.814570963382721, "logps/chosen": -0.00013568079157266766, "logps/rejected": -1.8936514854431152, "loss": 0.3326, "nll_loss": 0.08315111696720123, "rewards/accuracies": 1.0, "rewards/chosen": -1.3568078429671004e-05, "rewards/margins": 0.18935158848762512, "rewards/rejected": -0.1893651783466339, "step": 11213 }, { "epoch": 7.755186721991701, "grad_norm": 4.0896477699279785, "learning_rate": 1.2471184877823882e-05, "log_odds_chosen": 11.216859817504883, "log_odds_ratio": -3.130736877210438e-05, "logits/chosen": -0.3861258029937744, "logits/rejected": -0.4017869532108307, "logps/chosen": -0.00017719688185025007, "logps/rejected": -2.3807382583618164, "loss": 0.8433, "nll_loss": 0.21082894504070282, "rewards/accuracies": 1.0, "rewards/chosen": -1.7719688912620768e-05, "rewards/margins": 0.23805612325668335, "rewards/rejected": -0.23807384073734283, "step": 11214 }, { "epoch": 7.755878284923928, "grad_norm": 8.639068603515625, "learning_rate": 1.2467342861533733e-05, "log_odds_chosen": 8.53171157836914, "log_odds_ratio": -0.002693082904443145, "logits/chosen": -0.4846501052379608, "logits/rejected": -0.5627926588058472, "logps/chosen": -0.0017211114754900336, "logps/rejected": -1.4384164810180664, "loss": 0.7398, "nll_loss": 0.18466946482658386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017211114754900336, "rewards/margins": 0.14366954565048218, "rewards/rejected": -0.14384163916110992, "step": 11215 }, { "epoch": 7.756569847856155, "grad_norm": 4.447142601013184, "learning_rate": 1.2463500845243585e-05, "log_odds_chosen": 11.100719451904297, "log_odds_ratio": -2.4733282771194354e-05, "logits/chosen": -0.4554653763771057, "logits/rejected": -0.44291388988494873, "logps/chosen": -6.685042171739042e-05, "logps/rejected": -1.6726263761520386, "loss": 0.455, "nll_loss": 0.11374975740909576, "rewards/accuracies": 1.0, "rewards/chosen": -6.6850425355369225e-06, "rewards/margins": 0.1672559529542923, "rewards/rejected": -0.16726262867450714, "step": 11216 }, { "epoch": 7.757261410788382, "grad_norm": 4.845477104187012, "learning_rate": 1.2459658828953436e-05, "log_odds_chosen": 10.642885208129883, "log_odds_ratio": -0.00018591950356494635, "logits/chosen": -0.2249833643436432, "logits/rejected": -0.2929491400718689, "logps/chosen": -0.0009210944408550858, "logps/rejected": -2.7952280044555664, "loss": 0.4597, "nll_loss": 0.11489509046077728, "rewards/accuracies": 1.0, "rewards/chosen": -9.210944699589163e-05, "rewards/margins": 0.27943071722984314, "rewards/rejected": -0.2795228362083435, "step": 11217 }, { "epoch": 7.7579529737206085, "grad_norm": 4.492068290710449, "learning_rate": 1.2455816812663287e-05, "log_odds_chosen": 10.5167236328125, "log_odds_ratio": -0.00040569447446614504, "logits/chosen": -0.5178202390670776, "logits/rejected": -0.5283166170120239, "logps/chosen": -0.0005154095706529915, "logps/rejected": -2.447777032852173, "loss": 0.3911, "nll_loss": 0.09773518145084381, "rewards/accuracies": 1.0, "rewards/chosen": -5.1540962886065245e-05, "rewards/margins": 0.24472618103027344, "rewards/rejected": -0.24477770924568176, "step": 11218 }, { "epoch": 7.758644536652835, "grad_norm": 4.011480331420898, "learning_rate": 1.2451974796373138e-05, "log_odds_chosen": 10.492677688598633, "log_odds_ratio": -4.538420762401074e-05, "logits/chosen": -0.39754992723464966, "logits/rejected": -0.4307247996330261, "logps/chosen": -0.0001875983434729278, "logps/rejected": -1.51975679397583, "loss": 0.4099, "nll_loss": 0.10245904326438904, "rewards/accuracies": 1.0, "rewards/chosen": -1.875983434729278e-05, "rewards/margins": 0.1519569307565689, "rewards/rejected": -0.15197569131851196, "step": 11219 }, { "epoch": 7.759336099585062, "grad_norm": 5.337882995605469, "learning_rate": 1.2448132780082988e-05, "log_odds_chosen": 10.46602725982666, "log_odds_ratio": -0.00019935752789024264, "logits/chosen": -0.3067324459552765, "logits/rejected": -0.3594750463962555, "logps/chosen": -0.0002527673204895109, "logps/rejected": -1.8997879028320312, "loss": 0.6223, "nll_loss": 0.15556585788726807, "rewards/accuracies": 1.0, "rewards/chosen": -2.527673314034473e-05, "rewards/margins": 0.1899535059928894, "rewards/rejected": -0.18997877836227417, "step": 11220 }, { "epoch": 7.760027662517289, "grad_norm": 5.162551403045654, "learning_rate": 1.2444290763792839e-05, "log_odds_chosen": 11.330338478088379, "log_odds_ratio": -9.858874545898288e-05, "logits/chosen": -0.5896552801132202, "logits/rejected": -0.5296061635017395, "logps/chosen": -0.0004703239828813821, "logps/rejected": -3.0334887504577637, "loss": 0.7643, "nll_loss": 0.19106769561767578, "rewards/accuracies": 1.0, "rewards/chosen": -4.7032401198521256e-05, "rewards/margins": 0.3033018410205841, "rewards/rejected": -0.3033488988876343, "step": 11221 }, { "epoch": 7.760719225449516, "grad_norm": 3.615222215652466, "learning_rate": 1.2440448747502691e-05, "log_odds_chosen": 11.154950141906738, "log_odds_ratio": -9.620700438972563e-05, "logits/chosen": -0.07681339979171753, "logits/rejected": -0.17916563153266907, "logps/chosen": -0.0001603415294084698, "logps/rejected": -2.4335439205169678, "loss": 0.4096, "nll_loss": 0.1023990586400032, "rewards/accuracies": 1.0, "rewards/chosen": -1.6034151485655457e-05, "rewards/margins": 0.24333836138248444, "rewards/rejected": -0.24335438013076782, "step": 11222 }, { "epoch": 7.761410788381743, "grad_norm": 5.476311683654785, "learning_rate": 1.243660673121254e-05, "log_odds_chosen": 11.82361125946045, "log_odds_ratio": -1.4367436961038038e-05, "logits/chosen": -0.5733404159545898, "logits/rejected": -0.6344277858734131, "logps/chosen": -0.000166439451277256, "logps/rejected": -2.5303244590759277, "loss": 0.4878, "nll_loss": 0.1219361424446106, "rewards/accuracies": 1.0, "rewards/chosen": -1.664394403633196e-05, "rewards/margins": 0.25301581621170044, "rewards/rejected": -0.25303247570991516, "step": 11223 }, { "epoch": 7.7621023513139695, "grad_norm": 4.696140766143799, "learning_rate": 1.2432764714922391e-05, "log_odds_chosen": 11.077871322631836, "log_odds_ratio": -2.367735214647837e-05, "logits/chosen": -0.2561139762401581, "logits/rejected": -0.3287086486816406, "logps/chosen": -0.0003140079788863659, "logps/rejected": -2.57572603225708, "loss": 0.5179, "nll_loss": 0.12948326766490936, "rewards/accuracies": 1.0, "rewards/chosen": -3.140079934382811e-05, "rewards/margins": 0.2575412392616272, "rewards/rejected": -0.25757265090942383, "step": 11224 }, { "epoch": 7.762793914246196, "grad_norm": 12.073077201843262, "learning_rate": 1.2428922698632242e-05, "log_odds_chosen": 11.019550323486328, "log_odds_ratio": -0.00019632314797490835, "logits/chosen": -0.3313988149166107, "logits/rejected": -0.36905309557914734, "logps/chosen": -0.00270017609000206, "logps/rejected": -3.295527458190918, "loss": 0.5591, "nll_loss": 0.1397487372159958, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027001762646250427, "rewards/margins": 0.3292827606201172, "rewards/rejected": -0.3295527696609497, "step": 11225 }, { "epoch": 7.763485477178423, "grad_norm": 6.284043788909912, "learning_rate": 1.2425080682342094e-05, "log_odds_chosen": 10.246824264526367, "log_odds_ratio": -0.0005744930822402239, "logits/chosen": -0.10029926151037216, "logits/rejected": -0.10771449655294418, "logps/chosen": -0.0012491054367274046, "logps/rejected": -2.3530826568603516, "loss": 0.3035, "nll_loss": 0.07581494003534317, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012491054076235741, "rewards/margins": 0.23518335819244385, "rewards/rejected": -0.23530825972557068, "step": 11226 }, { "epoch": 7.76417704011065, "grad_norm": 4.9749369621276855, "learning_rate": 1.2421238666051945e-05, "log_odds_chosen": 10.407770156860352, "log_odds_ratio": -0.00017144394223578274, "logits/chosen": 0.13365231454372406, "logits/rejected": 0.12635274231433868, "logps/chosen": -0.0010794727131724358, "logps/rejected": -2.232872247695923, "loss": 0.5565, "nll_loss": 0.1391078531742096, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010794726404128596, "rewards/margins": 0.22317931056022644, "rewards/rejected": -0.22328722476959229, "step": 11227 }, { "epoch": 7.764868603042877, "grad_norm": 6.39568567276001, "learning_rate": 1.2417396649761794e-05, "log_odds_chosen": 9.643097877502441, "log_odds_ratio": -0.12463778257369995, "logits/chosen": -0.33734792470932007, "logits/rejected": -0.3988925516605377, "logps/chosen": -0.02458512969315052, "logps/rejected": -2.240410804748535, "loss": 0.7192, "nll_loss": 0.16734297573566437, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024585130158811808, "rewards/margins": 0.2215825617313385, "rewards/rejected": -0.22404105961322784, "step": 11228 }, { "epoch": 7.765560165975104, "grad_norm": 5.003486156463623, "learning_rate": 1.2413554633471647e-05, "log_odds_chosen": 10.819863319396973, "log_odds_ratio": -2.375691110501066e-05, "logits/chosen": 0.34223243594169617, "logits/rejected": 0.2088853418827057, "logps/chosen": -0.00014355707389768213, "logps/rejected": -1.991088628768921, "loss": 0.6244, "nll_loss": 0.1560867726802826, "rewards/accuracies": 1.0, "rewards/chosen": -1.4355708117363974e-05, "rewards/margins": 0.1990945041179657, "rewards/rejected": -0.19910886883735657, "step": 11229 }, { "epoch": 7.7662517289073305, "grad_norm": 6.905767917633057, "learning_rate": 1.2409712617181497e-05, "log_odds_chosen": 9.822664260864258, "log_odds_ratio": -0.00029731536051258445, "logits/chosen": -0.2679555416107178, "logits/rejected": -0.2359420359134674, "logps/chosen": -0.0014667949872091413, "logps/rejected": -1.726415753364563, "loss": 0.4251, "nll_loss": 0.10625658184289932, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014667949290014803, "rewards/margins": 0.17249488830566406, "rewards/rejected": -0.1726415753364563, "step": 11230 }, { "epoch": 7.766943291839557, "grad_norm": 5.629059314727783, "learning_rate": 1.2405870600891348e-05, "log_odds_chosen": 12.017097473144531, "log_odds_ratio": -1.483617234043777e-05, "logits/chosen": -0.5283292531967163, "logits/rejected": -0.5332249402999878, "logps/chosen": -0.00043940573232248425, "logps/rejected": -3.0613012313842773, "loss": 0.444, "nll_loss": 0.11100263893604279, "rewards/accuracies": 1.0, "rewards/chosen": -4.394057032186538e-05, "rewards/margins": 0.3060861825942993, "rewards/rejected": -0.3061301112174988, "step": 11231 }, { "epoch": 7.767634854771784, "grad_norm": 4.639513969421387, "learning_rate": 1.2402028584601199e-05, "log_odds_chosen": 11.180654525756836, "log_odds_ratio": -0.0002833344624377787, "logits/chosen": -0.13472601771354675, "logits/rejected": -0.18717388808727264, "logps/chosen": -0.0004253677325323224, "logps/rejected": -2.7793374061584473, "loss": 0.4613, "nll_loss": 0.11529567092657089, "rewards/accuracies": 1.0, "rewards/chosen": -4.253677252563648e-05, "rewards/margins": 0.2778911888599396, "rewards/rejected": -0.2779337465763092, "step": 11232 }, { "epoch": 7.768326417704011, "grad_norm": 4.344387531280518, "learning_rate": 1.239818656831105e-05, "log_odds_chosen": 10.438720703125, "log_odds_ratio": -0.0005811832961626351, "logits/chosen": -0.01854592189192772, "logits/rejected": -0.04074572026729584, "logps/chosen": -0.0006477055721916258, "logps/rejected": -2.2782235145568848, "loss": 0.3492, "nll_loss": 0.08724640309810638, "rewards/accuracies": 1.0, "rewards/chosen": -6.477056012954563e-05, "rewards/margins": 0.22775757312774658, "rewards/rejected": -0.22782233357429504, "step": 11233 }, { "epoch": 7.769017980636238, "grad_norm": 4.033237934112549, "learning_rate": 1.23943445520209e-05, "log_odds_chosen": 10.35333251953125, "log_odds_ratio": -6.425260653486475e-05, "logits/chosen": -0.11870370805263519, "logits/rejected": -0.32155144214630127, "logps/chosen": -9.639868221711367e-05, "logps/rejected": -1.346225619316101, "loss": 0.4018, "nll_loss": 0.10044016689062119, "rewards/accuracies": 1.0, "rewards/chosen": -9.639867130317725e-06, "rewards/margins": 0.13461291790008545, "rewards/rejected": -0.13462257385253906, "step": 11234 }, { "epoch": 7.769709543568465, "grad_norm": 3.540543794631958, "learning_rate": 1.2390502535730753e-05, "log_odds_chosen": 12.143965721130371, "log_odds_ratio": -0.0001564951817272231, "logits/chosen": 0.0017357002943754196, "logits/rejected": -0.016939736902713776, "logps/chosen": -0.00022795412223786116, "logps/rejected": -3.7460548877716064, "loss": 0.5938, "nll_loss": 0.14844663441181183, "rewards/accuracies": 1.0, "rewards/chosen": -2.2795411496190354e-05, "rewards/margins": 0.3745827078819275, "rewards/rejected": -0.3746054768562317, "step": 11235 }, { "epoch": 7.7704011065006915, "grad_norm": 5.323680400848389, "learning_rate": 1.2386660519440604e-05, "log_odds_chosen": 11.648907661437988, "log_odds_ratio": -7.141516107367352e-05, "logits/chosen": -0.12177214026451111, "logits/rejected": -0.26459285616874695, "logps/chosen": -0.0006559010944329202, "logps/rejected": -2.6246397495269775, "loss": 0.4249, "nll_loss": 0.10621163249015808, "rewards/accuracies": 1.0, "rewards/chosen": -6.559010216733441e-05, "rewards/margins": 0.2623984217643738, "rewards/rejected": -0.2624639868736267, "step": 11236 }, { "epoch": 7.771092669432918, "grad_norm": 4.8484578132629395, "learning_rate": 1.2382818503150453e-05, "log_odds_chosen": 11.023386001586914, "log_odds_ratio": -0.00012878225243184716, "logits/chosen": -0.40300998091697693, "logits/rejected": -0.4514096975326538, "logps/chosen": -0.000475711131002754, "logps/rejected": -2.070675849914551, "loss": 0.4069, "nll_loss": 0.10171553492546082, "rewards/accuracies": 1.0, "rewards/chosen": -4.75711131002754e-05, "rewards/margins": 0.20702001452445984, "rewards/rejected": -0.2070675939321518, "step": 11237 }, { "epoch": 7.771784232365145, "grad_norm": 4.760718822479248, "learning_rate": 1.2378976486860305e-05, "log_odds_chosen": 10.488458633422852, "log_odds_ratio": -5.355846224119887e-05, "logits/chosen": -0.1504298448562622, "logits/rejected": -0.2084628939628601, "logps/chosen": -0.0002689189277589321, "logps/rejected": -2.0269899368286133, "loss": 0.4227, "nll_loss": 0.10567829012870789, "rewards/accuracies": 1.0, "rewards/chosen": -2.689189204829745e-05, "rewards/margins": 0.2026720941066742, "rewards/rejected": -0.2026989907026291, "step": 11238 }, { "epoch": 7.772475795297372, "grad_norm": 5.360637187957764, "learning_rate": 1.2375134470570156e-05, "log_odds_chosen": 9.567817687988281, "log_odds_ratio": -0.0001716063270578161, "logits/chosen": 0.03479413315653801, "logits/rejected": -0.12412481009960175, "logps/chosen": -0.00041789220995269716, "logps/rejected": -1.7624620199203491, "loss": 0.5261, "nll_loss": 0.13150463998317719, "rewards/accuracies": 1.0, "rewards/chosen": -4.178922245046124e-05, "rewards/margins": 0.17620441317558289, "rewards/rejected": -0.17624621093273163, "step": 11239 }, { "epoch": 7.773167358229599, "grad_norm": 5.343230247497559, "learning_rate": 1.2371292454280007e-05, "log_odds_chosen": 9.847702026367188, "log_odds_ratio": -0.00029629640630446374, "logits/chosen": -0.32641664147377014, "logits/rejected": -0.4043084979057312, "logps/chosen": -0.0004543064278550446, "logps/rejected": -1.8167530298233032, "loss": 0.5669, "nll_loss": 0.14169108867645264, "rewards/accuracies": 1.0, "rewards/chosen": -4.543064278550446e-05, "rewards/margins": 0.18162989616394043, "rewards/rejected": -0.18167531490325928, "step": 11240 }, { "epoch": 7.773858921161826, "grad_norm": 5.036294937133789, "learning_rate": 1.2367450437989857e-05, "log_odds_chosen": 11.093664169311523, "log_odds_ratio": -0.00019268778851255774, "logits/chosen": -0.466901034116745, "logits/rejected": -0.5163452625274658, "logps/chosen": -0.0002453480556141585, "logps/rejected": -1.9668821096420288, "loss": 0.6219, "nll_loss": 0.15545853972434998, "rewards/accuracies": 1.0, "rewards/chosen": -2.453480556141585e-05, "rewards/margins": 0.19666367769241333, "rewards/rejected": -0.1966882199048996, "step": 11241 }, { "epoch": 7.7745504840940525, "grad_norm": 7.579738140106201, "learning_rate": 1.2363608421699708e-05, "log_odds_chosen": 11.08250904083252, "log_odds_ratio": -0.00017796877364162356, "logits/chosen": -0.28047680854797363, "logits/rejected": -0.34084808826446533, "logps/chosen": -0.00011370638094376773, "logps/rejected": -2.112016201019287, "loss": 0.5131, "nll_loss": 0.12825888395309448, "rewards/accuracies": 1.0, "rewards/chosen": -1.1370637366781011e-05, "rewards/margins": 0.21119023859500885, "rewards/rejected": -0.21120160818099976, "step": 11242 }, { "epoch": 7.775242047026279, "grad_norm": 6.279829978942871, "learning_rate": 1.2359766405409559e-05, "log_odds_chosen": 10.941543579101562, "log_odds_ratio": -5.108896948513575e-05, "logits/chosen": -0.5631160140037537, "logits/rejected": -0.5826612114906311, "logps/chosen": -0.00022564077517017722, "logps/rejected": -1.958135962486267, "loss": 0.5337, "nll_loss": 0.1334185153245926, "rewards/accuracies": 1.0, "rewards/chosen": -2.2564077880815603e-05, "rewards/margins": 0.19579105079174042, "rewards/rejected": -0.1958136260509491, "step": 11243 }, { "epoch": 7.775933609958506, "grad_norm": 6.054798603057861, "learning_rate": 1.2355924389119411e-05, "log_odds_chosen": 10.729474067687988, "log_odds_ratio": -4.8919762775767595e-05, "logits/chosen": -0.3799744248390198, "logits/rejected": -0.4536796808242798, "logps/chosen": -0.00012781617988366634, "logps/rejected": -1.7896045446395874, "loss": 0.5936, "nll_loss": 0.1483994424343109, "rewards/accuracies": 1.0, "rewards/chosen": -1.2781618352164514e-05, "rewards/margins": 0.17894765734672546, "rewards/rejected": -0.17896045744419098, "step": 11244 }, { "epoch": 7.776625172890733, "grad_norm": 16.403644561767578, "learning_rate": 1.2352082372829262e-05, "log_odds_chosen": 10.198113441467285, "log_odds_ratio": -0.000842939771246165, "logits/chosen": -0.49508100748062134, "logits/rejected": -0.5563443303108215, "logps/chosen": -0.001958235399797559, "logps/rejected": -2.2131824493408203, "loss": 0.5583, "nll_loss": 0.1394910216331482, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019582355162128806, "rewards/margins": 0.22112241387367249, "rewards/rejected": -0.22131824493408203, "step": 11245 }, { "epoch": 7.77731673582296, "grad_norm": 3.130303382873535, "learning_rate": 1.2348240356539111e-05, "log_odds_chosen": 10.778337478637695, "log_odds_ratio": -3.900337833329104e-05, "logits/chosen": -0.6491649150848389, "logits/rejected": -0.6453468799591064, "logps/chosen": -0.0001705015602055937, "logps/rejected": -1.8707752227783203, "loss": 0.2614, "nll_loss": 0.06534810364246368, "rewards/accuracies": 1.0, "rewards/chosen": -1.705015529296361e-05, "rewards/margins": 0.18706047534942627, "rewards/rejected": -0.18707750737667084, "step": 11246 }, { "epoch": 7.778008298755187, "grad_norm": 4.403022289276123, "learning_rate": 1.2344398340248964e-05, "log_odds_chosen": 9.740409851074219, "log_odds_ratio": -0.0019135898910462856, "logits/chosen": -0.7549378871917725, "logits/rejected": -0.8666030168533325, "logps/chosen": -0.0005426771240308881, "logps/rejected": -1.688401460647583, "loss": 0.5451, "nll_loss": 0.136093869805336, "rewards/accuracies": 1.0, "rewards/chosen": -5.426771167549305e-05, "rewards/margins": 0.16878588497638702, "rewards/rejected": -0.16884015500545502, "step": 11247 }, { "epoch": 7.7786998616874135, "grad_norm": 5.307613849639893, "learning_rate": 1.2340556323958814e-05, "log_odds_chosen": 10.75597095489502, "log_odds_ratio": -3.23170970659703e-05, "logits/chosen": -0.4456849694252014, "logits/rejected": -0.46171778440475464, "logps/chosen": -0.0001080541405826807, "logps/rejected": -1.5207815170288086, "loss": 0.3725, "nll_loss": 0.0931098461151123, "rewards/accuracies": 1.0, "rewards/chosen": -1.080541369447019e-05, "rewards/margins": 0.15206734836101532, "rewards/rejected": -0.15207815170288086, "step": 11248 }, { "epoch": 7.77939142461964, "grad_norm": 8.923099517822266, "learning_rate": 1.2336714307668665e-05, "log_odds_chosen": 10.36142635345459, "log_odds_ratio": -7.614222704432905e-05, "logits/chosen": -0.6055785417556763, "logits/rejected": -0.6693324446678162, "logps/chosen": -0.00045596505515277386, "logps/rejected": -1.8207474946975708, "loss": 1.2258, "nll_loss": 0.3064342737197876, "rewards/accuracies": 1.0, "rewards/chosen": -4.5596505515277386e-05, "rewards/margins": 0.1820291429758072, "rewards/rejected": -0.18207474052906036, "step": 11249 }, { "epoch": 7.780082987551867, "grad_norm": 9.660794258117676, "learning_rate": 1.2332872291378516e-05, "log_odds_chosen": 11.588338851928711, "log_odds_ratio": -3.191693394910544e-05, "logits/chosen": 0.021339019760489464, "logits/rejected": -0.17270031571388245, "logps/chosen": -0.00010125144763151184, "logps/rejected": -2.145808219909668, "loss": 0.373, "nll_loss": 0.09324344992637634, "rewards/accuracies": 1.0, "rewards/chosen": -1.0125144399353303e-05, "rewards/margins": 0.21457070112228394, "rewards/rejected": -0.21458081901073456, "step": 11250 }, { "epoch": 7.780774550484094, "grad_norm": 4.1214728355407715, "learning_rate": 1.2329030275088367e-05, "log_odds_chosen": 11.965429306030273, "log_odds_ratio": -1.558518852107227e-05, "logits/chosen": -0.23796215653419495, "logits/rejected": -0.246348038315773, "logps/chosen": -0.0002504836884327233, "logps/rejected": -3.3777589797973633, "loss": 0.5093, "nll_loss": 0.1273270845413208, "rewards/accuracies": 1.0, "rewards/chosen": -2.5048371753655374e-05, "rewards/margins": 0.3377508819103241, "rewards/rejected": -0.33777591586112976, "step": 11251 }, { "epoch": 7.781466113416321, "grad_norm": 3.6593050956726074, "learning_rate": 1.2325188258798217e-05, "log_odds_chosen": 10.831826210021973, "log_odds_ratio": -3.931194805772975e-05, "logits/chosen": -0.3259609043598175, "logits/rejected": -0.3795009255409241, "logps/chosen": -0.00019056108430959284, "logps/rejected": -2.411302089691162, "loss": 0.4118, "nll_loss": 0.10293841361999512, "rewards/accuracies": 1.0, "rewards/chosen": -1.9056107703363523e-05, "rewards/margins": 0.241111159324646, "rewards/rejected": -0.24113020300865173, "step": 11252 }, { "epoch": 7.782157676348548, "grad_norm": 5.860049724578857, "learning_rate": 1.232134624250807e-05, "log_odds_chosen": 10.41196060180664, "log_odds_ratio": -0.00012101135507691652, "logits/chosen": -0.7500457167625427, "logits/rejected": -0.8380433320999146, "logps/chosen": -0.000306506990455091, "logps/rejected": -2.0738277435302734, "loss": 0.5564, "nll_loss": 0.13908186554908752, "rewards/accuracies": 1.0, "rewards/chosen": -3.065070268348791e-05, "rewards/margins": 0.20735211670398712, "rewards/rejected": -0.20738276839256287, "step": 11253 }, { "epoch": 7.782849239280774, "grad_norm": 4.6908674240112305, "learning_rate": 1.231750422621792e-05, "log_odds_chosen": 11.748579025268555, "log_odds_ratio": -1.6796086129033938e-05, "logits/chosen": -0.45081204175949097, "logits/rejected": -0.5256200432777405, "logps/chosen": -0.000127775885630399, "logps/rejected": -2.526742935180664, "loss": 0.5556, "nll_loss": 0.13889187574386597, "rewards/accuracies": 1.0, "rewards/chosen": -1.277758929063566e-05, "rewards/margins": 0.2526615262031555, "rewards/rejected": -0.25267428159713745, "step": 11254 }, { "epoch": 7.783540802213001, "grad_norm": 5.625558376312256, "learning_rate": 1.2313662209927771e-05, "log_odds_chosen": 11.430329322814941, "log_odds_ratio": -4.6198027121135965e-05, "logits/chosen": -0.3089427053928375, "logits/rejected": -0.3261297047138214, "logps/chosen": -0.000235933912335895, "logps/rejected": -2.89105224609375, "loss": 0.4137, "nll_loss": 0.10342574119567871, "rewards/accuracies": 1.0, "rewards/chosen": -2.35933912335895e-05, "rewards/margins": 0.2890816330909729, "rewards/rejected": -0.28910520672798157, "step": 11255 }, { "epoch": 7.784232365145228, "grad_norm": 4.9262189865112305, "learning_rate": 1.2309820193637622e-05, "log_odds_chosen": 9.46239948272705, "log_odds_ratio": -0.0007331681554205716, "logits/chosen": -0.5546839833259583, "logits/rejected": -0.7506892085075378, "logps/chosen": -0.001192574854940176, "logps/rejected": -1.8165473937988281, "loss": 0.4424, "nll_loss": 0.11051993072032928, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001192574854940176, "rewards/margins": 0.1815354973077774, "rewards/rejected": -0.18165475130081177, "step": 11256 }, { "epoch": 7.784923928077455, "grad_norm": 4.006134033203125, "learning_rate": 1.2305978177347473e-05, "log_odds_chosen": 11.54184341430664, "log_odds_ratio": -1.7175989341922104e-05, "logits/chosen": -0.20995602011680603, "logits/rejected": -0.19702300429344177, "logps/chosen": -0.00015768143930472434, "logps/rejected": -2.5994372367858887, "loss": 0.3523, "nll_loss": 0.08806820958852768, "rewards/accuracies": 1.0, "rewards/chosen": -1.5768144294270314e-05, "rewards/margins": 0.2599279582500458, "rewards/rejected": -0.25994372367858887, "step": 11257 }, { "epoch": 7.785615491009682, "grad_norm": 7.681565761566162, "learning_rate": 1.2302136161057323e-05, "log_odds_chosen": 11.597307205200195, "log_odds_ratio": -0.00026457561762072146, "logits/chosen": -0.3409126400947571, "logits/rejected": -0.40174397826194763, "logps/chosen": -0.00023098831297829747, "logps/rejected": -3.1674602031707764, "loss": 0.4596, "nll_loss": 0.11487455666065216, "rewards/accuracies": 1.0, "rewards/chosen": -2.3098831661627628e-05, "rewards/margins": 0.31672292947769165, "rewards/rejected": -0.3167460262775421, "step": 11258 }, { "epoch": 7.786307053941909, "grad_norm": 7.546180725097656, "learning_rate": 1.2298294144767174e-05, "log_odds_chosen": 10.36526107788086, "log_odds_ratio": -5.682875053025782e-05, "logits/chosen": -0.2339021861553192, "logits/rejected": -0.2601647973060608, "logps/chosen": -0.0003002375306095928, "logps/rejected": -2.0971832275390625, "loss": 0.5309, "nll_loss": 0.13272957503795624, "rewards/accuracies": 1.0, "rewards/chosen": -3.0023751605767757e-05, "rewards/margins": 0.20968829095363617, "rewards/rejected": -0.20971833169460297, "step": 11259 }, { "epoch": 7.786998616874135, "grad_norm": 7.056270122528076, "learning_rate": 1.2294452128477025e-05, "log_odds_chosen": 11.92055606842041, "log_odds_ratio": -0.00018327760335523635, "logits/chosen": -0.22537848353385925, "logits/rejected": -0.3611562252044678, "logps/chosen": -0.0003575455048121512, "logps/rejected": -3.3726859092712402, "loss": 0.4225, "nll_loss": 0.1056140884757042, "rewards/accuracies": 1.0, "rewards/chosen": -3.5754554119193926e-05, "rewards/margins": 0.3372328579425812, "rewards/rejected": -0.3372686207294464, "step": 11260 }, { "epoch": 7.787690179806362, "grad_norm": 4.214644432067871, "learning_rate": 1.2290610112186876e-05, "log_odds_chosen": 10.311920166015625, "log_odds_ratio": -0.0001279880088986829, "logits/chosen": -0.09888796508312225, "logits/rejected": -0.12999199330806732, "logps/chosen": -0.00010510604624869302, "logps/rejected": -1.3345422744750977, "loss": 0.715, "nll_loss": 0.17872878909111023, "rewards/accuracies": 1.0, "rewards/chosen": -1.0510604624869302e-05, "rewards/margins": 0.13344372808933258, "rewards/rejected": -0.13345423340797424, "step": 11261 }, { "epoch": 7.788381742738589, "grad_norm": 6.3505706787109375, "learning_rate": 1.2286768095896726e-05, "log_odds_chosen": 12.054739952087402, "log_odds_ratio": -2.838961881934665e-05, "logits/chosen": -0.29715636372566223, "logits/rejected": -0.3512900471687317, "logps/chosen": -0.00019150454318150878, "logps/rejected": -2.992788791656494, "loss": 0.3565, "nll_loss": 0.08911924809217453, "rewards/accuracies": 1.0, "rewards/chosen": -1.915045504574664e-05, "rewards/margins": 0.2992597222328186, "rewards/rejected": -0.2992788851261139, "step": 11262 }, { "epoch": 7.789073305670816, "grad_norm": 6.878966808319092, "learning_rate": 1.2282926079606579e-05, "log_odds_chosen": 10.905218124389648, "log_odds_ratio": -3.369908517925069e-05, "logits/chosen": -0.27406126260757446, "logits/rejected": -0.3869612216949463, "logps/chosen": -0.00012873244122602046, "logps/rejected": -1.9237127304077148, "loss": 0.3148, "nll_loss": 0.07870644330978394, "rewards/accuracies": 1.0, "rewards/chosen": -1.287324630538933e-05, "rewards/margins": 0.19235840439796448, "rewards/rejected": -0.19237127900123596, "step": 11263 }, { "epoch": 7.789764868603043, "grad_norm": 8.763483047485352, "learning_rate": 1.227908406331643e-05, "log_odds_chosen": 10.882516860961914, "log_odds_ratio": -4.163644916843623e-05, "logits/chosen": -0.25387126207351685, "logits/rejected": -0.27315399050712585, "logps/chosen": -0.0001641416602069512, "logps/rejected": -2.1190123558044434, "loss": 0.4698, "nll_loss": 0.11745646595954895, "rewards/accuracies": 1.0, "rewards/chosen": -1.641416565689724e-05, "rewards/margins": 0.21188482642173767, "rewards/rejected": -0.2119012475013733, "step": 11264 }, { "epoch": 7.79045643153527, "grad_norm": 3.134868860244751, "learning_rate": 1.2275242047026279e-05, "log_odds_chosen": 10.67641830444336, "log_odds_ratio": -4.588044248521328e-05, "logits/chosen": 0.09237219393253326, "logits/rejected": 0.11966807395219803, "logps/chosen": -0.0004638899117708206, "logps/rejected": -2.1800992488861084, "loss": 0.434, "nll_loss": 0.10850457102060318, "rewards/accuracies": 1.0, "rewards/chosen": -4.6388988266699016e-05, "rewards/margins": 0.21796353161334991, "rewards/rejected": -0.21800991892814636, "step": 11265 }, { "epoch": 7.791147994467496, "grad_norm": 4.402527332305908, "learning_rate": 1.2271400030736131e-05, "log_odds_chosen": 10.308174133300781, "log_odds_ratio": -0.00018428012845106423, "logits/chosen": -0.2486291527748108, "logits/rejected": -0.38063308596611023, "logps/chosen": -0.0004640338593162596, "logps/rejected": -2.1923608779907227, "loss": 0.3503, "nll_loss": 0.08756566047668457, "rewards/accuracies": 1.0, "rewards/chosen": -4.6403387386817485e-05, "rewards/margins": 0.21918968856334686, "rewards/rejected": -0.2192360907793045, "step": 11266 }, { "epoch": 7.791839557399723, "grad_norm": 5.159994125366211, "learning_rate": 1.2267558014445982e-05, "log_odds_chosen": 9.89212417602539, "log_odds_ratio": -0.00020230353402439505, "logits/chosen": -0.1726974993944168, "logits/rejected": -0.2927550673484802, "logps/chosen": -0.0005461431574076414, "logps/rejected": -1.6139581203460693, "loss": 0.4721, "nll_loss": 0.11800789833068848, "rewards/accuracies": 1.0, "rewards/chosen": -5.4614312830381095e-05, "rewards/margins": 0.16134120523929596, "rewards/rejected": -0.1613958179950714, "step": 11267 }, { "epoch": 7.79253112033195, "grad_norm": 4.365382671356201, "learning_rate": 1.2263715998155833e-05, "log_odds_chosen": 10.91610336303711, "log_odds_ratio": -8.126306056510657e-05, "logits/chosen": 0.11642088741064072, "logits/rejected": 0.125750333070755, "logps/chosen": -0.00019425661594141275, "logps/rejected": -2.039365768432617, "loss": 0.3607, "nll_loss": 0.09015924483537674, "rewards/accuracies": 1.0, "rewards/chosen": -1.9425662685534917e-05, "rewards/margins": 0.20391714572906494, "rewards/rejected": -0.20393657684326172, "step": 11268 }, { "epoch": 7.793222683264177, "grad_norm": 5.306851863861084, "learning_rate": 1.2259873981865683e-05, "log_odds_chosen": 11.79000186920166, "log_odds_ratio": -6.93091715220362e-05, "logits/chosen": -0.3146926760673523, "logits/rejected": -0.21157246828079224, "logps/chosen": -0.00012302336108405143, "logps/rejected": -2.6283581256866455, "loss": 0.3521, "nll_loss": 0.08803052455186844, "rewards/accuracies": 1.0, "rewards/chosen": -1.2302336472203024e-05, "rewards/margins": 0.26282352209091187, "rewards/rejected": -0.2628358006477356, "step": 11269 }, { "epoch": 7.793914246196404, "grad_norm": 3.641671895980835, "learning_rate": 1.2256031965575534e-05, "log_odds_chosen": 10.492412567138672, "log_odds_ratio": -6.292194302659482e-05, "logits/chosen": -0.20354798436164856, "logits/rejected": -0.21678128838539124, "logps/chosen": -0.0001462570216972381, "logps/rejected": -1.646343469619751, "loss": 0.2938, "nll_loss": 0.07344779372215271, "rewards/accuracies": 1.0, "rewards/chosen": -1.462570253352169e-05, "rewards/margins": 0.16461974382400513, "rewards/rejected": -0.1646343618631363, "step": 11270 }, { "epoch": 7.7946058091286305, "grad_norm": 3.143221855163574, "learning_rate": 1.2252189949285385e-05, "log_odds_chosen": 10.551334381103516, "log_odds_ratio": -0.00016412035620305687, "logits/chosen": -0.6718156337738037, "logits/rejected": -0.7369803190231323, "logps/chosen": -0.00035478276549838483, "logps/rejected": -2.0037951469421387, "loss": 0.3303, "nll_loss": 0.08257079869508743, "rewards/accuracies": 1.0, "rewards/chosen": -3.547827873262577e-05, "rewards/margins": 0.2003440409898758, "rewards/rejected": -0.20037952065467834, "step": 11271 }, { "epoch": 7.795297372060857, "grad_norm": 4.969653129577637, "learning_rate": 1.2248347932995237e-05, "log_odds_chosen": 11.730992317199707, "log_odds_ratio": -2.987973130075261e-05, "logits/chosen": -0.39510875940322876, "logits/rejected": -0.46338707208633423, "logps/chosen": -0.00012269796570762992, "logps/rejected": -2.4157376289367676, "loss": 0.4528, "nll_loss": 0.11319657415151596, "rewards/accuracies": 1.0, "rewards/chosen": -1.2269796570762992e-05, "rewards/margins": 0.24156148731708527, "rewards/rejected": -0.2415737509727478, "step": 11272 }, { "epoch": 7.795988934993084, "grad_norm": 5.745721340179443, "learning_rate": 1.2244505916705088e-05, "log_odds_chosen": 10.081501960754395, "log_odds_ratio": -0.00018471617659088224, "logits/chosen": -0.12037422508001328, "logits/rejected": -0.16611051559448242, "logps/chosen": -0.00048183047329075634, "logps/rejected": -1.7776180505752563, "loss": 0.4966, "nll_loss": 0.12412483990192413, "rewards/accuracies": 1.0, "rewards/chosen": -4.818305023945868e-05, "rewards/margins": 0.17771361768245697, "rewards/rejected": -0.17776180803775787, "step": 11273 }, { "epoch": 7.796680497925311, "grad_norm": 5.706189155578613, "learning_rate": 1.2240663900414937e-05, "log_odds_chosen": 9.718204498291016, "log_odds_ratio": -0.0006743724225088954, "logits/chosen": 0.06922922283411026, "logits/rejected": 0.03767970949411392, "logps/chosen": -0.0011344578815624118, "logps/rejected": -1.807583212852478, "loss": 0.4193, "nll_loss": 0.10475035011768341, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011344579252181575, "rewards/margins": 0.18064486980438232, "rewards/rejected": -0.18075832724571228, "step": 11274 }, { "epoch": 7.797372060857538, "grad_norm": 4.138829231262207, "learning_rate": 1.223682188412479e-05, "log_odds_chosen": 10.995828628540039, "log_odds_ratio": -3.327604281366803e-05, "logits/chosen": -0.8057668209075928, "logits/rejected": -0.7984911203384399, "logps/chosen": -0.0002747337566688657, "logps/rejected": -2.0947799682617188, "loss": 0.3938, "nll_loss": 0.09843438118696213, "rewards/accuracies": 1.0, "rewards/chosen": -2.7473375666886568e-05, "rewards/margins": 0.20945051312446594, "rewards/rejected": -0.2094779908657074, "step": 11275 }, { "epoch": 7.798063623789765, "grad_norm": 5.856276512145996, "learning_rate": 1.223297986783464e-05, "log_odds_chosen": 10.868249893188477, "log_odds_ratio": -0.00011836976773338392, "logits/chosen": -0.3161469101905823, "logits/rejected": -0.42098069190979004, "logps/chosen": -0.0003598304174374789, "logps/rejected": -2.4453377723693848, "loss": 0.4851, "nll_loss": 0.12127453088760376, "rewards/accuracies": 1.0, "rewards/chosen": -3.598304101615213e-05, "rewards/margins": 0.24449782073497772, "rewards/rejected": -0.24453380703926086, "step": 11276 }, { "epoch": 7.7987551867219915, "grad_norm": 5.088725566864014, "learning_rate": 1.2229137851544491e-05, "log_odds_chosen": 10.577735900878906, "log_odds_ratio": -0.0001267546758754179, "logits/chosen": -0.6664071083068848, "logits/rejected": -0.6439728140830994, "logps/chosen": -0.0004955183831043541, "logps/rejected": -2.272801399230957, "loss": 0.6415, "nll_loss": 0.16036275029182434, "rewards/accuracies": 1.0, "rewards/chosen": -4.9551843403605744e-05, "rewards/margins": 0.22723057866096497, "rewards/rejected": -0.2272801399230957, "step": 11277 }, { "epoch": 7.799446749654218, "grad_norm": 3.760568857192993, "learning_rate": 1.2225295835254342e-05, "log_odds_chosen": 11.093595504760742, "log_odds_ratio": -3.8573536585317925e-05, "logits/chosen": -0.03364332765340805, "logits/rejected": 0.08477126061916351, "logps/chosen": -0.00011093143257312477, "logps/rejected": -2.148498773574829, "loss": 0.3584, "nll_loss": 0.08959043025970459, "rewards/accuracies": 1.0, "rewards/chosen": -1.1093143257312477e-05, "rewards/margins": 0.21483880281448364, "rewards/rejected": -0.21484988927841187, "step": 11278 }, { "epoch": 7.800138312586445, "grad_norm": 4.837119102478027, "learning_rate": 1.2221453818964193e-05, "log_odds_chosen": 10.585123062133789, "log_odds_ratio": -0.00012280464579816908, "logits/chosen": -0.39586615562438965, "logits/rejected": -0.3422686755657196, "logps/chosen": -0.0003601500065997243, "logps/rejected": -2.448976516723633, "loss": 0.4123, "nll_loss": 0.10305467993021011, "rewards/accuracies": 1.0, "rewards/chosen": -3.601500065997243e-05, "rewards/margins": 0.2448616325855255, "rewards/rejected": -0.24489764869213104, "step": 11279 }, { "epoch": 7.800829875518672, "grad_norm": 6.671242713928223, "learning_rate": 1.2217611802674043e-05, "log_odds_chosen": 10.349712371826172, "log_odds_ratio": -0.00029979500686749816, "logits/chosen": -0.01447632908821106, "logits/rejected": -0.006284177303314209, "logps/chosen": -0.0008054234203882515, "logps/rejected": -2.1930935382843018, "loss": 0.5554, "nll_loss": 0.13880924880504608, "rewards/accuracies": 1.0, "rewards/chosen": -8.054233330767602e-05, "rewards/margins": 0.2192288339138031, "rewards/rejected": -0.21930935978889465, "step": 11280 }, { "epoch": 7.801521438450899, "grad_norm": 4.729337692260742, "learning_rate": 1.2213769786383896e-05, "log_odds_chosen": 10.608678817749023, "log_odds_ratio": -0.00032640784047544, "logits/chosen": -0.19049939513206482, "logits/rejected": -0.23994635045528412, "logps/chosen": -0.00044982333201915026, "logps/rejected": -2.493246078491211, "loss": 0.4774, "nll_loss": 0.11930903792381287, "rewards/accuracies": 1.0, "rewards/chosen": -4.498233101912774e-05, "rewards/margins": 0.24927963316440582, "rewards/rejected": -0.24932461977005005, "step": 11281 }, { "epoch": 7.802213001383126, "grad_norm": 4.366937637329102, "learning_rate": 1.2209927770093747e-05, "log_odds_chosen": 9.542795181274414, "log_odds_ratio": -0.00035635344102047384, "logits/chosen": -0.5525764226913452, "logits/rejected": -0.411281943321228, "logps/chosen": -0.00037306107697077096, "logps/rejected": -1.736010193824768, "loss": 0.3269, "nll_loss": 0.08169664442539215, "rewards/accuracies": 1.0, "rewards/chosen": -3.7306112062651664e-05, "rewards/margins": 0.17356370389461517, "rewards/rejected": -0.17360101640224457, "step": 11282 }, { "epoch": 7.8029045643153525, "grad_norm": 5.749885559082031, "learning_rate": 1.2206085753803596e-05, "log_odds_chosen": 10.277787208557129, "log_odds_ratio": -0.000137269904371351, "logits/chosen": -0.5405263900756836, "logits/rejected": -0.5402116775512695, "logps/chosen": -0.00047800407628528774, "logps/rejected": -2.369875907897949, "loss": 0.7689, "nll_loss": 0.19220447540283203, "rewards/accuracies": 1.0, "rewards/chosen": -4.7800407628528774e-05, "rewards/margins": 0.23693978786468506, "rewards/rejected": -0.23698759078979492, "step": 11283 }, { "epoch": 7.803596127247579, "grad_norm": 4.322267055511475, "learning_rate": 1.2202243737513448e-05, "log_odds_chosen": 11.628303527832031, "log_odds_ratio": -1.104071725421818e-05, "logits/chosen": 0.1335178166627884, "logits/rejected": 0.13221508264541626, "logps/chosen": -0.00010630176984705031, "logps/rejected": -2.399397373199463, "loss": 0.6508, "nll_loss": 0.16270360350608826, "rewards/accuracies": 1.0, "rewards/chosen": -1.0630175893311389e-05, "rewards/margins": 0.23992910981178284, "rewards/rejected": -0.23993973433971405, "step": 11284 }, { "epoch": 7.804287690179806, "grad_norm": 4.353506088256836, "learning_rate": 1.2198401721223299e-05, "log_odds_chosen": 11.208612442016602, "log_odds_ratio": -5.571136352955364e-05, "logits/chosen": -0.5028769969940186, "logits/rejected": -0.4641305208206177, "logps/chosen": -0.00043331741471774876, "logps/rejected": -2.2058372497558594, "loss": 0.3791, "nll_loss": 0.09477502107620239, "rewards/accuracies": 1.0, "rewards/chosen": -4.333174001658335e-05, "rewards/margins": 0.220540389418602, "rewards/rejected": -0.2205837368965149, "step": 11285 }, { "epoch": 7.804979253112033, "grad_norm": 5.568900108337402, "learning_rate": 1.219455970493315e-05, "log_odds_chosen": 11.898628234863281, "log_odds_ratio": -3.059850132558495e-05, "logits/chosen": -0.48825883865356445, "logits/rejected": -0.575466513633728, "logps/chosen": -0.0002759605704341084, "logps/rejected": -2.878927230834961, "loss": 0.3277, "nll_loss": 0.08191083371639252, "rewards/accuracies": 1.0, "rewards/chosen": -2.759605740720872e-05, "rewards/margins": 0.2878651022911072, "rewards/rejected": -0.2878926992416382, "step": 11286 }, { "epoch": 7.80567081604426, "grad_norm": 3.559267997741699, "learning_rate": 1.2190717688643e-05, "log_odds_chosen": 10.472837448120117, "log_odds_ratio": -8.084255387075245e-05, "logits/chosen": -0.303886741399765, "logits/rejected": -0.3993980884552002, "logps/chosen": -0.0005313065485097468, "logps/rejected": -2.0672004222869873, "loss": 0.5227, "nll_loss": 0.13066476583480835, "rewards/accuracies": 1.0, "rewards/chosen": -5.313065412337892e-05, "rewards/margins": 0.20666691660881042, "rewards/rejected": -0.2067200392484665, "step": 11287 }, { "epoch": 7.806362378976487, "grad_norm": 4.979940414428711, "learning_rate": 1.2186875672352851e-05, "log_odds_chosen": 10.651639938354492, "log_odds_ratio": -0.00013395682617556304, "logits/chosen": 0.0262556504458189, "logits/rejected": -0.04236229509115219, "logps/chosen": -0.0012710446026176214, "logps/rejected": -2.7116594314575195, "loss": 0.4917, "nll_loss": 0.12290848791599274, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012710444570984691, "rewards/margins": 0.27103888988494873, "rewards/rejected": -0.27116596698760986, "step": 11288 }, { "epoch": 7.8070539419087135, "grad_norm": 5.531417369842529, "learning_rate": 1.2183033656062702e-05, "log_odds_chosen": 10.462217330932617, "log_odds_ratio": -7.152614853112027e-05, "logits/chosen": -0.1546785831451416, "logits/rejected": -0.28727149963378906, "logps/chosen": -0.0002609742514323443, "logps/rejected": -1.9173094034194946, "loss": 0.5325, "nll_loss": 0.13312757015228271, "rewards/accuracies": 1.0, "rewards/chosen": -2.609742477943655e-05, "rewards/margins": 0.19170483946800232, "rewards/rejected": -0.19173094630241394, "step": 11289 }, { "epoch": 7.80774550484094, "grad_norm": 4.879218101501465, "learning_rate": 1.2179191639772554e-05, "log_odds_chosen": 10.708136558532715, "log_odds_ratio": -0.0003337001253385097, "logits/chosen": -0.17093488574028015, "logits/rejected": -0.2471480518579483, "logps/chosen": -0.00016180059174075723, "logps/rejected": -1.9442338943481445, "loss": 0.6707, "nll_loss": 0.16764989495277405, "rewards/accuracies": 1.0, "rewards/chosen": -1.6180059901671484e-05, "rewards/margins": 0.19440722465515137, "rewards/rejected": -0.1944233924150467, "step": 11290 }, { "epoch": 7.808437067773167, "grad_norm": 4.727686405181885, "learning_rate": 1.2175349623482405e-05, "log_odds_chosen": 10.129864692687988, "log_odds_ratio": -0.00031297910027205944, "logits/chosen": 0.06406151503324509, "logits/rejected": 0.027005136013031006, "logps/chosen": -0.000604638596996665, "logps/rejected": -2.3089687824249268, "loss": 0.6432, "nll_loss": 0.16076260805130005, "rewards/accuracies": 1.0, "rewards/chosen": -6.046385897207074e-05, "rewards/margins": 0.230836421251297, "rewards/rejected": -0.23089689016342163, "step": 11291 }, { "epoch": 7.809128630705394, "grad_norm": 4.830107688903809, "learning_rate": 1.2171507607192254e-05, "log_odds_chosen": 11.263401985168457, "log_odds_ratio": -2.79559098999016e-05, "logits/chosen": -0.01324993371963501, "logits/rejected": -0.14258237183094025, "logps/chosen": -7.992141763679683e-05, "logps/rejected": -1.9093828201293945, "loss": 0.4813, "nll_loss": 0.12033096700906754, "rewards/accuracies": 1.0, "rewards/chosen": -7.992141036083922e-06, "rewards/margins": 0.19093027710914612, "rewards/rejected": -0.19093827903270721, "step": 11292 }, { "epoch": 7.809820193637621, "grad_norm": 10.839892387390137, "learning_rate": 1.2167665590902105e-05, "log_odds_chosen": 10.911890983581543, "log_odds_ratio": -0.00012246929691173136, "logits/chosen": -0.04008027911186218, "logits/rejected": -0.20957855880260468, "logps/chosen": -0.00028015434509143233, "logps/rejected": -1.8444762229919434, "loss": 0.6795, "nll_loss": 0.16986694931983948, "rewards/accuracies": 1.0, "rewards/chosen": -2.801543269015383e-05, "rewards/margins": 0.18441961705684662, "rewards/rejected": -0.18444763123989105, "step": 11293 }, { "epoch": 7.810511756569848, "grad_norm": 4.458373069763184, "learning_rate": 1.2163823574611957e-05, "log_odds_chosen": 10.007013320922852, "log_odds_ratio": -9.765625145519152e-05, "logits/chosen": -0.30763986706733704, "logits/rejected": -0.3580760359764099, "logps/chosen": -0.0020079452078789473, "logps/rejected": -2.068343162536621, "loss": 0.4123, "nll_loss": 0.10306943207979202, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020079451496712863, "rewards/margins": 0.206633523106575, "rewards/rejected": -0.2068343162536621, "step": 11294 }, { "epoch": 7.8112033195020745, "grad_norm": 10.859085083007812, "learning_rate": 1.2159981558321808e-05, "log_odds_chosen": 11.415952682495117, "log_odds_ratio": -3.398352055228315e-05, "logits/chosen": -0.49685126543045044, "logits/rejected": -0.4071289300918579, "logps/chosen": -0.00017499136447440833, "logps/rejected": -2.3988001346588135, "loss": 0.3024, "nll_loss": 0.07560379803180695, "rewards/accuracies": 1.0, "rewards/chosen": -1.7499136447440833e-05, "rewards/margins": 0.23986250162124634, "rewards/rejected": -0.2398800104856491, "step": 11295 }, { "epoch": 7.811894882434301, "grad_norm": 4.0313897132873535, "learning_rate": 1.2156139542031659e-05, "log_odds_chosen": 10.66956901550293, "log_odds_ratio": -0.00013550990843214095, "logits/chosen": -0.30885058641433716, "logits/rejected": -0.44245588779449463, "logps/chosen": -0.00030423677526414394, "logps/rejected": -2.413630962371826, "loss": 0.4288, "nll_loss": 0.10718473792076111, "rewards/accuracies": 1.0, "rewards/chosen": -3.0423678254010156e-05, "rewards/margins": 0.24133266508579254, "rewards/rejected": -0.24136309325695038, "step": 11296 }, { "epoch": 7.812586445366528, "grad_norm": 10.214454650878906, "learning_rate": 1.215229752574151e-05, "log_odds_chosen": 11.460878372192383, "log_odds_ratio": -2.284135189256631e-05, "logits/chosen": -0.31639039516448975, "logits/rejected": -0.3789241313934326, "logps/chosen": -0.00014475402713287622, "logps/rejected": -2.2148890495300293, "loss": 0.5287, "nll_loss": 0.13216793537139893, "rewards/accuracies": 1.0, "rewards/chosen": -1.4475403986580204e-05, "rewards/margins": 0.22147443890571594, "rewards/rejected": -0.22148890793323517, "step": 11297 }, { "epoch": 7.813278008298755, "grad_norm": 6.10732889175415, "learning_rate": 1.214845550945136e-05, "log_odds_chosen": 11.131383895874023, "log_odds_ratio": -4.605175490723923e-05, "logits/chosen": -0.15626344084739685, "logits/rejected": -0.3107757568359375, "logps/chosen": -0.00013610447058454156, "logps/rejected": -2.1935107707977295, "loss": 0.2989, "nll_loss": 0.07473208755254745, "rewards/accuracies": 1.0, "rewards/chosen": -1.3610448149847798e-05, "rewards/margins": 0.21933746337890625, "rewards/rejected": -0.21935108304023743, "step": 11298 }, { "epoch": 7.813969571230982, "grad_norm": 4.278329372406006, "learning_rate": 1.2144613493161211e-05, "log_odds_chosen": 11.866846084594727, "log_odds_ratio": -1.1192752936040051e-05, "logits/chosen": -0.49567800760269165, "logits/rejected": -0.5212503671646118, "logps/chosen": -0.00011445317068137228, "logps/rejected": -2.430196762084961, "loss": 0.5736, "nll_loss": 0.14339041709899902, "rewards/accuracies": 1.0, "rewards/chosen": -1.144531779573299e-05, "rewards/margins": 0.24300822615623474, "rewards/rejected": -0.24301967024803162, "step": 11299 }, { "epoch": 7.814661134163209, "grad_norm": 4.524301528930664, "learning_rate": 1.2140771476871063e-05, "log_odds_chosen": 10.06701946258545, "log_odds_ratio": -0.000978961936198175, "logits/chosen": -0.029292069375514984, "logits/rejected": 0.005871415138244629, "logps/chosen": -0.0012817583046853542, "logps/rejected": -2.1486034393310547, "loss": 0.4852, "nll_loss": 0.12119489163160324, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012817583046853542, "rewards/margins": 0.21473215520381927, "rewards/rejected": -0.21486034989356995, "step": 11300 }, { "epoch": 7.8153526970954355, "grad_norm": 4.445444107055664, "learning_rate": 1.2136929460580914e-05, "log_odds_chosen": 11.179705619812012, "log_odds_ratio": -0.00019098444317933172, "logits/chosen": 0.027649089694023132, "logits/rejected": -0.013102950528264046, "logps/chosen": -0.0003603753575589508, "logps/rejected": -2.5527210235595703, "loss": 0.4274, "nll_loss": 0.10682663321495056, "rewards/accuracies": 1.0, "rewards/chosen": -3.603753793868236e-05, "rewards/margins": 0.25523608922958374, "rewards/rejected": -0.25527212023735046, "step": 11301 }, { "epoch": 7.816044260027662, "grad_norm": 4.457228660583496, "learning_rate": 1.2133087444290763e-05, "log_odds_chosen": 8.977705001831055, "log_odds_ratio": -0.00042235839646309614, "logits/chosen": -0.599679172039032, "logits/rejected": -0.5758978724479675, "logps/chosen": -0.006479393225163221, "logps/rejected": -1.8167825937271118, "loss": 0.3143, "nll_loss": 0.07853072136640549, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006479393341578543, "rewards/margins": 0.18103033304214478, "rewards/rejected": -0.18167826533317566, "step": 11302 }, { "epoch": 7.816735822959889, "grad_norm": 4.476196765899658, "learning_rate": 1.2129245428000616e-05, "log_odds_chosen": 10.765111923217773, "log_odds_ratio": -4.8187474021688104e-05, "logits/chosen": -0.2516087591648102, "logits/rejected": -0.33950358629226685, "logps/chosen": -0.00015329348389059305, "logps/rejected": -1.8095844984054565, "loss": 0.453, "nll_loss": 0.11323701590299606, "rewards/accuracies": 1.0, "rewards/chosen": -1.5329349480452947e-05, "rewards/margins": 0.18094313144683838, "rewards/rejected": -0.18095846474170685, "step": 11303 }, { "epoch": 7.817427385892116, "grad_norm": 3.8020856380462646, "learning_rate": 1.2125403411710466e-05, "log_odds_chosen": 11.011605262756348, "log_odds_ratio": -3.462144377408549e-05, "logits/chosen": -0.2966512441635132, "logits/rejected": -0.3090302050113678, "logps/chosen": -8.981661812867969e-05, "logps/rejected": -1.5665464401245117, "loss": 0.3049, "nll_loss": 0.07622778415679932, "rewards/accuracies": 1.0, "rewards/chosen": -8.98166217666585e-06, "rewards/margins": 0.15664567053318024, "rewards/rejected": -0.15665464103221893, "step": 11304 }, { "epoch": 7.818118948824343, "grad_norm": 4.5586371421813965, "learning_rate": 1.2121561395420317e-05, "log_odds_chosen": 9.883445739746094, "log_odds_ratio": -0.00030278839403763413, "logits/chosen": -0.13593541085720062, "logits/rejected": -0.10266265273094177, "logps/chosen": -0.0006718160002492368, "logps/rejected": -1.2286714315414429, "loss": 0.3804, "nll_loss": 0.09507386386394501, "rewards/accuracies": 1.0, "rewards/chosen": -6.718160875607282e-05, "rewards/margins": 0.12279996275901794, "rewards/rejected": -0.122867152094841, "step": 11305 }, { "epoch": 7.81881051175657, "grad_norm": 7.4004387855529785, "learning_rate": 1.2117719379130168e-05, "log_odds_chosen": 10.306143760681152, "log_odds_ratio": -9.625229722587392e-05, "logits/chosen": -0.35738950967788696, "logits/rejected": -0.29458296298980713, "logps/chosen": -0.0006742849946022034, "logps/rejected": -2.2270631790161133, "loss": 0.401, "nll_loss": 0.1002361923456192, "rewards/accuracies": 1.0, "rewards/chosen": -6.74285038257949e-05, "rewards/margins": 0.22263889014720917, "rewards/rejected": -0.22270631790161133, "step": 11306 }, { "epoch": 7.819502074688796, "grad_norm": 5.4306440353393555, "learning_rate": 1.2113877362840019e-05, "log_odds_chosen": 10.84564208984375, "log_odds_ratio": -0.000269198149908334, "logits/chosen": -0.039085280150175095, "logits/rejected": 0.03959418460726738, "logps/chosen": -0.0005255647120065987, "logps/rejected": -2.5015833377838135, "loss": 0.3889, "nll_loss": 0.09719796478748322, "rewards/accuracies": 1.0, "rewards/chosen": -5.255647556623444e-05, "rewards/margins": 0.2501057982444763, "rewards/rejected": -0.2501583397388458, "step": 11307 }, { "epoch": 7.820193637621023, "grad_norm": 4.5981125831604, "learning_rate": 1.211003534654987e-05, "log_odds_chosen": 11.37891960144043, "log_odds_ratio": -6.482828757725656e-05, "logits/chosen": -0.5004502534866333, "logits/rejected": -0.6703431010246277, "logps/chosen": -0.00022166120470501482, "logps/rejected": -2.438640594482422, "loss": 0.4862, "nll_loss": 0.1215510442852974, "rewards/accuracies": 1.0, "rewards/chosen": -2.2166121198097244e-05, "rewards/margins": 0.24384190142154694, "rewards/rejected": -0.24386407434940338, "step": 11308 }, { "epoch": 7.82088520055325, "grad_norm": 6.168004035949707, "learning_rate": 1.2106193330259722e-05, "log_odds_chosen": 10.734867095947266, "log_odds_ratio": -0.0005011963658034801, "logits/chosen": -0.5463290810585022, "logits/rejected": -0.5193485617637634, "logps/chosen": -0.001047707861289382, "logps/rejected": -2.4238944053649902, "loss": 0.6156, "nll_loss": 0.1538413017988205, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010477077739778906, "rewards/margins": 0.24228468537330627, "rewards/rejected": -0.24238945543766022, "step": 11309 }, { "epoch": 7.821576763485477, "grad_norm": 3.5904340744018555, "learning_rate": 1.2102351313969573e-05, "log_odds_chosen": 10.541328430175781, "log_odds_ratio": -9.438677807338536e-05, "logits/chosen": -0.26121988892555237, "logits/rejected": -0.27295538783073425, "logps/chosen": -0.0011980930576100945, "logps/rejected": -2.52219820022583, "loss": 0.4023, "nll_loss": 0.1005641371011734, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001198093086713925, "rewards/margins": 0.25209999084472656, "rewards/rejected": -0.2522197961807251, "step": 11310 }, { "epoch": 7.822268326417704, "grad_norm": 4.74156379699707, "learning_rate": 1.2098509297679422e-05, "log_odds_chosen": 10.732425689697266, "log_odds_ratio": -0.00021223133080638945, "logits/chosen": -0.5578321218490601, "logits/rejected": -0.6234152317047119, "logps/chosen": -0.00027943385066464543, "logps/rejected": -2.379189968109131, "loss": 0.4482, "nll_loss": 0.11202280968427658, "rewards/accuracies": 1.0, "rewards/chosen": -2.7943386157858185e-05, "rewards/margins": 0.2378910481929779, "rewards/rejected": -0.23791900277137756, "step": 11311 }, { "epoch": 7.822959889349931, "grad_norm": 4.3947038650512695, "learning_rate": 1.2094667281389274e-05, "log_odds_chosen": 12.0952730178833, "log_odds_ratio": -3.203017695341259e-05, "logits/chosen": -0.13149462640285492, "logits/rejected": -0.323131799697876, "logps/chosen": -0.00017533727805130184, "logps/rejected": -3.08715558052063, "loss": 0.5112, "nll_loss": 0.12779048085212708, "rewards/accuracies": 1.0, "rewards/chosen": -1.7533728168928064e-05, "rewards/margins": 0.30869802832603455, "rewards/rejected": -0.3087155520915985, "step": 11312 }, { "epoch": 7.823651452282157, "grad_norm": 3.9417388439178467, "learning_rate": 1.2090825265099125e-05, "log_odds_chosen": 10.992733001708984, "log_odds_ratio": -5.133766171638854e-05, "logits/chosen": -0.1718611717224121, "logits/rejected": -0.1951899528503418, "logps/chosen": -0.00015353634080383927, "logps/rejected": -2.26267409324646, "loss": 0.3833, "nll_loss": 0.09582962095737457, "rewards/accuracies": 1.0, "rewards/chosen": -1.5353634807979688e-05, "rewards/margins": 0.22625204920768738, "rewards/rejected": -0.22626741230487823, "step": 11313 }, { "epoch": 7.824343015214384, "grad_norm": 3.576083183288574, "learning_rate": 1.2086983248808976e-05, "log_odds_chosen": 10.381665229797363, "log_odds_ratio": -0.0003442099259700626, "logits/chosen": -0.650234580039978, "logits/rejected": -0.5741584300994873, "logps/chosen": -0.00028753330116160214, "logps/rejected": -1.8145102262496948, "loss": 0.5317, "nll_loss": 0.13288593292236328, "rewards/accuracies": 1.0, "rewards/chosen": -2.8753329388564453e-05, "rewards/margins": 0.18142227828502655, "rewards/rejected": -0.18145102262496948, "step": 11314 }, { "epoch": 7.825034578146611, "grad_norm": 3.879359245300293, "learning_rate": 1.2083141232518826e-05, "log_odds_chosen": 11.245022773742676, "log_odds_ratio": -2.560452776378952e-05, "logits/chosen": 0.10069958865642548, "logits/rejected": 0.02351771481335163, "logps/chosen": -0.00020363567455206066, "logps/rejected": -2.3778505325317383, "loss": 0.4817, "nll_loss": 0.12041270732879639, "rewards/accuracies": 1.0, "rewards/chosen": -2.0363568182801828e-05, "rewards/margins": 0.23776471614837646, "rewards/rejected": -0.23778507113456726, "step": 11315 }, { "epoch": 7.825726141078838, "grad_norm": 5.315532207489014, "learning_rate": 1.2079299216228677e-05, "log_odds_chosen": 11.995738983154297, "log_odds_ratio": -8.50264259497635e-05, "logits/chosen": -0.3942870497703552, "logits/rejected": -0.4254767894744873, "logps/chosen": -0.00015716595225967467, "logps/rejected": -2.919708013534546, "loss": 0.4898, "nll_loss": 0.12245209515094757, "rewards/accuracies": 1.0, "rewards/chosen": -1.5716595953563228e-05, "rewards/margins": 0.2919551134109497, "rewards/rejected": -0.2919708490371704, "step": 11316 }, { "epoch": 7.826417704011065, "grad_norm": 2.549464702606201, "learning_rate": 1.2075457199938528e-05, "log_odds_chosen": 12.045283317565918, "log_odds_ratio": -2.14578649320174e-05, "logits/chosen": -0.19537949562072754, "logits/rejected": -0.22888311743736267, "logps/chosen": -8.560554124414921e-05, "logps/rejected": -2.7150845527648926, "loss": 0.3413, "nll_loss": 0.0853186771273613, "rewards/accuracies": 1.0, "rewards/chosen": -8.560555215808563e-06, "rewards/margins": 0.2714998722076416, "rewards/rejected": -0.27150842547416687, "step": 11317 }, { "epoch": 7.827109266943292, "grad_norm": 8.127381324768066, "learning_rate": 1.207161518364838e-05, "log_odds_chosen": 11.11913776397705, "log_odds_ratio": -0.00015894531679805368, "logits/chosen": -0.5177460312843323, "logits/rejected": -0.5508327484130859, "logps/chosen": -0.00036550668301060796, "logps/rejected": -2.83428692817688, "loss": 0.7238, "nll_loss": 0.18092840909957886, "rewards/accuracies": 1.0, "rewards/chosen": -3.6550667573465034e-05, "rewards/margins": 0.28339216113090515, "rewards/rejected": -0.28342872858047485, "step": 11318 }, { "epoch": 7.827800829875518, "grad_norm": 2.9939863681793213, "learning_rate": 1.2067773167358231e-05, "log_odds_chosen": 10.294910430908203, "log_odds_ratio": -6.300478707998991e-05, "logits/chosen": -0.2557978630065918, "logits/rejected": -0.2889474332332611, "logps/chosen": -0.00011574958625715226, "logps/rejected": -1.292106032371521, "loss": 0.2875, "nll_loss": 0.07187668979167938, "rewards/accuracies": 1.0, "rewards/chosen": -1.1574958989513107e-05, "rewards/margins": 0.12919902801513672, "rewards/rejected": -0.12921059131622314, "step": 11319 }, { "epoch": 7.828492392807745, "grad_norm": 4.445178031921387, "learning_rate": 1.206393115106808e-05, "log_odds_chosen": 10.54962158203125, "log_odds_ratio": -8.115536911645904e-05, "logits/chosen": -0.16899898648262024, "logits/rejected": -0.19577506184577942, "logps/chosen": -0.000257675041211769, "logps/rejected": -1.8648037910461426, "loss": 0.4215, "nll_loss": 0.10536029934883118, "rewards/accuracies": 1.0, "rewards/chosen": -2.5767503757379018e-05, "rewards/margins": 0.1864546239376068, "rewards/rejected": -0.18648038804531097, "step": 11320 }, { "epoch": 7.829183955739972, "grad_norm": 3.650851011276245, "learning_rate": 1.2060089134777933e-05, "log_odds_chosen": 11.044684410095215, "log_odds_ratio": -3.610683779697865e-05, "logits/chosen": -0.7067569494247437, "logits/rejected": -0.7932747602462769, "logps/chosen": -0.00035222587757743895, "logps/rejected": -2.135831356048584, "loss": 0.4366, "nll_loss": 0.10913494229316711, "rewards/accuracies": 1.0, "rewards/chosen": -3.5222587030148134e-05, "rewards/margins": 0.21354791522026062, "rewards/rejected": -0.21358314156532288, "step": 11321 }, { "epoch": 7.829875518672199, "grad_norm": 4.814984321594238, "learning_rate": 1.2056247118487783e-05, "log_odds_chosen": 9.837475776672363, "log_odds_ratio": -0.0007566395797766745, "logits/chosen": 0.09012100845575333, "logits/rejected": -0.02145160734653473, "logps/chosen": -0.0023908771108835936, "logps/rejected": -2.0067663192749023, "loss": 0.5799, "nll_loss": 0.14490315318107605, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023908769071567804, "rewards/margins": 0.20043756067752838, "rewards/rejected": -0.20067663490772247, "step": 11322 }, { "epoch": 7.830567081604426, "grad_norm": 6.39214563369751, "learning_rate": 1.2052405102197634e-05, "log_odds_chosen": 10.75027847290039, "log_odds_ratio": -7.44945282349363e-05, "logits/chosen": -0.36986637115478516, "logits/rejected": -0.4756673574447632, "logps/chosen": -0.00022458047897089273, "logps/rejected": -1.8328531980514526, "loss": 0.3638, "nll_loss": 0.09094793349504471, "rewards/accuracies": 1.0, "rewards/chosen": -2.245804716949351e-05, "rewards/margins": 0.1832628697156906, "rewards/rejected": -0.18328531086444855, "step": 11323 }, { "epoch": 7.8312586445366525, "grad_norm": 4.306397914886475, "learning_rate": 1.2048563085907485e-05, "log_odds_chosen": 12.279438972473145, "log_odds_ratio": -1.2914264516439289e-05, "logits/chosen": -0.20310020446777344, "logits/rejected": -0.26829591393470764, "logps/chosen": -8.419219375355169e-05, "logps/rejected": -2.8877882957458496, "loss": 0.519, "nll_loss": 0.12974447011947632, "rewards/accuracies": 1.0, "rewards/chosen": -8.41921973915305e-06, "rewards/margins": 0.2887704074382782, "rewards/rejected": -0.2887788414955139, "step": 11324 }, { "epoch": 7.831950207468879, "grad_norm": 3.6690616607666016, "learning_rate": 1.2044721069617336e-05, "log_odds_chosen": 11.022123336791992, "log_odds_ratio": -4.5447537559084594e-05, "logits/chosen": -0.512175977230072, "logits/rejected": -0.6419370174407959, "logps/chosen": -0.00017706479411572218, "logps/rejected": -2.087106704711914, "loss": 0.3042, "nll_loss": 0.07603558897972107, "rewards/accuracies": 1.0, "rewards/chosen": -1.7706479411572218e-05, "rewards/margins": 0.20869295299053192, "rewards/rejected": -0.2087106555700302, "step": 11325 }, { "epoch": 7.832641770401106, "grad_norm": 10.011808395385742, "learning_rate": 1.2040879053327186e-05, "log_odds_chosen": 10.784045219421387, "log_odds_ratio": -7.086223195074126e-05, "logits/chosen": -0.5523691773414612, "logits/rejected": -0.4052152931690216, "logps/chosen": -0.0002722898207139224, "logps/rejected": -2.209475040435791, "loss": 0.4355, "nll_loss": 0.10886941105127335, "rewards/accuracies": 1.0, "rewards/chosen": -2.7228981707594357e-05, "rewards/margins": 0.22092029452323914, "rewards/rejected": -0.2209475338459015, "step": 11326 }, { "epoch": 7.833333333333333, "grad_norm": 4.256576061248779, "learning_rate": 1.2037037037037037e-05, "log_odds_chosen": 10.996548652648926, "log_odds_ratio": -5.791713920189068e-05, "logits/chosen": -0.4284515380859375, "logits/rejected": -0.5301926136016846, "logps/chosen": -0.00014487968292087317, "logps/rejected": -1.900244951248169, "loss": 0.3767, "nll_loss": 0.09417097270488739, "rewards/accuracies": 1.0, "rewards/chosen": -1.4487968655885197e-05, "rewards/margins": 0.19001001119613647, "rewards/rejected": -0.1900244951248169, "step": 11327 }, { "epoch": 7.83402489626556, "grad_norm": 4.8243632316589355, "learning_rate": 1.203319502074689e-05, "log_odds_chosen": 10.47626781463623, "log_odds_ratio": -0.00010160038073081523, "logits/chosen": -0.4039806127548218, "logits/rejected": -0.32478436827659607, "logps/chosen": -0.00019927705579902977, "logps/rejected": -1.7954881191253662, "loss": 0.4262, "nll_loss": 0.1065467894077301, "rewards/accuracies": 1.0, "rewards/chosen": -1.992770739889238e-05, "rewards/margins": 0.1795288771390915, "rewards/rejected": -0.17954879999160767, "step": 11328 }, { "epoch": 7.834716459197787, "grad_norm": 3.5320465564727783, "learning_rate": 1.2029353004456739e-05, "log_odds_chosen": 9.576192855834961, "log_odds_ratio": -0.0002370043657720089, "logits/chosen": -0.48754382133483887, "logits/rejected": -0.5002470016479492, "logps/chosen": -0.00023099326062947512, "logps/rejected": -0.8696978688240051, "loss": 0.2745, "nll_loss": 0.06859709322452545, "rewards/accuracies": 1.0, "rewards/chosen": -2.3099326426745392e-05, "rewards/margins": 0.08694669604301453, "rewards/rejected": -0.08696979284286499, "step": 11329 }, { "epoch": 7.8354080221300135, "grad_norm": 3.702303171157837, "learning_rate": 1.202551098816659e-05, "log_odds_chosen": 11.211159706115723, "log_odds_ratio": -3.524010753608309e-05, "logits/chosen": -0.4746825695037842, "logits/rejected": -0.6030235290527344, "logps/chosen": -0.00015717296628281474, "logps/rejected": -2.056490898132324, "loss": 0.3888, "nll_loss": 0.09720071405172348, "rewards/accuracies": 1.0, "rewards/chosen": -1.5717296264483593e-05, "rewards/margins": 0.20563337206840515, "rewards/rejected": -0.20564907789230347, "step": 11330 }, { "epoch": 7.83609958506224, "grad_norm": 4.379319190979004, "learning_rate": 1.2021668971876442e-05, "log_odds_chosen": 9.556562423706055, "log_odds_ratio": -0.00018041238945443183, "logits/chosen": -0.22800880670547485, "logits/rejected": -0.2613986134529114, "logps/chosen": -0.0013600703096017241, "logps/rejected": -1.9764504432678223, "loss": 0.464, "nll_loss": 0.11598940938711166, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013600703096017241, "rewards/margins": 0.1975090503692627, "rewards/rejected": -0.19764505326747894, "step": 11331 }, { "epoch": 7.836791147994467, "grad_norm": 3.46706223487854, "learning_rate": 1.2017826955586292e-05, "log_odds_chosen": 10.588600158691406, "log_odds_ratio": -0.0007458007312379777, "logits/chosen": -0.1496710330247879, "logits/rejected": -0.21533580124378204, "logps/chosen": -0.0007539233192801476, "logps/rejected": -1.8086034059524536, "loss": 0.37, "nll_loss": 0.09242701530456543, "rewards/accuracies": 1.0, "rewards/chosen": -7.539233774878085e-05, "rewards/margins": 0.18078495562076569, "rewards/rejected": -0.18086032569408417, "step": 11332 }, { "epoch": 7.837482710926694, "grad_norm": 5.556495666503906, "learning_rate": 1.2013984939296143e-05, "log_odds_chosen": 10.02688217163086, "log_odds_ratio": -0.0004603726847562939, "logits/chosen": -0.47641515731811523, "logits/rejected": -0.5979164242744446, "logps/chosen": -0.000672024383675307, "logps/rejected": -2.213634967803955, "loss": 0.5368, "nll_loss": 0.13416364789009094, "rewards/accuracies": 1.0, "rewards/chosen": -6.720244709867984e-05, "rewards/margins": 0.2212963104248047, "rewards/rejected": -0.22136351466178894, "step": 11333 }, { "epoch": 7.838174273858921, "grad_norm": 5.558751583099365, "learning_rate": 1.2010142923005994e-05, "log_odds_chosen": 10.478246688842773, "log_odds_ratio": -0.00028890155954286456, "logits/chosen": -0.2527915835380554, "logits/rejected": -0.28625351190567017, "logps/chosen": -0.0002432153996778652, "logps/rejected": -2.022489309310913, "loss": 0.522, "nll_loss": 0.13046440482139587, "rewards/accuracies": 1.0, "rewards/chosen": -2.432153996778652e-05, "rewards/margins": 0.20222459733486176, "rewards/rejected": -0.2022489309310913, "step": 11334 }, { "epoch": 7.838865836791148, "grad_norm": 4.615329265594482, "learning_rate": 1.2006300906715845e-05, "log_odds_chosen": 12.855207443237305, "log_odds_ratio": -4.950938546244288e-06, "logits/chosen": -0.5159450769424438, "logits/rejected": -0.5952770709991455, "logps/chosen": -7.59071990614757e-05, "logps/rejected": -3.2945661544799805, "loss": 0.519, "nll_loss": 0.12975125014781952, "rewards/accuracies": 1.0, "rewards/chosen": -7.59072008804651e-06, "rewards/margins": 0.32944902777671814, "rewards/rejected": -0.3294565975666046, "step": 11335 }, { "epoch": 7.8395573997233745, "grad_norm": 3.821167230606079, "learning_rate": 1.2002458890425695e-05, "log_odds_chosen": 9.323131561279297, "log_odds_ratio": -0.0018681931542232633, "logits/chosen": -0.29701679944992065, "logits/rejected": -0.4008544683456421, "logps/chosen": -0.0014447573339566588, "logps/rejected": -1.4508062601089478, "loss": 0.4377, "nll_loss": 0.10923688858747482, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014447573630604893, "rewards/margins": 0.14493615925312042, "rewards/rejected": -0.14508062601089478, "step": 11336 }, { "epoch": 7.840248962655601, "grad_norm": 4.55873966217041, "learning_rate": 1.1998616874135548e-05, "log_odds_chosen": 11.785822868347168, "log_odds_ratio": -1.9246745068812743e-05, "logits/chosen": -0.3788655996322632, "logits/rejected": -0.40937554836273193, "logps/chosen": -0.00016271023196168244, "logps/rejected": -2.8028204441070557, "loss": 0.4659, "nll_loss": 0.11648114025592804, "rewards/accuracies": 1.0, "rewards/chosen": -1.6271023923764005e-05, "rewards/margins": 0.28026577830314636, "rewards/rejected": -0.28028205037117004, "step": 11337 }, { "epoch": 7.840940525587828, "grad_norm": 3.9258370399475098, "learning_rate": 1.1994774857845397e-05, "log_odds_chosen": 11.954495429992676, "log_odds_ratio": -2.6376255846116692e-05, "logits/chosen": -0.40627315640449524, "logits/rejected": -0.5198106169700623, "logps/chosen": -0.00019579721265472472, "logps/rejected": -2.973041534423828, "loss": 0.5688, "nll_loss": 0.14218628406524658, "rewards/accuracies": 1.0, "rewards/chosen": -1.957972017407883e-05, "rewards/margins": 0.2972846031188965, "rewards/rejected": -0.2973041534423828, "step": 11338 }, { "epoch": 7.841632088520055, "grad_norm": 4.359646797180176, "learning_rate": 1.1990932841555248e-05, "log_odds_chosen": 10.56201171875, "log_odds_ratio": -0.0001327977515757084, "logits/chosen": -0.327246755361557, "logits/rejected": -0.3993592858314514, "logps/chosen": -0.0003068206424359232, "logps/rejected": -2.1173148155212402, "loss": 0.3493, "nll_loss": 0.08732067048549652, "rewards/accuracies": 1.0, "rewards/chosen": -3.0682065698783845e-05, "rewards/margins": 0.21170082688331604, "rewards/rejected": -0.21173149347305298, "step": 11339 }, { "epoch": 7.842323651452282, "grad_norm": 4.125725269317627, "learning_rate": 1.19870908252651e-05, "log_odds_chosen": 9.908230781555176, "log_odds_ratio": -0.00039624038618057966, "logits/chosen": -0.7032617330551147, "logits/rejected": -0.691260814666748, "logps/chosen": -0.0010323630413040519, "logps/rejected": -1.8295907974243164, "loss": 0.6721, "nll_loss": 0.16799086332321167, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010323632159270346, "rewards/margins": 0.18285587430000305, "rewards/rejected": -0.18295909464359283, "step": 11340 }, { "epoch": 7.843015214384509, "grad_norm": 4.721843242645264, "learning_rate": 1.1983248808974951e-05, "log_odds_chosen": 11.312594413757324, "log_odds_ratio": -3.6237441236153245e-05, "logits/chosen": -0.5396626591682434, "logits/rejected": -0.5061202049255371, "logps/chosen": -0.00019042623171117157, "logps/rejected": -2.7666544914245605, "loss": 0.595, "nll_loss": 0.14875343441963196, "rewards/accuracies": 1.0, "rewards/chosen": -1.904262535390444e-05, "rewards/margins": 0.2766464054584503, "rewards/rejected": -0.27666544914245605, "step": 11341 }, { "epoch": 7.8437067773167355, "grad_norm": 4.29473352432251, "learning_rate": 1.1979406792684802e-05, "log_odds_chosen": 10.837879180908203, "log_odds_ratio": -0.0003252569295000285, "logits/chosen": -0.20344379544258118, "logits/rejected": -0.29107415676116943, "logps/chosen": -0.000317359488690272, "logps/rejected": -2.1068313121795654, "loss": 0.3754, "nll_loss": 0.0938163474202156, "rewards/accuracies": 1.0, "rewards/chosen": -3.1735948141431436e-05, "rewards/margins": 0.21065139770507812, "rewards/rejected": -0.21068313717842102, "step": 11342 }, { "epoch": 7.844398340248962, "grad_norm": 4.473525524139404, "learning_rate": 1.1975564776394652e-05, "log_odds_chosen": 11.027624130249023, "log_odds_ratio": -5.361594230635092e-05, "logits/chosen": -0.7446596026420593, "logits/rejected": -0.8093918561935425, "logps/chosen": -0.0003591571585275233, "logps/rejected": -2.716170310974121, "loss": 0.7645, "nll_loss": 0.191114604473114, "rewards/accuracies": 1.0, "rewards/chosen": -3.591571294236928e-05, "rewards/margins": 0.27158111333847046, "rewards/rejected": -0.27161702513694763, "step": 11343 }, { "epoch": 7.845089903181189, "grad_norm": 4.6528639793396, "learning_rate": 1.1971722760104503e-05, "log_odds_chosen": 10.873839378356934, "log_odds_ratio": -7.976902270456776e-05, "logits/chosen": -0.08298560976982117, "logits/rejected": -0.2112613320350647, "logps/chosen": -0.0002032502816291526, "logps/rejected": -2.1599695682525635, "loss": 0.8459, "nll_loss": 0.21146902441978455, "rewards/accuracies": 1.0, "rewards/chosen": -2.0325027435319498e-05, "rewards/margins": 0.21597662568092346, "rewards/rejected": -0.21599695086479187, "step": 11344 }, { "epoch": 7.845781466113416, "grad_norm": 3.8563454151153564, "learning_rate": 1.1967880743814354e-05, "log_odds_chosen": 10.505448341369629, "log_odds_ratio": -0.00021720759104937315, "logits/chosen": -0.21184486150741577, "logits/rejected": -0.3287862539291382, "logps/chosen": -0.000897840247489512, "logps/rejected": -3.0171875953674316, "loss": 0.3882, "nll_loss": 0.09702227264642715, "rewards/accuracies": 1.0, "rewards/chosen": -8.978402183856815e-05, "rewards/margins": 0.3016289472579956, "rewards/rejected": -0.30171874165534973, "step": 11345 }, { "epoch": 7.846473029045643, "grad_norm": 3.9811694622039795, "learning_rate": 1.1964038727524206e-05, "log_odds_chosen": 10.677752494812012, "log_odds_ratio": -8.449415327049792e-05, "logits/chosen": -0.6144614219665527, "logits/rejected": -0.6651566624641418, "logps/chosen": -0.00035767414374276996, "logps/rejected": -2.4102203845977783, "loss": 0.4091, "nll_loss": 0.10226333886384964, "rewards/accuracies": 1.0, "rewards/chosen": -3.5767414374276996e-05, "rewards/margins": 0.24098627269268036, "rewards/rejected": -0.2410220354795456, "step": 11346 }, { "epoch": 7.84716459197787, "grad_norm": 3.5016424655914307, "learning_rate": 1.1960196711234057e-05, "log_odds_chosen": 11.957420349121094, "log_odds_ratio": -2.618016878841445e-05, "logits/chosen": -0.2839650809764862, "logits/rejected": -0.24141938984394073, "logps/chosen": -0.00011954591900575906, "logps/rejected": -2.8302571773529053, "loss": 0.3573, "nll_loss": 0.08932714909315109, "rewards/accuracies": 1.0, "rewards/chosen": -1.1954592082474846e-05, "rewards/margins": 0.2830137610435486, "rewards/rejected": -0.28302574157714844, "step": 11347 }, { "epoch": 7.8478561549100965, "grad_norm": 2.5719215869903564, "learning_rate": 1.1956354694943906e-05, "log_odds_chosen": 10.805449485778809, "log_odds_ratio": -6.877508712932467e-05, "logits/chosen": -0.31896281242370605, "logits/rejected": -0.31459271907806396, "logps/chosen": -0.00021419592667371035, "logps/rejected": -1.801652431488037, "loss": 0.2996, "nll_loss": 0.0749046728014946, "rewards/accuracies": 1.0, "rewards/chosen": -2.1419591575977392e-05, "rewards/margins": 0.1801438331604004, "rewards/rejected": -0.18016526103019714, "step": 11348 }, { "epoch": 7.848547717842323, "grad_norm": 3.6286022663116455, "learning_rate": 1.1952512678653759e-05, "log_odds_chosen": 11.099205017089844, "log_odds_ratio": -4.535958214546554e-05, "logits/chosen": 0.25572267174720764, "logits/rejected": 0.12598130106925964, "logps/chosen": -0.00010003315401263535, "logps/rejected": -1.739362120628357, "loss": 0.6519, "nll_loss": 0.1629638522863388, "rewards/accuracies": 1.0, "rewards/chosen": -1.0003315765061416e-05, "rewards/margins": 0.1739262342453003, "rewards/rejected": -0.17393621802330017, "step": 11349 }, { "epoch": 7.84923928077455, "grad_norm": 4.637201309204102, "learning_rate": 1.194867066236361e-05, "log_odds_chosen": 11.232458114624023, "log_odds_ratio": -3.946627475670539e-05, "logits/chosen": -0.4672451615333557, "logits/rejected": -0.48350343108177185, "logps/chosen": -5.887038423679769e-05, "logps/rejected": -1.6665010452270508, "loss": 0.5222, "nll_loss": 0.13054095208644867, "rewards/accuracies": 1.0, "rewards/chosen": -5.887038241780829e-06, "rewards/margins": 0.16664421558380127, "rewards/rejected": -0.16665011644363403, "step": 11350 }, { "epoch": 7.849930843706777, "grad_norm": 6.867190361022949, "learning_rate": 1.194482864607346e-05, "log_odds_chosen": 10.638004302978516, "log_odds_ratio": -4.492142761591822e-05, "logits/chosen": -0.371229887008667, "logits/rejected": -0.43291574716567993, "logps/chosen": -0.00024072341329883784, "logps/rejected": -2.168043851852417, "loss": 0.4847, "nll_loss": 0.1211802065372467, "rewards/accuracies": 1.0, "rewards/chosen": -2.4072340238490142e-05, "rewards/margins": 0.21678031980991364, "rewards/rejected": -0.2168044000864029, "step": 11351 }, { "epoch": 7.850622406639004, "grad_norm": 5.103278636932373, "learning_rate": 1.1940986629783311e-05, "log_odds_chosen": 11.732542037963867, "log_odds_ratio": -2.349566057091579e-05, "logits/chosen": -0.06722603738307953, "logits/rejected": -0.14607380330562592, "logps/chosen": -0.00017894791380967945, "logps/rejected": -3.0719339847564697, "loss": 0.5151, "nll_loss": 0.12876620888710022, "rewards/accuracies": 1.0, "rewards/chosen": -1.7894790289574303e-05, "rewards/margins": 0.30717551708221436, "rewards/rejected": -0.307193398475647, "step": 11352 }, { "epoch": 7.851313969571231, "grad_norm": 3.148937225341797, "learning_rate": 1.1937144613493162e-05, "log_odds_chosen": 9.59694766998291, "log_odds_ratio": -0.00046361677232198417, "logits/chosen": -0.16655105352401733, "logits/rejected": -0.22968561947345734, "logps/chosen": -0.0014130291528999805, "logps/rejected": -1.6357417106628418, "loss": 0.4493, "nll_loss": 0.11227520555257797, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014130290946923196, "rewards/margins": 0.16343286633491516, "rewards/rejected": -0.1635741889476776, "step": 11353 }, { "epoch": 7.8520055325034575, "grad_norm": 2.4725232124328613, "learning_rate": 1.1933302597203012e-05, "log_odds_chosen": 10.359973907470703, "log_odds_ratio": -7.92969367466867e-05, "logits/chosen": -0.25961601734161377, "logits/rejected": -0.32079118490219116, "logps/chosen": -0.0001499430218245834, "logps/rejected": -1.4932595491409302, "loss": 0.2224, "nll_loss": 0.0555829294025898, "rewards/accuracies": 1.0, "rewards/chosen": -1.4994302546256222e-05, "rewards/margins": 0.14931097626686096, "rewards/rejected": -0.14932596683502197, "step": 11354 }, { "epoch": 7.852697095435684, "grad_norm": 5.656296730041504, "learning_rate": 1.1929460580912865e-05, "log_odds_chosen": 10.169926643371582, "log_odds_ratio": -0.000213885388802737, "logits/chosen": -0.06691788136959076, "logits/rejected": -0.08190090954303741, "logps/chosen": -0.004533851984888315, "logps/rejected": -2.7446389198303223, "loss": 0.3907, "nll_loss": 0.0976482480764389, "rewards/accuracies": 1.0, "rewards/chosen": -0.00045338517520576715, "rewards/margins": 0.2740105390548706, "rewards/rejected": -0.2744638919830322, "step": 11355 }, { "epoch": 7.853388658367911, "grad_norm": 8.513496398925781, "learning_rate": 1.1925618564622716e-05, "log_odds_chosen": 10.300134658813477, "log_odds_ratio": -0.00021386246953625232, "logits/chosen": -0.4488566517829895, "logits/rejected": -0.49245864152908325, "logps/chosen": -0.00041282945312559605, "logps/rejected": -2.0244665145874023, "loss": 0.5869, "nll_loss": 0.14670827984809875, "rewards/accuracies": 1.0, "rewards/chosen": -4.128294676775113e-05, "rewards/margins": 0.2024053931236267, "rewards/rejected": -0.20244666934013367, "step": 11356 }, { "epoch": 7.854080221300138, "grad_norm": 5.423783779144287, "learning_rate": 1.1921776548332565e-05, "log_odds_chosen": 11.50162124633789, "log_odds_ratio": -3.0906550819054246e-05, "logits/chosen": -0.38133561611175537, "logits/rejected": -0.5150085091590881, "logps/chosen": -0.00015852053184062243, "logps/rejected": -2.555122137069702, "loss": 0.5308, "nll_loss": 0.13269172608852386, "rewards/accuracies": 1.0, "rewards/chosen": -1.5852054275455885e-05, "rewards/margins": 0.2554963529109955, "rewards/rejected": -0.25551217794418335, "step": 11357 }, { "epoch": 7.854771784232365, "grad_norm": 7.287617206573486, "learning_rate": 1.1917934532042417e-05, "log_odds_chosen": 12.808563232421875, "log_odds_ratio": -9.047294952324592e-06, "logits/chosen": -0.13653582334518433, "logits/rejected": -0.24647362530231476, "logps/chosen": -8.96514393389225e-05, "logps/rejected": -3.2580180168151855, "loss": 0.5415, "nll_loss": 0.13537272810935974, "rewards/accuracies": 1.0, "rewards/chosen": -8.96514393389225e-06, "rewards/margins": 0.3257928788661957, "rewards/rejected": -0.3258018493652344, "step": 11358 }, { "epoch": 7.855463347164592, "grad_norm": 5.785372734069824, "learning_rate": 1.1914092515752268e-05, "log_odds_chosen": 10.555879592895508, "log_odds_ratio": -0.00010297319386154413, "logits/chosen": -0.3700430393218994, "logits/rejected": -0.39971691370010376, "logps/chosen": -0.0007725593168288469, "logps/rejected": -1.972679853439331, "loss": 0.3809, "nll_loss": 0.0952107161283493, "rewards/accuracies": 1.0, "rewards/chosen": -7.725592877250165e-05, "rewards/margins": 0.19719073176383972, "rewards/rejected": -0.19726797938346863, "step": 11359 }, { "epoch": 7.856154910096818, "grad_norm": 12.34350872039795, "learning_rate": 1.1910250499462119e-05, "log_odds_chosen": 11.76555061340332, "log_odds_ratio": -2.7817648515338078e-05, "logits/chosen": -0.3621176481246948, "logits/rejected": -0.19518381357192993, "logps/chosen": -0.00010538271453697234, "logps/rejected": -2.216698169708252, "loss": 0.7454, "nll_loss": 0.18635067343711853, "rewards/accuracies": 1.0, "rewards/chosen": -1.0538271453697234e-05, "rewards/margins": 0.22165925800800323, "rewards/rejected": -0.22166979312896729, "step": 11360 }, { "epoch": 7.856846473029045, "grad_norm": 3.945906639099121, "learning_rate": 1.190640848317197e-05, "log_odds_chosen": 10.77137565612793, "log_odds_ratio": -0.00023443363897968084, "logits/chosen": -0.5106649398803711, "logits/rejected": -0.676784873008728, "logps/chosen": -0.0003039441944565624, "logps/rejected": -2.170654058456421, "loss": 0.389, "nll_loss": 0.09721540659666061, "rewards/accuracies": 1.0, "rewards/chosen": -3.039441980945412e-05, "rewards/margins": 0.21703502535820007, "rewards/rejected": -0.21706542372703552, "step": 11361 }, { "epoch": 7.857538035961272, "grad_norm": 6.773676872253418, "learning_rate": 1.190256646688182e-05, "log_odds_chosen": 10.748955726623535, "log_odds_ratio": -0.0003274211485404521, "logits/chosen": -0.5217385292053223, "logits/rejected": -0.6142206192016602, "logps/chosen": -0.0005004443228244781, "logps/rejected": -2.384121894836426, "loss": 0.6019, "nll_loss": 0.15044429898262024, "rewards/accuracies": 1.0, "rewards/chosen": -5.0044436648022383e-05, "rewards/margins": 0.2383621484041214, "rewards/rejected": -0.23841220140457153, "step": 11362 }, { "epoch": 7.858229598893499, "grad_norm": 4.326676845550537, "learning_rate": 1.189872445059167e-05, "log_odds_chosen": 11.014705657958984, "log_odds_ratio": -3.578926043701358e-05, "logits/chosen": -0.1777215152978897, "logits/rejected": -0.33752867579460144, "logps/chosen": -0.00015030778013169765, "logps/rejected": -1.9089435338974, "loss": 0.5565, "nll_loss": 0.13911172747612, "rewards/accuracies": 1.0, "rewards/chosen": -1.5030776921776123e-05, "rewards/margins": 0.19087931513786316, "rewards/rejected": -0.19089436531066895, "step": 11363 }, { "epoch": 7.858921161825726, "grad_norm": 7.081151485443115, "learning_rate": 1.1894882434301522e-05, "log_odds_chosen": 10.904725074768066, "log_odds_ratio": -4.0231516322819516e-05, "logits/chosen": -0.6134161949157715, "logits/rejected": -0.5148768424987793, "logps/chosen": -0.00023824315576348454, "logps/rejected": -2.381795644760132, "loss": 0.3085, "nll_loss": 0.07712505757808685, "rewards/accuracies": 1.0, "rewards/chosen": -2.3824315576348454e-05, "rewards/margins": 0.23815575242042542, "rewards/rejected": -0.23817956447601318, "step": 11364 }, { "epoch": 7.8596127247579535, "grad_norm": 5.808526039123535, "learning_rate": 1.1891040418011374e-05, "log_odds_chosen": 11.700054168701172, "log_odds_ratio": -2.5583209207979962e-05, "logits/chosen": -0.04865068197250366, "logits/rejected": -0.04677605628967285, "logps/chosen": -0.00012595993757713586, "logps/rejected": -2.695266008377075, "loss": 0.6089, "nll_loss": 0.1522296518087387, "rewards/accuracies": 1.0, "rewards/chosen": -1.2595994121511467e-05, "rewards/margins": 0.2695139944553375, "rewards/rejected": -0.2695266008377075, "step": 11365 }, { "epoch": 7.86030428769018, "grad_norm": 3.7067861557006836, "learning_rate": 1.1887198401721223e-05, "log_odds_chosen": 10.165199279785156, "log_odds_ratio": -0.0002667968219611794, "logits/chosen": -0.048863768577575684, "logits/rejected": -0.08387540280818939, "logps/chosen": -0.00018476907280273736, "logps/rejected": -1.482744812965393, "loss": 0.4206, "nll_loss": 0.1051340401172638, "rewards/accuracies": 1.0, "rewards/chosen": -1.847690873546526e-05, "rewards/margins": 0.14825600385665894, "rewards/rejected": -0.1482744812965393, "step": 11366 }, { "epoch": 7.860995850622407, "grad_norm": 9.02211856842041, "learning_rate": 1.1883356385431074e-05, "log_odds_chosen": 11.678213119506836, "log_odds_ratio": -1.4590928913094103e-05, "logits/chosen": -0.4199620187282562, "logits/rejected": -0.37431225180625916, "logps/chosen": -9.888997010421008e-05, "logps/rejected": -2.3296117782592773, "loss": 0.4223, "nll_loss": 0.10557062923908234, "rewards/accuracies": 1.0, "rewards/chosen": -9.88899773801677e-06, "rewards/margins": 0.23295128345489502, "rewards/rejected": -0.23296119272708893, "step": 11367 }, { "epoch": 7.861687413554634, "grad_norm": 5.323549747467041, "learning_rate": 1.1879514369140926e-05, "log_odds_chosen": 10.693358421325684, "log_odds_ratio": -5.926968879066408e-05, "logits/chosen": 0.18157647550106049, "logits/rejected": 0.08172719180583954, "logps/chosen": -0.0004564730334095657, "logps/rejected": -2.4843926429748535, "loss": 0.4597, "nll_loss": 0.11491090804338455, "rewards/accuracies": 1.0, "rewards/chosen": -4.564730261336081e-05, "rewards/margins": 0.24839362502098083, "rewards/rejected": -0.24843928217887878, "step": 11368 }, { "epoch": 7.862378976486861, "grad_norm": 6.065701961517334, "learning_rate": 1.1875672352850777e-05, "log_odds_chosen": 10.91854476928711, "log_odds_ratio": -8.575063839089125e-05, "logits/chosen": -0.4183662533760071, "logits/rejected": -0.4504457712173462, "logps/chosen": -0.0009298054501414299, "logps/rejected": -2.360944986343384, "loss": 0.6333, "nll_loss": 0.1583259552717209, "rewards/accuracies": 1.0, "rewards/chosen": -9.298054646933451e-05, "rewards/margins": 0.23600150644779205, "rewards/rejected": -0.23609450459480286, "step": 11369 }, { "epoch": 7.863070539419088, "grad_norm": 5.207459449768066, "learning_rate": 1.1871830336560628e-05, "log_odds_chosen": 10.09914779663086, "log_odds_ratio": -0.00018985196948051453, "logits/chosen": -0.2728763222694397, "logits/rejected": -0.29989945888519287, "logps/chosen": -0.0006764763966202736, "logps/rejected": -1.8139231204986572, "loss": 0.4903, "nll_loss": 0.12256253510713577, "rewards/accuracies": 1.0, "rewards/chosen": -6.764764111721888e-05, "rewards/margins": 0.18132466077804565, "rewards/rejected": -0.18139231204986572, "step": 11370 }, { "epoch": 7.8637621023513145, "grad_norm": 4.561739921569824, "learning_rate": 1.1867988320270478e-05, "log_odds_chosen": 10.708756446838379, "log_odds_ratio": -0.00019307366164866835, "logits/chosen": -0.18084324896335602, "logits/rejected": -0.2568657100200653, "logps/chosen": -0.0005610042135231197, "logps/rejected": -2.330329179763794, "loss": 0.5474, "nll_loss": 0.13683900237083435, "rewards/accuracies": 1.0, "rewards/chosen": -5.6100419897120446e-05, "rewards/margins": 0.23297682404518127, "rewards/rejected": -0.23303291201591492, "step": 11371 }, { "epoch": 7.864453665283541, "grad_norm": 4.787899494171143, "learning_rate": 1.186414630398033e-05, "log_odds_chosen": 10.986349105834961, "log_odds_ratio": -5.588722706306726e-05, "logits/chosen": -0.3971659541130066, "logits/rejected": -0.41987258195877075, "logps/chosen": -0.00015680750948376954, "logps/rejected": -1.9517314434051514, "loss": 0.5272, "nll_loss": 0.1318061649799347, "rewards/accuracies": 1.0, "rewards/chosen": -1.5680752767366357e-05, "rewards/margins": 0.1951574832201004, "rewards/rejected": -0.19517315924167633, "step": 11372 }, { "epoch": 7.865145228215768, "grad_norm": 4.8560285568237305, "learning_rate": 1.186030428769018e-05, "log_odds_chosen": 10.537019729614258, "log_odds_ratio": -0.0001434181467629969, "logits/chosen": -0.12688449025154114, "logits/rejected": -0.23663949966430664, "logps/chosen": -0.0009801515843719244, "logps/rejected": -2.249166488647461, "loss": 0.4461, "nll_loss": 0.11150926351547241, "rewards/accuracies": 1.0, "rewards/chosen": -9.801516716834158e-05, "rewards/margins": 0.2248186469078064, "rewards/rejected": -0.22491665184497833, "step": 11373 }, { "epoch": 7.865836791147995, "grad_norm": 3.163818359375, "learning_rate": 1.1856462271400032e-05, "log_odds_chosen": 10.34457015991211, "log_odds_ratio": -0.0002107964246533811, "logits/chosen": -0.2862035930156708, "logits/rejected": -0.2810376286506653, "logps/chosen": -0.0007940607611089945, "logps/rejected": -1.9568583965301514, "loss": 0.5205, "nll_loss": 0.13010820746421814, "rewards/accuracies": 1.0, "rewards/chosen": -7.940607611089945e-05, "rewards/margins": 0.19560644030570984, "rewards/rejected": -0.19568583369255066, "step": 11374 }, { "epoch": 7.866528354080222, "grad_norm": 3.7396209239959717, "learning_rate": 1.1852620255109881e-05, "log_odds_chosen": 11.540573120117188, "log_odds_ratio": -2.3659078578930348e-05, "logits/chosen": -0.1099567711353302, "logits/rejected": -0.20732180774211884, "logps/chosen": -0.00012644784874282777, "logps/rejected": -2.3760976791381836, "loss": 0.3376, "nll_loss": 0.08440861850976944, "rewards/accuracies": 1.0, "rewards/chosen": -1.2644785783777479e-05, "rewards/margins": 0.23759713768959045, "rewards/rejected": -0.23760978877544403, "step": 11375 }, { "epoch": 7.867219917012449, "grad_norm": 5.468509197235107, "learning_rate": 1.1848778238819732e-05, "log_odds_chosen": 11.563232421875, "log_odds_ratio": -3.333418135298416e-05, "logits/chosen": -0.07910732924938202, "logits/rejected": -0.14191415905952454, "logps/chosen": -0.00014625617768615484, "logps/rejected": -2.604708194732666, "loss": 0.5986, "nll_loss": 0.1496472954750061, "rewards/accuracies": 1.0, "rewards/chosen": -1.4625617950514425e-05, "rewards/margins": 0.2604562044143677, "rewards/rejected": -0.26047080755233765, "step": 11376 }, { "epoch": 7.867911479944675, "grad_norm": 11.16819953918457, "learning_rate": 1.1844936222529585e-05, "log_odds_chosen": 10.969552040100098, "log_odds_ratio": -0.0002762842341326177, "logits/chosen": -0.4043792486190796, "logits/rejected": -0.5222166180610657, "logps/chosen": -0.0009811146883293986, "logps/rejected": -2.2527077198028564, "loss": 0.4811, "nll_loss": 0.12023838609457016, "rewards/accuracies": 1.0, "rewards/chosen": -9.811147174332291e-05, "rewards/margins": 0.22517266869544983, "rewards/rejected": -0.22527077794075012, "step": 11377 }, { "epoch": 7.868603042876902, "grad_norm": 3.129385232925415, "learning_rate": 1.1841094206239435e-05, "log_odds_chosen": 10.737401008605957, "log_odds_ratio": -5.994061575620435e-05, "logits/chosen": -0.6261686086654663, "logits/rejected": -0.6428536176681519, "logps/chosen": -0.0005035304930061102, "logps/rejected": -2.0155394077301025, "loss": 0.3526, "nll_loss": 0.08814893662929535, "rewards/accuracies": 1.0, "rewards/chosen": -5.035305002820678e-05, "rewards/margins": 0.20150357484817505, "rewards/rejected": -0.20155392587184906, "step": 11378 }, { "epoch": 7.869294605809129, "grad_norm": 3.615654468536377, "learning_rate": 1.1837252189949286e-05, "log_odds_chosen": 9.873108863830566, "log_odds_ratio": -0.000126995742903091, "logits/chosen": -0.11937469989061356, "logits/rejected": -0.16411955654621124, "logps/chosen": -0.00027166143991053104, "logps/rejected": -1.670121192932129, "loss": 0.7555, "nll_loss": 0.18886922299861908, "rewards/accuracies": 1.0, "rewards/chosen": -2.716614653763827e-05, "rewards/margins": 0.16698496043682098, "rewards/rejected": -0.16701211035251617, "step": 11379 }, { "epoch": 7.869986168741356, "grad_norm": 59.96883773803711, "learning_rate": 1.1833410173659137e-05, "log_odds_chosen": 8.984199523925781, "log_odds_ratio": -1.482587456703186, "logits/chosen": -0.24020972847938538, "logits/rejected": -0.35119855403900146, "logps/chosen": -0.23727771639823914, "logps/rejected": -1.865845799446106, "loss": 1.033, "nll_loss": 0.1099969893693924, "rewards/accuracies": 0.875, "rewards/chosen": -0.023727772757411003, "rewards/margins": 0.1628568172454834, "rewards/rejected": -0.18658457696437836, "step": 11380 }, { "epoch": 7.870677731673583, "grad_norm": 3.3628134727478027, "learning_rate": 1.1829568157368988e-05, "log_odds_chosen": 10.520112037658691, "log_odds_ratio": -0.00017830124124884605, "logits/chosen": -0.5980362296104431, "logits/rejected": -0.6402428150177002, "logps/chosen": -0.0003178414481226355, "logps/rejected": -2.241232395172119, "loss": 0.3839, "nll_loss": 0.09596016258001328, "rewards/accuracies": 1.0, "rewards/chosen": -3.178414408466779e-05, "rewards/margins": 0.22409147024154663, "rewards/rejected": -0.2241232544183731, "step": 11381 }, { "epoch": 7.87136929460581, "grad_norm": 5.686932563781738, "learning_rate": 1.1825726141078838e-05, "log_odds_chosen": 12.08934211730957, "log_odds_ratio": -7.481579814339057e-06, "logits/chosen": 0.09978464990854263, "logits/rejected": 0.06304813921451569, "logps/chosen": -0.0002518608816899359, "logps/rejected": -3.102248430252075, "loss": 0.4265, "nll_loss": 0.10661499202251434, "rewards/accuracies": 1.0, "rewards/chosen": -2.518608744139783e-05, "rewards/margins": 0.31019964814186096, "rewards/rejected": -0.31022483110427856, "step": 11382 }, { "epoch": 7.872060857538036, "grad_norm": 4.011767864227295, "learning_rate": 1.1821884124788691e-05, "log_odds_chosen": 11.806448936462402, "log_odds_ratio": -2.2543908926309086e-05, "logits/chosen": -0.8501976728439331, "logits/rejected": -0.9305237531661987, "logps/chosen": -7.412520062644035e-05, "logps/rejected": -2.3156652450561523, "loss": 0.4158, "nll_loss": 0.10394752770662308, "rewards/accuracies": 1.0, "rewards/chosen": -7.412520062644035e-06, "rewards/margins": 0.2315591275691986, "rewards/rejected": -0.23156653344631195, "step": 11383 }, { "epoch": 7.872752420470263, "grad_norm": 3.638526439666748, "learning_rate": 1.181804210849854e-05, "log_odds_chosen": 9.84005355834961, "log_odds_ratio": -0.0012426018947735429, "logits/chosen": -0.3885806202888489, "logits/rejected": -0.2935905456542969, "logps/chosen": -0.000531262659933418, "logps/rejected": -1.8630025386810303, "loss": 0.6088, "nll_loss": 0.15207722783088684, "rewards/accuracies": 1.0, "rewards/chosen": -5.3126266720937565e-05, "rewards/margins": 0.18624712526798248, "rewards/rejected": -0.18630024790763855, "step": 11384 }, { "epoch": 7.87344398340249, "grad_norm": 4.375177383422852, "learning_rate": 1.181420009220839e-05, "log_odds_chosen": 10.815048217773438, "log_odds_ratio": -3.960235699196346e-05, "logits/chosen": -0.04029744863510132, "logits/rejected": -0.00986124575138092, "logps/chosen": -0.00011397508205845952, "logps/rejected": -1.61173415184021, "loss": 0.4739, "nll_loss": 0.11848108470439911, "rewards/accuracies": 1.0, "rewards/chosen": -1.1397508387744892e-05, "rewards/margins": 0.16116203367710114, "rewards/rejected": -0.16117341816425323, "step": 11385 }, { "epoch": 7.874135546334717, "grad_norm": 6.68137264251709, "learning_rate": 1.1810358075918243e-05, "log_odds_chosen": 10.267988204956055, "log_odds_ratio": -9.309072629548609e-05, "logits/chosen": -0.5029177069664001, "logits/rejected": -0.4623737335205078, "logps/chosen": -0.0002573465171735734, "logps/rejected": -1.8290235996246338, "loss": 0.4549, "nll_loss": 0.11370395123958588, "rewards/accuracies": 1.0, "rewards/chosen": -2.573465280875098e-05, "rewards/margins": 0.18287664651870728, "rewards/rejected": -0.18290236592292786, "step": 11386 }, { "epoch": 7.874827109266944, "grad_norm": 4.482590675354004, "learning_rate": 1.1806516059628094e-05, "log_odds_chosen": 11.114848136901855, "log_odds_ratio": -0.00025937389000318944, "logits/chosen": -0.4152233302593231, "logits/rejected": -0.5423761606216431, "logps/chosen": -0.0001527878048364073, "logps/rejected": -2.3076977729797363, "loss": 0.4969, "nll_loss": 0.12419218569993973, "rewards/accuracies": 1.0, "rewards/chosen": -1.5278781575034373e-05, "rewards/margins": 0.2307545244693756, "rewards/rejected": -0.2307698130607605, "step": 11387 }, { "epoch": 7.875518672199171, "grad_norm": 5.104621410369873, "learning_rate": 1.1802674043337945e-05, "log_odds_chosen": 11.1847562789917, "log_odds_ratio": -1.6824447811814025e-05, "logits/chosen": -0.5179380774497986, "logits/rejected": -0.5166718363761902, "logps/chosen": -0.00022529246052727103, "logps/rejected": -2.265241861343384, "loss": 0.4431, "nll_loss": 0.11076471954584122, "rewards/accuracies": 1.0, "rewards/chosen": -2.2529246052727103e-05, "rewards/margins": 0.2265016734600067, "rewards/rejected": -0.22652418911457062, "step": 11388 }, { "epoch": 7.876210235131397, "grad_norm": 4.958249092102051, "learning_rate": 1.1798832027047795e-05, "log_odds_chosen": 11.588720321655273, "log_odds_ratio": -1.3179649613448419e-05, "logits/chosen": -0.27237236499786377, "logits/rejected": -0.2907797694206238, "logps/chosen": -0.00013127666898071766, "logps/rejected": -2.4104108810424805, "loss": 0.4595, "nll_loss": 0.11488554626703262, "rewards/accuracies": 1.0, "rewards/chosen": -1.3127668353263289e-05, "rewards/margins": 0.24102795124053955, "rewards/rejected": -0.24104109406471252, "step": 11389 }, { "epoch": 7.876901798063624, "grad_norm": 4.1554412841796875, "learning_rate": 1.1794990010757646e-05, "log_odds_chosen": 11.154207229614258, "log_odds_ratio": -3.690812081913464e-05, "logits/chosen": -0.46767714619636536, "logits/rejected": -0.6114166975021362, "logps/chosen": -0.00019952871662098914, "logps/rejected": -2.374128818511963, "loss": 0.5106, "nll_loss": 0.12764978408813477, "rewards/accuracies": 1.0, "rewards/chosen": -1.9952873117290437e-05, "rewards/margins": 0.23739290237426758, "rewards/rejected": -0.23741286993026733, "step": 11390 }, { "epoch": 7.877593360995851, "grad_norm": 4.927827835083008, "learning_rate": 1.1791147994467497e-05, "log_odds_chosen": 10.432454109191895, "log_odds_ratio": -0.000577417784370482, "logits/chosen": -0.5227782726287842, "logits/rejected": -0.4943715035915375, "logps/chosen": -0.0006076360587030649, "logps/rejected": -1.9413719177246094, "loss": 0.4588, "nll_loss": 0.11463840305805206, "rewards/accuracies": 1.0, "rewards/chosen": -6.0763602959923446e-05, "rewards/margins": 0.19407644867897034, "rewards/rejected": -0.19413720071315765, "step": 11391 }, { "epoch": 7.878284923928078, "grad_norm": 8.029701232910156, "learning_rate": 1.178730597817735e-05, "log_odds_chosen": 10.086861610412598, "log_odds_ratio": -0.0001571264147059992, "logits/chosen": -0.3266010582447052, "logits/rejected": -0.41374966502189636, "logps/chosen": -0.0011758707696571946, "logps/rejected": -1.8890432119369507, "loss": 0.5781, "nll_loss": 0.14450611174106598, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001175870856968686, "rewards/margins": 0.18878674507141113, "rewards/rejected": -0.1889043152332306, "step": 11392 }, { "epoch": 7.878976486860305, "grad_norm": 4.888491153717041, "learning_rate": 1.1783463961887198e-05, "log_odds_chosen": 11.464473724365234, "log_odds_ratio": -1.954937215487007e-05, "logits/chosen": -0.6128780841827393, "logits/rejected": -0.7605924606323242, "logps/chosen": -0.00020476209465414286, "logps/rejected": -2.521747589111328, "loss": 0.3513, "nll_loss": 0.08781325817108154, "rewards/accuracies": 1.0, "rewards/chosen": -2.047621092060581e-05, "rewards/margins": 0.25215429067611694, "rewards/rejected": -0.2521747648715973, "step": 11393 }, { "epoch": 7.8796680497925315, "grad_norm": 4.10215950012207, "learning_rate": 1.1779621945597049e-05, "log_odds_chosen": 12.365216255187988, "log_odds_ratio": -9.213494195137173e-06, "logits/chosen": 0.05360027775168419, "logits/rejected": 0.00046793476212769747, "logps/chosen": -0.00018678676860872656, "logps/rejected": -3.1028411388397217, "loss": 0.3805, "nll_loss": 0.09511671960353851, "rewards/accuracies": 1.0, "rewards/chosen": -1.867867831606418e-05, "rewards/margins": 0.3102654218673706, "rewards/rejected": -0.3102841377258301, "step": 11394 }, { "epoch": 7.880359612724758, "grad_norm": 3.1337502002716064, "learning_rate": 1.17757799293069e-05, "log_odds_chosen": 11.70418643951416, "log_odds_ratio": -1.843475729401689e-05, "logits/chosen": -0.10929186642169952, "logits/rejected": -0.029111243784427643, "logps/chosen": -0.0001291261869482696, "logps/rejected": -2.3684635162353516, "loss": 0.3893, "nll_loss": 0.09732114523649216, "rewards/accuracies": 1.0, "rewards/chosen": -1.291261833102908e-05, "rewards/margins": 0.23683345317840576, "rewards/rejected": -0.23684635758399963, "step": 11395 }, { "epoch": 7.881051175656985, "grad_norm": 3.5384857654571533, "learning_rate": 1.1771937913016752e-05, "log_odds_chosen": 11.739799499511719, "log_odds_ratio": -1.185938799608266e-05, "logits/chosen": 0.07834568619728088, "logits/rejected": -0.056670159101486206, "logps/chosen": -0.0001279369171243161, "logps/rejected": -2.635584592819214, "loss": 0.4849, "nll_loss": 0.12123227119445801, "rewards/accuracies": 1.0, "rewards/chosen": -1.2793692803825252e-05, "rewards/margins": 0.2635456621646881, "rewards/rejected": -0.26355844736099243, "step": 11396 }, { "epoch": 7.881742738589212, "grad_norm": 3.704191207885742, "learning_rate": 1.1768095896726603e-05, "log_odds_chosen": 9.989892959594727, "log_odds_ratio": -0.00014886785356793553, "logits/chosen": 0.043805141001939774, "logits/rejected": -0.012445596978068352, "logps/chosen": -0.00040336043457500637, "logps/rejected": -1.742424726486206, "loss": 0.2857, "nll_loss": 0.07141406834125519, "rewards/accuracies": 1.0, "rewards/chosen": -4.0336042729904875e-05, "rewards/margins": 0.17420212924480438, "rewards/rejected": -0.17424246668815613, "step": 11397 }, { "epoch": 7.882434301521439, "grad_norm": 4.47479772567749, "learning_rate": 1.1764253880436454e-05, "log_odds_chosen": 11.427066802978516, "log_odds_ratio": -4.571495810523629e-05, "logits/chosen": -0.20953892171382904, "logits/rejected": -0.324074387550354, "logps/chosen": -0.00020148059411440045, "logps/rejected": -2.3615403175354004, "loss": 0.5508, "nll_loss": 0.13769517838954926, "rewards/accuracies": 1.0, "rewards/chosen": -2.0148057956248522e-05, "rewards/margins": 0.2361339032649994, "rewards/rejected": -0.23615404963493347, "step": 11398 }, { "epoch": 7.883125864453666, "grad_norm": 5.660167694091797, "learning_rate": 1.1760411864146305e-05, "log_odds_chosen": 10.79905891418457, "log_odds_ratio": -0.00018808482855092734, "logits/chosen": 0.26020288467407227, "logits/rejected": 0.17786680161952972, "logps/chosen": -0.0012968253577128053, "logps/rejected": -2.7913854122161865, "loss": 0.4628, "nll_loss": 0.11568672955036163, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012968253577128053, "rewards/margins": 0.2790088653564453, "rewards/rejected": -0.27913856506347656, "step": 11399 }, { "epoch": 7.8838174273858925, "grad_norm": 5.293552875518799, "learning_rate": 1.1756569847856155e-05, "log_odds_chosen": 9.490236282348633, "log_odds_ratio": -0.0009365877485834062, "logits/chosen": 0.11345387250185013, "logits/rejected": -0.07768663018941879, "logps/chosen": -0.00144152098800987, "logps/rejected": -2.5067501068115234, "loss": 0.5639, "nll_loss": 0.14088603854179382, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014415210171137005, "rewards/margins": 0.25053083896636963, "rewards/rejected": -0.2506750226020813, "step": 11400 }, { "epoch": 7.884508990318119, "grad_norm": 4.563834190368652, "learning_rate": 1.1752727831566006e-05, "log_odds_chosen": 11.245931625366211, "log_odds_ratio": -2.0148238036199473e-05, "logits/chosen": -0.014509126543998718, "logits/rejected": -0.10480161011219025, "logps/chosen": -0.00014589951024390757, "logps/rejected": -2.1419601440429688, "loss": 0.5504, "nll_loss": 0.13758864998817444, "rewards/accuracies": 1.0, "rewards/chosen": -1.4589951206289697e-05, "rewards/margins": 0.21418142318725586, "rewards/rejected": -0.21419601142406464, "step": 11401 }, { "epoch": 7.885200553250346, "grad_norm": 4.083811283111572, "learning_rate": 1.1748885815275858e-05, "log_odds_chosen": 9.566080093383789, "log_odds_ratio": -0.0007488796254619956, "logits/chosen": -0.2967919707298279, "logits/rejected": -0.3396698832511902, "logps/chosen": -0.0015874492237344384, "logps/rejected": -2.4406490325927734, "loss": 0.3039, "nll_loss": 0.07590655982494354, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015874492237344384, "rewards/margins": 0.243906170129776, "rewards/rejected": -0.24406489729881287, "step": 11402 }, { "epoch": 7.885892116182573, "grad_norm": 5.50595760345459, "learning_rate": 1.1745043798985708e-05, "log_odds_chosen": 11.039468765258789, "log_odds_ratio": -0.0003161336644552648, "logits/chosen": -0.3856697082519531, "logits/rejected": -0.3346801698207855, "logps/chosen": -0.0003978805907536298, "logps/rejected": -2.7548675537109375, "loss": 0.67, "nll_loss": 0.1674737185239792, "rewards/accuracies": 1.0, "rewards/chosen": -3.9788061258150265e-05, "rewards/margins": 0.27544695138931274, "rewards/rejected": -0.2754867672920227, "step": 11403 }, { "epoch": 7.8865836791148, "grad_norm": 3.6710848808288574, "learning_rate": 1.1741201782695558e-05, "log_odds_chosen": 11.459981918334961, "log_odds_ratio": -9.545421198708937e-05, "logits/chosen": -0.16816915571689606, "logits/rejected": -0.31154415011405945, "logps/chosen": -0.0001600806281203404, "logps/rejected": -2.341383934020996, "loss": 0.5636, "nll_loss": 0.1408993899822235, "rewards/accuracies": 1.0, "rewards/chosen": -1.6008063539629802e-05, "rewards/margins": 0.2341223806142807, "rewards/rejected": -0.2341383695602417, "step": 11404 }, { "epoch": 7.887275242047027, "grad_norm": 4.168887615203857, "learning_rate": 1.173735976640541e-05, "log_odds_chosen": 11.432533264160156, "log_odds_ratio": -3.0281080398708582e-05, "logits/chosen": -0.03569936007261276, "logits/rejected": -0.014538850635290146, "logps/chosen": -0.00017260480672121048, "logps/rejected": -2.6162784099578857, "loss": 0.5044, "nll_loss": 0.1261000633239746, "rewards/accuracies": 1.0, "rewards/chosen": -1.726048139971681e-05, "rewards/margins": 0.26161056756973267, "rewards/rejected": -0.26162785291671753, "step": 11405 }, { "epoch": 7.8879668049792535, "grad_norm": 10.902813911437988, "learning_rate": 1.1733517750115261e-05, "log_odds_chosen": 11.470321655273438, "log_odds_ratio": -1.4454016309173312e-05, "logits/chosen": 0.037685878574848175, "logits/rejected": -0.08560698479413986, "logps/chosen": -0.00012334572966210544, "logps/rejected": -2.1695070266723633, "loss": 0.5225, "nll_loss": 0.13062696158885956, "rewards/accuracies": 1.0, "rewards/chosen": -1.2334573511907365e-05, "rewards/margins": 0.21693839132785797, "rewards/rejected": -0.21695071458816528, "step": 11406 }, { "epoch": 7.88865836791148, "grad_norm": 4.911862850189209, "learning_rate": 1.1729675733825112e-05, "log_odds_chosen": 11.952144622802734, "log_odds_ratio": -2.8422375180525705e-05, "logits/chosen": -0.16157597303390503, "logits/rejected": -0.2448599487543106, "logps/chosen": -0.00015069168875925243, "logps/rejected": -2.9311952590942383, "loss": 0.4916, "nll_loss": 0.12288607656955719, "rewards/accuracies": 1.0, "rewards/chosen": -1.5069169421622064e-05, "rewards/margins": 0.29310446977615356, "rewards/rejected": -0.29311954975128174, "step": 11407 }, { "epoch": 7.889349930843707, "grad_norm": 3.9067697525024414, "learning_rate": 1.1725833717534963e-05, "log_odds_chosen": 11.630581855773926, "log_odds_ratio": -8.880384848453104e-05, "logits/chosen": -0.16571418941020966, "logits/rejected": -0.17225387692451477, "logps/chosen": -0.00026298040756955743, "logps/rejected": -3.0806257724761963, "loss": 0.4761, "nll_loss": 0.11901277303695679, "rewards/accuracies": 1.0, "rewards/chosen": -2.6298041120753624e-05, "rewards/margins": 0.30803629755973816, "rewards/rejected": -0.3080625832080841, "step": 11408 }, { "epoch": 7.890041493775934, "grad_norm": 4.061779499053955, "learning_rate": 1.1721991701244814e-05, "log_odds_chosen": 10.276396751403809, "log_odds_ratio": -0.0003999832842964679, "logits/chosen": -0.6998578310012817, "logits/rejected": -0.5479745864868164, "logps/chosen": -0.00022533259470947087, "logps/rejected": -1.7632088661193848, "loss": 0.3925, "nll_loss": 0.09808278828859329, "rewards/accuracies": 1.0, "rewards/chosen": -2.2533258743351325e-05, "rewards/margins": 0.1762983649969101, "rewards/rejected": -0.176320880651474, "step": 11409 }, { "epoch": 7.890733056708161, "grad_norm": 4.482332706451416, "learning_rate": 1.1718149684954664e-05, "log_odds_chosen": 11.261518478393555, "log_odds_ratio": -4.342007377999835e-05, "logits/chosen": -0.4291597008705139, "logits/rejected": -0.3953316807746887, "logps/chosen": -0.0003815246745944023, "logps/rejected": -2.932671308517456, "loss": 0.4683, "nll_loss": 0.1170596033334732, "rewards/accuracies": 1.0, "rewards/chosen": -3.8152469642227516e-05, "rewards/margins": 0.29322898387908936, "rewards/rejected": -0.2932671308517456, "step": 11410 }, { "epoch": 7.891424619640388, "grad_norm": 5.705649375915527, "learning_rate": 1.1714307668664517e-05, "log_odds_chosen": 12.284934997558594, "log_odds_ratio": -1.5169678590609692e-05, "logits/chosen": -0.4257548153400421, "logits/rejected": -0.39802616834640503, "logps/chosen": -0.00021122126781847328, "logps/rejected": -2.713761568069458, "loss": 0.4794, "nll_loss": 0.11984308063983917, "rewards/accuracies": 1.0, "rewards/chosen": -2.1122126781847328e-05, "rewards/margins": 0.2713550329208374, "rewards/rejected": -0.2713761627674103, "step": 11411 }, { "epoch": 7.8921161825726145, "grad_norm": 6.233856201171875, "learning_rate": 1.1710465652374366e-05, "log_odds_chosen": 11.601268768310547, "log_odds_ratio": -2.2449883545050398e-05, "logits/chosen": -0.3950972855091095, "logits/rejected": -0.38946732878685, "logps/chosen": -9.428364137420431e-05, "logps/rejected": -2.0098214149475098, "loss": 0.5088, "nll_loss": 0.12719620764255524, "rewards/accuracies": 1.0, "rewards/chosen": -9.428363227925729e-06, "rewards/margins": 0.20097270607948303, "rewards/rejected": -0.20098212361335754, "step": 11412 }, { "epoch": 7.892807745504841, "grad_norm": 8.967583656311035, "learning_rate": 1.1706623636084217e-05, "log_odds_chosen": 13.042219161987305, "log_odds_ratio": -4.923728283756645e-06, "logits/chosen": -0.48502588272094727, "logits/rejected": -0.5291862487792969, "logps/chosen": -0.00027994526317343116, "logps/rejected": -4.361847400665283, "loss": 0.676, "nll_loss": 0.16900193691253662, "rewards/accuracies": 1.0, "rewards/chosen": -2.7994525225949474e-05, "rewards/margins": 0.4361567497253418, "rewards/rejected": -0.43618476390838623, "step": 11413 }, { "epoch": 7.893499308437068, "grad_norm": 4.976434230804443, "learning_rate": 1.170278161979407e-05, "log_odds_chosen": 11.235292434692383, "log_odds_ratio": -5.498766404343769e-05, "logits/chosen": -0.390159547328949, "logits/rejected": -0.4429699182510376, "logps/chosen": -0.0002683410421013832, "logps/rejected": -2.61037015914917, "loss": 0.5119, "nll_loss": 0.12796220183372498, "rewards/accuracies": 1.0, "rewards/chosen": -2.68341045739362e-05, "rewards/margins": 0.26101019978523254, "rewards/rejected": -0.26103702187538147, "step": 11414 }, { "epoch": 7.894190871369295, "grad_norm": 4.873762130737305, "learning_rate": 1.169893960350392e-05, "log_odds_chosen": 11.534000396728516, "log_odds_ratio": -2.1693673261324875e-05, "logits/chosen": -0.5807253122329712, "logits/rejected": -0.6946268677711487, "logps/chosen": -0.00021547307551372796, "logps/rejected": -2.3493354320526123, "loss": 0.5295, "nll_loss": 0.1323809027671814, "rewards/accuracies": 1.0, "rewards/chosen": -2.1547308278968558e-05, "rewards/margins": 0.2349119931459427, "rewards/rejected": -0.234933540225029, "step": 11415 }, { "epoch": 7.894882434301522, "grad_norm": 4.372593879699707, "learning_rate": 1.169509758721377e-05, "log_odds_chosen": 11.154789924621582, "log_odds_ratio": -0.0002964198647532612, "logits/chosen": -0.18853700160980225, "logits/rejected": -0.14731237292289734, "logps/chosen": -0.00035180928534828126, "logps/rejected": -2.643852710723877, "loss": 0.4617, "nll_loss": 0.11539193987846375, "rewards/accuracies": 1.0, "rewards/chosen": -3.5180928534828126e-05, "rewards/margins": 0.2643500864505768, "rewards/rejected": -0.26438528299331665, "step": 11416 }, { "epoch": 7.895573997233749, "grad_norm": 13.949653625488281, "learning_rate": 1.1691255570923621e-05, "log_odds_chosen": 13.114603042602539, "log_odds_ratio": -8.687659828865435e-06, "logits/chosen": -0.3029056191444397, "logits/rejected": -0.41169899702072144, "logps/chosen": -7.216949597932398e-05, "logps/rejected": -3.5470874309539795, "loss": 0.6499, "nll_loss": 0.16247348487377167, "rewards/accuracies": 1.0, "rewards/chosen": -7.216949597932398e-06, "rewards/margins": 0.35470154881477356, "rewards/rejected": -0.3547087609767914, "step": 11417 }, { "epoch": 7.8962655601659755, "grad_norm": 5.432498931884766, "learning_rate": 1.1687413554633472e-05, "log_odds_chosen": 11.624490737915039, "log_odds_ratio": -3.6980825825594366e-05, "logits/chosen": 0.11878293752670288, "logits/rejected": -0.020380035042762756, "logps/chosen": -0.00031964771915227175, "logps/rejected": -2.654505491256714, "loss": 0.5729, "nll_loss": 0.1432090550661087, "rewards/accuracies": 1.0, "rewards/chosen": -3.196476973243989e-05, "rewards/margins": 0.2654185891151428, "rewards/rejected": -0.26545053720474243, "step": 11418 }, { "epoch": 7.896957123098202, "grad_norm": 3.285291910171509, "learning_rate": 1.1683571538343323e-05, "log_odds_chosen": 11.328643798828125, "log_odds_ratio": -4.0556675230618566e-05, "logits/chosen": -0.32337823510169983, "logits/rejected": -0.32799458503723145, "logps/chosen": -0.00010209741594735533, "logps/rejected": -2.072064161300659, "loss": 0.437, "nll_loss": 0.1092538833618164, "rewards/accuracies": 1.0, "rewards/chosen": -1.0209741958533414e-05, "rewards/margins": 0.2071962058544159, "rewards/rejected": -0.20720641314983368, "step": 11419 }, { "epoch": 7.897648686030429, "grad_norm": 5.5434465408325195, "learning_rate": 1.1679729522053175e-05, "log_odds_chosen": 11.171856880187988, "log_odds_ratio": -6.727038999088109e-05, "logits/chosen": -0.48523566126823425, "logits/rejected": -0.4981197714805603, "logps/chosen": -0.00019483445794321597, "logps/rejected": -2.566240072250366, "loss": 0.4962, "nll_loss": 0.12404817342758179, "rewards/accuracies": 1.0, "rewards/chosen": -1.9483444702927954e-05, "rewards/margins": 0.25660449266433716, "rewards/rejected": -0.2566240131855011, "step": 11420 }, { "epoch": 7.898340248962656, "grad_norm": 7.740990161895752, "learning_rate": 1.1675887505763024e-05, "log_odds_chosen": 11.712762832641602, "log_odds_ratio": -2.097015931212809e-05, "logits/chosen": -0.6795220375061035, "logits/rejected": -0.6612729430198669, "logps/chosen": -0.00015862970030866563, "logps/rejected": -2.8680872917175293, "loss": 0.4811, "nll_loss": 0.12026054412126541, "rewards/accuracies": 1.0, "rewards/chosen": -1.586297366884537e-05, "rewards/margins": 0.2867928743362427, "rewards/rejected": -0.28680872917175293, "step": 11421 }, { "epoch": 7.899031811894883, "grad_norm": 4.974034786224365, "learning_rate": 1.1672045489472875e-05, "log_odds_chosen": 10.817819595336914, "log_odds_ratio": -6.758319068467245e-05, "logits/chosen": -0.35871621966362, "logits/rejected": -0.3858879804611206, "logps/chosen": -0.0001772976538632065, "logps/rejected": -2.009894371032715, "loss": 0.3896, "nll_loss": 0.09739303588867188, "rewards/accuracies": 1.0, "rewards/chosen": -1.772976429492701e-05, "rewards/margins": 0.20097172260284424, "rewards/rejected": -0.20098945498466492, "step": 11422 }, { "epoch": 7.89972337482711, "grad_norm": 7.3034210205078125, "learning_rate": 1.1668203473182728e-05, "log_odds_chosen": 9.981136322021484, "log_odds_ratio": -9.297035285271704e-05, "logits/chosen": -0.38603436946868896, "logits/rejected": -0.4565512537956238, "logps/chosen": -0.00041265651816502213, "logps/rejected": -1.6070398092269897, "loss": 0.3876, "nll_loss": 0.09689034521579742, "rewards/accuracies": 1.0, "rewards/chosen": -4.1265651816502213e-05, "rewards/margins": 0.1606627255678177, "rewards/rejected": -0.16070398688316345, "step": 11423 }, { "epoch": 7.9004149377593365, "grad_norm": 3.654689073562622, "learning_rate": 1.1664361456892578e-05, "log_odds_chosen": 10.258405685424805, "log_odds_ratio": -0.0008943437715061009, "logits/chosen": -0.12104588747024536, "logits/rejected": -0.20898351073265076, "logps/chosen": -0.008668285794556141, "logps/rejected": -2.51485013961792, "loss": 0.3651, "nll_loss": 0.09118712693452835, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008668285445310175, "rewards/margins": 0.2506181597709656, "rewards/rejected": -0.25148501992225647, "step": 11424 }, { "epoch": 7.901106500691563, "grad_norm": 4.506947994232178, "learning_rate": 1.1660519440602429e-05, "log_odds_chosen": 10.826162338256836, "log_odds_ratio": -3.588577237678692e-05, "logits/chosen": -0.24709933996200562, "logits/rejected": -0.34494447708129883, "logps/chosen": -0.00024828611640259624, "logps/rejected": -1.8355225324630737, "loss": 0.3383, "nll_loss": 0.08458095788955688, "rewards/accuracies": 1.0, "rewards/chosen": -2.4828612367855385e-05, "rewards/margins": 0.18352742493152618, "rewards/rejected": -0.18355226516723633, "step": 11425 }, { "epoch": 7.90179806362379, "grad_norm": 6.3836469650268555, "learning_rate": 1.165667742431228e-05, "log_odds_chosen": 10.288522720336914, "log_odds_ratio": -0.00035393572761677206, "logits/chosen": -0.7220645546913147, "logits/rejected": -0.8198657631874084, "logps/chosen": -0.0012157809687778354, "logps/rejected": -2.153752565383911, "loss": 0.5226, "nll_loss": 0.13060925900936127, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001215781012433581, "rewards/margins": 0.21525368094444275, "rewards/rejected": -0.21537525951862335, "step": 11426 }, { "epoch": 7.902489626556017, "grad_norm": 3.665544271469116, "learning_rate": 1.165283540802213e-05, "log_odds_chosen": 10.943013191223145, "log_odds_ratio": -8.478549716528505e-05, "logits/chosen": -0.548704981803894, "logits/rejected": -0.5903621315956116, "logps/chosen": -0.00020362951909191906, "logps/rejected": -2.2660512924194336, "loss": 0.3336, "nll_loss": 0.08338891714811325, "rewards/accuracies": 1.0, "rewards/chosen": -2.0362949726404622e-05, "rewards/margins": 0.226584792137146, "rewards/rejected": -0.2266051471233368, "step": 11427 }, { "epoch": 7.903181189488244, "grad_norm": 5.852066516876221, "learning_rate": 1.1648993391731981e-05, "log_odds_chosen": 10.770855903625488, "log_odds_ratio": -9.15360651561059e-05, "logits/chosen": -0.3121373951435089, "logits/rejected": -0.42780694365501404, "logps/chosen": -0.0005003748228773475, "logps/rejected": -2.425102710723877, "loss": 0.5866, "nll_loss": 0.14663726091384888, "rewards/accuracies": 1.0, "rewards/chosen": -5.0037480832543224e-05, "rewards/margins": 0.2424602508544922, "rewards/rejected": -0.24251027405261993, "step": 11428 }, { "epoch": 7.903872752420471, "grad_norm": 3.679690361022949, "learning_rate": 1.1645151375441832e-05, "log_odds_chosen": 10.845489501953125, "log_odds_ratio": -3.326304431539029e-05, "logits/chosen": -0.5469524264335632, "logits/rejected": -0.5827398896217346, "logps/chosen": -0.00013200710236560553, "logps/rejected": -1.939971685409546, "loss": 0.3076, "nll_loss": 0.07689175009727478, "rewards/accuracies": 1.0, "rewards/chosen": -1.3200711691752076e-05, "rewards/margins": 0.19398397207260132, "rewards/rejected": -0.19399715960025787, "step": 11429 }, { "epoch": 7.904564315352697, "grad_norm": 4.049372673034668, "learning_rate": 1.1641309359151683e-05, "log_odds_chosen": 11.037715911865234, "log_odds_ratio": -0.00010179554374190047, "logits/chosen": -0.06870530545711517, "logits/rejected": -0.08930937945842743, "logps/chosen": -0.00024262507213279605, "logps/rejected": -2.683316230773926, "loss": 0.43, "nll_loss": 0.1074962466955185, "rewards/accuracies": 1.0, "rewards/chosen": -2.4262508304673247e-05, "rewards/margins": 0.2683073580265045, "rewards/rejected": -0.2683316171169281, "step": 11430 }, { "epoch": 7.905255878284924, "grad_norm": 4.094974994659424, "learning_rate": 1.1637467342861534e-05, "log_odds_chosen": 12.618879318237305, "log_odds_ratio": -7.0655078161507845e-06, "logits/chosen": -0.16843059659004211, "logits/rejected": -0.24288104474544525, "logps/chosen": -9.244784450856969e-05, "logps/rejected": -2.8020706176757812, "loss": 0.3937, "nll_loss": 0.09841499477624893, "rewards/accuracies": 1.0, "rewards/chosen": -9.244784450856969e-06, "rewards/margins": 0.28019779920578003, "rewards/rejected": -0.2802070379257202, "step": 11431 }, { "epoch": 7.905947441217151, "grad_norm": 7.394796848297119, "learning_rate": 1.1633625326571384e-05, "log_odds_chosen": 12.214794158935547, "log_odds_ratio": -5.331547981768381e-06, "logits/chosen": -0.6335046291351318, "logits/rejected": -0.6970090866088867, "logps/chosen": -6.661218503722921e-05, "logps/rejected": -2.4878671169281006, "loss": 0.6625, "nll_loss": 0.1656339168548584, "rewards/accuracies": 1.0, "rewards/chosen": -6.6612192313186824e-06, "rewards/margins": 0.24878007173538208, "rewards/rejected": -0.24878671765327454, "step": 11432 }, { "epoch": 7.906639004149378, "grad_norm": 4.621971130371094, "learning_rate": 1.1629783310281237e-05, "log_odds_chosen": 11.147924423217773, "log_odds_ratio": -2.468354068696499e-05, "logits/chosen": -0.40830671787261963, "logits/rejected": -0.459160178899765, "logps/chosen": -0.00010135288175661117, "logps/rejected": -1.8910176753997803, "loss": 0.4126, "nll_loss": 0.1031472310423851, "rewards/accuracies": 1.0, "rewards/chosen": -1.0135287993762176e-05, "rewards/margins": 0.18909165263175964, "rewards/rejected": -0.18910178542137146, "step": 11433 }, { "epoch": 7.907330567081605, "grad_norm": 5.314410209655762, "learning_rate": 1.1625941293991088e-05, "log_odds_chosen": 10.044839859008789, "log_odds_ratio": -0.00013832849799655378, "logits/chosen": -0.4927716851234436, "logits/rejected": -0.4732247292995453, "logps/chosen": -0.00022110360441729426, "logps/rejected": -1.7093604803085327, "loss": 0.4769, "nll_loss": 0.11922001838684082, "rewards/accuracies": 1.0, "rewards/chosen": -2.211036189692095e-05, "rewards/margins": 0.1709139347076416, "rewards/rejected": -0.17093604803085327, "step": 11434 }, { "epoch": 7.908022130013832, "grad_norm": 3.985020399093628, "learning_rate": 1.1622099277700937e-05, "log_odds_chosen": 11.439200401306152, "log_odds_ratio": -3.155437298119068e-05, "logits/chosen": -0.3941129744052887, "logits/rejected": -0.39420852065086365, "logps/chosen": -0.0002207858196925372, "logps/rejected": -2.724975347518921, "loss": 0.839, "nll_loss": 0.20973655581474304, "rewards/accuracies": 1.0, "rewards/chosen": -2.20785823330516e-05, "rewards/margins": 0.2724754810333252, "rewards/rejected": -0.2724975347518921, "step": 11435 }, { "epoch": 7.908713692946058, "grad_norm": 5.979078769683838, "learning_rate": 1.1618257261410789e-05, "log_odds_chosen": 11.143678665161133, "log_odds_ratio": -3.0088162020547315e-05, "logits/chosen": -0.5770678520202637, "logits/rejected": -0.6415504813194275, "logps/chosen": -8.087012975011021e-05, "logps/rejected": -1.7887535095214844, "loss": 0.438, "nll_loss": 0.10950732231140137, "rewards/accuracies": 1.0, "rewards/chosen": -8.08701224741526e-06, "rewards/margins": 0.17886726558208466, "rewards/rejected": -0.17887535691261292, "step": 11436 }, { "epoch": 7.909405255878285, "grad_norm": 3.349957227706909, "learning_rate": 1.161441524512064e-05, "log_odds_chosen": 10.178857803344727, "log_odds_ratio": -0.0002570028882473707, "logits/chosen": -0.3043663799762726, "logits/rejected": -0.26984646916389465, "logps/chosen": -0.0008028277661651373, "logps/rejected": -1.7114442586898804, "loss": 0.2807, "nll_loss": 0.07015819102525711, "rewards/accuracies": 1.0, "rewards/chosen": -8.028277079574764e-05, "rewards/margins": 0.17106413841247559, "rewards/rejected": -0.17114444077014923, "step": 11437 }, { "epoch": 7.910096818810512, "grad_norm": 10.283522605895996, "learning_rate": 1.161057322883049e-05, "log_odds_chosen": 9.67963981628418, "log_odds_ratio": -0.0006223957170732319, "logits/chosen": -0.5854220986366272, "logits/rejected": -0.598517894744873, "logps/chosen": -0.0002304011140950024, "logps/rejected": -0.9623869061470032, "loss": 0.4476, "nll_loss": 0.11184649169445038, "rewards/accuracies": 1.0, "rewards/chosen": -2.3040109226712957e-05, "rewards/margins": 0.0962156429886818, "rewards/rejected": -0.09623868018388748, "step": 11438 }, { "epoch": 7.910788381742739, "grad_norm": 7.8791913986206055, "learning_rate": 1.1606731212540341e-05, "log_odds_chosen": 11.597796440124512, "log_odds_ratio": -2.790990583889652e-05, "logits/chosen": -0.503038763999939, "logits/rejected": -0.5723360776901245, "logps/chosen": -9.536659490549937e-05, "logps/rejected": -2.3894646167755127, "loss": 0.5268, "nll_loss": 0.13169816136360168, "rewards/accuracies": 1.0, "rewards/chosen": -9.536659490549937e-06, "rewards/margins": 0.2389369159936905, "rewards/rejected": -0.23894645273685455, "step": 11439 }, { "epoch": 7.911479944674966, "grad_norm": 3.3033556938171387, "learning_rate": 1.1602889196250192e-05, "log_odds_chosen": 10.922914505004883, "log_odds_ratio": -3.322264092275873e-05, "logits/chosen": -0.14358104765415192, "logits/rejected": -0.15931063890457153, "logps/chosen": -0.00041416287422180176, "logps/rejected": -2.0859086513519287, "loss": 0.4858, "nll_loss": 0.12145288288593292, "rewards/accuracies": 1.0, "rewards/chosen": -4.141628960496746e-05, "rewards/margins": 0.20854943990707397, "rewards/rejected": -0.20859088003635406, "step": 11440 }, { "epoch": 7.912171507607193, "grad_norm": 3.984626293182373, "learning_rate": 1.1599047179960043e-05, "log_odds_chosen": 10.762500762939453, "log_odds_ratio": -0.00010356571146985516, "logits/chosen": -0.728722333908081, "logits/rejected": -0.72029709815979, "logps/chosen": -0.0002596940321382135, "logps/rejected": -1.7457057237625122, "loss": 0.4164, "nll_loss": 0.10409042239189148, "rewards/accuracies": 1.0, "rewards/chosen": -2.596940248622559e-05, "rewards/margins": 0.17454461753368378, "rewards/rejected": -0.17457059025764465, "step": 11441 }, { "epoch": 7.912863070539419, "grad_norm": 4.8249006271362305, "learning_rate": 1.1595205163669895e-05, "log_odds_chosen": 11.676660537719727, "log_odds_ratio": -5.567781408899464e-05, "logits/chosen": -0.23649048805236816, "logits/rejected": -0.27683085203170776, "logps/chosen": -0.0002755652240011841, "logps/rejected": -3.1232707500457764, "loss": 0.5273, "nll_loss": 0.13182684779167175, "rewards/accuracies": 1.0, "rewards/chosen": -2.755652167252265e-05, "rewards/margins": 0.31229954957962036, "rewards/rejected": -0.3123270869255066, "step": 11442 }, { "epoch": 7.913554633471646, "grad_norm": 6.4415669441223145, "learning_rate": 1.1591363147379746e-05, "log_odds_chosen": 10.647878646850586, "log_odds_ratio": -9.721822425490245e-05, "logits/chosen": -0.35493314266204834, "logits/rejected": -0.2665443420410156, "logps/chosen": -0.00035465031396597624, "logps/rejected": -2.031789541244507, "loss": 0.4099, "nll_loss": 0.10246631503105164, "rewards/accuracies": 1.0, "rewards/chosen": -3.546503285178915e-05, "rewards/margins": 0.20314347743988037, "rewards/rejected": -0.20317894220352173, "step": 11443 }, { "epoch": 7.914246196403873, "grad_norm": 3.6676669120788574, "learning_rate": 1.1587521131089597e-05, "log_odds_chosen": 11.052009582519531, "log_odds_ratio": -0.00010791603563120589, "logits/chosen": -0.5329593420028687, "logits/rejected": -0.4889289140701294, "logps/chosen": -0.0007934218156151474, "logps/rejected": -2.691516637802124, "loss": 0.4453, "nll_loss": 0.11130984127521515, "rewards/accuracies": 1.0, "rewards/chosen": -7.934217865113169e-05, "rewards/margins": 0.26907235383987427, "rewards/rejected": -0.2691516876220703, "step": 11444 }, { "epoch": 7.9149377593361, "grad_norm": 2.9632012844085693, "learning_rate": 1.1583679114799447e-05, "log_odds_chosen": 10.19129753112793, "log_odds_ratio": -0.00015468697529286146, "logits/chosen": -0.7022165060043335, "logits/rejected": -0.6874955892562866, "logps/chosen": -0.00013341064914129674, "logps/rejected": -1.3488829135894775, "loss": 0.4193, "nll_loss": 0.10479914397001266, "rewards/accuracies": 1.0, "rewards/chosen": -1.3341065823624376e-05, "rewards/margins": 0.13487495481967926, "rewards/rejected": -0.13488830626010895, "step": 11445 }, { "epoch": 7.915629322268327, "grad_norm": 4.115353584289551, "learning_rate": 1.1579837098509298e-05, "log_odds_chosen": 11.92996883392334, "log_odds_ratio": -0.00016364931070711464, "logits/chosen": 0.059397876262664795, "logits/rejected": -0.04412535950541496, "logps/chosen": -0.00012382738350424916, "logps/rejected": -2.920637369155884, "loss": 0.6305, "nll_loss": 0.15759626030921936, "rewards/accuracies": 1.0, "rewards/chosen": -1.2382738532323856e-05, "rewards/margins": 0.2920513451099396, "rewards/rejected": -0.29206374287605286, "step": 11446 }, { "epoch": 7.9163208852005535, "grad_norm": 4.740466117858887, "learning_rate": 1.1575995082219149e-05, "log_odds_chosen": 10.352497100830078, "log_odds_ratio": -0.00018813650240190327, "logits/chosen": 0.11943431943655014, "logits/rejected": 0.10199232399463654, "logps/chosen": -0.0015034006210044026, "logps/rejected": -2.6411702632904053, "loss": 0.5293, "nll_loss": 0.13230976462364197, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015034008538350463, "rewards/margins": 0.26396670937538147, "rewards/rejected": -0.264117032289505, "step": 11447 }, { "epoch": 7.91701244813278, "grad_norm": 4.00980806350708, "learning_rate": 1.1572153065929001e-05, "log_odds_chosen": 11.186810493469238, "log_odds_ratio": -0.00021705195831600577, "logits/chosen": -0.3363361656665802, "logits/rejected": -0.3838905096054077, "logps/chosen": -0.0002448104496579617, "logps/rejected": -2.4751431941986084, "loss": 0.3961, "nll_loss": 0.09899172931909561, "rewards/accuracies": 1.0, "rewards/chosen": -2.4481045329594053e-05, "rewards/margins": 0.2474898397922516, "rewards/rejected": -0.24751430749893188, "step": 11448 }, { "epoch": 7.917704011065007, "grad_norm": 5.31357479095459, "learning_rate": 1.156831104963885e-05, "log_odds_chosen": 10.24382495880127, "log_odds_ratio": -0.00011630626977421343, "logits/chosen": -0.18191811442375183, "logits/rejected": -0.13572078943252563, "logps/chosen": -0.0009642197983339429, "logps/rejected": -2.7066733837127686, "loss": 0.6845, "nll_loss": 0.1711220145225525, "rewards/accuracies": 1.0, "rewards/chosen": -9.64219871093519e-05, "rewards/margins": 0.27057090401649475, "rewards/rejected": -0.27066731452941895, "step": 11449 }, { "epoch": 7.918395573997234, "grad_norm": 7.6785197257995605, "learning_rate": 1.1564469033348701e-05, "log_odds_chosen": 10.965532302856445, "log_odds_ratio": -3.787028981605545e-05, "logits/chosen": -0.17806333303451538, "logits/rejected": -0.3148902654647827, "logps/chosen": -0.00010926988761639223, "logps/rejected": -1.8761992454528809, "loss": 0.2852, "nll_loss": 0.07128973305225372, "rewards/accuracies": 1.0, "rewards/chosen": -1.0926989489234984e-05, "rewards/margins": 0.1876089870929718, "rewards/rejected": -0.18761992454528809, "step": 11450 }, { "epoch": 7.919087136929461, "grad_norm": 6.197747707366943, "learning_rate": 1.1560627017058554e-05, "log_odds_chosen": 11.333625793457031, "log_odds_ratio": -9.571119153406471e-05, "logits/chosen": -0.09422504901885986, "logits/rejected": -0.2055082619190216, "logps/chosen": -0.00024049320199992508, "logps/rejected": -2.4040274620056152, "loss": 0.4777, "nll_loss": 0.11942285299301147, "rewards/accuracies": 1.0, "rewards/chosen": -2.404932092758827e-05, "rewards/margins": 0.24037869274616241, "rewards/rejected": -0.24040274322032928, "step": 11451 }, { "epoch": 7.919778699861688, "grad_norm": 13.796125411987305, "learning_rate": 1.1556785000768404e-05, "log_odds_chosen": 11.29486083984375, "log_odds_ratio": -7.908118277555332e-05, "logits/chosen": -0.1983284056186676, "logits/rejected": -0.22550822794437408, "logps/chosen": -0.00020663285977207124, "logps/rejected": -2.398308038711548, "loss": 0.4954, "nll_loss": 0.12384440749883652, "rewards/accuracies": 1.0, "rewards/chosen": -2.066328306682408e-05, "rewards/margins": 0.23981015384197235, "rewards/rejected": -0.23983082175254822, "step": 11452 }, { "epoch": 7.9204702627939145, "grad_norm": 6.0768208503723145, "learning_rate": 1.1552942984478255e-05, "log_odds_chosen": 12.187479019165039, "log_odds_ratio": -2.8749112971127033e-05, "logits/chosen": -0.4303026497364044, "logits/rejected": -0.3580453097820282, "logps/chosen": -0.00015580881154164672, "logps/rejected": -2.984548568725586, "loss": 0.5069, "nll_loss": 0.12673157453536987, "rewards/accuracies": 1.0, "rewards/chosen": -1.5580882973154075e-05, "rewards/margins": 0.29843926429748535, "rewards/rejected": -0.29845482110977173, "step": 11453 }, { "epoch": 7.921161825726141, "grad_norm": 6.099323749542236, "learning_rate": 1.1549100968188106e-05, "log_odds_chosen": 10.10976791381836, "log_odds_ratio": -9.947900980478153e-05, "logits/chosen": -0.38822758197784424, "logits/rejected": -0.3949180841445923, "logps/chosen": -0.0005124437739141285, "logps/rejected": -1.9129855632781982, "loss": 0.3069, "nll_loss": 0.07670585811138153, "rewards/accuracies": 1.0, "rewards/chosen": -5.1244373025838286e-05, "rewards/margins": 0.1912473440170288, "rewards/rejected": -0.19129857420921326, "step": 11454 }, { "epoch": 7.921853388658368, "grad_norm": 5.255620956420898, "learning_rate": 1.1545258951897957e-05, "log_odds_chosen": 10.686015129089355, "log_odds_ratio": -0.00011767195246648043, "logits/chosen": -0.05838357284665108, "logits/rejected": 0.05113779753446579, "logps/chosen": -0.0008657700382173061, "logps/rejected": -2.244718074798584, "loss": 0.6678, "nll_loss": 0.16692709922790527, "rewards/accuracies": 1.0, "rewards/chosen": -8.657699800096452e-05, "rewards/margins": 0.22438523173332214, "rewards/rejected": -0.2244718074798584, "step": 11455 }, { "epoch": 7.922544951590595, "grad_norm": 4.817359447479248, "learning_rate": 1.1541416935607807e-05, "log_odds_chosen": 9.26960563659668, "log_odds_ratio": -0.0009645846439525485, "logits/chosen": -0.24357624351978302, "logits/rejected": -0.23238223791122437, "logps/chosen": -0.008362173102796078, "logps/rejected": -1.7482798099517822, "loss": 0.7388, "nll_loss": 0.18461009860038757, "rewards/accuracies": 1.0, "rewards/chosen": -0.0008362172520719469, "rewards/margins": 0.17399176955223083, "rewards/rejected": -0.17482797801494598, "step": 11456 }, { "epoch": 7.923236514522822, "grad_norm": 5.3235673904418945, "learning_rate": 1.153757491931766e-05, "log_odds_chosen": 10.89983081817627, "log_odds_ratio": -0.00013438466703519225, "logits/chosen": -0.2583634853363037, "logits/rejected": -0.10850981622934341, "logps/chosen": -0.00022484775399789214, "logps/rejected": -2.3494505882263184, "loss": 0.4482, "nll_loss": 0.11204710602760315, "rewards/accuracies": 1.0, "rewards/chosen": -2.2484775399789214e-05, "rewards/margins": 0.2349225878715515, "rewards/rejected": -0.23494507372379303, "step": 11457 }, { "epoch": 7.923928077455049, "grad_norm": 6.07865047454834, "learning_rate": 1.1533732903027509e-05, "log_odds_chosen": 11.569040298461914, "log_odds_ratio": -2.4782182663329877e-05, "logits/chosen": -0.3904721140861511, "logits/rejected": -0.507940411567688, "logps/chosen": -0.00013616510841529816, "logps/rejected": -2.530514717102051, "loss": 0.6587, "nll_loss": 0.16466355323791504, "rewards/accuracies": 1.0, "rewards/chosen": -1.3616510841529816e-05, "rewards/margins": 0.25303786993026733, "rewards/rejected": -0.2530514597892761, "step": 11458 }, { "epoch": 7.9246196403872755, "grad_norm": 8.31247615814209, "learning_rate": 1.152989088673736e-05, "log_odds_chosen": 11.501436233520508, "log_odds_ratio": -2.56904440902872e-05, "logits/chosen": -0.6161126494407654, "logits/rejected": -0.640153169631958, "logps/chosen": -0.00013021615450270474, "logps/rejected": -2.619170665740967, "loss": 0.4468, "nll_loss": 0.11169376969337463, "rewards/accuracies": 1.0, "rewards/chosen": -1.3021613995078951e-05, "rewards/margins": 0.2619040310382843, "rewards/rejected": -0.2619170546531677, "step": 11459 }, { "epoch": 7.925311203319502, "grad_norm": 6.786872386932373, "learning_rate": 1.1526048870447212e-05, "log_odds_chosen": 10.474227905273438, "log_odds_ratio": -6.774486973881721e-05, "logits/chosen": -0.0007668398320674896, "logits/rejected": -0.2563415765762329, "logps/chosen": -0.00016562627570237964, "logps/rejected": -1.8405344486236572, "loss": 0.3454, "nll_loss": 0.08634229004383087, "rewards/accuracies": 1.0, "rewards/chosen": -1.6562627934035845e-05, "rewards/margins": 0.18403686583042145, "rewards/rejected": -0.184053435921669, "step": 11460 }, { "epoch": 7.926002766251729, "grad_norm": 3.9103434085845947, "learning_rate": 1.1522206854157063e-05, "log_odds_chosen": 11.24027156829834, "log_odds_ratio": -9.854403469944373e-05, "logits/chosen": -0.39807748794555664, "logits/rejected": -0.4943687915802002, "logps/chosen": -0.00030215978040359914, "logps/rejected": -2.5182738304138184, "loss": 0.4634, "nll_loss": 0.11585089564323425, "rewards/accuracies": 1.0, "rewards/chosen": -3.021598058694508e-05, "rewards/margins": 0.2517971694469452, "rewards/rejected": -0.2518273890018463, "step": 11461 }, { "epoch": 7.926694329183956, "grad_norm": 4.208470344543457, "learning_rate": 1.1518364837866914e-05, "log_odds_chosen": 11.54410171508789, "log_odds_ratio": -1.839596006902866e-05, "logits/chosen": -0.344673216342926, "logits/rejected": -0.4512706995010376, "logps/chosen": -0.00018507882487028837, "logps/rejected": -2.8429622650146484, "loss": 0.5383, "nll_loss": 0.13457489013671875, "rewards/accuracies": 1.0, "rewards/chosen": -1.8507882487028837e-05, "rewards/margins": 0.2842777371406555, "rewards/rejected": -0.2842962443828583, "step": 11462 }, { "epoch": 7.927385892116183, "grad_norm": 6.069661617279053, "learning_rate": 1.1514522821576763e-05, "log_odds_chosen": 11.567630767822266, "log_odds_ratio": -3.671016020234674e-05, "logits/chosen": 0.17913606762886047, "logits/rejected": 0.18328267335891724, "logps/chosen": -0.00011550368799362332, "logps/rejected": -2.375983238220215, "loss": 0.3726, "nll_loss": 0.09314057976007462, "rewards/accuracies": 1.0, "rewards/chosen": -1.1550368981261272e-05, "rewards/margins": 0.23758679628372192, "rewards/rejected": -0.23759834468364716, "step": 11463 }, { "epoch": 7.92807745504841, "grad_norm": 7.161730766296387, "learning_rate": 1.1510680805286615e-05, "log_odds_chosen": 13.012365341186523, "log_odds_ratio": -6.403481165762059e-06, "logits/chosen": -0.21157574653625488, "logits/rejected": -0.28957462310791016, "logps/chosen": -7.955124601721764e-05, "logps/rejected": -3.544642925262451, "loss": 0.53, "nll_loss": 0.13249525427818298, "rewards/accuracies": 1.0, "rewards/chosen": -7.955125511216465e-06, "rewards/margins": 0.3544563353061676, "rewards/rejected": -0.3544643223285675, "step": 11464 }, { "epoch": 7.9287690179806365, "grad_norm": 4.447282314300537, "learning_rate": 1.1506838788996466e-05, "log_odds_chosen": 10.233051300048828, "log_odds_ratio": -7.13659028406255e-05, "logits/chosen": -0.11875317990779877, "logits/rejected": -0.3084731101989746, "logps/chosen": -0.00024481149739585817, "logps/rejected": -1.5526336431503296, "loss": 0.5617, "nll_loss": 0.14041319489479065, "rewards/accuracies": 1.0, "rewards/chosen": -2.448115083097946e-05, "rewards/margins": 0.15523889660835266, "rewards/rejected": -0.15526337921619415, "step": 11465 }, { "epoch": 7.929460580912863, "grad_norm": 4.517125129699707, "learning_rate": 1.1502996772706317e-05, "log_odds_chosen": 10.96858024597168, "log_odds_ratio": -5.081095878267661e-05, "logits/chosen": -0.31112051010131836, "logits/rejected": -0.3540710210800171, "logps/chosen": -0.0004644246364478022, "logps/rejected": -2.4904298782348633, "loss": 0.4102, "nll_loss": 0.10255465656518936, "rewards/accuracies": 1.0, "rewards/chosen": -4.644246291718446e-05, "rewards/margins": 0.2489965558052063, "rewards/rejected": -0.24904300272464752, "step": 11466 }, { "epoch": 7.93015214384509, "grad_norm": 6.432483196258545, "learning_rate": 1.1499154756416167e-05, "log_odds_chosen": 10.742692947387695, "log_odds_ratio": -0.00017364558880217373, "logits/chosen": -0.46216145157814026, "logits/rejected": -0.44457200169563293, "logps/chosen": -0.00041976282955147326, "logps/rejected": -2.02523136138916, "loss": 0.6607, "nll_loss": 0.16514545679092407, "rewards/accuracies": 1.0, "rewards/chosen": -4.1976287320721895e-05, "rewards/margins": 0.20248116552829742, "rewards/rejected": -0.2025231420993805, "step": 11467 }, { "epoch": 7.930843706777317, "grad_norm": 4.329329967498779, "learning_rate": 1.1495312740126018e-05, "log_odds_chosen": 11.087435722351074, "log_odds_ratio": -3.9717786421533674e-05, "logits/chosen": -0.37949714064598083, "logits/rejected": -0.4751831293106079, "logps/chosen": -0.00022894766880199313, "logps/rejected": -2.1983184814453125, "loss": 0.4495, "nll_loss": 0.11236318945884705, "rewards/accuracies": 1.0, "rewards/chosen": -2.289476469741203e-05, "rewards/margins": 0.21980898082256317, "rewards/rejected": -0.21983186900615692, "step": 11468 }, { "epoch": 7.931535269709544, "grad_norm": 4.523159503936768, "learning_rate": 1.1491470723835869e-05, "log_odds_chosen": 11.431248664855957, "log_odds_ratio": -4.009368785773404e-05, "logits/chosen": -0.3916298747062683, "logits/rejected": -0.44648686051368713, "logps/chosen": -0.00023831524595152587, "logps/rejected": -2.716752529144287, "loss": 0.39, "nll_loss": 0.09749448299407959, "rewards/accuracies": 1.0, "rewards/chosen": -2.383152605034411e-05, "rewards/margins": 0.27165141701698303, "rewards/rejected": -0.2716752588748932, "step": 11469 }, { "epoch": 7.932226832641771, "grad_norm": 3.75484037399292, "learning_rate": 1.1487628707545721e-05, "log_odds_chosen": 11.424932479858398, "log_odds_ratio": -9.248127753380686e-05, "logits/chosen": -0.4830518662929535, "logits/rejected": -0.5308735370635986, "logps/chosen": -0.00013168362784199417, "logps/rejected": -2.41013765335083, "loss": 0.3641, "nll_loss": 0.09101329743862152, "rewards/accuracies": 1.0, "rewards/chosen": -1.3168361874704715e-05, "rewards/margins": 0.24100060760974884, "rewards/rejected": -0.241013765335083, "step": 11470 }, { "epoch": 7.9329183955739975, "grad_norm": 3.4767870903015137, "learning_rate": 1.1483786691255572e-05, "log_odds_chosen": 11.029151916503906, "log_odds_ratio": -8.472451736452058e-05, "logits/chosen": -0.412309855222702, "logits/rejected": -0.39536869525909424, "logps/chosen": -0.000157872709678486, "logps/rejected": -1.6841447353363037, "loss": 0.2727, "nll_loss": 0.06816992908716202, "rewards/accuracies": 1.0, "rewards/chosen": -1.5787269148859195e-05, "rewards/margins": 0.1683986932039261, "rewards/rejected": -0.16841447353363037, "step": 11471 }, { "epoch": 7.933609958506224, "grad_norm": 4.748891830444336, "learning_rate": 1.1479944674965421e-05, "log_odds_chosen": 11.620475769042969, "log_odds_ratio": -3.167948671034537e-05, "logits/chosen": -0.4358367323875427, "logits/rejected": -0.48927396535873413, "logps/chosen": -0.00023318035528063774, "logps/rejected": -3.058469533920288, "loss": 0.8373, "nll_loss": 0.2093205600976944, "rewards/accuracies": 1.0, "rewards/chosen": -2.331803807464894e-05, "rewards/margins": 0.3058236241340637, "rewards/rejected": -0.3058469593524933, "step": 11472 }, { "epoch": 7.934301521438451, "grad_norm": 3.9937808513641357, "learning_rate": 1.1476102658675274e-05, "log_odds_chosen": 11.555363655090332, "log_odds_ratio": -1.470993083785288e-05, "logits/chosen": 0.05103334039449692, "logits/rejected": 0.05239225924015045, "logps/chosen": -0.0001619577524252236, "logps/rejected": -2.7972123622894287, "loss": 0.5296, "nll_loss": 0.13239486515522003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6195774151128717e-05, "rewards/margins": 0.2797050178050995, "rewards/rejected": -0.279721200466156, "step": 11473 }, { "epoch": 7.934993084370678, "grad_norm": 3.7396841049194336, "learning_rate": 1.1472260642385124e-05, "log_odds_chosen": 11.454257011413574, "log_odds_ratio": -0.00012060473090969026, "logits/chosen": 0.004928797483444214, "logits/rejected": -0.13088713586330414, "logps/chosen": -0.00043558087782002985, "logps/rejected": -3.108194351196289, "loss": 0.4087, "nll_loss": 0.10216562449932098, "rewards/accuracies": 1.0, "rewards/chosen": -4.355808414402418e-05, "rewards/margins": 0.31077584624290466, "rewards/rejected": -0.3108194172382355, "step": 11474 }, { "epoch": 7.935684647302905, "grad_norm": 6.406213760375977, "learning_rate": 1.1468418626094975e-05, "log_odds_chosen": 10.826868057250977, "log_odds_ratio": -3.152354838675819e-05, "logits/chosen": -0.38109397888183594, "logits/rejected": -0.46262824535369873, "logps/chosen": -0.0002834878396242857, "logps/rejected": -2.1760125160217285, "loss": 0.6058, "nll_loss": 0.1514561027288437, "rewards/accuracies": 1.0, "rewards/chosen": -2.8348786145215854e-05, "rewards/margins": 0.21757292747497559, "rewards/rejected": -0.21760126948356628, "step": 11475 }, { "epoch": 7.936376210235132, "grad_norm": 4.276910305023193, "learning_rate": 1.1464576609804826e-05, "log_odds_chosen": 11.250327110290527, "log_odds_ratio": -3.175999518134631e-05, "logits/chosen": -0.11398166418075562, "logits/rejected": -0.13943475484848022, "logps/chosen": -0.00012849734048359096, "logps/rejected": -2.0981101989746094, "loss": 0.354, "nll_loss": 0.08849801123142242, "rewards/accuracies": 1.0, "rewards/chosen": -1.2849733138864394e-05, "rewards/margins": 0.2097981572151184, "rewards/rejected": -0.2098110169172287, "step": 11476 }, { "epoch": 7.9370677731673585, "grad_norm": 4.719145774841309, "learning_rate": 1.1460734593514677e-05, "log_odds_chosen": 11.08926773071289, "log_odds_ratio": -2.3812872314010747e-05, "logits/chosen": -0.00807216577231884, "logits/rejected": -0.039079755544662476, "logps/chosen": -0.00039097192347981036, "logps/rejected": -2.996049404144287, "loss": 0.853, "nll_loss": 0.21324507892131805, "rewards/accuracies": 1.0, "rewards/chosen": -3.909719453076832e-05, "rewards/margins": 0.2995658218860626, "rewards/rejected": -0.2996049225330353, "step": 11477 }, { "epoch": 7.937759336099585, "grad_norm": 16.17727279663086, "learning_rate": 1.1456892577224527e-05, "log_odds_chosen": 10.801301002502441, "log_odds_ratio": -5.282912024995312e-05, "logits/chosen": -0.3326207399368286, "logits/rejected": -0.3832235634326935, "logps/chosen": -0.0002026814327109605, "logps/rejected": -2.0088541507720947, "loss": 0.4545, "nll_loss": 0.11361575871706009, "rewards/accuracies": 1.0, "rewards/chosen": -2.0268143998691812e-05, "rewards/margins": 0.20086514949798584, "rewards/rejected": -0.20088540017604828, "step": 11478 }, { "epoch": 7.938450899031812, "grad_norm": 5.010890007019043, "learning_rate": 1.145305056093438e-05, "log_odds_chosen": 9.897591590881348, "log_odds_ratio": -0.0007988571305759251, "logits/chosen": -0.5147840976715088, "logits/rejected": -0.4357095956802368, "logps/chosen": -0.0008524827426299453, "logps/rejected": -2.4883193969726562, "loss": 0.6784, "nll_loss": 0.16951148211956024, "rewards/accuracies": 1.0, "rewards/chosen": -8.524827717337757e-05, "rewards/margins": 0.24874672293663025, "rewards/rejected": -0.24883195757865906, "step": 11479 }, { "epoch": 7.939142461964039, "grad_norm": 4.151998996734619, "learning_rate": 1.144920854464423e-05, "log_odds_chosen": 9.914112091064453, "log_odds_ratio": -0.0006920411833561957, "logits/chosen": -0.42303866147994995, "logits/rejected": -0.44322729110717773, "logps/chosen": -0.0007232284406200051, "logps/rejected": -1.6944925785064697, "loss": 0.4771, "nll_loss": 0.11921073496341705, "rewards/accuracies": 1.0, "rewards/chosen": -7.232284406200051e-05, "rewards/margins": 0.169376939535141, "rewards/rejected": -0.16944925487041473, "step": 11480 }, { "epoch": 7.939834024896266, "grad_norm": 4.5900702476501465, "learning_rate": 1.144536652835408e-05, "log_odds_chosen": 11.515774726867676, "log_odds_ratio": -0.0001623444986762479, "logits/chosen": -0.46452170610427856, "logits/rejected": -0.5852384567260742, "logps/chosen": -0.0002695045550353825, "logps/rejected": -3.310964584350586, "loss": 0.4925, "nll_loss": 0.12309806793928146, "rewards/accuracies": 1.0, "rewards/chosen": -2.695045441214461e-05, "rewards/margins": 0.3310695290565491, "rewards/rejected": -0.33109647035598755, "step": 11481 }, { "epoch": 7.940525587828493, "grad_norm": 4.539413928985596, "learning_rate": 1.1441524512063932e-05, "log_odds_chosen": 12.16585922241211, "log_odds_ratio": -3.725421993294731e-05, "logits/chosen": -0.1982676088809967, "logits/rejected": -0.30366623401641846, "logps/chosen": -0.0003364937729202211, "logps/rejected": -3.962510108947754, "loss": 0.5444, "nll_loss": 0.13608859479427338, "rewards/accuracies": 1.0, "rewards/chosen": -3.3649375836830586e-05, "rewards/margins": 0.39621734619140625, "rewards/rejected": -0.39625105261802673, "step": 11482 }, { "epoch": 7.941217150760719, "grad_norm": 5.325148105621338, "learning_rate": 1.1437682495773783e-05, "log_odds_chosen": 10.951173782348633, "log_odds_ratio": -3.835307143162936e-05, "logits/chosen": -0.5989179015159607, "logits/rejected": -0.6143508553504944, "logps/chosen": -0.0007528235437348485, "logps/rejected": -2.59660005569458, "loss": 0.7433, "nll_loss": 0.18583013117313385, "rewards/accuracies": 1.0, "rewards/chosen": -7.528235437348485e-05, "rewards/margins": 0.2595847249031067, "rewards/rejected": -0.259660005569458, "step": 11483 }, { "epoch": 7.941908713692946, "grad_norm": 3.9619648456573486, "learning_rate": 1.1433840479483633e-05, "log_odds_chosen": 10.706184387207031, "log_odds_ratio": -7.730752258794382e-05, "logits/chosen": -0.5238598585128784, "logits/rejected": -0.5505596995353699, "logps/chosen": -0.0002677328302524984, "logps/rejected": -1.9348207712173462, "loss": 0.3211, "nll_loss": 0.08026138693094254, "rewards/accuracies": 1.0, "rewards/chosen": -2.6773284844239242e-05, "rewards/margins": 0.19345532357692719, "rewards/rejected": -0.19348207116127014, "step": 11484 }, { "epoch": 7.942600276625173, "grad_norm": 3.4554922580718994, "learning_rate": 1.1429998463193484e-05, "log_odds_chosen": 10.798149108886719, "log_odds_ratio": -7.70141341490671e-05, "logits/chosen": -0.0674898773431778, "logits/rejected": -0.22109772264957428, "logps/chosen": -0.00019740698917303234, "logps/rejected": -1.794944405555725, "loss": 0.3689, "nll_loss": 0.09222354739904404, "rewards/accuracies": 1.0, "rewards/chosen": -1.9740698917303234e-05, "rewards/margins": 0.17947471141815186, "rewards/rejected": -0.1794944405555725, "step": 11485 }, { "epoch": 7.9432918395574, "grad_norm": 10.894437789916992, "learning_rate": 1.1426156446903335e-05, "log_odds_chosen": 11.126977920532227, "log_odds_ratio": -2.2043872377253138e-05, "logits/chosen": 0.04185080528259277, "logits/rejected": -0.05076335370540619, "logps/chosen": -0.0001953870232682675, "logps/rejected": -2.524343252182007, "loss": 0.5484, "nll_loss": 0.13709130883216858, "rewards/accuracies": 1.0, "rewards/chosen": -1.9538700144039467e-05, "rewards/margins": 0.2524147927761078, "rewards/rejected": -0.25243431329727173, "step": 11486 }, { "epoch": 7.943983402489627, "grad_norm": 3.130251407623291, "learning_rate": 1.1422314430613186e-05, "log_odds_chosen": 11.048722267150879, "log_odds_ratio": -2.3887419956736267e-05, "logits/chosen": 0.43735024333000183, "logits/rejected": 0.42700478434562683, "logps/chosen": -0.00016354123363271356, "logps/rejected": -2.004218816757202, "loss": 0.3407, "nll_loss": 0.08516620099544525, "rewards/accuracies": 1.0, "rewards/chosen": -1.6354124454664998e-05, "rewards/margins": 0.2004055380821228, "rewards/rejected": -0.20042188465595245, "step": 11487 }, { "epoch": 7.944674965421854, "grad_norm": 3.934866428375244, "learning_rate": 1.1418472414323038e-05, "log_odds_chosen": 11.085222244262695, "log_odds_ratio": -4.9376852985005826e-05, "logits/chosen": -0.007519755512475967, "logits/rejected": -0.07623255252838135, "logps/chosen": -9.085766214411706e-05, "logps/rejected": -2.0956532955169678, "loss": 0.4641, "nll_loss": 0.11601592600345612, "rewards/accuracies": 1.0, "rewards/chosen": -9.085766578209586e-06, "rewards/margins": 0.20955625176429749, "rewards/rejected": -0.20956535637378693, "step": 11488 }, { "epoch": 7.94536652835408, "grad_norm": 3.9306583404541016, "learning_rate": 1.1414630398032889e-05, "log_odds_chosen": 9.938104629516602, "log_odds_ratio": -0.00024783535627648234, "logits/chosen": -0.18199634552001953, "logits/rejected": -0.19213847815990448, "logps/chosen": -0.000775124819483608, "logps/rejected": -1.7354786396026611, "loss": 0.4551, "nll_loss": 0.11375893652439117, "rewards/accuracies": 1.0, "rewards/chosen": -7.751248631393537e-05, "rewards/margins": 0.17347033321857452, "rewards/rejected": -0.17354784905910492, "step": 11489 }, { "epoch": 7.946058091286307, "grad_norm": 5.256687164306641, "learning_rate": 1.141078838174274e-05, "log_odds_chosen": 11.401453018188477, "log_odds_ratio": -2.7039030101150274e-05, "logits/chosen": -0.0590481162071228, "logits/rejected": -0.1423584222793579, "logps/chosen": -0.00030657826573587954, "logps/rejected": -2.694973945617676, "loss": 0.498, "nll_loss": 0.12448520213365555, "rewards/accuracies": 1.0, "rewards/chosen": -3.0657829483971e-05, "rewards/margins": 0.269466757774353, "rewards/rejected": -0.2694973945617676, "step": 11490 }, { "epoch": 7.946749654218534, "grad_norm": 4.1328582763671875, "learning_rate": 1.140694636545259e-05, "log_odds_chosen": 9.960987091064453, "log_odds_ratio": -9.83233330771327e-05, "logits/chosen": -0.2254234403371811, "logits/rejected": -0.3161728084087372, "logps/chosen": -0.0005620485171675682, "logps/rejected": -1.6886545419692993, "loss": 0.3165, "nll_loss": 0.07911548018455505, "rewards/accuracies": 1.0, "rewards/chosen": -5.620485171675682e-05, "rewards/margins": 0.16880926489830017, "rewards/rejected": -0.16886545717716217, "step": 11491 }, { "epoch": 7.947441217150761, "grad_norm": 5.220170497894287, "learning_rate": 1.1403104349162441e-05, "log_odds_chosen": 9.741656303405762, "log_odds_ratio": -0.00037368456833064556, "logits/chosen": 0.35090339183807373, "logits/rejected": 0.47903770208358765, "logps/chosen": -0.0005976616521365941, "logps/rejected": -1.4262577295303345, "loss": 0.5657, "nll_loss": 0.14137572050094604, "rewards/accuracies": 1.0, "rewards/chosen": -5.976616375846788e-05, "rewards/margins": 0.1425660103559494, "rewards/rejected": -0.14262576401233673, "step": 11492 }, { "epoch": 7.948132780082988, "grad_norm": 5.341917514801025, "learning_rate": 1.1399262332872292e-05, "log_odds_chosen": 11.33029842376709, "log_odds_ratio": -7.657535024918616e-05, "logits/chosen": -0.37551289796829224, "logits/rejected": -0.34889504313468933, "logps/chosen": -0.0002113444497808814, "logps/rejected": -2.9321506023406982, "loss": 0.4439, "nll_loss": 0.11097773909568787, "rewards/accuracies": 1.0, "rewards/chosen": -2.113444497808814e-05, "rewards/margins": 0.2931939363479614, "rewards/rejected": -0.2932150661945343, "step": 11493 }, { "epoch": 7.948824343015215, "grad_norm": 3.6530838012695312, "learning_rate": 1.1395420316582144e-05, "log_odds_chosen": 11.197941780090332, "log_odds_ratio": -5.698140012100339e-05, "logits/chosen": -0.18177399039268494, "logits/rejected": -0.34647101163864136, "logps/chosen": -0.000269897049292922, "logps/rejected": -2.4861698150634766, "loss": 0.3696, "nll_loss": 0.09239768236875534, "rewards/accuracies": 1.0, "rewards/chosen": -2.698970456549432e-05, "rewards/margins": 0.24859000742435455, "rewards/rejected": -0.2486169934272766, "step": 11494 }, { "epoch": 7.949515905947441, "grad_norm": 4.900040149688721, "learning_rate": 1.1391578300291993e-05, "log_odds_chosen": 11.157360076904297, "log_odds_ratio": -2.2148213247419335e-05, "logits/chosen": -0.04906994849443436, "logits/rejected": -0.3092145323753357, "logps/chosen": -0.0004341888125054538, "logps/rejected": -2.4475364685058594, "loss": 0.5372, "nll_loss": 0.1343008279800415, "rewards/accuracies": 1.0, "rewards/chosen": -4.3418887798907235e-05, "rewards/margins": 0.24471025168895721, "rewards/rejected": -0.2447536587715149, "step": 11495 }, { "epoch": 7.950207468879668, "grad_norm": 8.374804496765137, "learning_rate": 1.1387736284001844e-05, "log_odds_chosen": 10.95303726196289, "log_odds_ratio": -2.8403139367583208e-05, "logits/chosen": -0.3107652962207794, "logits/rejected": -0.33756834268569946, "logps/chosen": -0.00012624255032278597, "logps/rejected": -1.8674782514572144, "loss": 0.5196, "nll_loss": 0.12989729642868042, "rewards/accuracies": 1.0, "rewards/chosen": -1.2624254850379657e-05, "rewards/margins": 0.18673519790172577, "rewards/rejected": -0.18674781918525696, "step": 11496 }, { "epoch": 7.950899031811895, "grad_norm": 3.8882741928100586, "learning_rate": 1.1383894267711695e-05, "log_odds_chosen": 10.171780586242676, "log_odds_ratio": -0.0004873498110100627, "logits/chosen": -0.41246718168258667, "logits/rejected": -0.4909096956253052, "logps/chosen": -0.0005409514997154474, "logps/rejected": -2.4792728424072266, "loss": 0.4081, "nll_loss": 0.10198444873094559, "rewards/accuracies": 1.0, "rewards/chosen": -5.4095151426736265e-05, "rewards/margins": 0.2478732019662857, "rewards/rejected": -0.24792727828025818, "step": 11497 }, { "epoch": 7.951590594744122, "grad_norm": 3.493499994277954, "learning_rate": 1.1380052251421547e-05, "log_odds_chosen": 10.759477615356445, "log_odds_ratio": -0.0001155830395873636, "logits/chosen": -0.48726120591163635, "logits/rejected": -0.47793930768966675, "logps/chosen": -0.00021786931029055268, "logps/rejected": -2.2457547187805176, "loss": 0.3185, "nll_loss": 0.07962332665920258, "rewards/accuracies": 1.0, "rewards/chosen": -2.1786931029055268e-05, "rewards/margins": 0.22455370426177979, "rewards/rejected": -0.224575474858284, "step": 11498 }, { "epoch": 7.952282157676349, "grad_norm": 9.014684677124023, "learning_rate": 1.1376210235131398e-05, "log_odds_chosen": 10.160914421081543, "log_odds_ratio": -0.00027200212934985757, "logits/chosen": 0.3355557322502136, "logits/rejected": 0.2384757101535797, "logps/chosen": -0.0005482410779222846, "logps/rejected": -1.730358362197876, "loss": 0.7167, "nll_loss": 0.17913565039634705, "rewards/accuracies": 1.0, "rewards/chosen": -5.482410779222846e-05, "rewards/margins": 0.17298100888729095, "rewards/rejected": -0.17303583025932312, "step": 11499 }, { "epoch": 7.9529737206085755, "grad_norm": 4.0301032066345215, "learning_rate": 1.1372368218841247e-05, "log_odds_chosen": 11.374445915222168, "log_odds_ratio": -5.591554509010166e-05, "logits/chosen": -0.13466550409793854, "logits/rejected": -0.2583548128604889, "logps/chosen": -0.00019826041534543037, "logps/rejected": -2.5781736373901367, "loss": 0.4475, "nll_loss": 0.11187275499105453, "rewards/accuracies": 1.0, "rewards/chosen": -1.98260422621388e-05, "rewards/margins": 0.2577975392341614, "rewards/rejected": -0.2578173875808716, "step": 11500 }, { "epoch": 7.953665283540802, "grad_norm": 5.895437717437744, "learning_rate": 1.13685262025511e-05, "log_odds_chosen": 10.141064643859863, "log_odds_ratio": -0.00096508814021945, "logits/chosen": -0.25293049216270447, "logits/rejected": -0.2667233347892761, "logps/chosen": -0.001047789235599339, "logps/rejected": -2.1944336891174316, "loss": 0.3557, "nll_loss": 0.08882546424865723, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010477892647031695, "rewards/margins": 0.21933861076831818, "rewards/rejected": -0.21944338083267212, "step": 11501 }, { "epoch": 7.954356846473029, "grad_norm": 5.355554580688477, "learning_rate": 1.136468418626095e-05, "log_odds_chosen": 11.279321670532227, "log_odds_ratio": -0.0001632093481021002, "logits/chosen": -0.2464003562927246, "logits/rejected": -0.24605363607406616, "logps/chosen": -0.00020973285427317023, "logps/rejected": -2.0839450359344482, "loss": 0.5622, "nll_loss": 0.1405428946018219, "rewards/accuracies": 1.0, "rewards/chosen": -2.0973287973902188e-05, "rewards/margins": 0.2083735316991806, "rewards/rejected": -0.20839449763298035, "step": 11502 }, { "epoch": 7.955048409405256, "grad_norm": 5.61575174331665, "learning_rate": 1.1360842169970801e-05, "log_odds_chosen": 10.429574966430664, "log_odds_ratio": -0.00010323894093744457, "logits/chosen": -0.3633131980895996, "logits/rejected": -0.46927785873413086, "logps/chosen": -0.0003013019450008869, "logps/rejected": -1.9691195487976074, "loss": 0.6195, "nll_loss": 0.15487197041511536, "rewards/accuracies": 1.0, "rewards/chosen": -3.013019340869505e-05, "rewards/margins": 0.19688181579113007, "rewards/rejected": -0.19691196084022522, "step": 11503 }, { "epoch": 7.955739972337483, "grad_norm": 5.102035045623779, "learning_rate": 1.1357000153680652e-05, "log_odds_chosen": 10.669812202453613, "log_odds_ratio": -3.363552241353318e-05, "logits/chosen": 0.0006479024887084961, "logits/rejected": -0.08887091279029846, "logps/chosen": -0.0002845787676051259, "logps/rejected": -2.112110137939453, "loss": 0.5355, "nll_loss": 0.13386455178260803, "rewards/accuracies": 1.0, "rewards/chosen": -2.8457874577725306e-05, "rewards/margins": 0.21118253469467163, "rewards/rejected": -0.21121101081371307, "step": 11504 }, { "epoch": 7.95643153526971, "grad_norm": 6.779682159423828, "learning_rate": 1.1353158137390503e-05, "log_odds_chosen": 11.504022598266602, "log_odds_ratio": -3.1273968488676473e-05, "logits/chosen": -0.15041357278823853, "logits/rejected": -0.26987728476524353, "logps/chosen": -0.0007018953328952193, "logps/rejected": -3.0383384227752686, "loss": 0.8323, "nll_loss": 0.2080681324005127, "rewards/accuracies": 1.0, "rewards/chosen": -7.018954056547955e-05, "rewards/margins": 0.30376365780830383, "rewards/rejected": -0.30383384227752686, "step": 11505 }, { "epoch": 7.9571230982019365, "grad_norm": 3.7123119831085205, "learning_rate": 1.1349316121100353e-05, "log_odds_chosen": 10.654646873474121, "log_odds_ratio": -4.022525536129251e-05, "logits/chosen": -0.7917324900627136, "logits/rejected": -0.7620425820350647, "logps/chosen": -0.00012117061851313338, "logps/rejected": -1.6779022216796875, "loss": 0.3672, "nll_loss": 0.09179645776748657, "rewards/accuracies": 1.0, "rewards/chosen": -1.2117062397010159e-05, "rewards/margins": 0.1677781045436859, "rewards/rejected": -0.1677902340888977, "step": 11506 }, { "epoch": 7.957814661134163, "grad_norm": 5.274533748626709, "learning_rate": 1.1345474104810206e-05, "log_odds_chosen": 11.018901824951172, "log_odds_ratio": -3.798465695581399e-05, "logits/chosen": -0.3230525255203247, "logits/rejected": -0.3524434566497803, "logps/chosen": -0.0002959494886454195, "logps/rejected": -2.0793910026550293, "loss": 0.58, "nll_loss": 0.14500178396701813, "rewards/accuracies": 1.0, "rewards/chosen": -2.959495031973347e-05, "rewards/margins": 0.20790952444076538, "rewards/rejected": -0.20793911814689636, "step": 11507 }, { "epoch": 7.95850622406639, "grad_norm": 3.123476028442383, "learning_rate": 1.1341632088520057e-05, "log_odds_chosen": 10.636898040771484, "log_odds_ratio": -0.00014882789400871843, "logits/chosen": -0.3683475852012634, "logits/rejected": -0.4598306715488434, "logps/chosen": -0.0013843890046700835, "logps/rejected": -2.420956611633301, "loss": 0.2991, "nll_loss": 0.07475338876247406, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013843891792930663, "rewards/margins": 0.24195721745491028, "rewards/rejected": -0.24209564924240112, "step": 11508 }, { "epoch": 7.959197786998617, "grad_norm": 4.339691638946533, "learning_rate": 1.1337790072229906e-05, "log_odds_chosen": 11.068500518798828, "log_odds_ratio": -0.00010646334703778848, "logits/chosen": -0.5828951001167297, "logits/rejected": -0.6474270820617676, "logps/chosen": -0.00015746541612315923, "logps/rejected": -1.9267382621765137, "loss": 0.4962, "nll_loss": 0.12404583394527435, "rewards/accuracies": 1.0, "rewards/chosen": -1.5746541976113804e-05, "rewards/margins": 0.19265806674957275, "rewards/rejected": -0.19267383217811584, "step": 11509 }, { "epoch": 7.959889349930844, "grad_norm": 4.105745792388916, "learning_rate": 1.1333948055939758e-05, "log_odds_chosen": 11.290447235107422, "log_odds_ratio": -2.614833283587359e-05, "logits/chosen": -0.07597924023866653, "logits/rejected": -0.1646428108215332, "logps/chosen": -0.00018273312889505178, "logps/rejected": -2.607384204864502, "loss": 0.5353, "nll_loss": 0.13381241261959076, "rewards/accuracies": 1.0, "rewards/chosen": -1.8273312889505178e-05, "rewards/margins": 0.2607201337814331, "rewards/rejected": -0.26073840260505676, "step": 11510 }, { "epoch": 7.960580912863071, "grad_norm": 5.290945529937744, "learning_rate": 1.1330106039649609e-05, "log_odds_chosen": 11.960050582885742, "log_odds_ratio": -8.36965955386404e-06, "logits/chosen": -0.1975208818912506, "logits/rejected": -0.19062384963035583, "logps/chosen": -9.22078761504963e-05, "logps/rejected": -2.4583163261413574, "loss": 0.4986, "nll_loss": 0.12464690953493118, "rewards/accuracies": 1.0, "rewards/chosen": -9.220788342645392e-06, "rewards/margins": 0.24582241475582123, "rewards/rejected": -0.2458316683769226, "step": 11511 }, { "epoch": 7.9612724757952975, "grad_norm": 4.686517238616943, "learning_rate": 1.132626402335946e-05, "log_odds_chosen": 9.780715942382812, "log_odds_ratio": -0.0007018402102403343, "logits/chosen": -0.33034276962280273, "logits/rejected": -0.3925611078739166, "logps/chosen": -0.0009610651759430766, "logps/rejected": -1.9885149002075195, "loss": 0.4466, "nll_loss": 0.11156748980283737, "rewards/accuracies": 1.0, "rewards/chosen": -9.610652341507375e-05, "rewards/margins": 0.1987553834915161, "rewards/rejected": -0.19885149598121643, "step": 11512 }, { "epoch": 7.961964038727524, "grad_norm": 5.430578231811523, "learning_rate": 1.132242200706931e-05, "log_odds_chosen": 11.476561546325684, "log_odds_ratio": -0.0001910024438984692, "logits/chosen": -0.09294851869344711, "logits/rejected": -0.2029493749141693, "logps/chosen": -0.00015575737052131444, "logps/rejected": -2.6696534156799316, "loss": 0.5014, "nll_loss": 0.1253352165222168, "rewards/accuracies": 1.0, "rewards/chosen": -1.5575737052131444e-05, "rewards/margins": 0.26694974303245544, "rewards/rejected": -0.2669653296470642, "step": 11513 }, { "epoch": 7.962655601659751, "grad_norm": 4.263656139373779, "learning_rate": 1.1318579990779161e-05, "log_odds_chosen": 9.819611549377441, "log_odds_ratio": -0.00027837679954245687, "logits/chosen": -0.3863220810890198, "logits/rejected": -0.32077890634536743, "logps/chosen": -0.00024438605760224164, "logps/rejected": -1.1592479944229126, "loss": 0.2405, "nll_loss": 0.06010923534631729, "rewards/accuracies": 1.0, "rewards/chosen": -2.4438606487819925e-05, "rewards/margins": 0.11590036749839783, "rewards/rejected": -0.11592480540275574, "step": 11514 }, { "epoch": 7.963347164591978, "grad_norm": 3.0869855880737305, "learning_rate": 1.1314737974489012e-05, "log_odds_chosen": 11.091203689575195, "log_odds_ratio": -3.3251842978643253e-05, "logits/chosen": -0.4070943593978882, "logits/rejected": -0.37126943469047546, "logps/chosen": -0.00011688778613461182, "logps/rejected": -2.079681396484375, "loss": 0.2682, "nll_loss": 0.06704328954219818, "rewards/accuracies": 1.0, "rewards/chosen": -1.1688778613461182e-05, "rewards/margins": 0.2079564481973648, "rewards/rejected": -0.20796814560890198, "step": 11515 }, { "epoch": 7.964038727524205, "grad_norm": 4.022365093231201, "learning_rate": 1.1310895958198864e-05, "log_odds_chosen": 11.098390579223633, "log_odds_ratio": -0.00019448368402663618, "logits/chosen": -0.3664087653160095, "logits/rejected": -0.4097881019115448, "logps/chosen": -0.0002823981922119856, "logps/rejected": -2.2339224815368652, "loss": 0.388, "nll_loss": 0.09698802977800369, "rewards/accuracies": 1.0, "rewards/chosen": -2.8239821403985843e-05, "rewards/margins": 0.22336401045322418, "rewards/rejected": -0.22339224815368652, "step": 11516 }, { "epoch": 7.964730290456432, "grad_norm": 3.9575083255767822, "learning_rate": 1.1307053941908715e-05, "log_odds_chosen": 10.273996353149414, "log_odds_ratio": -0.00010742698214016855, "logits/chosen": -0.360485315322876, "logits/rejected": -0.3785179853439331, "logps/chosen": -0.0003668889112304896, "logps/rejected": -1.7709505558013916, "loss": 0.4295, "nll_loss": 0.10736609995365143, "rewards/accuracies": 1.0, "rewards/chosen": -3.6688892578240484e-05, "rewards/margins": 0.1770583689212799, "rewards/rejected": -0.17709505558013916, "step": 11517 }, { "epoch": 7.9654218533886585, "grad_norm": 8.95025634765625, "learning_rate": 1.1303211925618564e-05, "log_odds_chosen": 11.682476997375488, "log_odds_ratio": -3.4601594961714e-05, "logits/chosen": -0.20466911792755127, "logits/rejected": -0.18964964151382446, "logps/chosen": -0.00021859840489923954, "logps/rejected": -2.6859614849090576, "loss": 0.4589, "nll_loss": 0.11471740901470184, "rewards/accuracies": 1.0, "rewards/chosen": -2.1859839762328193e-05, "rewards/margins": 0.2685742974281311, "rewards/rejected": -0.26859617233276367, "step": 11518 }, { "epoch": 7.966113416320885, "grad_norm": 17.208377838134766, "learning_rate": 1.1299369909328416e-05, "log_odds_chosen": 11.69450855255127, "log_odds_ratio": -1.2062869245710317e-05, "logits/chosen": -0.25031498074531555, "logits/rejected": -0.3548222780227661, "logps/chosen": -8.374622120754793e-05, "logps/rejected": -2.2749221324920654, "loss": 0.4229, "nll_loss": 0.10573307424783707, "rewards/accuracies": 1.0, "rewards/chosen": -8.374622666451614e-06, "rewards/margins": 0.2274838387966156, "rewards/rejected": -0.22749219834804535, "step": 11519 }, { "epoch": 7.966804979253112, "grad_norm": 4.069872856140137, "learning_rate": 1.1295527893038267e-05, "log_odds_chosen": 10.876702308654785, "log_odds_ratio": -0.0001423271605744958, "logits/chosen": -0.374772310256958, "logits/rejected": -0.35536468029022217, "logps/chosen": -0.00019855469872709364, "logps/rejected": -2.4102046489715576, "loss": 0.3834, "nll_loss": 0.09582436829805374, "rewards/accuracies": 1.0, "rewards/chosen": -1.9855469872709364e-05, "rewards/margins": 0.24100060760974884, "rewards/rejected": -0.24102044105529785, "step": 11520 }, { "epoch": 7.967496542185339, "grad_norm": 4.0037360191345215, "learning_rate": 1.1291685876748118e-05, "log_odds_chosen": 10.799403190612793, "log_odds_ratio": -6.53384777251631e-05, "logits/chosen": -0.4056221842765808, "logits/rejected": -0.4481501579284668, "logps/chosen": -0.0011118586407974362, "logps/rejected": -3.182638645172119, "loss": 0.4393, "nll_loss": 0.10982559621334076, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011118586553493515, "rewards/margins": 0.31815269589424133, "rewards/rejected": -0.3182638883590698, "step": 11521 }, { "epoch": 7.968188105117566, "grad_norm": 6.190622806549072, "learning_rate": 1.1287843860457969e-05, "log_odds_chosen": 9.207051277160645, "log_odds_ratio": -0.0008154031820595264, "logits/chosen": -0.22212673723697662, "logits/rejected": -0.2690417170524597, "logps/chosen": -0.0015860882122069597, "logps/rejected": -1.5020241737365723, "loss": 0.7381, "nll_loss": 0.18443426489830017, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015860881831031293, "rewards/margins": 0.1500438153743744, "rewards/rejected": -0.1502024084329605, "step": 11522 }, { "epoch": 7.968879668049793, "grad_norm": 3.886345386505127, "learning_rate": 1.128400184416782e-05, "log_odds_chosen": 10.890275955200195, "log_odds_ratio": -9.133804269367829e-05, "logits/chosen": -0.529086172580719, "logits/rejected": -0.5643143057823181, "logps/chosen": -0.00021087020286358893, "logps/rejected": -2.2759876251220703, "loss": 0.7748, "nll_loss": 0.19368351995944977, "rewards/accuracies": 1.0, "rewards/chosen": -2.1087022105348296e-05, "rewards/margins": 0.22757765650749207, "rewards/rejected": -0.22759875655174255, "step": 11523 }, { "epoch": 7.9695712309820195, "grad_norm": 13.72425651550293, "learning_rate": 1.128015982787767e-05, "log_odds_chosen": 12.068330764770508, "log_odds_ratio": -1.0651285265339538e-05, "logits/chosen": -0.16793814301490784, "logits/rejected": -0.34160295128822327, "logps/chosen": -0.00011844123218907043, "logps/rejected": -2.5251269340515137, "loss": 0.6213, "nll_loss": 0.15533268451690674, "rewards/accuracies": 1.0, "rewards/chosen": -1.1844123946502805e-05, "rewards/margins": 0.25250083208084106, "rewards/rejected": -0.25251269340515137, "step": 11524 }, { "epoch": 7.970262793914246, "grad_norm": 5.1223344802856445, "learning_rate": 1.1276317811587523e-05, "log_odds_chosen": 11.770263671875, "log_odds_ratio": -1.9887751477654092e-05, "logits/chosen": -0.21339300274848938, "logits/rejected": -0.23123294115066528, "logps/chosen": -0.00027726683765649796, "logps/rejected": -3.490894079208374, "loss": 0.7112, "nll_loss": 0.17779025435447693, "rewards/accuracies": 1.0, "rewards/chosen": -2.7726682674256153e-05, "rewards/margins": 0.3490616977214813, "rewards/rejected": -0.3490894138813019, "step": 11525 }, { "epoch": 7.970954356846473, "grad_norm": 4.5738091468811035, "learning_rate": 1.1272475795297373e-05, "log_odds_chosen": 11.602575302124023, "log_odds_ratio": -3.434224709053524e-05, "logits/chosen": -0.14012958109378815, "logits/rejected": -0.2598492503166199, "logps/chosen": -0.00012942799367010593, "logps/rejected": -2.3440585136413574, "loss": 0.4871, "nll_loss": 0.12176584452390671, "rewards/accuracies": 1.0, "rewards/chosen": -1.2942798093718011e-05, "rewards/margins": 0.234392911195755, "rewards/rejected": -0.23440584540367126, "step": 11526 }, { "epoch": 7.9716459197787, "grad_norm": 3.709731101989746, "learning_rate": 1.1268633779007222e-05, "log_odds_chosen": 10.255630493164062, "log_odds_ratio": -0.00013107730774208903, "logits/chosen": -0.6991348266601562, "logits/rejected": -0.7393057346343994, "logps/chosen": -0.00039511447539553046, "logps/rejected": -2.147977352142334, "loss": 0.331, "nll_loss": 0.08272667229175568, "rewards/accuracies": 1.0, "rewards/chosen": -3.9511447539553046e-05, "rewards/margins": 0.2147582471370697, "rewards/rejected": -0.2147977650165558, "step": 11527 }, { "epoch": 7.972337482710927, "grad_norm": 4.13797664642334, "learning_rate": 1.1264791762717075e-05, "log_odds_chosen": 9.618062973022461, "log_odds_ratio": -0.00018889813509304076, "logits/chosen": -0.588722288608551, "logits/rejected": -0.5970766544342041, "logps/chosen": -0.00038104315171949565, "logps/rejected": -1.406938910484314, "loss": 0.4677, "nll_loss": 0.11689627170562744, "rewards/accuracies": 1.0, "rewards/chosen": -3.810431371675804e-05, "rewards/margins": 0.1406557857990265, "rewards/rejected": -0.14069388806819916, "step": 11528 }, { "epoch": 7.973029045643154, "grad_norm": 3.487117290496826, "learning_rate": 1.1260949746426926e-05, "log_odds_chosen": 10.02828311920166, "log_odds_ratio": -0.00018125462520401925, "logits/chosen": -0.33877819776535034, "logits/rejected": -0.404274046421051, "logps/chosen": -0.0007607677252963185, "logps/rejected": -1.83676278591156, "loss": 0.2745, "nll_loss": 0.06860494613647461, "rewards/accuracies": 1.0, "rewards/chosen": -7.607677252963185e-05, "rewards/margins": 0.18360021710395813, "rewards/rejected": -0.18367627263069153, "step": 11529 }, { "epoch": 7.9737206085753805, "grad_norm": 3.9003255367279053, "learning_rate": 1.1257107730136776e-05, "log_odds_chosen": 9.846776962280273, "log_odds_ratio": -0.00033953096135519445, "logits/chosen": -0.7263999581336975, "logits/rejected": -0.733031690120697, "logps/chosen": -0.0005431174067780375, "logps/rejected": -1.4025123119354248, "loss": 0.3954, "nll_loss": 0.09882722795009613, "rewards/accuracies": 1.0, "rewards/chosen": -5.431173849501647e-05, "rewards/margins": 0.14019691944122314, "rewards/rejected": -0.14025121927261353, "step": 11530 }, { "epoch": 7.974412171507607, "grad_norm": 4.604633331298828, "learning_rate": 1.1253265713846627e-05, "log_odds_chosen": 11.10637378692627, "log_odds_ratio": -9.243666136171669e-05, "logits/chosen": -0.06658074259757996, "logits/rejected": -0.21274365484714508, "logps/chosen": -0.00020368752302601933, "logps/rejected": -2.292241096496582, "loss": 0.5003, "nll_loss": 0.12505874037742615, "rewards/accuracies": 1.0, "rewards/chosen": -2.0368752302601933e-05, "rewards/margins": 0.2292037308216095, "rewards/rejected": -0.2292240858078003, "step": 11531 }, { "epoch": 7.975103734439834, "grad_norm": 4.5084638595581055, "learning_rate": 1.1249423697556478e-05, "log_odds_chosen": 11.897830963134766, "log_odds_ratio": -1.5846128007979132e-05, "logits/chosen": -0.37234407663345337, "logits/rejected": -0.38094252347946167, "logps/chosen": -0.00034496927401050925, "logps/rejected": -2.6722660064697266, "loss": 0.6085, "nll_loss": 0.15213480591773987, "rewards/accuracies": 1.0, "rewards/chosen": -3.449693031143397e-05, "rewards/margins": 0.26719212532043457, "rewards/rejected": -0.26722660660743713, "step": 11532 }, { "epoch": 7.975795297372061, "grad_norm": 3.4178168773651123, "learning_rate": 1.1245581681266329e-05, "log_odds_chosen": 12.06242561340332, "log_odds_ratio": -1.9183351469109766e-05, "logits/chosen": -0.19393685460090637, "logits/rejected": -0.22952347993850708, "logps/chosen": -8.642624743515626e-05, "logps/rejected": -2.72680926322937, "loss": 0.3382, "nll_loss": 0.08455608785152435, "rewards/accuracies": 1.0, "rewards/chosen": -8.642624379717745e-06, "rewards/margins": 0.27267229557037354, "rewards/rejected": -0.27268096804618835, "step": 11533 }, { "epoch": 7.976486860304288, "grad_norm": 4.270136833190918, "learning_rate": 1.124173966497618e-05, "log_odds_chosen": 12.044026374816895, "log_odds_ratio": -1.63989097927697e-05, "logits/chosen": -0.0951736569404602, "logits/rejected": -0.19301308691501617, "logps/chosen": -0.0001634317304706201, "logps/rejected": -2.8399291038513184, "loss": 0.4765, "nll_loss": 0.11912365257740021, "rewards/accuracies": 1.0, "rewards/chosen": -1.6343172319466248e-05, "rewards/margins": 0.28397655487060547, "rewards/rejected": -0.2839928865432739, "step": 11534 }, { "epoch": 7.977178423236515, "grad_norm": 25.706132888793945, "learning_rate": 1.1237897648686032e-05, "log_odds_chosen": 11.003204345703125, "log_odds_ratio": -0.0006372515927068889, "logits/chosen": -0.24361053109169006, "logits/rejected": -0.17340120673179626, "logps/chosen": -0.0016507849795743823, "logps/rejected": -2.280179500579834, "loss": 0.4292, "nll_loss": 0.10722636431455612, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016507849795743823, "rewards/margins": 0.22785286605358124, "rewards/rejected": -0.22801794111728668, "step": 11535 }, { "epoch": 7.977869986168741, "grad_norm": 5.740812301635742, "learning_rate": 1.1234055632395883e-05, "log_odds_chosen": 10.96840763092041, "log_odds_ratio": -0.00022587321291211993, "logits/chosen": -0.12754343450069427, "logits/rejected": -0.25832250714302063, "logps/chosen": -0.00031263873097486794, "logps/rejected": -2.5789904594421387, "loss": 0.6194, "nll_loss": 0.15482082962989807, "rewards/accuracies": 1.0, "rewards/chosen": -3.1263873097486794e-05, "rewards/margins": 0.2578677833080292, "rewards/rejected": -0.25789904594421387, "step": 11536 }, { "epoch": 7.978561549100968, "grad_norm": 4.151486396789551, "learning_rate": 1.1230213616105732e-05, "log_odds_chosen": 10.758419036865234, "log_odds_ratio": -5.185231566429138e-05, "logits/chosen": -0.14621517062187195, "logits/rejected": -0.19490604102611542, "logps/chosen": -0.0005299575277604163, "logps/rejected": -2.638571262359619, "loss": 0.6106, "nll_loss": 0.15265339612960815, "rewards/accuracies": 1.0, "rewards/chosen": -5.299575423123315e-05, "rewards/margins": 0.2638041377067566, "rewards/rejected": -0.2638571262359619, "step": 11537 }, { "epoch": 7.979253112033195, "grad_norm": 17.210065841674805, "learning_rate": 1.1226371599815584e-05, "log_odds_chosen": 11.015816688537598, "log_odds_ratio": -5.2646784752141684e-05, "logits/chosen": -0.12005338817834854, "logits/rejected": -0.23084372282028198, "logps/chosen": -0.00026947163860313594, "logps/rejected": -1.9214720726013184, "loss": 0.3624, "nll_loss": 0.09059273451566696, "rewards/accuracies": 1.0, "rewards/chosen": -2.6947163860313594e-05, "rewards/margins": 0.1921202689409256, "rewards/rejected": -0.19214721024036407, "step": 11538 }, { "epoch": 7.979944674965422, "grad_norm": 4.7975568771362305, "learning_rate": 1.1222529583525435e-05, "log_odds_chosen": 10.247209548950195, "log_odds_ratio": -0.00016718886035960168, "logits/chosen": -0.5553616881370544, "logits/rejected": -0.7003533840179443, "logps/chosen": -0.0002022799162659794, "logps/rejected": -1.9307901859283447, "loss": 0.499, "nll_loss": 0.12474517524242401, "rewards/accuracies": 1.0, "rewards/chosen": -2.022799162659794e-05, "rewards/margins": 0.1930587887763977, "rewards/rejected": -0.19307900965213776, "step": 11539 }, { "epoch": 7.980636237897649, "grad_norm": 5.050432205200195, "learning_rate": 1.1218687567235286e-05, "log_odds_chosen": 10.200136184692383, "log_odds_ratio": -0.000156211550347507, "logits/chosen": -0.23092946410179138, "logits/rejected": -0.2568545341491699, "logps/chosen": -0.0002525176969356835, "logps/rejected": -1.5876084566116333, "loss": 0.4569, "nll_loss": 0.11419893801212311, "rewards/accuracies": 1.0, "rewards/chosen": -2.5251771148759872e-05, "rewards/margins": 0.15873560309410095, "rewards/rejected": -0.15876084566116333, "step": 11540 }, { "epoch": 7.981327800829876, "grad_norm": 4.645444869995117, "learning_rate": 1.1214845550945136e-05, "log_odds_chosen": 11.110513687133789, "log_odds_ratio": -5.435135244624689e-05, "logits/chosen": -0.19689714908599854, "logits/rejected": -0.21087950468063354, "logps/chosen": -0.00016647031588945538, "logps/rejected": -2.2465925216674805, "loss": 0.4227, "nll_loss": 0.105674147605896, "rewards/accuracies": 1.0, "rewards/chosen": -1.664703268033918e-05, "rewards/margins": 0.22464261949062347, "rewards/rejected": -0.224659264087677, "step": 11541 }, { "epoch": 7.982019363762102, "grad_norm": 4.564971923828125, "learning_rate": 1.1211003534654987e-05, "log_odds_chosen": 11.38237190246582, "log_odds_ratio": -7.819420716259629e-05, "logits/chosen": -0.06043093651533127, "logits/rejected": -0.18942205607891083, "logps/chosen": -0.00024105122429318726, "logps/rejected": -2.574265480041504, "loss": 0.3603, "nll_loss": 0.09007608890533447, "rewards/accuracies": 1.0, "rewards/chosen": -2.4105120246531442e-05, "rewards/margins": 0.2574024498462677, "rewards/rejected": -0.25742655992507935, "step": 11542 }, { "epoch": 7.982710926694329, "grad_norm": 5.620864391326904, "learning_rate": 1.1207161518364838e-05, "log_odds_chosen": 10.171491622924805, "log_odds_ratio": -0.00013046340609434992, "logits/chosen": -0.28354907035827637, "logits/rejected": -0.3111303448677063, "logps/chosen": -0.00016116946062538773, "logps/rejected": -1.7502444982528687, "loss": 0.5933, "nll_loss": 0.14830997586250305, "rewards/accuracies": 1.0, "rewards/chosen": -1.6116946426336654e-05, "rewards/margins": 0.17500832676887512, "rewards/rejected": -0.17502444982528687, "step": 11543 }, { "epoch": 7.983402489626556, "grad_norm": 5.144567966461182, "learning_rate": 1.120331950207469e-05, "log_odds_chosen": 11.948741912841797, "log_odds_ratio": -3.4696993679972365e-05, "logits/chosen": -0.47868379950523376, "logits/rejected": -0.5196173787117004, "logps/chosen": -0.00011416654160711914, "logps/rejected": -2.7108850479125977, "loss": 0.507, "nll_loss": 0.12674714624881744, "rewards/accuracies": 1.0, "rewards/chosen": -1.1416654160711914e-05, "rewards/margins": 0.27107712626457214, "rewards/rejected": -0.27108854055404663, "step": 11544 }, { "epoch": 7.984094052558783, "grad_norm": 5.7527618408203125, "learning_rate": 1.1199477485784541e-05, "log_odds_chosen": 10.331581115722656, "log_odds_ratio": -0.0002976985997520387, "logits/chosen": -0.42527955770492554, "logits/rejected": -0.5214613676071167, "logps/chosen": -0.0005123602459207177, "logps/rejected": -1.9454360008239746, "loss": 0.4936, "nll_loss": 0.12336038053035736, "rewards/accuracies": 1.0, "rewards/chosen": -5.123602750245482e-05, "rewards/margins": 0.19449235498905182, "rewards/rejected": -0.19454360008239746, "step": 11545 }, { "epoch": 7.98478561549101, "grad_norm": 7.524501323699951, "learning_rate": 1.119563546949439e-05, "log_odds_chosen": 10.434951782226562, "log_odds_ratio": -0.00023595344100613147, "logits/chosen": -0.13727258145809174, "logits/rejected": -0.21067550778388977, "logps/chosen": -0.0011036460055038333, "logps/rejected": -2.0079808235168457, "loss": 0.4717, "nll_loss": 0.11789001524448395, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001103645990951918, "rewards/margins": 0.2006877362728119, "rewards/rejected": -0.20079809427261353, "step": 11546 }, { "epoch": 7.985477178423237, "grad_norm": 5.442744731903076, "learning_rate": 1.1191793453204243e-05, "log_odds_chosen": 10.40377426147461, "log_odds_ratio": -0.0002690352266654372, "logits/chosen": -0.4835624098777771, "logits/rejected": -0.5982956886291504, "logps/chosen": -0.0005080453120172024, "logps/rejected": -2.0457310676574707, "loss": 0.6759, "nll_loss": 0.1689365804195404, "rewards/accuracies": 1.0, "rewards/chosen": -5.080453775008209e-05, "rewards/margins": 0.20452231168746948, "rewards/rejected": -0.2045731246471405, "step": 11547 }, { "epoch": 7.986168741355463, "grad_norm": 3.2058820724487305, "learning_rate": 1.1187951436914093e-05, "log_odds_chosen": 10.801993370056152, "log_odds_ratio": -0.00011381193326087669, "logits/chosen": -0.1311768740415573, "logits/rejected": -0.2777857482433319, "logps/chosen": -0.00014527339953929186, "logps/rejected": -2.0049283504486084, "loss": 0.4059, "nll_loss": 0.10146152973175049, "rewards/accuracies": 1.0, "rewards/chosen": -1.4527340681524947e-05, "rewards/margins": 0.20047834515571594, "rewards/rejected": -0.20049285888671875, "step": 11548 }, { "epoch": 7.98686030428769, "grad_norm": 12.101853370666504, "learning_rate": 1.1184109420623944e-05, "log_odds_chosen": 12.284296035766602, "log_odds_ratio": -4.364710912341252e-05, "logits/chosen": -0.0765245109796524, "logits/rejected": -0.23267234861850739, "logps/chosen": -0.00017167633632197976, "logps/rejected": -3.3040640354156494, "loss": 0.4213, "nll_loss": 0.10531854629516602, "rewards/accuracies": 1.0, "rewards/chosen": -1.7167634723591618e-05, "rewards/margins": 0.33038923144340515, "rewards/rejected": -0.33040642738342285, "step": 11549 }, { "epoch": 7.987551867219917, "grad_norm": 6.506622314453125, "learning_rate": 1.1180267404333795e-05, "log_odds_chosen": 10.512768745422363, "log_odds_ratio": -8.126518514472991e-05, "logits/chosen": -0.21105435490608215, "logits/rejected": -0.20511992275714874, "logps/chosen": -0.00044915868784300983, "logps/rejected": -1.9867548942565918, "loss": 0.4599, "nll_loss": 0.11497482657432556, "rewards/accuracies": 1.0, "rewards/chosen": -4.491586878430098e-05, "rewards/margins": 0.19863058626651764, "rewards/rejected": -0.1986754983663559, "step": 11550 }, { "epoch": 7.988243430152144, "grad_norm": 4.189328193664551, "learning_rate": 1.1176425388043646e-05, "log_odds_chosen": 11.506170272827148, "log_odds_ratio": -2.5864368581096642e-05, "logits/chosen": -0.3027508556842804, "logits/rejected": -0.3504965305328369, "logps/chosen": -0.00014351757999975234, "logps/rejected": -2.5394859313964844, "loss": 0.3577, "nll_loss": 0.08942808210849762, "rewards/accuracies": 1.0, "rewards/chosen": -1.4351757272379473e-05, "rewards/margins": 0.25393423438072205, "rewards/rejected": -0.2539485991001129, "step": 11551 }, { "epoch": 7.988934993084371, "grad_norm": 5.709794521331787, "learning_rate": 1.1172583371753496e-05, "log_odds_chosen": 11.556946754455566, "log_odds_ratio": -6.319672684185207e-05, "logits/chosen": -0.22862932085990906, "logits/rejected": -0.19877254962921143, "logps/chosen": -0.0001135072743636556, "logps/rejected": -2.5816869735717773, "loss": 0.5891, "nll_loss": 0.1472797989845276, "rewards/accuracies": 1.0, "rewards/chosen": -1.1350726708769798e-05, "rewards/margins": 0.2581573724746704, "rewards/rejected": -0.2581687271595001, "step": 11552 }, { "epoch": 7.9896265560165975, "grad_norm": 5.9907073974609375, "learning_rate": 1.1168741355463349e-05, "log_odds_chosen": 10.62165641784668, "log_odds_ratio": -9.188729745801538e-05, "logits/chosen": -0.8795843720436096, "logits/rejected": -0.9018745422363281, "logps/chosen": -0.00021737195493187755, "logps/rejected": -2.174501657485962, "loss": 0.4843, "nll_loss": 0.12106455862522125, "rewards/accuracies": 1.0, "rewards/chosen": -2.1737196220783517e-05, "rewards/margins": 0.21742843091487885, "rewards/rejected": -0.21745017170906067, "step": 11553 }, { "epoch": 7.990318118948824, "grad_norm": 4.190145015716553, "learning_rate": 1.11648993391732e-05, "log_odds_chosen": 11.428888320922852, "log_odds_ratio": -5.247345688985661e-05, "logits/chosen": -0.016565974801778793, "logits/rejected": -0.2098257839679718, "logps/chosen": -9.872991358861327e-05, "logps/rejected": -2.324761390686035, "loss": 0.7625, "nll_loss": 0.19062167406082153, "rewards/accuracies": 1.0, "rewards/chosen": -9.87299245025497e-06, "rewards/margins": 0.23246626555919647, "rewards/rejected": -0.232476145029068, "step": 11554 }, { "epoch": 7.991009681881051, "grad_norm": 2.4398210048675537, "learning_rate": 1.1161057322883049e-05, "log_odds_chosen": 11.650123596191406, "log_odds_ratio": -3.244515391997993e-05, "logits/chosen": -0.3551556468009949, "logits/rejected": -0.37677669525146484, "logps/chosen": -0.0009142399067059159, "logps/rejected": -3.184720516204834, "loss": 0.4227, "nll_loss": 0.10568135976791382, "rewards/accuracies": 1.0, "rewards/chosen": -9.142398630501702e-05, "rewards/margins": 0.3183806240558624, "rewards/rejected": -0.3184720277786255, "step": 11555 }, { "epoch": 7.991701244813278, "grad_norm": 5.6060261726379395, "learning_rate": 1.1157215306592901e-05, "log_odds_chosen": 9.99009895324707, "log_odds_ratio": -0.00025115531752817333, "logits/chosen": -0.47244876623153687, "logits/rejected": -0.5255929231643677, "logps/chosen": -0.0007966295816004276, "logps/rejected": -1.9693882465362549, "loss": 0.477, "nll_loss": 0.11922810226678848, "rewards/accuracies": 1.0, "rewards/chosen": -7.96629537944682e-05, "rewards/margins": 0.19685915112495422, "rewards/rejected": -0.19693884253501892, "step": 11556 }, { "epoch": 7.992392807745505, "grad_norm": 5.761258602142334, "learning_rate": 1.1153373290302752e-05, "log_odds_chosen": 10.994587898254395, "log_odds_ratio": -0.00020646223856601864, "logits/chosen": -0.40414923429489136, "logits/rejected": -0.5490624904632568, "logps/chosen": -0.0001353785046376288, "logps/rejected": -2.2475452423095703, "loss": 0.5792, "nll_loss": 0.1447751373052597, "rewards/accuracies": 1.0, "rewards/chosen": -1.3537851373257581e-05, "rewards/margins": 0.22474099695682526, "rewards/rejected": -0.22475454211235046, "step": 11557 }, { "epoch": 7.993084370677732, "grad_norm": 3.093449831008911, "learning_rate": 1.1149531274012602e-05, "log_odds_chosen": 10.990373611450195, "log_odds_ratio": -5.954828520771116e-05, "logits/chosen": -0.8087414503097534, "logits/rejected": -0.8039386868476868, "logps/chosen": -0.0004817190929315984, "logps/rejected": -2.7298450469970703, "loss": 0.3396, "nll_loss": 0.08489243686199188, "rewards/accuracies": 1.0, "rewards/chosen": -4.8171910748351365e-05, "rewards/margins": 0.27293631434440613, "rewards/rejected": -0.27298450469970703, "step": 11558 }, { "epoch": 7.9937759336099585, "grad_norm": 4.10735559463501, "learning_rate": 1.1145689257722453e-05, "log_odds_chosen": 10.438919067382812, "log_odds_ratio": -0.0001241380232386291, "logits/chosen": -0.345724493265152, "logits/rejected": -0.38152217864990234, "logps/chosen": -0.0003483956679701805, "logps/rejected": -2.2857847213745117, "loss": 0.4543, "nll_loss": 0.11357006430625916, "rewards/accuracies": 1.0, "rewards/chosen": -3.48395697074011e-05, "rewards/margins": 0.22854363918304443, "rewards/rejected": -0.22857847809791565, "step": 11559 }, { "epoch": 7.994467496542185, "grad_norm": 6.2611403465271, "learning_rate": 1.1141847241432304e-05, "log_odds_chosen": 11.2708101272583, "log_odds_ratio": -9.53746130107902e-05, "logits/chosen": -0.23702967166900635, "logits/rejected": -0.3147750496864319, "logps/chosen": -0.00017018525977618992, "logps/rejected": -2.328094482421875, "loss": 0.5765, "nll_loss": 0.1441277414560318, "rewards/accuracies": 1.0, "rewards/chosen": -1.7018526705214754e-05, "rewards/margins": 0.23279240727424622, "rewards/rejected": -0.23280943930149078, "step": 11560 }, { "epoch": 7.995159059474412, "grad_norm": 4.762275695800781, "learning_rate": 1.1138005225142155e-05, "log_odds_chosen": 11.396519660949707, "log_odds_ratio": -8.356718171853572e-05, "logits/chosen": -0.6899176239967346, "logits/rejected": -0.7288756370544434, "logps/chosen": -0.00012063531903550029, "logps/rejected": -2.24245023727417, "loss": 0.4872, "nll_loss": 0.12178252637386322, "rewards/accuracies": 1.0, "rewards/chosen": -1.206353226734791e-05, "rewards/margins": 0.22423294186592102, "rewards/rejected": -0.22424501180648804, "step": 11561 }, { "epoch": 7.995850622406639, "grad_norm": 5.18743371963501, "learning_rate": 1.1134163208852007e-05, "log_odds_chosen": 11.00822925567627, "log_odds_ratio": -0.0007812322000972927, "logits/chosen": -0.6710682511329651, "logits/rejected": -0.7657569050788879, "logps/chosen": -0.002024431247264147, "logps/rejected": -2.8388352394104004, "loss": 0.4816, "nll_loss": 0.12031008303165436, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020244311599526554, "rewards/margins": 0.28368109464645386, "rewards/rejected": -0.28388354182243347, "step": 11562 }, { "epoch": 7.996542185338866, "grad_norm": 4.819424152374268, "learning_rate": 1.1130321192561858e-05, "log_odds_chosen": 11.201433181762695, "log_odds_ratio": -3.710830060299486e-05, "logits/chosen": -0.07250084728002548, "logits/rejected": -0.17073743045330048, "logps/chosen": -0.0001514070318080485, "logps/rejected": -2.1362361907958984, "loss": 0.5593, "nll_loss": 0.13982567191123962, "rewards/accuracies": 1.0, "rewards/chosen": -1.514070390840061e-05, "rewards/margins": 0.21360847353935242, "rewards/rejected": -0.21362361311912537, "step": 11563 }, { "epoch": 7.997233748271093, "grad_norm": 4.8403215408325195, "learning_rate": 1.1126479176271707e-05, "log_odds_chosen": 13.158977508544922, "log_odds_ratio": -3.391482550796354e-06, "logits/chosen": -0.4475821852684021, "logits/rejected": -0.48769521713256836, "logps/chosen": -7.10982785676606e-05, "logps/rejected": -3.243312358856201, "loss": 0.4071, "nll_loss": 0.10178689658641815, "rewards/accuracies": 1.0, "rewards/chosen": -7.109828402462881e-06, "rewards/margins": 0.3243241310119629, "rewards/rejected": -0.32433122396469116, "step": 11564 }, { "epoch": 7.9979253112033195, "grad_norm": 5.4192938804626465, "learning_rate": 1.1122637159981558e-05, "log_odds_chosen": 11.397504806518555, "log_odds_ratio": -4.034879748360254e-05, "logits/chosen": -0.5433164834976196, "logits/rejected": -0.5988404154777527, "logps/chosen": -7.839765021344647e-05, "logps/rejected": -2.162886142730713, "loss": 0.4798, "nll_loss": 0.11994602531194687, "rewards/accuracies": 1.0, "rewards/chosen": -7.839765203243587e-06, "rewards/margins": 0.2162807583808899, "rewards/rejected": -0.21628859639167786, "step": 11565 }, { "epoch": 7.998616874135546, "grad_norm": 3.662543535232544, "learning_rate": 1.111879514369141e-05, "log_odds_chosen": 12.392351150512695, "log_odds_ratio": -4.640197403205093e-06, "logits/chosen": -0.6322042346000671, "logits/rejected": -0.6288639903068542, "logps/chosen": -0.0001787141227396205, "logps/rejected": -3.1459133625030518, "loss": 0.5516, "nll_loss": 0.13790521025657654, "rewards/accuracies": 1.0, "rewards/chosen": -1.7871414456749335e-05, "rewards/margins": 0.3145734965801239, "rewards/rejected": -0.31459134817123413, "step": 11566 }, { "epoch": 7.999308437067773, "grad_norm": 8.122488021850586, "learning_rate": 1.1114953127401261e-05, "log_odds_chosen": 10.237462043762207, "log_odds_ratio": -0.00034276203950867057, "logits/chosen": -0.3474234938621521, "logits/rejected": -0.34457603096961975, "logps/chosen": -0.0008376427576877177, "logps/rejected": -2.554757595062256, "loss": 1.0159, "nll_loss": 0.2539416253566742, "rewards/accuracies": 1.0, "rewards/chosen": -8.376428013434634e-05, "rewards/margins": 0.25539201498031616, "rewards/rejected": -0.2554757595062256, "step": 11567 }, { "epoch": 8.0, "grad_norm": 3.228456497192383, "learning_rate": 1.1111111111111112e-05, "log_odds_chosen": 11.486684799194336, "log_odds_ratio": -0.0001133008481701836, "logits/chosen": -0.6037741899490356, "logits/rejected": -0.4470462501049042, "logps/chosen": -0.0001116130079026334, "logps/rejected": -2.2419252395629883, "loss": 0.3898, "nll_loss": 0.09744127839803696, "rewards/accuracies": 1.0, "rewards/chosen": -1.1161301699758042e-05, "rewards/margins": 0.22418135404586792, "rewards/rejected": -0.2241925150156021, "step": 11568 }, { "epoch": 8.000691562932227, "grad_norm": 2.2350728511810303, "learning_rate": 1.1107269094820962e-05, "log_odds_chosen": 10.858540534973145, "log_odds_ratio": -3.378919791430235e-05, "logits/chosen": -0.45352715253829956, "logits/rejected": -0.36443424224853516, "logps/chosen": -0.00014449376612901688, "logps/rejected": -2.170830726623535, "loss": 0.249, "nll_loss": 0.06224460154771805, "rewards/accuracies": 1.0, "rewards/chosen": -1.4449376976699568e-05, "rewards/margins": 0.217068612575531, "rewards/rejected": -0.21708306670188904, "step": 11569 }, { "epoch": 8.001383125864454, "grad_norm": 3.7147507667541504, "learning_rate": 1.1103427078530813e-05, "log_odds_chosen": 12.093175888061523, "log_odds_ratio": -1.1225498383282684e-05, "logits/chosen": -0.36110997200012207, "logits/rejected": -0.3920789659023285, "logps/chosen": -0.00011191416706424206, "logps/rejected": -2.748927116394043, "loss": 0.4191, "nll_loss": 0.10478252172470093, "rewards/accuracies": 1.0, "rewards/chosen": -1.1191416888323147e-05, "rewards/margins": 0.2748815417289734, "rewards/rejected": -0.27489274740219116, "step": 11570 }, { "epoch": 8.00207468879668, "grad_norm": 3.585357427597046, "learning_rate": 1.1099585062240664e-05, "log_odds_chosen": 10.27100944519043, "log_odds_ratio": -0.00012903407332487404, "logits/chosen": -0.7512014508247375, "logits/rejected": -0.7946118116378784, "logps/chosen": -0.000788436271250248, "logps/rejected": -2.2089595794677734, "loss": 0.4822, "nll_loss": 0.12052592635154724, "rewards/accuracies": 1.0, "rewards/chosen": -7.884363731136546e-05, "rewards/margins": 0.22081711888313293, "rewards/rejected": -0.22089596092700958, "step": 11571 }, { "epoch": 8.002766251728907, "grad_norm": 5.002890110015869, "learning_rate": 1.1095743045950516e-05, "log_odds_chosen": 11.039785385131836, "log_odds_ratio": -2.9156521122786216e-05, "logits/chosen": -0.3379184603691101, "logits/rejected": -0.4358202815055847, "logps/chosen": -0.0001397759042447433, "logps/rejected": -1.9672892093658447, "loss": 0.4984, "nll_loss": 0.12460320442914963, "rewards/accuracies": 1.0, "rewards/chosen": -1.3977590242575388e-05, "rewards/margins": 0.19671493768692017, "rewards/rejected": -0.19672891497612, "step": 11572 }, { "epoch": 8.003457814661134, "grad_norm": 4.253098011016846, "learning_rate": 1.1091901029660365e-05, "log_odds_chosen": 11.585793495178223, "log_odds_ratio": -2.382851562288124e-05, "logits/chosen": -0.2774587869644165, "logits/rejected": -0.35317111015319824, "logps/chosen": -0.0001064272946678102, "logps/rejected": -2.1052513122558594, "loss": 0.4296, "nll_loss": 0.10740029066801071, "rewards/accuracies": 1.0, "rewards/chosen": -1.0642728739185259e-05, "rewards/margins": 0.21051448583602905, "rewards/rejected": -0.21052512526512146, "step": 11573 }, { "epoch": 8.004149377593361, "grad_norm": 6.317983627319336, "learning_rate": 1.1088059013370216e-05, "log_odds_chosen": 10.828872680664062, "log_odds_ratio": -6.566552474396303e-05, "logits/chosen": -0.26200076937675476, "logits/rejected": -0.33439376950263977, "logps/chosen": -0.0003057863796129823, "logps/rejected": -2.421590566635132, "loss": 0.3395, "nll_loss": 0.084869883954525, "rewards/accuracies": 1.0, "rewards/chosen": -3.057863796129823e-05, "rewards/margins": 0.24212849140167236, "rewards/rejected": -0.24215905368328094, "step": 11574 }, { "epoch": 8.004840940525588, "grad_norm": 4.240973949432373, "learning_rate": 1.1084216997080069e-05, "log_odds_chosen": 9.857244491577148, "log_odds_ratio": -0.00043547729728743434, "logits/chosen": -0.46123895049095154, "logits/rejected": -0.5853416323661804, "logps/chosen": -0.00031145193497650325, "logps/rejected": -1.161023736000061, "loss": 0.2557, "nll_loss": 0.06387601792812347, "rewards/accuracies": 1.0, "rewards/chosen": -3.114519495284185e-05, "rewards/margins": 0.11607123166322708, "rewards/rejected": -0.11610237509012222, "step": 11575 }, { "epoch": 8.005532503457815, "grad_norm": 4.478937149047852, "learning_rate": 1.108037498078992e-05, "log_odds_chosen": 12.100873947143555, "log_odds_ratio": -1.816160511225462e-05, "logits/chosen": -0.15364758670330048, "logits/rejected": -0.18345826864242554, "logps/chosen": -0.0001433866418665275, "logps/rejected": -2.913017749786377, "loss": 0.5397, "nll_loss": 0.1349209100008011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4338663277158048e-05, "rewards/margins": 0.2912874221801758, "rewards/rejected": -0.29130178689956665, "step": 11576 }, { "epoch": 8.006224066390041, "grad_norm": 5.591915607452393, "learning_rate": 1.107653296449977e-05, "log_odds_chosen": 10.005435943603516, "log_odds_ratio": -0.0005105708260089159, "logits/chosen": -0.32908353209495544, "logits/rejected": -0.39447811245918274, "logps/chosen": -0.0005771131254732609, "logps/rejected": -1.6710118055343628, "loss": 0.4533, "nll_loss": 0.11326804757118225, "rewards/accuracies": 1.0, "rewards/chosen": -5.771130963694304e-05, "rewards/margins": 0.16704347729682922, "rewards/rejected": -0.1671011745929718, "step": 11577 }, { "epoch": 8.006915629322268, "grad_norm": 3.091831684112549, "learning_rate": 1.107269094820962e-05, "log_odds_chosen": 10.920646667480469, "log_odds_ratio": -0.00010340339940739796, "logits/chosen": -0.3442881107330322, "logits/rejected": -0.4258328676223755, "logps/chosen": -0.00026276629068888724, "logps/rejected": -1.8977267742156982, "loss": 0.3592, "nll_loss": 0.08978812396526337, "rewards/accuracies": 1.0, "rewards/chosen": -2.627663161547389e-05, "rewards/margins": 0.1897464096546173, "rewards/rejected": -0.18977268040180206, "step": 11578 }, { "epoch": 8.007607192254495, "grad_norm": 6.157090663909912, "learning_rate": 1.1068848931919472e-05, "log_odds_chosen": 11.511394500732422, "log_odds_ratio": -6.123785715317354e-05, "logits/chosen": -0.3286557197570801, "logits/rejected": -0.4205593466758728, "logps/chosen": -0.00011942606943193823, "logps/rejected": -2.7054905891418457, "loss": 0.3561, "nll_loss": 0.08902223408222198, "rewards/accuracies": 1.0, "rewards/chosen": -1.1942607670789585e-05, "rewards/margins": 0.2705371379852295, "rewards/rejected": -0.27054905891418457, "step": 11579 }, { "epoch": 8.008298755186722, "grad_norm": 3.5800986289978027, "learning_rate": 1.1065006915629322e-05, "log_odds_chosen": 11.658208847045898, "log_odds_ratio": -2.0560266420943663e-05, "logits/chosen": -0.27686649560928345, "logits/rejected": -0.360891729593277, "logps/chosen": -0.00014447391731664538, "logps/rejected": -2.26849627494812, "loss": 0.4374, "nll_loss": 0.10934217274188995, "rewards/accuracies": 1.0, "rewards/chosen": -1.4447391549765598e-05, "rewards/margins": 0.2268351912498474, "rewards/rejected": -0.22684964537620544, "step": 11580 }, { "epoch": 8.008990318118949, "grad_norm": 3.4170472621917725, "learning_rate": 1.1061164899339175e-05, "log_odds_chosen": 10.878726959228516, "log_odds_ratio": -4.549963341560215e-05, "logits/chosen": -0.6824355125427246, "logits/rejected": -0.683419942855835, "logps/chosen": -0.00018226999964099377, "logps/rejected": -2.0756149291992188, "loss": 0.3822, "nll_loss": 0.09554344415664673, "rewards/accuracies": 1.0, "rewards/chosen": -1.82270014192909e-05, "rewards/margins": 0.2075432538986206, "rewards/rejected": -0.20756149291992188, "step": 11581 }, { "epoch": 8.009681881051176, "grad_norm": 3.8616960048675537, "learning_rate": 1.1057322883049026e-05, "log_odds_chosen": 10.877504348754883, "log_odds_ratio": -4.084506872459315e-05, "logits/chosen": -0.381029337644577, "logits/rejected": -0.5061492323875427, "logps/chosen": -0.00019211815379094332, "logps/rejected": -1.7320504188537598, "loss": 0.4776, "nll_loss": 0.11939746141433716, "rewards/accuracies": 1.0, "rewards/chosen": -1.9211815015296452e-05, "rewards/margins": 0.1731858253479004, "rewards/rejected": -0.17320504784584045, "step": 11582 }, { "epoch": 8.010373443983402, "grad_norm": 2.913362979888916, "learning_rate": 1.1053480866758875e-05, "log_odds_chosen": 11.55898666381836, "log_odds_ratio": -1.3246997696114704e-05, "logits/chosen": 0.10358059406280518, "logits/rejected": -0.024889543652534485, "logps/chosen": -0.00011885564163094386, "logps/rejected": -2.18052339553833, "loss": 0.3903, "nll_loss": 0.09757485240697861, "rewards/accuracies": 1.0, "rewards/chosen": -1.1885564163094386e-05, "rewards/margins": 0.21804048120975494, "rewards/rejected": -0.21805237233638763, "step": 11583 }, { "epoch": 8.01106500691563, "grad_norm": 4.308811664581299, "learning_rate": 1.1049638850468727e-05, "log_odds_chosen": 10.301643371582031, "log_odds_ratio": -9.870291978586465e-05, "logits/chosen": -0.1822156012058258, "logits/rejected": -0.27328962087631226, "logps/chosen": -0.0004364804772194475, "logps/rejected": -1.9258673191070557, "loss": 0.4504, "nll_loss": 0.11259178072214127, "rewards/accuracies": 1.0, "rewards/chosen": -4.364804772194475e-05, "rewards/margins": 0.19254308938980103, "rewards/rejected": -0.192586749792099, "step": 11584 }, { "epoch": 8.011756569847856, "grad_norm": 4.623989582061768, "learning_rate": 1.1045796834178578e-05, "log_odds_chosen": 11.376053810119629, "log_odds_ratio": -2.161969678127207e-05, "logits/chosen": -0.04305008053779602, "logits/rejected": -0.11707738041877747, "logps/chosen": -0.00014561483112629503, "logps/rejected": -2.256037950515747, "loss": 0.6169, "nll_loss": 0.15423165261745453, "rewards/accuracies": 1.0, "rewards/chosen": -1.4561482203134801e-05, "rewards/margins": 0.22558923065662384, "rewards/rejected": -0.22560378909111023, "step": 11585 }, { "epoch": 8.012448132780083, "grad_norm": 2.276597023010254, "learning_rate": 1.1041954817888429e-05, "log_odds_chosen": 10.119415283203125, "log_odds_ratio": -0.0006033480749465525, "logits/chosen": -0.6786028742790222, "logits/rejected": -0.6111587285995483, "logps/chosen": -0.0020572494249790907, "logps/rejected": -1.9731730222702026, "loss": 0.2592, "nll_loss": 0.06475097686052322, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002057249512290582, "rewards/margins": 0.1971115618944168, "rewards/rejected": -0.19731728732585907, "step": 11586 }, { "epoch": 8.01313969571231, "grad_norm": 4.214800834655762, "learning_rate": 1.103811280159828e-05, "log_odds_chosen": 9.672806739807129, "log_odds_ratio": -0.00034011263051070273, "logits/chosen": 0.0970701351761818, "logits/rejected": -0.09030620008707047, "logps/chosen": -0.0003722485271282494, "logps/rejected": -1.773955225944519, "loss": 0.9936, "nll_loss": 0.24835583567619324, "rewards/accuracies": 1.0, "rewards/chosen": -3.7224854168016464e-05, "rewards/margins": 0.17735828459262848, "rewards/rejected": -0.1773955225944519, "step": 11587 }, { "epoch": 8.013831258644537, "grad_norm": 3.8448140621185303, "learning_rate": 1.103427078530813e-05, "log_odds_chosen": 12.254363059997559, "log_odds_ratio": -3.8563590351259336e-05, "logits/chosen": -0.16270017623901367, "logits/rejected": -0.21075886487960815, "logps/chosen": -0.00015512807294726372, "logps/rejected": -3.3416941165924072, "loss": 0.5497, "nll_loss": 0.13743028044700623, "rewards/accuracies": 1.0, "rewards/chosen": -1.5512807294726372e-05, "rewards/margins": 0.3341538906097412, "rewards/rejected": -0.3341693878173828, "step": 11588 }, { "epoch": 8.014522821576763, "grad_norm": 3.889132022857666, "learning_rate": 1.103042876901798e-05, "log_odds_chosen": 9.956747055053711, "log_odds_ratio": -0.0002764341770671308, "logits/chosen": -0.5464695692062378, "logits/rejected": -0.6339938044548035, "logps/chosen": -0.00048362798406742513, "logps/rejected": -1.6153647899627686, "loss": 0.5563, "nll_loss": 0.1390453428030014, "rewards/accuracies": 1.0, "rewards/chosen": -4.8362799134338275e-05, "rewards/margins": 0.1614881306886673, "rewards/rejected": -0.16153648495674133, "step": 11589 }, { "epoch": 8.01521438450899, "grad_norm": 3.216763496398926, "learning_rate": 1.1026586752727833e-05, "log_odds_chosen": 11.178112983703613, "log_odds_ratio": -3.872377419611439e-05, "logits/chosen": -0.12826332449913025, "logits/rejected": -0.20491275191307068, "logps/chosen": -0.00033394325873814523, "logps/rejected": -2.4093754291534424, "loss": 0.5163, "nll_loss": 0.1290750652551651, "rewards/accuracies": 1.0, "rewards/chosen": -3.339432805660181e-05, "rewards/margins": 0.24090415239334106, "rewards/rejected": -0.24093753099441528, "step": 11590 }, { "epoch": 8.015905947441217, "grad_norm": 3.4779105186462402, "learning_rate": 1.1022744736437684e-05, "log_odds_chosen": 9.287093162536621, "log_odds_ratio": -0.00034964943188242614, "logits/chosen": -0.42299336194992065, "logits/rejected": -0.4615306258201599, "logps/chosen": -0.0003623334923759103, "logps/rejected": -1.330190658569336, "loss": 0.3426, "nll_loss": 0.08560502529144287, "rewards/accuracies": 1.0, "rewards/chosen": -3.6233352147974074e-05, "rewards/margins": 0.13298282027244568, "rewards/rejected": -0.13301905989646912, "step": 11591 }, { "epoch": 8.016597510373444, "grad_norm": 3.7570581436157227, "learning_rate": 1.1018902720147533e-05, "log_odds_chosen": 10.141767501831055, "log_odds_ratio": -0.0001211938142660074, "logits/chosen": -0.6092631816864014, "logits/rejected": -0.642663300037384, "logps/chosen": -0.0007956069894134998, "logps/rejected": -1.4864659309387207, "loss": 0.3201, "nll_loss": 0.08002170920372009, "rewards/accuracies": 1.0, "rewards/chosen": -7.956070476211607e-05, "rewards/margins": 0.1485670506954193, "rewards/rejected": -0.14864660799503326, "step": 11592 }, { "epoch": 8.01728907330567, "grad_norm": 4.438507556915283, "learning_rate": 1.1015060703857385e-05, "log_odds_chosen": 11.716581344604492, "log_odds_ratio": -4.336609708843753e-05, "logits/chosen": -0.5998443365097046, "logits/rejected": -0.6375141739845276, "logps/chosen": -0.00041599958785809577, "logps/rejected": -2.9002466201782227, "loss": 0.4021, "nll_loss": 0.10051152110099792, "rewards/accuracies": 1.0, "rewards/chosen": -4.15999602410011e-05, "rewards/margins": 0.2899830639362335, "rewards/rejected": -0.29002466797828674, "step": 11593 }, { "epoch": 8.017980636237898, "grad_norm": 5.6682000160217285, "learning_rate": 1.1011218687567236e-05, "log_odds_chosen": 10.26571273803711, "log_odds_ratio": -0.00014328473480418324, "logits/chosen": -0.3404668867588043, "logits/rejected": -0.18539004027843475, "logps/chosen": -0.00023215243709273636, "logps/rejected": -1.9225413799285889, "loss": 0.5967, "nll_loss": 0.14915236830711365, "rewards/accuracies": 1.0, "rewards/chosen": -2.3215243345475756e-05, "rewards/margins": 0.1922309398651123, "rewards/rejected": -0.19225415587425232, "step": 11594 }, { "epoch": 8.018672199170124, "grad_norm": 5.593873977661133, "learning_rate": 1.1007376671277087e-05, "log_odds_chosen": 9.951410293579102, "log_odds_ratio": -9.344021964352578e-05, "logits/chosen": -0.2194986343383789, "logits/rejected": -0.21711423993110657, "logps/chosen": -0.00020417847554199398, "logps/rejected": -1.3823903799057007, "loss": 0.4061, "nll_loss": 0.1015174463391304, "rewards/accuracies": 1.0, "rewards/chosen": -2.041784864559304e-05, "rewards/margins": 0.13821862637996674, "rewards/rejected": -0.1382390409708023, "step": 11595 }, { "epoch": 8.019363762102351, "grad_norm": 7.501003742218018, "learning_rate": 1.1003534654986938e-05, "log_odds_chosen": 10.992705345153809, "log_odds_ratio": -5.0389258831273764e-05, "logits/chosen": -0.22574585676193237, "logits/rejected": -0.3487429618835449, "logps/chosen": -0.000133042354718782, "logps/rejected": -2.1215598583221436, "loss": 0.2911, "nll_loss": 0.07276320457458496, "rewards/accuracies": 1.0, "rewards/chosen": -1.3304234016686678e-05, "rewards/margins": 0.212142676115036, "rewards/rejected": -0.21215596795082092, "step": 11596 }, { "epoch": 8.020055325034578, "grad_norm": 5.872107982635498, "learning_rate": 1.0999692638696788e-05, "log_odds_chosen": 11.554754257202148, "log_odds_ratio": -4.173180786892772e-05, "logits/chosen": -0.061662398278713226, "logits/rejected": -0.3210103213787079, "logps/chosen": -0.0003299083618912846, "logps/rejected": -2.537262439727783, "loss": 0.5086, "nll_loss": 0.12715497612953186, "rewards/accuracies": 1.0, "rewards/chosen": -3.299083618912846e-05, "rewards/margins": 0.25369325280189514, "rewards/rejected": -0.25372621417045593, "step": 11597 }, { "epoch": 8.020746887966805, "grad_norm": 6.822277069091797, "learning_rate": 1.099585062240664e-05, "log_odds_chosen": 11.166653633117676, "log_odds_ratio": -5.960985799902119e-05, "logits/chosen": -0.6631227135658264, "logits/rejected": -0.6951757669448853, "logps/chosen": -0.00016014205175451934, "logps/rejected": -2.3508334159851074, "loss": 0.2911, "nll_loss": 0.07277705520391464, "rewards/accuracies": 1.0, "rewards/chosen": -1.6014204447856173e-05, "rewards/margins": 0.23506733775138855, "rewards/rejected": -0.23508334159851074, "step": 11598 }, { "epoch": 8.021438450899032, "grad_norm": 4.478630065917969, "learning_rate": 1.099200860611649e-05, "log_odds_chosen": 10.090757369995117, "log_odds_ratio": -0.0001920961367432028, "logits/chosen": -0.14601203799247742, "logits/rejected": -0.1455862820148468, "logps/chosen": -0.0005073798238299787, "logps/rejected": -1.910022258758545, "loss": 0.375, "nll_loss": 0.09372103214263916, "rewards/accuracies": 1.0, "rewards/chosen": -5.0737984565785155e-05, "rewards/margins": 0.19095146656036377, "rewards/rejected": -0.19100221991539001, "step": 11599 }, { "epoch": 8.022130013831259, "grad_norm": 4.48646879196167, "learning_rate": 1.0988166589826342e-05, "log_odds_chosen": 10.446651458740234, "log_odds_ratio": -0.0001280968717765063, "logits/chosen": -0.07451249659061432, "logits/rejected": -0.23958072066307068, "logps/chosen": -0.0002140738070011139, "logps/rejected": -1.84334397315979, "loss": 0.47, "nll_loss": 0.11749942600727081, "rewards/accuracies": 1.0, "rewards/chosen": -2.140738070011139e-05, "rewards/margins": 0.18431299924850464, "rewards/rejected": -0.184334397315979, "step": 11600 }, { "epoch": 8.022821576763485, "grad_norm": 4.529657363891602, "learning_rate": 1.0984324573536191e-05, "log_odds_chosen": 10.769058227539062, "log_odds_ratio": -0.00019904434157069772, "logits/chosen": -0.4270152747631073, "logits/rejected": -0.3482168912887573, "logps/chosen": -0.0003587648388929665, "logps/rejected": -1.9207621812820435, "loss": 0.4951, "nll_loss": 0.12376239895820618, "rewards/accuracies": 1.0, "rewards/chosen": -3.587648461689241e-05, "rewards/margins": 0.1920403391122818, "rewards/rejected": -0.19207622110843658, "step": 11601 }, { "epoch": 8.023513139695712, "grad_norm": 3.555155038833618, "learning_rate": 1.0980482557246042e-05, "log_odds_chosen": 11.555574417114258, "log_odds_ratio": -2.083612525893841e-05, "logits/chosen": -0.5681159496307373, "logits/rejected": -0.6570307612419128, "logps/chosen": -9.780601249076426e-05, "logps/rejected": -2.4376511573791504, "loss": 0.3922, "nll_loss": 0.09804081916809082, "rewards/accuracies": 1.0, "rewards/chosen": -9.780602340470068e-06, "rewards/margins": 0.24375534057617188, "rewards/rejected": -0.24376511573791504, "step": 11602 }, { "epoch": 8.024204702627939, "grad_norm": 4.330643177032471, "learning_rate": 1.0976640540955895e-05, "log_odds_chosen": 9.720133781433105, "log_odds_ratio": -0.00024881906574591994, "logits/chosen": -0.27257040143013, "logits/rejected": -0.4565042555332184, "logps/chosen": -0.00029421134968288243, "logps/rejected": -1.327944040298462, "loss": 0.5498, "nll_loss": 0.13742215931415558, "rewards/accuracies": 1.0, "rewards/chosen": -2.9421134968288243e-05, "rewards/margins": 0.13276498019695282, "rewards/rejected": -0.13279439508914948, "step": 11603 }, { "epoch": 8.024896265560166, "grad_norm": 3.577317476272583, "learning_rate": 1.0972798524665745e-05, "log_odds_chosen": 12.49842643737793, "log_odds_ratio": -9.71730059973197e-06, "logits/chosen": -0.5571168065071106, "logits/rejected": -0.5459440350532532, "logps/chosen": -0.00013112853048369288, "logps/rejected": -3.0554847717285156, "loss": 0.5315, "nll_loss": 0.132865309715271, "rewards/accuracies": 1.0, "rewards/chosen": -1.3112854503560811e-05, "rewards/margins": 0.30553534626960754, "rewards/rejected": -0.30554845929145813, "step": 11604 }, { "epoch": 8.025587828492393, "grad_norm": 3.6991682052612305, "learning_rate": 1.0968956508375596e-05, "log_odds_chosen": 10.687396049499512, "log_odds_ratio": -6.707415741402656e-05, "logits/chosen": -0.47288778424263, "logits/rejected": -0.5687136054039001, "logps/chosen": -0.0001317668065894395, "logps/rejected": -1.7559115886688232, "loss": 0.4231, "nll_loss": 0.10575664043426514, "rewards/accuracies": 1.0, "rewards/chosen": -1.3176681022741832e-05, "rewards/margins": 0.17557796835899353, "rewards/rejected": -0.1755911409854889, "step": 11605 }, { "epoch": 8.02627939142462, "grad_norm": 3.841230630874634, "learning_rate": 1.0965114492085447e-05, "log_odds_chosen": 11.022579193115234, "log_odds_ratio": -9.507785580353811e-05, "logits/chosen": -0.3446550965309143, "logits/rejected": -0.4020662009716034, "logps/chosen": -0.000423995777964592, "logps/rejected": -2.345393657684326, "loss": 0.4796, "nll_loss": 0.11990146338939667, "rewards/accuracies": 1.0, "rewards/chosen": -4.2399580706842244e-05, "rewards/margins": 0.23449698090553284, "rewards/rejected": -0.23453938961029053, "step": 11606 }, { "epoch": 8.026970954356846, "grad_norm": 3.552696704864502, "learning_rate": 1.0961272475795298e-05, "log_odds_chosen": 9.273900032043457, "log_odds_ratio": -0.0002788471174426377, "logits/chosen": -0.521811306476593, "logits/rejected": -0.546200156211853, "logps/chosen": -0.00033644909854047, "logps/rejected": -1.2243123054504395, "loss": 0.3082, "nll_loss": 0.07701878994703293, "rewards/accuracies": 1.0, "rewards/chosen": -3.364490476087667e-05, "rewards/margins": 0.12239758670330048, "rewards/rejected": -0.12243123352527618, "step": 11607 }, { "epoch": 8.027662517289073, "grad_norm": 5.155054569244385, "learning_rate": 1.0957430459505148e-05, "log_odds_chosen": 10.779470443725586, "log_odds_ratio": -3.2731564715504646e-05, "logits/chosen": -0.2122465968132019, "logits/rejected": -0.29279085993766785, "logps/chosen": -0.00015573952987324446, "logps/rejected": -1.9129401445388794, "loss": 0.4411, "nll_loss": 0.11027791351079941, "rewards/accuracies": 1.0, "rewards/chosen": -1.557395444251597e-05, "rewards/margins": 0.19127842783927917, "rewards/rejected": -0.19129401445388794, "step": 11608 }, { "epoch": 8.0283540802213, "grad_norm": 3.937246799468994, "learning_rate": 1.0953588443215e-05, "log_odds_chosen": 11.833447456359863, "log_odds_ratio": -9.855545067694038e-05, "logits/chosen": -0.40071818232536316, "logits/rejected": -0.5153113007545471, "logps/chosen": -0.00011262780026299879, "logps/rejected": -2.7883224487304688, "loss": 0.4929, "nll_loss": 0.12320946156978607, "rewards/accuracies": 1.0, "rewards/chosen": -1.126278039009776e-05, "rewards/margins": 0.2788209617137909, "rewards/rejected": -0.27883222699165344, "step": 11609 }, { "epoch": 8.029045643153527, "grad_norm": 2.6097638607025146, "learning_rate": 1.094974642692485e-05, "log_odds_chosen": 10.482734680175781, "log_odds_ratio": -0.0008700615144334733, "logits/chosen": -0.10049141943454742, "logits/rejected": -0.0984073355793953, "logps/chosen": -0.0010803381446748972, "logps/rejected": -2.508087396621704, "loss": 0.2925, "nll_loss": 0.0730450376868248, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010803381883306429, "rewards/margins": 0.2507007122039795, "rewards/rejected": -0.2508087754249573, "step": 11610 }, { "epoch": 8.029737206085754, "grad_norm": 2.400596857070923, "learning_rate": 1.09459044106347e-05, "log_odds_chosen": 10.24705982208252, "log_odds_ratio": -8.93109681783244e-05, "logits/chosen": -0.3052368462085724, "logits/rejected": -0.37610214948654175, "logps/chosen": -0.0007598986267112195, "logps/rejected": -1.868806004524231, "loss": 0.271, "nll_loss": 0.06773454695940018, "rewards/accuracies": 1.0, "rewards/chosen": -7.598986121593043e-05, "rewards/margins": 0.1868046224117279, "rewards/rejected": -0.18688060343265533, "step": 11611 }, { "epoch": 8.03042876901798, "grad_norm": 4.1927971839904785, "learning_rate": 1.0942062394344553e-05, "log_odds_chosen": 11.035076141357422, "log_odds_ratio": -4.5136461267247796e-05, "logits/chosen": -0.28807422518730164, "logits/rejected": -0.36113497614860535, "logps/chosen": -0.00015464363968931139, "logps/rejected": -2.043996810913086, "loss": 0.5135, "nll_loss": 0.12838131189346313, "rewards/accuracies": 1.0, "rewards/chosen": -1.5464365787920542e-05, "rewards/margins": 0.2043842226266861, "rewards/rejected": -0.20439967513084412, "step": 11612 }, { "epoch": 8.031120331950207, "grad_norm": 4.350894451141357, "learning_rate": 1.0938220378054404e-05, "log_odds_chosen": 9.81594467163086, "log_odds_ratio": -0.04112391918897629, "logits/chosen": -0.26520606875419617, "logits/rejected": -0.19644969701766968, "logps/chosen": -0.007886898703873158, "logps/rejected": -2.090251922607422, "loss": 0.422, "nll_loss": 0.10137651860713959, "rewards/accuracies": 1.0, "rewards/chosen": -0.000788689823821187, "rewards/margins": 0.20823650062084198, "rewards/rejected": -0.20902520418167114, "step": 11613 }, { "epoch": 8.031811894882434, "grad_norm": 9.433053970336914, "learning_rate": 1.0934378361764255e-05, "log_odds_chosen": 10.647979736328125, "log_odds_ratio": -8.212029933929443e-05, "logits/chosen": -0.6354914903640747, "logits/rejected": -0.6734293103218079, "logps/chosen": -0.00031259743263944983, "logps/rejected": -2.2240993976593018, "loss": 0.4741, "nll_loss": 0.11851217597723007, "rewards/accuracies": 1.0, "rewards/chosen": -3.125974035356194e-05, "rewards/margins": 0.222378671169281, "rewards/rejected": -0.2224099338054657, "step": 11614 }, { "epoch": 8.032503457814661, "grad_norm": 3.092810869216919, "learning_rate": 1.0930536345474105e-05, "log_odds_chosen": 11.151169776916504, "log_odds_ratio": -0.0006351221818476915, "logits/chosen": -0.5462251901626587, "logits/rejected": -0.5252736806869507, "logps/chosen": -0.006568577140569687, "logps/rejected": -3.098931312561035, "loss": 0.6262, "nll_loss": 0.1564791053533554, "rewards/accuracies": 1.0, "rewards/chosen": -0.000656857795547694, "rewards/margins": 0.3092362582683563, "rewards/rejected": -0.3098931312561035, "step": 11615 }, { "epoch": 8.033195020746888, "grad_norm": 2.6533544063568115, "learning_rate": 1.0926694329183956e-05, "log_odds_chosen": 9.996108055114746, "log_odds_ratio": -0.00024949904764071107, "logits/chosen": -0.19736546277999878, "logits/rejected": -0.3198302388191223, "logps/chosen": -0.00030988510116003454, "logps/rejected": -1.627938985824585, "loss": 0.262, "nll_loss": 0.06546853482723236, "rewards/accuracies": 1.0, "rewards/chosen": -3.098850720562041e-05, "rewards/margins": 0.16276292502880096, "rewards/rejected": -0.16279390454292297, "step": 11616 }, { "epoch": 8.033886583679115, "grad_norm": 4.698807716369629, "learning_rate": 1.0922852312893807e-05, "log_odds_chosen": 11.338987350463867, "log_odds_ratio": -0.00014085850853007287, "logits/chosen": -0.3571838438510895, "logits/rejected": -0.34271275997161865, "logps/chosen": -0.0004037006292492151, "logps/rejected": -2.928548574447632, "loss": 0.5348, "nll_loss": 0.1336820274591446, "rewards/accuracies": 1.0, "rewards/chosen": -4.0370068745687604e-05, "rewards/margins": 0.2928144931793213, "rewards/rejected": -0.2928548753261566, "step": 11617 }, { "epoch": 8.034578146611342, "grad_norm": 4.942789077758789, "learning_rate": 1.091901029660366e-05, "log_odds_chosen": 11.588337898254395, "log_odds_ratio": -3.798071702476591e-05, "logits/chosen": -0.36027687788009644, "logits/rejected": -0.5589436292648315, "logps/chosen": -0.00031201704405248165, "logps/rejected": -2.9800496101379395, "loss": 0.4361, "nll_loss": 0.1090322956442833, "rewards/accuracies": 1.0, "rewards/chosen": -3.12017000396736e-05, "rewards/margins": 0.29797375202178955, "rewards/rejected": -0.29800495505332947, "step": 11618 }, { "epoch": 8.035269709543568, "grad_norm": 4.74745512008667, "learning_rate": 1.0915168280313508e-05, "log_odds_chosen": 11.68956184387207, "log_odds_ratio": -3.099131572525948e-05, "logits/chosen": -0.10821807384490967, "logits/rejected": -0.2176653891801834, "logps/chosen": -0.0001303794706473127, "logps/rejected": -2.342233657836914, "loss": 0.3688, "nll_loss": 0.09219465404748917, "rewards/accuracies": 1.0, "rewards/chosen": -1.3037947610428091e-05, "rewards/margins": 0.23421034216880798, "rewards/rejected": -0.2342233657836914, "step": 11619 }, { "epoch": 8.035961272475795, "grad_norm": 4.976463317871094, "learning_rate": 1.0911326264023359e-05, "log_odds_chosen": 10.253108024597168, "log_odds_ratio": -0.0005621587042696774, "logits/chosen": -0.11551451683044434, "logits/rejected": -0.1854763776063919, "logps/chosen": -0.0005382780218496919, "logps/rejected": -1.9607380628585815, "loss": 0.3469, "nll_loss": 0.08666163682937622, "rewards/accuracies": 1.0, "rewards/chosen": -5.382780364016071e-05, "rewards/margins": 0.1960199624300003, "rewards/rejected": -0.19607380032539368, "step": 11620 }, { "epoch": 8.036652835408022, "grad_norm": 4.694047451019287, "learning_rate": 1.0907484247733212e-05, "log_odds_chosen": 10.585967063903809, "log_odds_ratio": -0.0002062514831777662, "logits/chosen": -0.21767069399356842, "logits/rejected": -0.3120375871658325, "logps/chosen": -0.00042118871351704, "logps/rejected": -2.0939784049987793, "loss": 0.3806, "nll_loss": 0.0951206386089325, "rewards/accuracies": 1.0, "rewards/chosen": -4.211887426208705e-05, "rewards/margins": 0.20935575664043427, "rewards/rejected": -0.20939785242080688, "step": 11621 }, { "epoch": 8.037344398340249, "grad_norm": 3.249535083770752, "learning_rate": 1.0903642231443062e-05, "log_odds_chosen": 11.063423156738281, "log_odds_ratio": -9.601256169844419e-05, "logits/chosen": 0.0025239139795303345, "logits/rejected": 0.01908857375383377, "logps/chosen": -0.00028602141537703574, "logps/rejected": -2.2366976737976074, "loss": 0.5118, "nll_loss": 0.12793602049350739, "rewards/accuracies": 1.0, "rewards/chosen": -2.8602142265299335e-05, "rewards/margins": 0.22364118695259094, "rewards/rejected": -0.22366978228092194, "step": 11622 }, { "epoch": 8.038035961272476, "grad_norm": 4.789508819580078, "learning_rate": 1.0899800215152913e-05, "log_odds_chosen": 10.925281524658203, "log_odds_ratio": -0.00011090299813076854, "logits/chosen": 0.2372182309627533, "logits/rejected": 0.08504534512758255, "logps/chosen": -0.00030619502649642527, "logps/rejected": -2.3250906467437744, "loss": 0.5231, "nll_loss": 0.13077004253864288, "rewards/accuracies": 1.0, "rewards/chosen": -3.061950337723829e-05, "rewards/margins": 0.23247846961021423, "rewards/rejected": -0.2325090765953064, "step": 11623 }, { "epoch": 8.038727524204702, "grad_norm": 3.4549219608306885, "learning_rate": 1.0895958198862764e-05, "log_odds_chosen": 11.175882339477539, "log_odds_ratio": -3.460890002315864e-05, "logits/chosen": 0.020357206463813782, "logits/rejected": -0.09108800441026688, "logps/chosen": -0.0002967551117762923, "logps/rejected": -2.847583770751953, "loss": 0.3917, "nll_loss": 0.09792071580886841, "rewards/accuracies": 1.0, "rewards/chosen": -2.9675511541427113e-05, "rewards/margins": 0.2847287058830261, "rewards/rejected": -0.2847583591938019, "step": 11624 }, { "epoch": 8.03941908713693, "grad_norm": 9.085065841674805, "learning_rate": 1.0892116182572615e-05, "log_odds_chosen": 11.052441596984863, "log_odds_ratio": -3.7479036109289154e-05, "logits/chosen": -0.21325267851352692, "logits/rejected": -0.2510700225830078, "logps/chosen": -0.00045145026524551213, "logps/rejected": -2.528874635696411, "loss": 0.3844, "nll_loss": 0.09608972817659378, "rewards/accuracies": 1.0, "rewards/chosen": -4.51450287073385e-05, "rewards/margins": 0.25284233689308167, "rewards/rejected": -0.25288745760917664, "step": 11625 }, { "epoch": 8.040110650069156, "grad_norm": 7.780580997467041, "learning_rate": 1.0888274166282465e-05, "log_odds_chosen": 11.271392822265625, "log_odds_ratio": -1.960910958587192e-05, "logits/chosen": -0.30678537487983704, "logits/rejected": -0.32438284158706665, "logps/chosen": -0.00010147874127142131, "logps/rejected": -1.9354403018951416, "loss": 0.3865, "nll_loss": 0.09662692248821259, "rewards/accuracies": 1.0, "rewards/chosen": -1.0147874490940012e-05, "rewards/margins": 0.19353388249874115, "rewards/rejected": -0.19354403018951416, "step": 11626 }, { "epoch": 8.040802213001383, "grad_norm": 2.9626879692077637, "learning_rate": 1.0884432149992318e-05, "log_odds_chosen": 10.288896560668945, "log_odds_ratio": -0.00017442651733290404, "logits/chosen": 0.11750302463769913, "logits/rejected": 0.08454018831253052, "logps/chosen": -0.00024051466607488692, "logps/rejected": -1.6254515647888184, "loss": 0.2825, "nll_loss": 0.07061274349689484, "rewards/accuracies": 1.0, "rewards/chosen": -2.4051467335084453e-05, "rewards/margins": 0.1625211089849472, "rewards/rejected": -0.16254517436027527, "step": 11627 }, { "epoch": 8.04149377593361, "grad_norm": 3.7291109561920166, "learning_rate": 1.0880590133702168e-05, "log_odds_chosen": 10.420429229736328, "log_odds_ratio": -7.376746361842379e-05, "logits/chosen": 0.1731017827987671, "logits/rejected": 0.1459406614303589, "logps/chosen": -0.0007401591865345836, "logps/rejected": -1.845339059829712, "loss": 0.4723, "nll_loss": 0.11805613338947296, "rewards/accuracies": 1.0, "rewards/chosen": -7.401593029499054e-05, "rewards/margins": 0.1844598799943924, "rewards/rejected": -0.18453389406204224, "step": 11628 }, { "epoch": 8.042185338865837, "grad_norm": 4.236103534698486, "learning_rate": 1.0876748117412018e-05, "log_odds_chosen": 10.347618103027344, "log_odds_ratio": -7.569259469164535e-05, "logits/chosen": 0.058949798345565796, "logits/rejected": 0.04264757037162781, "logps/chosen": -0.0009270127629861236, "logps/rejected": -2.4099678993225098, "loss": 0.5477, "nll_loss": 0.13691957294940948, "rewards/accuracies": 1.0, "rewards/chosen": -9.270127338822931e-05, "rewards/margins": 0.2409040927886963, "rewards/rejected": -0.24099679291248322, "step": 11629 }, { "epoch": 8.042876901798063, "grad_norm": 4.982583045959473, "learning_rate": 1.087290610112187e-05, "log_odds_chosen": 11.669830322265625, "log_odds_ratio": -2.4999659217428416e-05, "logits/chosen": -0.3383077085018158, "logits/rejected": -0.3936065435409546, "logps/chosen": -0.0004458319745026529, "logps/rejected": -3.0715909004211426, "loss": 0.5125, "nll_loss": 0.12813270092010498, "rewards/accuracies": 1.0, "rewards/chosen": -4.4583201088244095e-05, "rewards/margins": 0.30711451172828674, "rewards/rejected": -0.3071591258049011, "step": 11630 }, { "epoch": 8.04356846473029, "grad_norm": 4.099587440490723, "learning_rate": 1.086906408483172e-05, "log_odds_chosen": 10.979185104370117, "log_odds_ratio": -0.000795874570030719, "logits/chosen": -0.07905671745538712, "logits/rejected": -0.26628753542900085, "logps/chosen": -0.0004789835074916482, "logps/rejected": -2.364879846572876, "loss": 0.3142, "nll_loss": 0.07848026603460312, "rewards/accuracies": 1.0, "rewards/chosen": -4.78983492939733e-05, "rewards/margins": 0.23644009232521057, "rewards/rejected": -0.2364879995584488, "step": 11631 }, { "epoch": 8.044260027662517, "grad_norm": 7.567932605743408, "learning_rate": 1.0865222068541571e-05, "log_odds_chosen": 11.234302520751953, "log_odds_ratio": -2.353546369704418e-05, "logits/chosen": -0.24551396071910858, "logits/rejected": -0.1941598355770111, "logps/chosen": -0.00015664119564462453, "logps/rejected": -2.4318954944610596, "loss": 0.4114, "nll_loss": 0.10283748060464859, "rewards/accuracies": 1.0, "rewards/chosen": -1.5664119928260334e-05, "rewards/margins": 0.24317388236522675, "rewards/rejected": -0.24318955838680267, "step": 11632 }, { "epoch": 8.044951590594744, "grad_norm": 4.48465633392334, "learning_rate": 1.0861380052251422e-05, "log_odds_chosen": 11.189987182617188, "log_odds_ratio": -6.808263424318284e-05, "logits/chosen": -0.851357638835907, "logits/rejected": -0.8038666248321533, "logps/chosen": -0.0001597387745277956, "logps/rejected": -2.1322896480560303, "loss": 0.419, "nll_loss": 0.10473240911960602, "rewards/accuracies": 1.0, "rewards/chosen": -1.5973879271768965e-05, "rewards/margins": 0.2132129967212677, "rewards/rejected": -0.2132289707660675, "step": 11633 }, { "epoch": 8.04564315352697, "grad_norm": 3.913274049758911, "learning_rate": 1.0857538035961273e-05, "log_odds_chosen": 12.231964111328125, "log_odds_ratio": -1.1904116945515852e-05, "logits/chosen": -0.18402579426765442, "logits/rejected": -0.18470388650894165, "logps/chosen": -0.00018358533270657063, "logps/rejected": -2.8793082237243652, "loss": 0.3135, "nll_loss": 0.07838185131549835, "rewards/accuracies": 1.0, "rewards/chosen": -1.8358534362050705e-05, "rewards/margins": 0.2879124879837036, "rewards/rejected": -0.28793084621429443, "step": 11634 }, { "epoch": 8.046334716459198, "grad_norm": 7.306576728820801, "learning_rate": 1.0853696019671124e-05, "log_odds_chosen": 11.634334564208984, "log_odds_ratio": -1.9609975424828008e-05, "logits/chosen": -0.11489503085613251, "logits/rejected": -0.18257029354572296, "logps/chosen": -0.0002044460125034675, "logps/rejected": -2.74857234954834, "loss": 0.5027, "nll_loss": 0.125673308968544, "rewards/accuracies": 1.0, "rewards/chosen": -2.044460052275099e-05, "rewards/margins": 0.27483680844306946, "rewards/rejected": -0.2748572528362274, "step": 11635 }, { "epoch": 8.047026279391424, "grad_norm": 7.451847076416016, "learning_rate": 1.0849854003380974e-05, "log_odds_chosen": 11.932010650634766, "log_odds_ratio": -5.420656452770345e-05, "logits/chosen": -0.017487986013293266, "logits/rejected": -0.21278372406959534, "logps/chosen": -0.00030589461675845087, "logps/rejected": -3.496171474456787, "loss": 0.6613, "nll_loss": 0.1653130054473877, "rewards/accuracies": 1.0, "rewards/chosen": -3.058946458622813e-05, "rewards/margins": 0.349586546421051, "rewards/rejected": -0.3496171236038208, "step": 11636 }, { "epoch": 8.047717842323651, "grad_norm": 4.815349102020264, "learning_rate": 1.0846011987090827e-05, "log_odds_chosen": 9.847448348999023, "log_odds_ratio": -0.0005425678100436926, "logits/chosen": -0.18823321163654327, "logits/rejected": -0.3128855526447296, "logps/chosen": -0.0012195882154628634, "logps/rejected": -2.4391961097717285, "loss": 0.5404, "nll_loss": 0.13504886627197266, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012195881572552025, "rewards/margins": 0.2437976449728012, "rewards/rejected": -0.24391961097717285, "step": 11637 }, { "epoch": 8.048409405255878, "grad_norm": 3.5785138607025146, "learning_rate": 1.0842169970800676e-05, "log_odds_chosen": 10.383599281311035, "log_odds_ratio": -0.00015447475016117096, "logits/chosen": -0.44339680671691895, "logits/rejected": -0.4116237759590149, "logps/chosen": -0.00036477501271292567, "logps/rejected": -1.862234354019165, "loss": 0.4705, "nll_loss": 0.11760586500167847, "rewards/accuracies": 1.0, "rewards/chosen": -3.647750418167561e-05, "rewards/margins": 0.18618696928024292, "rewards/rejected": -0.18622344732284546, "step": 11638 }, { "epoch": 8.049100968188105, "grad_norm": 4.362621784210205, "learning_rate": 1.0838327954510527e-05, "log_odds_chosen": 10.884719848632812, "log_odds_ratio": -5.54381767869927e-05, "logits/chosen": -0.3651423156261444, "logits/rejected": -0.37535524368286133, "logps/chosen": -0.0001369999663438648, "logps/rejected": -1.898221731185913, "loss": 0.54, "nll_loss": 0.13500377535820007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3699996088689659e-05, "rewards/margins": 0.18980847299098969, "rewards/rejected": -0.18982218205928802, "step": 11639 }, { "epoch": 8.049792531120332, "grad_norm": 3.383798837661743, "learning_rate": 1.0834485938220379e-05, "log_odds_chosen": 11.25387191772461, "log_odds_ratio": -8.179190626833588e-05, "logits/chosen": -0.4336949586868286, "logits/rejected": -0.5522239804267883, "logps/chosen": -0.00037895014975219965, "logps/rejected": -2.568528652191162, "loss": 0.2993, "nll_loss": 0.07481571286916733, "rewards/accuracies": 1.0, "rewards/chosen": -3.789501715800725e-05, "rewards/margins": 0.25681498646736145, "rewards/rejected": -0.2568528652191162, "step": 11640 }, { "epoch": 8.050484094052559, "grad_norm": 2.910865306854248, "learning_rate": 1.083064392193023e-05, "log_odds_chosen": 9.588981628417969, "log_odds_ratio": -0.0007639298564754426, "logits/chosen": 0.09794837981462479, "logits/rejected": -0.042546890676021576, "logps/chosen": -0.0018523625330999494, "logps/rejected": -1.5158963203430176, "loss": 0.2927, "nll_loss": 0.07309460639953613, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018523624748922884, "rewards/margins": 0.15140441060066223, "rewards/rejected": -0.15158963203430176, "step": 11641 }, { "epoch": 8.051175656984785, "grad_norm": 2.5796313285827637, "learning_rate": 1.082680190564008e-05, "log_odds_chosen": 9.901168823242188, "log_odds_ratio": -9.957759903045371e-05, "logits/chosen": -0.4514090418815613, "logits/rejected": -0.5148862600326538, "logps/chosen": -0.00025001997710205615, "logps/rejected": -1.448334813117981, "loss": 0.2457, "nll_loss": 0.06141233444213867, "rewards/accuracies": 1.0, "rewards/chosen": -2.5001998437801376e-05, "rewards/margins": 0.14480847120285034, "rewards/rejected": -0.14483347535133362, "step": 11642 }, { "epoch": 8.051867219917012, "grad_norm": 3.1253342628479004, "learning_rate": 1.0822959889349931e-05, "log_odds_chosen": 9.646997451782227, "log_odds_ratio": -0.0008379703504033387, "logits/chosen": -0.213422030210495, "logits/rejected": -0.14800477027893066, "logps/chosen": -0.0015627527609467506, "logps/rejected": -1.773625373840332, "loss": 0.2836, "nll_loss": 0.07080502808094025, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015627527318429202, "rewards/margins": 0.17720626294612885, "rewards/rejected": -0.17736253142356873, "step": 11643 }, { "epoch": 8.052558782849239, "grad_norm": 4.140681266784668, "learning_rate": 1.0819117873059782e-05, "log_odds_chosen": 11.40479564666748, "log_odds_ratio": -0.000390512403100729, "logits/chosen": -0.0796457827091217, "logits/rejected": -0.1556369662284851, "logps/chosen": -0.0013447202509269118, "logps/rejected": -3.1734018325805664, "loss": 0.3751, "nll_loss": 0.09372792392969131, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013447203673422337, "rewards/margins": 0.3172057271003723, "rewards/rejected": -0.3173401951789856, "step": 11644 }, { "epoch": 8.053250345781466, "grad_norm": 4.3886919021606445, "learning_rate": 1.0815275856769633e-05, "log_odds_chosen": 11.26523494720459, "log_odds_ratio": -4.273475133231841e-05, "logits/chosen": -0.4076208174228668, "logits/rejected": -0.5760940909385681, "logps/chosen": -0.00014619230933021754, "logps/rejected": -2.109987258911133, "loss": 0.276, "nll_loss": 0.06899204105138779, "rewards/accuracies": 1.0, "rewards/chosen": -1.4619231478718575e-05, "rewards/margins": 0.21098411083221436, "rewards/rejected": -0.21099872887134552, "step": 11645 }, { "epoch": 8.053941908713693, "grad_norm": 6.297887802124023, "learning_rate": 1.0811433840479485e-05, "log_odds_chosen": 11.587589263916016, "log_odds_ratio": -1.1658419680316001e-05, "logits/chosen": -0.5171922445297241, "logits/rejected": -0.5611253976821899, "logps/chosen": -8.005322160897776e-05, "logps/rejected": -2.2642323970794678, "loss": 0.5005, "nll_loss": 0.12512831389904022, "rewards/accuracies": 1.0, "rewards/chosen": -8.005322342796717e-06, "rewards/margins": 0.2264152467250824, "rewards/rejected": -0.2264232635498047, "step": 11646 }, { "epoch": 8.05463347164592, "grad_norm": 4.215669631958008, "learning_rate": 1.0807591824189334e-05, "log_odds_chosen": 11.566927909851074, "log_odds_ratio": -2.2225858629099093e-05, "logits/chosen": -0.3866409957408905, "logits/rejected": -0.3898853659629822, "logps/chosen": -0.0001798073499230668, "logps/rejected": -2.6164865493774414, "loss": 0.3943, "nll_loss": 0.09856823086738586, "rewards/accuracies": 1.0, "rewards/chosen": -1.798073571990244e-05, "rewards/margins": 0.26163071393966675, "rewards/rejected": -0.26164865493774414, "step": 11647 }, { "epoch": 8.055325034578146, "grad_norm": 5.187557697296143, "learning_rate": 1.0803749807899185e-05, "log_odds_chosen": 10.51015567779541, "log_odds_ratio": -6.061392923584208e-05, "logits/chosen": -0.3999212682247162, "logits/rejected": -0.41192543506622314, "logps/chosen": -0.00026394022279419005, "logps/rejected": -2.035250663757324, "loss": 0.4287, "nll_loss": 0.10716467350721359, "rewards/accuracies": 1.0, "rewards/chosen": -2.6394021915621124e-05, "rewards/margins": 0.2034986913204193, "rewards/rejected": -0.20352506637573242, "step": 11648 }, { "epoch": 8.056016597510373, "grad_norm": 4.2619547843933105, "learning_rate": 1.0799907791609038e-05, "log_odds_chosen": 11.489641189575195, "log_odds_ratio": -5.318426701705903e-05, "logits/chosen": -0.5062676668167114, "logits/rejected": -0.6034945845603943, "logps/chosen": -0.00047905201790854335, "logps/rejected": -2.9666566848754883, "loss": 0.432, "nll_loss": 0.108005091547966, "rewards/accuracies": 1.0, "rewards/chosen": -4.790520324604586e-05, "rewards/margins": 0.2966177463531494, "rewards/rejected": -0.29666566848754883, "step": 11649 }, { "epoch": 8.0567081604426, "grad_norm": 3.2921361923217773, "learning_rate": 1.0796065775318888e-05, "log_odds_chosen": 10.716009140014648, "log_odds_ratio": -5.8777186495717615e-05, "logits/chosen": 0.06899416446685791, "logits/rejected": 0.008984297513961792, "logps/chosen": -0.0003209186252206564, "logps/rejected": -2.0981249809265137, "loss": 0.3706, "nll_loss": 0.09265469014644623, "rewards/accuracies": 1.0, "rewards/chosen": -3.209186252206564e-05, "rewards/margins": 0.20978042483329773, "rewards/rejected": -0.2098124921321869, "step": 11650 }, { "epoch": 8.057399723374827, "grad_norm": 18.162288665771484, "learning_rate": 1.0792223759028739e-05, "log_odds_chosen": 11.46948528289795, "log_odds_ratio": -0.0001158723680418916, "logits/chosen": -0.14668020606040955, "logits/rejected": -0.4037422835826874, "logps/chosen": -0.0009772483026608825, "logps/rejected": -2.8705010414123535, "loss": 0.4352, "nll_loss": 0.10878217220306396, "rewards/accuracies": 1.0, "rewards/chosen": -9.772483463166282e-05, "rewards/margins": 0.2869523763656616, "rewards/rejected": -0.2870500981807709, "step": 11651 }, { "epoch": 8.058091286307054, "grad_norm": 9.27110767364502, "learning_rate": 1.078838174273859e-05, "log_odds_chosen": 11.206290245056152, "log_odds_ratio": -2.393356589891482e-05, "logits/chosen": 0.10491567105054855, "logits/rejected": 0.07313147187232971, "logps/chosen": -0.00015125813661143184, "logps/rejected": -2.252528190612793, "loss": 0.6418, "nll_loss": 0.16045981645584106, "rewards/accuracies": 1.0, "rewards/chosen": -1.5125813661143184e-05, "rewards/margins": 0.22523772716522217, "rewards/rejected": -0.22525283694267273, "step": 11652 }, { "epoch": 8.05878284923928, "grad_norm": 7.424502849578857, "learning_rate": 1.078453972644844e-05, "log_odds_chosen": 10.65821361541748, "log_odds_ratio": -0.00013696661335416138, "logits/chosen": -0.32704007625579834, "logits/rejected": -0.2945024371147156, "logps/chosen": -0.00035628239857032895, "logps/rejected": -2.109651565551758, "loss": 0.4554, "nll_loss": 0.11382496356964111, "rewards/accuracies": 1.0, "rewards/chosen": -3.5628239857032895e-05, "rewards/margins": 0.2109295278787613, "rewards/rejected": -0.21096515655517578, "step": 11653 }, { "epoch": 8.059474412171507, "grad_norm": 3.443453073501587, "learning_rate": 1.0780697710158291e-05, "log_odds_chosen": 10.71337890625, "log_odds_ratio": -5.269534085527994e-05, "logits/chosen": -0.31750330328941345, "logits/rejected": -0.41674119234085083, "logps/chosen": -0.00018927460769191384, "logps/rejected": -2.038935661315918, "loss": 0.4133, "nll_loss": 0.10332353413105011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8927461496787146e-05, "rewards/margins": 0.2038746178150177, "rewards/rejected": -0.20389357209205627, "step": 11654 }, { "epoch": 8.060165975103734, "grad_norm": 5.964850902557373, "learning_rate": 1.0776855693868144e-05, "log_odds_chosen": 11.15042495727539, "log_odds_ratio": -0.0005366685218177736, "logits/chosen": -0.2146221250295639, "logits/rejected": -0.2757033109664917, "logps/chosen": -0.0006667596171610057, "logps/rejected": -2.569218397140503, "loss": 0.4874, "nll_loss": 0.1217995136976242, "rewards/accuracies": 1.0, "rewards/chosen": -6.667595880571753e-05, "rewards/margins": 0.2568551301956177, "rewards/rejected": -0.25692182779312134, "step": 11655 }, { "epoch": 8.060857538035961, "grad_norm": 5.175530433654785, "learning_rate": 1.0773013677577993e-05, "log_odds_chosen": 11.816387176513672, "log_odds_ratio": -1.1562333384063095e-05, "logits/chosen": -0.252761572599411, "logits/rejected": -0.30724823474884033, "logps/chosen": -0.00015155701839830726, "logps/rejected": -2.734372138977051, "loss": 0.4865, "nll_loss": 0.12162323296070099, "rewards/accuracies": 1.0, "rewards/chosen": -1.5155701476032846e-05, "rewards/margins": 0.2734220623970032, "rewards/rejected": -0.2734372317790985, "step": 11656 }, { "epoch": 8.061549100968188, "grad_norm": 4.897181987762451, "learning_rate": 1.0769171661287844e-05, "log_odds_chosen": 12.112031936645508, "log_odds_ratio": -8.372231604880653e-06, "logits/chosen": -0.08842036128044128, "logits/rejected": -0.07480307668447495, "logps/chosen": -0.00013405829668045044, "logps/rejected": -2.833430767059326, "loss": 0.3035, "nll_loss": 0.07588332146406174, "rewards/accuracies": 1.0, "rewards/chosen": -1.3405830941337626e-05, "rewards/margins": 0.28332969546318054, "rewards/rejected": -0.2833430767059326, "step": 11657 }, { "epoch": 8.062240663900415, "grad_norm": 3.7266387939453125, "learning_rate": 1.0765329644997696e-05, "log_odds_chosen": 11.5302095413208, "log_odds_ratio": -0.0002347620902583003, "logits/chosen": -0.3410811126232147, "logits/rejected": -0.37336307764053345, "logps/chosen": -0.0015355985378846526, "logps/rejected": -2.878851890563965, "loss": 0.3232, "nll_loss": 0.08078762888908386, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015355987125076354, "rewards/margins": 0.2877316176891327, "rewards/rejected": -0.2878851890563965, "step": 11658 }, { "epoch": 8.062932226832642, "grad_norm": 7.588277339935303, "learning_rate": 1.0761487628707547e-05, "log_odds_chosen": 10.4693603515625, "log_odds_ratio": -0.0007120502414181828, "logits/chosen": -0.262089341878891, "logits/rejected": -0.2863765358924866, "logps/chosen": -0.0006862751324661076, "logps/rejected": -2.2005093097686768, "loss": 0.5006, "nll_loss": 0.12508998811244965, "rewards/accuracies": 1.0, "rewards/chosen": -6.86275161569938e-05, "rewards/margins": 0.2199822962284088, "rewards/rejected": -0.22005091607570648, "step": 11659 }, { "epoch": 8.063623789764868, "grad_norm": 3.875340223312378, "learning_rate": 1.0757645612417398e-05, "log_odds_chosen": 11.094024658203125, "log_odds_ratio": -0.00012229704589117318, "logits/chosen": -0.7080512046813965, "logits/rejected": -0.675679624080658, "logps/chosen": -0.00026382546639069915, "logps/rejected": -2.685532331466675, "loss": 0.3727, "nll_loss": 0.09317460656166077, "rewards/accuracies": 1.0, "rewards/chosen": -2.638254954945296e-05, "rewards/margins": 0.2685268521308899, "rewards/rejected": -0.2685532569885254, "step": 11660 }, { "epoch": 8.064315352697095, "grad_norm": 5.578856945037842, "learning_rate": 1.0753803596127248e-05, "log_odds_chosen": 11.5849609375, "log_odds_ratio": -4.599348903866485e-05, "logits/chosen": -0.713058590888977, "logits/rejected": -0.7514088153839111, "logps/chosen": -0.00018928774807136506, "logps/rejected": -2.5326950550079346, "loss": 0.4545, "nll_loss": 0.11361332982778549, "rewards/accuracies": 1.0, "rewards/chosen": -1.8928774807136506e-05, "rewards/margins": 0.2532505691051483, "rewards/rejected": -0.2532695233821869, "step": 11661 }, { "epoch": 8.065006915629322, "grad_norm": 3.122255325317383, "learning_rate": 1.0749961579837099e-05, "log_odds_chosen": 10.60268783569336, "log_odds_ratio": -0.0003385727177374065, "logits/chosen": -0.8399189710617065, "logits/rejected": -0.8972233533859253, "logps/chosen": -0.0006615786114707589, "logps/rejected": -1.813633918762207, "loss": 0.3471, "nll_loss": 0.08674322068691254, "rewards/accuracies": 1.0, "rewards/chosen": -6.615785969188437e-05, "rewards/margins": 0.18129724264144897, "rewards/rejected": -0.18136340379714966, "step": 11662 }, { "epoch": 8.065698478561549, "grad_norm": 3.3046131134033203, "learning_rate": 1.074611956354695e-05, "log_odds_chosen": 11.259008407592773, "log_odds_ratio": -0.00023068346490617841, "logits/chosen": -0.5720511674880981, "logits/rejected": -0.6492300033569336, "logps/chosen": -0.00022080892813391984, "logps/rejected": -2.3607053756713867, "loss": 0.3182, "nll_loss": 0.07953804731369019, "rewards/accuracies": 1.0, "rewards/chosen": -2.2080894268583506e-05, "rewards/margins": 0.23604848980903625, "rewards/rejected": -0.23607054352760315, "step": 11663 }, { "epoch": 8.066390041493776, "grad_norm": 3.43304443359375, "learning_rate": 1.0742277547256802e-05, "log_odds_chosen": 10.118446350097656, "log_odds_ratio": -0.00044802669435739517, "logits/chosen": -0.23905214667320251, "logits/rejected": -0.16716815531253815, "logps/chosen": -0.0006772215710952878, "logps/rejected": -2.0158321857452393, "loss": 0.2561, "nll_loss": 0.06397796422243118, "rewards/accuracies": 1.0, "rewards/chosen": -6.772215419914573e-05, "rewards/margins": 0.2015155404806137, "rewards/rejected": -0.20158325135707855, "step": 11664 }, { "epoch": 8.067081604426003, "grad_norm": 7.999740123748779, "learning_rate": 1.0738435530966651e-05, "log_odds_chosen": 11.912637710571289, "log_odds_ratio": -4.0913459088187665e-05, "logits/chosen": -0.07696963846683502, "logits/rejected": -0.15583869814872742, "logps/chosen": -0.00018690910656005144, "logps/rejected": -2.847935676574707, "loss": 1.0301, "nll_loss": 0.2575136423110962, "rewards/accuracies": 1.0, "rewards/chosen": -1.869090920081362e-05, "rewards/margins": 0.28477486968040466, "rewards/rejected": -0.28479355573654175, "step": 11665 }, { "epoch": 8.06777316735823, "grad_norm": 3.754718065261841, "learning_rate": 1.0734593514676502e-05, "log_odds_chosen": 10.807878494262695, "log_odds_ratio": -0.00012588589743245393, "logits/chosen": -0.5574550628662109, "logits/rejected": -0.6791661977767944, "logps/chosen": -0.0003695531631819904, "logps/rejected": -2.385138511657715, "loss": 0.5487, "nll_loss": 0.1371651589870453, "rewards/accuracies": 1.0, "rewards/chosen": -3.6955319956177846e-05, "rewards/margins": 0.23847690224647522, "rewards/rejected": -0.23851384222507477, "step": 11666 }, { "epoch": 8.068464730290456, "grad_norm": 3.555729866027832, "learning_rate": 1.0730751498386353e-05, "log_odds_chosen": 11.32563304901123, "log_odds_ratio": -3.7144818634260446e-05, "logits/chosen": -0.3752025365829468, "logits/rejected": -0.4483998417854309, "logps/chosen": -0.0004869193071499467, "logps/rejected": -2.7447586059570312, "loss": 0.4554, "nll_loss": 0.11384133249521255, "rewards/accuracies": 1.0, "rewards/chosen": -4.869193071499467e-05, "rewards/margins": 0.2744271755218506, "rewards/rejected": -0.2744758725166321, "step": 11667 }, { "epoch": 8.069156293222683, "grad_norm": 3.514678716659546, "learning_rate": 1.0726909482096205e-05, "log_odds_chosen": 9.528676986694336, "log_odds_ratio": -0.0005053476197645068, "logits/chosen": -0.4819263219833374, "logits/rejected": -0.46735963225364685, "logps/chosen": -0.0002845790295396, "logps/rejected": -1.1177395582199097, "loss": 0.3643, "nll_loss": 0.09101204574108124, "rewards/accuracies": 1.0, "rewards/chosen": -2.8457903681555763e-05, "rewards/margins": 0.11174549907445908, "rewards/rejected": -0.11177396774291992, "step": 11668 }, { "epoch": 8.06984785615491, "grad_norm": 11.719605445861816, "learning_rate": 1.0723067465806056e-05, "log_odds_chosen": 10.538747787475586, "log_odds_ratio": -0.00031500202021561563, "logits/chosen": -0.5664748549461365, "logits/rejected": -0.5411901473999023, "logps/chosen": -0.0004703707236330956, "logps/rejected": -2.0027015209198, "loss": 0.4376, "nll_loss": 0.1093745082616806, "rewards/accuracies": 1.0, "rewards/chosen": -4.703707236330956e-05, "rewards/margins": 0.20022311806678772, "rewards/rejected": -0.2002701610326767, "step": 11669 }, { "epoch": 8.070539419087137, "grad_norm": 4.784857273101807, "learning_rate": 1.0719225449515907e-05, "log_odds_chosen": 11.857826232910156, "log_odds_ratio": -5.775447789346799e-05, "logits/chosen": -0.38018327951431274, "logits/rejected": -0.41651451587677, "logps/chosen": -0.0003240426303818822, "logps/rejected": -2.7629175186157227, "loss": 0.549, "nll_loss": 0.13724809885025024, "rewards/accuracies": 1.0, "rewards/chosen": -3.240426303818822e-05, "rewards/margins": 0.2762593626976013, "rewards/rejected": -0.27629178762435913, "step": 11670 }, { "epoch": 8.071230982019364, "grad_norm": 3.8318252563476562, "learning_rate": 1.0715383433225757e-05, "log_odds_chosen": 10.338508605957031, "log_odds_ratio": -7.135741179808974e-05, "logits/chosen": -0.6009719371795654, "logits/rejected": -0.47979089617729187, "logps/chosen": -0.00045763261732645333, "logps/rejected": -2.244736909866333, "loss": 0.3491, "nll_loss": 0.08727248758077621, "rewards/accuracies": 1.0, "rewards/chosen": -4.576325954985805e-05, "rewards/margins": 0.22442790865898132, "rewards/rejected": -0.22447368502616882, "step": 11671 }, { "epoch": 8.07192254495159, "grad_norm": 4.182869911193848, "learning_rate": 1.0711541416935608e-05, "log_odds_chosen": 11.189848899841309, "log_odds_ratio": -0.0003522265760693699, "logits/chosen": -0.47810447216033936, "logits/rejected": -0.5015765428543091, "logps/chosen": -0.0004364237829577178, "logps/rejected": -2.4150683879852295, "loss": 0.409, "nll_loss": 0.10220624506473541, "rewards/accuracies": 1.0, "rewards/chosen": -4.36423797509633e-05, "rewards/margins": 0.24146321415901184, "rewards/rejected": -0.24150684475898743, "step": 11672 }, { "epoch": 8.072614107883817, "grad_norm": 3.8744325637817383, "learning_rate": 1.0707699400645459e-05, "log_odds_chosen": 10.744927406311035, "log_odds_ratio": -0.00014370067219715565, "logits/chosen": -0.34662389755249023, "logits/rejected": -0.30861902236938477, "logps/chosen": -0.00019246491137892008, "logps/rejected": -2.105988025665283, "loss": 0.4266, "nll_loss": 0.1066313087940216, "rewards/accuracies": 1.0, "rewards/chosen": -1.9246490410296246e-05, "rewards/margins": 0.2105795443058014, "rewards/rejected": -0.21059879660606384, "step": 11673 }, { "epoch": 8.073305670816044, "grad_norm": 3.939030885696411, "learning_rate": 1.0703857384355311e-05, "log_odds_chosen": 10.826810836791992, "log_odds_ratio": -9.675160981714725e-05, "logits/chosen": -0.5049489736557007, "logits/rejected": -0.5568135380744934, "logps/chosen": -0.00012390982010401785, "logps/rejected": -1.9049142599105835, "loss": 0.3574, "nll_loss": 0.08934393525123596, "rewards/accuracies": 1.0, "rewards/chosen": -1.2390981282806024e-05, "rewards/margins": 0.19047902524471283, "rewards/rejected": -0.1904914230108261, "step": 11674 }, { "epoch": 8.07399723374827, "grad_norm": 3.718947172164917, "learning_rate": 1.070001536806516e-05, "log_odds_chosen": 12.584778785705566, "log_odds_ratio": -1.6311325452988967e-05, "logits/chosen": -0.4680939018726349, "logits/rejected": -0.4668935239315033, "logps/chosen": -0.0004223872674629092, "logps/rejected": -3.424896240234375, "loss": 0.3788, "nll_loss": 0.09469722211360931, "rewards/accuracies": 1.0, "rewards/chosen": -4.2238727473886684e-05, "rewards/margins": 0.3424473702907562, "rewards/rejected": -0.342489629983902, "step": 11675 }, { "epoch": 8.074688796680498, "grad_norm": 3.831510305404663, "learning_rate": 1.0696173351775011e-05, "log_odds_chosen": 10.228010177612305, "log_odds_ratio": -0.00012409157352522016, "logits/chosen": -0.6946989297866821, "logits/rejected": -0.7984243035316467, "logps/chosen": -0.00046772955101914704, "logps/rejected": -2.4283576011657715, "loss": 0.3698, "nll_loss": 0.09244365990161896, "rewards/accuracies": 1.0, "rewards/chosen": -4.677295873989351e-05, "rewards/margins": 0.24278900027275085, "rewards/rejected": -0.24283577501773834, "step": 11676 }, { "epoch": 8.075380359612724, "grad_norm": 5.196197986602783, "learning_rate": 1.0692331335484864e-05, "log_odds_chosen": 10.614950180053711, "log_odds_ratio": -0.001403479604050517, "logits/chosen": -0.3574877977371216, "logits/rejected": -0.4411139190196991, "logps/chosen": -0.0010486284736543894, "logps/rejected": -2.6322503089904785, "loss": 0.9034, "nll_loss": 0.22569890320301056, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010486285464139655, "rewards/margins": 0.263120174407959, "rewards/rejected": -0.2632250189781189, "step": 11677 }, { "epoch": 8.076071922544951, "grad_norm": 3.0540876388549805, "learning_rate": 1.0688489319194714e-05, "log_odds_chosen": 9.706781387329102, "log_odds_ratio": -0.0003603575169108808, "logits/chosen": -0.5334009528160095, "logits/rejected": -0.6449951529502869, "logps/chosen": -0.0004149775777477771, "logps/rejected": -1.623328447341919, "loss": 0.3817, "nll_loss": 0.09539544582366943, "rewards/accuracies": 1.0, "rewards/chosen": -4.149776214035228e-05, "rewards/margins": 0.16229134798049927, "rewards/rejected": -0.16233286261558533, "step": 11678 }, { "epoch": 8.076763485477178, "grad_norm": 5.293995380401611, "learning_rate": 1.0684647302904565e-05, "log_odds_chosen": 9.589630126953125, "log_odds_ratio": -0.00028994199237786233, "logits/chosen": -0.38811492919921875, "logits/rejected": -0.5318203568458557, "logps/chosen": -0.0005249874666333199, "logps/rejected": -1.6922428607940674, "loss": 0.4026, "nll_loss": 0.10061775892972946, "rewards/accuracies": 1.0, "rewards/chosen": -5.249874811852351e-05, "rewards/margins": 0.1691717952489853, "rewards/rejected": -0.16922429203987122, "step": 11679 }, { "epoch": 8.077455048409405, "grad_norm": 4.3679351806640625, "learning_rate": 1.0680805286614416e-05, "log_odds_chosen": 11.298325538635254, "log_odds_ratio": -4.026488750241697e-05, "logits/chosen": -0.27658116817474365, "logits/rejected": -0.30183690786361694, "logps/chosen": -0.00022356417321134359, "logps/rejected": -2.728652000427246, "loss": 0.5673, "nll_loss": 0.1418299674987793, "rewards/accuracies": 1.0, "rewards/chosen": -2.2356416593538597e-05, "rewards/margins": 0.2728428542613983, "rewards/rejected": -0.2728652060031891, "step": 11680 }, { "epoch": 8.078146611341632, "grad_norm": 5.592859268188477, "learning_rate": 1.0676963270324267e-05, "log_odds_chosen": 11.06486701965332, "log_odds_ratio": -0.00012340693501755595, "logits/chosen": -0.41180622577667236, "logits/rejected": -0.5484762191772461, "logps/chosen": -0.00026425684336572886, "logps/rejected": -2.107800245285034, "loss": 0.574, "nll_loss": 0.1434963345527649, "rewards/accuracies": 1.0, "rewards/chosen": -2.642568688315805e-05, "rewards/margins": 0.21075361967086792, "rewards/rejected": -0.2107800394296646, "step": 11681 }, { "epoch": 8.078838174273859, "grad_norm": 4.267911434173584, "learning_rate": 1.0673121254034117e-05, "log_odds_chosen": 10.70009994506836, "log_odds_ratio": -5.717628664569929e-05, "logits/chosen": -0.38904839754104614, "logits/rejected": -0.5367807149887085, "logps/chosen": -0.0003111464611720294, "logps/rejected": -2.428642749786377, "loss": 0.4344, "nll_loss": 0.10860622674226761, "rewards/accuracies": 1.0, "rewards/chosen": -3.11146468447987e-05, "rewards/margins": 0.24283316731452942, "rewards/rejected": -0.24286428093910217, "step": 11682 }, { "epoch": 8.079529737206085, "grad_norm": 9.669475555419922, "learning_rate": 1.066927923774397e-05, "log_odds_chosen": 11.366908073425293, "log_odds_ratio": -5.51422344869934e-05, "logits/chosen": -0.20215241611003876, "logits/rejected": -0.30782490968704224, "logps/chosen": -0.0003663312818389386, "logps/rejected": -2.961658000946045, "loss": 0.5382, "nll_loss": 0.1345456838607788, "rewards/accuracies": 1.0, "rewards/chosen": -3.663312963908538e-05, "rewards/margins": 0.29612916707992554, "rewards/rejected": -0.29616579413414, "step": 11683 }, { "epoch": 8.080221300138312, "grad_norm": 9.101593017578125, "learning_rate": 1.0665437221453819e-05, "log_odds_chosen": 11.405898094177246, "log_odds_ratio": -3.301673132227734e-05, "logits/chosen": -0.060617730021476746, "logits/rejected": -0.19241876900196075, "logps/chosen": -0.00026630048523657024, "logps/rejected": -2.7963948249816895, "loss": 0.3653, "nll_loss": 0.09131920337677002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6630050342646427e-05, "rewards/margins": 0.27961283922195435, "rewards/rejected": -0.27963948249816895, "step": 11684 }, { "epoch": 8.08091286307054, "grad_norm": 6.0880208015441895, "learning_rate": 1.066159520516367e-05, "log_odds_chosen": 10.40422248840332, "log_odds_ratio": -0.00013111547741573304, "logits/chosen": -0.4507831335067749, "logits/rejected": -0.4514143466949463, "logps/chosen": -0.0001635592634556815, "logps/rejected": -1.5886892080307007, "loss": 0.3797, "nll_loss": 0.09490390866994858, "rewards/accuracies": 1.0, "rewards/chosen": -1.6355925254174508e-05, "rewards/margins": 0.15885257720947266, "rewards/rejected": -0.1588689237833023, "step": 11685 }, { "epoch": 8.081604426002766, "grad_norm": 3.816469669342041, "learning_rate": 1.0657753188873522e-05, "log_odds_chosen": 11.360533714294434, "log_odds_ratio": -2.9774164431728423e-05, "logits/chosen": -0.1263056844472885, "logits/rejected": -0.3316514492034912, "logps/chosen": -0.00037145017995499074, "logps/rejected": -2.7097506523132324, "loss": 0.809, "nll_loss": 0.2022462636232376, "rewards/accuracies": 1.0, "rewards/chosen": -3.714502236107364e-05, "rewards/margins": 0.2709379196166992, "rewards/rejected": -0.2709750831127167, "step": 11686 }, { "epoch": 8.082295988934993, "grad_norm": 4.535207748413086, "learning_rate": 1.0653911172583373e-05, "log_odds_chosen": 8.963878631591797, "log_odds_ratio": -0.0005296789458952844, "logits/chosen": -0.18249739706516266, "logits/rejected": -0.3741540014743805, "logps/chosen": -0.0005585459875874221, "logps/rejected": -1.2982432842254639, "loss": 0.2973, "nll_loss": 0.07427582889795303, "rewards/accuracies": 1.0, "rewards/chosen": -5.5854594393167645e-05, "rewards/margins": 0.1297684609889984, "rewards/rejected": -0.12982431054115295, "step": 11687 }, { "epoch": 8.08298755186722, "grad_norm": 4.2169270515441895, "learning_rate": 1.0650069156293224e-05, "log_odds_chosen": 11.26722526550293, "log_odds_ratio": -3.825961539405398e-05, "logits/chosen": 0.1320866346359253, "logits/rejected": 0.07569442689418793, "logps/chosen": -0.0002050643670372665, "logps/rejected": -2.474332571029663, "loss": 0.41, "nll_loss": 0.10250428318977356, "rewards/accuracies": 1.0, "rewards/chosen": -2.0506438886513934e-05, "rewards/margins": 0.2474127560853958, "rewards/rejected": -0.24743324518203735, "step": 11688 }, { "epoch": 8.083679114799446, "grad_norm": 4.922435760498047, "learning_rate": 1.0646227140003074e-05, "log_odds_chosen": 11.057992935180664, "log_odds_ratio": -0.00010158990335185081, "logits/chosen": -0.4223814904689789, "logits/rejected": -0.2926499843597412, "logps/chosen": -0.0004958102363161743, "logps/rejected": -2.3514440059661865, "loss": 0.3024, "nll_loss": 0.07559921592473984, "rewards/accuracies": 1.0, "rewards/chosen": -4.958102363161743e-05, "rewards/margins": 0.2350948005914688, "rewards/rejected": -0.23514439165592194, "step": 11689 }, { "epoch": 8.084370677731673, "grad_norm": 5.644584655761719, "learning_rate": 1.0642385123712925e-05, "log_odds_chosen": 11.906888961791992, "log_odds_ratio": -2.213385232607834e-05, "logits/chosen": -0.28304755687713623, "logits/rejected": -0.35353371500968933, "logps/chosen": -0.00018962196190841496, "logps/rejected": -3.090592384338379, "loss": 0.311, "nll_loss": 0.0777548998594284, "rewards/accuracies": 1.0, "rewards/chosen": -1.8962196918437257e-05, "rewards/margins": 0.3090403079986572, "rewards/rejected": -0.3090592622756958, "step": 11690 }, { "epoch": 8.0850622406639, "grad_norm": 2.3281097412109375, "learning_rate": 1.0638543107422776e-05, "log_odds_chosen": 10.885601997375488, "log_odds_ratio": -0.0003212861774954945, "logits/chosen": -0.32111576199531555, "logits/rejected": -0.3752807378768921, "logps/chosen": -0.0005567088956013322, "logps/rejected": -2.0498268604278564, "loss": 0.2634, "nll_loss": 0.06582079827785492, "rewards/accuracies": 1.0, "rewards/chosen": -5.5670887377345935e-05, "rewards/margins": 0.2049270123243332, "rewards/rejected": -0.2049826830625534, "step": 11691 }, { "epoch": 8.085753803596127, "grad_norm": 3.4586286544799805, "learning_rate": 1.0634701091132628e-05, "log_odds_chosen": 12.256976127624512, "log_odds_ratio": -2.0153183868387714e-05, "logits/chosen": -0.8171830773353577, "logits/rejected": -0.8613120913505554, "logps/chosen": -0.00020997013780288398, "logps/rejected": -3.2046985626220703, "loss": 0.4156, "nll_loss": 0.10389473289251328, "rewards/accuracies": 1.0, "rewards/chosen": -2.0997013052692637e-05, "rewards/margins": 0.3204488754272461, "rewards/rejected": -0.32046985626220703, "step": 11692 }, { "epoch": 8.086445366528354, "grad_norm": 4.121342658996582, "learning_rate": 1.0630859074842477e-05, "log_odds_chosen": 11.76333236694336, "log_odds_ratio": -2.062761996057816e-05, "logits/chosen": -0.19090475142002106, "logits/rejected": -0.24660451710224152, "logps/chosen": -0.0002958507393486798, "logps/rejected": -2.9039571285247803, "loss": 0.4254, "nll_loss": 0.10634127259254456, "rewards/accuracies": 1.0, "rewards/chosen": -2.9585071388282813e-05, "rewards/margins": 0.29036611318588257, "rewards/rejected": -0.29039567708969116, "step": 11693 }, { "epoch": 8.08713692946058, "grad_norm": 3.8869848251342773, "learning_rate": 1.0627017058552328e-05, "log_odds_chosen": 10.896076202392578, "log_odds_ratio": -5.0996481149923056e-05, "logits/chosen": 0.1542137861251831, "logits/rejected": 0.10924919694662094, "logps/chosen": -0.000141911645187065, "logps/rejected": -1.7987464666366577, "loss": 0.4506, "nll_loss": 0.11265307664871216, "rewards/accuracies": 1.0, "rewards/chosen": -1.4191165973898023e-05, "rewards/margins": 0.17986047267913818, "rewards/rejected": -0.17987464368343353, "step": 11694 }, { "epoch": 8.087828492392807, "grad_norm": 3.6346919536590576, "learning_rate": 1.062317504226218e-05, "log_odds_chosen": 11.227770805358887, "log_odds_ratio": -0.00011380699288565665, "logits/chosen": -0.29328569769859314, "logits/rejected": -0.30168992280960083, "logps/chosen": -0.0001262957084691152, "logps/rejected": -1.8149688243865967, "loss": 0.3976, "nll_loss": 0.09939021617174149, "rewards/accuracies": 1.0, "rewards/chosen": -1.2629571756406222e-05, "rewards/margins": 0.18148425221443176, "rewards/rejected": -0.18149688839912415, "step": 11695 }, { "epoch": 8.088520055325034, "grad_norm": 4.028033256530762, "learning_rate": 1.0619333025972031e-05, "log_odds_chosen": 10.170938491821289, "log_odds_ratio": -0.0011964394943788648, "logits/chosen": -0.5307091474533081, "logits/rejected": -0.575198769569397, "logps/chosen": -0.000592139782384038, "logps/rejected": -1.9726135730743408, "loss": 0.4069, "nll_loss": 0.10161113739013672, "rewards/accuracies": 1.0, "rewards/chosen": -5.921397314523347e-05, "rewards/margins": 0.1972021460533142, "rewards/rejected": -0.19726136326789856, "step": 11696 }, { "epoch": 8.089211618257261, "grad_norm": 6.926070213317871, "learning_rate": 1.0615491009681882e-05, "log_odds_chosen": 9.902064323425293, "log_odds_ratio": -0.16591793298721313, "logits/chosen": -0.3648636043071747, "logits/rejected": -0.27038756012916565, "logps/chosen": -0.03061908297240734, "logps/rejected": -2.4003472328186035, "loss": 0.5452, "nll_loss": 0.11971628665924072, "rewards/accuracies": 0.875, "rewards/chosen": -0.0030619085300713778, "rewards/margins": 0.23697280883789062, "rewards/rejected": -0.24003471434116364, "step": 11697 }, { "epoch": 8.089903181189488, "grad_norm": 4.866793155670166, "learning_rate": 1.0611648993391731e-05, "log_odds_chosen": 11.041983604431152, "log_odds_ratio": -5.963775038253516e-05, "logits/chosen": -0.2963463068008423, "logits/rejected": -0.3301407992839813, "logps/chosen": -0.000261253968346864, "logps/rejected": -2.1881203651428223, "loss": 0.5279, "nll_loss": 0.13197532296180725, "rewards/accuracies": 1.0, "rewards/chosen": -2.612539719848428e-05, "rewards/margins": 0.21878591179847717, "rewards/rejected": -0.21881204843521118, "step": 11698 }, { "epoch": 8.090594744121715, "grad_norm": 4.243380546569824, "learning_rate": 1.0607806977101584e-05, "log_odds_chosen": 11.479931831359863, "log_odds_ratio": -1.3241216947790235e-05, "logits/chosen": -0.5409437417984009, "logits/rejected": -0.5891100168228149, "logps/chosen": -0.00010321621084585786, "logps/rejected": -2.011333465576172, "loss": 0.4353, "nll_loss": 0.10882383584976196, "rewards/accuracies": 1.0, "rewards/chosen": -1.0321620720787905e-05, "rewards/margins": 0.20112305879592896, "rewards/rejected": -0.2011333703994751, "step": 11699 }, { "epoch": 8.091286307053942, "grad_norm": 4.312553405761719, "learning_rate": 1.0603964960811434e-05, "log_odds_chosen": 11.484989166259766, "log_odds_ratio": -0.00010771736560855061, "logits/chosen": -0.011333595961332321, "logits/rejected": -0.0748065859079361, "logps/chosen": -0.00029604186420328915, "logps/rejected": -2.8591153621673584, "loss": 0.547, "nll_loss": 0.13673458993434906, "rewards/accuracies": 1.0, "rewards/chosen": -2.9604187147924677e-05, "rewards/margins": 0.2858819365501404, "rewards/rejected": -0.28591153025627136, "step": 11700 }, { "epoch": 8.091977869986168, "grad_norm": 3.610816717147827, "learning_rate": 1.0600122944521285e-05, "log_odds_chosen": 11.068406105041504, "log_odds_ratio": -0.00010744159953901544, "logits/chosen": -0.2816433012485504, "logits/rejected": -0.34033089876174927, "logps/chosen": -0.00041612557834014297, "logps/rejected": -2.609286308288574, "loss": 0.3272, "nll_loss": 0.08178265392780304, "rewards/accuracies": 1.0, "rewards/chosen": -4.161255492363125e-05, "rewards/margins": 0.2608869969844818, "rewards/rejected": -0.2609286308288574, "step": 11701 }, { "epoch": 8.092669432918395, "grad_norm": 2.7508389949798584, "learning_rate": 1.0596280928231136e-05, "log_odds_chosen": 10.45711612701416, "log_odds_ratio": -0.0003854866372421384, "logits/chosen": -0.2440710961818695, "logits/rejected": -0.21395137906074524, "logps/chosen": -0.0003028765204362571, "logps/rejected": -1.627083420753479, "loss": 0.209, "nll_loss": 0.05221348628401756, "rewards/accuracies": 1.0, "rewards/chosen": -3.028765058843419e-05, "rewards/margins": 0.162678062915802, "rewards/rejected": -0.1627083420753479, "step": 11702 }, { "epoch": 8.093360995850622, "grad_norm": 3.8575940132141113, "learning_rate": 1.0592438911940986e-05, "log_odds_chosen": 10.117890357971191, "log_odds_ratio": -0.0002173526445403695, "logits/chosen": -0.1967553347349167, "logits/rejected": -0.24321748316287994, "logps/chosen": -0.000661263766232878, "logps/rejected": -1.8736224174499512, "loss": 0.4698, "nll_loss": 0.11741577088832855, "rewards/accuracies": 1.0, "rewards/chosen": -6.61263766232878e-05, "rewards/margins": 0.18729612231254578, "rewards/rejected": -0.18736225366592407, "step": 11703 }, { "epoch": 8.094052558782849, "grad_norm": 3.7447421550750732, "learning_rate": 1.0588596895650837e-05, "log_odds_chosen": 10.763427734375, "log_odds_ratio": -7.86112723289989e-05, "logits/chosen": -0.45996594429016113, "logits/rejected": -0.4717139005661011, "logps/chosen": -0.00015374028589576483, "logps/rejected": -1.995914101600647, "loss": 0.3841, "nll_loss": 0.09600993990898132, "rewards/accuracies": 1.0, "rewards/chosen": -1.5374029317172244e-05, "rewards/margins": 0.19957603514194489, "rewards/rejected": -0.19959142804145813, "step": 11704 }, { "epoch": 8.094744121715076, "grad_norm": 3.0664539337158203, "learning_rate": 1.058475487936069e-05, "log_odds_chosen": 10.705331802368164, "log_odds_ratio": -0.00010342212772229686, "logits/chosen": -0.12351376563310623, "logits/rejected": -0.19361796975135803, "logps/chosen": -0.0003090745012741536, "logps/rejected": -2.340331554412842, "loss": 0.2656, "nll_loss": 0.06640031188726425, "rewards/accuracies": 1.0, "rewards/chosen": -3.09074493998196e-05, "rewards/margins": 0.2340022623538971, "rewards/rejected": -0.23403316736221313, "step": 11705 }, { "epoch": 8.095435684647303, "grad_norm": 3.6997134685516357, "learning_rate": 1.058091286307054e-05, "log_odds_chosen": 11.018940925598145, "log_odds_ratio": -2.4768874936853535e-05, "logits/chosen": 0.04698491469025612, "logits/rejected": 0.04826957359910011, "logps/chosen": -0.0004916464095003903, "logps/rejected": -2.697220802307129, "loss": 0.5134, "nll_loss": 0.1283407062292099, "rewards/accuracies": 1.0, "rewards/chosen": -4.9164649681188166e-05, "rewards/margins": 0.2696729302406311, "rewards/rejected": -0.2697220742702484, "step": 11706 }, { "epoch": 8.09612724757953, "grad_norm": 3.7170534133911133, "learning_rate": 1.057707084678039e-05, "log_odds_chosen": 10.96964168548584, "log_odds_ratio": -6.920520536368713e-05, "logits/chosen": -0.34352627396583557, "logits/rejected": -0.24258866906166077, "logps/chosen": -0.0002988358319271356, "logps/rejected": -2.604693651199341, "loss": 0.3497, "nll_loss": 0.08741586655378342, "rewards/accuracies": 1.0, "rewards/chosen": -2.988358392030932e-05, "rewards/margins": 0.26043951511383057, "rewards/rejected": -0.26046937704086304, "step": 11707 }, { "epoch": 8.096818810511756, "grad_norm": 3.525240421295166, "learning_rate": 1.0573228830490242e-05, "log_odds_chosen": 10.831472396850586, "log_odds_ratio": -0.00013157624925952405, "logits/chosen": -0.2283937633037567, "logits/rejected": -0.3139522075653076, "logps/chosen": -0.00024043480516411364, "logps/rejected": -2.343863010406494, "loss": 0.406, "nll_loss": 0.10149849951267242, "rewards/accuracies": 1.0, "rewards/chosen": -2.4043480152613483e-05, "rewards/margins": 0.23436225950717926, "rewards/rejected": -0.23438629508018494, "step": 11708 }, { "epoch": 8.097510373443983, "grad_norm": 7.458436965942383, "learning_rate": 1.0569386814200093e-05, "log_odds_chosen": 11.120165824890137, "log_odds_ratio": -2.6500281819608063e-05, "logits/chosen": -0.23689518868923187, "logits/rejected": -0.25085359811782837, "logps/chosen": -0.00014912855112925172, "logps/rejected": -1.925016164779663, "loss": 0.3739, "nll_loss": 0.09347623586654663, "rewards/accuracies": 1.0, "rewards/chosen": -1.4912855476723053e-05, "rewards/margins": 0.1924867033958435, "rewards/rejected": -0.19250163435935974, "step": 11709 }, { "epoch": 8.09820193637621, "grad_norm": 3.490351438522339, "learning_rate": 1.0565544797909943e-05, "log_odds_chosen": 10.826065063476562, "log_odds_ratio": -0.00010091800504596904, "logits/chosen": -0.027560945600271225, "logits/rejected": -0.059178732335567474, "logps/chosen": -0.00033488537883386016, "logps/rejected": -2.706929922103882, "loss": 0.311, "nll_loss": 0.07775228470563889, "rewards/accuracies": 1.0, "rewards/chosen": -3.3488537155790254e-05, "rewards/margins": 0.27065950632095337, "rewards/rejected": -0.27069300413131714, "step": 11710 }, { "epoch": 8.098893499308437, "grad_norm": 2.662122964859009, "learning_rate": 1.0561702781619794e-05, "log_odds_chosen": 10.835779190063477, "log_odds_ratio": -4.204745346214622e-05, "logits/chosen": -0.11458122730255127, "logits/rejected": -0.1769244521856308, "logps/chosen": -0.00015519153384957463, "logps/rejected": -1.9499348402023315, "loss": 0.3402, "nll_loss": 0.0850481316447258, "rewards/accuracies": 1.0, "rewards/chosen": -1.5519153748755343e-05, "rewards/margins": 0.19497796893119812, "rewards/rejected": -0.19499346613883972, "step": 11711 }, { "epoch": 8.099585062240664, "grad_norm": 3.402977228164673, "learning_rate": 1.0557860765329645e-05, "log_odds_chosen": 10.551286697387695, "log_odds_ratio": -0.00014364722301252186, "logits/chosen": -0.33826518058776855, "logits/rejected": -0.4199278950691223, "logps/chosen": -0.0012256636982783675, "logps/rejected": -2.1779050827026367, "loss": 0.4171, "nll_loss": 0.10425589978694916, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001225663727382198, "rewards/margins": 0.21766793727874756, "rewards/rejected": -0.21779048442840576, "step": 11712 }, { "epoch": 8.10027662517289, "grad_norm": 5.266214847564697, "learning_rate": 1.0554018749039496e-05, "log_odds_chosen": 12.025720596313477, "log_odds_ratio": -2.9008931960561313e-05, "logits/chosen": -0.4864197075366974, "logits/rejected": -0.503054678440094, "logps/chosen": -0.00017336485325358808, "logps/rejected": -2.8456907272338867, "loss": 0.539, "nll_loss": 0.13474708795547485, "rewards/accuracies": 1.0, "rewards/chosen": -1.733648605295457e-05, "rewards/margins": 0.284551739692688, "rewards/rejected": -0.2845690846443176, "step": 11713 }, { "epoch": 8.100968188105117, "grad_norm": 3.9011898040771484, "learning_rate": 1.0550176732749348e-05, "log_odds_chosen": 10.942675590515137, "log_odds_ratio": -2.8809481591451913e-05, "logits/chosen": 0.13352540135383606, "logits/rejected": 0.006145041435956955, "logps/chosen": -0.0001652721839491278, "logps/rejected": -2.090564727783203, "loss": 0.3707, "nll_loss": 0.09268441796302795, "rewards/accuracies": 1.0, "rewards/chosen": -1.6527217667317018e-05, "rewards/margins": 0.20903997123241425, "rewards/rejected": -0.20905649662017822, "step": 11714 }, { "epoch": 8.101659751037344, "grad_norm": 7.787944316864014, "learning_rate": 1.0546334716459199e-05, "log_odds_chosen": 12.608068466186523, "log_odds_ratio": -2.1099880541441962e-05, "logits/chosen": 0.07174703478813171, "logits/rejected": 0.07831723988056183, "logps/chosen": -0.000200631155166775, "logps/rejected": -3.4012465476989746, "loss": 0.3049, "nll_loss": 0.0762215405702591, "rewards/accuracies": 1.0, "rewards/chosen": -2.0063114789081737e-05, "rewards/margins": 0.3401045799255371, "rewards/rejected": -0.3401246666908264, "step": 11715 }, { "epoch": 8.10235131396957, "grad_norm": 4.928945064544678, "learning_rate": 1.0542492700169048e-05, "log_odds_chosen": 10.747015953063965, "log_odds_ratio": -0.000226613599807024, "logits/chosen": -0.14238809049129486, "logits/rejected": -0.19443312287330627, "logps/chosen": -0.0008222081232815981, "logps/rejected": -2.1754188537597656, "loss": 0.5648, "nll_loss": 0.1411823034286499, "rewards/accuracies": 1.0, "rewards/chosen": -8.222080941777676e-05, "rewards/margins": 0.21745967864990234, "rewards/rejected": -0.21754190325737, "step": 11716 }, { "epoch": 8.103042876901798, "grad_norm": 3.7822341918945312, "learning_rate": 1.05386506838789e-05, "log_odds_chosen": 10.048130989074707, "log_odds_ratio": -0.00016685103764757514, "logits/chosen": -0.23168587684631348, "logits/rejected": -0.19154129922389984, "logps/chosen": -0.0003420043212827295, "logps/rejected": -1.6004074811935425, "loss": 0.4462, "nll_loss": 0.11152322590351105, "rewards/accuracies": 1.0, "rewards/chosen": -3.420043140067719e-05, "rewards/margins": 0.1600065529346466, "rewards/rejected": -0.1600407361984253, "step": 11717 }, { "epoch": 8.103734439834025, "grad_norm": 4.209904193878174, "learning_rate": 1.0534808667588751e-05, "log_odds_chosen": 11.270818710327148, "log_odds_ratio": -5.3001502237748355e-05, "logits/chosen": -0.6203676462173462, "logits/rejected": -0.6190280914306641, "logps/chosen": -0.00022040428302716464, "logps/rejected": -2.402357578277588, "loss": 0.3598, "nll_loss": 0.08995695412158966, "rewards/accuracies": 1.0, "rewards/chosen": -2.2040429030312225e-05, "rewards/margins": 0.24021372199058533, "rewards/rejected": -0.24023577570915222, "step": 11718 }, { "epoch": 8.104426002766251, "grad_norm": 4.452395439147949, "learning_rate": 1.0530966651298602e-05, "log_odds_chosen": 11.229473114013672, "log_odds_ratio": -0.00017697452858556062, "logits/chosen": -0.3201093375682831, "logits/rejected": -0.4116101861000061, "logps/chosen": -0.00024741722154431045, "logps/rejected": -2.6428608894348145, "loss": 0.6095, "nll_loss": 0.15235979855060577, "rewards/accuracies": 1.0, "rewards/chosen": -2.4741722882026806e-05, "rewards/margins": 0.2642613649368286, "rewards/rejected": -0.2642861008644104, "step": 11719 }, { "epoch": 8.105117565698478, "grad_norm": 3.6376991271972656, "learning_rate": 1.0527124635008454e-05, "log_odds_chosen": 11.011824607849121, "log_odds_ratio": -9.336704533779994e-05, "logits/chosen": -0.18999135494232178, "logits/rejected": -0.1827242523431778, "logps/chosen": -0.0004461828211788088, "logps/rejected": -2.6694254875183105, "loss": 0.3557, "nll_loss": 0.08890706300735474, "rewards/accuracies": 1.0, "rewards/chosen": -4.461828211788088e-05, "rewards/margins": 0.26689794659614563, "rewards/rejected": -0.26694256067276, "step": 11720 }, { "epoch": 8.105809128630705, "grad_norm": 3.3639659881591797, "learning_rate": 1.0523282618718303e-05, "log_odds_chosen": 11.908004760742188, "log_odds_ratio": -4.112864189664833e-05, "logits/chosen": -0.1534234881401062, "logits/rejected": -0.15365737676620483, "logps/chosen": -0.00016780171426944435, "logps/rejected": -2.8708534240722656, "loss": 0.3309, "nll_loss": 0.08273102343082428, "rewards/accuracies": 1.0, "rewards/chosen": -1.678017360973172e-05, "rewards/margins": 0.28706854581832886, "rewards/rejected": -0.2870853543281555, "step": 11721 }, { "epoch": 8.106500691562932, "grad_norm": 8.023885726928711, "learning_rate": 1.0519440602428154e-05, "log_odds_chosen": 9.650749206542969, "log_odds_ratio": -0.00020404420502018183, "logits/chosen": -0.5126906037330627, "logits/rejected": -0.6327559351921082, "logps/chosen": -0.0004856606828980148, "logps/rejected": -1.758907675743103, "loss": 0.4059, "nll_loss": 0.10146161913871765, "rewards/accuracies": 1.0, "rewards/chosen": -4.8566071200184524e-05, "rewards/margins": 0.17584219574928284, "rewards/rejected": -0.1758907586336136, "step": 11722 }, { "epoch": 8.107192254495159, "grad_norm": 3.2019126415252686, "learning_rate": 1.0515598586138007e-05, "log_odds_chosen": 10.740392684936523, "log_odds_ratio": -3.777168603846803e-05, "logits/chosen": -0.8093961477279663, "logits/rejected": -0.7486404180526733, "logps/chosen": -0.0002348105190321803, "logps/rejected": -2.1447434425354004, "loss": 0.3787, "nll_loss": 0.09465982019901276, "rewards/accuracies": 1.0, "rewards/chosen": -2.348105226701591e-05, "rewards/margins": 0.214450865983963, "rewards/rejected": -0.21447435021400452, "step": 11723 }, { "epoch": 8.107883817427386, "grad_norm": 5.206589221954346, "learning_rate": 1.0511756569847857e-05, "log_odds_chosen": 11.088725090026855, "log_odds_ratio": -0.0001943162496900186, "logits/chosen": -0.5015290379524231, "logits/rejected": -0.41111627221107483, "logps/chosen": -0.0004091473820153624, "logps/rejected": -2.845104217529297, "loss": 0.3381, "nll_loss": 0.08451039344072342, "rewards/accuracies": 1.0, "rewards/chosen": -4.091473965672776e-05, "rewards/margins": 0.28446948528289795, "rewards/rejected": -0.28451040387153625, "step": 11724 }, { "epoch": 8.108575380359612, "grad_norm": 3.697836399078369, "learning_rate": 1.0507914553557708e-05, "log_odds_chosen": 10.652294158935547, "log_odds_ratio": -5.1158724090782925e-05, "logits/chosen": -0.9492626190185547, "logits/rejected": -0.9893613457679749, "logps/chosen": -0.0005272809648886323, "logps/rejected": -2.086449146270752, "loss": 0.4128, "nll_loss": 0.10319921374320984, "rewards/accuracies": 1.0, "rewards/chosen": -5.2728093578480184e-05, "rewards/margins": 0.20859217643737793, "rewards/rejected": -0.20864489674568176, "step": 11725 }, { "epoch": 8.10926694329184, "grad_norm": 5.2985920906066895, "learning_rate": 1.0504072537267559e-05, "log_odds_chosen": 10.933512687683105, "log_odds_ratio": -0.00010600912355585024, "logits/chosen": -0.6199065446853638, "logits/rejected": -0.6506941318511963, "logps/chosen": -0.0006066207424737513, "logps/rejected": -2.5779573917388916, "loss": 0.3398, "nll_loss": 0.0849461555480957, "rewards/accuracies": 1.0, "rewards/chosen": -6.066207424737513e-05, "rewards/margins": 0.25773507356643677, "rewards/rejected": -0.2577957510948181, "step": 11726 }, { "epoch": 8.109958506224066, "grad_norm": 4.998779296875, "learning_rate": 1.050023052097741e-05, "log_odds_chosen": 10.645483016967773, "log_odds_ratio": -9.701005183160305e-05, "logits/chosen": -0.3984414041042328, "logits/rejected": -0.38287946581840515, "logps/chosen": -0.0003364400181453675, "logps/rejected": -2.164236068725586, "loss": 0.3282, "nll_loss": 0.08203479647636414, "rewards/accuracies": 1.0, "rewards/chosen": -3.364400254213251e-05, "rewards/margins": 0.2163899540901184, "rewards/rejected": -0.2164236158132553, "step": 11727 }, { "epoch": 8.110650069156293, "grad_norm": 4.733262062072754, "learning_rate": 1.049638850468726e-05, "log_odds_chosen": 11.274560928344727, "log_odds_ratio": -0.0001574632478877902, "logits/chosen": -0.10809072107076645, "logits/rejected": -0.18218806385993958, "logps/chosen": -0.0002483499119989574, "logps/rejected": -3.040714740753174, "loss": 0.5118, "nll_loss": 0.12793704867362976, "rewards/accuracies": 1.0, "rewards/chosen": -2.483499156369362e-05, "rewards/margins": 0.304046630859375, "rewards/rejected": -0.30407148599624634, "step": 11728 }, { "epoch": 8.11134163208852, "grad_norm": 8.088932037353516, "learning_rate": 1.0492546488397113e-05, "log_odds_chosen": 11.483865737915039, "log_odds_ratio": -0.00010575917985988781, "logits/chosen": -0.4476820230484009, "logits/rejected": -0.37150460481643677, "logps/chosen": -0.0007105075637809932, "logps/rejected": -2.8520753383636475, "loss": 0.3618, "nll_loss": 0.09043564647436142, "rewards/accuracies": 1.0, "rewards/chosen": -7.105076656443998e-05, "rewards/margins": 0.28513649106025696, "rewards/rejected": -0.2852075397968292, "step": 11729 }, { "epoch": 8.112033195020746, "grad_norm": 6.1691460609436035, "learning_rate": 1.0488704472106962e-05, "log_odds_chosen": 12.182022094726562, "log_odds_ratio": -2.4681070499354973e-05, "logits/chosen": -0.4261326193809509, "logits/rejected": -0.5570498704910278, "logps/chosen": -0.0003534106654115021, "logps/rejected": -3.1881463527679443, "loss": 0.5829, "nll_loss": 0.1457168161869049, "rewards/accuracies": 1.0, "rewards/chosen": -3.534106508595869e-05, "rewards/margins": 0.318779319524765, "rewards/rejected": -0.3188146650791168, "step": 11730 }, { "epoch": 8.112724757952973, "grad_norm": 3.71748948097229, "learning_rate": 1.0484862455816813e-05, "log_odds_chosen": 10.750641822814941, "log_odds_ratio": -0.00017214790568687022, "logits/chosen": -0.31991609930992126, "logits/rejected": -0.3900543749332428, "logps/chosen": -0.0003686299023684114, "logps/rejected": -2.6714460849761963, "loss": 0.3827, "nll_loss": 0.09565538167953491, "rewards/accuracies": 1.0, "rewards/chosen": -3.6862991692032665e-05, "rewards/margins": 0.267107754945755, "rewards/rejected": -0.2671446204185486, "step": 11731 }, { "epoch": 8.1134163208852, "grad_norm": 2.332948923110962, "learning_rate": 1.0481020439526663e-05, "log_odds_chosen": 10.546846389770508, "log_odds_ratio": -0.0001390865072607994, "logits/chosen": -0.4906228482723236, "logits/rejected": -0.4684491455554962, "logps/chosen": -0.00023612199584022164, "logps/rejected": -1.8974788188934326, "loss": 0.3942, "nll_loss": 0.09854131937026978, "rewards/accuracies": 1.0, "rewards/chosen": -2.3612201403011568e-05, "rewards/margins": 0.18972426652908325, "rewards/rejected": -0.1897478997707367, "step": 11732 }, { "epoch": 8.114107883817427, "grad_norm": 4.659921169281006, "learning_rate": 1.0477178423236516e-05, "log_odds_chosen": 11.114786148071289, "log_odds_ratio": -0.00022430458921007812, "logits/chosen": 0.07960556447505951, "logits/rejected": -0.11168298870325089, "logps/chosen": -0.00036845554132014513, "logps/rejected": -2.572969913482666, "loss": 0.6721, "nll_loss": 0.1679985523223877, "rewards/accuracies": 1.0, "rewards/chosen": -3.6845554859610274e-05, "rewards/margins": 0.25726014375686646, "rewards/rejected": -0.25729697942733765, "step": 11733 }, { "epoch": 8.114799446749654, "grad_norm": 3.753039598464966, "learning_rate": 1.0473336406946366e-05, "log_odds_chosen": 10.989168167114258, "log_odds_ratio": -8.060281106736511e-05, "logits/chosen": -0.5705875754356384, "logits/rejected": -0.6338287591934204, "logps/chosen": -0.0005902528646402061, "logps/rejected": -2.472515821456909, "loss": 0.2849, "nll_loss": 0.07122041285037994, "rewards/accuracies": 1.0, "rewards/chosen": -5.9025289374403656e-05, "rewards/margins": 0.24719256162643433, "rewards/rejected": -0.24725157022476196, "step": 11734 }, { "epoch": 8.11549100968188, "grad_norm": 4.227781295776367, "learning_rate": 1.0469494390656216e-05, "log_odds_chosen": 10.857701301574707, "log_odds_ratio": -5.3189338359516114e-05, "logits/chosen": -0.3402118384838104, "logits/rejected": -0.32114389538764954, "logps/chosen": -0.00019546672410797328, "logps/rejected": -2.3145198822021484, "loss": 0.4392, "nll_loss": 0.10979792475700378, "rewards/accuracies": 1.0, "rewards/chosen": -1.9546674593584612e-05, "rewards/margins": 0.2314324676990509, "rewards/rejected": -0.23145200312137604, "step": 11735 }, { "epoch": 8.116182572614107, "grad_norm": 4.631288051605225, "learning_rate": 1.0465652374366068e-05, "log_odds_chosen": 11.65530776977539, "log_odds_ratio": -0.00029073667246848345, "logits/chosen": -0.27442625164985657, "logits/rejected": -0.4939839839935303, "logps/chosen": -0.0006930760573595762, "logps/rejected": -3.0342812538146973, "loss": 0.4844, "nll_loss": 0.1210765540599823, "rewards/accuracies": 1.0, "rewards/chosen": -6.930760719114915e-05, "rewards/margins": 0.303358793258667, "rewards/rejected": -0.30342811346054077, "step": 11736 }, { "epoch": 8.116874135546334, "grad_norm": 2.9287948608398438, "learning_rate": 1.0461810358075919e-05, "log_odds_chosen": 11.522528648376465, "log_odds_ratio": -1.7534979633637704e-05, "logits/chosen": -0.5429355502128601, "logits/rejected": -0.614828884601593, "logps/chosen": -0.0002202479518018663, "logps/rejected": -2.4436118602752686, "loss": 0.323, "nll_loss": 0.080753393471241, "rewards/accuracies": 1.0, "rewards/chosen": -2.202479481638875e-05, "rewards/margins": 0.24433915317058563, "rewards/rejected": -0.24436119198799133, "step": 11737 }, { "epoch": 8.117565698478561, "grad_norm": 5.3048577308654785, "learning_rate": 1.045796834178577e-05, "log_odds_chosen": 11.234204292297363, "log_odds_ratio": -1.8929702491732314e-05, "logits/chosen": -0.5729963779449463, "logits/rejected": -0.6361346244812012, "logps/chosen": -9.631240391172469e-05, "logps/rejected": -1.866896390914917, "loss": 0.6894, "nll_loss": 0.17235279083251953, "rewards/accuracies": 1.0, "rewards/chosen": -9.63124148256611e-06, "rewards/margins": 0.18668000400066376, "rewards/rejected": -0.18668964505195618, "step": 11738 }, { "epoch": 8.118257261410788, "grad_norm": 4.327181816101074, "learning_rate": 1.045412632549562e-05, "log_odds_chosen": 10.270843505859375, "log_odds_ratio": -0.0002234416315332055, "logits/chosen": -0.4312435984611511, "logits/rejected": -0.469840943813324, "logps/chosen": -0.0014292667619884014, "logps/rejected": -2.169156789779663, "loss": 0.4497, "nll_loss": 0.11241503059864044, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001429266994819045, "rewards/margins": 0.21677275002002716, "rewards/rejected": -0.21691568195819855, "step": 11739 }, { "epoch": 8.118948824343015, "grad_norm": 4.3350396156311035, "learning_rate": 1.0450284309205471e-05, "log_odds_chosen": 10.924783706665039, "log_odds_ratio": -0.000957698212005198, "logits/chosen": 0.08049260079860687, "logits/rejected": -0.008040845394134521, "logps/chosen": -0.00039710375131107867, "logps/rejected": -2.2647242546081543, "loss": 0.5589, "nll_loss": 0.13964161276817322, "rewards/accuracies": 1.0, "rewards/chosen": -3.9710379496682435e-05, "rewards/margins": 0.22643274068832397, "rewards/rejected": -0.22647245228290558, "step": 11740 }, { "epoch": 8.119640387275242, "grad_norm": 3.162846326828003, "learning_rate": 1.0446442292915322e-05, "log_odds_chosen": 12.0403470993042, "log_odds_ratio": -8.135210009641014e-06, "logits/chosen": -0.2058313935995102, "logits/rejected": -0.23427899181842804, "logps/chosen": -6.742282857885584e-05, "logps/rejected": -2.2653045654296875, "loss": 0.4328, "nll_loss": 0.10819646716117859, "rewards/accuracies": 1.0, "rewards/chosen": -6.742283403582405e-06, "rewards/margins": 0.22652371227741241, "rewards/rejected": -0.22653046250343323, "step": 11741 }, { "epoch": 8.120331950207468, "grad_norm": 3.899838447570801, "learning_rate": 1.0442600276625174e-05, "log_odds_chosen": 10.268180847167969, "log_odds_ratio": -0.00013161652896087617, "logits/chosen": -0.6410156488418579, "logits/rejected": -0.6552188396453857, "logps/chosen": -0.00023809485719539225, "logps/rejected": -1.8581407070159912, "loss": 0.4474, "nll_loss": 0.11183653771877289, "rewards/accuracies": 1.0, "rewards/chosen": -2.380948353675194e-05, "rewards/margins": 0.1857902556657791, "rewards/rejected": -0.18581408262252808, "step": 11742 }, { "epoch": 8.121023513139695, "grad_norm": 3.759852647781372, "learning_rate": 1.0438758260335025e-05, "log_odds_chosen": 10.801055908203125, "log_odds_ratio": -8.971167699201033e-05, "logits/chosen": -0.24352969229221344, "logits/rejected": -0.27986443042755127, "logps/chosen": -0.0002285518276039511, "logps/rejected": -2.148073196411133, "loss": 0.4463, "nll_loss": 0.11155528575181961, "rewards/accuracies": 1.0, "rewards/chosen": -2.2855185306980275e-05, "rewards/margins": 0.21478447318077087, "rewards/rejected": -0.21480733156204224, "step": 11743 }, { "epoch": 8.121715076071922, "grad_norm": 3.330958127975464, "learning_rate": 1.0434916244044874e-05, "log_odds_chosen": 11.091012001037598, "log_odds_ratio": -0.00026707968208938837, "logits/chosen": -0.41995424032211304, "logits/rejected": -0.457638144493103, "logps/chosen": -0.00033980689477175474, "logps/rejected": -2.834537982940674, "loss": 0.3658, "nll_loss": 0.09141948074102402, "rewards/accuracies": 1.0, "rewards/chosen": -3.398069384275004e-05, "rewards/margins": 0.28341981768608093, "rewards/rejected": -0.2834537923336029, "step": 11744 }, { "epoch": 8.122406639004149, "grad_norm": 4.29794454574585, "learning_rate": 1.0431074227754726e-05, "log_odds_chosen": 11.204475402832031, "log_odds_ratio": -2.0994948499719612e-05, "logits/chosen": -0.2526164650917053, "logits/rejected": -0.24609719216823578, "logps/chosen": -0.0009924235055223107, "logps/rejected": -2.580258369445801, "loss": 0.3958, "nll_loss": 0.09894296526908875, "rewards/accuracies": 1.0, "rewards/chosen": -9.924236655933782e-05, "rewards/margins": 0.2579265832901001, "rewards/rejected": -0.2580258250236511, "step": 11745 }, { "epoch": 8.123098201936376, "grad_norm": 3.5008950233459473, "learning_rate": 1.0427232211464577e-05, "log_odds_chosen": 10.998022079467773, "log_odds_ratio": -0.00012253725435584784, "logits/chosen": -0.020180311053991318, "logits/rejected": -0.07521750032901764, "logps/chosen": -0.0004845786024816334, "logps/rejected": -2.175794839859009, "loss": 0.4081, "nll_loss": 0.10201320797204971, "rewards/accuracies": 1.0, "rewards/chosen": -4.845785952056758e-05, "rewards/margins": 0.21753105521202087, "rewards/rejected": -0.21757948398590088, "step": 11746 }, { "epoch": 8.123789764868603, "grad_norm": 4.073617935180664, "learning_rate": 1.0423390195174428e-05, "log_odds_chosen": 10.413125038146973, "log_odds_ratio": -0.00023272483667824417, "logits/chosen": -0.308295339345932, "logits/rejected": -0.3603138327598572, "logps/chosen": -0.000880507577676326, "logps/rejected": -2.106022834777832, "loss": 0.3743, "nll_loss": 0.09354668110609055, "rewards/accuracies": 1.0, "rewards/chosen": -8.805075049167499e-05, "rewards/margins": 0.21051423251628876, "rewards/rejected": -0.2106022834777832, "step": 11747 }, { "epoch": 8.12448132780083, "grad_norm": 5.366794109344482, "learning_rate": 1.0419548178884279e-05, "log_odds_chosen": 11.902491569519043, "log_odds_ratio": -1.66897243616404e-05, "logits/chosen": -0.07255479693412781, "logits/rejected": -0.2560787796974182, "logps/chosen": -0.00012625358067452908, "logps/rejected": -2.5823984146118164, "loss": 0.4061, "nll_loss": 0.10153479874134064, "rewards/accuracies": 1.0, "rewards/chosen": -1.2625358067452908e-05, "rewards/margins": 0.25822722911834717, "rewards/rejected": -0.25823986530303955, "step": 11748 }, { "epoch": 8.125172890733056, "grad_norm": 5.0338215827941895, "learning_rate": 1.041570616259413e-05, "log_odds_chosen": 11.396980285644531, "log_odds_ratio": -7.926767284516245e-05, "logits/chosen": -0.045376040041446686, "logits/rejected": -0.08236894011497498, "logps/chosen": -0.00019739723938982934, "logps/rejected": -2.5047426223754883, "loss": 0.4258, "nll_loss": 0.1064419075846672, "rewards/accuracies": 1.0, "rewards/chosen": -1.9739725757972337e-05, "rewards/margins": 0.25045451521873474, "rewards/rejected": -0.2504742741584778, "step": 11749 }, { "epoch": 8.125864453665283, "grad_norm": 2.8154377937316895, "learning_rate": 1.041186414630398e-05, "log_odds_chosen": 10.108574867248535, "log_odds_ratio": -7.602085679536685e-05, "logits/chosen": -0.5656677484512329, "logits/rejected": -0.6253648996353149, "logps/chosen": -0.00017004036635626107, "logps/rejected": -1.509115219116211, "loss": 0.3699, "nll_loss": 0.09246040880680084, "rewards/accuracies": 1.0, "rewards/chosen": -1.700403845461551e-05, "rewards/margins": 0.15089452266693115, "rewards/rejected": -0.15091153979301453, "step": 11750 }, { "epoch": 8.12655601659751, "grad_norm": 5.296730041503906, "learning_rate": 1.0408022130013833e-05, "log_odds_chosen": 11.02202033996582, "log_odds_ratio": -4.4179709220770746e-05, "logits/chosen": -0.4976728558540344, "logits/rejected": -0.6354691386222839, "logps/chosen": -0.00023194911773316562, "logps/rejected": -2.3813910484313965, "loss": 0.4758, "nll_loss": 0.11895264685153961, "rewards/accuracies": 1.0, "rewards/chosen": -2.3194912500912324e-05, "rewards/margins": 0.23811590671539307, "rewards/rejected": -0.2381390929222107, "step": 11751 }, { "epoch": 8.127247579529737, "grad_norm": 3.8053126335144043, "learning_rate": 1.0404180113723683e-05, "log_odds_chosen": 10.760663986206055, "log_odds_ratio": -0.00013347077765502036, "logits/chosen": 0.2298506796360016, "logits/rejected": 0.12136253714561462, "logps/chosen": -0.00028582217055372894, "logps/rejected": -2.100955009460449, "loss": 0.4429, "nll_loss": 0.11070973426103592, "rewards/accuracies": 1.0, "rewards/chosen": -2.8582218874362297e-05, "rewards/margins": 0.21006692945957184, "rewards/rejected": -0.21009549498558044, "step": 11752 }, { "epoch": 8.127939142461964, "grad_norm": 3.647557497024536, "learning_rate": 1.0400338097433532e-05, "log_odds_chosen": 11.35352897644043, "log_odds_ratio": -6.096455399529077e-05, "logits/chosen": -0.39178648591041565, "logits/rejected": -0.45617157220840454, "logps/chosen": -0.0001079787325579673, "logps/rejected": -2.3015360832214355, "loss": 0.3426, "nll_loss": 0.08565562218427658, "rewards/accuracies": 1.0, "rewards/chosen": -1.079787307389779e-05, "rewards/margins": 0.23014283180236816, "rewards/rejected": -0.2301536202430725, "step": 11753 }, { "epoch": 8.12863070539419, "grad_norm": 4.891421794891357, "learning_rate": 1.0396496081143385e-05, "log_odds_chosen": 10.965896606445312, "log_odds_ratio": -0.00021092222596053034, "logits/chosen": -0.4915197491645813, "logits/rejected": -0.5347434878349304, "logps/chosen": -0.0016437186859548092, "logps/rejected": -2.8035781383514404, "loss": 0.5808, "nll_loss": 0.14518901705741882, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016437187150586396, "rewards/margins": 0.2801934480667114, "rewards/rejected": -0.28035783767700195, "step": 11754 }, { "epoch": 8.129322268326417, "grad_norm": 3.891859531402588, "learning_rate": 1.0392654064853236e-05, "log_odds_chosen": 12.413590431213379, "log_odds_ratio": -2.2598998839384876e-05, "logits/chosen": -0.8400213718414307, "logits/rejected": -0.8548598289489746, "logps/chosen": -0.00011090834595961496, "logps/rejected": -3.0911455154418945, "loss": 0.3709, "nll_loss": 0.09272802621126175, "rewards/accuracies": 1.0, "rewards/chosen": -1.1090835869254079e-05, "rewards/margins": 0.30910348892211914, "rewards/rejected": -0.30911457538604736, "step": 11755 }, { "epoch": 8.130013831258644, "grad_norm": 5.259171962738037, "learning_rate": 1.0388812048563086e-05, "log_odds_chosen": 11.260395050048828, "log_odds_ratio": -0.00016276906535495073, "logits/chosen": -0.04953872784972191, "logits/rejected": -0.14904235303401947, "logps/chosen": -0.00020939150999765843, "logps/rejected": -2.425943613052368, "loss": 0.4882, "nll_loss": 0.12202227860689163, "rewards/accuracies": 1.0, "rewards/chosen": -2.0939150999765843e-05, "rewards/margins": 0.24257344007492065, "rewards/rejected": -0.24259436130523682, "step": 11756 }, { "epoch": 8.130705394190871, "grad_norm": 3.7498621940612793, "learning_rate": 1.0384970032272937e-05, "log_odds_chosen": 10.983440399169922, "log_odds_ratio": -6.46675398456864e-05, "logits/chosen": -0.5816982984542847, "logits/rejected": -0.5811514854431152, "logps/chosen": -0.00016490585403516889, "logps/rejected": -1.9317435026168823, "loss": 0.3925, "nll_loss": 0.0981072410941124, "rewards/accuracies": 1.0, "rewards/chosen": -1.6490585039719008e-05, "rewards/margins": 0.1931578516960144, "rewards/rejected": -0.1931743323802948, "step": 11757 }, { "epoch": 8.131396957123098, "grad_norm": 3.3982486724853516, "learning_rate": 1.0381128015982788e-05, "log_odds_chosen": 11.659255027770996, "log_odds_ratio": -2.207270881626755e-05, "logits/chosen": -0.3394581079483032, "logits/rejected": -0.31356391310691833, "logps/chosen": -0.00010923718218691647, "logps/rejected": -2.487722873687744, "loss": 0.3847, "nll_loss": 0.09616082906723022, "rewards/accuracies": 1.0, "rewards/chosen": -1.0923718946287408e-05, "rewards/margins": 0.248761385679245, "rewards/rejected": -0.24877230823040009, "step": 11758 }, { "epoch": 8.132088520055325, "grad_norm": 5.193324565887451, "learning_rate": 1.0377285999692639e-05, "log_odds_chosen": 10.180469512939453, "log_odds_ratio": -0.00010024676885223016, "logits/chosen": -0.20359095931053162, "logits/rejected": -0.1900109499692917, "logps/chosen": -0.0003616532776504755, "logps/rejected": -2.0986833572387695, "loss": 0.4011, "nll_loss": 0.1002582311630249, "rewards/accuracies": 1.0, "rewards/chosen": -3.616532922023907e-05, "rewards/margins": 0.20983219146728516, "rewards/rejected": -0.20986835658550262, "step": 11759 }, { "epoch": 8.132780082987551, "grad_norm": 8.418069839477539, "learning_rate": 1.0373443983402491e-05, "log_odds_chosen": 10.925003051757812, "log_odds_ratio": -0.00010719512647483498, "logits/chosen": -0.6948219537734985, "logits/rejected": -0.6730424761772156, "logps/chosen": -0.0001794162963051349, "logps/rejected": -2.2363123893737793, "loss": 0.4189, "nll_loss": 0.10472062230110168, "rewards/accuracies": 1.0, "rewards/chosen": -1.794162926671561e-05, "rewards/margins": 0.22361332178115845, "rewards/rejected": -0.22363126277923584, "step": 11760 }, { "epoch": 8.133471645919778, "grad_norm": 4.210556507110596, "learning_rate": 1.0369601967112342e-05, "log_odds_chosen": 11.479969024658203, "log_odds_ratio": -4.065928442287259e-05, "logits/chosen": -0.10444462299346924, "logits/rejected": -0.20772212743759155, "logps/chosen": -0.00046929556992836297, "logps/rejected": -2.8166074752807617, "loss": 0.3729, "nll_loss": 0.09321242570877075, "rewards/accuracies": 1.0, "rewards/chosen": -4.6929555537644774e-05, "rewards/margins": 0.281613826751709, "rewards/rejected": -0.2816607356071472, "step": 11761 }, { "epoch": 8.134163208852005, "grad_norm": 3.6201324462890625, "learning_rate": 1.0365759950822191e-05, "log_odds_chosen": 9.515316009521484, "log_odds_ratio": -0.00029265222838148475, "logits/chosen": -0.45242300629615784, "logits/rejected": -0.5208587646484375, "logps/chosen": -0.0007963213138282299, "logps/rejected": -1.312931776046753, "loss": 0.7563, "nll_loss": 0.18905624747276306, "rewards/accuracies": 1.0, "rewards/chosen": -7.963214011397213e-05, "rewards/margins": 0.13121354579925537, "rewards/rejected": -0.1312931925058365, "step": 11762 }, { "epoch": 8.134854771784232, "grad_norm": 5.176267147064209, "learning_rate": 1.0361917934532043e-05, "log_odds_chosen": 11.17673110961914, "log_odds_ratio": -0.00012910067744087428, "logits/chosen": -0.6295480728149414, "logits/rejected": -0.5572465658187866, "logps/chosen": -0.0006317183724604547, "logps/rejected": -3.2958240509033203, "loss": 0.2742, "nll_loss": 0.06854239851236343, "rewards/accuracies": 1.0, "rewards/chosen": -6.317184306681156e-05, "rewards/margins": 0.32951924204826355, "rewards/rejected": -0.32958242297172546, "step": 11763 }, { "epoch": 8.135546334716459, "grad_norm": 3.88042950630188, "learning_rate": 1.0358075918241894e-05, "log_odds_chosen": 10.987863540649414, "log_odds_ratio": -2.5229761376976967e-05, "logits/chosen": 0.14450721442699432, "logits/rejected": 0.16441746056079865, "logps/chosen": -0.00013737456174567342, "logps/rejected": -1.970505952835083, "loss": 0.4267, "nll_loss": 0.1066756397485733, "rewards/accuracies": 1.0, "rewards/chosen": -1.3737457265960984e-05, "rewards/margins": 0.19703687727451324, "rewards/rejected": -0.19705060124397278, "step": 11764 }, { "epoch": 8.136237897648686, "grad_norm": 4.437648773193359, "learning_rate": 1.0354233901951745e-05, "log_odds_chosen": 11.371580123901367, "log_odds_ratio": -4.398940654937178e-05, "logits/chosen": -0.5579978227615356, "logits/rejected": -0.44501692056655884, "logps/chosen": -0.00014620760339312255, "logps/rejected": -2.3657233715057373, "loss": 0.3917, "nll_loss": 0.09792998433113098, "rewards/accuracies": 1.0, "rewards/chosen": -1.4620760339312255e-05, "rewards/margins": 0.2365577071905136, "rewards/rejected": -0.23657235503196716, "step": 11765 }, { "epoch": 8.136929460580912, "grad_norm": 3.7708580493927, "learning_rate": 1.0350391885661596e-05, "log_odds_chosen": 11.193588256835938, "log_odds_ratio": -2.5481427655904554e-05, "logits/chosen": 0.14303867518901825, "logits/rejected": 0.02036203444004059, "logps/chosen": -0.00014616517000831664, "logps/rejected": -2.056412696838379, "loss": 0.404, "nll_loss": 0.10100395977497101, "rewards/accuracies": 1.0, "rewards/chosen": -1.4616516637033783e-05, "rewards/margins": 0.20562666654586792, "rewards/rejected": -0.2056412696838379, "step": 11766 }, { "epoch": 8.13762102351314, "grad_norm": 8.705947875976562, "learning_rate": 1.0346549869371446e-05, "log_odds_chosen": 10.947639465332031, "log_odds_ratio": -5.8245903346687555e-05, "logits/chosen": -0.5470505952835083, "logits/rejected": -0.5812299251556396, "logps/chosen": -0.0004906191607005894, "logps/rejected": -2.086725950241089, "loss": 0.3744, "nll_loss": 0.09358873963356018, "rewards/accuracies": 1.0, "rewards/chosen": -4.90619167976547e-05, "rewards/margins": 0.20862354338169098, "rewards/rejected": -0.20867261290550232, "step": 11767 }, { "epoch": 8.138312586445366, "grad_norm": 4.619115829467773, "learning_rate": 1.0342707853081297e-05, "log_odds_chosen": 10.422161102294922, "log_odds_ratio": -6.12240910413675e-05, "logits/chosen": -0.33057886362075806, "logits/rejected": -0.3734843134880066, "logps/chosen": -0.00027954988763667643, "logps/rejected": -2.0615577697753906, "loss": 0.3589, "nll_loss": 0.08972762525081635, "rewards/accuracies": 1.0, "rewards/chosen": -2.7954987672274e-05, "rewards/margins": 0.2061278223991394, "rewards/rejected": -0.20615577697753906, "step": 11768 }, { "epoch": 8.139004149377593, "grad_norm": 4.015398025512695, "learning_rate": 1.0338865836791148e-05, "log_odds_chosen": 11.448203086853027, "log_odds_ratio": -2.4008486434468068e-05, "logits/chosen": 0.008959844708442688, "logits/rejected": -0.04505334049463272, "logps/chosen": -0.00021311917225830257, "logps/rejected": -2.0293219089508057, "loss": 0.389, "nll_loss": 0.09724324196577072, "rewards/accuracies": 1.0, "rewards/chosen": -2.13119183172239e-05, "rewards/margins": 0.2029108852148056, "rewards/rejected": -0.2029321789741516, "step": 11769 }, { "epoch": 8.13969571230982, "grad_norm": 3.8475868701934814, "learning_rate": 1.0335023820501e-05, "log_odds_chosen": 10.870960235595703, "log_odds_ratio": -0.00019746023463085294, "logits/chosen": -0.06472350656986237, "logits/rejected": -0.24817217886447906, "logps/chosen": -0.0004481807118281722, "logps/rejected": -2.510279417037964, "loss": 0.5035, "nll_loss": 0.12584780156612396, "rewards/accuracies": 1.0, "rewards/chosen": -4.4818069000029936e-05, "rewards/margins": 0.2509831488132477, "rewards/rejected": -0.2510279417037964, "step": 11770 }, { "epoch": 8.140387275242047, "grad_norm": 3.4504826068878174, "learning_rate": 1.0331181804210851e-05, "log_odds_chosen": 10.947843551635742, "log_odds_ratio": -2.3020382286631502e-05, "logits/chosen": -0.10515400767326355, "logits/rejected": -0.16172391176223755, "logps/chosen": -0.0002008742158068344, "logps/rejected": -2.12304425239563, "loss": 0.3276, "nll_loss": 0.08189594745635986, "rewards/accuracies": 1.0, "rewards/chosen": -2.008742194448132e-05, "rewards/margins": 0.21228432655334473, "rewards/rejected": -0.21230441331863403, "step": 11771 }, { "epoch": 8.141078838174273, "grad_norm": 3.5445170402526855, "learning_rate": 1.03273397879207e-05, "log_odds_chosen": 10.88604736328125, "log_odds_ratio": -8.225615601986647e-05, "logits/chosen": -0.18684306740760803, "logits/rejected": -0.23621304333209991, "logps/chosen": -0.00030686456011608243, "logps/rejected": -2.4081804752349854, "loss": 0.3665, "nll_loss": 0.09162560850381851, "rewards/accuracies": 1.0, "rewards/chosen": -3.068646037718281e-05, "rewards/margins": 0.2407873570919037, "rewards/rejected": -0.24081803858280182, "step": 11772 }, { "epoch": 8.1417704011065, "grad_norm": 6.290334224700928, "learning_rate": 1.0323497771630552e-05, "log_odds_chosen": 12.10903549194336, "log_odds_ratio": -1.729245559545234e-05, "logits/chosen": 0.04256100207567215, "logits/rejected": -0.14866912364959717, "logps/chosen": -0.00031669961754232645, "logps/rejected": -3.1162285804748535, "loss": 0.5985, "nll_loss": 0.14962761104106903, "rewards/accuracies": 1.0, "rewards/chosen": -3.1669962481828406e-05, "rewards/margins": 0.31159117817878723, "rewards/rejected": -0.31162285804748535, "step": 11773 }, { "epoch": 8.142461964038727, "grad_norm": 3.9987449645996094, "learning_rate": 1.0319655755340403e-05, "log_odds_chosen": 11.783027648925781, "log_odds_ratio": -2.3419324861606583e-05, "logits/chosen": -0.1818915605545044, "logits/rejected": -0.1939907670021057, "logps/chosen": -0.00046360495616681874, "logps/rejected": -3.1576924324035645, "loss": 0.4404, "nll_loss": 0.11008903384208679, "rewards/accuracies": 1.0, "rewards/chosen": -4.6360495616681874e-05, "rewards/margins": 0.31572288274765015, "rewards/rejected": -0.3157692551612854, "step": 11774 }, { "epoch": 8.143153526970954, "grad_norm": 6.004032611846924, "learning_rate": 1.0315813739050254e-05, "log_odds_chosen": 10.846492767333984, "log_odds_ratio": -4.8165969928959385e-05, "logits/chosen": -0.6579508185386658, "logits/rejected": -0.6603182554244995, "logps/chosen": -0.00030509906355291605, "logps/rejected": -2.0657896995544434, "loss": 0.5789, "nll_loss": 0.14472922682762146, "rewards/accuracies": 1.0, "rewards/chosen": -3.0509905627695844e-05, "rewards/margins": 0.20654848217964172, "rewards/rejected": -0.20657898485660553, "step": 11775 }, { "epoch": 8.14384508990318, "grad_norm": 3.6259446144104004, "learning_rate": 1.0311971722760105e-05, "log_odds_chosen": 11.277198791503906, "log_odds_ratio": -7.718206325080246e-05, "logits/chosen": -0.22679950296878815, "logits/rejected": -0.28733691573143005, "logps/chosen": -0.0002462563570588827, "logps/rejected": -2.8088607788085938, "loss": 0.4148, "nll_loss": 0.103690966963768, "rewards/accuracies": 1.0, "rewards/chosen": -2.4625636797281913e-05, "rewards/margins": 0.28086143732070923, "rewards/rejected": -0.28088608384132385, "step": 11776 }, { "epoch": 8.144536652835408, "grad_norm": 2.8234851360321045, "learning_rate": 1.0308129706469955e-05, "log_odds_chosen": 11.608818054199219, "log_odds_ratio": -2.148692510672845e-05, "logits/chosen": 0.08555248379707336, "logits/rejected": 0.060075029730796814, "logps/chosen": -0.00011031079338863492, "logps/rejected": -2.2806997299194336, "loss": 0.5941, "nll_loss": 0.14852266013622284, "rewards/accuracies": 1.0, "rewards/chosen": -1.103107842936879e-05, "rewards/margins": 0.22805896401405334, "rewards/rejected": -0.2280699908733368, "step": 11777 }, { "epoch": 8.145228215767634, "grad_norm": 3.5794787406921387, "learning_rate": 1.0304287690179806e-05, "log_odds_chosen": 10.891388893127441, "log_odds_ratio": -0.00013585921260528266, "logits/chosen": -0.7135385274887085, "logits/rejected": -0.84055495262146, "logps/chosen": -0.0002605140907689929, "logps/rejected": -2.4400527477264404, "loss": 0.4378, "nll_loss": 0.10942918062210083, "rewards/accuracies": 1.0, "rewards/chosen": -2.6051407985505648e-05, "rewards/margins": 0.24397921562194824, "rewards/rejected": -0.2440052628517151, "step": 11778 }, { "epoch": 8.145919778699861, "grad_norm": 4.064663887023926, "learning_rate": 1.0300445673889659e-05, "log_odds_chosen": 11.208548545837402, "log_odds_ratio": -8.20230197859928e-05, "logits/chosen": -0.3107730448246002, "logits/rejected": -0.275856614112854, "logps/chosen": -0.0007718256092630327, "logps/rejected": -2.843167304992676, "loss": 0.4666, "nll_loss": 0.11663761734962463, "rewards/accuracies": 1.0, "rewards/chosen": -7.718256529187784e-05, "rewards/margins": 0.2842395603656769, "rewards/rejected": -0.284316748380661, "step": 11779 }, { "epoch": 8.146611341632088, "grad_norm": 4.965521812438965, "learning_rate": 1.029660365759951e-05, "log_odds_chosen": 10.105997085571289, "log_odds_ratio": -6.436584226321429e-05, "logits/chosen": -0.10426747053861618, "logits/rejected": -0.13834770023822784, "logps/chosen": -0.0003076042339671403, "logps/rejected": -1.3770906925201416, "loss": 0.3954, "nll_loss": 0.0988457053899765, "rewards/accuracies": 1.0, "rewards/chosen": -3.076042412430979e-05, "rewards/margins": 0.13767831027507782, "rewards/rejected": -0.13770908117294312, "step": 11780 }, { "epoch": 8.147302904564315, "grad_norm": 2.708247423171997, "learning_rate": 1.0292761641309358e-05, "log_odds_chosen": 11.923017501831055, "log_odds_ratio": -8.847984645399265e-06, "logits/chosen": -0.5788747072219849, "logits/rejected": -0.492117702960968, "logps/chosen": -6.703606777591631e-05, "logps/rejected": -2.3570947647094727, "loss": 0.2904, "nll_loss": 0.07258927077054977, "rewards/accuracies": 1.0, "rewards/chosen": -6.70360623189481e-06, "rewards/margins": 0.23570279777050018, "rewards/rejected": -0.23570948839187622, "step": 11781 }, { "epoch": 8.147994467496542, "grad_norm": 3.8277664184570312, "learning_rate": 1.0288919625019211e-05, "log_odds_chosen": 10.717927932739258, "log_odds_ratio": -0.00020918138034176081, "logits/chosen": -0.47696831822395325, "logits/rejected": -0.43856728076934814, "logps/chosen": -0.00017271784599870443, "logps/rejected": -2.05072283744812, "loss": 0.3742, "nll_loss": 0.0935308188199997, "rewards/accuracies": 1.0, "rewards/chosen": -1.7271784599870443e-05, "rewards/margins": 0.20505502820014954, "rewards/rejected": -0.205072283744812, "step": 11782 }, { "epoch": 8.148686030428768, "grad_norm": 4.970156192779541, "learning_rate": 1.0285077608729062e-05, "log_odds_chosen": 10.575085639953613, "log_odds_ratio": -0.0001494882453698665, "logits/chosen": -0.24861939251422882, "logits/rejected": -0.3763630986213684, "logps/chosen": -0.0002744381199590862, "logps/rejected": -2.2138054370880127, "loss": 0.7953, "nll_loss": 0.1987987905740738, "rewards/accuracies": 1.0, "rewards/chosen": -2.7443811632110737e-05, "rewards/margins": 0.22135311365127563, "rewards/rejected": -0.2213805466890335, "step": 11783 }, { "epoch": 8.149377593360995, "grad_norm": 3.446476459503174, "learning_rate": 1.0281235592438912e-05, "log_odds_chosen": 10.981772422790527, "log_odds_ratio": -3.398286207811907e-05, "logits/chosen": -0.15307289361953735, "logits/rejected": -0.32064151763916016, "logps/chosen": -0.000214852683711797, "logps/rejected": -2.250171422958374, "loss": 0.536, "nll_loss": 0.13399583101272583, "rewards/accuracies": 1.0, "rewards/chosen": -2.1485269826371223e-05, "rewards/margins": 0.2249956727027893, "rewards/rejected": -0.22501714527606964, "step": 11784 }, { "epoch": 8.150069156293222, "grad_norm": 5.106137275695801, "learning_rate": 1.0277393576148763e-05, "log_odds_chosen": 11.902192115783691, "log_odds_ratio": -1.2729897207464091e-05, "logits/chosen": -0.40789902210235596, "logits/rejected": -0.5072999000549316, "logps/chosen": -0.00012040400179103017, "logps/rejected": -2.754049777984619, "loss": 0.3974, "nll_loss": 0.09935708343982697, "rewards/accuracies": 1.0, "rewards/chosen": -1.2040400179103017e-05, "rewards/margins": 0.27539294958114624, "rewards/rejected": -0.27540498971939087, "step": 11785 }, { "epoch": 8.150760719225449, "grad_norm": 4.746187210083008, "learning_rate": 1.0273551559858614e-05, "log_odds_chosen": 11.31079387664795, "log_odds_ratio": -2.847578070941381e-05, "logits/chosen": -0.35697272419929504, "logits/rejected": -0.35685810446739197, "logps/chosen": -0.00015442268340848386, "logps/rejected": -2.295431137084961, "loss": 0.5456, "nll_loss": 0.13639333844184875, "rewards/accuracies": 1.0, "rewards/chosen": -1.544227052363567e-05, "rewards/margins": 0.22952768206596375, "rewards/rejected": -0.22954311966896057, "step": 11786 }, { "epoch": 8.151452282157676, "grad_norm": 4.14506196975708, "learning_rate": 1.0269709543568465e-05, "log_odds_chosen": 10.621984481811523, "log_odds_ratio": -3.840686258627102e-05, "logits/chosen": -0.2479134351015091, "logits/rejected": -0.2759229242801666, "logps/chosen": -0.00012764088751282543, "logps/rejected": -1.6436896324157715, "loss": 0.5525, "nll_loss": 0.1381143033504486, "rewards/accuracies": 1.0, "rewards/chosen": -1.2764088751282543e-05, "rewards/margins": 0.16435620188713074, "rewards/rejected": -0.16436897218227386, "step": 11787 }, { "epoch": 8.152143845089903, "grad_norm": 6.618087291717529, "learning_rate": 1.0265867527278317e-05, "log_odds_chosen": 11.717941284179688, "log_odds_ratio": -2.240051981061697e-05, "logits/chosen": -0.7674741744995117, "logits/rejected": -0.7641814947128296, "logps/chosen": -7.586412539239973e-05, "logps/rejected": -1.8555504083633423, "loss": 0.3964, "nll_loss": 0.09909586608409882, "rewards/accuracies": 1.0, "rewards/chosen": -7.586412266391562e-06, "rewards/margins": 0.18554747104644775, "rewards/rejected": -0.18555505573749542, "step": 11788 }, { "epoch": 8.15283540802213, "grad_norm": 8.497203826904297, "learning_rate": 1.0262025510988168e-05, "log_odds_chosen": 11.106819152832031, "log_odds_ratio": -4.6539134928025305e-05, "logits/chosen": -0.480145663022995, "logits/rejected": -0.5249185562133789, "logps/chosen": -0.0002152361412299797, "logps/rejected": -2.2641043663024902, "loss": 0.4401, "nll_loss": 0.11001075804233551, "rewards/accuracies": 1.0, "rewards/chosen": -2.1523612304008566e-05, "rewards/margins": 0.22638891637325287, "rewards/rejected": -0.22641046345233917, "step": 11789 }, { "epoch": 8.153526970954356, "grad_norm": 2.5081300735473633, "learning_rate": 1.0258183494698017e-05, "log_odds_chosen": 11.968205451965332, "log_odds_ratio": -1.1951849955949001e-05, "logits/chosen": -0.3167341351509094, "logits/rejected": -0.3897658586502075, "logps/chosen": -5.4685951909050345e-05, "logps/rejected": -1.9660568237304688, "loss": 0.3649, "nll_loss": 0.09123249351978302, "rewards/accuracies": 1.0, "rewards/chosen": -5.468594736157684e-06, "rewards/margins": 0.19660022854804993, "rewards/rejected": -0.19660569727420807, "step": 11790 }, { "epoch": 8.154218533886583, "grad_norm": 4.652139663696289, "learning_rate": 1.025434147840787e-05, "log_odds_chosen": 11.587353706359863, "log_odds_ratio": -2.519033478165511e-05, "logits/chosen": -0.056919172406196594, "logits/rejected": -0.08620971441268921, "logps/chosen": -0.00015185258234851062, "logps/rejected": -2.5194754600524902, "loss": 0.7231, "nll_loss": 0.1807647943496704, "rewards/accuracies": 1.0, "rewards/chosen": -1.5185258234851062e-05, "rewards/margins": 0.25193238258361816, "rewards/rejected": -0.2519475221633911, "step": 11791 }, { "epoch": 8.15491009681881, "grad_norm": 5.727940082550049, "learning_rate": 1.025049946211772e-05, "log_odds_chosen": 11.416126251220703, "log_odds_ratio": -5.229043017607182e-05, "logits/chosen": -0.43798014521598816, "logits/rejected": -0.37286901473999023, "logps/chosen": -0.0001776377612259239, "logps/rejected": -2.1930556297302246, "loss": 0.5775, "nll_loss": 0.14437736570835114, "rewards/accuracies": 1.0, "rewards/chosen": -1.7763777577783912e-05, "rewards/margins": 0.21928778290748596, "rewards/rejected": -0.21930554509162903, "step": 11792 }, { "epoch": 8.155601659751037, "grad_norm": 3.599702835083008, "learning_rate": 1.0246657445827571e-05, "log_odds_chosen": 11.529845237731934, "log_odds_ratio": -3.249730434617959e-05, "logits/chosen": -0.5446562767028809, "logits/rejected": -0.558387041091919, "logps/chosen": -0.0001430368865840137, "logps/rejected": -2.637044906616211, "loss": 0.3239, "nll_loss": 0.08098269253969193, "rewards/accuracies": 1.0, "rewards/chosen": -1.4303689567896072e-05, "rewards/margins": 0.26369020342826843, "rewards/rejected": -0.26370447874069214, "step": 11793 }, { "epoch": 8.156293222683264, "grad_norm": 4.702301025390625, "learning_rate": 1.0242815429537422e-05, "log_odds_chosen": 10.420896530151367, "log_odds_ratio": -6.696030322927982e-05, "logits/chosen": 0.09832927584648132, "logits/rejected": 0.023856177926063538, "logps/chosen": -0.00029260682640597224, "logps/rejected": -1.9983750581741333, "loss": 0.4929, "nll_loss": 0.12321722507476807, "rewards/accuracies": 1.0, "rewards/chosen": -2.9260681913001463e-05, "rewards/margins": 0.1998082399368286, "rewards/rejected": -0.19983749091625214, "step": 11794 }, { "epoch": 8.15698478561549, "grad_norm": 2.956678628921509, "learning_rate": 1.0238973413247272e-05, "log_odds_chosen": 11.952880859375, "log_odds_ratio": -1.3823173503624275e-05, "logits/chosen": -0.6629164218902588, "logits/rejected": -0.788197934627533, "logps/chosen": -0.00010116046178154647, "logps/rejected": -2.5265512466430664, "loss": 0.3091, "nll_loss": 0.07727310061454773, "rewards/accuracies": 1.0, "rewards/chosen": -1.011604763334617e-05, "rewards/margins": 0.25264501571655273, "rewards/rejected": -0.25265511870384216, "step": 11795 }, { "epoch": 8.157676348547717, "grad_norm": 4.642671585083008, "learning_rate": 1.0235131396957123e-05, "log_odds_chosen": 10.813591003417969, "log_odds_ratio": -0.0005754618323408067, "logits/chosen": -0.5008108019828796, "logits/rejected": -0.49642693996429443, "logps/chosen": -0.0010430947877466679, "logps/rejected": -2.2011709213256836, "loss": 0.3081, "nll_loss": 0.07697095721960068, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010430948168504983, "rewards/margins": 0.22001276910305023, "rewards/rejected": -0.22011709213256836, "step": 11796 }, { "epoch": 8.158367911479944, "grad_norm": 3.9972987174987793, "learning_rate": 1.0231289380666976e-05, "log_odds_chosen": 11.959649085998535, "log_odds_ratio": -1.605860234121792e-05, "logits/chosen": -0.7508334517478943, "logits/rejected": -0.7808016538619995, "logps/chosen": -0.0001648375764489174, "logps/rejected": -2.869148015975952, "loss": 0.3852, "nll_loss": 0.09630319476127625, "rewards/accuracies": 1.0, "rewards/chosen": -1.6483760191476904e-05, "rewards/margins": 0.28689831495285034, "rewards/rejected": -0.2869148254394531, "step": 11797 }, { "epoch": 8.159059474412171, "grad_norm": 3.9052696228027344, "learning_rate": 1.0227447364376826e-05, "log_odds_chosen": 12.224464416503906, "log_odds_ratio": -1.2303040421102196e-05, "logits/chosen": -0.33660537004470825, "logits/rejected": -0.3174796402454376, "logps/chosen": -7.784018816892058e-05, "logps/rejected": -2.7565410137176514, "loss": 0.3949, "nll_loss": 0.09873456507921219, "rewards/accuracies": 1.0, "rewards/chosen": -7.784018634993117e-06, "rewards/margins": 0.2756463289260864, "rewards/rejected": -0.2756541073322296, "step": 11798 }, { "epoch": 8.159751037344398, "grad_norm": 4.492676734924316, "learning_rate": 1.0223605348086675e-05, "log_odds_chosen": 10.744736671447754, "log_odds_ratio": -0.0003734507772605866, "logits/chosen": -0.0019226372241973877, "logits/rejected": 0.035294584929943085, "logps/chosen": -0.0003265690465923399, "logps/rejected": -1.884582757949829, "loss": 0.506, "nll_loss": 0.1264553815126419, "rewards/accuracies": 1.0, "rewards/chosen": -3.26569024764467e-05, "rewards/margins": 0.18842563033103943, "rewards/rejected": -0.18845829367637634, "step": 11799 }, { "epoch": 8.160442600276625, "grad_norm": 4.888298988342285, "learning_rate": 1.0219763331796526e-05, "log_odds_chosen": 11.357194900512695, "log_odds_ratio": -3.0228675313992426e-05, "logits/chosen": -0.4045562148094177, "logits/rejected": -0.4352848529815674, "logps/chosen": -0.00028314441442489624, "logps/rejected": -2.7511138916015625, "loss": 0.31, "nll_loss": 0.0774860605597496, "rewards/accuracies": 1.0, "rewards/chosen": -2.8314443625276908e-05, "rewards/margins": 0.275083065032959, "rewards/rejected": -0.2751113772392273, "step": 11800 }, { "epoch": 8.161134163208851, "grad_norm": 4.62258768081665, "learning_rate": 1.0215921315506379e-05, "log_odds_chosen": 10.585798263549805, "log_odds_ratio": -3.910541272489354e-05, "logits/chosen": -0.291620135307312, "logits/rejected": -0.32577189803123474, "logps/chosen": -0.00016927148681133986, "logps/rejected": -1.7979787588119507, "loss": 0.513, "nll_loss": 0.12825287878513336, "rewards/accuracies": 1.0, "rewards/chosen": -1.6927147953538224e-05, "rewards/margins": 0.1797809600830078, "rewards/rejected": -0.17979787290096283, "step": 11801 }, { "epoch": 8.161825726141078, "grad_norm": 3.9092464447021484, "learning_rate": 1.021207929921623e-05, "log_odds_chosen": 10.713225364685059, "log_odds_ratio": -7.490997086279094e-05, "logits/chosen": -0.1884932816028595, "logits/rejected": -0.263356477022171, "logps/chosen": -0.00043695775093510747, "logps/rejected": -1.96100914478302, "loss": 0.4515, "nll_loss": 0.11286258697509766, "rewards/accuracies": 1.0, "rewards/chosen": -4.3695774365914986e-05, "rewards/margins": 0.19605720043182373, "rewards/rejected": -0.19610090553760529, "step": 11802 }, { "epoch": 8.162517289073305, "grad_norm": 4.2609171867370605, "learning_rate": 1.020823728292608e-05, "log_odds_chosen": 10.838077545166016, "log_odds_ratio": -0.000666751351673156, "logits/chosen": -0.4899050295352936, "logits/rejected": -0.5162637829780579, "logps/chosen": -0.002191039966419339, "logps/rejected": -2.339308977127075, "loss": 0.398, "nll_loss": 0.09942437708377838, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021910399664193392, "rewards/margins": 0.23371180891990662, "rewards/rejected": -0.23393090069293976, "step": 11803 }, { "epoch": 8.163208852005532, "grad_norm": 3.8477578163146973, "learning_rate": 1.020439526663593e-05, "log_odds_chosen": 11.547567367553711, "log_odds_ratio": -5.723809954361059e-05, "logits/chosen": -0.16270583868026733, "logits/rejected": -0.2292848378419876, "logps/chosen": -0.0004535773186944425, "logps/rejected": -2.771484375, "loss": 0.8456, "nll_loss": 0.2113933265209198, "rewards/accuracies": 1.0, "rewards/chosen": -4.535773041425273e-05, "rewards/margins": 0.27710309624671936, "rewards/rejected": -0.27714845538139343, "step": 11804 }, { "epoch": 8.163900414937759, "grad_norm": 5.081116676330566, "learning_rate": 1.0200553250345782e-05, "log_odds_chosen": 10.498542785644531, "log_odds_ratio": -0.00021043805463705212, "logits/chosen": -0.15183137357234955, "logits/rejected": -0.26076340675354004, "logps/chosen": -0.0002863018016796559, "logps/rejected": -1.7621605396270752, "loss": 0.6046, "nll_loss": 0.15112774074077606, "rewards/accuracies": 1.0, "rewards/chosen": -2.863018016796559e-05, "rewards/margins": 0.1761874258518219, "rewards/rejected": -0.1762160360813141, "step": 11805 }, { "epoch": 8.164591977869986, "grad_norm": 3.674516201019287, "learning_rate": 1.0196711234055632e-05, "log_odds_chosen": 11.024900436401367, "log_odds_ratio": -2.3138207325246185e-05, "logits/chosen": -0.15182772278785706, "logits/rejected": -0.2801783084869385, "logps/chosen": -0.000141732714837417, "logps/rejected": -2.197587490081787, "loss": 0.4128, "nll_loss": 0.10320456326007843, "rewards/accuracies": 1.0, "rewards/chosen": -1.417327075614594e-05, "rewards/margins": 0.21974456310272217, "rewards/rejected": -0.2197587490081787, "step": 11806 }, { "epoch": 8.165283540802212, "grad_norm": 3.9621241092681885, "learning_rate": 1.0192869217765485e-05, "log_odds_chosen": 10.270439147949219, "log_odds_ratio": -7.926029502414167e-05, "logits/chosen": -0.06136815622448921, "logits/rejected": 0.0402841717004776, "logps/chosen": -0.0002291160635650158, "logps/rejected": -1.7038178443908691, "loss": 0.4296, "nll_loss": 0.10739271342754364, "rewards/accuracies": 1.0, "rewards/chosen": -2.291160672029946e-05, "rewards/margins": 0.17035886645317078, "rewards/rejected": -0.17038178443908691, "step": 11807 }, { "epoch": 8.16597510373444, "grad_norm": 4.1792216300964355, "learning_rate": 1.0189027201475334e-05, "log_odds_chosen": 9.855060577392578, "log_odds_ratio": -0.0003239849756937474, "logits/chosen": -0.07540471851825714, "logits/rejected": -0.09573408961296082, "logps/chosen": -0.0012772049522027373, "logps/rejected": -2.435047149658203, "loss": 0.5607, "nll_loss": 0.14014235138893127, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012772050104103982, "rewards/margins": 0.2433769702911377, "rewards/rejected": -0.24350470304489136, "step": 11808 }, { "epoch": 8.166666666666666, "grad_norm": 3.7249915599823, "learning_rate": 1.0185185185185185e-05, "log_odds_chosen": 9.077827453613281, "log_odds_ratio": -0.00032695557456463575, "logits/chosen": -0.1378357857465744, "logits/rejected": -0.1385318785905838, "logps/chosen": -0.0005659713642671704, "logps/rejected": -0.9379556179046631, "loss": 0.2906, "nll_loss": 0.07261990010738373, "rewards/accuracies": 1.0, "rewards/chosen": -5.6597142247483134e-05, "rewards/margins": 0.09373895823955536, "rewards/rejected": -0.09379556775093079, "step": 11809 }, { "epoch": 8.167358229598893, "grad_norm": 3.8937172889709473, "learning_rate": 1.0181343168895037e-05, "log_odds_chosen": 10.944185256958008, "log_odds_ratio": -3.4075532312272117e-05, "logits/chosen": -0.16298729181289673, "logits/rejected": -0.24571256339550018, "logps/chosen": -0.00010787440987769514, "logps/rejected": -1.5944535732269287, "loss": 0.3732, "nll_loss": 0.09330610185861588, "rewards/accuracies": 1.0, "rewards/chosen": -1.0787441169668455e-05, "rewards/margins": 0.15943457186222076, "rewards/rejected": -0.1594453603029251, "step": 11810 }, { "epoch": 8.16804979253112, "grad_norm": 4.463415145874023, "learning_rate": 1.0177501152604888e-05, "log_odds_chosen": 11.398051261901855, "log_odds_ratio": -2.5497522074147128e-05, "logits/chosen": -0.048560142517089844, "logits/rejected": 0.1563296765089035, "logps/chosen": -0.00015461770817637444, "logps/rejected": -2.366382122039795, "loss": 0.3542, "nll_loss": 0.08854615688323975, "rewards/accuracies": 1.0, "rewards/chosen": -1.5461770090041682e-05, "rewards/margins": 0.23662272095680237, "rewards/rejected": -0.23663818836212158, "step": 11811 }, { "epoch": 8.168741355463347, "grad_norm": 3.4059031009674072, "learning_rate": 1.0173659136314738e-05, "log_odds_chosen": 9.915818214416504, "log_odds_ratio": -0.00022760103456676006, "logits/chosen": -0.2256205677986145, "logits/rejected": -0.451595664024353, "logps/chosen": -0.0004684239684138447, "logps/rejected": -1.9310569763183594, "loss": 0.2849, "nll_loss": 0.07119649648666382, "rewards/accuracies": 1.0, "rewards/chosen": -4.684239684138447e-05, "rewards/margins": 0.19305887818336487, "rewards/rejected": -0.19310569763183594, "step": 11812 }, { "epoch": 8.169432918395573, "grad_norm": 4.045708656311035, "learning_rate": 1.016981712002459e-05, "log_odds_chosen": 11.007131576538086, "log_odds_ratio": -5.7747245591599494e-05, "logits/chosen": 0.21243897080421448, "logits/rejected": 0.13054439425468445, "logps/chosen": -0.0003317214432172477, "logps/rejected": -2.272037982940674, "loss": 0.4716, "nll_loss": 0.1179007962346077, "rewards/accuracies": 1.0, "rewards/chosen": -3.317214213893749e-05, "rewards/margins": 0.22717060148715973, "rewards/rejected": -0.22720378637313843, "step": 11813 }, { "epoch": 8.1701244813278, "grad_norm": 2.7341315746307373, "learning_rate": 1.016597510373444e-05, "log_odds_chosen": 10.3163423538208, "log_odds_ratio": -5.366649565985426e-05, "logits/chosen": -0.20679514110088348, "logits/rejected": -0.19088438153266907, "logps/chosen": -0.0001653216895647347, "logps/rejected": -1.5532350540161133, "loss": 0.229, "nll_loss": 0.05723622441291809, "rewards/accuracies": 1.0, "rewards/chosen": -1.653216895647347e-05, "rewards/margins": 0.15530698001384735, "rewards/rejected": -0.15532350540161133, "step": 11814 }, { "epoch": 8.170816044260027, "grad_norm": 5.030986309051514, "learning_rate": 1.016213308744429e-05, "log_odds_chosen": 10.275716781616211, "log_odds_ratio": -0.00022884247300680727, "logits/chosen": -0.5482270121574402, "logits/rejected": -0.47892725467681885, "logps/chosen": -0.00023503063130192459, "logps/rejected": -1.9703631401062012, "loss": 0.4739, "nll_loss": 0.11845801770687103, "rewards/accuracies": 1.0, "rewards/chosen": -2.350306385778822e-05, "rewards/margins": 0.19701281189918518, "rewards/rejected": -0.19703632593154907, "step": 11815 }, { "epoch": 8.171507607192254, "grad_norm": 12.4885892868042, "learning_rate": 1.0158291071154143e-05, "log_odds_chosen": 11.943331718444824, "log_odds_ratio": -9.822726497077383e-06, "logits/chosen": -0.07970777153968811, "logits/rejected": -0.1332622617483139, "logps/chosen": -8.568631892558187e-05, "logps/rejected": -2.324699878692627, "loss": 0.5231, "nll_loss": 0.13078296184539795, "rewards/accuracies": 1.0, "rewards/chosen": -8.568631528760307e-06, "rewards/margins": 0.23246142268180847, "rewards/rejected": -0.23246999084949493, "step": 11816 }, { "epoch": 8.17219917012448, "grad_norm": 5.055882930755615, "learning_rate": 1.0154449054863994e-05, "log_odds_chosen": 9.738774299621582, "log_odds_ratio": -0.00040705245919525623, "logits/chosen": -0.6552140712738037, "logits/rejected": -0.7386389970779419, "logps/chosen": -0.0006381264538504183, "logps/rejected": -1.8834278583526611, "loss": 0.4624, "nll_loss": 0.11555864661931992, "rewards/accuracies": 1.0, "rewards/chosen": -6.381265120580792e-05, "rewards/margins": 0.1882789582014084, "rewards/rejected": -0.18834277987480164, "step": 11817 }, { "epoch": 8.172890733056708, "grad_norm": 4.6499505043029785, "learning_rate": 1.0150607038573843e-05, "log_odds_chosen": 10.724308967590332, "log_odds_ratio": -5.115863314131275e-05, "logits/chosen": -0.25859251618385315, "logits/rejected": -0.3702784776687622, "logps/chosen": -0.00018527252541389316, "logps/rejected": -2.147641897201538, "loss": 0.4386, "nll_loss": 0.10964687168598175, "rewards/accuracies": 1.0, "rewards/chosen": -1.8527252905187197e-05, "rewards/margins": 0.2147456556558609, "rewards/rejected": -0.21476417779922485, "step": 11818 }, { "epoch": 8.173582295988934, "grad_norm": 4.096492290496826, "learning_rate": 1.0146765022283695e-05, "log_odds_chosen": 11.613139152526855, "log_odds_ratio": -4.364008054835722e-05, "logits/chosen": -0.44727540016174316, "logits/rejected": -0.5089828372001648, "logps/chosen": -0.0002530687488615513, "logps/rejected": -2.85605525970459, "loss": 0.3856, "nll_loss": 0.09638633579015732, "rewards/accuracies": 1.0, "rewards/chosen": -2.530687561375089e-05, "rewards/margins": 0.28558024764060974, "rewards/rejected": -0.2856055498123169, "step": 11819 }, { "epoch": 8.174273858921161, "grad_norm": 3.698554039001465, "learning_rate": 1.0142923005993546e-05, "log_odds_chosen": 10.253783226013184, "log_odds_ratio": -9.055588452611119e-05, "logits/chosen": -0.10288023203611374, "logits/rejected": -0.14574682712554932, "logps/chosen": -0.0001656345120863989, "logps/rejected": -1.596636414527893, "loss": 0.3126, "nll_loss": 0.07813283801078796, "rewards/accuracies": 1.0, "rewards/chosen": -1.656345193623565e-05, "rewards/margins": 0.15964707732200623, "rewards/rejected": -0.1596636325120926, "step": 11820 }, { "epoch": 8.174965421853388, "grad_norm": 3.5334091186523438, "learning_rate": 1.0139080989703397e-05, "log_odds_chosen": 11.575165748596191, "log_odds_ratio": -3.546190419001505e-05, "logits/chosen": -0.6529729962348938, "logits/rejected": -0.7235559225082397, "logps/chosen": -0.000588984985370189, "logps/rejected": -3.3183703422546387, "loss": 0.7238, "nll_loss": 0.1809358298778534, "rewards/accuracies": 1.0, "rewards/chosen": -5.8898498537018895e-05, "rewards/margins": 0.3317781686782837, "rewards/rejected": -0.3318370580673218, "step": 11821 }, { "epoch": 8.175656984785615, "grad_norm": 4.298321723937988, "learning_rate": 1.0135238973413248e-05, "log_odds_chosen": 11.861610412597656, "log_odds_ratio": -2.6712465114542283e-05, "logits/chosen": -0.22402334213256836, "logits/rejected": -0.23995819687843323, "logps/chosen": -0.0014007949503138661, "logps/rejected": -3.094560384750366, "loss": 0.4677, "nll_loss": 0.11691083014011383, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001400795008521527, "rewards/margins": 0.309315949678421, "rewards/rejected": -0.3094560205936432, "step": 11822 }, { "epoch": 8.176348547717842, "grad_norm": 3.889859437942505, "learning_rate": 1.0131396957123098e-05, "log_odds_chosen": 10.959208488464355, "log_odds_ratio": -5.500488987308927e-05, "logits/chosen": -0.42340776324272156, "logits/rejected": -0.37429279088974, "logps/chosen": -0.000169272898347117, "logps/rejected": -2.243509292602539, "loss": 0.3862, "nll_loss": 0.09653585404157639, "rewards/accuracies": 1.0, "rewards/chosen": -1.69272898347117e-05, "rewards/margins": 0.2243340015411377, "rewards/rejected": -0.2243509292602539, "step": 11823 }, { "epoch": 8.177040110650069, "grad_norm": 4.201392650604248, "learning_rate": 1.012755494083295e-05, "log_odds_chosen": 11.622950553894043, "log_odds_ratio": -2.326399044250138e-05, "logits/chosen": -0.8622159361839294, "logits/rejected": -0.8740267157554626, "logps/chosen": -0.00017339483019895852, "logps/rejected": -2.517303943634033, "loss": 0.5086, "nll_loss": 0.12715020775794983, "rewards/accuracies": 1.0, "rewards/chosen": -1.7339485566481017e-05, "rewards/margins": 0.2517130374908447, "rewards/rejected": -0.25173038244247437, "step": 11824 }, { "epoch": 8.177731673582295, "grad_norm": 4.291811943054199, "learning_rate": 1.0123712924542802e-05, "log_odds_chosen": 9.529434204101562, "log_odds_ratio": -0.0008333018631674349, "logits/chosen": -0.5183630585670471, "logits/rejected": -0.45871567726135254, "logps/chosen": -0.0013324617175385356, "logps/rejected": -1.9355536699295044, "loss": 0.2921, "nll_loss": 0.07294148206710815, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013324618339538574, "rewards/margins": 0.1934221237897873, "rewards/rejected": -0.19355536997318268, "step": 11825 }, { "epoch": 8.178423236514522, "grad_norm": 4.345532417297363, "learning_rate": 1.0119870908252652e-05, "log_odds_chosen": 11.120061874389648, "log_odds_ratio": -3.411036595935002e-05, "logits/chosen": -0.23412591218948364, "logits/rejected": -0.2507272958755493, "logps/chosen": -0.00027076120022684336, "logps/rejected": -2.612562656402588, "loss": 0.4705, "nll_loss": 0.11761318892240524, "rewards/accuracies": 1.0, "rewards/chosen": -2.7076119295088574e-05, "rewards/margins": 0.26122918725013733, "rewards/rejected": -0.26125624775886536, "step": 11826 }, { "epoch": 8.179114799446749, "grad_norm": 5.8843793869018555, "learning_rate": 1.0116028891962501e-05, "log_odds_chosen": 11.541424751281738, "log_odds_ratio": -1.2509261068771593e-05, "logits/chosen": -0.5906883478164673, "logits/rejected": -0.5607191324234009, "logps/chosen": -8.202745812013745e-05, "logps/rejected": -2.0678231716156006, "loss": 0.4322, "nll_loss": 0.10804738849401474, "rewards/accuracies": 1.0, "rewards/chosen": -8.202745448215865e-06, "rewards/margins": 0.20677411556243896, "rewards/rejected": -0.20678231120109558, "step": 11827 }, { "epoch": 8.179806362378976, "grad_norm": 3.337306261062622, "learning_rate": 1.0112186875672354e-05, "log_odds_chosen": 11.85464096069336, "log_odds_ratio": -1.6896887245820835e-05, "logits/chosen": -0.39175254106521606, "logits/rejected": -0.4269852936267853, "logps/chosen": -8.84580222191289e-05, "logps/rejected": -2.1351940631866455, "loss": 0.2785, "nll_loss": 0.06961159408092499, "rewards/accuracies": 1.0, "rewards/chosen": -8.84580185811501e-06, "rewards/margins": 0.21351055800914764, "rewards/rejected": -0.2135194092988968, "step": 11828 }, { "epoch": 8.180497925311203, "grad_norm": 2.875051736831665, "learning_rate": 1.0108344859382205e-05, "log_odds_chosen": 9.571345329284668, "log_odds_ratio": -7.962647941894829e-05, "logits/chosen": -0.3000252842903137, "logits/rejected": -0.42375290393829346, "logps/chosen": -0.0002875003847293556, "logps/rejected": -1.5732154846191406, "loss": 0.3661, "nll_loss": 0.0915074497461319, "rewards/accuracies": 1.0, "rewards/chosen": -2.8750037017744035e-05, "rewards/margins": 0.15729279816150665, "rewards/rejected": -0.15732155740261078, "step": 11829 }, { "epoch": 8.18118948824343, "grad_norm": 3.728848457336426, "learning_rate": 1.0104502843092055e-05, "log_odds_chosen": 10.975574493408203, "log_odds_ratio": -2.4418277462245896e-05, "logits/chosen": -0.514509379863739, "logits/rejected": -0.6360599994659424, "logps/chosen": -0.00021895149257034063, "logps/rejected": -2.1837289333343506, "loss": 0.3772, "nll_loss": 0.09429260343313217, "rewards/accuracies": 1.0, "rewards/chosen": -2.189514816564042e-05, "rewards/margins": 0.21835100650787354, "rewards/rejected": -0.2183728963136673, "step": 11830 }, { "epoch": 8.181881051175656, "grad_norm": 3.2154626846313477, "learning_rate": 1.0100660826801906e-05, "log_odds_chosen": 10.822830200195312, "log_odds_ratio": -0.00029166098101995885, "logits/chosen": -0.773431658744812, "logits/rejected": -0.7771180868148804, "logps/chosen": -0.00030548672657459974, "logps/rejected": -2.0690057277679443, "loss": 0.3367, "nll_loss": 0.08415533602237701, "rewards/accuracies": 1.0, "rewards/chosen": -3.054867556784302e-05, "rewards/margins": 0.20687001943588257, "rewards/rejected": -0.20690058171749115, "step": 11831 }, { "epoch": 8.182572614107883, "grad_norm": 4.847975730895996, "learning_rate": 1.0096818810511757e-05, "log_odds_chosen": 11.216750144958496, "log_odds_ratio": -9.346687147626653e-05, "logits/chosen": -0.18628649413585663, "logits/rejected": -0.23374198377132416, "logps/chosen": -0.00024281159858219326, "logps/rejected": -2.7525458335876465, "loss": 0.2698, "nll_loss": 0.06744711846113205, "rewards/accuracies": 1.0, "rewards/chosen": -2.4281160222017206e-05, "rewards/margins": 0.2752303183078766, "rewards/rejected": -0.27525460720062256, "step": 11832 }, { "epoch": 8.18326417704011, "grad_norm": 2.793700695037842, "learning_rate": 1.0092976794221608e-05, "log_odds_chosen": 10.602350234985352, "log_odds_ratio": -7.463981455657631e-05, "logits/chosen": -0.7802792191505432, "logits/rejected": -0.8679121732711792, "logps/chosen": -0.0009457082487642765, "logps/rejected": -2.089808464050293, "loss": 0.3161, "nll_loss": 0.07902882993221283, "rewards/accuracies": 1.0, "rewards/chosen": -9.457083797315136e-05, "rewards/margins": 0.2088862806558609, "rewards/rejected": -0.20898084342479706, "step": 11833 }, { "epoch": 8.183955739972337, "grad_norm": 4.563675880432129, "learning_rate": 1.0089134777931458e-05, "log_odds_chosen": 10.466887474060059, "log_odds_ratio": -0.00010899512562900782, "logits/chosen": -0.5160598158836365, "logits/rejected": -0.5109041929244995, "logps/chosen": -0.000562231638468802, "logps/rejected": -2.570701837539673, "loss": 0.339, "nll_loss": 0.08474425971508026, "rewards/accuracies": 1.0, "rewards/chosen": -5.622316894005053e-05, "rewards/margins": 0.2570139467716217, "rewards/rejected": -0.2570701837539673, "step": 11834 }, { "epoch": 8.184647302904564, "grad_norm": 5.127349853515625, "learning_rate": 1.008529276164131e-05, "log_odds_chosen": 11.212685585021973, "log_odds_ratio": -0.0007525036344304681, "logits/chosen": -0.4909210205078125, "logits/rejected": -0.4616711735725403, "logps/chosen": -0.00021634719450958073, "logps/rejected": -2.375697135925293, "loss": 0.6213, "nll_loss": 0.1552608758211136, "rewards/accuracies": 1.0, "rewards/chosen": -2.1634719814755954e-05, "rewards/margins": 0.2375481128692627, "rewards/rejected": -0.23756971955299377, "step": 11835 }, { "epoch": 8.18533886583679, "grad_norm": 4.297598838806152, "learning_rate": 1.008145074535116e-05, "log_odds_chosen": 11.013813018798828, "log_odds_ratio": -0.000144320132676512, "logits/chosen": -0.34821152687072754, "logits/rejected": -0.40879786014556885, "logps/chosen": -0.0002897988888435066, "logps/rejected": -2.404247760772705, "loss": 0.4883, "nll_loss": 0.12206760048866272, "rewards/accuracies": 1.0, "rewards/chosen": -2.8979891794733703e-05, "rewards/margins": 0.24039578437805176, "rewards/rejected": -0.24042478203773499, "step": 11836 }, { "epoch": 8.186030428769017, "grad_norm": 4.018962860107422, "learning_rate": 1.007760872906101e-05, "log_odds_chosen": 11.19547176361084, "log_odds_ratio": -4.441758574103005e-05, "logits/chosen": -0.26014795899391174, "logits/rejected": -0.3397113084793091, "logps/chosen": -0.0002606114430818707, "logps/rejected": -2.4648585319519043, "loss": 0.6568, "nll_loss": 0.1642070859670639, "rewards/accuracies": 1.0, "rewards/chosen": -2.606114503578283e-05, "rewards/margins": 0.24645981192588806, "rewards/rejected": -0.2464858591556549, "step": 11837 }, { "epoch": 8.186721991701244, "grad_norm": 3.1762855052948, "learning_rate": 1.0073766712770863e-05, "log_odds_chosen": 10.405686378479004, "log_odds_ratio": -9.969981329049915e-05, "logits/chosen": -0.5910043716430664, "logits/rejected": -0.6282913088798523, "logps/chosen": -0.0003152275749016553, "logps/rejected": -1.9180370569229126, "loss": 0.2665, "nll_loss": 0.06660556048154831, "rewards/accuracies": 1.0, "rewards/chosen": -3.152275530737825e-05, "rewards/margins": 0.1917721927165985, "rewards/rejected": -0.1918036937713623, "step": 11838 }, { "epoch": 8.187413554633471, "grad_norm": 3.890634059906006, "learning_rate": 1.0069924696480714e-05, "log_odds_chosen": 11.562515258789062, "log_odds_ratio": -3.290794847998768e-05, "logits/chosen": -0.5644373893737793, "logits/rejected": -0.6337707042694092, "logps/chosen": -0.00021835914230905473, "logps/rejected": -2.628633975982666, "loss": 0.456, "nll_loss": 0.11399047076702118, "rewards/accuracies": 1.0, "rewards/chosen": -2.183591277571395e-05, "rewards/margins": 0.2628415822982788, "rewards/rejected": -0.2628633975982666, "step": 11839 }, { "epoch": 8.188105117565698, "grad_norm": 4.618536472320557, "learning_rate": 1.0066082680190565e-05, "log_odds_chosen": 10.990549087524414, "log_odds_ratio": -0.001142018474638462, "logits/chosen": -0.260355144739151, "logits/rejected": -0.3478155732154846, "logps/chosen": -0.0024526086635887623, "logps/rejected": -2.606916904449463, "loss": 0.4936, "nll_loss": 0.12329496443271637, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002452608314342797, "rewards/margins": 0.2604464292526245, "rewards/rejected": -0.26069170236587524, "step": 11840 }, { "epoch": 8.188796680497925, "grad_norm": 3.341639518737793, "learning_rate": 1.0062240663900415e-05, "log_odds_chosen": 10.685967445373535, "log_odds_ratio": -6.907777424203232e-05, "logits/chosen": -0.27254483103752136, "logits/rejected": -0.35793501138687134, "logps/chosen": -0.0011382971424609423, "logps/rejected": -2.0838351249694824, "loss": 0.3415, "nll_loss": 0.0853566825389862, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011382971570128575, "rewards/margins": 0.20826967060565948, "rewards/rejected": -0.2083835005760193, "step": 11841 }, { "epoch": 8.189488243430151, "grad_norm": 3.463982105255127, "learning_rate": 1.0058398647610266e-05, "log_odds_chosen": 11.680423736572266, "log_odds_ratio": -5.199678707867861e-05, "logits/chosen": -0.4965265989303589, "logits/rejected": -0.6189468502998352, "logps/chosen": -0.0002663609047885984, "logps/rejected": -2.63663911819458, "loss": 0.3655, "nll_loss": 0.09136730432510376, "rewards/accuracies": 1.0, "rewards/chosen": -2.6636087568476796e-05, "rewards/margins": 0.2636372745037079, "rewards/rejected": -0.2636639177799225, "step": 11842 }, { "epoch": 8.190179806362378, "grad_norm": 3.801323175430298, "learning_rate": 1.0054556631320117e-05, "log_odds_chosen": 11.148916244506836, "log_odds_ratio": -2.395886076556053e-05, "logits/chosen": -0.11259734630584717, "logits/rejected": -0.19178740680217743, "logps/chosen": -0.00017104196012951434, "logps/rejected": -2.371026039123535, "loss": 0.2818, "nll_loss": 0.07044067978858948, "rewards/accuracies": 1.0, "rewards/chosen": -1.710419383016415e-05, "rewards/margins": 0.2370855212211609, "rewards/rejected": -0.23710262775421143, "step": 11843 }, { "epoch": 8.190871369294605, "grad_norm": 5.517773151397705, "learning_rate": 1.005071461502997e-05, "log_odds_chosen": 11.084335327148438, "log_odds_ratio": -5.177776256459765e-05, "logits/chosen": -0.18667830526828766, "logits/rejected": -0.1939568817615509, "logps/chosen": -0.0010915655875578523, "logps/rejected": -2.410806655883789, "loss": 0.6075, "nll_loss": 0.15186257660388947, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010915656457655132, "rewards/margins": 0.24097150564193726, "rewards/rejected": -0.2410806566476822, "step": 11844 }, { "epoch": 8.191562932226832, "grad_norm": 3.830199718475342, "learning_rate": 1.0046872598739818e-05, "log_odds_chosen": 9.771222114562988, "log_odds_ratio": -0.0008265036158263683, "logits/chosen": -0.10115376114845276, "logits/rejected": -0.15087993443012238, "logps/chosen": -0.0015982795739546418, "logps/rejected": -2.151283025741577, "loss": 0.4022, "nll_loss": 0.10046719759702682, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015982796321623027, "rewards/margins": 0.2149684727191925, "rewards/rejected": -0.21512830257415771, "step": 11845 }, { "epoch": 8.192254495159059, "grad_norm": 4.922761917114258, "learning_rate": 1.0043030582449669e-05, "log_odds_chosen": 10.53628921508789, "log_odds_ratio": -0.00016840444004628807, "logits/chosen": -0.5491094589233398, "logits/rejected": -0.5219364762306213, "logps/chosen": -0.0001730314252199605, "logps/rejected": -1.8352140188217163, "loss": 0.4915, "nll_loss": 0.122856006026268, "rewards/accuracies": 1.0, "rewards/chosen": -1.7303143977187574e-05, "rewards/margins": 0.1835041046142578, "rewards/rejected": -0.18352138996124268, "step": 11846 }, { "epoch": 8.192946058091286, "grad_norm": 2.727844715118408, "learning_rate": 1.0039188566159521e-05, "log_odds_chosen": 10.407556533813477, "log_odds_ratio": -0.0002446332073304802, "logits/chosen": -0.0866495743393898, "logits/rejected": -0.3335264325141907, "logps/chosen": -0.0008980625425465405, "logps/rejected": -1.89154052734375, "loss": 0.3546, "nll_loss": 0.0886288732290268, "rewards/accuracies": 1.0, "rewards/chosen": -8.98062571650371e-05, "rewards/margins": 0.18906424939632416, "rewards/rejected": -0.18915404379367828, "step": 11847 }, { "epoch": 8.193637621023512, "grad_norm": 2.52756404876709, "learning_rate": 1.0035346549869372e-05, "log_odds_chosen": 10.452568054199219, "log_odds_ratio": -8.982956933323294e-05, "logits/chosen": -0.6266956329345703, "logits/rejected": -0.5948130488395691, "logps/chosen": -0.00014999463746789843, "logps/rejected": -1.5260772705078125, "loss": 0.2274, "nll_loss": 0.05684291943907738, "rewards/accuracies": 1.0, "rewards/chosen": -1.4999463019194081e-05, "rewards/margins": 0.1525927186012268, "rewards/rejected": -0.1526077389717102, "step": 11848 }, { "epoch": 8.19432918395574, "grad_norm": 3.984170436859131, "learning_rate": 1.0031504533579223e-05, "log_odds_chosen": 10.623090744018555, "log_odds_ratio": -0.00012865487951785326, "logits/chosen": -0.19675840437412262, "logits/rejected": -0.18537622690200806, "logps/chosen": -0.0013972955057397485, "logps/rejected": -2.636220932006836, "loss": 0.564, "nll_loss": 0.1409960240125656, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013972955639474094, "rewards/margins": 0.26348239183425903, "rewards/rejected": -0.26362210512161255, "step": 11849 }, { "epoch": 8.195020746887966, "grad_norm": 8.14512825012207, "learning_rate": 1.0027662517289074e-05, "log_odds_chosen": 10.018821716308594, "log_odds_ratio": -0.0001869121624622494, "logits/chosen": -0.4175964295864105, "logits/rejected": -0.5103138089179993, "logps/chosen": -0.0005602188175544143, "logps/rejected": -2.2952799797058105, "loss": 0.3597, "nll_loss": 0.08990222215652466, "rewards/accuracies": 1.0, "rewards/chosen": -5.602188321063295e-05, "rewards/margins": 0.22947199642658234, "rewards/rejected": -0.22952800989151, "step": 11850 }, { "epoch": 8.195712309820193, "grad_norm": 4.538632869720459, "learning_rate": 1.0023820500998924e-05, "log_odds_chosen": 11.071784973144531, "log_odds_ratio": -7.922316581243649e-05, "logits/chosen": 0.10132365673780441, "logits/rejected": 0.0524822473526001, "logps/chosen": -0.00031479087192565203, "logps/rejected": -2.6277666091918945, "loss": 0.8658, "nll_loss": 0.21643231809139252, "rewards/accuracies": 1.0, "rewards/chosen": -3.147908500977792e-05, "rewards/margins": 0.262745201587677, "rewards/rejected": -0.2627766728401184, "step": 11851 }, { "epoch": 8.19640387275242, "grad_norm": 3.5381200313568115, "learning_rate": 1.0019978484708775e-05, "log_odds_chosen": 11.354914665222168, "log_odds_ratio": -2.0675848645623773e-05, "logits/chosen": -0.44369906187057495, "logits/rejected": -0.4636278450489044, "logps/chosen": -9.953975677490234e-05, "logps/rejected": -2.1649391651153564, "loss": 0.3316, "nll_loss": 0.08289721608161926, "rewards/accuracies": 1.0, "rewards/chosen": -9.953975677490234e-06, "rewards/margins": 0.2164839655160904, "rewards/rejected": -0.21649391949176788, "step": 11852 }, { "epoch": 8.197095435684647, "grad_norm": 4.758200645446777, "learning_rate": 1.0016136468418628e-05, "log_odds_chosen": 11.802242279052734, "log_odds_ratio": -6.090774331823923e-05, "logits/chosen": -0.6279882192611694, "logits/rejected": -0.6225122213363647, "logps/chosen": -6.893193494761363e-05, "logps/rejected": -2.4928269386291504, "loss": 0.4056, "nll_loss": 0.10139544308185577, "rewards/accuracies": 1.0, "rewards/chosen": -6.893194040458184e-06, "rewards/margins": 0.249275803565979, "rewards/rejected": -0.24928270280361176, "step": 11853 }, { "epoch": 8.197786998616873, "grad_norm": 8.527070999145508, "learning_rate": 1.0012294452128477e-05, "log_odds_chosen": 10.675111770629883, "log_odds_ratio": -6.840465357527137e-05, "logits/chosen": -0.4798826575279236, "logits/rejected": -0.4432487189769745, "logps/chosen": -9.258472709916532e-05, "logps/rejected": -1.6617554426193237, "loss": 0.377, "nll_loss": 0.09423164278268814, "rewards/accuracies": 1.0, "rewards/chosen": -9.258472346118651e-06, "rewards/margins": 0.166166290640831, "rewards/rejected": -0.16617554426193237, "step": 11854 }, { "epoch": 8.1984785615491, "grad_norm": 3.8203048706054688, "learning_rate": 1.0008452435838327e-05, "log_odds_chosen": 11.196329116821289, "log_odds_ratio": -5.840366429765709e-05, "logits/chosen": -0.23118522763252258, "logits/rejected": -0.3228698968887329, "logps/chosen": -0.00047525044647045434, "logps/rejected": -2.8010623455047607, "loss": 0.5395, "nll_loss": 0.13487224280834198, "rewards/accuracies": 1.0, "rewards/chosen": -4.752504173666239e-05, "rewards/margins": 0.28005874156951904, "rewards/rejected": -0.28010624647140503, "step": 11855 }, { "epoch": 8.199170124481327, "grad_norm": 6.984753608703613, "learning_rate": 1.000461041954818e-05, "log_odds_chosen": 10.723838806152344, "log_odds_ratio": -0.00010693888179957867, "logits/chosen": -0.3761310875415802, "logits/rejected": -0.38185182213783264, "logps/chosen": -0.00027034140657633543, "logps/rejected": -1.8784973621368408, "loss": 0.7063, "nll_loss": 0.17655616998672485, "rewards/accuracies": 1.0, "rewards/chosen": -2.7034140657633543e-05, "rewards/margins": 0.18782271444797516, "rewards/rejected": -0.187849760055542, "step": 11856 }, { "epoch": 8.199861687413554, "grad_norm": 5.063896656036377, "learning_rate": 1.000076840325803e-05, "log_odds_chosen": 9.568683624267578, "log_odds_ratio": -0.00012912966485600919, "logits/chosen": -0.13418522477149963, "logits/rejected": -0.1199626699090004, "logps/chosen": -0.0005637831054627895, "logps/rejected": -1.5717370510101318, "loss": 0.7314, "nll_loss": 0.18283309042453766, "rewards/accuracies": 1.0, "rewards/chosen": -5.6378310546278954e-05, "rewards/margins": 0.1571173220872879, "rewards/rejected": -0.15717370808124542, "step": 11857 }, { "epoch": 8.20055325034578, "grad_norm": 3.49157452583313, "learning_rate": 9.996926386967881e-06, "log_odds_chosen": 11.587803840637207, "log_odds_ratio": -2.791120277834125e-05, "logits/chosen": -0.040826573967933655, "logits/rejected": -0.03154802322387695, "logps/chosen": -0.00015233646263368428, "logps/rejected": -2.730283260345459, "loss": 0.3362, "nll_loss": 0.08404259383678436, "rewards/accuracies": 1.0, "rewards/chosen": -1.523364699096419e-05, "rewards/margins": 0.27301308512687683, "rewards/rejected": -0.27302831411361694, "step": 11858 }, { "epoch": 8.201244813278008, "grad_norm": 5.375125408172607, "learning_rate": 9.993084370677732e-06, "log_odds_chosen": 10.799838066101074, "log_odds_ratio": -8.351242286153138e-05, "logits/chosen": -0.0013588108122348785, "logits/rejected": -0.020535770803689957, "logps/chosen": -0.0009846636094152927, "logps/rejected": -3.3954825401306152, "loss": 0.4605, "nll_loss": 0.11510814726352692, "rewards/accuracies": 1.0, "rewards/chosen": -9.846636385191232e-05, "rewards/margins": 0.33944979310035706, "rewards/rejected": -0.3395482897758484, "step": 11859 }, { "epoch": 8.201936376210234, "grad_norm": 3.087932825088501, "learning_rate": 9.989242354387583e-06, "log_odds_chosen": 10.207304954528809, "log_odds_ratio": -0.0001233558141393587, "logits/chosen": -0.006505733355879784, "logits/rejected": -0.06693039834499359, "logps/chosen": -0.00036496983375400305, "logps/rejected": -1.86687433719635, "loss": 0.4002, "nll_loss": 0.10004688054323196, "rewards/accuracies": 1.0, "rewards/chosen": -3.649698555818759e-05, "rewards/margins": 0.18665093183517456, "rewards/rejected": -0.1866874396800995, "step": 11860 }, { "epoch": 8.202627939142461, "grad_norm": 3.8856632709503174, "learning_rate": 9.985400338097434e-06, "log_odds_chosen": 12.092461585998535, "log_odds_ratio": -1.748041540849954e-05, "logits/chosen": -0.11215893179178238, "logits/rejected": -0.11910742521286011, "logps/chosen": -0.00010045425733551383, "logps/rejected": -2.9239437580108643, "loss": 0.4678, "nll_loss": 0.11693894863128662, "rewards/accuracies": 1.0, "rewards/chosen": -1.0045426279248204e-05, "rewards/margins": 0.2923843562602997, "rewards/rejected": -0.29239436984062195, "step": 11861 }, { "epoch": 8.203319502074688, "grad_norm": 3.417696475982666, "learning_rate": 9.981558321807286e-06, "log_odds_chosen": 10.846441268920898, "log_odds_ratio": -6.147399835754186e-05, "logits/chosen": -0.2771728038787842, "logits/rejected": -0.28151994943618774, "logps/chosen": -0.00015563865599688143, "logps/rejected": -1.9300942420959473, "loss": 0.3209, "nll_loss": 0.08022533357143402, "rewards/accuracies": 1.0, "rewards/chosen": -1.5563866327283904e-05, "rewards/margins": 0.19299384951591492, "rewards/rejected": -0.1930094212293625, "step": 11862 }, { "epoch": 8.204011065006915, "grad_norm": 4.589306354522705, "learning_rate": 9.977716305517137e-06, "log_odds_chosen": 11.178730964660645, "log_odds_ratio": -6.520461465697736e-05, "logits/chosen": -0.3614737093448639, "logits/rejected": -0.45247286558151245, "logps/chosen": -0.0003307850274723023, "logps/rejected": -2.6263513565063477, "loss": 0.721, "nll_loss": 0.18025082349777222, "rewards/accuracies": 1.0, "rewards/chosen": -3.3078504202421755e-05, "rewards/margins": 0.2626020908355713, "rewards/rejected": -0.26263514161109924, "step": 11863 }, { "epoch": 8.204702627939142, "grad_norm": 5.1506028175354, "learning_rate": 9.973874289226986e-06, "log_odds_chosen": 11.342752456665039, "log_odds_ratio": -5.206582136452198e-05, "logits/chosen": -0.552399754524231, "logits/rejected": -0.5551222562789917, "logps/chosen": -0.0006311584729701281, "logps/rejected": -2.6455674171447754, "loss": 0.4314, "nll_loss": 0.10783309489488602, "rewards/accuracies": 1.0, "rewards/chosen": -6.31158472970128e-05, "rewards/margins": 0.2644936144351959, "rewards/rejected": -0.26455673575401306, "step": 11864 }, { "epoch": 8.205394190871369, "grad_norm": 3.7225213050842285, "learning_rate": 9.970032272936838e-06, "log_odds_chosen": 11.683008193969727, "log_odds_ratio": -1.5529356460319832e-05, "logits/chosen": -0.21142897009849548, "logits/rejected": -0.3459831476211548, "logps/chosen": -0.00011245267523918301, "logps/rejected": -2.5953619480133057, "loss": 0.3665, "nll_loss": 0.09162183105945587, "rewards/accuracies": 1.0, "rewards/chosen": -1.124526716012042e-05, "rewards/margins": 0.259524941444397, "rewards/rejected": -0.25953617691993713, "step": 11865 }, { "epoch": 8.206085753803595, "grad_norm": 4.879708766937256, "learning_rate": 9.966190256646689e-06, "log_odds_chosen": 10.546676635742188, "log_odds_ratio": -0.00018046073091682047, "logits/chosen": -0.7415679693222046, "logits/rejected": -0.7752382755279541, "logps/chosen": -0.00023197535483632237, "logps/rejected": -2.147796154022217, "loss": 0.3633, "nll_loss": 0.09080185741186142, "rewards/accuracies": 1.0, "rewards/chosen": -2.3197535483632237e-05, "rewards/margins": 0.21475641429424286, "rewards/rejected": -0.21477960050106049, "step": 11866 }, { "epoch": 8.206777316735822, "grad_norm": 3.5352513790130615, "learning_rate": 9.96234824035654e-06, "log_odds_chosen": 11.506108283996582, "log_odds_ratio": -3.419344648136757e-05, "logits/chosen": -0.3386998474597931, "logits/rejected": -0.42703479528427124, "logps/chosen": -0.0004458025505300611, "logps/rejected": -2.6201348304748535, "loss": 0.475, "nll_loss": 0.1187388151884079, "rewards/accuracies": 1.0, "rewards/chosen": -4.458025432541035e-05, "rewards/margins": 0.2619689106941223, "rewards/rejected": -0.2620134949684143, "step": 11867 }, { "epoch": 8.207468879668049, "grad_norm": 3.62316632270813, "learning_rate": 9.95850622406639e-06, "log_odds_chosen": 10.394129753112793, "log_odds_ratio": -8.255482680397108e-05, "logits/chosen": -0.5799174904823303, "logits/rejected": -0.6252142190933228, "logps/chosen": -0.00030348976724781096, "logps/rejected": -1.5779904127120972, "loss": 0.4337, "nll_loss": 0.10842124372720718, "rewards/accuracies": 1.0, "rewards/chosen": -3.034897417819593e-05, "rewards/margins": 0.15776869654655457, "rewards/rejected": -0.15779903531074524, "step": 11868 }, { "epoch": 8.208160442600276, "grad_norm": 4.204887390136719, "learning_rate": 9.954664207776241e-06, "log_odds_chosen": 10.332489013671875, "log_odds_ratio": -0.00021437757823150605, "logits/chosen": 0.18167400360107422, "logits/rejected": -0.0008254246786236763, "logps/chosen": -0.00041375181172043085, "logps/rejected": -2.1173388957977295, "loss": 0.5232, "nll_loss": 0.1307739019393921, "rewards/accuracies": 1.0, "rewards/chosen": -4.137518408242613e-05, "rewards/margins": 0.21169252693653107, "rewards/rejected": -0.2117338925600052, "step": 11869 }, { "epoch": 8.208852005532503, "grad_norm": 3.426561117172241, "learning_rate": 9.950822191486092e-06, "log_odds_chosen": 10.906770706176758, "log_odds_ratio": -6.29575879429467e-05, "logits/chosen": -0.3955892324447632, "logits/rejected": -0.34222662448883057, "logps/chosen": -0.0001728545903461054, "logps/rejected": -1.9308866262435913, "loss": 0.3435, "nll_loss": 0.08586615324020386, "rewards/accuracies": 1.0, "rewards/chosen": -1.72854597622063e-05, "rewards/margins": 0.1930713802576065, "rewards/rejected": -0.19308865070343018, "step": 11870 }, { "epoch": 8.20954356846473, "grad_norm": 4.162632942199707, "learning_rate": 9.946980175195943e-06, "log_odds_chosen": 10.008193969726562, "log_odds_ratio": -0.00013287433830555528, "logits/chosen": 0.018637903034687042, "logits/rejected": 0.04746834561228752, "logps/chosen": -0.0002809230354614556, "logps/rejected": -1.9701550006866455, "loss": 0.6517, "nll_loss": 0.16291363537311554, "rewards/accuracies": 1.0, "rewards/chosen": -2.8092303182347678e-05, "rewards/margins": 0.19698740541934967, "rewards/rejected": -0.19701550900936127, "step": 11871 }, { "epoch": 8.210235131396956, "grad_norm": 4.263104438781738, "learning_rate": 9.943138158905795e-06, "log_odds_chosen": 10.590243339538574, "log_odds_ratio": -5.595001857727766e-05, "logits/chosen": -0.37323158979415894, "logits/rejected": -0.36012914776802063, "logps/chosen": -0.0002597020356915891, "logps/rejected": -2.14939546585083, "loss": 0.3621, "nll_loss": 0.09050886332988739, "rewards/accuracies": 1.0, "rewards/chosen": -2.597020284156315e-05, "rewards/margins": 0.21491357684135437, "rewards/rejected": -0.21493953466415405, "step": 11872 }, { "epoch": 8.210926694329183, "grad_norm": 8.134347915649414, "learning_rate": 9.939296142615644e-06, "log_odds_chosen": 10.484792709350586, "log_odds_ratio": -0.00042060474515892565, "logits/chosen": 0.01173505187034607, "logits/rejected": -0.018582195043563843, "logps/chosen": -0.0008123047882691026, "logps/rejected": -2.4332172870635986, "loss": 0.3267, "nll_loss": 0.08162355422973633, "rewards/accuracies": 1.0, "rewards/chosen": -8.123047882691026e-05, "rewards/margins": 0.24324052035808563, "rewards/rejected": -0.2433217465877533, "step": 11873 }, { "epoch": 8.21161825726141, "grad_norm": 6.031350135803223, "learning_rate": 9.935454126325495e-06, "log_odds_chosen": 10.102872848510742, "log_odds_ratio": -0.00033000472467392683, "logits/chosen": -0.744584321975708, "logits/rejected": -0.69932621717453, "logps/chosen": -0.0010207390878349543, "logps/rejected": -1.9441288709640503, "loss": 0.3996, "nll_loss": 0.09986548870801926, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010207389277638867, "rewards/margins": 0.19431079924106598, "rewards/rejected": -0.19441288709640503, "step": 11874 }, { "epoch": 8.212309820193637, "grad_norm": 5.100654125213623, "learning_rate": 9.931612110035348e-06, "log_odds_chosen": 10.475208282470703, "log_odds_ratio": -5.977584078209475e-05, "logits/chosen": -0.6865031719207764, "logits/rejected": -0.6750398874282837, "logps/chosen": -0.0001411703269695863, "logps/rejected": -1.342596411705017, "loss": 0.2567, "nll_loss": 0.06417740136384964, "rewards/accuracies": 1.0, "rewards/chosen": -1.4117033060756512e-05, "rewards/margins": 0.13424552977085114, "rewards/rejected": -0.1342596411705017, "step": 11875 }, { "epoch": 8.213001383125864, "grad_norm": 3.6606369018554688, "learning_rate": 9.927770093745198e-06, "log_odds_chosen": 11.791744232177734, "log_odds_ratio": -1.4413297321880236e-05, "logits/chosen": 0.10309985280036926, "logits/rejected": 0.07292437553405762, "logps/chosen": -0.00011298189929220825, "logps/rejected": -2.626086711883545, "loss": 0.4187, "nll_loss": 0.10467715561389923, "rewards/accuracies": 1.0, "rewards/chosen": -1.1298190656816587e-05, "rewards/margins": 0.26259738206863403, "rewards/rejected": -0.26260867714881897, "step": 11876 }, { "epoch": 8.21369294605809, "grad_norm": 5.350027084350586, "learning_rate": 9.923928077455049e-06, "log_odds_chosen": 11.024895668029785, "log_odds_ratio": -4.6858815039740875e-05, "logits/chosen": -0.15542323887348175, "logits/rejected": -0.19625607132911682, "logps/chosen": -0.00010433487477712333, "logps/rejected": -1.9152851104736328, "loss": 0.3122, "nll_loss": 0.07805629074573517, "rewards/accuracies": 1.0, "rewards/chosen": -1.0433487659611274e-05, "rewards/margins": 0.19151808321475983, "rewards/rejected": -0.19152851402759552, "step": 11877 }, { "epoch": 8.214384508990317, "grad_norm": 4.7614946365356445, "learning_rate": 9.9200860611649e-06, "log_odds_chosen": 10.522577285766602, "log_odds_ratio": -0.00020592294458765537, "logits/chosen": 0.07700219005346298, "logits/rejected": -0.1007341742515564, "logps/chosen": -0.001401687623001635, "logps/rejected": -2.5692789554595947, "loss": 0.3943, "nll_loss": 0.09854703396558762, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001401687623001635, "rewards/margins": 0.2567877173423767, "rewards/rejected": -0.25692787766456604, "step": 11878 }, { "epoch": 8.215076071922544, "grad_norm": 4.957317352294922, "learning_rate": 9.91624404487475e-06, "log_odds_chosen": 9.691752433776855, "log_odds_ratio": -0.0007342268945649266, "logits/chosen": -0.5804744958877563, "logits/rejected": -0.5840776562690735, "logps/chosen": -0.00027017746469937265, "logps/rejected": -1.3753609657287598, "loss": 0.5321, "nll_loss": 0.1329451948404312, "rewards/accuracies": 1.0, "rewards/chosen": -2.7017747925128788e-05, "rewards/margins": 0.1375090777873993, "rewards/rejected": -0.13753610849380493, "step": 11879 }, { "epoch": 8.215767634854771, "grad_norm": 3.693619728088379, "learning_rate": 9.912402028584601e-06, "log_odds_chosen": 10.603633880615234, "log_odds_ratio": -7.273280061781406e-05, "logits/chosen": -0.18116453289985657, "logits/rejected": -0.2575952410697937, "logps/chosen": -0.0001808809902286157, "logps/rejected": -1.8459020853042603, "loss": 0.3505, "nll_loss": 0.0876234918832779, "rewards/accuracies": 1.0, "rewards/chosen": -1.8088101569446735e-05, "rewards/margins": 0.18457213044166565, "rewards/rejected": -0.18459022045135498, "step": 11880 }, { "epoch": 8.216459197786998, "grad_norm": 5.794073104858398, "learning_rate": 9.908560012294454e-06, "log_odds_chosen": 9.234406471252441, "log_odds_ratio": -0.0003990632831119001, "logits/chosen": -0.40051859617233276, "logits/rejected": -0.3610343933105469, "logps/chosen": -0.0010065014939755201, "logps/rejected": -1.7072646617889404, "loss": 0.6433, "nll_loss": 0.16079066693782806, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010065016249427572, "rewards/margins": 0.17062580585479736, "rewards/rejected": -0.1707264631986618, "step": 11881 }, { "epoch": 8.217150760719225, "grad_norm": 4.333235740661621, "learning_rate": 9.904717996004303e-06, "log_odds_chosen": 11.285673141479492, "log_odds_ratio": -1.546269231766928e-05, "logits/chosen": 0.07247123122215271, "logits/rejected": -0.045315831899642944, "logps/chosen": -9.770147153176367e-05, "logps/rejected": -1.9647116661071777, "loss": 0.4386, "nll_loss": 0.1096573919057846, "rewards/accuracies": 1.0, "rewards/chosen": -9.770147698873188e-06, "rewards/margins": 0.19646139442920685, "rewards/rejected": -0.19647115468978882, "step": 11882 }, { "epoch": 8.217842323651452, "grad_norm": 3.874656915664673, "learning_rate": 9.900875979714154e-06, "log_odds_chosen": 11.24226188659668, "log_odds_ratio": -2.1476273104781285e-05, "logits/chosen": -0.3501582145690918, "logits/rejected": -0.3311619162559509, "logps/chosen": -0.00010106174158863723, "logps/rejected": -2.1260154247283936, "loss": 0.4227, "nll_loss": 0.10567951202392578, "rewards/accuracies": 1.0, "rewards/chosen": -1.0106174158863723e-05, "rewards/margins": 0.21259145438671112, "rewards/rejected": -0.21260155737400055, "step": 11883 }, { "epoch": 8.218533886583678, "grad_norm": 3.4754481315612793, "learning_rate": 9.897033963424006e-06, "log_odds_chosen": 10.466971397399902, "log_odds_ratio": -5.804416286991909e-05, "logits/chosen": -0.14578090608119965, "logits/rejected": -0.08390910178422928, "logps/chosen": -0.00012252731539774686, "logps/rejected": -1.4804524183273315, "loss": 0.3364, "nll_loss": 0.08408783376216888, "rewards/accuracies": 1.0, "rewards/chosen": -1.2252730812178925e-05, "rewards/margins": 0.1480329930782318, "rewards/rejected": -0.14804525673389435, "step": 11884 }, { "epoch": 8.219225449515905, "grad_norm": 3.501004457473755, "learning_rate": 9.893191947133857e-06, "log_odds_chosen": 11.709239959716797, "log_odds_ratio": -2.7292315280647017e-05, "logits/chosen": -0.1410721093416214, "logits/rejected": -0.23302824795246124, "logps/chosen": -8.871190948411822e-05, "logps/rejected": -2.1929562091827393, "loss": 0.4125, "nll_loss": 0.10312415659427643, "rewards/accuracies": 1.0, "rewards/chosen": -8.871191312209703e-06, "rewards/margins": 0.21928676962852478, "rewards/rejected": -0.2192956507205963, "step": 11885 }, { "epoch": 8.219917012448132, "grad_norm": 3.4106521606445312, "learning_rate": 9.889349930843707e-06, "log_odds_chosen": 10.613020896911621, "log_odds_ratio": -0.00017831180593930185, "logits/chosen": -0.6792394518852234, "logits/rejected": -0.6880941987037659, "logps/chosen": -0.0004885156522504985, "logps/rejected": -2.28318190574646, "loss": 0.2814, "nll_loss": 0.07033880054950714, "rewards/accuracies": 1.0, "rewards/chosen": -4.885156522504985e-05, "rewards/margins": 0.22826935350894928, "rewards/rejected": -0.2283182144165039, "step": 11886 }, { "epoch": 8.220608575380359, "grad_norm": 3.315471887588501, "learning_rate": 9.885507914553558e-06, "log_odds_chosen": 10.854877471923828, "log_odds_ratio": -6.311023025773466e-05, "logits/chosen": -0.26056307554244995, "logits/rejected": -0.2880066931247711, "logps/chosen": -0.0003195895696990192, "logps/rejected": -2.209442377090454, "loss": 0.4086, "nll_loss": 0.10214497148990631, "rewards/accuracies": 1.0, "rewards/chosen": -3.1958959880284965e-05, "rewards/margins": 0.22091226279735565, "rewards/rejected": -0.22094422578811646, "step": 11887 }, { "epoch": 8.221300138312586, "grad_norm": 4.172388553619385, "learning_rate": 9.881665898263409e-06, "log_odds_chosen": 10.32114315032959, "log_odds_ratio": -0.00030060680001042783, "logits/chosen": 0.08563702553510666, "logits/rejected": 0.1317209154367447, "logps/chosen": -0.0005976824322715402, "logps/rejected": -2.283992052078247, "loss": 0.3728, "nll_loss": 0.09316124022006989, "rewards/accuracies": 1.0, "rewards/chosen": -5.9768248320324346e-05, "rewards/margins": 0.22833941876888275, "rewards/rejected": -0.22839921712875366, "step": 11888 }, { "epoch": 8.221991701244812, "grad_norm": 7.285219192504883, "learning_rate": 9.87782388197326e-06, "log_odds_chosen": 11.31580924987793, "log_odds_ratio": -4.5658751332666725e-05, "logits/chosen": -0.7076231241226196, "logits/rejected": -0.6245047450065613, "logps/chosen": -0.0002565417962614447, "logps/rejected": -2.7010302543640137, "loss": 0.4708, "nll_loss": 0.11768751591444016, "rewards/accuracies": 1.0, "rewards/chosen": -2.5654178898548707e-05, "rewards/margins": 0.2700774073600769, "rewards/rejected": -0.2701030671596527, "step": 11889 }, { "epoch": 8.22268326417704, "grad_norm": 3.4534096717834473, "learning_rate": 9.873981865683112e-06, "log_odds_chosen": 12.416353225708008, "log_odds_ratio": -3.0102659366093576e-05, "logits/chosen": -0.34680306911468506, "logits/rejected": -0.42320072650909424, "logps/chosen": -0.00020526333537418395, "logps/rejected": -3.8194408416748047, "loss": 0.4224, "nll_loss": 0.10560045391321182, "rewards/accuracies": 1.0, "rewards/chosen": -2.052633135463111e-05, "rewards/margins": 0.3819235563278198, "rewards/rejected": -0.38194406032562256, "step": 11890 }, { "epoch": 8.223374827109266, "grad_norm": 7.042224407196045, "learning_rate": 9.870139849392961e-06, "log_odds_chosen": 10.453751564025879, "log_odds_ratio": -0.00010059493797598407, "logits/chosen": 0.1438857614994049, "logits/rejected": 0.07333517074584961, "logps/chosen": -0.00031427317298948765, "logps/rejected": -2.2300024032592773, "loss": 0.2914, "nll_loss": 0.07283707708120346, "rewards/accuracies": 1.0, "rewards/chosen": -3.1427316571353e-05, "rewards/margins": 0.22296880185604095, "rewards/rejected": -0.22300024330615997, "step": 11891 }, { "epoch": 8.224066390041493, "grad_norm": 3.609309673309326, "learning_rate": 9.866297833102812e-06, "log_odds_chosen": 11.430818557739258, "log_odds_ratio": -3.3167856599902734e-05, "logits/chosen": -0.3859490156173706, "logits/rejected": -0.3728792369365692, "logps/chosen": -0.00021129933884367347, "logps/rejected": -1.9585065841674805, "loss": 0.5518, "nll_loss": 0.13795900344848633, "rewards/accuracies": 1.0, "rewards/chosen": -2.1129933884367347e-05, "rewards/margins": 0.19582951068878174, "rewards/rejected": -0.195850670337677, "step": 11892 }, { "epoch": 8.22475795297372, "grad_norm": 3.3586835861206055, "learning_rate": 9.862455816812664e-06, "log_odds_chosen": 11.46454906463623, "log_odds_ratio": -2.6788418836076744e-05, "logits/chosen": -0.2561917304992676, "logits/rejected": -0.2784116268157959, "logps/chosen": -0.00014413880126085132, "logps/rejected": -2.368454933166504, "loss": 0.4054, "nll_loss": 0.10135869681835175, "rewards/accuracies": 1.0, "rewards/chosen": -1.4413879398489371e-05, "rewards/margins": 0.23683109879493713, "rewards/rejected": -0.23684552311897278, "step": 11893 }, { "epoch": 8.225449515905947, "grad_norm": 5.3997344970703125, "learning_rate": 9.858613800522515e-06, "log_odds_chosen": 11.586647987365723, "log_odds_ratio": -2.1225545424385928e-05, "logits/chosen": -0.052325744181871414, "logits/rejected": -0.13196569681167603, "logps/chosen": -0.0001532369205961004, "logps/rejected": -2.433154344558716, "loss": 0.4923, "nll_loss": 0.12306039035320282, "rewards/accuracies": 1.0, "rewards/chosen": -1.5323690604418516e-05, "rewards/margins": 0.24330011010169983, "rewards/rejected": -0.2433154433965683, "step": 11894 }, { "epoch": 8.226141078838173, "grad_norm": 11.203163146972656, "learning_rate": 9.854771784232366e-06, "log_odds_chosen": 11.251202583312988, "log_odds_ratio": -3.209187707398087e-05, "logits/chosen": -0.17335253953933716, "logits/rejected": -0.24422144889831543, "logps/chosen": -0.0001083131501218304, "logps/rejected": -1.97250497341156, "loss": 0.5254, "nll_loss": 0.13133439421653748, "rewards/accuracies": 1.0, "rewards/chosen": -1.083131519408198e-05, "rewards/margins": 0.19723966717720032, "rewards/rejected": -0.19725048542022705, "step": 11895 }, { "epoch": 8.2268326417704, "grad_norm": 4.20961332321167, "learning_rate": 9.850929767942217e-06, "log_odds_chosen": 10.151579856872559, "log_odds_ratio": -0.00021857497631572187, "logits/chosen": -0.5564976930618286, "logits/rejected": -0.7172648906707764, "logps/chosen": -0.0008203632314689457, "logps/rejected": -2.225661277770996, "loss": 0.529, "nll_loss": 0.13222664594650269, "rewards/accuracies": 1.0, "rewards/chosen": -8.203632751246914e-05, "rewards/margins": 0.22248411178588867, "rewards/rejected": -0.2225661277770996, "step": 11896 }, { "epoch": 8.227524204702627, "grad_norm": 2.7075605392456055, "learning_rate": 9.847087751652067e-06, "log_odds_chosen": 10.783187866210938, "log_odds_ratio": -0.00012845300079789013, "logits/chosen": -0.42407190799713135, "logits/rejected": -0.36330926418304443, "logps/chosen": -0.00020437348575796932, "logps/rejected": -1.8607637882232666, "loss": 0.3263, "nll_loss": 0.08155211806297302, "rewards/accuracies": 1.0, "rewards/chosen": -2.043734821199905e-05, "rewards/margins": 0.18605592846870422, "rewards/rejected": -0.18607638776302338, "step": 11897 }, { "epoch": 8.228215767634854, "grad_norm": 5.69032096862793, "learning_rate": 9.843245735361918e-06, "log_odds_chosen": 12.672260284423828, "log_odds_ratio": -1.0369312803959474e-05, "logits/chosen": -0.3008144199848175, "logits/rejected": -0.40587708353996277, "logps/chosen": -0.00024107444914989173, "logps/rejected": -3.753671646118164, "loss": 0.5438, "nll_loss": 0.13596031069755554, "rewards/accuracies": 1.0, "rewards/chosen": -2.410744309599977e-05, "rewards/margins": 0.37534308433532715, "rewards/rejected": -0.375367134809494, "step": 11898 }, { "epoch": 8.22890733056708, "grad_norm": 7.97053337097168, "learning_rate": 9.83940371907177e-06, "log_odds_chosen": 10.980829238891602, "log_odds_ratio": -3.558182652341202e-05, "logits/chosen": -0.22462347149848938, "logits/rejected": -0.21821101009845734, "logps/chosen": -0.0003172093420289457, "logps/rejected": -2.1453938484191895, "loss": 0.5254, "nll_loss": 0.1313534528017044, "rewards/accuracies": 1.0, "rewards/chosen": -3.172093420289457e-05, "rewards/margins": 0.21450765430927277, "rewards/rejected": -0.21453937888145447, "step": 11899 }, { "epoch": 8.229598893499308, "grad_norm": 4.776434898376465, "learning_rate": 9.83556170278162e-06, "log_odds_chosen": 10.950285911560059, "log_odds_ratio": -8.446057472610846e-05, "logits/chosen": -0.3791242837905884, "logits/rejected": -0.37221699953079224, "logps/chosen": -0.0003215722390450537, "logps/rejected": -2.2435872554779053, "loss": 0.5553, "nll_loss": 0.13881456851959229, "rewards/accuracies": 1.0, "rewards/chosen": -3.2157226087292656e-05, "rewards/margins": 0.22432658076286316, "rewards/rejected": -0.22435873746871948, "step": 11900 }, { "epoch": 8.230290456431534, "grad_norm": 4.019122123718262, "learning_rate": 9.83171968649147e-06, "log_odds_chosen": 11.55584716796875, "log_odds_ratio": -1.9348452042322606e-05, "logits/chosen": -0.2871738374233246, "logits/rejected": -0.4227055311203003, "logps/chosen": -0.0002205895580118522, "logps/rejected": -2.533057928085327, "loss": 0.3716, "nll_loss": 0.09290733933448792, "rewards/accuracies": 1.0, "rewards/chosen": -2.2058957256376743e-05, "rewards/margins": 0.2532837390899658, "rewards/rejected": -0.2533057928085327, "step": 11901 }, { "epoch": 8.230982019363761, "grad_norm": 4.368720054626465, "learning_rate": 9.827877670201321e-06, "log_odds_chosen": 10.433951377868652, "log_odds_ratio": -0.0005253016715869308, "logits/chosen": -0.41565749049186707, "logits/rejected": -0.5145907402038574, "logps/chosen": -0.0009341444238089025, "logps/rejected": -2.0665760040283203, "loss": 0.8229, "nll_loss": 0.20566663146018982, "rewards/accuracies": 1.0, "rewards/chosen": -9.341444092569873e-05, "rewards/margins": 0.20656421780586243, "rewards/rejected": -0.20665761828422546, "step": 11902 }, { "epoch": 8.231673582295988, "grad_norm": 3.7045254707336426, "learning_rate": 9.824035653911174e-06, "log_odds_chosen": 10.184175491333008, "log_odds_ratio": -8.395993791054934e-05, "logits/chosen": -0.3897843062877655, "logits/rejected": -0.37134748697280884, "logps/chosen": -0.0006803659489378333, "logps/rejected": -2.4080569744110107, "loss": 0.4519, "nll_loss": 0.11295488476753235, "rewards/accuracies": 1.0, "rewards/chosen": -6.80365992593579e-05, "rewards/margins": 0.2407376766204834, "rewards/rejected": -0.2408057004213333, "step": 11903 }, { "epoch": 8.232365145228215, "grad_norm": 8.1160249710083, "learning_rate": 9.820193637621024e-06, "log_odds_chosen": 10.831419944763184, "log_odds_ratio": -4.790275852428749e-05, "logits/chosen": -0.32159915566444397, "logits/rejected": -0.4292929172515869, "logps/chosen": -0.00021727457351516932, "logps/rejected": -1.9313918352127075, "loss": 0.3378, "nll_loss": 0.08443906158208847, "rewards/accuracies": 1.0, "rewards/chosen": -2.1727457351516932e-05, "rewards/margins": 0.1931174397468567, "rewards/rejected": -0.1931391954421997, "step": 11904 }, { "epoch": 8.233056708160442, "grad_norm": 8.284672737121582, "learning_rate": 9.816351621330875e-06, "log_odds_chosen": 12.164765357971191, "log_odds_ratio": -8.79414346854901e-06, "logits/chosen": -0.1950538158416748, "logits/rejected": -0.3341953158378601, "logps/chosen": -0.00016817479627206922, "logps/rejected": -3.2107934951782227, "loss": 0.6467, "nll_loss": 0.1616656333208084, "rewards/accuracies": 1.0, "rewards/chosen": -1.681747926340904e-05, "rewards/margins": 0.32106253504753113, "rewards/rejected": -0.3210793435573578, "step": 11905 }, { "epoch": 8.233748271092669, "grad_norm": 7.224637508392334, "learning_rate": 9.812509605040726e-06, "log_odds_chosen": 11.125343322753906, "log_odds_ratio": -0.00014493428170681, "logits/chosen": 0.05868227779865265, "logits/rejected": 0.05458486080169678, "logps/chosen": -0.00013728538760915399, "logps/rejected": -2.3505802154541016, "loss": 0.3263, "nll_loss": 0.08156958967447281, "rewards/accuracies": 1.0, "rewards/chosen": -1.3728540579904802e-05, "rewards/margins": 0.23504430055618286, "rewards/rejected": -0.2350580394268036, "step": 11906 }, { "epoch": 8.234439834024895, "grad_norm": 4.543659210205078, "learning_rate": 9.808667588750577e-06, "log_odds_chosen": 11.368104934692383, "log_odds_ratio": -0.00010937463230220601, "logits/chosen": -0.16547857224941254, "logits/rejected": -0.19322216510772705, "logps/chosen": -0.0007860024925321341, "logps/rejected": -2.766275405883789, "loss": 0.4289, "nll_loss": 0.10720409452915192, "rewards/accuracies": 1.0, "rewards/chosen": -7.86002492532134e-05, "rewards/margins": 0.27654892206192017, "rewards/rejected": -0.2766275405883789, "step": 11907 }, { "epoch": 8.235131396957122, "grad_norm": 3.6898257732391357, "learning_rate": 9.804825572460427e-06, "log_odds_chosen": 10.989175796508789, "log_odds_ratio": -0.00010870847472688183, "logits/chosen": -0.31423258781433105, "logits/rejected": -0.5556448101997375, "logps/chosen": -0.00040482316398993134, "logps/rejected": -2.564761161804199, "loss": 0.8335, "nll_loss": 0.2083652764558792, "rewards/accuracies": 1.0, "rewards/chosen": -4.048231494380161e-05, "rewards/margins": 0.25643566250801086, "rewards/rejected": -0.25647613406181335, "step": 11908 }, { "epoch": 8.235822959889349, "grad_norm": 6.863066673278809, "learning_rate": 9.80098355617028e-06, "log_odds_chosen": 11.990863800048828, "log_odds_ratio": -2.5272656785091385e-05, "logits/chosen": -0.513546347618103, "logits/rejected": -0.5886914134025574, "logps/chosen": -0.00012314581545069814, "logps/rejected": -2.846043348312378, "loss": 0.2662, "nll_loss": 0.06654417514801025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2314581908867694e-05, "rewards/margins": 0.28459203243255615, "rewards/rejected": -0.28460434079170227, "step": 11909 }, { "epoch": 8.236514522821576, "grad_norm": 3.6549644470214844, "learning_rate": 9.797141539880129e-06, "log_odds_chosen": 10.573244094848633, "log_odds_ratio": -0.00011436867498559877, "logits/chosen": 0.0248430036008358, "logits/rejected": 0.11080904304981232, "logps/chosen": -0.00022818223806098104, "logps/rejected": -2.2123470306396484, "loss": 0.3748, "nll_loss": 0.0936833918094635, "rewards/accuracies": 1.0, "rewards/chosen": -2.2818225261289626e-05, "rewards/margins": 0.22121189534664154, "rewards/rejected": -0.22123470902442932, "step": 11910 }, { "epoch": 8.237206085753803, "grad_norm": 4.8388495445251465, "learning_rate": 9.79329952358998e-06, "log_odds_chosen": 11.364583969116211, "log_odds_ratio": -0.00023915823840070516, "logits/chosen": -0.8164856433868408, "logits/rejected": -0.8062664270401001, "logps/chosen": -0.0001275077520404011, "logps/rejected": -2.420968532562256, "loss": 0.3621, "nll_loss": 0.09050973504781723, "rewards/accuracies": 1.0, "rewards/chosen": -1.2750776477332693e-05, "rewards/margins": 0.2420840859413147, "rewards/rejected": -0.24209685623645782, "step": 11911 }, { "epoch": 8.23789764868603, "grad_norm": 3.9634106159210205, "learning_rate": 9.789457507299832e-06, "log_odds_chosen": 10.325061798095703, "log_odds_ratio": -6.946113717276603e-05, "logits/chosen": -0.23887857794761658, "logits/rejected": -0.2788199484348297, "logps/chosen": -0.0012760156532749534, "logps/rejected": -2.2283194065093994, "loss": 0.4579, "nll_loss": 0.11447641253471375, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012760156823787838, "rewards/margins": 0.22270435094833374, "rewards/rejected": -0.22283194959163666, "step": 11912 }, { "epoch": 8.238589211618256, "grad_norm": 3.8492555618286133, "learning_rate": 9.785615491009683e-06, "log_odds_chosen": 11.542254447937012, "log_odds_ratio": -1.5774694475112483e-05, "logits/chosen": -0.5774122476577759, "logits/rejected": -0.5654475092887878, "logps/chosen": -7.090227882144973e-05, "logps/rejected": -2.098196029663086, "loss": 0.3943, "nll_loss": 0.09858129918575287, "rewards/accuracies": 1.0, "rewards/chosen": -7.090227882144973e-06, "rewards/margins": 0.2098124921321869, "rewards/rejected": -0.20981959998607635, "step": 11913 }, { "epoch": 8.239280774550483, "grad_norm": 5.6373982429504395, "learning_rate": 9.781773474719534e-06, "log_odds_chosen": 11.087379455566406, "log_odds_ratio": -8.546755998395383e-05, "logits/chosen": -0.6590429544448853, "logits/rejected": -0.7686599493026733, "logps/chosen": -0.00012145326763857156, "logps/rejected": -2.06939959526062, "loss": 0.3423, "nll_loss": 0.08557528257369995, "rewards/accuracies": 1.0, "rewards/chosen": -1.2145326763857156e-05, "rewards/margins": 0.2069278210401535, "rewards/rejected": -0.2069399654865265, "step": 11914 }, { "epoch": 8.23997233748271, "grad_norm": 4.372389793395996, "learning_rate": 9.777931458429384e-06, "log_odds_chosen": 10.3779935836792, "log_odds_ratio": -6.900944572407752e-05, "logits/chosen": -0.49039220809936523, "logits/rejected": -0.45434218645095825, "logps/chosen": -0.00029394644661806524, "logps/rejected": -2.24226450920105, "loss": 0.4261, "nll_loss": 0.10651591420173645, "rewards/accuracies": 1.0, "rewards/chosen": -2.9394643206615e-05, "rewards/margins": 0.22419705986976624, "rewards/rejected": -0.2242264598608017, "step": 11915 }, { "epoch": 8.240663900414937, "grad_norm": 3.81577730178833, "learning_rate": 9.774089442139235e-06, "log_odds_chosen": 11.480928421020508, "log_odds_ratio": -0.00013029044202994555, "logits/chosen": -0.25032010674476624, "logits/rejected": -0.3787113130092621, "logps/chosen": -0.00018976113642565906, "logps/rejected": -2.105654239654541, "loss": 0.3038, "nll_loss": 0.0759335458278656, "rewards/accuracies": 1.0, "rewards/chosen": -1.8976112187374383e-05, "rewards/margins": 0.21054644882678986, "rewards/rejected": -0.210565447807312, "step": 11916 }, { "epoch": 8.241355463347164, "grad_norm": 3.03281569480896, "learning_rate": 9.770247425849086e-06, "log_odds_chosen": 10.569976806640625, "log_odds_ratio": -0.00010669581388356164, "logits/chosen": -0.13931097090244293, "logits/rejected": -0.2053048312664032, "logps/chosen": -0.00015964414342306554, "logps/rejected": -1.923472285270691, "loss": 0.2408, "nll_loss": 0.06017881631851196, "rewards/accuracies": 1.0, "rewards/chosen": -1.5964415069902316e-05, "rewards/margins": 0.19233126938343048, "rewards/rejected": -0.1923472285270691, "step": 11917 }, { "epoch": 8.24204702627939, "grad_norm": 3.186953544616699, "learning_rate": 9.766405409558938e-06, "log_odds_chosen": 11.287572860717773, "log_odds_ratio": -3.111410478595644e-05, "logits/chosen": 0.07891267538070679, "logits/rejected": 0.013783074915409088, "logps/chosen": -0.00014697012375108898, "logps/rejected": -2.314755439758301, "loss": 0.3065, "nll_loss": 0.07661764323711395, "rewards/accuracies": 1.0, "rewards/chosen": -1.4697012375108898e-05, "rewards/margins": 0.23146085441112518, "rewards/rejected": -0.23147554695606232, "step": 11918 }, { "epoch": 8.242738589211617, "grad_norm": 5.805723190307617, "learning_rate": 9.762563393268787e-06, "log_odds_chosen": 12.481376647949219, "log_odds_ratio": -7.375199857051484e-06, "logits/chosen": -0.5111141800880432, "logits/rejected": -0.5841240286827087, "logps/chosen": -5.368373604142107e-05, "logps/rejected": -2.6199378967285156, "loss": 0.4566, "nll_loss": 0.11414913088083267, "rewards/accuracies": 1.0, "rewards/chosen": -5.368373422243167e-06, "rewards/margins": 0.26198846101760864, "rewards/rejected": -0.2619938254356384, "step": 11919 }, { "epoch": 8.243430152143844, "grad_norm": 3.241328716278076, "learning_rate": 9.758721376978638e-06, "log_odds_chosen": 11.063311576843262, "log_odds_ratio": -0.00022662435367237777, "logits/chosen": -0.35234570503234863, "logits/rejected": -0.3682654798030853, "logps/chosen": -0.00032121199183166027, "logps/rejected": -2.4346401691436768, "loss": 0.2577, "nll_loss": 0.06439036130905151, "rewards/accuracies": 1.0, "rewards/chosen": -3.212119918316603e-05, "rewards/margins": 0.24343189597129822, "rewards/rejected": -0.24346402287483215, "step": 11920 }, { "epoch": 8.244121715076071, "grad_norm": 3.0784521102905273, "learning_rate": 9.75487936068849e-06, "log_odds_chosen": 11.698726654052734, "log_odds_ratio": -2.0082336050109006e-05, "logits/chosen": -0.3633999228477478, "logits/rejected": -0.40448373556137085, "logps/chosen": -0.0005937843234278262, "logps/rejected": -3.2323977947235107, "loss": 0.3206, "nll_loss": 0.08015991002321243, "rewards/accuracies": 1.0, "rewards/chosen": -5.937842797720805e-05, "rewards/margins": 0.3231803774833679, "rewards/rejected": -0.3232397735118866, "step": 11921 }, { "epoch": 8.244813278008298, "grad_norm": 3.568092107772827, "learning_rate": 9.751037344398341e-06, "log_odds_chosen": 11.089823722839355, "log_odds_ratio": -0.00012849707854911685, "logits/chosen": -0.09959299862384796, "logits/rejected": -0.22197115421295166, "logps/chosen": -0.0003033549874089658, "logps/rejected": -2.257047176361084, "loss": 0.4518, "nll_loss": 0.11293190717697144, "rewards/accuracies": 1.0, "rewards/chosen": -3.0335499104694463e-05, "rewards/margins": 0.22567439079284668, "rewards/rejected": -0.22570469975471497, "step": 11922 }, { "epoch": 8.245504840940525, "grad_norm": 5.759087562561035, "learning_rate": 9.747195328108192e-06, "log_odds_chosen": 12.131702423095703, "log_odds_ratio": -1.2354894352029078e-05, "logits/chosen": -0.3387818932533264, "logits/rejected": -0.459004282951355, "logps/chosen": -0.00013360743469092995, "logps/rejected": -2.852360725402832, "loss": 0.5861, "nll_loss": 0.14653277397155762, "rewards/accuracies": 1.0, "rewards/chosen": -1.3360744560486637e-05, "rewards/margins": 0.28522270917892456, "rewards/rejected": -0.28523609042167664, "step": 11923 }, { "epoch": 8.246196403872752, "grad_norm": 4.073774814605713, "learning_rate": 9.743353311818043e-06, "log_odds_chosen": 11.734428405761719, "log_odds_ratio": -1.382894697599113e-05, "logits/chosen": -0.06801869720220566, "logits/rejected": -0.16279873251914978, "logps/chosen": -0.00010149933223146945, "logps/rejected": -2.4640071392059326, "loss": 0.4536, "nll_loss": 0.11338850855827332, "rewards/accuracies": 1.0, "rewards/chosen": -1.0149933586944826e-05, "rewards/margins": 0.24639055132865906, "rewards/rejected": -0.24640071392059326, "step": 11924 }, { "epoch": 8.24688796680498, "grad_norm": 4.897947788238525, "learning_rate": 9.739511295527893e-06, "log_odds_chosen": 10.829876899719238, "log_odds_ratio": -8.704655920155346e-05, "logits/chosen": 0.05027832090854645, "logits/rejected": 0.09646852314472198, "logps/chosen": -0.0003285640850663185, "logps/rejected": -2.5738582611083984, "loss": 0.343, "nll_loss": 0.08574408292770386, "rewards/accuracies": 1.0, "rewards/chosen": -3.285640923422761e-05, "rewards/margins": 0.2573529779911041, "rewards/rejected": -0.25738582015037537, "step": 11925 }, { "epoch": 8.247579529737205, "grad_norm": 4.184667587280273, "learning_rate": 9.735669279237744e-06, "log_odds_chosen": 10.841010093688965, "log_odds_ratio": -4.2419906094437465e-05, "logits/chosen": -0.17443953454494476, "logits/rejected": -0.2786588668823242, "logps/chosen": -0.00013494440645445138, "logps/rejected": -1.8008975982666016, "loss": 0.4378, "nll_loss": 0.10945791751146317, "rewards/accuracies": 1.0, "rewards/chosen": -1.349444119114196e-05, "rewards/margins": 0.18007627129554749, "rewards/rejected": -0.18008975684642792, "step": 11926 }, { "epoch": 8.248271092669434, "grad_norm": 5.399971961975098, "learning_rate": 9.731827262947597e-06, "log_odds_chosen": 10.012310028076172, "log_odds_ratio": -0.0003816061362158507, "logits/chosen": -0.12254303693771362, "logits/rejected": -0.08368848264217377, "logps/chosen": -0.0005873021436855197, "logps/rejected": -2.2701234817504883, "loss": 0.4698, "nll_loss": 0.11741822957992554, "rewards/accuracies": 1.0, "rewards/chosen": -5.8730216551339254e-05, "rewards/margins": 0.22695361077785492, "rewards/rejected": -0.22701233625411987, "step": 11927 }, { "epoch": 8.248962655601659, "grad_norm": 4.6173930168151855, "learning_rate": 9.727985246657446e-06, "log_odds_chosen": 11.417407989501953, "log_odds_ratio": -0.00016668268654029816, "logits/chosen": -0.17062698304653168, "logits/rejected": -0.256955087184906, "logps/chosen": -0.0002462010888848454, "logps/rejected": -2.1261165142059326, "loss": 0.3565, "nll_loss": 0.08911935985088348, "rewards/accuracies": 1.0, "rewards/chosen": -2.4620108888484538e-05, "rewards/margins": 0.21258702874183655, "rewards/rejected": -0.21261164546012878, "step": 11928 }, { "epoch": 8.249654218533887, "grad_norm": 6.097728252410889, "learning_rate": 9.724143230367296e-06, "log_odds_chosen": 11.014291763305664, "log_odds_ratio": -4.301685112295672e-05, "logits/chosen": -0.14947861433029175, "logits/rejected": -0.22163355350494385, "logps/chosen": -0.00014046431169845164, "logps/rejected": -2.1883559226989746, "loss": 0.5594, "nll_loss": 0.1398484706878662, "rewards/accuracies": 1.0, "rewards/chosen": -1.4046430806047283e-05, "rewards/margins": 0.21882155537605286, "rewards/rejected": -0.21883559226989746, "step": 11929 }, { "epoch": 8.250345781466113, "grad_norm": 3.72836971282959, "learning_rate": 9.720301214077149e-06, "log_odds_chosen": 11.393030166625977, "log_odds_ratio": -1.6560083167860284e-05, "logits/chosen": -0.37463024258613586, "logits/rejected": -0.41027507185935974, "logps/chosen": -0.00012916355626657605, "logps/rejected": -2.406658172607422, "loss": 0.4886, "nll_loss": 0.1221555545926094, "rewards/accuracies": 1.0, "rewards/chosen": -1.2916356354253367e-05, "rewards/margins": 0.2406529039144516, "rewards/rejected": -0.24066582322120667, "step": 11930 }, { "epoch": 8.251037344398341, "grad_norm": 3.9670143127441406, "learning_rate": 9.716459197787e-06, "log_odds_chosen": 11.113236427307129, "log_odds_ratio": -2.2357296984409913e-05, "logits/chosen": -0.28098994493484497, "logits/rejected": -0.2765815854072571, "logps/chosen": -9.308641165262088e-05, "logps/rejected": -1.8025569915771484, "loss": 0.3587, "nll_loss": 0.08968042582273483, "rewards/accuracies": 1.0, "rewards/chosen": -9.308640983363148e-06, "rewards/margins": 0.18024641275405884, "rewards/rejected": -0.180255725979805, "step": 11931 }, { "epoch": 8.251728907330566, "grad_norm": 8.630131721496582, "learning_rate": 9.71261718149685e-06, "log_odds_chosen": 9.948583602905273, "log_odds_ratio": -9.74518625298515e-05, "logits/chosen": -0.2843390703201294, "logits/rejected": -0.3486748933792114, "logps/chosen": -0.000242379741393961, "logps/rejected": -1.5226190090179443, "loss": 0.4809, "nll_loss": 0.12021724879741669, "rewards/accuracies": 1.0, "rewards/chosen": -2.4237977413577028e-05, "rewards/margins": 0.1522376537322998, "rewards/rejected": -0.152261883020401, "step": 11932 }, { "epoch": 8.252420470262795, "grad_norm": 4.932256698608398, "learning_rate": 9.708775165206701e-06, "log_odds_chosen": 12.347237586975098, "log_odds_ratio": -1.0626763469190337e-05, "logits/chosen": -0.16214779019355774, "logits/rejected": -0.29443246126174927, "logps/chosen": -0.0003793642681557685, "logps/rejected": -3.688446044921875, "loss": 0.4671, "nll_loss": 0.11676811426877975, "rewards/accuracies": 1.0, "rewards/chosen": -3.7936428270768374e-05, "rewards/margins": 0.3688066303730011, "rewards/rejected": -0.368844598531723, "step": 11933 }, { "epoch": 8.25311203319502, "grad_norm": 4.48406982421875, "learning_rate": 9.704933148916552e-06, "log_odds_chosen": 11.195205688476562, "log_odds_ratio": -8.202636672649533e-05, "logits/chosen": -0.11493605375289917, "logits/rejected": -0.2281782329082489, "logps/chosen": -0.00037876551505178213, "logps/rejected": -2.323265552520752, "loss": 0.3885, "nll_loss": 0.09712628275156021, "rewards/accuracies": 1.0, "rewards/chosen": -3.787655441556126e-05, "rewards/margins": 0.2322886884212494, "rewards/rejected": -0.23232656717300415, "step": 11934 }, { "epoch": 8.253803596127248, "grad_norm": 4.716032981872559, "learning_rate": 9.701091132626403e-06, "log_odds_chosen": 10.351265907287598, "log_odds_ratio": -9.08208021428436e-05, "logits/chosen": -0.022960372269153595, "logits/rejected": -0.1691531538963318, "logps/chosen": -0.00030095313559286296, "logps/rejected": -1.9174537658691406, "loss": 0.3216, "nll_loss": 0.08038497716188431, "rewards/accuracies": 1.0, "rewards/chosen": -3.0095314286882058e-05, "rewards/margins": 0.191715270280838, "rewards/rejected": -0.19174537062644958, "step": 11935 }, { "epoch": 8.254495159059474, "grad_norm": 4.077927112579346, "learning_rate": 9.697249116336253e-06, "log_odds_chosen": 10.980993270874023, "log_odds_ratio": -6.214114546310157e-05, "logits/chosen": -0.43654143810272217, "logits/rejected": -0.4607735872268677, "logps/chosen": -0.00015193871513474733, "logps/rejected": -1.98684561252594, "loss": 0.4099, "nll_loss": 0.10245777666568756, "rewards/accuracies": 1.0, "rewards/chosen": -1.5193872059171554e-05, "rewards/margins": 0.19866937398910522, "rewards/rejected": -0.19868457317352295, "step": 11936 }, { "epoch": 8.255186721991702, "grad_norm": 3.7622458934783936, "learning_rate": 9.693407100046104e-06, "log_odds_chosen": 10.723194122314453, "log_odds_ratio": -6.372190546244383e-05, "logits/chosen": -0.36659157276153564, "logits/rejected": -0.41019150614738464, "logps/chosen": -0.0001748724898789078, "logps/rejected": -2.0019383430480957, "loss": 0.3297, "nll_loss": 0.08241233229637146, "rewards/accuracies": 1.0, "rewards/chosen": -1.748724935168866e-05, "rewards/margins": 0.20017635822296143, "rewards/rejected": -0.2001938372850418, "step": 11937 }, { "epoch": 8.255878284923927, "grad_norm": 3.4987776279449463, "learning_rate": 9.689565083755955e-06, "log_odds_chosen": 11.87031364440918, "log_odds_ratio": -4.831314799957909e-05, "logits/chosen": -0.13452598452568054, "logits/rejected": -0.32432645559310913, "logps/chosen": -0.00027610603137873113, "logps/rejected": -2.5127694606781006, "loss": 0.3822, "nll_loss": 0.09553464502096176, "rewards/accuracies": 1.0, "rewards/chosen": -2.7610603865468875e-05, "rewards/margins": 0.2512493431568146, "rewards/rejected": -0.25127696990966797, "step": 11938 }, { "epoch": 8.256569847856156, "grad_norm": 3.179476737976074, "learning_rate": 9.685723067465806e-06, "log_odds_chosen": 11.044832229614258, "log_odds_ratio": -0.0004461394564714283, "logits/chosen": -0.328361451625824, "logits/rejected": -0.29957258701324463, "logps/chosen": -0.0006780716357752681, "logps/rejected": -2.9000000953674316, "loss": 0.2553, "nll_loss": 0.06378158181905746, "rewards/accuracies": 1.0, "rewards/chosen": -6.780715921195224e-05, "rewards/margins": 0.2899321913719177, "rewards/rejected": -0.28999999165534973, "step": 11939 }, { "epoch": 8.25726141078838, "grad_norm": 3.719775915145874, "learning_rate": 9.681881051175658e-06, "log_odds_chosen": 11.379352569580078, "log_odds_ratio": -4.088229616172612e-05, "logits/chosen": -0.5665051341056824, "logits/rejected": -0.6805068850517273, "logps/chosen": -8.75107798492536e-05, "logps/rejected": -1.9972556829452515, "loss": 0.4335, "nll_loss": 0.10838184505701065, "rewards/accuracies": 1.0, "rewards/chosen": -8.751078894420061e-06, "rewards/margins": 0.19971680641174316, "rewards/rejected": -0.19972556829452515, "step": 11940 }, { "epoch": 8.25795297372061, "grad_norm": 4.267411708831787, "learning_rate": 9.678039034885509e-06, "log_odds_chosen": 11.939834594726562, "log_odds_ratio": -2.9243588869576342e-05, "logits/chosen": -0.52231764793396, "logits/rejected": -0.5219714641571045, "logps/chosen": -0.00013408969971351326, "logps/rejected": -2.323073387145996, "loss": 0.4224, "nll_loss": 0.1055847555398941, "rewards/accuracies": 1.0, "rewards/chosen": -1.3408969607553445e-05, "rewards/margins": 0.23229394853115082, "rewards/rejected": -0.2323073446750641, "step": 11941 }, { "epoch": 8.258644536652836, "grad_norm": 3.8554868698120117, "learning_rate": 9.674197018595358e-06, "log_odds_chosen": 12.19814682006836, "log_odds_ratio": -1.4316083252197132e-05, "logits/chosen": -0.8943642377853394, "logits/rejected": -0.9774724245071411, "logps/chosen": -0.00010931852739304304, "logps/rejected": -2.5393433570861816, "loss": 0.3261, "nll_loss": 0.08151516318321228, "rewards/accuracies": 1.0, "rewards/chosen": -1.0931853466900066e-05, "rewards/margins": 0.2539233863353729, "rewards/rejected": -0.2539343237876892, "step": 11942 }, { "epoch": 8.259336099585063, "grad_norm": 5.9453864097595215, "learning_rate": 9.67035500230521e-06, "log_odds_chosen": 10.217976570129395, "log_odds_ratio": -0.0001286189362872392, "logits/chosen": -0.23656044900417328, "logits/rejected": -0.308110773563385, "logps/chosen": -0.0004473762819543481, "logps/rejected": -1.6824361085891724, "loss": 0.4505, "nll_loss": 0.11260005086660385, "rewards/accuracies": 1.0, "rewards/chosen": -4.4737626012647524e-05, "rewards/margins": 0.1681988686323166, "rewards/rejected": -0.1682436168193817, "step": 11943 }, { "epoch": 8.26002766251729, "grad_norm": 3.4335649013519287, "learning_rate": 9.666512986015061e-06, "log_odds_chosen": 9.835412979125977, "log_odds_ratio": -7.321751763811335e-05, "logits/chosen": -0.7244390845298767, "logits/rejected": -0.7772824168205261, "logps/chosen": -0.0006460523582063615, "logps/rejected": -1.6226214170455933, "loss": 0.2635, "nll_loss": 0.06585706025362015, "rewards/accuracies": 1.0, "rewards/chosen": -6.460522854467854e-05, "rewards/margins": 0.1621975302696228, "rewards/rejected": -0.16226214170455933, "step": 11944 }, { "epoch": 8.260719225449517, "grad_norm": 4.816346645355225, "learning_rate": 9.662670969724912e-06, "log_odds_chosen": 11.229515075683594, "log_odds_ratio": -3.3624597563175485e-05, "logits/chosen": -0.19780333340168, "logits/rejected": -0.20678195357322693, "logps/chosen": -0.00016987889830488712, "logps/rejected": -2.421178102493286, "loss": 0.5191, "nll_loss": 0.12976379692554474, "rewards/accuracies": 1.0, "rewards/chosen": -1.698788946669083e-05, "rewards/margins": 0.24210083484649658, "rewards/rejected": -0.24211782217025757, "step": 11945 }, { "epoch": 8.261410788381744, "grad_norm": 3.3699300289154053, "learning_rate": 9.658828953434763e-06, "log_odds_chosen": 10.996805191040039, "log_odds_ratio": -0.00020430742006283253, "logits/chosen": -0.3011920750141144, "logits/rejected": -0.26979607343673706, "logps/chosen": -0.00016290269559249282, "logps/rejected": -1.726262092590332, "loss": 0.3178, "nll_loss": 0.07941769808530807, "rewards/accuracies": 1.0, "rewards/chosen": -1.6290270650642924e-05, "rewards/margins": 0.17260992527008057, "rewards/rejected": -0.17262621223926544, "step": 11946 }, { "epoch": 8.26210235131397, "grad_norm": 5.184370517730713, "learning_rate": 9.654986937144613e-06, "log_odds_chosen": 9.627692222595215, "log_odds_ratio": -0.000175231893081218, "logits/chosen": -0.6078090071678162, "logits/rejected": -0.6238219141960144, "logps/chosen": -0.00043194310273975134, "logps/rejected": -1.395906686782837, "loss": 0.3988, "nll_loss": 0.0996830016374588, "rewards/accuracies": 1.0, "rewards/chosen": -4.319431172916666e-05, "rewards/margins": 0.1395474672317505, "rewards/rejected": -0.13959068059921265, "step": 11947 }, { "epoch": 8.262793914246197, "grad_norm": 3.9152684211730957, "learning_rate": 9.651144920854464e-06, "log_odds_chosen": 11.434577941894531, "log_odds_ratio": -5.2444524044403806e-05, "logits/chosen": 0.019659768790006638, "logits/rejected": -0.13808220624923706, "logps/chosen": -0.0010777817806228995, "logps/rejected": -3.294294595718384, "loss": 0.3711, "nll_loss": 0.09276201575994492, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010777818533824757, "rewards/margins": 0.3293216824531555, "rewards/rejected": -0.3294294476509094, "step": 11948 }, { "epoch": 8.263485477178424, "grad_norm": 4.5004143714904785, "learning_rate": 9.647302904564317e-06, "log_odds_chosen": 10.837656021118164, "log_odds_ratio": -0.00035609930637292564, "logits/chosen": 0.02208644151687622, "logits/rejected": -0.09548585116863251, "logps/chosen": -0.0006469839718192816, "logps/rejected": -2.8256313800811768, "loss": 0.4955, "nll_loss": 0.1238352507352829, "rewards/accuracies": 1.0, "rewards/chosen": -6.469839718192816e-05, "rewards/margins": 0.28249841928482056, "rewards/rejected": -0.28256314992904663, "step": 11949 }, { "epoch": 8.264177040110651, "grad_norm": 5.343628406524658, "learning_rate": 9.643460888274167e-06, "log_odds_chosen": 11.494791030883789, "log_odds_ratio": -4.55570625490509e-05, "logits/chosen": -0.24458926916122437, "logits/rejected": -0.27908527851104736, "logps/chosen": -0.00032541013206355274, "logps/rejected": -2.9635403156280518, "loss": 0.3951, "nll_loss": 0.09877359867095947, "rewards/accuracies": 1.0, "rewards/chosen": -3.2541014661546797e-05, "rewards/margins": 0.2963215112686157, "rewards/rejected": -0.2963540256023407, "step": 11950 }, { "epoch": 8.264868603042878, "grad_norm": 3.323399066925049, "learning_rate": 9.639618871984018e-06, "log_odds_chosen": 10.727117538452148, "log_odds_ratio": -5.0795613788068295e-05, "logits/chosen": -0.5654752254486084, "logits/rejected": -0.6004953980445862, "logps/chosen": -0.00021166335500311106, "logps/rejected": -1.810473918914795, "loss": 0.3586, "nll_loss": 0.08963505923748016, "rewards/accuracies": 1.0, "rewards/chosen": -2.1166335500311106e-05, "rewards/margins": 0.18102622032165527, "rewards/rejected": -0.18104739487171173, "step": 11951 }, { "epoch": 8.265560165975105, "grad_norm": 4.638492584228516, "learning_rate": 9.635776855693869e-06, "log_odds_chosen": 10.843294143676758, "log_odds_ratio": -3.520100290188566e-05, "logits/chosen": -0.6220878958702087, "logits/rejected": -0.688339114189148, "logps/chosen": -0.00038481189403682947, "logps/rejected": -2.357971668243408, "loss": 0.3556, "nll_loss": 0.08889587223529816, "rewards/accuracies": 1.0, "rewards/chosen": -3.84811864932999e-05, "rewards/margins": 0.23575866222381592, "rewards/rejected": -0.23579715192317963, "step": 11952 }, { "epoch": 8.266251728907331, "grad_norm": 3.359079360961914, "learning_rate": 9.63193483940372e-06, "log_odds_chosen": 11.392254829406738, "log_odds_ratio": -2.3173997760750353e-05, "logits/chosen": -0.22304686903953552, "logits/rejected": -0.45449167490005493, "logps/chosen": -0.000344922038493678, "logps/rejected": -2.812790870666504, "loss": 0.3852, "nll_loss": 0.09629756212234497, "rewards/accuracies": 1.0, "rewards/chosen": -3.4492208214942366e-05, "rewards/margins": 0.28124457597732544, "rewards/rejected": -0.2812790870666504, "step": 11953 }, { "epoch": 8.266943291839558, "grad_norm": 7.765052795410156, "learning_rate": 9.62809282311357e-06, "log_odds_chosen": 11.814998626708984, "log_odds_ratio": -0.00015538069419562817, "logits/chosen": -0.08499948680400848, "logits/rejected": -0.2897992730140686, "logps/chosen": -0.0007674909429624677, "logps/rejected": -3.324516773223877, "loss": 0.5177, "nll_loss": 0.12940450012683868, "rewards/accuracies": 1.0, "rewards/chosen": -7.674910739297047e-05, "rewards/margins": 0.3323749303817749, "rewards/rejected": -0.3324517011642456, "step": 11954 }, { "epoch": 8.267634854771785, "grad_norm": 4.8267974853515625, "learning_rate": 9.624250806823423e-06, "log_odds_chosen": 10.045418739318848, "log_odds_ratio": -9.528405644232407e-05, "logits/chosen": -0.7230570316314697, "logits/rejected": -0.7842307090759277, "logps/chosen": -0.00027150430832989514, "logps/rejected": -1.7861135005950928, "loss": 0.4108, "nll_loss": 0.10269634425640106, "rewards/accuracies": 1.0, "rewards/chosen": -2.7150430469191633e-05, "rewards/margins": 0.17858418822288513, "rewards/rejected": -0.17861135303974152, "step": 11955 }, { "epoch": 8.268326417704012, "grad_norm": 5.728063106536865, "learning_rate": 9.620408790533272e-06, "log_odds_chosen": 10.704045295715332, "log_odds_ratio": -0.00032331154216080904, "logits/chosen": -0.1421336978673935, "logits/rejected": -0.20743423700332642, "logps/chosen": -0.0004792292893398553, "logps/rejected": -2.226273775100708, "loss": 0.473, "nll_loss": 0.11822532117366791, "rewards/accuracies": 1.0, "rewards/chosen": -4.7922931116772816e-05, "rewards/margins": 0.2225794643163681, "rewards/rejected": -0.22262738645076752, "step": 11956 }, { "epoch": 8.269017980636239, "grad_norm": 3.7819995880126953, "learning_rate": 9.616566774243123e-06, "log_odds_chosen": 10.83586311340332, "log_odds_ratio": -3.1590192520525306e-05, "logits/chosen": -0.4727594554424286, "logits/rejected": -0.6352878212928772, "logps/chosen": -6.83098696754314e-05, "logps/rejected": -1.3699564933776855, "loss": 0.3469, "nll_loss": 0.08671838790178299, "rewards/accuracies": 1.0, "rewards/chosen": -6.830986421846319e-06, "rewards/margins": 0.1369888186454773, "rewards/rejected": -0.13699564337730408, "step": 11957 }, { "epoch": 8.269709543568466, "grad_norm": 3.127134323120117, "learning_rate": 9.612724757952975e-06, "log_odds_chosen": 10.934288024902344, "log_odds_ratio": -4.619704486685805e-05, "logits/chosen": -0.042971763759851456, "logits/rejected": -0.12262189388275146, "logps/chosen": -0.00011522687418619171, "logps/rejected": -1.725632667541504, "loss": 0.4098, "nll_loss": 0.10243922472000122, "rewards/accuracies": 1.0, "rewards/chosen": -1.1522687600518111e-05, "rewards/margins": 0.17255175113677979, "rewards/rejected": -0.17256325483322144, "step": 11958 }, { "epoch": 8.270401106500692, "grad_norm": 3.787686824798584, "learning_rate": 9.608882741662826e-06, "log_odds_chosen": 11.660400390625, "log_odds_ratio": -7.071129948599264e-05, "logits/chosen": -0.18112653493881226, "logits/rejected": -0.2788428068161011, "logps/chosen": -0.0006761641707271338, "logps/rejected": -3.0301501750946045, "loss": 0.3963, "nll_loss": 0.09906087815761566, "rewards/accuracies": 1.0, "rewards/chosen": -6.76164127071388e-05, "rewards/margins": 0.30294740200042725, "rewards/rejected": -0.30301499366760254, "step": 11959 }, { "epoch": 8.27109266943292, "grad_norm": 4.528950214385986, "learning_rate": 9.605040725372676e-06, "log_odds_chosen": 11.360909461975098, "log_odds_ratio": -0.0002581155567895621, "logits/chosen": -0.05907332897186279, "logits/rejected": -0.15207792818546295, "logps/chosen": -0.00123701267875731, "logps/rejected": -2.7253646850585938, "loss": 0.3377, "nll_loss": 0.08440046012401581, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001237012620549649, "rewards/margins": 0.2724127769470215, "rewards/rejected": -0.2725364863872528, "step": 11960 }, { "epoch": 8.271784232365146, "grad_norm": 3.4399218559265137, "learning_rate": 9.601198709082527e-06, "log_odds_chosen": 11.670557022094727, "log_odds_ratio": -2.467823651386425e-05, "logits/chosen": -0.6992796659469604, "logits/rejected": -0.7787087559700012, "logps/chosen": -0.00024264826788567007, "logps/rejected": -2.8075666427612305, "loss": 0.3266, "nll_loss": 0.08164522796869278, "rewards/accuracies": 1.0, "rewards/chosen": -2.4264827516162768e-05, "rewards/margins": 0.2807323932647705, "rewards/rejected": -0.2807566523551941, "step": 11961 }, { "epoch": 8.272475795297373, "grad_norm": 6.1883625984191895, "learning_rate": 9.597356692792378e-06, "log_odds_chosen": 12.197917938232422, "log_odds_ratio": -1.1628735592239536e-05, "logits/chosen": -0.4272370934486389, "logits/rejected": -0.34923166036605835, "logps/chosen": -0.00018850239575840533, "logps/rejected": -3.2359747886657715, "loss": 0.3713, "nll_loss": 0.09283263236284256, "rewards/accuracies": 1.0, "rewards/chosen": -1.8850239939638413e-05, "rewards/margins": 0.3235785961151123, "rewards/rejected": -0.3235974609851837, "step": 11962 }, { "epoch": 8.2731673582296, "grad_norm": 3.554027557373047, "learning_rate": 9.593514676502229e-06, "log_odds_chosen": 10.515666961669922, "log_odds_ratio": -6.209921411937103e-05, "logits/chosen": -0.540285587310791, "logits/rejected": -0.5418251752853394, "logps/chosen": -0.000453216111054644, "logps/rejected": -2.0480105876922607, "loss": 0.3462, "nll_loss": 0.08654274046421051, "rewards/accuracies": 1.0, "rewards/chosen": -4.532161256065592e-05, "rewards/margins": 0.2047557532787323, "rewards/rejected": -0.2048010528087616, "step": 11963 }, { "epoch": 8.273858921161827, "grad_norm": 3.581557035446167, "learning_rate": 9.589672660212081e-06, "log_odds_chosen": 11.442468643188477, "log_odds_ratio": -0.0001763361506164074, "logits/chosen": -0.5076885223388672, "logits/rejected": -0.5351912975311279, "logps/chosen": -0.0005444286507554352, "logps/rejected": -2.9354920387268066, "loss": 0.4353, "nll_loss": 0.10881911218166351, "rewards/accuracies": 1.0, "rewards/chosen": -5.444286216516048e-05, "rewards/margins": 0.2934947609901428, "rewards/rejected": -0.29354918003082275, "step": 11964 }, { "epoch": 8.274550484094053, "grad_norm": 9.163952827453613, "learning_rate": 9.58583064392193e-06, "log_odds_chosen": 10.351091384887695, "log_odds_ratio": -8.336950850207359e-05, "logits/chosen": -0.029397621750831604, "logits/rejected": -0.03363652527332306, "logps/chosen": -0.0003193440497852862, "logps/rejected": -2.0989863872528076, "loss": 0.6588, "nll_loss": 0.1646943837404251, "rewards/accuracies": 1.0, "rewards/chosen": -3.19344071613159e-05, "rewards/margins": 0.2098666876554489, "rewards/rejected": -0.20989862084388733, "step": 11965 }, { "epoch": 8.27524204702628, "grad_norm": 3.1572184562683105, "learning_rate": 9.581988627631781e-06, "log_odds_chosen": 11.891756057739258, "log_odds_ratio": -1.1338936019456014e-05, "logits/chosen": -0.7405799627304077, "logits/rejected": -0.771040141582489, "logps/chosen": -9.116944420384243e-05, "logps/rejected": -2.6080727577209473, "loss": 0.3485, "nll_loss": 0.08713450282812119, "rewards/accuracies": 1.0, "rewards/chosen": -9.116944056586362e-06, "rewards/margins": 0.2607981562614441, "rewards/rejected": -0.2608072757720947, "step": 11966 }, { "epoch": 8.275933609958507, "grad_norm": 3.5829575061798096, "learning_rate": 9.578146611341633e-06, "log_odds_chosen": 9.52750015258789, "log_odds_ratio": -0.0012475146213546395, "logits/chosen": -0.6426373720169067, "logits/rejected": -0.6256951689720154, "logps/chosen": -0.0017292017582803965, "logps/rejected": -1.389811635017395, "loss": 0.4721, "nll_loss": 0.11789209395647049, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017292017582803965, "rewards/margins": 0.1388082504272461, "rewards/rejected": -0.1389811635017395, "step": 11967 }, { "epoch": 8.276625172890734, "grad_norm": 4.077126979827881, "learning_rate": 9.574304595051484e-06, "log_odds_chosen": 10.81316089630127, "log_odds_ratio": -4.982240352546796e-05, "logits/chosen": -0.6174009442329407, "logits/rejected": -0.5894420146942139, "logps/chosen": -0.0002926184970419854, "logps/rejected": -2.6595776081085205, "loss": 0.3411, "nll_loss": 0.08527268469333649, "rewards/accuracies": 1.0, "rewards/chosen": -2.9261851523187943e-05, "rewards/margins": 0.2659285068511963, "rewards/rejected": -0.2659577429294586, "step": 11968 }, { "epoch": 8.27731673582296, "grad_norm": 5.055628299713135, "learning_rate": 9.570462578761335e-06, "log_odds_chosen": 11.854629516601562, "log_odds_ratio": -1.4586242286895867e-05, "logits/chosen": -0.12202761322259903, "logits/rejected": -0.17216485738754272, "logps/chosen": -0.00012257893104106188, "logps/rejected": -2.690403938293457, "loss": 0.3756, "nll_loss": 0.09390611946582794, "rewards/accuracies": 1.0, "rewards/chosen": -1.225789401360089e-05, "rewards/margins": 0.2690281569957733, "rewards/rejected": -0.26904040575027466, "step": 11969 }, { "epoch": 8.278008298755188, "grad_norm": 3.672074556350708, "learning_rate": 9.566620562471184e-06, "log_odds_chosen": 10.932299613952637, "log_odds_ratio": -5.1668266678461805e-05, "logits/chosen": -0.8195986747741699, "logits/rejected": -0.771497368812561, "logps/chosen": -0.0001285741018364206, "logps/rejected": -1.9971344470977783, "loss": 0.4194, "nll_loss": 0.10485148429870605, "rewards/accuracies": 1.0, "rewards/chosen": -1.2857411093136761e-05, "rewards/margins": 0.19970059394836426, "rewards/rejected": -0.19971343874931335, "step": 11970 }, { "epoch": 8.278699861687414, "grad_norm": 5.524552822113037, "learning_rate": 9.562778546181036e-06, "log_odds_chosen": 10.681053161621094, "log_odds_ratio": -6.293236219789833e-05, "logits/chosen": -0.4877323508262634, "logits/rejected": -0.4758765697479248, "logps/chosen": -0.00020377582404762506, "logps/rejected": -1.9765567779541016, "loss": 0.5573, "nll_loss": 0.13932693004608154, "rewards/accuracies": 1.0, "rewards/chosen": -2.0377581677166745e-05, "rewards/margins": 0.19763529300689697, "rewards/rejected": -0.19765567779541016, "step": 11971 }, { "epoch": 8.279391424619641, "grad_norm": 3.4568281173706055, "learning_rate": 9.558936529890887e-06, "log_odds_chosen": 10.67101764678955, "log_odds_ratio": -6.874544487800449e-05, "logits/chosen": -0.13481301069259644, "logits/rejected": -0.16933226585388184, "logps/chosen": -0.0008989893249236047, "logps/rejected": -1.8208454847335815, "loss": 0.4204, "nll_loss": 0.10510402172803879, "rewards/accuracies": 1.0, "rewards/chosen": -8.989893103716895e-05, "rewards/margins": 0.18199466168880463, "rewards/rejected": -0.18208454549312592, "step": 11972 }, { "epoch": 8.280082987551868, "grad_norm": 3.5602478981018066, "learning_rate": 9.555094513600738e-06, "log_odds_chosen": 11.59362506866455, "log_odds_ratio": -0.00014446736895479262, "logits/chosen": 0.11925359070301056, "logits/rejected": 0.03051447868347168, "logps/chosen": -0.00034636116470210254, "logps/rejected": -2.8364691734313965, "loss": 0.3448, "nll_loss": 0.08617541939020157, "rewards/accuracies": 1.0, "rewards/chosen": -3.463611574261449e-05, "rewards/margins": 0.28361228108406067, "rewards/rejected": -0.28364691138267517, "step": 11973 }, { "epoch": 8.280774550484095, "grad_norm": 4.688289165496826, "learning_rate": 9.551252497310589e-06, "log_odds_chosen": 11.239917755126953, "log_odds_ratio": -3.473057586234063e-05, "logits/chosen": -0.2402084618806839, "logits/rejected": -0.17532745003700256, "logps/chosen": -0.0004617611411958933, "logps/rejected": -3.1051254272460938, "loss": 1.0618, "nll_loss": 0.26544082164764404, "rewards/accuracies": 1.0, "rewards/chosen": -4.617611557478085e-05, "rewards/margins": 0.3104664087295532, "rewards/rejected": -0.31051257252693176, "step": 11974 }, { "epoch": 8.281466113416322, "grad_norm": 3.4036073684692383, "learning_rate": 9.54741048102044e-06, "log_odds_chosen": 10.610368728637695, "log_odds_ratio": -9.449348726775497e-05, "logits/chosen": -0.4558260142803192, "logits/rejected": -0.4234117865562439, "logps/chosen": -0.00013078105985186994, "logps/rejected": -1.9758434295654297, "loss": 0.3565, "nll_loss": 0.08910534530878067, "rewards/accuracies": 1.0, "rewards/chosen": -1.3078107258479577e-05, "rewards/margins": 0.1975712776184082, "rewards/rejected": -0.1975843459367752, "step": 11975 }, { "epoch": 8.282157676348548, "grad_norm": 4.087696075439453, "learning_rate": 9.54356846473029e-06, "log_odds_chosen": 10.31620979309082, "log_odds_ratio": -0.00024037774710450321, "logits/chosen": -0.2854808568954468, "logits/rejected": -0.3649406433105469, "logps/chosen": -0.0005136193940415978, "logps/rejected": -1.529449462890625, "loss": 0.4878, "nll_loss": 0.12192420661449432, "rewards/accuracies": 1.0, "rewards/chosen": -5.1361945224925876e-05, "rewards/margins": 0.15289360284805298, "rewards/rejected": -0.15294496715068817, "step": 11976 }, { "epoch": 8.282849239280775, "grad_norm": 3.270496368408203, "learning_rate": 9.539726448440143e-06, "log_odds_chosen": 11.231451034545898, "log_odds_ratio": -0.00034369956119917333, "logits/chosen": -0.326479434967041, "logits/rejected": -0.439314067363739, "logps/chosen": -0.00022332128719426692, "logps/rejected": -2.6155924797058105, "loss": 0.3325, "nll_loss": 0.08309418708086014, "rewards/accuracies": 1.0, "rewards/chosen": -2.233212762803305e-05, "rewards/margins": 0.26153692603111267, "rewards/rejected": -0.26155924797058105, "step": 11977 }, { "epoch": 8.283540802213002, "grad_norm": 4.652988910675049, "learning_rate": 9.535884432149993e-06, "log_odds_chosen": 10.786218643188477, "log_odds_ratio": -6.878216663608328e-05, "logits/chosen": -0.687461256980896, "logits/rejected": -0.6254911422729492, "logps/chosen": -0.000608769478276372, "logps/rejected": -2.4494590759277344, "loss": 0.4639, "nll_loss": 0.1159728467464447, "rewards/accuracies": 1.0, "rewards/chosen": -6.0876947827637196e-05, "rewards/margins": 0.24488502740859985, "rewards/rejected": -0.24494591355323792, "step": 11978 }, { "epoch": 8.284232365145229, "grad_norm": 3.475490093231201, "learning_rate": 9.532042415859842e-06, "log_odds_chosen": 11.721963882446289, "log_odds_ratio": -1.3145630873623304e-05, "logits/chosen": -0.3036603331565857, "logits/rejected": -0.3121589124202728, "logps/chosen": -0.0003267270512878895, "logps/rejected": -2.862849473953247, "loss": 0.4222, "nll_loss": 0.1055576428771019, "rewards/accuracies": 1.0, "rewards/chosen": -3.26727022184059e-05, "rewards/margins": 0.28625229001045227, "rewards/rejected": -0.2862849533557892, "step": 11979 }, { "epoch": 8.284923928077456, "grad_norm": 2.267528772354126, "learning_rate": 9.528200399569695e-06, "log_odds_chosen": 10.153470039367676, "log_odds_ratio": -8.534399967174977e-05, "logits/chosen": -0.3335803747177124, "logits/rejected": -0.3047267496585846, "logps/chosen": -0.00017864150868263096, "logps/rejected": -1.5260097980499268, "loss": 0.2074, "nll_loss": 0.05183638632297516, "rewards/accuracies": 1.0, "rewards/chosen": -1.7864151232060976e-05, "rewards/margins": 0.15258312225341797, "rewards/rejected": -0.1526009887456894, "step": 11980 }, { "epoch": 8.285615491009683, "grad_norm": 2.8518567085266113, "learning_rate": 9.524358383279546e-06, "log_odds_chosen": 11.519308090209961, "log_odds_ratio": -2.236364525742829e-05, "logits/chosen": -0.3141467571258545, "logits/rejected": -0.3175681531429291, "logps/chosen": -0.00032692833337932825, "logps/rejected": -2.522468090057373, "loss": 0.3041, "nll_loss": 0.07602708041667938, "rewards/accuracies": 1.0, "rewards/chosen": -3.269283479312435e-05, "rewards/margins": 0.25221410393714905, "rewards/rejected": -0.25224679708480835, "step": 11981 }, { "epoch": 8.28630705394191, "grad_norm": 5.877493858337402, "learning_rate": 9.520516366989396e-06, "log_odds_chosen": 11.447668075561523, "log_odds_ratio": -2.5962668587453663e-05, "logits/chosen": -0.521180272102356, "logits/rejected": -0.587950587272644, "logps/chosen": -0.0001497505436418578, "logps/rejected": -2.5814712047576904, "loss": 0.4525, "nll_loss": 0.11313392221927643, "rewards/accuracies": 1.0, "rewards/chosen": -1.4975053090893198e-05, "rewards/margins": 0.2581321597099304, "rewards/rejected": -0.25814712047576904, "step": 11982 }, { "epoch": 8.286998616874136, "grad_norm": 4.329975605010986, "learning_rate": 9.516674350699247e-06, "log_odds_chosen": 11.1446533203125, "log_odds_ratio": -4.236675886204466e-05, "logits/chosen": -0.2646929919719696, "logits/rejected": -0.2811059355735779, "logps/chosen": -0.00033514917595312, "logps/rejected": -2.479694128036499, "loss": 0.3655, "nll_loss": 0.09138160943984985, "rewards/accuracies": 1.0, "rewards/chosen": -3.3514916140120476e-05, "rewards/margins": 0.24793589115142822, "rewards/rejected": -0.24796943366527557, "step": 11983 }, { "epoch": 8.287690179806363, "grad_norm": 3.3373169898986816, "learning_rate": 9.512832334409098e-06, "log_odds_chosen": 11.226622581481934, "log_odds_ratio": -0.00012851627252530307, "logits/chosen": -0.06570761650800705, "logits/rejected": -0.13167516887187958, "logps/chosen": -0.0010037928586825728, "logps/rejected": -2.534026861190796, "loss": 0.3234, "nll_loss": 0.08082625269889832, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001003792931442149, "rewards/margins": 0.25330230593681335, "rewards/rejected": -0.2534027099609375, "step": 11984 }, { "epoch": 8.28838174273859, "grad_norm": 2.9590377807617188, "learning_rate": 9.508990318118949e-06, "log_odds_chosen": 9.680686950683594, "log_odds_ratio": -0.00016874133143574, "logits/chosen": -0.16577889025211334, "logits/rejected": -0.15216651558876038, "logps/chosen": -0.00026318003074266016, "logps/rejected": -1.310761570930481, "loss": 0.3329, "nll_loss": 0.08320856094360352, "rewards/accuracies": 1.0, "rewards/chosen": -2.631800452945754e-05, "rewards/margins": 0.13104984164237976, "rewards/rejected": -0.1310761570930481, "step": 11985 }, { "epoch": 8.289073305670817, "grad_norm": 5.105093955993652, "learning_rate": 9.505148301828801e-06, "log_odds_chosen": 10.774724960327148, "log_odds_ratio": -0.0006012396770529449, "logits/chosen": -0.34266242384910583, "logits/rejected": -0.4288986623287201, "logps/chosen": -0.00046030114754103124, "logps/rejected": -2.547994613647461, "loss": 0.4747, "nll_loss": 0.11861294507980347, "rewards/accuracies": 1.0, "rewards/chosen": -4.6030119847273454e-05, "rewards/margins": 0.2547534108161926, "rewards/rejected": -0.2547994554042816, "step": 11986 }, { "epoch": 8.289764868603044, "grad_norm": 3.635981798171997, "learning_rate": 9.501306285538652e-06, "log_odds_chosen": 12.766622543334961, "log_odds_ratio": -1.744990368024446e-05, "logits/chosen": -0.020735815167427063, "logits/rejected": -0.05121392011642456, "logps/chosen": -0.00016500083438586444, "logps/rejected": -3.8408167362213135, "loss": 0.3746, "nll_loss": 0.09365012496709824, "rewards/accuracies": 1.0, "rewards/chosen": -1.6500083802384324e-05, "rewards/margins": 0.384065181016922, "rewards/rejected": -0.3840816915035248, "step": 11987 }, { "epoch": 8.29045643153527, "grad_norm": 3.4699249267578125, "learning_rate": 9.4974642692485e-06, "log_odds_chosen": 10.973389625549316, "log_odds_ratio": -4.201291085337289e-05, "logits/chosen": -0.5190303325653076, "logits/rejected": -0.5497217178344727, "logps/chosen": -0.0002159866999136284, "logps/rejected": -1.9042290449142456, "loss": 0.3383, "nll_loss": 0.08458288013935089, "rewards/accuracies": 1.0, "rewards/chosen": -2.1598672901745886e-05, "rewards/margins": 0.19040131568908691, "rewards/rejected": -0.190422922372818, "step": 11988 }, { "epoch": 8.291147994467497, "grad_norm": 3.639089345932007, "learning_rate": 9.493622252958353e-06, "log_odds_chosen": 10.550485610961914, "log_odds_ratio": -0.0005758869810961187, "logits/chosen": -0.05049346387386322, "logits/rejected": -0.134282186627388, "logps/chosen": -0.0009018158307299018, "logps/rejected": -2.437182903289795, "loss": 0.469, "nll_loss": 0.11719910055398941, "rewards/accuracies": 1.0, "rewards/chosen": -9.018159471452236e-05, "rewards/margins": 0.24362812936306, "rewards/rejected": -0.24371829628944397, "step": 11989 }, { "epoch": 8.291839557399724, "grad_norm": 5.109975814819336, "learning_rate": 9.489780236668204e-06, "log_odds_chosen": 11.503591537475586, "log_odds_ratio": -7.810613897163421e-05, "logits/chosen": -0.27875831723213196, "logits/rejected": -0.29320570826530457, "logps/chosen": -0.0001523328828625381, "logps/rejected": -2.7631423473358154, "loss": 0.3234, "nll_loss": 0.08085079491138458, "rewards/accuracies": 1.0, "rewards/chosen": -1.5233287740556989e-05, "rewards/margins": 0.27629899978637695, "rewards/rejected": -0.27631425857543945, "step": 11990 }, { "epoch": 8.292531120331951, "grad_norm": 8.194153785705566, "learning_rate": 9.485938220378055e-06, "log_odds_chosen": 11.782495498657227, "log_odds_ratio": -2.081832462863531e-05, "logits/chosen": -0.14003007113933563, "logits/rejected": -0.1627955138683319, "logps/chosen": -0.00012124376371502876, "logps/rejected": -2.629612922668457, "loss": 0.4105, "nll_loss": 0.1026250571012497, "rewards/accuracies": 1.0, "rewards/chosen": -1.2124375643907115e-05, "rewards/margins": 0.2629491686820984, "rewards/rejected": -0.2629612982273102, "step": 11991 }, { "epoch": 8.293222683264178, "grad_norm": 11.853130340576172, "learning_rate": 9.482096204087906e-06, "log_odds_chosen": 10.236540794372559, "log_odds_ratio": -0.00012168378452770412, "logits/chosen": -0.143966943025589, "logits/rejected": -0.17248134315013885, "logps/chosen": -0.00038400760968215764, "logps/rejected": -1.9316377639770508, "loss": 0.5801, "nll_loss": 0.1450154036283493, "rewards/accuracies": 1.0, "rewards/chosen": -3.8400761695811525e-05, "rewards/margins": 0.19312536716461182, "rewards/rejected": -0.19316376745700836, "step": 11992 }, { "epoch": 8.293914246196405, "grad_norm": 6.01344108581543, "learning_rate": 9.478254187797756e-06, "log_odds_chosen": 10.616826057434082, "log_odds_ratio": -0.0004952213494107127, "logits/chosen": 0.25140225887298584, "logits/rejected": 0.23632827401161194, "logps/chosen": -0.0021340660750865936, "logps/rejected": -2.39619779586792, "loss": 0.6051, "nll_loss": 0.1512296199798584, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021340660168789327, "rewards/margins": 0.23940636217594147, "rewards/rejected": -0.23961979150772095, "step": 11993 }, { "epoch": 8.294605809128631, "grad_norm": 3.8722987174987793, "learning_rate": 9.474412171507607e-06, "log_odds_chosen": 11.499201774597168, "log_odds_ratio": -1.2345096365606878e-05, "logits/chosen": 0.2737366557121277, "logits/rejected": 0.10590272396802902, "logps/chosen": -0.00012341570982243866, "logps/rejected": -2.270833969116211, "loss": 0.9109, "nll_loss": 0.22772641479969025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2341570254648104e-05, "rewards/margins": 0.22707104682922363, "rewards/rejected": -0.22708338499069214, "step": 11994 }, { "epoch": 8.295297372060858, "grad_norm": 4.4436492919921875, "learning_rate": 9.47057015521746e-06, "log_odds_chosen": 10.22150707244873, "log_odds_ratio": -0.00022509277914650738, "logits/chosen": 0.5555198192596436, "logits/rejected": 0.45036596059799194, "logps/chosen": -0.0005974264349788427, "logps/rejected": -1.5443403720855713, "loss": 0.3662, "nll_loss": 0.09152275323867798, "rewards/accuracies": 1.0, "rewards/chosen": -5.974264786345884e-05, "rewards/margins": 0.15437428653240204, "rewards/rejected": -0.15443404018878937, "step": 11995 }, { "epoch": 8.295988934993085, "grad_norm": 3.988182783126831, "learning_rate": 9.46672813892731e-06, "log_odds_chosen": 11.658870697021484, "log_odds_ratio": -1.8149998140870593e-05, "logits/chosen": 0.2656916379928589, "logits/rejected": 0.17560166120529175, "logps/chosen": -0.0001451415300834924, "logps/rejected": -2.2492456436157227, "loss": 0.4222, "nll_loss": 0.10554390400648117, "rewards/accuracies": 1.0, "rewards/chosen": -1.451415300834924e-05, "rewards/margins": 0.22491006553173065, "rewards/rejected": -0.22492457926273346, "step": 11996 }, { "epoch": 8.296680497925312, "grad_norm": 4.026516914367676, "learning_rate": 9.462886122637161e-06, "log_odds_chosen": 11.03490924835205, "log_odds_ratio": -5.12512560817413e-05, "logits/chosen": 0.1875906139612198, "logits/rejected": -0.0007145889103412628, "logps/chosen": -0.00017578538972884417, "logps/rejected": -2.1576361656188965, "loss": 0.4374, "nll_loss": 0.10933278501033783, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578538972884417e-05, "rewards/margins": 0.21574603021144867, "rewards/rejected": -0.2157636135816574, "step": 11997 }, { "epoch": 8.297372060857539, "grad_norm": 3.844303846359253, "learning_rate": 9.459044106347012e-06, "log_odds_chosen": 11.463105201721191, "log_odds_ratio": -3.170102718286216e-05, "logits/chosen": -0.10093332827091217, "logits/rejected": -0.18920347094535828, "logps/chosen": -0.0001887732360046357, "logps/rejected": -2.466240882873535, "loss": 0.3886, "nll_loss": 0.09715703874826431, "rewards/accuracies": 1.0, "rewards/chosen": -1.8877322872867808e-05, "rewards/margins": 0.24660521745681763, "rewards/rejected": -0.24662411212921143, "step": 11998 }, { "epoch": 8.298063623789766, "grad_norm": 4.110239028930664, "learning_rate": 9.455202090056862e-06, "log_odds_chosen": 11.454631805419922, "log_odds_ratio": -1.3583087820734363e-05, "logits/chosen": -0.5618329644203186, "logits/rejected": -0.5980328917503357, "logps/chosen": -6.422075966838747e-05, "logps/rejected": -1.8239201307296753, "loss": 0.3823, "nll_loss": 0.09557496011257172, "rewards/accuracies": 1.0, "rewards/chosen": -6.422075784939807e-06, "rewards/margins": 0.18238559365272522, "rewards/rejected": -0.18239201605319977, "step": 11999 }, { "epoch": 8.298755186721992, "grad_norm": 2.6601874828338623, "learning_rate": 9.451360073766713e-06, "log_odds_chosen": 9.818875312805176, "log_odds_ratio": -0.00027578973094932735, "logits/chosen": -0.2035187929868698, "logits/rejected": -0.24134671688079834, "logps/chosen": -0.0003357850364409387, "logps/rejected": -1.7158150672912598, "loss": 0.2818, "nll_loss": 0.07043261080980301, "rewards/accuracies": 1.0, "rewards/chosen": -3.357850437168963e-05, "rewards/margins": 0.17154793441295624, "rewards/rejected": -0.17158152163028717, "step": 12000 }, { "epoch": 8.29944674965422, "grad_norm": 3.5226309299468994, "learning_rate": 9.447518057476566e-06, "log_odds_chosen": 11.647101402282715, "log_odds_ratio": -3.9287675463128835e-05, "logits/chosen": -0.18191269040107727, "logits/rejected": -0.2964354455471039, "logps/chosen": -0.00018018539412878454, "logps/rejected": -2.9349350929260254, "loss": 0.4718, "nll_loss": 0.11795572191476822, "rewards/accuracies": 1.0, "rewards/chosen": -1.8018541595665738e-05, "rewards/margins": 0.29347550868988037, "rewards/rejected": -0.29349350929260254, "step": 12001 }, { "epoch": 8.300138312586446, "grad_norm": 4.153374195098877, "learning_rate": 9.443676041186415e-06, "log_odds_chosen": 10.61652946472168, "log_odds_ratio": -0.0006325379363261163, "logits/chosen": -0.34415724873542786, "logits/rejected": -0.33158639073371887, "logps/chosen": -0.00023147767933551222, "logps/rejected": -1.5006442070007324, "loss": 0.3987, "nll_loss": 0.09960442036390305, "rewards/accuracies": 1.0, "rewards/chosen": -2.3147767933551222e-05, "rewards/margins": 0.15004128217697144, "rewards/rejected": -0.15006442368030548, "step": 12002 }, { "epoch": 8.300829875518673, "grad_norm": 4.306608200073242, "learning_rate": 9.439834024896265e-06, "log_odds_chosen": 11.898919105529785, "log_odds_ratio": -1.5399793483084068e-05, "logits/chosen": -0.06470970064401627, "logits/rejected": -0.22172331809997559, "logps/chosen": -6.676278280792758e-05, "logps/rejected": -2.223916530609131, "loss": 0.3876, "nll_loss": 0.09691078960895538, "rewards/accuracies": 1.0, "rewards/chosen": -6.676278644590639e-06, "rewards/margins": 0.2223849892616272, "rewards/rejected": -0.22239167988300323, "step": 12003 }, { "epoch": 8.3015214384509, "grad_norm": 7.656002521514893, "learning_rate": 9.435992008606116e-06, "log_odds_chosen": 12.206582069396973, "log_odds_ratio": -0.00027584240888245404, "logits/chosen": -0.37336617708206177, "logits/rejected": -0.26509660482406616, "logps/chosen": -0.00016218278324231505, "logps/rejected": -3.6707310676574707, "loss": 0.6351, "nll_loss": 0.15875668823719025, "rewards/accuracies": 1.0, "rewards/chosen": -1.6218276869039983e-05, "rewards/margins": 0.3670569062232971, "rewards/rejected": -0.36707305908203125, "step": 12004 }, { "epoch": 8.302213001383127, "grad_norm": 4.507673263549805, "learning_rate": 9.432149992315969e-06, "log_odds_chosen": 11.556356430053711, "log_odds_ratio": -0.00019117892952635884, "logits/chosen": -0.03574218600988388, "logits/rejected": -0.04328570514917374, "logps/chosen": -0.0006461284938268363, "logps/rejected": -2.985804319381714, "loss": 0.6046, "nll_loss": 0.1511344313621521, "rewards/accuracies": 1.0, "rewards/chosen": -6.46128537482582e-05, "rewards/margins": 0.29851582646369934, "rewards/rejected": -0.2985804080963135, "step": 12005 }, { "epoch": 8.302904564315353, "grad_norm": 6.191859245300293, "learning_rate": 9.42830797602582e-06, "log_odds_chosen": 11.82589340209961, "log_odds_ratio": -1.3909155313740484e-05, "logits/chosen": -0.29082420468330383, "logits/rejected": -0.13369691371917725, "logps/chosen": -0.00014884091797284782, "logps/rejected": -2.8574018478393555, "loss": 0.5995, "nll_loss": 0.1498684287071228, "rewards/accuracies": 1.0, "rewards/chosen": -1.4884091797284782e-05, "rewards/margins": 0.28572532534599304, "rewards/rejected": -0.2857401967048645, "step": 12006 }, { "epoch": 8.30359612724758, "grad_norm": 5.1775946617126465, "learning_rate": 9.424465959735668e-06, "log_odds_chosen": 10.76055908203125, "log_odds_ratio": -2.980104909511283e-05, "logits/chosen": -0.07235311716794968, "logits/rejected": -0.1714707314968109, "logps/chosen": -0.0002315481542609632, "logps/rejected": -1.8887486457824707, "loss": 0.6075, "nll_loss": 0.1518632173538208, "rewards/accuracies": 1.0, "rewards/chosen": -2.3154816517489962e-05, "rewards/margins": 0.1888517141342163, "rewards/rejected": -0.18887485563755035, "step": 12007 }, { "epoch": 8.304287690179807, "grad_norm": 5.869711875915527, "learning_rate": 9.420623943445521e-06, "log_odds_chosen": 11.506105422973633, "log_odds_ratio": -1.4619786270486657e-05, "logits/chosen": -0.4481998085975647, "logits/rejected": -0.4345892667770386, "logps/chosen": -5.6978064094437286e-05, "logps/rejected": -1.6851485967636108, "loss": 0.3266, "nll_loss": 0.0816439688205719, "rewards/accuracies": 1.0, "rewards/chosen": -5.697806500393199e-06, "rewards/margins": 0.16850917041301727, "rewards/rejected": -0.16851487755775452, "step": 12008 }, { "epoch": 8.304979253112034, "grad_norm": 4.347225189208984, "learning_rate": 9.416781927155372e-06, "log_odds_chosen": 11.164071083068848, "log_odds_ratio": -0.00010318847489543259, "logits/chosen": -0.27535247802734375, "logits/rejected": -0.3589295744895935, "logps/chosen": -0.00023351672280114144, "logps/rejected": -2.745594024658203, "loss": 0.674, "nll_loss": 0.16848233342170715, "rewards/accuracies": 1.0, "rewards/chosen": -2.3351673007709906e-05, "rewards/margins": 0.2745360732078552, "rewards/rejected": -0.2745594382286072, "step": 12009 }, { "epoch": 8.30567081604426, "grad_norm": 4.195399284362793, "learning_rate": 9.412939910865222e-06, "log_odds_chosen": 11.876922607421875, "log_odds_ratio": -1.9107839762000367e-05, "logits/chosen": -0.33225470781326294, "logits/rejected": -0.32006558775901794, "logps/chosen": -9.441829752177e-05, "logps/rejected": -2.646901845932007, "loss": 0.5167, "nll_loss": 0.1291753351688385, "rewards/accuracies": 1.0, "rewards/chosen": -9.44183011597488e-06, "rewards/margins": 0.26468074321746826, "rewards/rejected": -0.26469019055366516, "step": 12010 }, { "epoch": 8.306362378976488, "grad_norm": 3.7133564949035645, "learning_rate": 9.409097894575073e-06, "log_odds_chosen": 9.273460388183594, "log_odds_ratio": -0.0001292635570280254, "logits/chosen": -0.6664369106292725, "logits/rejected": -0.6849371194839478, "logps/chosen": -0.0003915390116162598, "logps/rejected": -1.3386191129684448, "loss": 0.3377, "nll_loss": 0.0844026431441307, "rewards/accuracies": 1.0, "rewards/chosen": -3.9153903344413266e-05, "rewards/margins": 0.1338227391242981, "rewards/rejected": -0.13386189937591553, "step": 12011 }, { "epoch": 8.307053941908714, "grad_norm": 8.380539894104004, "learning_rate": 9.405255878284924e-06, "log_odds_chosen": 10.739407539367676, "log_odds_ratio": -9.787101589608938e-05, "logits/chosen": -0.2542285919189453, "logits/rejected": -0.3180106282234192, "logps/chosen": -0.0004297649720683694, "logps/rejected": -2.015817403793335, "loss": 0.3486, "nll_loss": 0.08714074641466141, "rewards/accuracies": 1.0, "rewards/chosen": -4.297649866202846e-05, "rewards/margins": 0.20153875648975372, "rewards/rejected": -0.20158173143863678, "step": 12012 }, { "epoch": 8.307745504840941, "grad_norm": 3.912259101867676, "learning_rate": 9.401413861994775e-06, "log_odds_chosen": 11.8102445602417, "log_odds_ratio": -4.580942550092004e-05, "logits/chosen": -0.34821221232414246, "logits/rejected": -0.39427390694618225, "logps/chosen": -0.00019697409879881889, "logps/rejected": -2.733593463897705, "loss": 0.3726, "nll_loss": 0.09314112365245819, "rewards/accuracies": 1.0, "rewards/chosen": -1.9697408788488247e-05, "rewards/margins": 0.2733396887779236, "rewards/rejected": -0.27335938811302185, "step": 12013 }, { "epoch": 8.308437067773168, "grad_norm": 3.243351459503174, "learning_rate": 9.397571845704627e-06, "log_odds_chosen": 10.895149230957031, "log_odds_ratio": -0.0001612679334357381, "logits/chosen": -0.4643056392669678, "logits/rejected": -0.5565376281738281, "logps/chosen": -0.00031255075009539723, "logps/rejected": -2.2396113872528076, "loss": 0.7204, "nll_loss": 0.18007400631904602, "rewards/accuracies": 1.0, "rewards/chosen": -3.1255076464731246e-05, "rewards/margins": 0.22392991185188293, "rewards/rejected": -0.22396114468574524, "step": 12014 }, { "epoch": 8.309128630705395, "grad_norm": 3.78821063041687, "learning_rate": 9.393729829414478e-06, "log_odds_chosen": 9.914217948913574, "log_odds_ratio": -0.00014684694178868085, "logits/chosen": -0.14503690600395203, "logits/rejected": -0.16446837782859802, "logps/chosen": -0.00018237555923406035, "logps/rejected": -1.2952816486358643, "loss": 0.7321, "nll_loss": 0.18300293385982513, "rewards/accuracies": 1.0, "rewards/chosen": -1.8237555195810273e-05, "rewards/margins": 0.12950992584228516, "rewards/rejected": -0.12952816486358643, "step": 12015 }, { "epoch": 8.309820193637622, "grad_norm": 4.330719470977783, "learning_rate": 9.389887813124327e-06, "log_odds_chosen": 11.473482131958008, "log_odds_ratio": -0.0002337690384592861, "logits/chosen": 0.139499232172966, "logits/rejected": 0.1419997215270996, "logps/chosen": -0.0003186473622918129, "logps/rejected": -2.680612564086914, "loss": 0.4573, "nll_loss": 0.11430097371339798, "rewards/accuracies": 1.0, "rewards/chosen": -3.186473622918129e-05, "rewards/margins": 0.2680293917655945, "rewards/rejected": -0.2680612802505493, "step": 12016 }, { "epoch": 8.310511756569849, "grad_norm": 3.5915465354919434, "learning_rate": 9.38604579683418e-06, "log_odds_chosen": 10.803346633911133, "log_odds_ratio": -3.7507430533878505e-05, "logits/chosen": -0.41293051838874817, "logits/rejected": -0.4520162343978882, "logps/chosen": -0.0001572552282596007, "logps/rejected": -1.758723258972168, "loss": 0.4038, "nll_loss": 0.10093595087528229, "rewards/accuracies": 1.0, "rewards/chosen": -1.572552355355583e-05, "rewards/margins": 0.17585662007331848, "rewards/rejected": -0.1758723258972168, "step": 12017 }, { "epoch": 8.311203319502075, "grad_norm": 5.969757556915283, "learning_rate": 9.38220378054403e-06, "log_odds_chosen": 11.04709529876709, "log_odds_ratio": -0.00011035658826585859, "logits/chosen": -0.6205735802650452, "logits/rejected": -0.8208602070808411, "logps/chosen": -0.00020124486763961613, "logps/rejected": -2.5319573879241943, "loss": 0.3032, "nll_loss": 0.07580114901065826, "rewards/accuracies": 1.0, "rewards/chosen": -2.0124487491557375e-05, "rewards/margins": 0.25317561626434326, "rewards/rejected": -0.25319573283195496, "step": 12018 }, { "epoch": 8.311894882434302, "grad_norm": 5.246040344238281, "learning_rate": 9.37836176425388e-06, "log_odds_chosen": 11.330859184265137, "log_odds_ratio": -2.4554348783567548e-05, "logits/chosen": -0.47599172592163086, "logits/rejected": -0.4993157386779785, "logps/chosen": -0.0002337495534447953, "logps/rejected": -2.5536375045776367, "loss": 0.5799, "nll_loss": 0.144969642162323, "rewards/accuracies": 1.0, "rewards/chosen": -2.3374956072075292e-05, "rewards/margins": 0.2553403973579407, "rewards/rejected": -0.2553637623786926, "step": 12019 }, { "epoch": 8.312586445366529, "grad_norm": 4.8827714920043945, "learning_rate": 9.374519747963732e-06, "log_odds_chosen": 10.16954231262207, "log_odds_ratio": -0.00012112972763134167, "logits/chosen": -0.686266303062439, "logits/rejected": -0.7174581289291382, "logps/chosen": -0.00019134815374854952, "logps/rejected": -1.2021300792694092, "loss": 0.2649, "nll_loss": 0.06621464341878891, "rewards/accuracies": 1.0, "rewards/chosen": -1.9134817193844356e-05, "rewards/margins": 0.12019386887550354, "rewards/rejected": -0.12021300196647644, "step": 12020 }, { "epoch": 8.313278008298756, "grad_norm": 5.487241744995117, "learning_rate": 9.370677731673582e-06, "log_odds_chosen": 10.528274536132812, "log_odds_ratio": -4.080952930962667e-05, "logits/chosen": -0.6552819609642029, "logits/rejected": -0.6959894299507141, "logps/chosen": -0.00021928038040641695, "logps/rejected": -1.752156376838684, "loss": 0.3278, "nll_loss": 0.08195218443870544, "rewards/accuracies": 1.0, "rewards/chosen": -2.1928039132035337e-05, "rewards/margins": 0.17519372701644897, "rewards/rejected": -0.17521564662456512, "step": 12021 }, { "epoch": 8.313969571230983, "grad_norm": 8.090567588806152, "learning_rate": 9.366835715383433e-06, "log_odds_chosen": 10.119335174560547, "log_odds_ratio": -0.00010075949830934405, "logits/chosen": -0.4302787184715271, "logits/rejected": -0.5251643061637878, "logps/chosen": -0.00014466950960922986, "logps/rejected": -1.6406134366989136, "loss": 0.6859, "nll_loss": 0.17146024107933044, "rewards/accuracies": 1.0, "rewards/chosen": -1.4466952052316628e-05, "rewards/margins": 0.16404689848423004, "rewards/rejected": -0.16406136751174927, "step": 12022 }, { "epoch": 8.31466113416321, "grad_norm": 6.585088729858398, "learning_rate": 9.362993699093286e-06, "log_odds_chosen": 10.886041641235352, "log_odds_ratio": -0.0005051199696026742, "logits/chosen": -0.18099522590637207, "logits/rejected": -0.47358912229537964, "logps/chosen": -0.000296997808618471, "logps/rejected": -2.187826633453369, "loss": 0.6549, "nll_loss": 0.16368287801742554, "rewards/accuracies": 1.0, "rewards/chosen": -2.9699782317038625e-05, "rewards/margins": 0.21875298023223877, "rewards/rejected": -0.2187826931476593, "step": 12023 }, { "epoch": 8.315352697095436, "grad_norm": 3.961249828338623, "learning_rate": 9.359151682803136e-06, "log_odds_chosen": 11.450935363769531, "log_odds_ratio": -2.176938869524747e-05, "logits/chosen": -0.3161793351173401, "logits/rejected": -0.284781277179718, "logps/chosen": -0.00032644631573930383, "logps/rejected": -2.914153814315796, "loss": 0.5379, "nll_loss": 0.1344832479953766, "rewards/accuracies": 1.0, "rewards/chosen": -3.264463157393038e-05, "rewards/margins": 0.29138273000717163, "rewards/rejected": -0.29141539335250854, "step": 12024 }, { "epoch": 8.316044260027663, "grad_norm": 4.829967975616455, "learning_rate": 9.355309666512985e-06, "log_odds_chosen": 11.148256301879883, "log_odds_ratio": -3.4924247302114964e-05, "logits/chosen": 0.11780108511447906, "logits/rejected": 0.09679309278726578, "logps/chosen": -0.0003267015272285789, "logps/rejected": -2.400546073913574, "loss": 0.4474, "nll_loss": 0.11185356974601746, "rewards/accuracies": 1.0, "rewards/chosen": -3.267015563324094e-05, "rewards/margins": 0.24002191424369812, "rewards/rejected": -0.24005459249019623, "step": 12025 }, { "epoch": 8.31673582295989, "grad_norm": 2.4630846977233887, "learning_rate": 9.351467650222838e-06, "log_odds_chosen": 10.963375091552734, "log_odds_ratio": -0.000224971110583283, "logits/chosen": -0.5358527898788452, "logits/rejected": -0.5666942596435547, "logps/chosen": -0.0004893578588962555, "logps/rejected": -2.31632137298584, "loss": 0.3522, "nll_loss": 0.08801663666963577, "rewards/accuracies": 1.0, "rewards/chosen": -4.8935784434434026e-05, "rewards/margins": 0.23158320784568787, "rewards/rejected": -0.23163215816020966, "step": 12026 }, { "epoch": 8.317427385892117, "grad_norm": 5.256807804107666, "learning_rate": 9.347625633932689e-06, "log_odds_chosen": 11.870455741882324, "log_odds_ratio": -1.347633315162966e-05, "logits/chosen": -0.4692208468914032, "logits/rejected": -0.47148019075393677, "logps/chosen": -8.793309825705364e-05, "logps/rejected": -2.2496893405914307, "loss": 0.3668, "nll_loss": 0.09170769155025482, "rewards/accuracies": 1.0, "rewards/chosen": -8.793309461907484e-06, "rewards/margins": 0.22496017813682556, "rewards/rejected": -0.22496894001960754, "step": 12027 }, { "epoch": 8.318118948824344, "grad_norm": 3.6092746257781982, "learning_rate": 9.34378361764254e-06, "log_odds_chosen": 11.170661926269531, "log_odds_ratio": -3.2713978725951165e-05, "logits/chosen": -0.5941171050071716, "logits/rejected": -0.7027862071990967, "logps/chosen": -0.00018664052186068147, "logps/rejected": -2.2522315979003906, "loss": 0.4077, "nll_loss": 0.10191075503826141, "rewards/accuracies": 1.0, "rewards/chosen": -1.8664051822270267e-05, "rewards/margins": 0.2252044975757599, "rewards/rejected": -0.22522315382957458, "step": 12028 }, { "epoch": 8.31881051175657, "grad_norm": 3.9987618923187256, "learning_rate": 9.33994160135239e-06, "log_odds_chosen": 11.089741706848145, "log_odds_ratio": -7.078353519318625e-05, "logits/chosen": 0.07066502422094345, "logits/rejected": 0.05900406092405319, "logps/chosen": -0.0052088359370827675, "logps/rejected": -2.619210720062256, "loss": 0.4813, "nll_loss": 0.12032265961170197, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005208835937082767, "rewards/margins": 0.2614002227783203, "rewards/rejected": -0.26192110776901245, "step": 12029 }, { "epoch": 8.319502074688797, "grad_norm": 3.9945900440216064, "learning_rate": 9.33609958506224e-06, "log_odds_chosen": 11.662487030029297, "log_odds_ratio": -2.972657239297405e-05, "logits/chosen": -0.2970031201839447, "logits/rejected": -0.3440643548965454, "logps/chosen": -0.0003341581905260682, "logps/rejected": -2.6654715538024902, "loss": 0.4138, "nll_loss": 0.10345922410488129, "rewards/accuracies": 1.0, "rewards/chosen": -3.34158175974153e-05, "rewards/margins": 0.26651373505592346, "rewards/rejected": -0.26654714345932007, "step": 12030 }, { "epoch": 8.320193637621024, "grad_norm": 4.668849468231201, "learning_rate": 9.332257568772092e-06, "log_odds_chosen": 10.378423690795898, "log_odds_ratio": -0.00013369829684961587, "logits/chosen": -0.42694246768951416, "logits/rejected": -0.46096310019493103, "logps/chosen": -0.0009023980237543583, "logps/rejected": -1.9191800355911255, "loss": 0.4479, "nll_loss": 0.11196404695510864, "rewards/accuracies": 1.0, "rewards/chosen": -9.023980237543583e-05, "rewards/margins": 0.19182777404785156, "rewards/rejected": -0.1919180154800415, "step": 12031 }, { "epoch": 8.320885200553251, "grad_norm": 3.3766138553619385, "learning_rate": 9.328415552481944e-06, "log_odds_chosen": 11.727277755737305, "log_odds_ratio": -3.624118471634574e-05, "logits/chosen": -0.2877175211906433, "logits/rejected": -0.28738832473754883, "logps/chosen": -0.00013960532669443637, "logps/rejected": -2.453561305999756, "loss": 0.3224, "nll_loss": 0.08059151470661163, "rewards/accuracies": 1.0, "rewards/chosen": -1.3960533578938339e-05, "rewards/margins": 0.24534213542938232, "rewards/rejected": -0.24535611271858215, "step": 12032 }, { "epoch": 8.321576763485478, "grad_norm": 4.295798301696777, "learning_rate": 9.324573536191795e-06, "log_odds_chosen": 10.742616653442383, "log_odds_ratio": -0.00027178574237041175, "logits/chosen": -0.2917238473892212, "logits/rejected": -0.33401742577552795, "logps/chosen": -0.0009895612020045519, "logps/rejected": -2.305877208709717, "loss": 0.3827, "nll_loss": 0.09564106166362762, "rewards/accuracies": 1.0, "rewards/chosen": -9.895613038679585e-05, "rewards/margins": 0.23048877716064453, "rewards/rejected": -0.23058775067329407, "step": 12033 }, { "epoch": 8.322268326417705, "grad_norm": 4.387558937072754, "learning_rate": 9.320731519901644e-06, "log_odds_chosen": 11.680660247802734, "log_odds_ratio": -1.2796385817637201e-05, "logits/chosen": -0.5847553014755249, "logits/rejected": -0.6642088890075684, "logps/chosen": -0.00010414030111860484, "logps/rejected": -2.4134974479675293, "loss": 0.5157, "nll_loss": 0.1289164125919342, "rewards/accuracies": 1.0, "rewards/chosen": -1.0414029020466842e-05, "rewards/margins": 0.24133934080600739, "rewards/rejected": -0.24134975671768188, "step": 12034 }, { "epoch": 8.322959889349931, "grad_norm": 7.636205673217773, "learning_rate": 9.316889503611496e-06, "log_odds_chosen": 11.007831573486328, "log_odds_ratio": -5.737036553910002e-05, "logits/chosen": -0.4497582018375397, "logits/rejected": -0.41020840406417847, "logps/chosen": -7.495034515159205e-05, "logps/rejected": -1.4946386814117432, "loss": 0.2984, "nll_loss": 0.0746031254529953, "rewards/accuracies": 1.0, "rewards/chosen": -7.495034878957085e-06, "rewards/margins": 0.14945638179779053, "rewards/rejected": -0.14946386218070984, "step": 12035 }, { "epoch": 8.323651452282158, "grad_norm": 4.195898532867432, "learning_rate": 9.313047487321347e-06, "log_odds_chosen": 11.578995704650879, "log_odds_ratio": -1.3797025530948304e-05, "logits/chosen": -0.23090381920337677, "logits/rejected": -0.2601828873157501, "logps/chosen": -8.795601752353832e-05, "logps/rejected": -2.304961681365967, "loss": 0.4116, "nll_loss": 0.10290920734405518, "rewards/accuracies": 1.0, "rewards/chosen": -8.795602298050653e-06, "rewards/margins": 0.2304873913526535, "rewards/rejected": -0.23049618303775787, "step": 12036 }, { "epoch": 8.324343015214385, "grad_norm": 5.19864559173584, "learning_rate": 9.309205471031198e-06, "log_odds_chosen": 11.066790580749512, "log_odds_ratio": -4.9645372200757265e-05, "logits/chosen": -0.3808850646018982, "logits/rejected": -0.4125874936580658, "logps/chosen": -0.00017530072364024818, "logps/rejected": -2.204151153564453, "loss": 0.8283, "nll_loss": 0.2070685774087906, "rewards/accuracies": 1.0, "rewards/chosen": -1.7530072000226937e-05, "rewards/margins": 0.22039756178855896, "rewards/rejected": -0.22041510045528412, "step": 12037 }, { "epoch": 8.325034578146612, "grad_norm": 3.293100118637085, "learning_rate": 9.305363454741048e-06, "log_odds_chosen": 11.432401657104492, "log_odds_ratio": -3.613935405155644e-05, "logits/chosen": -0.44712987542152405, "logits/rejected": -0.4820421636104584, "logps/chosen": -0.00016187304572667927, "logps/rejected": -2.1475753784179688, "loss": 0.348, "nll_loss": 0.08698876202106476, "rewards/accuracies": 1.0, "rewards/chosen": -1.6187303117476404e-05, "rewards/margins": 0.2147413194179535, "rewards/rejected": -0.2147575169801712, "step": 12038 }, { "epoch": 8.325726141078839, "grad_norm": 3.3491287231445312, "learning_rate": 9.3015214384509e-06, "log_odds_chosen": 11.12563705444336, "log_odds_ratio": -0.00013439056056085974, "logits/chosen": -0.35802096128463745, "logits/rejected": -0.316034197807312, "logps/chosen": -0.000603663909714669, "logps/rejected": -2.807803153991699, "loss": 0.8425, "nll_loss": 0.2106177806854248, "rewards/accuracies": 1.0, "rewards/chosen": -6.0366393881849945e-05, "rewards/margins": 0.28071993589401245, "rewards/rejected": -0.2807803452014923, "step": 12039 }, { "epoch": 8.326417704011066, "grad_norm": 5.166214942932129, "learning_rate": 9.29767942216075e-06, "log_odds_chosen": 10.950658798217773, "log_odds_ratio": -2.832374957506545e-05, "logits/chosen": -0.5255546569824219, "logits/rejected": -0.5025449395179749, "logps/chosen": -0.00038333734846673906, "logps/rejected": -2.604123115539551, "loss": 0.7587, "nll_loss": 0.1896737813949585, "rewards/accuracies": 1.0, "rewards/chosen": -3.833373193629086e-05, "rewards/margins": 0.2603739798069, "rewards/rejected": -0.260412335395813, "step": 12040 }, { "epoch": 8.327109266943292, "grad_norm": 3.9560062885284424, "learning_rate": 9.2938374058706e-06, "log_odds_chosen": 11.090538024902344, "log_odds_ratio": -7.66869488870725e-05, "logits/chosen": -0.6735488176345825, "logits/rejected": -0.6175107955932617, "logps/chosen": -0.00021816727530676872, "logps/rejected": -2.164520502090454, "loss": 0.4316, "nll_loss": 0.10789860039949417, "rewards/accuracies": 1.0, "rewards/chosen": -2.181672607548535e-05, "rewards/margins": 0.21643024682998657, "rewards/rejected": -0.21645204722881317, "step": 12041 }, { "epoch": 8.32780082987552, "grad_norm": 3.482357978820801, "learning_rate": 9.289995389580453e-06, "log_odds_chosen": 11.901061058044434, "log_odds_ratio": -2.272317578899674e-05, "logits/chosen": -0.21799947321414948, "logits/rejected": -0.3493354916572571, "logps/chosen": -0.0001254864619113505, "logps/rejected": -2.394380569458008, "loss": 0.3969, "nll_loss": 0.09922779351472855, "rewards/accuracies": 1.0, "rewards/chosen": -1.2548645827337168e-05, "rewards/margins": 0.23942552506923676, "rewards/rejected": -0.23943805694580078, "step": 12042 }, { "epoch": 8.328492392807746, "grad_norm": 12.327674865722656, "learning_rate": 9.286153373290304e-06, "log_odds_chosen": 10.347898483276367, "log_odds_ratio": -7.437378371832892e-05, "logits/chosen": -0.21823422610759735, "logits/rejected": -0.22773411870002747, "logps/chosen": -0.00033282540971413255, "logps/rejected": -1.9625000953674316, "loss": 0.3499, "nll_loss": 0.08747636526823044, "rewards/accuracies": 1.0, "rewards/chosen": -3.32825438817963e-05, "rewards/margins": 0.19621673226356506, "rewards/rejected": -0.19625002145767212, "step": 12043 }, { "epoch": 8.329183955739973, "grad_norm": 3.386772394180298, "learning_rate": 9.282311357000153e-06, "log_odds_chosen": 10.630339622497559, "log_odds_ratio": -0.0003122264170087874, "logits/chosen": -0.2610680162906647, "logits/rejected": -0.30401110649108887, "logps/chosen": -0.00017395528266206384, "logps/rejected": -2.0399298667907715, "loss": 0.3553, "nll_loss": 0.08878161013126373, "rewards/accuracies": 1.0, "rewards/chosen": -1.7395528630004264e-05, "rewards/margins": 0.2039755880832672, "rewards/rejected": -0.20399296283721924, "step": 12044 }, { "epoch": 8.3298755186722, "grad_norm": 4.99088191986084, "learning_rate": 9.278469340710005e-06, "log_odds_chosen": 10.918338775634766, "log_odds_ratio": -0.00010383578046457842, "logits/chosen": 0.016888275742530823, "logits/rejected": -0.03546109050512314, "logps/chosen": -0.00026136101223528385, "logps/rejected": -2.3230907917022705, "loss": 0.3535, "nll_loss": 0.08835355192422867, "rewards/accuracies": 1.0, "rewards/chosen": -2.6136101951124147e-05, "rewards/margins": 0.23228295147418976, "rewards/rejected": -0.23230908811092377, "step": 12045 }, { "epoch": 8.330567081604427, "grad_norm": 3.7360870838165283, "learning_rate": 9.274627324419856e-06, "log_odds_chosen": 10.23141098022461, "log_odds_ratio": -0.00010504803503863513, "logits/chosen": -0.5646274089813232, "logits/rejected": -0.5441697835922241, "logps/chosen": -0.00023329338000621647, "logps/rejected": -1.9220106601715088, "loss": 0.456, "nll_loss": 0.11398179829120636, "rewards/accuracies": 1.0, "rewards/chosen": -2.332933945581317e-05, "rewards/margins": 0.19217772781848907, "rewards/rejected": -0.19220104813575745, "step": 12046 }, { "epoch": 8.331258644536653, "grad_norm": 5.112025260925293, "learning_rate": 9.270785308129707e-06, "log_odds_chosen": 10.852737426757812, "log_odds_ratio": -0.0002522445283830166, "logits/chosen": -0.5581848621368408, "logits/rejected": -0.5673970580101013, "logps/chosen": -0.0002991710207425058, "logps/rejected": -2.4401395320892334, "loss": 0.556, "nll_loss": 0.13898655772209167, "rewards/accuracies": 1.0, "rewards/chosen": -2.9917100619059056e-05, "rewards/margins": 0.24398404359817505, "rewards/rejected": -0.2440139502286911, "step": 12047 }, { "epoch": 8.33195020746888, "grad_norm": 4.515886306762695, "learning_rate": 9.266943291839558e-06, "log_odds_chosen": 10.767580032348633, "log_odds_ratio": -7.623255078215152e-05, "logits/chosen": -0.25946271419525146, "logits/rejected": -0.3516313433647156, "logps/chosen": -0.00024921749718487263, "logps/rejected": -2.1053853034973145, "loss": 0.5781, "nll_loss": 0.14452941715717316, "rewards/accuracies": 1.0, "rewards/chosen": -2.492174826329574e-05, "rewards/margins": 0.21051359176635742, "rewards/rejected": -0.21053853631019592, "step": 12048 }, { "epoch": 8.332641770401107, "grad_norm": 5.479881763458252, "learning_rate": 9.263101275549408e-06, "log_odds_chosen": 10.361333847045898, "log_odds_ratio": -0.00015310835442505777, "logits/chosen": -0.43369877338409424, "logits/rejected": -0.3672742545604706, "logps/chosen": -0.0005247532390058041, "logps/rejected": -2.2179088592529297, "loss": 0.4675, "nll_loss": 0.11685863882303238, "rewards/accuracies": 1.0, "rewards/chosen": -5.247531953500584e-05, "rewards/margins": 0.22173842787742615, "rewards/rejected": -0.22179089486598969, "step": 12049 }, { "epoch": 8.333333333333334, "grad_norm": 4.073111534118652, "learning_rate": 9.259259259259259e-06, "log_odds_chosen": 11.245848655700684, "log_odds_ratio": -4.935469405609183e-05, "logits/chosen": 0.07112825661897659, "logits/rejected": 0.03870762512087822, "logps/chosen": -0.00018877757247537374, "logps/rejected": -2.5663628578186035, "loss": 0.4235, "nll_loss": 0.10587833821773529, "rewards/accuracies": 1.0, "rewards/chosen": -1.887775579234585e-05, "rewards/margins": 0.2566174268722534, "rewards/rejected": -0.25663629174232483, "step": 12050 }, { "epoch": 8.33402489626556, "grad_norm": 3.9776432514190674, "learning_rate": 9.255417242969112e-06, "log_odds_chosen": 11.619805335998535, "log_odds_ratio": -1.539092045277357e-05, "logits/chosen": -0.29793232679367065, "logits/rejected": -0.3029605746269226, "logps/chosen": -0.00016180785314645618, "logps/rejected": -2.5110225677490234, "loss": 0.3194, "nll_loss": 0.07984956353902817, "rewards/accuracies": 1.0, "rewards/chosen": -1.61807856784435e-05, "rewards/margins": 0.2510860562324524, "rewards/rejected": -0.2511022388935089, "step": 12051 }, { "epoch": 8.334716459197788, "grad_norm": 3.287205219268799, "learning_rate": 9.251575226678962e-06, "log_odds_chosen": 9.773767471313477, "log_odds_ratio": -0.0002140636497642845, "logits/chosen": -0.22501704096794128, "logits/rejected": -0.22273699939250946, "logps/chosen": -0.00026829185662791133, "logps/rejected": -1.6013988256454468, "loss": 0.3525, "nll_loss": 0.08809895068407059, "rewards/accuracies": 1.0, "rewards/chosen": -2.6829187845578417e-05, "rewards/margins": 0.16011305153369904, "rewards/rejected": -0.16013988852500916, "step": 12052 }, { "epoch": 8.335408022130014, "grad_norm": 3.6830883026123047, "learning_rate": 9.247733210388811e-06, "log_odds_chosen": 11.520283699035645, "log_odds_ratio": -1.649722980801016e-05, "logits/chosen": -0.09420540928840637, "logits/rejected": -0.09450474381446838, "logps/chosen": -0.0001776470453478396, "logps/rejected": -2.614943027496338, "loss": 0.44, "nll_loss": 0.11000753939151764, "rewards/accuracies": 1.0, "rewards/chosen": -1.776470526237972e-05, "rewards/margins": 0.2614765465259552, "rewards/rejected": -0.26149433851242065, "step": 12053 }, { "epoch": 8.336099585062241, "grad_norm": 3.466386318206787, "learning_rate": 9.243891194098664e-06, "log_odds_chosen": 11.200641632080078, "log_odds_ratio": -1.9975921532022767e-05, "logits/chosen": -0.3815682828426361, "logits/rejected": -0.4218658208847046, "logps/chosen": -0.00012150880502304062, "logps/rejected": -2.148578643798828, "loss": 0.3755, "nll_loss": 0.09387080371379852, "rewards/accuracies": 1.0, "rewards/chosen": -1.2150880138506182e-05, "rewards/margins": 0.21484573185443878, "rewards/rejected": -0.21485787630081177, "step": 12054 }, { "epoch": 8.336791147994468, "grad_norm": 8.70840835571289, "learning_rate": 9.240049177808515e-06, "log_odds_chosen": 10.134420394897461, "log_odds_ratio": -0.0004330216906964779, "logits/chosen": -0.3002817630767822, "logits/rejected": -0.43083691596984863, "logps/chosen": -0.000349003414157778, "logps/rejected": -1.3802651166915894, "loss": 0.3248, "nll_loss": 0.08115517348051071, "rewards/accuracies": 1.0, "rewards/chosen": -3.490033850539476e-05, "rewards/margins": 0.13799162209033966, "rewards/rejected": -0.13802652060985565, "step": 12055 }, { "epoch": 8.337482710926695, "grad_norm": 3.6529688835144043, "learning_rate": 9.236207161518365e-06, "log_odds_chosen": 9.931303024291992, "log_odds_ratio": -0.0006245232652872801, "logits/chosen": -0.2061011642217636, "logits/rejected": -0.22699670493602753, "logps/chosen": -0.0006724453414790332, "logps/rejected": -1.889383316040039, "loss": 0.5147, "nll_loss": 0.128619983792305, "rewards/accuracies": 1.0, "rewards/chosen": -6.724453851347789e-05, "rewards/margins": 0.1888710856437683, "rewards/rejected": -0.18893831968307495, "step": 12056 }, { "epoch": 8.338174273858922, "grad_norm": 4.977734565734863, "learning_rate": 9.232365145228216e-06, "log_odds_chosen": 11.125265121459961, "log_odds_ratio": -0.00022959726629778743, "logits/chosen": -0.04439122974872589, "logits/rejected": -0.02409200370311737, "logps/chosen": -0.00017756447778083384, "logps/rejected": -2.2842795848846436, "loss": 0.5022, "nll_loss": 0.12553563714027405, "rewards/accuracies": 1.0, "rewards/chosen": -1.7756448869477026e-05, "rewards/margins": 0.22841018438339233, "rewards/rejected": -0.2284279465675354, "step": 12057 }, { "epoch": 8.338865836791149, "grad_norm": 4.1805853843688965, "learning_rate": 9.228523128938067e-06, "log_odds_chosen": 11.242300987243652, "log_odds_ratio": -6.137518357718363e-05, "logits/chosen": -0.17572718858718872, "logits/rejected": -0.16212934255599976, "logps/chosen": -0.0001616746449144557, "logps/rejected": -2.141157865524292, "loss": 0.3299, "nll_loss": 0.08248007297515869, "rewards/accuracies": 1.0, "rewards/chosen": -1.6167465219041333e-05, "rewards/margins": 0.21409964561462402, "rewards/rejected": -0.21411579847335815, "step": 12058 }, { "epoch": 8.339557399723375, "grad_norm": 3.5841095447540283, "learning_rate": 9.224681112647918e-06, "log_odds_chosen": 11.254419326782227, "log_odds_ratio": -0.0021393040660768747, "logits/chosen": -0.2600412964820862, "logits/rejected": -0.31797146797180176, "logps/chosen": -0.016505086794495583, "logps/rejected": -3.2776453495025635, "loss": 0.4008, "nll_loss": 0.09999529272317886, "rewards/accuracies": 1.0, "rewards/chosen": -0.001650508726015687, "rewards/margins": 0.32611405849456787, "rewards/rejected": -0.3277645409107208, "step": 12059 }, { "epoch": 8.340248962655602, "grad_norm": 5.044835090637207, "learning_rate": 9.22083909635777e-06, "log_odds_chosen": 11.485102653503418, "log_odds_ratio": -1.8132033801521175e-05, "logits/chosen": -0.4359765648841858, "logits/rejected": -0.48265624046325684, "logps/chosen": -0.0002121072611771524, "logps/rejected": -2.7514710426330566, "loss": 0.4768, "nll_loss": 0.119210384786129, "rewards/accuracies": 1.0, "rewards/chosen": -2.121072611771524e-05, "rewards/margins": 0.2751259207725525, "rewards/rejected": -0.27514714002609253, "step": 12060 }, { "epoch": 8.340940525587829, "grad_norm": 5.355807304382324, "learning_rate": 9.21699708006762e-06, "log_odds_chosen": 11.519990921020508, "log_odds_ratio": -1.8206472304882482e-05, "logits/chosen": -0.1301870346069336, "logits/rejected": -0.20199596881866455, "logps/chosen": -0.00019549021089915186, "logps/rejected": -2.5161893367767334, "loss": 0.4364, "nll_loss": 0.10910181701183319, "rewards/accuracies": 1.0, "rewards/chosen": -1.954902290890459e-05, "rewards/margins": 0.25159937143325806, "rewards/rejected": -0.2516189217567444, "step": 12061 }, { "epoch": 8.341632088520056, "grad_norm": 5.413180828094482, "learning_rate": 9.21315506377747e-06, "log_odds_chosen": 10.790306091308594, "log_odds_ratio": -6.160003977129236e-05, "logits/chosen": -0.14251375198364258, "logits/rejected": -0.2059217244386673, "logps/chosen": -0.00035422618384473026, "logps/rejected": -2.222874164581299, "loss": 0.3951, "nll_loss": 0.09876702725887299, "rewards/accuracies": 1.0, "rewards/chosen": -3.542262129485607e-05, "rewards/margins": 0.2222519963979721, "rewards/rejected": -0.22228741645812988, "step": 12062 }, { "epoch": 8.342323651452283, "grad_norm": 3.652357339859009, "learning_rate": 9.209313047487322e-06, "log_odds_chosen": 10.979130744934082, "log_odds_ratio": -0.0004109518777113408, "logits/chosen": -0.09868557006120682, "logits/rejected": -0.17369824647903442, "logps/chosen": -0.0015244006644934416, "logps/rejected": -2.420994997024536, "loss": 0.4998, "nll_loss": 0.12490478903055191, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015244007227011025, "rewards/margins": 0.24194706976413727, "rewards/rejected": -0.24209947884082794, "step": 12063 }, { "epoch": 8.34301521438451, "grad_norm": 3.9147958755493164, "learning_rate": 9.205471031197173e-06, "log_odds_chosen": 10.266362190246582, "log_odds_ratio": -0.00010355141421314329, "logits/chosen": -0.11190681904554367, "logits/rejected": -0.11449551582336426, "logps/chosen": -0.0005471897311508656, "logps/rejected": -2.3507325649261475, "loss": 0.4864, "nll_loss": 0.12158728390932083, "rewards/accuracies": 1.0, "rewards/chosen": -5.471897384268232e-05, "rewards/margins": 0.2350185364484787, "rewards/rejected": -0.2350732535123825, "step": 12064 }, { "epoch": 8.343706777316736, "grad_norm": 4.411325931549072, "learning_rate": 9.201629014907024e-06, "log_odds_chosen": 11.177576065063477, "log_odds_ratio": -3.77266296709422e-05, "logits/chosen": -0.574793815612793, "logits/rejected": -0.5641138553619385, "logps/chosen": -0.00012600421905517578, "logps/rejected": -1.910329818725586, "loss": 0.4758, "nll_loss": 0.11893565207719803, "rewards/accuracies": 1.0, "rewards/chosen": -1.26004224512144e-05, "rewards/margins": 0.19102038443088531, "rewards/rejected": -0.19103297591209412, "step": 12065 }, { "epoch": 8.344398340248963, "grad_norm": 6.895480632781982, "learning_rate": 9.197786998616875e-06, "log_odds_chosen": 11.2932710647583, "log_odds_ratio": -3.7191490264376625e-05, "logits/chosen": -0.4668288230895996, "logits/rejected": -0.6262189149856567, "logps/chosen": -0.00012896041152998805, "logps/rejected": -2.0346803665161133, "loss": 0.429, "nll_loss": 0.10724306106567383, "rewards/accuracies": 1.0, "rewards/chosen": -1.2896041880594566e-05, "rewards/margins": 0.2034551203250885, "rewards/rejected": -0.20346803963184357, "step": 12066 }, { "epoch": 8.34508990318119, "grad_norm": 3.7116177082061768, "learning_rate": 9.193944982326725e-06, "log_odds_chosen": 10.76824951171875, "log_odds_ratio": -7.783662294968963e-05, "logits/chosen": -0.4730428457260132, "logits/rejected": -0.5344309210777283, "logps/chosen": -0.00038928643334656954, "logps/rejected": -1.9175119400024414, "loss": 0.4823, "nll_loss": 0.12056785821914673, "rewards/accuracies": 1.0, "rewards/chosen": -3.8928643334656954e-05, "rewards/margins": 0.19171224534511566, "rewards/rejected": -0.19175118207931519, "step": 12067 }, { "epoch": 8.345781466113417, "grad_norm": 3.591780662536621, "learning_rate": 9.190102966036576e-06, "log_odds_chosen": 10.766321182250977, "log_odds_ratio": -4.374091440695338e-05, "logits/chosen": -0.4310253858566284, "logits/rejected": -0.47409093379974365, "logps/chosen": -0.00013906153617426753, "logps/rejected": -1.8614153861999512, "loss": 0.3285, "nll_loss": 0.08212421089410782, "rewards/accuracies": 1.0, "rewards/chosen": -1.3906153981224634e-05, "rewards/margins": 0.18612763285636902, "rewards/rejected": -0.18614153563976288, "step": 12068 }, { "epoch": 8.346473029045644, "grad_norm": 4.2191009521484375, "learning_rate": 9.186260949746428e-06, "log_odds_chosen": 11.604708671569824, "log_odds_ratio": -1.1452235412434675e-05, "logits/chosen": -0.24239104986190796, "logits/rejected": -0.2985646426677704, "logps/chosen": -0.00012208960833959281, "logps/rejected": -2.358849048614502, "loss": 0.368, "nll_loss": 0.09199665486812592, "rewards/accuracies": 1.0, "rewards/chosen": -1.22089604701614e-05, "rewards/margins": 0.23587268590927124, "rewards/rejected": -0.2358849048614502, "step": 12069 }, { "epoch": 8.34716459197787, "grad_norm": 2.925367593765259, "learning_rate": 9.18241893345628e-06, "log_odds_chosen": 10.485984802246094, "log_odds_ratio": -6.372129428200424e-05, "logits/chosen": -0.42998701333999634, "logits/rejected": -0.4588412344455719, "logps/chosen": -0.00015813851496204734, "logps/rejected": -1.2758333683013916, "loss": 0.3471, "nll_loss": 0.08675874769687653, "rewards/accuracies": 1.0, "rewards/chosen": -1.5813851860002615e-05, "rewards/margins": 0.12756752967834473, "rewards/rejected": -0.1275833398103714, "step": 12070 }, { "epoch": 8.347856154910097, "grad_norm": 5.613522529602051, "learning_rate": 9.178576917166128e-06, "log_odds_chosen": 11.866766929626465, "log_odds_ratio": -1.0126213965122588e-05, "logits/chosen": -0.06285068392753601, "logits/rejected": -0.13774362206459045, "logps/chosen": -0.00015430677740368992, "logps/rejected": -2.7546706199645996, "loss": 0.5627, "nll_loss": 0.1406654268503189, "rewards/accuracies": 1.0, "rewards/chosen": -1.5430678104166873e-05, "rewards/margins": 0.27545166015625, "rewards/rejected": -0.27546706795692444, "step": 12071 }, { "epoch": 8.348547717842324, "grad_norm": 5.118077278137207, "learning_rate": 9.174734900875979e-06, "log_odds_chosen": 11.430276870727539, "log_odds_ratio": -5.91300122323446e-05, "logits/chosen": -0.19762642681598663, "logits/rejected": -0.2530553340911865, "logps/chosen": -0.00019286992028355598, "logps/rejected": -2.1932029724121094, "loss": 0.4997, "nll_loss": 0.12492159754037857, "rewards/accuracies": 1.0, "rewards/chosen": -1.92869920283556e-05, "rewards/margins": 0.2193010300397873, "rewards/rejected": -0.21932031214237213, "step": 12072 }, { "epoch": 8.349239280774551, "grad_norm": 4.854573726654053, "learning_rate": 9.170892884585831e-06, "log_odds_chosen": 11.556985855102539, "log_odds_ratio": -5.447504372568801e-05, "logits/chosen": -0.3260948359966278, "logits/rejected": -0.3692905008792877, "logps/chosen": -0.00039603933691978455, "logps/rejected": -2.43080735206604, "loss": 0.7249, "nll_loss": 0.18123196065425873, "rewards/accuracies": 1.0, "rewards/chosen": -3.960393587476574e-05, "rewards/margins": 0.24304112792015076, "rewards/rejected": -0.243080735206604, "step": 12073 }, { "epoch": 8.349930843706778, "grad_norm": 4.689001083374023, "learning_rate": 9.167050868295682e-06, "log_odds_chosen": 10.60562515258789, "log_odds_ratio": -6.536449654959142e-05, "logits/chosen": -0.5237111449241638, "logits/rejected": -0.4365498125553131, "logps/chosen": -0.00016227777814492583, "logps/rejected": -1.987557291984558, "loss": 0.2749, "nll_loss": 0.0687284991145134, "rewards/accuracies": 1.0, "rewards/chosen": -1.62277756317053e-05, "rewards/margins": 0.19873949885368347, "rewards/rejected": -0.19875574111938477, "step": 12074 }, { "epoch": 8.350622406639005, "grad_norm": 3.7940571308135986, "learning_rate": 9.163208852005533e-06, "log_odds_chosen": 10.647010803222656, "log_odds_ratio": -0.00011371282016625628, "logits/chosen": -0.14266344904899597, "logits/rejected": -0.1224905252456665, "logps/chosen": -0.0002618691651150584, "logps/rejected": -1.930260419845581, "loss": 0.3824, "nll_loss": 0.09559597074985504, "rewards/accuracies": 1.0, "rewards/chosen": -2.6186917239101604e-05, "rewards/margins": 0.19299986958503723, "rewards/rejected": -0.19302606582641602, "step": 12075 }, { "epoch": 8.351313969571232, "grad_norm": 3.318660020828247, "learning_rate": 9.159366835715384e-06, "log_odds_chosen": 9.893108367919922, "log_odds_ratio": -0.0004447439860086888, "logits/chosen": -0.5518521070480347, "logits/rejected": -0.5224223136901855, "logps/chosen": -0.00021021733118686825, "logps/rejected": -1.7410143613815308, "loss": 0.3573, "nll_loss": 0.08928947895765305, "rewards/accuracies": 1.0, "rewards/chosen": -2.1021733118686825e-05, "rewards/margins": 0.17408041656017303, "rewards/rejected": -0.17410144209861755, "step": 12076 }, { "epoch": 8.352005532503458, "grad_norm": 5.724148750305176, "learning_rate": 9.155524819425234e-06, "log_odds_chosen": 11.430328369140625, "log_odds_ratio": -3.109716999460943e-05, "logits/chosen": -0.3521016538143158, "logits/rejected": -0.31222349405288696, "logps/chosen": -9.736038191476837e-05, "logps/rejected": -2.2274422645568848, "loss": 0.3271, "nll_loss": 0.08176403492689133, "rewards/accuracies": 1.0, "rewards/chosen": -9.736038919072598e-06, "rewards/margins": 0.2227345108985901, "rewards/rejected": -0.22274425625801086, "step": 12077 }, { "epoch": 8.352697095435685, "grad_norm": 4.420742988586426, "learning_rate": 9.151682803135085e-06, "log_odds_chosen": 11.0013427734375, "log_odds_ratio": -3.3046468161046505e-05, "logits/chosen": -0.07796618342399597, "logits/rejected": 0.006830569356679916, "logps/chosen": -0.00027067813789471984, "logps/rejected": -2.153871536254883, "loss": 0.7758, "nll_loss": 0.19394733011722565, "rewards/accuracies": 1.0, "rewards/chosen": -2.7067813789471984e-05, "rewards/margins": 0.21536009013652802, "rewards/rejected": -0.21538715064525604, "step": 12078 }, { "epoch": 8.353388658367912, "grad_norm": 6.765323162078857, "learning_rate": 9.147840786844938e-06, "log_odds_chosen": 10.374045372009277, "log_odds_ratio": -0.0005182506865821779, "logits/chosen": -0.1451987773180008, "logits/rejected": -0.16935515403747559, "logps/chosen": -0.0005734489532187581, "logps/rejected": -2.156761407852173, "loss": 0.6643, "nll_loss": 0.16601383686065674, "rewards/accuracies": 1.0, "rewards/chosen": -5.734489968745038e-05, "rewards/margins": 0.2156187891960144, "rewards/rejected": -0.21567615866661072, "step": 12079 }, { "epoch": 8.354080221300139, "grad_norm": 3.6350746154785156, "learning_rate": 9.143998770554787e-06, "log_odds_chosen": 10.419280052185059, "log_odds_ratio": -0.00027461652643978596, "logits/chosen": -0.03309977054595947, "logits/rejected": 0.003689005970954895, "logps/chosen": -0.0006293084588833153, "logps/rejected": -2.2337255477905273, "loss": 0.4066, "nll_loss": 0.10161426663398743, "rewards/accuracies": 1.0, "rewards/chosen": -6.293084879871458e-05, "rewards/margins": 0.22330963611602783, "rewards/rejected": -0.22337256371974945, "step": 12080 }, { "epoch": 8.354771784232366, "grad_norm": 11.4356050491333, "learning_rate": 9.140156754264637e-06, "log_odds_chosen": 11.893506050109863, "log_odds_ratio": -1.4665483831777237e-05, "logits/chosen": -0.024583622813224792, "logits/rejected": -0.10612979531288147, "logps/chosen": -0.0001170150499092415, "logps/rejected": -2.6500325202941895, "loss": 0.4769, "nll_loss": 0.11923235654830933, "rewards/accuracies": 1.0, "rewards/chosen": -1.170150517282309e-05, "rewards/margins": 0.26499155163764954, "rewards/rejected": -0.2650032639503479, "step": 12081 }, { "epoch": 8.355463347164592, "grad_norm": 9.346952438354492, "learning_rate": 9.13631473797449e-06, "log_odds_chosen": 10.354605674743652, "log_odds_ratio": -4.912112854071893e-05, "logits/chosen": -0.4560277462005615, "logits/rejected": -0.4287424683570862, "logps/chosen": -0.00019161278032697737, "logps/rejected": -1.747044324874878, "loss": 0.3942, "nll_loss": 0.09854578971862793, "rewards/accuracies": 1.0, "rewards/chosen": -1.9161278032697737e-05, "rewards/margins": 0.1746852695941925, "rewards/rejected": -0.1747044324874878, "step": 12082 }, { "epoch": 8.35615491009682, "grad_norm": 3.4023470878601074, "learning_rate": 9.13247272168434e-06, "log_odds_chosen": 11.008105278015137, "log_odds_ratio": -6.754696369171143e-05, "logits/chosen": -0.6563997268676758, "logits/rejected": -0.7640130519866943, "logps/chosen": -7.45670186006464e-05, "logps/rejected": -1.789588451385498, "loss": 0.319, "nll_loss": 0.07973403483629227, "rewards/accuracies": 1.0, "rewards/chosen": -7.456702405761462e-06, "rewards/margins": 0.17895139753818512, "rewards/rejected": -0.17895883321762085, "step": 12083 }, { "epoch": 8.356846473029046, "grad_norm": 4.0229315757751465, "learning_rate": 9.128630705394191e-06, "log_odds_chosen": 11.00053596496582, "log_odds_ratio": -9.482467430643737e-05, "logits/chosen": -0.7036505341529846, "logits/rejected": -0.7025102376937866, "logps/chosen": -0.0002775189932435751, "logps/rejected": -2.4160661697387695, "loss": 0.3879, "nll_loss": 0.09697068482637405, "rewards/accuracies": 1.0, "rewards/chosen": -2.775189932435751e-05, "rewards/margins": 0.2415788620710373, "rewards/rejected": -0.24160662293434143, "step": 12084 }, { "epoch": 8.357538035961273, "grad_norm": 4.352490425109863, "learning_rate": 9.124788689104042e-06, "log_odds_chosen": 10.189447402954102, "log_odds_ratio": -0.0006835731328465044, "logits/chosen": -0.39894899725914, "logits/rejected": -0.5076238512992859, "logps/chosen": -0.0012036004336550832, "logps/rejected": -1.9045734405517578, "loss": 0.5762, "nll_loss": 0.14398697018623352, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012036004045512527, "rewards/margins": 0.19033700227737427, "rewards/rejected": -0.19045734405517578, "step": 12085 }, { "epoch": 8.3582295988935, "grad_norm": 5.3557658195495605, "learning_rate": 9.120946672813893e-06, "log_odds_chosen": 11.98507308959961, "log_odds_ratio": -0.00010645409201970324, "logits/chosen": -0.4352482259273529, "logits/rejected": -0.5673301815986633, "logps/chosen": -0.00016585568664595485, "logps/rejected": -2.974811553955078, "loss": 0.7435, "nll_loss": 0.1858687847852707, "rewards/accuracies": 1.0, "rewards/chosen": -1.6585569028393365e-05, "rewards/margins": 0.2974645495414734, "rewards/rejected": -0.29748111963272095, "step": 12086 }, { "epoch": 8.358921161825727, "grad_norm": 5.833132743835449, "learning_rate": 9.117104656523744e-06, "log_odds_chosen": 11.723255157470703, "log_odds_ratio": -1.0014520739787258e-05, "logits/chosen": 0.12011925131082535, "logits/rejected": 0.16198879480361938, "logps/chosen": -0.00010595491039566696, "logps/rejected": -2.3934383392333984, "loss": 0.4981, "nll_loss": 0.12452814728021622, "rewards/accuracies": 1.0, "rewards/chosen": -1.0595492312859278e-05, "rewards/margins": 0.23933324217796326, "rewards/rejected": -0.2393438220024109, "step": 12087 }, { "epoch": 8.359612724757953, "grad_norm": 5.850451469421387, "learning_rate": 9.113262640233596e-06, "log_odds_chosen": 10.695171356201172, "log_odds_ratio": -0.00017267999646719545, "logits/chosen": -0.3322739601135254, "logits/rejected": -0.3321690261363983, "logps/chosen": -0.00021232443396002054, "logps/rejected": -2.0772128105163574, "loss": 0.3142, "nll_loss": 0.07852254807949066, "rewards/accuracies": 1.0, "rewards/chosen": -2.123244121321477e-05, "rewards/margins": 0.20770004391670227, "rewards/rejected": -0.2077212780714035, "step": 12088 }, { "epoch": 8.36030428769018, "grad_norm": 4.204518795013428, "learning_rate": 9.109420623943445e-06, "log_odds_chosen": 11.224333763122559, "log_odds_ratio": -2.6536186851444654e-05, "logits/chosen": -0.47312721610069275, "logits/rejected": -0.5358306169509888, "logps/chosen": -0.0001297694689128548, "logps/rejected": -2.1915783882141113, "loss": 0.379, "nll_loss": 0.09475453197956085, "rewards/accuracies": 1.0, "rewards/chosen": -1.2976946891285479e-05, "rewards/margins": 0.21914485096931458, "rewards/rejected": -0.21915782988071442, "step": 12089 }, { "epoch": 8.360995850622407, "grad_norm": 2.6959381103515625, "learning_rate": 9.105578607653296e-06, "log_odds_chosen": 10.964401245117188, "log_odds_ratio": -4.955555414198898e-05, "logits/chosen": -0.19748425483703613, "logits/rejected": -0.23469477891921997, "logps/chosen": -0.00014199868019204587, "logps/rejected": -1.934066891670227, "loss": 0.3309, "nll_loss": 0.08271662145853043, "rewards/accuracies": 1.0, "rewards/chosen": -1.4199868019204587e-05, "rewards/margins": 0.19339248538017273, "rewards/rejected": -0.19340670108795166, "step": 12090 }, { "epoch": 8.361687413554634, "grad_norm": 3.581792116165161, "learning_rate": 9.101736591363148e-06, "log_odds_chosen": 10.874795913696289, "log_odds_ratio": -0.0003300320531707257, "logits/chosen": -0.12171950191259384, "logits/rejected": 0.01747075468301773, "logps/chosen": -0.00027409259928390384, "logps/rejected": -1.9720582962036133, "loss": 0.377, "nll_loss": 0.09421323239803314, "rewards/accuracies": 1.0, "rewards/chosen": -2.740925810940098e-05, "rewards/margins": 0.1971784085035324, "rewards/rejected": -0.1972058117389679, "step": 12091 }, { "epoch": 8.36237897648686, "grad_norm": 5.888037204742432, "learning_rate": 9.097894575072999e-06, "log_odds_chosen": 9.601099014282227, "log_odds_ratio": -0.0004326591733843088, "logits/chosen": -0.6578595638275146, "logits/rejected": -0.747802734375, "logps/chosen": -0.0004255325475241989, "logps/rejected": -1.3348298072814941, "loss": 0.3399, "nll_loss": 0.08492371439933777, "rewards/accuracies": 1.0, "rewards/chosen": -4.255325620761141e-05, "rewards/margins": 0.13344043493270874, "rewards/rejected": -0.13348299264907837, "step": 12092 }, { "epoch": 8.363070539419088, "grad_norm": 3.4145381450653076, "learning_rate": 9.09405255878285e-06, "log_odds_chosen": 11.088571548461914, "log_odds_ratio": -6.40248617855832e-05, "logits/chosen": 0.2030135989189148, "logits/rejected": 0.13781681656837463, "logps/chosen": -0.0005639658775180578, "logps/rejected": -2.5252761840820312, "loss": 0.4239, "nll_loss": 0.10596401244401932, "rewards/accuracies": 1.0, "rewards/chosen": -5.639659138978459e-05, "rewards/margins": 0.2524712085723877, "rewards/rejected": -0.2525276243686676, "step": 12093 }, { "epoch": 8.363762102351314, "grad_norm": 3.536792039871216, "learning_rate": 9.0902105424927e-06, "log_odds_chosen": 10.259236335754395, "log_odds_ratio": -0.0006960731698200107, "logits/chosen": -0.039343543350696564, "logits/rejected": -0.08427160978317261, "logps/chosen": -0.0003419583954382688, "logps/rejected": -1.974465012550354, "loss": 0.4182, "nll_loss": 0.10447600483894348, "rewards/accuracies": 1.0, "rewards/chosen": -3.419584027142264e-05, "rewards/margins": 0.19741231203079224, "rewards/rejected": -0.19744651019573212, "step": 12094 }, { "epoch": 8.364453665283541, "grad_norm": 5.547048568725586, "learning_rate": 9.086368526202551e-06, "log_odds_chosen": 10.491537094116211, "log_odds_ratio": -0.00026805573725141585, "logits/chosen": -0.1757640242576599, "logits/rejected": -0.14906059205532074, "logps/chosen": -0.002750436309725046, "logps/rejected": -2.2830355167388916, "loss": 0.7167, "nll_loss": 0.17914150655269623, "rewards/accuracies": 1.0, "rewards/chosen": -0.00027504368335939944, "rewards/margins": 0.22802849113941193, "rewards/rejected": -0.22830355167388916, "step": 12095 }, { "epoch": 8.365145228215768, "grad_norm": 3.5837619304656982, "learning_rate": 9.082526509912402e-06, "log_odds_chosen": 11.178289413452148, "log_odds_ratio": -0.0002664781059138477, "logits/chosen": -0.34205400943756104, "logits/rejected": -0.44289296865463257, "logps/chosen": -0.00012246904952917248, "logps/rejected": -1.9099770784378052, "loss": 0.4891, "nll_loss": 0.12223700433969498, "rewards/accuracies": 1.0, "rewards/chosen": -1.2246905498614069e-05, "rewards/margins": 0.19098547101020813, "rewards/rejected": -0.19099771976470947, "step": 12096 }, { "epoch": 8.365836791147995, "grad_norm": 3.8647124767303467, "learning_rate": 9.078684493622255e-06, "log_odds_chosen": 10.128311157226562, "log_odds_ratio": -7.842542981961742e-05, "logits/chosen": -0.407065749168396, "logits/rejected": -0.47137102484703064, "logps/chosen": -0.00037784138112328947, "logps/rejected": -1.7541797161102295, "loss": 0.4091, "nll_loss": 0.10227620601654053, "rewards/accuracies": 1.0, "rewards/chosen": -3.7784142477903515e-05, "rewards/margins": 0.17538020014762878, "rewards/rejected": -0.17541798949241638, "step": 12097 }, { "epoch": 8.366528354080222, "grad_norm": 5.806434154510498, "learning_rate": 9.074842477332105e-06, "log_odds_chosen": 10.017135620117188, "log_odds_ratio": -0.0001657155662542209, "logits/chosen": -0.41675513982772827, "logits/rejected": -0.4195055067539215, "logps/chosen": -0.0003376719541847706, "logps/rejected": -1.8514747619628906, "loss": 0.3504, "nll_loss": 0.08758871257305145, "rewards/accuracies": 1.0, "rewards/chosen": -3.3767199056455866e-05, "rewards/margins": 0.18511369824409485, "rewards/rejected": -0.1851474642753601, "step": 12098 }, { "epoch": 8.367219917012449, "grad_norm": 3.287838935852051, "learning_rate": 9.071000461041954e-06, "log_odds_chosen": 10.787469863891602, "log_odds_ratio": -0.00012153637362644076, "logits/chosen": -0.18966057896614075, "logits/rejected": -0.2829188108444214, "logps/chosen": -0.0003287080326117575, "logps/rejected": -2.6297342777252197, "loss": 0.534, "nll_loss": 0.13347730040550232, "rewards/accuracies": 1.0, "rewards/chosen": -3.2870804716367275e-05, "rewards/margins": 0.26294058561325073, "rewards/rejected": -0.262973427772522, "step": 12099 }, { "epoch": 8.367911479944675, "grad_norm": 4.554553031921387, "learning_rate": 9.067158444751807e-06, "log_odds_chosen": 11.362350463867188, "log_odds_ratio": -2.019827297772281e-05, "logits/chosen": -0.17572999000549316, "logits/rejected": -0.281780481338501, "logps/chosen": -0.00023074873024597764, "logps/rejected": -2.5494611263275146, "loss": 0.35, "nll_loss": 0.08750632405281067, "rewards/accuracies": 1.0, "rewards/chosen": -2.307487557118293e-05, "rewards/margins": 0.2549230456352234, "rewards/rejected": -0.25494611263275146, "step": 12100 }, { "epoch": 8.368603042876902, "grad_norm": 5.976508140563965, "learning_rate": 9.063316428461658e-06, "log_odds_chosen": 11.16000747680664, "log_odds_ratio": -4.418138269102201e-05, "logits/chosen": -0.12001601606607437, "logits/rejected": -0.13546162843704224, "logps/chosen": -0.0001884956145659089, "logps/rejected": -2.1030831336975098, "loss": 0.4924, "nll_loss": 0.12308388948440552, "rewards/accuracies": 1.0, "rewards/chosen": -1.884956145659089e-05, "rewards/margins": 0.21028946340084076, "rewards/rejected": -0.21030831336975098, "step": 12101 }, { "epoch": 8.369294605809129, "grad_norm": 4.149490833282471, "learning_rate": 9.059474412171508e-06, "log_odds_chosen": 11.068801879882812, "log_odds_ratio": -0.00016131362644955516, "logits/chosen": 0.06716214120388031, "logits/rejected": 6.300210952758789e-05, "logps/chosen": -0.00033710466232150793, "logps/rejected": -2.0708696842193604, "loss": 0.4826, "nll_loss": 0.12063352763652802, "rewards/accuracies": 1.0, "rewards/chosen": -3.371046477695927e-05, "rewards/margins": 0.2070532739162445, "rewards/rejected": -0.2070869505405426, "step": 12102 }, { "epoch": 8.369986168741356, "grad_norm": 4.572751522064209, "learning_rate": 9.055632395881359e-06, "log_odds_chosen": 10.715896606445312, "log_odds_ratio": -0.0006129711982794106, "logits/chosen": -0.6699153184890747, "logits/rejected": -0.7459685802459717, "logps/chosen": -0.0007518876809626818, "logps/rejected": -2.247579574584961, "loss": 0.4651, "nll_loss": 0.11621610075235367, "rewards/accuracies": 1.0, "rewards/chosen": -7.518876373069361e-05, "rewards/margins": 0.2246827781200409, "rewards/rejected": -0.22475793957710266, "step": 12103 }, { "epoch": 8.370677731673583, "grad_norm": 3.426314353942871, "learning_rate": 9.05179037959121e-06, "log_odds_chosen": 10.887182235717773, "log_odds_ratio": -5.992503429297358e-05, "logits/chosen": -0.32155394554138184, "logits/rejected": -0.3380813002586365, "logps/chosen": -0.0007114798063412309, "logps/rejected": -2.13638973236084, "loss": 0.3702, "nll_loss": 0.0925537571310997, "rewards/accuracies": 1.0, "rewards/chosen": -7.114798063412309e-05, "rewards/margins": 0.2135678231716156, "rewards/rejected": -0.21363899111747742, "step": 12104 }, { "epoch": 8.37136929460581, "grad_norm": 3.439755916595459, "learning_rate": 9.04794836330106e-06, "log_odds_chosen": 10.813738822937012, "log_odds_ratio": -3.219860082026571e-05, "logits/chosen": -0.46938440203666687, "logits/rejected": -0.43001264333724976, "logps/chosen": -0.0005872396286576986, "logps/rejected": -2.688671588897705, "loss": 0.5214, "nll_loss": 0.13035404682159424, "rewards/accuracies": 1.0, "rewards/chosen": -5.8723959227791056e-05, "rewards/margins": 0.26880842447280884, "rewards/rejected": -0.2688671946525574, "step": 12105 }, { "epoch": 8.372060857538036, "grad_norm": 4.733785629272461, "learning_rate": 9.044106347010911e-06, "log_odds_chosen": 9.735095977783203, "log_odds_ratio": -0.0002063530555460602, "logits/chosen": -0.41500332951545715, "logits/rejected": -0.36877870559692383, "logps/chosen": -0.00025138130877166986, "logps/rejected": -1.0509212017059326, "loss": 0.3176, "nll_loss": 0.07937469333410263, "rewards/accuracies": 1.0, "rewards/chosen": -2.513812796678394e-05, "rewards/margins": 0.10506697744131088, "rewards/rejected": -0.1050921157002449, "step": 12106 }, { "epoch": 8.372752420470263, "grad_norm": 3.8326022624969482, "learning_rate": 9.040264330720764e-06, "log_odds_chosen": 11.010136604309082, "log_odds_ratio": -4.120486482861452e-05, "logits/chosen": -0.9739433526992798, "logits/rejected": -0.9723539352416992, "logps/chosen": -0.000342509156325832, "logps/rejected": -2.3533740043640137, "loss": 0.3739, "nll_loss": 0.09346066415309906, "rewards/accuracies": 1.0, "rewards/chosen": -3.425091927056201e-05, "rewards/margins": 0.23530316352844238, "rewards/rejected": -0.23533740639686584, "step": 12107 }, { "epoch": 8.37344398340249, "grad_norm": 3.7740235328674316, "learning_rate": 9.036422314430613e-06, "log_odds_chosen": 11.705621719360352, "log_odds_ratio": -0.00019673358474392444, "logits/chosen": -0.33428582549095154, "logits/rejected": -0.41250723600387573, "logps/chosen": -0.00018244172679260373, "logps/rejected": -2.9301788806915283, "loss": 0.307, "nll_loss": 0.07673575729131699, "rewards/accuracies": 1.0, "rewards/chosen": -1.8244172679260373e-05, "rewards/margins": 0.29299962520599365, "rewards/rejected": -0.2930178940296173, "step": 12108 }, { "epoch": 8.374135546334717, "grad_norm": 7.275375843048096, "learning_rate": 9.032580298140464e-06, "log_odds_chosen": 11.373159408569336, "log_odds_ratio": -4.282902227714658e-05, "logits/chosen": -0.09751863777637482, "logits/rejected": -0.10958105325698853, "logps/chosen": -0.0001243318838533014, "logps/rejected": -2.1905131340026855, "loss": 0.6857, "nll_loss": 0.17140938341617584, "rewards/accuracies": 1.0, "rewards/chosen": -1.2433189112925902e-05, "rewards/margins": 0.21903890371322632, "rewards/rejected": -0.219051331281662, "step": 12109 }, { "epoch": 8.374827109266944, "grad_norm": 2.739427328109741, "learning_rate": 9.028738281850316e-06, "log_odds_chosen": 13.150906562805176, "log_odds_ratio": -3.6712751807499444e-06, "logits/chosen": -0.6241665482521057, "logits/rejected": -0.6573482751846313, "logps/chosen": -8.547461038688198e-05, "logps/rejected": -3.4761362075805664, "loss": 0.3557, "nll_loss": 0.08893192559480667, "rewards/accuracies": 1.0, "rewards/chosen": -8.547461220587138e-06, "rewards/margins": 0.3476050794124603, "rewards/rejected": -0.3476136326789856, "step": 12110 }, { "epoch": 8.37551867219917, "grad_norm": 4.139987945556641, "learning_rate": 9.024896265560167e-06, "log_odds_chosen": 10.556331634521484, "log_odds_ratio": -0.00036771217128261924, "logits/chosen": -0.1802615076303482, "logits/rejected": -0.2879849076271057, "logps/chosen": -0.0007980070076882839, "logps/rejected": -2.4222426414489746, "loss": 0.4299, "nll_loss": 0.10743677616119385, "rewards/accuracies": 1.0, "rewards/chosen": -7.980070222401991e-05, "rewards/margins": 0.24214446544647217, "rewards/rejected": -0.2422242909669876, "step": 12111 }, { "epoch": 8.376210235131397, "grad_norm": 6.3887481689453125, "learning_rate": 9.021054249270017e-06, "log_odds_chosen": 10.783723831176758, "log_odds_ratio": -0.00010180518438573927, "logits/chosen": -0.34942975640296936, "logits/rejected": -0.3650428056716919, "logps/chosen": -0.00045497252722270787, "logps/rejected": -2.3204410076141357, "loss": 0.6587, "nll_loss": 0.16466113924980164, "rewards/accuracies": 1.0, "rewards/chosen": -4.549725417746231e-05, "rewards/margins": 0.23199859261512756, "rewards/rejected": -0.23204410076141357, "step": 12112 }, { "epoch": 8.376901798063624, "grad_norm": 3.7242636680603027, "learning_rate": 9.017212232979868e-06, "log_odds_chosen": 11.271821975708008, "log_odds_ratio": -3.3475887903477997e-05, "logits/chosen": -0.5573599338531494, "logits/rejected": -0.6018040180206299, "logps/chosen": -0.0002326086541870609, "logps/rejected": -2.2966699600219727, "loss": 0.3278, "nll_loss": 0.08195127546787262, "rewards/accuracies": 1.0, "rewards/chosen": -2.326086541870609e-05, "rewards/margins": 0.22964373230934143, "rewards/rejected": -0.22966697812080383, "step": 12113 }, { "epoch": 8.377593360995851, "grad_norm": 3.9293277263641357, "learning_rate": 9.013370216689719e-06, "log_odds_chosen": 12.213785171508789, "log_odds_ratio": -1.0291758371749893e-05, "logits/chosen": -0.31658226251602173, "logits/rejected": -0.580470085144043, "logps/chosen": -0.00015860905114095658, "logps/rejected": -3.0600228309631348, "loss": 0.4878, "nll_loss": 0.1219446212053299, "rewards/accuracies": 1.0, "rewards/chosen": -1.5860903658904135e-05, "rewards/margins": 0.3059864640235901, "rewards/rejected": -0.30600231885910034, "step": 12114 }, { "epoch": 8.378284923928078, "grad_norm": 5.134614944458008, "learning_rate": 9.00952820039957e-06, "log_odds_chosen": 12.008981704711914, "log_odds_ratio": -6.621122156502679e-05, "logits/chosen": -0.10655619204044342, "logits/rejected": -0.10315324366092682, "logps/chosen": -0.00018991855904459953, "logps/rejected": -3.2541890144348145, "loss": 0.5297, "nll_loss": 0.13242211937904358, "rewards/accuracies": 1.0, "rewards/chosen": -1.8991855540662073e-05, "rewards/margins": 0.32539990544319153, "rewards/rejected": -0.3254188895225525, "step": 12115 }, { "epoch": 8.378976486860305, "grad_norm": 2.5658793449401855, "learning_rate": 9.005686184109422e-06, "log_odds_chosen": 11.466197967529297, "log_odds_ratio": -2.4516677513020113e-05, "logits/chosen": -0.6814978122711182, "logits/rejected": -0.7252025604248047, "logps/chosen": -9.871042129816487e-05, "logps/rejected": -1.9790711402893066, "loss": 0.3263, "nll_loss": 0.08158230036497116, "rewards/accuracies": 1.0, "rewards/chosen": -9.871042493614368e-06, "rewards/margins": 0.19789722561836243, "rewards/rejected": -0.19790711998939514, "step": 12116 }, { "epoch": 8.379668049792532, "grad_norm": 3.70289945602417, "learning_rate": 9.001844167819271e-06, "log_odds_chosen": 11.636565208435059, "log_odds_ratio": -2.1965597625239752e-05, "logits/chosen": 0.254207968711853, "logits/rejected": 0.1469896137714386, "logps/chosen": -0.00010575105989119038, "logps/rejected": -2.418914318084717, "loss": 0.437, "nll_loss": 0.1092444434762001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0575105989119038e-05, "rewards/margins": 0.24188083410263062, "rewards/rejected": -0.24189142882823944, "step": 12117 }, { "epoch": 8.380359612724758, "grad_norm": 2.544630765914917, "learning_rate": 8.998002151529122e-06, "log_odds_chosen": 10.39298152923584, "log_odds_ratio": -8.388479909626767e-05, "logits/chosen": -0.4096953272819519, "logits/rejected": -0.47978413105010986, "logps/chosen": -0.0002411601017229259, "logps/rejected": -1.8249914646148682, "loss": 0.3276, "nll_loss": 0.08188360184431076, "rewards/accuracies": 1.0, "rewards/chosen": -2.4116008717101067e-05, "rewards/margins": 0.1824750453233719, "rewards/rejected": -0.18249915540218353, "step": 12118 }, { "epoch": 8.381051175656985, "grad_norm": 7.59868860244751, "learning_rate": 8.994160135238974e-06, "log_odds_chosen": 12.390218734741211, "log_odds_ratio": -2.03878698812332e-05, "logits/chosen": -0.06851515173912048, "logits/rejected": -0.19629979133605957, "logps/chosen": -0.00019201249233447015, "logps/rejected": -3.3422369956970215, "loss": 0.7047, "nll_loss": 0.17616420984268188, "rewards/accuracies": 1.0, "rewards/chosen": -1.9201250324840657e-05, "rewards/margins": 0.3342045247554779, "rewards/rejected": -0.3342237174510956, "step": 12119 }, { "epoch": 8.381742738589212, "grad_norm": 3.9883933067321777, "learning_rate": 8.990318118948825e-06, "log_odds_chosen": 10.478074073791504, "log_odds_ratio": -0.00011548143811523914, "logits/chosen": -0.2359369695186615, "logits/rejected": -0.3406507968902588, "logps/chosen": -0.0005425603594630957, "logps/rejected": -2.217989683151245, "loss": 0.2968, "nll_loss": 0.0741945430636406, "rewards/accuracies": 1.0, "rewards/chosen": -5.425603376352228e-05, "rewards/margins": 0.22174471616744995, "rewards/rejected": -0.22179898619651794, "step": 12120 }, { "epoch": 8.382434301521439, "grad_norm": 4.174715995788574, "learning_rate": 8.986476102658676e-06, "log_odds_chosen": 10.57568359375, "log_odds_ratio": -0.00012932793470099568, "logits/chosen": -0.03034919500350952, "logits/rejected": -0.0810474306344986, "logps/chosen": -0.00015235492901410908, "logps/rejected": -1.7275258302688599, "loss": 0.3434, "nll_loss": 0.08582790195941925, "rewards/accuracies": 1.0, "rewards/chosen": -1.5235491446219385e-05, "rewards/margins": 0.17273734509944916, "rewards/rejected": -0.17275258898735046, "step": 12121 }, { "epoch": 8.383125864453666, "grad_norm": 3.964179754257202, "learning_rate": 8.982634086368527e-06, "log_odds_chosen": 11.0924072265625, "log_odds_ratio": -6.431773363146931e-05, "logits/chosen": -0.1575760841369629, "logits/rejected": -0.22044521570205688, "logps/chosen": -0.0001404537761118263, "logps/rejected": -2.251370906829834, "loss": 0.5174, "nll_loss": 0.1293363720178604, "rewards/accuracies": 1.0, "rewards/chosen": -1.404537761118263e-05, "rewards/margins": 0.22512304782867432, "rewards/rejected": -0.22513708472251892, "step": 12122 }, { "epoch": 8.383817427385893, "grad_norm": 5.138609886169434, "learning_rate": 8.978792070078377e-06, "log_odds_chosen": 10.380916595458984, "log_odds_ratio": -0.0002986867038998753, "logits/chosen": -0.2631905972957611, "logits/rejected": -0.38608020544052124, "logps/chosen": -0.0035598543472588062, "logps/rejected": -2.1883394718170166, "loss": 0.4938, "nll_loss": 0.12340797483921051, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003559854521881789, "rewards/margins": 0.21847794950008392, "rewards/rejected": -0.21883393824100494, "step": 12123 }, { "epoch": 8.38450899031812, "grad_norm": 7.429013729095459, "learning_rate": 8.974950053788228e-06, "log_odds_chosen": 9.923589706420898, "log_odds_ratio": -0.12461341172456741, "logits/chosen": -0.33789655566215515, "logits/rejected": -0.371481329202652, "logps/chosen": -0.02460256591439247, "logps/rejected": -2.5953493118286133, "loss": 0.7909, "nll_loss": 0.18525590002536774, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024602566845715046, "rewards/margins": 0.25707465410232544, "rewards/rejected": -0.25953492522239685, "step": 12124 }, { "epoch": 8.385200553250346, "grad_norm": 5.318549633026123, "learning_rate": 8.97110803749808e-06, "log_odds_chosen": 9.908432960510254, "log_odds_ratio": -0.00018403760623186827, "logits/chosen": -0.3189855217933655, "logits/rejected": -0.25400811433792114, "logps/chosen": -0.0002738266484811902, "logps/rejected": -1.2519633769989014, "loss": 0.3783, "nll_loss": 0.09455760568380356, "rewards/accuracies": 1.0, "rewards/chosen": -2.7382666303310543e-05, "rewards/margins": 0.12516896426677704, "rewards/rejected": -0.12519635260105133, "step": 12125 }, { "epoch": 8.385892116182573, "grad_norm": 4.526805400848389, "learning_rate": 8.96726602120793e-06, "log_odds_chosen": 11.152012825012207, "log_odds_ratio": -4.143865226069465e-05, "logits/chosen": 0.08569681644439697, "logits/rejected": 0.12167654931545258, "logps/chosen": -0.00021132684196345508, "logps/rejected": -2.5041356086730957, "loss": 0.3601, "nll_loss": 0.0900086835026741, "rewards/accuracies": 1.0, "rewards/chosen": -2.113268601533491e-05, "rewards/margins": 0.25039243698120117, "rewards/rejected": -0.25041356682777405, "step": 12126 }, { "epoch": 8.3865836791148, "grad_norm": 3.170875072479248, "learning_rate": 8.96342400491778e-06, "log_odds_chosen": 11.103178977966309, "log_odds_ratio": -0.00010288170597050339, "logits/chosen": -0.45769423246383667, "logits/rejected": -0.5105969905853271, "logps/chosen": -0.0003910820232704282, "logps/rejected": -2.262617349624634, "loss": 0.2931, "nll_loss": 0.07327709347009659, "rewards/accuracies": 1.0, "rewards/chosen": -3.910820305463858e-05, "rewards/margins": 0.22622261941432953, "rewards/rejected": -0.22626172006130219, "step": 12127 }, { "epoch": 8.387275242047027, "grad_norm": 3.580827236175537, "learning_rate": 8.959581988627633e-06, "log_odds_chosen": 11.565444946289062, "log_odds_ratio": -1.3538083294406533e-05, "logits/chosen": -0.4459022283554077, "logits/rejected": -0.5162770748138428, "logps/chosen": -0.00011571186769288033, "logps/rejected": -2.497537851333618, "loss": 0.3723, "nll_loss": 0.09307833015918732, "rewards/accuracies": 1.0, "rewards/chosen": -1.1571187314984854e-05, "rewards/margins": 0.24974222481250763, "rewards/rejected": -0.24975380301475525, "step": 12128 }, { "epoch": 8.387966804979254, "grad_norm": 3.24259614944458, "learning_rate": 8.955739972337484e-06, "log_odds_chosen": 11.701902389526367, "log_odds_ratio": -3.0087014238233678e-05, "logits/chosen": -0.4742031991481781, "logits/rejected": -0.48803767561912537, "logps/chosen": -0.00012482488818932325, "logps/rejected": -2.4717555046081543, "loss": 0.5772, "nll_loss": 0.14430077373981476, "rewards/accuracies": 1.0, "rewards/chosen": -1.2482489182730205e-05, "rewards/margins": 0.2471630871295929, "rewards/rejected": -0.24717557430267334, "step": 12129 }, { "epoch": 8.38865836791148, "grad_norm": 16.22124671936035, "learning_rate": 8.951897956047334e-06, "log_odds_chosen": 11.777971267700195, "log_odds_ratio": -0.00025148893473669887, "logits/chosen": -0.3254454731941223, "logits/rejected": -0.3738795816898346, "logps/chosen": -0.000138016912387684, "logps/rejected": -2.7632956504821777, "loss": 0.4057, "nll_loss": 0.10140404105186462, "rewards/accuracies": 1.0, "rewards/chosen": -1.3801690329273697e-05, "rewards/margins": 0.2763157784938812, "rewards/rejected": -0.27632957696914673, "step": 12130 }, { "epoch": 8.389349930843707, "grad_norm": 6.9681315422058105, "learning_rate": 8.948055939757185e-06, "log_odds_chosen": 9.457818984985352, "log_odds_ratio": -0.003858291544020176, "logits/chosen": 0.024935171008110046, "logits/rejected": -0.09445832669734955, "logps/chosen": -0.0016933679580688477, "logps/rejected": -1.6940945386886597, "loss": 0.6565, "nll_loss": 0.1637456715106964, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016933679580688477, "rewards/margins": 0.16924011707305908, "rewards/rejected": -0.16940946877002716, "step": 12131 }, { "epoch": 8.390041493775934, "grad_norm": 3.176541566848755, "learning_rate": 8.944213923467036e-06, "log_odds_chosen": 11.606234550476074, "log_odds_ratio": -3.1693620258010924e-05, "logits/chosen": -0.545473575592041, "logits/rejected": -0.634528636932373, "logps/chosen": -0.00034183548996225, "logps/rejected": -2.784149408340454, "loss": 0.3399, "nll_loss": 0.08497949689626694, "rewards/accuracies": 1.0, "rewards/chosen": -3.4183547541033477e-05, "rewards/margins": 0.27838078141212463, "rewards/rejected": -0.2784149646759033, "step": 12132 }, { "epoch": 8.39073305670816, "grad_norm": 3.890603542327881, "learning_rate": 8.940371907176887e-06, "log_odds_chosen": 10.679001808166504, "log_odds_ratio": -4.887073737336323e-05, "logits/chosen": -0.487888365983963, "logits/rejected": -0.7082811594009399, "logps/chosen": -0.00012446560140233487, "logps/rejected": -1.8423631191253662, "loss": 0.6429, "nll_loss": 0.1607137769460678, "rewards/accuracies": 1.0, "rewards/chosen": -1.2446559594536666e-05, "rewards/margins": 0.18422386050224304, "rewards/rejected": -0.1842363178730011, "step": 12133 }, { "epoch": 8.391424619640388, "grad_norm": 3.3028998374938965, "learning_rate": 8.936529890886739e-06, "log_odds_chosen": 11.168549537658691, "log_odds_ratio": -0.00025892583653330803, "logits/chosen": -0.12940393388271332, "logits/rejected": -0.15886016190052032, "logps/chosen": -0.0007486994145438075, "logps/rejected": -2.3878705501556396, "loss": 0.4677, "nll_loss": 0.11690366268157959, "rewards/accuracies": 1.0, "rewards/chosen": -7.48699385439977e-05, "rewards/margins": 0.23871219158172607, "rewards/rejected": -0.23878705501556396, "step": 12134 }, { "epoch": 8.392116182572614, "grad_norm": 2.5992722511291504, "learning_rate": 8.932687874596588e-06, "log_odds_chosen": 10.669702529907227, "log_odds_ratio": -4.008759788121097e-05, "logits/chosen": -0.40739911794662476, "logits/rejected": -0.3629014492034912, "logps/chosen": -0.00013733016385231167, "logps/rejected": -1.8446671962738037, "loss": 0.2705, "nll_loss": 0.06761143356561661, "rewards/accuracies": 1.0, "rewards/chosen": -1.3733016203332227e-05, "rewards/margins": 0.18445296585559845, "rewards/rejected": -0.18446670472621918, "step": 12135 }, { "epoch": 8.392807745504841, "grad_norm": 4.764731407165527, "learning_rate": 8.928845858306439e-06, "log_odds_chosen": 11.26877498626709, "log_odds_ratio": -0.00014127125905361027, "logits/chosen": -0.03872055560350418, "logits/rejected": 0.004581443965435028, "logps/chosen": -0.00023471549502573907, "logps/rejected": -2.5994601249694824, "loss": 0.3531, "nll_loss": 0.08826877921819687, "rewards/accuracies": 1.0, "rewards/chosen": -2.3471548047382385e-05, "rewards/margins": 0.2599225342273712, "rewards/rejected": -0.2599460184574127, "step": 12136 }, { "epoch": 8.393499308437068, "grad_norm": 3.465942144393921, "learning_rate": 8.925003842016291e-06, "log_odds_chosen": 11.4708251953125, "log_odds_ratio": -3.860402648570016e-05, "logits/chosen": -0.20086407661437988, "logits/rejected": -0.37650150060653687, "logps/chosen": -0.0001465227105654776, "logps/rejected": -2.322443962097168, "loss": 0.2577, "nll_loss": 0.0644330084323883, "rewards/accuracies": 1.0, "rewards/chosen": -1.4652272511739284e-05, "rewards/margins": 0.23222973942756653, "rewards/rejected": -0.23224438726902008, "step": 12137 }, { "epoch": 8.394190871369295, "grad_norm": 17.046611785888672, "learning_rate": 8.921161825726142e-06, "log_odds_chosen": 11.655769348144531, "log_odds_ratio": -2.2001067918608896e-05, "logits/chosen": -0.15086019039154053, "logits/rejected": -0.19133687019348145, "logps/chosen": -0.00010712641233112663, "logps/rejected": -2.574110269546509, "loss": 0.528, "nll_loss": 0.13198719918727875, "rewards/accuracies": 1.0, "rewards/chosen": -1.0712641596910544e-05, "rewards/margins": 0.2574003338813782, "rewards/rejected": -0.25741100311279297, "step": 12138 }, { "epoch": 8.394882434301522, "grad_norm": 3.9673593044281006, "learning_rate": 8.917319809435993e-06, "log_odds_chosen": 10.512715339660645, "log_odds_ratio": -6.370164919644594e-05, "logits/chosen": 0.008853770792484283, "logits/rejected": -0.06970393657684326, "logps/chosen": -0.00030971781234256923, "logps/rejected": -2.1695480346679688, "loss": 0.5843, "nll_loss": 0.14605872333049774, "rewards/accuracies": 1.0, "rewards/chosen": -3.097178341704421e-05, "rewards/margins": 0.21692386269569397, "rewards/rejected": -0.2169548124074936, "step": 12139 }, { "epoch": 8.395573997233749, "grad_norm": 4.223468780517578, "learning_rate": 8.913477793145844e-06, "log_odds_chosen": 12.047952651977539, "log_odds_ratio": -1.451267598895356e-05, "logits/chosen": -0.08651173114776611, "logits/rejected": -0.259906530380249, "logps/chosen": -6.24086387688294e-05, "logps/rejected": -2.3703420162200928, "loss": 0.3786, "nll_loss": 0.09465420246124268, "rewards/accuracies": 1.0, "rewards/chosen": -6.240864422579762e-06, "rewards/margins": 0.23702794313430786, "rewards/rejected": -0.23703420162200928, "step": 12140 }, { "epoch": 8.396265560165975, "grad_norm": 4.868855953216553, "learning_rate": 8.909635776855694e-06, "log_odds_chosen": 12.190462112426758, "log_odds_ratio": -1.316713314736262e-05, "logits/chosen": -0.4914645552635193, "logits/rejected": -0.46773529052734375, "logps/chosen": -0.00014875730266794562, "logps/rejected": -3.121089458465576, "loss": 0.4575, "nll_loss": 0.11437132209539413, "rewards/accuracies": 1.0, "rewards/chosen": -1.4875729902996682e-05, "rewards/margins": 0.3120940923690796, "rewards/rejected": -0.31210893392562866, "step": 12141 }, { "epoch": 8.396957123098202, "grad_norm": 4.057255744934082, "learning_rate": 8.905793760565545e-06, "log_odds_chosen": 11.078142166137695, "log_odds_ratio": -0.0001451470161555335, "logits/chosen": -0.03570991009473801, "logits/rejected": -0.21599245071411133, "logps/chosen": -0.00032920308876782656, "logps/rejected": -2.516672134399414, "loss": 0.4433, "nll_loss": 0.11080868542194366, "rewards/accuracies": 1.0, "rewards/chosen": -3.292031033197418e-05, "rewards/margins": 0.25163429975509644, "rewards/rejected": -0.25166723132133484, "step": 12142 }, { "epoch": 8.39764868603043, "grad_norm": 4.454241752624512, "learning_rate": 8.901951744275396e-06, "log_odds_chosen": 10.203397750854492, "log_odds_ratio": -0.00020603620214387774, "logits/chosen": -0.5759111046791077, "logits/rejected": -0.5808447003364563, "logps/chosen": -0.00016467072418890893, "logps/rejected": -1.8405020236968994, "loss": 0.5661, "nll_loss": 0.14150398969650269, "rewards/accuracies": 1.0, "rewards/chosen": -1.6467072782688774e-05, "rewards/margins": 0.18403376638889313, "rewards/rejected": -0.18405021727085114, "step": 12143 }, { "epoch": 8.398340248962656, "grad_norm": 3.6221299171447754, "learning_rate": 8.898109727985248e-06, "log_odds_chosen": 10.22618293762207, "log_odds_ratio": -0.00020153902005404234, "logits/chosen": -0.3695487380027771, "logits/rejected": -0.38564279675483704, "logps/chosen": -0.000372840411728248, "logps/rejected": -1.8671324253082275, "loss": 0.4263, "nll_loss": 0.10655222088098526, "rewards/accuracies": 1.0, "rewards/chosen": -3.728403680725023e-05, "rewards/margins": 0.18667596578598022, "rewards/rejected": -0.18671324849128723, "step": 12144 }, { "epoch": 8.399031811894883, "grad_norm": 4.070497512817383, "learning_rate": 8.894267711695097e-06, "log_odds_chosen": 11.722437858581543, "log_odds_ratio": -6.260615919018164e-05, "logits/chosen": -0.11679200828075409, "logits/rejected": -0.408610463142395, "logps/chosen": -0.0001105478877434507, "logps/rejected": -2.658935070037842, "loss": 0.4122, "nll_loss": 0.1030396968126297, "rewards/accuracies": 1.0, "rewards/chosen": -1.1054788046749309e-05, "rewards/margins": 0.2658824324607849, "rewards/rejected": -0.26589351892471313, "step": 12145 }, { "epoch": 8.39972337482711, "grad_norm": 4.5460205078125, "learning_rate": 8.890425695404948e-06, "log_odds_chosen": 11.706779479980469, "log_odds_ratio": -5.225447239354253e-05, "logits/chosen": -0.19363567233085632, "logits/rejected": -0.23461072146892548, "logps/chosen": -0.00026260357117280364, "logps/rejected": -2.646803855895996, "loss": 0.5108, "nll_loss": 0.1277005672454834, "rewards/accuracies": 1.0, "rewards/chosen": -2.6260357117280364e-05, "rewards/margins": 0.26465412974357605, "rewards/rejected": -0.2646803855895996, "step": 12146 }, { "epoch": 8.400414937759336, "grad_norm": 5.4933929443359375, "learning_rate": 8.8865836791148e-06, "log_odds_chosen": 11.374542236328125, "log_odds_ratio": -5.557585245696828e-05, "logits/chosen": -0.22197853028774261, "logits/rejected": -0.28390344977378845, "logps/chosen": -0.00048701392370276153, "logps/rejected": -3.2011351585388184, "loss": 0.4014, "nll_loss": 0.10034617781639099, "rewards/accuracies": 1.0, "rewards/chosen": -4.8701393097871915e-05, "rewards/margins": 0.32006484270095825, "rewards/rejected": -0.32011356949806213, "step": 12147 }, { "epoch": 8.401106500691563, "grad_norm": 2.9592678546905518, "learning_rate": 8.882741662824651e-06, "log_odds_chosen": 10.759661674499512, "log_odds_ratio": -4.308186180423945e-05, "logits/chosen": -0.39755889773368835, "logits/rejected": -0.39187881350517273, "logps/chosen": -0.00021549042139668018, "logps/rejected": -2.106257200241089, "loss": 0.3276, "nll_loss": 0.08189596235752106, "rewards/accuracies": 1.0, "rewards/chosen": -2.1549041775870137e-05, "rewards/margins": 0.21060419082641602, "rewards/rejected": -0.21062573790550232, "step": 12148 }, { "epoch": 8.40179806362379, "grad_norm": 3.577422857284546, "learning_rate": 8.878899646534502e-06, "log_odds_chosen": 10.447932243347168, "log_odds_ratio": -0.0003077391884289682, "logits/chosen": -0.12919040024280548, "logits/rejected": -0.12657864391803741, "logps/chosen": -0.0006332011544145644, "logps/rejected": -2.045531749725342, "loss": 0.3966, "nll_loss": 0.09911017119884491, "rewards/accuracies": 1.0, "rewards/chosen": -6.332011253107339e-05, "rewards/margins": 0.20448985695838928, "rewards/rejected": -0.20455315709114075, "step": 12149 }, { "epoch": 8.402489626556017, "grad_norm": 8.003704071044922, "learning_rate": 8.875057630244353e-06, "log_odds_chosen": 11.208322525024414, "log_odds_ratio": -3.253010436310433e-05, "logits/chosen": -0.5645467638969421, "logits/rejected": -0.4849031865596771, "logps/chosen": -0.0001778160803951323, "logps/rejected": -2.103116035461426, "loss": 0.4208, "nll_loss": 0.10519391298294067, "rewards/accuracies": 1.0, "rewards/chosen": -1.778160731191747e-05, "rewards/margins": 0.21029382944107056, "rewards/rejected": -0.21031160652637482, "step": 12150 }, { "epoch": 8.403181189488244, "grad_norm": 5.889824867248535, "learning_rate": 8.871215613954203e-06, "log_odds_chosen": 10.591680526733398, "log_odds_ratio": -0.00012240585056133568, "logits/chosen": -0.0922984853386879, "logits/rejected": -0.10059482604265213, "logps/chosen": -0.00026184759917669, "logps/rejected": -1.976510763168335, "loss": 0.5141, "nll_loss": 0.12850183248519897, "rewards/accuracies": 1.0, "rewards/chosen": -2.6184758098679595e-05, "rewards/margins": 0.19762490689754486, "rewards/rejected": -0.19765108823776245, "step": 12151 }, { "epoch": 8.40387275242047, "grad_norm": 4.742496967315674, "learning_rate": 8.867373597664054e-06, "log_odds_chosen": 12.126361846923828, "log_odds_ratio": -1.0117659257957712e-05, "logits/chosen": -0.13543446362018585, "logits/rejected": -0.10724420845508575, "logps/chosen": -0.0001919452624861151, "logps/rejected": -3.2606234550476074, "loss": 0.4523, "nll_loss": 0.11307486891746521, "rewards/accuracies": 1.0, "rewards/chosen": -1.919452552101575e-05, "rewards/margins": 0.32604315876960754, "rewards/rejected": -0.3260623514652252, "step": 12152 }, { "epoch": 8.404564315352697, "grad_norm": 3.5070595741271973, "learning_rate": 8.863531581373907e-06, "log_odds_chosen": 11.187559127807617, "log_odds_ratio": -3.286149149062112e-05, "logits/chosen": -0.2311168909072876, "logits/rejected": -0.33597952127456665, "logps/chosen": -0.0003408733173273504, "logps/rejected": -2.568341016769409, "loss": 0.4203, "nll_loss": 0.10506104677915573, "rewards/accuracies": 1.0, "rewards/chosen": -3.4087330277543515e-05, "rewards/margins": 0.2568000257015228, "rewards/rejected": -0.25683411955833435, "step": 12153 }, { "epoch": 8.405255878284924, "grad_norm": 3.7609317302703857, "learning_rate": 8.859689565083756e-06, "log_odds_chosen": 10.210521697998047, "log_odds_ratio": -0.0002451605396345258, "logits/chosen": -0.42259418964385986, "logits/rejected": -0.5031062960624695, "logps/chosen": -0.0001644161675358191, "logps/rejected": -1.500652551651001, "loss": 0.4255, "nll_loss": 0.10636003315448761, "rewards/accuracies": 1.0, "rewards/chosen": -1.644161602598615e-05, "rewards/margins": 0.15004882216453552, "rewards/rejected": -0.15006527304649353, "step": 12154 }, { "epoch": 8.405947441217151, "grad_norm": 4.2350358963012695, "learning_rate": 8.855847548793606e-06, "log_odds_chosen": 9.83108139038086, "log_odds_ratio": -0.00022007252846378833, "logits/chosen": -0.23014146089553833, "logits/rejected": -0.30746909976005554, "logps/chosen": -0.00015861319843679667, "logps/rejected": -1.3929765224456787, "loss": 0.3984, "nll_loss": 0.09957799315452576, "rewards/accuracies": 1.0, "rewards/chosen": -1.5861318388488144e-05, "rewards/margins": 0.13928177952766418, "rewards/rejected": -0.13929766416549683, "step": 12155 }, { "epoch": 8.406639004149378, "grad_norm": 4.1333160400390625, "learning_rate": 8.852005532503459e-06, "log_odds_chosen": 10.633649826049805, "log_odds_ratio": -0.0001436761813238263, "logits/chosen": -0.21853643655776978, "logits/rejected": -0.16347083449363708, "logps/chosen": -0.00018755270866677165, "logps/rejected": -2.1546311378479004, "loss": 0.468, "nll_loss": 0.1169951781630516, "rewards/accuracies": 1.0, "rewards/chosen": -1.8755272321868688e-05, "rewards/margins": 0.21544435620307922, "rewards/rejected": -0.21546313166618347, "step": 12156 }, { "epoch": 8.407330567081605, "grad_norm": 3.858767509460449, "learning_rate": 8.84816351621331e-06, "log_odds_chosen": 12.503584861755371, "log_odds_ratio": -0.0003335903456900269, "logits/chosen": -0.7343727946281433, "logits/rejected": -0.6697561740875244, "logps/chosen": -0.0002777479530777782, "logps/rejected": -2.963214874267578, "loss": 0.3465, "nll_loss": 0.08658871054649353, "rewards/accuracies": 1.0, "rewards/chosen": -2.7774796762969345e-05, "rewards/margins": 0.296293705701828, "rewards/rejected": -0.29632145166397095, "step": 12157 }, { "epoch": 8.408022130013832, "grad_norm": 4.064150333404541, "learning_rate": 8.84432149992316e-06, "log_odds_chosen": 11.826985359191895, "log_odds_ratio": -4.684683881350793e-05, "logits/chosen": -0.5308201909065247, "logits/rejected": -0.6423518061637878, "logps/chosen": -0.00013899643090553582, "logps/rejected": -2.4273645877838135, "loss": 0.368, "nll_loss": 0.09199005365371704, "rewards/accuracies": 1.0, "rewards/chosen": -1.3899643818149343e-05, "rewards/margins": 0.2427225559949875, "rewards/rejected": -0.24273645877838135, "step": 12158 }, { "epoch": 8.408713692946058, "grad_norm": 3.600189447402954, "learning_rate": 8.840479483633011e-06, "log_odds_chosen": 11.107683181762695, "log_odds_ratio": -9.071611566469073e-05, "logits/chosen": -0.22912006080150604, "logits/rejected": -0.08014730364084244, "logps/chosen": -0.0002447239530738443, "logps/rejected": -2.6281778812408447, "loss": 0.3179, "nll_loss": 0.07946532219648361, "rewards/accuracies": 1.0, "rewards/chosen": -2.4472396034980193e-05, "rewards/margins": 0.26279330253601074, "rewards/rejected": -0.2628178000450134, "step": 12159 }, { "epoch": 8.409405255878285, "grad_norm": 4.881404876708984, "learning_rate": 8.836637467342862e-06, "log_odds_chosen": 11.663158416748047, "log_odds_ratio": -2.5977791665354744e-05, "logits/chosen": -0.43095946311950684, "logits/rejected": -0.5290078520774841, "logps/chosen": -6.659415521426126e-05, "logps/rejected": -2.0811989307403564, "loss": 0.592, "nll_loss": 0.14798519015312195, "rewards/accuracies": 1.0, "rewards/chosen": -6.659415248577716e-06, "rewards/margins": 0.20811323821544647, "rewards/rejected": -0.20811989903450012, "step": 12160 }, { "epoch": 8.410096818810512, "grad_norm": 5.0877861976623535, "learning_rate": 8.832795451052713e-06, "log_odds_chosen": 10.184471130371094, "log_odds_ratio": -0.0008171540684998035, "logits/chosen": 0.12613186240196228, "logits/rejected": 0.050111062824726105, "logps/chosen": -0.000566254137083888, "logps/rejected": -2.0396904945373535, "loss": 0.4631, "nll_loss": 0.11569279432296753, "rewards/accuracies": 1.0, "rewards/chosen": -5.662541661877185e-05, "rewards/margins": 0.20391243696212769, "rewards/rejected": -0.2039690613746643, "step": 12161 }, { "epoch": 8.410788381742739, "grad_norm": 4.5020036697387695, "learning_rate": 8.828953434762565e-06, "log_odds_chosen": 11.530824661254883, "log_odds_ratio": -5.652059189742431e-05, "logits/chosen": -0.2417091429233551, "logits/rejected": -0.24491730332374573, "logps/chosen": -0.0006334069184958935, "logps/rejected": -3.11706805229187, "loss": 0.5545, "nll_loss": 0.13860823214054108, "rewards/accuracies": 1.0, "rewards/chosen": -6.334069621516392e-05, "rewards/margins": 0.31164348125457764, "rewards/rejected": -0.3117068409919739, "step": 12162 }, { "epoch": 8.411479944674966, "grad_norm": 4.6040167808532715, "learning_rate": 8.825111418472414e-06, "log_odds_chosen": 11.503437995910645, "log_odds_ratio": -1.1247922884649597e-05, "logits/chosen": -0.41219836473464966, "logits/rejected": -0.45347335934638977, "logps/chosen": -0.00012556483852677047, "logps/rejected": -2.360114574432373, "loss": 0.4967, "nll_loss": 0.12417368590831757, "rewards/accuracies": 1.0, "rewards/chosen": -1.255648567166645e-05, "rewards/margins": 0.23599891364574432, "rewards/rejected": -0.23601147532463074, "step": 12163 }, { "epoch": 8.412171507607193, "grad_norm": 4.140271186828613, "learning_rate": 8.821269402182265e-06, "log_odds_chosen": 11.344810485839844, "log_odds_ratio": -0.00012324423005338758, "logits/chosen": -0.49369677901268005, "logits/rejected": -0.614727795124054, "logps/chosen": -0.0002669915556907654, "logps/rejected": -2.55715012550354, "loss": 0.32, "nll_loss": 0.07999327778816223, "rewards/accuracies": 1.0, "rewards/chosen": -2.6699155569076538e-05, "rewards/margins": 0.255688339471817, "rewards/rejected": -0.255715012550354, "step": 12164 }, { "epoch": 8.41286307053942, "grad_norm": 4.0589776039123535, "learning_rate": 8.817427385892117e-06, "log_odds_chosen": 11.506391525268555, "log_odds_ratio": -3.021215343324002e-05, "logits/chosen": -0.32307207584381104, "logits/rejected": -0.4154369831085205, "logps/chosen": -7.630437175976112e-05, "logps/rejected": -2.0438153743743896, "loss": 0.4018, "nll_loss": 0.10045389086008072, "rewards/accuracies": 1.0, "rewards/chosen": -7.630436812178232e-06, "rewards/margins": 0.20437388122081757, "rewards/rejected": -0.20438152551651, "step": 12165 }, { "epoch": 8.413554633471646, "grad_norm": 3.8586294651031494, "learning_rate": 8.813585369601968e-06, "log_odds_chosen": 10.577253341674805, "log_odds_ratio": -5.563483136938885e-05, "logits/chosen": 0.056054629385471344, "logits/rejected": -0.09274060279130936, "logps/chosen": -0.0007029641419649124, "logps/rejected": -2.80169415473938, "loss": 0.5042, "nll_loss": 0.12604326009750366, "rewards/accuracies": 1.0, "rewards/chosen": -7.029641710687429e-05, "rewards/margins": 0.28009912371635437, "rewards/rejected": -0.28016942739486694, "step": 12166 }, { "epoch": 8.414246196403873, "grad_norm": 2.5449228286743164, "learning_rate": 8.809743353311819e-06, "log_odds_chosen": 10.405815124511719, "log_odds_ratio": -0.00010505613317945972, "logits/chosen": -0.5005052089691162, "logits/rejected": -0.5660674571990967, "logps/chosen": -0.001792833092622459, "logps/rejected": -1.830446720123291, "loss": 0.2082, "nll_loss": 0.05203032121062279, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017928332090377808, "rewards/margins": 0.18286538124084473, "rewards/rejected": -0.1830446720123291, "step": 12167 }, { "epoch": 8.4149377593361, "grad_norm": 4.84822416305542, "learning_rate": 8.80590133702167e-06, "log_odds_chosen": 11.884775161743164, "log_odds_ratio": -7.349726365646347e-05, "logits/chosen": -0.3754521608352661, "logits/rejected": -0.4655861556529999, "logps/chosen": -0.000315560755552724, "logps/rejected": -3.4204797744750977, "loss": 0.5375, "nll_loss": 0.13436771929264069, "rewards/accuracies": 1.0, "rewards/chosen": -3.155607919325121e-05, "rewards/margins": 0.34201645851135254, "rewards/rejected": -0.3420480191707611, "step": 12168 }, { "epoch": 8.415629322268327, "grad_norm": 4.6386847496032715, "learning_rate": 8.80205932073152e-06, "log_odds_chosen": 10.568868637084961, "log_odds_ratio": -4.5589913497678936e-05, "logits/chosen": 0.06592310965061188, "logits/rejected": 0.012271258980035782, "logps/chosen": -0.0003107009397353977, "logps/rejected": -2.078974485397339, "loss": 0.4368, "nll_loss": 0.10918490588665009, "rewards/accuracies": 1.0, "rewards/chosen": -3.107009251834825e-05, "rewards/margins": 0.207866370677948, "rewards/rejected": -0.20789743959903717, "step": 12169 }, { "epoch": 8.416320885200554, "grad_norm": 3.8307695388793945, "learning_rate": 8.798217304441371e-06, "log_odds_chosen": 11.827062606811523, "log_odds_ratio": -0.00010294328239979222, "logits/chosen": -0.36065948009490967, "logits/rejected": -0.27619484066963196, "logps/chosen": -0.0002090797497658059, "logps/rejected": -2.57730770111084, "loss": 0.4453, "nll_loss": 0.11131422221660614, "rewards/accuracies": 1.0, "rewards/chosen": -2.090797534037847e-05, "rewards/margins": 0.2577098608016968, "rewards/rejected": -0.25773078203201294, "step": 12170 }, { "epoch": 8.41701244813278, "grad_norm": 2.962205648422241, "learning_rate": 8.794375288151224e-06, "log_odds_chosen": 11.268369674682617, "log_odds_ratio": -1.8425016605760902e-05, "logits/chosen": 0.2704715430736542, "logits/rejected": 0.19724391400814056, "logps/chosen": -0.00040048419032245874, "logps/rejected": -2.1504323482513428, "loss": 0.4012, "nll_loss": 0.10029654204845428, "rewards/accuracies": 1.0, "rewards/chosen": -4.00484204874374e-05, "rewards/margins": 0.21500319242477417, "rewards/rejected": -0.21504324674606323, "step": 12171 }, { "epoch": 8.417704011065007, "grad_norm": 3.4812543392181396, "learning_rate": 8.790533271861073e-06, "log_odds_chosen": 11.892035484313965, "log_odds_ratio": -1.1453821571194567e-05, "logits/chosen": -0.08979454636573792, "logits/rejected": -0.12990817427635193, "logps/chosen": -7.508957060053945e-05, "logps/rejected": -2.176224708557129, "loss": 0.484, "nll_loss": 0.12100017070770264, "rewards/accuracies": 1.0, "rewards/chosen": -7.508956969104474e-06, "rewards/margins": 0.21761493384838104, "rewards/rejected": -0.21762245893478394, "step": 12172 }, { "epoch": 8.418395573997234, "grad_norm": 3.4574899673461914, "learning_rate": 8.786691255570923e-06, "log_odds_chosen": 10.994365692138672, "log_odds_ratio": -4.289807839086279e-05, "logits/chosen": -0.12744206190109253, "logits/rejected": -0.32379990816116333, "logps/chosen": -0.0004646638408303261, "logps/rejected": -2.8650026321411133, "loss": 0.3915, "nll_loss": 0.09787625074386597, "rewards/accuracies": 1.0, "rewards/chosen": -4.6466382627841085e-05, "rewards/margins": 0.2864537835121155, "rewards/rejected": -0.2865002751350403, "step": 12173 }, { "epoch": 8.41908713692946, "grad_norm": 3.1096792221069336, "learning_rate": 8.782849239280774e-06, "log_odds_chosen": 11.081042289733887, "log_odds_ratio": -3.763330460060388e-05, "logits/chosen": -0.5980318784713745, "logits/rejected": -0.5375435948371887, "logps/chosen": -0.00017640799342188984, "logps/rejected": -1.9923516511917114, "loss": 0.3995, "nll_loss": 0.09987182170152664, "rewards/accuracies": 1.0, "rewards/chosen": -1.7640801161178388e-05, "rewards/margins": 0.1992175281047821, "rewards/rejected": -0.19923515617847443, "step": 12174 }, { "epoch": 8.419778699861688, "grad_norm": 3.380220890045166, "learning_rate": 8.779007222990627e-06, "log_odds_chosen": 11.464609146118164, "log_odds_ratio": -2.2556834665010683e-05, "logits/chosen": -0.016593724489212036, "logits/rejected": -0.14954808354377747, "logps/chosen": -0.00011275989527348429, "logps/rejected": -2.324753999710083, "loss": 0.3875, "nll_loss": 0.09687241911888123, "rewards/accuracies": 1.0, "rewards/chosen": -1.1275988072156906e-05, "rewards/margins": 0.23246413469314575, "rewards/rejected": -0.2324753999710083, "step": 12175 }, { "epoch": 8.420470262793915, "grad_norm": 5.193109512329102, "learning_rate": 8.775165206700477e-06, "log_odds_chosen": 9.941858291625977, "log_odds_ratio": -0.00018944896874018013, "logits/chosen": 0.13896964490413666, "logits/rejected": -0.0418696403503418, "logps/chosen": -0.0012561215553432703, "logps/rejected": -2.5737171173095703, "loss": 0.6994, "nll_loss": 0.17483317852020264, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012561216135509312, "rewards/margins": 0.25724610686302185, "rewards/rejected": -0.257371723651886, "step": 12176 }, { "epoch": 8.421161825726141, "grad_norm": 7.516174793243408, "learning_rate": 8.771323190410326e-06, "log_odds_chosen": 12.087610244750977, "log_odds_ratio": -1.127936047851108e-05, "logits/chosen": 0.2538374364376068, "logits/rejected": 0.08998441696166992, "logps/chosen": -0.00012017204426229, "logps/rejected": -2.966963529586792, "loss": 0.6934, "nll_loss": 0.1733420491218567, "rewards/accuracies": 1.0, "rewards/chosen": -1.2017204426229e-05, "rewards/margins": 0.2966843247413635, "rewards/rejected": -0.29669636487960815, "step": 12177 }, { "epoch": 8.421853388658368, "grad_norm": 4.537851810455322, "learning_rate": 8.767481174120179e-06, "log_odds_chosen": 12.000404357910156, "log_odds_ratio": -2.539575143600814e-05, "logits/chosen": -0.07242706418037415, "logits/rejected": -0.17881931364536285, "logps/chosen": -0.0002449548337608576, "logps/rejected": -3.37038254737854, "loss": 0.4343, "nll_loss": 0.10857339203357697, "rewards/accuracies": 1.0, "rewards/chosen": -2.44954844674794e-05, "rewards/margins": 0.33701375126838684, "rewards/rejected": -0.3370382785797119, "step": 12178 }, { "epoch": 8.422544951590595, "grad_norm": 5.332724571228027, "learning_rate": 8.76363915783003e-06, "log_odds_chosen": 11.437095642089844, "log_odds_ratio": -3.303804624010809e-05, "logits/chosen": 0.06925775110721588, "logits/rejected": -0.05285045504570007, "logps/chosen": -0.0002176310954382643, "logps/rejected": -2.8794593811035156, "loss": 0.4062, "nll_loss": 0.10154794156551361, "rewards/accuracies": 1.0, "rewards/chosen": -2.176310954382643e-05, "rewards/margins": 0.2879241704940796, "rewards/rejected": -0.2879459261894226, "step": 12179 }, { "epoch": 8.423236514522822, "grad_norm": 3.739887237548828, "learning_rate": 8.75979714153988e-06, "log_odds_chosen": 10.907541275024414, "log_odds_ratio": -0.00019406666979193687, "logits/chosen": 0.007940517738461494, "logits/rejected": -0.0989658460021019, "logps/chosen": -0.0011361661599949002, "logps/rejected": -2.5060086250305176, "loss": 0.4233, "nll_loss": 0.1058058887720108, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011361661017872393, "rewards/margins": 0.2504872679710388, "rewards/rejected": -0.2506008744239807, "step": 12180 }, { "epoch": 8.423928077455049, "grad_norm": 4.398841857910156, "learning_rate": 8.755955125249731e-06, "log_odds_chosen": 11.400894165039062, "log_odds_ratio": -0.00013864760694559664, "logits/chosen": -0.18514317274093628, "logits/rejected": -0.3388518691062927, "logps/chosen": -0.0001667686883592978, "logps/rejected": -2.3160228729248047, "loss": 0.4045, "nll_loss": 0.10111506283283234, "rewards/accuracies": 1.0, "rewards/chosen": -1.6676869563525543e-05, "rewards/margins": 0.23158562183380127, "rewards/rejected": -0.23160231113433838, "step": 12181 }, { "epoch": 8.424619640387276, "grad_norm": 3.549445629119873, "learning_rate": 8.752113108959582e-06, "log_odds_chosen": 11.261871337890625, "log_odds_ratio": -8.981912833405659e-05, "logits/chosen": 0.05491316318511963, "logits/rejected": 0.14074671268463135, "logps/chosen": -0.0008438285440206528, "logps/rejected": -2.6764464378356934, "loss": 0.3137, "nll_loss": 0.07841696590185165, "rewards/accuracies": 1.0, "rewards/chosen": -8.438285294687375e-05, "rewards/margins": 0.26756027340888977, "rewards/rejected": -0.26764464378356934, "step": 12182 }, { "epoch": 8.425311203319502, "grad_norm": 4.180157661437988, "learning_rate": 8.748271092669432e-06, "log_odds_chosen": 11.801605224609375, "log_odds_ratio": -1.6300808056257665e-05, "logits/chosen": -0.31147193908691406, "logits/rejected": -0.3165523111820221, "logps/chosen": -0.0001688749180175364, "logps/rejected": -2.610840320587158, "loss": 0.3341, "nll_loss": 0.08351986855268478, "rewards/accuracies": 1.0, "rewards/chosen": -1.6887490346562117e-05, "rewards/margins": 0.26106715202331543, "rewards/rejected": -0.26108402013778687, "step": 12183 }, { "epoch": 8.42600276625173, "grad_norm": 3.374314069747925, "learning_rate": 8.744429076379285e-06, "log_odds_chosen": 11.43653392791748, "log_odds_ratio": -1.3258251783554442e-05, "logits/chosen": 0.20424631237983704, "logits/rejected": 0.13409529626369476, "logps/chosen": -6.864860915811732e-05, "logps/rejected": -1.869539499282837, "loss": 0.3789, "nll_loss": 0.09472458064556122, "rewards/accuracies": 1.0, "rewards/chosen": -6.864860552013852e-06, "rewards/margins": 0.1869470775127411, "rewards/rejected": -0.18695393204689026, "step": 12184 }, { "epoch": 8.426694329183956, "grad_norm": 5.52797269821167, "learning_rate": 8.740587060089136e-06, "log_odds_chosen": 12.257134437561035, "log_odds_ratio": -4.6418386773439124e-05, "logits/chosen": 0.3385429084300995, "logits/rejected": 0.2501536011695862, "logps/chosen": -0.0001583172706887126, "logps/rejected": -3.3235623836517334, "loss": 0.6013, "nll_loss": 0.15032291412353516, "rewards/accuracies": 1.0, "rewards/chosen": -1.5831728887860663e-05, "rewards/margins": 0.33234041929244995, "rewards/rejected": -0.3323562443256378, "step": 12185 }, { "epoch": 8.427385892116183, "grad_norm": 3.664227247238159, "learning_rate": 8.736745043798986e-06, "log_odds_chosen": 10.88051700592041, "log_odds_ratio": -4.372180410427973e-05, "logits/chosen": -0.19316840171813965, "logits/rejected": -0.2230408787727356, "logps/chosen": -0.00013133355241734535, "logps/rejected": -1.4515959024429321, "loss": 0.3532, "nll_loss": 0.08829930424690247, "rewards/accuracies": 1.0, "rewards/chosen": -1.3133354514138773e-05, "rewards/margins": 0.1451464593410492, "rewards/rejected": -0.14515958726406097, "step": 12186 }, { "epoch": 8.42807745504841, "grad_norm": 5.888396263122559, "learning_rate": 8.732903027508837e-06, "log_odds_chosen": 11.336963653564453, "log_odds_ratio": -2.995860813825857e-05, "logits/chosen": -0.3141791522502899, "logits/rejected": -0.24508805572986603, "logps/chosen": -0.0001898179471027106, "logps/rejected": -2.411524772644043, "loss": 0.5331, "nll_loss": 0.13326820731163025, "rewards/accuracies": 1.0, "rewards/chosen": -1.898179471027106e-05, "rewards/margins": 0.24113348126411438, "rewards/rejected": -0.24115246534347534, "step": 12187 }, { "epoch": 8.428769017980636, "grad_norm": 4.525623321533203, "learning_rate": 8.729061011218688e-06, "log_odds_chosen": 11.285069465637207, "log_odds_ratio": -2.8658303563133813e-05, "logits/chosen": -0.325903058052063, "logits/rejected": -0.3379442095756531, "logps/chosen": -0.0001579608942847699, "logps/rejected": -2.2226176261901855, "loss": 0.5298, "nll_loss": 0.13244321942329407, "rewards/accuracies": 1.0, "rewards/chosen": -1.579608942847699e-05, "rewards/margins": 0.2222459614276886, "rewards/rejected": -0.22226177155971527, "step": 12188 }, { "epoch": 8.429460580912863, "grad_norm": 2.9861581325531006, "learning_rate": 8.725218994928539e-06, "log_odds_chosen": 11.143623352050781, "log_odds_ratio": -0.00014747746172361076, "logits/chosen": -0.3942258059978485, "logits/rejected": -0.37120938301086426, "logps/chosen": -0.00024196562299039215, "logps/rejected": -2.6612982749938965, "loss": 0.349, "nll_loss": 0.08722300827503204, "rewards/accuracies": 1.0, "rewards/chosen": -2.4196562662837096e-05, "rewards/margins": 0.26610565185546875, "rewards/rejected": -0.26612985134124756, "step": 12189 }, { "epoch": 8.43015214384509, "grad_norm": 4.748229503631592, "learning_rate": 8.721376978638391e-06, "log_odds_chosen": 10.968669891357422, "log_odds_ratio": -7.720799476373941e-05, "logits/chosen": 0.15589505434036255, "logits/rejected": 0.08062975853681564, "logps/chosen": -0.00024540620506741107, "logps/rejected": -2.3836829662323, "loss": 0.5417, "nll_loss": 0.13542327284812927, "rewards/accuracies": 1.0, "rewards/chosen": -2.4540620870538987e-05, "rewards/margins": 0.238343745470047, "rewards/rejected": -0.23836830258369446, "step": 12190 }, { "epoch": 8.430843706777317, "grad_norm": 4.7469377517700195, "learning_rate": 8.71753496234824e-06, "log_odds_chosen": 10.221165657043457, "log_odds_ratio": -0.00020709558157250285, "logits/chosen": -0.17670588195323944, "logits/rejected": -0.38517507910728455, "logps/chosen": -0.001517834491096437, "logps/rejected": -2.492877244949341, "loss": 0.3678, "nll_loss": 0.09192745387554169, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015178346075117588, "rewards/margins": 0.24913595616817474, "rewards/rejected": -0.24928775429725647, "step": 12191 }, { "epoch": 8.431535269709544, "grad_norm": 11.059328079223633, "learning_rate": 8.713692946058091e-06, "log_odds_chosen": 11.756535530090332, "log_odds_ratio": -7.888920663390309e-05, "logits/chosen": 0.21010424196720123, "logits/rejected": 0.19032058119773865, "logps/chosen": -0.0002800583897624165, "logps/rejected": -3.1366214752197266, "loss": 0.6249, "nll_loss": 0.15621690452098846, "rewards/accuracies": 1.0, "rewards/chosen": -2.8005841159028932e-05, "rewards/margins": 0.31363415718078613, "rewards/rejected": -0.31366217136383057, "step": 12192 }, { "epoch": 8.43222683264177, "grad_norm": 3.9881293773651123, "learning_rate": 8.709850929767943e-06, "log_odds_chosen": 10.353635787963867, "log_odds_ratio": -0.0001635959924897179, "logits/chosen": -0.20906057953834534, "logits/rejected": -0.1871834546327591, "logps/chosen": -0.00031968281837180257, "logps/rejected": -1.9287995100021362, "loss": 0.44, "nll_loss": 0.10998797416687012, "rewards/accuracies": 1.0, "rewards/chosen": -3.196828401996754e-05, "rewards/margins": 0.19284798204898834, "rewards/rejected": -0.19287994503974915, "step": 12193 }, { "epoch": 8.432918395573997, "grad_norm": 8.100662231445312, "learning_rate": 8.706008913477794e-06, "log_odds_chosen": 11.065887451171875, "log_odds_ratio": -6.525550998048857e-05, "logits/chosen": 0.10948903858661652, "logits/rejected": 0.01994427479803562, "logps/chosen": -0.0005141894216649234, "logps/rejected": -2.825793504714966, "loss": 0.4932, "nll_loss": 0.12329844385385513, "rewards/accuracies": 1.0, "rewards/chosen": -5.1418937800917774e-05, "rewards/margins": 0.2825279235839844, "rewards/rejected": -0.28257936239242554, "step": 12194 }, { "epoch": 8.433609958506224, "grad_norm": 4.43437385559082, "learning_rate": 8.702166897187645e-06, "log_odds_chosen": 12.670177459716797, "log_odds_ratio": -9.009807399706915e-06, "logits/chosen": -0.007829040288925171, "logits/rejected": -0.13075649738311768, "logps/chosen": -0.00011624552280409262, "logps/rejected": -3.3971920013427734, "loss": 0.6189, "nll_loss": 0.15472808480262756, "rewards/accuracies": 1.0, "rewards/chosen": -1.1624552826106083e-05, "rewards/margins": 0.339707612991333, "rewards/rejected": -0.3397192358970642, "step": 12195 }, { "epoch": 8.434301521438451, "grad_norm": 4.17750358581543, "learning_rate": 8.698324880897496e-06, "log_odds_chosen": 10.271980285644531, "log_odds_ratio": -0.00017558068793732673, "logits/chosen": -0.05984572321176529, "logits/rejected": -0.07835595309734344, "logps/chosen": -0.00019501293718349189, "logps/rejected": -1.7530505657196045, "loss": 0.4146, "nll_loss": 0.10362517088651657, "rewards/accuracies": 1.0, "rewards/chosen": -1.950129444594495e-05, "rewards/margins": 0.17528554797172546, "rewards/rejected": -0.1753050684928894, "step": 12196 }, { "epoch": 8.434993084370678, "grad_norm": 2.6712281703948975, "learning_rate": 8.694482864607346e-06, "log_odds_chosen": 11.466583251953125, "log_odds_ratio": -4.071402145200409e-05, "logits/chosen": -0.17353668808937073, "logits/rejected": -0.24364444613456726, "logps/chosen": -0.00044437983888201416, "logps/rejected": -2.8944315910339355, "loss": 0.2417, "nll_loss": 0.06041009724140167, "rewards/accuracies": 1.0, "rewards/chosen": -4.4437983888201416e-05, "rewards/margins": 0.289398729801178, "rewards/rejected": -0.28944316506385803, "step": 12197 }, { "epoch": 8.435684647302905, "grad_norm": 5.480512619018555, "learning_rate": 8.690640848317197e-06, "log_odds_chosen": 11.018938064575195, "log_odds_ratio": -7.402783376164734e-05, "logits/chosen": -0.02111491560935974, "logits/rejected": -0.09096451848745346, "logps/chosen": -0.0005470228497870266, "logps/rejected": -2.91145658493042, "loss": 0.4897, "nll_loss": 0.12242485582828522, "rewards/accuracies": 1.0, "rewards/chosen": -5.4702290071872994e-05, "rewards/margins": 0.2910909950733185, "rewards/rejected": -0.2911456823348999, "step": 12198 }, { "epoch": 8.436376210235132, "grad_norm": 3.627214193344116, "learning_rate": 8.68679883202705e-06, "log_odds_chosen": 9.854841232299805, "log_odds_ratio": -0.0004699954588431865, "logits/chosen": -0.14234337210655212, "logits/rejected": -0.1919478327035904, "logps/chosen": -0.0007636388181708753, "logps/rejected": -1.9873713254928589, "loss": 0.354, "nll_loss": 0.08845454454421997, "rewards/accuracies": 1.0, "rewards/chosen": -7.636388181708753e-05, "rewards/margins": 0.19866077601909637, "rewards/rejected": -0.19873715937137604, "step": 12199 }, { "epoch": 8.437067773167358, "grad_norm": 3.840066432952881, "learning_rate": 8.682956815736899e-06, "log_odds_chosen": 10.535361289978027, "log_odds_ratio": -9.704182593850419e-05, "logits/chosen": -0.348332941532135, "logits/rejected": -0.3617256283760071, "logps/chosen": -0.0003035268746316433, "logps/rejected": -1.9288250207901, "loss": 0.4512, "nll_loss": 0.1127912625670433, "rewards/accuracies": 1.0, "rewards/chosen": -3.035268855455797e-05, "rewards/margins": 0.19285213947296143, "rewards/rejected": -0.1928825080394745, "step": 12200 }, { "epoch": 8.437759336099585, "grad_norm": 5.707167148590088, "learning_rate": 8.67911479944675e-06, "log_odds_chosen": 10.545770645141602, "log_odds_ratio": -6.985733489273116e-05, "logits/chosen": 0.03148500621318817, "logits/rejected": 0.056927334517240524, "logps/chosen": -0.0003280769451521337, "logps/rejected": -2.1979610919952393, "loss": 0.3663, "nll_loss": 0.0915786474943161, "rewards/accuracies": 1.0, "rewards/chosen": -3.280769306002185e-05, "rewards/margins": 0.21976329386234283, "rewards/rejected": -0.2197960913181305, "step": 12201 }, { "epoch": 8.438450899031812, "grad_norm": 4.823429584503174, "learning_rate": 8.675272783156602e-06, "log_odds_chosen": 11.148094177246094, "log_odds_ratio": -0.00015547266229987144, "logits/chosen": 0.022853679955005646, "logits/rejected": -0.06651067733764648, "logps/chosen": -0.0002881488180719316, "logps/rejected": -2.530770778656006, "loss": 0.3408, "nll_loss": 0.08519028127193451, "rewards/accuracies": 1.0, "rewards/chosen": -2.881488217099104e-05, "rewards/margins": 0.25304824113845825, "rewards/rejected": -0.25307705998420715, "step": 12202 }, { "epoch": 8.439142461964039, "grad_norm": 5.328279495239258, "learning_rate": 8.671430766866453e-06, "log_odds_chosen": 10.745915412902832, "log_odds_ratio": -6.916501297382638e-05, "logits/chosen": 0.02286948636174202, "logits/rejected": -0.07234127074480057, "logps/chosen": -0.00023049935407470912, "logps/rejected": -2.2224698066711426, "loss": 0.3664, "nll_loss": 0.0915989875793457, "rewards/accuracies": 1.0, "rewards/chosen": -2.3049935407470912e-05, "rewards/margins": 0.2222239375114441, "rewards/rejected": -0.22224700450897217, "step": 12203 }, { "epoch": 8.439834024896266, "grad_norm": 4.388050556182861, "learning_rate": 8.667588750576303e-06, "log_odds_chosen": 11.873761177062988, "log_odds_ratio": -9.016584954224527e-06, "logits/chosen": -0.25353333353996277, "logits/rejected": -0.5208718776702881, "logps/chosen": -0.0001195125951198861, "logps/rejected": -2.624521017074585, "loss": 0.5946, "nll_loss": 0.14865511655807495, "rewards/accuracies": 1.0, "rewards/chosen": -1.1951260603382252e-05, "rewards/margins": 0.26244014501571655, "rewards/rejected": -0.26245206594467163, "step": 12204 }, { "epoch": 8.440525587828493, "grad_norm": 3.7123875617980957, "learning_rate": 8.663746734286154e-06, "log_odds_chosen": 10.771041870117188, "log_odds_ratio": -6.33986564935185e-05, "logits/chosen": -0.1201104074716568, "logits/rejected": -0.16946665942668915, "logps/chosen": -0.000875930767506361, "logps/rejected": -2.485856771469116, "loss": 0.4159, "nll_loss": 0.10397932678461075, "rewards/accuracies": 1.0, "rewards/chosen": -8.759308548178524e-05, "rewards/margins": 0.2484980821609497, "rewards/rejected": -0.24858567118644714, "step": 12205 }, { "epoch": 8.44121715076072, "grad_norm": 2.749152660369873, "learning_rate": 8.659904717996005e-06, "log_odds_chosen": 11.550658226013184, "log_odds_ratio": -3.068416481255554e-05, "logits/chosen": -0.594039261341095, "logits/rejected": -0.49890458583831787, "logps/chosen": -0.0001349577505607158, "logps/rejected": -2.299560308456421, "loss": 0.4535, "nll_loss": 0.11336538195610046, "rewards/accuracies": 1.0, "rewards/chosen": -1.349577541986946e-05, "rewards/margins": 0.22994254529476166, "rewards/rejected": -0.2299560308456421, "step": 12206 }, { "epoch": 8.441908713692946, "grad_norm": 7.287164211273193, "learning_rate": 8.656062701705856e-06, "log_odds_chosen": 11.473183631896973, "log_odds_ratio": -1.6418533050455153e-05, "logits/chosen": -0.10909983515739441, "logits/rejected": -0.20523229241371155, "logps/chosen": -0.0001011871499940753, "logps/rejected": -2.2168242931365967, "loss": 0.6241, "nll_loss": 0.15602414309978485, "rewards/accuracies": 1.0, "rewards/chosen": -1.011871518130647e-05, "rewards/margins": 0.22167231142520905, "rewards/rejected": -0.22168241441249847, "step": 12207 }, { "epoch": 8.442600276625173, "grad_norm": 3.5397424697875977, "learning_rate": 8.652220685415706e-06, "log_odds_chosen": 9.55927848815918, "log_odds_ratio": -0.0002645787608344108, "logits/chosen": -0.10018275678157806, "logits/rejected": -0.09783291816711426, "logps/chosen": -0.00026651995722204447, "logps/rejected": -1.4151195287704468, "loss": 0.2867, "nll_loss": 0.07166013866662979, "rewards/accuracies": 1.0, "rewards/chosen": -2.6651994630810805e-05, "rewards/margins": 0.1414853036403656, "rewards/rejected": -0.1415119469165802, "step": 12208 }, { "epoch": 8.4432918395574, "grad_norm": 3.645803451538086, "learning_rate": 8.648378669125557e-06, "log_odds_chosen": 10.460966110229492, "log_odds_ratio": -0.0001973931648535654, "logits/chosen": -0.31744301319122314, "logits/rejected": -0.2794133424758911, "logps/chosen": -0.00026282010367140174, "logps/rejected": -1.6829901933670044, "loss": 0.4669, "nll_loss": 0.11669819056987762, "rewards/accuracies": 1.0, "rewards/chosen": -2.6282012186129577e-05, "rewards/margins": 0.1682727336883545, "rewards/rejected": -0.16829901933670044, "step": 12209 }, { "epoch": 8.443983402489627, "grad_norm": 6.920041084289551, "learning_rate": 8.644536652835408e-06, "log_odds_chosen": 10.067420959472656, "log_odds_ratio": -0.0001373344857711345, "logits/chosen": -0.1907285749912262, "logits/rejected": -0.2630813419818878, "logps/chosen": -0.00036663428181782365, "logps/rejected": -1.7894313335418701, "loss": 0.4426, "nll_loss": 0.11062738299369812, "rewards/accuracies": 1.0, "rewards/chosen": -3.666342672659084e-05, "rewards/margins": 0.17890647053718567, "rewards/rejected": -0.17894312739372253, "step": 12210 }, { "epoch": 8.444674965421854, "grad_norm": 3.9832401275634766, "learning_rate": 8.640694636545259e-06, "log_odds_chosen": 12.233833312988281, "log_odds_ratio": -9.003819286590442e-06, "logits/chosen": -0.12104113399982452, "logits/rejected": -0.3305937647819519, "logps/chosen": -9.159260662272573e-05, "logps/rejected": -2.7944068908691406, "loss": 0.3574, "nll_loss": 0.0893465206027031, "rewards/accuracies": 1.0, "rewards/chosen": -9.159261026070453e-06, "rewards/margins": 0.2794315218925476, "rewards/rejected": -0.27944067120552063, "step": 12211 }, { "epoch": 8.44536652835408, "grad_norm": 4.182587623596191, "learning_rate": 8.636852620255111e-06, "log_odds_chosen": 11.980340003967285, "log_odds_ratio": -1.8716213162406348e-05, "logits/chosen": -0.16231654584407806, "logits/rejected": -0.24108725786209106, "logps/chosen": -6.173996371217072e-05, "logps/rejected": -2.3546369075775146, "loss": 0.2665, "nll_loss": 0.06663516908884048, "rewards/accuracies": 1.0, "rewards/chosen": -6.173996553116012e-06, "rewards/margins": 0.23545750975608826, "rewards/rejected": -0.2354636788368225, "step": 12212 }, { "epoch": 8.446058091286307, "grad_norm": 5.585501670837402, "learning_rate": 8.633010603964962e-06, "log_odds_chosen": 12.061046600341797, "log_odds_ratio": -4.808824087376706e-05, "logits/chosen": 0.07987240701913834, "logits/rejected": 0.04905199632048607, "logps/chosen": -0.000453970511443913, "logps/rejected": -3.1471643447875977, "loss": 0.4296, "nll_loss": 0.10740132629871368, "rewards/accuracies": 1.0, "rewards/chosen": -4.5397049689199775e-05, "rewards/margins": 0.31467103958129883, "rewards/rejected": -0.3147164583206177, "step": 12213 }, { "epoch": 8.446749654218534, "grad_norm": 3.547139883041382, "learning_rate": 8.62916858767481e-06, "log_odds_chosen": 11.334451675415039, "log_odds_ratio": -0.0001582876720931381, "logits/chosen": -0.045442111790180206, "logits/rejected": -0.13509945571422577, "logps/chosen": -0.00022221980907488614, "logps/rejected": -2.3844339847564697, "loss": 0.4707, "nll_loss": 0.11766263842582703, "rewards/accuracies": 1.0, "rewards/chosen": -2.2221982362680137e-05, "rewards/margins": 0.23842118680477142, "rewards/rejected": -0.23844340443611145, "step": 12214 }, { "epoch": 8.447441217150761, "grad_norm": 3.663975477218628, "learning_rate": 8.625326571384663e-06, "log_odds_chosen": 10.991447448730469, "log_odds_ratio": -0.00013837986625730991, "logits/chosen": 0.10032883286476135, "logits/rejected": 0.15407739579677582, "logps/chosen": -0.0005728535470552742, "logps/rejected": -2.9996249675750732, "loss": 0.8703, "nll_loss": 0.2175583392381668, "rewards/accuracies": 1.0, "rewards/chosen": -5.72853532503359e-05, "rewards/margins": 0.2999052405357361, "rewards/rejected": -0.2999625504016876, "step": 12215 }, { "epoch": 8.448132780082988, "grad_norm": 4.488317012786865, "learning_rate": 8.621484555094514e-06, "log_odds_chosen": 11.125843048095703, "log_odds_ratio": -0.00010230368934571743, "logits/chosen": -0.05891960486769676, "logits/rejected": -0.1504005342721939, "logps/chosen": -0.0002756164758466184, "logps/rejected": -2.4272990226745605, "loss": 0.5428, "nll_loss": 0.135688915848732, "rewards/accuracies": 1.0, "rewards/chosen": -2.756165122264065e-05, "rewards/margins": 0.24270235002040863, "rewards/rejected": -0.24272991716861725, "step": 12216 }, { "epoch": 8.448824343015215, "grad_norm": 4.28665018081665, "learning_rate": 8.617642538804365e-06, "log_odds_chosen": 10.961759567260742, "log_odds_ratio": -6.393397052306682e-05, "logits/chosen": -0.5306499004364014, "logits/rejected": -0.4578840136528015, "logps/chosen": -0.00016053387662395835, "logps/rejected": -1.6820318698883057, "loss": 0.4358, "nll_loss": 0.10893905162811279, "rewards/accuracies": 1.0, "rewards/chosen": -1.6053387298597954e-05, "rewards/margins": 0.16818714141845703, "rewards/rejected": -0.168203204870224, "step": 12217 }, { "epoch": 8.449515905947441, "grad_norm": 4.335683345794678, "learning_rate": 8.613800522514215e-06, "log_odds_chosen": 10.537773132324219, "log_odds_ratio": -0.00010391612886451185, "logits/chosen": -0.15171320736408234, "logits/rejected": -0.08037220686674118, "logps/chosen": -0.000234838473261334, "logps/rejected": -2.0352041721343994, "loss": 0.576, "nll_loss": 0.14398017525672913, "rewards/accuracies": 1.0, "rewards/chosen": -2.3483846234739758e-05, "rewards/margins": 0.20349694788455963, "rewards/rejected": -0.20352043211460114, "step": 12218 }, { "epoch": 8.450207468879668, "grad_norm": 2.9263315200805664, "learning_rate": 8.609958506224066e-06, "log_odds_chosen": 10.841743469238281, "log_odds_ratio": -3.680678128148429e-05, "logits/chosen": -0.35723212361335754, "logits/rejected": -0.3755517899990082, "logps/chosen": -0.00010235553054371849, "logps/rejected": -1.8037569522857666, "loss": 0.3004, "nll_loss": 0.07508653402328491, "rewards/accuracies": 1.0, "rewards/chosen": -1.023555341816973e-05, "rewards/margins": 0.18036547303199768, "rewards/rejected": -0.18037571012973785, "step": 12219 }, { "epoch": 8.450899031811895, "grad_norm": 3.9375154972076416, "learning_rate": 8.606116489933917e-06, "log_odds_chosen": 11.44171142578125, "log_odds_ratio": -2.285877963004168e-05, "logits/chosen": -0.1019626185297966, "logits/rejected": -0.05697731673717499, "logps/chosen": -0.00022979540517553687, "logps/rejected": -2.6647655963897705, "loss": 0.3392, "nll_loss": 0.08479012548923492, "rewards/accuracies": 1.0, "rewards/chosen": -2.2979538698564284e-05, "rewards/margins": 0.2664535641670227, "rewards/rejected": -0.2664765417575836, "step": 12220 }, { "epoch": 8.451590594744122, "grad_norm": 2.777099370956421, "learning_rate": 8.60227447364377e-06, "log_odds_chosen": 10.597506523132324, "log_odds_ratio": -0.0005935538210906088, "logits/chosen": -0.14212588965892792, "logits/rejected": -0.21859464049339294, "logps/chosen": -0.00168894964735955, "logps/rejected": -2.2412731647491455, "loss": 0.3362, "nll_loss": 0.0839921236038208, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001688949705567211, "rewards/margins": 0.22395843267440796, "rewards/rejected": -0.22412732243537903, "step": 12221 }, { "epoch": 8.452282157676349, "grad_norm": 4.140081405639648, "learning_rate": 8.59843245735362e-06, "log_odds_chosen": 10.09188175201416, "log_odds_ratio": -0.00014964811271056533, "logits/chosen": -0.27937549352645874, "logits/rejected": -0.32379117608070374, "logps/chosen": -0.0002269076940137893, "logps/rejected": -1.7309942245483398, "loss": 0.3503, "nll_loss": 0.08755438029766083, "rewards/accuracies": 1.0, "rewards/chosen": -2.2690768673783168e-05, "rewards/margins": 0.17307671904563904, "rewards/rejected": -0.17309942841529846, "step": 12222 }, { "epoch": 8.452973720608576, "grad_norm": 3.208634376525879, "learning_rate": 8.59459044106347e-06, "log_odds_chosen": 10.598793029785156, "log_odds_ratio": -0.0008950430783443153, "logits/chosen": -0.35770896077156067, "logits/rejected": -0.44755396246910095, "logps/chosen": -0.00039982280577532947, "logps/rejected": -1.6339364051818848, "loss": 0.3351, "nll_loss": 0.08368603885173798, "rewards/accuracies": 1.0, "rewards/chosen": -3.998228203272447e-05, "rewards/margins": 0.16335365176200867, "rewards/rejected": -0.16339364647865295, "step": 12223 }, { "epoch": 8.453665283540802, "grad_norm": 5.284857749938965, "learning_rate": 8.590748424773322e-06, "log_odds_chosen": 11.325607299804688, "log_odds_ratio": -8.094254735624418e-05, "logits/chosen": -0.6272037029266357, "logits/rejected": -0.5712891817092896, "logps/chosen": -0.00017863322864286602, "logps/rejected": -2.5380496978759766, "loss": 0.408, "nll_loss": 0.10198329389095306, "rewards/accuracies": 1.0, "rewards/chosen": -1.7863323591882363e-05, "rewards/margins": 0.253787100315094, "rewards/rejected": -0.2538049519062042, "step": 12224 }, { "epoch": 8.45435684647303, "grad_norm": 7.783411979675293, "learning_rate": 8.586906408483172e-06, "log_odds_chosen": 10.21047306060791, "log_odds_ratio": -7.494394958484918e-05, "logits/chosen": -0.2875575125217438, "logits/rejected": -0.3262416124343872, "logps/chosen": -0.0003216335317119956, "logps/rejected": -2.103597402572632, "loss": 0.3015, "nll_loss": 0.07537385821342468, "rewards/accuracies": 1.0, "rewards/chosen": -3.2163356081582606e-05, "rewards/margins": 0.21032759547233582, "rewards/rejected": -0.21035973727703094, "step": 12225 }, { "epoch": 8.455048409405256, "grad_norm": 3.302433729171753, "learning_rate": 8.583064392193023e-06, "log_odds_chosen": 10.463157653808594, "log_odds_ratio": -0.00010431169357616454, "logits/chosen": -0.1801649034023285, "logits/rejected": -0.28582602739334106, "logps/chosen": -0.00015797067317180336, "logps/rejected": -1.7762954235076904, "loss": 0.3563, "nll_loss": 0.08906436711549759, "rewards/accuracies": 1.0, "rewards/chosen": -1.5797068044776097e-05, "rewards/margins": 0.1776137501001358, "rewards/rejected": -0.17762956023216248, "step": 12226 }, { "epoch": 8.455739972337483, "grad_norm": 3.054396629333496, "learning_rate": 8.579222375902874e-06, "log_odds_chosen": 11.380236625671387, "log_odds_ratio": -2.2073149011703208e-05, "logits/chosen": -0.28868213295936584, "logits/rejected": -0.34390684962272644, "logps/chosen": -0.00012567838712129742, "logps/rejected": -2.3157641887664795, "loss": 0.4203, "nll_loss": 0.10508093237876892, "rewards/accuracies": 1.0, "rewards/chosen": -1.256783798453398e-05, "rewards/margins": 0.23156385123729706, "rewards/rejected": -0.23157641291618347, "step": 12227 }, { "epoch": 8.45643153526971, "grad_norm": 5.460083961486816, "learning_rate": 8.575380359612725e-06, "log_odds_chosen": 11.746459007263184, "log_odds_ratio": -9.499966108705848e-05, "logits/chosen": -0.4380587637424469, "logits/rejected": -0.5310646891593933, "logps/chosen": -0.0008287655073218048, "logps/rejected": -2.680570602416992, "loss": 0.3497, "nll_loss": 0.0874042958021164, "rewards/accuracies": 1.0, "rewards/chosen": -8.287656237371266e-05, "rewards/margins": 0.2679741680622101, "rewards/rejected": -0.26805704832077026, "step": 12228 }, { "epoch": 8.457123098201937, "grad_norm": 4.851430892944336, "learning_rate": 8.571538343322575e-06, "log_odds_chosen": 10.439013481140137, "log_odds_ratio": -7.416032894980162e-05, "logits/chosen": -0.2315261960029602, "logits/rejected": -0.3399898111820221, "logps/chosen": -0.00019020246691070497, "logps/rejected": -1.8271386623382568, "loss": 0.4617, "nll_loss": 0.11541399359703064, "rewards/accuracies": 1.0, "rewards/chosen": -1.9020246327272616e-05, "rewards/margins": 0.18269485235214233, "rewards/rejected": -0.18271386623382568, "step": 12229 }, { "epoch": 8.457814661134163, "grad_norm": 2.786869764328003, "learning_rate": 8.567696327032428e-06, "log_odds_chosen": 11.525938987731934, "log_odds_ratio": -4.216610614093952e-05, "logits/chosen": -0.24258199334144592, "logits/rejected": -0.24917370080947876, "logps/chosen": -0.00016670665354467928, "logps/rejected": -2.475355625152588, "loss": 0.3162, "nll_loss": 0.07905568182468414, "rewards/accuracies": 1.0, "rewards/chosen": -1.6670664990670048e-05, "rewards/margins": 0.2475188970565796, "rewards/rejected": -0.2475355714559555, "step": 12230 }, { "epoch": 8.45850622406639, "grad_norm": 3.303145170211792, "learning_rate": 8.563854310742279e-06, "log_odds_chosen": 10.78154468536377, "log_odds_ratio": -0.00016814623086247593, "logits/chosen": -0.4072422683238983, "logits/rejected": -0.4645257592201233, "logps/chosen": -0.0005194892291910946, "logps/rejected": -1.976096749305725, "loss": 0.5252, "nll_loss": 0.13128185272216797, "rewards/accuracies": 1.0, "rewards/chosen": -5.1948918553534895e-05, "rewards/margins": 0.197557732462883, "rewards/rejected": -0.19760967791080475, "step": 12231 }, { "epoch": 8.459197786998617, "grad_norm": 6.349554538726807, "learning_rate": 8.56001229445213e-06, "log_odds_chosen": 10.750853538513184, "log_odds_ratio": -0.00018462100706528872, "logits/chosen": -0.2011597901582718, "logits/rejected": -0.3059113621711731, "logps/chosen": -0.0006470892112702131, "logps/rejected": -2.489020586013794, "loss": 0.3602, "nll_loss": 0.09003066271543503, "rewards/accuracies": 1.0, "rewards/chosen": -6.470891821663827e-05, "rewards/margins": 0.24883735179901123, "rewards/rejected": -0.2489020824432373, "step": 12232 }, { "epoch": 8.459889349930844, "grad_norm": 3.7937495708465576, "learning_rate": 8.55617027816198e-06, "log_odds_chosen": 13.031977653503418, "log_odds_ratio": -5.436918399936985e-06, "logits/chosen": -0.5057649612426758, "logits/rejected": -0.5373073816299438, "logps/chosen": -8.866170537658036e-05, "logps/rejected": -3.4923675060272217, "loss": 0.3489, "nll_loss": 0.08722809702157974, "rewards/accuracies": 1.0, "rewards/chosen": -8.866170901455916e-06, "rewards/margins": 0.3492278754711151, "rewards/rejected": -0.34923672676086426, "step": 12233 }, { "epoch": 8.46058091286307, "grad_norm": 3.9468917846679688, "learning_rate": 8.552328261871831e-06, "log_odds_chosen": 10.901199340820312, "log_odds_ratio": -2.931928975158371e-05, "logits/chosen": -0.45410242676734924, "logits/rejected": -0.3756451904773712, "logps/chosen": -0.00015654783055651933, "logps/rejected": -2.104238986968994, "loss": 0.2744, "nll_loss": 0.06858496367931366, "rewards/accuracies": 1.0, "rewards/chosen": -1.5654783055651933e-05, "rewards/margins": 0.2104082554578781, "rewards/rejected": -0.21042391657829285, "step": 12234 }, { "epoch": 8.461272475795298, "grad_norm": 3.589395046234131, "learning_rate": 8.548486245581682e-06, "log_odds_chosen": 10.89686393737793, "log_odds_ratio": -0.0001275877293664962, "logits/chosen": -0.20068512856960297, "logits/rejected": -0.2796490788459778, "logps/chosen": -0.00015666541003156453, "logps/rejected": -1.6563934087753296, "loss": 0.369, "nll_loss": 0.0922326073050499, "rewards/accuracies": 1.0, "rewards/chosen": -1.5666542822145857e-05, "rewards/margins": 0.16562367975711823, "rewards/rejected": -0.16563934087753296, "step": 12235 }, { "epoch": 8.461964038727524, "grad_norm": 3.8936188220977783, "learning_rate": 8.544644229291534e-06, "log_odds_chosen": 11.288116455078125, "log_odds_ratio": -0.00013722201401833445, "logits/chosen": -0.5005241632461548, "logits/rejected": -0.5129424929618835, "logps/chosen": -0.00038747471990063787, "logps/rejected": -3.04249906539917, "loss": 0.4177, "nll_loss": 0.10440607368946075, "rewards/accuracies": 1.0, "rewards/chosen": -3.8747471990063787e-05, "rewards/margins": 0.3042111396789551, "rewards/rejected": -0.30424991250038147, "step": 12236 }, { "epoch": 8.462655601659751, "grad_norm": 4.5463151931762695, "learning_rate": 8.540802213001383e-06, "log_odds_chosen": 11.708345413208008, "log_odds_ratio": -2.0565257727866992e-05, "logits/chosen": -0.37573695182800293, "logits/rejected": -0.3132202923297882, "logps/chosen": -0.00018604497017804533, "logps/rejected": -2.4184212684631348, "loss": 0.4605, "nll_loss": 0.11511382460594177, "rewards/accuracies": 1.0, "rewards/chosen": -1.8604498109198175e-05, "rewards/margins": 0.24182350933551788, "rewards/rejected": -0.241842120885849, "step": 12237 }, { "epoch": 8.463347164591978, "grad_norm": 4.1725077629089355, "learning_rate": 8.536960196711234e-06, "log_odds_chosen": 11.375772476196289, "log_odds_ratio": -1.633433930692263e-05, "logits/chosen": -0.03759506717324257, "logits/rejected": -0.0693630501627922, "logps/chosen": -0.00016117203631438315, "logps/rejected": -2.4761452674865723, "loss": 0.4233, "nll_loss": 0.10581202805042267, "rewards/accuracies": 1.0, "rewards/chosen": -1.6117204722831957e-05, "rewards/margins": 0.24759840965270996, "rewards/rejected": -0.2476145327091217, "step": 12238 }, { "epoch": 8.464038727524205, "grad_norm": 3.870225429534912, "learning_rate": 8.533118180421086e-06, "log_odds_chosen": 11.548178672790527, "log_odds_ratio": -8.487018931191415e-05, "logits/chosen": -0.5760559439659119, "logits/rejected": -0.5758537650108337, "logps/chosen": -0.00016482733190059662, "logps/rejected": -2.121696949005127, "loss": 0.3367, "nll_loss": 0.0841749906539917, "rewards/accuracies": 1.0, "rewards/chosen": -1.64827324624639e-05, "rewards/margins": 0.21215322613716125, "rewards/rejected": -0.21216970682144165, "step": 12239 }, { "epoch": 8.464730290456432, "grad_norm": 4.679924964904785, "learning_rate": 8.529276164130937e-06, "log_odds_chosen": 11.397396087646484, "log_odds_ratio": -2.749465056695044e-05, "logits/chosen": -0.2636311650276184, "logits/rejected": -0.4466463625431061, "logps/chosen": -0.00019256738596595824, "logps/rejected": -2.625746250152588, "loss": 0.527, "nll_loss": 0.1317574381828308, "rewards/accuracies": 1.0, "rewards/chosen": -1.9256738596595824e-05, "rewards/margins": 0.2625553607940674, "rewards/rejected": -0.26257461309432983, "step": 12240 }, { "epoch": 8.465421853388658, "grad_norm": 6.500254154205322, "learning_rate": 8.525434147840788e-06, "log_odds_chosen": 10.750042915344238, "log_odds_ratio": -0.0003058892616536468, "logits/chosen": -0.0664311945438385, "logits/rejected": -0.08879110962152481, "logps/chosen": -0.00020295722060836852, "logps/rejected": -2.48722505569458, "loss": 0.5886, "nll_loss": 0.1471274346113205, "rewards/accuracies": 1.0, "rewards/chosen": -2.029572169703897e-05, "rewards/margins": 0.24870222806930542, "rewards/rejected": -0.24872252345085144, "step": 12241 }, { "epoch": 8.466113416320885, "grad_norm": 3.87945556640625, "learning_rate": 8.521592131550637e-06, "log_odds_chosen": 11.44169807434082, "log_odds_ratio": -1.966490162885748e-05, "logits/chosen": -0.4701942205429077, "logits/rejected": -0.46853071451187134, "logps/chosen": -0.00022171103046275675, "logps/rejected": -2.206536293029785, "loss": 0.3678, "nll_loss": 0.09194795042276382, "rewards/accuracies": 1.0, "rewards/chosen": -2.2171103410073556e-05, "rewards/margins": 0.22063148021697998, "rewards/rejected": -0.22065363824367523, "step": 12242 }, { "epoch": 8.466804979253112, "grad_norm": 32.3742790222168, "learning_rate": 8.51775011526049e-06, "log_odds_chosen": 11.439393997192383, "log_odds_ratio": -0.00010279798152623698, "logits/chosen": -0.4229426980018616, "logits/rejected": -0.45810258388519287, "logps/chosen": -0.0003110724501311779, "logps/rejected": -2.6357650756835938, "loss": 0.4091, "nll_loss": 0.1022646576166153, "rewards/accuracies": 1.0, "rewards/chosen": -3.110724355792627e-05, "rewards/margins": 0.2635453939437866, "rewards/rejected": -0.2635765075683594, "step": 12243 }, { "epoch": 8.467496542185339, "grad_norm": 4.420064926147461, "learning_rate": 8.51390809897034e-06, "log_odds_chosen": 10.127920150756836, "log_odds_ratio": -0.00014040838868822902, "logits/chosen": -0.4244319200515747, "logits/rejected": -0.4691627025604248, "logps/chosen": -0.00036072367220185697, "logps/rejected": -1.6990998983383179, "loss": 0.5634, "nll_loss": 0.14082545042037964, "rewards/accuracies": 1.0, "rewards/chosen": -3.6072371585760266e-05, "rewards/margins": 0.1698739230632782, "rewards/rejected": -0.1699099987745285, "step": 12244 }, { "epoch": 8.468188105117566, "grad_norm": 3.776829242706299, "learning_rate": 8.51006608268019e-06, "log_odds_chosen": 9.505712509155273, "log_odds_ratio": -0.0007945825927890837, "logits/chosen": -0.6400420069694519, "logits/rejected": -0.6537680625915527, "logps/chosen": -0.001891196588985622, "logps/rejected": -1.7540403604507446, "loss": 0.4024, "nll_loss": 0.10051442682743073, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001891196588985622, "rewards/margins": 0.17521491646766663, "rewards/rejected": -0.17540404200553894, "step": 12245 }, { "epoch": 8.468879668049793, "grad_norm": 3.0698983669281006, "learning_rate": 8.506224066390042e-06, "log_odds_chosen": 12.119697570800781, "log_odds_ratio": -0.000320388819091022, "logits/chosen": -0.15294787287712097, "logits/rejected": -0.17274241149425507, "logps/chosen": -0.0005173084791749716, "logps/rejected": -3.080294609069824, "loss": 0.3448, "nll_loss": 0.0861569195985794, "rewards/accuracies": 1.0, "rewards/chosen": -5.17308471899014e-05, "rewards/margins": 0.30797770619392395, "rewards/rejected": -0.3080294728279114, "step": 12246 }, { "epoch": 8.46957123098202, "grad_norm": 3.754049777984619, "learning_rate": 8.502382050099892e-06, "log_odds_chosen": 9.665637016296387, "log_odds_ratio": -0.0006061159074306488, "logits/chosen": -0.5966587662696838, "logits/rejected": -0.6343376040458679, "logps/chosen": -0.0006555309519171715, "logps/rejected": -2.0075156688690186, "loss": 0.3651, "nll_loss": 0.09120656549930573, "rewards/accuracies": 1.0, "rewards/chosen": -6.555309664690867e-05, "rewards/margins": 0.2006860226392746, "rewards/rejected": -0.20075158774852753, "step": 12247 }, { "epoch": 8.470262793914246, "grad_norm": 2.940383195877075, "learning_rate": 8.498540033809743e-06, "log_odds_chosen": 9.661136627197266, "log_odds_ratio": -0.00047493373858742416, "logits/chosen": -0.20162512362003326, "logits/rejected": -0.3301006853580475, "logps/chosen": -0.00038299331208691, "logps/rejected": -1.3793359994888306, "loss": 0.2561, "nll_loss": 0.0639653131365776, "rewards/accuracies": 1.0, "rewards/chosen": -3.8299327570712194e-05, "rewards/margins": 0.13789530098438263, "rewards/rejected": -0.137933611869812, "step": 12248 }, { "epoch": 8.470954356846473, "grad_norm": 3.431257486343384, "learning_rate": 8.494698017519595e-06, "log_odds_chosen": 11.056364059448242, "log_odds_ratio": -7.521332008764148e-05, "logits/chosen": 0.04458847641944885, "logits/rejected": -0.13365104794502258, "logps/chosen": -0.0005594642134383321, "logps/rejected": -2.1823577880859375, "loss": 0.3501, "nll_loss": 0.08752588927745819, "rewards/accuracies": 1.0, "rewards/chosen": -5.5946424254216254e-05, "rewards/margins": 0.2181798219680786, "rewards/rejected": -0.21823576092720032, "step": 12249 }, { "epoch": 8.4716459197787, "grad_norm": 4.6607489585876465, "learning_rate": 8.490856001229446e-06, "log_odds_chosen": 12.409748077392578, "log_odds_ratio": -8.075324331002776e-06, "logits/chosen": -0.3750876486301422, "logits/rejected": -0.30534347891807556, "logps/chosen": -0.00014612097584176809, "logps/rejected": -3.4359819889068604, "loss": 0.4619, "nll_loss": 0.11548404395580292, "rewards/accuracies": 1.0, "rewards/chosen": -1.461209831177257e-05, "rewards/margins": 0.3435836136341095, "rewards/rejected": -0.34359821677207947, "step": 12250 }, { "epoch": 8.472337482710927, "grad_norm": 4.8379340171813965, "learning_rate": 8.487013984939295e-06, "log_odds_chosen": 12.72007942199707, "log_odds_ratio": -4.733223249786533e-06, "logits/chosen": -0.7242316603660583, "logits/rejected": -0.8033230304718018, "logps/chosen": -3.626089164754376e-05, "logps/rejected": -2.414877414703369, "loss": 0.5539, "nll_loss": 0.138469398021698, "rewards/accuracies": 1.0, "rewards/chosen": -3.626089210229111e-06, "rewards/margins": 0.24148410558700562, "rewards/rejected": -0.24148772656917572, "step": 12251 }, { "epoch": 8.473029045643154, "grad_norm": 4.147072792053223, "learning_rate": 8.483171968649148e-06, "log_odds_chosen": 10.668270111083984, "log_odds_ratio": -8.216902642743662e-05, "logits/chosen": -0.13350696861743927, "logits/rejected": -0.16221120953559875, "logps/chosen": -0.0008802133379504085, "logps/rejected": -1.8316550254821777, "loss": 1.1092, "nll_loss": 0.2772865295410156, "rewards/accuracies": 1.0, "rewards/chosen": -8.802134107099846e-05, "rewards/margins": 0.18307748436927795, "rewards/rejected": -0.18316550552845, "step": 12252 }, { "epoch": 8.47372060857538, "grad_norm": 3.817070484161377, "learning_rate": 8.479329952358998e-06, "log_odds_chosen": 11.888955116271973, "log_odds_ratio": -2.0903258700855076e-05, "logits/chosen": 0.04838571697473526, "logits/rejected": -0.07474128156900406, "logps/chosen": -0.00016068377590272576, "logps/rejected": -2.808671712875366, "loss": 0.4464, "nll_loss": 0.111586794257164, "rewards/accuracies": 1.0, "rewards/chosen": -1.606837940926198e-05, "rewards/margins": 0.2808510959148407, "rewards/rejected": -0.28086715936660767, "step": 12253 }, { "epoch": 8.474412171507607, "grad_norm": 3.8038136959075928, "learning_rate": 8.47548793606885e-06, "log_odds_chosen": 10.744874000549316, "log_odds_ratio": -4.887772956863046e-05, "logits/chosen": -0.0147324800491333, "logits/rejected": -0.019203737378120422, "logps/chosen": -0.0002447717997711152, "logps/rejected": -1.7378864288330078, "loss": 0.33, "nll_loss": 0.08249451965093613, "rewards/accuracies": 1.0, "rewards/chosen": -2.447717997711152e-05, "rewards/margins": 0.1737641543149948, "rewards/rejected": -0.1737886369228363, "step": 12254 }, { "epoch": 8.475103734439834, "grad_norm": 3.671657085418701, "learning_rate": 8.4716459197787e-06, "log_odds_chosen": 11.238716125488281, "log_odds_ratio": -5.318321200320497e-05, "logits/chosen": -0.7400655150413513, "logits/rejected": -0.7886126041412354, "logps/chosen": -0.00022681456175632775, "logps/rejected": -2.474215507507324, "loss": 0.3976, "nll_loss": 0.09940271824598312, "rewards/accuracies": 1.0, "rewards/chosen": -2.2681457267026417e-05, "rewards/margins": 0.24739889800548553, "rewards/rejected": -0.24742157757282257, "step": 12255 }, { "epoch": 8.475795297372061, "grad_norm": 11.087566375732422, "learning_rate": 8.46780390348855e-06, "log_odds_chosen": 10.086482048034668, "log_odds_ratio": -0.014995497651398182, "logits/chosen": -0.5288323163986206, "logits/rejected": -0.5677878856658936, "logps/chosen": -0.002039603190496564, "logps/rejected": -2.0050981044769287, "loss": 0.4747, "nll_loss": 0.11717545986175537, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020396032778080553, "rewards/margins": 0.20030583441257477, "rewards/rejected": -0.20050980150699615, "step": 12256 }, { "epoch": 8.476486860304288, "grad_norm": 4.343221187591553, "learning_rate": 8.463961887198401e-06, "log_odds_chosen": 11.159956932067871, "log_odds_ratio": -0.00019973155576735735, "logits/chosen": -0.03564649820327759, "logits/rejected": -0.13567349314689636, "logps/chosen": -0.0003570501576177776, "logps/rejected": -2.8839199542999268, "loss": 0.3822, "nll_loss": 0.09552636742591858, "rewards/accuracies": 1.0, "rewards/chosen": -3.5705019399756566e-05, "rewards/margins": 0.28835630416870117, "rewards/rejected": -0.28839200735092163, "step": 12257 }, { "epoch": 8.477178423236515, "grad_norm": 4.364232540130615, "learning_rate": 8.460119870908254e-06, "log_odds_chosen": 12.289931297302246, "log_odds_ratio": -3.1001669412944466e-05, "logits/chosen": -0.1956077218055725, "logits/rejected": -0.26861050724983215, "logps/chosen": -0.00019162609532941133, "logps/rejected": -3.37749981880188, "loss": 0.5817, "nll_loss": 0.14541320502758026, "rewards/accuracies": 1.0, "rewards/chosen": -1.9162609532941133e-05, "rewards/margins": 0.3377308249473572, "rewards/rejected": -0.3377499580383301, "step": 12258 }, { "epoch": 8.477869986168741, "grad_norm": 4.493581771850586, "learning_rate": 8.456277854618105e-06, "log_odds_chosen": 12.043193817138672, "log_odds_ratio": -1.669879929977469e-05, "logits/chosen": -0.35270121693611145, "logits/rejected": -0.35970330238342285, "logps/chosen": -0.00017171446233987808, "logps/rejected": -2.586911201477051, "loss": 0.408, "nll_loss": 0.10200444608926773, "rewards/accuracies": 1.0, "rewards/chosen": -1.717144732538145e-05, "rewards/margins": 0.2586739659309387, "rewards/rejected": -0.25869113206863403, "step": 12259 }, { "epoch": 8.478561549100968, "grad_norm": 3.1778171062469482, "learning_rate": 8.452435838327954e-06, "log_odds_chosen": 11.451273918151855, "log_odds_ratio": -3.4607517591211945e-05, "logits/chosen": -0.5733155012130737, "logits/rejected": -0.5721181035041809, "logps/chosen": -0.00013759022112935781, "logps/rejected": -2.4554004669189453, "loss": 0.4866, "nll_loss": 0.12163795530796051, "rewards/accuracies": 1.0, "rewards/chosen": -1.3759023204329424e-05, "rewards/margins": 0.2455262839794159, "rewards/rejected": -0.245540052652359, "step": 12260 }, { "epoch": 8.479253112033195, "grad_norm": 3.6349940299987793, "learning_rate": 8.448593822037806e-06, "log_odds_chosen": 10.85304069519043, "log_odds_ratio": -5.069684630143456e-05, "logits/chosen": -0.5185079574584961, "logits/rejected": -0.5641573071479797, "logps/chosen": -0.0011122021824121475, "logps/rejected": -2.387133836746216, "loss": 0.3159, "nll_loss": 0.07897399365901947, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001112202153308317, "rewards/margins": 0.2386021614074707, "rewards/rejected": -0.23871338367462158, "step": 12261 }, { "epoch": 8.479944674965422, "grad_norm": 5.675334930419922, "learning_rate": 8.444751805747657e-06, "log_odds_chosen": 11.700498580932617, "log_odds_ratio": -2.6974110369337723e-05, "logits/chosen": -0.15723538398742676, "logits/rejected": -0.28113627433776855, "logps/chosen": -0.0002666892542038113, "logps/rejected": -2.8769774436950684, "loss": 0.6429, "nll_loss": 0.16072174906730652, "rewards/accuracies": 1.0, "rewards/chosen": -2.6668923965189606e-05, "rewards/margins": 0.2876710891723633, "rewards/rejected": -0.2876977324485779, "step": 12262 }, { "epoch": 8.480636237897649, "grad_norm": 2.913564443588257, "learning_rate": 8.440909789457508e-06, "log_odds_chosen": 10.883523941040039, "log_odds_ratio": -0.0002211461978731677, "logits/chosen": -0.4537699818611145, "logits/rejected": -0.4931557774543762, "logps/chosen": -0.0014517331728711724, "logps/rejected": -3.1377670764923096, "loss": 0.3183, "nll_loss": 0.07955101877450943, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014517331146635115, "rewards/margins": 0.313631534576416, "rewards/rejected": -0.3137767016887665, "step": 12263 }, { "epoch": 8.481327800829876, "grad_norm": 4.738100528717041, "learning_rate": 8.437067773167358e-06, "log_odds_chosen": 11.844022750854492, "log_odds_ratio": -1.6696354578016326e-05, "logits/chosen": -0.5999171733856201, "logits/rejected": -0.7666305899620056, "logps/chosen": -0.00015944114420562983, "logps/rejected": -2.4665656089782715, "loss": 0.4528, "nll_loss": 0.1132088303565979, "rewards/accuracies": 1.0, "rewards/chosen": -1.5944115148158744e-05, "rewards/margins": 0.2466406524181366, "rewards/rejected": -0.24665658175945282, "step": 12264 }, { "epoch": 8.482019363762102, "grad_norm": 4.688370704650879, "learning_rate": 8.43322575687721e-06, "log_odds_chosen": 10.04583740234375, "log_odds_ratio": -0.0032382027711719275, "logits/chosen": -0.5382931232452393, "logits/rejected": -0.5006886124610901, "logps/chosen": -0.014946999959647655, "logps/rejected": -2.335589647293091, "loss": 0.55, "nll_loss": 0.13716506958007812, "rewards/accuracies": 1.0, "rewards/chosen": -0.001494700089097023, "rewards/margins": 0.23206427693367004, "rewards/rejected": -0.23355896770954132, "step": 12265 }, { "epoch": 8.48271092669433, "grad_norm": 6.848077297210693, "learning_rate": 8.42938374058706e-06, "log_odds_chosen": 12.40252685546875, "log_odds_ratio": -2.2604503101320006e-05, "logits/chosen": -0.4898621439933777, "logits/rejected": -0.5430005788803101, "logps/chosen": -0.00012176691961940378, "logps/rejected": -3.148735284805298, "loss": 0.3172, "nll_loss": 0.07930289208889008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2176690688647795e-05, "rewards/margins": 0.31486135721206665, "rewards/rejected": -0.3148735463619232, "step": 12266 }, { "epoch": 8.483402489626556, "grad_norm": 2.9791226387023926, "learning_rate": 8.425541724296912e-06, "log_odds_chosen": 11.094881057739258, "log_odds_ratio": -3.153106808895245e-05, "logits/chosen": -0.7539383172988892, "logits/rejected": -0.6733986139297485, "logps/chosen": -0.00010487916006240994, "logps/rejected": -1.760202169418335, "loss": 0.3915, "nll_loss": 0.09787617623806, "rewards/accuracies": 1.0, "rewards/chosen": -1.0487916370038874e-05, "rewards/margins": 0.17600972950458527, "rewards/rejected": -0.17602021992206573, "step": 12267 }, { "epoch": 8.484094052558783, "grad_norm": 3.049551248550415, "learning_rate": 8.421699708006763e-06, "log_odds_chosen": 11.18005657196045, "log_odds_ratio": -7.208783063106239e-05, "logits/chosen": -0.48468202352523804, "logits/rejected": -0.5185460448265076, "logps/chosen": -0.00017760792979970574, "logps/rejected": -2.4330902099609375, "loss": 0.242, "nll_loss": 0.06049405038356781, "rewards/accuracies": 1.0, "rewards/chosen": -1.7760794435162097e-05, "rewards/margins": 0.24329127371311188, "rewards/rejected": -0.24330900609493256, "step": 12268 }, { "epoch": 8.48478561549101, "grad_norm": 4.436456203460693, "learning_rate": 8.417857691716612e-06, "log_odds_chosen": 11.085650444030762, "log_odds_ratio": -0.0005416136118583381, "logits/chosen": -0.3808354437351227, "logits/rejected": -0.33489638566970825, "logps/chosen": -0.0010177572257816792, "logps/rejected": -2.746601104736328, "loss": 0.3895, "nll_loss": 0.0973270907998085, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010177572403335944, "rewards/margins": 0.27455833554267883, "rewards/rejected": -0.2746601104736328, "step": 12269 }, { "epoch": 8.485477178423237, "grad_norm": 4.066561222076416, "learning_rate": 8.414015675426465e-06, "log_odds_chosen": 12.0043363571167, "log_odds_ratio": -8.750682354730088e-06, "logits/chosen": -0.24367359280586243, "logits/rejected": -0.44787317514419556, "logps/chosen": -0.00016268889885395765, "logps/rejected": -2.643892288208008, "loss": 0.4556, "nll_loss": 0.11390087008476257, "rewards/accuracies": 1.0, "rewards/chosen": -1.6268888430204242e-05, "rewards/margins": 0.26437294483184814, "rewards/rejected": -0.2643892168998718, "step": 12270 }, { "epoch": 8.486168741355463, "grad_norm": 4.154919147491455, "learning_rate": 8.410173659136315e-06, "log_odds_chosen": 11.176225662231445, "log_odds_ratio": -0.0002369789290241897, "logits/chosen": -0.6168784499168396, "logits/rejected": -0.749509871006012, "logps/chosen": -0.00018601972260512412, "logps/rejected": -2.156191825866699, "loss": 0.4435, "nll_loss": 0.11084267497062683, "rewards/accuracies": 1.0, "rewards/chosen": -1.860197153291665e-05, "rewards/margins": 0.2156006097793579, "rewards/rejected": -0.21561920642852783, "step": 12271 }, { "epoch": 8.48686030428769, "grad_norm": 3.97359037399292, "learning_rate": 8.406331642846166e-06, "log_odds_chosen": 9.596004486083984, "log_odds_ratio": -0.00012900654110126197, "logits/chosen": -0.5155652761459351, "logits/rejected": -0.501971960067749, "logps/chosen": -0.00044222682481631637, "logps/rejected": -1.644789695739746, "loss": 0.35, "nll_loss": 0.0874941423535347, "rewards/accuracies": 1.0, "rewards/chosen": -4.422268466441892e-05, "rewards/margins": 0.1644347608089447, "rewards/rejected": -0.16447898745536804, "step": 12272 }, { "epoch": 8.487551867219917, "grad_norm": 3.0364789962768555, "learning_rate": 8.402489626556017e-06, "log_odds_chosen": 10.494122505187988, "log_odds_ratio": -0.00013410058454610407, "logits/chosen": -0.7801151871681213, "logits/rejected": -0.8372578620910645, "logps/chosen": -0.00021797572844661772, "logps/rejected": -1.7526401281356812, "loss": 0.3125, "nll_loss": 0.07811284065246582, "rewards/accuracies": 1.0, "rewards/chosen": -2.179757211706601e-05, "rewards/margins": 0.17524223029613495, "rewards/rejected": -0.17526403069496155, "step": 12273 }, { "epoch": 8.488243430152144, "grad_norm": 4.63698148727417, "learning_rate": 8.398647610265868e-06, "log_odds_chosen": 12.364485740661621, "log_odds_ratio": -1.7829099306254648e-05, "logits/chosen": -0.2129366248846054, "logits/rejected": -0.28443700075149536, "logps/chosen": -0.000122582889162004, "logps/rejected": -3.121785879135132, "loss": 0.3408, "nll_loss": 0.08520200848579407, "rewards/accuracies": 1.0, "rewards/chosen": -1.2258290553290863e-05, "rewards/margins": 0.312166303396225, "rewards/rejected": -0.3121785819530487, "step": 12274 }, { "epoch": 8.48893499308437, "grad_norm": 2.922107458114624, "learning_rate": 8.394805593975718e-06, "log_odds_chosen": 10.744071960449219, "log_odds_ratio": -4.883888686890714e-05, "logits/chosen": -0.6283242106437683, "logits/rejected": -0.6851826310157776, "logps/chosen": -0.0001939169887918979, "logps/rejected": -1.8735204935073853, "loss": 0.251, "nll_loss": 0.06274637579917908, "rewards/accuracies": 1.0, "rewards/chosen": -1.9391700334381312e-05, "rewards/margins": 0.1873326599597931, "rewards/rejected": -0.18735206127166748, "step": 12275 }, { "epoch": 8.489626556016598, "grad_norm": 4.272983074188232, "learning_rate": 8.390963577685569e-06, "log_odds_chosen": 9.681724548339844, "log_odds_ratio": -0.0021691133733838797, "logits/chosen": 0.08250885456800461, "logits/rejected": -0.03361310809850693, "logps/chosen": -0.015918850898742676, "logps/rejected": -1.5145361423492432, "loss": 0.5234, "nll_loss": 0.13062867522239685, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015918852295726538, "rewards/margins": 0.14986173808574677, "rewards/rejected": -0.1514536291360855, "step": 12276 }, { "epoch": 8.490318118948824, "grad_norm": 4.874604225158691, "learning_rate": 8.387121561395422e-06, "log_odds_chosen": 10.615398406982422, "log_odds_ratio": -0.00016952301666606218, "logits/chosen": -0.39670851826667786, "logits/rejected": -0.50009685754776, "logps/chosen": -0.00037242763210088015, "logps/rejected": -2.066460609436035, "loss": 0.4313, "nll_loss": 0.10781387984752655, "rewards/accuracies": 1.0, "rewards/chosen": -3.7242763937683776e-05, "rewards/margins": 0.20660880208015442, "rewards/rejected": -0.20664605498313904, "step": 12277 }, { "epoch": 8.491009681881051, "grad_norm": 2.926595449447632, "learning_rate": 8.383279545105272e-06, "log_odds_chosen": 11.713717460632324, "log_odds_ratio": -3.113495040452108e-05, "logits/chosen": -0.7721793055534363, "logits/rejected": -0.7168105244636536, "logps/chosen": -9.897611744236201e-05, "logps/rejected": -2.2081406116485596, "loss": 0.337, "nll_loss": 0.08423657715320587, "rewards/accuracies": 1.0, "rewards/chosen": -9.897612471831962e-06, "rewards/margins": 0.2208041548728943, "rewards/rejected": -0.220814049243927, "step": 12278 }, { "epoch": 8.491701244813278, "grad_norm": 5.314805030822754, "learning_rate": 8.379437528815121e-06, "log_odds_chosen": 11.87393569946289, "log_odds_ratio": -9.592822607373819e-06, "logits/chosen": -0.386319637298584, "logits/rejected": -0.4082978367805481, "logps/chosen": -0.00014236682909540832, "logps/rejected": -2.774528980255127, "loss": 0.4811, "nll_loss": 0.12028633803129196, "rewards/accuracies": 1.0, "rewards/chosen": -1.4236682545742951e-05, "rewards/margins": 0.2774386703968048, "rewards/rejected": -0.27745291590690613, "step": 12279 }, { "epoch": 8.492392807745505, "grad_norm": 2.6736106872558594, "learning_rate": 8.375595512524974e-06, "log_odds_chosen": 9.523404121398926, "log_odds_ratio": -0.0007579984958283603, "logits/chosen": 0.22026318311691284, "logits/rejected": 0.17569872736930847, "logps/chosen": -0.0019360886653885245, "logps/rejected": -1.5031076669692993, "loss": 0.3019, "nll_loss": 0.07539349049329758, "rewards/accuracies": 1.0, "rewards/chosen": -0.00019360888109076768, "rewards/margins": 0.1501171588897705, "rewards/rejected": -0.15031076967716217, "step": 12280 }, { "epoch": 8.493084370677732, "grad_norm": 5.25847864151001, "learning_rate": 8.371753496234825e-06, "log_odds_chosen": 10.68351936340332, "log_odds_ratio": -0.0001417723105987534, "logits/chosen": -0.12427209317684174, "logits/rejected": -0.21619272232055664, "logps/chosen": -0.0007611039909534156, "logps/rejected": -2.130582094192505, "loss": 0.5091, "nll_loss": 0.12725578248500824, "rewards/accuracies": 1.0, "rewards/chosen": -7.611038745380938e-05, "rewards/margins": 0.21298208832740784, "rewards/rejected": -0.2130582183599472, "step": 12281 }, { "epoch": 8.493775933609959, "grad_norm": 3.5506532192230225, "learning_rate": 8.367911479944675e-06, "log_odds_chosen": 11.635926246643066, "log_odds_ratio": -2.9104781788191758e-05, "logits/chosen": -0.15105272829532623, "logits/rejected": -0.13993534445762634, "logps/chosen": -0.0004803336050827056, "logps/rejected": -2.7633471488952637, "loss": 0.4275, "nll_loss": 0.10686735808849335, "rewards/accuracies": 1.0, "rewards/chosen": -4.8033361963462085e-05, "rewards/margins": 0.27628669142723083, "rewards/rejected": -0.2763347029685974, "step": 12282 }, { "epoch": 8.494467496542185, "grad_norm": 4.606646537780762, "learning_rate": 8.364069463654526e-06, "log_odds_chosen": 9.82073974609375, "log_odds_ratio": -0.0006549949757754803, "logits/chosen": -0.16509506106376648, "logits/rejected": -0.18839670717716217, "logps/chosen": -0.0017432832391932607, "logps/rejected": -1.6615170240402222, "loss": 0.49, "nll_loss": 0.12244555354118347, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017432833556085825, "rewards/margins": 0.1659773886203766, "rewards/rejected": -0.16615170240402222, "step": 12283 }, { "epoch": 8.495159059474412, "grad_norm": 3.7970452308654785, "learning_rate": 8.360227447364377e-06, "log_odds_chosen": 10.787665367126465, "log_odds_ratio": -6.754439527867362e-05, "logits/chosen": -0.34210094809532166, "logits/rejected": -0.29848796129226685, "logps/chosen": -0.0002140133292414248, "logps/rejected": -2.0320956707000732, "loss": 0.4188, "nll_loss": 0.10468335449695587, "rewards/accuracies": 1.0, "rewards/chosen": -2.1401334379334003e-05, "rewards/margins": 0.20318818092346191, "rewards/rejected": -0.20320956408977509, "step": 12284 }, { "epoch": 8.495850622406639, "grad_norm": 4.580048561096191, "learning_rate": 8.356385431074228e-06, "log_odds_chosen": 11.143314361572266, "log_odds_ratio": -2.657229924807325e-05, "logits/chosen": 0.09083308279514313, "logits/rejected": 0.03352555260062218, "logps/chosen": -0.0001280966680496931, "logps/rejected": -2.0760304927825928, "loss": 0.5341, "nll_loss": 0.13351541757583618, "rewards/accuracies": 1.0, "rewards/chosen": -1.2809668078261893e-05, "rewards/margins": 0.2075902372598648, "rewards/rejected": -0.20760303735733032, "step": 12285 }, { "epoch": 8.496542185338866, "grad_norm": 3.6384947299957275, "learning_rate": 8.35254341478408e-06, "log_odds_chosen": 11.60141372680664, "log_odds_ratio": -1.9051440176554024e-05, "logits/chosen": 0.24498462677001953, "logits/rejected": 0.22010770440101624, "logps/chosen": -0.00018078883294947445, "logps/rejected": -2.8363616466522217, "loss": 0.4146, "nll_loss": 0.10365588217973709, "rewards/accuracies": 1.0, "rewards/chosen": -1.8078884750138968e-05, "rewards/margins": 0.28361809253692627, "rewards/rejected": -0.2836361527442932, "step": 12286 }, { "epoch": 8.497233748271093, "grad_norm": 2.5925114154815674, "learning_rate": 8.34870139849393e-06, "log_odds_chosen": 10.944217681884766, "log_odds_ratio": -2.410522210993804e-05, "logits/chosen": -0.44311681389808655, "logits/rejected": -0.48636582493782043, "logps/chosen": -0.0001314889086643234, "logps/rejected": -1.9731330871582031, "loss": 0.2769, "nll_loss": 0.06922472268342972, "rewards/accuracies": 1.0, "rewards/chosen": -1.314889141212916e-05, "rewards/margins": 0.19730015099048615, "rewards/rejected": -0.1973133087158203, "step": 12287 }, { "epoch": 8.49792531120332, "grad_norm": 5.674478530883789, "learning_rate": 8.34485938220378e-06, "log_odds_chosen": 10.447786331176758, "log_odds_ratio": -6.981974001973867e-05, "logits/chosen": -0.27241989970207214, "logits/rejected": -0.2633300721645355, "logps/chosen": -0.00027206639060750604, "logps/rejected": -2.0152056217193604, "loss": 0.5358, "nll_loss": 0.13393688201904297, "rewards/accuracies": 1.0, "rewards/chosen": -2.720664269872941e-05, "rewards/margins": 0.20149333775043488, "rewards/rejected": -0.20152056217193604, "step": 12288 }, { "epoch": 8.498616874135546, "grad_norm": 3.292074203491211, "learning_rate": 8.341017365913632e-06, "log_odds_chosen": 10.503924369812012, "log_odds_ratio": -3.558215394150466e-05, "logits/chosen": 0.45011278986930847, "logits/rejected": 0.3444060683250427, "logps/chosen": -0.00011096706293756142, "logps/rejected": -1.5259891748428345, "loss": 0.314, "nll_loss": 0.07848691940307617, "rewards/accuracies": 1.0, "rewards/chosen": -1.1096706657554023e-05, "rewards/margins": 0.15258783102035522, "rewards/rejected": -0.15259891748428345, "step": 12289 }, { "epoch": 8.499308437067773, "grad_norm": 5.755599021911621, "learning_rate": 8.337175349623483e-06, "log_odds_chosen": 11.601394653320312, "log_odds_ratio": -7.088996062520891e-05, "logits/chosen": 0.17589890956878662, "logits/rejected": 0.13562311232089996, "logps/chosen": -0.00022538311895914376, "logps/rejected": -2.678291082382202, "loss": 0.3043, "nll_loss": 0.07607081532478333, "rewards/accuracies": 1.0, "rewards/chosen": -2.2538310076924972e-05, "rewards/margins": 0.2678065598011017, "rewards/rejected": -0.26782912015914917, "step": 12290 }, { "epoch": 8.5, "grad_norm": 3.1782124042510986, "learning_rate": 8.333333333333334e-06, "log_odds_chosen": 11.137289047241211, "log_odds_ratio": -0.00018118553271051496, "logits/chosen": -0.3635497987270355, "logits/rejected": -0.47213447093963623, "logps/chosen": -0.0005629704101011157, "logps/rejected": -2.5133163928985596, "loss": 0.3656, "nll_loss": 0.09139396250247955, "rewards/accuracies": 1.0, "rewards/chosen": -5.629704537568614e-05, "rewards/margins": 0.25127536058425903, "rewards/rejected": -0.251331627368927, "step": 12291 }, { "epoch": 8.500691562932227, "grad_norm": 5.488527774810791, "learning_rate": 8.329491317043184e-06, "log_odds_chosen": 11.514001846313477, "log_odds_ratio": -2.5029741664184257e-05, "logits/chosen": -0.24020123481750488, "logits/rejected": -0.2714656591415405, "logps/chosen": -0.00017623655730858445, "logps/rejected": -2.542510986328125, "loss": 0.4081, "nll_loss": 0.10201875120401382, "rewards/accuracies": 1.0, "rewards/chosen": -1.7623655367060564e-05, "rewards/margins": 0.2542335093021393, "rewards/rejected": -0.2542511224746704, "step": 12292 }, { "epoch": 8.501383125864454, "grad_norm": 4.107009410858154, "learning_rate": 8.325649300753035e-06, "log_odds_chosen": 12.135652542114258, "log_odds_ratio": -7.428465323755518e-05, "logits/chosen": -0.044987812638282776, "logits/rejected": -0.0901426374912262, "logps/chosen": -0.00034369496279396117, "logps/rejected": -2.7795896530151367, "loss": 0.3878, "nll_loss": 0.09694696962833405, "rewards/accuracies": 1.0, "rewards/chosen": -3.436949918977916e-05, "rewards/margins": 0.2779245972633362, "rewards/rejected": -0.2779589593410492, "step": 12293 }, { "epoch": 8.50207468879668, "grad_norm": 4.193845748901367, "learning_rate": 8.321807284462886e-06, "log_odds_chosen": 11.168009757995605, "log_odds_ratio": -5.719634282286279e-05, "logits/chosen": 0.2666996121406555, "logits/rejected": 0.18223300576210022, "logps/chosen": -0.00015161471674218774, "logps/rejected": -2.2643349170684814, "loss": 0.4812, "nll_loss": 0.12029269337654114, "rewards/accuracies": 1.0, "rewards/chosen": -1.5161472219915595e-05, "rewards/margins": 0.22641833126544952, "rewards/rejected": -0.22643348574638367, "step": 12294 }, { "epoch": 8.502766251728907, "grad_norm": 5.293142795562744, "learning_rate": 8.317965268172738e-06, "log_odds_chosen": 10.646807670593262, "log_odds_ratio": -0.00019855228310916573, "logits/chosen": -0.18091386556625366, "logits/rejected": -0.22599759697914124, "logps/chosen": -0.0007174762431532145, "logps/rejected": -2.800795316696167, "loss": 0.606, "nll_loss": 0.15148796141147614, "rewards/accuracies": 1.0, "rewards/chosen": -7.17476214049384e-05, "rewards/margins": 0.2800077795982361, "rewards/rejected": -0.28007954359054565, "step": 12295 }, { "epoch": 8.503457814661134, "grad_norm": 4.917719841003418, "learning_rate": 8.31412325188259e-06, "log_odds_chosen": 11.533758163452148, "log_odds_ratio": -0.0001370185927953571, "logits/chosen": -0.42198553681373596, "logits/rejected": -0.43564262986183167, "logps/chosen": -0.000686085200868547, "logps/rejected": -3.2868759632110596, "loss": 0.5574, "nll_loss": 0.1393275260925293, "rewards/accuracies": 1.0, "rewards/chosen": -6.860852590762079e-05, "rewards/margins": 0.32861900329589844, "rewards/rejected": -0.3286876082420349, "step": 12296 }, { "epoch": 8.504149377593361, "grad_norm": 2.980889081954956, "learning_rate": 8.310281235592438e-06, "log_odds_chosen": 11.35621452331543, "log_odds_ratio": -4.0140759665519e-05, "logits/chosen": -0.21690016984939575, "logits/rejected": -0.2787854075431824, "logps/chosen": -0.00018471668590791523, "logps/rejected": -2.469820022583008, "loss": 0.4044, "nll_loss": 0.10108595341444016, "rewards/accuracies": 1.0, "rewards/chosen": -1.8471670045983046e-05, "rewards/margins": 0.2469635307788849, "rewards/rejected": -0.24698200821876526, "step": 12297 }, { "epoch": 8.504840940525588, "grad_norm": 3.8085482120513916, "learning_rate": 8.30643921930229e-06, "log_odds_chosen": 10.582088470458984, "log_odds_ratio": -0.00012150261318311095, "logits/chosen": -0.16991454362869263, "logits/rejected": -0.16436061263084412, "logps/chosen": -0.0006639507482759655, "logps/rejected": -2.382948398590088, "loss": 0.326, "nll_loss": 0.08148948848247528, "rewards/accuracies": 1.0, "rewards/chosen": -6.639507773797959e-05, "rewards/margins": 0.238228440284729, "rewards/rejected": -0.2382948398590088, "step": 12298 }, { "epoch": 8.505532503457815, "grad_norm": 3.6274943351745605, "learning_rate": 8.302597203012141e-06, "log_odds_chosen": 11.005544662475586, "log_odds_ratio": -0.00012421110295690596, "logits/chosen": 0.21192803978919983, "logits/rejected": 0.2692212462425232, "logps/chosen": -0.0002638070727698505, "logps/rejected": -2.1027445793151855, "loss": 0.2937, "nll_loss": 0.07340645045042038, "rewards/accuracies": 1.0, "rewards/chosen": -2.6380705094197765e-05, "rewards/margins": 0.21024811267852783, "rewards/rejected": -0.21027448773384094, "step": 12299 }, { "epoch": 8.506224066390041, "grad_norm": 3.427682638168335, "learning_rate": 8.298755186721992e-06, "log_odds_chosen": 11.169326782226562, "log_odds_ratio": -4.729488864541054e-05, "logits/chosen": 0.011243600398302078, "logits/rejected": -0.03214600309729576, "logps/chosen": -0.0001485542015871033, "logps/rejected": -2.084603786468506, "loss": 0.4336, "nll_loss": 0.10838790237903595, "rewards/accuracies": 1.0, "rewards/chosen": -1.485541997681139e-05, "rewards/margins": 0.20844553411006927, "rewards/rejected": -0.20846039056777954, "step": 12300 }, { "epoch": 8.506915629322268, "grad_norm": 4.2835493087768555, "learning_rate": 8.294913170431843e-06, "log_odds_chosen": 9.640701293945312, "log_odds_ratio": -0.0003793557989411056, "logits/chosen": -0.517144501209259, "logits/rejected": -0.5247098207473755, "logps/chosen": -0.00040619890205562115, "logps/rejected": -1.758765697479248, "loss": 0.3626, "nll_loss": 0.09061383455991745, "rewards/accuracies": 1.0, "rewards/chosen": -4.061988875037059e-05, "rewards/margins": 0.17583593726158142, "rewards/rejected": -0.17587657272815704, "step": 12301 }, { "epoch": 8.507607192254495, "grad_norm": 4.868540287017822, "learning_rate": 8.291071154141694e-06, "log_odds_chosen": 10.661447525024414, "log_odds_ratio": -6.143888458609581e-05, "logits/chosen": -0.4865027368068695, "logits/rejected": -0.5694153904914856, "logps/chosen": -0.0001912845327751711, "logps/rejected": -2.037360191345215, "loss": 0.4801, "nll_loss": 0.12001441419124603, "rewards/accuracies": 1.0, "rewards/chosen": -1.912845254992135e-05, "rewards/margins": 0.20371690392494202, "rewards/rejected": -0.20373603701591492, "step": 12302 }, { "epoch": 8.508298755186722, "grad_norm": 3.5946638584136963, "learning_rate": 8.287229137851544e-06, "log_odds_chosen": 11.281814575195312, "log_odds_ratio": -4.3780812120530754e-05, "logits/chosen": -0.24768759310245514, "logits/rejected": -0.3600189685821533, "logps/chosen": -0.00018627429381012917, "logps/rejected": -1.897853970527649, "loss": 0.3271, "nll_loss": 0.08176817744970322, "rewards/accuracies": 1.0, "rewards/chosen": -1.8627430108608678e-05, "rewards/margins": 0.1897667795419693, "rewards/rejected": -0.1897854208946228, "step": 12303 }, { "epoch": 8.508990318118949, "grad_norm": 3.5438852310180664, "learning_rate": 8.283387121561397e-06, "log_odds_chosen": 10.485623359680176, "log_odds_ratio": -4.5006632717559114e-05, "logits/chosen": -0.20240357518196106, "logits/rejected": -0.17878305912017822, "logps/chosen": -0.00014959985855966806, "logps/rejected": -1.6814618110656738, "loss": 0.3431, "nll_loss": 0.08578226715326309, "rewards/accuracies": 1.0, "rewards/chosen": -1.4959986401663627e-05, "rewards/margins": 0.16813123226165771, "rewards/rejected": -0.16814617812633514, "step": 12304 }, { "epoch": 8.509681881051176, "grad_norm": 4.398319244384766, "learning_rate": 8.279545105271248e-06, "log_odds_chosen": 12.11357593536377, "log_odds_ratio": -2.4059154384303838e-05, "logits/chosen": -0.024562709033489227, "logits/rejected": -0.2101573646068573, "logps/chosen": -0.00010782388562802225, "logps/rejected": -3.0146188735961914, "loss": 0.3632, "nll_loss": 0.09080375730991364, "rewards/accuracies": 1.0, "rewards/chosen": -1.0782388926600106e-05, "rewards/margins": 0.3014511168003082, "rewards/rejected": -0.3014618754386902, "step": 12305 }, { "epoch": 8.510373443983402, "grad_norm": 2.8083410263061523, "learning_rate": 8.275703088981097e-06, "log_odds_chosen": 11.353227615356445, "log_odds_ratio": -2.210174898209516e-05, "logits/chosen": -0.3386459946632385, "logits/rejected": -0.2464192658662796, "logps/chosen": -0.0001623683492653072, "logps/rejected": -1.993700623512268, "loss": 0.242, "nll_loss": 0.06050754711031914, "rewards/accuracies": 1.0, "rewards/chosen": -1.6236834198934957e-05, "rewards/margins": 0.19935382902622223, "rewards/rejected": -0.19937007129192352, "step": 12306 }, { "epoch": 8.51106500691563, "grad_norm": 4.819612503051758, "learning_rate": 8.271861072690949e-06, "log_odds_chosen": 10.617549896240234, "log_odds_ratio": -0.00035102281253784895, "logits/chosen": -0.1268101930618286, "logits/rejected": -0.16175130009651184, "logps/chosen": -0.0003959328751079738, "logps/rejected": -2.089851140975952, "loss": 0.6655, "nll_loss": 0.1663326919078827, "rewards/accuracies": 1.0, "rewards/chosen": -3.959328751079738e-05, "rewards/margins": 0.20894552767276764, "rewards/rejected": -0.2089851051568985, "step": 12307 }, { "epoch": 8.511756569847856, "grad_norm": 6.782039165496826, "learning_rate": 8.2680190564008e-06, "log_odds_chosen": 11.941649436950684, "log_odds_ratio": -7.390565588138998e-05, "logits/chosen": -0.27633628249168396, "logits/rejected": -0.2620106041431427, "logps/chosen": -7.525588443968445e-05, "logps/rejected": -2.5158262252807617, "loss": 0.5836, "nll_loss": 0.14588358998298645, "rewards/accuracies": 1.0, "rewards/chosen": -7.525588443968445e-06, "rewards/margins": 0.2515750825405121, "rewards/rejected": -0.25158262252807617, "step": 12308 }, { "epoch": 8.512448132780083, "grad_norm": 4.102380752563477, "learning_rate": 8.26417704011065e-06, "log_odds_chosen": 11.285173416137695, "log_odds_ratio": -2.9311428079381585e-05, "logits/chosen": -0.19089141488075256, "logits/rejected": -0.20214393734931946, "logps/chosen": -0.0001509404683019966, "logps/rejected": -2.247863531112671, "loss": 0.7208, "nll_loss": 0.18019136786460876, "rewards/accuracies": 1.0, "rewards/chosen": -1.5094046830199659e-05, "rewards/margins": 0.22477126121520996, "rewards/rejected": -0.22478634119033813, "step": 12309 }, { "epoch": 8.51313969571231, "grad_norm": 3.3171002864837646, "learning_rate": 8.260335023820501e-06, "log_odds_chosen": 9.499252319335938, "log_odds_ratio": -0.0002143854071618989, "logits/chosen": -0.07529734075069427, "logits/rejected": -0.044407326728105545, "logps/chosen": -0.0005177122075110674, "logps/rejected": -1.8591790199279785, "loss": 0.2827, "nll_loss": 0.07066380232572556, "rewards/accuracies": 1.0, "rewards/chosen": -5.17712214787025e-05, "rewards/margins": 0.185866117477417, "rewards/rejected": -0.1859178990125656, "step": 12310 }, { "epoch": 8.513831258644537, "grad_norm": 3.6642651557922363, "learning_rate": 8.256493007530352e-06, "log_odds_chosen": 10.917474746704102, "log_odds_ratio": -5.3249630582286045e-05, "logits/chosen": -0.39446714520454407, "logits/rejected": -0.44384223222732544, "logps/chosen": -0.0001516881602583453, "logps/rejected": -1.9198907613754272, "loss": 0.3015, "nll_loss": 0.07537909597158432, "rewards/accuracies": 1.0, "rewards/chosen": -1.5168816389632411e-05, "rewards/margins": 0.19197390973567963, "rewards/rejected": -0.19198909401893616, "step": 12311 }, { "epoch": 8.514522821576763, "grad_norm": 3.984049081802368, "learning_rate": 8.252650991240203e-06, "log_odds_chosen": 11.000242233276367, "log_odds_ratio": -5.1191036618547514e-05, "logits/chosen": -0.6422182321548462, "logits/rejected": -0.51802659034729, "logps/chosen": -0.0002157797134714201, "logps/rejected": -2.0283071994781494, "loss": 0.4572, "nll_loss": 0.11429233103990555, "rewards/accuracies": 1.0, "rewards/chosen": -2.157797098334413e-05, "rewards/margins": 0.20280912518501282, "rewards/rejected": -0.2028307020664215, "step": 12312 }, { "epoch": 8.51521438450899, "grad_norm": 8.821842193603516, "learning_rate": 8.248808974950054e-06, "log_odds_chosen": 10.934852600097656, "log_odds_ratio": -7.927478145575151e-05, "logits/chosen": -0.536807656288147, "logits/rejected": -0.5345730185508728, "logps/chosen": -0.00025932028074748814, "logps/rejected": -2.190347194671631, "loss": 0.252, "nll_loss": 0.06299175322055817, "rewards/accuracies": 1.0, "rewards/chosen": -2.5932029529940337e-05, "rewards/margins": 0.21900878846645355, "rewards/rejected": -0.21903470158576965, "step": 12313 }, { "epoch": 8.515905947441217, "grad_norm": 2.862980365753174, "learning_rate": 8.244966958659906e-06, "log_odds_chosen": 11.223052024841309, "log_odds_ratio": -1.99354635697091e-05, "logits/chosen": -0.40606260299682617, "logits/rejected": -0.47642767429351807, "logps/chosen": -9.023462189361453e-05, "logps/rejected": -1.9665813446044922, "loss": 0.3914, "nll_loss": 0.09783907979726791, "rewards/accuracies": 1.0, "rewards/chosen": -9.023462553159334e-06, "rewards/margins": 0.19664910435676575, "rewards/rejected": -0.19665813446044922, "step": 12314 }, { "epoch": 8.516597510373444, "grad_norm": 6.2338738441467285, "learning_rate": 8.241124942369755e-06, "log_odds_chosen": 11.054450988769531, "log_odds_ratio": -3.191823998349719e-05, "logits/chosen": -0.6752179265022278, "logits/rejected": -0.6671661734580994, "logps/chosen": -0.0002096227981382981, "logps/rejected": -1.9763360023498535, "loss": 0.5009, "nll_loss": 0.12522666156291962, "rewards/accuracies": 1.0, "rewards/chosen": -2.096227945003193e-05, "rewards/margins": 0.19761261343955994, "rewards/rejected": -0.19763359427452087, "step": 12315 }, { "epoch": 8.51728907330567, "grad_norm": 3.3995327949523926, "learning_rate": 8.237282926079606e-06, "log_odds_chosen": 10.937870025634766, "log_odds_ratio": -0.00079381960676983, "logits/chosen": -0.6280683279037476, "logits/rejected": -0.671973705291748, "logps/chosen": -0.0005241141188889742, "logps/rejected": -2.157975435256958, "loss": 0.2516, "nll_loss": 0.06282602250576019, "rewards/accuracies": 1.0, "rewards/chosen": -5.2411414799280465e-05, "rewards/margins": 0.21574516594409943, "rewards/rejected": -0.2157975733280182, "step": 12316 }, { "epoch": 8.517980636237898, "grad_norm": 3.2070159912109375, "learning_rate": 8.233440909789458e-06, "log_odds_chosen": 11.502121925354004, "log_odds_ratio": -2.8572507289936766e-05, "logits/chosen": -0.16348521411418915, "logits/rejected": -0.14277753233909607, "logps/chosen": -0.00018423848086968064, "logps/rejected": -2.4327659606933594, "loss": 0.3631, "nll_loss": 0.09076198935508728, "rewards/accuracies": 1.0, "rewards/chosen": -1.8423846995574422e-05, "rewards/margins": 0.24325817823410034, "rewards/rejected": -0.24327659606933594, "step": 12317 }, { "epoch": 8.518672199170124, "grad_norm": 5.480223178863525, "learning_rate": 8.229598893499309e-06, "log_odds_chosen": 10.304766654968262, "log_odds_ratio": -0.00010444460349390283, "logits/chosen": -0.5530600547790527, "logits/rejected": -0.5032128691673279, "logps/chosen": -0.00040447936044074595, "logps/rejected": -2.0766894817352295, "loss": 0.3791, "nll_loss": 0.09476219862699509, "rewards/accuracies": 1.0, "rewards/chosen": -4.0447936044074595e-05, "rewards/margins": 0.20762848854064941, "rewards/rejected": -0.2076689451932907, "step": 12318 }, { "epoch": 8.519363762102351, "grad_norm": 4.5215559005737305, "learning_rate": 8.22575687720916e-06, "log_odds_chosen": 10.32512092590332, "log_odds_ratio": -0.00013688394392374903, "logits/chosen": -0.22447320818901062, "logits/rejected": -0.2803284227848053, "logps/chosen": -0.00019440040341578424, "logps/rejected": -1.9294097423553467, "loss": 0.5061, "nll_loss": 0.1265106052160263, "rewards/accuracies": 1.0, "rewards/chosen": -1.9440039977780543e-05, "rewards/margins": 0.19292153418064117, "rewards/rejected": -0.19294098019599915, "step": 12319 }, { "epoch": 8.520055325034578, "grad_norm": 5.495118618011475, "learning_rate": 8.22191486091901e-06, "log_odds_chosen": 10.946943283081055, "log_odds_ratio": -0.00013044103980064392, "logits/chosen": -0.2992880046367645, "logits/rejected": -0.4046541154384613, "logps/chosen": -0.00020025305275339633, "logps/rejected": -2.3770759105682373, "loss": 0.4413, "nll_loss": 0.11030249297618866, "rewards/accuracies": 1.0, "rewards/chosen": -2.0025305275339633e-05, "rewards/margins": 0.2376875877380371, "rewards/rejected": -0.23770761489868164, "step": 12320 }, { "epoch": 8.520746887966805, "grad_norm": 3.188117742538452, "learning_rate": 8.218072844628861e-06, "log_odds_chosen": 9.897478103637695, "log_odds_ratio": -0.00010892859427258372, "logits/chosen": -0.3278999328613281, "logits/rejected": -0.38061243295669556, "logps/chosen": -0.0004890036070719361, "logps/rejected": -1.8024688959121704, "loss": 0.3539, "nll_loss": 0.08847436308860779, "rewards/accuracies": 1.0, "rewards/chosen": -4.890035779681057e-05, "rewards/margins": 0.18019799888134003, "rewards/rejected": -0.18024688959121704, "step": 12321 }, { "epoch": 8.521438450899032, "grad_norm": 3.4911656379699707, "learning_rate": 8.214230828338712e-06, "log_odds_chosen": 11.001504898071289, "log_odds_ratio": -5.02866787428502e-05, "logits/chosen": -0.5928372144699097, "logits/rejected": -0.5494281053543091, "logps/chosen": -0.00030572060495615005, "logps/rejected": -2.429985284805298, "loss": 0.3272, "nll_loss": 0.08180222660303116, "rewards/accuracies": 1.0, "rewards/chosen": -3.0572060495615005e-05, "rewards/margins": 0.24296796321868896, "rewards/rejected": -0.24299854040145874, "step": 12322 }, { "epoch": 8.522130013831259, "grad_norm": 3.057826280593872, "learning_rate": 8.210388812048564e-06, "log_odds_chosen": 11.435529708862305, "log_odds_ratio": -3.5779325116891414e-05, "logits/chosen": -0.6360045671463013, "logits/rejected": -0.7269700765609741, "logps/chosen": -0.00019725115271285176, "logps/rejected": -2.423240900039673, "loss": 0.3774, "nll_loss": 0.09433731436729431, "rewards/accuracies": 1.0, "rewards/chosen": -1.972511745407246e-05, "rewards/margins": 0.24230436980724335, "rewards/rejected": -0.2423241138458252, "step": 12323 }, { "epoch": 8.522821576763485, "grad_norm": 4.978005886077881, "learning_rate": 8.206546795758415e-06, "log_odds_chosen": 10.862587928771973, "log_odds_ratio": -0.00013348314678296447, "logits/chosen": -0.23941171169281006, "logits/rejected": -0.3022950291633606, "logps/chosen": -0.00032461649971082807, "logps/rejected": -2.654205560684204, "loss": 0.3983, "nll_loss": 0.0995587706565857, "rewards/accuracies": 1.0, "rewards/chosen": -3.2461648515891284e-05, "rewards/margins": 0.2653881013393402, "rewards/rejected": -0.2654205560684204, "step": 12324 }, { "epoch": 8.523513139695712, "grad_norm": 3.5846354961395264, "learning_rate": 8.202704779468264e-06, "log_odds_chosen": 11.656807899475098, "log_odds_ratio": -5.0690625357674435e-05, "logits/chosen": -0.4642685651779175, "logits/rejected": -0.5309992432594299, "logps/chosen": -0.00020530421170406044, "logps/rejected": -3.100234270095825, "loss": 0.3337, "nll_loss": 0.08341825753450394, "rewards/accuracies": 1.0, "rewards/chosen": -2.053042408078909e-05, "rewards/margins": 0.3100028932094574, "rewards/rejected": -0.3100234270095825, "step": 12325 }, { "epoch": 8.524204702627939, "grad_norm": 4.919602394104004, "learning_rate": 8.198862763178117e-06, "log_odds_chosen": 12.687283515930176, "log_odds_ratio": -2.108301669068169e-05, "logits/chosen": -0.6012700796127319, "logits/rejected": -0.7129382491111755, "logps/chosen": -0.00016982763190753758, "logps/rejected": -3.3141398429870605, "loss": 0.3157, "nll_loss": 0.07891141623258591, "rewards/accuracies": 1.0, "rewards/chosen": -1.6982761735562235e-05, "rewards/margins": 0.33139699697494507, "rewards/rejected": -0.33141398429870605, "step": 12326 }, { "epoch": 8.524896265560166, "grad_norm": 11.470624923706055, "learning_rate": 8.195020746887967e-06, "log_odds_chosen": 11.43114948272705, "log_odds_ratio": -2.383892933721654e-05, "logits/chosen": -0.47737187147140503, "logits/rejected": -0.5778495073318481, "logps/chosen": -0.00020170229254290462, "logps/rejected": -2.5206422805786133, "loss": 0.4786, "nll_loss": 0.11964017152786255, "rewards/accuracies": 1.0, "rewards/chosen": -2.0170229618088342e-05, "rewards/margins": 0.25204405188560486, "rewards/rejected": -0.25206422805786133, "step": 12327 }, { "epoch": 8.525587828492393, "grad_norm": 3.273254871368408, "learning_rate": 8.191178730597818e-06, "log_odds_chosen": 11.17822265625, "log_odds_ratio": -2.2545445972355083e-05, "logits/chosen": -0.7379517555236816, "logits/rejected": -0.8989492654800415, "logps/chosen": -0.0003359833499416709, "logps/rejected": -2.4224159717559814, "loss": 0.3547, "nll_loss": 0.08866336941719055, "rewards/accuracies": 1.0, "rewards/chosen": -3.3598338632145897e-05, "rewards/margins": 0.24220798909664154, "rewards/rejected": -0.24224157631397247, "step": 12328 }, { "epoch": 8.52627939142462, "grad_norm": 3.759458065032959, "learning_rate": 8.187336714307669e-06, "log_odds_chosen": 11.058753967285156, "log_odds_ratio": -4.0378348785452545e-05, "logits/chosen": -0.06687570363283157, "logits/rejected": -0.15267179906368256, "logps/chosen": -0.00019187794532626867, "logps/rejected": -2.1204071044921875, "loss": 0.4475, "nll_loss": 0.11186422407627106, "rewards/accuracies": 1.0, "rewards/chosen": -1.918779526022263e-05, "rewards/margins": 0.21202154457569122, "rewards/rejected": -0.2120407074689865, "step": 12329 }, { "epoch": 8.526970954356846, "grad_norm": 4.646742343902588, "learning_rate": 8.18349469801752e-06, "log_odds_chosen": 10.960451126098633, "log_odds_ratio": -5.372767918743193e-05, "logits/chosen": -0.49918264150619507, "logits/rejected": -0.4375155568122864, "logps/chosen": -0.00021653309522662312, "logps/rejected": -2.144824981689453, "loss": 0.7104, "nll_loss": 0.1775887906551361, "rewards/accuracies": 1.0, "rewards/chosen": -2.1653311705449596e-05, "rewards/margins": 0.2144608497619629, "rewards/rejected": -0.21448248624801636, "step": 12330 }, { "epoch": 8.527662517289073, "grad_norm": 5.321842670440674, "learning_rate": 8.17965268172737e-06, "log_odds_chosen": 10.420870780944824, "log_odds_ratio": -0.0002663970517460257, "logits/chosen": -0.40803062915802, "logits/rejected": -0.29475855827331543, "logps/chosen": -0.0004102752427570522, "logps/rejected": -1.6895699501037598, "loss": 0.673, "nll_loss": 0.16821430623531342, "rewards/accuracies": 1.0, "rewards/chosen": -4.102752427570522e-05, "rewards/margins": 0.16891595721244812, "rewards/rejected": -0.16895699501037598, "step": 12331 }, { "epoch": 8.5283540802213, "grad_norm": 12.990723609924316, "learning_rate": 8.175810665437223e-06, "log_odds_chosen": 11.02590560913086, "log_odds_ratio": -5.349262210074812e-05, "logits/chosen": -0.6384226083755493, "logits/rejected": -0.6647124290466309, "logps/chosen": -0.000241591376834549, "logps/rejected": -2.1901700496673584, "loss": 0.4627, "nll_loss": 0.11568032205104828, "rewards/accuracies": 1.0, "rewards/chosen": -2.415913695585914e-05, "rewards/margins": 0.21899282932281494, "rewards/rejected": -0.21901699900627136, "step": 12332 }, { "epoch": 8.529045643153527, "grad_norm": 3.829329490661621, "learning_rate": 8.171968649147074e-06, "log_odds_chosen": 11.376579284667969, "log_odds_ratio": -2.9566979719675146e-05, "logits/chosen": -0.5997239351272583, "logits/rejected": -0.5821545124053955, "logps/chosen": -7.834136340534315e-05, "logps/rejected": -2.0063858032226562, "loss": 0.4316, "nll_loss": 0.107905812561512, "rewards/accuracies": 1.0, "rewards/chosen": -7.834136340534315e-06, "rewards/margins": 0.2006307691335678, "rewards/rejected": -0.20063860714435577, "step": 12333 }, { "epoch": 8.529737206085754, "grad_norm": 3.409250497817993, "learning_rate": 8.168126632856923e-06, "log_odds_chosen": 11.758556365966797, "log_odds_ratio": -4.655357770388946e-05, "logits/chosen": -0.16191110014915466, "logits/rejected": -0.19495658576488495, "logps/chosen": -0.00020725368813145906, "logps/rejected": -2.755053997039795, "loss": 0.623, "nll_loss": 0.15575700998306274, "rewards/accuracies": 1.0, "rewards/chosen": -2.0725368813145906e-05, "rewards/margins": 0.2754846513271332, "rewards/rejected": -0.275505393743515, "step": 12334 }, { "epoch": 8.53042876901798, "grad_norm": 2.668457269668579, "learning_rate": 8.164284616566775e-06, "log_odds_chosen": 10.386791229248047, "log_odds_ratio": -0.00017962889978662133, "logits/chosen": -0.37340259552001953, "logits/rejected": -0.33036869764328003, "logps/chosen": -0.0003662610542960465, "logps/rejected": -2.257016658782959, "loss": 0.2609, "nll_loss": 0.06521536409854889, "rewards/accuracies": 1.0, "rewards/chosen": -3.6626108339987695e-05, "rewards/margins": 0.22566506266593933, "rewards/rejected": -0.22570167481899261, "step": 12335 }, { "epoch": 8.531120331950207, "grad_norm": 3.2178878784179688, "learning_rate": 8.160442600276626e-06, "log_odds_chosen": 10.688413619995117, "log_odds_ratio": -8.053887722780928e-05, "logits/chosen": -0.1574697643518448, "logits/rejected": -0.30061471462249756, "logps/chosen": -0.00027518076240085065, "logps/rejected": -1.84926438331604, "loss": 0.4135, "nll_loss": 0.10337050259113312, "rewards/accuracies": 1.0, "rewards/chosen": -2.7518077331478707e-05, "rewards/margins": 0.18489891290664673, "rewards/rejected": -0.18492642045021057, "step": 12336 }, { "epoch": 8.531811894882434, "grad_norm": 2.9085309505462646, "learning_rate": 8.156600583986477e-06, "log_odds_chosen": 10.503604888916016, "log_odds_ratio": -0.00011857294884976, "logits/chosen": -0.28567206859588623, "logits/rejected": -0.28339099884033203, "logps/chosen": -0.00027978868456557393, "logps/rejected": -1.8618590831756592, "loss": 0.6276, "nll_loss": 0.15689247846603394, "rewards/accuracies": 1.0, "rewards/chosen": -2.7978869184153154e-05, "rewards/margins": 0.1861579269170761, "rewards/rejected": -0.18618589639663696, "step": 12337 }, { "epoch": 8.532503457814661, "grad_norm": 4.562682628631592, "learning_rate": 8.152758567696327e-06, "log_odds_chosen": 11.258023262023926, "log_odds_ratio": -2.551450779719744e-05, "logits/chosen": -0.6869363784790039, "logits/rejected": -0.7892690896987915, "logps/chosen": -0.0001631429768167436, "logps/rejected": -2.302780866622925, "loss": 0.4823, "nll_loss": 0.12058058381080627, "rewards/accuracies": 1.0, "rewards/chosen": -1.631429768167436e-05, "rewards/margins": 0.23026177287101746, "rewards/rejected": -0.23027808964252472, "step": 12338 }, { "epoch": 8.533195020746888, "grad_norm": 3.6148369312286377, "learning_rate": 8.148916551406178e-06, "log_odds_chosen": 11.666459083557129, "log_odds_ratio": -3.3464657462900504e-05, "logits/chosen": 0.041404321789741516, "logits/rejected": 0.026137858629226685, "logps/chosen": -0.0001358877052552998, "logps/rejected": -2.480074882507324, "loss": 0.3857, "nll_loss": 0.0964311957359314, "rewards/accuracies": 1.0, "rewards/chosen": -1.3588771253125742e-05, "rewards/margins": 0.24799390137195587, "rewards/rejected": -0.24800749123096466, "step": 12339 }, { "epoch": 8.533886583679115, "grad_norm": 3.3950388431549072, "learning_rate": 8.145074535116029e-06, "log_odds_chosen": 10.787850379943848, "log_odds_ratio": -4.8465499276062474e-05, "logits/chosen": -0.3860059082508087, "logits/rejected": -0.459553599357605, "logps/chosen": -0.0003434290993027389, "logps/rejected": -2.387206792831421, "loss": 0.2738, "nll_loss": 0.06845402717590332, "rewards/accuracies": 1.0, "rewards/chosen": -3.434290920267813e-05, "rewards/margins": 0.23868635296821594, "rewards/rejected": -0.23872068524360657, "step": 12340 }, { "epoch": 8.534578146611342, "grad_norm": 3.6412434577941895, "learning_rate": 8.141232518825881e-06, "log_odds_chosen": 9.9017333984375, "log_odds_ratio": -0.00012677724589593709, "logits/chosen": -0.21874283254146576, "logits/rejected": -0.2888812720775604, "logps/chosen": -0.0003149479744024575, "logps/rejected": -1.8529309034347534, "loss": 0.343, "nll_loss": 0.08573202043771744, "rewards/accuracies": 1.0, "rewards/chosen": -3.149479744024575e-05, "rewards/margins": 0.18526160717010498, "rewards/rejected": -0.18529310822486877, "step": 12341 }, { "epoch": 8.535269709543568, "grad_norm": 3.061228036880493, "learning_rate": 8.137390502535732e-06, "log_odds_chosen": 11.397375106811523, "log_odds_ratio": -1.8148359231418e-05, "logits/chosen": -0.2281343787908554, "logits/rejected": -0.18515226244926453, "logps/chosen": -8.230004459619522e-05, "logps/rejected": -1.8191204071044922, "loss": 0.3075, "nll_loss": 0.07687129825353622, "rewards/accuracies": 1.0, "rewards/chosen": -8.230003913922701e-06, "rewards/margins": 0.18190382421016693, "rewards/rejected": -0.18191204965114594, "step": 12342 }, { "epoch": 8.535961272475795, "grad_norm": 5.672790050506592, "learning_rate": 8.133548486245581e-06, "log_odds_chosen": 12.381575584411621, "log_odds_ratio": -1.670279016252607e-05, "logits/chosen": -0.1853232979774475, "logits/rejected": -0.25712287425994873, "logps/chosen": -0.00012582400813698769, "logps/rejected": -3.195413827896118, "loss": 0.6336, "nll_loss": 0.15838681161403656, "rewards/accuracies": 1.0, "rewards/chosen": -1.2582400813698769e-05, "rewards/margins": 0.3195287883281708, "rewards/rejected": -0.31954139471054077, "step": 12343 }, { "epoch": 8.536652835408022, "grad_norm": 4.7994160652160645, "learning_rate": 8.129706469955432e-06, "log_odds_chosen": 11.000398635864258, "log_odds_ratio": -9.112850239034742e-05, "logits/chosen": 0.0034987851977348328, "logits/rejected": -0.10294229537248611, "logps/chosen": -0.0002761443320196122, "logps/rejected": -2.4395899772644043, "loss": 0.4132, "nll_loss": 0.10329889506101608, "rewards/accuracies": 1.0, "rewards/chosen": -2.7614434657152742e-05, "rewards/margins": 0.243931382894516, "rewards/rejected": -0.2439589947462082, "step": 12344 }, { "epoch": 8.537344398340249, "grad_norm": 3.5451295375823975, "learning_rate": 8.125864453665284e-06, "log_odds_chosen": 11.421430587768555, "log_odds_ratio": -2.3883238100097515e-05, "logits/chosen": -0.3302974998950958, "logits/rejected": -0.44120854139328003, "logps/chosen": -0.00019329342467244714, "logps/rejected": -2.6955575942993164, "loss": 0.4013, "nll_loss": 0.10033205151557922, "rewards/accuracies": 1.0, "rewards/chosen": -1.9329343558638357e-05, "rewards/margins": 0.26953643560409546, "rewards/rejected": -0.2695557773113251, "step": 12345 }, { "epoch": 8.538035961272476, "grad_norm": 4.600787162780762, "learning_rate": 8.122022437375135e-06, "log_odds_chosen": 11.199943542480469, "log_odds_ratio": -4.997500218451023e-05, "logits/chosen": -0.07723156362771988, "logits/rejected": -0.1380307674407959, "logps/chosen": -0.00027827589656226337, "logps/rejected": -2.495893716812134, "loss": 0.4573, "nll_loss": 0.11431986093521118, "rewards/accuracies": 1.0, "rewards/chosen": -2.782759111141786e-05, "rewards/margins": 0.24956156313419342, "rewards/rejected": -0.24958938360214233, "step": 12346 }, { "epoch": 8.538727524204702, "grad_norm": 4.425629615783691, "learning_rate": 8.118180421084986e-06, "log_odds_chosen": 11.156241416931152, "log_odds_ratio": -3.892029417329468e-05, "logits/chosen": -0.3224028944969177, "logits/rejected": -0.3680450916290283, "logps/chosen": -0.0006028384086675942, "logps/rejected": -2.400455951690674, "loss": 0.6719, "nll_loss": 0.16797758638858795, "rewards/accuracies": 1.0, "rewards/chosen": -6.0283848142717034e-05, "rewards/margins": 0.23998533189296722, "rewards/rejected": -0.24004562199115753, "step": 12347 }, { "epoch": 8.53941908713693, "grad_norm": 7.704127311706543, "learning_rate": 8.114338404794837e-06, "log_odds_chosen": 11.423298835754395, "log_odds_ratio": -0.0003145392402075231, "logits/chosen": -0.03623528778553009, "logits/rejected": -0.10272793471813202, "logps/chosen": -0.00033639915636740625, "logps/rejected": -3.0414867401123047, "loss": 0.5372, "nll_loss": 0.1342613697052002, "rewards/accuracies": 1.0, "rewards/chosen": -3.363991709193215e-05, "rewards/margins": 0.3041149973869324, "rewards/rejected": -0.30414867401123047, "step": 12348 }, { "epoch": 8.540110650069156, "grad_norm": 6.523242473602295, "learning_rate": 8.110496388504687e-06, "log_odds_chosen": 11.448200225830078, "log_odds_ratio": -0.00010262696741847321, "logits/chosen": -0.054167747497558594, "logits/rejected": -0.14434875547885895, "logps/chosen": -0.00013683061115443707, "logps/rejected": -2.6697921752929688, "loss": 0.5282, "nll_loss": 0.13205023109912872, "rewards/accuracies": 1.0, "rewards/chosen": -1.3683061297342647e-05, "rewards/margins": 0.2669655382633209, "rewards/rejected": -0.2669792175292969, "step": 12349 }, { "epoch": 8.540802213001383, "grad_norm": 8.026880264282227, "learning_rate": 8.106654372214538e-06, "log_odds_chosen": 10.558284759521484, "log_odds_ratio": -0.00018640939379110932, "logits/chosen": -0.33718621730804443, "logits/rejected": -0.38743701577186584, "logps/chosen": -0.0006922598695382476, "logps/rejected": -2.594275712966919, "loss": 0.5186, "nll_loss": 0.12963704764842987, "rewards/accuracies": 1.0, "rewards/chosen": -6.922599277459085e-05, "rewards/margins": 0.25935834646224976, "rewards/rejected": -0.25942760705947876, "step": 12350 }, { "epoch": 8.54149377593361, "grad_norm": 2.8525102138519287, "learning_rate": 8.10281235592439e-06, "log_odds_chosen": 11.669790267944336, "log_odds_ratio": -7.162422116380185e-05, "logits/chosen": 0.1007673367857933, "logits/rejected": -0.023373395204544067, "logps/chosen": -0.00022936250024940819, "logps/rejected": -2.460688352584839, "loss": 0.4003, "nll_loss": 0.10007898509502411, "rewards/accuracies": 1.0, "rewards/chosen": -2.2936252207728103e-05, "rewards/margins": 0.24604588747024536, "rewards/rejected": -0.2460688352584839, "step": 12351 }, { "epoch": 8.542185338865837, "grad_norm": 2.9962687492370605, "learning_rate": 8.09897033963424e-06, "log_odds_chosen": 12.462955474853516, "log_odds_ratio": -8.526945748599246e-06, "logits/chosen": -0.02684374898672104, "logits/rejected": 0.05897844582796097, "logps/chosen": -8.549378253519535e-05, "logps/rejected": -2.8761844635009766, "loss": 0.348, "nll_loss": 0.0869932547211647, "rewards/accuracies": 1.0, "rewards/chosen": -8.549377525923774e-06, "rewards/margins": 0.28760990500450134, "rewards/rejected": -0.2876184284687042, "step": 12352 }, { "epoch": 8.542876901798063, "grad_norm": 3.766939878463745, "learning_rate": 8.09512832334409e-06, "log_odds_chosen": 11.607439041137695, "log_odds_ratio": -2.3744345526210964e-05, "logits/chosen": -0.41923362016677856, "logits/rejected": -0.3867270052433014, "logps/chosen": -0.00019827390497084707, "logps/rejected": -2.717073440551758, "loss": 0.4329, "nll_loss": 0.10822376608848572, "rewards/accuracies": 1.0, "rewards/chosen": -1.9827390133286826e-05, "rewards/margins": 0.27168750762939453, "rewards/rejected": -0.27170735597610474, "step": 12353 }, { "epoch": 8.54356846473029, "grad_norm": 3.8533971309661865, "learning_rate": 8.091286307053943e-06, "log_odds_chosen": 12.08335018157959, "log_odds_ratio": -1.4224663573259022e-05, "logits/chosen": -0.22680625319480896, "logits/rejected": -0.25222718715667725, "logps/chosen": -0.0003561171470209956, "logps/rejected": -3.0477356910705566, "loss": 0.433, "nll_loss": 0.10824976861476898, "rewards/accuracies": 1.0, "rewards/chosen": -3.5611716157291085e-05, "rewards/margins": 0.30473795533180237, "rewards/rejected": -0.30477356910705566, "step": 12354 }, { "epoch": 8.544260027662517, "grad_norm": 3.2876503467559814, "learning_rate": 8.087444290763794e-06, "log_odds_chosen": 10.656986236572266, "log_odds_ratio": -0.0011162598384544253, "logits/chosen": -0.17624646425247192, "logits/rejected": -0.17000812292099, "logps/chosen": -0.0006880313740111887, "logps/rejected": -2.123157024383545, "loss": 0.3177, "nll_loss": 0.07931582629680634, "rewards/accuracies": 1.0, "rewards/chosen": -6.880313594592735e-05, "rewards/margins": 0.21224690973758698, "rewards/rejected": -0.21231570839881897, "step": 12355 }, { "epoch": 8.544951590594744, "grad_norm": 4.662578105926514, "learning_rate": 8.083602274473644e-06, "log_odds_chosen": 11.871183395385742, "log_odds_ratio": -3.7982386857038364e-05, "logits/chosen": -0.0023404359817504883, "logits/rejected": -0.0741780549287796, "logps/chosen": -7.023196667432785e-05, "logps/rejected": -2.2987864017486572, "loss": 0.4072, "nll_loss": 0.10178625583648682, "rewards/accuracies": 1.0, "rewards/chosen": -7.023197667876957e-06, "rewards/margins": 0.22987163066864014, "rewards/rejected": -0.22987863421440125, "step": 12356 }, { "epoch": 8.54564315352697, "grad_norm": 4.404334545135498, "learning_rate": 8.079760258183495e-06, "log_odds_chosen": 10.278458595275879, "log_odds_ratio": -0.00012774733477272093, "logits/chosen": 0.058876536786556244, "logits/rejected": 0.03073546290397644, "logps/chosen": -0.000667063519358635, "logps/rejected": -2.4232096672058105, "loss": 0.2532, "nll_loss": 0.06329509615898132, "rewards/accuracies": 1.0, "rewards/chosen": -6.670635048067197e-05, "rewards/margins": 0.24225425720214844, "rewards/rejected": -0.2423209697008133, "step": 12357 }, { "epoch": 8.546334716459198, "grad_norm": 3.8350791931152344, "learning_rate": 8.075918241893346e-06, "log_odds_chosen": 10.941924095153809, "log_odds_ratio": -6.48950444883667e-05, "logits/chosen": -0.28380313515663147, "logits/rejected": -0.3791557848453522, "logps/chosen": -0.00013279700942803174, "logps/rejected": -2.2448134422302246, "loss": 0.3658, "nll_loss": 0.0914371907711029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3279700397106353e-05, "rewards/margins": 0.22446808218955994, "rewards/rejected": -0.22448134422302246, "step": 12358 }, { "epoch": 8.547026279391424, "grad_norm": 2.915459394454956, "learning_rate": 8.072076225603197e-06, "log_odds_chosen": 10.384876251220703, "log_odds_ratio": -0.00010581470269244164, "logits/chosen": 0.02290746383368969, "logits/rejected": -0.03376729041337967, "logps/chosen": -0.0005225928616710007, "logps/rejected": -2.035266876220703, "loss": 0.3995, "nll_loss": 0.09986265748739243, "rewards/accuracies": 1.0, "rewards/chosen": -5.2259281801525503e-05, "rewards/margins": 0.20347443222999573, "rewards/rejected": -0.20352670550346375, "step": 12359 }, { "epoch": 8.547717842323651, "grad_norm": 4.734531402587891, "learning_rate": 8.068234209313049e-06, "log_odds_chosen": 10.698156356811523, "log_odds_ratio": -5.0863098294939846e-05, "logits/chosen": -0.4898606836795807, "logits/rejected": -0.5427123308181763, "logps/chosen": -0.00018635543528944254, "logps/rejected": -1.9825899600982666, "loss": 0.4297, "nll_loss": 0.10742726922035217, "rewards/accuracies": 1.0, "rewards/chosen": -1.8635542801348493e-05, "rewards/margins": 0.19824035465717316, "rewards/rejected": -0.19825901091098785, "step": 12360 }, { "epoch": 8.548409405255878, "grad_norm": 4.701021671295166, "learning_rate": 8.064392193022898e-06, "log_odds_chosen": 10.699748992919922, "log_odds_ratio": -4.7124533011810854e-05, "logits/chosen": -0.43253517150878906, "logits/rejected": -0.4224990904331207, "logps/chosen": -0.00014709580864291638, "logps/rejected": -1.9485095739364624, "loss": 0.4198, "nll_loss": 0.10495181381702423, "rewards/accuracies": 1.0, "rewards/chosen": -1.4709580682392698e-05, "rewards/margins": 0.19483624398708344, "rewards/rejected": -0.19485095143318176, "step": 12361 }, { "epoch": 8.549100968188105, "grad_norm": 4.619813919067383, "learning_rate": 8.060550176732749e-06, "log_odds_chosen": 11.04226303100586, "log_odds_ratio": -2.6943056582240388e-05, "logits/chosen": -0.5407166481018066, "logits/rejected": -0.5664966106414795, "logps/chosen": -0.000211311416933313, "logps/rejected": -2.360203504562378, "loss": 0.4539, "nll_loss": 0.11346258223056793, "rewards/accuracies": 1.0, "rewards/chosen": -2.11311416933313e-05, "rewards/margins": 0.2359992116689682, "rewards/rejected": -0.23602034151554108, "step": 12362 }, { "epoch": 8.549792531120332, "grad_norm": 4.517255783081055, "learning_rate": 8.056708160442601e-06, "log_odds_chosen": 10.63552474975586, "log_odds_ratio": -7.231361814774573e-05, "logits/chosen": -0.549342930316925, "logits/rejected": -0.6850473880767822, "logps/chosen": -0.0008130917558446527, "logps/rejected": -2.4010677337646484, "loss": 0.4628, "nll_loss": 0.11569619923830032, "rewards/accuracies": 1.0, "rewards/chosen": -8.130916830850765e-05, "rewards/margins": 0.24002546072006226, "rewards/rejected": -0.2401067614555359, "step": 12363 }, { "epoch": 8.550484094052559, "grad_norm": 5.180215835571289, "learning_rate": 8.052866144152452e-06, "log_odds_chosen": 10.763911247253418, "log_odds_ratio": -0.00020632933592423797, "logits/chosen": -0.12438270449638367, "logits/rejected": -0.02698398008942604, "logps/chosen": -0.00023761746706441045, "logps/rejected": -2.5075294971466064, "loss": 0.4828, "nll_loss": 0.1206858828663826, "rewards/accuracies": 1.0, "rewards/chosen": -2.3761747797834687e-05, "rewards/margins": 0.25072920322418213, "rewards/rejected": -0.2507529556751251, "step": 12364 }, { "epoch": 8.551175656984785, "grad_norm": 3.4681193828582764, "learning_rate": 8.049024127862303e-06, "log_odds_chosen": 10.928182601928711, "log_odds_ratio": -3.5743283660849556e-05, "logits/chosen": -0.16697700321674347, "logits/rejected": -0.2656247615814209, "logps/chosen": -0.00037262385012581944, "logps/rejected": -2.7088029384613037, "loss": 0.3534, "nll_loss": 0.08834241330623627, "rewards/accuracies": 1.0, "rewards/chosen": -3.726238355739042e-05, "rewards/margins": 0.2708430290222168, "rewards/rejected": -0.2708802819252014, "step": 12365 }, { "epoch": 8.551867219917012, "grad_norm": 3.508986234664917, "learning_rate": 8.045182111572153e-06, "log_odds_chosen": 11.227447509765625, "log_odds_ratio": -2.6481051463633776e-05, "logits/chosen": -0.0640401542186737, "logits/rejected": -0.12471738457679749, "logps/chosen": -0.00022163873654790223, "logps/rejected": -2.4728031158447266, "loss": 0.3749, "nll_loss": 0.09372153133153915, "rewards/accuracies": 1.0, "rewards/chosen": -2.2163874746183865e-05, "rewards/margins": 0.24725812673568726, "rewards/rejected": -0.2472802996635437, "step": 12366 }, { "epoch": 8.552558782849239, "grad_norm": 4.076083183288574, "learning_rate": 8.041340095282004e-06, "log_odds_chosen": 11.609827995300293, "log_odds_ratio": -2.9761686164420098e-05, "logits/chosen": -0.3336213231086731, "logits/rejected": -0.4385530352592468, "logps/chosen": -7.464332156814635e-05, "logps/rejected": -1.9322714805603027, "loss": 0.37, "nll_loss": 0.09250232577323914, "rewards/accuracies": 1.0, "rewards/chosen": -7.464332156814635e-06, "rewards/margins": 0.19321969151496887, "rewards/rejected": -0.1932271420955658, "step": 12367 }, { "epoch": 8.553250345781466, "grad_norm": 4.22585391998291, "learning_rate": 8.037498078991855e-06, "log_odds_chosen": 11.770487785339355, "log_odds_ratio": -1.6943175069172867e-05, "logits/chosen": -0.1061343103647232, "logits/rejected": -0.073185995221138, "logps/chosen": -9.793087519938126e-05, "logps/rejected": -2.4531030654907227, "loss": 0.397, "nll_loss": 0.09924229234457016, "rewards/accuracies": 1.0, "rewards/chosen": -9.793087883736007e-06, "rewards/margins": 0.2453005015850067, "rewards/rejected": -0.24531030654907227, "step": 12368 }, { "epoch": 8.553941908713693, "grad_norm": 3.291067123413086, "learning_rate": 8.033656062701707e-06, "log_odds_chosen": 10.750139236450195, "log_odds_ratio": -7.862604252295569e-05, "logits/chosen": -0.17375710606575012, "logits/rejected": -0.277298241853714, "logps/chosen": -0.0002167547499993816, "logps/rejected": -2.1705241203308105, "loss": 0.552, "nll_loss": 0.13799560070037842, "rewards/accuracies": 1.0, "rewards/chosen": -2.1675476091331802e-05, "rewards/margins": 0.21703073382377625, "rewards/rejected": -0.2170524150133133, "step": 12369 }, { "epoch": 8.55463347164592, "grad_norm": 3.480898857116699, "learning_rate": 8.029814046411558e-06, "log_odds_chosen": 10.99404525756836, "log_odds_ratio": -4.4477874325821176e-05, "logits/chosen": -0.33993351459503174, "logits/rejected": -0.3758736848831177, "logps/chosen": -0.00010217801172984764, "logps/rejected": -1.725305438041687, "loss": 0.3707, "nll_loss": 0.09266746044158936, "rewards/accuracies": 1.0, "rewards/chosen": -1.0217800991085824e-05, "rewards/margins": 0.17252033948898315, "rewards/rejected": -0.17253056168556213, "step": 12370 }, { "epoch": 8.555325034578146, "grad_norm": 4.511864185333252, "learning_rate": 8.025972030121407e-06, "log_odds_chosen": 11.547143936157227, "log_odds_ratio": -2.7884903829544783e-05, "logits/chosen": -0.5886281132698059, "logits/rejected": -0.5377705097198486, "logps/chosen": -0.0010218716925010085, "logps/rejected": -3.1817193031311035, "loss": 0.5387, "nll_loss": 0.1346650868654251, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001021871721604839, "rewards/margins": 0.3180697560310364, "rewards/rejected": -0.3181719481945038, "step": 12371 }, { "epoch": 8.556016597510373, "grad_norm": 7.103489398956299, "learning_rate": 8.02213001383126e-06, "log_odds_chosen": 11.430948257446289, "log_odds_ratio": -3.2747379009379074e-05, "logits/chosen": -0.3653043210506439, "logits/rejected": -0.35591912269592285, "logps/chosen": -6.450832006521523e-05, "logps/rejected": -1.9181818962097168, "loss": 0.3688, "nll_loss": 0.09220267832279205, "rewards/accuracies": 1.0, "rewards/chosen": -6.450833097915165e-06, "rewards/margins": 0.19181175529956818, "rewards/rejected": -0.1918182075023651, "step": 12372 }, { "epoch": 8.5567081604426, "grad_norm": 2.6456801891326904, "learning_rate": 8.01828799754111e-06, "log_odds_chosen": 10.765140533447266, "log_odds_ratio": -0.00013235537335276604, "logits/chosen": -0.3317301571369171, "logits/rejected": -0.4527689516544342, "logps/chosen": -0.00024052447406575084, "logps/rejected": -1.942337989807129, "loss": 0.2734, "nll_loss": 0.0683254674077034, "rewards/accuracies": 1.0, "rewards/chosen": -2.4052449589362368e-05, "rewards/margins": 0.1942097395658493, "rewards/rejected": -0.19423377513885498, "step": 12373 }, { "epoch": 8.557399723374827, "grad_norm": 3.8534891605377197, "learning_rate": 8.014445981250961e-06, "log_odds_chosen": 11.574892044067383, "log_odds_ratio": -6.876789120724425e-05, "logits/chosen": 0.031613051891326904, "logits/rejected": -0.07511621713638306, "logps/chosen": -0.00015929131768643856, "logps/rejected": -2.676453113555908, "loss": 0.565, "nll_loss": 0.14123935997486115, "rewards/accuracies": 1.0, "rewards/chosen": -1.5929130313452333e-05, "rewards/margins": 0.26762938499450684, "rewards/rejected": -0.26764532923698425, "step": 12374 }, { "epoch": 8.558091286307054, "grad_norm": 3.723045587539673, "learning_rate": 8.010603964960812e-06, "log_odds_chosen": 11.224947929382324, "log_odds_ratio": -0.0001394870487274602, "logits/chosen": -0.09539203345775604, "logits/rejected": -0.027994796633720398, "logps/chosen": -0.00020092641352675855, "logps/rejected": -2.439518690109253, "loss": 0.4812, "nll_loss": 0.12027867138385773, "rewards/accuracies": 1.0, "rewards/chosen": -2.0092640625080094e-05, "rewards/margins": 0.24393177032470703, "rewards/rejected": -0.24395185708999634, "step": 12375 }, { "epoch": 8.55878284923928, "grad_norm": 3.302299737930298, "learning_rate": 8.006761948670663e-06, "log_odds_chosen": 11.917454719543457, "log_odds_ratio": -1.1386916412448045e-05, "logits/chosen": -0.380159467458725, "logits/rejected": -0.4698712229728699, "logps/chosen": -9.014501847559586e-05, "logps/rejected": -2.5528717041015625, "loss": 0.4428, "nll_loss": 0.11068766564130783, "rewards/accuracies": 1.0, "rewards/chosen": -9.014502211357467e-06, "rewards/margins": 0.25527817010879517, "rewards/rejected": -0.25528717041015625, "step": 12376 }, { "epoch": 8.559474412171507, "grad_norm": 3.8763320446014404, "learning_rate": 8.002919932380513e-06, "log_odds_chosen": 10.3782320022583, "log_odds_ratio": -9.551684343023226e-05, "logits/chosen": -0.3191932737827301, "logits/rejected": -0.4106774926185608, "logps/chosen": -0.00038932557799853384, "logps/rejected": -1.5302492380142212, "loss": 0.3755, "nll_loss": 0.09385703504085541, "rewards/accuracies": 1.0, "rewards/chosen": -3.8932557799853384e-05, "rewards/margins": 0.15298599004745483, "rewards/rejected": -0.15302491188049316, "step": 12377 }, { "epoch": 8.560165975103734, "grad_norm": 5.742145538330078, "learning_rate": 7.999077916090364e-06, "log_odds_chosen": 11.48661994934082, "log_odds_ratio": -1.9364917534403503e-05, "logits/chosen": -0.6679335236549377, "logits/rejected": -0.7200274467468262, "logps/chosen": -0.00029462837846949697, "logps/rejected": -2.473931312561035, "loss": 0.4403, "nll_loss": 0.11007969826459885, "rewards/accuracies": 1.0, "rewards/chosen": -2.9462837119353935e-05, "rewards/margins": 0.24736365675926208, "rewards/rejected": -0.24739313125610352, "step": 12378 }, { "epoch": 8.560857538035961, "grad_norm": 3.9970247745513916, "learning_rate": 7.995235899800217e-06, "log_odds_chosen": 11.762286186218262, "log_odds_ratio": -1.555400376673788e-05, "logits/chosen": -0.34955230355262756, "logits/rejected": -0.38188910484313965, "logps/chosen": -0.00016597509966231883, "logps/rejected": -2.812894344329834, "loss": 0.4296, "nll_loss": 0.10739898681640625, "rewards/accuracies": 1.0, "rewards/chosen": -1.659750887483824e-05, "rewards/margins": 0.281272828578949, "rewards/rejected": -0.2812894284725189, "step": 12379 }, { "epoch": 8.561549100968188, "grad_norm": 2.7898218631744385, "learning_rate": 7.991393883510066e-06, "log_odds_chosen": 10.549398422241211, "log_odds_ratio": -0.00034211043384857476, "logits/chosen": -0.32482385635375977, "logits/rejected": -0.42106741666793823, "logps/chosen": -0.0004134580958634615, "logps/rejected": -1.7813361883163452, "loss": 0.2325, "nll_loss": 0.05810131877660751, "rewards/accuracies": 1.0, "rewards/chosen": -4.134581104153767e-05, "rewards/margins": 0.17809228599071503, "rewards/rejected": -0.17813362181186676, "step": 12380 }, { "epoch": 8.562240663900415, "grad_norm": 4.60001802444458, "learning_rate": 7.987551867219916e-06, "log_odds_chosen": 9.886676788330078, "log_odds_ratio": -0.00040853844257071614, "logits/chosen": -0.5294456481933594, "logits/rejected": -0.574863612651825, "logps/chosen": -0.0002222563634859398, "logps/rejected": -1.5985755920410156, "loss": 0.5867, "nll_loss": 0.14664258062839508, "rewards/accuracies": 1.0, "rewards/chosen": -2.222563671239186e-05, "rewards/margins": 0.1598353236913681, "rewards/rejected": -0.15985754132270813, "step": 12381 }, { "epoch": 8.562932226832642, "grad_norm": 3.649996042251587, "learning_rate": 7.983709850929769e-06, "log_odds_chosen": 11.81763744354248, "log_odds_ratio": -1.9539475033525378e-05, "logits/chosen": -0.33109086751937866, "logits/rejected": -0.2591181993484497, "logps/chosen": -0.00014043958799447864, "logps/rejected": -2.5570220947265625, "loss": 0.3677, "nll_loss": 0.0919288769364357, "rewards/accuracies": 1.0, "rewards/chosen": -1.4043957889953163e-05, "rewards/margins": 0.2556881606578827, "rewards/rejected": -0.2557021975517273, "step": 12382 }, { "epoch": 8.563623789764868, "grad_norm": 3.148855447769165, "learning_rate": 7.97986783463962e-06, "log_odds_chosen": 11.481773376464844, "log_odds_ratio": -3.30365655827336e-05, "logits/chosen": -0.32694217562675476, "logits/rejected": -0.2874327301979065, "logps/chosen": -0.00010169432789552957, "logps/rejected": -2.180878162384033, "loss": 0.327, "nll_loss": 0.08175890147686005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0169432243856136e-05, "rewards/margins": 0.2180776447057724, "rewards/rejected": -0.2180878221988678, "step": 12383 }, { "epoch": 8.564315352697095, "grad_norm": 3.285156011581421, "learning_rate": 7.97602581834947e-06, "log_odds_chosen": 11.242053985595703, "log_odds_ratio": -3.248647408327088e-05, "logits/chosen": -0.43714556097984314, "logits/rejected": -0.5591869354248047, "logps/chosen": -0.000421356875449419, "logps/rejected": -2.650657892227173, "loss": 0.3332, "nll_loss": 0.0832899734377861, "rewards/accuracies": 1.0, "rewards/chosen": -4.213568536215462e-05, "rewards/margins": 0.2650236487388611, "rewards/rejected": -0.2650657892227173, "step": 12384 }, { "epoch": 8.565006915629322, "grad_norm": 4.97734260559082, "learning_rate": 7.972183802059321e-06, "log_odds_chosen": 10.468923568725586, "log_odds_ratio": -0.00040179112693294883, "logits/chosen": -0.8248478174209595, "logits/rejected": -0.7545952796936035, "logps/chosen": -0.0006409147172234952, "logps/rejected": -2.1354901790618896, "loss": 0.4791, "nll_loss": 0.11974728107452393, "rewards/accuracies": 1.0, "rewards/chosen": -6.409147317754105e-05, "rewards/margins": 0.21348492801189423, "rewards/rejected": -0.21354901790618896, "step": 12385 }, { "epoch": 8.565698478561549, "grad_norm": 3.5596377849578857, "learning_rate": 7.968341785769172e-06, "log_odds_chosen": 11.15113353729248, "log_odds_ratio": -2.0991963538108394e-05, "logits/chosen": -0.4224829375743866, "logits/rejected": -0.3148638904094696, "logps/chosen": -7.999094668775797e-05, "logps/rejected": -1.7266626358032227, "loss": 0.3593, "nll_loss": 0.08982215076684952, "rewards/accuracies": 1.0, "rewards/chosen": -7.999095032573678e-06, "rewards/margins": 0.1726582646369934, "rewards/rejected": -0.1726662665605545, "step": 12386 }, { "epoch": 8.566390041493776, "grad_norm": 3.6879234313964844, "learning_rate": 7.964499769479023e-06, "log_odds_chosen": 9.973072052001953, "log_odds_ratio": -0.000349164882209152, "logits/chosen": -0.3515580892562866, "logits/rejected": -0.3518614172935486, "logps/chosen": -0.0003501469036564231, "logps/rejected": -1.7841719388961792, "loss": 0.5321, "nll_loss": 0.13299128413200378, "rewards/accuracies": 1.0, "rewards/chosen": -3.501469473121688e-05, "rewards/margins": 0.17838218808174133, "rewards/rejected": -0.1784171760082245, "step": 12387 }, { "epoch": 8.567081604426003, "grad_norm": 5.057744026184082, "learning_rate": 7.960657753188875e-06, "log_odds_chosen": 10.082660675048828, "log_odds_ratio": -0.11251819878816605, "logits/chosen": 0.10367824137210846, "logits/rejected": 0.042181506752967834, "logps/chosen": -0.01484946720302105, "logps/rejected": -1.9130041599273682, "loss": 0.4188, "nll_loss": 0.09344511479139328, "rewards/accuracies": 0.875, "rewards/chosen": -0.0014849468134343624, "rewards/margins": 0.1898154616355896, "rewards/rejected": -0.1913003921508789, "step": 12388 }, { "epoch": 8.56777316735823, "grad_norm": 5.113132953643799, "learning_rate": 7.956815736898724e-06, "log_odds_chosen": 11.004137992858887, "log_odds_ratio": -4.713025555247441e-05, "logits/chosen": -0.3097335696220398, "logits/rejected": -0.28606897592544556, "logps/chosen": -0.0001457059697713703, "logps/rejected": -2.219068765640259, "loss": 0.5757, "nll_loss": 0.14392338693141937, "rewards/accuracies": 1.0, "rewards/chosen": -1.457059715903597e-05, "rewards/margins": 0.2218923270702362, "rewards/rejected": -0.2219068855047226, "step": 12389 }, { "epoch": 8.568464730290456, "grad_norm": 3.081252098083496, "learning_rate": 7.952973720608575e-06, "log_odds_chosen": 9.218973159790039, "log_odds_ratio": -0.0005203372566029429, "logits/chosen": -0.31809869408607483, "logits/rejected": -0.4148636758327484, "logps/chosen": -0.00035823852522298694, "logps/rejected": -1.401037335395813, "loss": 0.4117, "nll_loss": 0.10286275297403336, "rewards/accuracies": 1.0, "rewards/chosen": -3.582385033951141e-05, "rewards/margins": 0.1400679051876068, "rewards/rejected": -0.14010372757911682, "step": 12390 }, { "epoch": 8.569156293222683, "grad_norm": 3.4991044998168945, "learning_rate": 7.949131704318427e-06, "log_odds_chosen": 10.65394115447998, "log_odds_ratio": -0.00010200730321230367, "logits/chosen": -0.5947825312614441, "logits/rejected": -0.6780396699905396, "logps/chosen": -0.00046277031651698053, "logps/rejected": -2.273961067199707, "loss": 0.3214, "nll_loss": 0.08033182471990585, "rewards/accuracies": 1.0, "rewards/chosen": -4.627702946891077e-05, "rewards/margins": 0.22734983265399933, "rewards/rejected": -0.22739610075950623, "step": 12391 }, { "epoch": 8.56984785615491, "grad_norm": 5.116530895233154, "learning_rate": 7.945289688028278e-06, "log_odds_chosen": 10.812143325805664, "log_odds_ratio": -0.0005723558133468032, "logits/chosen": -0.3249131739139557, "logits/rejected": -0.38664811849594116, "logps/chosen": -0.0001360624737571925, "logps/rejected": -1.6817561388015747, "loss": 0.3159, "nll_loss": 0.07892321795225143, "rewards/accuracies": 1.0, "rewards/chosen": -1.3606247193820309e-05, "rewards/margins": 0.168162003159523, "rewards/rejected": -0.168175607919693, "step": 12392 }, { "epoch": 8.570539419087137, "grad_norm": 3.7217743396759033, "learning_rate": 7.941447671738129e-06, "log_odds_chosen": 11.626294136047363, "log_odds_ratio": -1.4052928236196749e-05, "logits/chosen": -0.10477419197559357, "logits/rejected": -0.15939825773239136, "logps/chosen": -9.7850919701159e-05, "logps/rejected": -2.1122097969055176, "loss": 0.4629, "nll_loss": 0.11572445929050446, "rewards/accuracies": 1.0, "rewards/chosen": -9.78509160631802e-06, "rewards/margins": 0.2112111747264862, "rewards/rejected": -0.21122097969055176, "step": 12393 }, { "epoch": 8.571230982019364, "grad_norm": 3.8822696208953857, "learning_rate": 7.93760565544798e-06, "log_odds_chosen": 11.469244003295898, "log_odds_ratio": -3.676761480164714e-05, "logits/chosen": -0.7538586258888245, "logits/rejected": -0.7865060567855835, "logps/chosen": -0.00016852424596436322, "logps/rejected": -2.6194863319396973, "loss": 0.3714, "nll_loss": 0.09284963458776474, "rewards/accuracies": 1.0, "rewards/chosen": -1.685242386884056e-05, "rewards/margins": 0.26193177700042725, "rewards/rejected": -0.2619486451148987, "step": 12394 }, { "epoch": 8.57192254495159, "grad_norm": 4.259050369262695, "learning_rate": 7.93376363915783e-06, "log_odds_chosen": 10.146892547607422, "log_odds_ratio": -0.0005833891336806118, "logits/chosen": 0.22315584123134613, "logits/rejected": -0.10273627936840057, "logps/chosen": -0.0004596480284817517, "logps/rejected": -2.2382736206054688, "loss": 0.4593, "nll_loss": 0.11477246880531311, "rewards/accuracies": 1.0, "rewards/chosen": -4.596479993779212e-05, "rewards/margins": 0.22378139197826385, "rewards/rejected": -0.22382736206054688, "step": 12395 }, { "epoch": 8.572614107883817, "grad_norm": 4.730365753173828, "learning_rate": 7.929921622867681e-06, "log_odds_chosen": 10.769886016845703, "log_odds_ratio": -5.231639079283923e-05, "logits/chosen": -0.2158849686384201, "logits/rejected": -0.21605895459651947, "logps/chosen": -0.0001321196323260665, "logps/rejected": -1.861050009727478, "loss": 0.4505, "nll_loss": 0.11261654645204544, "rewards/accuracies": 1.0, "rewards/chosen": -1.3211963050707709e-05, "rewards/margins": 0.18609178066253662, "rewards/rejected": -0.18610499799251556, "step": 12396 }, { "epoch": 8.573305670816044, "grad_norm": 3.3090145587921143, "learning_rate": 7.926079606577533e-06, "log_odds_chosen": 12.224320411682129, "log_odds_ratio": -7.869349246902857e-06, "logits/chosen": -0.3469293713569641, "logits/rejected": -0.39128121733665466, "logps/chosen": -0.00012052787496941164, "logps/rejected": -3.163099765777588, "loss": 0.5273, "nll_loss": 0.13183526694774628, "rewards/accuracies": 1.0, "rewards/chosen": -1.2052787496941164e-05, "rewards/margins": 0.3162979483604431, "rewards/rejected": -0.31630995869636536, "step": 12397 }, { "epoch": 8.57399723374827, "grad_norm": 3.909712553024292, "learning_rate": 7.922237590287383e-06, "log_odds_chosen": 10.657337188720703, "log_odds_ratio": -7.064934470690787e-05, "logits/chosen": -0.21469080448150635, "logits/rejected": -0.3312116861343384, "logps/chosen": -0.0002643395564518869, "logps/rejected": -1.6290345191955566, "loss": 0.4238, "nll_loss": 0.10594627261161804, "rewards/accuracies": 1.0, "rewards/chosen": -2.643395600898657e-05, "rewards/margins": 0.16287702322006226, "rewards/rejected": -0.16290345788002014, "step": 12398 }, { "epoch": 8.574688796680498, "grad_norm": 4.151533603668213, "learning_rate": 7.918395573997233e-06, "log_odds_chosen": 11.33218765258789, "log_odds_ratio": -2.8675931389443576e-05, "logits/chosen": -0.005869865417480469, "logits/rejected": -0.07197088748216629, "logps/chosen": -0.00016606459394097328, "logps/rejected": -2.044290781021118, "loss": 0.3302, "nll_loss": 0.082536980509758, "rewards/accuracies": 1.0, "rewards/chosen": -1.6606458302703686e-05, "rewards/margins": 0.20441249012947083, "rewards/rejected": -0.20442909002304077, "step": 12399 }, { "epoch": 8.575380359612724, "grad_norm": 3.3870725631713867, "learning_rate": 7.914553557707086e-06, "log_odds_chosen": 11.454788208007812, "log_odds_ratio": -1.4353579899761826e-05, "logits/chosen": -0.4412459433078766, "logits/rejected": -0.455695241689682, "logps/chosen": -4.6074765123194084e-05, "logps/rejected": -1.6434885263442993, "loss": 0.4064, "nll_loss": 0.10160781443119049, "rewards/accuracies": 1.0, "rewards/chosen": -4.607476967066759e-06, "rewards/margins": 0.16434425115585327, "rewards/rejected": -0.16434885561466217, "step": 12400 }, { "epoch": 8.576071922544951, "grad_norm": 7.673307418823242, "learning_rate": 7.910711541416936e-06, "log_odds_chosen": 9.517881393432617, "log_odds_ratio": -0.00014684451161883771, "logits/chosen": -0.3621896505355835, "logits/rejected": -0.4492979049682617, "logps/chosen": -0.0006431568181142211, "logps/rejected": -1.7415471076965332, "loss": 0.75, "nll_loss": 0.18749095499515533, "rewards/accuracies": 1.0, "rewards/chosen": -6.431568181142211e-05, "rewards/margins": 0.17409038543701172, "rewards/rejected": -0.17415471374988556, "step": 12401 }, { "epoch": 8.576763485477178, "grad_norm": 2.8077375888824463, "learning_rate": 7.906869525126787e-06, "log_odds_chosen": 10.993677139282227, "log_odds_ratio": -6.793371721869335e-05, "logits/chosen": -0.23306405544281006, "logits/rejected": -0.24650588631629944, "logps/chosen": -7.878048199927434e-05, "logps/rejected": -1.7447535991668701, "loss": 0.2995, "nll_loss": 0.07486958801746368, "rewards/accuracies": 1.0, "rewards/chosen": -7.878048563725315e-06, "rewards/margins": 0.17446748912334442, "rewards/rejected": -0.17447537183761597, "step": 12402 }, { "epoch": 8.577455048409405, "grad_norm": 3.4173429012298584, "learning_rate": 7.903027508836638e-06, "log_odds_chosen": 10.148531913757324, "log_odds_ratio": -0.0002441834658384323, "logits/chosen": -0.2620766758918762, "logits/rejected": -0.38100454211235046, "logps/chosen": -0.0008849852601997554, "logps/rejected": -2.099001169204712, "loss": 0.3004, "nll_loss": 0.07506556063890457, "rewards/accuracies": 1.0, "rewards/chosen": -8.849853475112468e-05, "rewards/margins": 0.20981164276599884, "rewards/rejected": -0.2099001407623291, "step": 12403 }, { "epoch": 8.578146611341632, "grad_norm": 3.674800395965576, "learning_rate": 7.899185492546489e-06, "log_odds_chosen": 9.341479301452637, "log_odds_ratio": -0.00031686053262092173, "logits/chosen": 0.1067897230386734, "logits/rejected": 0.0006720144301652908, "logps/chosen": -0.00023755063011776656, "logps/rejected": -1.1609251499176025, "loss": 0.5396, "nll_loss": 0.1348668336868286, "rewards/accuracies": 1.0, "rewards/chosen": -2.3755063011776656e-05, "rewards/margins": 0.1160687655210495, "rewards/rejected": -0.11609251797199249, "step": 12404 }, { "epoch": 8.578838174273859, "grad_norm": 3.9436371326446533, "learning_rate": 7.89534347625634e-06, "log_odds_chosen": 11.42556381225586, "log_odds_ratio": -0.00021511407976504415, "logits/chosen": 0.05667072534561157, "logits/rejected": -0.06897382438182831, "logps/chosen": -0.0002811176818795502, "logps/rejected": -2.468977928161621, "loss": 0.4235, "nll_loss": 0.10585974156856537, "rewards/accuracies": 1.0, "rewards/chosen": -2.8111768187955022e-05, "rewards/margins": 0.24686969816684723, "rewards/rejected": -0.24689781665802002, "step": 12405 }, { "epoch": 8.579529737206085, "grad_norm": 5.153789043426514, "learning_rate": 7.891501459966192e-06, "log_odds_chosen": 11.876541137695312, "log_odds_ratio": -0.00033157187863253057, "logits/chosen": -0.26482778787612915, "logits/rejected": -0.08884061872959137, "logps/chosen": -0.0004525747208390385, "logps/rejected": -3.064087390899658, "loss": 0.5616, "nll_loss": 0.14036419987678528, "rewards/accuracies": 1.0, "rewards/chosen": -4.525747135630809e-05, "rewards/margins": 0.3063634932041168, "rewards/rejected": -0.30640873312950134, "step": 12406 }, { "epoch": 8.580221300138312, "grad_norm": 4.294795513153076, "learning_rate": 7.887659443676041e-06, "log_odds_chosen": 9.854249954223633, "log_odds_ratio": -0.0002540835994295776, "logits/chosen": -0.417694628238678, "logits/rejected": -0.4679708480834961, "logps/chosen": -0.0001627085730433464, "logps/rejected": -1.143477201461792, "loss": 0.3042, "nll_loss": 0.07602757960557938, "rewards/accuracies": 1.0, "rewards/chosen": -1.627085657673888e-05, "rewards/margins": 0.11433145403862, "rewards/rejected": -0.11434773355722427, "step": 12407 }, { "epoch": 8.58091286307054, "grad_norm": 6.635833740234375, "learning_rate": 7.883817427385892e-06, "log_odds_chosen": 12.390829086303711, "log_odds_ratio": -5.070034239906818e-05, "logits/chosen": -0.33184492588043213, "logits/rejected": -0.35565727949142456, "logps/chosen": -0.00014887124416418374, "logps/rejected": -3.6356828212738037, "loss": 0.5374, "nll_loss": 0.1343454271554947, "rewards/accuracies": 1.0, "rewards/chosen": -1.4887124962115195e-05, "rewards/margins": 0.36355340480804443, "rewards/rejected": -0.3635683059692383, "step": 12408 }, { "epoch": 8.581604426002766, "grad_norm": 4.665489673614502, "learning_rate": 7.879975411095744e-06, "log_odds_chosen": 10.724321365356445, "log_odds_ratio": -7.045797974569723e-05, "logits/chosen": -0.4190210700035095, "logits/rejected": -0.4892667531967163, "logps/chosen": -0.00021666633256245404, "logps/rejected": -2.175072431564331, "loss": 0.3865, "nll_loss": 0.09662678092718124, "rewards/accuracies": 1.0, "rewards/chosen": -2.1666633983841166e-05, "rewards/margins": 0.21748557686805725, "rewards/rejected": -0.2175072431564331, "step": 12409 }, { "epoch": 8.582295988934993, "grad_norm": 3.4573514461517334, "learning_rate": 7.876133394805595e-06, "log_odds_chosen": 11.061342239379883, "log_odds_ratio": -1.9485076336422935e-05, "logits/chosen": -0.38122037053108215, "logits/rejected": -0.3857944905757904, "logps/chosen": -0.00016124022658914328, "logps/rejected": -2.0994670391082764, "loss": 0.3621, "nll_loss": 0.0905316025018692, "rewards/accuracies": 1.0, "rewards/chosen": -1.6124022295116447e-05, "rewards/margins": 0.20993059873580933, "rewards/rejected": -0.20994670689105988, "step": 12410 }, { "epoch": 8.58298755186722, "grad_norm": 4.032299518585205, "learning_rate": 7.872291378515446e-06, "log_odds_chosen": 11.768798828125, "log_odds_ratio": -1.0957606718875468e-05, "logits/chosen": -0.13458450138568878, "logits/rejected": -0.19058941304683685, "logps/chosen": -9.453172970097512e-05, "logps/rejected": -2.56960129737854, "loss": 0.5784, "nll_loss": 0.14460241794586182, "rewards/accuracies": 1.0, "rewards/chosen": -9.453172424400691e-06, "rewards/margins": 0.2569506764411926, "rewards/rejected": -0.2569601535797119, "step": 12411 }, { "epoch": 8.583679114799446, "grad_norm": 7.356479644775391, "learning_rate": 7.868449362225295e-06, "log_odds_chosen": 10.914276123046875, "log_odds_ratio": -2.452870467095636e-05, "logits/chosen": -0.6373881101608276, "logits/rejected": -0.7422344088554382, "logps/chosen": -0.00029046228155493736, "logps/rejected": -2.442608594894409, "loss": 0.5577, "nll_loss": 0.13943205773830414, "rewards/accuracies": 1.0, "rewards/chosen": -2.9046228519291617e-05, "rewards/margins": 0.24423182010650635, "rewards/rejected": -0.24426086246967316, "step": 12412 }, { "epoch": 8.584370677731673, "grad_norm": 2.993687391281128, "learning_rate": 7.864607345935147e-06, "log_odds_chosen": 10.779678344726562, "log_odds_ratio": -0.00016207742737606168, "logits/chosen": -0.6277985572814941, "logits/rejected": -0.5838664770126343, "logps/chosen": -0.000505428877659142, "logps/rejected": -2.434213638305664, "loss": 0.3336, "nll_loss": 0.08338885009288788, "rewards/accuracies": 1.0, "rewards/chosen": -5.054288340033963e-05, "rewards/margins": 0.24337083101272583, "rewards/rejected": -0.24342137575149536, "step": 12413 }, { "epoch": 8.5850622406639, "grad_norm": 3.3739326000213623, "learning_rate": 7.860765329644998e-06, "log_odds_chosen": 11.435413360595703, "log_odds_ratio": -6.362981366692111e-05, "logits/chosen": -0.43177902698516846, "logits/rejected": -0.5248620510101318, "logps/chosen": -0.00018691572768148035, "logps/rejected": -2.1464920043945312, "loss": 0.5922, "nll_loss": 0.14805501699447632, "rewards/accuracies": 1.0, "rewards/chosen": -1.8691573131945916e-05, "rewards/margins": 0.21463051438331604, "rewards/rejected": -0.2146492302417755, "step": 12414 }, { "epoch": 8.585753803596127, "grad_norm": 4.994083881378174, "learning_rate": 7.856923313354849e-06, "log_odds_chosen": 10.883642196655273, "log_odds_ratio": -0.00011137684487039223, "logits/chosen": 0.15582235157489777, "logits/rejected": 0.09947144240140915, "logps/chosen": -0.00015272808377631009, "logps/rejected": -2.2074105739593506, "loss": 0.7234, "nll_loss": 0.1808408498764038, "rewards/accuracies": 1.0, "rewards/chosen": -1.527280983282253e-05, "rewards/margins": 0.22072578966617584, "rewards/rejected": -0.22074106335639954, "step": 12415 }, { "epoch": 8.586445366528354, "grad_norm": 4.330446720123291, "learning_rate": 7.853081297064701e-06, "log_odds_chosen": 11.47640609741211, "log_odds_ratio": -2.2898813767824322e-05, "logits/chosen": -0.5861948728561401, "logits/rejected": -0.5868821740150452, "logps/chosen": -0.0001652640785323456, "logps/rejected": -2.2207837104797363, "loss": 0.6718, "nll_loss": 0.16793853044509888, "rewards/accuracies": 1.0, "rewards/chosen": -1.6526406398043036e-05, "rewards/margins": 0.2220618724822998, "rewards/rejected": -0.22207841277122498, "step": 12416 }, { "epoch": 8.58713692946058, "grad_norm": 4.56218957901001, "learning_rate": 7.84923928077455e-06, "log_odds_chosen": 9.333669662475586, "log_odds_ratio": -0.00046048639342188835, "logits/chosen": -0.4840073883533478, "logits/rejected": -0.40228593349456787, "logps/chosen": -0.00044993084156885743, "logps/rejected": -1.4231839179992676, "loss": 0.3605, "nll_loss": 0.09007444977760315, "rewards/accuracies": 1.0, "rewards/chosen": -4.4993084884481505e-05, "rewards/margins": 0.14227339625358582, "rewards/rejected": -0.14231839776039124, "step": 12417 }, { "epoch": 8.587828492392807, "grad_norm": 2.649446964263916, "learning_rate": 7.845397264484401e-06, "log_odds_chosen": 11.239232063293457, "log_odds_ratio": -2.4983983166748658e-05, "logits/chosen": -0.27129191160202026, "logits/rejected": -0.3342203199863434, "logps/chosen": -0.00010748470958787948, "logps/rejected": -2.064061164855957, "loss": 0.3006, "nll_loss": 0.07513561099767685, "rewards/accuracies": 1.0, "rewards/chosen": -1.0748471140686888e-05, "rewards/margins": 0.20639537274837494, "rewards/rejected": -0.2064061164855957, "step": 12418 }, { "epoch": 8.588520055325034, "grad_norm": 2.885756492614746, "learning_rate": 7.841555248194253e-06, "log_odds_chosen": 10.988302230834961, "log_odds_ratio": -7.131589518394321e-05, "logits/chosen": -0.6593400835990906, "logits/rejected": -0.646030068397522, "logps/chosen": -0.00020226562628522515, "logps/rejected": -2.248378276824951, "loss": 0.2897, "nll_loss": 0.07242387533187866, "rewards/accuracies": 1.0, "rewards/chosen": -2.0226561900926754e-05, "rewards/margins": 0.22481761872768402, "rewards/rejected": -0.22483783960342407, "step": 12419 }, { "epoch": 8.589211618257261, "grad_norm": 3.9579591751098633, "learning_rate": 7.837713231904104e-06, "log_odds_chosen": 10.152627944946289, "log_odds_ratio": -0.00012383170542307198, "logits/chosen": -0.15010769665241241, "logits/rejected": -0.30715739727020264, "logps/chosen": -0.0010937991319224238, "logps/rejected": -1.7039655447006226, "loss": 0.3133, "nll_loss": 0.07831807434558868, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010937990737147629, "rewards/margins": 0.17028717696666718, "rewards/rejected": -0.1703965663909912, "step": 12420 }, { "epoch": 8.589903181189488, "grad_norm": 11.929691314697266, "learning_rate": 7.833871215613955e-06, "log_odds_chosen": 11.3349609375, "log_odds_ratio": -3.4468917874619365e-05, "logits/chosen": -0.407043993473053, "logits/rejected": -0.4227757155895233, "logps/chosen": -9.850895730778575e-05, "logps/rejected": -2.127613067626953, "loss": 0.3972, "nll_loss": 0.09930659830570221, "rewards/accuracies": 1.0, "rewards/chosen": -9.850896276475396e-06, "rewards/margins": 0.21275146305561066, "rewards/rejected": -0.2127612978219986, "step": 12421 }, { "epoch": 8.590594744121715, "grad_norm": 5.26082706451416, "learning_rate": 7.830029199323806e-06, "log_odds_chosen": 12.367096900939941, "log_odds_ratio": -1.268644609808689e-05, "logits/chosen": -0.6866305470466614, "logits/rejected": -0.640007495880127, "logps/chosen": -0.0001297138660447672, "logps/rejected": -3.2089452743530273, "loss": 0.3444, "nll_loss": 0.08609630912542343, "rewards/accuracies": 1.0, "rewards/chosen": -1.2971387150173541e-05, "rewards/margins": 0.32088154554367065, "rewards/rejected": -0.3208945393562317, "step": 12422 }, { "epoch": 8.591286307053942, "grad_norm": 4.022059917449951, "learning_rate": 7.826187183033656e-06, "log_odds_chosen": 11.207496643066406, "log_odds_ratio": -2.807136843330227e-05, "logits/chosen": -0.17429150640964508, "logits/rejected": -0.2393246293067932, "logps/chosen": -0.00024039827985689044, "logps/rejected": -2.346452236175537, "loss": 0.4454, "nll_loss": 0.11134126782417297, "rewards/accuracies": 1.0, "rewards/chosen": -2.4039827621891163e-05, "rewards/margins": 0.23462116718292236, "rewards/rejected": -0.23464521765708923, "step": 12423 }, { "epoch": 8.591977869986168, "grad_norm": 7.19039249420166, "learning_rate": 7.822345166743507e-06, "log_odds_chosen": 11.800032615661621, "log_odds_ratio": -1.4056697182240896e-05, "logits/chosen": -0.503092348575592, "logits/rejected": -0.5850523710250854, "logps/chosen": -0.00042195821879431605, "logps/rejected": -2.707442045211792, "loss": 0.4701, "nll_loss": 0.11752810329198837, "rewards/accuracies": 1.0, "rewards/chosen": -4.219581751385704e-05, "rewards/margins": 0.2707020044326782, "rewards/rejected": -0.2707442045211792, "step": 12424 }, { "epoch": 8.592669432918395, "grad_norm": 3.98699951171875, "learning_rate": 7.81850315045336e-06, "log_odds_chosen": 9.630369186401367, "log_odds_ratio": -0.000857133767567575, "logits/chosen": -0.7769922018051147, "logits/rejected": -0.6913413405418396, "logps/chosen": -0.0009730067104101181, "logps/rejected": -1.8855459690093994, "loss": 0.3904, "nll_loss": 0.09750308841466904, "rewards/accuracies": 1.0, "rewards/chosen": -9.730067540658638e-05, "rewards/margins": 0.18845731019973755, "rewards/rejected": -0.18855461478233337, "step": 12425 }, { "epoch": 8.593360995850622, "grad_norm": 2.6930856704711914, "learning_rate": 7.814661134163209e-06, "log_odds_chosen": 11.874320030212402, "log_odds_ratio": -4.8003526899265125e-05, "logits/chosen": -0.37165212631225586, "logits/rejected": -0.37712541222572327, "logps/chosen": -0.0001580321550136432, "logps/rejected": -2.784151554107666, "loss": 0.2959, "nll_loss": 0.0739690363407135, "rewards/accuracies": 1.0, "rewards/chosen": -1.5803216228960082e-05, "rewards/margins": 0.27839934825897217, "rewards/rejected": -0.27841517329216003, "step": 12426 }, { "epoch": 8.594052558782849, "grad_norm": 2.8228485584259033, "learning_rate": 7.81081911787306e-06, "log_odds_chosen": 10.419713973999023, "log_odds_ratio": -7.389950769720599e-05, "logits/chosen": 0.11854930967092514, "logits/rejected": 0.037088390439748764, "logps/chosen": -0.00019729827181436121, "logps/rejected": -1.8497958183288574, "loss": 0.3051, "nll_loss": 0.07627741247415543, "rewards/accuracies": 1.0, "rewards/chosen": -1.9729828636627644e-05, "rewards/margins": 0.18495984375476837, "rewards/rejected": -0.18497957289218903, "step": 12427 }, { "epoch": 8.594744121715076, "grad_norm": 3.9206323623657227, "learning_rate": 7.806977101582912e-06, "log_odds_chosen": 12.833907127380371, "log_odds_ratio": -7.3549958869989496e-06, "logits/chosen": -0.41607871651649475, "logits/rejected": -0.4357336461544037, "logps/chosen": -0.0001064469397533685, "logps/rejected": -3.4070348739624023, "loss": 0.3021, "nll_loss": 0.07551757246255875, "rewards/accuracies": 1.0, "rewards/chosen": -1.0644693247741088e-05, "rewards/margins": 0.3406928479671478, "rewards/rejected": -0.34070348739624023, "step": 12428 }, { "epoch": 8.595435684647303, "grad_norm": 4.900330543518066, "learning_rate": 7.803135085292763e-06, "log_odds_chosen": 10.996088981628418, "log_odds_ratio": -4.514288957579993e-05, "logits/chosen": -0.7172442078590393, "logits/rejected": -0.7768953442573547, "logps/chosen": -0.00010613269114401191, "logps/rejected": -1.6444897651672363, "loss": 0.3359, "nll_loss": 0.08397156745195389, "rewards/accuracies": 1.0, "rewards/chosen": -1.0613269296300132e-05, "rewards/margins": 0.1644383668899536, "rewards/rejected": -0.16444897651672363, "step": 12429 }, { "epoch": 8.59612724757953, "grad_norm": 3.610318899154663, "learning_rate": 7.799293069002613e-06, "log_odds_chosen": 11.238482475280762, "log_odds_ratio": -9.320876415586099e-05, "logits/chosen": -0.23434729874134064, "logits/rejected": -0.25972285866737366, "logps/chosen": -0.00015973681001923978, "logps/rejected": -2.5857834815979004, "loss": 0.3783, "nll_loss": 0.09457194060087204, "rewards/accuracies": 1.0, "rewards/chosen": -1.597368100192398e-05, "rewards/margins": 0.2585623562335968, "rewards/rejected": -0.2585783302783966, "step": 12430 }, { "epoch": 8.596818810511756, "grad_norm": 2.846229314804077, "learning_rate": 7.795451052712464e-06, "log_odds_chosen": 11.087169647216797, "log_odds_ratio": -2.728665822360199e-05, "logits/chosen": -0.5193180441856384, "logits/rejected": -0.49915337562561035, "logps/chosen": -0.00019525145762600005, "logps/rejected": -2.261341094970703, "loss": 0.2822, "nll_loss": 0.07054363936185837, "rewards/accuracies": 1.0, "rewards/chosen": -1.952514321601484e-05, "rewards/margins": 0.22611457109451294, "rewards/rejected": -0.22613409161567688, "step": 12431 }, { "epoch": 8.597510373443983, "grad_norm": 5.272435188293457, "learning_rate": 7.791609036422315e-06, "log_odds_chosen": 11.377605438232422, "log_odds_ratio": -2.1794385247631e-05, "logits/chosen": -0.6591401696205139, "logits/rejected": -0.6738142371177673, "logps/chosen": -0.00019038034952245653, "logps/rejected": -2.741790771484375, "loss": 0.4957, "nll_loss": 0.12391241639852524, "rewards/accuracies": 1.0, "rewards/chosen": -1.9038036043639295e-05, "rewards/margins": 0.2741600275039673, "rewards/rejected": -0.274179071187973, "step": 12432 }, { "epoch": 8.59820193637621, "grad_norm": 3.132481336593628, "learning_rate": 7.787767020132166e-06, "log_odds_chosen": 10.9615478515625, "log_odds_ratio": -8.768655243329704e-05, "logits/chosen": -0.422199010848999, "logits/rejected": -0.4062215983867645, "logps/chosen": -0.0003607200342230499, "logps/rejected": -2.757406234741211, "loss": 0.4979, "nll_loss": 0.1244717612862587, "rewards/accuracies": 1.0, "rewards/chosen": -3.607200414990075e-05, "rewards/margins": 0.2757045328617096, "rewards/rejected": -0.2757406234741211, "step": 12433 }, { "epoch": 8.598893499308437, "grad_norm": 3.0860936641693115, "learning_rate": 7.783925003842018e-06, "log_odds_chosen": 10.253957748413086, "log_odds_ratio": -8.260829781647772e-05, "logits/chosen": -0.5182685852050781, "logits/rejected": -0.5591882467269897, "logps/chosen": -0.000137411494506523, "logps/rejected": -1.4636437892913818, "loss": 0.3903, "nll_loss": 0.09755666553974152, "rewards/accuracies": 1.0, "rewards/chosen": -1.3741150723944884e-05, "rewards/margins": 0.14635063707828522, "rewards/rejected": -0.14636439085006714, "step": 12434 }, { "epoch": 8.599585062240664, "grad_norm": 4.983059883117676, "learning_rate": 7.780082987551867e-06, "log_odds_chosen": 11.465124130249023, "log_odds_ratio": -2.0820034478674643e-05, "logits/chosen": 0.12352912127971649, "logits/rejected": 0.09706210345029831, "logps/chosen": -0.0013850387185811996, "logps/rejected": -2.9979348182678223, "loss": 0.5263, "nll_loss": 0.13156157732009888, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013850386312697083, "rewards/margins": 0.299655020236969, "rewards/rejected": -0.2997935116291046, "step": 12435 }, { "epoch": 8.60027662517289, "grad_norm": 4.760752201080322, "learning_rate": 7.776240971261718e-06, "log_odds_chosen": 11.416793823242188, "log_odds_ratio": -8.722698112251237e-05, "logits/chosen": -0.33795422315597534, "logits/rejected": -0.3896262049674988, "logps/chosen": -0.0003545653016772121, "logps/rejected": -2.9029064178466797, "loss": 0.6167, "nll_loss": 0.15416058897972107, "rewards/accuracies": 1.0, "rewards/chosen": -3.545653453329578e-05, "rewards/margins": 0.29025518894195557, "rewards/rejected": -0.2902906537055969, "step": 12436 }, { "epoch": 8.600968188105117, "grad_norm": 26.122861862182617, "learning_rate": 7.77239895497157e-06, "log_odds_chosen": 10.64809513092041, "log_odds_ratio": -8.275601430796087e-05, "logits/chosen": -0.2887398600578308, "logits/rejected": -0.355773001909256, "logps/chosen": -0.00025288446340709925, "logps/rejected": -2.215735912322998, "loss": 0.4121, "nll_loss": 0.10302183032035828, "rewards/accuracies": 1.0, "rewards/chosen": -2.5288447432103567e-05, "rewards/margins": 0.22154831886291504, "rewards/rejected": -0.221573606133461, "step": 12437 }, { "epoch": 8.601659751037344, "grad_norm": 3.66646671295166, "learning_rate": 7.768556938681421e-06, "log_odds_chosen": 12.302300453186035, "log_odds_ratio": -8.563475603295956e-06, "logits/chosen": -0.06317726522684097, "logits/rejected": -0.10501933842897415, "logps/chosen": -0.00013157639477867633, "logps/rejected": -3.0213968753814697, "loss": 0.4363, "nll_loss": 0.10907794535160065, "rewards/accuracies": 1.0, "rewards/chosen": -1.3157639841665514e-05, "rewards/margins": 0.30212652683258057, "rewards/rejected": -0.3021396994590759, "step": 12438 }, { "epoch": 8.60235131396957, "grad_norm": 4.6148152351379395, "learning_rate": 7.764714922391272e-06, "log_odds_chosen": 11.005189895629883, "log_odds_ratio": -3.72265130863525e-05, "logits/chosen": -0.02495836466550827, "logits/rejected": -0.11913137137889862, "logps/chosen": -0.0002928634639829397, "logps/rejected": -2.1169567108154297, "loss": 0.6361, "nll_loss": 0.1590229868888855, "rewards/accuracies": 1.0, "rewards/chosen": -2.928634603449609e-05, "rewards/margins": 0.21166637539863586, "rewards/rejected": -0.21169567108154297, "step": 12439 }, { "epoch": 8.603042876901798, "grad_norm": 2.781829833984375, "learning_rate": 7.760872906101122e-06, "log_odds_chosen": 10.781734466552734, "log_odds_ratio": -9.079500159714371e-05, "logits/chosen": -0.6345353722572327, "logits/rejected": -0.6050483584403992, "logps/chosen": -0.000219700435991399, "logps/rejected": -1.947325587272644, "loss": 0.1992, "nll_loss": 0.04979780316352844, "rewards/accuracies": 1.0, "rewards/chosen": -2.197004323534202e-05, "rewards/margins": 0.19471058249473572, "rewards/rejected": -0.19473256170749664, "step": 12440 }, { "epoch": 8.603734439834025, "grad_norm": 3.6431546211242676, "learning_rate": 7.757030889810973e-06, "log_odds_chosen": 11.355770111083984, "log_odds_ratio": -3.7065976357553154e-05, "logits/chosen": -0.2684091329574585, "logits/rejected": -0.22216904163360596, "logps/chosen": -0.00013720057904720306, "logps/rejected": -2.2910001277923584, "loss": 0.4009, "nll_loss": 0.10022085905075073, "rewards/accuracies": 1.0, "rewards/chosen": -1.3720056813326664e-05, "rewards/margins": 0.22908629477024078, "rewards/rejected": -0.22910000383853912, "step": 12441 }, { "epoch": 8.604426002766251, "grad_norm": 4.933355808258057, "learning_rate": 7.753188873520824e-06, "log_odds_chosen": 10.542715072631836, "log_odds_ratio": -6.371807830873877e-05, "logits/chosen": -0.36058422923088074, "logits/rejected": -0.39982569217681885, "logps/chosen": -0.0005256114527583122, "logps/rejected": -2.4589436054229736, "loss": 0.5143, "nll_loss": 0.12855909764766693, "rewards/accuracies": 1.0, "rewards/chosen": -5.256115036900155e-05, "rewards/margins": 0.24584180116653442, "rewards/rejected": -0.24589437246322632, "step": 12442 }, { "epoch": 8.605117565698478, "grad_norm": 3.6169495582580566, "learning_rate": 7.749346857230676e-06, "log_odds_chosen": 10.901447296142578, "log_odds_ratio": -0.00013576316996477544, "logits/chosen": -0.3425856828689575, "logits/rejected": -0.526218831539154, "logps/chosen": -0.00019560789223760366, "logps/rejected": -1.9449979066848755, "loss": 0.3021, "nll_loss": 0.07550856471061707, "rewards/accuracies": 1.0, "rewards/chosen": -1.9560789951356128e-05, "rewards/margins": 0.19448024034500122, "rewards/rejected": -0.19449979066848755, "step": 12443 }, { "epoch": 8.605809128630705, "grad_norm": 7.017158508300781, "learning_rate": 7.745504840940525e-06, "log_odds_chosen": 11.732519149780273, "log_odds_ratio": -1.1716860171873122e-05, "logits/chosen": -0.661844789981842, "logits/rejected": -0.7151435613632202, "logps/chosen": -9.2258196673356e-05, "logps/rejected": -2.1086206436157227, "loss": 0.4627, "nll_loss": 0.11568126827478409, "rewards/accuracies": 1.0, "rewards/chosen": -9.2258196673356e-06, "rewards/margins": 0.21085286140441895, "rewards/rejected": -0.21086208522319794, "step": 12444 }, { "epoch": 8.606500691562932, "grad_norm": 4.3740010261535645, "learning_rate": 7.741662824650376e-06, "log_odds_chosen": 11.738712310791016, "log_odds_ratio": -1.8135553546017036e-05, "logits/chosen": -0.4909563958644867, "logits/rejected": -0.5950556397438049, "logps/chosen": -8.437960059382021e-05, "logps/rejected": -2.2165842056274414, "loss": 0.3897, "nll_loss": 0.09741343557834625, "rewards/accuracies": 1.0, "rewards/chosen": -8.437959877483081e-06, "rewards/margins": 0.22164995968341827, "rewards/rejected": -0.22165840864181519, "step": 12445 }, { "epoch": 8.607192254495159, "grad_norm": 3.511035680770874, "learning_rate": 7.737820808360227e-06, "log_odds_chosen": 11.519828796386719, "log_odds_ratio": -0.0002825894916895777, "logits/chosen": -0.09426072239875793, "logits/rejected": -0.2573559582233429, "logps/chosen": -0.0005895392969250679, "logps/rejected": -3.521434783935547, "loss": 0.4748, "nll_loss": 0.1186603531241417, "rewards/accuracies": 1.0, "rewards/chosen": -5.895393042010255e-05, "rewards/margins": 0.35208457708358765, "rewards/rejected": -0.3521435260772705, "step": 12446 }, { "epoch": 8.607883817427386, "grad_norm": 2.761916160583496, "learning_rate": 7.73397879207008e-06, "log_odds_chosen": 10.481441497802734, "log_odds_ratio": -6.497966387541965e-05, "logits/chosen": -0.3832981586456299, "logits/rejected": -0.512840211391449, "logps/chosen": -0.0011071816552430391, "logps/rejected": -2.6986961364746094, "loss": 0.2443, "nll_loss": 0.061074674129486084, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011071816697949544, "rewards/margins": 0.2697589099407196, "rewards/rejected": -0.2698696255683899, "step": 12447 }, { "epoch": 8.608575380359612, "grad_norm": 4.164094924926758, "learning_rate": 7.73013677577993e-06, "log_odds_chosen": 9.988368034362793, "log_odds_ratio": -0.00010692431533243507, "logits/chosen": -0.6198857426643372, "logits/rejected": -0.5629023313522339, "logps/chosen": -0.00011031327449018136, "logps/rejected": -1.316125512123108, "loss": 0.368, "nll_loss": 0.09198827296495438, "rewards/accuracies": 1.0, "rewards/chosen": -1.1031327630917076e-05, "rewards/margins": 0.1316015124320984, "rewards/rejected": -0.13161253929138184, "step": 12448 }, { "epoch": 8.60926694329184, "grad_norm": 3.2900867462158203, "learning_rate": 7.72629475948978e-06, "log_odds_chosen": 11.8999662399292, "log_odds_ratio": -1.8995482605532743e-05, "logits/chosen": -0.4648793041706085, "logits/rejected": -0.4841955900192261, "logps/chosen": -0.00017940175894182175, "logps/rejected": -2.465615749359131, "loss": 0.3498, "nll_loss": 0.08744871616363525, "rewards/accuracies": 1.0, "rewards/chosen": -1.7940175894182175e-05, "rewards/margins": 0.24654364585876465, "rewards/rejected": -0.24656157195568085, "step": 12449 }, { "epoch": 8.609958506224066, "grad_norm": 3.0412774085998535, "learning_rate": 7.722452743199632e-06, "log_odds_chosen": 11.757244110107422, "log_odds_ratio": -1.554191840114072e-05, "logits/chosen": -0.47457292675971985, "logits/rejected": -0.43818992376327515, "logps/chosen": -8.676251309225336e-05, "logps/rejected": -2.4316704273223877, "loss": 0.3544, "nll_loss": 0.08859597146511078, "rewards/accuracies": 1.0, "rewards/chosen": -8.676252036821097e-06, "rewards/margins": 0.24315837025642395, "rewards/rejected": -0.24316704273223877, "step": 12450 }, { "epoch": 8.610650069156293, "grad_norm": 3.6737210750579834, "learning_rate": 7.718610726909482e-06, "log_odds_chosen": 11.129951477050781, "log_odds_ratio": -8.118825644487515e-05, "logits/chosen": -0.49961328506469727, "logits/rejected": -0.5508215427398682, "logps/chosen": -0.001525634783320129, "logps/rejected": -2.4258229732513428, "loss": 0.4255, "nll_loss": 0.10636596381664276, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015256348706316203, "rewards/margins": 0.24242974817752838, "rewards/rejected": -0.242582306265831, "step": 12451 }, { "epoch": 8.61134163208852, "grad_norm": 3.468428134918213, "learning_rate": 7.714768710619333e-06, "log_odds_chosen": 11.673049926757812, "log_odds_ratio": -3.096312502748333e-05, "logits/chosen": 0.007082067430019379, "logits/rejected": -0.08783157914876938, "logps/chosen": -0.00016287853941321373, "logps/rejected": -2.8624157905578613, "loss": 0.4315, "nll_loss": 0.10786047577857971, "rewards/accuracies": 1.0, "rewards/chosen": -1.6287855032715015e-05, "rewards/margins": 0.2862253189086914, "rewards/rejected": -0.2862415909767151, "step": 12452 }, { "epoch": 8.612033195020746, "grad_norm": 3.6790342330932617, "learning_rate": 7.710926694329184e-06, "log_odds_chosen": 10.868727684020996, "log_odds_ratio": -0.012142255902290344, "logits/chosen": -0.2326192855834961, "logits/rejected": -0.2559518814086914, "logps/chosen": -0.005303188692778349, "logps/rejected": -2.8267157077789307, "loss": 0.4519, "nll_loss": 0.11177083104848862, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005303188809193671, "rewards/margins": 0.28214123845100403, "rewards/rejected": -0.28267157077789307, "step": 12453 }, { "epoch": 8.612724757952973, "grad_norm": 4.647916316986084, "learning_rate": 7.707084678039035e-06, "log_odds_chosen": 9.514710426330566, "log_odds_ratio": -0.00031602318631485105, "logits/chosen": -0.09073175489902496, "logits/rejected": -0.037744827568531036, "logps/chosen": -0.00106034183409065, "logps/rejected": -1.9035536050796509, "loss": 0.3582, "nll_loss": 0.08952006697654724, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010603418195387349, "rewards/margins": 0.19024932384490967, "rewards/rejected": -0.1903553605079651, "step": 12454 }, { "epoch": 8.6134163208852, "grad_norm": 4.053160190582275, "learning_rate": 7.703242661748885e-06, "log_odds_chosen": 11.832403182983398, "log_odds_ratio": -1.3382677025219891e-05, "logits/chosen": -0.3503800928592682, "logits/rejected": -0.3921029567718506, "logps/chosen": -0.00010898220352828503, "logps/rejected": -2.5034584999084473, "loss": 0.5187, "nll_loss": 0.12968364357948303, "rewards/accuracies": 1.0, "rewards/chosen": -1.0898222171817906e-05, "rewards/margins": 0.2503349781036377, "rewards/rejected": -0.2503458261489868, "step": 12455 }, { "epoch": 8.614107883817427, "grad_norm": 3.5278303623199463, "learning_rate": 7.699400645458738e-06, "log_odds_chosen": 11.763395309448242, "log_odds_ratio": -1.0844772987184115e-05, "logits/chosen": -0.5048444867134094, "logits/rejected": -0.5952221751213074, "logps/chosen": -0.00012985311332158744, "logps/rejected": -2.556190013885498, "loss": 0.3979, "nll_loss": 0.09947910159826279, "rewards/accuracies": 1.0, "rewards/chosen": -1.2985312423552386e-05, "rewards/margins": 0.2556060254573822, "rewards/rejected": -0.25561901926994324, "step": 12456 }, { "epoch": 8.614799446749654, "grad_norm": 4.416454315185547, "learning_rate": 7.695558629168589e-06, "log_odds_chosen": 10.293468475341797, "log_odds_ratio": -0.00017848135030362755, "logits/chosen": -0.4820539355278015, "logits/rejected": -0.4339297115802765, "logps/chosen": -0.0003156516177114099, "logps/rejected": -1.7361714839935303, "loss": 0.2258, "nll_loss": 0.056426484137773514, "rewards/accuracies": 1.0, "rewards/chosen": -3.156516686431132e-05, "rewards/margins": 0.17358556389808655, "rewards/rejected": -0.1736171394586563, "step": 12457 }, { "epoch": 8.61549100968188, "grad_norm": 3.6610524654388428, "learning_rate": 7.691716612878438e-06, "log_odds_chosen": 11.170249938964844, "log_odds_ratio": -3.065021155634895e-05, "logits/chosen": -0.13752108812332153, "logits/rejected": -0.06596602499485016, "logps/chosen": -0.000118286392535083, "logps/rejected": -2.0699398517608643, "loss": 0.3302, "nll_loss": 0.082545205950737, "rewards/accuracies": 1.0, "rewards/chosen": -1.1828638889710419e-05, "rewards/margins": 0.20698216557502747, "rewards/rejected": -0.20699399709701538, "step": 12458 }, { "epoch": 8.616182572614107, "grad_norm": 3.788586378097534, "learning_rate": 7.68787459658829e-06, "log_odds_chosen": 10.646763801574707, "log_odds_ratio": -0.00024764935369603336, "logits/chosen": -0.470620334148407, "logits/rejected": -0.593694269657135, "logps/chosen": -0.0006167700048536062, "logps/rejected": -2.0630569458007812, "loss": 0.2904, "nll_loss": 0.07256941497325897, "rewards/accuracies": 1.0, "rewards/chosen": -6.167699757497758e-05, "rewards/margins": 0.20624405145645142, "rewards/rejected": -0.20630571246147156, "step": 12459 }, { "epoch": 8.616874135546334, "grad_norm": 2.969237804412842, "learning_rate": 7.68403258029814e-06, "log_odds_chosen": 10.207712173461914, "log_odds_ratio": -0.00015455170068889856, "logits/chosen": -0.785926878452301, "logits/rejected": -0.7482253909111023, "logps/chosen": -0.00015500865993089974, "logps/rejected": -1.4396579265594482, "loss": 0.3058, "nll_loss": 0.07643543183803558, "rewards/accuracies": 1.0, "rewards/chosen": -1.5500865629292093e-05, "rewards/margins": 0.14395028352737427, "rewards/rejected": -0.14396579563617706, "step": 12460 }, { "epoch": 8.617565698478561, "grad_norm": 4.449199676513672, "learning_rate": 7.680190564007992e-06, "log_odds_chosen": 11.42137336730957, "log_odds_ratio": -2.0058974769199267e-05, "logits/chosen": -0.28403031826019287, "logits/rejected": -0.32174474000930786, "logps/chosen": -0.0007978876237757504, "logps/rejected": -2.9671578407287598, "loss": 0.3856, "nll_loss": 0.0964074581861496, "rewards/accuracies": 1.0, "rewards/chosen": -7.978876965353265e-05, "rewards/margins": 0.2966359853744507, "rewards/rejected": -0.29671579599380493, "step": 12461 }, { "epoch": 8.618257261410788, "grad_norm": 2.8202242851257324, "learning_rate": 7.676348547717842e-06, "log_odds_chosen": 10.440109252929688, "log_odds_ratio": -0.00012012768274871632, "logits/chosen": -0.6182264089584351, "logits/rejected": -0.645176351070404, "logps/chosen": -0.00036001091939397156, "logps/rejected": -2.136899948120117, "loss": 0.3381, "nll_loss": 0.08450530469417572, "rewards/accuracies": 1.0, "rewards/chosen": -3.6001096304971725e-05, "rewards/margins": 0.21365398168563843, "rewards/rejected": -0.21368998289108276, "step": 12462 }, { "epoch": 8.618948824343015, "grad_norm": 5.376323223114014, "learning_rate": 7.672506531427693e-06, "log_odds_chosen": 10.457123756408691, "log_odds_ratio": -0.00016366233467124403, "logits/chosen": -0.18849360942840576, "logits/rejected": -0.015208382159471512, "logps/chosen": -0.00011773478763643652, "logps/rejected": -1.2152167558670044, "loss": 0.2718, "nll_loss": 0.06792550534009933, "rewards/accuracies": 1.0, "rewards/chosen": -1.177347803604789e-05, "rewards/margins": 0.12150990962982178, "rewards/rejected": -0.12152168154716492, "step": 12463 }, { "epoch": 8.619640387275242, "grad_norm": 5.826288223266602, "learning_rate": 7.668664515137544e-06, "log_odds_chosen": 11.298666954040527, "log_odds_ratio": -3.8496054912684485e-05, "logits/chosen": -0.18737944960594177, "logits/rejected": -0.30055534839630127, "logps/chosen": -0.0005287721287459135, "logps/rejected": -2.9021079540252686, "loss": 0.5334, "nll_loss": 0.13334029912948608, "rewards/accuracies": 1.0, "rewards/chosen": -5.287720705382526e-05, "rewards/margins": 0.2901579439640045, "rewards/rejected": -0.2902108132839203, "step": 12464 }, { "epoch": 8.620331950207468, "grad_norm": 2.5242791175842285, "learning_rate": 7.664822498847396e-06, "log_odds_chosen": 10.83395004272461, "log_odds_ratio": -0.0002878225641325116, "logits/chosen": 0.11042840778827667, "logits/rejected": -0.012141779065132141, "logps/chosen": -0.0005985196912661195, "logps/rejected": -2.4075498580932617, "loss": 0.2705, "nll_loss": 0.0676034688949585, "rewards/accuracies": 1.0, "rewards/chosen": -5.985196912661195e-05, "rewards/margins": 0.24069511890411377, "rewards/rejected": -0.24075497686862946, "step": 12465 }, { "epoch": 8.621023513139695, "grad_norm": 3.6309618949890137, "learning_rate": 7.660980482557247e-06, "log_odds_chosen": 10.47065258026123, "log_odds_ratio": -0.00021392585767898709, "logits/chosen": -0.21913385391235352, "logits/rejected": -0.24686682224273682, "logps/chosen": -0.00020930226310156286, "logps/rejected": -1.8738446235656738, "loss": 0.3847, "nll_loss": 0.0961625725030899, "rewards/accuracies": 1.0, "rewards/chosen": -2.0930227037752047e-05, "rewards/margins": 0.18736353516578674, "rewards/rejected": -0.1873844414949417, "step": 12466 }, { "epoch": 8.621715076071922, "grad_norm": 5.189695835113525, "learning_rate": 7.657138466267098e-06, "log_odds_chosen": 10.602476119995117, "log_odds_ratio": -0.00012163497740402818, "logits/chosen": -0.5747342705726624, "logits/rejected": -0.7193750739097595, "logps/chosen": -0.00024523091269657016, "logps/rejected": -2.1552419662475586, "loss": 0.5453, "nll_loss": 0.13631293177604675, "rewards/accuracies": 1.0, "rewards/chosen": -2.4523091269657016e-05, "rewards/margins": 0.2154996693134308, "rewards/rejected": -0.21552419662475586, "step": 12467 }, { "epoch": 8.622406639004149, "grad_norm": 3.0300586223602295, "learning_rate": 7.653296449976949e-06, "log_odds_chosen": 9.845947265625, "log_odds_ratio": -0.0002649944508448243, "logits/chosen": 0.23362484574317932, "logits/rejected": 0.2671985626220703, "logps/chosen": -0.0010978097561746836, "logps/rejected": -1.7431879043579102, "loss": 0.2776, "nll_loss": 0.06936274468898773, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010978097998304293, "rewards/margins": 0.17420899868011475, "rewards/rejected": -0.17431879043579102, "step": 12468 }, { "epoch": 8.623098201936376, "grad_norm": 3.6477975845336914, "learning_rate": 7.6494544336868e-06, "log_odds_chosen": 10.568387031555176, "log_odds_ratio": -0.00020521630358416587, "logits/chosen": -0.5410515666007996, "logits/rejected": -0.6348446011543274, "logps/chosen": -0.0006153516587801278, "logps/rejected": -2.5193915367126465, "loss": 0.4283, "nll_loss": 0.10704678297042847, "rewards/accuracies": 1.0, "rewards/chosen": -6.153517460916191e-05, "rewards/margins": 0.2518776059150696, "rewards/rejected": -0.25193914771080017, "step": 12469 }, { "epoch": 8.623789764868603, "grad_norm": 4.447237968444824, "learning_rate": 7.64561241739665e-06, "log_odds_chosen": 10.911580085754395, "log_odds_ratio": -2.6519053790252656e-05, "logits/chosen": -0.6546154618263245, "logits/rejected": -0.6488785147666931, "logps/chosen": -9.79092437773943e-05, "logps/rejected": -1.491811752319336, "loss": 0.2256, "nll_loss": 0.05638564005494118, "rewards/accuracies": 1.0, "rewards/chosen": -9.790924195840489e-06, "rewards/margins": 0.149171382188797, "rewards/rejected": -0.14918117225170135, "step": 12470 }, { "epoch": 8.62448132780083, "grad_norm": 4.240222454071045, "learning_rate": 7.641770401106502e-06, "log_odds_chosen": 11.76026725769043, "log_odds_ratio": -1.119616899813991e-05, "logits/chosen": -0.9003368616104126, "logits/rejected": -0.8908242583274841, "logps/chosen": -0.00013467471580952406, "logps/rejected": -2.6712076663970947, "loss": 0.4495, "nll_loss": 0.1123720034956932, "rewards/accuracies": 1.0, "rewards/chosen": -1.3467471035255585e-05, "rewards/margins": 0.2671073079109192, "rewards/rejected": -0.2671207785606384, "step": 12471 }, { "epoch": 8.625172890733056, "grad_norm": 3.913267135620117, "learning_rate": 7.637928384816352e-06, "log_odds_chosen": 10.158397674560547, "log_odds_ratio": -0.0004838298773393035, "logits/chosen": 0.24459408223628998, "logits/rejected": 0.2145857959985733, "logps/chosen": -0.0006363195134326816, "logps/rejected": -1.689408779144287, "loss": 0.5827, "nll_loss": 0.14563320577144623, "rewards/accuracies": 1.0, "rewards/chosen": -6.363195279845968e-05, "rewards/margins": 0.1688772737979889, "rewards/rejected": -0.16894090175628662, "step": 12472 }, { "epoch": 8.625864453665283, "grad_norm": 5.289096355438232, "learning_rate": 7.634086368526202e-06, "log_odds_chosen": 10.315433502197266, "log_odds_ratio": -0.00011007794819306582, "logits/chosen": -0.5646550059318542, "logits/rejected": -0.6940526962280273, "logps/chosen": -0.00021625487715937197, "logps/rejected": -1.8233420848846436, "loss": 0.4642, "nll_loss": 0.11602815240621567, "rewards/accuracies": 1.0, "rewards/chosen": -2.162548844353296e-05, "rewards/margins": 0.18231257796287537, "rewards/rejected": -0.18233419954776764, "step": 12473 }, { "epoch": 8.62655601659751, "grad_norm": 6.1395263671875, "learning_rate": 7.630244352236055e-06, "log_odds_chosen": 11.674334526062012, "log_odds_ratio": -3.4381027944618836e-05, "logits/chosen": -0.43784981966018677, "logits/rejected": -0.528411865234375, "logps/chosen": -0.0001351241226075217, "logps/rejected": -2.595203399658203, "loss": 0.3261, "nll_loss": 0.08151505142450333, "rewards/accuracies": 1.0, "rewards/chosen": -1.351241189695429e-05, "rewards/margins": 0.25950679183006287, "rewards/rejected": -0.2595203220844269, "step": 12474 }, { "epoch": 8.627247579529737, "grad_norm": 3.8196938037872314, "learning_rate": 7.626402335945905e-06, "log_odds_chosen": 11.671865463256836, "log_odds_ratio": -2.4040429707383737e-05, "logits/chosen": -0.07806392014026642, "logits/rejected": -0.16425377130508423, "logps/chosen": -9.739068627823144e-05, "logps/rejected": -2.3208045959472656, "loss": 0.4907, "nll_loss": 0.12267977744340897, "rewards/accuracies": 1.0, "rewards/chosen": -9.739068445924204e-06, "rewards/margins": 0.2320706844329834, "rewards/rejected": -0.23208042979240417, "step": 12475 }, { "epoch": 8.627939142461964, "grad_norm": 4.208126068115234, "learning_rate": 7.622560319655755e-06, "log_odds_chosen": 11.09755802154541, "log_odds_ratio": -1.8838678442989476e-05, "logits/chosen": -0.17204993963241577, "logits/rejected": -0.23128736019134521, "logps/chosen": -0.00017000493244268, "logps/rejected": -2.1367721557617188, "loss": 0.4703, "nll_loss": 0.11758065223693848, "rewards/accuracies": 1.0, "rewards/chosen": -1.7000493244268e-05, "rewards/margins": 0.21366021037101746, "rewards/rejected": -0.21367719769477844, "step": 12476 }, { "epoch": 8.62863070539419, "grad_norm": 2.9325435161590576, "learning_rate": 7.618718303365607e-06, "log_odds_chosen": 10.483333587646484, "log_odds_ratio": -0.0002429414598736912, "logits/chosen": -0.5756587386131287, "logits/rejected": -0.5522986054420471, "logps/chosen": -0.000315562792820856, "logps/rejected": -2.3459036350250244, "loss": 0.283, "nll_loss": 0.07073624432086945, "rewards/accuracies": 1.0, "rewards/chosen": -3.15562792820856e-05, "rewards/margins": 0.2345588207244873, "rewards/rejected": -0.23459038138389587, "step": 12477 }, { "epoch": 8.629322268326417, "grad_norm": 2.7504851818084717, "learning_rate": 7.614876287075458e-06, "log_odds_chosen": 11.237836837768555, "log_odds_ratio": -2.547233634686563e-05, "logits/chosen": -0.08669843524694443, "logits/rejected": -0.11949992179870605, "logps/chosen": -0.00014983415894676, "logps/rejected": -2.472243309020996, "loss": 0.2966, "nll_loss": 0.07414659857749939, "rewards/accuracies": 1.0, "rewards/chosen": -1.4983415894676e-05, "rewards/margins": 0.247209370136261, "rewards/rejected": -0.2472243309020996, "step": 12478 }, { "epoch": 8.630013831258644, "grad_norm": 4.2703857421875, "learning_rate": 7.6110342707853085e-06, "log_odds_chosen": 10.029302597045898, "log_odds_ratio": -5.834795592818409e-05, "logits/chosen": -0.7825497984886169, "logits/rejected": -0.7867951393127441, "logps/chosen": -0.00041046348633244634, "logps/rejected": -1.5852978229522705, "loss": 0.5751, "nll_loss": 0.14375720918178558, "rewards/accuracies": 1.0, "rewards/chosen": -4.104634717805311e-05, "rewards/margins": 0.15848875045776367, "rewards/rejected": -0.15852978825569153, "step": 12479 }, { "epoch": 8.630705394190871, "grad_norm": 3.0581610202789307, "learning_rate": 7.607192254495158e-06, "log_odds_chosen": 10.007383346557617, "log_odds_ratio": -0.0001696431718301028, "logits/chosen": -0.7570608854293823, "logits/rejected": -0.7735745310783386, "logps/chosen": -0.0006320398533716798, "logps/rejected": -1.4598944187164307, "loss": 0.3194, "nll_loss": 0.07982651144266129, "rewards/accuracies": 1.0, "rewards/chosen": -6.320398097159341e-05, "rewards/margins": 0.14592623710632324, "rewards/rejected": -0.14598944783210754, "step": 12480 }, { "epoch": 8.631396957123098, "grad_norm": 2.993604898452759, "learning_rate": 7.603350238205011e-06, "log_odds_chosen": 10.759903907775879, "log_odds_ratio": -7.590532186441123e-05, "logits/chosen": -0.4560757577419281, "logits/rejected": -0.37916862964630127, "logps/chosen": -0.0006725385319441557, "logps/rejected": -1.7399346828460693, "loss": 0.3037, "nll_loss": 0.0759168341755867, "rewards/accuracies": 1.0, "rewards/chosen": -6.725385901518166e-05, "rewards/margins": 0.1739262193441391, "rewards/rejected": -0.17399348318576813, "step": 12481 }, { "epoch": 8.632088520055325, "grad_norm": 2.7968311309814453, "learning_rate": 7.599508221914861e-06, "log_odds_chosen": 9.725410461425781, "log_odds_ratio": -0.0001912424631882459, "logits/chosen": -0.5142273902893066, "logits/rejected": -0.44338592886924744, "logps/chosen": -0.0003131648409180343, "logps/rejected": -1.332704782485962, "loss": 0.2163, "nll_loss": 0.05405230447649956, "rewards/accuracies": 1.0, "rewards/chosen": -3.1316485546994954e-05, "rewards/margins": 0.13323916494846344, "rewards/rejected": -0.1332704722881317, "step": 12482 }, { "epoch": 8.632780082987551, "grad_norm": 3.0606510639190674, "learning_rate": 7.5956662056247115e-06, "log_odds_chosen": 10.806468963623047, "log_odds_ratio": -7.89956102380529e-05, "logits/chosen": 0.03566619008779526, "logits/rejected": 0.007338635623455048, "logps/chosen": -0.0015425796154886484, "logps/rejected": -3.2979660034179688, "loss": 0.3353, "nll_loss": 0.0838153064250946, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015425795572809875, "rewards/margins": 0.3296423554420471, "rewards/rejected": -0.32979661226272583, "step": 12483 }, { "epoch": 8.633471645919778, "grad_norm": 2.7655177116394043, "learning_rate": 7.591824189334563e-06, "log_odds_chosen": 11.027166366577148, "log_odds_ratio": -0.0002024203713517636, "logits/chosen": -0.3868097960948944, "logits/rejected": -0.4246591329574585, "logps/chosen": -0.0002740652998909354, "logps/rejected": -2.0678699016571045, "loss": 0.2639, "nll_loss": 0.06594347208738327, "rewards/accuracies": 1.0, "rewards/chosen": -2.740652962529566e-05, "rewards/margins": 0.20675958693027496, "rewards/rejected": -0.20678699016571045, "step": 12484 }, { "epoch": 8.634163208852005, "grad_norm": 3.056570529937744, "learning_rate": 7.587982173044414e-06, "log_odds_chosen": 10.932531356811523, "log_odds_ratio": -3.319044117233716e-05, "logits/chosen": -0.4801771640777588, "logits/rejected": -0.5469212532043457, "logps/chosen": -0.00012442399747669697, "logps/rejected": -1.8965985774993896, "loss": 0.3506, "nll_loss": 0.08763975650072098, "rewards/accuracies": 1.0, "rewards/chosen": -1.2442400475265458e-05, "rewards/margins": 0.18964743614196777, "rewards/rejected": -0.18965987861156464, "step": 12485 }, { "epoch": 8.634854771784232, "grad_norm": 3.596604108810425, "learning_rate": 7.5841401567542645e-06, "log_odds_chosen": 11.202720642089844, "log_odds_ratio": -2.8192072932142764e-05, "logits/chosen": -0.016135483980178833, "logits/rejected": -0.15050634741783142, "logps/chosen": -0.000246469076955691, "logps/rejected": -2.520927667617798, "loss": 0.381, "nll_loss": 0.0952383279800415, "rewards/accuracies": 1.0, "rewards/chosen": -2.464690805936698e-05, "rewards/margins": 0.25206810235977173, "rewards/rejected": -0.25209277868270874, "step": 12486 }, { "epoch": 8.635546334716459, "grad_norm": 3.749194383621216, "learning_rate": 7.580298140464116e-06, "log_odds_chosen": 10.0772705078125, "log_odds_ratio": -7.941433432279155e-05, "logits/chosen": -0.6670262217521667, "logits/rejected": -0.6328946352005005, "logps/chosen": -0.00027387653244659305, "logps/rejected": -1.8500633239746094, "loss": 0.4957, "nll_loss": 0.1239088922739029, "rewards/accuracies": 1.0, "rewards/chosen": -2.7387653972255066e-05, "rewards/margins": 0.1849789321422577, "rewards/rejected": -0.18500632047653198, "step": 12487 }, { "epoch": 8.636237897648686, "grad_norm": 4.95552396774292, "learning_rate": 7.576456124173967e-06, "log_odds_chosen": 11.116426467895508, "log_odds_ratio": -2.903983295254875e-05, "logits/chosen": 0.04311956465244293, "logits/rejected": -0.02329292893409729, "logps/chosen": -0.0002852858742699027, "logps/rejected": -2.4899492263793945, "loss": 0.4551, "nll_loss": 0.11377936601638794, "rewards/accuracies": 1.0, "rewards/chosen": -2.8528589609777555e-05, "rewards/margins": 0.24896641075611115, "rewards/rejected": -0.24899493157863617, "step": 12488 }, { "epoch": 8.636929460580912, "grad_norm": 3.235852003097534, "learning_rate": 7.572614107883818e-06, "log_odds_chosen": 9.979238510131836, "log_odds_ratio": -0.00012453217641450465, "logits/chosen": -0.32398855686187744, "logits/rejected": -0.4147062301635742, "logps/chosen": -0.000752622087020427, "logps/rejected": -2.2185888290405273, "loss": 0.4353, "nll_loss": 0.10880966484546661, "rewards/accuracies": 1.0, "rewards/chosen": -7.526220724685118e-05, "rewards/margins": 0.22178363800048828, "rewards/rejected": -0.2218588888645172, "step": 12489 }, { "epoch": 8.63762102351314, "grad_norm": 3.885465383529663, "learning_rate": 7.568772091593669e-06, "log_odds_chosen": 10.54115104675293, "log_odds_ratio": -7.24983838154003e-05, "logits/chosen": -0.3686515688896179, "logits/rejected": -0.37892991304397583, "logps/chosen": -0.0004790807724930346, "logps/rejected": -1.7787805795669556, "loss": 0.2843, "nll_loss": 0.0710686445236206, "rewards/accuracies": 1.0, "rewards/chosen": -4.790807724930346e-05, "rewards/margins": 0.17783014476299286, "rewards/rejected": -0.17787805199623108, "step": 12490 }, { "epoch": 8.638312586445366, "grad_norm": 3.279143810272217, "learning_rate": 7.56493007530352e-06, "log_odds_chosen": 11.464825630187988, "log_odds_ratio": -2.6816604076884687e-05, "logits/chosen": -0.3882400691509247, "logits/rejected": -0.4565013647079468, "logps/chosen": -0.00019119751232210547, "logps/rejected": -2.661956548690796, "loss": 0.3415, "nll_loss": 0.08537627756595612, "rewards/accuracies": 1.0, "rewards/chosen": -1.9119750504614785e-05, "rewards/margins": 0.2661765515804291, "rewards/rejected": -0.2661956548690796, "step": 12491 }, { "epoch": 8.639004149377593, "grad_norm": 3.959516763687134, "learning_rate": 7.56108805901337e-06, "log_odds_chosen": 11.49631118774414, "log_odds_ratio": -1.8636765162227675e-05, "logits/chosen": -0.13141991198062897, "logits/rejected": -0.11028458178043365, "logps/chosen": -0.0002770294086076319, "logps/rejected": -3.0129501819610596, "loss": 0.4554, "nll_loss": 0.11384393274784088, "rewards/accuracies": 1.0, "rewards/chosen": -2.770293940557167e-05, "rewards/margins": 0.3012672960758209, "rewards/rejected": -0.30129504203796387, "step": 12492 }, { "epoch": 8.63969571230982, "grad_norm": 6.257438659667969, "learning_rate": 7.557246042723222e-06, "log_odds_chosen": 11.744972229003906, "log_odds_ratio": -1.1943647223233711e-05, "logits/chosen": -0.3867034316062927, "logits/rejected": -0.39720794558525085, "logps/chosen": -0.00018167459347750992, "logps/rejected": -2.951838970184326, "loss": 0.3992, "nll_loss": 0.09978760778903961, "rewards/accuracies": 1.0, "rewards/chosen": -1.8167460439144634e-05, "rewards/margins": 0.2951657176017761, "rewards/rejected": -0.2951838970184326, "step": 12493 }, { "epoch": 8.640387275242047, "grad_norm": 3.343430519104004, "learning_rate": 7.553404026433072e-06, "log_odds_chosen": 10.571279525756836, "log_odds_ratio": -0.0001090109144570306, "logits/chosen": 0.21396251022815704, "logits/rejected": 0.18011826276779175, "logps/chosen": -0.0004061561485286802, "logps/rejected": -1.7455267906188965, "loss": 0.5452, "nll_loss": 0.13628709316253662, "rewards/accuracies": 1.0, "rewards/chosen": -4.0615617763251066e-05, "rewards/margins": 0.1745120733976364, "rewards/rejected": -0.17455267906188965, "step": 12494 }, { "epoch": 8.641078838174273, "grad_norm": 2.921513557434082, "learning_rate": 7.549562010142923e-06, "log_odds_chosen": 10.715484619140625, "log_odds_ratio": -0.00015988711675163358, "logits/chosen": -0.2499525099992752, "logits/rejected": -0.32474803924560547, "logps/chosen": -0.00015378088573925197, "logps/rejected": -2.1522469520568848, "loss": 0.3132, "nll_loss": 0.07827220112085342, "rewards/accuracies": 1.0, "rewards/chosen": -1.537808930152096e-05, "rewards/margins": 0.21520933508872986, "rewards/rejected": -0.21522469818592072, "step": 12495 }, { "epoch": 8.6417704011065, "grad_norm": 5.468051433563232, "learning_rate": 7.545719993852775e-06, "log_odds_chosen": 10.28244400024414, "log_odds_ratio": -6.366943853208795e-05, "logits/chosen": -0.24660497903823853, "logits/rejected": -0.3208356201648712, "logps/chosen": -0.00017482020484749228, "logps/rejected": -1.4915902614593506, "loss": 0.6028, "nll_loss": 0.15068833529949188, "rewards/accuracies": 1.0, "rewards/chosen": -1.7482019757153466e-05, "rewards/margins": 0.14914155006408691, "rewards/rejected": -0.1491590291261673, "step": 12496 }, { "epoch": 8.642461964038727, "grad_norm": 4.540300369262695, "learning_rate": 7.541877977562625e-06, "log_odds_chosen": 11.03828239440918, "log_odds_ratio": -0.00013013739953748882, "logits/chosen": -0.259831964969635, "logits/rejected": -0.3700653612613678, "logps/chosen": -0.00030625227373093367, "logps/rejected": -2.2334890365600586, "loss": 0.3429, "nll_loss": 0.08570323884487152, "rewards/accuracies": 1.0, "rewards/chosen": -3.0625225917901844e-05, "rewards/margins": 0.22331829369068146, "rewards/rejected": -0.22334891557693481, "step": 12497 }, { "epoch": 8.643153526970954, "grad_norm": 8.347272872924805, "learning_rate": 7.538035961272476e-06, "log_odds_chosen": 11.745546340942383, "log_odds_ratio": -1.4129373994364869e-05, "logits/chosen": -0.32230523228645325, "logits/rejected": -0.39761868119239807, "logps/chosen": -6.494563422165811e-05, "logps/rejected": -2.194420099258423, "loss": 0.3645, "nll_loss": 0.09113363921642303, "rewards/accuracies": 1.0, "rewards/chosen": -6.494563422165811e-06, "rewards/margins": 0.21943552792072296, "rewards/rejected": -0.21944202482700348, "step": 12498 }, { "epoch": 8.64384508990318, "grad_norm": 4.28175687789917, "learning_rate": 7.534193944982328e-06, "log_odds_chosen": 10.019571304321289, "log_odds_ratio": -9.600758494343609e-05, "logits/chosen": -0.4364526867866516, "logits/rejected": -0.5668889284133911, "logps/chosen": -0.00032136833760887384, "logps/rejected": -1.647362470626831, "loss": 0.4576, "nll_loss": 0.11438850313425064, "rewards/accuracies": 1.0, "rewards/chosen": -3.213683521607891e-05, "rewards/margins": 0.1647041141986847, "rewards/rejected": -0.16473624110221863, "step": 12499 }, { "epoch": 8.644536652835408, "grad_norm": 3.8962275981903076, "learning_rate": 7.5303519286921784e-06, "log_odds_chosen": 10.956960678100586, "log_odds_ratio": -0.0005306452512741089, "logits/chosen": -0.4991099238395691, "logits/rejected": -0.6036314964294434, "logps/chosen": -0.0008679937454871833, "logps/rejected": -2.1128909587860107, "loss": 0.4405, "nll_loss": 0.11007068306207657, "rewards/accuracies": 1.0, "rewards/chosen": -8.67993658175692e-05, "rewards/margins": 0.21120230853557587, "rewards/rejected": -0.21128910779953003, "step": 12500 }, { "epoch": 8.645228215767634, "grad_norm": 3.9985406398773193, "learning_rate": 7.526509912402028e-06, "log_odds_chosen": 11.414043426513672, "log_odds_ratio": -1.5188716133707203e-05, "logits/chosen": -0.35912656784057617, "logits/rejected": -0.4726759195327759, "logps/chosen": -0.00020685499475803226, "logps/rejected": -2.3544654846191406, "loss": 0.3269, "nll_loss": 0.0817258358001709, "rewards/accuracies": 1.0, "rewards/chosen": -2.068549656542018e-05, "rewards/margins": 0.23542584478855133, "rewards/rejected": -0.23544654250144958, "step": 12501 }, { "epoch": 8.645919778699861, "grad_norm": 3.2609992027282715, "learning_rate": 7.522667896111881e-06, "log_odds_chosen": 10.814139366149902, "log_odds_ratio": -0.00016953656449913979, "logits/chosen": -0.33186572790145874, "logits/rejected": -0.43221697211265564, "logps/chosen": -0.0002584143658168614, "logps/rejected": -2.140547275543213, "loss": 0.446, "nll_loss": 0.11149090528488159, "rewards/accuracies": 1.0, "rewards/chosen": -2.584143658168614e-05, "rewards/margins": 0.21402889490127563, "rewards/rejected": -0.21405473351478577, "step": 12502 }, { "epoch": 8.646611341632088, "grad_norm": 5.537511825561523, "learning_rate": 7.518825879821731e-06, "log_odds_chosen": 11.373611450195312, "log_odds_ratio": -5.9118709032190964e-05, "logits/chosen": -0.6136024594306946, "logits/rejected": -0.6121117472648621, "logps/chosen": -0.0002486594021320343, "logps/rejected": -2.7007157802581787, "loss": 0.2574, "nll_loss": 0.06435317546129227, "rewards/accuracies": 1.0, "rewards/chosen": -2.4865941668394953e-05, "rewards/margins": 0.2700467109680176, "rewards/rejected": -0.2700715959072113, "step": 12503 }, { "epoch": 8.647302904564315, "grad_norm": 2.877756357192993, "learning_rate": 7.514983863531581e-06, "log_odds_chosen": 11.185656547546387, "log_odds_ratio": -0.000456160691101104, "logits/chosen": -0.34598487615585327, "logits/rejected": -0.3764715790748596, "logps/chosen": -0.00172577821649611, "logps/rejected": -3.0296967029571533, "loss": 0.3978, "nll_loss": 0.09940056502819061, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001725778274703771, "rewards/margins": 0.3027970790863037, "rewards/rejected": -0.30296963453292847, "step": 12504 }, { "epoch": 8.647994467496542, "grad_norm": 7.3781232833862305, "learning_rate": 7.511141847241433e-06, "log_odds_chosen": 11.257057189941406, "log_odds_ratio": -0.00010700400889618322, "logits/chosen": 0.1673416644334793, "logits/rejected": 0.12245422601699829, "logps/chosen": -0.0006069620721973479, "logps/rejected": -2.762889862060547, "loss": 0.6208, "nll_loss": 0.15518996119499207, "rewards/accuracies": 1.0, "rewards/chosen": -6.0696205764543265e-05, "rewards/margins": 0.27622830867767334, "rewards/rejected": -0.2762889862060547, "step": 12505 }, { "epoch": 8.648686030428768, "grad_norm": 2.924121618270874, "learning_rate": 7.507299830951284e-06, "log_odds_chosen": 9.39946174621582, "log_odds_ratio": -0.0004493095329962671, "logits/chosen": -0.41169384121894836, "logits/rejected": -0.5052452087402344, "logps/chosen": -0.001000140910036862, "logps/rejected": -1.6869672536849976, "loss": 0.2957, "nll_loss": 0.07387848198413849, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010001410555560142, "rewards/margins": 0.16859671473503113, "rewards/rejected": -0.16869673132896423, "step": 12506 }, { "epoch": 8.649377593360995, "grad_norm": 5.006665229797363, "learning_rate": 7.5034578146611345e-06, "log_odds_chosen": 11.246997833251953, "log_odds_ratio": -0.0006229121354408562, "logits/chosen": -0.15815459191799164, "logits/rejected": -0.07593107223510742, "logps/chosen": -0.0007676490349695086, "logps/rejected": -2.4148197174072266, "loss": 0.6833, "nll_loss": 0.17075814306735992, "rewards/accuracies": 1.0, "rewards/chosen": -7.676490349695086e-05, "rewards/margins": 0.24140521883964539, "rewards/rejected": -0.2414819896221161, "step": 12507 }, { "epoch": 8.650069156293222, "grad_norm": 4.329807758331299, "learning_rate": 7.499615798370986e-06, "log_odds_chosen": 11.328744888305664, "log_odds_ratio": -1.9021983462153003e-05, "logits/chosen": -0.06117581948637962, "logits/rejected": -0.043100398033857346, "logps/chosen": -0.000164288270752877, "logps/rejected": -2.2955570220947266, "loss": 0.3944, "nll_loss": 0.09859944134950638, "rewards/accuracies": 1.0, "rewards/chosen": -1.6428828530479223e-05, "rewards/margins": 0.22953930497169495, "rewards/rejected": -0.22955572605133057, "step": 12508 }, { "epoch": 8.650760719225449, "grad_norm": 3.991814613342285, "learning_rate": 7.495773782080837e-06, "log_odds_chosen": 10.413236618041992, "log_odds_ratio": -5.779110506409779e-05, "logits/chosen": -0.7478570342063904, "logits/rejected": -0.7675559520721436, "logps/chosen": -0.00017067301087081432, "logps/rejected": -1.0780225992202759, "loss": 0.2589, "nll_loss": 0.06472624838352203, "rewards/accuracies": 1.0, "rewards/chosen": -1.7067301087081432e-05, "rewards/margins": 0.1077851951122284, "rewards/rejected": -0.10780227184295654, "step": 12509 }, { "epoch": 8.651452282157676, "grad_norm": 2.4446229934692383, "learning_rate": 7.491931765790687e-06, "log_odds_chosen": 10.83348274230957, "log_odds_ratio": -5.711670019081794e-05, "logits/chosen": -0.6774818897247314, "logits/rejected": -0.7664629817008972, "logps/chosen": -0.0003899620787706226, "logps/rejected": -2.597978115081787, "loss": 0.3238, "nll_loss": 0.08095055818557739, "rewards/accuracies": 1.0, "rewards/chosen": -3.899620787706226e-05, "rewards/margins": 0.2597588300704956, "rewards/rejected": -0.2597978115081787, "step": 12510 }, { "epoch": 8.652143845089903, "grad_norm": 5.429533958435059, "learning_rate": 7.488089749500539e-06, "log_odds_chosen": 9.524100303649902, "log_odds_ratio": -0.00021248232224024832, "logits/chosen": -0.29696527123451233, "logits/rejected": -0.3293287456035614, "logps/chosen": -0.0004965565167367458, "logps/rejected": -1.6754616498947144, "loss": 0.4853, "nll_loss": 0.12129941582679749, "rewards/accuracies": 1.0, "rewards/chosen": -4.9655653128866106e-05, "rewards/margins": 0.16749650239944458, "rewards/rejected": -0.16754615306854248, "step": 12511 }, { "epoch": 8.65283540802213, "grad_norm": 4.308308124542236, "learning_rate": 7.484247733210389e-06, "log_odds_chosen": 10.372257232666016, "log_odds_ratio": -0.000601473671849817, "logits/chosen": -0.281741201877594, "logits/rejected": -0.30346786975860596, "logps/chosen": -0.0005873222835361958, "logps/rejected": -2.0079917907714844, "loss": 0.4027, "nll_loss": 0.10062563419342041, "rewards/accuracies": 1.0, "rewards/chosen": -5.8732228353619576e-05, "rewards/margins": 0.20074047148227692, "rewards/rejected": -0.20079921185970306, "step": 12512 }, { "epoch": 8.653526970954356, "grad_norm": 4.821245193481445, "learning_rate": 7.48040571692024e-06, "log_odds_chosen": 11.229162216186523, "log_odds_ratio": -5.2564311772584915e-05, "logits/chosen": 0.06918393075466156, "logits/rejected": 0.17258884012699127, "logps/chosen": -0.0002505563898012042, "logps/rejected": -2.54415225982666, "loss": 0.5032, "nll_loss": 0.12579584121704102, "rewards/accuracies": 1.0, "rewards/chosen": -2.5055640435311943e-05, "rewards/margins": 0.254390150308609, "rewards/rejected": -0.25441521406173706, "step": 12513 }, { "epoch": 8.654218533886583, "grad_norm": 3.9665966033935547, "learning_rate": 7.476563700630091e-06, "log_odds_chosen": 11.276213645935059, "log_odds_ratio": -3.955683496315032e-05, "logits/chosen": -0.1911214292049408, "logits/rejected": -0.22925563156604767, "logps/chosen": -0.00015900543075986207, "logps/rejected": -2.275547742843628, "loss": 0.388, "nll_loss": 0.09700003266334534, "rewards/accuracies": 1.0, "rewards/chosen": -1.5900543075986207e-05, "rewards/margins": 0.2275388538837433, "rewards/rejected": -0.22755476832389832, "step": 12514 }, { "epoch": 8.65491009681881, "grad_norm": 3.5284528732299805, "learning_rate": 7.472721684339942e-06, "log_odds_chosen": 11.778959274291992, "log_odds_ratio": -1.4203714272298384e-05, "logits/chosen": -0.2765466570854187, "logits/rejected": -0.3749210834503174, "logps/chosen": -0.00013069694978184998, "logps/rejected": -2.9017252922058105, "loss": 0.4733, "nll_loss": 0.11832163482904434, "rewards/accuracies": 1.0, "rewards/chosen": -1.3069693522993475e-05, "rewards/margins": 0.2901594340801239, "rewards/rejected": -0.2901725172996521, "step": 12515 }, { "epoch": 8.655601659751037, "grad_norm": 3.512288808822632, "learning_rate": 7.468879668049793e-06, "log_odds_chosen": 10.894959449768066, "log_odds_ratio": -5.7100933190668e-05, "logits/chosen": -0.3321828544139862, "logits/rejected": -0.42700040340423584, "logps/chosen": -0.00021454865054693073, "logps/rejected": -2.0859456062316895, "loss": 0.3536, "nll_loss": 0.08839131146669388, "rewards/accuracies": 1.0, "rewards/chosen": -2.1454865418490954e-05, "rewards/margins": 0.2085730880498886, "rewards/rejected": -0.20859453082084656, "step": 12516 }, { "epoch": 8.656293222683264, "grad_norm": 4.475125789642334, "learning_rate": 7.465037651759643e-06, "log_odds_chosen": 10.695079803466797, "log_odds_ratio": -9.16783683351241e-05, "logits/chosen": -0.2723444104194641, "logits/rejected": -0.42437297105789185, "logps/chosen": -0.00010329029464628547, "logps/rejected": -1.5851585865020752, "loss": 0.315, "nll_loss": 0.07873716205358505, "rewards/accuracies": 1.0, "rewards/chosen": -1.032903037412325e-05, "rewards/margins": 0.15850552916526794, "rewards/rejected": -0.15851587057113647, "step": 12517 }, { "epoch": 8.65698478561549, "grad_norm": 2.8404202461242676, "learning_rate": 7.461195635469495e-06, "log_odds_chosen": 10.811230659484863, "log_odds_ratio": -8.040317334234715e-05, "logits/chosen": -0.255277156829834, "logits/rejected": -0.32991933822631836, "logps/chosen": -0.00025593285681679845, "logps/rejected": -2.292391300201416, "loss": 0.3287, "nll_loss": 0.08215835690498352, "rewards/accuracies": 1.0, "rewards/chosen": -2.5593286409275606e-05, "rewards/margins": 0.22921353578567505, "rewards/rejected": -0.22923913598060608, "step": 12518 }, { "epoch": 8.657676348547717, "grad_norm": 5.891508102416992, "learning_rate": 7.457353619179345e-06, "log_odds_chosen": 11.82068920135498, "log_odds_ratio": -3.0011773560545407e-05, "logits/chosen": -0.4721341133117676, "logits/rejected": -0.41853195428848267, "logps/chosen": -0.0001907099795062095, "logps/rejected": -3.0076072216033936, "loss": 0.3828, "nll_loss": 0.0956956297159195, "rewards/accuracies": 1.0, "rewards/chosen": -1.907099795062095e-05, "rewards/margins": 0.30074167251586914, "rewards/rejected": -0.30076074600219727, "step": 12519 }, { "epoch": 8.658367911479944, "grad_norm": 4.65853214263916, "learning_rate": 7.453511602889196e-06, "log_odds_chosen": 10.887121200561523, "log_odds_ratio": -0.0004176338261459023, "logits/chosen": -0.2970947027206421, "logits/rejected": -0.3491339087486267, "logps/chosen": -0.0006700168596580625, "logps/rejected": -2.4534764289855957, "loss": 0.4927, "nll_loss": 0.12313016504049301, "rewards/accuracies": 1.0, "rewards/chosen": -6.700168160023168e-05, "rewards/margins": 0.2452806532382965, "rewards/rejected": -0.24534766376018524, "step": 12520 }, { "epoch": 8.659059474412171, "grad_norm": 3.566943645477295, "learning_rate": 7.4496695865990475e-06, "log_odds_chosen": 11.55712604522705, "log_odds_ratio": -8.315506420331076e-05, "logits/chosen": -0.3606160283088684, "logits/rejected": -0.4622876048088074, "logps/chosen": -0.0003821479913312942, "logps/rejected": -2.8081679344177246, "loss": 0.5489, "nll_loss": 0.1372266411781311, "rewards/accuracies": 1.0, "rewards/chosen": -3.821479913312942e-05, "rewards/margins": 0.28077858686447144, "rewards/rejected": -0.28081679344177246, "step": 12521 }, { "epoch": 8.659751037344398, "grad_norm": 2.726372241973877, "learning_rate": 7.445827570308898e-06, "log_odds_chosen": 10.538716316223145, "log_odds_ratio": -6.737098738085479e-05, "logits/chosen": -0.3288825452327728, "logits/rejected": -0.35905057191848755, "logps/chosen": -0.00027229191618971527, "logps/rejected": -2.214806318283081, "loss": 0.3153, "nll_loss": 0.07881700247526169, "rewards/accuracies": 1.0, "rewards/chosen": -2.7229190891375765e-05, "rewards/margins": 0.22145341336727142, "rewards/rejected": -0.22148065268993378, "step": 12522 }, { "epoch": 8.660442600276625, "grad_norm": 3.5521833896636963, "learning_rate": 7.441985554018749e-06, "log_odds_chosen": 11.483410835266113, "log_odds_ratio": -1.6197574950638227e-05, "logits/chosen": -0.2110901176929474, "logits/rejected": -0.4438256323337555, "logps/chosen": -0.00015359108510892838, "logps/rejected": -2.654276132583618, "loss": 0.4356, "nll_loss": 0.10890985280275345, "rewards/accuracies": 1.0, "rewards/chosen": -1.535910996608436e-05, "rewards/margins": 0.26541227102279663, "rewards/rejected": -0.2654276192188263, "step": 12523 }, { "epoch": 8.661134163208851, "grad_norm": 5.966548442840576, "learning_rate": 7.438143537728601e-06, "log_odds_chosen": 10.445170402526855, "log_odds_ratio": -0.0004751587985083461, "logits/chosen": -0.10279648751020432, "logits/rejected": -0.05743744224309921, "logps/chosen": -0.0004982667742297053, "logps/rejected": -2.129284381866455, "loss": 0.4002, "nll_loss": 0.10000036656856537, "rewards/accuracies": 1.0, "rewards/chosen": -4.9826678150566295e-05, "rewards/margins": 0.21287861466407776, "rewards/rejected": -0.21292844414710999, "step": 12524 }, { "epoch": 8.661825726141078, "grad_norm": 4.023740768432617, "learning_rate": 7.434301521438451e-06, "log_odds_chosen": 10.345528602600098, "log_odds_ratio": -0.0005008649313822389, "logits/chosen": -0.14824295043945312, "logits/rejected": -0.12089388072490692, "logps/chosen": -0.00037331614294089377, "logps/rejected": -2.3994011878967285, "loss": 0.566, "nll_loss": 0.14145886898040771, "rewards/accuracies": 1.0, "rewards/chosen": -3.733161065611057e-05, "rewards/margins": 0.2399027794599533, "rewards/rejected": -0.2399401217699051, "step": 12525 }, { "epoch": 8.662517289073305, "grad_norm": 5.07864236831665, "learning_rate": 7.430459505148301e-06, "log_odds_chosen": 11.224198341369629, "log_odds_ratio": -3.23234053212218e-05, "logits/chosen": -0.24662211537361145, "logits/rejected": -0.1978820562362671, "logps/chosen": -0.00024225276138167828, "logps/rejected": -2.860351085662842, "loss": 0.5755, "nll_loss": 0.14386588335037231, "rewards/accuracies": 1.0, "rewards/chosen": -2.4225275410572067e-05, "rewards/margins": 0.28601086139678955, "rewards/rejected": -0.28603509068489075, "step": 12526 }, { "epoch": 8.663208852005532, "grad_norm": 4.163898944854736, "learning_rate": 7.426617488858154e-06, "log_odds_chosen": 10.051204681396484, "log_odds_ratio": -6.197369657456875e-05, "logits/chosen": 0.10701654106378555, "logits/rejected": 0.10469865053892136, "logps/chosen": -0.00023373853764496744, "logps/rejected": -1.4638707637786865, "loss": 0.795, "nll_loss": 0.19875499606132507, "rewards/accuracies": 1.0, "rewards/chosen": -2.3373853764496744e-05, "rewards/margins": 0.14636372029781342, "rewards/rejected": -0.14638708531856537, "step": 12527 }, { "epoch": 8.663900414937759, "grad_norm": 2.7802116870880127, "learning_rate": 7.422775472568004e-06, "log_odds_chosen": 9.961128234863281, "log_odds_ratio": -0.00017989228945225477, "logits/chosen": -0.06987164914608002, "logits/rejected": -0.024553827941417694, "logps/chosen": -0.00034464691998437047, "logps/rejected": -1.683363676071167, "loss": 0.3131, "nll_loss": 0.07825967669487, "rewards/accuracies": 1.0, "rewards/chosen": -3.4464690543245524e-05, "rewards/margins": 0.16830191016197205, "rewards/rejected": -0.16833636164665222, "step": 12528 }, { "epoch": 8.664591977869986, "grad_norm": 4.402554512023926, "learning_rate": 7.418933456277854e-06, "log_odds_chosen": 10.956361770629883, "log_odds_ratio": -7.46956720831804e-05, "logits/chosen": -0.07751287519931793, "logits/rejected": -0.07672972977161407, "logps/chosen": -0.00017957479576580226, "logps/rejected": -2.2911131381988525, "loss": 0.4873, "nll_loss": 0.12182983011007309, "rewards/accuracies": 1.0, "rewards/chosen": -1.7957479940378107e-05, "rewards/margins": 0.22909337282180786, "rewards/rejected": -0.22911131381988525, "step": 12529 }, { "epoch": 8.665283540802212, "grad_norm": 3.342729330062866, "learning_rate": 7.415091439987706e-06, "log_odds_chosen": 10.63759994506836, "log_odds_ratio": -3.1314044463215396e-05, "logits/chosen": -0.37202584743499756, "logits/rejected": -0.3920147716999054, "logps/chosen": -0.00014479250239674002, "logps/rejected": -1.799759030342102, "loss": 0.3138, "nll_loss": 0.0784500390291214, "rewards/accuracies": 1.0, "rewards/chosen": -1.4479250239674002e-05, "rewards/margins": 0.1799614429473877, "rewards/rejected": -0.17997589707374573, "step": 12530 }, { "epoch": 8.66597510373444, "grad_norm": 4.784072399139404, "learning_rate": 7.411249423697557e-06, "log_odds_chosen": 11.285941123962402, "log_odds_ratio": -2.2382810129784048e-05, "logits/chosen": -0.5358566641807556, "logits/rejected": -0.5093519687652588, "logps/chosen": -0.0001689097553025931, "logps/rejected": -2.4816975593566895, "loss": 0.3534, "nll_loss": 0.08835049718618393, "rewards/accuracies": 1.0, "rewards/chosen": -1.689097553025931e-05, "rewards/margins": 0.24815288186073303, "rewards/rejected": -0.24816977977752686, "step": 12531 }, { "epoch": 8.666666666666666, "grad_norm": 3.1711583137512207, "learning_rate": 7.4074074074074075e-06, "log_odds_chosen": 11.662884712219238, "log_odds_ratio": -3.922557152691297e-05, "logits/chosen": -0.03454257547855377, "logits/rejected": -0.17196756601333618, "logps/chosen": -0.00015968605293892324, "logps/rejected": -2.495405673980713, "loss": 0.3412, "nll_loss": 0.085299551486969, "rewards/accuracies": 1.0, "rewards/chosen": -1.5968604202498682e-05, "rewards/margins": 0.24952462315559387, "rewards/rejected": -0.2495405673980713, "step": 12532 }, { "epoch": 8.667358229598893, "grad_norm": 5.585811138153076, "learning_rate": 7.403565391117259e-06, "log_odds_chosen": 10.636640548706055, "log_odds_ratio": -0.0001833289279602468, "logits/chosen": -0.36438095569610596, "logits/rejected": -0.3280951976776123, "logps/chosen": -0.0007131825550459325, "logps/rejected": -2.577727794647217, "loss": 0.489, "nll_loss": 0.1222197413444519, "rewards/accuracies": 1.0, "rewards/chosen": -7.131825987016782e-05, "rewards/margins": 0.25770145654678345, "rewards/rejected": -0.2577727735042572, "step": 12533 }, { "epoch": 8.66804979253112, "grad_norm": 3.481321334838867, "learning_rate": 7.39972337482711e-06, "log_odds_chosen": 11.821352005004883, "log_odds_ratio": -3.541969272191636e-05, "logits/chosen": -0.10815463215112686, "logits/rejected": -0.0630636140704155, "logps/chosen": -0.00010432667477289215, "logps/rejected": -2.1872663497924805, "loss": 0.3699, "nll_loss": 0.09248305857181549, "rewards/accuracies": 1.0, "rewards/chosen": -1.0432667295390274e-05, "rewards/margins": 0.21871618926525116, "rewards/rejected": -0.21872663497924805, "step": 12534 }, { "epoch": 8.668741355463347, "grad_norm": 3.907029867172241, "learning_rate": 7.3958813585369606e-06, "log_odds_chosen": 10.354106903076172, "log_odds_ratio": -0.00018895111861638725, "logits/chosen": -0.3448829650878906, "logits/rejected": -0.3576931357383728, "logps/chosen": -0.00028838051366619766, "logps/rejected": -1.6199371814727783, "loss": 0.2629, "nll_loss": 0.0656980574131012, "rewards/accuracies": 1.0, "rewards/chosen": -2.88380488200346e-05, "rewards/margins": 0.16196487843990326, "rewards/rejected": -0.16199371218681335, "step": 12535 }, { "epoch": 8.669432918395573, "grad_norm": 3.7974905967712402, "learning_rate": 7.392039342246812e-06, "log_odds_chosen": 10.901939392089844, "log_odds_ratio": -2.7674408556777053e-05, "logits/chosen": -0.11420188844203949, "logits/rejected": -0.16965460777282715, "logps/chosen": -0.0002406880958005786, "logps/rejected": -2.3652772903442383, "loss": 0.403, "nll_loss": 0.10075122117996216, "rewards/accuracies": 1.0, "rewards/chosen": -2.406880958005786e-05, "rewards/margins": 0.23650366067886353, "rewards/rejected": -0.2365277260541916, "step": 12536 }, { "epoch": 8.6701244813278, "grad_norm": 4.031060218811035, "learning_rate": 7.388197325956663e-06, "log_odds_chosen": 10.959932327270508, "log_odds_ratio": -2.7098983991891146e-05, "logits/chosen": -0.4994911253452301, "logits/rejected": -0.4914383590221405, "logps/chosen": -0.0001192599447676912, "logps/rejected": -2.019474506378174, "loss": 0.4702, "nll_loss": 0.11753655225038528, "rewards/accuracies": 1.0, "rewards/chosen": -1.1925993021577597e-05, "rewards/margins": 0.2019355297088623, "rewards/rejected": -0.20194746553897858, "step": 12537 }, { "epoch": 8.670816044260027, "grad_norm": 6.080068111419678, "learning_rate": 7.384355309666513e-06, "log_odds_chosen": 11.134516716003418, "log_odds_ratio": -3.303651828900911e-05, "logits/chosen": -0.47941821813583374, "logits/rejected": -0.5384718179702759, "logps/chosen": -9.550550021231174e-05, "logps/rejected": -1.8790864944458008, "loss": 0.5679, "nll_loss": 0.1419667899608612, "rewards/accuracies": 1.0, "rewards/chosen": -9.550550203130115e-06, "rewards/margins": 0.18789908289909363, "rewards/rejected": -0.18790864944458008, "step": 12538 }, { "epoch": 8.671507607192254, "grad_norm": 7.047338485717773, "learning_rate": 7.380513293376365e-06, "log_odds_chosen": 11.69442367553711, "log_odds_ratio": -2.412005778751336e-05, "logits/chosen": -0.36900442838668823, "logits/rejected": -0.37385451793670654, "logps/chosen": -0.00014486766303889453, "logps/rejected": -2.316293478012085, "loss": 0.3567, "nll_loss": 0.08917532861232758, "rewards/accuracies": 1.0, "rewards/chosen": -1.4486766303889453e-05, "rewards/margins": 0.2316148579120636, "rewards/rejected": -0.23162934184074402, "step": 12539 }, { "epoch": 8.67219917012448, "grad_norm": 4.273880481719971, "learning_rate": 7.376671277086215e-06, "log_odds_chosen": 11.082413673400879, "log_odds_ratio": -0.00023529511236120015, "logits/chosen": 0.06696252524852753, "logits/rejected": -0.06390891969203949, "logps/chosen": -0.000319934100843966, "logps/rejected": -2.3815979957580566, "loss": 0.3856, "nll_loss": 0.0963764488697052, "rewards/accuracies": 1.0, "rewards/chosen": -3.1993411539588124e-05, "rewards/margins": 0.23812782764434814, "rewards/rejected": -0.23815982043743134, "step": 12540 }, { "epoch": 8.672890733056708, "grad_norm": 3.5581562519073486, "learning_rate": 7.372829260796066e-06, "log_odds_chosen": 9.663544654846191, "log_odds_ratio": -0.0004422256606630981, "logits/chosen": -0.39682891964912415, "logits/rejected": -0.4791784882545471, "logps/chosen": -0.0005750549025833607, "logps/rejected": -1.5449265241622925, "loss": 0.4687, "nll_loss": 0.11713872104883194, "rewards/accuracies": 1.0, "rewards/chosen": -5.750549098593183e-05, "rewards/margins": 0.1544351577758789, "rewards/rejected": -0.15449264645576477, "step": 12541 }, { "epoch": 8.673582295988934, "grad_norm": 3.255485773086548, "learning_rate": 7.3689872445059175e-06, "log_odds_chosen": 10.975412368774414, "log_odds_ratio": -0.00011924972204724327, "logits/chosen": -0.16594725847244263, "logits/rejected": -0.31802070140838623, "logps/chosen": -0.0004657926328945905, "logps/rejected": -2.780015468597412, "loss": 0.4166, "nll_loss": 0.10414905846118927, "rewards/accuracies": 1.0, "rewards/chosen": -4.657926183426753e-05, "rewards/margins": 0.27795499563217163, "rewards/rejected": -0.2780015766620636, "step": 12542 }, { "epoch": 8.674273858921161, "grad_norm": 4.244312286376953, "learning_rate": 7.365145228215768e-06, "log_odds_chosen": 11.83520793914795, "log_odds_ratio": -1.8101507521350868e-05, "logits/chosen": -0.24233081936836243, "logits/rejected": -0.35883381962776184, "logps/chosen": -9.870299254544079e-05, "logps/rejected": -2.3833212852478027, "loss": 0.471, "nll_loss": 0.11775927245616913, "rewards/accuracies": 1.0, "rewards/chosen": -9.870300345937721e-06, "rewards/margins": 0.23832225799560547, "rewards/rejected": -0.2383321225643158, "step": 12543 }, { "epoch": 8.674965421853388, "grad_norm": 6.95550537109375, "learning_rate": 7.361303211925619e-06, "log_odds_chosen": 12.086908340454102, "log_odds_ratio": -1.8966737115988508e-05, "logits/chosen": -0.5800905823707581, "logits/rejected": -0.5021611452102661, "logps/chosen": -0.00015917661949060857, "logps/rejected": -3.2165818214416504, "loss": 0.365, "nll_loss": 0.09124217927455902, "rewards/accuracies": 1.0, "rewards/chosen": -1.5917659766273573e-05, "rewards/margins": 0.32164227962493896, "rewards/rejected": -0.321658194065094, "step": 12544 }, { "epoch": 8.675656984785615, "grad_norm": 4.134158611297607, "learning_rate": 7.357461195635471e-06, "log_odds_chosen": 10.438495635986328, "log_odds_ratio": -0.00031156576005741954, "logits/chosen": -0.5812631845474243, "logits/rejected": -0.6295356154441833, "logps/chosen": -0.00029764368082396686, "logps/rejected": -2.0201539993286133, "loss": 0.3774, "nll_loss": 0.09431064873933792, "rewards/accuracies": 1.0, "rewards/chosen": -2.9764367354800925e-05, "rewards/margins": 0.20198562741279602, "rewards/rejected": -0.20201539993286133, "step": 12545 }, { "epoch": 8.676348547717842, "grad_norm": 3.0808298587799072, "learning_rate": 7.353619179345321e-06, "log_odds_chosen": 11.856780052185059, "log_odds_ratio": -1.4338037544803228e-05, "logits/chosen": -0.8203387260437012, "logits/rejected": -0.9521452188491821, "logps/chosen": -0.00018739672668743879, "logps/rejected": -2.838536262512207, "loss": 0.3641, "nll_loss": 0.09101735055446625, "rewards/accuracies": 1.0, "rewards/chosen": -1.873967266874388e-05, "rewards/margins": 0.28383487462997437, "rewards/rejected": -0.28385359048843384, "step": 12546 }, { "epoch": 8.677040110650069, "grad_norm": 4.475383281707764, "learning_rate": 7.349777163055171e-06, "log_odds_chosen": 10.019126892089844, "log_odds_ratio": -0.0007663845317438245, "logits/chosen": 0.10257889330387115, "logits/rejected": -0.01933739334344864, "logps/chosen": -0.0010240648407489061, "logps/rejected": -2.111617088317871, "loss": 0.8074, "nll_loss": 0.20177187025547028, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010240648407489061, "rewards/margins": 0.2110593020915985, "rewards/rejected": -0.21116170287132263, "step": 12547 }, { "epoch": 8.677731673582295, "grad_norm": 4.1379852294921875, "learning_rate": 7.345935146765022e-06, "log_odds_chosen": 11.253612518310547, "log_odds_ratio": -8.35497266962193e-05, "logits/chosen": -0.14084036648273468, "logits/rejected": -0.2441224902868271, "logps/chosen": -0.0010256430832669139, "logps/rejected": -2.3779947757720947, "loss": 0.4041, "nll_loss": 0.1010219156742096, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010256430687149987, "rewards/margins": 0.23769691586494446, "rewards/rejected": -0.2377994805574417, "step": 12548 }, { "epoch": 8.678423236514522, "grad_norm": 4.599795818328857, "learning_rate": 7.342093130474874e-06, "log_odds_chosen": 9.960695266723633, "log_odds_ratio": -0.0007999525987543166, "logits/chosen": -0.3448615074157715, "logits/rejected": -0.4812205135822296, "logps/chosen": -0.00048607925418764353, "logps/rejected": -1.8350410461425781, "loss": 0.4098, "nll_loss": 0.10236094892024994, "rewards/accuracies": 1.0, "rewards/chosen": -4.8607926146360114e-05, "rewards/margins": 0.18345551192760468, "rewards/rejected": -0.183504119515419, "step": 12549 }, { "epoch": 8.679114799446749, "grad_norm": 2.339559555053711, "learning_rate": 7.338251114184724e-06, "log_odds_chosen": 10.132892608642578, "log_odds_ratio": -0.00018708023708313704, "logits/chosen": -0.37915366888046265, "logits/rejected": -0.3266010582447052, "logps/chosen": -0.00019164435798302293, "logps/rejected": -1.3973734378814697, "loss": 0.2702, "nll_loss": 0.06754133850336075, "rewards/accuracies": 1.0, "rewards/chosen": -1.9164435798302293e-05, "rewards/margins": 0.1397181898355484, "rewards/rejected": -0.1397373378276825, "step": 12550 }, { "epoch": 8.679806362378976, "grad_norm": 5.420063018798828, "learning_rate": 7.334409097894575e-06, "log_odds_chosen": 10.726099014282227, "log_odds_ratio": -0.00015335682837758213, "logits/chosen": -0.07302002608776093, "logits/rejected": -0.20137712359428406, "logps/chosen": -0.0007101158262230456, "logps/rejected": -2.376372814178467, "loss": 0.4888, "nll_loss": 0.1221751719713211, "rewards/accuracies": 1.0, "rewards/chosen": -7.10115855326876e-05, "rewards/margins": 0.2375662922859192, "rewards/rejected": -0.23763728141784668, "step": 12551 }, { "epoch": 8.680497925311203, "grad_norm": 3.888152837753296, "learning_rate": 7.330567081604427e-06, "log_odds_chosen": 9.247932434082031, "log_odds_ratio": -0.00015723289106972516, "logits/chosen": -0.3507700562477112, "logits/rejected": -0.39326196908950806, "logps/chosen": -0.0003475480480119586, "logps/rejected": -1.3601428270339966, "loss": 0.4583, "nll_loss": 0.11455010622739792, "rewards/accuracies": 1.0, "rewards/chosen": -3.475480116321705e-05, "rewards/margins": 0.1359795182943344, "rewards/rejected": -0.13601426780223846, "step": 12552 }, { "epoch": 8.68118948824343, "grad_norm": 3.6204445362091064, "learning_rate": 7.3267250653142774e-06, "log_odds_chosen": 11.143669128417969, "log_odds_ratio": -4.348631773609668e-05, "logits/chosen": -0.22094419598579407, "logits/rejected": -0.3634456992149353, "logps/chosen": -0.0003180534695275128, "logps/rejected": -2.687262535095215, "loss": 0.3249, "nll_loss": 0.08122783154249191, "rewards/accuracies": 1.0, "rewards/chosen": -3.1805349863134325e-05, "rewards/margins": 0.2686944603919983, "rewards/rejected": -0.26872625946998596, "step": 12553 }, { "epoch": 8.681881051175656, "grad_norm": 2.4743692874908447, "learning_rate": 7.322883049024127e-06, "log_odds_chosen": 10.48563003540039, "log_odds_ratio": -3.777826714213006e-05, "logits/chosen": -0.24010899662971497, "logits/rejected": -0.2702690660953522, "logps/chosen": -0.0003850167558994144, "logps/rejected": -1.9075738191604614, "loss": 0.2904, "nll_loss": 0.07258377224206924, "rewards/accuracies": 1.0, "rewards/chosen": -3.850167558994144e-05, "rewards/margins": 0.1907188892364502, "rewards/rejected": -0.1907573938369751, "step": 12554 }, { "epoch": 8.682572614107883, "grad_norm": 4.934008598327637, "learning_rate": 7.31904103273398e-06, "log_odds_chosen": 11.115760803222656, "log_odds_ratio": -3.487409048830159e-05, "logits/chosen": -0.39205920696258545, "logits/rejected": -0.44652342796325684, "logps/chosen": -0.0001003592333290726, "logps/rejected": -1.8912320137023926, "loss": 0.4261, "nll_loss": 0.10652204602956772, "rewards/accuracies": 1.0, "rewards/chosen": -1.0035922969109379e-05, "rewards/margins": 0.18911318480968475, "rewards/rejected": -0.18912319839000702, "step": 12555 }, { "epoch": 8.68326417704011, "grad_norm": 7.027980804443359, "learning_rate": 7.31519901644383e-06, "log_odds_chosen": 10.794315338134766, "log_odds_ratio": -3.511262548272498e-05, "logits/chosen": -0.06356941163539886, "logits/rejected": -0.16646791994571686, "logps/chosen": -0.00010867961827898398, "logps/rejected": -1.8001667261123657, "loss": 0.8285, "nll_loss": 0.20711015164852142, "rewards/accuracies": 1.0, "rewards/chosen": -1.0867961464100517e-05, "rewards/margins": 0.18000580370426178, "rewards/rejected": -0.1800166666507721, "step": 12556 }, { "epoch": 8.683955739972337, "grad_norm": 4.8774943351745605, "learning_rate": 7.3113570001536804e-06, "log_odds_chosen": 11.061723709106445, "log_odds_ratio": -4.698024713434279e-05, "logits/chosen": 0.021711044013500214, "logits/rejected": -0.04401899129152298, "logps/chosen": -0.00020374648738652468, "logps/rejected": -2.322935104370117, "loss": 0.334, "nll_loss": 0.08349205553531647, "rewards/accuracies": 1.0, "rewards/chosen": -2.0374651285237633e-05, "rewards/margins": 0.2322731465101242, "rewards/rejected": -0.2322934865951538, "step": 12557 }, { "epoch": 8.684647302904564, "grad_norm": 5.135361194610596, "learning_rate": 7.307514983863532e-06, "log_odds_chosen": 9.509974479675293, "log_odds_ratio": -0.0002750680723693222, "logits/chosen": -0.3695323169231415, "logits/rejected": -0.41089409589767456, "logps/chosen": -0.0005933195352554321, "logps/rejected": -1.8128838539123535, "loss": 0.4796, "nll_loss": 0.11986302584409714, "rewards/accuracies": 1.0, "rewards/chosen": -5.9331960073905066e-05, "rewards/margins": 0.18122906982898712, "rewards/rejected": -0.18128840625286102, "step": 12558 }, { "epoch": 8.68533886583679, "grad_norm": 5.54158878326416, "learning_rate": 7.303672967573383e-06, "log_odds_chosen": 11.576949119567871, "log_odds_ratio": -3.356914021424018e-05, "logits/chosen": -0.33821767568588257, "logits/rejected": -0.2222682237625122, "logps/chosen": -0.0006055298144929111, "logps/rejected": -2.9858524799346924, "loss": 0.4759, "nll_loss": 0.11898404359817505, "rewards/accuracies": 1.0, "rewards/chosen": -6.0552989452844486e-05, "rewards/margins": 0.2985247075557709, "rewards/rejected": -0.2985852360725403, "step": 12559 }, { "epoch": 8.686030428769017, "grad_norm": 3.1131091117858887, "learning_rate": 7.2998309512832335e-06, "log_odds_chosen": 9.934978485107422, "log_odds_ratio": -0.00014396195183508098, "logits/chosen": -0.383375346660614, "logits/rejected": -0.39323872327804565, "logps/chosen": -0.00020214408868923783, "logps/rejected": -1.5136477947235107, "loss": 0.3916, "nll_loss": 0.09787482768297195, "rewards/accuracies": 1.0, "rewards/chosen": -2.0214409232721664e-05, "rewards/margins": 0.15134456753730774, "rewards/rejected": -0.1513647884130478, "step": 12560 }, { "epoch": 8.686721991701244, "grad_norm": 2.7549195289611816, "learning_rate": 7.295988934993085e-06, "log_odds_chosen": 10.395133972167969, "log_odds_ratio": -0.0001060390131897293, "logits/chosen": 0.09680791199207306, "logits/rejected": -0.08237051218748093, "logps/chosen": -0.00030865223379805684, "logps/rejected": -2.023832321166992, "loss": 0.2334, "nll_loss": 0.058332134038209915, "rewards/accuracies": 1.0, "rewards/chosen": -3.086522701778449e-05, "rewards/margins": 0.2023523598909378, "rewards/rejected": -0.20238322019577026, "step": 12561 }, { "epoch": 8.687413554633471, "grad_norm": 4.050837516784668, "learning_rate": 7.292146918702936e-06, "log_odds_chosen": 11.533906936645508, "log_odds_ratio": -4.815634747501463e-05, "logits/chosen": -0.41314229369163513, "logits/rejected": -0.3989812135696411, "logps/chosen": -0.00024749228032305837, "logps/rejected": -2.437464714050293, "loss": 0.413, "nll_loss": 0.10324068367481232, "rewards/accuracies": 1.0, "rewards/chosen": -2.4749231670284644e-05, "rewards/margins": 0.24372172355651855, "rewards/rejected": -0.24374648928642273, "step": 12562 }, { "epoch": 8.688105117565698, "grad_norm": 4.586285591125488, "learning_rate": 7.288304902412786e-06, "log_odds_chosen": 12.125259399414062, "log_odds_ratio": -1.427874667569995e-05, "logits/chosen": -0.2432543784379959, "logits/rejected": -0.3548405170440674, "logps/chosen": -0.00017908416339196265, "logps/rejected": -3.028646469116211, "loss": 0.6192, "nll_loss": 0.15479278564453125, "rewards/accuracies": 1.0, "rewards/chosen": -1.7908416339196265e-05, "rewards/margins": 0.3028467297554016, "rewards/rejected": -0.3028646409511566, "step": 12563 }, { "epoch": 8.688796680497925, "grad_norm": 3.127497434616089, "learning_rate": 7.284462886122638e-06, "log_odds_chosen": 11.781786918640137, "log_odds_ratio": -2.0596940885297954e-05, "logits/chosen": -0.7333821654319763, "logits/rejected": -0.80907142162323, "logps/chosen": -0.0001434225996490568, "logps/rejected": -2.65181565284729, "loss": 0.4069, "nll_loss": 0.10172779858112335, "rewards/accuracies": 1.0, "rewards/chosen": -1.4342261238198262e-05, "rewards/margins": 0.265167236328125, "rewards/rejected": -0.2651815712451935, "step": 12564 }, { "epoch": 8.689488243430151, "grad_norm": 5.145379066467285, "learning_rate": 7.280620869832488e-06, "log_odds_chosen": 11.970072746276855, "log_odds_ratio": -1.3783266695099883e-05, "logits/chosen": -0.2855966091156006, "logits/rejected": -0.2714526653289795, "logps/chosen": -0.0001378589222440496, "logps/rejected": -2.928068161010742, "loss": 0.5744, "nll_loss": 0.1435983031988144, "rewards/accuracies": 1.0, "rewards/chosen": -1.3785893315798603e-05, "rewards/margins": 0.29279303550720215, "rewards/rejected": -0.29280680418014526, "step": 12565 }, { "epoch": 8.690179806362378, "grad_norm": 4.036627769470215, "learning_rate": 7.276778853542339e-06, "log_odds_chosen": 10.717005729675293, "log_odds_ratio": -0.0003459883155301213, "logits/chosen": -0.26304930448532104, "logits/rejected": -0.363567054271698, "logps/chosen": -0.00023860861256252974, "logps/rejected": -1.7904492616653442, "loss": 0.4647, "nll_loss": 0.11615101993083954, "rewards/accuracies": 1.0, "rewards/chosen": -2.3860862711444497e-05, "rewards/margins": 0.17902106046676636, "rewards/rejected": -0.1790449321269989, "step": 12566 }, { "epoch": 8.690871369294605, "grad_norm": 4.8369011878967285, "learning_rate": 7.2729368372521905e-06, "log_odds_chosen": 12.138534545898438, "log_odds_ratio": -1.8096015992341563e-05, "logits/chosen": -0.3726271688938141, "logits/rejected": -0.3865164518356323, "logps/chosen": -0.0001636394445085898, "logps/rejected": -2.8079988956451416, "loss": 0.4224, "nll_loss": 0.1056041345000267, "rewards/accuracies": 1.0, "rewards/chosen": -1.6363945178454742e-05, "rewards/margins": 0.2807835340499878, "rewards/rejected": -0.28079989552497864, "step": 12567 }, { "epoch": 8.691562932226832, "grad_norm": 4.219342231750488, "learning_rate": 7.269094820962041e-06, "log_odds_chosen": 11.014745712280273, "log_odds_ratio": -4.224685108056292e-05, "logits/chosen": -0.633184015750885, "logits/rejected": -0.5657342076301575, "logps/chosen": -0.00012159225298091769, "logps/rejected": -2.0499179363250732, "loss": 0.3539, "nll_loss": 0.08847401291131973, "rewards/accuracies": 1.0, "rewards/chosen": -1.215922566188965e-05, "rewards/margins": 0.20497965812683105, "rewards/rejected": -0.20499181747436523, "step": 12568 }, { "epoch": 8.692254495159059, "grad_norm": 3.421389579772949, "learning_rate": 7.265252804671892e-06, "log_odds_chosen": 11.144598007202148, "log_odds_ratio": -2.0327761376393028e-05, "logits/chosen": 0.20896287262439728, "logits/rejected": 0.19816726446151733, "logps/chosen": -0.0002626884379424155, "logps/rejected": -2.649585485458374, "loss": 0.4227, "nll_loss": 0.10567466914653778, "rewards/accuracies": 1.0, "rewards/chosen": -2.626884452183731e-05, "rewards/margins": 0.2649322748184204, "rewards/rejected": -0.26495856046676636, "step": 12569 }, { "epoch": 8.692946058091286, "grad_norm": 3.9324991703033447, "learning_rate": 7.2614107883817436e-06, "log_odds_chosen": 12.014698028564453, "log_odds_ratio": -1.5872808944550343e-05, "logits/chosen": -0.4532574415206909, "logits/rejected": -0.5123213529586792, "logps/chosen": -9.749118908075616e-05, "logps/rejected": -2.4674978256225586, "loss": 0.4644, "nll_loss": 0.11609979718923569, "rewards/accuracies": 1.0, "rewards/chosen": -9.749119271873496e-06, "rewards/margins": 0.24674005806446075, "rewards/rejected": -0.24674980342388153, "step": 12570 }, { "epoch": 8.693637621023512, "grad_norm": 5.530608177185059, "learning_rate": 7.257568772091594e-06, "log_odds_chosen": 9.991994857788086, "log_odds_ratio": -0.0002486288140062243, "logits/chosen": -0.33492887020111084, "logits/rejected": -0.41823023557662964, "logps/chosen": -0.0004774140543304384, "logps/rejected": -2.2885494232177734, "loss": 0.4417, "nll_loss": 0.11040748655796051, "rewards/accuracies": 1.0, "rewards/chosen": -4.774140688823536e-05, "rewards/margins": 0.2288072109222412, "rewards/rejected": -0.2288549542427063, "step": 12571 }, { "epoch": 8.69432918395574, "grad_norm": 3.9434962272644043, "learning_rate": 7.253726755801444e-06, "log_odds_chosen": 11.857610702514648, "log_odds_ratio": -2.52670215559192e-05, "logits/chosen": -0.10745374858379364, "logits/rejected": -0.15593905746936798, "logps/chosen": -0.00013630901230499148, "logps/rejected": -2.7091565132141113, "loss": 0.5198, "nll_loss": 0.12993690371513367, "rewards/accuracies": 1.0, "rewards/chosen": -1.363090177619597e-05, "rewards/margins": 0.27090200781822205, "rewards/rejected": -0.2709156572818756, "step": 12572 }, { "epoch": 8.695020746887966, "grad_norm": 4.2418212890625, "learning_rate": 7.249884739511297e-06, "log_odds_chosen": 11.451732635498047, "log_odds_ratio": -3.787030072999187e-05, "logits/chosen": -0.5961735844612122, "logits/rejected": -0.6251358389854431, "logps/chosen": -0.0001301948941545561, "logps/rejected": -2.368989944458008, "loss": 0.4418, "nll_loss": 0.11045637726783752, "rewards/accuracies": 1.0, "rewards/chosen": -1.301948941545561e-05, "rewards/margins": 0.23688596487045288, "rewards/rejected": -0.2368989884853363, "step": 12573 }, { "epoch": 8.695712309820193, "grad_norm": 3.2620646953582764, "learning_rate": 7.2460427232211466e-06, "log_odds_chosen": 11.303452491760254, "log_odds_ratio": -3.886825652443804e-05, "logits/chosen": -0.9809278249740601, "logits/rejected": -0.7991894483566284, "logps/chosen": -0.0001622609415790066, "logps/rejected": -1.9842360019683838, "loss": 0.3051, "nll_loss": 0.07626000046730042, "rewards/accuracies": 1.0, "rewards/chosen": -1.6226094885496423e-05, "rewards/margins": 0.198407381772995, "rewards/rejected": -0.1984236091375351, "step": 12574 }, { "epoch": 8.69640387275242, "grad_norm": 2.8854849338531494, "learning_rate": 7.242200706930997e-06, "log_odds_chosen": 10.891356468200684, "log_odds_ratio": -0.00012532320397440344, "logits/chosen": -0.41142168641090393, "logits/rejected": -0.4985794425010681, "logps/chosen": -0.0002292045101057738, "logps/rejected": -2.070760726928711, "loss": 0.267, "nll_loss": 0.06673512607812881, "rewards/accuracies": 1.0, "rewards/chosen": -2.29204506467795e-05, "rewards/margins": 0.20705315470695496, "rewards/rejected": -0.2070760726928711, "step": 12575 }, { "epoch": 8.697095435684647, "grad_norm": 4.4359211921691895, "learning_rate": 7.238358690640849e-06, "log_odds_chosen": 10.509967803955078, "log_odds_ratio": -0.00018874961824622005, "logits/chosen": 0.07059525698423386, "logits/rejected": -0.06952603161334991, "logps/chosen": -0.00029744295170530677, "logps/rejected": -2.1389076709747314, "loss": 0.5862, "nll_loss": 0.14652371406555176, "rewards/accuracies": 1.0, "rewards/chosen": -2.9744294806732796e-05, "rewards/margins": 0.21386101841926575, "rewards/rejected": -0.21389076113700867, "step": 12576 }, { "epoch": 8.697786998616873, "grad_norm": 4.180637836456299, "learning_rate": 7.2345166743507e-06, "log_odds_chosen": 11.173707962036133, "log_odds_ratio": -3.307564475107938e-05, "logits/chosen": -0.3616059124469757, "logits/rejected": -0.38579240441322327, "logps/chosen": -0.000232769685680978, "logps/rejected": -2.3975720405578613, "loss": 0.4217, "nll_loss": 0.1054258793592453, "rewards/accuracies": 1.0, "rewards/chosen": -2.3276967112906277e-05, "rewards/margins": 0.23973393440246582, "rewards/rejected": -0.2397572249174118, "step": 12577 }, { "epoch": 8.6984785615491, "grad_norm": 4.1263346672058105, "learning_rate": 7.23067465806055e-06, "log_odds_chosen": 10.084461212158203, "log_odds_ratio": -0.000205255564651452, "logits/chosen": -0.4047377109527588, "logits/rejected": -0.41672807931900024, "logps/chosen": -0.0005328357219696045, "logps/rejected": -2.0177836418151855, "loss": 0.4105, "nll_loss": 0.1026056632399559, "rewards/accuracies": 1.0, "rewards/chosen": -5.3283569286577404e-05, "rewards/margins": 0.2017250657081604, "rewards/rejected": -0.2017783522605896, "step": 12578 }, { "epoch": 8.699170124481327, "grad_norm": 4.116734981536865, "learning_rate": 7.226832641770402e-06, "log_odds_chosen": 11.684539794921875, "log_odds_ratio": -3.806072345469147e-05, "logits/chosen": -0.21267051994800568, "logits/rejected": -0.277524471282959, "logps/chosen": -0.00014213379472494125, "logps/rejected": -2.839292049407959, "loss": 0.6086, "nll_loss": 0.1521454155445099, "rewards/accuracies": 1.0, "rewards/chosen": -1.4213380381988827e-05, "rewards/margins": 0.2839149534702301, "rewards/rejected": -0.28392916917800903, "step": 12579 }, { "epoch": 8.699861687413554, "grad_norm": 4.5581135749816895, "learning_rate": 7.222990625480253e-06, "log_odds_chosen": 10.256507873535156, "log_odds_ratio": -0.0004174423520453274, "logits/chosen": 0.030440326780080795, "logits/rejected": -0.15859153866767883, "logps/chosen": -0.0005254342686384916, "logps/rejected": -1.7349371910095215, "loss": 0.458, "nll_loss": 0.11446496844291687, "rewards/accuracies": 1.0, "rewards/chosen": -5.254342977423221e-05, "rewards/margins": 0.17344118654727936, "rewards/rejected": -0.17349371314048767, "step": 12580 }, { "epoch": 8.70055325034578, "grad_norm": 3.6485166549682617, "learning_rate": 7.2191486091901035e-06, "log_odds_chosen": 10.942009925842285, "log_odds_ratio": -6.669512367807329e-05, "logits/chosen": -0.26107847690582275, "logits/rejected": -0.24480968713760376, "logps/chosen": -0.0002021636173594743, "logps/rejected": -2.267293930053711, "loss": 0.3822, "nll_loss": 0.09555569291114807, "rewards/accuracies": 1.0, "rewards/chosen": -2.0216362827341072e-05, "rewards/margins": 0.22670917212963104, "rewards/rejected": -0.2267293930053711, "step": 12581 }, { "epoch": 8.701244813278008, "grad_norm": 5.63314962387085, "learning_rate": 7.215306592899953e-06, "log_odds_chosen": 11.545077323913574, "log_odds_ratio": -1.9420609532971866e-05, "logits/chosen": -0.17932385206222534, "logits/rejected": -0.29974794387817383, "logps/chosen": -0.00010438306344440207, "logps/rejected": -2.3992764949798584, "loss": 0.4362, "nll_loss": 0.10905801504850388, "rewards/accuracies": 1.0, "rewards/chosen": -1.0438306162541267e-05, "rewards/margins": 0.23991720378398895, "rewards/rejected": -0.23992764949798584, "step": 12582 }, { "epoch": 8.701936376210234, "grad_norm": 8.555891036987305, "learning_rate": 7.211464576609806e-06, "log_odds_chosen": 11.477520942687988, "log_odds_ratio": -1.730481380946003e-05, "logits/chosen": -0.5060890316963196, "logits/rejected": -0.5176623463630676, "logps/chosen": -0.00015273058670572937, "logps/rejected": -2.328230619430542, "loss": 0.3748, "nll_loss": 0.0936933383345604, "rewards/accuracies": 1.0, "rewards/chosen": -1.5273057215381414e-05, "rewards/margins": 0.23280777037143707, "rewards/rejected": -0.23282305896282196, "step": 12583 }, { "epoch": 8.702627939142461, "grad_norm": 6.84820556640625, "learning_rate": 7.207622560319656e-06, "log_odds_chosen": 10.907526969909668, "log_odds_ratio": -6.034345642547123e-05, "logits/chosen": -0.2660806477069855, "logits/rejected": -0.34288960695266724, "logps/chosen": -0.0004112754249945283, "logps/rejected": -2.787398338317871, "loss": 0.4983, "nll_loss": 0.12456180900335312, "rewards/accuracies": 1.0, "rewards/chosen": -4.11275468650274e-05, "rewards/margins": 0.27869871258735657, "rewards/rejected": -0.2787398397922516, "step": 12584 }, { "epoch": 8.703319502074688, "grad_norm": 5.089015007019043, "learning_rate": 7.2037805440295065e-06, "log_odds_chosen": 10.05128288269043, "log_odds_ratio": -0.00040466885548084974, "logits/chosen": -0.9034023880958557, "logits/rejected": -0.8430752754211426, "logps/chosen": -0.00016636928194202483, "logps/rejected": -1.6874971389770508, "loss": 0.5554, "nll_loss": 0.13880708813667297, "rewards/accuracies": 1.0, "rewards/chosen": -1.6636928194202483e-05, "rewards/margins": 0.16873309016227722, "rewards/rejected": -0.16874971985816956, "step": 12585 }, { "epoch": 8.704011065006915, "grad_norm": 5.3731465339660645, "learning_rate": 7.199938527739358e-06, "log_odds_chosen": 11.04498291015625, "log_odds_ratio": -8.862379036145285e-05, "logits/chosen": -0.3212606906890869, "logits/rejected": -0.44569075107574463, "logps/chosen": -0.0004522545204963535, "logps/rejected": -2.747040271759033, "loss": 0.4199, "nll_loss": 0.10495775192975998, "rewards/accuracies": 1.0, "rewards/chosen": -4.5225449866848066e-05, "rewards/margins": 0.27465879917144775, "rewards/rejected": -0.2747040390968323, "step": 12586 }, { "epoch": 8.704702627939142, "grad_norm": 3.756265640258789, "learning_rate": 7.196096511449209e-06, "log_odds_chosen": 9.995338439941406, "log_odds_ratio": -8.589846402173862e-05, "logits/chosen": -0.2591727674007416, "logits/rejected": -0.32403162121772766, "logps/chosen": -0.00024264455714728683, "logps/rejected": -1.6683635711669922, "loss": 0.3217, "nll_loss": 0.08042872697114944, "rewards/accuracies": 1.0, "rewards/chosen": -2.4264456442324445e-05, "rewards/margins": 0.16681209206581116, "rewards/rejected": -0.16683635115623474, "step": 12587 }, { "epoch": 8.705394190871369, "grad_norm": 3.1371610164642334, "learning_rate": 7.19225449515906e-06, "log_odds_chosen": 9.871097564697266, "log_odds_ratio": -0.00022018066374585032, "logits/chosen": -0.3875494599342346, "logits/rejected": -0.42681199312210083, "logps/chosen": -0.0001549256849102676, "logps/rejected": -1.2135441303253174, "loss": 0.2897, "nll_loss": 0.07240563631057739, "rewards/accuracies": 1.0, "rewards/chosen": -1.549256921862252e-05, "rewards/margins": 0.12133892625570297, "rewards/rejected": -0.12135441601276398, "step": 12588 }, { "epoch": 8.706085753803595, "grad_norm": 8.206844329833984, "learning_rate": 7.188412478868911e-06, "log_odds_chosen": 9.889083862304688, "log_odds_ratio": -7.846077642170712e-05, "logits/chosen": -0.5176295638084412, "logits/rejected": -0.6087726950645447, "logps/chosen": -0.0003467805508989841, "logps/rejected": -1.7795064449310303, "loss": 0.3239, "nll_loss": 0.08095990866422653, "rewards/accuracies": 1.0, "rewards/chosen": -3.4678054362302646e-05, "rewards/margins": 0.17791596055030823, "rewards/rejected": -0.1779506504535675, "step": 12589 }, { "epoch": 8.706777316735822, "grad_norm": 3.7519938945770264, "learning_rate": 7.184570462578762e-06, "log_odds_chosen": 10.750160217285156, "log_odds_ratio": -0.00017055787611752748, "logits/chosen": -0.15037044882774353, "logits/rejected": -0.26256707310676575, "logps/chosen": -0.000679384043905884, "logps/rejected": -2.163872003555298, "loss": 0.3447, "nll_loss": 0.08616575598716736, "rewards/accuracies": 1.0, "rewards/chosen": -6.793840293539688e-05, "rewards/margins": 0.2163192480802536, "rewards/rejected": -0.21638718247413635, "step": 12590 }, { "epoch": 8.707468879668049, "grad_norm": 3.6036159992218018, "learning_rate": 7.180728446288612e-06, "log_odds_chosen": 10.451128959655762, "log_odds_ratio": -9.225706162396818e-05, "logits/chosen": -0.20861420035362244, "logits/rejected": -0.27226394414901733, "logps/chosen": -0.00017163812299259007, "logps/rejected": -2.0080366134643555, "loss": 0.5543, "nll_loss": 0.13857188820838928, "rewards/accuracies": 1.0, "rewards/chosen": -1.7163811207865365e-05, "rewards/margins": 0.2007865309715271, "rewards/rejected": -0.20080366730690002, "step": 12591 }, { "epoch": 8.708160442600276, "grad_norm": 3.12872314453125, "learning_rate": 7.176886429998464e-06, "log_odds_chosen": 11.708080291748047, "log_odds_ratio": -1.2221262295497581e-05, "logits/chosen": -0.8714234232902527, "logits/rejected": -0.8446594476699829, "logps/chosen": -0.0001258108823094517, "logps/rejected": -2.541940927505493, "loss": 0.2771, "nll_loss": 0.0692775622010231, "rewards/accuracies": 1.0, "rewards/chosen": -1.2581089322338812e-05, "rewards/margins": 0.25418150424957275, "rewards/rejected": -0.25419408082962036, "step": 12592 }, { "epoch": 8.708852005532503, "grad_norm": 3.5991594791412354, "learning_rate": 7.173044413708314e-06, "log_odds_chosen": 10.483360290527344, "log_odds_ratio": -0.00011691125109791756, "logits/chosen": -0.4403286278247833, "logits/rejected": -0.5139305591583252, "logps/chosen": -0.00017070303147193044, "logps/rejected": -2.086862087249756, "loss": 0.4266, "nll_loss": 0.10663561522960663, "rewards/accuracies": 1.0, "rewards/chosen": -1.7070302419597283e-05, "rewards/margins": 0.20866911113262177, "rewards/rejected": -0.2086862176656723, "step": 12593 }, { "epoch": 8.70954356846473, "grad_norm": 5.748128890991211, "learning_rate": 7.169202397418165e-06, "log_odds_chosen": 9.889494895935059, "log_odds_ratio": -0.0004682771395891905, "logits/chosen": -0.1965133547782898, "logits/rejected": -0.24669188261032104, "logps/chosen": -0.00048058730317279696, "logps/rejected": -1.8298323154449463, "loss": 0.334, "nll_loss": 0.08344646543264389, "rewards/accuracies": 1.0, "rewards/chosen": -4.8058729589683935e-05, "rewards/margins": 0.1829351782798767, "rewards/rejected": -0.18298323452472687, "step": 12594 }, { "epoch": 8.710235131396956, "grad_norm": 2.409346103668213, "learning_rate": 7.1653603811280165e-06, "log_odds_chosen": 10.281041145324707, "log_odds_ratio": -8.93459000508301e-05, "logits/chosen": -0.09198392927646637, "logits/rejected": -0.20935139060020447, "logps/chosen": -0.00020214702817611396, "logps/rejected": -1.562282681465149, "loss": 0.2675, "nll_loss": 0.06685630977153778, "rewards/accuracies": 1.0, "rewards/chosen": -2.0214702090015635e-05, "rewards/margins": 0.15620805323123932, "rewards/rejected": -0.15622827410697937, "step": 12595 }, { "epoch": 8.710926694329183, "grad_norm": 3.746424674987793, "learning_rate": 7.161518364837867e-06, "log_odds_chosen": 10.644408226013184, "log_odds_ratio": -0.0003350527840666473, "logits/chosen": -0.25374066829681396, "logits/rejected": -0.3007930517196655, "logps/chosen": -0.0003922658215742558, "logps/rejected": -1.6606199741363525, "loss": 0.3313, "nll_loss": 0.08277983963489532, "rewards/accuracies": 1.0, "rewards/chosen": -3.9226582885021344e-05, "rewards/margins": 0.16602277755737305, "rewards/rejected": -0.16606199741363525, "step": 12596 }, { "epoch": 8.71161825726141, "grad_norm": 4.901035785675049, "learning_rate": 7.157676348547718e-06, "log_odds_chosen": 11.265356063842773, "log_odds_ratio": -2.715396294661332e-05, "logits/chosen": 0.05210549384355545, "logits/rejected": -0.05895791947841644, "logps/chosen": -0.00033751592854969203, "logps/rejected": -2.655134916305542, "loss": 0.6153, "nll_loss": 0.15382874011993408, "rewards/accuracies": 1.0, "rewards/chosen": -3.375159576535225e-05, "rewards/margins": 0.2654797434806824, "rewards/rejected": -0.26551347970962524, "step": 12597 }, { "epoch": 8.712309820193637, "grad_norm": 5.053715705871582, "learning_rate": 7.15383433225757e-06, "log_odds_chosen": 11.885496139526367, "log_odds_ratio": -4.7778376028873026e-05, "logits/chosen": -0.03668589144945145, "logits/rejected": -0.13197988271713257, "logps/chosen": -0.00018731525051407516, "logps/rejected": -2.8411545753479004, "loss": 0.4438, "nll_loss": 0.11095211654901505, "rewards/accuracies": 1.0, "rewards/chosen": -1.87315272341948e-05, "rewards/margins": 0.28409671783447266, "rewards/rejected": -0.2841154634952545, "step": 12598 }, { "epoch": 8.713001383125864, "grad_norm": 3.501352071762085, "learning_rate": 7.14999231596742e-06, "log_odds_chosen": 10.418107986450195, "log_odds_ratio": -4.948129935655743e-05, "logits/chosen": -0.17310462892055511, "logits/rejected": -0.11154043674468994, "logps/chosen": -0.0005186637281440198, "logps/rejected": -1.961129903793335, "loss": 0.4102, "nll_loss": 0.10254395008087158, "rewards/accuracies": 1.0, "rewards/chosen": -5.1866372814401984e-05, "rewards/margins": 0.1960611343383789, "rewards/rejected": -0.1961129903793335, "step": 12599 }, { "epoch": 8.71369294605809, "grad_norm": 3.993454694747925, "learning_rate": 7.14615029967727e-06, "log_odds_chosen": 11.306490898132324, "log_odds_ratio": -1.9935872842324898e-05, "logits/chosen": -0.36342853307724, "logits/rejected": -0.5179613828659058, "logps/chosen": -0.00020630184735637158, "logps/rejected": -2.2232353687286377, "loss": 0.3613, "nll_loss": 0.09033246338367462, "rewards/accuracies": 1.0, "rewards/chosen": -2.0630184735637158e-05, "rewards/margins": 0.22230291366577148, "rewards/rejected": -0.22232355177402496, "step": 12600 }, { "epoch": 8.714384508990317, "grad_norm": 3.8765039443969727, "learning_rate": 7.142308283387123e-06, "log_odds_chosen": 11.331442832946777, "log_odds_ratio": -6.65429761284031e-05, "logits/chosen": -0.6077317595481873, "logits/rejected": -0.58716881275177, "logps/chosen": -0.0002417774376226589, "logps/rejected": -2.399134635925293, "loss": 0.4247, "nll_loss": 0.10616884380578995, "rewards/accuracies": 1.0, "rewards/chosen": -2.4177745217457414e-05, "rewards/margins": 0.23988929390907288, "rewards/rejected": -0.2399134635925293, "step": 12601 }, { "epoch": 8.715076071922544, "grad_norm": 4.196615219116211, "learning_rate": 7.138466267096973e-06, "log_odds_chosen": 10.872209548950195, "log_odds_ratio": -5.1147879275958985e-05, "logits/chosen": 0.13419261574745178, "logits/rejected": 0.06465723365545273, "logps/chosen": -0.00032623554579913616, "logps/rejected": -1.8758091926574707, "loss": 0.6551, "nll_loss": 0.1637628972530365, "rewards/accuracies": 1.0, "rewards/chosen": -3.26235567627009e-05, "rewards/margins": 0.18754829466342926, "rewards/rejected": -0.1875808984041214, "step": 12602 }, { "epoch": 8.715767634854771, "grad_norm": 3.0449070930480957, "learning_rate": 7.134624250806823e-06, "log_odds_chosen": 12.087318420410156, "log_odds_ratio": -3.8094327464932576e-05, "logits/chosen": -0.8015158176422119, "logits/rejected": -0.8395053148269653, "logps/chosen": -9.241785301128402e-05, "logps/rejected": -2.986556053161621, "loss": 0.3298, "nll_loss": 0.08245350420475006, "rewards/accuracies": 1.0, "rewards/chosen": -9.241785846825223e-06, "rewards/margins": 0.29864639043807983, "rewards/rejected": -0.29865562915802, "step": 12603 }, { "epoch": 8.716459197786998, "grad_norm": 4.552496433258057, "learning_rate": 7.130782234516675e-06, "log_odds_chosen": 11.790977478027344, "log_odds_ratio": -1.52910051838262e-05, "logits/chosen": -0.12492383271455765, "logits/rejected": 0.024242829531431198, "logps/chosen": -6.92198591423221e-05, "logps/rejected": -2.2918920516967773, "loss": 0.2776, "nll_loss": 0.06939011067152023, "rewards/accuracies": 1.0, "rewards/chosen": -6.921985914232209e-06, "rewards/margins": 0.22918227314949036, "rewards/rejected": -0.2291892021894455, "step": 12604 }, { "epoch": 8.717150760719225, "grad_norm": 2.587273359298706, "learning_rate": 7.126940218226526e-06, "log_odds_chosen": 11.168664932250977, "log_odds_ratio": -9.771065379027277e-05, "logits/chosen": -0.48414313793182373, "logits/rejected": -0.2710132300853729, "logps/chosen": -0.00022592968889512122, "logps/rejected": -2.34855318069458, "loss": 0.274, "nll_loss": 0.06848978996276855, "rewards/accuracies": 1.0, "rewards/chosen": -2.2592970708501525e-05, "rewards/margins": 0.2348327338695526, "rewards/rejected": -0.23485532402992249, "step": 12605 }, { "epoch": 8.717842323651452, "grad_norm": 3.0871849060058594, "learning_rate": 7.1230982019363765e-06, "log_odds_chosen": 11.11591911315918, "log_odds_ratio": -0.0003519427846185863, "logits/chosen": -0.18353600800037384, "logits/rejected": -0.18372249603271484, "logps/chosen": -0.0002869067538995296, "logps/rejected": -2.1926016807556152, "loss": 0.3918, "nll_loss": 0.09791938215494156, "rewards/accuracies": 1.0, "rewards/chosen": -2.869067611754872e-05, "rewards/margins": 0.2192314714193344, "rewards/rejected": -0.21926017105579376, "step": 12606 }, { "epoch": 8.718533886583678, "grad_norm": 4.727206707000732, "learning_rate": 7.119256185646228e-06, "log_odds_chosen": 12.682234764099121, "log_odds_ratio": -4.2280080378986895e-06, "logits/chosen": -0.4567912817001343, "logits/rejected": -0.41550421714782715, "logps/chosen": -7.959036156535149e-05, "logps/rejected": -3.1277332305908203, "loss": 0.4349, "nll_loss": 0.10871396958827972, "rewards/accuracies": 1.0, "rewards/chosen": -7.959036338434089e-06, "rewards/margins": 0.31276535987854004, "rewards/rejected": -0.31277331709861755, "step": 12607 }, { "epoch": 8.719225449515905, "grad_norm": 4.500866413116455, "learning_rate": 7.115414169356079e-06, "log_odds_chosen": 10.519453048706055, "log_odds_ratio": -0.00015784561401233077, "logits/chosen": -0.0011763647198677063, "logits/rejected": -0.051256291568279266, "logps/chosen": -0.0008152094087563455, "logps/rejected": -2.0021238327026367, "loss": 0.4842, "nll_loss": 0.12104595452547073, "rewards/accuracies": 1.0, "rewards/chosen": -8.152094233082607e-05, "rewards/margins": 0.2001308798789978, "rewards/rejected": -0.20021241903305054, "step": 12608 }, { "epoch": 8.719917012448132, "grad_norm": 5.224221706390381, "learning_rate": 7.111572153065929e-06, "log_odds_chosen": 11.925086975097656, "log_odds_ratio": -9.902300917019602e-06, "logits/chosen": -0.7283359169960022, "logits/rejected": -0.7884764075279236, "logps/chosen": -8.355738100362942e-05, "logps/rejected": -2.3458425998687744, "loss": 0.8125, "nll_loss": 0.20313361287117004, "rewards/accuracies": 1.0, "rewards/chosen": -8.355738827958703e-06, "rewards/margins": 0.23457591235637665, "rewards/rejected": -0.2345842719078064, "step": 12609 }, { "epoch": 8.720608575380359, "grad_norm": 3.826673746109009, "learning_rate": 7.107730136775781e-06, "log_odds_chosen": 10.73037052154541, "log_odds_ratio": -8.004449045984074e-05, "logits/chosen": -0.19814665615558624, "logits/rejected": -0.29370230436325073, "logps/chosen": -0.00048773473827168345, "logps/rejected": -2.245574712753296, "loss": 0.4769, "nll_loss": 0.11920658499002457, "rewards/accuracies": 1.0, "rewards/chosen": -4.877347237197682e-05, "rewards/margins": 0.22450870275497437, "rewards/rejected": -0.22455745935440063, "step": 12610 }, { "epoch": 8.721300138312586, "grad_norm": 4.613309860229492, "learning_rate": 7.103888120485631e-06, "log_odds_chosen": 10.130619049072266, "log_odds_ratio": -9.19510202948004e-05, "logits/chosen": 0.07331767678260803, "logits/rejected": -0.019994735717773438, "logps/chosen": -0.00010537736670812592, "logps/rejected": -1.35574471950531, "loss": 0.4954, "nll_loss": 0.12383987754583359, "rewards/accuracies": 1.0, "rewards/chosen": -1.0537736670812592e-05, "rewards/margins": 0.1355639398097992, "rewards/rejected": -0.13557447493076324, "step": 12611 }, { "epoch": 8.721991701244812, "grad_norm": 10.10327434539795, "learning_rate": 7.100046104195482e-06, "log_odds_chosen": 11.801833152770996, "log_odds_ratio": -3.427134288358502e-05, "logits/chosen": -0.4082002639770508, "logits/rejected": -0.5625779628753662, "logps/chosen": -0.0001247481704922393, "logps/rejected": -2.620378255844116, "loss": 0.4185, "nll_loss": 0.10463136434555054, "rewards/accuracies": 1.0, "rewards/chosen": -1.2474816685426049e-05, "rewards/margins": 0.2620253562927246, "rewards/rejected": -0.26203781366348267, "step": 12612 }, { "epoch": 8.72268326417704, "grad_norm": 3.1714866161346436, "learning_rate": 7.0962040879053326e-06, "log_odds_chosen": 10.744078636169434, "log_odds_ratio": -4.0337850805372e-05, "logits/chosen": -0.4073140025138855, "logits/rejected": -0.4945356547832489, "logps/chosen": -0.00017250381642952561, "logps/rejected": -1.7121458053588867, "loss": 0.3845, "nll_loss": 0.09612710773944855, "rewards/accuracies": 1.0, "rewards/chosen": -1.725038055155892e-05, "rewards/margins": 0.1711973249912262, "rewards/rejected": -0.17121456563472748, "step": 12613 }, { "epoch": 8.723374827109266, "grad_norm": 3.5619633197784424, "learning_rate": 7.092362071615184e-06, "log_odds_chosen": 11.23532485961914, "log_odds_ratio": -4.3949934479314834e-05, "logits/chosen": -0.22376228868961334, "logits/rejected": -0.18069423735141754, "logps/chosen": -0.0001710898068267852, "logps/rejected": -2.4304232597351074, "loss": 0.3353, "nll_loss": 0.08382796496152878, "rewards/accuracies": 1.0, "rewards/chosen": -1.7108981410274282e-05, "rewards/margins": 0.24302522838115692, "rewards/rejected": -0.24304234981536865, "step": 12614 }, { "epoch": 8.724066390041493, "grad_norm": 4.019108295440674, "learning_rate": 7.088520055325035e-06, "log_odds_chosen": 11.954331398010254, "log_odds_ratio": -3.5667791962623596e-05, "logits/chosen": -0.6766592860221863, "logits/rejected": -0.7944373488426208, "logps/chosen": -0.00015910463116597384, "logps/rejected": -2.911555767059326, "loss": 0.4366, "nll_loss": 0.10915455222129822, "rewards/accuracies": 1.0, "rewards/chosen": -1.5910463844193146e-05, "rewards/margins": 0.2911396622657776, "rewards/rejected": -0.2911555767059326, "step": 12615 }, { "epoch": 8.72475795297372, "grad_norm": 4.540794372558594, "learning_rate": 7.084678039034885e-06, "log_odds_chosen": 12.210094451904297, "log_odds_ratio": -1.2035889085382223e-05, "logits/chosen": -0.42616957426071167, "logits/rejected": -0.4792676568031311, "logps/chosen": -4.8736692406237125e-05, "logps/rejected": -2.242910861968994, "loss": 0.337, "nll_loss": 0.08424394577741623, "rewards/accuracies": 1.0, "rewards/chosen": -4.8736692406237125e-06, "rewards/margins": 0.22428621351718903, "rewards/rejected": -0.22429108619689941, "step": 12616 }, { "epoch": 8.725449515905947, "grad_norm": 3.611435890197754, "learning_rate": 7.080836022744737e-06, "log_odds_chosen": 12.544656753540039, "log_odds_ratio": -9.19141893973574e-06, "logits/chosen": -0.47252994775772095, "logits/rejected": -0.5143290758132935, "logps/chosen": -0.00015017985424492508, "logps/rejected": -3.355624198913574, "loss": 0.3825, "nll_loss": 0.09561695158481598, "rewards/accuracies": 1.0, "rewards/chosen": -1.5017985788290389e-05, "rewards/margins": 0.33554738759994507, "rewards/rejected": -0.33556240797042847, "step": 12617 }, { "epoch": 8.726141078838173, "grad_norm": 5.643194675445557, "learning_rate": 7.076994006454587e-06, "log_odds_chosen": 11.000103950500488, "log_odds_ratio": -0.008125041611492634, "logits/chosen": -0.12783555686473846, "logits/rejected": -0.19143040478229523, "logps/chosen": -0.00514611043035984, "logps/rejected": -2.533423900604248, "loss": 0.5958, "nll_loss": 0.14812599122524261, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005146110197529197, "rewards/margins": 0.2528277635574341, "rewards/rejected": -0.2533423900604248, "step": 12618 }, { "epoch": 8.7268326417704, "grad_norm": 5.0216803550720215, "learning_rate": 7.073151990164438e-06, "log_odds_chosen": 10.841806411743164, "log_odds_ratio": -0.00014175310207065195, "logits/chosen": -0.37150660157203674, "logits/rejected": -0.4800848066806793, "logps/chosen": -0.00025622386601753533, "logps/rejected": -2.5595288276672363, "loss": 0.5147, "nll_loss": 0.12865594029426575, "rewards/accuracies": 1.0, "rewards/chosen": -2.5622386601753533e-05, "rewards/margins": 0.25592726469039917, "rewards/rejected": -0.2559528946876526, "step": 12619 }, { "epoch": 8.727524204702627, "grad_norm": 3.8709161281585693, "learning_rate": 7.0693099738742895e-06, "log_odds_chosen": 10.722715377807617, "log_odds_ratio": -0.00010744819155661389, "logits/chosen": -0.04501526430249214, "logits/rejected": -0.27509522438049316, "logps/chosen": -0.00015670969150960445, "logps/rejected": -1.9246792793273926, "loss": 0.5193, "nll_loss": 0.12981583178043365, "rewards/accuracies": 1.0, "rewards/chosen": -1.5670970242354088e-05, "rewards/margins": 0.19245225191116333, "rewards/rejected": -0.19246794283390045, "step": 12620 }, { "epoch": 8.728215767634854, "grad_norm": 3.180833339691162, "learning_rate": 7.06546795758414e-06, "log_odds_chosen": 10.660358428955078, "log_odds_ratio": -8.569403871661052e-05, "logits/chosen": -0.3871857523918152, "logits/rejected": -0.3466258943080902, "logps/chosen": -0.0003636727633420378, "logps/rejected": -2.124570608139038, "loss": 0.3908, "nll_loss": 0.0976894348859787, "rewards/accuracies": 1.0, "rewards/chosen": -3.636727706179954e-05, "rewards/margins": 0.21242070198059082, "rewards/rejected": -0.2124570608139038, "step": 12621 }, { "epoch": 8.72890733056708, "grad_norm": 4.775147914886475, "learning_rate": 7.061625941293991e-06, "log_odds_chosen": 10.252630233764648, "log_odds_ratio": -0.0007642924902029335, "logits/chosen": -0.6822867393493652, "logits/rejected": -0.7102404832839966, "logps/chosen": -0.0007607060251757503, "logps/rejected": -2.434619426727295, "loss": 0.2722, "nll_loss": 0.06797634810209274, "rewards/accuracies": 1.0, "rewards/chosen": -7.607060251757503e-05, "rewards/margins": 0.24338586628437042, "rewards/rejected": -0.24346192181110382, "step": 12622 }, { "epoch": 8.729598893499308, "grad_norm": 3.3071470260620117, "learning_rate": 7.057783925003843e-06, "log_odds_chosen": 11.80417251586914, "log_odds_ratio": -4.535269908956252e-05, "logits/chosen": -0.32505127787590027, "logits/rejected": -0.33316338062286377, "logps/chosen": -0.00013507023686543107, "logps/rejected": -2.901522397994995, "loss": 0.3179, "nll_loss": 0.07946906983852386, "rewards/accuracies": 1.0, "rewards/chosen": -1.3507024050340988e-05, "rewards/margins": 0.29013872146606445, "rewards/rejected": -0.2901522219181061, "step": 12623 }, { "epoch": 8.730290456431534, "grad_norm": 2.9452579021453857, "learning_rate": 7.053941908713693e-06, "log_odds_chosen": 9.710962295532227, "log_odds_ratio": -0.0014099262189120054, "logits/chosen": -0.1288309246301651, "logits/rejected": -0.1669916808605194, "logps/chosen": -0.002055376535281539, "logps/rejected": -2.0088515281677246, "loss": 0.3109, "nll_loss": 0.07759317755699158, "rewards/accuracies": 1.0, "rewards/chosen": -0.00020553766808006912, "rewards/margins": 0.20067963004112244, "rewards/rejected": -0.20088517665863037, "step": 12624 }, { "epoch": 8.730982019363761, "grad_norm": 2.780109167098999, "learning_rate": 7.050099892423544e-06, "log_odds_chosen": 10.963064193725586, "log_odds_ratio": -2.1850135453860275e-05, "logits/chosen": -0.32372623682022095, "logits/rejected": -0.37397193908691406, "logps/chosen": -0.0004387985682114959, "logps/rejected": -2.7941770553588867, "loss": 0.4054, "nll_loss": 0.1013399064540863, "rewards/accuracies": 1.0, "rewards/chosen": -4.3879856093553826e-05, "rewards/margins": 0.27937382459640503, "rewards/rejected": -0.2794176936149597, "step": 12625 }, { "epoch": 8.731673582295988, "grad_norm": 4.202791213989258, "learning_rate": 7.046257876133396e-06, "log_odds_chosen": 11.598773002624512, "log_odds_ratio": -1.712938137643505e-05, "logits/chosen": 0.002315342426300049, "logits/rejected": -0.0762111097574234, "logps/chosen": -0.00010078588093165308, "logps/rejected": -2.4241111278533936, "loss": 0.5578, "nll_loss": 0.13943594694137573, "rewards/accuracies": 1.0, "rewards/chosen": -1.007858918455895e-05, "rewards/margins": 0.24240103363990784, "rewards/rejected": -0.24241109192371368, "step": 12626 }, { "epoch": 8.732365145228215, "grad_norm": 4.474009990692139, "learning_rate": 7.0424158598432464e-06, "log_odds_chosen": 10.89125919342041, "log_odds_ratio": -0.00019299234554637223, "logits/chosen": -0.2681261897087097, "logits/rejected": -0.3043631315231323, "logps/chosen": -0.001370918471366167, "logps/rejected": -1.9742841720581055, "loss": 0.798, "nll_loss": 0.1994776874780655, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013709186168853194, "rewards/margins": 0.1972913146018982, "rewards/rejected": -0.1974284052848816, "step": 12627 }, { "epoch": 8.733056708160442, "grad_norm": 3.0392186641693115, "learning_rate": 7.038573843553096e-06, "log_odds_chosen": 11.702457427978516, "log_odds_ratio": -1.5169207472354174e-05, "logits/chosen": -0.5146140456199646, "logits/rejected": -0.5965772867202759, "logps/chosen": -7.573777838842943e-05, "logps/rejected": -2.1300292015075684, "loss": 0.3362, "nll_loss": 0.08405561745166779, "rewards/accuracies": 1.0, "rewards/chosen": -7.573778020741884e-06, "rewards/margins": 0.21299535036087036, "rewards/rejected": -0.21300292015075684, "step": 12628 }, { "epoch": 8.733748271092669, "grad_norm": 4.198177337646484, "learning_rate": 7.034731827262949e-06, "log_odds_chosen": 10.71436882019043, "log_odds_ratio": -0.00033096273546107113, "logits/chosen": -0.3540208041667938, "logits/rejected": -0.417741596698761, "logps/chosen": -0.00041849075932987034, "logps/rejected": -2.053122043609619, "loss": 0.4197, "nll_loss": 0.10489177703857422, "rewards/accuracies": 1.0, "rewards/chosen": -4.184907447779551e-05, "rewards/margins": 0.20527033507823944, "rewards/rejected": -0.20531219244003296, "step": 12629 }, { "epoch": 8.734439834024897, "grad_norm": 6.847844123840332, "learning_rate": 7.030889810972799e-06, "log_odds_chosen": 11.515414237976074, "log_odds_ratio": -5.265471190796234e-05, "logits/chosen": -0.12134791165590286, "logits/rejected": -0.2235623151063919, "logps/chosen": -9.148490789812058e-05, "logps/rejected": -2.0303359031677246, "loss": 0.5226, "nll_loss": 0.13064485788345337, "rewards/accuracies": 1.0, "rewards/chosen": -9.14849169930676e-06, "rewards/margins": 0.2030244767665863, "rewards/rejected": -0.20303361117839813, "step": 12630 }, { "epoch": 8.735131396957122, "grad_norm": 3.86293625831604, "learning_rate": 7.0270477946826494e-06, "log_odds_chosen": 10.796789169311523, "log_odds_ratio": -5.433675687527284e-05, "logits/chosen": -0.15211236476898193, "logits/rejected": -0.2016526758670807, "logps/chosen": -0.0005663117044605315, "logps/rejected": -2.7149319648742676, "loss": 0.5026, "nll_loss": 0.12565679848194122, "rewards/accuracies": 1.0, "rewards/chosen": -5.663117190124467e-05, "rewards/margins": 0.27143657207489014, "rewards/rejected": -0.27149319648742676, "step": 12631 }, { "epoch": 8.73582295988935, "grad_norm": 5.464266777038574, "learning_rate": 7.023205778392501e-06, "log_odds_chosen": 10.550954818725586, "log_odds_ratio": -7.085293327691033e-05, "logits/chosen": -0.4823170304298401, "logits/rejected": -0.607326328754425, "logps/chosen": -0.00017116402159444988, "logps/rejected": -1.851062536239624, "loss": 0.4346, "nll_loss": 0.10864056646823883, "rewards/accuracies": 1.0, "rewards/chosen": -1.711640288704075e-05, "rewards/margins": 0.1850891411304474, "rewards/rejected": -0.18510624766349792, "step": 12632 }, { "epoch": 8.736514522821576, "grad_norm": 2.564517021179199, "learning_rate": 7.019363762102352e-06, "log_odds_chosen": 10.802752494812012, "log_odds_ratio": -5.0851380365202203e-05, "logits/chosen": -0.13559579849243164, "logits/rejected": -0.07748744636774063, "logps/chosen": -0.0002571075165178627, "logps/rejected": -2.224565267562866, "loss": 0.3544, "nll_loss": 0.08859890699386597, "rewards/accuracies": 1.0, "rewards/chosen": -2.571075310697779e-05, "rewards/margins": 0.22243082523345947, "rewards/rejected": -0.22245654463768005, "step": 12633 }, { "epoch": 8.737206085753805, "grad_norm": 2.7891175746917725, "learning_rate": 7.0155217458122025e-06, "log_odds_chosen": 10.613808631896973, "log_odds_ratio": -3.704680784721859e-05, "logits/chosen": -0.3028711974620819, "logits/rejected": -0.13343878090381622, "logps/chosen": -0.00017175314133055508, "logps/rejected": -1.9700030088424683, "loss": 0.3105, "nll_loss": 0.07763022929430008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7175312677863985e-05, "rewards/margins": 0.19698314368724823, "rewards/rejected": -0.19700030982494354, "step": 12634 }, { "epoch": 8.73789764868603, "grad_norm": 3.843463182449341, "learning_rate": 7.011679729522054e-06, "log_odds_chosen": 11.393573760986328, "log_odds_ratio": -0.00019181430980097502, "logits/chosen": -0.39797934889793396, "logits/rejected": -0.5349745154380798, "logps/chosen": -0.00023400085046887398, "logps/rejected": -2.7963550090789795, "loss": 0.3211, "nll_loss": 0.08026115596294403, "rewards/accuracies": 1.0, "rewards/chosen": -2.3400087229674682e-05, "rewards/margins": 0.27961212396621704, "rewards/rejected": -0.279635488986969, "step": 12635 }, { "epoch": 8.738589211618258, "grad_norm": 3.8218467235565186, "learning_rate": 7.007837713231905e-06, "log_odds_chosen": 11.811062812805176, "log_odds_ratio": -8.913544661481865e-06, "logits/chosen": -0.33981579542160034, "logits/rejected": -0.4165121912956238, "logps/chosen": -0.00015810728655196726, "logps/rejected": -2.7615065574645996, "loss": 0.3937, "nll_loss": 0.09841378778219223, "rewards/accuracies": 1.0, "rewards/chosen": -1.5810728655196726e-05, "rewards/margins": 0.2761348485946655, "rewards/rejected": -0.276150643825531, "step": 12636 }, { "epoch": 8.739280774550483, "grad_norm": 3.311710834503174, "learning_rate": 7.003995696941755e-06, "log_odds_chosen": 11.325023651123047, "log_odds_ratio": -3.8699912693118677e-05, "logits/chosen": -0.4233155846595764, "logits/rejected": -0.43373045325279236, "logps/chosen": -0.000263809080934152, "logps/rejected": -2.1461169719696045, "loss": 0.3791, "nll_loss": 0.09477078914642334, "rewards/accuracies": 1.0, "rewards/chosen": -2.6380910640000366e-05, "rewards/margins": 0.2145853191614151, "rewards/rejected": -0.2146117091178894, "step": 12637 }, { "epoch": 8.739972337482712, "grad_norm": 4.0727715492248535, "learning_rate": 7.000153680651607e-06, "log_odds_chosen": 11.421920776367188, "log_odds_ratio": -5.327256803866476e-05, "logits/chosen": -0.4592892527580261, "logits/rejected": -0.5124595165252686, "logps/chosen": -0.00022929380065761507, "logps/rejected": -2.3530163764953613, "loss": 0.4648, "nll_loss": 0.11619154363870621, "rewards/accuracies": 1.0, "rewards/chosen": -2.2929380065761507e-05, "rewards/margins": 0.23527869582176208, "rewards/rejected": -0.2353016436100006, "step": 12638 }, { "epoch": 8.740663900414937, "grad_norm": 6.244078159332275, "learning_rate": 6.996311664361457e-06, "log_odds_chosen": 11.745616912841797, "log_odds_ratio": -9.599408076610416e-05, "logits/chosen": -0.004837028216570616, "logits/rejected": 0.05525718629360199, "logps/chosen": -0.0002785644610412419, "logps/rejected": -3.0801918506622314, "loss": 0.3365, "nll_loss": 0.08412357419729233, "rewards/accuracies": 1.0, "rewards/chosen": -2.7856445740326308e-05, "rewards/margins": 0.3079913258552551, "rewards/rejected": -0.3080191910266876, "step": 12639 }, { "epoch": 8.741355463347166, "grad_norm": 3.8222501277923584, "learning_rate": 6.992469648071308e-06, "log_odds_chosen": 12.153668403625488, "log_odds_ratio": -1.307290222030133e-05, "logits/chosen": -0.1858936995267868, "logits/rejected": -0.28293725848197937, "logps/chosen": -0.00013134724576957524, "logps/rejected": -2.8148396015167236, "loss": 0.6301, "nll_loss": 0.15752094984054565, "rewards/accuracies": 1.0, "rewards/chosen": -1.3134726032149047e-05, "rewards/margins": 0.2814708352088928, "rewards/rejected": -0.2814839482307434, "step": 12640 }, { "epoch": 8.74204702627939, "grad_norm": 3.784421682357788, "learning_rate": 6.9886276317811595e-06, "log_odds_chosen": 11.424051284790039, "log_odds_ratio": -7.20424868632108e-05, "logits/chosen": -0.2345329374074936, "logits/rejected": -0.20033228397369385, "logps/chosen": -0.00031157504417933524, "logps/rejected": -3.2700071334838867, "loss": 0.3724, "nll_loss": 0.09310246258974075, "rewards/accuracies": 1.0, "rewards/chosen": -3.1157505873125046e-05, "rewards/margins": 0.32696956396102905, "rewards/rejected": -0.3270007371902466, "step": 12641 }, { "epoch": 8.74273858921162, "grad_norm": 3.0729806423187256, "learning_rate": 6.98478561549101e-06, "log_odds_chosen": 12.01394271850586, "log_odds_ratio": -2.7902418878511526e-05, "logits/chosen": -0.21632501482963562, "logits/rejected": -0.30397987365722656, "logps/chosen": -0.00028304714942350984, "logps/rejected": -3.192939519882202, "loss": 0.4473, "nll_loss": 0.11181334406137466, "rewards/accuracies": 1.0, "rewards/chosen": -2.830471385095734e-05, "rewards/margins": 0.3192656636238098, "rewards/rejected": -0.31929394602775574, "step": 12642 }, { "epoch": 8.743430152143844, "grad_norm": 4.0299458503723145, "learning_rate": 6.980943599200861e-06, "log_odds_chosen": 12.166967391967773, "log_odds_ratio": -1.8124801499652676e-05, "logits/chosen": -0.19768285751342773, "logits/rejected": -0.19605018198490143, "logps/chosen": -8.931868069339544e-05, "logps/rejected": -2.820674419403076, "loss": 0.4265, "nll_loss": 0.10661443322896957, "rewards/accuracies": 1.0, "rewards/chosen": -8.931868251238484e-06, "rewards/margins": 0.2820585072040558, "rewards/rejected": -0.2820674479007721, "step": 12643 }, { "epoch": 8.744121715076073, "grad_norm": 4.370787143707275, "learning_rate": 6.9771015829107126e-06, "log_odds_chosen": 9.561683654785156, "log_odds_ratio": -0.0007459899061359465, "logits/chosen": -0.4039541184902191, "logits/rejected": -0.4170818328857422, "logps/chosen": -0.0006070085219107568, "logps/rejected": -1.8454440832138062, "loss": 0.5765, "nll_loss": 0.14405278861522675, "rewards/accuracies": 1.0, "rewards/chosen": -6.070085510145873e-05, "rewards/margins": 0.18448373675346375, "rewards/rejected": -0.1845444142818451, "step": 12644 }, { "epoch": 8.744813278008298, "grad_norm": 3.183621406555176, "learning_rate": 6.973259566620563e-06, "log_odds_chosen": 11.528457641601562, "log_odds_ratio": -2.550122735556215e-05, "logits/chosen": -0.5070438981056213, "logits/rejected": -0.5771378874778748, "logps/chosen": -0.00021793476480524987, "logps/rejected": -2.513467311859131, "loss": 0.3878, "nll_loss": 0.09694357216358185, "rewards/accuracies": 1.0, "rewards/chosen": -2.179347757191863e-05, "rewards/margins": 0.25132498145103455, "rewards/rejected": -0.25134676694869995, "step": 12645 }, { "epoch": 8.745504840940526, "grad_norm": 4.302793025970459, "learning_rate": 6.969417550330413e-06, "log_odds_chosen": 10.690472602844238, "log_odds_ratio": -0.00014246918726712465, "logits/chosen": 0.21797120571136475, "logits/rejected": 0.06780949980020523, "logps/chosen": -0.0004303157329559326, "logps/rejected": -2.490640878677368, "loss": 0.4077, "nll_loss": 0.10190653800964355, "rewards/accuracies": 1.0, "rewards/chosen": -4.303157766116783e-05, "rewards/margins": 0.24902105331420898, "rewards/rejected": -0.24906408786773682, "step": 12646 }, { "epoch": 8.746196403872752, "grad_norm": 4.100922107696533, "learning_rate": 6.965575534040264e-06, "log_odds_chosen": 9.01432991027832, "log_odds_ratio": -0.0014760670019313693, "logits/chosen": -0.33062905073165894, "logits/rejected": -0.38164985179901123, "logps/chosen": -0.0009245839901268482, "logps/rejected": -1.5423380136489868, "loss": 0.3171, "nll_loss": 0.07912150025367737, "rewards/accuracies": 1.0, "rewards/chosen": -9.245839464711025e-05, "rewards/margins": 0.1541413515806198, "rewards/rejected": -0.15423379838466644, "step": 12647 }, { "epoch": 8.74688796680498, "grad_norm": 10.031594276428223, "learning_rate": 6.9617335177501156e-06, "log_odds_chosen": 11.902109146118164, "log_odds_ratio": -2.4353628759854473e-05, "logits/chosen": -0.010901231318712234, "logits/rejected": -0.11041456460952759, "logps/chosen": -0.0003525819047354162, "logps/rejected": -3.04818058013916, "loss": 0.6412, "nll_loss": 0.1602867841720581, "rewards/accuracies": 1.0, "rewards/chosen": -3.525819556671195e-05, "rewards/margins": 0.30478280782699585, "rewards/rejected": -0.3048180639743805, "step": 12648 }, { "epoch": 8.747579529737205, "grad_norm": 2.9875195026397705, "learning_rate": 6.957891501459966e-06, "log_odds_chosen": 10.417152404785156, "log_odds_ratio": -0.0002167121128877625, "logits/chosen": -0.4560806155204773, "logits/rejected": -0.5182000994682312, "logps/chosen": -0.00053954723989591, "logps/rejected": -2.046635389328003, "loss": 0.3537, "nll_loss": 0.0883985161781311, "rewards/accuracies": 1.0, "rewards/chosen": -5.3954725444782525e-05, "rewards/margins": 0.20460957288742065, "rewards/rejected": -0.20466352999210358, "step": 12649 }, { "epoch": 8.748271092669434, "grad_norm": 3.9878203868865967, "learning_rate": 6.954049485169817e-06, "log_odds_chosen": 11.966552734375, "log_odds_ratio": -5.745194721384905e-05, "logits/chosen": -0.5101600289344788, "logits/rejected": -0.6078588962554932, "logps/chosen": -0.00016526752733625472, "logps/rejected": -3.0164387226104736, "loss": 0.3714, "nll_loss": 0.09284399449825287, "rewards/accuracies": 1.0, "rewards/chosen": -1.6526753825019114e-05, "rewards/margins": 0.30162736773490906, "rewards/rejected": -0.30164390802383423, "step": 12650 }, { "epoch": 8.748962655601659, "grad_norm": 3.7651588916778564, "learning_rate": 6.950207468879669e-06, "log_odds_chosen": 11.419900894165039, "log_odds_ratio": -2.6444653485668823e-05, "logits/chosen": -0.3522256016731262, "logits/rejected": -0.42689967155456543, "logps/chosen": -0.00010521085641812533, "logps/rejected": -2.185702323913574, "loss": 0.2686, "nll_loss": 0.06714515388011932, "rewards/accuracies": 1.0, "rewards/chosen": -1.0521085641812533e-05, "rewards/margins": 0.21855969727039337, "rewards/rejected": -0.21857021749019623, "step": 12651 }, { "epoch": 8.749654218533887, "grad_norm": 2.433018922805786, "learning_rate": 6.946365452589519e-06, "log_odds_chosen": 10.421948432922363, "log_odds_ratio": -0.00015964567137416452, "logits/chosen": -0.5980747938156128, "logits/rejected": -0.6149604320526123, "logps/chosen": -0.0002093097718898207, "logps/rejected": -1.6080046892166138, "loss": 0.2819, "nll_loss": 0.07044967263936996, "rewards/accuracies": 1.0, "rewards/chosen": -2.0930976461386308e-05, "rewards/margins": 0.16077953577041626, "rewards/rejected": -0.16080045700073242, "step": 12652 }, { "epoch": 8.750345781466113, "grad_norm": 3.1320106983184814, "learning_rate": 6.942523436299369e-06, "log_odds_chosen": 10.990400314331055, "log_odds_ratio": -0.00020457607752177864, "logits/chosen": -0.2756420969963074, "logits/rejected": -0.2904963195323944, "logps/chosen": -0.00015380124386865646, "logps/rejected": -1.4968993663787842, "loss": 0.3821, "nll_loss": 0.09550082683563232, "rewards/accuracies": 1.0, "rewards/chosen": -1.5380124750663526e-05, "rewards/margins": 0.14967454969882965, "rewards/rejected": -0.1496899425983429, "step": 12653 }, { "epoch": 8.751037344398341, "grad_norm": 5.653360366821289, "learning_rate": 6.938681420009222e-06, "log_odds_chosen": 11.22561264038086, "log_odds_ratio": -8.661947504151613e-05, "logits/chosen": -0.484736829996109, "logits/rejected": -0.5598161220550537, "logps/chosen": -0.0004911787691526115, "logps/rejected": -3.0218851566314697, "loss": 0.9148, "nll_loss": 0.22868333756923676, "rewards/accuracies": 1.0, "rewards/chosen": -4.9117879825644195e-05, "rewards/margins": 0.30213940143585205, "rewards/rejected": -0.302188515663147, "step": 12654 }, { "epoch": 8.751728907330566, "grad_norm": 3.8202199935913086, "learning_rate": 6.934839403719072e-06, "log_odds_chosen": 11.486774444580078, "log_odds_ratio": -0.0001702620356809348, "logits/chosen": -0.17967629432678223, "logits/rejected": -0.17935806512832642, "logps/chosen": -0.00038216577377170324, "logps/rejected": -2.837064266204834, "loss": 0.3708, "nll_loss": 0.0926813930273056, "rewards/accuracies": 1.0, "rewards/chosen": -3.8216578104766086e-05, "rewards/margins": 0.2836682200431824, "rewards/rejected": -0.2837064266204834, "step": 12655 }, { "epoch": 8.752420470262795, "grad_norm": 3.8114304542541504, "learning_rate": 6.930997387428922e-06, "log_odds_chosen": 10.966930389404297, "log_odds_ratio": -3.6104695027461275e-05, "logits/chosen": -0.33328402042388916, "logits/rejected": -0.40690383315086365, "logps/chosen": -0.00014368303527589887, "logps/rejected": -1.8688400983810425, "loss": 0.4061, "nll_loss": 0.1015223041176796, "rewards/accuracies": 1.0, "rewards/chosen": -1.4368303709488828e-05, "rewards/margins": 0.18686963617801666, "rewards/rejected": -0.18688401579856873, "step": 12656 }, { "epoch": 8.75311203319502, "grad_norm": 3.9191980361938477, "learning_rate": 6.927155371138774e-06, "log_odds_chosen": 12.820165634155273, "log_odds_ratio": -7.132274731702637e-06, "logits/chosen": -0.6197792887687683, "logits/rejected": -0.637600302696228, "logps/chosen": -8.839755173539743e-05, "logps/rejected": -3.3203587532043457, "loss": 0.3296, "nll_loss": 0.0824105441570282, "rewards/accuracies": 1.0, "rewards/chosen": -8.839757356327027e-06, "rewards/margins": 0.33202704787254333, "rewards/rejected": -0.3320358693599701, "step": 12657 }, { "epoch": 8.753803596127248, "grad_norm": 4.134014129638672, "learning_rate": 6.923313354848625e-06, "log_odds_chosen": 11.503602981567383, "log_odds_ratio": -2.0928862795699388e-05, "logits/chosen": -0.5688618421554565, "logits/rejected": -0.6299799680709839, "logps/chosen": -0.00018121409812010825, "logps/rejected": -1.9686450958251953, "loss": 0.3294, "nll_loss": 0.08235260099172592, "rewards/accuracies": 1.0, "rewards/chosen": -1.8121409084415063e-05, "rewards/margins": 0.1968463659286499, "rewards/rejected": -0.196864515542984, "step": 12658 }, { "epoch": 8.754495159059474, "grad_norm": 3.557575225830078, "learning_rate": 6.9194713385584755e-06, "log_odds_chosen": 11.22537612915039, "log_odds_ratio": -3.842857404379174e-05, "logits/chosen": -0.21988216042518616, "logits/rejected": -0.36144596338272095, "logps/chosen": -0.0005637517315335572, "logps/rejected": -2.552690267562866, "loss": 0.3812, "nll_loss": 0.09529484063386917, "rewards/accuracies": 1.0, "rewards/chosen": -5.637517460854724e-05, "rewards/margins": 0.255212664604187, "rewards/rejected": -0.25526905059814453, "step": 12659 }, { "epoch": 8.755186721991702, "grad_norm": 3.9204976558685303, "learning_rate": 6.915629322268327e-06, "log_odds_chosen": 12.630598068237305, "log_odds_ratio": -1.0285146345268004e-05, "logits/chosen": -0.6050891280174255, "logits/rejected": -0.6181715130805969, "logps/chosen": -0.00013520887296181172, "logps/rejected": -3.6140971183776855, "loss": 0.3421, "nll_loss": 0.08551633358001709, "rewards/accuracies": 1.0, "rewards/chosen": -1.3520888387574814e-05, "rewards/margins": 0.3613961637020111, "rewards/rejected": -0.3614096939563751, "step": 12660 }, { "epoch": 8.755878284923927, "grad_norm": 2.353888750076294, "learning_rate": 6.911787305978178e-06, "log_odds_chosen": 8.957233428955078, "log_odds_ratio": -0.0003664310206659138, "logits/chosen": -0.26996999979019165, "logits/rejected": -0.34410420060157776, "logps/chosen": -0.000894219963811338, "logps/rejected": -1.4619256258010864, "loss": 0.2453, "nll_loss": 0.0612957589328289, "rewards/accuracies": 1.0, "rewards/chosen": -8.942200656747445e-05, "rewards/margins": 0.1461031436920166, "rewards/rejected": -0.14619258046150208, "step": 12661 }, { "epoch": 8.756569847856156, "grad_norm": 4.985196590423584, "learning_rate": 6.907945289688028e-06, "log_odds_chosen": 11.134862899780273, "log_odds_ratio": -4.339057340985164e-05, "logits/chosen": -0.422914981842041, "logits/rejected": -0.5093221068382263, "logps/chosen": -0.0001525114494143054, "logps/rejected": -2.2537786960601807, "loss": 0.4558, "nll_loss": 0.11395804584026337, "rewards/accuracies": 1.0, "rewards/chosen": -1.5251144759531599e-05, "rewards/margins": 0.2253626137971878, "rewards/rejected": -0.2253778576850891, "step": 12662 }, { "epoch": 8.75726141078838, "grad_norm": 4.657219886779785, "learning_rate": 6.90410327339788e-06, "log_odds_chosen": 11.336843490600586, "log_odds_ratio": -3.69651970686391e-05, "logits/chosen": -0.07278013229370117, "logits/rejected": -0.4056493639945984, "logps/chosen": -0.00011815188190666959, "logps/rejected": -1.9549498558044434, "loss": 0.3547, "nll_loss": 0.08867579698562622, "rewards/accuracies": 1.0, "rewards/chosen": -1.1815188372565899e-05, "rewards/margins": 0.19548316299915314, "rewards/rejected": -0.19549499452114105, "step": 12663 }, { "epoch": 8.75795297372061, "grad_norm": 3.1503381729125977, "learning_rate": 6.90026125710773e-06, "log_odds_chosen": 11.625396728515625, "log_odds_ratio": -2.158879578928463e-05, "logits/chosen": -0.19551731646060944, "logits/rejected": -0.27250638604164124, "logps/chosen": -0.00017402239609509706, "logps/rejected": -2.6546432971954346, "loss": 0.2782, "nll_loss": 0.06954455375671387, "rewards/accuracies": 1.0, "rewards/chosen": -1.7402238881913945e-05, "rewards/margins": 0.2654469311237335, "rewards/rejected": -0.26546433568000793, "step": 12664 }, { "epoch": 8.758644536652834, "grad_norm": 6.812473297119141, "learning_rate": 6.896419240817581e-06, "log_odds_chosen": 12.126462936401367, "log_odds_ratio": -2.7259564376436174e-05, "logits/chosen": -0.6818879842758179, "logits/rejected": -0.7184396982192993, "logps/chosen": -0.00011000910308212042, "logps/rejected": -2.533221960067749, "loss": 0.3746, "nll_loss": 0.09363803267478943, "rewards/accuracies": 1.0, "rewards/chosen": -1.1000910490110982e-05, "rewards/margins": 0.2533112168312073, "rewards/rejected": -0.25332218408584595, "step": 12665 }, { "epoch": 8.759336099585063, "grad_norm": 3.495081901550293, "learning_rate": 6.8925772245274324e-06, "log_odds_chosen": 10.932785034179688, "log_odds_ratio": -6.819087866460904e-05, "logits/chosen": -0.6123518943786621, "logits/rejected": -0.5373475551605225, "logps/chosen": -0.0001256611431017518, "logps/rejected": -2.137763738632202, "loss": 0.337, "nll_loss": 0.08423256874084473, "rewards/accuracies": 1.0, "rewards/chosen": -1.2566115401568823e-05, "rewards/margins": 0.21376380324363708, "rewards/rejected": -0.2137763649225235, "step": 12666 }, { "epoch": 8.760027662517288, "grad_norm": 2.8406386375427246, "learning_rate": 6.888735208237283e-06, "log_odds_chosen": 11.421842575073242, "log_odds_ratio": -2.9473754693754017e-05, "logits/chosen": -0.24375933408737183, "logits/rejected": -0.04970206320285797, "logps/chosen": -0.00015173686551861465, "logps/rejected": -2.3240654468536377, "loss": 0.3222, "nll_loss": 0.0805358737707138, "rewards/accuracies": 1.0, "rewards/chosen": -1.5173687643255107e-05, "rewards/margins": 0.2323913723230362, "rewards/rejected": -0.23240655660629272, "step": 12667 }, { "epoch": 8.760719225449517, "grad_norm": 7.1928935050964355, "learning_rate": 6.884893191947134e-06, "log_odds_chosen": 11.925975799560547, "log_odds_ratio": -1.749107650539372e-05, "logits/chosen": -0.6639752984046936, "logits/rejected": -0.6779666543006897, "logps/chosen": -7.219088001875207e-05, "logps/rejected": -2.313953399658203, "loss": 0.3397, "nll_loss": 0.08491578698158264, "rewards/accuracies": 1.0, "rewards/chosen": -7.219087365228916e-06, "rewards/margins": 0.23138810694217682, "rewards/rejected": -0.23139533400535583, "step": 12668 }, { "epoch": 8.761410788381742, "grad_norm": 5.088903427124023, "learning_rate": 6.8810511756569855e-06, "log_odds_chosen": 10.844564437866211, "log_odds_ratio": -4.963607352692634e-05, "logits/chosen": -0.06909926980733871, "logits/rejected": -0.11633356660604477, "logps/chosen": -0.00018518233264330775, "logps/rejected": -2.1275336742401123, "loss": 0.5095, "nll_loss": 0.1273745894432068, "rewards/accuracies": 1.0, "rewards/chosen": -1.8518232536735013e-05, "rewards/margins": 0.21273484826087952, "rewards/rejected": -0.21275337040424347, "step": 12669 }, { "epoch": 8.76210235131397, "grad_norm": 5.377123832702637, "learning_rate": 6.877209159366836e-06, "log_odds_chosen": 12.353730201721191, "log_odds_ratio": -8.520941264578141e-06, "logits/chosen": -0.5854908227920532, "logits/rejected": -0.6783852577209473, "logps/chosen": -8.955941302701831e-05, "logps/rejected": -2.8780431747436523, "loss": 0.5907, "nll_loss": 0.14767661690711975, "rewards/accuracies": 1.0, "rewards/chosen": -8.95594075700501e-06, "rewards/margins": 0.28779539465904236, "rewards/rejected": -0.28780436515808105, "step": 12670 }, { "epoch": 8.762793914246195, "grad_norm": 2.7313125133514404, "learning_rate": 6.873367143076687e-06, "log_odds_chosen": 11.512551307678223, "log_odds_ratio": -2.1834759536432102e-05, "logits/chosen": -0.3850710988044739, "logits/rejected": -0.48258426785469055, "logps/chosen": -0.00015986453217919916, "logps/rejected": -2.486776351928711, "loss": 0.2892, "nll_loss": 0.07229001820087433, "rewards/accuracies": 1.0, "rewards/chosen": -1.5986453945515677e-05, "rewards/margins": 0.24866165220737457, "rewards/rejected": -0.24867764115333557, "step": 12671 }, { "epoch": 8.763485477178424, "grad_norm": 3.895965814590454, "learning_rate": 6.869525126786539e-06, "log_odds_chosen": 10.890059471130371, "log_odds_ratio": -4.000085391453467e-05, "logits/chosen": -0.42519626021385193, "logits/rejected": -0.6218904256820679, "logps/chosen": -0.0002847716968972236, "logps/rejected": -2.296088218688965, "loss": 0.379, "nll_loss": 0.09474711120128632, "rewards/accuracies": 1.0, "rewards/chosen": -2.8477168598328717e-05, "rewards/margins": 0.229580357670784, "rewards/rejected": -0.22960881888866425, "step": 12672 }, { "epoch": 8.76417704011065, "grad_norm": 3.6324892044067383, "learning_rate": 6.865683110496389e-06, "log_odds_chosen": 11.511186599731445, "log_odds_ratio": -2.8699216272798367e-05, "logits/chosen": -0.6761150360107422, "logits/rejected": -0.6927566528320312, "logps/chosen": -0.0001163781707873568, "logps/rejected": -1.9792041778564453, "loss": 0.3162, "nll_loss": 0.07904962450265884, "rewards/accuracies": 1.0, "rewards/chosen": -1.1637816896836739e-05, "rewards/margins": 0.19790878891944885, "rewards/rejected": -0.19792042672634125, "step": 12673 }, { "epoch": 8.764868603042878, "grad_norm": 3.701308488845825, "learning_rate": 6.861841094206239e-06, "log_odds_chosen": 11.138769149780273, "log_odds_ratio": -0.00015294156037271023, "logits/chosen": -0.4651656150817871, "logits/rejected": -0.5149001479148865, "logps/chosen": -0.00016836769646033645, "logps/rejected": -2.1938812732696533, "loss": 0.3896, "nll_loss": 0.09738843142986298, "rewards/accuracies": 1.0, "rewards/chosen": -1.683676782704424e-05, "rewards/margins": 0.21937128901481628, "rewards/rejected": -0.21938814222812653, "step": 12674 }, { "epoch": 8.765560165975103, "grad_norm": 4.318880081176758, "learning_rate": 6.857999077916092e-06, "log_odds_chosen": 11.026379585266113, "log_odds_ratio": -0.00032751416438259184, "logits/chosen": -0.38952934741973877, "logits/rejected": -0.44622597098350525, "logps/chosen": -0.001070479047484696, "logps/rejected": -2.7773513793945312, "loss": 0.3938, "nll_loss": 0.09841464459896088, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010704790474846959, "rewards/margins": 0.277628093957901, "rewards/rejected": -0.2777351438999176, "step": 12675 }, { "epoch": 8.766251728907331, "grad_norm": 4.59893798828125, "learning_rate": 6.854157061625942e-06, "log_odds_chosen": 10.647390365600586, "log_odds_ratio": -8.746745879761875e-05, "logits/chosen": -0.607392430305481, "logits/rejected": -0.6704075336456299, "logps/chosen": -0.000778252724558115, "logps/rejected": -2.5483808517456055, "loss": 0.4577, "nll_loss": 0.11440734565258026, "rewards/accuracies": 1.0, "rewards/chosen": -7.78252724558115e-05, "rewards/margins": 0.2547602653503418, "rewards/rejected": -0.25483807921409607, "step": 12676 }, { "epoch": 8.766943291839558, "grad_norm": 3.0734097957611084, "learning_rate": 6.850315045335792e-06, "log_odds_chosen": 11.056943893432617, "log_odds_ratio": -2.5260575057473034e-05, "logits/chosen": -0.47094714641571045, "logits/rejected": -0.5174641013145447, "logps/chosen": -0.00048814882757142186, "logps/rejected": -2.6240057945251465, "loss": 0.3099, "nll_loss": 0.07747091352939606, "rewards/accuracies": 1.0, "rewards/chosen": -4.8814887122716755e-05, "rewards/margins": 0.26235175132751465, "rewards/rejected": -0.2624005675315857, "step": 12677 }, { "epoch": 8.767634854771785, "grad_norm": 3.880401849746704, "learning_rate": 6.846473029045644e-06, "log_odds_chosen": 11.165346145629883, "log_odds_ratio": -3.870287036988884e-05, "logits/chosen": -0.4656296372413635, "logits/rejected": -0.5163675546646118, "logps/chosen": -0.0003671708982437849, "logps/rejected": -2.0970325469970703, "loss": 0.4177, "nll_loss": 0.10441119223833084, "rewards/accuracies": 1.0, "rewards/chosen": -3.671709418995306e-05, "rewards/margins": 0.20966653525829315, "rewards/rejected": -0.2097032368183136, "step": 12678 }, { "epoch": 8.768326417704012, "grad_norm": 4.935536861419678, "learning_rate": 6.842631012755495e-06, "log_odds_chosen": 11.351346969604492, "log_odds_ratio": -9.1105917817913e-05, "logits/chosen": 0.022925205528736115, "logits/rejected": -0.13815544545650482, "logps/chosen": -0.00024218103499151766, "logps/rejected": -2.6840310096740723, "loss": 0.525, "nll_loss": 0.1312437355518341, "rewards/accuracies": 1.0, "rewards/chosen": -2.4218103135353886e-05, "rewards/margins": 0.2683789134025574, "rewards/rejected": -0.2684031128883362, "step": 12679 }, { "epoch": 8.769017980636239, "grad_norm": 3.416334867477417, "learning_rate": 6.8387889964653455e-06, "log_odds_chosen": 11.296628952026367, "log_odds_ratio": -0.0004493595624808222, "logits/chosen": -0.4751221239566803, "logits/rejected": -0.5552091598510742, "logps/chosen": -0.00039027677848935127, "logps/rejected": -2.32190203666687, "loss": 0.5385, "nll_loss": 0.1345841884613037, "rewards/accuracies": 1.0, "rewards/chosen": -3.902768366970122e-05, "rewards/margins": 0.23215121030807495, "rewards/rejected": -0.23219022154808044, "step": 12680 }, { "epoch": 8.769709543568466, "grad_norm": 4.126625061035156, "learning_rate": 6.834946980175195e-06, "log_odds_chosen": 11.260299682617188, "log_odds_ratio": -6.328269228106365e-05, "logits/chosen": -0.07310329377651215, "logits/rejected": 0.08745068311691284, "logps/chosen": -0.00016332468658220023, "logps/rejected": -2.020930051803589, "loss": 0.3832, "nll_loss": 0.09578186273574829, "rewards/accuracies": 1.0, "rewards/chosen": -1.633246756682638e-05, "rewards/margins": 0.20207667350769043, "rewards/rejected": -0.2020930051803589, "step": 12681 }, { "epoch": 8.770401106500692, "grad_norm": 5.452424049377441, "learning_rate": 6.831104963885048e-06, "log_odds_chosen": 12.215993881225586, "log_odds_ratio": -1.5033554518595338e-05, "logits/chosen": -0.1208108440041542, "logits/rejected": -0.15237045288085938, "logps/chosen": -0.0002600625157356262, "logps/rejected": -3.15916109085083, "loss": 0.6392, "nll_loss": 0.15979792177677155, "rewards/accuracies": 1.0, "rewards/chosen": -2.6006251573562622e-05, "rewards/margins": 0.3158901035785675, "rewards/rejected": -0.31591612100601196, "step": 12682 }, { "epoch": 8.77109266943292, "grad_norm": 5.0705342292785645, "learning_rate": 6.827262947594898e-06, "log_odds_chosen": 10.764846801757812, "log_odds_ratio": -0.00030922293080948293, "logits/chosen": -0.9068706035614014, "logits/rejected": -0.8894181251525879, "logps/chosen": -0.00034139491617679596, "logps/rejected": -2.2270474433898926, "loss": 0.4354, "nll_loss": 0.1088085025548935, "rewards/accuracies": 1.0, "rewards/chosen": -3.41394952556584e-05, "rewards/margins": 0.22267059981822968, "rewards/rejected": -0.22270473837852478, "step": 12683 }, { "epoch": 8.771784232365146, "grad_norm": 3.2080435752868652, "learning_rate": 6.8234209313047485e-06, "log_odds_chosen": 11.981725692749023, "log_odds_ratio": -1.5340634490712546e-05, "logits/chosen": -0.30534428358078003, "logits/rejected": -0.4273209869861603, "logps/chosen": -0.0001579856761964038, "logps/rejected": -2.5716822147369385, "loss": 0.3241, "nll_loss": 0.08102867007255554, "rewards/accuracies": 1.0, "rewards/chosen": -1.579856689204462e-05, "rewards/margins": 0.25715240836143494, "rewards/rejected": -0.2571682035923004, "step": 12684 }, { "epoch": 8.772475795297373, "grad_norm": 2.746500015258789, "learning_rate": 6.8195789150146e-06, "log_odds_chosen": 11.367488861083984, "log_odds_ratio": -4.743778481497429e-05, "logits/chosen": -0.10710550844669342, "logits/rejected": -0.1570693403482437, "logps/chosen": -0.0003469569201115519, "logps/rejected": -2.4717087745666504, "loss": 0.2992, "nll_loss": 0.07480612397193909, "rewards/accuracies": 1.0, "rewards/chosen": -3.469569128355943e-05, "rewards/margins": 0.2471361756324768, "rewards/rejected": -0.24717086553573608, "step": 12685 }, { "epoch": 8.7731673582296, "grad_norm": 3.9325008392333984, "learning_rate": 6.815736898724451e-06, "log_odds_chosen": 11.478471755981445, "log_odds_ratio": -4.558680666377768e-05, "logits/chosen": -0.07658401131629944, "logits/rejected": -0.29487428069114685, "logps/chosen": -0.00018664757953956723, "logps/rejected": -2.660466194152832, "loss": 0.456, "nll_loss": 0.11400678753852844, "rewards/accuracies": 1.0, "rewards/chosen": -1.8664759409148246e-05, "rewards/margins": 0.26602795720100403, "rewards/rejected": -0.2660466432571411, "step": 12686 }, { "epoch": 8.773858921161827, "grad_norm": 4.728855609893799, "learning_rate": 6.8118948824343016e-06, "log_odds_chosen": 11.943270683288574, "log_odds_ratio": -2.3963817511685193e-05, "logits/chosen": 0.271952748298645, "logits/rejected": 0.23709207773208618, "logps/chosen": -0.00030409041210077703, "logps/rejected": -3.301213264465332, "loss": 0.5569, "nll_loss": 0.13921433687210083, "rewards/accuracies": 1.0, "rewards/chosen": -3.0409040846279822e-05, "rewards/margins": 0.3300909101963043, "rewards/rejected": -0.33012130856513977, "step": 12687 }, { "epoch": 8.774550484094053, "grad_norm": 4.821633815765381, "learning_rate": 6.808052866144153e-06, "log_odds_chosen": 10.78215503692627, "log_odds_ratio": -0.00015306704153772444, "logits/chosen": -0.5289968252182007, "logits/rejected": -0.5817938446998596, "logps/chosen": -0.00021679667406715453, "logps/rejected": -1.9820137023925781, "loss": 0.3593, "nll_loss": 0.08979903161525726, "rewards/accuracies": 1.0, "rewards/chosen": -2.1679668861906976e-05, "rewards/margins": 0.198179692029953, "rewards/rejected": -0.19820138812065125, "step": 12688 }, { "epoch": 8.77524204702628, "grad_norm": 4.591336727142334, "learning_rate": 6.804210849854004e-06, "log_odds_chosen": 11.119473457336426, "log_odds_ratio": -3.22980122291483e-05, "logits/chosen": -0.4589541554450989, "logits/rejected": -0.4797210097312927, "logps/chosen": -5.7230747188441455e-05, "logps/rejected": -1.5906126499176025, "loss": 0.433, "nll_loss": 0.10825856029987335, "rewards/accuracies": 1.0, "rewards/chosen": -5.723074991692556e-06, "rewards/margins": 0.15905553102493286, "rewards/rejected": -0.1590612530708313, "step": 12689 }, { "epoch": 8.775933609958507, "grad_norm": 2.532083511352539, "learning_rate": 6.800368833563854e-06, "log_odds_chosen": 10.686437606811523, "log_odds_ratio": -0.00012513159890659153, "logits/chosen": -0.4632389545440674, "logits/rejected": -0.608898401260376, "logps/chosen": -0.0006360713741742074, "logps/rejected": -1.9530318975448608, "loss": 0.3341, "nll_loss": 0.08351817727088928, "rewards/accuracies": 1.0, "rewards/chosen": -6.360714178299531e-05, "rewards/margins": 0.1952395737171173, "rewards/rejected": -0.19530320167541504, "step": 12690 }, { "epoch": 8.776625172890734, "grad_norm": 4.333111763000488, "learning_rate": 6.796526817273706e-06, "log_odds_chosen": 10.954401969909668, "log_odds_ratio": -5.6871689594117925e-05, "logits/chosen": -0.2808159589767456, "logits/rejected": -0.30360111594200134, "logps/chosen": -0.0002845522831194103, "logps/rejected": -2.3893260955810547, "loss": 0.4027, "nll_loss": 0.10066375136375427, "rewards/accuracies": 1.0, "rewards/chosen": -2.845522976713255e-05, "rewards/margins": 0.2389041632413864, "rewards/rejected": -0.23893260955810547, "step": 12691 }, { "epoch": 8.77731673582296, "grad_norm": 2.768235445022583, "learning_rate": 6.792684800983556e-06, "log_odds_chosen": 11.034823417663574, "log_odds_ratio": -2.2319993149721995e-05, "logits/chosen": -0.24669495224952698, "logits/rejected": -0.46884268522262573, "logps/chosen": -0.00020692654652521014, "logps/rejected": -2.3289706707000732, "loss": 0.3428, "nll_loss": 0.08570310473442078, "rewards/accuracies": 1.0, "rewards/chosen": -2.0692654288723134e-05, "rewards/margins": 0.23287639021873474, "rewards/rejected": -0.232897087931633, "step": 12692 }, { "epoch": 8.778008298755188, "grad_norm": 2.6725335121154785, "learning_rate": 6.788842784693407e-06, "log_odds_chosen": 10.67829418182373, "log_odds_ratio": -0.00024256770848296583, "logits/chosen": -0.3352501690387726, "logits/rejected": -0.44572025537490845, "logps/chosen": -0.0055665490217506886, "logps/rejected": -2.264678955078125, "loss": 0.3241, "nll_loss": 0.0809900239109993, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005566548788920045, "rewards/margins": 0.2259112298488617, "rewards/rejected": -0.22646789252758026, "step": 12693 }, { "epoch": 8.778699861687414, "grad_norm": 3.7822775840759277, "learning_rate": 6.7850007684032585e-06, "log_odds_chosen": 11.56524658203125, "log_odds_ratio": -1.3682654753210954e-05, "logits/chosen": -0.3407299816608429, "logits/rejected": -0.44203171133995056, "logps/chosen": -0.00014287997328210622, "logps/rejected": -2.7055978775024414, "loss": 0.5001, "nll_loss": 0.12502838671207428, "rewards/accuracies": 1.0, "rewards/chosen": -1.4287997146311682e-05, "rewards/margins": 0.27054551243782043, "rewards/rejected": -0.27055978775024414, "step": 12694 }, { "epoch": 8.779391424619641, "grad_norm": 3.6747331619262695, "learning_rate": 6.781158752113109e-06, "log_odds_chosen": 10.785626411437988, "log_odds_ratio": -0.00014876978821121156, "logits/chosen": -0.5394725799560547, "logits/rejected": -0.572901725769043, "logps/chosen": -6.740719254594296e-05, "logps/rejected": -1.3617208003997803, "loss": 0.418, "nll_loss": 0.10448471456766129, "rewards/accuracies": 1.0, "rewards/chosen": -6.740719072695356e-06, "rewards/margins": 0.1361653357744217, "rewards/rejected": -0.1361720860004425, "step": 12695 }, { "epoch": 8.780082987551868, "grad_norm": 2.931102752685547, "learning_rate": 6.77731673582296e-06, "log_odds_chosen": 10.604806900024414, "log_odds_ratio": -3.93532682210207e-05, "logits/chosen": -0.4118063449859619, "logits/rejected": -0.4413941502571106, "logps/chosen": -0.00024991604732349515, "logps/rejected": -1.7044813632965088, "loss": 0.299, "nll_loss": 0.0747336596250534, "rewards/accuracies": 1.0, "rewards/chosen": -2.4991604732349515e-05, "rewards/margins": 0.17042315006256104, "rewards/rejected": -0.17044813930988312, "step": 12696 }, { "epoch": 8.780774550484095, "grad_norm": 3.015228509902954, "learning_rate": 6.773474719532812e-06, "log_odds_chosen": 11.405709266662598, "log_odds_ratio": -4.749331856146455e-05, "logits/chosen": -0.34732571244239807, "logits/rejected": -0.3698383867740631, "logps/chosen": -0.00031884806230664253, "logps/rejected": -2.771390438079834, "loss": 0.4244, "nll_loss": 0.10610456019639969, "rewards/accuracies": 1.0, "rewards/chosen": -3.1884806958260015e-05, "rewards/margins": 0.2771071791648865, "rewards/rejected": -0.2771390378475189, "step": 12697 }, { "epoch": 8.781466113416322, "grad_norm": 3.2666025161743164, "learning_rate": 6.769632703242662e-06, "log_odds_chosen": 10.806560516357422, "log_odds_ratio": -4.139018710702658e-05, "logits/chosen": -0.33733034133911133, "logits/rejected": -0.3503572344779968, "logps/chosen": -0.00016613573825452477, "logps/rejected": -2.0087902545928955, "loss": 0.5032, "nll_loss": 0.1257835030555725, "rewards/accuracies": 1.0, "rewards/chosen": -1.6613574189250357e-05, "rewards/margins": 0.20086240768432617, "rewards/rejected": -0.2008790373802185, "step": 12698 }, { "epoch": 8.782157676348548, "grad_norm": 5.432217121124268, "learning_rate": 6.765790686952512e-06, "log_odds_chosen": 10.60544204711914, "log_odds_ratio": -7.107143756002188e-05, "logits/chosen": -0.37671369314193726, "logits/rejected": -0.36169812083244324, "logps/chosen": -0.0004219827533233911, "logps/rejected": -2.247098445892334, "loss": 0.305, "nll_loss": 0.07624978572130203, "rewards/accuracies": 1.0, "rewards/chosen": -4.219827678753063e-05, "rewards/margins": 0.22466763854026794, "rewards/rejected": -0.22470983862876892, "step": 12699 }, { "epoch": 8.782849239280775, "grad_norm": 3.230746269226074, "learning_rate": 6.761948670662365e-06, "log_odds_chosen": 11.212976455688477, "log_odds_ratio": -2.6032004825538024e-05, "logits/chosen": -0.11250437796115875, "logits/rejected": -0.10483334958553314, "logps/chosen": -0.00019747507758438587, "logps/rejected": -1.8300416469573975, "loss": 0.2308, "nll_loss": 0.05768878012895584, "rewards/accuracies": 1.0, "rewards/chosen": -1.9747507394640706e-05, "rewards/margins": 0.18298442661762238, "rewards/rejected": -0.18300415575504303, "step": 12700 }, { "epoch": 8.783540802213002, "grad_norm": 2.508547782897949, "learning_rate": 6.758106654372215e-06, "log_odds_chosen": 9.457145690917969, "log_odds_ratio": -0.0013111267471686006, "logits/chosen": -0.25359445810317993, "logits/rejected": -0.09427622705698013, "logps/chosen": -0.0009612166322767735, "logps/rejected": -1.4299101829528809, "loss": 0.3206, "nll_loss": 0.08000911772251129, "rewards/accuracies": 1.0, "rewards/chosen": -9.612165740691125e-05, "rewards/margins": 0.1428948938846588, "rewards/rejected": -0.14299100637435913, "step": 12701 }, { "epoch": 8.784232365145229, "grad_norm": 4.50275993347168, "learning_rate": 6.754264638082065e-06, "log_odds_chosen": 11.06375789642334, "log_odds_ratio": -3.1684601708548144e-05, "logits/chosen": -0.05529871582984924, "logits/rejected": -0.04770771414041519, "logps/chosen": -0.00018408260075375438, "logps/rejected": -2.2451171875, "loss": 0.488, "nll_loss": 0.12198619544506073, "rewards/accuracies": 1.0, "rewards/chosen": -1.8408260075375438e-05, "rewards/margins": 0.22449329495429993, "rewards/rejected": -0.22451171278953552, "step": 12702 }, { "epoch": 8.784923928077456, "grad_norm": 3.462139844894409, "learning_rate": 6.750422621791917e-06, "log_odds_chosen": 11.399433135986328, "log_odds_ratio": -1.8106655261362903e-05, "logits/chosen": -0.27013203501701355, "logits/rejected": -0.31153810024261475, "logps/chosen": -0.00017975937225855887, "logps/rejected": -2.3728885650634766, "loss": 0.4365, "nll_loss": 0.10911567509174347, "rewards/accuracies": 1.0, "rewards/chosen": -1.797593904484529e-05, "rewards/margins": 0.23727090656757355, "rewards/rejected": -0.23728886246681213, "step": 12703 }, { "epoch": 8.785615491009683, "grad_norm": 3.590181589126587, "learning_rate": 6.746580605501768e-06, "log_odds_chosen": 12.149531364440918, "log_odds_ratio": -1.190086368296761e-05, "logits/chosen": -0.37782835960388184, "logits/rejected": -0.43353283405303955, "logps/chosen": -7.747412018943578e-05, "logps/rejected": -2.699915885925293, "loss": 0.3816, "nll_loss": 0.09540417790412903, "rewards/accuracies": 1.0, "rewards/chosen": -7.747412382741459e-06, "rewards/margins": 0.26998385787010193, "rewards/rejected": -0.26999160647392273, "step": 12704 }, { "epoch": 8.78630705394191, "grad_norm": 2.9558005332946777, "learning_rate": 6.7427385892116184e-06, "log_odds_chosen": 12.330540657043457, "log_odds_ratio": -7.616674338351004e-06, "logits/chosen": -0.32497966289520264, "logits/rejected": -0.3744075894355774, "logps/chosen": -0.00010802918404806405, "logps/rejected": -3.1242334842681885, "loss": 0.4138, "nll_loss": 0.10344050079584122, "rewards/accuracies": 1.0, "rewards/chosen": -1.0802918950503226e-05, "rewards/margins": 0.3124125301837921, "rewards/rejected": -0.31242334842681885, "step": 12705 }, { "epoch": 8.786998616874136, "grad_norm": 3.709592580795288, "learning_rate": 6.73889657292147e-06, "log_odds_chosen": 9.82625961303711, "log_odds_ratio": -0.0006991361733525991, "logits/chosen": -0.666206955909729, "logits/rejected": -0.7812671065330505, "logps/chosen": -0.0008450027671642601, "logps/rejected": -1.7990524768829346, "loss": 0.3182, "nll_loss": 0.0794852152466774, "rewards/accuracies": 1.0, "rewards/chosen": -8.450027962680906e-05, "rewards/margins": 0.17982074618339539, "rewards/rejected": -0.1799052357673645, "step": 12706 }, { "epoch": 8.787690179806363, "grad_norm": 3.0048162937164307, "learning_rate": 6.735054556631321e-06, "log_odds_chosen": 11.722905158996582, "log_odds_ratio": -0.00030517796403728426, "logits/chosen": -0.16083022952079773, "logits/rejected": -0.28174495697021484, "logps/chosen": -0.0002863854169845581, "logps/rejected": -2.798520803451538, "loss": 0.4008, "nll_loss": 0.1001795083284378, "rewards/accuracies": 1.0, "rewards/chosen": -2.8638543881243095e-05, "rewards/margins": 0.2798234224319458, "rewards/rejected": -0.2798520624637604, "step": 12707 }, { "epoch": 8.78838174273859, "grad_norm": 4.8236403465271, "learning_rate": 6.731212540341171e-06, "log_odds_chosen": 11.70523452758789, "log_odds_ratio": -3.543759521562606e-05, "logits/chosen": -0.1501210629940033, "logits/rejected": -0.18344929814338684, "logps/chosen": -0.0006368308095261455, "logps/rejected": -3.0577917098999023, "loss": 0.4492, "nll_loss": 0.11230667680501938, "rewards/accuracies": 1.0, "rewards/chosen": -6.368308095261455e-05, "rewards/margins": 0.30571550130844116, "rewards/rejected": -0.3057791590690613, "step": 12708 }, { "epoch": 8.789073305670817, "grad_norm": 5.425579071044922, "learning_rate": 6.727370524051023e-06, "log_odds_chosen": 11.123497009277344, "log_odds_ratio": -2.579005558800418e-05, "logits/chosen": -0.5647867918014526, "logits/rejected": -0.6334821581840515, "logps/chosen": -0.00023311632685363293, "logps/rejected": -2.4519762992858887, "loss": 0.6806, "nll_loss": 0.17014306783676147, "rewards/accuracies": 1.0, "rewards/chosen": -2.3311633412959054e-05, "rewards/margins": 0.2451743185520172, "rewards/rejected": -0.2451976239681244, "step": 12709 }, { "epoch": 8.789764868603044, "grad_norm": 2.829416513442993, "learning_rate": 6.723528507760873e-06, "log_odds_chosen": 12.006536483764648, "log_odds_ratio": -7.780406122037675e-06, "logits/chosen": -0.2859390377998352, "logits/rejected": -0.3590567111968994, "logps/chosen": -7.057748734951019e-05, "logps/rejected": -2.3626785278320312, "loss": 0.3268, "nll_loss": 0.08168688416481018, "rewards/accuracies": 1.0, "rewards/chosen": -7.057748007355258e-06, "rewards/margins": 0.2362608164548874, "rewards/rejected": -0.23626789450645447, "step": 12710 }, { "epoch": 8.79045643153527, "grad_norm": 2.6061692237854004, "learning_rate": 6.719686491470724e-06, "log_odds_chosen": 11.23437213897705, "log_odds_ratio": -5.0204274884890765e-05, "logits/chosen": -0.7914714813232422, "logits/rejected": -0.7617835998535156, "logps/chosen": -8.995120879262686e-05, "logps/rejected": -2.085355520248413, "loss": 0.3244, "nll_loss": 0.08109920471906662, "rewards/accuracies": 1.0, "rewards/chosen": -8.995120879262686e-06, "rewards/margins": 0.20852655172348022, "rewards/rejected": -0.2085355520248413, "step": 12711 }, { "epoch": 8.791147994467497, "grad_norm": 6.227917671203613, "learning_rate": 6.715844475180575e-06, "log_odds_chosen": 10.377006530761719, "log_odds_ratio": -0.00018965032359119505, "logits/chosen": -0.47637438774108887, "logits/rejected": -0.36658811569213867, "logps/chosen": -0.00047040573554113507, "logps/rejected": -2.130073070526123, "loss": 0.3822, "nll_loss": 0.09552697837352753, "rewards/accuracies": 1.0, "rewards/chosen": -4.704057573690079e-05, "rewards/margins": 0.21296028792858124, "rewards/rejected": -0.21300733089447021, "step": 12712 }, { "epoch": 8.791839557399724, "grad_norm": 4.331904411315918, "learning_rate": 6.712002458890426e-06, "log_odds_chosen": 11.185213088989258, "log_odds_ratio": -6.156775634735823e-05, "logits/chosen": -0.012590788304805756, "logits/rejected": -0.12466521561145782, "logps/chosen": -0.0002501815906725824, "logps/rejected": -2.258512258529663, "loss": 0.4147, "nll_loss": 0.10368071496486664, "rewards/accuracies": 1.0, "rewards/chosen": -2.501816015865188e-05, "rewards/margins": 0.2258262038230896, "rewards/rejected": -0.22585120797157288, "step": 12713 }, { "epoch": 8.792531120331951, "grad_norm": 3.270273447036743, "learning_rate": 6.708160442600277e-06, "log_odds_chosen": 11.703085899353027, "log_odds_ratio": -1.3806956303596962e-05, "logits/chosen": -0.054852619767189026, "logits/rejected": -0.10522682964801788, "logps/chosen": -8.263815107056871e-05, "logps/rejected": -2.3079423904418945, "loss": 0.3706, "nll_loss": 0.09264373034238815, "rewards/accuracies": 1.0, "rewards/chosen": -8.26381437946111e-06, "rewards/margins": 0.23078598082065582, "rewards/rejected": -0.2307942509651184, "step": 12714 }, { "epoch": 8.793222683264178, "grad_norm": 3.0255978107452393, "learning_rate": 6.704318426310127e-06, "log_odds_chosen": 11.447072982788086, "log_odds_ratio": -2.2637163056060672e-05, "logits/chosen": -0.2060781866312027, "logits/rejected": -0.2843821048736572, "logps/chosen": -0.0001971060410141945, "logps/rejected": -2.666531562805176, "loss": 0.2886, "nll_loss": 0.07215721905231476, "rewards/accuracies": 1.0, "rewards/chosen": -1.9710603737621568e-05, "rewards/margins": 0.26663345098495483, "rewards/rejected": -0.2666531503200531, "step": 12715 }, { "epoch": 8.793914246196405, "grad_norm": 3.2624900341033936, "learning_rate": 6.700476410019979e-06, "log_odds_chosen": 11.005241394042969, "log_odds_ratio": -4.519320646068081e-05, "logits/chosen": 0.07528844475746155, "logits/rejected": 0.016680315136909485, "logps/chosen": -0.00014727868256159127, "logps/rejected": -1.9385497570037842, "loss": 0.323, "nll_loss": 0.08074452728033066, "rewards/accuracies": 1.0, "rewards/chosen": -1.472786971135065e-05, "rewards/margins": 0.19384025037288666, "rewards/rejected": -0.19385498762130737, "step": 12716 }, { "epoch": 8.794605809128631, "grad_norm": 6.010182857513428, "learning_rate": 6.69663439372983e-06, "log_odds_chosen": 10.442169189453125, "log_odds_ratio": -7.135375926736742e-05, "logits/chosen": 0.12811151146888733, "logits/rejected": 0.1726951003074646, "logps/chosen": -0.001846662606112659, "logps/rejected": -2.3940958976745605, "loss": 0.6773, "nll_loss": 0.1693248599767685, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018466624896973372, "rewards/margins": 0.23922495543956757, "rewards/rejected": -0.23940961062908173, "step": 12717 }, { "epoch": 8.795297372060858, "grad_norm": 3.396695852279663, "learning_rate": 6.69279237743968e-06, "log_odds_chosen": 10.815861701965332, "log_odds_ratio": -0.00030398424132727087, "logits/chosen": -0.3280583620071411, "logits/rejected": -0.3126670718193054, "logps/chosen": -0.0002372421440668404, "logps/rejected": -2.193223237991333, "loss": 0.3949, "nll_loss": 0.09870284795761108, "rewards/accuracies": 1.0, "rewards/chosen": -2.3724214770481922e-05, "rewards/margins": 0.2192986160516739, "rewards/rejected": -0.2193223387002945, "step": 12718 }, { "epoch": 8.795988934993085, "grad_norm": 3.7363529205322266, "learning_rate": 6.688950361149532e-06, "log_odds_chosen": 10.347034454345703, "log_odds_ratio": -0.00011987592733930796, "logits/chosen": -0.3330230116844177, "logits/rejected": -0.39492470026016235, "logps/chosen": -0.0006288375006988645, "logps/rejected": -2.307096481323242, "loss": 0.3899, "nll_loss": 0.09745712578296661, "rewards/accuracies": 1.0, "rewards/chosen": -6.288375152507797e-05, "rewards/margins": 0.2306467741727829, "rewards/rejected": -0.23070964217185974, "step": 12719 }, { "epoch": 8.796680497925312, "grad_norm": 4.139132022857666, "learning_rate": 6.685108344859382e-06, "log_odds_chosen": 12.102994918823242, "log_odds_ratio": -5.405969568528235e-05, "logits/chosen": -0.44276031851768494, "logits/rejected": -0.5707353353500366, "logps/chosen": -0.00011724488285835832, "logps/rejected": -2.9865834712982178, "loss": 0.2988, "nll_loss": 0.07468465715646744, "rewards/accuracies": 1.0, "rewards/chosen": -1.1724488103936892e-05, "rewards/margins": 0.29864659905433655, "rewards/rejected": -0.2986583113670349, "step": 12720 }, { "epoch": 8.797372060857539, "grad_norm": 3.834527015686035, "learning_rate": 6.681266328569233e-06, "log_odds_chosen": 11.772808074951172, "log_odds_ratio": -1.9629087546491064e-05, "logits/chosen": -0.4522663354873657, "logits/rejected": -0.4562579393386841, "logps/chosen": -0.00013129066792316735, "logps/rejected": -2.5432262420654297, "loss": 0.3911, "nll_loss": 0.09776200354099274, "rewards/accuracies": 1.0, "rewards/chosen": -1.3129067156114615e-05, "rewards/margins": 0.2543094754219055, "rewards/rejected": -0.2543226182460785, "step": 12721 }, { "epoch": 8.798063623789766, "grad_norm": 4.2890238761901855, "learning_rate": 6.6774243122790845e-06, "log_odds_chosen": 11.038679122924805, "log_odds_ratio": -0.00011169728531967849, "logits/chosen": -0.16470174491405487, "logits/rejected": -0.12332076579332352, "logps/chosen": -0.00019009933748748153, "logps/rejected": -2.4229607582092285, "loss": 0.4674, "nll_loss": 0.11683283746242523, "rewards/accuracies": 1.0, "rewards/chosen": -1.9009934476343915e-05, "rewards/margins": 0.24227705597877502, "rewards/rejected": -0.24229606986045837, "step": 12722 }, { "epoch": 8.798755186721992, "grad_norm": 3.0468249320983887, "learning_rate": 6.673582295988935e-06, "log_odds_chosen": 9.727727890014648, "log_odds_ratio": -0.00012284204422030598, "logits/chosen": -0.4163380563259125, "logits/rejected": -0.4225500822067261, "logps/chosen": -0.0009680213988758624, "logps/rejected": -2.3284566402435303, "loss": 0.3026, "nll_loss": 0.07564578950405121, "rewards/accuracies": 1.0, "rewards/chosen": -9.680214861873537e-05, "rewards/margins": 0.2327488511800766, "rewards/rejected": -0.23284566402435303, "step": 12723 }, { "epoch": 8.79944674965422, "grad_norm": 4.225685119628906, "learning_rate": 6.669740279698786e-06, "log_odds_chosen": 11.941052436828613, "log_odds_ratio": -3.5983946872875094e-05, "logits/chosen": -0.12287880480289459, "logits/rejected": -0.22012335062026978, "logps/chosen": -0.0004759762668982148, "logps/rejected": -3.008145570755005, "loss": 0.3765, "nll_loss": 0.09410983324050903, "rewards/accuracies": 1.0, "rewards/chosen": -4.7597633965779096e-05, "rewards/margins": 0.3007669746875763, "rewards/rejected": -0.30081456899642944, "step": 12724 }, { "epoch": 8.800138312586446, "grad_norm": 3.547755479812622, "learning_rate": 6.665898263408638e-06, "log_odds_chosen": 10.471291542053223, "log_odds_ratio": -0.00019993323076050729, "logits/chosen": -0.3355550169944763, "logits/rejected": -0.39193442463874817, "logps/chosen": -0.00028798278071917593, "logps/rejected": -1.8607462644577026, "loss": 0.3266, "nll_loss": 0.08162867277860641, "rewards/accuracies": 1.0, "rewards/chosen": -2.8798280254704878e-05, "rewards/margins": 0.1860458254814148, "rewards/rejected": -0.1860746443271637, "step": 12725 }, { "epoch": 8.800829875518673, "grad_norm": 3.7131428718566895, "learning_rate": 6.662056247118488e-06, "log_odds_chosen": 11.517807006835938, "log_odds_ratio": -2.6031164452433586e-05, "logits/chosen": -0.22525209188461304, "logits/rejected": -0.2523791790008545, "logps/chosen": -0.00043190247379243374, "logps/rejected": -3.1265716552734375, "loss": 0.4177, "nll_loss": 0.1044142097234726, "rewards/accuracies": 1.0, "rewards/chosen": -4.3190248106839135e-05, "rewards/margins": 0.31261399388313293, "rewards/rejected": -0.3126571774482727, "step": 12726 }, { "epoch": 8.8015214384509, "grad_norm": 3.570354700088501, "learning_rate": 6.658214230828338e-06, "log_odds_chosen": 10.523756980895996, "log_odds_ratio": -0.00022319788695313036, "logits/chosen": -0.08164535462856293, "logits/rejected": -0.4064290523529053, "logps/chosen": -0.0003809003974311054, "logps/rejected": -1.9425835609436035, "loss": 0.398, "nll_loss": 0.09946616739034653, "rewards/accuracies": 1.0, "rewards/chosen": -3.809004192589782e-05, "rewards/margins": 0.19422025978565216, "rewards/rejected": -0.19425833225250244, "step": 12727 }, { "epoch": 8.802213001383127, "grad_norm": 4.042540073394775, "learning_rate": 6.654372214538191e-06, "log_odds_chosen": 12.080122947692871, "log_odds_ratio": -1.2566923942358699e-05, "logits/chosen": -0.351513534784317, "logits/rejected": -0.5006579756736755, "logps/chosen": -0.00039561832090839744, "logps/rejected": -3.51840877532959, "loss": 0.4536, "nll_loss": 0.11340173333883286, "rewards/accuracies": 1.0, "rewards/chosen": -3.9561837184010074e-05, "rewards/margins": 0.3518013060092926, "rewards/rejected": -0.3518408536911011, "step": 12728 }, { "epoch": 8.802904564315353, "grad_norm": 4.987198829650879, "learning_rate": 6.650530198248041e-06, "log_odds_chosen": 11.424623489379883, "log_odds_ratio": -5.1819573855027556e-05, "logits/chosen": -0.5144933462142944, "logits/rejected": -0.5797327756881714, "logps/chosen": -0.0001781594182830304, "logps/rejected": -2.4864256381988525, "loss": 0.5341, "nll_loss": 0.1335185170173645, "rewards/accuracies": 1.0, "rewards/chosen": -1.78159425558988e-05, "rewards/margins": 0.2486247718334198, "rewards/rejected": -0.24864259362220764, "step": 12729 }, { "epoch": 8.80359612724758, "grad_norm": 4.397002220153809, "learning_rate": 6.646688181957891e-06, "log_odds_chosen": 10.954423904418945, "log_odds_ratio": -0.00013551233860198408, "logits/chosen": 0.3836933672428131, "logits/rejected": 0.2545219659805298, "logps/chosen": -0.0003029437211807817, "logps/rejected": -2.4139468669891357, "loss": 0.6459, "nll_loss": 0.16146302223205566, "rewards/accuracies": 1.0, "rewards/chosen": -3.0294373573269695e-05, "rewards/margins": 0.24136443436145782, "rewards/rejected": -0.24139472842216492, "step": 12730 }, { "epoch": 8.804287690179807, "grad_norm": 3.042309284210205, "learning_rate": 6.642846165667743e-06, "log_odds_chosen": 11.329593658447266, "log_odds_ratio": -2.613785363791976e-05, "logits/chosen": -0.47953522205352783, "logits/rejected": -0.503200888633728, "logps/chosen": -0.00014091703633312136, "logps/rejected": -2.2789580821990967, "loss": 0.3789, "nll_loss": 0.0947137251496315, "rewards/accuracies": 1.0, "rewards/chosen": -1.4091704542806838e-05, "rewards/margins": 0.22788169980049133, "rewards/rejected": -0.2278957962989807, "step": 12731 }, { "epoch": 8.804979253112034, "grad_norm": 3.1978232860565186, "learning_rate": 6.639004149377594e-06, "log_odds_chosen": 11.014985084533691, "log_odds_ratio": -0.0003254017501603812, "logits/chosen": -0.42042186856269836, "logits/rejected": -0.44931113719940186, "logps/chosen": -0.000167686928762123, "logps/rejected": -2.00655460357666, "loss": 0.3797, "nll_loss": 0.09488354623317719, "rewards/accuracies": 1.0, "rewards/chosen": -1.6768692148616537e-05, "rewards/margins": 0.20063868165016174, "rewards/rejected": -0.20065546035766602, "step": 12732 }, { "epoch": 8.80567081604426, "grad_norm": 3.5881807804107666, "learning_rate": 6.6351621330874445e-06, "log_odds_chosen": 8.955921173095703, "log_odds_ratio": -0.0038107852451503277, "logits/chosen": -0.3033401370048523, "logits/rejected": -0.18464888632297516, "logps/chosen": -0.026263626292347908, "logps/rejected": -1.458088994026184, "loss": 0.3769, "nll_loss": 0.09385372698307037, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026263627223670483, "rewards/margins": 0.14318254590034485, "rewards/rejected": -0.14580890536308289, "step": 12733 }, { "epoch": 8.806362378976488, "grad_norm": 4.713569164276123, "learning_rate": 6.631320116797296e-06, "log_odds_chosen": 12.279664993286133, "log_odds_ratio": -0.00010816467693075538, "logits/chosen": -0.08790473639965057, "logits/rejected": -0.1655513495206833, "logps/chosen": -0.00010481144272489473, "logps/rejected": -2.963526487350464, "loss": 0.6965, "nll_loss": 0.17410698533058167, "rewards/accuracies": 1.0, "rewards/chosen": -1.0481143362994771e-05, "rewards/margins": 0.2963421642780304, "rewards/rejected": -0.2963526248931885, "step": 12734 }, { "epoch": 8.807053941908714, "grad_norm": 5.424431800842285, "learning_rate": 6.627478100507147e-06, "log_odds_chosen": 11.423365592956543, "log_odds_ratio": -1.666683965595439e-05, "logits/chosen": 0.09181933850049973, "logits/rejected": 0.11511439085006714, "logps/chosen": -7.890022243373096e-05, "logps/rejected": -1.944343090057373, "loss": 0.549, "nll_loss": 0.13723790645599365, "rewards/accuracies": 1.0, "rewards/chosen": -7.890022970968857e-06, "rewards/margins": 0.19442641735076904, "rewards/rejected": -0.1944343000650406, "step": 12735 }, { "epoch": 8.807745504840941, "grad_norm": 3.7672154903411865, "learning_rate": 6.623636084216997e-06, "log_odds_chosen": 10.905462265014648, "log_odds_ratio": -6.981779006309807e-05, "logits/chosen": -0.5699967741966248, "logits/rejected": -0.6361503005027771, "logps/chosen": -0.00023144498118199408, "logps/rejected": -1.8278840780258179, "loss": 0.304, "nll_loss": 0.07600371539592743, "rewards/accuracies": 1.0, "rewards/chosen": -2.3144497390603647e-05, "rewards/margins": 0.18276527523994446, "rewards/rejected": -0.1827884167432785, "step": 12736 }, { "epoch": 8.808437067773168, "grad_norm": 2.5110762119293213, "learning_rate": 6.619794067926849e-06, "log_odds_chosen": 11.414780616760254, "log_odds_ratio": -1.6504767700098455e-05, "logits/chosen": -0.24265910685062408, "logits/rejected": -0.21113057434558868, "logps/chosen": -0.0001499099307693541, "logps/rejected": -2.345609664916992, "loss": 0.2051, "nll_loss": 0.05127387121319771, "rewards/accuracies": 1.0, "rewards/chosen": -1.4990991985541768e-05, "rewards/margins": 0.2345459908246994, "rewards/rejected": -0.23456096649169922, "step": 12737 }, { "epoch": 8.809128630705395, "grad_norm": 3.5081794261932373, "learning_rate": 6.615952051636699e-06, "log_odds_chosen": 10.710822105407715, "log_odds_ratio": -0.0003082406474277377, "logits/chosen": -0.42299550771713257, "logits/rejected": -0.458909809589386, "logps/chosen": -0.00019325618632137775, "logps/rejected": -2.1691081523895264, "loss": 0.3748, "nll_loss": 0.09366155415773392, "rewards/accuracies": 1.0, "rewards/chosen": -1.9325620087329298e-05, "rewards/margins": 0.21689146757125854, "rewards/rejected": -0.21691077947616577, "step": 12738 }, { "epoch": 8.809820193637622, "grad_norm": 2.9431204795837402, "learning_rate": 6.61211003534655e-06, "log_odds_chosen": 11.093559265136719, "log_odds_ratio": -0.00018852236098609865, "logits/chosen": -0.7440529465675354, "logits/rejected": -0.7985327243804932, "logps/chosen": -0.0003106665099039674, "logps/rejected": -2.407902240753174, "loss": 0.3018, "nll_loss": 0.07542867213487625, "rewards/accuracies": 1.0, "rewards/chosen": -3.106664735241793e-05, "rewards/margins": 0.24075916409492493, "rewards/rejected": -0.2407902181148529, "step": 12739 }, { "epoch": 8.810511756569849, "grad_norm": 4.121976852416992, "learning_rate": 6.608268019056401e-06, "log_odds_chosen": 11.665343284606934, "log_odds_ratio": -1.7321885025012307e-05, "logits/chosen": -0.3306739330291748, "logits/rejected": -0.3703592121601105, "logps/chosen": -0.0002438789524603635, "logps/rejected": -2.5956716537475586, "loss": 0.3885, "nll_loss": 0.097113698720932, "rewards/accuracies": 1.0, "rewards/chosen": -2.4387896701227874e-05, "rewards/margins": 0.2595427930355072, "rewards/rejected": -0.2595672011375427, "step": 12740 }, { "epoch": 8.811203319502075, "grad_norm": 4.804633617401123, "learning_rate": 6.604426002766252e-06, "log_odds_chosen": 10.657184600830078, "log_odds_ratio": -6.92599278409034e-05, "logits/chosen": -0.5924953818321228, "logits/rejected": -0.6714246273040771, "logps/chosen": -0.0002521405986044556, "logps/rejected": -2.042790174484253, "loss": 0.3351, "nll_loss": 0.08377320319414139, "rewards/accuracies": 1.0, "rewards/chosen": -2.521405986044556e-05, "rewards/margins": 0.20425380766391754, "rewards/rejected": -0.20427900552749634, "step": 12741 }, { "epoch": 8.811894882434302, "grad_norm": 8.727124214172363, "learning_rate": 6.600583986476103e-06, "log_odds_chosen": 9.931739807128906, "log_odds_ratio": -0.0005722044734284282, "logits/chosen": 0.05851912498474121, "logits/rejected": -0.1667538285255432, "logps/chosen": -0.0011412083404138684, "logps/rejected": -2.185616970062256, "loss": 0.4747, "nll_loss": 0.11861248314380646, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011412083404138684, "rewards/margins": 0.21844759583473206, "rewards/rejected": -0.21856170892715454, "step": 12742 }, { "epoch": 8.812586445366529, "grad_norm": 2.942943811416626, "learning_rate": 6.5967419701859545e-06, "log_odds_chosen": 10.176290512084961, "log_odds_ratio": -0.0004964149557054043, "logits/chosen": -0.1795501857995987, "logits/rejected": -0.2746792435646057, "logps/chosen": -0.0007704338058829308, "logps/rejected": -1.7060078382492065, "loss": 0.2599, "nll_loss": 0.06492795795202255, "rewards/accuracies": 1.0, "rewards/chosen": -7.704339077463374e-05, "rewards/margins": 0.1705237329006195, "rewards/rejected": -0.1706007719039917, "step": 12743 }, { "epoch": 8.813278008298756, "grad_norm": 3.4092957973480225, "learning_rate": 6.592899953895805e-06, "log_odds_chosen": 11.288472175598145, "log_odds_ratio": -2.884104469558224e-05, "logits/chosen": -0.457816481590271, "logits/rejected": -0.5336398482322693, "logps/chosen": -0.00020037713693454862, "logps/rejected": -2.184605598449707, "loss": 0.3271, "nll_loss": 0.08177774399518967, "rewards/accuracies": 1.0, "rewards/chosen": -2.0037714421050623e-05, "rewards/margins": 0.21844051778316498, "rewards/rejected": -0.2184605598449707, "step": 12744 }, { "epoch": 8.813969571230983, "grad_norm": 2.6864378452301025, "learning_rate": 6.589057937605655e-06, "log_odds_chosen": 11.065103530883789, "log_odds_ratio": -4.73825384688098e-05, "logits/chosen": -0.09832610189914703, "logits/rejected": -0.10603950172662735, "logps/chosen": -0.00018619374895934016, "logps/rejected": -2.327572822570801, "loss": 0.2336, "nll_loss": 0.058385420590639114, "rewards/accuracies": 1.0, "rewards/chosen": -1.8619373804540373e-05, "rewards/margins": 0.2327386736869812, "rewards/rejected": -0.2327573001384735, "step": 12745 }, { "epoch": 8.81466113416321, "grad_norm": 3.6604156494140625, "learning_rate": 6.585215921315508e-06, "log_odds_chosen": 11.522943496704102, "log_odds_ratio": -4.35113615822047e-05, "logits/chosen": -0.1032416969537735, "logits/rejected": -0.17721807956695557, "logps/chosen": -0.00027219849289394915, "logps/rejected": -3.0030527114868164, "loss": 0.4215, "nll_loss": 0.10537984222173691, "rewards/accuracies": 1.0, "rewards/chosen": -2.721985219977796e-05, "rewards/margins": 0.30027803778648376, "rewards/rejected": -0.3003052771091461, "step": 12746 }, { "epoch": 8.815352697095436, "grad_norm": 5.408121585845947, "learning_rate": 6.5813739050253575e-06, "log_odds_chosen": 10.814599990844727, "log_odds_ratio": -0.0005240280297584832, "logits/chosen": -0.16442151367664337, "logits/rejected": -0.22039881348609924, "logps/chosen": -0.00017183725140057504, "logps/rejected": -1.8447155952453613, "loss": 0.4989, "nll_loss": 0.12467427551746368, "rewards/accuracies": 1.0, "rewards/chosen": -1.7183725503855385e-05, "rewards/margins": 0.18445438146591187, "rewards/rejected": -0.18447156250476837, "step": 12747 }, { "epoch": 8.816044260027663, "grad_norm": 5.701321125030518, "learning_rate": 6.577531888735208e-06, "log_odds_chosen": 10.8905611038208, "log_odds_ratio": -2.500377013348043e-05, "logits/chosen": -0.32780396938323975, "logits/rejected": -0.30016857385635376, "logps/chosen": -8.67684866534546e-05, "logps/rejected": -1.7069416046142578, "loss": 0.6302, "nll_loss": 0.15754103660583496, "rewards/accuracies": 1.0, "rewards/chosen": -8.676849574840162e-06, "rewards/margins": 0.17068548500537872, "rewards/rejected": -0.17069417238235474, "step": 12748 }, { "epoch": 8.81673582295989, "grad_norm": 3.65494704246521, "learning_rate": 6.573689872445059e-06, "log_odds_chosen": 12.122621536254883, "log_odds_ratio": -1.0510473657632247e-05, "logits/chosen": -0.31138962507247925, "logits/rejected": -0.33350175619125366, "logps/chosen": -8.071251795627177e-05, "logps/rejected": -2.522864818572998, "loss": 0.4539, "nll_loss": 0.11346264183521271, "rewards/accuracies": 1.0, "rewards/chosen": -8.071252523222938e-06, "rewards/margins": 0.2522784173488617, "rewards/rejected": -0.25228649377822876, "step": 12749 }, { "epoch": 8.817427385892117, "grad_norm": 3.0768041610717773, "learning_rate": 6.569847856154911e-06, "log_odds_chosen": 11.274198532104492, "log_odds_ratio": -3.721012399182655e-05, "logits/chosen": -0.5060542225837708, "logits/rejected": -0.6420814990997314, "logps/chosen": -0.00011597540287766606, "logps/rejected": -2.2312769889831543, "loss": 0.2978, "nll_loss": 0.07445670664310455, "rewards/accuracies": 1.0, "rewards/chosen": -1.1597541742958128e-05, "rewards/margins": 0.22311611473560333, "rewards/rejected": -0.22312772274017334, "step": 12750 }, { "epoch": 8.818118948824344, "grad_norm": 3.526170492172241, "learning_rate": 6.566005839864761e-06, "log_odds_chosen": 11.549579620361328, "log_odds_ratio": -4.6264962293207645e-05, "logits/chosen": -0.5068678259849548, "logits/rejected": -0.526757001876831, "logps/chosen": -0.00010017196473199874, "logps/rejected": -2.3823142051696777, "loss": 0.3506, "nll_loss": 0.08764593303203583, "rewards/accuracies": 1.0, "rewards/chosen": -1.0017196473199874e-05, "rewards/margins": 0.23822137713432312, "rewards/rejected": -0.23823140561580658, "step": 12751 }, { "epoch": 8.81881051175657, "grad_norm": 3.2240893840789795, "learning_rate": 6.562163823574611e-06, "log_odds_chosen": 10.802983283996582, "log_odds_ratio": -5.024659913033247e-05, "logits/chosen": -0.5198428630828857, "logits/rejected": -0.5384092330932617, "logps/chosen": -0.0004417779855430126, "logps/rejected": -2.2304627895355225, "loss": 0.3948, "nll_loss": 0.09869439899921417, "rewards/accuracies": 1.0, "rewards/chosen": -4.417780655785464e-05, "rewards/margins": 0.223002091050148, "rewards/rejected": -0.22304627299308777, "step": 12752 }, { "epoch": 8.819502074688797, "grad_norm": 3.056912422180176, "learning_rate": 6.558321807284464e-06, "log_odds_chosen": 11.202829360961914, "log_odds_ratio": -3.9832044421928003e-05, "logits/chosen": -0.4405937194824219, "logits/rejected": -0.4304983913898468, "logps/chosen": -0.0003462162858340889, "logps/rejected": -2.4193732738494873, "loss": 0.3596, "nll_loss": 0.08990298211574554, "rewards/accuracies": 1.0, "rewards/chosen": -3.462163294898346e-05, "rewards/margins": 0.24190272390842438, "rewards/rejected": -0.24193733930587769, "step": 12753 }, { "epoch": 8.820193637621024, "grad_norm": 3.57071590423584, "learning_rate": 6.554479790994314e-06, "log_odds_chosen": 11.801494598388672, "log_odds_ratio": -8.60178770381026e-05, "logits/chosen": -0.14969930052757263, "logits/rejected": -0.1381671130657196, "logps/chosen": -0.00033189854002557695, "logps/rejected": -2.458753824234009, "loss": 0.2773, "nll_loss": 0.0693252757191658, "rewards/accuracies": 1.0, "rewards/chosen": -3.318985545774922e-05, "rewards/margins": 0.24584218859672546, "rewards/rejected": -0.24587538838386536, "step": 12754 }, { "epoch": 8.820885200553251, "grad_norm": 4.084262847900391, "learning_rate": 6.550637774704164e-06, "log_odds_chosen": 11.506061553955078, "log_odds_ratio": -5.4857049690326676e-05, "logits/chosen": -0.5423182249069214, "logits/rejected": -0.5082879662513733, "logps/chosen": -0.00020827885600738227, "logps/rejected": -2.6145410537719727, "loss": 0.3825, "nll_loss": 0.09560876339673996, "rewards/accuracies": 1.0, "rewards/chosen": -2.082788705592975e-05, "rewards/margins": 0.26143327355384827, "rewards/rejected": -0.26145410537719727, "step": 12755 }, { "epoch": 8.821576763485478, "grad_norm": 2.3327832221984863, "learning_rate": 6.546795758414016e-06, "log_odds_chosen": 11.193686485290527, "log_odds_ratio": -5.154366954229772e-05, "logits/chosen": -0.5911445617675781, "logits/rejected": -0.733811616897583, "logps/chosen": -0.0001743299071677029, "logps/rejected": -2.031386613845825, "loss": 0.253, "nll_loss": 0.06324849277734756, "rewards/accuracies": 1.0, "rewards/chosen": -1.7432992535759695e-05, "rewards/margins": 0.20312124490737915, "rewards/rejected": -0.20313867926597595, "step": 12756 }, { "epoch": 8.822268326417705, "grad_norm": 4.580896854400635, "learning_rate": 6.542953742123867e-06, "log_odds_chosen": 9.616424560546875, "log_odds_ratio": -0.0015286378329619765, "logits/chosen": -0.2255147099494934, "logits/rejected": -0.14628499746322632, "logps/chosen": -0.0004408976819831878, "logps/rejected": -1.315403938293457, "loss": 0.5038, "nll_loss": 0.12580369412899017, "rewards/accuracies": 1.0, "rewards/chosen": -4.4089771108701825e-05, "rewards/margins": 0.13149632513523102, "rewards/rejected": -0.13154040277004242, "step": 12757 }, { "epoch": 8.822959889349931, "grad_norm": 3.8236727714538574, "learning_rate": 6.5391117258337174e-06, "log_odds_chosen": 10.288022994995117, "log_odds_ratio": -9.033164678839967e-05, "logits/chosen": -0.2098008245229721, "logits/rejected": -0.24087196588516235, "logps/chosen": -0.0004149182641413063, "logps/rejected": -2.0759353637695312, "loss": 0.358, "nll_loss": 0.08948257565498352, "rewards/accuracies": 1.0, "rewards/chosen": -4.149182859691791e-05, "rewards/margins": 0.20755203068256378, "rewards/rejected": -0.20759351551532745, "step": 12758 }, { "epoch": 8.823651452282158, "grad_norm": 3.7073469161987305, "learning_rate": 6.535269709543569e-06, "log_odds_chosen": 11.815654754638672, "log_odds_ratio": -1.0503194062039256e-05, "logits/chosen": -0.21260342001914978, "logits/rejected": -0.1908436417579651, "logps/chosen": -0.0004060858045704663, "logps/rejected": -2.8777055740356445, "loss": 0.3155, "nll_loss": 0.078867606818676, "rewards/accuracies": 1.0, "rewards/chosen": -4.0608578274259344e-05, "rewards/margins": 0.28772997856140137, "rewards/rejected": -0.2877705693244934, "step": 12759 }, { "epoch": 8.824343015214385, "grad_norm": 3.740062952041626, "learning_rate": 6.53142769325342e-06, "log_odds_chosen": 10.717554092407227, "log_odds_ratio": -5.8261815865989774e-05, "logits/chosen": -0.02434053272008896, "logits/rejected": -0.04800887778401375, "logps/chosen": -0.0002340275823371485, "logps/rejected": -1.6554569005966187, "loss": 0.338, "nll_loss": 0.08449292927980423, "rewards/accuracies": 1.0, "rewards/chosen": -2.340275932510849e-05, "rewards/margins": 0.16552230715751648, "rewards/rejected": -0.16554570198059082, "step": 12760 }, { "epoch": 8.825034578146612, "grad_norm": 4.184948921203613, "learning_rate": 6.52758567696327e-06, "log_odds_chosen": 10.76385498046875, "log_odds_ratio": -3.3372751204296947e-05, "logits/chosen": -0.340414822101593, "logits/rejected": -0.31318366527557373, "logps/chosen": -0.0007341946475207806, "logps/rejected": -2.632049560546875, "loss": 0.4692, "nll_loss": 0.11730688810348511, "rewards/accuracies": 1.0, "rewards/chosen": -7.341946911765262e-05, "rewards/margins": 0.2631315290927887, "rewards/rejected": -0.2632049322128296, "step": 12761 }, { "epoch": 8.825726141078839, "grad_norm": 5.250175476074219, "learning_rate": 6.523743660673122e-06, "log_odds_chosen": 11.203939437866211, "log_odds_ratio": -0.0003403636219445616, "logits/chosen": -0.3742835819721222, "logits/rejected": -0.33408549427986145, "logps/chosen": -0.0002061006671283394, "logps/rejected": -2.6237549781799316, "loss": 0.4048, "nll_loss": 0.10115813463926315, "rewards/accuracies": 1.0, "rewards/chosen": -2.0610064893844537e-05, "rewards/margins": 0.26235488057136536, "rewards/rejected": -0.26237550377845764, "step": 12762 }, { "epoch": 8.826417704011066, "grad_norm": 10.360694885253906, "learning_rate": 6.519901644382972e-06, "log_odds_chosen": 12.885150909423828, "log_odds_ratio": -1.928115489135962e-05, "logits/chosen": -0.2504520118236542, "logits/rejected": -0.33734339475631714, "logps/chosen": -0.00021348144218791276, "logps/rejected": -4.043447971343994, "loss": 0.4204, "nll_loss": 0.10510491579771042, "rewards/accuracies": 1.0, "rewards/chosen": -2.1348147129174322e-05, "rewards/margins": 0.4043235182762146, "rewards/rejected": -0.4043447971343994, "step": 12763 }, { "epoch": 8.827109266943292, "grad_norm": 3.3477675914764404, "learning_rate": 6.516059628092823e-06, "log_odds_chosen": 11.33936595916748, "log_odds_ratio": -3.511543764034286e-05, "logits/chosen": -0.41620469093322754, "logits/rejected": -0.473169207572937, "logps/chosen": -7.452804129570723e-05, "logps/rejected": -1.7823110818862915, "loss": 0.3579, "nll_loss": 0.08947095274925232, "rewards/accuracies": 1.0, "rewards/chosen": -7.452804311469663e-06, "rewards/margins": 0.17822366952896118, "rewards/rejected": -0.1782311052083969, "step": 12764 }, { "epoch": 8.82780082987552, "grad_norm": 2.7336463928222656, "learning_rate": 6.512217611802675e-06, "log_odds_chosen": 10.976394653320312, "log_odds_ratio": -5.622572643915191e-05, "logits/chosen": -0.1379978358745575, "logits/rejected": -0.2915462553501129, "logps/chosen": -0.00033241885830648243, "logps/rejected": -1.9764206409454346, "loss": 0.3088, "nll_loss": 0.07719646394252777, "rewards/accuracies": 1.0, "rewards/chosen": -3.3241885830648243e-05, "rewards/margins": 0.1976088136434555, "rewards/rejected": -0.19764205813407898, "step": 12765 }, { "epoch": 8.828492392807746, "grad_norm": 3.080021858215332, "learning_rate": 6.508375595512525e-06, "log_odds_chosen": 10.0333833694458, "log_odds_ratio": -0.00017765231314115226, "logits/chosen": -0.5421645641326904, "logits/rejected": -0.5306675434112549, "logps/chosen": -0.00044594579958356917, "logps/rejected": -1.6764360666275024, "loss": 0.346, "nll_loss": 0.0864863246679306, "rewards/accuracies": 1.0, "rewards/chosen": -4.459457704797387e-05, "rewards/margins": 0.16759900748729706, "rewards/rejected": -0.16764359176158905, "step": 12766 }, { "epoch": 8.829183955739973, "grad_norm": 2.9879536628723145, "learning_rate": 6.504533579222376e-06, "log_odds_chosen": 10.86699390411377, "log_odds_ratio": -4.3034284317400306e-05, "logits/chosen": -0.01258639246225357, "logits/rejected": 0.09293460100889206, "logps/chosen": -0.00016677375242579728, "logps/rejected": -1.8913809061050415, "loss": 0.3406, "nll_loss": 0.08515028655529022, "rewards/accuracies": 1.0, "rewards/chosen": -1.6677373423590325e-05, "rewards/margins": 0.18912141025066376, "rewards/rejected": -0.18913809955120087, "step": 12767 }, { "epoch": 8.8298755186722, "grad_norm": 4.226680278778076, "learning_rate": 6.5006915629322275e-06, "log_odds_chosen": 11.947617530822754, "log_odds_ratio": -2.5241959519917145e-05, "logits/chosen": -0.3539278507232666, "logits/rejected": -0.3582938313484192, "logps/chosen": -0.00020406056137289852, "logps/rejected": -3.0730202198028564, "loss": 0.3761, "nll_loss": 0.09401322901248932, "rewards/accuracies": 1.0, "rewards/chosen": -2.0406056137289852e-05, "rewards/margins": 0.30728161334991455, "rewards/rejected": -0.3073020279407501, "step": 12768 }, { "epoch": 8.830567081604427, "grad_norm": 4.433466911315918, "learning_rate": 6.496849546642078e-06, "log_odds_chosen": 12.175585746765137, "log_odds_ratio": -9.85627411864698e-06, "logits/chosen": 0.19564247131347656, "logits/rejected": 0.14580342173576355, "logps/chosen": -0.00020560537814162672, "logps/rejected": -2.9848077297210693, "loss": 0.4718, "nll_loss": 0.11795386672019958, "rewards/accuracies": 1.0, "rewards/chosen": -2.056053745036479e-05, "rewards/margins": 0.2984602153301239, "rewards/rejected": -0.2984807789325714, "step": 12769 }, { "epoch": 8.831258644536653, "grad_norm": 2.693063974380493, "learning_rate": 6.493007530351929e-06, "log_odds_chosen": 10.376236915588379, "log_odds_ratio": -0.00017671348177827895, "logits/chosen": 0.007927365601062775, "logits/rejected": -0.03316565603017807, "logps/chosen": -0.0006801652489230037, "logps/rejected": -2.863428831100464, "loss": 0.3297, "nll_loss": 0.08241531997919083, "rewards/accuracies": 1.0, "rewards/chosen": -6.801652489230037e-05, "rewards/margins": 0.28627485036849976, "rewards/rejected": -0.28634288907051086, "step": 12770 }, { "epoch": 8.83195020746888, "grad_norm": 5.1559977531433105, "learning_rate": 6.4891655140617806e-06, "log_odds_chosen": 11.185357093811035, "log_odds_ratio": -3.578070754883811e-05, "logits/chosen": -0.4364149570465088, "logits/rejected": -0.4735616445541382, "logps/chosen": -0.00016242300625890493, "logps/rejected": -2.062422275543213, "loss": 0.3282, "nll_loss": 0.08205679804086685, "rewards/accuracies": 1.0, "rewards/chosen": -1.6242302081082016e-05, "rewards/margins": 0.20622599124908447, "rewards/rejected": -0.20624223351478577, "step": 12771 }, { "epoch": 8.832641770401107, "grad_norm": 4.903589725494385, "learning_rate": 6.485323497771631e-06, "log_odds_chosen": 11.552923202514648, "log_odds_ratio": -2.0057505025761202e-05, "logits/chosen": -0.35818684101104736, "logits/rejected": -0.38597989082336426, "logps/chosen": -0.00014166624168865383, "logps/rejected": -2.558972120285034, "loss": 0.3711, "nll_loss": 0.09278266131877899, "rewards/accuracies": 1.0, "rewards/chosen": -1.4166622349875979e-05, "rewards/margins": 0.25588303804397583, "rewards/rejected": -0.2558972239494324, "step": 12772 }, { "epoch": 8.833333333333334, "grad_norm": 2.9457814693450928, "learning_rate": 6.481481481481481e-06, "log_odds_chosen": 11.662984848022461, "log_odds_ratio": -4.007641473435797e-05, "logits/chosen": -0.1337454915046692, "logits/rejected": -0.2075233906507492, "logps/chosen": -0.00017183000454679132, "logps/rejected": -2.5259475708007812, "loss": 0.3374, "nll_loss": 0.08435782045125961, "rewards/accuracies": 1.0, "rewards/chosen": -1.7183001546072774e-05, "rewards/margins": 0.25257760286331177, "rewards/rejected": -0.2525947690010071, "step": 12773 }, { "epoch": 8.83402489626556, "grad_norm": 3.8707709312438965, "learning_rate": 6.477639465191334e-06, "log_odds_chosen": 12.129270553588867, "log_odds_ratio": -6.537245099025313e-06, "logits/chosen": -0.6894609928131104, "logits/rejected": -0.6343203186988831, "logps/chosen": -0.00013332456001080573, "logps/rejected": -2.8702447414398193, "loss": 0.3374, "nll_loss": 0.08435693383216858, "rewards/accuracies": 1.0, "rewards/chosen": -1.3332456546777394e-05, "rewards/margins": 0.28701114654541016, "rewards/rejected": -0.28702446818351746, "step": 12774 }, { "epoch": 8.834716459197788, "grad_norm": 3.372408628463745, "learning_rate": 6.4737974489011836e-06, "log_odds_chosen": 11.362260818481445, "log_odds_ratio": -2.4923283490352333e-05, "logits/chosen": -0.23244208097457886, "logits/rejected": -0.30278605222702026, "logps/chosen": -0.00016180599050130695, "logps/rejected": -2.273146152496338, "loss": 0.3145, "nll_loss": 0.07862184941768646, "rewards/accuracies": 1.0, "rewards/chosen": -1.6180598322534934e-05, "rewards/margins": 0.22729843854904175, "rewards/rejected": -0.22731462121009827, "step": 12775 }, { "epoch": 8.835408022130014, "grad_norm": 4.276350498199463, "learning_rate": 6.469955432611034e-06, "log_odds_chosen": 11.451852798461914, "log_odds_ratio": -0.00010747795749921352, "logits/chosen": -0.5826900005340576, "logits/rejected": -0.622979998588562, "logps/chosen": -0.00019800482550635934, "logps/rejected": -2.329267740249634, "loss": 0.2783, "nll_loss": 0.06955676525831223, "rewards/accuracies": 1.0, "rewards/chosen": -1.9800481823040172e-05, "rewards/margins": 0.2329069823026657, "rewards/rejected": -0.23292678594589233, "step": 12776 }, { "epoch": 8.836099585062241, "grad_norm": 4.371342182159424, "learning_rate": 6.466113416320886e-06, "log_odds_chosen": 11.968100547790527, "log_odds_ratio": -1.7586673493497074e-05, "logits/chosen": 0.07722847908735275, "logits/rejected": 0.11953555047512054, "logps/chosen": -9.393729851581156e-05, "logps/rejected": -2.5000882148742676, "loss": 0.4745, "nll_loss": 0.11862242221832275, "rewards/accuracies": 1.0, "rewards/chosen": -9.393729669682216e-06, "rewards/margins": 0.24999943375587463, "rewards/rejected": -0.25000882148742676, "step": 12777 }, { "epoch": 8.836791147994468, "grad_norm": 3.7952606678009033, "learning_rate": 6.462271400030737e-06, "log_odds_chosen": 12.824507713317871, "log_odds_ratio": -1.2694898032350466e-05, "logits/chosen": 0.04276101291179657, "logits/rejected": -0.013445567339658737, "logps/chosen": -0.00018952536629512906, "logps/rejected": -3.921670436859131, "loss": 0.3081, "nll_loss": 0.07701949775218964, "rewards/accuracies": 1.0, "rewards/chosen": -1.895253808470443e-05, "rewards/margins": 0.39214810729026794, "rewards/rejected": -0.3921670615673065, "step": 12778 }, { "epoch": 8.837482710926695, "grad_norm": 3.1971945762634277, "learning_rate": 6.458429383740587e-06, "log_odds_chosen": 10.57689094543457, "log_odds_ratio": -4.320785592426546e-05, "logits/chosen": -0.3199876844882965, "logits/rejected": -0.33395707607269287, "logps/chosen": -0.0002134922833647579, "logps/rejected": -1.9453153610229492, "loss": 0.3245, "nll_loss": 0.08111564069986343, "rewards/accuracies": 1.0, "rewards/chosen": -2.1349231246858835e-05, "rewards/margins": 0.19451019167900085, "rewards/rejected": -0.19453153014183044, "step": 12779 }, { "epoch": 8.838174273858922, "grad_norm": 3.570117950439453, "learning_rate": 6.454587367450439e-06, "log_odds_chosen": 10.053936004638672, "log_odds_ratio": -0.0001422764325980097, "logits/chosen": -0.28322821855545044, "logits/rejected": -0.3680499196052551, "logps/chosen": -0.00044421886559575796, "logps/rejected": -1.7309963703155518, "loss": 0.3456, "nll_loss": 0.08637740463018417, "rewards/accuracies": 1.0, "rewards/chosen": -4.4421885831980035e-05, "rewards/margins": 0.17305521667003632, "rewards/rejected": -0.17309962213039398, "step": 12780 }, { "epoch": 8.838865836791149, "grad_norm": 4.41160249710083, "learning_rate": 6.45074535116029e-06, "log_odds_chosen": 11.318617820739746, "log_odds_ratio": -8.401113882428035e-05, "logits/chosen": 0.19311122596263885, "logits/rejected": 0.1457151174545288, "logps/chosen": -0.00013428172678686678, "logps/rejected": -2.4825916290283203, "loss": 0.4355, "nll_loss": 0.10887414216995239, "rewards/accuracies": 1.0, "rewards/chosen": -1.3428171769191977e-05, "rewards/margins": 0.2482457309961319, "rewards/rejected": -0.24825915694236755, "step": 12781 }, { "epoch": 8.839557399723375, "grad_norm": 3.3861706256866455, "learning_rate": 6.44690333487014e-06, "log_odds_chosen": 11.522031784057617, "log_odds_ratio": -0.00010014892177423462, "logits/chosen": -0.46342214941978455, "logits/rejected": -0.5653108954429626, "logps/chosen": -0.0008928571478463709, "logps/rejected": -2.940377950668335, "loss": 0.4646, "nll_loss": 0.1161460429430008, "rewards/accuracies": 1.0, "rewards/chosen": -8.928572060540318e-05, "rewards/margins": 0.29394853115081787, "rewards/rejected": -0.294037789106369, "step": 12782 }, { "epoch": 8.840248962655602, "grad_norm": 3.975029945373535, "learning_rate": 6.44306131857999e-06, "log_odds_chosen": 11.149683952331543, "log_odds_ratio": -3.567495150491595e-05, "logits/chosen": -0.31347280740737915, "logits/rejected": -0.3032105565071106, "logps/chosen": -0.0001983102411031723, "logps/rejected": -1.9459149837493896, "loss": 0.4983, "nll_loss": 0.12457741796970367, "rewards/accuracies": 1.0, "rewards/chosen": -1.9831026293104514e-05, "rewards/margins": 0.19457167387008667, "rewards/rejected": -0.19459150731563568, "step": 12783 }, { "epoch": 8.840940525587829, "grad_norm": 4.7988600730896, "learning_rate": 6.439219302289842e-06, "log_odds_chosen": 12.046151161193848, "log_odds_ratio": -4.2681695049395785e-05, "logits/chosen": -0.053906239569187164, "logits/rejected": -0.14985615015029907, "logps/chosen": -0.00016439243336208165, "logps/rejected": -2.840974807739258, "loss": 0.5175, "nll_loss": 0.12937091290950775, "rewards/accuracies": 1.0, "rewards/chosen": -1.643924588279333e-05, "rewards/margins": 0.28408104181289673, "rewards/rejected": -0.28409749269485474, "step": 12784 }, { "epoch": 8.841632088520056, "grad_norm": 4.1446146965026855, "learning_rate": 6.435377285999693e-06, "log_odds_chosen": 11.283458709716797, "log_odds_ratio": -3.4393011446809396e-05, "logits/chosen": -0.2070007473230362, "logits/rejected": -0.3016629219055176, "logps/chosen": -0.000497006403747946, "logps/rejected": -2.5244784355163574, "loss": 0.422, "nll_loss": 0.10548632591962814, "rewards/accuracies": 1.0, "rewards/chosen": -4.97006403747946e-05, "rewards/margins": 0.25239813327789307, "rewards/rejected": -0.25244784355163574, "step": 12785 }, { "epoch": 8.842323651452283, "grad_norm": 4.659236907958984, "learning_rate": 6.4315352697095435e-06, "log_odds_chosen": 11.342458724975586, "log_odds_ratio": -3.612410364439711e-05, "logits/chosen": -0.4639470875263214, "logits/rejected": -0.5213142037391663, "logps/chosen": -0.00042212463449686766, "logps/rejected": -2.7059743404388428, "loss": 0.3976, "nll_loss": 0.0993962287902832, "rewards/accuracies": 1.0, "rewards/chosen": -4.221246490487829e-05, "rewards/margins": 0.27055519819259644, "rewards/rejected": -0.2705973982810974, "step": 12786 }, { "epoch": 8.84301521438451, "grad_norm": 3.9992127418518066, "learning_rate": 6.427693253419395e-06, "log_odds_chosen": 10.961469650268555, "log_odds_ratio": -3.790142363868654e-05, "logits/chosen": -0.443245530128479, "logits/rejected": -0.5575623512268066, "logps/chosen": -0.0001961943635251373, "logps/rejected": -1.908239722251892, "loss": 0.3471, "nll_loss": 0.08676640689373016, "rewards/accuracies": 1.0, "rewards/chosen": -1.9619437807705253e-05, "rewards/margins": 0.1908043622970581, "rewards/rejected": -0.1908239722251892, "step": 12787 }, { "epoch": 8.843706777316736, "grad_norm": 3.0610389709472656, "learning_rate": 6.423851237129246e-06, "log_odds_chosen": 11.528380393981934, "log_odds_ratio": -2.1074079995742068e-05, "logits/chosen": -0.31700849533081055, "logits/rejected": -0.369879812002182, "logps/chosen": -0.00015870729112066329, "logps/rejected": -2.5586135387420654, "loss": 0.373, "nll_loss": 0.09325310587882996, "rewards/accuracies": 1.0, "rewards/chosen": -1.587072983966209e-05, "rewards/margins": 0.25584548711776733, "rewards/rejected": -0.2558613419532776, "step": 12788 }, { "epoch": 8.844398340248963, "grad_norm": 3.834096908569336, "learning_rate": 6.420009220839096e-06, "log_odds_chosen": 10.404363632202148, "log_odds_ratio": -0.0003085200733039528, "logits/chosen": -0.34495866298675537, "logits/rejected": -0.43560492992401123, "logps/chosen": -0.00025962202926166356, "logps/rejected": -1.8464734554290771, "loss": 0.4628, "nll_loss": 0.11567828059196472, "rewards/accuracies": 1.0, "rewards/chosen": -2.5962202926166356e-05, "rewards/margins": 0.1846213936805725, "rewards/rejected": -0.1846473515033722, "step": 12789 }, { "epoch": 8.84508990318119, "grad_norm": 3.1313297748565674, "learning_rate": 6.416167204548948e-06, "log_odds_chosen": 11.057754516601562, "log_odds_ratio": -4.367155270301737e-05, "logits/chosen": -0.10010676085948944, "logits/rejected": -0.2379499077796936, "logps/chosen": -0.00020684795163106173, "logps/rejected": -2.0329058170318604, "loss": 0.2781, "nll_loss": 0.06952624022960663, "rewards/accuracies": 1.0, "rewards/chosen": -2.0684796254499815e-05, "rewards/margins": 0.20326989889144897, "rewards/rejected": -0.20329056680202484, "step": 12790 }, { "epoch": 8.845781466113417, "grad_norm": 3.7589199542999268, "learning_rate": 6.412325188258798e-06, "log_odds_chosen": 10.544598579406738, "log_odds_ratio": -6.38895871816203e-05, "logits/chosen": -0.15169522166252136, "logits/rejected": -0.2930341958999634, "logps/chosen": -0.0003536183503456414, "logps/rejected": -2.1912853717803955, "loss": 0.3678, "nll_loss": 0.09195493161678314, "rewards/accuracies": 1.0, "rewards/chosen": -3.536183794494718e-05, "rewards/margins": 0.2190931737422943, "rewards/rejected": -0.2191285341978073, "step": 12791 }, { "epoch": 8.846473029045644, "grad_norm": 3.619842767715454, "learning_rate": 6.408483171968649e-06, "log_odds_chosen": 11.351167678833008, "log_odds_ratio": -4.215494482195936e-05, "logits/chosen": -0.37345826625823975, "logits/rejected": -0.4001314342021942, "logps/chosen": -0.00016402498295065016, "logps/rejected": -2.2864413261413574, "loss": 0.3679, "nll_loss": 0.09198301285505295, "rewards/accuracies": 1.0, "rewards/chosen": -1.6402498658862896e-05, "rewards/margins": 0.2286277413368225, "rewards/rejected": -0.22864416241645813, "step": 12792 }, { "epoch": 8.84716459197787, "grad_norm": 4.5168561935424805, "learning_rate": 6.4046411556785004e-06, "log_odds_chosen": 10.029914855957031, "log_odds_ratio": -0.00015833518409635872, "logits/chosen": -0.16558289527893066, "logits/rejected": -0.21536770462989807, "logps/chosen": -0.0013712483923882246, "logps/rejected": -2.173950672149658, "loss": 0.3235, "nll_loss": 0.08085213601589203, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013712485088035464, "rewards/margins": 0.21725796163082123, "rewards/rejected": -0.21739508211612701, "step": 12793 }, { "epoch": 8.847856154910097, "grad_norm": 5.88995885848999, "learning_rate": 6.400799139388351e-06, "log_odds_chosen": 11.599223136901855, "log_odds_ratio": -0.00055232661543414, "logits/chosen": -0.4300704598426819, "logits/rejected": -0.5501849055290222, "logps/chosen": -0.0002075859229080379, "logps/rejected": -2.715794563293457, "loss": 0.5537, "nll_loss": 0.1383628249168396, "rewards/accuracies": 1.0, "rewards/chosen": -2.0758594473591074e-05, "rewards/margins": 0.2715587019920349, "rewards/rejected": -0.27157944440841675, "step": 12794 }, { "epoch": 8.848547717842324, "grad_norm": 3.6452476978302, "learning_rate": 6.396957123098202e-06, "log_odds_chosen": 10.567415237426758, "log_odds_ratio": -0.00012351528857834637, "logits/chosen": 0.004386186599731445, "logits/rejected": -0.0502280592918396, "logps/chosen": -0.00024670836864970624, "logps/rejected": -1.9211772680282593, "loss": 0.4473, "nll_loss": 0.11181392520666122, "rewards/accuracies": 1.0, "rewards/chosen": -2.4670836864970624e-05, "rewards/margins": 0.19209304451942444, "rewards/rejected": -0.19211772084236145, "step": 12795 }, { "epoch": 8.849239280774551, "grad_norm": 3.6943931579589844, "learning_rate": 6.3931151068080535e-06, "log_odds_chosen": 11.638896942138672, "log_odds_ratio": -1.964723560377024e-05, "logits/chosen": -0.6820564866065979, "logits/rejected": -0.7239702343940735, "logps/chosen": -6.672356539638713e-05, "logps/rejected": -2.1108269691467285, "loss": 0.4618, "nll_loss": 0.11546014249324799, "rewards/accuracies": 1.0, "rewards/chosen": -6.672355993941892e-06, "rewards/margins": 0.211076021194458, "rewards/rejected": -0.21108269691467285, "step": 12796 }, { "epoch": 8.849930843706778, "grad_norm": 2.904240369796753, "learning_rate": 6.389273090517904e-06, "log_odds_chosen": 10.31950569152832, "log_odds_ratio": -0.00010807502258103341, "logits/chosen": -0.19965432584285736, "logits/rejected": -0.07110458612442017, "logps/chosen": -0.0005260208854451776, "logps/rejected": -1.9000871181488037, "loss": 0.3618, "nll_loss": 0.09044182300567627, "rewards/accuracies": 1.0, "rewards/chosen": -5.2602088544517756e-05, "rewards/margins": 0.18995609879493713, "rewards/rejected": -0.19000869989395142, "step": 12797 }, { "epoch": 8.850622406639005, "grad_norm": 3.6862058639526367, "learning_rate": 6.385431074227754e-06, "log_odds_chosen": 9.927431106567383, "log_odds_ratio": -0.001017512520775199, "logits/chosen": -0.2530617415904999, "logits/rejected": -0.2509405016899109, "logps/chosen": -0.0012779454700648785, "logps/rejected": -1.9349370002746582, "loss": 0.4581, "nll_loss": 0.11442754417657852, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012779454118572176, "rewards/margins": 0.1933659017086029, "rewards/rejected": -0.19349370896816254, "step": 12798 }, { "epoch": 8.851313969571232, "grad_norm": 4.178948402404785, "learning_rate": 6.381589057937607e-06, "log_odds_chosen": 10.891372680664062, "log_odds_ratio": -4.565313793136738e-05, "logits/chosen": -0.4360129237174988, "logits/rejected": -0.5046184062957764, "logps/chosen": -0.00018814804207067937, "logps/rejected": -1.9857664108276367, "loss": 0.4265, "nll_loss": 0.10661087185144424, "rewards/accuracies": 1.0, "rewards/chosen": -1.8814802388078533e-05, "rewards/margins": 0.19855782389640808, "rewards/rejected": -0.19857662916183472, "step": 12799 }, { "epoch": 8.852005532503458, "grad_norm": 2.3786985874176025, "learning_rate": 6.3777470416474565e-06, "log_odds_chosen": 11.908330917358398, "log_odds_ratio": -2.3679904188611545e-05, "logits/chosen": -0.5502616763114929, "logits/rejected": -0.5585063099861145, "logps/chosen": -0.0001272763474844396, "logps/rejected": -2.439074993133545, "loss": 0.3383, "nll_loss": 0.08457043021917343, "rewards/accuracies": 1.0, "rewards/chosen": -1.2727635294140782e-05, "rewards/margins": 0.2438947856426239, "rewards/rejected": -0.24390751123428345, "step": 12800 }, { "epoch": 8.852697095435685, "grad_norm": 3.721696376800537, "learning_rate": 6.373905025357307e-06, "log_odds_chosen": 11.587570190429688, "log_odds_ratio": -2.8774469683412462e-05, "logits/chosen": -0.2774308919906616, "logits/rejected": -0.40616902709007263, "logps/chosen": -0.00034545804373919964, "logps/rejected": -2.403249502182007, "loss": 0.4794, "nll_loss": 0.11984878778457642, "rewards/accuracies": 1.0, "rewards/chosen": -3.454580291872844e-05, "rewards/margins": 0.24029040336608887, "rewards/rejected": -0.2403249442577362, "step": 12801 }, { "epoch": 8.853388658367912, "grad_norm": 3.288451671600342, "learning_rate": 6.370063009067159e-06, "log_odds_chosen": 11.266716957092285, "log_odds_ratio": -3.245002881158143e-05, "logits/chosen": -0.1537206918001175, "logits/rejected": -0.2285475730895996, "logps/chosen": -0.00011662822362268344, "logps/rejected": -2.271165370941162, "loss": 0.4268, "nll_loss": 0.10669860988855362, "rewards/accuracies": 1.0, "rewards/chosen": -1.1662822544167284e-05, "rewards/margins": 0.22710487246513367, "rewards/rejected": -0.22711655497550964, "step": 12802 }, { "epoch": 8.854080221300139, "grad_norm": 3.4580912590026855, "learning_rate": 6.36622099277701e-06, "log_odds_chosen": 11.14916706085205, "log_odds_ratio": -7.049908163025975e-05, "logits/chosen": -0.3396326005458832, "logits/rejected": -0.39334046840667725, "logps/chosen": -0.00017810799181461334, "logps/rejected": -2.1985089778900146, "loss": 0.3566, "nll_loss": 0.08914760500192642, "rewards/accuracies": 1.0, "rewards/chosen": -1.7810798453865573e-05, "rewards/margins": 0.21983309090137482, "rewards/rejected": -0.21985091269016266, "step": 12803 }, { "epoch": 8.854771784232366, "grad_norm": 4.225655555725098, "learning_rate": 6.36237897648686e-06, "log_odds_chosen": 10.638168334960938, "log_odds_ratio": -0.00013483702787198126, "logits/chosen": -0.5227504372596741, "logits/rejected": -0.6066082715988159, "logps/chosen": -0.00039914826629683375, "logps/rejected": -2.0195770263671875, "loss": 0.3202, "nll_loss": 0.08004799485206604, "rewards/accuracies": 1.0, "rewards/chosen": -3.9914826629683375e-05, "rewards/margins": 0.20191779732704163, "rewards/rejected": -0.20195770263671875, "step": 12804 }, { "epoch": 8.855463347164592, "grad_norm": 4.3255157470703125, "learning_rate": 6.358536960196712e-06, "log_odds_chosen": 11.599845886230469, "log_odds_ratio": -2.2031070329830982e-05, "logits/chosen": 0.013053441420197487, "logits/rejected": -0.16108641028404236, "logps/chosen": -0.00015875123790465295, "logps/rejected": -2.578869581222534, "loss": 0.399, "nll_loss": 0.09974716603755951, "rewards/accuracies": 1.0, "rewards/chosen": -1.5875124518061057e-05, "rewards/margins": 0.2578710913658142, "rewards/rejected": -0.25788694620132446, "step": 12805 }, { "epoch": 8.85615491009682, "grad_norm": 3.2311954498291016, "learning_rate": 6.354694943906563e-06, "log_odds_chosen": 11.382307052612305, "log_odds_ratio": -5.089869591756724e-05, "logits/chosen": -0.6852065324783325, "logits/rejected": -0.7032715678215027, "logps/chosen": -0.00012299341324251145, "logps/rejected": -2.0894734859466553, "loss": 0.3764, "nll_loss": 0.09410445392131805, "rewards/accuracies": 1.0, "rewards/chosen": -1.2299342415644787e-05, "rewards/margins": 0.20893505215644836, "rewards/rejected": -0.20894736051559448, "step": 12806 }, { "epoch": 8.856846473029046, "grad_norm": 3.9020462036132812, "learning_rate": 6.350852927616413e-06, "log_odds_chosen": 10.981860160827637, "log_odds_ratio": -0.001179091283120215, "logits/chosen": -0.4362276494503021, "logits/rejected": -0.36282485723495483, "logps/chosen": -0.0008756135357543826, "logps/rejected": -2.592684745788574, "loss": 0.6029, "nll_loss": 0.150602787733078, "rewards/accuracies": 1.0, "rewards/chosen": -8.756135503062978e-05, "rewards/margins": 0.2591809034347534, "rewards/rejected": -0.25926846265792847, "step": 12807 }, { "epoch": 8.857538035961273, "grad_norm": 3.7402217388153076, "learning_rate": 6.347010911326265e-06, "log_odds_chosen": 11.241284370422363, "log_odds_ratio": -0.000701241078786552, "logits/chosen": -0.2342221438884735, "logits/rejected": -0.319629043340683, "logps/chosen": -0.001087275566533208, "logps/rejected": -2.5344743728637695, "loss": 0.4757, "nll_loss": 0.11884773522615433, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010872755228774622, "rewards/margins": 0.25333869457244873, "rewards/rejected": -0.25344744324684143, "step": 12808 }, { "epoch": 8.8582295988935, "grad_norm": 3.1583425998687744, "learning_rate": 6.343168895036115e-06, "log_odds_chosen": 11.552385330200195, "log_odds_ratio": -2.8983889933442697e-05, "logits/chosen": 0.15185663104057312, "logits/rejected": -0.0315750315785408, "logps/chosen": -0.001065196585841477, "logps/rejected": -3.191751003265381, "loss": 0.3775, "nll_loss": 0.0943649560213089, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010651966294972226, "rewards/margins": 0.3190686106681824, "rewards/rejected": -0.319175124168396, "step": 12809 }, { "epoch": 8.858921161825727, "grad_norm": 5.184572696685791, "learning_rate": 6.339326878745966e-06, "log_odds_chosen": 11.042671203613281, "log_odds_ratio": -2.5831781385932118e-05, "logits/chosen": -0.3577697277069092, "logits/rejected": -0.4123019576072693, "logps/chosen": -0.00017461557581555098, "logps/rejected": -2.0305471420288086, "loss": 0.2783, "nll_loss": 0.06958448886871338, "rewards/accuracies": 1.0, "rewards/chosen": -1.746155794535298e-05, "rewards/margins": 0.20303726196289062, "rewards/rejected": -0.20305472612380981, "step": 12810 }, { "epoch": 8.859612724757953, "grad_norm": 3.8851094245910645, "learning_rate": 6.335484862455817e-06, "log_odds_chosen": 12.024246215820312, "log_odds_ratio": -3.257915159338154e-05, "logits/chosen": -0.0674266517162323, "logits/rejected": -0.004043757915496826, "logps/chosen": -0.0001970323792193085, "logps/rejected": -3.0870656967163086, "loss": 0.3569, "nll_loss": 0.08922448754310608, "rewards/accuracies": 1.0, "rewards/chosen": -1.970323864952661e-05, "rewards/margins": 0.30868688225746155, "rewards/rejected": -0.3087065815925598, "step": 12811 }, { "epoch": 8.86030428769018, "grad_norm": 3.7420690059661865, "learning_rate": 6.331642846165668e-06, "log_odds_chosen": 10.842025756835938, "log_odds_ratio": -6.420222052838653e-05, "logits/chosen": -0.5012819766998291, "logits/rejected": -0.5137823820114136, "logps/chosen": -0.00014037819346413016, "logps/rejected": -1.833478569984436, "loss": 0.3821, "nll_loss": 0.09550876915454865, "rewards/accuracies": 1.0, "rewards/chosen": -1.4037818800716195e-05, "rewards/margins": 0.18333381414413452, "rewards/rejected": -0.18334785103797913, "step": 12812 }, { "epoch": 8.860995850622407, "grad_norm": 3.35178279876709, "learning_rate": 6.327800829875519e-06, "log_odds_chosen": 10.516077041625977, "log_odds_ratio": -0.0001255777315236628, "logits/chosen": -0.52434903383255, "logits/rejected": -0.5578416585922241, "logps/chosen": -0.00040695726056583226, "logps/rejected": -2.4701924324035645, "loss": 0.391, "nll_loss": 0.0977269783616066, "rewards/accuracies": 1.0, "rewards/chosen": -4.069573333254084e-05, "rewards/margins": 0.2469785511493683, "rewards/rejected": -0.2470192313194275, "step": 12813 }, { "epoch": 8.861687413554634, "grad_norm": 4.4756669998168945, "learning_rate": 6.32395881358537e-06, "log_odds_chosen": 12.155223846435547, "log_odds_ratio": -6.89135049469769e-06, "logits/chosen": -0.38654935359954834, "logits/rejected": -0.5143874287605286, "logps/chosen": -0.000187495126738213, "logps/rejected": -3.0094921588897705, "loss": 0.5471, "nll_loss": 0.136766716837883, "rewards/accuracies": 1.0, "rewards/chosen": -1.874951340141706e-05, "rewards/margins": 0.3009304702281952, "rewards/rejected": -0.30094921588897705, "step": 12814 }, { "epoch": 8.86237897648686, "grad_norm": 3.7717156410217285, "learning_rate": 6.320116797295221e-06, "log_odds_chosen": 11.97378158569336, "log_odds_ratio": -3.298856609035283e-05, "logits/chosen": -0.763237714767456, "logits/rejected": -0.8728320598602295, "logps/chosen": -9.731641330290586e-05, "logps/rejected": -2.331000804901123, "loss": 0.4268, "nll_loss": 0.10668745636940002, "rewards/accuracies": 1.0, "rewards/chosen": -9.731641512189526e-06, "rewards/margins": 0.2330903708934784, "rewards/rejected": -0.23310008645057678, "step": 12815 }, { "epoch": 8.863070539419088, "grad_norm": 3.1342639923095703, "learning_rate": 6.316274781005072e-06, "log_odds_chosen": 10.921533584594727, "log_odds_ratio": -2.9228267521830276e-05, "logits/chosen": -0.3252316415309906, "logits/rejected": -0.15217556059360504, "logps/chosen": -0.0001484237000113353, "logps/rejected": -1.9739879369735718, "loss": 0.5619, "nll_loss": 0.14046324789524078, "rewards/accuracies": 1.0, "rewards/chosen": -1.4842370546830352e-05, "rewards/margins": 0.19738394021987915, "rewards/rejected": -0.19739878177642822, "step": 12816 }, { "epoch": 8.863762102351314, "grad_norm": 3.5931262969970703, "learning_rate": 6.312432764714922e-06, "log_odds_chosen": 11.546354293823242, "log_odds_ratio": -2.073409268632531e-05, "logits/chosen": 0.14714400470256805, "logits/rejected": 0.061776965856552124, "logps/chosen": -0.0003727427392732352, "logps/rejected": -2.9483630657196045, "loss": 0.4372, "nll_loss": 0.10930097848176956, "rewards/accuracies": 1.0, "rewards/chosen": -3.727427974808961e-05, "rewards/margins": 0.2947990596294403, "rewards/rejected": -0.2948363423347473, "step": 12817 }, { "epoch": 8.864453665283541, "grad_norm": 4.673916339874268, "learning_rate": 6.308590748424774e-06, "log_odds_chosen": 11.306009292602539, "log_odds_ratio": -5.6018761824816465e-05, "logits/chosen": -0.8934646844863892, "logits/rejected": -1.0032732486724854, "logps/chosen": -0.00013073139416519552, "logps/rejected": -2.1071205139160156, "loss": 0.559, "nll_loss": 0.1397353559732437, "rewards/accuracies": 1.0, "rewards/chosen": -1.3073140507913195e-05, "rewards/margins": 0.21069897711277008, "rewards/rejected": -0.21071207523345947, "step": 12818 }, { "epoch": 8.865145228215768, "grad_norm": 3.451753616333008, "learning_rate": 6.304748732134624e-06, "log_odds_chosen": 11.227121353149414, "log_odds_ratio": -0.00012289262667763978, "logits/chosen": 0.05771773308515549, "logits/rejected": -0.11866432428359985, "logps/chosen": -0.00041187513852491975, "logps/rejected": -2.3950722217559814, "loss": 0.2846, "nll_loss": 0.07113710045814514, "rewards/accuracies": 1.0, "rewards/chosen": -4.11875153076835e-05, "rewards/margins": 0.23946604132652283, "rewards/rejected": -0.23950722813606262, "step": 12819 }, { "epoch": 8.865836791147995, "grad_norm": 5.063889980316162, "learning_rate": 6.300906715844475e-06, "log_odds_chosen": 10.330509185791016, "log_odds_ratio": -9.231100557371974e-05, "logits/chosen": 0.4023495018482208, "logits/rejected": 0.2766904830932617, "logps/chosen": -0.00024465369642712176, "logps/rejected": -2.0109057426452637, "loss": 0.4294, "nll_loss": 0.10734772682189941, "rewards/accuracies": 1.0, "rewards/chosen": -2.4465369278914295e-05, "rewards/margins": 0.20106610655784607, "rewards/rejected": -0.20109057426452637, "step": 12820 }, { "epoch": 8.866528354080222, "grad_norm": 5.208076477050781, "learning_rate": 6.2970646995543265e-06, "log_odds_chosen": 10.887624740600586, "log_odds_ratio": -4.503025775193237e-05, "logits/chosen": -0.4728430509567261, "logits/rejected": -0.36085015535354614, "logps/chosen": -0.0001553138135932386, "logps/rejected": -1.9350595474243164, "loss": 0.368, "nll_loss": 0.0919969379901886, "rewards/accuracies": 1.0, "rewards/chosen": -1.5531382814515382e-05, "rewards/margins": 0.1934904307126999, "rewards/rejected": -0.19350595772266388, "step": 12821 }, { "epoch": 8.867219917012449, "grad_norm": 3.113422393798828, "learning_rate": 6.293222683264177e-06, "log_odds_chosen": 9.91431999206543, "log_odds_ratio": -0.0001638552057556808, "logits/chosen": -0.30638688802719116, "logits/rejected": -0.24746140837669373, "logps/chosen": -0.0005692498525604606, "logps/rejected": -1.891648769378662, "loss": 0.3147, "nll_loss": 0.07865750789642334, "rewards/accuracies": 1.0, "rewards/chosen": -5.6924989621620625e-05, "rewards/margins": 0.1891079545021057, "rewards/rejected": -0.1891648769378662, "step": 12822 }, { "epoch": 8.867911479944675, "grad_norm": 7.623961925506592, "learning_rate": 6.289380666974028e-06, "log_odds_chosen": 11.233999252319336, "log_odds_ratio": -1.57922477228567e-05, "logits/chosen": -0.10346847772598267, "logits/rejected": -0.01052796095609665, "logps/chosen": -0.00010200871474808082, "logps/rejected": -2.065570592880249, "loss": 0.3413, "nll_loss": 0.08533357083797455, "rewards/accuracies": 1.0, "rewards/chosen": -1.020087074721232e-05, "rewards/margins": 0.2065468579530716, "rewards/rejected": -0.20655705034732819, "step": 12823 }, { "epoch": 8.868603042876902, "grad_norm": 3.6400489807128906, "learning_rate": 6.28553865068388e-06, "log_odds_chosen": 12.271528244018555, "log_odds_ratio": -3.0035564122954383e-05, "logits/chosen": -0.13494722545146942, "logits/rejected": -0.13900548219680786, "logps/chosen": -0.0001601783442310989, "logps/rejected": -2.7965166568756104, "loss": 0.3943, "nll_loss": 0.09856868535280228, "rewards/accuracies": 1.0, "rewards/chosen": -1.601783515070565e-05, "rewards/margins": 0.2796356678009033, "rewards/rejected": -0.2796516716480255, "step": 12824 }, { "epoch": 8.869294605809129, "grad_norm": 4.36489725112915, "learning_rate": 6.28169663439373e-06, "log_odds_chosen": 10.686090469360352, "log_odds_ratio": -0.00018325365090277046, "logits/chosen": -0.00666133314371109, "logits/rejected": 0.12504035234451294, "logps/chosen": -0.0009015874238684773, "logps/rejected": -2.6624984741210938, "loss": 0.7697, "nll_loss": 0.19239458441734314, "rewards/accuracies": 1.0, "rewards/chosen": -9.015874820761383e-05, "rewards/margins": 0.26615968346595764, "rewards/rejected": -0.2662498354911804, "step": 12825 }, { "epoch": 8.869986168741356, "grad_norm": 4.69269323348999, "learning_rate": 6.27785461810358e-06, "log_odds_chosen": 11.004969596862793, "log_odds_ratio": -0.00010248890612274408, "logits/chosen": -0.6516998410224915, "logits/rejected": -0.6196410059928894, "logps/chosen": -0.00013203633716329932, "logps/rejected": -1.8846098184585571, "loss": 0.322, "nll_loss": 0.08050191402435303, "rewards/accuracies": 1.0, "rewards/chosen": -1.3203634807723574e-05, "rewards/margins": 0.18844778835773468, "rewards/rejected": -0.18846097588539124, "step": 12826 }, { "epoch": 8.870677731673583, "grad_norm": 3.3342418670654297, "learning_rate": 6.274012601813433e-06, "log_odds_chosen": 11.465997695922852, "log_odds_ratio": -1.9525301468092948e-05, "logits/chosen": -0.5683550834655762, "logits/rejected": -0.691111147403717, "logps/chosen": -9.050694643519819e-05, "logps/rejected": -2.0195186138153076, "loss": 0.391, "nll_loss": 0.09774242341518402, "rewards/accuracies": 1.0, "rewards/chosen": -9.050694643519819e-06, "rewards/margins": 0.2019428014755249, "rewards/rejected": -0.20195186138153076, "step": 12827 }, { "epoch": 8.87136929460581, "grad_norm": 4.982881546020508, "learning_rate": 6.270170585523283e-06, "log_odds_chosen": 10.338197708129883, "log_odds_ratio": -0.00015780533431097865, "logits/chosen": -0.06611031293869019, "logits/rejected": -0.04280729219317436, "logps/chosen": -0.0048825982958078384, "logps/rejected": -1.9869780540466309, "loss": 0.5907, "nll_loss": 0.14765596389770508, "rewards/accuracies": 1.0, "rewards/chosen": -0.00048825977137312293, "rewards/margins": 0.19820955395698547, "rewards/rejected": -0.19869780540466309, "step": 12828 }, { "epoch": 8.872060857538036, "grad_norm": 4.2163848876953125, "learning_rate": 6.266328569233133e-06, "log_odds_chosen": 11.460411071777344, "log_odds_ratio": -8.664889173815027e-05, "logits/chosen": -0.27214065194129944, "logits/rejected": -0.33706462383270264, "logps/chosen": -0.00016919002518989146, "logps/rejected": -2.4618775844573975, "loss": 0.4722, "nll_loss": 0.1180378869175911, "rewards/accuracies": 1.0, "rewards/chosen": -1.691900433797855e-05, "rewards/margins": 0.24617084860801697, "rewards/rejected": -0.2461877465248108, "step": 12829 }, { "epoch": 8.872752420470263, "grad_norm": 4.774320602416992, "learning_rate": 6.262486552942985e-06, "log_odds_chosen": 9.06969165802002, "log_odds_ratio": -0.0004509476129896939, "logits/chosen": -0.46754372119903564, "logits/rejected": -0.3664955794811249, "logps/chosen": -0.0012682451633736491, "logps/rejected": -1.5781537294387817, "loss": 0.5363, "nll_loss": 0.13401776552200317, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012682451051659882, "rewards/margins": 0.15768855810165405, "rewards/rejected": -0.1578153669834137, "step": 12830 }, { "epoch": 8.87344398340249, "grad_norm": 4.465846538543701, "learning_rate": 6.258644536652836e-06, "log_odds_chosen": 11.783326148986816, "log_odds_ratio": -4.0657272620592266e-05, "logits/chosen": -0.37636798620224, "logits/rejected": -0.3685925602912903, "logps/chosen": -0.0005404252442531288, "logps/rejected": -3.2265686988830566, "loss": 0.3596, "nll_loss": 0.08990206569433212, "rewards/accuracies": 1.0, "rewards/chosen": -5.404253170127049e-05, "rewards/margins": 0.3226028382778168, "rewards/rejected": -0.32265686988830566, "step": 12831 }, { "epoch": 8.874135546334717, "grad_norm": 5.006585597991943, "learning_rate": 6.2548025203626864e-06, "log_odds_chosen": 11.334182739257812, "log_odds_ratio": -6.983600906096399e-05, "logits/chosen": -0.3358827829360962, "logits/rejected": -0.37939217686653137, "logps/chosen": -0.00030704212258569896, "logps/rejected": -2.3556134700775146, "loss": 0.3131, "nll_loss": 0.07827488332986832, "rewards/accuracies": 1.0, "rewards/chosen": -3.070421371376142e-05, "rewards/margins": 0.23553064465522766, "rewards/rejected": -0.23556135594844818, "step": 12832 }, { "epoch": 8.874827109266944, "grad_norm": 2.919361114501953, "learning_rate": 6.250960504072538e-06, "log_odds_chosen": 10.745523452758789, "log_odds_ratio": -3.151583223370835e-05, "logits/chosen": 0.00840643048286438, "logits/rejected": -0.03765976428985596, "logps/chosen": -0.00019609363516792655, "logps/rejected": -1.8906185626983643, "loss": 0.3046, "nll_loss": 0.07615765929222107, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609364244388416e-05, "rewards/margins": 0.18904224038124084, "rewards/rejected": -0.18906186521053314, "step": 12833 }, { "epoch": 8.87551867219917, "grad_norm": 4.858453273773193, "learning_rate": 6.247118487782389e-06, "log_odds_chosen": 11.026376724243164, "log_odds_ratio": -0.0001252561341971159, "logits/chosen": -0.1558229774236679, "logits/rejected": -0.21528393030166626, "logps/chosen": -0.00019361113663762808, "logps/rejected": -1.9671748876571655, "loss": 0.511, "nll_loss": 0.12774495780467987, "rewards/accuracies": 1.0, "rewards/chosen": -1.9361112208571285e-05, "rewards/margins": 0.1966981291770935, "rewards/rejected": -0.1967175006866455, "step": 12834 }, { "epoch": 8.876210235131397, "grad_norm": 3.224592447280884, "learning_rate": 6.2432764714922395e-06, "log_odds_chosen": 10.939685821533203, "log_odds_ratio": -4.270240242476575e-05, "logits/chosen": -0.08519141376018524, "logits/rejected": -0.14094194769859314, "logps/chosen": -0.00024868431501090527, "logps/rejected": -2.2184906005859375, "loss": 0.4215, "nll_loss": 0.10537970066070557, "rewards/accuracies": 1.0, "rewards/chosen": -2.4868431864888407e-05, "rewards/margins": 0.22182418406009674, "rewards/rejected": -0.22184905409812927, "step": 12835 }, { "epoch": 8.876901798063624, "grad_norm": 9.507604598999023, "learning_rate": 6.23943445520209e-06, "log_odds_chosen": 11.620631217956543, "log_odds_ratio": -1.5148013517318759e-05, "logits/chosen": -0.27090179920196533, "logits/rejected": -0.4988439083099365, "logps/chosen": -0.00016171421157196164, "logps/rejected": -2.8916406631469727, "loss": 1.2096, "nll_loss": 0.30239495635032654, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171421520994045e-05, "rewards/margins": 0.28914791345596313, "rewards/rejected": -0.28916406631469727, "step": 12836 }, { "epoch": 8.877593360995851, "grad_norm": 3.323967695236206, "learning_rate": 6.235592438911941e-06, "log_odds_chosen": 11.173929214477539, "log_odds_ratio": -4.231243292451836e-05, "logits/chosen": 0.00477018766105175, "logits/rejected": 0.00024968013167381287, "logps/chosen": -0.00010535558249102905, "logps/rejected": -2.1376261711120605, "loss": 0.3183, "nll_loss": 0.07958222180604935, "rewards/accuracies": 1.0, "rewards/chosen": -1.0535557521507144e-05, "rewards/margins": 0.21375209093093872, "rewards/rejected": -0.21376262605190277, "step": 12837 }, { "epoch": 8.878284923928078, "grad_norm": 3.3721134662628174, "learning_rate": 6.231750422621793e-06, "log_odds_chosen": 10.522093772888184, "log_odds_ratio": -0.0001202807470690459, "logits/chosen": 0.16521060466766357, "logits/rejected": 0.08164684474468231, "logps/chosen": -0.00020281919569242746, "logps/rejected": -1.9066367149353027, "loss": 0.4108, "nll_loss": 0.10269135236740112, "rewards/accuracies": 1.0, "rewards/chosen": -2.028192102443427e-05, "rewards/margins": 0.19064339995384216, "rewards/rejected": -0.1906636655330658, "step": 12838 }, { "epoch": 8.878976486860305, "grad_norm": 7.772226810455322, "learning_rate": 6.227908406331643e-06, "log_odds_chosen": 11.112709999084473, "log_odds_ratio": -8.9738801761996e-05, "logits/chosen": -0.10383787751197815, "logits/rejected": -0.08016571402549744, "logps/chosen": -0.00013127163401804864, "logps/rejected": -2.2397923469543457, "loss": 0.3076, "nll_loss": 0.07689593732357025, "rewards/accuracies": 1.0, "rewards/chosen": -1.3127162674209103e-05, "rewards/margins": 0.22396612167358398, "rewards/rejected": -0.22397923469543457, "step": 12839 }, { "epoch": 8.879668049792532, "grad_norm": 3.7768611907958984, "learning_rate": 6.224066390041494e-06, "log_odds_chosen": 11.829188346862793, "log_odds_ratio": -2.597944694571197e-05, "logits/chosen": -0.05140957981348038, "logits/rejected": -0.1450553834438324, "logps/chosen": -0.00020165527530480176, "logps/rejected": -2.8034486770629883, "loss": 0.5474, "nll_loss": 0.13685037195682526, "rewards/accuracies": 1.0, "rewards/chosen": -2.0165527530480176e-05, "rewards/margins": 0.28032469749450684, "rewards/rejected": -0.2803449034690857, "step": 12840 }, { "epoch": 8.880359612724758, "grad_norm": 2.841261863708496, "learning_rate": 6.220224373751346e-06, "log_odds_chosen": 10.983772277832031, "log_odds_ratio": -0.00022373626416083425, "logits/chosen": -0.1124921515583992, "logits/rejected": -0.0917963832616806, "logps/chosen": -0.00021983537590131164, "logps/rejected": -2.258674144744873, "loss": 0.2772, "nll_loss": 0.06927579641342163, "rewards/accuracies": 1.0, "rewards/chosen": -2.198353649873752e-05, "rewards/margins": 0.22584544122219086, "rewards/rejected": -0.22586743533611298, "step": 12841 }, { "epoch": 8.881051175656985, "grad_norm": 2.910684823989868, "learning_rate": 6.216382357461196e-06, "log_odds_chosen": 11.847644805908203, "log_odds_ratio": -9.29053385334555e-06, "logits/chosen": -0.6474719643592834, "logits/rejected": -0.6795064210891724, "logps/chosen": -0.00010121862578671426, "logps/rejected": -2.5809245109558105, "loss": 0.4401, "nll_loss": 0.11002403497695923, "rewards/accuracies": 1.0, "rewards/chosen": -1.0121862032974605e-05, "rewards/margins": 0.2580823302268982, "rewards/rejected": -0.2580924332141876, "step": 12842 }, { "epoch": 8.881742738589212, "grad_norm": 3.9916775226593018, "learning_rate": 6.212540341171047e-06, "log_odds_chosen": 10.949371337890625, "log_odds_ratio": -4.622457345249131e-05, "logits/chosen": -0.677534282207489, "logits/rejected": -0.6486817002296448, "logps/chosen": -0.0003831333015114069, "logps/rejected": -2.3778223991394043, "loss": 0.5308, "nll_loss": 0.13269220292568207, "rewards/accuracies": 1.0, "rewards/chosen": -3.831333015114069e-05, "rewards/margins": 0.23774391412734985, "rewards/rejected": -0.23778222501277924, "step": 12843 }, { "epoch": 8.882434301521439, "grad_norm": 2.8240256309509277, "learning_rate": 6.208698324880897e-06, "log_odds_chosen": 11.520731925964355, "log_odds_ratio": -2.8301981728873216e-05, "logits/chosen": -0.2797636389732361, "logits/rejected": -0.33228588104248047, "logps/chosen": -0.0004265240568201989, "logps/rejected": -2.2186403274536133, "loss": 0.333, "nll_loss": 0.08325869590044022, "rewards/accuracies": 1.0, "rewards/chosen": -4.265240568201989e-05, "rewards/margins": 0.2218213826417923, "rewards/rejected": -0.22186404466629028, "step": 12844 }, { "epoch": 8.883125864453666, "grad_norm": 4.2253522872924805, "learning_rate": 6.204856308590749e-06, "log_odds_chosen": 10.74767780303955, "log_odds_ratio": -0.0001074980100383982, "logits/chosen": -0.32336893677711487, "logits/rejected": -0.3765081763267517, "logps/chosen": -0.0002828908618539572, "logps/rejected": -2.316620111465454, "loss": 0.2209, "nll_loss": 0.055208850651979446, "rewards/accuracies": 1.0, "rewards/chosen": -2.828908691299148e-05, "rewards/margins": 0.23163369297981262, "rewards/rejected": -0.23166200518608093, "step": 12845 }, { "epoch": 8.883817427385893, "grad_norm": 3.6948225498199463, "learning_rate": 6.2010142923005995e-06, "log_odds_chosen": 11.301068305969238, "log_odds_ratio": -0.00011821203952422366, "logits/chosen": -0.37012338638305664, "logits/rejected": -0.3886184096336365, "logps/chosen": -0.0001534484763396904, "logps/rejected": -2.148512601852417, "loss": 0.4827, "nll_loss": 0.12065938860177994, "rewards/accuracies": 1.0, "rewards/chosen": -1.5344847270171158e-05, "rewards/margins": 0.21483591198921204, "rewards/rejected": -0.2148512601852417, "step": 12846 }, { "epoch": 8.88450899031812, "grad_norm": 4.01563835144043, "learning_rate": 6.19717227601045e-06, "log_odds_chosen": 10.94062614440918, "log_odds_ratio": -0.00011104773147962987, "logits/chosen": -0.49986332654953003, "logits/rejected": -0.4930412173271179, "logps/chosen": -0.0003276771167293191, "logps/rejected": -2.2818984985351562, "loss": 0.5161, "nll_loss": 0.12901024520397186, "rewards/accuracies": 1.0, "rewards/chosen": -3.276771167293191e-05, "rewards/margins": 0.22815708816051483, "rewards/rejected": -0.2281898558139801, "step": 12847 }, { "epoch": 8.885200553250346, "grad_norm": 7.741686820983887, "learning_rate": 6.193330259720302e-06, "log_odds_chosen": 10.345017433166504, "log_odds_ratio": -0.0004354672273620963, "logits/chosen": -0.23076802492141724, "logits/rejected": -0.22433820366859436, "logps/chosen": -0.001394479419104755, "logps/rejected": -2.4289042949676514, "loss": 0.3773, "nll_loss": 0.0942818820476532, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013944793317932636, "rewards/margins": 0.24275097250938416, "rewards/rejected": -0.24289043247699738, "step": 12848 }, { "epoch": 8.885892116182573, "grad_norm": 4.3841328620910645, "learning_rate": 6.1894882434301526e-06, "log_odds_chosen": 11.631941795349121, "log_odds_ratio": -1.916138717206195e-05, "logits/chosen": 0.07189624011516571, "logits/rejected": 0.013653043657541275, "logps/chosen": -0.00021943646424915642, "logps/rejected": -3.1260085105895996, "loss": 0.4685, "nll_loss": 0.11713487654924393, "rewards/accuracies": 1.0, "rewards/chosen": -2.1943644242128357e-05, "rewards/margins": 0.312578946352005, "rewards/rejected": -0.31260088086128235, "step": 12849 }, { "epoch": 8.8865836791148, "grad_norm": 3.0561251640319824, "learning_rate": 6.185646227140003e-06, "log_odds_chosen": 11.04151725769043, "log_odds_ratio": -0.00019640347454696894, "logits/chosen": -0.4556815028190613, "logits/rejected": -0.4344314634799957, "logps/chosen": -0.0001518328208476305, "logps/rejected": -2.3554883003234863, "loss": 0.384, "nll_loss": 0.09599006175994873, "rewards/accuracies": 1.0, "rewards/chosen": -1.518328190286411e-05, "rewards/margins": 0.23553365468978882, "rewards/rejected": -0.23554883897304535, "step": 12850 }, { "epoch": 8.887275242047027, "grad_norm": 3.5416200160980225, "learning_rate": 6.181804210849854e-06, "log_odds_chosen": 11.330231666564941, "log_odds_ratio": -0.0001224998850375414, "logits/chosen": -0.487703412771225, "logits/rejected": -0.4873879551887512, "logps/chosen": -0.00037789743510074914, "logps/rejected": -2.9198246002197266, "loss": 0.3969, "nll_loss": 0.09920765459537506, "rewards/accuracies": 1.0, "rewards/chosen": -3.778974132728763e-05, "rewards/margins": 0.2919447124004364, "rewards/rejected": -0.2919824719429016, "step": 12851 }, { "epoch": 8.887966804979254, "grad_norm": 3.288649082183838, "learning_rate": 6.177962194559706e-06, "log_odds_chosen": 11.113425254821777, "log_odds_ratio": -0.00010231428314000368, "logits/chosen": -0.18255461752414703, "logits/rejected": -0.15630057454109192, "logps/chosen": -0.0004620938270818442, "logps/rejected": -2.818892240524292, "loss": 0.3652, "nll_loss": 0.09128084778785706, "rewards/accuracies": 1.0, "rewards/chosen": -4.62093812529929e-05, "rewards/margins": 0.28184300661087036, "rewards/rejected": -0.2818892300128937, "step": 12852 }, { "epoch": 8.88865836791148, "grad_norm": 2.824305534362793, "learning_rate": 6.1741201782695556e-06, "log_odds_chosen": 11.38699722290039, "log_odds_ratio": -2.6646030164556578e-05, "logits/chosen": 0.15093719959259033, "logits/rejected": 0.32247185707092285, "logps/chosen": -0.000268961041001603, "logps/rejected": -2.542618751525879, "loss": 0.4033, "nll_loss": 0.10083185136318207, "rewards/accuracies": 1.0, "rewards/chosen": -2.6896104827756062e-05, "rewards/margins": 0.2542349696159363, "rewards/rejected": -0.25426188111305237, "step": 12853 }, { "epoch": 8.889349930843707, "grad_norm": 3.5842673778533936, "learning_rate": 6.170278161979407e-06, "log_odds_chosen": 10.66506290435791, "log_odds_ratio": -5.382444942370057e-05, "logits/chosen": 0.23136094212532043, "logits/rejected": 0.07855528593063354, "logps/chosen": -0.00024186752852983773, "logps/rejected": -2.076673984527588, "loss": 0.4375, "nll_loss": 0.10937762260437012, "rewards/accuracies": 1.0, "rewards/chosen": -2.4186752852983773e-05, "rewards/margins": 0.20764324069023132, "rewards/rejected": -0.20766742527484894, "step": 12854 }, { "epoch": 8.890041493775934, "grad_norm": 4.17619514465332, "learning_rate": 6.166436145689258e-06, "log_odds_chosen": 11.726327896118164, "log_odds_ratio": -1.552937646920327e-05, "logits/chosen": -0.06163576617836952, "logits/rejected": -0.042286425828933716, "logps/chosen": -0.00010107838170370087, "logps/rejected": -2.2582149505615234, "loss": 0.5613, "nll_loss": 0.14033466577529907, "rewards/accuracies": 1.0, "rewards/chosen": -1.0107837624673266e-05, "rewards/margins": 0.22581139206886292, "rewards/rejected": -0.22582149505615234, "step": 12855 }, { "epoch": 8.89073305670816, "grad_norm": 4.036785125732422, "learning_rate": 6.162594129399109e-06, "log_odds_chosen": 10.615226745605469, "log_odds_ratio": -6.379517435561866e-05, "logits/chosen": -0.08870018273591995, "logits/rejected": -0.16170233488082886, "logps/chosen": -0.00035349337849766016, "logps/rejected": -2.0830063819885254, "loss": 0.4075, "nll_loss": 0.10186357796192169, "rewards/accuracies": 1.0, "rewards/chosen": -3.5349337849766016e-05, "rewards/margins": 0.2082652598619461, "rewards/rejected": -0.2083006203174591, "step": 12856 }, { "epoch": 8.891424619640388, "grad_norm": 2.970618486404419, "learning_rate": 6.15875211310896e-06, "log_odds_chosen": 10.837800979614258, "log_odds_ratio": -8.722803613636643e-05, "logits/chosen": -0.0712822899222374, "logits/rejected": -0.06109131500124931, "logps/chosen": -0.0002337824844289571, "logps/rejected": -2.4234533309936523, "loss": 0.3319, "nll_loss": 0.08297805488109589, "rewards/accuracies": 1.0, "rewards/chosen": -2.337824844289571e-05, "rewards/margins": 0.24232198297977448, "rewards/rejected": -0.24234536290168762, "step": 12857 }, { "epoch": 8.892116182572614, "grad_norm": 3.3171956539154053, "learning_rate": 6.154910096818811e-06, "log_odds_chosen": 12.331867218017578, "log_odds_ratio": -8.634147889097221e-06, "logits/chosen": 0.17678335309028625, "logits/rejected": 0.09244874119758606, "logps/chosen": -0.00012825954763684422, "logps/rejected": -3.29720401763916, "loss": 0.3602, "nll_loss": 0.09005635976791382, "rewards/accuracies": 1.0, "rewards/chosen": -1.2825955309381243e-05, "rewards/margins": 0.3297075927257538, "rewards/rejected": -0.3297204077243805, "step": 12858 }, { "epoch": 8.892807745504841, "grad_norm": 3.4692952632904053, "learning_rate": 6.151068080528662e-06, "log_odds_chosen": 10.557546615600586, "log_odds_ratio": -3.8898957427591085e-05, "logits/chosen": -0.34669697284698486, "logits/rejected": -0.29970356822013855, "logps/chosen": -0.000205686577828601, "logps/rejected": -1.8281556367874146, "loss": 0.3759, "nll_loss": 0.09398093819618225, "rewards/accuracies": 1.0, "rewards/chosen": -2.0568655600072816e-05, "rewards/margins": 0.18279501795768738, "rewards/rejected": -0.1828155666589737, "step": 12859 }, { "epoch": 8.893499308437068, "grad_norm": 3.5237081050872803, "learning_rate": 6.1472260642385125e-06, "log_odds_chosen": 11.03484058380127, "log_odds_ratio": -8.832294406602159e-05, "logits/chosen": 0.07353997230529785, "logits/rejected": 0.06155790761113167, "logps/chosen": -0.0001543796097394079, "logps/rejected": -1.8885478973388672, "loss": 0.3609, "nll_loss": 0.09021463990211487, "rewards/accuracies": 1.0, "rewards/chosen": -1.543796133773867e-05, "rewards/margins": 0.18883934617042542, "rewards/rejected": -0.18885478377342224, "step": 12860 }, { "epoch": 8.894190871369295, "grad_norm": 4.468094348907471, "learning_rate": 6.143384047948363e-06, "log_odds_chosen": 11.360898971557617, "log_odds_ratio": -2.829604272847064e-05, "logits/chosen": -0.1274232566356659, "logits/rejected": -0.1843823790550232, "logps/chosen": -5.869750748388469e-05, "logps/rejected": -1.824514627456665, "loss": 0.39, "nll_loss": 0.0974905714392662, "rewards/accuracies": 1.0, "rewards/chosen": -5.869750566489529e-06, "rewards/margins": 0.18244561553001404, "rewards/rejected": -0.18245148658752441, "step": 12861 }, { "epoch": 8.894882434301522, "grad_norm": 4.219329357147217, "learning_rate": 6.139542031658215e-06, "log_odds_chosen": 12.523521423339844, "log_odds_ratio": -6.953141564736143e-06, "logits/chosen": -0.1573798656463623, "logits/rejected": -0.21202057600021362, "logps/chosen": -0.0001332549873040989, "logps/rejected": -3.31657338142395, "loss": 0.3527, "nll_loss": 0.08818377554416656, "rewards/accuracies": 1.0, "rewards/chosen": -1.3325498002814129e-05, "rewards/margins": 0.3316440284252167, "rewards/rejected": -0.331657350063324, "step": 12862 }, { "epoch": 8.895573997233749, "grad_norm": 2.5606212615966797, "learning_rate": 6.135700015368066e-06, "log_odds_chosen": 11.821147918701172, "log_odds_ratio": -2.5821833332884125e-05, "logits/chosen": -0.3105715215206146, "logits/rejected": -0.3597787022590637, "logps/chosen": -2.3031490854918957e-05, "logps/rejected": -1.2378180027008057, "loss": 0.2508, "nll_loss": 0.06269891560077667, "rewards/accuracies": 1.0, "rewards/chosen": -2.3031489035929553e-06, "rewards/margins": 0.12377950549125671, "rewards/rejected": -0.12378180772066116, "step": 12863 }, { "epoch": 8.896265560165975, "grad_norm": 3.686124324798584, "learning_rate": 6.131857999077916e-06, "log_odds_chosen": 12.19543743133545, "log_odds_ratio": -4.4658067054115236e-05, "logits/chosen": -0.023852862417697906, "logits/rejected": -0.09455503523349762, "logps/chosen": -0.0002681456971913576, "logps/rejected": -3.197145700454712, "loss": 0.4735, "nll_loss": 0.1183595210313797, "rewards/accuracies": 1.0, "rewards/chosen": -2.681456862774212e-05, "rewards/margins": 0.31968778371810913, "rewards/rejected": -0.31971457600593567, "step": 12864 }, { "epoch": 8.896957123098202, "grad_norm": 3.9430251121520996, "learning_rate": 6.128015982787767e-06, "log_odds_chosen": 12.069925308227539, "log_odds_ratio": -1.630558108445257e-05, "logits/chosen": -0.44029539823532104, "logits/rejected": -0.5152474641799927, "logps/chosen": -0.00014673490659333766, "logps/rejected": -3.1295437812805176, "loss": 0.2993, "nll_loss": 0.07482878863811493, "rewards/accuracies": 1.0, "rewards/chosen": -1.4673489204142243e-05, "rewards/margins": 0.31293970346450806, "rewards/rejected": -0.3129543364048004, "step": 12865 }, { "epoch": 8.89764868603043, "grad_norm": 4.9491353034973145, "learning_rate": 6.124173966497619e-06, "log_odds_chosen": 11.404975891113281, "log_odds_ratio": -7.579707744298503e-05, "logits/chosen": -0.14094297587871552, "logits/rejected": -0.07794803380966187, "logps/chosen": -0.0006937507423572242, "logps/rejected": -3.015080690383911, "loss": 0.3813, "nll_loss": 0.09531792253255844, "rewards/accuracies": 1.0, "rewards/chosen": -6.937507714610547e-05, "rewards/margins": 0.30143871903419495, "rewards/rejected": -0.3015080690383911, "step": 12866 }, { "epoch": 8.898340248962656, "grad_norm": 4.752613067626953, "learning_rate": 6.120331950207469e-06, "log_odds_chosen": 10.431466102600098, "log_odds_ratio": -0.0002222591283498332, "logits/chosen": -0.10291004180908203, "logits/rejected": -0.18914756178855896, "logps/chosen": -0.0002860078529920429, "logps/rejected": -2.165668249130249, "loss": 0.5033, "nll_loss": 0.12581023573875427, "rewards/accuracies": 1.0, "rewards/chosen": -2.8600787118193693e-05, "rewards/margins": 0.2165382206439972, "rewards/rejected": -0.216566801071167, "step": 12867 }, { "epoch": 8.899031811894883, "grad_norm": 3.4255270957946777, "learning_rate": 6.11648993391732e-06, "log_odds_chosen": 11.101449966430664, "log_odds_ratio": -0.00017685459170024842, "logits/chosen": -0.3344976305961609, "logits/rejected": -0.224727064371109, "logps/chosen": -0.00043847967754118145, "logps/rejected": -2.806305408477783, "loss": 0.3789, "nll_loss": 0.09469691663980484, "rewards/accuracies": 1.0, "rewards/chosen": -4.384796193335205e-05, "rewards/margins": 0.28058671951293945, "rewards/rejected": -0.28063055872917175, "step": 12868 }, { "epoch": 8.89972337482711, "grad_norm": 3.2223756313323975, "learning_rate": 6.112647917627171e-06, "log_odds_chosen": 10.269891738891602, "log_odds_ratio": -0.0005653017433360219, "logits/chosen": 0.10738126188516617, "logits/rejected": 0.05207332223653793, "logps/chosen": -0.0010791353415697813, "logps/rejected": -2.058807373046875, "loss": 0.3766, "nll_loss": 0.094081811606884, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010791353270178661, "rewards/margins": 0.20577283203601837, "rewards/rejected": -0.20588073134422302, "step": 12869 }, { "epoch": 8.900414937759336, "grad_norm": 4.243490219116211, "learning_rate": 6.108805901337022e-06, "log_odds_chosen": 10.89694595336914, "log_odds_ratio": -5.5372696806443855e-05, "logits/chosen": 0.15985409915447235, "logits/rejected": 0.021556168794631958, "logps/chosen": -0.00019277591491118073, "logps/rejected": -1.7762912511825562, "loss": 0.4085, "nll_loss": 0.10212212800979614, "rewards/accuracies": 1.0, "rewards/chosen": -1.9277591491118073e-05, "rewards/margins": 0.1776098608970642, "rewards/rejected": -0.17762914299964905, "step": 12870 }, { "epoch": 8.901106500691563, "grad_norm": 4.758657932281494, "learning_rate": 6.104963885046873e-06, "log_odds_chosen": 10.790678977966309, "log_odds_ratio": -0.0005490690236911178, "logits/chosen": 0.3262699544429779, "logits/rejected": 0.3296666145324707, "logps/chosen": -0.0004946886328980327, "logps/rejected": -1.9456416368484497, "loss": 0.4463, "nll_loss": 0.11152751743793488, "rewards/accuracies": 1.0, "rewards/chosen": -4.9468868382973596e-05, "rewards/margins": 0.1945146918296814, "rewards/rejected": -0.19456416368484497, "step": 12871 }, { "epoch": 8.90179806362379, "grad_norm": 8.041678428649902, "learning_rate": 6.101121868756724e-06, "log_odds_chosen": 9.12094783782959, "log_odds_ratio": -0.3240659534931183, "logits/chosen": -0.3996647596359253, "logits/rejected": -0.38311266899108887, "logps/chosen": -0.05655330419540405, "logps/rejected": -1.757213830947876, "loss": 0.4893, "nll_loss": 0.08991822600364685, "rewards/accuracies": 0.875, "rewards/chosen": -0.00565533060580492, "rewards/margins": 0.17006604373455048, "rewards/rejected": -0.17572136223316193, "step": 12872 }, { "epoch": 8.902489626556017, "grad_norm": 3.2363626956939697, "learning_rate": 6.097279852466575e-06, "log_odds_chosen": 10.426342010498047, "log_odds_ratio": -0.00019338764832355082, "logits/chosen": -0.3495548963546753, "logits/rejected": -0.34510067105293274, "logps/chosen": -0.00038078470970503986, "logps/rejected": -1.8046441078186035, "loss": 0.3205, "nll_loss": 0.08011390268802643, "rewards/accuracies": 1.0, "rewards/chosen": -3.807847315329127e-05, "rewards/margins": 0.18042632937431335, "rewards/rejected": -0.18046441674232483, "step": 12873 }, { "epoch": 8.903181189488244, "grad_norm": 3.247476577758789, "learning_rate": 6.0934378361764255e-06, "log_odds_chosen": 12.517592430114746, "log_odds_ratio": -2.2141441149869934e-05, "logits/chosen": -0.9256491661071777, "logits/rejected": -1.013659119606018, "logps/chosen": -9.961918112821877e-05, "logps/rejected": -3.2004740238189697, "loss": 0.4517, "nll_loss": 0.11293157190084457, "rewards/accuracies": 1.0, "rewards/chosen": -9.961918294720817e-06, "rewards/margins": 0.3200374245643616, "rewards/rejected": -0.32004737854003906, "step": 12874 }, { "epoch": 8.90387275242047, "grad_norm": 3.782608985900879, "learning_rate": 6.089595819886277e-06, "log_odds_chosen": 9.190512657165527, "log_odds_ratio": -0.0004990790039300919, "logits/chosen": -0.016513854265213013, "logits/rejected": -0.03796660900115967, "logps/chosen": -0.005915326066315174, "logps/rejected": -2.5319509506225586, "loss": 0.3965, "nll_loss": 0.09908261150121689, "rewards/accuracies": 1.0, "rewards/chosen": -0.0005915326182730496, "rewards/margins": 0.25260356068611145, "rewards/rejected": -0.2531951069831848, "step": 12875 }, { "epoch": 8.904564315352697, "grad_norm": 3.9237937927246094, "learning_rate": 6.085753803596127e-06, "log_odds_chosen": 11.165157318115234, "log_odds_ratio": -0.00014351973368320614, "logits/chosen": -0.1661158800125122, "logits/rejected": -0.18346044421195984, "logps/chosen": -0.00042894965736195445, "logps/rejected": -2.540163278579712, "loss": 0.4959, "nll_loss": 0.12395624816417694, "rewards/accuracies": 1.0, "rewards/chosen": -4.289496428100392e-05, "rewards/margins": 0.25397342443466187, "rewards/rejected": -0.25401633977890015, "step": 12876 }, { "epoch": 8.905255878284924, "grad_norm": 5.909702777862549, "learning_rate": 6.081911787305979e-06, "log_odds_chosen": 9.985353469848633, "log_odds_ratio": -0.00014786752581130713, "logits/chosen": -0.551906168460846, "logits/rejected": -0.3992077708244324, "logps/chosen": -0.00029689978691749275, "logps/rejected": -1.4320263862609863, "loss": 0.5359, "nll_loss": 0.1339660882949829, "rewards/accuracies": 1.0, "rewards/chosen": -2.9689979783142917e-05, "rewards/margins": 0.143172949552536, "rewards/rejected": -0.14320263266563416, "step": 12877 }, { "epoch": 8.905947441217151, "grad_norm": 3.7521986961364746, "learning_rate": 6.078069771015829e-06, "log_odds_chosen": 11.077217102050781, "log_odds_ratio": -6.529257370857522e-05, "logits/chosen": -0.3501136004924774, "logits/rejected": -0.46925801038742065, "logps/chosen": -0.00012800595141015947, "logps/rejected": -2.1673057079315186, "loss": 0.361, "nll_loss": 0.09023625403642654, "rewards/accuracies": 1.0, "rewards/chosen": -1.2800595868611708e-05, "rewards/margins": 0.21671777963638306, "rewards/rejected": -0.21673056483268738, "step": 12878 }, { "epoch": 8.906639004149378, "grad_norm": 4.921710968017578, "learning_rate": 6.07422775472568e-06, "log_odds_chosen": 10.498296737670898, "log_odds_ratio": -0.00024876854149624705, "logits/chosen": -0.10423548519611359, "logits/rejected": -0.07139390707015991, "logps/chosen": -0.0006196919712238014, "logps/rejected": -1.8094336986541748, "loss": 0.6675, "nll_loss": 0.16684389114379883, "rewards/accuracies": 1.0, "rewards/chosen": -6.19692073087208e-05, "rewards/margins": 0.18088141083717346, "rewards/rejected": -0.18094336986541748, "step": 12879 }, { "epoch": 8.907330567081605, "grad_norm": 3.8833465576171875, "learning_rate": 6.070385738435532e-06, "log_odds_chosen": 10.646595001220703, "log_odds_ratio": -4.954760515829548e-05, "logits/chosen": -0.4792764484882355, "logits/rejected": -0.5197547674179077, "logps/chosen": -0.00021351524628698826, "logps/rejected": -2.2753090858459473, "loss": 0.3743, "nll_loss": 0.09356006234884262, "rewards/accuracies": 1.0, "rewards/chosen": -2.1351523173507303e-05, "rewards/margins": 0.22750955820083618, "rewards/rejected": -0.22753089666366577, "step": 12880 }, { "epoch": 8.908022130013832, "grad_norm": 4.093209743499756, "learning_rate": 6.066543722145382e-06, "log_odds_chosen": 10.409040451049805, "log_odds_ratio": -0.00010184202983509749, "logits/chosen": -0.47333845496177673, "logits/rejected": -0.42190152406692505, "logps/chosen": -0.00014664071204606444, "logps/rejected": -1.6970136165618896, "loss": 0.3442, "nll_loss": 0.086033895611763, "rewards/accuracies": 1.0, "rewards/chosen": -1.4664070477010682e-05, "rewards/margins": 0.1696867197751999, "rewards/rejected": -0.16970139741897583, "step": 12881 }, { "epoch": 8.908713692946058, "grad_norm": 4.007850646972656, "learning_rate": 6.062701705855233e-06, "log_odds_chosen": 10.015172958374023, "log_odds_ratio": -0.00034102267818525434, "logits/chosen": -0.16968387365341187, "logits/rejected": -0.1261231005191803, "logps/chosen": -0.0008383641252294183, "logps/rejected": -2.1441397666931152, "loss": 0.471, "nll_loss": 0.11772003024816513, "rewards/accuracies": 1.0, "rewards/chosen": -8.383641397813335e-05, "rewards/margins": 0.21433015167713165, "rewards/rejected": -0.21441400051116943, "step": 12882 }, { "epoch": 8.909405255878285, "grad_norm": 3.5645110607147217, "learning_rate": 6.058859689565084e-06, "log_odds_chosen": 10.257688522338867, "log_odds_ratio": -9.336881339550018e-05, "logits/chosen": -0.2140951156616211, "logits/rejected": -0.1839694380760193, "logps/chosen": -0.00048242363845929503, "logps/rejected": -1.7746849060058594, "loss": 0.421, "nll_loss": 0.10525010526180267, "rewards/accuracies": 1.0, "rewards/chosen": -4.8242360207950696e-05, "rewards/margins": 0.17742027342319489, "rewards/rejected": -0.17746850848197937, "step": 12883 }, { "epoch": 8.910096818810512, "grad_norm": 4.715599060058594, "learning_rate": 6.055017673274935e-06, "log_odds_chosen": 11.176240921020508, "log_odds_ratio": -0.00010343264148104936, "logits/chosen": 0.056480832397937775, "logits/rejected": 0.10483216494321823, "logps/chosen": -0.0003942837647628039, "logps/rejected": -2.299161195755005, "loss": 0.39, "nll_loss": 0.09748789668083191, "rewards/accuracies": 1.0, "rewards/chosen": -3.942838156945072e-05, "rewards/margins": 0.22987669706344604, "rewards/rejected": -0.22991611063480377, "step": 12884 }, { "epoch": 8.910788381742739, "grad_norm": 3.4167327880859375, "learning_rate": 6.051175656984786e-06, "log_odds_chosen": 11.99148941040039, "log_odds_ratio": -0.00018259882926940918, "logits/chosen": -0.8361790776252747, "logits/rejected": -0.8355551958084106, "logps/chosen": -0.0002680736070033163, "logps/rejected": -2.5528435707092285, "loss": 0.2954, "nll_loss": 0.07383642345666885, "rewards/accuracies": 1.0, "rewards/chosen": -2.6807359972735867e-05, "rewards/margins": 0.2552575469017029, "rewards/rejected": -0.2552843689918518, "step": 12885 }, { "epoch": 8.911479944674966, "grad_norm": 3.4526193141937256, "learning_rate": 6.047333640694637e-06, "log_odds_chosen": 11.774279594421387, "log_odds_ratio": -1.844509824877605e-05, "logits/chosen": -0.26306766271591187, "logits/rejected": -0.4063612222671509, "logps/chosen": -0.000151776141137816, "logps/rejected": -2.588733673095703, "loss": 0.3469, "nll_loss": 0.08673368394374847, "rewards/accuracies": 1.0, "rewards/chosen": -1.517761393188266e-05, "rewards/margins": 0.25885817408561707, "rewards/rejected": -0.2588733434677124, "step": 12886 }, { "epoch": 8.912171507607193, "grad_norm": 3.6353368759155273, "learning_rate": 6.043491624404488e-06, "log_odds_chosen": 11.046255111694336, "log_odds_ratio": -2.924896944023203e-05, "logits/chosen": -0.6610513925552368, "logits/rejected": -0.7078690528869629, "logps/chosen": -0.000441804644651711, "logps/rejected": -2.542691230773926, "loss": 0.3444, "nll_loss": 0.08609253168106079, "rewards/accuracies": 1.0, "rewards/chosen": -4.418046592036262e-05, "rewards/margins": 0.254224956035614, "rewards/rejected": -0.2542691230773926, "step": 12887 }, { "epoch": 8.91286307053942, "grad_norm": 5.413662910461426, "learning_rate": 6.0396496081143386e-06, "log_odds_chosen": 11.483522415161133, "log_odds_ratio": -1.6104526366689242e-05, "logits/chosen": -0.2953591048717499, "logits/rejected": -0.28520074486732483, "logps/chosen": -0.00025644036941230297, "logps/rejected": -2.4214723110198975, "loss": 0.554, "nll_loss": 0.13850894570350647, "rewards/accuracies": 1.0, "rewards/chosen": -2.564403803262394e-05, "rewards/margins": 0.2421215921640396, "rewards/rejected": -0.24214723706245422, "step": 12888 }, { "epoch": 8.913554633471646, "grad_norm": 4.65927791595459, "learning_rate": 6.03580759182419e-06, "log_odds_chosen": 11.934712409973145, "log_odds_ratio": -1.6666734154568985e-05, "logits/chosen": -0.4538784921169281, "logits/rejected": -0.5382453799247742, "logps/chosen": -0.00015001199790276587, "logps/rejected": -2.846168279647827, "loss": 0.3555, "nll_loss": 0.0888776034116745, "rewards/accuracies": 1.0, "rewards/chosen": -1.500120106356917e-05, "rewards/margins": 0.2846018373966217, "rewards/rejected": -0.2846168279647827, "step": 12889 }, { "epoch": 8.914246196403873, "grad_norm": 3.734205961227417, "learning_rate": 6.03196557553404e-06, "log_odds_chosen": 9.649690628051758, "log_odds_ratio": -0.0001534527982585132, "logits/chosen": -0.6405388116836548, "logits/rejected": -0.6261313557624817, "logps/chosen": -0.0006668938440270722, "logps/rejected": -1.7804850339889526, "loss": 0.3567, "nll_loss": 0.08914894610643387, "rewards/accuracies": 1.0, "rewards/chosen": -6.668939022347331e-05, "rewards/margins": 0.17798182368278503, "rewards/rejected": -0.1780485212802887, "step": 12890 }, { "epoch": 8.9149377593361, "grad_norm": 4.171419143676758, "learning_rate": 6.028123559243892e-06, "log_odds_chosen": 10.836984634399414, "log_odds_ratio": -8.64302710397169e-05, "logits/chosen": -0.40992042422294617, "logits/rejected": -0.5382879972457886, "logps/chosen": -0.000171576117281802, "logps/rejected": -1.7506659030914307, "loss": 0.5129, "nll_loss": 0.12822216749191284, "rewards/accuracies": 1.0, "rewards/chosen": -1.715761209197808e-05, "rewards/margins": 0.17504942417144775, "rewards/rejected": -0.17506657540798187, "step": 12891 }, { "epoch": 8.915629322268327, "grad_norm": 3.3719029426574707, "learning_rate": 6.024281542953742e-06, "log_odds_chosen": 11.46414852142334, "log_odds_ratio": -6.169763946672902e-05, "logits/chosen": -0.3673262894153595, "logits/rejected": -0.3972077965736389, "logps/chosen": -0.0002595953119453043, "logps/rejected": -2.636369228363037, "loss": 0.3731, "nll_loss": 0.09327857196331024, "rewards/accuracies": 1.0, "rewards/chosen": -2.5959534468711354e-05, "rewards/margins": 0.26361095905303955, "rewards/rejected": -0.26363691687583923, "step": 12892 }, { "epoch": 8.916320885200554, "grad_norm": 5.283071041107178, "learning_rate": 6.020439526663593e-06, "log_odds_chosen": 11.639951705932617, "log_odds_ratio": -2.1493771782843396e-05, "logits/chosen": -0.12732785940170288, "logits/rejected": -0.19323810935020447, "logps/chosen": -0.0001850973058026284, "logps/rejected": -2.5360217094421387, "loss": 0.4399, "nll_loss": 0.10997996479272842, "rewards/accuracies": 1.0, "rewards/chosen": -1.850973058026284e-05, "rewards/margins": 0.2535836696624756, "rewards/rejected": -0.25360217690467834, "step": 12893 }, { "epoch": 8.91701244813278, "grad_norm": 2.961632013320923, "learning_rate": 6.016597510373445e-06, "log_odds_chosen": 10.04461669921875, "log_odds_ratio": -0.0001737783313728869, "logits/chosen": -0.37068772315979004, "logits/rejected": -0.334428995847702, "logps/chosen": -0.0004964639665558934, "logps/rejected": -1.6151020526885986, "loss": 0.2749, "nll_loss": 0.0687069222331047, "rewards/accuracies": 1.0, "rewards/chosen": -4.9646398110780865e-05, "rewards/margins": 0.16146056354045868, "rewards/rejected": -0.16151021420955658, "step": 12894 }, { "epoch": 8.917704011065007, "grad_norm": 3.3943188190460205, "learning_rate": 6.012755494083295e-06, "log_odds_chosen": 11.50641918182373, "log_odds_ratio": -5.9767960919998586e-05, "logits/chosen": -0.20371156930923462, "logits/rejected": -0.21763645112514496, "logps/chosen": -0.00039010701584629714, "logps/rejected": -2.814880132675171, "loss": 0.4541, "nll_loss": 0.11352216452360153, "rewards/accuracies": 1.0, "rewards/chosen": -3.9010701584629714e-05, "rewards/margins": 0.28144901990890503, "rewards/rejected": -0.28148800134658813, "step": 12895 }, { "epoch": 8.918395573997234, "grad_norm": 4.094331741333008, "learning_rate": 6.008913477793146e-06, "log_odds_chosen": 11.03071403503418, "log_odds_ratio": -6.582721835002303e-05, "logits/chosen": -0.2888537049293518, "logits/rejected": -0.36299294233322144, "logps/chosen": -0.00029856865876354277, "logps/rejected": -2.29645037651062, "loss": 0.4599, "nll_loss": 0.11497573554515839, "rewards/accuracies": 1.0, "rewards/chosen": -2.985686660395004e-05, "rewards/margins": 0.2296151965856552, "rewards/rejected": -0.2296450436115265, "step": 12896 }, { "epoch": 8.91908713692946, "grad_norm": 3.5496981143951416, "learning_rate": 6.005071461502997e-06, "log_odds_chosen": 11.905342102050781, "log_odds_ratio": -1.7123305951827206e-05, "logits/chosen": -0.5121790170669556, "logits/rejected": -0.6361386179924011, "logps/chosen": -0.0001013822911772877, "logps/rejected": -2.4408626556396484, "loss": 0.3271, "nll_loss": 0.08177856355905533, "rewards/accuracies": 1.0, "rewards/chosen": -1.013822929962771e-05, "rewards/margins": 0.24407611787319183, "rewards/rejected": -0.24408625066280365, "step": 12897 }, { "epoch": 8.919778699861688, "grad_norm": 4.878851890563965, "learning_rate": 6.001229445212848e-06, "log_odds_chosen": 11.991606712341309, "log_odds_ratio": -1.579675154061988e-05, "logits/chosen": -0.5167713165283203, "logits/rejected": -0.4996938705444336, "logps/chosen": -9.84660437097773e-05, "logps/rejected": -2.709446430206299, "loss": 0.4521, "nll_loss": 0.11301855742931366, "rewards/accuracies": 1.0, "rewards/chosen": -9.84660437097773e-06, "rewards/margins": 0.2709348201751709, "rewards/rejected": -0.27094465494155884, "step": 12898 }, { "epoch": 8.920470262793915, "grad_norm": 2.981738805770874, "learning_rate": 5.9973874289226985e-06, "log_odds_chosen": 12.06153678894043, "log_odds_ratio": -1.4296781955636106e-05, "logits/chosen": -0.5645366907119751, "logits/rejected": -0.4843648672103882, "logps/chosen": -7.617931987624615e-05, "logps/rejected": -2.453253746032715, "loss": 0.2887, "nll_loss": 0.07217483222484589, "rewards/accuracies": 1.0, "rewards/chosen": -7.617932169523556e-06, "rewards/margins": 0.2453177571296692, "rewards/rejected": -0.24532538652420044, "step": 12899 }, { "epoch": 8.921161825726141, "grad_norm": 4.18194580078125, "learning_rate": 5.99354541263255e-06, "log_odds_chosen": 10.102523803710938, "log_odds_ratio": -0.00013515673344954848, "logits/chosen": -0.22662414610385895, "logits/rejected": -0.37085866928100586, "logps/chosen": -0.00022467051167041063, "logps/rejected": -1.9578008651733398, "loss": 0.6993, "nll_loss": 0.17481519281864166, "rewards/accuracies": 1.0, "rewards/chosen": -2.2467052986030467e-05, "rewards/margins": 0.19575762748718262, "rewards/rejected": -0.19578008353710175, "step": 12900 }, { "epoch": 8.921853388658368, "grad_norm": 3.408759832382202, "learning_rate": 5.989703396342401e-06, "log_odds_chosen": 11.4803466796875, "log_odds_ratio": -2.0814597519347444e-05, "logits/chosen": -0.6959565877914429, "logits/rejected": -0.7700467705726624, "logps/chosen": -0.00016696771490387619, "logps/rejected": -2.2839138507843018, "loss": 0.4165, "nll_loss": 0.10413195192813873, "rewards/accuracies": 1.0, "rewards/chosen": -1.669677294557914e-05, "rewards/margins": 0.2283746898174286, "rewards/rejected": -0.2283913791179657, "step": 12901 }, { "epoch": 8.922544951590595, "grad_norm": 3.7199792861938477, "learning_rate": 5.985861380052252e-06, "log_odds_chosen": 12.063653945922852, "log_odds_ratio": -1.393444836139679e-05, "logits/chosen": -0.14315587282180786, "logits/rejected": -0.15993037819862366, "logps/chosen": -0.00010108323476742953, "logps/rejected": -2.712588310241699, "loss": 0.4464, "nll_loss": 0.11160380393266678, "rewards/accuracies": 1.0, "rewards/chosen": -1.0108323294844013e-05, "rewards/margins": 0.2712487280368805, "rewards/rejected": -0.2712588310241699, "step": 12902 }, { "epoch": 8.923236514522822, "grad_norm": 2.6532492637634277, "learning_rate": 5.982019363762103e-06, "log_odds_chosen": 11.91860580444336, "log_odds_ratio": -0.0002539358683861792, "logits/chosen": -0.8610085844993591, "logits/rejected": -0.6951379776000977, "logps/chosen": -0.0005537466495297849, "logps/rejected": -2.798727512359619, "loss": 0.2503, "nll_loss": 0.06255386024713516, "rewards/accuracies": 1.0, "rewards/chosen": -5.537466495297849e-05, "rewards/margins": 0.2798174023628235, "rewards/rejected": -0.2798727750778198, "step": 12903 }, { "epoch": 8.923928077455049, "grad_norm": 3.5159571170806885, "learning_rate": 5.978177347471953e-06, "log_odds_chosen": 10.942304611206055, "log_odds_ratio": -2.9865241231163964e-05, "logits/chosen": -0.19039902091026306, "logits/rejected": -0.3441365659236908, "logps/chosen": -0.00013759138528257608, "logps/rejected": -2.0489914417266846, "loss": 0.3204, "nll_loss": 0.08008623123168945, "rewards/accuracies": 1.0, "rewards/chosen": -1.375913961965125e-05, "rewards/margins": 0.20488540828227997, "rewards/rejected": -0.2048991620540619, "step": 12904 }, { "epoch": 8.924619640387276, "grad_norm": 4.046666622161865, "learning_rate": 5.974335331181805e-06, "log_odds_chosen": 10.682632446289062, "log_odds_ratio": -0.00018207883113063872, "logits/chosen": -0.4414520859718323, "logits/rejected": -0.4927637279033661, "logps/chosen": -0.00041065309778787196, "logps/rejected": -2.2062034606933594, "loss": 0.4563, "nll_loss": 0.1140667051076889, "rewards/accuracies": 1.0, "rewards/chosen": -4.106530832359567e-05, "rewards/margins": 0.22057931125164032, "rewards/rejected": -0.22062036395072937, "step": 12905 }, { "epoch": 8.925311203319502, "grad_norm": 3.6008899211883545, "learning_rate": 5.9704933148916554e-06, "log_odds_chosen": 10.777645111083984, "log_odds_ratio": -3.5626049793791026e-05, "logits/chosen": -0.2412605583667755, "logits/rejected": -0.3672165274620056, "logps/chosen": -0.00024223854416050017, "logps/rejected": -1.8778043985366821, "loss": 0.3319, "nll_loss": 0.08297750353813171, "rewards/accuracies": 1.0, "rewards/chosen": -2.4223854779847898e-05, "rewards/margins": 0.1877562254667282, "rewards/rejected": -0.1877804547548294, "step": 12906 }, { "epoch": 8.92600276625173, "grad_norm": 4.40451717376709, "learning_rate": 5.966651298601506e-06, "log_odds_chosen": 10.959539413452148, "log_odds_ratio": -6.162847421364859e-05, "logits/chosen": -0.4150117337703705, "logits/rejected": -0.5284802317619324, "logps/chosen": -0.00032370290136896074, "logps/rejected": -2.604623556137085, "loss": 0.3389, "nll_loss": 0.08471447229385376, "rewards/accuracies": 1.0, "rewards/chosen": -3.23702915920876e-05, "rewards/margins": 0.2604300081729889, "rewards/rejected": -0.26046237349510193, "step": 12907 }, { "epoch": 8.926694329183956, "grad_norm": 4.132070064544678, "learning_rate": 5.962809282311358e-06, "log_odds_chosen": 10.724711418151855, "log_odds_ratio": -0.00018126626673620194, "logits/chosen": -0.2756832242012024, "logits/rejected": -0.46245506405830383, "logps/chosen": -0.00045413419138640165, "logps/rejected": -2.5995662212371826, "loss": 0.2985, "nll_loss": 0.07460995763540268, "rewards/accuracies": 1.0, "rewards/chosen": -4.541342059383169e-05, "rewards/margins": 0.2599112391471863, "rewards/rejected": -0.25995662808418274, "step": 12908 }, { "epoch": 8.927385892116183, "grad_norm": 4.18914270401001, "learning_rate": 5.9589672660212085e-06, "log_odds_chosen": 10.33116340637207, "log_odds_ratio": -0.00024307092826347798, "logits/chosen": -0.40187928080558777, "logits/rejected": -0.4286579489707947, "logps/chosen": -0.0008695382857695222, "logps/rejected": -1.8647468090057373, "loss": 0.7077, "nll_loss": 0.1768883615732193, "rewards/accuracies": 1.0, "rewards/chosen": -8.695383439771831e-05, "rewards/margins": 0.18638773262500763, "rewards/rejected": -0.18647469580173492, "step": 12909 }, { "epoch": 8.92807745504841, "grad_norm": 3.1195945739746094, "learning_rate": 5.955125249731059e-06, "log_odds_chosen": 9.846996307373047, "log_odds_ratio": -0.00019851140677928925, "logits/chosen": -0.24981671571731567, "logits/rejected": -0.4254041314125061, "logps/chosen": -0.00045403523836284876, "logps/rejected": -1.7068804502487183, "loss": 0.293, "nll_loss": 0.07322686910629272, "rewards/accuracies": 1.0, "rewards/chosen": -4.5403528929455206e-05, "rewards/margins": 0.1706426441669464, "rewards/rejected": -0.17068806290626526, "step": 12910 }, { "epoch": 8.928769017980636, "grad_norm": 4.41091775894165, "learning_rate": 5.95128323344091e-06, "log_odds_chosen": 11.76103401184082, "log_odds_ratio": -3.7582372897304595e-05, "logits/chosen": -0.11107562482357025, "logits/rejected": -0.142415851354599, "logps/chosen": -0.0002628167567308992, "logps/rejected": -3.3587985038757324, "loss": 0.487, "nll_loss": 0.12175235152244568, "rewards/accuracies": 1.0, "rewards/chosen": -2.628167567308992e-05, "rewards/margins": 0.33585357666015625, "rewards/rejected": -0.3358798623085022, "step": 12911 }, { "epoch": 8.929460580912863, "grad_norm": 5.352042198181152, "learning_rate": 5.947441217150761e-06, "log_odds_chosen": 10.705965042114258, "log_odds_ratio": -5.521774073713459e-05, "logits/chosen": -0.138837069272995, "logits/rejected": -0.13578133285045624, "logps/chosen": -0.00019610003801062703, "logps/rejected": -1.6561689376831055, "loss": 0.6693, "nll_loss": 0.16732990741729736, "rewards/accuracies": 1.0, "rewards/chosen": -1.9610004528658465e-05, "rewards/margins": 0.16559728980064392, "rewards/rejected": -0.16561688482761383, "step": 12912 }, { "epoch": 8.93015214384509, "grad_norm": 2.649043560028076, "learning_rate": 5.9435992008606115e-06, "log_odds_chosen": 10.769306182861328, "log_odds_ratio": -3.2309049856849015e-05, "logits/chosen": -0.24757800996303558, "logits/rejected": -0.2868199050426483, "logps/chosen": -0.00011812502634711564, "logps/rejected": -1.742016315460205, "loss": 0.2712, "nll_loss": 0.06779469549655914, "rewards/accuracies": 1.0, "rewards/chosen": -1.1812502634711564e-05, "rewards/margins": 0.17418982088565826, "rewards/rejected": -0.17420163750648499, "step": 12913 }, { "epoch": 8.930843706777317, "grad_norm": 6.05476713180542, "learning_rate": 5.939757184570463e-06, "log_odds_chosen": 10.862494468688965, "log_odds_ratio": -7.975931657711044e-05, "logits/chosen": -0.015442818403244019, "logits/rejected": -0.0983901172876358, "logps/chosen": -0.00032114313216879964, "logps/rejected": -2.0443334579467773, "loss": 0.4352, "nll_loss": 0.10878505557775497, "rewards/accuracies": 1.0, "rewards/chosen": -3.21143124892842e-05, "rewards/margins": 0.20440122485160828, "rewards/rejected": -0.2044333517551422, "step": 12914 }, { "epoch": 8.931535269709544, "grad_norm": 3.4305267333984375, "learning_rate": 5.935915168280314e-06, "log_odds_chosen": 11.25173282623291, "log_odds_ratio": -5.024883648729883e-05, "logits/chosen": -0.4715416431427002, "logits/rejected": -0.3380431532859802, "logps/chosen": -0.00018392120546195656, "logps/rejected": -2.075536012649536, "loss": 0.3506, "nll_loss": 0.08765394985675812, "rewards/accuracies": 1.0, "rewards/chosen": -1.839212200138718e-05, "rewards/margins": 0.20753520727157593, "rewards/rejected": -0.20755359530448914, "step": 12915 }, { "epoch": 8.93222683264177, "grad_norm": 3.3291497230529785, "learning_rate": 5.932073151990165e-06, "log_odds_chosen": 10.705180168151855, "log_odds_ratio": -0.00010199702228419483, "logits/chosen": -0.5880506038665771, "logits/rejected": -0.66464763879776, "logps/chosen": -0.00012990215327590704, "logps/rejected": -1.8533952236175537, "loss": 0.4839, "nll_loss": 0.12096136808395386, "rewards/accuracies": 1.0, "rewards/chosen": -1.2990216418984346e-05, "rewards/margins": 0.18532654643058777, "rewards/rejected": -0.1853395253419876, "step": 12916 }, { "epoch": 8.932918395573997, "grad_norm": 4.643276691436768, "learning_rate": 5.928231135700016e-06, "log_odds_chosen": 10.787064552307129, "log_odds_ratio": -5.836702621309087e-05, "logits/chosen": -0.8610040545463562, "logits/rejected": -0.8191035389900208, "logps/chosen": -0.00011820576037280262, "logps/rejected": -1.7074048519134521, "loss": 0.709, "nll_loss": 0.17724096775054932, "rewards/accuracies": 1.0, "rewards/chosen": -1.1820576219179202e-05, "rewards/margins": 0.1707286536693573, "rewards/rejected": -0.17074048519134521, "step": 12917 }, { "epoch": 8.933609958506224, "grad_norm": 6.3906569480896, "learning_rate": 5.924389119409866e-06, "log_odds_chosen": 11.986827850341797, "log_odds_ratio": -3.079729140154086e-05, "logits/chosen": 0.005445163231343031, "logits/rejected": -0.07006532698869705, "logps/chosen": -0.00014014035696163774, "logps/rejected": -2.962845802307129, "loss": 0.5773, "nll_loss": 0.1443236768245697, "rewards/accuracies": 1.0, "rewards/chosen": -1.4014036423759535e-05, "rewards/margins": 0.29627057909965515, "rewards/rejected": -0.29628461599349976, "step": 12918 }, { "epoch": 8.934301521438451, "grad_norm": 4.157391548156738, "learning_rate": 5.920547103119718e-06, "log_odds_chosen": 9.951608657836914, "log_odds_ratio": -0.0008364100940525532, "logits/chosen": 0.005071647465229034, "logits/rejected": -0.21576163172721863, "logps/chosen": -0.0008652383694425225, "logps/rejected": -2.1721723079681396, "loss": 0.5196, "nll_loss": 0.12982165813446045, "rewards/accuracies": 1.0, "rewards/chosen": -8.65238398546353e-05, "rewards/margins": 0.21713072061538696, "rewards/rejected": -0.21721722185611725, "step": 12919 }, { "epoch": 8.934993084370678, "grad_norm": 3.5984549522399902, "learning_rate": 5.9167050868295685e-06, "log_odds_chosen": 10.075098037719727, "log_odds_ratio": -0.00210709311068058, "logits/chosen": -0.08690416812896729, "logits/rejected": -0.06445138901472092, "logps/chosen": -0.015951991081237793, "logps/rejected": -1.8065953254699707, "loss": 0.3356, "nll_loss": 0.08369840681552887, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015951991081237793, "rewards/margins": 0.1790643334388733, "rewards/rejected": -0.18065953254699707, "step": 12920 }, { "epoch": 8.935684647302905, "grad_norm": 3.1239354610443115, "learning_rate": 5.912863070539419e-06, "log_odds_chosen": 11.789560317993164, "log_odds_ratio": -1.7286456568399444e-05, "logits/chosen": -0.07189354300498962, "logits/rejected": -0.1427178978919983, "logps/chosen": -9.551585389999673e-05, "logps/rejected": -2.617459774017334, "loss": 0.353, "nll_loss": 0.08825767785310745, "rewards/accuracies": 1.0, "rewards/chosen": -9.551585208100732e-06, "rewards/margins": 0.2617364227771759, "rewards/rejected": -0.26174598932266235, "step": 12921 }, { "epoch": 8.936376210235132, "grad_norm": 2.6034464836120605, "learning_rate": 5.90902105424927e-06, "log_odds_chosen": 12.263644218444824, "log_odds_ratio": -2.3179472918855026e-05, "logits/chosen": -0.36033573746681213, "logits/rejected": -0.4884033799171448, "logps/chosen": -7.522512169089168e-05, "logps/rejected": -2.77170467376709, "loss": 0.2692, "nll_loss": 0.06729036569595337, "rewards/accuracies": 1.0, "rewards/chosen": -7.522512078139698e-06, "rewards/margins": 0.27716296911239624, "rewards/rejected": -0.27717047929763794, "step": 12922 }, { "epoch": 8.937067773167358, "grad_norm": 3.0454115867614746, "learning_rate": 5.9051790379591216e-06, "log_odds_chosen": 11.782583236694336, "log_odds_ratio": -3.901875970768742e-05, "logits/chosen": -0.2533440887928009, "logits/rejected": -0.3794962465763092, "logps/chosen": -0.00028467184165492654, "logps/rejected": -2.714261531829834, "loss": 0.4794, "nll_loss": 0.11983369290828705, "rewards/accuracies": 1.0, "rewards/chosen": -2.846718234650325e-05, "rewards/margins": 0.2713976800441742, "rewards/rejected": -0.27142617106437683, "step": 12923 }, { "epoch": 8.937759336099585, "grad_norm": 5.516876220703125, "learning_rate": 5.901337021668972e-06, "log_odds_chosen": 10.510817527770996, "log_odds_ratio": -8.967219037003815e-05, "logits/chosen": -0.07414241135120392, "logits/rejected": -0.1742817759513855, "logps/chosen": -0.0008987659821286798, "logps/rejected": -1.9164701700210571, "loss": 0.3752, "nll_loss": 0.09378183633089066, "rewards/accuracies": 1.0, "rewards/chosen": -8.987660112325102e-05, "rewards/margins": 0.1915571391582489, "rewards/rejected": -0.1916470229625702, "step": 12924 }, { "epoch": 8.938450899031812, "grad_norm": 3.05340576171875, "learning_rate": 5.897495005378823e-06, "log_odds_chosen": 10.531975746154785, "log_odds_ratio": -0.0001402997731929645, "logits/chosen": -0.13580265641212463, "logits/rejected": -0.17524084448814392, "logps/chosen": -0.0002469943428877741, "logps/rejected": -1.6168746948242188, "loss": 0.2895, "nll_loss": 0.07236262410879135, "rewards/accuracies": 1.0, "rewards/chosen": -2.4699435016373172e-05, "rewards/margins": 0.161662757396698, "rewards/rejected": -0.1616874635219574, "step": 12925 }, { "epoch": 8.939142461964039, "grad_norm": 3.0517728328704834, "learning_rate": 5.893652989088675e-06, "log_odds_chosen": 10.63259506225586, "log_odds_ratio": -0.00014863951946608722, "logits/chosen": -0.848569929599762, "logits/rejected": -0.7625840902328491, "logps/chosen": -0.000918300764169544, "logps/rejected": -2.0427727699279785, "loss": 0.3004, "nll_loss": 0.075095035135746, "rewards/accuracies": 1.0, "rewards/chosen": -9.183008660329506e-05, "rewards/margins": 0.20418545603752136, "rewards/rejected": -0.20427730679512024, "step": 12926 }, { "epoch": 8.939834024896266, "grad_norm": 2.812586545944214, "learning_rate": 5.8898109727985246e-06, "log_odds_chosen": 11.898021697998047, "log_odds_ratio": -4.053749216836877e-05, "logits/chosen": 0.04052726551890373, "logits/rejected": -0.009697234258055687, "logps/chosen": -0.0004591932229232043, "logps/rejected": -3.321937084197998, "loss": 0.3124, "nll_loss": 0.07810840010643005, "rewards/accuracies": 1.0, "rewards/chosen": -4.591932884068228e-05, "rewards/margins": 0.3321478068828583, "rewards/rejected": -0.3321937024593353, "step": 12927 }, { "epoch": 8.940525587828493, "grad_norm": 2.847050666809082, "learning_rate": 5.885968956508376e-06, "log_odds_chosen": 10.623669624328613, "log_odds_ratio": -0.00014860433293506503, "logits/chosen": -0.14876213669776917, "logits/rejected": -0.23775899410247803, "logps/chosen": -0.00025466305669397116, "logps/rejected": -1.7008178234100342, "loss": 0.287, "nll_loss": 0.07172475010156631, "rewards/accuracies": 1.0, "rewards/chosen": -2.5466306396992877e-05, "rewards/margins": 0.1700563132762909, "rewards/rejected": -0.17008177936077118, "step": 12928 }, { "epoch": 8.94121715076072, "grad_norm": 3.5438013076782227, "learning_rate": 5.882126940218227e-06, "log_odds_chosen": 11.95173454284668, "log_odds_ratio": -2.073194264085032e-05, "logits/chosen": -0.0442764014005661, "logits/rejected": -0.09602615237236023, "logps/chosen": -0.0001264129241462797, "logps/rejected": -2.8235487937927246, "loss": 0.4455, "nll_loss": 0.11136940866708755, "rewards/accuracies": 1.0, "rewards/chosen": -1.2641294233617373e-05, "rewards/margins": 0.28234225511550903, "rewards/rejected": -0.2823548913002014, "step": 12929 }, { "epoch": 8.941908713692946, "grad_norm": 2.4485747814178467, "learning_rate": 5.878284923928078e-06, "log_odds_chosen": 11.85983657836914, "log_odds_ratio": -1.5567180525977165e-05, "logits/chosen": -0.5800382494926453, "logits/rejected": -0.5141011476516724, "logps/chosen": -0.0001387509546475485, "logps/rejected": -2.8985164165496826, "loss": 0.3206, "nll_loss": 0.08015190809965134, "rewards/accuracies": 1.0, "rewards/chosen": -1.3875096556148492e-05, "rewards/margins": 0.2898377776145935, "rewards/rejected": -0.28985166549682617, "step": 12930 }, { "epoch": 8.942600276625173, "grad_norm": 5.132098197937012, "learning_rate": 5.874442907637929e-06, "log_odds_chosen": 11.904727935791016, "log_odds_ratio": -1.5229367818392348e-05, "logits/chosen": -0.3762471675872803, "logits/rejected": -0.4908618927001953, "logps/chosen": -0.00013014674186706543, "logps/rejected": -2.568297863006592, "loss": 0.3269, "nll_loss": 0.08172107487916946, "rewards/accuracies": 1.0, "rewards/chosen": -1.3014674550504424e-05, "rewards/margins": 0.2568167746067047, "rewards/rejected": -0.25682979822158813, "step": 12931 }, { "epoch": 8.9432918395574, "grad_norm": 6.083807945251465, "learning_rate": 5.870600891347779e-06, "log_odds_chosen": 11.309540748596191, "log_odds_ratio": -6.240185030037537e-05, "logits/chosen": -0.37128546833992004, "logits/rejected": -0.2944900393486023, "logps/chosen": -0.00011243198241572827, "logps/rejected": -2.2674102783203125, "loss": 0.653, "nll_loss": 0.16324546933174133, "rewards/accuracies": 1.0, "rewards/chosen": -1.1243198059673887e-05, "rewards/margins": 0.22672978043556213, "rewards/rejected": -0.2267410308122635, "step": 12932 }, { "epoch": 8.943983402489627, "grad_norm": 6.254725456237793, "learning_rate": 5.866758875057631e-06, "log_odds_chosen": 11.77969741821289, "log_odds_ratio": -1.0002793715102598e-05, "logits/chosen": -0.018555432558059692, "logits/rejected": -0.08299314975738525, "logps/chosen": -0.00010570554877631366, "logps/rejected": -2.568028450012207, "loss": 0.5376, "nll_loss": 0.13440899550914764, "rewards/accuracies": 1.0, "rewards/chosen": -1.0570555787126068e-05, "rewards/margins": 0.2567923069000244, "rewards/rejected": -0.25680285692214966, "step": 12933 }, { "epoch": 8.944674965421854, "grad_norm": 3.648500680923462, "learning_rate": 5.8629168587674815e-06, "log_odds_chosen": 10.733579635620117, "log_odds_ratio": -9.244455577572808e-05, "logits/chosen": -0.45096123218536377, "logits/rejected": -0.4225650727748871, "logps/chosen": -0.0001773490075720474, "logps/rejected": -1.7317311763763428, "loss": 0.3204, "nll_loss": 0.08009810745716095, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734901121002622e-05, "rewards/margins": 0.17315536737442017, "rewards/rejected": -0.17317311465740204, "step": 12934 }, { "epoch": 8.94536652835408, "grad_norm": 5.030453205108643, "learning_rate": 5.859074842477332e-06, "log_odds_chosen": 11.375288009643555, "log_odds_ratio": -2.013690937019419e-05, "logits/chosen": -0.36561113595962524, "logits/rejected": -0.45199379324913025, "logps/chosen": -0.00012862158473581076, "logps/rejected": -2.1877007484436035, "loss": 0.3258, "nll_loss": 0.08145881444215775, "rewards/accuracies": 1.0, "rewards/chosen": -1.2862159564974718e-05, "rewards/margins": 0.21875722706317902, "rewards/rejected": -0.2187700867652893, "step": 12935 }, { "epoch": 8.946058091286307, "grad_norm": 4.5273942947387695, "learning_rate": 5.855232826187183e-06, "log_odds_chosen": 11.617217063903809, "log_odds_ratio": -0.00019359974248800427, "logits/chosen": -0.2258719503879547, "logits/rejected": -0.3301970362663269, "logps/chosen": -0.0002116190444212407, "logps/rejected": -2.6876113414764404, "loss": 0.4106, "nll_loss": 0.10263660550117493, "rewards/accuracies": 1.0, "rewards/chosen": -2.1161906261113472e-05, "rewards/margins": 0.2687399685382843, "rewards/rejected": -0.26876112818717957, "step": 12936 }, { "epoch": 8.946749654218534, "grad_norm": 3.5262722969055176, "learning_rate": 5.851390809897035e-06, "log_odds_chosen": 11.726446151733398, "log_odds_ratio": -1.7297075828537345e-05, "logits/chosen": -0.7810917496681213, "logits/rejected": -0.7564036846160889, "logps/chosen": -0.00010531338193686679, "logps/rejected": -2.2990102767944336, "loss": 0.2811, "nll_loss": 0.07026363164186478, "rewards/accuracies": 1.0, "rewards/chosen": -1.053133837558562e-05, "rewards/margins": 0.22989048063755035, "rewards/rejected": -0.2299010157585144, "step": 12937 }, { "epoch": 8.947441217150761, "grad_norm": 3.5038387775421143, "learning_rate": 5.847548793606885e-06, "log_odds_chosen": 11.217157363891602, "log_odds_ratio": -6.293215119512752e-05, "logits/chosen": 0.015321101993322372, "logits/rejected": -0.06858345121145248, "logps/chosen": -0.0003072897670790553, "logps/rejected": -2.6550493240356445, "loss": 0.4326, "nll_loss": 0.10815395414829254, "rewards/accuracies": 1.0, "rewards/chosen": -3.0728973797522485e-05, "rewards/margins": 0.26547420024871826, "rewards/rejected": -0.2655049264431, "step": 12938 }, { "epoch": 8.948132780082988, "grad_norm": 6.250890254974365, "learning_rate": 5.843706777316736e-06, "log_odds_chosen": 10.85504150390625, "log_odds_ratio": -4.487161641009152e-05, "logits/chosen": -0.051928721368312836, "logits/rejected": -0.1429467797279358, "logps/chosen": -0.00021378413657657802, "logps/rejected": -2.436502456665039, "loss": 0.4156, "nll_loss": 0.1038907915353775, "rewards/accuracies": 1.0, "rewards/chosen": -2.1378415112849325e-05, "rewards/margins": 0.2436288595199585, "rewards/rejected": -0.24365025758743286, "step": 12939 }, { "epoch": 8.948824343015215, "grad_norm": 4.680706024169922, "learning_rate": 5.839864761026588e-06, "log_odds_chosen": 10.078415870666504, "log_odds_ratio": -0.0001346963836112991, "logits/chosen": -0.7340799570083618, "logits/rejected": -0.7243325710296631, "logps/chosen": -0.00021745695266872644, "logps/rejected": -1.7046865224838257, "loss": 0.398, "nll_loss": 0.09948675334453583, "rewards/accuracies": 1.0, "rewards/chosen": -2.1745694539276883e-05, "rewards/margins": 0.17044691741466522, "rewards/rejected": -0.17046865820884705, "step": 12940 }, { "epoch": 8.949515905947441, "grad_norm": 3.1285927295684814, "learning_rate": 5.836022744736438e-06, "log_odds_chosen": 10.486005783081055, "log_odds_ratio": -0.0003326584701426327, "logits/chosen": -0.3442254662513733, "logits/rejected": -0.44644254446029663, "logps/chosen": -0.0007124101975932717, "logps/rejected": -2.2549710273742676, "loss": 0.3062, "nll_loss": 0.07652806490659714, "rewards/accuracies": 1.0, "rewards/chosen": -7.124102558009326e-05, "rewards/margins": 0.2254258692264557, "rewards/rejected": -0.22549709677696228, "step": 12941 }, { "epoch": 8.950207468879668, "grad_norm": 4.95887565612793, "learning_rate": 5.832180728446289e-06, "log_odds_chosen": 10.1839599609375, "log_odds_ratio": -0.00018181573250330985, "logits/chosen": -0.38977324962615967, "logits/rejected": -0.43788349628448486, "logps/chosen": -0.0007927245460450649, "logps/rejected": -2.435636520385742, "loss": 0.5751, "nll_loss": 0.1437670737504959, "rewards/accuracies": 1.0, "rewards/chosen": -7.927245314931497e-05, "rewards/margins": 0.24348436295986176, "rewards/rejected": -0.24356365203857422, "step": 12942 }, { "epoch": 8.950899031811895, "grad_norm": 5.5691022872924805, "learning_rate": 5.82833871215614e-06, "log_odds_chosen": 10.578014373779297, "log_odds_ratio": -0.00010544771066633984, "logits/chosen": -0.3612707853317261, "logits/rejected": -0.3483430743217468, "logps/chosen": -0.00033310920116491616, "logps/rejected": -1.9903244972229004, "loss": 0.2752, "nll_loss": 0.06878717243671417, "rewards/accuracies": 1.0, "rewards/chosen": -3.3310920116491616e-05, "rewards/margins": 0.19899912178516388, "rewards/rejected": -0.19903242588043213, "step": 12943 }, { "epoch": 8.951590594744122, "grad_norm": 4.931035041809082, "learning_rate": 5.824496695865991e-06, "log_odds_chosen": 11.544112205505371, "log_odds_ratio": -8.371780131710693e-05, "logits/chosen": -0.01711699366569519, "logits/rejected": -0.12601056694984436, "logps/chosen": -0.00022672172053717077, "logps/rejected": -2.5129387378692627, "loss": 0.5678, "nll_loss": 0.14193536341190338, "rewards/accuracies": 1.0, "rewards/chosen": -2.2672171326121315e-05, "rewards/margins": 0.25127118825912476, "rewards/rejected": -0.2512938678264618, "step": 12944 }, { "epoch": 8.952282157676349, "grad_norm": 3.42557430267334, "learning_rate": 5.8206546795758414e-06, "log_odds_chosen": 11.254566192626953, "log_odds_ratio": -2.6210473151877522e-05, "logits/chosen": -0.6701757311820984, "logits/rejected": -0.7973129153251648, "logps/chosen": -0.00014112208737060428, "logps/rejected": -2.1129250526428223, "loss": 0.3808, "nll_loss": 0.09519369900226593, "rewards/accuracies": 1.0, "rewards/chosen": -1.4112210919847712e-05, "rewards/margins": 0.21127840876579285, "rewards/rejected": -0.21129250526428223, "step": 12945 }, { "epoch": 8.952973720608576, "grad_norm": 4.1768012046813965, "learning_rate": 5.816812663285692e-06, "log_odds_chosen": 11.903830528259277, "log_odds_ratio": -7.812883268343285e-06, "logits/chosen": -0.0588909387588501, "logits/rejected": -0.13018256425857544, "logps/chosen": -6.235972978174686e-05, "logps/rejected": -2.16908860206604, "loss": 0.3494, "nll_loss": 0.08734458684921265, "rewards/accuracies": 1.0, "rewards/chosen": -6.235973614820978e-06, "rewards/margins": 0.21690261363983154, "rewards/rejected": -0.21690885722637177, "step": 12946 }, { "epoch": 8.953665283540802, "grad_norm": 2.765223979949951, "learning_rate": 5.812970646995544e-06, "log_odds_chosen": 11.580140113830566, "log_odds_ratio": -6.168565596453846e-05, "logits/chosen": -0.2464328408241272, "logits/rejected": -0.3394399881362915, "logps/chosen": -0.00020144270092714578, "logps/rejected": -2.749572277069092, "loss": 0.2704, "nll_loss": 0.06760434806346893, "rewards/accuracies": 1.0, "rewards/chosen": -2.0144269001320936e-05, "rewards/margins": 0.27493709325790405, "rewards/rejected": -0.27495723962783813, "step": 12947 }, { "epoch": 8.95435684647303, "grad_norm": 3.754606008529663, "learning_rate": 5.8091286307053945e-06, "log_odds_chosen": 12.030777931213379, "log_odds_ratio": -0.0003125116927549243, "logits/chosen": -0.49243322014808655, "logits/rejected": -0.5307193994522095, "logps/chosen": -0.00020017765928059816, "logps/rejected": -2.820101737976074, "loss": 0.3514, "nll_loss": 0.0878116562962532, "rewards/accuracies": 1.0, "rewards/chosen": -2.001776738325134e-05, "rewards/margins": 0.2819901704788208, "rewards/rejected": -0.28201019763946533, "step": 12948 }, { "epoch": 8.955048409405256, "grad_norm": 4.734391689300537, "learning_rate": 5.805286614415245e-06, "log_odds_chosen": 11.878615379333496, "log_odds_ratio": -1.0572795872576535e-05, "logits/chosen": -0.26124829053878784, "logits/rejected": -0.26312682032585144, "logps/chosen": -0.00014705224020872265, "logps/rejected": -2.939932107925415, "loss": 0.4128, "nll_loss": 0.10318911075592041, "rewards/accuracies": 1.0, "rewards/chosen": -1.4705223293276504e-05, "rewards/margins": 0.2939785122871399, "rewards/rejected": -0.293993204832077, "step": 12949 }, { "epoch": 8.955739972337483, "grad_norm": 3.333796977996826, "learning_rate": 5.801444598125096e-06, "log_odds_chosen": 9.838175773620605, "log_odds_ratio": -0.001023567165248096, "logits/chosen": -0.053210288286209106, "logits/rejected": -0.2315722405910492, "logps/chosen": -0.0015227515250444412, "logps/rejected": -2.102912664413452, "loss": 0.3932, "nll_loss": 0.0982080027461052, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015227515541482717, "rewards/margins": 0.21013899147510529, "rewards/rejected": -0.21029126644134521, "step": 12950 }, { "epoch": 8.95643153526971, "grad_norm": 4.7104082107543945, "learning_rate": 5.797602581834948e-06, "log_odds_chosen": 12.069852828979492, "log_odds_ratio": -2.0408857380971313e-05, "logits/chosen": -0.7314295172691345, "logits/rejected": -0.7798600196838379, "logps/chosen": -7.370587263721973e-05, "logps/rejected": -2.668787956237793, "loss": 0.4181, "nll_loss": 0.10451561212539673, "rewards/accuracies": 1.0, "rewards/chosen": -7.370587354671443e-06, "rewards/margins": 0.26687145233154297, "rewards/rejected": -0.26687881350517273, "step": 12951 }, { "epoch": 8.957123098201937, "grad_norm": 4.402284145355225, "learning_rate": 5.793760565544798e-06, "log_odds_chosen": 11.385189056396484, "log_odds_ratio": -1.8733659089775756e-05, "logits/chosen": -0.5349085330963135, "logits/rejected": -0.529983639717102, "logps/chosen": -0.00043169912532903254, "logps/rejected": -2.567232370376587, "loss": 0.6042, "nll_loss": 0.1510457843542099, "rewards/accuracies": 1.0, "rewards/chosen": -4.31699154432863e-05, "rewards/margins": 0.25668007135391235, "rewards/rejected": -0.25672322511672974, "step": 12952 }, { "epoch": 8.957814661134163, "grad_norm": 2.8857409954071045, "learning_rate": 5.789918549254649e-06, "log_odds_chosen": 11.362333297729492, "log_odds_ratio": -0.00023769524705130607, "logits/chosen": -0.3102943003177643, "logits/rejected": -0.35927265882492065, "logps/chosen": -0.00030940567376092076, "logps/rejected": -2.809323310852051, "loss": 0.3118, "nll_loss": 0.0779205933213234, "rewards/accuracies": 1.0, "rewards/chosen": -3.0940565920900553e-05, "rewards/margins": 0.28090137243270874, "rewards/rejected": -0.28093230724334717, "step": 12953 }, { "epoch": 8.95850622406639, "grad_norm": 3.743520498275757, "learning_rate": 5.786076532964501e-06, "log_odds_chosen": 11.816137313842773, "log_odds_ratio": -1.3433329513645731e-05, "logits/chosen": 0.0074203647673130035, "logits/rejected": -0.024694515392184258, "logps/chosen": -0.00023023040557745844, "logps/rejected": -2.7371764183044434, "loss": 0.4441, "nll_loss": 0.11101721227169037, "rewards/accuracies": 1.0, "rewards/chosen": -2.302304346812889e-05, "rewards/margins": 0.27369460463523865, "rewards/rejected": -0.27371764183044434, "step": 12954 }, { "epoch": 8.959197786998617, "grad_norm": 2.8320579528808594, "learning_rate": 5.782234516674351e-06, "log_odds_chosen": 10.388065338134766, "log_odds_ratio": -0.00019531026191543788, "logits/chosen": -0.14298123121261597, "logits/rejected": 0.0031429678201675415, "logps/chosen": -0.0002625812776386738, "logps/rejected": -2.0366084575653076, "loss": 0.2295, "nll_loss": 0.05736586079001427, "rewards/accuracies": 1.0, "rewards/chosen": -2.6258127036271617e-05, "rewards/margins": 0.2036345899105072, "rewards/rejected": -0.20366084575653076, "step": 12955 }, { "epoch": 8.959889349930844, "grad_norm": 2.7911875247955322, "learning_rate": 5.778392500384202e-06, "log_odds_chosen": 11.16344928741455, "log_odds_ratio": -3.560550248948857e-05, "logits/chosen": -0.5641869306564331, "logits/rejected": -0.6493061780929565, "logps/chosen": -9.963040793081746e-05, "logps/rejected": -1.9015522003173828, "loss": 0.3369, "nll_loss": 0.08421522378921509, "rewards/accuracies": 1.0, "rewards/chosen": -9.963041520677507e-06, "rewards/margins": 0.19014525413513184, "rewards/rejected": -0.19015520811080933, "step": 12956 }, { "epoch": 8.96058091286307, "grad_norm": 3.4528636932373047, "learning_rate": 5.774550484094053e-06, "log_odds_chosen": 13.004242897033691, "log_odds_ratio": -9.532086551189423e-06, "logits/chosen": -0.02442549169063568, "logits/rejected": -0.16204750537872314, "logps/chosen": -9.38226148718968e-05, "logps/rejected": -3.5031580924987793, "loss": 0.4265, "nll_loss": 0.10662779957056046, "rewards/accuracies": 1.0, "rewards/chosen": -9.382261850987561e-06, "rewards/margins": 0.3503064215183258, "rewards/rejected": -0.35031580924987793, "step": 12957 }, { "epoch": 8.961272475795298, "grad_norm": 3.688526153564453, "learning_rate": 5.770708467803904e-06, "log_odds_chosen": 12.311029434204102, "log_odds_ratio": -2.615476660139393e-05, "logits/chosen": -0.21588446199893951, "logits/rejected": -0.20584505796432495, "logps/chosen": -0.00017990361084230244, "logps/rejected": -3.441439151763916, "loss": 0.5047, "nll_loss": 0.12618131935596466, "rewards/accuracies": 1.0, "rewards/chosen": -1.7990361811826006e-05, "rewards/margins": 0.3441259264945984, "rewards/rejected": -0.34414392709732056, "step": 12958 }, { "epoch": 8.961964038727524, "grad_norm": 3.9806787967681885, "learning_rate": 5.7668664515137545e-06, "log_odds_chosen": 12.16893196105957, "log_odds_ratio": -2.0543651771731675e-05, "logits/chosen": -0.4245246648788452, "logits/rejected": -0.48382627964019775, "logps/chosen": -0.00013065878010820597, "logps/rejected": -2.8527469635009766, "loss": 0.3881, "nll_loss": 0.09702569246292114, "rewards/accuracies": 1.0, "rewards/chosen": -1.3065878192719538e-05, "rewards/margins": 0.2852616310119629, "rewards/rejected": -0.2852747142314911, "step": 12959 }, { "epoch": 8.962655601659751, "grad_norm": 4.678699970245361, "learning_rate": 5.763024435223606e-06, "log_odds_chosen": 11.23891830444336, "log_odds_ratio": -0.00018691481091082096, "logits/chosen": -0.15080493688583374, "logits/rejected": -0.20680178701877594, "logps/chosen": -0.0006821725401096046, "logps/rejected": -2.517026901245117, "loss": 0.4356, "nll_loss": 0.10888192802667618, "rewards/accuracies": 1.0, "rewards/chosen": -6.821725401096046e-05, "rewards/margins": 0.25163447856903076, "rewards/rejected": -0.2517027258872986, "step": 12960 }, { "epoch": 8.963347164591978, "grad_norm": 3.943449020385742, "learning_rate": 5.759182418933457e-06, "log_odds_chosen": 11.836612701416016, "log_odds_ratio": -2.155277252313681e-05, "logits/chosen": 0.08393380045890808, "logits/rejected": -0.059216536581516266, "logps/chosen": -0.00016867309750523418, "logps/rejected": -2.7192699909210205, "loss": 0.3419, "nll_loss": 0.08547375351190567, "rewards/accuracies": 1.0, "rewards/chosen": -1.686731047811918e-05, "rewards/margins": 0.2719101309776306, "rewards/rejected": -0.27192699909210205, "step": 12961 }, { "epoch": 8.964038727524205, "grad_norm": 4.497346878051758, "learning_rate": 5.7553404026433075e-06, "log_odds_chosen": 10.36648941040039, "log_odds_ratio": -0.00037106170202605426, "logits/chosen": -0.08332974463701248, "logits/rejected": -0.0729660615324974, "logps/chosen": -0.0007888587424531579, "logps/rejected": -2.127197027206421, "loss": 0.6249, "nll_loss": 0.15619061887264252, "rewards/accuracies": 1.0, "rewards/chosen": -7.88858815212734e-05, "rewards/margins": 0.21264082193374634, "rewards/rejected": -0.21271970868110657, "step": 12962 }, { "epoch": 8.964730290456432, "grad_norm": 7.316700458526611, "learning_rate": 5.751498386353158e-06, "log_odds_chosen": 12.941807746887207, "log_odds_ratio": -1.3672158274857793e-05, "logits/chosen": 0.25198501348495483, "logits/rejected": 0.14804057776927948, "logps/chosen": -0.00013084606325719506, "logps/rejected": -3.711289405822754, "loss": 0.6218, "nll_loss": 0.1554485559463501, "rewards/accuracies": 1.0, "rewards/chosen": -1.3084607417113148e-05, "rewards/margins": 0.37111586332321167, "rewards/rejected": -0.37112894654273987, "step": 12963 }, { "epoch": 8.965421853388658, "grad_norm": 3.6101772785186768, "learning_rate": 5.747656370063009e-06, "log_odds_chosen": 11.11297607421875, "log_odds_ratio": -3.442693196120672e-05, "logits/chosen": -0.390951931476593, "logits/rejected": -0.4710812568664551, "logps/chosen": -0.002315881662070751, "logps/rejected": -2.4413504600524902, "loss": 0.3658, "nll_loss": 0.09143579751253128, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023158815747592598, "rewards/margins": 0.24390347301959991, "rewards/rejected": -0.2441350519657135, "step": 12964 }, { "epoch": 8.966113416320885, "grad_norm": 3.953753709793091, "learning_rate": 5.743814353772861e-06, "log_odds_chosen": 11.519864082336426, "log_odds_ratio": -7.848943641874939e-05, "logits/chosen": -0.42134904861450195, "logits/rejected": -0.43484270572662354, "logps/chosen": -0.00011462083057267591, "logps/rejected": -2.2720160484313965, "loss": 0.469, "nll_loss": 0.11724768579006195, "rewards/accuracies": 1.0, "rewards/chosen": -1.1462083421065472e-05, "rewards/margins": 0.22719016671180725, "rewards/rejected": -0.22720161080360413, "step": 12965 }, { "epoch": 8.966804979253112, "grad_norm": 3.7234745025634766, "learning_rate": 5.7399723374827105e-06, "log_odds_chosen": 11.299764633178711, "log_odds_ratio": -9.398536349181086e-05, "logits/chosen": -0.06496064364910126, "logits/rejected": -0.0666482150554657, "logps/chosen": -0.0002664439380168915, "logps/rejected": -2.4187681674957275, "loss": 0.3625, "nll_loss": 0.09060367941856384, "rewards/accuracies": 1.0, "rewards/chosen": -2.6644391255103983e-05, "rewards/margins": 0.24185019731521606, "rewards/rejected": -0.24187684059143066, "step": 12966 }, { "epoch": 8.967496542185339, "grad_norm": 2.955653667449951, "learning_rate": 5.736130321192562e-06, "log_odds_chosen": 10.262022972106934, "log_odds_ratio": -0.00018744800763670355, "logits/chosen": -0.3373003304004669, "logits/rejected": -0.4345345199108124, "logps/chosen": -0.0005691086989827454, "logps/rejected": -1.997305989265442, "loss": 0.3906, "nll_loss": 0.09762973338365555, "rewards/accuracies": 1.0, "rewards/chosen": -5.69108706258703e-05, "rewards/margins": 0.19967368245124817, "rewards/rejected": -0.19973058998584747, "step": 12967 }, { "epoch": 8.968188105117566, "grad_norm": 4.628073692321777, "learning_rate": 5.732288304902413e-06, "log_odds_chosen": 11.930381774902344, "log_odds_ratio": -0.00014643717440776527, "logits/chosen": 0.09199459105730057, "logits/rejected": -0.00980144739151001, "logps/chosen": -0.00012088009680155665, "logps/rejected": -2.930823802947998, "loss": 0.5376, "nll_loss": 0.13438236713409424, "rewards/accuracies": 1.0, "rewards/chosen": -1.2088009498256724e-05, "rewards/margins": 0.2930702865123749, "rewards/rejected": -0.2930823564529419, "step": 12968 }, { "epoch": 8.968879668049793, "grad_norm": 2.5830917358398438, "learning_rate": 5.728446288612264e-06, "log_odds_chosen": 11.369787216186523, "log_odds_ratio": -4.7115587221924216e-05, "logits/chosen": -0.30060356855392456, "logits/rejected": -0.2330998182296753, "logps/chosen": -0.00012587365927174687, "logps/rejected": -2.4167582988739014, "loss": 0.2575, "nll_loss": 0.06437969207763672, "rewards/accuracies": 1.0, "rewards/chosen": -1.2587365745275747e-05, "rewards/margins": 0.24166324734687805, "rewards/rejected": -0.24167583882808685, "step": 12969 }, { "epoch": 8.96957123098202, "grad_norm": 4.0085883140563965, "learning_rate": 5.724604272322115e-06, "log_odds_chosen": 12.000626564025879, "log_odds_ratio": -1.0436234333610628e-05, "logits/chosen": -0.11000185459852219, "logits/rejected": -0.19354018568992615, "logps/chosen": -0.00019879452884197235, "logps/rejected": -3.1691694259643555, "loss": 0.3915, "nll_loss": 0.09788475185632706, "rewards/accuracies": 1.0, "rewards/chosen": -1.987945506698452e-05, "rewards/margins": 0.31689709424972534, "rewards/rejected": -0.31691697239875793, "step": 12970 }, { "epoch": 8.970262793914246, "grad_norm": 5.15786075592041, "learning_rate": 5.720762256031966e-06, "log_odds_chosen": 11.258201599121094, "log_odds_ratio": -3.323886630823836e-05, "logits/chosen": 0.225187748670578, "logits/rejected": 0.07851268351078033, "logps/chosen": -0.0005183239700272679, "logps/rejected": -2.8408079147338867, "loss": 0.5503, "nll_loss": 0.1375592052936554, "rewards/accuracies": 1.0, "rewards/chosen": -5.183239409234375e-05, "rewards/margins": 0.2840290069580078, "rewards/rejected": -0.2840808033943176, "step": 12971 }, { "epoch": 8.970954356846473, "grad_norm": 3.935385227203369, "learning_rate": 5.716920239741817e-06, "log_odds_chosen": 12.17033863067627, "log_odds_ratio": -1.7809870769269764e-05, "logits/chosen": -0.10739308595657349, "logits/rejected": -0.17841652035713196, "logps/chosen": -0.00014362987712956965, "logps/rejected": -3.1119933128356934, "loss": 0.3589, "nll_loss": 0.08972074836492538, "rewards/accuracies": 1.0, "rewards/chosen": -1.4362988622451667e-05, "rewards/margins": 0.31118500232696533, "rewards/rejected": -0.3111993670463562, "step": 12972 }, { "epoch": 8.9716459197787, "grad_norm": 4.132678508758545, "learning_rate": 5.7130782234516675e-06, "log_odds_chosen": 11.330833435058594, "log_odds_ratio": -2.018288978433702e-05, "logits/chosen": -0.0428299643099308, "logits/rejected": -0.17370249330997467, "logps/chosen": -0.00012135677388869226, "logps/rejected": -2.0014662742614746, "loss": 0.4309, "nll_loss": 0.10771875083446503, "rewards/accuracies": 1.0, "rewards/chosen": -1.2135677025071345e-05, "rewards/margins": 0.20013447105884552, "rewards/rejected": -0.2001466155052185, "step": 12973 }, { "epoch": 8.972337482710927, "grad_norm": 3.055609703063965, "learning_rate": 5.709236207161519e-06, "log_odds_chosen": 10.535971641540527, "log_odds_ratio": -8.944002911448479e-05, "logits/chosen": -0.21757450699806213, "logits/rejected": -0.28312134742736816, "logps/chosen": -0.0004208739846944809, "logps/rejected": -2.163292407989502, "loss": 0.3287, "nll_loss": 0.08217275142669678, "rewards/accuracies": 1.0, "rewards/chosen": -4.208739846944809e-05, "rewards/margins": 0.21628715097904205, "rewards/rejected": -0.21632923185825348, "step": 12974 }, { "epoch": 8.973029045643154, "grad_norm": 3.2214202880859375, "learning_rate": 5.70539419087137e-06, "log_odds_chosen": 10.623476028442383, "log_odds_ratio": -0.0001413396093994379, "logits/chosen": -0.4127468466758728, "logits/rejected": -0.38557878136634827, "logps/chosen": -0.0007369809318333864, "logps/rejected": -2.2583162784576416, "loss": 0.4966, "nll_loss": 0.12413250654935837, "rewards/accuracies": 1.0, "rewards/chosen": -7.369809463853016e-05, "rewards/margins": 0.2257579118013382, "rewards/rejected": -0.22583162784576416, "step": 12975 }, { "epoch": 8.97372060857538, "grad_norm": 4.862353801727295, "learning_rate": 5.701552174581221e-06, "log_odds_chosen": 10.94636344909668, "log_odds_ratio": -0.00038006139220669866, "logits/chosen": -0.46704912185668945, "logits/rejected": -0.5940245389938354, "logps/chosen": -0.0037317401729524136, "logps/rejected": -2.696951389312744, "loss": 0.7203, "nll_loss": 0.18004614114761353, "rewards/accuracies": 1.0, "rewards/chosen": -0.00037317402893677354, "rewards/margins": 0.2693219780921936, "rewards/rejected": -0.2696951627731323, "step": 12976 }, { "epoch": 8.974412171507607, "grad_norm": 3.3329851627349854, "learning_rate": 5.697710158291072e-06, "log_odds_chosen": 10.701688766479492, "log_odds_ratio": -8.773025183472782e-05, "logits/chosen": -0.247919499874115, "logits/rejected": -0.2692440450191498, "logps/chosen": -0.0001977643696591258, "logps/rejected": -1.7542986869812012, "loss": 0.3565, "nll_loss": 0.08911775797605515, "rewards/accuracies": 1.0, "rewards/chosen": -1.97764366021147e-05, "rewards/margins": 0.17541009187698364, "rewards/rejected": -0.17542988061904907, "step": 12977 }, { "epoch": 8.975103734439834, "grad_norm": 3.1159744262695312, "learning_rate": 5.693868142000922e-06, "log_odds_chosen": 11.69921875, "log_odds_ratio": -3.141276101814583e-05, "logits/chosen": -0.11053688824176788, "logits/rejected": -0.18538329005241394, "logps/chosen": -0.00016071014397311956, "logps/rejected": -2.599425792694092, "loss": 0.3581, "nll_loss": 0.08951176702976227, "rewards/accuracies": 1.0, "rewards/chosen": -1.6071015124907717e-05, "rewards/margins": 0.25992652773857117, "rewards/rejected": -0.25994259119033813, "step": 12978 }, { "epoch": 8.975795297372061, "grad_norm": 4.339756488800049, "learning_rate": 5.690026125710774e-06, "log_odds_chosen": 10.366867065429688, "log_odds_ratio": -0.0003004825266543776, "logits/chosen": -0.6165687441825867, "logits/rejected": -0.7288893461227417, "logps/chosen": -0.0002853489713743329, "logps/rejected": -1.3389570713043213, "loss": 0.3429, "nll_loss": 0.08568733930587769, "rewards/accuracies": 1.0, "rewards/chosen": -2.8534897865029052e-05, "rewards/margins": 0.13386717438697815, "rewards/rejected": -0.13389572501182556, "step": 12979 }, { "epoch": 8.976486860304288, "grad_norm": 4.901917934417725, "learning_rate": 5.6861841094206236e-06, "log_odds_chosen": 10.370695114135742, "log_odds_ratio": -9.051861707121134e-05, "logits/chosen": 0.14978300034999847, "logits/rejected": 0.1547352373600006, "logps/chosen": -0.00029195219394750893, "logps/rejected": -1.8662596940994263, "loss": 0.4795, "nll_loss": 0.11985791474580765, "rewards/accuracies": 1.0, "rewards/chosen": -2.9195220122346655e-05, "rewards/margins": 0.18659678101539612, "rewards/rejected": -0.18662597239017487, "step": 12980 }, { "epoch": 8.977178423236515, "grad_norm": 3.297598361968994, "learning_rate": 5.682342093130475e-06, "log_odds_chosen": 10.424062728881836, "log_odds_ratio": -0.0006554003339260817, "logits/chosen": -0.1414889097213745, "logits/rejected": -0.14082834124565125, "logps/chosen": -0.002491435967385769, "logps/rejected": -2.1096932888031006, "loss": 0.3112, "nll_loss": 0.0777302086353302, "rewards/accuracies": 1.0, "rewards/chosen": -0.000249143602559343, "rewards/margins": 0.21072018146514893, "rewards/rejected": -0.21096934378147125, "step": 12981 }, { "epoch": 8.977869986168741, "grad_norm": 4.84626579284668, "learning_rate": 5.678500076840326e-06, "log_odds_chosen": 9.627571105957031, "log_odds_ratio": -0.00032676331466063857, "logits/chosen": -0.4661812484264374, "logits/rejected": -0.4674217402935028, "logps/chosen": -0.0007495335303246975, "logps/rejected": -2.1007561683654785, "loss": 0.4845, "nll_loss": 0.12110137939453125, "rewards/accuracies": 1.0, "rewards/chosen": -7.49533501220867e-05, "rewards/margins": 0.2100006639957428, "rewards/rejected": -0.21007561683654785, "step": 12982 }, { "epoch": 8.978561549100968, "grad_norm": 3.3853838443756104, "learning_rate": 5.674658060550177e-06, "log_odds_chosen": 11.485723495483398, "log_odds_ratio": -3.2207815820584074e-05, "logits/chosen": -0.6942331790924072, "logits/rejected": -0.7253660559654236, "logps/chosen": -0.00016284678713418543, "logps/rejected": -2.3455090522766113, "loss": 0.4133, "nll_loss": 0.10331219434738159, "rewards/accuracies": 1.0, "rewards/chosen": -1.6284679077216424e-05, "rewards/margins": 0.2345346212387085, "rewards/rejected": -0.23455092310905457, "step": 12983 }, { "epoch": 8.979253112033195, "grad_norm": 3.226949691772461, "learning_rate": 5.670816044260028e-06, "log_odds_chosen": 11.627418518066406, "log_odds_ratio": -6.23153755441308e-05, "logits/chosen": -0.03964445739984512, "logits/rejected": 0.03879677504301071, "logps/chosen": -0.0001272661320399493, "logps/rejected": -2.3287415504455566, "loss": 0.4016, "nll_loss": 0.10040175914764404, "rewards/accuracies": 1.0, "rewards/chosen": -1.272661302209599e-05, "rewards/margins": 0.23286142945289612, "rewards/rejected": -0.23287415504455566, "step": 12984 }, { "epoch": 8.979944674965422, "grad_norm": 7.261765480041504, "learning_rate": 5.666974027969879e-06, "log_odds_chosen": 11.11042594909668, "log_odds_ratio": -2.1344809283618815e-05, "logits/chosen": -0.20489904284477234, "logits/rejected": -0.22213873267173767, "logps/chosen": -8.23369700810872e-05, "logps/rejected": -1.7812402248382568, "loss": 0.2939, "nll_loss": 0.07347705215215683, "rewards/accuracies": 1.0, "rewards/chosen": -8.233697371906601e-06, "rewards/margins": 0.17811578512191772, "rewards/rejected": -0.17812402546405792, "step": 12985 }, { "epoch": 8.980636237897649, "grad_norm": 2.761765956878662, "learning_rate": 5.66313201167973e-06, "log_odds_chosen": 10.917032241821289, "log_odds_ratio": -0.0002200824674218893, "logits/chosen": -0.33595991134643555, "logits/rejected": -0.3741520047187805, "logps/chosen": -0.00033478077966719866, "logps/rejected": -2.235827922821045, "loss": 0.3161, "nll_loss": 0.07900466024875641, "rewards/accuracies": 1.0, "rewards/chosen": -3.3478077966719866e-05, "rewards/margins": 0.22354930639266968, "rewards/rejected": -0.22358280420303345, "step": 12986 }, { "epoch": 8.981327800829876, "grad_norm": 2.592590808868408, "learning_rate": 5.6592899953895805e-06, "log_odds_chosen": 10.91973876953125, "log_odds_ratio": -3.753927012439817e-05, "logits/chosen": -0.10205067694187164, "logits/rejected": -0.08578318357467651, "logps/chosen": -0.00017666578060016036, "logps/rejected": -1.973038911819458, "loss": 0.302, "nll_loss": 0.07549074292182922, "rewards/accuracies": 1.0, "rewards/chosen": -1.766657987900544e-05, "rewards/margins": 0.19728624820709229, "rewards/rejected": -0.1973038911819458, "step": 12987 }, { "epoch": 8.982019363762102, "grad_norm": 3.359773635864258, "learning_rate": 5.655447979099432e-06, "log_odds_chosen": 10.698223114013672, "log_odds_ratio": -0.000163710443302989, "logits/chosen": -0.563290536403656, "logits/rejected": -0.5860562324523926, "logps/chosen": -0.0032191036734730005, "logps/rejected": -1.9369779825210571, "loss": 0.365, "nll_loss": 0.09123671799898148, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032191036734730005, "rewards/margins": 0.19337588548660278, "rewards/rejected": -0.19369781017303467, "step": 12988 }, { "epoch": 8.98271092669433, "grad_norm": 3.8330631256103516, "learning_rate": 5.651605962809282e-06, "log_odds_chosen": 11.048598289489746, "log_odds_ratio": -7.060886855470017e-05, "logits/chosen": -0.23575690388679504, "logits/rejected": -0.27460363507270813, "logps/chosen": -0.00018293302855454385, "logps/rejected": -2.244535446166992, "loss": 0.3572, "nll_loss": 0.08928923308849335, "rewards/accuracies": 1.0, "rewards/chosen": -1.8293303583050147e-05, "rewards/margins": 0.2244352400302887, "rewards/rejected": -0.22445355355739594, "step": 12989 }, { "epoch": 8.983402489626556, "grad_norm": 4.441878795623779, "learning_rate": 5.647763946519134e-06, "log_odds_chosen": 11.518059730529785, "log_odds_ratio": -0.00014070258475840092, "logits/chosen": -0.4614870548248291, "logits/rejected": -0.3396298587322235, "logps/chosen": -0.0001907533296616748, "logps/rejected": -2.7353930473327637, "loss": 0.402, "nll_loss": 0.10049546509981155, "rewards/accuracies": 1.0, "rewards/chosen": -1.90753326023696e-05, "rewards/margins": 0.27352023124694824, "rewards/rejected": -0.27353930473327637, "step": 12990 }, { "epoch": 8.984094052558783, "grad_norm": 4.48910665512085, "learning_rate": 5.643921930228984e-06, "log_odds_chosen": 11.144182205200195, "log_odds_ratio": -2.1766394638689235e-05, "logits/chosen": -0.4736829400062561, "logits/rejected": -0.49978816509246826, "logps/chosen": -0.0001298037386732176, "logps/rejected": -2.041663646697998, "loss": 0.4048, "nll_loss": 0.10119934380054474, "rewards/accuracies": 1.0, "rewards/chosen": -1.2980374776816461e-05, "rewards/margins": 0.2041533887386322, "rewards/rejected": -0.20416638255119324, "step": 12991 }, { "epoch": 8.98478561549101, "grad_norm": 3.2902517318725586, "learning_rate": 5.640079913938835e-06, "log_odds_chosen": 10.783985137939453, "log_odds_ratio": -0.00022832911054138094, "logits/chosen": -0.47414305806159973, "logits/rejected": -0.5175694823265076, "logps/chosen": -0.00024217108148150146, "logps/rejected": -2.541477680206299, "loss": 0.5231, "nll_loss": 0.13074392080307007, "rewards/accuracies": 1.0, "rewards/chosen": -2.4217108148150146e-05, "rewards/margins": 0.2541235685348511, "rewards/rejected": -0.2541477680206299, "step": 12992 }, { "epoch": 8.985477178423237, "grad_norm": 12.170414924621582, "learning_rate": 5.636237897648687e-06, "log_odds_chosen": 10.079124450683594, "log_odds_ratio": -0.0012567834928631783, "logits/chosen": -0.8585034608840942, "logits/rejected": -0.9505305886268616, "logps/chosen": -0.031139731407165527, "logps/rejected": -2.6075291633605957, "loss": 0.3143, "nll_loss": 0.07844439148902893, "rewards/accuracies": 1.0, "rewards/chosen": -0.003113973420113325, "rewards/margins": 0.25763893127441406, "rewards/rejected": -0.26075291633605957, "step": 12993 }, { "epoch": 8.986168741355463, "grad_norm": 3.859238386154175, "learning_rate": 5.6323958813585375e-06, "log_odds_chosen": 10.160091400146484, "log_odds_ratio": -8.257082663476467e-05, "logits/chosen": -0.5174241065979004, "logits/rejected": -0.5387182235717773, "logps/chosen": -0.00047265770263038576, "logps/rejected": -1.6943714618682861, "loss": 0.3584, "nll_loss": 0.08958704769611359, "rewards/accuracies": 1.0, "rewards/chosen": -4.7265770263038576e-05, "rewards/margins": 0.16938988864421844, "rewards/rejected": -0.16943715512752533, "step": 12994 }, { "epoch": 8.98686030428769, "grad_norm": 2.6806588172912598, "learning_rate": 5.628553865068388e-06, "log_odds_chosen": 11.655320167541504, "log_odds_ratio": -1.0061345165013336e-05, "logits/chosen": -0.09891670197248459, "logits/rejected": -0.26903587579727173, "logps/chosen": -5.585578765021637e-05, "logps/rejected": -1.7518370151519775, "loss": 0.3508, "nll_loss": 0.08770234882831573, "rewards/accuracies": 1.0, "rewards/chosen": -5.5855789469205774e-06, "rewards/margins": 0.17517811059951782, "rewards/rejected": -0.1751837134361267, "step": 12995 }, { "epoch": 8.987551867219917, "grad_norm": 5.6125264167785645, "learning_rate": 5.624711848778239e-06, "log_odds_chosen": 8.728017807006836, "log_odds_ratio": -0.0006680641090497375, "logits/chosen": -0.48296207189559937, "logits/rejected": -0.5777981877326965, "logps/chosen": -0.0025174610782414675, "logps/rejected": -1.8701331615447998, "loss": 0.3588, "nll_loss": 0.08962175250053406, "rewards/accuracies": 1.0, "rewards/chosen": -0.000251746125286445, "rewards/margins": 0.18676157295703888, "rewards/rejected": -0.18701332807540894, "step": 12996 }, { "epoch": 8.988243430152144, "grad_norm": 2.7184691429138184, "learning_rate": 5.62086983248809e-06, "log_odds_chosen": 9.169109344482422, "log_odds_ratio": -0.0022203708067536354, "logits/chosen": -0.07939259707927704, "logits/rejected": -0.1898723989725113, "logps/chosen": -0.010505957528948784, "logps/rejected": -1.6082786321640015, "loss": 0.3542, "nll_loss": 0.0883224830031395, "rewards/accuracies": 1.0, "rewards/chosen": -0.001050595659762621, "rewards/margins": 0.15977725386619568, "rewards/rejected": -0.1608278453350067, "step": 12997 }, { "epoch": 8.98893499308437, "grad_norm": 4.824005126953125, "learning_rate": 5.617027816197941e-06, "log_odds_chosen": 10.597808837890625, "log_odds_ratio": -7.037734030745924e-05, "logits/chosen": 0.06742497533559799, "logits/rejected": 0.014721512794494629, "logps/chosen": -0.00015499829896725714, "logps/rejected": -1.6016286611557007, "loss": 0.4814, "nll_loss": 0.12033873051404953, "rewards/accuracies": 1.0, "rewards/chosen": -1.5499828805332072e-05, "rewards/margins": 0.16014736890792847, "rewards/rejected": -0.16016286611557007, "step": 12998 }, { "epoch": 8.989626556016598, "grad_norm": 4.160258769989014, "learning_rate": 5.613185799907792e-06, "log_odds_chosen": 9.693705558776855, "log_odds_ratio": -0.00022970604186411947, "logits/chosen": 0.00805056095123291, "logits/rejected": -0.2778843641281128, "logps/chosen": -0.0010679198894649744, "logps/rejected": -2.0614240169525146, "loss": 0.3218, "nll_loss": 0.08041678369045258, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010679199476726353, "rewards/margins": 0.20603561401367188, "rewards/rejected": -0.20614241063594818, "step": 12999 }, { "epoch": 8.990318118948824, "grad_norm": 4.214306354522705, "learning_rate": 5.609343783617643e-06, "log_odds_chosen": 11.489789009094238, "log_odds_ratio": -1.7366832253173925e-05, "logits/chosen": -0.06594070047140121, "logits/rejected": -0.1417391449213028, "logps/chosen": -0.0001669024204602465, "logps/rejected": -2.746901273727417, "loss": 0.7706, "nll_loss": 0.1926540732383728, "rewards/accuracies": 1.0, "rewards/chosen": -1.6690242773620412e-05, "rewards/margins": 0.2746734321117401, "rewards/rejected": -0.2746901214122772, "step": 13000 }, { "epoch": 8.991009681881051, "grad_norm": 3.0645954608917236, "learning_rate": 5.6055017673274935e-06, "log_odds_chosen": 10.695037841796875, "log_odds_ratio": -0.00014979125990066677, "logits/chosen": -0.7848923206329346, "logits/rejected": -0.759530246257782, "logps/chosen": -0.003960360772907734, "logps/rejected": -2.8756608963012695, "loss": 0.3462, "nll_loss": 0.0865248367190361, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003960360772907734, "rewards/margins": 0.28717008233070374, "rewards/rejected": -0.2875661253929138, "step": 13001 }, { "epoch": 8.991701244813278, "grad_norm": 3.4911797046661377, "learning_rate": 5.601659751037345e-06, "log_odds_chosen": 10.644820213317871, "log_odds_ratio": -0.0004768046783283353, "logits/chosen": -0.4039542078971863, "logits/rejected": -0.4295424818992615, "logps/chosen": -0.0008888083975762129, "logps/rejected": -2.218118667602539, "loss": 0.401, "nll_loss": 0.10020698606967926, "rewards/accuracies": 1.0, "rewards/chosen": -8.888084266800433e-05, "rewards/margins": 0.22172297537326813, "rewards/rejected": -0.22181186079978943, "step": 13002 }, { "epoch": 8.992392807745505, "grad_norm": 3.9166111946105957, "learning_rate": 5.597817734747195e-06, "log_odds_chosen": 11.036426544189453, "log_odds_ratio": -4.672490103985183e-05, "logits/chosen": -0.46782681345939636, "logits/rejected": -0.45481324195861816, "logps/chosen": -0.00010561967792455107, "logps/rejected": -1.8134853839874268, "loss": 0.3286, "nll_loss": 0.0821574330329895, "rewards/accuracies": 1.0, "rewards/chosen": -1.0561967428657226e-05, "rewards/margins": 0.18133798241615295, "rewards/rejected": -0.1813485473394394, "step": 13003 }, { "epoch": 8.993084370677732, "grad_norm": 3.3774895668029785, "learning_rate": 5.593975718457047e-06, "log_odds_chosen": 11.584017753601074, "log_odds_ratio": -1.1614972208917607e-05, "logits/chosen": -0.31898242235183716, "logits/rejected": -0.4430631995201111, "logps/chosen": -0.00012595205043908209, "logps/rejected": -2.2793500423431396, "loss": 0.3441, "nll_loss": 0.08601472526788712, "rewards/accuracies": 1.0, "rewards/chosen": -1.2595206499099731e-05, "rewards/margins": 0.22792242467403412, "rewards/rejected": -0.22793501615524292, "step": 13004 }, { "epoch": 8.993775933609959, "grad_norm": 4.269271373748779, "learning_rate": 5.590133702166897e-06, "log_odds_chosen": 11.4553861618042, "log_odds_ratio": -0.0001780561578925699, "logits/chosen": -0.18526721000671387, "logits/rejected": -0.2101747989654541, "logps/chosen": -0.00024178973399102688, "logps/rejected": -2.9740517139434814, "loss": 0.3912, "nll_loss": 0.09779347479343414, "rewards/accuracies": 1.0, "rewards/chosen": -2.4178971216315404e-05, "rewards/margins": 0.2973810136318207, "rewards/rejected": -0.2974051833152771, "step": 13005 }, { "epoch": 8.994467496542185, "grad_norm": 4.434983730316162, "learning_rate": 5.586291685876748e-06, "log_odds_chosen": 10.024709701538086, "log_odds_ratio": -0.00012213116860948503, "logits/chosen": -0.43611031770706177, "logits/rejected": -0.5170964002609253, "logps/chosen": -0.0002620690211188048, "logps/rejected": -1.636785864830017, "loss": 0.4045, "nll_loss": 0.10110985487699509, "rewards/accuracies": 1.0, "rewards/chosen": -2.6206904294667765e-05, "rewards/margins": 0.16365239024162292, "rewards/rejected": -0.1636785864830017, "step": 13006 }, { "epoch": 8.995159059474412, "grad_norm": 5.280633926391602, "learning_rate": 5.5824496695866e-06, "log_odds_chosen": 10.429874420166016, "log_odds_ratio": -0.00018652115250006318, "logits/chosen": -0.2633684575557709, "logits/rejected": -0.3636382222175598, "logps/chosen": -0.0003898479917552322, "logps/rejected": -2.0011839866638184, "loss": 0.3037, "nll_loss": 0.0759064331650734, "rewards/accuracies": 1.0, "rewards/chosen": -3.8984795537544414e-05, "rewards/margins": 0.20007941126823425, "rewards/rejected": -0.20011840760707855, "step": 13007 }, { "epoch": 8.995850622406639, "grad_norm": 3.260202169418335, "learning_rate": 5.5786076532964505e-06, "log_odds_chosen": 11.279440879821777, "log_odds_ratio": -2.7851805498357862e-05, "logits/chosen": -0.3012050986289978, "logits/rejected": -0.44973263144493103, "logps/chosen": -0.00024160815519280732, "logps/rejected": -2.4623751640319824, "loss": 0.3494, "nll_loss": 0.0873558521270752, "rewards/accuracies": 1.0, "rewards/chosen": -2.4160815883078612e-05, "rewards/margins": 0.24621334671974182, "rewards/rejected": -0.24623751640319824, "step": 13008 }, { "epoch": 8.996542185338866, "grad_norm": 4.038920879364014, "learning_rate": 5.574765637006301e-06, "log_odds_chosen": 12.036626815795898, "log_odds_ratio": -0.0007205168949440122, "logits/chosen": -0.6345183253288269, "logits/rejected": -0.6824157238006592, "logps/chosen": -0.001240055076777935, "logps/rejected": -4.034633159637451, "loss": 0.355, "nll_loss": 0.08866976946592331, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001240055134985596, "rewards/margins": 0.40333932638168335, "rewards/rejected": -0.40346336364746094, "step": 13009 }, { "epoch": 8.997233748271093, "grad_norm": 5.614645481109619, "learning_rate": 5.570923620716152e-06, "log_odds_chosen": 10.99240493774414, "log_odds_ratio": -3.525690408423543e-05, "logits/chosen": -0.5974779725074768, "logits/rejected": -0.5112378597259521, "logps/chosen": -0.00010461565398145467, "logps/rejected": -1.6510086059570312, "loss": 0.44, "nll_loss": 0.1099885106086731, "rewards/accuracies": 1.0, "rewards/chosen": -1.0461564670549706e-05, "rewards/margins": 0.1650903970003128, "rewards/rejected": -0.1651008576154709, "step": 13010 }, { "epoch": 8.99792531120332, "grad_norm": 3.4298603534698486, "learning_rate": 5.5670816044260036e-06, "log_odds_chosen": 10.097305297851562, "log_odds_ratio": -0.0002910443290602416, "logits/chosen": -0.7691456079483032, "logits/rejected": -0.8014342784881592, "logps/chosen": -0.0001380304165650159, "logps/rejected": -1.7263977527618408, "loss": 0.4124, "nll_loss": 0.10307244956493378, "rewards/accuracies": 1.0, "rewards/chosen": -1.3803042747895233e-05, "rewards/margins": 0.17262595891952515, "rewards/rejected": -0.17263977229595184, "step": 13011 }, { "epoch": 8.998616874135546, "grad_norm": 4.5642991065979, "learning_rate": 5.5632395881358535e-06, "log_odds_chosen": 11.386275291442871, "log_odds_ratio": -2.4504495740984567e-05, "logits/chosen": -0.16985617578029633, "logits/rejected": -0.2218393087387085, "logps/chosen": -0.00022014914429746568, "logps/rejected": -2.3555777072906494, "loss": 0.4621, "nll_loss": 0.11551637947559357, "rewards/accuracies": 1.0, "rewards/chosen": -2.2014914065948687e-05, "rewards/margins": 0.23553577065467834, "rewards/rejected": -0.23555776476860046, "step": 13012 }, { "epoch": 8.999308437067773, "grad_norm": 3.209639310836792, "learning_rate": 5.559397571845705e-06, "log_odds_chosen": 10.485466003417969, "log_odds_ratio": -7.259511039592326e-05, "logits/chosen": 0.09163352102041245, "logits/rejected": 0.022199518978595734, "logps/chosen": -0.0003781650448217988, "logps/rejected": -2.1259493827819824, "loss": 0.3894, "nll_loss": 0.09734359383583069, "rewards/accuracies": 1.0, "rewards/chosen": -3.7816502299392596e-05, "rewards/margins": 0.21255715191364288, "rewards/rejected": -0.21259495615959167, "step": 13013 }, { "epoch": 9.0, "grad_norm": 2.81518292427063, "learning_rate": 5.555555555555556e-06, "log_odds_chosen": 12.507894515991211, "log_odds_ratio": -9.165652045339812e-06, "logits/chosen": -0.10490019619464874, "logits/rejected": -0.1608884483575821, "logps/chosen": -8.094940858427435e-05, "logps/rejected": -2.832207202911377, "loss": 0.2795, "nll_loss": 0.06986867636442184, "rewards/accuracies": 1.0, "rewards/chosen": -8.094941222225316e-06, "rewards/margins": 0.2832126319408417, "rewards/rejected": -0.28322070837020874, "step": 13014 }, { "epoch": 9.000691562932227, "grad_norm": 3.2384731769561768, "learning_rate": 5.5517135392654066e-06, "log_odds_chosen": 11.528509140014648, "log_odds_ratio": -4.0053528209682554e-05, "logits/chosen": -0.030156686902046204, "logits/rejected": -0.05053587257862091, "logps/chosen": -0.00010144417319679633, "logps/rejected": -2.4233481884002686, "loss": 0.3581, "nll_loss": 0.0895245298743248, "rewards/accuracies": 1.0, "rewards/chosen": -1.0144417501578573e-05, "rewards/margins": 0.24232468008995056, "rewards/rejected": -0.24233481287956238, "step": 13015 }, { "epoch": 9.001383125864454, "grad_norm": 4.159745693206787, "learning_rate": 5.547871522975258e-06, "log_odds_chosen": 11.156106948852539, "log_odds_ratio": -0.00015267080743797123, "logits/chosen": -0.6843860149383545, "logits/rejected": -0.6840651631355286, "logps/chosen": -9.612218127585948e-05, "logps/rejected": -2.060917615890503, "loss": 0.4, "nll_loss": 0.0999767854809761, "rewards/accuracies": 1.0, "rewards/chosen": -9.612218491383828e-06, "rewards/margins": 0.20608215034008026, "rewards/rejected": -0.2060917615890503, "step": 13016 }, { "epoch": 9.00207468879668, "grad_norm": 3.164273262023926, "learning_rate": 5.544029506685108e-06, "log_odds_chosen": 11.69143295288086, "log_odds_ratio": -3.219091013306752e-05, "logits/chosen": -0.5757405161857605, "logits/rejected": -0.7102214097976685, "logps/chosen": -0.00013833888806402683, "logps/rejected": -2.4703941345214844, "loss": 0.3825, "nll_loss": 0.09561428427696228, "rewards/accuracies": 1.0, "rewards/chosen": -1.3833889170200564e-05, "rewards/margins": 0.24702557921409607, "rewards/rejected": -0.24703940749168396, "step": 13017 }, { "epoch": 9.002766251728907, "grad_norm": 1.8051459789276123, "learning_rate": 5.54018749039496e-06, "log_odds_chosen": 10.524673461914062, "log_odds_ratio": -4.670019916375168e-05, "logits/chosen": -0.27058786153793335, "logits/rejected": -0.11664856225252151, "logps/chosen": -0.0010510836727917194, "logps/rejected": -2.2729997634887695, "loss": 0.2015, "nll_loss": 0.05036091059446335, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010510836000321433, "rewards/margins": 0.2271948754787445, "rewards/rejected": -0.2272999882698059, "step": 13018 }, { "epoch": 9.003457814661134, "grad_norm": 6.395131587982178, "learning_rate": 5.53634547410481e-06, "log_odds_chosen": 11.191022872924805, "log_odds_ratio": -2.956366733997129e-05, "logits/chosen": -0.30440646409988403, "logits/rejected": -0.4327055513858795, "logps/chosen": -0.000211410311749205, "logps/rejected": -2.4802041053771973, "loss": 0.5894, "nll_loss": 0.1473536342382431, "rewards/accuracies": 1.0, "rewards/chosen": -2.1141029719728976e-05, "rewards/margins": 0.24799926578998566, "rewards/rejected": -0.24802041053771973, "step": 13019 }, { "epoch": 9.004149377593361, "grad_norm": 5.150073051452637, "learning_rate": 5.532503457814661e-06, "log_odds_chosen": 10.99140739440918, "log_odds_ratio": -3.988608659710735e-05, "logits/chosen": 0.22210730612277985, "logits/rejected": 0.13139745593070984, "logps/chosen": -0.0004938662750646472, "logps/rejected": -2.522759437561035, "loss": 0.3374, "nll_loss": 0.08433938771486282, "rewards/accuracies": 1.0, "rewards/chosen": -4.938662823406048e-05, "rewards/margins": 0.2522265613079071, "rewards/rejected": -0.2522759437561035, "step": 13020 }, { "epoch": 9.004840940525588, "grad_norm": 3.9866042137145996, "learning_rate": 5.528661441524513e-06, "log_odds_chosen": 11.550115585327148, "log_odds_ratio": -6.959481834201142e-05, "logits/chosen": -0.09290512651205063, "logits/rejected": -0.020532796159386635, "logps/chosen": -0.00021088341600261629, "logps/rejected": -2.310567617416382, "loss": 0.4046, "nll_loss": 0.10113376379013062, "rewards/accuracies": 1.0, "rewards/chosen": -2.1088340872665867e-05, "rewards/margins": 0.23103567957878113, "rewards/rejected": -0.23105676472187042, "step": 13021 }, { "epoch": 9.005532503457815, "grad_norm": 3.425347089767456, "learning_rate": 5.5248194252343635e-06, "log_odds_chosen": 10.409988403320312, "log_odds_ratio": -4.699845885625109e-05, "logits/chosen": -0.062008604407310486, "logits/rejected": -0.146600604057312, "logps/chosen": -0.0004828722740057856, "logps/rejected": -2.3168044090270996, "loss": 0.3594, "nll_loss": 0.08984710276126862, "rewards/accuracies": 1.0, "rewards/chosen": -4.828722740057856e-05, "rewards/margins": 0.23163215816020966, "rewards/rejected": -0.23168045282363892, "step": 13022 }, { "epoch": 9.006224066390041, "grad_norm": 4.393455505371094, "learning_rate": 5.520977408944214e-06, "log_odds_chosen": 10.297155380249023, "log_odds_ratio": -0.00019715650705620646, "logits/chosen": -0.277144193649292, "logits/rejected": -0.34454062581062317, "logps/chosen": -0.0005988165503367782, "logps/rejected": -2.442268133163452, "loss": 0.2592, "nll_loss": 0.06477570533752441, "rewards/accuracies": 1.0, "rewards/chosen": -5.988165139569901e-05, "rewards/margins": 0.24416694045066833, "rewards/rejected": -0.24422681331634521, "step": 13023 }, { "epoch": 9.006915629322268, "grad_norm": 2.774665594100952, "learning_rate": 5.517135392654065e-06, "log_odds_chosen": 11.69587516784668, "log_odds_ratio": -3.743657725863159e-05, "logits/chosen": -0.456280916929245, "logits/rejected": -0.44717827439308167, "logps/chosen": -0.00011302022903691977, "logps/rejected": -2.711245059967041, "loss": 0.2757, "nll_loss": 0.06891781836748123, "rewards/accuracies": 1.0, "rewards/chosen": -1.1302023267489858e-05, "rewards/margins": 0.27111321687698364, "rewards/rejected": -0.27112454175949097, "step": 13024 }, { "epoch": 9.007607192254495, "grad_norm": 3.6081957817077637, "learning_rate": 5.513293376363917e-06, "log_odds_chosen": 11.367313385009766, "log_odds_ratio": -1.4467418623098638e-05, "logits/chosen": -0.32773351669311523, "logits/rejected": -0.39627397060394287, "logps/chosen": -8.029278978938237e-05, "logps/rejected": -1.8186954259872437, "loss": 0.276, "nll_loss": 0.06899484992027283, "rewards/accuracies": 1.0, "rewards/chosen": -8.029278433241416e-06, "rewards/margins": 0.1818615198135376, "rewards/rejected": -0.18186955153942108, "step": 13025 }, { "epoch": 9.008298755186722, "grad_norm": 4.3829755783081055, "learning_rate": 5.5094513600737665e-06, "log_odds_chosen": 11.095830917358398, "log_odds_ratio": -0.00013280285929795355, "logits/chosen": 0.15786179900169373, "logits/rejected": 0.09668511897325516, "logps/chosen": -0.00019103838712908328, "logps/rejected": -2.1930274963378906, "loss": 0.5074, "nll_loss": 0.12683293223381042, "rewards/accuracies": 1.0, "rewards/chosen": -1.9103837985312566e-05, "rewards/margins": 0.21928367018699646, "rewards/rejected": -0.21930275857448578, "step": 13026 }, { "epoch": 9.008990318118949, "grad_norm": 4.688060760498047, "learning_rate": 5.505609343783618e-06, "log_odds_chosen": 11.868011474609375, "log_odds_ratio": -1.2133230484323576e-05, "logits/chosen": 0.03171950578689575, "logits/rejected": -0.1849055290222168, "logps/chosen": -6.48779678158462e-05, "logps/rejected": -2.3685197830200195, "loss": 0.3904, "nll_loss": 0.09759525954723358, "rewards/accuracies": 1.0, "rewards/chosen": -6.4877967815846205e-06, "rewards/margins": 0.2368454933166504, "rewards/rejected": -0.2368519902229309, "step": 13027 }, { "epoch": 9.009681881051176, "grad_norm": 2.7641689777374268, "learning_rate": 5.501767327493469e-06, "log_odds_chosen": 10.573009490966797, "log_odds_ratio": -8.692661504028365e-05, "logits/chosen": -0.66283118724823, "logits/rejected": -0.6663856506347656, "logps/chosen": -0.00019317277474328876, "logps/rejected": -2.2193799018859863, "loss": 0.2742, "nll_loss": 0.06855355948209763, "rewards/accuracies": 1.0, "rewards/chosen": -1.9317278201924637e-05, "rewards/margins": 0.22191870212554932, "rewards/rejected": -0.22193799912929535, "step": 13028 }, { "epoch": 9.010373443983402, "grad_norm": 2.9909303188323975, "learning_rate": 5.49792531120332e-06, "log_odds_chosen": 10.86269760131836, "log_odds_ratio": -8.339952910318971e-05, "logits/chosen": -0.3977804183959961, "logits/rejected": -0.3863662779331207, "logps/chosen": -0.0002721707278396934, "logps/rejected": -2.121858596801758, "loss": 0.3169, "nll_loss": 0.07920723408460617, "rewards/accuracies": 1.0, "rewards/chosen": -2.7217072783969343e-05, "rewards/margins": 0.21215865015983582, "rewards/rejected": -0.21218587458133698, "step": 13029 }, { "epoch": 9.01106500691563, "grad_norm": 4.653593063354492, "learning_rate": 5.494083294913171e-06, "log_odds_chosen": 11.301478385925293, "log_odds_ratio": -0.0001647967437747866, "logits/chosen": 0.008434537798166275, "logits/rejected": -0.12119042873382568, "logps/chosen": -0.00045101603609509766, "logps/rejected": -2.783571720123291, "loss": 0.3445, "nll_loss": 0.08611457049846649, "rewards/accuracies": 1.0, "rewards/chosen": -4.510160579229705e-05, "rewards/margins": 0.278312087059021, "rewards/rejected": -0.2783571779727936, "step": 13030 }, { "epoch": 9.011756569847856, "grad_norm": 3.588960647583008, "learning_rate": 5.490241278623021e-06, "log_odds_chosen": 11.11030387878418, "log_odds_ratio": -9.362775017507374e-05, "logits/chosen": 0.3267763555049896, "logits/rejected": 0.33334988355636597, "logps/chosen": -0.00014919544628355652, "logps/rejected": -2.088637351989746, "loss": 0.2984, "nll_loss": 0.07458983361721039, "rewards/accuracies": 1.0, "rewards/chosen": -1.4919545719749294e-05, "rewards/margins": 0.20884880423545837, "rewards/rejected": -0.2088637351989746, "step": 13031 }, { "epoch": 9.012448132780083, "grad_norm": 3.110220432281494, "learning_rate": 5.486399262332873e-06, "log_odds_chosen": 11.047928810119629, "log_odds_ratio": -3.0999708542367443e-05, "logits/chosen": 0.04424513876438141, "logits/rejected": 0.02165381610393524, "logps/chosen": -0.00016195660282392055, "logps/rejected": -2.224316120147705, "loss": 0.3611, "nll_loss": 0.0902659147977829, "rewards/accuracies": 1.0, "rewards/chosen": -1.6195661373785697e-05, "rewards/margins": 0.22241541743278503, "rewards/rejected": -0.22243162989616394, "step": 13032 }, { "epoch": 9.01313969571231, "grad_norm": 2.3600432872772217, "learning_rate": 5.4825572460427234e-06, "log_odds_chosen": 11.666213989257812, "log_odds_ratio": -2.090056659653783e-05, "logits/chosen": -0.1270778626203537, "logits/rejected": -0.2530510723590851, "logps/chosen": -0.00011234758858336136, "logps/rejected": -2.5693368911743164, "loss": 0.2981, "nll_loss": 0.074515700340271, "rewards/accuracies": 1.0, "rewards/chosen": -1.1234758858336136e-05, "rewards/margins": 0.25692248344421387, "rewards/rejected": -0.25693371891975403, "step": 13033 }, { "epoch": 9.013831258644537, "grad_norm": 3.420464515686035, "learning_rate": 5.478715229752574e-06, "log_odds_chosen": 10.560396194458008, "log_odds_ratio": -5.9101683291373774e-05, "logits/chosen": -0.43361103534698486, "logits/rejected": -0.454063355922699, "logps/chosen": -0.0002974130620714277, "logps/rejected": -1.8523221015930176, "loss": 0.356, "nll_loss": 0.08900587260723114, "rewards/accuracies": 1.0, "rewards/chosen": -2.974130620714277e-05, "rewards/margins": 0.1852024793624878, "rewards/rejected": -0.1852322220802307, "step": 13034 }, { "epoch": 9.014522821576763, "grad_norm": 3.225816488265991, "learning_rate": 5.474873213462425e-06, "log_odds_chosen": 11.012438774108887, "log_odds_ratio": -0.0002742533397395164, "logits/chosen": -0.07121704518795013, "logits/rejected": -0.1961957961320877, "logps/chosen": -0.0002828654833137989, "logps/rejected": -1.8338875770568848, "loss": 0.3048, "nll_loss": 0.0761764869093895, "rewards/accuracies": 1.0, "rewards/chosen": -2.8286549422773533e-05, "rewards/margins": 0.18336045742034912, "rewards/rejected": -0.18338875472545624, "step": 13035 }, { "epoch": 9.01521438450899, "grad_norm": 4.2216949462890625, "learning_rate": 5.4710311971722765e-06, "log_odds_chosen": 10.79727840423584, "log_odds_ratio": -8.785985846770927e-05, "logits/chosen": -0.09870100021362305, "logits/rejected": -0.03432294726371765, "logps/chosen": -0.00015546029317192733, "logps/rejected": -2.134243965148926, "loss": 0.4346, "nll_loss": 0.10864897072315216, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546029317192733e-05, "rewards/margins": 0.21340885758399963, "rewards/rejected": -0.21342439949512482, "step": 13036 }, { "epoch": 9.015905947441217, "grad_norm": 4.209107398986816, "learning_rate": 5.467189180882127e-06, "log_odds_chosen": 11.64953899383545, "log_odds_ratio": -3.766633017221466e-05, "logits/chosen": 0.06325505673885345, "logits/rejected": -0.04017649218440056, "logps/chosen": -0.0003921979514416307, "logps/rejected": -3.406096935272217, "loss": 0.4285, "nll_loss": 0.10713205486536026, "rewards/accuracies": 1.0, "rewards/chosen": -3.921979805454612e-05, "rewards/margins": 0.34057047963142395, "rewards/rejected": -0.34060966968536377, "step": 13037 }, { "epoch": 9.016597510373444, "grad_norm": 3.5881240367889404, "learning_rate": 5.463347164591978e-06, "log_odds_chosen": 10.89795970916748, "log_odds_ratio": -9.123482595896348e-05, "logits/chosen": 0.0009332895278930664, "logits/rejected": 0.009780220687389374, "logps/chosen": -0.0001413720747223124, "logps/rejected": -1.919877290725708, "loss": 0.3057, "nll_loss": 0.07642700523138046, "rewards/accuracies": 1.0, "rewards/chosen": -1.4137207472231239e-05, "rewards/margins": 0.19197361171245575, "rewards/rejected": -0.19198772311210632, "step": 13038 }, { "epoch": 9.01728907330567, "grad_norm": 3.7098147869110107, "learning_rate": 5.45950514830183e-06, "log_odds_chosen": 11.856633186340332, "log_odds_ratio": -0.000278906780295074, "logits/chosen": 0.05790846049785614, "logits/rejected": -0.055482715368270874, "logps/chosen": -0.00029186869505792856, "logps/rejected": -3.1621522903442383, "loss": 0.5305, "nll_loss": 0.13260848820209503, "rewards/accuracies": 1.0, "rewards/chosen": -2.918687096098438e-05, "rewards/margins": 0.3161860406398773, "rewards/rejected": -0.3162152171134949, "step": 13039 }, { "epoch": 9.017980636237898, "grad_norm": 4.042640209197998, "learning_rate": 5.4556631320116795e-06, "log_odds_chosen": 10.286334991455078, "log_odds_ratio": -0.00013100498472340405, "logits/chosen": 0.03478764742612839, "logits/rejected": 0.015952982008457184, "logps/chosen": -0.0009522599866613746, "logps/rejected": -2.5293731689453125, "loss": 0.5226, "nll_loss": 0.13063320517539978, "rewards/accuracies": 1.0, "rewards/chosen": -9.522600885247812e-05, "rewards/margins": 0.25284209847450256, "rewards/rejected": -0.25293731689453125, "step": 13040 }, { "epoch": 9.018672199170124, "grad_norm": 2.555697202682495, "learning_rate": 5.451821115721531e-06, "log_odds_chosen": 10.420969009399414, "log_odds_ratio": -0.00016010666149668396, "logits/chosen": -0.09627027064561844, "logits/rejected": 0.1425366997718811, "logps/chosen": -0.00023016006161924452, "logps/rejected": -1.4496179819107056, "loss": 0.3197, "nll_loss": 0.07989896088838577, "rewards/accuracies": 1.0, "rewards/chosen": -2.3016007617115974e-05, "rewards/margins": 0.14493878185749054, "rewards/rejected": -0.14496180415153503, "step": 13041 }, { "epoch": 9.019363762102351, "grad_norm": 3.8921778202056885, "learning_rate": 5.447979099431382e-06, "log_odds_chosen": 11.134313583374023, "log_odds_ratio": -0.0004263078444637358, "logits/chosen": -0.24421606957912445, "logits/rejected": -0.28580600023269653, "logps/chosen": -0.000531858648173511, "logps/rejected": -1.8364176750183105, "loss": 0.3845, "nll_loss": 0.09608376026153564, "rewards/accuracies": 1.0, "rewards/chosen": -5.318586408975534e-05, "rewards/margins": 0.18358857929706573, "rewards/rejected": -0.18364176154136658, "step": 13042 }, { "epoch": 9.020055325034578, "grad_norm": 3.8458752632141113, "learning_rate": 5.444137083141233e-06, "log_odds_chosen": 10.768726348876953, "log_odds_ratio": -0.00013082656369078904, "logits/chosen": -0.16428546607494354, "logits/rejected": -0.25296562910079956, "logps/chosen": -0.0003512292169034481, "logps/rejected": -2.459240674972534, "loss": 0.4308, "nll_loss": 0.10768191516399384, "rewards/accuracies": 1.0, "rewards/chosen": -3.512292096274905e-05, "rewards/margins": 0.24588894844055176, "rewards/rejected": -0.24592408537864685, "step": 13043 }, { "epoch": 9.020746887966805, "grad_norm": 3.0180575847625732, "learning_rate": 5.440295066851084e-06, "log_odds_chosen": 9.483352661132812, "log_odds_ratio": -0.0007811450632289052, "logits/chosen": 0.11073106527328491, "logits/rejected": 0.10512968897819519, "logps/chosen": -0.0007253906223922968, "logps/rejected": -1.383448839187622, "loss": 0.2585, "nll_loss": 0.06454954296350479, "rewards/accuracies": 1.0, "rewards/chosen": -7.253907097037882e-05, "rewards/margins": 0.13827234506607056, "rewards/rejected": -0.1383448839187622, "step": 13044 }, { "epoch": 9.021438450899032, "grad_norm": 3.089290142059326, "learning_rate": 5.436453050560935e-06, "log_odds_chosen": 11.264272689819336, "log_odds_ratio": -3.018009738298133e-05, "logits/chosen": -0.6477086544036865, "logits/rejected": -0.7271855473518372, "logps/chosen": -0.0001471658470109105, "logps/rejected": -2.383556365966797, "loss": 0.3152, "nll_loss": 0.07880601286888123, "rewards/accuracies": 1.0, "rewards/chosen": -1.4716585610585753e-05, "rewards/margins": 0.23834092915058136, "rewards/rejected": -0.2383556365966797, "step": 13045 }, { "epoch": 9.022130013831259, "grad_norm": 3.0523979663848877, "learning_rate": 5.432611034270786e-06, "log_odds_chosen": 10.005661010742188, "log_odds_ratio": -8.830070873955265e-05, "logits/chosen": -0.1184106096625328, "logits/rejected": -0.22679013013839722, "logps/chosen": -0.00018152232223656029, "logps/rejected": -1.5227150917053223, "loss": 0.2984, "nll_loss": 0.07458432018756866, "rewards/accuracies": 1.0, "rewards/chosen": -1.8152231859858148e-05, "rewards/margins": 0.15225335955619812, "rewards/rejected": -0.15227152407169342, "step": 13046 }, { "epoch": 9.022821576763485, "grad_norm": 3.5876870155334473, "learning_rate": 5.4287690179806365e-06, "log_odds_chosen": 11.028236389160156, "log_odds_ratio": -4.377803634270094e-05, "logits/chosen": -0.12041618674993515, "logits/rejected": -0.06218274310231209, "logps/chosen": -0.0003131921112071723, "logps/rejected": -1.9908089637756348, "loss": 0.329, "nll_loss": 0.0822470411658287, "rewards/accuracies": 1.0, "rewards/chosen": -3.131921403110027e-05, "rewards/margins": 0.19904956221580505, "rewards/rejected": -0.19908089935779572, "step": 13047 }, { "epoch": 9.023513139695712, "grad_norm": 3.6984646320343018, "learning_rate": 5.424927001690487e-06, "log_odds_chosen": 11.58115005493164, "log_odds_ratio": -2.4297107302118093e-05, "logits/chosen": -0.13553261756896973, "logits/rejected": -0.31182727217674255, "logps/chosen": -0.00016632108599878848, "logps/rejected": -2.4116837978363037, "loss": 0.429, "nll_loss": 0.10723809152841568, "rewards/accuracies": 1.0, "rewards/chosen": -1.663210969127249e-05, "rewards/margins": 0.24115175008773804, "rewards/rejected": -0.24116836488246918, "step": 13048 }, { "epoch": 9.024204702627939, "grad_norm": 3.875378370285034, "learning_rate": 5.421084985400338e-06, "log_odds_chosen": 12.607780456542969, "log_odds_ratio": -6.794036835344741e-06, "logits/chosen": -0.46647220849990845, "logits/rejected": -0.51015305519104, "logps/chosen": -5.174192483536899e-05, "logps/rejected": -2.7071480751037598, "loss": 0.3713, "nll_loss": 0.09283040463924408, "rewards/accuracies": 1.0, "rewards/chosen": -5.1741926654358394e-06, "rewards/margins": 0.2707096338272095, "rewards/rejected": -0.27071481943130493, "step": 13049 }, { "epoch": 9.024896265560166, "grad_norm": 3.5087029933929443, "learning_rate": 5.4172429691101896e-06, "log_odds_chosen": 9.820684432983398, "log_odds_ratio": -0.0006471116794273257, "logits/chosen": -0.47990888357162476, "logits/rejected": -0.5877619981765747, "logps/chosen": -0.00029866749537177384, "logps/rejected": -1.5530221462249756, "loss": 0.406, "nll_loss": 0.10142374038696289, "rewards/accuracies": 1.0, "rewards/chosen": -2.9866749173379503e-05, "rewards/margins": 0.15527234971523285, "rewards/rejected": -0.1553022265434265, "step": 13050 }, { "epoch": 9.025587828492393, "grad_norm": 2.8552744388580322, "learning_rate": 5.41340095282004e-06, "log_odds_chosen": 11.862985610961914, "log_odds_ratio": -1.448189141228795e-05, "logits/chosen": -0.4123150110244751, "logits/rejected": -0.469348281621933, "logps/chosen": -0.00014005962293595076, "logps/rejected": -2.778240919113159, "loss": 0.4193, "nll_loss": 0.10481908917427063, "rewards/accuracies": 1.0, "rewards/chosen": -1.4005961020302493e-05, "rewards/margins": 0.27781009674072266, "rewards/rejected": -0.2778240740299225, "step": 13051 }, { "epoch": 9.02627939142462, "grad_norm": 3.7326252460479736, "learning_rate": 5.409558936529891e-06, "log_odds_chosen": 11.56201457977295, "log_odds_ratio": -5.6688295444473624e-05, "logits/chosen": -0.7447749376296997, "logits/rejected": -0.6999694108963013, "logps/chosen": -0.0016155238263309002, "logps/rejected": -2.88490629196167, "loss": 0.4791, "nll_loss": 0.1197793036699295, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001615523942746222, "rewards/margins": 0.2883290648460388, "rewards/rejected": -0.2884906232357025, "step": 13052 }, { "epoch": 9.026970954356846, "grad_norm": 2.2243173122406006, "learning_rate": 5.405716920239743e-06, "log_odds_chosen": 11.60845947265625, "log_odds_ratio": -3.414329330553301e-05, "logits/chosen": -0.517898440361023, "logits/rejected": -0.5478456616401672, "logps/chosen": -0.0010220286203548312, "logps/rejected": -2.5593619346618652, "loss": 0.3012, "nll_loss": 0.07529132813215256, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010220285912510008, "rewards/margins": 0.2558339834213257, "rewards/rejected": -0.2559362053871155, "step": 13053 }, { "epoch": 9.027662517289073, "grad_norm": 2.877593755722046, "learning_rate": 5.4018749039495926e-06, "log_odds_chosen": 10.34636402130127, "log_odds_ratio": -0.00028239202219992876, "logits/chosen": -0.515480101108551, "logits/rejected": -0.5283478498458862, "logps/chosen": -0.00042652367847040296, "logps/rejected": -1.8192906379699707, "loss": 0.2366, "nll_loss": 0.05912820249795914, "rewards/accuracies": 1.0, "rewards/chosen": -4.265236930223182e-05, "rewards/margins": 0.18188641965389252, "rewards/rejected": -0.18192905187606812, "step": 13054 }, { "epoch": 9.0283540802213, "grad_norm": 3.929666519165039, "learning_rate": 5.398032887659444e-06, "log_odds_chosen": 11.522621154785156, "log_odds_ratio": -2.0546456653391942e-05, "logits/chosen": -0.435081422328949, "logits/rejected": -0.45330917835235596, "logps/chosen": -0.00013197583029977977, "logps/rejected": -2.1118600368499756, "loss": 0.5275, "nll_loss": 0.13188423216342926, "rewards/accuracies": 1.0, "rewards/chosen": -1.3197583029977977e-05, "rewards/margins": 0.21117281913757324, "rewards/rejected": -0.211186021566391, "step": 13055 }, { "epoch": 9.029045643153527, "grad_norm": 2.5256550312042236, "learning_rate": 5.394190871369295e-06, "log_odds_chosen": 11.52824592590332, "log_odds_ratio": -3.128191747236997e-05, "logits/chosen": -0.18849243223667145, "logits/rejected": -0.24364817142486572, "logps/chosen": -0.00010533664317335933, "logps/rejected": -2.4289488792419434, "loss": 0.2592, "nll_loss": 0.06480279564857483, "rewards/accuracies": 1.0, "rewards/chosen": -1.0533663953538053e-05, "rewards/margins": 0.24288436770439148, "rewards/rejected": -0.24289490282535553, "step": 13056 }, { "epoch": 9.029737206085754, "grad_norm": 4.501963138580322, "learning_rate": 5.390348855079146e-06, "log_odds_chosen": 12.487890243530273, "log_odds_ratio": -1.3090188076603226e-05, "logits/chosen": -0.2066900134086609, "logits/rejected": -0.2767907679080963, "logps/chosen": -0.00013198704982642084, "logps/rejected": -3.3000569343566895, "loss": 0.4018, "nll_loss": 0.10044016689062119, "rewards/accuracies": 1.0, "rewards/chosen": -1.3198705346439965e-05, "rewards/margins": 0.32999250292778015, "rewards/rejected": -0.3300057053565979, "step": 13057 }, { "epoch": 9.03042876901798, "grad_norm": 3.2746903896331787, "learning_rate": 5.386506838788996e-06, "log_odds_chosen": 11.492822647094727, "log_odds_ratio": -7.269456546055153e-05, "logits/chosen": -0.29028403759002686, "logits/rejected": -0.36464041471481323, "logps/chosen": -6.488826329587027e-05, "logps/rejected": -2.028392791748047, "loss": 0.4216, "nll_loss": 0.10539723932743073, "rewards/accuracies": 1.0, "rewards/chosen": -6.488827239081729e-06, "rewards/margins": 0.20283278822898865, "rewards/rejected": -0.20283928513526917, "step": 13058 }, { "epoch": 9.031120331950207, "grad_norm": 3.6708459854125977, "learning_rate": 5.382664822498848e-06, "log_odds_chosen": 10.951431274414062, "log_odds_ratio": -0.00013642846897710115, "logits/chosen": -0.4781211018562317, "logits/rejected": -0.5032169818878174, "logps/chosen": -0.000443106924649328, "logps/rejected": -2.230484962463379, "loss": 0.455, "nll_loss": 0.11373548209667206, "rewards/accuracies": 1.0, "rewards/chosen": -4.4310694647720084e-05, "rewards/margins": 0.22300422191619873, "rewards/rejected": -0.22304850816726685, "step": 13059 }, { "epoch": 9.031811894882434, "grad_norm": 3.364274024963379, "learning_rate": 5.378822806208699e-06, "log_odds_chosen": 9.455132484436035, "log_odds_ratio": -0.0005840727826580405, "logits/chosen": -0.2446010559797287, "logits/rejected": -0.2981387674808502, "logps/chosen": -0.0006788469036109746, "logps/rejected": -1.457331895828247, "loss": 0.2991, "nll_loss": 0.07472015917301178, "rewards/accuracies": 1.0, "rewards/chosen": -6.788469181628898e-05, "rewards/margins": 0.14566530287265778, "rewards/rejected": -0.14573319256305695, "step": 13060 }, { "epoch": 9.032503457814661, "grad_norm": 4.478184223175049, "learning_rate": 5.3749807899185495e-06, "log_odds_chosen": 11.661033630371094, "log_odds_ratio": -0.0003269641601946205, "logits/chosen": -0.2188836634159088, "logits/rejected": -0.2645830512046814, "logps/chosen": -0.00020673639664892107, "logps/rejected": -2.8120455741882324, "loss": 0.5693, "nll_loss": 0.1423034816980362, "rewards/accuracies": 1.0, "rewards/chosen": -2.067364039248787e-05, "rewards/margins": 0.2811838984489441, "rewards/rejected": -0.28120458126068115, "step": 13061 }, { "epoch": 9.033195020746888, "grad_norm": 3.9616754055023193, "learning_rate": 5.371138773628401e-06, "log_odds_chosen": 11.736493110656738, "log_odds_ratio": -2.007854709518142e-05, "logits/chosen": -0.30342715978622437, "logits/rejected": -0.29140985012054443, "logps/chosen": -0.0002348808920942247, "logps/rejected": -2.836967945098877, "loss": 0.427, "nll_loss": 0.10674826800823212, "rewards/accuracies": 1.0, "rewards/chosen": -2.348808993701823e-05, "rewards/margins": 0.2836732864379883, "rewards/rejected": -0.2836967706680298, "step": 13062 }, { "epoch": 9.033886583679115, "grad_norm": 4.665407657623291, "learning_rate": 5.367296757338251e-06, "log_odds_chosen": 10.088314056396484, "log_odds_ratio": -0.00013019969628658146, "logits/chosen": -0.29994332790374756, "logits/rejected": -0.46783965826034546, "logps/chosen": -0.000256081490078941, "logps/rejected": -1.404195785522461, "loss": 0.3107, "nll_loss": 0.0776708796620369, "rewards/accuracies": 1.0, "rewards/chosen": -2.560814937169198e-05, "rewards/margins": 0.14039397239685059, "rewards/rejected": -0.1404195874929428, "step": 13063 }, { "epoch": 9.034578146611342, "grad_norm": 2.985196113586426, "learning_rate": 5.363454741048103e-06, "log_odds_chosen": 11.143171310424805, "log_odds_ratio": -2.7591235266299918e-05, "logits/chosen": -0.31959080696105957, "logits/rejected": -0.38788843154907227, "logps/chosen": -0.00014333522995002568, "logps/rejected": -2.1929399967193604, "loss": 0.2743, "nll_loss": 0.06858177483081818, "rewards/accuracies": 1.0, "rewards/chosen": -1.4333522813103627e-05, "rewards/margins": 0.2192796766757965, "rewards/rejected": -0.21929402649402618, "step": 13064 }, { "epoch": 9.035269709543568, "grad_norm": 2.820173740386963, "learning_rate": 5.359612724757953e-06, "log_odds_chosen": 10.983320236206055, "log_odds_ratio": -5.282166239339858e-05, "logits/chosen": -0.05022948235273361, "logits/rejected": -0.16276401281356812, "logps/chosen": -0.00013071924331597984, "logps/rejected": -1.7682867050170898, "loss": 0.3098, "nll_loss": 0.07744050770998001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3071924513496924e-05, "rewards/margins": 0.17681559920310974, "rewards/rejected": -0.17682868242263794, "step": 13065 }, { "epoch": 9.035961272475795, "grad_norm": 3.91086483001709, "learning_rate": 5.355770708467804e-06, "log_odds_chosen": 11.58726692199707, "log_odds_ratio": -2.9941369575681165e-05, "logits/chosen": -0.25834226608276367, "logits/rejected": -0.2564094662666321, "logps/chosen": -0.00023409070854540914, "logps/rejected": -2.926124334335327, "loss": 0.6506, "nll_loss": 0.16264963150024414, "rewards/accuracies": 1.0, "rewards/chosen": -2.34090730373282e-05, "rewards/margins": 0.2925890386104584, "rewards/rejected": -0.2926124632358551, "step": 13066 }, { "epoch": 9.036652835408022, "grad_norm": 4.259881973266602, "learning_rate": 5.351928692177656e-06, "log_odds_chosen": 11.248711585998535, "log_odds_ratio": -2.3729864551569335e-05, "logits/chosen": -0.3928593099117279, "logits/rejected": -0.42217206954956055, "logps/chosen": -0.0002243789640488103, "logps/rejected": -2.3699398040771484, "loss": 0.5108, "nll_loss": 0.1276947259902954, "rewards/accuracies": 1.0, "rewards/chosen": -2.243789640488103e-05, "rewards/margins": 0.23697157204151154, "rewards/rejected": -0.23699399828910828, "step": 13067 }, { "epoch": 9.037344398340249, "grad_norm": 2.519014835357666, "learning_rate": 5.348086675887506e-06, "log_odds_chosen": 10.962300300598145, "log_odds_ratio": -4.053633165312931e-05, "logits/chosen": -0.6078428030014038, "logits/rejected": -0.5253631472587585, "logps/chosen": -0.00018711041775532067, "logps/rejected": -1.7675940990447998, "loss": 0.3109, "nll_loss": 0.07772395014762878, "rewards/accuracies": 1.0, "rewards/chosen": -1.8711041775532067e-05, "rewards/margins": 0.17674070596694946, "rewards/rejected": -0.17675942182540894, "step": 13068 }, { "epoch": 9.038035961272476, "grad_norm": 3.7536327838897705, "learning_rate": 5.344244659597357e-06, "log_odds_chosen": 11.013205528259277, "log_odds_ratio": -3.441493754507974e-05, "logits/chosen": 0.10737214982509613, "logits/rejected": 0.1350289285182953, "logps/chosen": -9.220686479238793e-05, "logps/rejected": -1.5986688137054443, "loss": 0.3746, "nll_loss": 0.09365054965019226, "rewards/accuracies": 1.0, "rewards/chosen": -9.220686479238793e-06, "rewards/margins": 0.15985766053199768, "rewards/rejected": -0.15986689925193787, "step": 13069 }, { "epoch": 9.038727524204702, "grad_norm": 2.6560962200164795, "learning_rate": 5.340402643307208e-06, "log_odds_chosen": 11.294528007507324, "log_odds_ratio": -2.282520836160984e-05, "logits/chosen": -0.31131410598754883, "logits/rejected": -0.3638339340686798, "logps/chosen": -0.00020951607439201325, "logps/rejected": -2.445250988006592, "loss": 0.2748, "nll_loss": 0.06870685517787933, "rewards/accuracies": 1.0, "rewards/chosen": -2.0951607439201325e-05, "rewards/margins": 0.24450412392616272, "rewards/rejected": -0.24452508985996246, "step": 13070 }, { "epoch": 9.03941908713693, "grad_norm": 3.578831195831299, "learning_rate": 5.336560627017059e-06, "log_odds_chosen": 10.841608047485352, "log_odds_ratio": -4.9118079914478585e-05, "logits/chosen": 0.0027485936880111694, "logits/rejected": 0.10918466746807098, "logps/chosen": -0.0004803585179615766, "logps/rejected": -2.3801283836364746, "loss": 0.5241, "nll_loss": 0.1310272514820099, "rewards/accuracies": 1.0, "rewards/chosen": -4.8035850340966135e-05, "rewards/margins": 0.23796480894088745, "rewards/rejected": -0.23801285028457642, "step": 13071 }, { "epoch": 9.040110650069156, "grad_norm": 3.7686567306518555, "learning_rate": 5.3327186107269094e-06, "log_odds_chosen": 12.269804954528809, "log_odds_ratio": -3.954406929551624e-05, "logits/chosen": -0.22742539644241333, "logits/rejected": -0.4627268314361572, "logps/chosen": -0.0001714062091195956, "logps/rejected": -3.400301218032837, "loss": 0.3694, "nll_loss": 0.09234748780727386, "rewards/accuracies": 1.0, "rewards/chosen": -1.714062091195956e-05, "rewards/margins": 0.34001296758651733, "rewards/rejected": -0.34003013372421265, "step": 13072 }, { "epoch": 9.040802213001383, "grad_norm": 6.61577844619751, "learning_rate": 5.328876594436761e-06, "log_odds_chosen": 10.836341857910156, "log_odds_ratio": -0.0006617381004616618, "logits/chosen": -0.4405108392238617, "logits/rejected": -0.4111618995666504, "logps/chosen": -0.000346881482983008, "logps/rejected": -2.331427574157715, "loss": 0.4129, "nll_loss": 0.10316643863916397, "rewards/accuracies": 1.0, "rewards/chosen": -3.4688149753492326e-05, "rewards/margins": 0.23310808837413788, "rewards/rejected": -0.23314279317855835, "step": 13073 }, { "epoch": 9.04149377593361, "grad_norm": 3.9309210777282715, "learning_rate": 5.325034578146612e-06, "log_odds_chosen": 11.050765991210938, "log_odds_ratio": -3.2668547646608204e-05, "logits/chosen": -0.4415837228298187, "logits/rejected": -0.43602824211120605, "logps/chosen": -0.00020155491074547172, "logps/rejected": -2.247512102127075, "loss": 0.5909, "nll_loss": 0.14771312475204468, "rewards/accuracies": 1.0, "rewards/chosen": -2.015549034695141e-05, "rewards/margins": 0.22473105788230896, "rewards/rejected": -0.22475121915340424, "step": 13074 }, { "epoch": 9.042185338865837, "grad_norm": 6.616016864776611, "learning_rate": 5.3211925618564625e-06, "log_odds_chosen": 10.741582870483398, "log_odds_ratio": -0.00016240161494351923, "logits/chosen": 0.0491463765501976, "logits/rejected": -0.027135292068123817, "logps/chosen": -0.0006349672912620008, "logps/rejected": -2.457874059677124, "loss": 0.5645, "nll_loss": 0.14110106229782104, "rewards/accuracies": 1.0, "rewards/chosen": -6.349672912620008e-05, "rewards/margins": 0.2457239180803299, "rewards/rejected": -0.24578741192817688, "step": 13075 }, { "epoch": 9.042876901798063, "grad_norm": 3.3583385944366455, "learning_rate": 5.317350545566314e-06, "log_odds_chosen": 11.328731536865234, "log_odds_ratio": -3.160857886541635e-05, "logits/chosen": -0.21437186002731323, "logits/rejected": -0.386541485786438, "logps/chosen": -0.00045217963634058833, "logps/rejected": -2.5081796646118164, "loss": 0.2882, "nll_loss": 0.07204939424991608, "rewards/accuracies": 1.0, "rewards/chosen": -4.521796290646307e-05, "rewards/margins": 0.25077277421951294, "rewards/rejected": -0.2508179843425751, "step": 13076 }, { "epoch": 9.04356846473029, "grad_norm": 4.046693801879883, "learning_rate": 5.313508529276164e-06, "log_odds_chosen": 10.828814506530762, "log_odds_ratio": -0.00013855035649612546, "logits/chosen": -0.19150567054748535, "logits/rejected": -0.30324339866638184, "logps/chosen": -0.00014190759975463152, "logps/rejected": -1.6376926898956299, "loss": 0.5934, "nll_loss": 0.14834515750408173, "rewards/accuracies": 1.0, "rewards/chosen": -1.4190760339261033e-05, "rewards/margins": 0.16375507414340973, "rewards/rejected": -0.16376927495002747, "step": 13077 }, { "epoch": 9.044260027662517, "grad_norm": 2.2931673526763916, "learning_rate": 5.309666512986016e-06, "log_odds_chosen": 10.644186019897461, "log_odds_ratio": -0.00012147890083724633, "logits/chosen": 0.014885544776916504, "logits/rejected": 0.026649564504623413, "logps/chosen": -0.00023539473477285355, "logps/rejected": -1.762995719909668, "loss": 0.2362, "nll_loss": 0.05904865637421608, "rewards/accuracies": 1.0, "rewards/chosen": -2.3539472749689594e-05, "rewards/margins": 0.1762760579586029, "rewards/rejected": -0.176299586892128, "step": 13078 }, { "epoch": 9.044951590594744, "grad_norm": 4.323233127593994, "learning_rate": 5.3058244966958655e-06, "log_odds_chosen": 11.343955993652344, "log_odds_ratio": -8.940276165958494e-05, "logits/chosen": -0.11328444629907608, "logits/rejected": -0.14247748255729675, "logps/chosen": -0.00016477785538882017, "logps/rejected": -2.03590726852417, "loss": 0.4436, "nll_loss": 0.11090175062417984, "rewards/accuracies": 1.0, "rewards/chosen": -1.6477784811286256e-05, "rewards/margins": 0.20357424020767212, "rewards/rejected": -0.20359072089195251, "step": 13079 }, { "epoch": 9.04564315352697, "grad_norm": 2.719088077545166, "learning_rate": 5.301982480405717e-06, "log_odds_chosen": 12.002786636352539, "log_odds_ratio": -1.0852253581106197e-05, "logits/chosen": -0.6455560326576233, "logits/rejected": -0.6803206205368042, "logps/chosen": -0.00010349987132940441, "logps/rejected": -2.305574417114258, "loss": 0.3786, "nll_loss": 0.09465332329273224, "rewards/accuracies": 1.0, "rewards/chosen": -1.0349987860536203e-05, "rewards/margins": 0.23054710030555725, "rewards/rejected": -0.23055744171142578, "step": 13080 }, { "epoch": 9.046334716459198, "grad_norm": 2.3111789226531982, "learning_rate": 5.298140464115568e-06, "log_odds_chosen": 10.059361457824707, "log_odds_ratio": -0.00012542004697024822, "logits/chosen": -0.4591970145702362, "logits/rejected": -0.5288703441619873, "logps/chosen": -0.0004874311271123588, "logps/rejected": -1.520819067955017, "loss": 0.2362, "nll_loss": 0.059025026857852936, "rewards/accuracies": 1.0, "rewards/chosen": -4.8743109800852835e-05, "rewards/margins": 0.15203317999839783, "rewards/rejected": -0.1520819216966629, "step": 13081 }, { "epoch": 9.047026279391424, "grad_norm": 2.565124750137329, "learning_rate": 5.294298447825419e-06, "log_odds_chosen": 11.172361373901367, "log_odds_ratio": -0.00011450420424807817, "logits/chosen": 0.21182212233543396, "logits/rejected": 0.255759596824646, "logps/chosen": -0.00019564552349038422, "logps/rejected": -2.238527297973633, "loss": 0.3252, "nll_loss": 0.08128926157951355, "rewards/accuracies": 1.0, "rewards/chosen": -1.956455162144266e-05, "rewards/margins": 0.22383317351341248, "rewards/rejected": -0.2238527238368988, "step": 13082 }, { "epoch": 9.047717842323651, "grad_norm": 3.543975353240967, "learning_rate": 5.29045643153527e-06, "log_odds_chosen": 11.253551483154297, "log_odds_ratio": -6.646641122642905e-05, "logits/chosen": -0.13842296600341797, "logits/rejected": -0.11984425783157349, "logps/chosen": -0.00011901521065738052, "logps/rejected": -2.047550678253174, "loss": 0.4295, "nll_loss": 0.10736589878797531, "rewards/accuracies": 1.0, "rewards/chosen": -1.1901522157131694e-05, "rewards/margins": 0.2047431468963623, "rewards/rejected": -0.2047550529241562, "step": 13083 }, { "epoch": 9.048409405255878, "grad_norm": 2.7371878623962402, "learning_rate": 5.286614415245121e-06, "log_odds_chosen": 11.49665641784668, "log_odds_ratio": -8.669508679304272e-05, "logits/chosen": -0.06901225447654724, "logits/rejected": -0.1418294459581375, "logps/chosen": -0.0002866098075173795, "logps/rejected": -2.9433188438415527, "loss": 0.4201, "nll_loss": 0.10502439737319946, "rewards/accuracies": 1.0, "rewards/chosen": -2.866097929654643e-05, "rewards/margins": 0.2943032383918762, "rewards/rejected": -0.2943318784236908, "step": 13084 }, { "epoch": 9.049100968188105, "grad_norm": 2.7726447582244873, "learning_rate": 5.282772398954972e-06, "log_odds_chosen": 9.871931076049805, "log_odds_ratio": -0.00035949063021689653, "logits/chosen": -0.4832724332809448, "logits/rejected": -0.5116129517555237, "logps/chosen": -0.0005433057667687535, "logps/rejected": -2.2153072357177734, "loss": 0.2505, "nll_loss": 0.06258294731378555, "rewards/accuracies": 1.0, "rewards/chosen": -5.43305795872584e-05, "rewards/margins": 0.22147642076015472, "rewards/rejected": -0.2215307503938675, "step": 13085 }, { "epoch": 9.049792531120332, "grad_norm": 2.650989294052124, "learning_rate": 5.2789303826648225e-06, "log_odds_chosen": 10.100696563720703, "log_odds_ratio": -0.0004444028891157359, "logits/chosen": -0.4472510814666748, "logits/rejected": -0.5792377591133118, "logps/chosen": -0.0005354660097509623, "logps/rejected": -1.691986322402954, "loss": 0.2374, "nll_loss": 0.059308335185050964, "rewards/accuracies": 1.0, "rewards/chosen": -5.354660243028775e-05, "rewards/margins": 0.1691451072692871, "rewards/rejected": -0.1691986471414566, "step": 13086 }, { "epoch": 9.050484094052559, "grad_norm": 4.133640289306641, "learning_rate": 5.275088366374674e-06, "log_odds_chosen": 12.123001098632812, "log_odds_ratio": -1.3905494597565848e-05, "logits/chosen": -0.06451994925737381, "logits/rejected": -0.2188749611377716, "logps/chosen": -0.00024088873760774732, "logps/rejected": -2.9247636795043945, "loss": 0.3953, "nll_loss": 0.09881335496902466, "rewards/accuracies": 1.0, "rewards/chosen": -2.408887303317897e-05, "rewards/margins": 0.29245227575302124, "rewards/rejected": -0.2924763560295105, "step": 13087 }, { "epoch": 9.051175656984785, "grad_norm": 4.221945285797119, "learning_rate": 5.271246350084524e-06, "log_odds_chosen": 11.77324390411377, "log_odds_ratio": -1.764004264259711e-05, "logits/chosen": -0.13490182161331177, "logits/rejected": -0.20364238321781158, "logps/chosen": -0.00011886367428814992, "logps/rejected": -2.5822014808654785, "loss": 0.4196, "nll_loss": 0.10489566624164581, "rewards/accuracies": 1.0, "rewards/chosen": -1.1886368156410754e-05, "rewards/margins": 0.2582082748413086, "rewards/rejected": -0.2582201659679413, "step": 13088 }, { "epoch": 9.051867219917012, "grad_norm": 2.730008840560913, "learning_rate": 5.2674043337943756e-06, "log_odds_chosen": 10.970198631286621, "log_odds_ratio": -7.255510718096048e-05, "logits/chosen": -0.4563295841217041, "logits/rejected": -0.6076275110244751, "logps/chosen": -0.0002196189743699506, "logps/rejected": -2.3138020038604736, "loss": 0.3483, "nll_loss": 0.08706316351890564, "rewards/accuracies": 1.0, "rewards/chosen": -2.196189780079294e-05, "rewards/margins": 0.23135823011398315, "rewards/rejected": -0.23138019442558289, "step": 13089 }, { "epoch": 9.052558782849239, "grad_norm": 3.694523334503174, "learning_rate": 5.263562317504227e-06, "log_odds_chosen": 11.171448707580566, "log_odds_ratio": -8.31487777759321e-05, "logits/chosen": -0.5866619944572449, "logits/rejected": -0.6402585506439209, "logps/chosen": -0.0007456215098500252, "logps/rejected": -2.4630119800567627, "loss": 0.5786, "nll_loss": 0.14464695751667023, "rewards/accuracies": 1.0, "rewards/chosen": -7.456215826096013e-05, "rewards/margins": 0.24622663855552673, "rewards/rejected": -0.24630121886730194, "step": 13090 }, { "epoch": 9.053250345781466, "grad_norm": 3.026980400085449, "learning_rate": 5.259720301214077e-06, "log_odds_chosen": 11.604578018188477, "log_odds_ratio": -0.00010914913582382724, "logits/chosen": -0.5673521757125854, "logits/rejected": -0.5239881277084351, "logps/chosen": -0.0001174298522528261, "logps/rejected": -2.4955615997314453, "loss": 0.2728, "nll_loss": 0.06819754093885422, "rewards/accuracies": 1.0, "rewards/chosen": -1.1742986316676252e-05, "rewards/margins": 0.24954447150230408, "rewards/rejected": -0.24955618381500244, "step": 13091 }, { "epoch": 9.053941908713693, "grad_norm": 3.0807971954345703, "learning_rate": 5.255878284923929e-06, "log_odds_chosen": 10.801340103149414, "log_odds_ratio": -8.183487079804763e-05, "logits/chosen": -0.5854331254959106, "logits/rejected": -0.6655092239379883, "logps/chosen": -0.0002837886568158865, "logps/rejected": -1.8841159343719482, "loss": 0.365, "nll_loss": 0.09124018251895905, "rewards/accuracies": 1.0, "rewards/chosen": -2.837886495399289e-05, "rewards/margins": 0.18838322162628174, "rewards/rejected": -0.18841159343719482, "step": 13092 }, { "epoch": 9.05463347164592, "grad_norm": 3.904637098312378, "learning_rate": 5.252036268633779e-06, "log_odds_chosen": 10.611612319946289, "log_odds_ratio": -9.759830572875217e-05, "logits/chosen": -0.5094362497329712, "logits/rejected": -0.4815782904624939, "logps/chosen": -0.00036221236223354936, "logps/rejected": -2.191467046737671, "loss": 0.306, "nll_loss": 0.0764811560511589, "rewards/accuracies": 1.0, "rewards/chosen": -3.622123767854646e-05, "rewards/margins": 0.21911050379276276, "rewards/rejected": -0.2191467136144638, "step": 13093 }, { "epoch": 9.055325034578146, "grad_norm": 3.0631561279296875, "learning_rate": 5.24819425234363e-06, "log_odds_chosen": 11.680587768554688, "log_odds_ratio": -3.716135324793868e-05, "logits/chosen": 0.16301950812339783, "logits/rejected": 0.12410911917686462, "logps/chosen": -0.00011040831304853782, "logps/rejected": -2.468747615814209, "loss": 0.2997, "nll_loss": 0.07492666691541672, "rewards/accuracies": 1.0, "rewards/chosen": -1.1040832760045305e-05, "rewards/margins": 0.2468637228012085, "rewards/rejected": -0.24687474966049194, "step": 13094 }, { "epoch": 9.056016597510373, "grad_norm": 4.222609996795654, "learning_rate": 5.244352236053481e-06, "log_odds_chosen": 11.704336166381836, "log_odds_ratio": -2.5846133212326095e-05, "logits/chosen": -0.27149274945259094, "logits/rejected": -0.33308032155036926, "logps/chosen": -0.0001536675845272839, "logps/rejected": -2.5176734924316406, "loss": 0.5543, "nll_loss": 0.13858075439929962, "rewards/accuracies": 1.0, "rewards/chosen": -1.536675881652627e-05, "rewards/margins": 0.25175195932388306, "rewards/rejected": -0.2517673373222351, "step": 13095 }, { "epoch": 9.0567081604426, "grad_norm": 3.1188364028930664, "learning_rate": 5.240510219763332e-06, "log_odds_chosen": 11.076004028320312, "log_odds_ratio": -0.00010368539369665086, "logits/chosen": -0.10039699822664261, "logits/rejected": -0.10477086156606674, "logps/chosen": -0.00041912851156666875, "logps/rejected": -2.0893921852111816, "loss": 0.3091, "nll_loss": 0.07725635170936584, "rewards/accuracies": 1.0, "rewards/chosen": -4.1912851884262636e-05, "rewards/margins": 0.20889730751514435, "rewards/rejected": -0.20893922448158264, "step": 13096 }, { "epoch": 9.057399723374827, "grad_norm": 2.7609245777130127, "learning_rate": 5.236668203473183e-06, "log_odds_chosen": 11.29232406616211, "log_odds_ratio": -3.3745109249139205e-05, "logits/chosen": -0.541816234588623, "logits/rejected": -0.6653363108634949, "logps/chosen": -0.00010187992302235216, "logps/rejected": -2.0963993072509766, "loss": 0.3485, "nll_loss": 0.08711716532707214, "rewards/accuracies": 1.0, "rewards/chosen": -1.0187993211729918e-05, "rewards/margins": 0.20962974429130554, "rewards/rejected": -0.20963993668556213, "step": 13097 }, { "epoch": 9.058091286307054, "grad_norm": 2.5463993549346924, "learning_rate": 5.232826187183034e-06, "log_odds_chosen": 10.777527809143066, "log_odds_ratio": -2.8020123863825575e-05, "logits/chosen": -0.14532683789730072, "logits/rejected": -0.19010743498802185, "logps/chosen": -0.00010921778448391706, "logps/rejected": -1.6891964673995972, "loss": 0.3606, "nll_loss": 0.0901351198554039, "rewards/accuracies": 1.0, "rewards/chosen": -1.0921778084593825e-05, "rewards/margins": 0.1689087301492691, "rewards/rejected": -0.1689196527004242, "step": 13098 }, { "epoch": 9.05878284923928, "grad_norm": 3.5195703506469727, "learning_rate": 5.228984170892885e-06, "log_odds_chosen": 11.336797714233398, "log_odds_ratio": -5.716394298360683e-05, "logits/chosen": -0.2846332788467407, "logits/rejected": -0.3512307405471802, "logps/chosen": -0.0002162289310945198, "logps/rejected": -2.9113054275512695, "loss": 0.3919, "nll_loss": 0.097966268658638, "rewards/accuracies": 1.0, "rewards/chosen": -2.16228927456541e-05, "rewards/margins": 0.2911089062690735, "rewards/rejected": -0.29113057255744934, "step": 13099 }, { "epoch": 9.059474412171507, "grad_norm": 4.029845237731934, "learning_rate": 5.2251421546027355e-06, "log_odds_chosen": 10.34531021118164, "log_odds_ratio": -0.00023824439267627895, "logits/chosen": -0.09753906726837158, "logits/rejected": -0.09223097562789917, "logps/chosen": -0.0006019758293405175, "logps/rejected": -2.266861915588379, "loss": 0.391, "nll_loss": 0.09771972894668579, "rewards/accuracies": 1.0, "rewards/chosen": -6.019758075126447e-05, "rewards/margins": 0.22662599384784698, "rewards/rejected": -0.22668620944023132, "step": 13100 }, { "epoch": 9.060165975103734, "grad_norm": 3.6992623805999756, "learning_rate": 5.221300138312587e-06, "log_odds_chosen": 11.187959671020508, "log_odds_ratio": -7.363592885667458e-05, "logits/chosen": -0.398265540599823, "logits/rejected": -0.5765001773834229, "logps/chosen": -0.00021470579667948186, "logps/rejected": -2.195138454437256, "loss": 0.4704, "nll_loss": 0.1176011860370636, "rewards/accuracies": 1.0, "rewards/chosen": -2.147058148693759e-05, "rewards/margins": 0.2194923758506775, "rewards/rejected": -0.21951386332511902, "step": 13101 }, { "epoch": 9.060857538035961, "grad_norm": 3.011972427368164, "learning_rate": 5.217458122022437e-06, "log_odds_chosen": 11.178058624267578, "log_odds_ratio": -4.639428516384214e-05, "logits/chosen": -0.18648342788219452, "logits/rejected": -0.3156924545764923, "logps/chosen": -0.00020562413556035608, "logps/rejected": -2.582714080810547, "loss": 0.3531, "nll_loss": 0.08826884627342224, "rewards/accuracies": 1.0, "rewards/chosen": -2.056241464742925e-05, "rewards/margins": 0.2582508325576782, "rewards/rejected": -0.25827139616012573, "step": 13102 }, { "epoch": 9.061549100968188, "grad_norm": 2.6720283031463623, "learning_rate": 5.213616105732289e-06, "log_odds_chosen": 12.21937370300293, "log_odds_ratio": -1.6247211533482186e-05, "logits/chosen": -0.47814512252807617, "logits/rejected": -0.4694201946258545, "logps/chosen": -9.942967881215736e-05, "logps/rejected": -2.6553704738616943, "loss": 0.2414, "nll_loss": 0.06034007668495178, "rewards/accuracies": 1.0, "rewards/chosen": -9.942967153619975e-06, "rewards/margins": 0.26552706956863403, "rewards/rejected": -0.2655370533466339, "step": 13103 }, { "epoch": 9.062240663900415, "grad_norm": 3.425598621368408, "learning_rate": 5.209774089442139e-06, "log_odds_chosen": 10.777713775634766, "log_odds_ratio": -0.00031900242902338505, "logits/chosen": -0.09612531960010529, "logits/rejected": -0.16886131465435028, "logps/chosen": -0.00017488611047156155, "logps/rejected": -2.1556687355041504, "loss": 0.318, "nll_loss": 0.07945883274078369, "rewards/accuracies": 1.0, "rewards/chosen": -1.7488609955762513e-05, "rewards/margins": 0.21554937958717346, "rewards/rejected": -0.21556688845157623, "step": 13104 }, { "epoch": 9.062932226832642, "grad_norm": 2.207106113433838, "learning_rate": 5.20593207315199e-06, "log_odds_chosen": 11.121923446655273, "log_odds_ratio": -0.00023766764206811786, "logits/chosen": -0.37215495109558105, "logits/rejected": -0.3522205948829651, "logps/chosen": -9.444890747545287e-05, "logps/rejected": -1.9948663711547852, "loss": 0.2259, "nll_loss": 0.056441545486450195, "rewards/accuracies": 1.0, "rewards/chosen": -9.444891475141048e-06, "rewards/margins": 0.1994771957397461, "rewards/rejected": -0.1994866281747818, "step": 13105 }, { "epoch": 9.063623789764868, "grad_norm": 3.165987491607666, "learning_rate": 5.202090056861842e-06, "log_odds_chosen": 12.515090942382812, "log_odds_ratio": -5.959595910098869e-06, "logits/chosen": -0.3708084523677826, "logits/rejected": -0.4040341377258301, "logps/chosen": -0.00014049882884137332, "logps/rejected": -3.0284299850463867, "loss": 0.3313, "nll_loss": 0.08281341195106506, "rewards/accuracies": 1.0, "rewards/chosen": -1.4049882338440511e-05, "rewards/margins": 0.30282896757125854, "rewards/rejected": -0.30284303426742554, "step": 13106 }, { "epoch": 9.064315352697095, "grad_norm": 2.9175662994384766, "learning_rate": 5.1982480405716924e-06, "log_odds_chosen": 10.615434646606445, "log_odds_ratio": -0.00035317084984853864, "logits/chosen": -0.5332492589950562, "logits/rejected": -0.5264109373092651, "logps/chosen": -0.0006139426259323955, "logps/rejected": -1.8538782596588135, "loss": 0.3253, "nll_loss": 0.08128499984741211, "rewards/accuracies": 1.0, "rewards/chosen": -6.139426113804802e-05, "rewards/margins": 0.18532642722129822, "rewards/rejected": -0.18538782000541687, "step": 13107 }, { "epoch": 9.065006915629322, "grad_norm": 3.3031039237976074, "learning_rate": 5.194406024281543e-06, "log_odds_chosen": 11.36837387084961, "log_odds_ratio": -6.764855788787827e-05, "logits/chosen": -0.11974788457155228, "logits/rejected": -0.12750250101089478, "logps/chosen": -0.0002460972755216062, "logps/rejected": -2.46195912361145, "loss": 0.3839, "nll_loss": 0.09596128761768341, "rewards/accuracies": 1.0, "rewards/chosen": -2.4609729734947905e-05, "rewards/margins": 0.24617129564285278, "rewards/rejected": -0.24619589745998383, "step": 13108 }, { "epoch": 9.065698478561549, "grad_norm": 3.527372121810913, "learning_rate": 5.190564007991394e-06, "log_odds_chosen": 10.913864135742188, "log_odds_ratio": -0.00026111333863809705, "logits/chosen": -0.5446960926055908, "logits/rejected": -0.5455598831176758, "logps/chosen": -0.0002539431443437934, "logps/rejected": -2.0499162673950195, "loss": 0.4883, "nll_loss": 0.12203717231750488, "rewards/accuracies": 1.0, "rewards/chosen": -2.539431443437934e-05, "rewards/margins": 0.2049662470817566, "rewards/rejected": -0.2049916386604309, "step": 13109 }, { "epoch": 9.066390041493776, "grad_norm": 3.0204710960388184, "learning_rate": 5.1867219917012455e-06, "log_odds_chosen": 10.103961944580078, "log_odds_ratio": -0.00032514857593923807, "logits/chosen": -0.10731053352355957, "logits/rejected": -0.01982625015079975, "logps/chosen": -0.0006700665107928216, "logps/rejected": -1.8548423051834106, "loss": 0.347, "nll_loss": 0.08671282231807709, "rewards/accuracies": 1.0, "rewards/chosen": -6.700665835523978e-05, "rewards/margins": 0.18541721999645233, "rewards/rejected": -0.18548423051834106, "step": 13110 }, { "epoch": 9.067081604426003, "grad_norm": 5.251509189605713, "learning_rate": 5.1828799754110954e-06, "log_odds_chosen": 12.076827049255371, "log_odds_ratio": -2.3212494852486998e-05, "logits/chosen": -0.38703128695487976, "logits/rejected": -0.3221339285373688, "logps/chosen": -0.0004165376885794103, "logps/rejected": -3.2787702083587646, "loss": 0.4495, "nll_loss": 0.1123625785112381, "rewards/accuracies": 1.0, "rewards/chosen": -4.1653765947557986e-05, "rewards/margins": 0.3278353810310364, "rewards/rejected": -0.3278770446777344, "step": 13111 }, { "epoch": 9.06777316735823, "grad_norm": 4.941395282745361, "learning_rate": 5.179037959120947e-06, "log_odds_chosen": 11.142715454101562, "log_odds_ratio": -3.0218645406421274e-05, "logits/chosen": -0.4054286479949951, "logits/rejected": -0.5167216062545776, "logps/chosen": -0.0003387325559742749, "logps/rejected": -2.3566412925720215, "loss": 0.4717, "nll_loss": 0.11792398989200592, "rewards/accuracies": 1.0, "rewards/chosen": -3.38732534146402e-05, "rewards/margins": 0.23563024401664734, "rewards/rejected": -0.23566412925720215, "step": 13112 }, { "epoch": 9.068464730290456, "grad_norm": 2.290341377258301, "learning_rate": 5.175195942830798e-06, "log_odds_chosen": 11.20633602142334, "log_odds_ratio": -3.111179103143513e-05, "logits/chosen": -0.12001897394657135, "logits/rejected": -0.14916636049747467, "logps/chosen": -0.00010382429172750562, "logps/rejected": -1.8613578081130981, "loss": 0.2732, "nll_loss": 0.06830594688653946, "rewards/accuracies": 1.0, "rewards/chosen": -1.0382428627053741e-05, "rewards/margins": 0.18612539768218994, "rewards/rejected": -0.18613578379154205, "step": 13113 }, { "epoch": 9.069156293222683, "grad_norm": 3.792060613632202, "learning_rate": 5.1713539265406485e-06, "log_odds_chosen": 11.503861427307129, "log_odds_ratio": -0.0001394737046211958, "logits/chosen": 0.13606256246566772, "logits/rejected": 0.025217028334736824, "logps/chosen": -0.00030634726863354445, "logps/rejected": -2.5279359817504883, "loss": 0.3845, "nll_loss": 0.0961097702383995, "rewards/accuracies": 1.0, "rewards/chosen": -3.0634731956524774e-05, "rewards/margins": 0.25276297330856323, "rewards/rejected": -0.2527935802936554, "step": 13114 }, { "epoch": 9.06984785615491, "grad_norm": 3.5024185180664062, "learning_rate": 5.1675119102505e-06, "log_odds_chosen": 10.771469116210938, "log_odds_ratio": -4.546689524431713e-05, "logits/chosen": 0.012300148606300354, "logits/rejected": -0.0769965648651123, "logps/chosen": -0.00017290910182055086, "logps/rejected": -2.2188332080841064, "loss": 0.3947, "nll_loss": 0.09866972267627716, "rewards/accuracies": 1.0, "rewards/chosen": -1.7290909454459324e-05, "rewards/margins": 0.22186604142189026, "rewards/rejected": -0.22188332676887512, "step": 13115 }, { "epoch": 9.070539419087137, "grad_norm": 2.8086256980895996, "learning_rate": 5.16366989396035e-06, "log_odds_chosen": 11.772309303283691, "log_odds_ratio": -5.0502352678449824e-05, "logits/chosen": 0.15161627531051636, "logits/rejected": 0.05611416697502136, "logps/chosen": -0.00022595847258344293, "logps/rejected": -2.136870861053467, "loss": 0.3632, "nll_loss": 0.09080210328102112, "rewards/accuracies": 1.0, "rewards/chosen": -2.2595848349737935e-05, "rewards/margins": 0.21366450190544128, "rewards/rejected": -0.21368709206581116, "step": 13116 }, { "epoch": 9.071230982019364, "grad_norm": 3.9946985244750977, "learning_rate": 5.159827877670202e-06, "log_odds_chosen": 10.807271957397461, "log_odds_ratio": -4.892574725090526e-05, "logits/chosen": -0.18946993350982666, "logits/rejected": -0.34398671984672546, "logps/chosen": -0.0002714378642849624, "logps/rejected": -2.0007245540618896, "loss": 0.5261, "nll_loss": 0.13151350617408752, "rewards/accuracies": 1.0, "rewards/chosen": -2.714378570090048e-05, "rewards/margins": 0.20004534721374512, "rewards/rejected": -0.2000724822282791, "step": 13117 }, { "epoch": 9.07192254495159, "grad_norm": 3.284456491470337, "learning_rate": 5.155985861380052e-06, "log_odds_chosen": 11.461698532104492, "log_odds_ratio": -6.98392977938056e-05, "logits/chosen": -0.2301444262266159, "logits/rejected": -0.2811543345451355, "logps/chosen": -0.00048389926087111235, "logps/rejected": -3.2208995819091797, "loss": 0.2865, "nll_loss": 0.07162515819072723, "rewards/accuracies": 1.0, "rewards/chosen": -4.838992754230276e-05, "rewards/margins": 0.3220415711402893, "rewards/rejected": -0.3220899701118469, "step": 13118 }, { "epoch": 9.072614107883817, "grad_norm": 3.5966992378234863, "learning_rate": 5.152143845089903e-06, "log_odds_chosen": 11.999691009521484, "log_odds_ratio": -2.1546753487200476e-05, "logits/chosen": -0.48742491006851196, "logits/rejected": -0.4489961266517639, "logps/chosen": -8.737298776395619e-05, "logps/rejected": -2.584155797958374, "loss": 0.4487, "nll_loss": 0.11216401308774948, "rewards/accuracies": 1.0, "rewards/chosen": -8.7372991401935e-06, "rewards/margins": 0.2584068477153778, "rewards/rejected": -0.2584155797958374, "step": 13119 }, { "epoch": 9.073305670816044, "grad_norm": 4.022125720977783, "learning_rate": 5.148301828799755e-06, "log_odds_chosen": 10.40102767944336, "log_odds_ratio": -7.439900218741968e-05, "logits/chosen": -0.015573695302009583, "logits/rejected": -0.04067067801952362, "logps/chosen": -0.00011290111433481798, "logps/rejected": -1.432818055152893, "loss": 0.3738, "nll_loss": 0.09345272183418274, "rewards/accuracies": 1.0, "rewards/chosen": -1.129011252487544e-05, "rewards/margins": 0.14327052235603333, "rewards/rejected": -0.14328181743621826, "step": 13120 }, { "epoch": 9.07399723374827, "grad_norm": 2.8626561164855957, "learning_rate": 5.1444598125096055e-06, "log_odds_chosen": 11.430521965026855, "log_odds_ratio": -4.875660306424834e-05, "logits/chosen": -0.724206268787384, "logits/rejected": -0.7216789126396179, "logps/chosen": -0.00043527281377464533, "logps/rejected": -2.789267063140869, "loss": 0.3303, "nll_loss": 0.08256605267524719, "rewards/accuracies": 1.0, "rewards/chosen": -4.352728137746453e-05, "rewards/margins": 0.278883159160614, "rewards/rejected": -0.27892670035362244, "step": 13121 }, { "epoch": 9.074688796680498, "grad_norm": 3.03627347946167, "learning_rate": 5.140617796219456e-06, "log_odds_chosen": 11.022279739379883, "log_odds_ratio": -0.0005119434790685773, "logits/chosen": -0.2759706377983093, "logits/rejected": -0.35239100456237793, "logps/chosen": -0.00046582252252846956, "logps/rejected": -2.379591464996338, "loss": 0.2766, "nll_loss": 0.06909601390361786, "rewards/accuracies": 1.0, "rewards/chosen": -4.6582252252846956e-05, "rewards/margins": 0.23791258037090302, "rewards/rejected": -0.2379591464996338, "step": 13122 }, { "epoch": 9.075380359612724, "grad_norm": 3.4199564456939697, "learning_rate": 5.136775779929307e-06, "log_odds_chosen": 11.351811408996582, "log_odds_ratio": -1.5767138393130153e-05, "logits/chosen": -0.3166934847831726, "logits/rejected": -0.365016907453537, "logps/chosen": -0.0001815901923691854, "logps/rejected": -2.5316357612609863, "loss": 0.4853, "nll_loss": 0.1213144063949585, "rewards/accuracies": 1.0, "rewards/chosen": -1.8159018509322777e-05, "rewards/margins": 0.2531454265117645, "rewards/rejected": -0.25316357612609863, "step": 13123 }, { "epoch": 9.076071922544951, "grad_norm": 3.119173049926758, "learning_rate": 5.1329337636391586e-06, "log_odds_chosen": 9.569600105285645, "log_odds_ratio": -0.000530701712705195, "logits/chosen": 0.002615414559841156, "logits/rejected": -0.06705156713724136, "logps/chosen": -0.0006330714095383883, "logps/rejected": -1.9746882915496826, "loss": 0.3692, "nll_loss": 0.09225521981716156, "rewards/accuracies": 1.0, "rewards/chosen": -6.33071394986473e-05, "rewards/margins": 0.19740553200244904, "rewards/rejected": -0.1974688470363617, "step": 13124 }, { "epoch": 9.076763485477178, "grad_norm": 2.911562919616699, "learning_rate": 5.1290917473490085e-06, "log_odds_chosen": 11.062809944152832, "log_odds_ratio": -0.00010248189209960401, "logits/chosen": -0.22816266119480133, "logits/rejected": -0.2339855432510376, "logps/chosen": -0.00022922157950233668, "logps/rejected": -2.2725064754486084, "loss": 0.3558, "nll_loss": 0.08894093334674835, "rewards/accuracies": 1.0, "rewards/chosen": -2.292215867782943e-05, "rewards/margins": 0.22722773253917694, "rewards/rejected": -0.22725063562393188, "step": 13125 }, { "epoch": 9.077455048409405, "grad_norm": 2.968526840209961, "learning_rate": 5.12524973105886e-06, "log_odds_chosen": 12.123152732849121, "log_odds_ratio": -2.1198087779339403e-05, "logits/chosen": -0.08643770217895508, "logits/rejected": -0.24279722571372986, "logps/chosen": -0.0001828512322390452, "logps/rejected": -3.424088954925537, "loss": 0.3092, "nll_loss": 0.07728907465934753, "rewards/accuracies": 1.0, "rewards/chosen": -1.8285125406691805e-05, "rewards/margins": 0.34239059686660767, "rewards/rejected": -0.3424088954925537, "step": 13126 }, { "epoch": 9.078146611341632, "grad_norm": 3.580775499343872, "learning_rate": 5.121407714768711e-06, "log_odds_chosen": 10.601216316223145, "log_odds_ratio": -9.730371675686911e-05, "logits/chosen": -0.46868231892585754, "logits/rejected": -0.5867648720741272, "logps/chosen": -0.00010927829134743661, "logps/rejected": -1.7784924507141113, "loss": 0.487, "nll_loss": 0.12175001204013824, "rewards/accuracies": 1.0, "rewards/chosen": -1.0927829862339422e-05, "rewards/margins": 0.17783832550048828, "rewards/rejected": -0.17784924805164337, "step": 13127 }, { "epoch": 9.078838174273859, "grad_norm": 4.234866619110107, "learning_rate": 5.1175656984785616e-06, "log_odds_chosen": 10.49764347076416, "log_odds_ratio": -9.967401274479926e-05, "logits/chosen": -0.34242355823516846, "logits/rejected": -0.4020652174949646, "logps/chosen": -0.00039785588160157204, "logps/rejected": -2.255194902420044, "loss": 0.8858, "nll_loss": 0.22144310176372528, "rewards/accuracies": 1.0, "rewards/chosen": -3.978558743256144e-05, "rewards/margins": 0.22547970712184906, "rewards/rejected": -0.22551949322223663, "step": 13128 }, { "epoch": 9.079529737206085, "grad_norm": 3.9162516593933105, "learning_rate": 5.113723682188413e-06, "log_odds_chosen": 10.959028244018555, "log_odds_ratio": -0.00017648242646828294, "logits/chosen": 0.050031401216983795, "logits/rejected": 0.008972518146038055, "logps/chosen": -0.00042688497342169285, "logps/rejected": -2.5868098735809326, "loss": 0.4281, "nll_loss": 0.10699784755706787, "rewards/accuracies": 1.0, "rewards/chosen": -4.2688498069765046e-05, "rewards/margins": 0.25863829255104065, "rewards/rejected": -0.25868096947669983, "step": 13129 }, { "epoch": 9.080221300138312, "grad_norm": 2.9817802906036377, "learning_rate": 5.109881665898263e-06, "log_odds_chosen": 10.926042556762695, "log_odds_ratio": -2.3100288672139868e-05, "logits/chosen": -0.11011086404323578, "logits/rejected": -0.3051503300666809, "logps/chosen": -0.00021605490474030375, "logps/rejected": -2.295247793197632, "loss": 0.3411, "nll_loss": 0.08526698499917984, "rewards/accuracies": 1.0, "rewards/chosen": -2.160549229301978e-05, "rewards/margins": 0.22950318455696106, "rewards/rejected": -0.22952479124069214, "step": 13130 }, { "epoch": 9.08091286307054, "grad_norm": 3.780095100402832, "learning_rate": 5.106039649608115e-06, "log_odds_chosen": 9.921340942382812, "log_odds_ratio": -0.00042483158176764846, "logits/chosen": -0.1498485803604126, "logits/rejected": -0.18959610164165497, "logps/chosen": -0.00021947725326754153, "logps/rejected": -1.544329285621643, "loss": 0.4839, "nll_loss": 0.12092307209968567, "rewards/accuracies": 1.0, "rewards/chosen": -2.194772423536051e-05, "rewards/margins": 0.15441098809242249, "rewards/rejected": -0.15443293750286102, "step": 13131 }, { "epoch": 9.081604426002766, "grad_norm": 2.6842169761657715, "learning_rate": 5.102197633317965e-06, "log_odds_chosen": 11.452657699584961, "log_odds_ratio": -5.78801627852954e-05, "logits/chosen": -0.08300793170928955, "logits/rejected": -0.11368023604154587, "logps/chosen": -0.00013762382150162011, "logps/rejected": -2.3229103088378906, "loss": 0.3209, "nll_loss": 0.08022750169038773, "rewards/accuracies": 1.0, "rewards/chosen": -1.3762381968263071e-05, "rewards/margins": 0.2322772890329361, "rewards/rejected": -0.2322910577058792, "step": 13132 }, { "epoch": 9.082295988934993, "grad_norm": 3.5657460689544678, "learning_rate": 5.098355617027816e-06, "log_odds_chosen": 11.11960506439209, "log_odds_ratio": -3.4535565646365285e-05, "logits/chosen": -0.436089426279068, "logits/rejected": -0.398897647857666, "logps/chosen": -0.00014532770728692412, "logps/rejected": -2.1216166019439697, "loss": 0.4049, "nll_loss": 0.10122960805892944, "rewards/accuracies": 1.0, "rewards/chosen": -1.4532770364894532e-05, "rewards/margins": 0.21214714646339417, "rewards/rejected": -0.21216166019439697, "step": 13133 }, { "epoch": 9.08298755186722, "grad_norm": 3.125889539718628, "learning_rate": 5.094513600737667e-06, "log_odds_chosen": 10.308817863464355, "log_odds_ratio": -9.981192124541849e-05, "logits/chosen": -0.19146594405174255, "logits/rejected": -0.33207714557647705, "logps/chosen": -0.0005562923615798354, "logps/rejected": -2.1228606700897217, "loss": 0.2483, "nll_loss": 0.06205949932336807, "rewards/accuracies": 1.0, "rewards/chosen": -5.562922888202593e-05, "rewards/margins": 0.21223042905330658, "rewards/rejected": -0.2122860550880432, "step": 13134 }, { "epoch": 9.083679114799446, "grad_norm": 3.330157995223999, "learning_rate": 5.0906715844475185e-06, "log_odds_chosen": 11.04733943939209, "log_odds_ratio": -3.28706628351938e-05, "logits/chosen": -0.291248083114624, "logits/rejected": -0.4195294678211212, "logps/chosen": -0.00020840237266384065, "logps/rejected": -1.991461992263794, "loss": 0.3979, "nll_loss": 0.09946480393409729, "rewards/accuracies": 1.0, "rewards/chosen": -2.0840237993979827e-05, "rewards/margins": 0.19912534952163696, "rewards/rejected": -0.19914621114730835, "step": 13135 }, { "epoch": 9.084370677731673, "grad_norm": 3.6042749881744385, "learning_rate": 5.086829568157369e-06, "log_odds_chosen": 11.078657150268555, "log_odds_ratio": -2.63779529632302e-05, "logits/chosen": -0.4319020211696625, "logits/rejected": -0.4092617928981781, "logps/chosen": -0.00011186770279891789, "logps/rejected": -1.9281851053237915, "loss": 0.3121, "nll_loss": 0.07801743596792221, "rewards/accuracies": 1.0, "rewards/chosen": -1.1186770279891789e-05, "rewards/margins": 0.19280733168125153, "rewards/rejected": -0.1928185224533081, "step": 13136 }, { "epoch": 9.0850622406639, "grad_norm": 3.3793747425079346, "learning_rate": 5.08298755186722e-06, "log_odds_chosen": 10.268448829650879, "log_odds_ratio": -0.00013268145266920328, "logits/chosen": -0.17872902750968933, "logits/rejected": -0.25412702560424805, "logps/chosen": -0.0003009192587342113, "logps/rejected": -1.6723623275756836, "loss": 0.3279, "nll_loss": 0.08195911347866058, "rewards/accuracies": 1.0, "rewards/chosen": -3.0091925509623252e-05, "rewards/margins": 0.16720612347126007, "rewards/rejected": -0.16723620891571045, "step": 13137 }, { "epoch": 9.085753803596127, "grad_norm": 2.3682475090026855, "learning_rate": 5.079145535577072e-06, "log_odds_chosen": 11.593698501586914, "log_odds_ratio": -3.967937300330959e-05, "logits/chosen": -0.3585340976715088, "logits/rejected": -0.4492180347442627, "logps/chosen": -0.00010811621905304492, "logps/rejected": -2.1206421852111816, "loss": 0.3031, "nll_loss": 0.07576078921556473, "rewards/accuracies": 1.0, "rewards/chosen": -1.0811621905304492e-05, "rewards/margins": 0.21205341815948486, "rewards/rejected": -0.2120642215013504, "step": 13138 }, { "epoch": 9.086445366528354, "grad_norm": 4.587029457092285, "learning_rate": 5.0753035192869215e-06, "log_odds_chosen": 11.464735984802246, "log_odds_ratio": -2.5043180357897654e-05, "logits/chosen": -0.31821760535240173, "logits/rejected": -0.39988356828689575, "logps/chosen": -0.00012989738024771214, "logps/rejected": -2.334730386734009, "loss": 0.3182, "nll_loss": 0.0795440748333931, "rewards/accuracies": 1.0, "rewards/chosen": -1.2989738024771214e-05, "rewards/margins": 0.23346003890037537, "rewards/rejected": -0.2334730327129364, "step": 13139 }, { "epoch": 9.08713692946058, "grad_norm": 3.3115270137786865, "learning_rate": 5.071461502996773e-06, "log_odds_chosen": 10.78053092956543, "log_odds_ratio": -6.271836900850758e-05, "logits/chosen": 0.04377426207065582, "logits/rejected": 0.06366972625255585, "logps/chosen": -0.0012491923989728093, "logps/rejected": -1.8240324258804321, "loss": 0.3577, "nll_loss": 0.08942988514900208, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012491924280766398, "rewards/margins": 0.1822783201932907, "rewards/rejected": -0.18240323662757874, "step": 13140 }, { "epoch": 9.087828492392807, "grad_norm": 3.5181050300598145, "learning_rate": 5.067619486706624e-06, "log_odds_chosen": 10.943486213684082, "log_odds_ratio": -4.695288589573465e-05, "logits/chosen": -0.0279490128159523, "logits/rejected": -0.07990120351314545, "logps/chosen": -0.00024129982921294868, "logps/rejected": -2.2386720180511475, "loss": 0.3133, "nll_loss": 0.07832954823970795, "rewards/accuracies": 1.0, "rewards/chosen": -2.4129982193699107e-05, "rewards/margins": 0.2238430678844452, "rewards/rejected": -0.22386720776557922, "step": 13141 }, { "epoch": 9.088520055325034, "grad_norm": 3.6358225345611572, "learning_rate": 5.063777470416475e-06, "log_odds_chosen": 11.821958541870117, "log_odds_ratio": -3.9039121475070715e-05, "logits/chosen": -0.1069912388920784, "logits/rejected": -0.16369011998176575, "logps/chosen": -0.0002085616288240999, "logps/rejected": -2.8206725120544434, "loss": 0.4536, "nll_loss": 0.11340697109699249, "rewards/accuracies": 1.0, "rewards/chosen": -2.085616324620787e-05, "rewards/margins": 0.282046377658844, "rewards/rejected": -0.2820672392845154, "step": 13142 }, { "epoch": 9.089211618257261, "grad_norm": 3.2715563774108887, "learning_rate": 5.059935454126326e-06, "log_odds_chosen": 10.589996337890625, "log_odds_ratio": -4.2793963075382635e-05, "logits/chosen": -0.4457647502422333, "logits/rejected": -0.509307861328125, "logps/chosen": -0.00015700332005508244, "logps/rejected": -1.5440524816513062, "loss": 0.3215, "nll_loss": 0.08037039637565613, "rewards/accuracies": 1.0, "rewards/chosen": -1.5700332369306125e-05, "rewards/margins": 0.15438956022262573, "rewards/rejected": -0.15440526604652405, "step": 13143 }, { "epoch": 9.089903181189488, "grad_norm": 3.2407965660095215, "learning_rate": 5.056093437836177e-06, "log_odds_chosen": 10.927106857299805, "log_odds_ratio": -5.0967435527127236e-05, "logits/chosen": 0.08164151012897491, "logits/rejected": 0.02284158021211624, "logps/chosen": -0.00013107166159898043, "logps/rejected": -1.689274549484253, "loss": 0.2775, "nll_loss": 0.06936167180538177, "rewards/accuracies": 1.0, "rewards/chosen": -1.3107166523695923e-05, "rewards/margins": 0.16891434788703918, "rewards/rejected": -0.16892746090888977, "step": 13144 }, { "epoch": 9.090594744121715, "grad_norm": 4.398402690887451, "learning_rate": 5.052251421546028e-06, "log_odds_chosen": 11.465459823608398, "log_odds_ratio": -0.00012407473695930094, "logits/chosen": 0.3121594488620758, "logits/rejected": 0.27814939618110657, "logps/chosen": -0.0003415195969864726, "logps/rejected": -3.200613021850586, "loss": 0.4124, "nll_loss": 0.10309255868196487, "rewards/accuracies": 1.0, "rewards/chosen": -3.41519589710515e-05, "rewards/margins": 0.3200271427631378, "rewards/rejected": -0.3200612962245941, "step": 13145 }, { "epoch": 9.091286307053942, "grad_norm": 4.4590253829956055, "learning_rate": 5.0484094052558784e-06, "log_odds_chosen": 10.064178466796875, "log_odds_ratio": -0.00011093214561697096, "logits/chosen": -0.11966148763895035, "logits/rejected": -0.21258839964866638, "logps/chosen": -0.0003168184484820813, "logps/rejected": -1.9933526515960693, "loss": 0.3697, "nll_loss": 0.09242478013038635, "rewards/accuracies": 1.0, "rewards/chosen": -3.1681847758591175e-05, "rewards/margins": 0.19930359721183777, "rewards/rejected": -0.1993352770805359, "step": 13146 }, { "epoch": 9.091977869986168, "grad_norm": 3.343506097793579, "learning_rate": 5.044567388965729e-06, "log_odds_chosen": 12.137199401855469, "log_odds_ratio": -2.5066479793167673e-05, "logits/chosen": -0.35906705260276794, "logits/rejected": -0.27062171697616577, "logps/chosen": -0.00017379832570441067, "logps/rejected": -3.39198637008667, "loss": 0.379, "nll_loss": 0.09473510086536407, "rewards/accuracies": 1.0, "rewards/chosen": -1.737983438943047e-05, "rewards/margins": 0.33918124437332153, "rewards/rejected": -0.33919864892959595, "step": 13147 }, { "epoch": 9.092669432918395, "grad_norm": 5.016630172729492, "learning_rate": 5.04072537267558e-06, "log_odds_chosen": 11.218782424926758, "log_odds_ratio": -4.880976484855637e-05, "logits/chosen": -0.1608268916606903, "logits/rejected": -0.1783805787563324, "logps/chosen": -0.0001921278308145702, "logps/rejected": -2.267690896987915, "loss": 0.4646, "nll_loss": 0.11615432053804398, "rewards/accuracies": 1.0, "rewards/chosen": -1.9212782717659138e-05, "rewards/margins": 0.22674988210201263, "rewards/rejected": -0.2267691045999527, "step": 13148 }, { "epoch": 9.093360995850622, "grad_norm": 2.3774566650390625, "learning_rate": 5.0368833563854315e-06, "log_odds_chosen": 10.962896347045898, "log_odds_ratio": -0.00010562760871835053, "logits/chosen": -0.27799174189567566, "logits/rejected": -0.29090049862861633, "logps/chosen": -0.00019772813539020717, "logps/rejected": -2.3116509914398193, "loss": 0.2353, "nll_loss": 0.05882162228226662, "rewards/accuracies": 1.0, "rewards/chosen": -1.977281499421224e-05, "rewards/margins": 0.23114533722400665, "rewards/rejected": -0.2311651110649109, "step": 13149 }, { "epoch": 9.094052558782849, "grad_norm": 3.0054092407226562, "learning_rate": 5.033041340095282e-06, "log_odds_chosen": 11.95867919921875, "log_odds_ratio": -1.787081237125676e-05, "logits/chosen": -0.2743851840496063, "logits/rejected": -0.32335007190704346, "logps/chosen": -0.00018235544848721474, "logps/rejected": -2.9181389808654785, "loss": 0.3871, "nll_loss": 0.09678283333778381, "rewards/accuracies": 1.0, "rewards/chosen": -1.823554339352995e-05, "rewards/margins": 0.29179567098617554, "rewards/rejected": -0.2918138802051544, "step": 13150 }, { "epoch": 9.094744121715076, "grad_norm": 4.452502727508545, "learning_rate": 5.029199323805133e-06, "log_odds_chosen": 11.339390754699707, "log_odds_ratio": -6.690513691864908e-05, "logits/chosen": -0.48545175790786743, "logits/rejected": -0.5180833339691162, "logps/chosen": -0.0004081081715412438, "logps/rejected": -2.6610803604125977, "loss": 0.5438, "nll_loss": 0.13594885170459747, "rewards/accuracies": 1.0, "rewards/chosen": -4.0810820792103186e-05, "rewards/margins": 0.2660672068595886, "rewards/rejected": -0.26610803604125977, "step": 13151 }, { "epoch": 9.095435684647303, "grad_norm": 6.563195705413818, "learning_rate": 5.025357307514985e-06, "log_odds_chosen": 10.881711959838867, "log_odds_ratio": -0.0003529912792146206, "logits/chosen": 0.21874408423900604, "logits/rejected": 0.029261693358421326, "logps/chosen": -0.0005761877982877195, "logps/rejected": -1.6720936298370361, "loss": 0.6063, "nll_loss": 0.15154193341732025, "rewards/accuracies": 1.0, "rewards/chosen": -5.761878128396347e-05, "rewards/margins": 0.16715176403522491, "rewards/rejected": -0.16720937192440033, "step": 13152 }, { "epoch": 9.09612724757953, "grad_norm": 4.052469730377197, "learning_rate": 5.0215152912248345e-06, "log_odds_chosen": 11.643571853637695, "log_odds_ratio": -0.00046770076733082533, "logits/chosen": 0.14311496913433075, "logits/rejected": 0.12904202938079834, "logps/chosen": -0.0011221003951504827, "logps/rejected": -3.477919101715088, "loss": 0.4618, "nll_loss": 0.11539174616336823, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011221005115658045, "rewards/margins": 0.3476797044277191, "rewards/rejected": -0.3477919101715088, "step": 13153 }, { "epoch": 9.096818810511756, "grad_norm": 3.6318225860595703, "learning_rate": 5.017673274934686e-06, "log_odds_chosen": 11.56639289855957, "log_odds_ratio": -6.830410711700097e-05, "logits/chosen": -0.017568401992321014, "logits/rejected": -0.012221388518810272, "logps/chosen": -0.0001570779422763735, "logps/rejected": -2.9654502868652344, "loss": 0.3252, "nll_loss": 0.08129463344812393, "rewards/accuracies": 1.0, "rewards/chosen": -1.5707795682828873e-05, "rewards/margins": 0.2965293526649475, "rewards/rejected": -0.2965450584888458, "step": 13154 }, { "epoch": 9.097510373443983, "grad_norm": 3.365553855895996, "learning_rate": 5.013831258644537e-06, "log_odds_chosen": 10.493221282958984, "log_odds_ratio": -0.0002885025169234723, "logits/chosen": 0.04266492277383804, "logits/rejected": -0.0294787660241127, "logps/chosen": -0.00023267159122042358, "logps/rejected": -1.9413899183273315, "loss": 0.4053, "nll_loss": 0.10130857676267624, "rewards/accuracies": 1.0, "rewards/chosen": -2.3267159122042358e-05, "rewards/margins": 0.19411572813987732, "rewards/rejected": -0.1941390037536621, "step": 13155 }, { "epoch": 9.09820193637621, "grad_norm": 5.188926696777344, "learning_rate": 5.009989242354388e-06, "log_odds_chosen": 10.942852973937988, "log_odds_ratio": -7.25128993508406e-05, "logits/chosen": -0.48505446314811707, "logits/rejected": -0.45248520374298096, "logps/chosen": -0.00043346683378331363, "logps/rejected": -2.1067748069763184, "loss": 0.4581, "nll_loss": 0.11450614780187607, "rewards/accuracies": 1.0, "rewards/chosen": -4.334668119554408e-05, "rewards/margins": 0.21063414216041565, "rewards/rejected": -0.21067747473716736, "step": 13156 }, { "epoch": 9.098893499308437, "grad_norm": 3.068840980529785, "learning_rate": 5.006147226064238e-06, "log_odds_chosen": 11.091489791870117, "log_odds_ratio": -5.561709258472547e-05, "logits/chosen": -0.3738434612751007, "logits/rejected": -0.33890044689178467, "logps/chosen": -0.00016683740250300616, "logps/rejected": -1.8817553520202637, "loss": 0.378, "nll_loss": 0.09448930621147156, "rewards/accuracies": 1.0, "rewards/chosen": -1.6683739886502735e-05, "rewards/margins": 0.18815885484218597, "rewards/rejected": -0.1881755292415619, "step": 13157 }, { "epoch": 9.099585062240664, "grad_norm": 2.2089033126831055, "learning_rate": 5.00230520977409e-06, "log_odds_chosen": 10.12149429321289, "log_odds_ratio": -0.0003154563601128757, "logits/chosen": -0.10125580430030823, "logits/rejected": -0.0849694237112999, "logps/chosen": -0.0005034382920712233, "logps/rejected": -2.024165630340576, "loss": 0.2075, "nll_loss": 0.0518321767449379, "rewards/accuracies": 1.0, "rewards/chosen": -5.03438277519308e-05, "rewards/margins": 0.20236621797084808, "rewards/rejected": -0.2024165689945221, "step": 13158 }, { "epoch": 9.10027662517289, "grad_norm": 3.7857158184051514, "learning_rate": 4.998463193483941e-06, "log_odds_chosen": 11.10672378540039, "log_odds_ratio": -5.2406474424060434e-05, "logits/chosen": -0.22684435546398163, "logits/rejected": -0.28922390937805176, "logps/chosen": -0.0003105810610577464, "logps/rejected": -2.401808977127075, "loss": 0.4286, "nll_loss": 0.1071481853723526, "rewards/accuracies": 1.0, "rewards/chosen": -3.105810901615769e-05, "rewards/margins": 0.2401498556137085, "rewards/rejected": -0.24018090963363647, "step": 13159 }, { "epoch": 9.100968188105117, "grad_norm": 4.0806450843811035, "learning_rate": 4.9946211771937915e-06, "log_odds_chosen": 11.757749557495117, "log_odds_ratio": -1.2237173905305099e-05, "logits/chosen": -0.09956353902816772, "logits/rejected": -0.11438395828008652, "logps/chosen": -8.435586642008275e-05, "logps/rejected": -2.281965970993042, "loss": 0.517, "nll_loss": 0.12925131618976593, "rewards/accuracies": 1.0, "rewards/chosen": -8.435587005806156e-06, "rewards/margins": 0.228188157081604, "rewards/rejected": -0.22819659113883972, "step": 13160 }, { "epoch": 9.101659751037344, "grad_norm": 3.7898600101470947, "learning_rate": 4.990779160903643e-06, "log_odds_chosen": 10.907726287841797, "log_odds_ratio": -4.189980973023921e-05, "logits/chosen": -0.566761314868927, "logits/rejected": -0.46380579471588135, "logps/chosen": -0.00017752411076799035, "logps/rejected": -1.85300874710083, "loss": 0.4109, "nll_loss": 0.10271905362606049, "rewards/accuracies": 1.0, "rewards/chosen": -1.7752410713001154e-05, "rewards/margins": 0.1852831244468689, "rewards/rejected": -0.18530087172985077, "step": 13161 }, { "epoch": 9.10235131396957, "grad_norm": 4.558443546295166, "learning_rate": 4.986937144613493e-06, "log_odds_chosen": 10.4443359375, "log_odds_ratio": -4.984636325389147e-05, "logits/chosen": -0.07645373046398163, "logits/rejected": -0.17390292882919312, "logps/chosen": -0.00016142117965500802, "logps/rejected": -1.8318181037902832, "loss": 0.3211, "nll_loss": 0.08027378469705582, "rewards/accuracies": 1.0, "rewards/chosen": -1.614211760170292e-05, "rewards/margins": 0.18316569924354553, "rewards/rejected": -0.18318183720111847, "step": 13162 }, { "epoch": 9.103042876901798, "grad_norm": 3.671687602996826, "learning_rate": 4.9830951283233446e-06, "log_odds_chosen": 12.03889274597168, "log_odds_ratio": -1.1940827789658215e-05, "logits/chosen": 0.09454180300235748, "logits/rejected": 0.05508112162351608, "logps/chosen": -0.0002764679375104606, "logps/rejected": -2.8818044662475586, "loss": 0.4997, "nll_loss": 0.12493018805980682, "rewards/accuracies": 1.0, "rewards/chosen": -2.764679265965242e-05, "rewards/margins": 0.2881527543067932, "rewards/rejected": -0.288180410861969, "step": 13163 }, { "epoch": 9.103734439834025, "grad_norm": 2.8568713665008545, "learning_rate": 4.979253112033195e-06, "log_odds_chosen": 10.179363250732422, "log_odds_ratio": -7.92900609667413e-05, "logits/chosen": -0.549415647983551, "logits/rejected": -0.526592493057251, "logps/chosen": -0.0002810688456520438, "logps/rejected": -1.7964861392974854, "loss": 0.3399, "nll_loss": 0.08496960997581482, "rewards/accuracies": 1.0, "rewards/chosen": -2.81068842014065e-05, "rewards/margins": 0.17962051928043365, "rewards/rejected": -0.17964863777160645, "step": 13164 }, { "epoch": 9.104426002766251, "grad_norm": 3.650968313217163, "learning_rate": 4.975411095743046e-06, "log_odds_chosen": 10.66606616973877, "log_odds_ratio": -0.00035028919228352606, "logits/chosen": -0.08267174661159515, "logits/rejected": -0.2358940839767456, "logps/chosen": -0.00016879210306797177, "logps/rejected": -2.1179189682006836, "loss": 0.431, "nll_loss": 0.10771405696868896, "rewards/accuracies": 1.0, "rewards/chosen": -1.6879210306797177e-05, "rewards/margins": 0.2117750495672226, "rewards/rejected": -0.21179193258285522, "step": 13165 }, { "epoch": 9.105117565698478, "grad_norm": 4.6579060554504395, "learning_rate": 4.971569079452898e-06, "log_odds_chosen": 12.230474472045898, "log_odds_ratio": -2.4434060833300464e-05, "logits/chosen": -0.38378095626831055, "logits/rejected": -0.3854064345359802, "logps/chosen": -0.00015963416080921888, "logps/rejected": -3.438502311706543, "loss": 0.2897, "nll_loss": 0.07242448627948761, "rewards/accuracies": 1.0, "rewards/chosen": -1.5963418263709173e-05, "rewards/margins": 0.3438342809677124, "rewards/rejected": -0.3438502550125122, "step": 13166 }, { "epoch": 9.105809128630705, "grad_norm": 3.5084073543548584, "learning_rate": 4.9677270631627475e-06, "log_odds_chosen": 11.561970710754395, "log_odds_ratio": -2.341391154914163e-05, "logits/chosen": -0.4982847571372986, "logits/rejected": -0.4246971011161804, "logps/chosen": -0.0001479636412113905, "logps/rejected": -2.591989517211914, "loss": 0.3273, "nll_loss": 0.08181080222129822, "rewards/accuracies": 1.0, "rewards/chosen": -1.4796363757341169e-05, "rewards/margins": 0.25918418169021606, "rewards/rejected": -0.25919896364212036, "step": 13167 }, { "epoch": 9.106500691562932, "grad_norm": 4.859031677246094, "learning_rate": 4.963885046872599e-06, "log_odds_chosen": 10.677106857299805, "log_odds_ratio": -0.0009081160533241928, "logits/chosen": 0.2539621591567993, "logits/rejected": 0.10869896411895752, "logps/chosen": -0.0006138522294349968, "logps/rejected": -2.1311261653900146, "loss": 0.2616, "nll_loss": 0.06530681252479553, "rewards/accuracies": 1.0, "rewards/chosen": -6.138522439869121e-05, "rewards/margins": 0.2130512297153473, "rewards/rejected": -0.21311262249946594, "step": 13168 }, { "epoch": 9.107192254495159, "grad_norm": 3.615055561065674, "learning_rate": 4.96004303058245e-06, "log_odds_chosen": 10.297002792358398, "log_odds_ratio": -0.00012715287448372692, "logits/chosen": -0.30406704545021057, "logits/rejected": -0.18537336587905884, "logps/chosen": -0.0002870440948754549, "logps/rejected": -1.7042102813720703, "loss": 0.371, "nll_loss": 0.09272780269384384, "rewards/accuracies": 1.0, "rewards/chosen": -2.870440948754549e-05, "rewards/margins": 0.17039233446121216, "rewards/rejected": -0.1704210340976715, "step": 13169 }, { "epoch": 9.107883817427386, "grad_norm": 3.216033697128296, "learning_rate": 4.956201014292301e-06, "log_odds_chosen": 12.333643913269043, "log_odds_ratio": -7.912468390713912e-06, "logits/chosen": -0.3236329257488251, "logits/rejected": -0.3933505117893219, "logps/chosen": -5.795392280560918e-05, "logps/rejected": -2.314310073852539, "loss": 0.3152, "nll_loss": 0.0788043662905693, "rewards/accuracies": 1.0, "rewards/chosen": -5.795392553409329e-06, "rewards/margins": 0.2314252108335495, "rewards/rejected": -0.2314310073852539, "step": 13170 }, { "epoch": 9.108575380359612, "grad_norm": 4.9232282638549805, "learning_rate": 4.952358998002151e-06, "log_odds_chosen": 11.705574035644531, "log_odds_ratio": -2.032737756962888e-05, "logits/chosen": 0.01616286300122738, "logits/rejected": -0.04003433510661125, "logps/chosen": -0.0003002825251314789, "logps/rejected": -2.7152099609375, "loss": 0.667, "nll_loss": 0.16675525903701782, "rewards/accuracies": 1.0, "rewards/chosen": -3.0028253604541533e-05, "rewards/margins": 0.27149099111557007, "rewards/rejected": -0.2715210020542145, "step": 13171 }, { "epoch": 9.10926694329184, "grad_norm": 3.5715765953063965, "learning_rate": 4.948516981712003e-06, "log_odds_chosen": 11.402990341186523, "log_odds_ratio": -6.371325434884056e-05, "logits/chosen": -0.6012564897537231, "logits/rejected": -0.49459028244018555, "logps/chosen": -0.00019830641394946724, "logps/rejected": -2.2350080013275146, "loss": 0.4016, "nll_loss": 0.10040025413036346, "rewards/accuracies": 1.0, "rewards/chosen": -1.9830642486340366e-05, "rewards/margins": 0.2234809547662735, "rewards/rejected": -0.2235007882118225, "step": 13172 }, { "epoch": 9.109958506224066, "grad_norm": 3.1697309017181396, "learning_rate": 4.944674965421854e-06, "log_odds_chosen": 10.932571411132812, "log_odds_ratio": -8.260000322479755e-05, "logits/chosen": -0.2529147267341614, "logits/rejected": -0.2343870997428894, "logps/chosen": -0.004691335838288069, "logps/rejected": -2.201251268386841, "loss": 0.355, "nll_loss": 0.08873645216226578, "rewards/accuracies": 1.0, "rewards/chosen": -0.00046913354890421033, "rewards/margins": 0.21965602040290833, "rewards/rejected": -0.22012513875961304, "step": 13173 }, { "epoch": 9.110650069156293, "grad_norm": 3.2608823776245117, "learning_rate": 4.9408329491317045e-06, "log_odds_chosen": 11.447783470153809, "log_odds_ratio": -4.559377339319326e-05, "logits/chosen": 0.0027496833354234695, "logits/rejected": -0.034812696278095245, "logps/chosen": -0.000319063343340531, "logps/rejected": -2.4481163024902344, "loss": 0.2811, "nll_loss": 0.07026031613349915, "rewards/accuracies": 1.0, "rewards/chosen": -3.1906336516840383e-05, "rewards/margins": 0.24477970600128174, "rewards/rejected": -0.24481162428855896, "step": 13174 }, { "epoch": 9.11134163208852, "grad_norm": 4.426057815551758, "learning_rate": 4.936990932841556e-06, "log_odds_chosen": 11.010347366333008, "log_odds_ratio": -6.925136403879151e-05, "logits/chosen": -0.05647280812263489, "logits/rejected": -0.2984941899776459, "logps/chosen": -0.00027949127252213657, "logps/rejected": -2.3876566886901855, "loss": 0.4398, "nll_loss": 0.10994251072406769, "rewards/accuracies": 1.0, "rewards/chosen": -2.7949130526394583e-05, "rewards/margins": 0.23873771727085114, "rewards/rejected": -0.2387656718492508, "step": 13175 }, { "epoch": 9.112033195020746, "grad_norm": 3.035013198852539, "learning_rate": 4.933148916551406e-06, "log_odds_chosen": 12.202170372009277, "log_odds_ratio": -7.800666026014369e-06, "logits/chosen": -0.616619884967804, "logits/rejected": -0.5678720474243164, "logps/chosen": -9.361472621094435e-05, "logps/rejected": -2.7422537803649902, "loss": 0.3129, "nll_loss": 0.07821905612945557, "rewards/accuracies": 1.0, "rewards/chosen": -9.361472621094435e-06, "rewards/margins": 0.27421602606773376, "rewards/rejected": -0.2742254137992859, "step": 13176 }, { "epoch": 9.112724757952973, "grad_norm": 3.3646538257598877, "learning_rate": 4.929306900261258e-06, "log_odds_chosen": 10.746342658996582, "log_odds_ratio": -4.711302608484402e-05, "logits/chosen": -0.28519192337989807, "logits/rejected": -0.3103817403316498, "logps/chosen": -0.0002904360298998654, "logps/rejected": -2.317279100418091, "loss": 0.4137, "nll_loss": 0.1034327819943428, "rewards/accuracies": 1.0, "rewards/chosen": -2.90436037175823e-05, "rewards/margins": 0.23169885575771332, "rewards/rejected": -0.23172789812088013, "step": 13177 }, { "epoch": 9.1134163208852, "grad_norm": 3.402470588684082, "learning_rate": 4.925464883971108e-06, "log_odds_chosen": 11.85820198059082, "log_odds_ratio": -1.4866004676150624e-05, "logits/chosen": 0.08311102539300919, "logits/rejected": 0.03828234225511551, "logps/chosen": -0.00020142002904321998, "logps/rejected": -2.738119602203369, "loss": 0.5828, "nll_loss": 0.14569005370140076, "rewards/accuracies": 1.0, "rewards/chosen": -2.0142002540524118e-05, "rewards/margins": 0.2737918198108673, "rewards/rejected": -0.2738119661808014, "step": 13178 }, { "epoch": 9.114107883817427, "grad_norm": 3.4675073623657227, "learning_rate": 4.921622867680959e-06, "log_odds_chosen": 10.453381538391113, "log_odds_ratio": -0.0006085903150960803, "logits/chosen": -0.28540289402008057, "logits/rejected": -0.2581024467945099, "logps/chosen": -0.003210814204066992, "logps/rejected": -2.185744047164917, "loss": 0.3239, "nll_loss": 0.08090663701295853, "rewards/accuracies": 1.0, "rewards/chosen": -0.00032108143204823136, "rewards/margins": 0.21825332939624786, "rewards/rejected": -0.2185744196176529, "step": 13179 }, { "epoch": 9.114799446749654, "grad_norm": 4.592959403991699, "learning_rate": 4.91778085139081e-06, "log_odds_chosen": 11.726934432983398, "log_odds_ratio": -1.3600827514892444e-05, "logits/chosen": -0.33622393012046814, "logits/rejected": -0.4734468460083008, "logps/chosen": -0.0001402581692673266, "logps/rejected": -2.4596095085144043, "loss": 0.5269, "nll_loss": 0.13171550631523132, "rewards/accuracies": 1.0, "rewards/chosen": -1.40258171086316e-05, "rewards/margins": 0.24594691395759583, "rewards/rejected": -0.24596095085144043, "step": 13180 }, { "epoch": 9.11549100968188, "grad_norm": 3.1724741458892822, "learning_rate": 4.913938835100661e-06, "log_odds_chosen": 11.557546615600586, "log_odds_ratio": -1.7893340555019677e-05, "logits/chosen": -0.4319803714752197, "logits/rejected": -0.42902466654777527, "logps/chosen": -0.00011146764154545963, "logps/rejected": -2.4004411697387695, "loss": 0.3218, "nll_loss": 0.08044193685054779, "rewards/accuracies": 1.0, "rewards/chosen": -1.1146764336444903e-05, "rewards/margins": 0.24003298580646515, "rewards/rejected": -0.24004413187503815, "step": 13181 }, { "epoch": 9.116182572614107, "grad_norm": 2.474640130996704, "learning_rate": 4.910096818810512e-06, "log_odds_chosen": 11.198797225952148, "log_odds_ratio": -3.0988761864136904e-05, "logits/chosen": 0.05575866997241974, "logits/rejected": 0.0315190851688385, "logps/chosen": -0.00020474701886996627, "logps/rejected": -2.4687418937683105, "loss": 0.2955, "nll_loss": 0.0738632082939148, "rewards/accuracies": 1.0, "rewards/chosen": -2.0474701159400865e-05, "rewards/margins": 0.24685370922088623, "rewards/rejected": -0.24687419831752777, "step": 13182 }, { "epoch": 9.116874135546334, "grad_norm": 2.3888325691223145, "learning_rate": 4.906254802520363e-06, "log_odds_chosen": 10.87182903289795, "log_odds_ratio": -0.0003233412862755358, "logits/chosen": 0.07042770087718964, "logits/rejected": 0.014010794460773468, "logps/chosen": -0.00014998050755821168, "logps/rejected": -1.9851473569869995, "loss": 0.2403, "nll_loss": 0.06004884093999863, "rewards/accuracies": 1.0, "rewards/chosen": -1.4998049664427526e-05, "rewards/margins": 0.19849973917007446, "rewards/rejected": -0.19851475954055786, "step": 13183 }, { "epoch": 9.117565698478561, "grad_norm": 2.9431097507476807, "learning_rate": 4.902412786230214e-06, "log_odds_chosen": 11.711980819702148, "log_odds_ratio": -2.999811840709299e-05, "logits/chosen": 0.02263740450143814, "logits/rejected": -0.1297261267900467, "logps/chosen": -0.00025718723190948367, "logps/rejected": -3.303467273712158, "loss": 0.3269, "nll_loss": 0.08172940462827682, "rewards/accuracies": 1.0, "rewards/chosen": -2.571872028056532e-05, "rewards/margins": 0.3303210139274597, "rewards/rejected": -0.3303467333316803, "step": 13184 }, { "epoch": 9.118257261410788, "grad_norm": 3.024400472640991, "learning_rate": 4.898570769940064e-06, "log_odds_chosen": 10.90009593963623, "log_odds_ratio": -0.00043232255848124623, "logits/chosen": -0.43155479431152344, "logits/rejected": -0.404721736907959, "logps/chosen": -0.0004337151476647705, "logps/rejected": -3.1928770542144775, "loss": 0.352, "nll_loss": 0.08794493973255157, "rewards/accuracies": 1.0, "rewards/chosen": -4.337151403888129e-05, "rewards/margins": 0.3192443251609802, "rewards/rejected": -0.3192877173423767, "step": 13185 }, { "epoch": 9.118948824343015, "grad_norm": 2.188103437423706, "learning_rate": 4.894728753649916e-06, "log_odds_chosen": 10.891100883483887, "log_odds_ratio": -0.000127013074234128, "logits/chosen": -0.31894204020500183, "logits/rejected": -0.2324240505695343, "logps/chosen": -0.00011675543646560982, "logps/rejected": -1.966536521911621, "loss": 0.2551, "nll_loss": 0.06376811861991882, "rewards/accuracies": 1.0, "rewards/chosen": -1.1675543646560982e-05, "rewards/margins": 0.1966419816017151, "rewards/rejected": -0.19665364921092987, "step": 13186 }, { "epoch": 9.119640387275242, "grad_norm": 3.5622682571411133, "learning_rate": 4.890886737359767e-06, "log_odds_chosen": 11.323587417602539, "log_odds_ratio": -2.4846201995387673e-05, "logits/chosen": -0.1945473849773407, "logits/rejected": -0.25932082533836365, "logps/chosen": -9.580461482983083e-05, "logps/rejected": -1.8983120918273926, "loss": 0.2771, "nll_loss": 0.06927736103534698, "rewards/accuracies": 1.0, "rewards/chosen": -9.580460755387321e-06, "rewards/margins": 0.18982163071632385, "rewards/rejected": -0.1898311972618103, "step": 13187 }, { "epoch": 9.120331950207468, "grad_norm": 4.116031646728516, "learning_rate": 4.8870447210696175e-06, "log_odds_chosen": 11.892321586608887, "log_odds_ratio": -1.5779898603796028e-05, "logits/chosen": -0.2089371383190155, "logits/rejected": -0.15336598455905914, "logps/chosen": -0.00032184182782657444, "logps/rejected": -3.1460373401641846, "loss": 0.376, "nll_loss": 0.09399942308664322, "rewards/accuracies": 1.0, "rewards/chosen": -3.2184183510253206e-05, "rewards/margins": 0.3145715594291687, "rewards/rejected": -0.3146037459373474, "step": 13188 }, { "epoch": 9.121023513139695, "grad_norm": 2.5508615970611572, "learning_rate": 4.883202704779469e-06, "log_odds_chosen": 12.145647048950195, "log_odds_ratio": -9.136807420873083e-06, "logits/chosen": -0.24097317457199097, "logits/rejected": -0.23421518504619598, "logps/chosen": -0.00011886593711096793, "logps/rejected": -2.9510018825531006, "loss": 0.3036, "nll_loss": 0.07589846849441528, "rewards/accuracies": 1.0, "rewards/chosen": -1.1886593711096793e-05, "rewards/margins": 0.2950882911682129, "rewards/rejected": -0.2951001524925232, "step": 13189 }, { "epoch": 9.121715076071922, "grad_norm": 4.2100443840026855, "learning_rate": 4.879360688489319e-06, "log_odds_chosen": 11.170172691345215, "log_odds_ratio": -8.050798351177946e-05, "logits/chosen": -0.2680250406265259, "logits/rejected": -0.38149750232696533, "logps/chosen": -0.00017637033306527883, "logps/rejected": -1.7586069107055664, "loss": 0.432, "nll_loss": 0.10799022018909454, "rewards/accuracies": 1.0, "rewards/chosen": -1.763703221513424e-05, "rewards/margins": 0.17584306001663208, "rewards/rejected": -0.1758607029914856, "step": 13190 }, { "epoch": 9.122406639004149, "grad_norm": 3.2198972702026367, "learning_rate": 4.875518672199171e-06, "log_odds_chosen": 10.57756233215332, "log_odds_ratio": -0.00038010356365703046, "logits/chosen": -0.11352365463972092, "logits/rejected": -0.20239883661270142, "logps/chosen": -0.0013985616387799382, "logps/rejected": -2.335310459136963, "loss": 0.2675, "nll_loss": 0.06683464348316193, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001398561871610582, "rewards/margins": 0.2333911955356598, "rewards/rejected": -0.23353107273578644, "step": 13191 }, { "epoch": 9.123098201936376, "grad_norm": 4.147787570953369, "learning_rate": 4.871676655909021e-06, "log_odds_chosen": 11.222441673278809, "log_odds_ratio": -6.227093399502337e-05, "logits/chosen": -0.44622766971588135, "logits/rejected": -0.5303174257278442, "logps/chosen": -0.00029116583755239844, "logps/rejected": -2.246415138244629, "loss": 0.5198, "nll_loss": 0.12995260953903198, "rewards/accuracies": 1.0, "rewards/chosen": -2.9116585210431367e-05, "rewards/margins": 0.22461241483688354, "rewards/rejected": -0.22464153170585632, "step": 13192 }, { "epoch": 9.123789764868603, "grad_norm": 3.82718825340271, "learning_rate": 4.867834639618872e-06, "log_odds_chosen": 11.536949157714844, "log_odds_ratio": -1.4661351087852381e-05, "logits/chosen": 0.2355799823999405, "logits/rejected": 0.16792237758636475, "logps/chosen": -0.00010456145537318662, "logps/rejected": -2.4021620750427246, "loss": 0.5924, "nll_loss": 0.14808820188045502, "rewards/accuracies": 1.0, "rewards/chosen": -1.045614499162184e-05, "rewards/margins": 0.2402057647705078, "rewards/rejected": -0.2402162104845047, "step": 13193 }, { "epoch": 9.12448132780083, "grad_norm": 3.064208745956421, "learning_rate": 4.863992623328723e-06, "log_odds_chosen": 12.178282737731934, "log_odds_ratio": -7.479978648916585e-06, "logits/chosen": -0.19847072660923004, "logits/rejected": -0.23306681215763092, "logps/chosen": -7.786977948853746e-05, "logps/rejected": -2.460477352142334, "loss": 0.3654, "nll_loss": 0.09134089946746826, "rewards/accuracies": 1.0, "rewards/chosen": -7.786978130752686e-06, "rewards/margins": 0.24603994190692902, "rewards/rejected": -0.2460477352142334, "step": 13194 }, { "epoch": 9.125172890733056, "grad_norm": 5.66312313079834, "learning_rate": 4.8601506070385745e-06, "log_odds_chosen": 11.594490051269531, "log_odds_ratio": -6.0059661336708814e-05, "logits/chosen": -0.4265395402908325, "logits/rejected": -0.4704775810241699, "logps/chosen": -0.0002761242794804275, "logps/rejected": -2.5915656089782715, "loss": 0.487, "nll_loss": 0.12174142897129059, "rewards/accuracies": 1.0, "rewards/chosen": -2.7612426492851228e-05, "rewards/margins": 0.25912895798683167, "rewards/rejected": -0.25915655493736267, "step": 13195 }, { "epoch": 9.125864453665283, "grad_norm": 3.052619457244873, "learning_rate": 4.856308590748425e-06, "log_odds_chosen": 11.270345687866211, "log_odds_ratio": -5.994427192490548e-05, "logits/chosen": -0.34307360649108887, "logits/rejected": -0.411067932844162, "logps/chosen": -0.0003244389081373811, "logps/rejected": -2.9828009605407715, "loss": 0.3114, "nll_loss": 0.07783990353345871, "rewards/accuracies": 1.0, "rewards/chosen": -3.244388790335506e-05, "rewards/margins": 0.29824766516685486, "rewards/rejected": -0.29828011989593506, "step": 13196 }, { "epoch": 9.12655601659751, "grad_norm": 3.9561896324157715, "learning_rate": 4.852466574458276e-06, "log_odds_chosen": 11.210367202758789, "log_odds_ratio": -2.6617461116984487e-05, "logits/chosen": -0.15798631310462952, "logits/rejected": -0.2608197033405304, "logps/chosen": -0.00025633175391703844, "logps/rejected": -2.435305595397949, "loss": 0.3488, "nll_loss": 0.0871882438659668, "rewards/accuracies": 1.0, "rewards/chosen": -2.5633176846895367e-05, "rewards/margins": 0.24350491166114807, "rewards/rejected": -0.2435305416584015, "step": 13197 }, { "epoch": 9.127247579529737, "grad_norm": 3.7001283168792725, "learning_rate": 4.848624558168127e-06, "log_odds_chosen": 11.743751525878906, "log_odds_ratio": -6.15029493928887e-05, "logits/chosen": -0.11739799380302429, "logits/rejected": -0.03368782252073288, "logps/chosen": -0.00014788135013077408, "logps/rejected": -2.913625717163086, "loss": 0.3259, "nll_loss": 0.08147196471691132, "rewards/accuracies": 1.0, "rewards/chosen": -1.4788134649279527e-05, "rewards/margins": 0.29134780168533325, "rewards/rejected": -0.29136258363723755, "step": 13198 }, { "epoch": 9.127939142461964, "grad_norm": 3.7062315940856934, "learning_rate": 4.8447825418779775e-06, "log_odds_chosen": 11.067384719848633, "log_odds_ratio": -4.263326627551578e-05, "logits/chosen": 0.023071758449077606, "logits/rejected": -0.03538735210895538, "logps/chosen": -0.0001302417367696762, "logps/rejected": -2.1587891578674316, "loss": 0.3631, "nll_loss": 0.09078304469585419, "rewards/accuracies": 1.0, "rewards/chosen": -1.3024174222664442e-05, "rewards/margins": 0.21586589515209198, "rewards/rejected": -0.2158789038658142, "step": 13199 }, { "epoch": 9.12863070539419, "grad_norm": 3.1669230461120605, "learning_rate": 4.840940525587829e-06, "log_odds_chosen": 11.683211326599121, "log_odds_ratio": -1.102572787203826e-05, "logits/chosen": -0.33253878355026245, "logits/rejected": -0.23485800623893738, "logps/chosen": -7.140888192225248e-05, "logps/rejected": -2.051136016845703, "loss": 0.2974, "nll_loss": 0.0743558332324028, "rewards/accuracies": 1.0, "rewards/chosen": -7.140888556023128e-06, "rewards/margins": 0.2051064670085907, "rewards/rejected": -0.20511361956596375, "step": 13200 }, { "epoch": 9.129322268326417, "grad_norm": 2.7919909954071045, "learning_rate": 4.837098509297679e-06, "log_odds_chosen": 10.918241500854492, "log_odds_ratio": -2.582272827567067e-05, "logits/chosen": -0.52049720287323, "logits/rejected": -0.577045202255249, "logps/chosen": -0.00011689725215546787, "logps/rejected": -1.9347944259643555, "loss": 0.2784, "nll_loss": 0.0695895105600357, "rewards/accuracies": 1.0, "rewards/chosen": -1.1689724487951025e-05, "rewards/margins": 0.19346776604652405, "rewards/rejected": -0.19347944855690002, "step": 13201 }, { "epoch": 9.130013831258644, "grad_norm": 3.1376986503601074, "learning_rate": 4.8332564930075305e-06, "log_odds_chosen": 11.279829025268555, "log_odds_ratio": -4.477910260902718e-05, "logits/chosen": 0.17851456999778748, "logits/rejected": 0.10664086788892746, "logps/chosen": -0.00012507177598308772, "logps/rejected": -2.1952924728393555, "loss": 0.2443, "nll_loss": 0.06107247620820999, "rewards/accuracies": 1.0, "rewards/chosen": -1.2507178325904533e-05, "rewards/margins": 0.21951673924922943, "rewards/rejected": -0.21952924132347107, "step": 13202 }, { "epoch": 9.130705394190871, "grad_norm": 2.9854328632354736, "learning_rate": 4.829414476717381e-06, "log_odds_chosen": 10.503463745117188, "log_odds_ratio": -0.0001236013777088374, "logits/chosen": -0.40685075521469116, "logits/rejected": -0.43401819467544556, "logps/chosen": -0.0003842499863822013, "logps/rejected": -2.254061698913574, "loss": 0.3579, "nll_loss": 0.08946387469768524, "rewards/accuracies": 1.0, "rewards/chosen": -3.842500154860318e-05, "rewards/margins": 0.22536775469779968, "rewards/rejected": -0.22540615499019623, "step": 13203 }, { "epoch": 9.131396957123098, "grad_norm": 3.4810919761657715, "learning_rate": 4.825572460427232e-06, "log_odds_chosen": 11.446053504943848, "log_odds_ratio": -3.7420730222947896e-05, "logits/chosen": -0.3691715598106384, "logits/rejected": -0.3924041986465454, "logps/chosen": -0.00024286792904604226, "logps/rejected": -2.8767917156219482, "loss": 0.3574, "nll_loss": 0.08934895694255829, "rewards/accuracies": 1.0, "rewards/chosen": -2.4286793632199988e-05, "rewards/margins": 0.2876548767089844, "rewards/rejected": -0.28767916560173035, "step": 13204 }, { "epoch": 9.132088520055325, "grad_norm": 3.536254644393921, "learning_rate": 4.821730444137084e-06, "log_odds_chosen": 11.507623672485352, "log_odds_ratio": -2.183894866902847e-05, "logits/chosen": 0.04829786717891693, "logits/rejected": -0.0014524534344673157, "logps/chosen": -0.00013335171388462186, "logps/rejected": -2.441422462463379, "loss": 0.359, "nll_loss": 0.08974535018205643, "rewards/accuracies": 1.0, "rewards/chosen": -1.3335172297956888e-05, "rewards/margins": 0.24412892758846283, "rewards/rejected": -0.24414226412773132, "step": 13205 }, { "epoch": 9.132780082987551, "grad_norm": 3.27201509475708, "learning_rate": 4.817888427846934e-06, "log_odds_chosen": 11.764434814453125, "log_odds_ratio": -0.00012924049224238843, "logits/chosen": 0.09308671951293945, "logits/rejected": -0.0931982696056366, "logps/chosen": -0.0001480036007706076, "logps/rejected": -2.6317474842071533, "loss": 0.3179, "nll_loss": 0.07945744693279266, "rewards/accuracies": 1.0, "rewards/chosen": -1.4800360077060759e-05, "rewards/margins": 0.26315996050834656, "rewards/rejected": -0.26317477226257324, "step": 13206 }, { "epoch": 9.133471645919778, "grad_norm": 3.4189751148223877, "learning_rate": 4.814046411556785e-06, "log_odds_chosen": 11.023716926574707, "log_odds_ratio": -3.465122790657915e-05, "logits/chosen": -0.3066454827785492, "logits/rejected": -0.2448291927576065, "logps/chosen": -0.0004443526268005371, "logps/rejected": -2.4091989994049072, "loss": 0.5667, "nll_loss": 0.14166930317878723, "rewards/accuracies": 1.0, "rewards/chosen": -4.443526268005371e-05, "rewards/margins": 0.2408754825592041, "rewards/rejected": -0.24091993272304535, "step": 13207 }, { "epoch": 9.134163208852005, "grad_norm": 4.831146240234375, "learning_rate": 4.810204395266636e-06, "log_odds_chosen": 12.028083801269531, "log_odds_ratio": -4.996353527531028e-05, "logits/chosen": 0.03542035073041916, "logits/rejected": 0.009335717186331749, "logps/chosen": -0.000172814674442634, "logps/rejected": -2.9869327545166016, "loss": 0.5162, "nll_loss": 0.12903599441051483, "rewards/accuracies": 1.0, "rewards/chosen": -1.7281467080465518e-05, "rewards/margins": 0.2986759841442108, "rewards/rejected": -0.2986932694911957, "step": 13208 }, { "epoch": 9.134854771784232, "grad_norm": 3.973660469055176, "learning_rate": 4.8063623789764875e-06, "log_odds_chosen": 11.09183120727539, "log_odds_ratio": -2.030867472058162e-05, "logits/chosen": -0.3006454408168793, "logits/rejected": -0.3231848478317261, "logps/chosen": -0.00012072438403265551, "logps/rejected": -1.8342299461364746, "loss": 0.4669, "nll_loss": 0.11673028767108917, "rewards/accuracies": 1.0, "rewards/chosen": -1.2072438948962372e-05, "rewards/margins": 0.18341092765331268, "rewards/rejected": -0.1834229826927185, "step": 13209 }, { "epoch": 9.135546334716459, "grad_norm": 3.219853401184082, "learning_rate": 4.802520362686338e-06, "log_odds_chosen": 10.024149894714355, "log_odds_ratio": -0.0005982537404634058, "logits/chosen": -0.10521053522825241, "logits/rejected": -0.18743349611759186, "logps/chosen": -0.0005603090394288301, "logps/rejected": -1.577427864074707, "loss": 0.2468, "nll_loss": 0.0616336427628994, "rewards/accuracies": 1.0, "rewards/chosen": -5.6030909036053345e-05, "rewards/margins": 0.1576867550611496, "rewards/rejected": -0.15774278342723846, "step": 13210 }, { "epoch": 9.136237897648686, "grad_norm": 3.0521786212921143, "learning_rate": 4.798678346396189e-06, "log_odds_chosen": 12.086573600769043, "log_odds_ratio": -4.3436888518044725e-05, "logits/chosen": 0.36556896567344666, "logits/rejected": 0.10947079211473465, "logps/chosen": -0.00013567760470323265, "logps/rejected": -3.012777328491211, "loss": 0.3374, "nll_loss": 0.08434304594993591, "rewards/accuracies": 1.0, "rewards/chosen": -1.3567760106525384e-05, "rewards/margins": 0.3012641370296478, "rewards/rejected": -0.30127769708633423, "step": 13211 }, { "epoch": 9.136929460580912, "grad_norm": 3.5349905490875244, "learning_rate": 4.794836330106041e-06, "log_odds_chosen": 11.848288536071777, "log_odds_ratio": -0.0001640029513509944, "logits/chosen": -0.2809010148048401, "logits/rejected": -0.46270230412483215, "logps/chosen": -0.0003104759962297976, "logps/rejected": -2.47737717628479, "loss": 0.3626, "nll_loss": 0.09063741564750671, "rewards/accuracies": 1.0, "rewards/chosen": -3.1047602533362806e-05, "rewards/margins": 0.24770666658878326, "rewards/rejected": -0.24773772060871124, "step": 13212 }, { "epoch": 9.13762102351314, "grad_norm": 4.55886697769165, "learning_rate": 4.7909943138158905e-06, "log_odds_chosen": 11.779890060424805, "log_odds_ratio": -1.923369745782111e-05, "logits/chosen": -0.012806426733732224, "logits/rejected": -0.1646498739719391, "logps/chosen": -0.00022397295106202364, "logps/rejected": -2.825650453567505, "loss": 0.4991, "nll_loss": 0.12476147711277008, "rewards/accuracies": 1.0, "rewards/chosen": -2.2397296561393887e-05, "rewards/margins": 0.2825426459312439, "rewards/rejected": -0.28256505727767944, "step": 13213 }, { "epoch": 9.138312586445366, "grad_norm": 2.7805097103118896, "learning_rate": 4.787152297525742e-06, "log_odds_chosen": 11.518321990966797, "log_odds_ratio": -4.23527744715102e-05, "logits/chosen": 0.10589182376861572, "logits/rejected": -0.06496121734380722, "logps/chosen": -0.0002213009138358757, "logps/rejected": -2.6526756286621094, "loss": 0.3069, "nll_loss": 0.07672608643770218, "rewards/accuracies": 1.0, "rewards/chosen": -2.2130094293970615e-05, "rewards/margins": 0.2652454376220703, "rewards/rejected": -0.265267550945282, "step": 13214 }, { "epoch": 9.139004149377593, "grad_norm": 3.2939071655273438, "learning_rate": 4.783310281235592e-06, "log_odds_chosen": 10.641448020935059, "log_odds_ratio": -0.00021939512225799263, "logits/chosen": -0.47907182574272156, "logits/rejected": -0.47848352789878845, "logps/chosen": -0.0005785435787402093, "logps/rejected": -2.2549679279327393, "loss": 0.4797, "nll_loss": 0.11991085112094879, "rewards/accuracies": 1.0, "rewards/chosen": -5.7854358601616696e-05, "rewards/margins": 0.2254389524459839, "rewards/rejected": -0.2254967987537384, "step": 13215 }, { "epoch": 9.13969571230982, "grad_norm": 2.7339344024658203, "learning_rate": 4.7794682649454436e-06, "log_odds_chosen": 12.13232135772705, "log_odds_ratio": -1.1469980563560966e-05, "logits/chosen": -0.4271049201488495, "logits/rejected": -0.41360318660736084, "logps/chosen": -0.00045071684871800244, "logps/rejected": -2.8040518760681152, "loss": 0.3522, "nll_loss": 0.08804069459438324, "rewards/accuracies": 1.0, "rewards/chosen": -4.507168705458753e-05, "rewards/margins": 0.2803601324558258, "rewards/rejected": -0.2804052233695984, "step": 13216 }, { "epoch": 9.140387275242047, "grad_norm": 3.3445003032684326, "learning_rate": 4.775626248655294e-06, "log_odds_chosen": 11.834087371826172, "log_odds_ratio": -3.875298716593534e-05, "logits/chosen": -0.34143972396850586, "logits/rejected": -0.41385823488235474, "logps/chosen": -0.0002676868753042072, "logps/rejected": -2.8256959915161133, "loss": 0.6308, "nll_loss": 0.15769058465957642, "rewards/accuracies": 1.0, "rewards/chosen": -2.676868643902708e-05, "rewards/margins": 0.2825428247451782, "rewards/rejected": -0.28256961703300476, "step": 13217 }, { "epoch": 9.141078838174273, "grad_norm": 2.49408221244812, "learning_rate": 4.771784232365145e-06, "log_odds_chosen": 11.425577163696289, "log_odds_ratio": -2.738785588007886e-05, "logits/chosen": -0.13537722826004028, "logits/rejected": -0.1677563190460205, "logps/chosen": -0.0001892523287096992, "logps/rejected": -2.5062432289123535, "loss": 0.2329, "nll_loss": 0.05821177735924721, "rewards/accuracies": 1.0, "rewards/chosen": -1.8925233234767802e-05, "rewards/margins": 0.25060540437698364, "rewards/rejected": -0.25062432885169983, "step": 13218 }, { "epoch": 9.1417704011065, "grad_norm": 3.0973517894744873, "learning_rate": 4.767942216074997e-06, "log_odds_chosen": 10.915397644042969, "log_odds_ratio": -2.8263664717087522e-05, "logits/chosen": -0.27408403158187866, "logits/rejected": -0.3834518790245056, "logps/chosen": -0.00018645863747224212, "logps/rejected": -2.13749098777771, "loss": 0.2723, "nll_loss": 0.06806787848472595, "rewards/accuracies": 1.0, "rewards/chosen": -1.8645861928234808e-05, "rewards/margins": 0.21373045444488525, "rewards/rejected": -0.21374909579753876, "step": 13219 }, { "epoch": 9.142461964038727, "grad_norm": 4.3579206466674805, "learning_rate": 4.764100199784847e-06, "log_odds_chosen": 11.356622695922852, "log_odds_ratio": -4.268779593985528e-05, "logits/chosen": -0.3055403232574463, "logits/rejected": -0.40657907724380493, "logps/chosen": -0.00023026179405860603, "logps/rejected": -2.4457850456237793, "loss": 0.3781, "nll_loss": 0.09453283250331879, "rewards/accuracies": 1.0, "rewards/chosen": -2.3026179405860603e-05, "rewards/margins": 0.2445555031299591, "rewards/rejected": -0.2445785105228424, "step": 13220 }, { "epoch": 9.143153526970954, "grad_norm": 2.9954302310943604, "learning_rate": 4.760258183494698e-06, "log_odds_chosen": 10.43281364440918, "log_odds_ratio": -6.213808956090361e-05, "logits/chosen": -0.08556319773197174, "logits/rejected": -0.05594261735677719, "logps/chosen": -0.000477094785310328, "logps/rejected": -2.231978416442871, "loss": 0.2928, "nll_loss": 0.07320213317871094, "rewards/accuracies": 1.0, "rewards/chosen": -4.7709479986224324e-05, "rewards/margins": 0.2231501340866089, "rewards/rejected": -0.2231978476047516, "step": 13221 }, { "epoch": 9.14384508990318, "grad_norm": 3.0216176509857178, "learning_rate": 4.756416167204549e-06, "log_odds_chosen": 11.871658325195312, "log_odds_ratio": -9.193705773213878e-05, "logits/chosen": -0.07090628892183304, "logits/rejected": -0.13959215581417084, "logps/chosen": -0.00019610798335634172, "logps/rejected": -3.1729345321655273, "loss": 0.251, "nll_loss": 0.06273071467876434, "rewards/accuracies": 1.0, "rewards/chosen": -1.9610799427027814e-05, "rewards/margins": 0.3172738552093506, "rewards/rejected": -0.3172934651374817, "step": 13222 }, { "epoch": 9.144536652835408, "grad_norm": 2.8475561141967773, "learning_rate": 4.7525741509144005e-06, "log_odds_chosen": 10.512632369995117, "log_odds_ratio": -3.802761057158932e-05, "logits/chosen": -0.2520372271537781, "logits/rejected": -0.31475120782852173, "logps/chosen": -0.00022900404292158782, "logps/rejected": -1.776663064956665, "loss": 0.302, "nll_loss": 0.07548577338457108, "rewards/accuracies": 1.0, "rewards/chosen": -2.2900405383552425e-05, "rewards/margins": 0.17764338850975037, "rewards/rejected": -0.1776663064956665, "step": 13223 }, { "epoch": 9.145228215767634, "grad_norm": 4.809469223022461, "learning_rate": 4.74873213462425e-06, "log_odds_chosen": 10.421841621398926, "log_odds_ratio": -0.0001059464702848345, "logits/chosen": 0.10107007622718811, "logits/rejected": 0.04501248896121979, "logps/chosen": -0.0002126324106939137, "logps/rejected": -2.0651650428771973, "loss": 0.4475, "nll_loss": 0.11186723411083221, "rewards/accuracies": 1.0, "rewards/chosen": -2.1263243979774415e-05, "rewards/margins": 0.2064952552318573, "rewards/rejected": -0.20651650428771973, "step": 13224 }, { "epoch": 9.145919778699861, "grad_norm": 4.251705169677734, "learning_rate": 4.744890118334102e-06, "log_odds_chosen": 11.55975341796875, "log_odds_ratio": -3.190529241692275e-05, "logits/chosen": -0.27173903584480286, "logits/rejected": -0.3221912384033203, "logps/chosen": -0.0002216800203314051, "logps/rejected": -2.4351489543914795, "loss": 0.3814, "nll_loss": 0.09534931182861328, "rewards/accuracies": 1.0, "rewards/chosen": -2.216800203314051e-05, "rewards/margins": 0.2434927225112915, "rewards/rejected": -0.24351489543914795, "step": 13225 }, { "epoch": 9.146611341632088, "grad_norm": 4.158973217010498, "learning_rate": 4.741048102043953e-06, "log_odds_chosen": 10.777334213256836, "log_odds_ratio": -7.380648457910866e-05, "logits/chosen": -0.21886664628982544, "logits/rejected": -0.2848142981529236, "logps/chosen": -0.0001596217043697834, "logps/rejected": -2.1103477478027344, "loss": 0.3993, "nll_loss": 0.09982932358980179, "rewards/accuracies": 1.0, "rewards/chosen": -1.596217043697834e-05, "rewards/margins": 0.21101883053779602, "rewards/rejected": -0.21103478968143463, "step": 13226 }, { "epoch": 9.147302904564315, "grad_norm": 3.8205955028533936, "learning_rate": 4.7372060857538035e-06, "log_odds_chosen": 10.89959716796875, "log_odds_ratio": -5.4202715546125546e-05, "logits/chosen": -0.03708948194980621, "logits/rejected": -0.12110021710395813, "logps/chosen": -0.0002881829859688878, "logps/rejected": -2.0008976459503174, "loss": 0.3845, "nll_loss": 0.09611043334007263, "rewards/accuracies": 1.0, "rewards/chosen": -2.88182982330909e-05, "rewards/margins": 0.20006093382835388, "rewards/rejected": -0.20008975267410278, "step": 13227 }, { "epoch": 9.147994467496542, "grad_norm": 3.11427640914917, "learning_rate": 4.733364069463655e-06, "log_odds_chosen": 10.746748924255371, "log_odds_ratio": -0.00014942459529265761, "logits/chosen": -0.20317408442497253, "logits/rejected": -0.17767639458179474, "logps/chosen": -0.0002456876100040972, "logps/rejected": -2.1270229816436768, "loss": 0.3072, "nll_loss": 0.07679583132266998, "rewards/accuracies": 1.0, "rewards/chosen": -2.4568758817622438e-05, "rewards/margins": 0.2126777470111847, "rewards/rejected": -0.21270230412483215, "step": 13228 }, { "epoch": 9.148686030428768, "grad_norm": 2.43300199508667, "learning_rate": 4.729522053173506e-06, "log_odds_chosen": 9.599930763244629, "log_odds_ratio": -0.0009399011032655835, "logits/chosen": -0.5691751837730408, "logits/rejected": -0.6450884938240051, "logps/chosen": -0.0008461083052679896, "logps/rejected": -1.8899251222610474, "loss": 0.2957, "nll_loss": 0.07382595539093018, "rewards/accuracies": 1.0, "rewards/chosen": -8.461083780275658e-05, "rewards/margins": 0.1889079213142395, "rewards/rejected": -0.18899253010749817, "step": 13229 }, { "epoch": 9.149377593360995, "grad_norm": 3.4470419883728027, "learning_rate": 4.725680036883357e-06, "log_odds_chosen": 11.664287567138672, "log_odds_ratio": -4.2229145037708804e-05, "logits/chosen": 0.18879088759422302, "logits/rejected": 0.10272565484046936, "logps/chosen": -0.00011901649122592062, "logps/rejected": -2.4768638610839844, "loss": 0.4301, "nll_loss": 0.10751504451036453, "rewards/accuracies": 1.0, "rewards/chosen": -1.1901649486389942e-05, "rewards/margins": 0.24767446517944336, "rewards/rejected": -0.24768637120723724, "step": 13230 }, { "epoch": 9.150069156293222, "grad_norm": 3.0610811710357666, "learning_rate": 4.721838020593207e-06, "log_odds_chosen": 10.91439437866211, "log_odds_ratio": -0.0006155165028758347, "logits/chosen": -0.028582245111465454, "logits/rejected": -0.03339751809835434, "logps/chosen": -0.0004672374634537846, "logps/rejected": -2.9429073333740234, "loss": 0.3093, "nll_loss": 0.07725828886032104, "rewards/accuracies": 1.0, "rewards/chosen": -4.672374780056998e-05, "rewards/margins": 0.29424402117729187, "rewards/rejected": -0.2942907214164734, "step": 13231 }, { "epoch": 9.150760719225449, "grad_norm": 4.357141494750977, "learning_rate": 4.717996004303058e-06, "log_odds_chosen": 11.859966278076172, "log_odds_ratio": -1.0671762538549956e-05, "logits/chosen": 0.17385049164295197, "logits/rejected": 0.0562586784362793, "logps/chosen": -8.415168122155592e-05, "logps/rejected": -2.2523984909057617, "loss": 0.4487, "nll_loss": 0.1121809184551239, "rewards/accuracies": 1.0, "rewards/chosen": -8.415167940256651e-06, "rewards/margins": 0.22523143887519836, "rewards/rejected": -0.2252398431301117, "step": 13232 }, { "epoch": 9.151452282157676, "grad_norm": 2.952986240386963, "learning_rate": 4.71415398801291e-06, "log_odds_chosen": 10.926140785217285, "log_odds_ratio": -0.0020880952943116426, "logits/chosen": -0.3445931673049927, "logits/rejected": -0.3721742630004883, "logps/chosen": -0.01039061602205038, "logps/rejected": -2.7033534049987793, "loss": 0.3446, "nll_loss": 0.0859515517950058, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010390614625066519, "rewards/margins": 0.2692962884902954, "rewards/rejected": -0.2703353464603424, "step": 13233 }, { "epoch": 9.152143845089903, "grad_norm": 2.7857892513275146, "learning_rate": 4.7103119717227604e-06, "log_odds_chosen": 11.978271484375, "log_odds_ratio": -2.0148203475400805e-05, "logits/chosen": -0.4585052728652954, "logits/rejected": -0.5114193558692932, "logps/chosen": -0.00017187956837005913, "logps/rejected": -2.7586655616760254, "loss": 0.4332, "nll_loss": 0.10829560458660126, "rewards/accuracies": 1.0, "rewards/chosen": -1.7187956473208033e-05, "rewards/margins": 0.2758493423461914, "rewards/rejected": -0.2758665382862091, "step": 13234 }, { "epoch": 9.15283540802213, "grad_norm": 4.290009021759033, "learning_rate": 4.706469955432611e-06, "log_odds_chosen": 9.976953506469727, "log_odds_ratio": -0.00010702211875468493, "logits/chosen": -0.26274943351745605, "logits/rejected": -0.33820897340774536, "logps/chosen": -0.0008673262782394886, "logps/rejected": -2.315030574798584, "loss": 0.6599, "nll_loss": 0.16497503221035004, "rewards/accuracies": 1.0, "rewards/chosen": -8.67326307343319e-05, "rewards/margins": 0.23141631484031677, "rewards/rejected": -0.23150306940078735, "step": 13235 }, { "epoch": 9.153526970954356, "grad_norm": 2.912217378616333, "learning_rate": 4.702627939142462e-06, "log_odds_chosen": 11.16337776184082, "log_odds_ratio": -1.8966689822264016e-05, "logits/chosen": -0.6119264364242554, "logits/rejected": -0.5414671897888184, "logps/chosen": -0.00011048486339859664, "logps/rejected": -1.7643336057662964, "loss": 0.3305, "nll_loss": 0.0826151967048645, "rewards/accuracies": 1.0, "rewards/chosen": -1.1048487067455426e-05, "rewards/margins": 0.1764223277568817, "rewards/rejected": -0.17643338441848755, "step": 13236 }, { "epoch": 9.154218533886583, "grad_norm": 2.99233341217041, "learning_rate": 4.6987859228523135e-06, "log_odds_chosen": 10.353715896606445, "log_odds_ratio": -0.00020194250100757927, "logits/chosen": -0.7038260102272034, "logits/rejected": -0.6964368224143982, "logps/chosen": -0.0006942212348803878, "logps/rejected": -2.1969351768493652, "loss": 0.3749, "nll_loss": 0.09371718019247055, "rewards/accuracies": 1.0, "rewards/chosen": -6.942212348803878e-05, "rewards/margins": 0.2196240872144699, "rewards/rejected": -0.21969351172447205, "step": 13237 }, { "epoch": 9.15491009681881, "grad_norm": 2.7351162433624268, "learning_rate": 4.6949439065621634e-06, "log_odds_chosen": 11.356271743774414, "log_odds_ratio": -8.974706724984571e-05, "logits/chosen": -0.26438623666763306, "logits/rejected": -0.2929072380065918, "logps/chosen": -0.0002658174198586494, "logps/rejected": -2.7258095741271973, "loss": 0.2832, "nll_loss": 0.07078118622303009, "rewards/accuracies": 1.0, "rewards/chosen": -2.6581741622067057e-05, "rewards/margins": 0.2725543677806854, "rewards/rejected": -0.27258095145225525, "step": 13238 }, { "epoch": 9.155601659751037, "grad_norm": 4.187948226928711, "learning_rate": 4.691101890272015e-06, "log_odds_chosen": 12.833627700805664, "log_odds_ratio": -1.6834528651088476e-05, "logits/chosen": -0.151898592710495, "logits/rejected": -0.24054181575775146, "logps/chosen": -0.0004079265345353633, "logps/rejected": -4.010830402374268, "loss": 0.4739, "nll_loss": 0.11846268177032471, "rewards/accuracies": 1.0, "rewards/chosen": -4.079265636391938e-05, "rewards/margins": 0.40104228258132935, "rewards/rejected": -0.4010831117630005, "step": 13239 }, { "epoch": 9.156293222683264, "grad_norm": 3.280129909515381, "learning_rate": 4.687259873981866e-06, "log_odds_chosen": 10.422073364257812, "log_odds_ratio": -0.0007630666368640959, "logits/chosen": -0.3538857102394104, "logits/rejected": -0.34520047903060913, "logps/chosen": -0.0005645141354762018, "logps/rejected": -2.46002459526062, "loss": 0.5715, "nll_loss": 0.14279311895370483, "rewards/accuracies": 1.0, "rewards/chosen": -5.6451412092428654e-05, "rewards/margins": 0.245946004986763, "rewards/rejected": -0.2460024654865265, "step": 13240 }, { "epoch": 9.15698478561549, "grad_norm": 3.231613874435425, "learning_rate": 4.6834178576917165e-06, "log_odds_chosen": 10.341863632202148, "log_odds_ratio": -0.0001687141921138391, "logits/chosen": -0.25097715854644775, "logits/rejected": -0.28076058626174927, "logps/chosen": -0.00027557072462514043, "logps/rejected": -1.966200351715088, "loss": 0.3784, "nll_loss": 0.09458751976490021, "rewards/accuracies": 1.0, "rewards/chosen": -2.7557072826311924e-05, "rewards/margins": 0.19659249484539032, "rewards/rejected": -0.19662004709243774, "step": 13241 }, { "epoch": 9.157676348547717, "grad_norm": 2.4268250465393066, "learning_rate": 4.679575841401568e-06, "log_odds_chosen": 9.504556655883789, "log_odds_ratio": -0.00011133919178973883, "logits/chosen": -0.1775788515806198, "logits/rejected": -0.1743740439414978, "logps/chosen": -0.00029458932112902403, "logps/rejected": -1.212559700012207, "loss": 0.3033, "nll_loss": 0.07581616938114166, "rewards/accuracies": 1.0, "rewards/chosen": -2.9458933568093926e-05, "rewards/margins": 0.12122651189565659, "rewards/rejected": -0.12125596404075623, "step": 13242 }, { "epoch": 9.158367911479944, "grad_norm": 4.34454345703125, "learning_rate": 4.675733825111419e-06, "log_odds_chosen": 10.734809875488281, "log_odds_ratio": -0.00022274142247624695, "logits/chosen": -0.38664209842681885, "logits/rejected": -0.3307802379131317, "logps/chosen": -0.0003078333684243262, "logps/rejected": -2.6354517936706543, "loss": 0.3499, "nll_loss": 0.08745646476745605, "rewards/accuracies": 1.0, "rewards/chosen": -3.0783339752815664e-05, "rewards/margins": 0.2635143995285034, "rewards/rejected": -0.2635451555252075, "step": 13243 }, { "epoch": 9.159059474412171, "grad_norm": 4.3627495765686035, "learning_rate": 4.67189180882127e-06, "log_odds_chosen": 10.740904808044434, "log_odds_ratio": -9.477129788137972e-05, "logits/chosen": -0.17556828260421753, "logits/rejected": -0.27577683329582214, "logps/chosen": -0.00023830297868698835, "logps/rejected": -1.8701783418655396, "loss": 0.3844, "nll_loss": 0.09609566628932953, "rewards/accuracies": 1.0, "rewards/chosen": -2.3830298232496716e-05, "rewards/margins": 0.18699400126934052, "rewards/rejected": -0.18701782822608948, "step": 13244 }, { "epoch": 9.159751037344398, "grad_norm": 3.9714725017547607, "learning_rate": 4.66804979253112e-06, "log_odds_chosen": 11.494601249694824, "log_odds_ratio": -1.8003996956394985e-05, "logits/chosen": -0.4515254497528076, "logits/rejected": -0.48374801874160767, "logps/chosen": -0.0001993764890357852, "logps/rejected": -2.5851736068725586, "loss": 0.4451, "nll_loss": 0.11128557473421097, "rewards/accuracies": 1.0, "rewards/chosen": -1.993764817598276e-05, "rewards/margins": 0.2584974467754364, "rewards/rejected": -0.25851738452911377, "step": 13245 }, { "epoch": 9.160442600276625, "grad_norm": 1.8536094427108765, "learning_rate": 4.664207776240972e-06, "log_odds_chosen": 10.494796752929688, "log_odds_ratio": -0.0001465547102270648, "logits/chosen": -0.7109159231185913, "logits/rejected": -0.7627403736114502, "logps/chosen": -0.00023228241479955614, "logps/rejected": -1.7776033878326416, "loss": 0.2253, "nll_loss": 0.05630149319767952, "rewards/accuracies": 1.0, "rewards/chosen": -2.3228241843753494e-05, "rewards/margins": 0.17773711681365967, "rewards/rejected": -0.17776033282279968, "step": 13246 }, { "epoch": 9.161134163208851, "grad_norm": 3.3924193382263184, "learning_rate": 4.660365759950822e-06, "log_odds_chosen": 9.826598167419434, "log_odds_ratio": -0.0005562108126468956, "logits/chosen": -0.6887165307998657, "logits/rejected": -0.7289565801620483, "logps/chosen": -0.0017379340715706348, "logps/rejected": -2.0645713806152344, "loss": 0.3568, "nll_loss": 0.08914747089147568, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017379340715706348, "rewards/margins": 0.2062833607196808, "rewards/rejected": -0.20645713806152344, "step": 13247 }, { "epoch": 9.161825726141078, "grad_norm": 3.9670066833496094, "learning_rate": 4.6565237436606735e-06, "log_odds_chosen": 10.267412185668945, "log_odds_ratio": -0.0028830033261328936, "logits/chosen": -0.37665268778800964, "logits/rejected": -0.3267573118209839, "logps/chosen": -0.021847128868103027, "logps/rejected": -2.6090478897094727, "loss": 0.294, "nll_loss": 0.0732061117887497, "rewards/accuracies": 1.0, "rewards/chosen": -0.002184713026508689, "rewards/margins": 0.2587200403213501, "rewards/rejected": -0.2609047591686249, "step": 13248 }, { "epoch": 9.162517289073305, "grad_norm": 3.581749200820923, "learning_rate": 4.652681727370524e-06, "log_odds_chosen": 10.095499992370605, "log_odds_ratio": -8.256595174316317e-05, "logits/chosen": 0.1312323659658432, "logits/rejected": 0.16536569595336914, "logps/chosen": -0.0003430332290008664, "logps/rejected": -1.7387230396270752, "loss": 0.4115, "nll_loss": 0.10287502408027649, "rewards/accuracies": 1.0, "rewards/chosen": -3.4303324355278164e-05, "rewards/margins": 0.17383801937103271, "rewards/rejected": -0.17387232184410095, "step": 13249 }, { "epoch": 9.163208852005532, "grad_norm": 3.686554431915283, "learning_rate": 4.648839711080375e-06, "log_odds_chosen": 11.71548080444336, "log_odds_ratio": -2.5854322302620858e-05, "logits/chosen": 0.076650470495224, "logits/rejected": -0.05228927731513977, "logps/chosen": -8.165779581759125e-05, "logps/rejected": -2.2370681762695312, "loss": 0.2813, "nll_loss": 0.0703134834766388, "rewards/accuracies": 1.0, "rewards/chosen": -8.165779945557006e-06, "rewards/margins": 0.2236986607313156, "rewards/rejected": -0.22370684146881104, "step": 13250 }, { "epoch": 9.163900414937759, "grad_norm": 4.031307697296143, "learning_rate": 4.6449976947902266e-06, "log_odds_chosen": 11.770303726196289, "log_odds_ratio": -3.01509444398107e-05, "logits/chosen": -0.37642228603363037, "logits/rejected": -0.41639071702957153, "logps/chosen": -0.00029356483719311655, "logps/rejected": -3.3386707305908203, "loss": 0.6717, "nll_loss": 0.16791526973247528, "rewards/accuracies": 1.0, "rewards/chosen": -2.9356484446907416e-05, "rewards/margins": 0.33383771777153015, "rewards/rejected": -0.33386707305908203, "step": 13251 }, { "epoch": 9.164591977869986, "grad_norm": 3.6375107765197754, "learning_rate": 4.6411556785000765e-06, "log_odds_chosen": 10.574867248535156, "log_odds_ratio": -8.866946154739708e-05, "logits/chosen": -0.46668383479118347, "logits/rejected": -0.4627547860145569, "logps/chosen": -0.000216979649849236, "logps/rejected": -2.068161725997925, "loss": 0.3352, "nll_loss": 0.08379573374986649, "rewards/accuracies": 1.0, "rewards/chosen": -2.169796425732784e-05, "rewards/margins": 0.20679447054862976, "rewards/rejected": -0.2068161815404892, "step": 13252 }, { "epoch": 9.165283540802212, "grad_norm": 4.423868656158447, "learning_rate": 4.637313662209928e-06, "log_odds_chosen": 10.797757148742676, "log_odds_ratio": -5.5326938309008256e-05, "logits/chosen": 0.00524507462978363, "logits/rejected": 0.0028075315058231354, "logps/chosen": -0.00048614057595841587, "logps/rejected": -2.4063234329223633, "loss": 0.5175, "nll_loss": 0.12936095893383026, "rewards/accuracies": 1.0, "rewards/chosen": -4.8614056140650064e-05, "rewards/margins": 0.24058371782302856, "rewards/rejected": -0.24063235521316528, "step": 13253 }, { "epoch": 9.16597510373444, "grad_norm": 4.309619903564453, "learning_rate": 4.633471645919779e-06, "log_odds_chosen": 11.022064208984375, "log_odds_ratio": -0.00012898718705400825, "logits/chosen": -0.18947575986385345, "logits/rejected": -0.22546103596687317, "logps/chosen": -0.0002583316236268729, "logps/rejected": -2.0356574058532715, "loss": 1.0224, "nll_loss": 0.25558555126190186, "rewards/accuracies": 1.0, "rewards/chosen": -2.5833163817878813e-05, "rewards/margins": 0.2035399079322815, "rewards/rejected": -0.20356574654579163, "step": 13254 }, { "epoch": 9.166666666666666, "grad_norm": 2.9900362491607666, "learning_rate": 4.6296296296296296e-06, "log_odds_chosen": 11.046536445617676, "log_odds_ratio": -0.00019564780814107507, "logits/chosen": -0.1926698386669159, "logits/rejected": -0.20588555932044983, "logps/chosen": -0.00046490365639328957, "logps/rejected": -2.1357336044311523, "loss": 0.2424, "nll_loss": 0.06057025492191315, "rewards/accuracies": 1.0, "rewards/chosen": -4.6490364184137434e-05, "rewards/margins": 0.2135268747806549, "rewards/rejected": -0.21357333660125732, "step": 13255 }, { "epoch": 9.167358229598893, "grad_norm": 2.9802370071411133, "learning_rate": 4.625787613339481e-06, "log_odds_chosen": 10.443476676940918, "log_odds_ratio": -0.00010125573317054659, "logits/chosen": -0.32919472455978394, "logits/rejected": -0.3984687626361847, "logps/chosen": -0.0003405904571991414, "logps/rejected": -1.5294239521026611, "loss": 0.3343, "nll_loss": 0.08357033878564835, "rewards/accuracies": 1.0, "rewards/chosen": -3.405904499231838e-05, "rewards/margins": 0.1529083251953125, "rewards/rejected": -0.15294238924980164, "step": 13256 }, { "epoch": 9.16804979253112, "grad_norm": 2.9573252201080322, "learning_rate": 4.621945597049332e-06, "log_odds_chosen": 10.908576965332031, "log_odds_ratio": -0.0001610955805517733, "logits/chosen": 0.013558404520154, "logits/rejected": 0.03549904748797417, "logps/chosen": -0.0005097885732538998, "logps/rejected": -2.575289249420166, "loss": 0.3081, "nll_loss": 0.07699886709451675, "rewards/accuracies": 1.0, "rewards/chosen": -5.0978858780581504e-05, "rewards/margins": 0.2574779689311981, "rewards/rejected": -0.25752896070480347, "step": 13257 }, { "epoch": 9.168741355463347, "grad_norm": 3.787930965423584, "learning_rate": 4.618103580759183e-06, "log_odds_chosen": 10.715057373046875, "log_odds_ratio": -8.827356941765174e-05, "logits/chosen": -0.6768827438354492, "logits/rejected": -0.727750301361084, "logps/chosen": -0.00017680115706752986, "logps/rejected": -1.949446678161621, "loss": 0.4487, "nll_loss": 0.11216624081134796, "rewards/accuracies": 1.0, "rewards/chosen": -1.7680114979157224e-05, "rewards/margins": 0.19492697715759277, "rewards/rejected": -0.19494464993476868, "step": 13258 }, { "epoch": 9.169432918395573, "grad_norm": 3.8608124256134033, "learning_rate": 4.614261564469033e-06, "log_odds_chosen": 11.478191375732422, "log_odds_ratio": -1.4476814612862654e-05, "logits/chosen": -0.4526783227920532, "logits/rejected": -0.45420515537261963, "logps/chosen": -7.430824916809797e-05, "logps/rejected": -1.9516966342926025, "loss": 0.29, "nll_loss": 0.07249864190816879, "rewards/accuracies": 1.0, "rewards/chosen": -7.430824098264566e-06, "rewards/margins": 0.19516223669052124, "rewards/rejected": -0.19516965746879578, "step": 13259 }, { "epoch": 9.1701244813278, "grad_norm": 2.8934109210968018, "learning_rate": 4.610419548178885e-06, "log_odds_chosen": 10.930181503295898, "log_odds_ratio": -9.478238644078374e-05, "logits/chosen": -0.47720345854759216, "logits/rejected": -0.492647647857666, "logps/chosen": -0.00046956821461208165, "logps/rejected": -2.098984479904175, "loss": 0.3205, "nll_loss": 0.080109603703022, "rewards/accuracies": 1.0, "rewards/chosen": -4.695682582678273e-05, "rewards/margins": 0.2098514884710312, "rewards/rejected": -0.2098984569311142, "step": 13260 }, { "epoch": 9.170816044260027, "grad_norm": 4.492011070251465, "learning_rate": 4.606577531888735e-06, "log_odds_chosen": 11.076122283935547, "log_odds_ratio": -7.659607945242897e-05, "logits/chosen": -0.44519495964050293, "logits/rejected": -0.4828508794307709, "logps/chosen": -0.00019283223082311451, "logps/rejected": -1.979741096496582, "loss": 0.368, "nll_loss": 0.09198319911956787, "rewards/accuracies": 1.0, "rewards/chosen": -1.928322308231145e-05, "rewards/margins": 0.19795484840869904, "rewards/rejected": -0.19797411561012268, "step": 13261 }, { "epoch": 9.171507607192254, "grad_norm": 3.5539863109588623, "learning_rate": 4.6027355155985865e-06, "log_odds_chosen": 10.251945495605469, "log_odds_ratio": -0.00012411485658958554, "logits/chosen": -0.4729505777359009, "logits/rejected": -0.5075870156288147, "logps/chosen": -0.00024340019444935024, "logps/rejected": -1.953089714050293, "loss": 0.3644, "nll_loss": 0.0910826325416565, "rewards/accuracies": 1.0, "rewards/chosen": -2.4340020900126547e-05, "rewards/margins": 0.1952846348285675, "rewards/rejected": -0.19530898332595825, "step": 13262 }, { "epoch": 9.17219917012448, "grad_norm": 4.126911640167236, "learning_rate": 4.598893499308437e-06, "log_odds_chosen": 10.471588134765625, "log_odds_ratio": -3.9286449464270845e-05, "logits/chosen": -0.24379703402519226, "logits/rejected": -0.3949686884880066, "logps/chosen": -0.00031498580938205123, "logps/rejected": -2.1726913452148438, "loss": 0.4379, "nll_loss": 0.10947485268115997, "rewards/accuracies": 1.0, "rewards/chosen": -3.1498577300226316e-05, "rewards/margins": 0.2172376811504364, "rewards/rejected": -0.2172691524028778, "step": 13263 }, { "epoch": 9.172890733056708, "grad_norm": 4.387599945068359, "learning_rate": 4.595051483018288e-06, "log_odds_chosen": 12.87693977355957, "log_odds_ratio": -7.407785233226605e-06, "logits/chosen": -0.02308526635169983, "logits/rejected": -0.10359898954629898, "logps/chosen": -6.886209303047508e-05, "logps/rejected": -3.2710678577423096, "loss": 0.4755, "nll_loss": 0.11887811124324799, "rewards/accuracies": 1.0, "rewards/chosen": -6.886210030643269e-06, "rewards/margins": 0.32709988951683044, "rewards/rejected": -0.327106773853302, "step": 13264 }, { "epoch": 9.173582295988934, "grad_norm": 2.787508964538574, "learning_rate": 4.59120946672814e-06, "log_odds_chosen": 10.15397834777832, "log_odds_ratio": -9.300727106165141e-05, "logits/chosen": -0.5829552412033081, "logits/rejected": -0.643100380897522, "logps/chosen": -0.0002786455152090639, "logps/rejected": -1.7501752376556396, "loss": 0.2358, "nll_loss": 0.05893716216087341, "rewards/accuracies": 1.0, "rewards/chosen": -2.786455297609791e-05, "rewards/margins": 0.17498967051506042, "rewards/rejected": -0.17501753568649292, "step": 13265 }, { "epoch": 9.174273858921161, "grad_norm": 3.6737401485443115, "learning_rate": 4.5873674504379895e-06, "log_odds_chosen": 10.786094665527344, "log_odds_ratio": -6.616486643906683e-05, "logits/chosen": -0.32876649498939514, "logits/rejected": -0.3970365524291992, "logps/chosen": -0.00031801205477677286, "logps/rejected": -2.0390477180480957, "loss": 0.7942, "nll_loss": 0.19853723049163818, "rewards/accuracies": 1.0, "rewards/chosen": -3.180120620527305e-05, "rewards/margins": 0.20387297868728638, "rewards/rejected": -0.20390477776527405, "step": 13266 }, { "epoch": 9.174965421853388, "grad_norm": 2.290215015411377, "learning_rate": 4.583525434147841e-06, "log_odds_chosen": 10.114067077636719, "log_odds_ratio": -0.0001489850110374391, "logits/chosen": -0.42826682329177856, "logits/rejected": -0.4314981698989868, "logps/chosen": -0.00025290303165093064, "logps/rejected": -1.6824004650115967, "loss": 0.261, "nll_loss": 0.06524519622325897, "rewards/accuracies": 1.0, "rewards/chosen": -2.529030098230578e-05, "rewards/margins": 0.16821476817131042, "rewards/rejected": -0.1682400405406952, "step": 13267 }, { "epoch": 9.175656984785615, "grad_norm": 4.102094650268555, "learning_rate": 4.579683417857692e-06, "log_odds_chosen": 11.35265064239502, "log_odds_ratio": -4.903479202766903e-05, "logits/chosen": -0.20633630454540253, "logits/rejected": -0.19792324304580688, "logps/chosen": -0.0001682281435932964, "logps/rejected": -2.393183946609497, "loss": 0.433, "nll_loss": 0.10825060307979584, "rewards/accuracies": 1.0, "rewards/chosen": -1.682281435932964e-05, "rewards/margins": 0.23930156230926514, "rewards/rejected": -0.23931840062141418, "step": 13268 }, { "epoch": 9.176348547717842, "grad_norm": 3.2632980346679688, "learning_rate": 4.575841401567543e-06, "log_odds_chosen": 10.976491928100586, "log_odds_ratio": -3.518206358421594e-05, "logits/chosen": -0.20566090941429138, "logits/rejected": -0.23544615507125854, "logps/chosen": -0.0001013718792819418, "logps/rejected": -1.7854700088500977, "loss": 0.3001, "nll_loss": 0.07501350343227386, "rewards/accuracies": 1.0, "rewards/chosen": -1.013718792819418e-05, "rewards/margins": 0.17853687703609467, "rewards/rejected": -0.17854700982570648, "step": 13269 }, { "epoch": 9.177040110650069, "grad_norm": 3.7404978275299072, "learning_rate": 4.571999385277393e-06, "log_odds_chosen": 10.493355751037598, "log_odds_ratio": -0.00016397205763496459, "logits/chosen": 0.13229244947433472, "logits/rejected": 0.10793633759021759, "logps/chosen": -0.0002441149263177067, "logps/rejected": -1.7673245668411255, "loss": 0.5835, "nll_loss": 0.1458517611026764, "rewards/accuracies": 1.0, "rewards/chosen": -2.441149263177067e-05, "rewards/margins": 0.17670804262161255, "rewards/rejected": -0.17673246562480927, "step": 13270 }, { "epoch": 9.177731673582295, "grad_norm": 3.478229284286499, "learning_rate": 4.568157368987245e-06, "log_odds_chosen": 12.566650390625, "log_odds_ratio": -1.7088979802792892e-05, "logits/chosen": -0.2346772700548172, "logits/rejected": -0.2684975266456604, "logps/chosen": -0.00018270526197738945, "logps/rejected": -3.8987607955932617, "loss": 0.3757, "nll_loss": 0.09392865002155304, "rewards/accuracies": 1.0, "rewards/chosen": -1.8270526197738945e-05, "rewards/margins": 0.38985779881477356, "rewards/rejected": -0.3898760676383972, "step": 13271 }, { "epoch": 9.178423236514522, "grad_norm": 3.211944103240967, "learning_rate": 4.564315352697096e-06, "log_odds_chosen": 9.982566833496094, "log_odds_ratio": -0.00025987907429225743, "logits/chosen": -0.03721824660897255, "logits/rejected": -0.15641994774341583, "logps/chosen": -0.00024055814719758928, "logps/rejected": -1.7016408443450928, "loss": 0.3644, "nll_loss": 0.0910676121711731, "rewards/accuracies": 1.0, "rewards/chosen": -2.4055814719758928e-05, "rewards/margins": 0.17014002799987793, "rewards/rejected": -0.170164093375206, "step": 13272 }, { "epoch": 9.179114799446749, "grad_norm": 2.8588945865631104, "learning_rate": 4.5604733364069464e-06, "log_odds_chosen": 11.598443984985352, "log_odds_ratio": -2.767339719866868e-05, "logits/chosen": -0.44341886043548584, "logits/rejected": -0.48085397481918335, "logps/chosen": -0.00019865854119416326, "logps/rejected": -2.748101234436035, "loss": 0.2194, "nll_loss": 0.0548417866230011, "rewards/accuracies": 1.0, "rewards/chosen": -1.9865852664224803e-05, "rewards/margins": 0.2747902572154999, "rewards/rejected": -0.2748101055622101, "step": 13273 }, { "epoch": 9.179806362378976, "grad_norm": 2.5782322883605957, "learning_rate": 4.556631320116798e-06, "log_odds_chosen": 10.460678100585938, "log_odds_ratio": -0.0001101654561352916, "logits/chosen": 0.017034312710165977, "logits/rejected": 0.014345534145832062, "logps/chosen": -0.00030546486959792674, "logps/rejected": -1.9944336414337158, "loss": 0.2767, "nll_loss": 0.06916406750679016, "rewards/accuracies": 1.0, "rewards/chosen": -3.054648914257996e-05, "rewards/margins": 0.19941282272338867, "rewards/rejected": -0.19944337010383606, "step": 13274 }, { "epoch": 9.180497925311203, "grad_norm": 4.89523458480835, "learning_rate": 4.552789303826648e-06, "log_odds_chosen": 11.174288749694824, "log_odds_ratio": -0.00012297074135858566, "logits/chosen": 0.06360907107591629, "logits/rejected": -0.05227748304605484, "logps/chosen": -0.00015693520253989846, "logps/rejected": -2.271845817565918, "loss": 0.4063, "nll_loss": 0.1015675738453865, "rewards/accuracies": 1.0, "rewards/chosen": -1.5693520253989846e-05, "rewards/margins": 0.22716888785362244, "rewards/rejected": -0.22718459367752075, "step": 13275 }, { "epoch": 9.18118948824343, "grad_norm": 2.1220638751983643, "learning_rate": 4.5489472875364995e-06, "log_odds_chosen": 11.140460014343262, "log_odds_ratio": -8.715956209925935e-05, "logits/chosen": 0.07599927484989166, "logits/rejected": 0.04757479950785637, "logps/chosen": -0.00021662801736965775, "logps/rejected": -2.2675039768218994, "loss": 0.2438, "nll_loss": 0.0609496645629406, "rewards/accuracies": 1.0, "rewards/chosen": -2.1662805011146702e-05, "rewards/margins": 0.22672873735427856, "rewards/rejected": -0.22675038874149323, "step": 13276 }, { "epoch": 9.181881051175656, "grad_norm": 3.8795008659362793, "learning_rate": 4.54510527124635e-06, "log_odds_chosen": 10.69237232208252, "log_odds_ratio": -0.00011646945495158434, "logits/chosen": -0.2491566240787506, "logits/rejected": -0.28201591968536377, "logps/chosen": -0.0005097347311675549, "logps/rejected": -2.259579658508301, "loss": 0.4445, "nll_loss": 0.11111761629581451, "rewards/accuracies": 1.0, "rewards/chosen": -5.09734709339682e-05, "rewards/margins": 0.22590696811676025, "rewards/rejected": -0.2259579747915268, "step": 13277 }, { "epoch": 9.182572614107883, "grad_norm": 3.0771408081054688, "learning_rate": 4.541263254956201e-06, "log_odds_chosen": 11.507431030273438, "log_odds_ratio": -1.9733077351702377e-05, "logits/chosen": -0.7390185594558716, "logits/rejected": -0.6592539548873901, "logps/chosen": -0.00012037526903441176, "logps/rejected": -2.3912291526794434, "loss": 0.2545, "nll_loss": 0.06361792236566544, "rewards/accuracies": 1.0, "rewards/chosen": -1.2037526175845414e-05, "rewards/margins": 0.23911088705062866, "rewards/rejected": -0.23912294209003448, "step": 13278 }, { "epoch": 9.18326417704011, "grad_norm": 3.7033040523529053, "learning_rate": 4.537421238666053e-06, "log_odds_chosen": 12.416447639465332, "log_odds_ratio": -1.0927147741313092e-05, "logits/chosen": -0.19545245170593262, "logits/rejected": -0.265900194644928, "logps/chosen": -0.0001679717533988878, "logps/rejected": -3.451341152191162, "loss": 0.3186, "nll_loss": 0.07965927571058273, "rewards/accuracies": 1.0, "rewards/chosen": -1.679717388469726e-05, "rewards/margins": 0.34511736035346985, "rewards/rejected": -0.3451341390609741, "step": 13279 }, { "epoch": 9.183955739972337, "grad_norm": 3.743194341659546, "learning_rate": 4.533579222375903e-06, "log_odds_chosen": 10.992870330810547, "log_odds_ratio": -0.00026691905804909766, "logits/chosen": -0.2858811914920807, "logits/rejected": -0.30002665519714355, "logps/chosen": -0.00018830844783224165, "logps/rejected": -1.7395424842834473, "loss": 0.4446, "nll_loss": 0.11112523823976517, "rewards/accuracies": 1.0, "rewards/chosen": -1.8830845874617808e-05, "rewards/margins": 0.1739354282617569, "rewards/rejected": -0.17395423352718353, "step": 13280 }, { "epoch": 9.184647302904564, "grad_norm": 3.806933879852295, "learning_rate": 4.529737206085754e-06, "log_odds_chosen": 10.361151695251465, "log_odds_ratio": -0.0004903482622466981, "logits/chosen": -0.4798329472541809, "logits/rejected": -0.44972285628318787, "logps/chosen": -0.0005268111126497388, "logps/rejected": -2.0253071784973145, "loss": 0.415, "nll_loss": 0.10369092226028442, "rewards/accuracies": 1.0, "rewards/chosen": -5.268111635814421e-05, "rewards/margins": 0.2024780511856079, "rewards/rejected": -0.20253071188926697, "step": 13281 }, { "epoch": 9.18533886583679, "grad_norm": 3.293419361114502, "learning_rate": 4.525895189795605e-06, "log_odds_chosen": 11.961520195007324, "log_odds_ratio": -1.2666841030295473e-05, "logits/chosen": -0.01091938465833664, "logits/rejected": -0.1974954605102539, "logps/chosen": -0.0001375441934214905, "logps/rejected": -2.760972738265991, "loss": 0.3582, "nll_loss": 0.08954711258411407, "rewards/accuracies": 1.0, "rewards/chosen": -1.3754419342149049e-05, "rewards/margins": 0.2760835289955139, "rewards/rejected": -0.27609729766845703, "step": 13282 }, { "epoch": 9.186030428769017, "grad_norm": 3.072108745574951, "learning_rate": 4.522053173505456e-06, "log_odds_chosen": 11.435327529907227, "log_odds_ratio": -5.203645196161233e-05, "logits/chosen": -0.22686409950256348, "logits/rejected": -0.29669320583343506, "logps/chosen": -0.0002450290776323527, "logps/rejected": -2.336642265319824, "loss": 0.2849, "nll_loss": 0.07121407985687256, "rewards/accuracies": 1.0, "rewards/chosen": -2.450290776323527e-05, "rewards/margins": 0.23363973200321198, "rewards/rejected": -0.23366422951221466, "step": 13283 }, { "epoch": 9.186721991701244, "grad_norm": 3.901740789413452, "learning_rate": 4.518211157215306e-06, "log_odds_chosen": 11.893566131591797, "log_odds_ratio": -2.3749666070216335e-05, "logits/chosen": -0.12406025826931, "logits/rejected": -0.11168057471513748, "logps/chosen": -0.0004723105812445283, "logps/rejected": -3.253361225128174, "loss": 0.3863, "nll_loss": 0.09658458828926086, "rewards/accuracies": 1.0, "rewards/chosen": -4.7231056669261307e-05, "rewards/margins": 0.32528889179229736, "rewards/rejected": -0.3253360986709595, "step": 13284 }, { "epoch": 9.187413554633471, "grad_norm": 4.156968593597412, "learning_rate": 4.514369140925158e-06, "log_odds_chosen": 11.90109634399414, "log_odds_ratio": -3.2485702831763774e-05, "logits/chosen": -0.5516144037246704, "logits/rejected": -0.6592393517494202, "logps/chosen": -0.0002664893982000649, "logps/rejected": -3.0378479957580566, "loss": 0.3442, "nll_loss": 0.08603926748037338, "rewards/accuracies": 1.0, "rewards/chosen": -2.6648936909623444e-05, "rewards/margins": 0.3037582039833069, "rewards/rejected": -0.3037848174571991, "step": 13285 }, { "epoch": 9.188105117565698, "grad_norm": 3.408931016921997, "learning_rate": 4.510527124635009e-06, "log_odds_chosen": 11.743860244750977, "log_odds_ratio": -2.8227870643604547e-05, "logits/chosen": -0.17038024961948395, "logits/rejected": -0.19767898321151733, "logps/chosen": -0.00032378954347223043, "logps/rejected": -3.1211838722229004, "loss": 0.2861, "nll_loss": 0.07151287794113159, "rewards/accuracies": 1.0, "rewards/chosen": -3.237895725760609e-05, "rewards/margins": 0.3120860159397125, "rewards/rejected": -0.31211841106414795, "step": 13286 }, { "epoch": 9.188796680497925, "grad_norm": 3.6295013427734375, "learning_rate": 4.5066851083448595e-06, "log_odds_chosen": 10.94865894317627, "log_odds_ratio": -7.326849299715832e-05, "logits/chosen": 0.1300758719444275, "logits/rejected": 0.01392633467912674, "logps/chosen": -0.00020176218822598457, "logps/rejected": -2.1468212604522705, "loss": 0.3371, "nll_loss": 0.08427339792251587, "rewards/accuracies": 1.0, "rewards/chosen": -2.017621955019422e-05, "rewards/margins": 0.21466195583343506, "rewards/rejected": -0.21468213200569153, "step": 13287 }, { "epoch": 9.189488243430151, "grad_norm": 3.9760258197784424, "learning_rate": 4.502843092054711e-06, "log_odds_chosen": 11.105104446411133, "log_odds_ratio": -5.5100786994444206e-05, "logits/chosen": -0.17760206758975983, "logits/rejected": -0.240696519613266, "logps/chosen": -0.00025667899171821773, "logps/rejected": -2.6248059272766113, "loss": 0.4704, "nll_loss": 0.11759471893310547, "rewards/accuracies": 1.0, "rewards/chosen": -2.5667901354609057e-05, "rewards/margins": 0.26245489716529846, "rewards/rejected": -0.26248058676719666, "step": 13288 }, { "epoch": 9.190179806362378, "grad_norm": 2.7358086109161377, "learning_rate": 4.499001075764561e-06, "log_odds_chosen": 12.274123191833496, "log_odds_ratio": -8.188303581846412e-06, "logits/chosen": -0.38431859016418457, "logits/rejected": -0.46995633840560913, "logps/chosen": -0.00018513904069550335, "logps/rejected": -3.022996425628662, "loss": 0.2276, "nll_loss": 0.056895844638347626, "rewards/accuracies": 1.0, "rewards/chosen": -1.8513903341954574e-05, "rewards/margins": 0.3022811710834503, "rewards/rejected": -0.3022996783256531, "step": 13289 }, { "epoch": 9.190871369294605, "grad_norm": 2.785196542739868, "learning_rate": 4.4951590594744126e-06, "log_odds_chosen": 10.875653266906738, "log_odds_ratio": -0.0002187418140238151, "logits/chosen": -0.19698688387870789, "logits/rejected": -0.34480729699134827, "logps/chosen": -0.0002780867216642946, "logps/rejected": -2.1473608016967773, "loss": 0.2618, "nll_loss": 0.06543861329555511, "rewards/accuracies": 1.0, "rewards/chosen": -2.7808673621620983e-05, "rewards/margins": 0.21470826864242554, "rewards/rejected": -0.21473607420921326, "step": 13290 }, { "epoch": 9.191562932226832, "grad_norm": 3.181849241256714, "learning_rate": 4.491317043184263e-06, "log_odds_chosen": 12.038965225219727, "log_odds_ratio": -1.9076065655099228e-05, "logits/chosen": -0.6734594106674194, "logits/rejected": -0.8279802203178406, "logps/chosen": -8.055789658101276e-05, "logps/rejected": -2.6196889877319336, "loss": 0.3114, "nll_loss": 0.07784523814916611, "rewards/accuracies": 1.0, "rewards/chosen": -8.055790203798097e-06, "rewards/margins": 0.26196086406707764, "rewards/rejected": -0.2619689106941223, "step": 13291 }, { "epoch": 9.192254495159059, "grad_norm": 4.373355865478516, "learning_rate": 4.487475026894114e-06, "log_odds_chosen": 11.565893173217773, "log_odds_ratio": -2.1030267816968262e-05, "logits/chosen": -0.22011518478393555, "logits/rejected": -0.19861875474452972, "logps/chosen": -0.0004119708319194615, "logps/rejected": -2.8756916522979736, "loss": 0.4777, "nll_loss": 0.11942359805107117, "rewards/accuracies": 1.0, "rewards/chosen": -4.119707955396734e-05, "rewards/margins": 0.28752797842025757, "rewards/rejected": -0.28756919503211975, "step": 13292 }, { "epoch": 9.192946058091286, "grad_norm": 2.9223194122314453, "learning_rate": 4.483633010603965e-06, "log_odds_chosen": 11.167993545532227, "log_odds_ratio": -2.5012381229316816e-05, "logits/chosen": 0.07321306318044662, "logits/rejected": -0.00747278705239296, "logps/chosen": -0.00012964705820195377, "logps/rejected": -2.1343207359313965, "loss": 0.3096, "nll_loss": 0.07740969210863113, "rewards/accuracies": 1.0, "rewards/chosen": -1.2964705092599615e-05, "rewards/margins": 0.2134191244840622, "rewards/rejected": -0.21343207359313965, "step": 13293 }, { "epoch": 9.193637621023512, "grad_norm": 2.6296606063842773, "learning_rate": 4.479790994313816e-06, "log_odds_chosen": 10.825248718261719, "log_odds_ratio": -4.6251479943748564e-05, "logits/chosen": -0.4593941271305084, "logits/rejected": -0.4678587019443512, "logps/chosen": -0.0001595508656464517, "logps/rejected": -1.4237827062606812, "loss": 0.234, "nll_loss": 0.058507852256298065, "rewards/accuracies": 1.0, "rewards/chosen": -1.5955087292240933e-05, "rewards/margins": 0.1423623263835907, "rewards/rejected": -0.14237827062606812, "step": 13294 }, { "epoch": 9.19432918395574, "grad_norm": 4.109340190887451, "learning_rate": 4.475948978023667e-06, "log_odds_chosen": 12.034053802490234, "log_odds_ratio": -3.0341378078446724e-05, "logits/chosen": 0.08396705240011215, "logits/rejected": 0.06960800290107727, "logps/chosen": -0.00012110539682907984, "logps/rejected": -2.9148576259613037, "loss": 0.4502, "nll_loss": 0.11255712807178497, "rewards/accuracies": 1.0, "rewards/chosen": -1.2110540410503745e-05, "rewards/margins": 0.2914736568927765, "rewards/rejected": -0.2914857566356659, "step": 13295 }, { "epoch": 9.195020746887966, "grad_norm": 4.959160327911377, "learning_rate": 4.472106961733518e-06, "log_odds_chosen": 11.456915855407715, "log_odds_ratio": -2.070013215416111e-05, "logits/chosen": 0.10722736269235611, "logits/rejected": -0.04543411731719971, "logps/chosen": -0.0005359066999517381, "logps/rejected": -2.7827417850494385, "loss": 0.61, "nll_loss": 0.15250572562217712, "rewards/accuracies": 1.0, "rewards/chosen": -5.359066926757805e-05, "rewards/margins": 0.27822059392929077, "rewards/rejected": -0.27827420830726624, "step": 13296 }, { "epoch": 9.195712309820193, "grad_norm": 3.350621461868286, "learning_rate": 4.4682649454433695e-06, "log_odds_chosen": 11.722689628601074, "log_odds_ratio": -1.1843771972053219e-05, "logits/chosen": -0.07416524738073349, "logits/rejected": -0.33439549803733826, "logps/chosen": -0.000422697514295578, "logps/rejected": -3.2973790168762207, "loss": 0.266, "nll_loss": 0.06649604439735413, "rewards/accuracies": 1.0, "rewards/chosen": -4.226975215715356e-05, "rewards/margins": 0.32969561219215393, "rewards/rejected": -0.3297378718852997, "step": 13297 }, { "epoch": 9.19640387275242, "grad_norm": 3.1698036193847656, "learning_rate": 4.464422929153219e-06, "log_odds_chosen": 12.69196891784668, "log_odds_ratio": -1.0068518349726219e-05, "logits/chosen": -0.200626403093338, "logits/rejected": -0.22722738981246948, "logps/chosen": -7.099766662577167e-05, "logps/rejected": -3.128727912902832, "loss": 0.4568, "nll_loss": 0.11419538408517838, "rewards/accuracies": 1.0, "rewards/chosen": -7.099766662577167e-06, "rewards/margins": 0.3128657042980194, "rewards/rejected": -0.31287282705307007, "step": 13298 }, { "epoch": 9.197095435684647, "grad_norm": 2.4214847087860107, "learning_rate": 4.460580912863071e-06, "log_odds_chosen": 11.50594711303711, "log_odds_ratio": -2.9842894946341403e-05, "logits/chosen": -0.27305135130882263, "logits/rejected": -0.24569541215896606, "logps/chosen": -8.768929546931759e-05, "logps/rejected": -2.1344499588012695, "loss": 0.2693, "nll_loss": 0.06731890141963959, "rewards/accuracies": 1.0, "rewards/chosen": -8.768928637437057e-06, "rewards/margins": 0.21343624591827393, "rewards/rejected": -0.2134450376033783, "step": 13299 }, { "epoch": 9.197786998616873, "grad_norm": 3.382467031478882, "learning_rate": 4.456738896572922e-06, "log_odds_chosen": 10.984672546386719, "log_odds_ratio": -5.946517194388434e-05, "logits/chosen": 0.5020791292190552, "logits/rejected": 0.43189650774002075, "logps/chosen": -0.0009921141900122166, "logps/rejected": -2.1780381202697754, "loss": 0.4387, "nll_loss": 0.10966768860816956, "rewards/accuracies": 1.0, "rewards/chosen": -9.921141463564709e-05, "rewards/margins": 0.21770460903644562, "rewards/rejected": -0.21780380606651306, "step": 13300 }, { "epoch": 9.1984785615491, "grad_norm": 3.9634034633636475, "learning_rate": 4.4528968802827725e-06, "log_odds_chosen": 10.432125091552734, "log_odds_ratio": -0.0005259269964881241, "logits/chosen": -0.11210166662931442, "logits/rejected": -0.1380247175693512, "logps/chosen": -0.0006339678075164557, "logps/rejected": -2.1972599029541016, "loss": 0.4485, "nll_loss": 0.11206686496734619, "rewards/accuracies": 1.0, "rewards/chosen": -6.339678657241166e-05, "rewards/margins": 0.2196625918149948, "rewards/rejected": -0.21972598135471344, "step": 13301 }, { "epoch": 9.199170124481327, "grad_norm": 3.054126262664795, "learning_rate": 4.449054863992624e-06, "log_odds_chosen": 10.148401260375977, "log_odds_ratio": -0.0006780875264666975, "logits/chosen": -0.3775310516357422, "logits/rejected": -0.4329449534416199, "logps/chosen": -0.00037620688090100884, "logps/rejected": -1.6183583736419678, "loss": 0.2996, "nll_loss": 0.07482132315635681, "rewards/accuracies": 1.0, "rewards/chosen": -3.7620688090100884e-05, "rewards/margins": 0.16179822385311127, "rewards/rejected": -0.16183583438396454, "step": 13302 }, { "epoch": 9.199861687413554, "grad_norm": 3.3058624267578125, "learning_rate": 4.445212847702474e-06, "log_odds_chosen": 11.110430717468262, "log_odds_ratio": -0.00015114396228455007, "logits/chosen": -0.46796417236328125, "logits/rejected": -0.47808900475502014, "logps/chosen": -7.569074659841135e-05, "logps/rejected": -1.7901885509490967, "loss": 0.4575, "nll_loss": 0.11437175422906876, "rewards/accuracies": 1.0, "rewards/chosen": -7.569075023639016e-06, "rewards/margins": 0.1790112853050232, "rewards/rejected": -0.17901885509490967, "step": 13303 }, { "epoch": 9.20055325034578, "grad_norm": 2.1154303550720215, "learning_rate": 4.441370831412326e-06, "log_odds_chosen": 10.566178321838379, "log_odds_ratio": -0.00013299538113642484, "logits/chosen": 0.21447968482971191, "logits/rejected": 0.19621631503105164, "logps/chosen": -0.00033608500962145627, "logps/rejected": -1.8940387964248657, "loss": 0.2311, "nll_loss": 0.05776310712099075, "rewards/accuracies": 1.0, "rewards/chosen": -3.3608499506954104e-05, "rewards/margins": 0.1893702745437622, "rewards/rejected": -0.18940389156341553, "step": 13304 }, { "epoch": 9.201244813278008, "grad_norm": 4.0034098625183105, "learning_rate": 4.437528815122176e-06, "log_odds_chosen": 11.385026931762695, "log_odds_ratio": -3.283590922364965e-05, "logits/chosen": -0.2957315444946289, "logits/rejected": -0.40880489349365234, "logps/chosen": -0.0003526700893417001, "logps/rejected": -2.501044511795044, "loss": 0.5239, "nll_loss": 0.13096894323825836, "rewards/accuracies": 1.0, "rewards/chosen": -3.526701038936153e-05, "rewards/margins": 0.25006917119026184, "rewards/rejected": -0.25010445713996887, "step": 13305 }, { "epoch": 9.201936376210234, "grad_norm": 4.126883029937744, "learning_rate": 4.433686798832027e-06, "log_odds_chosen": 12.719415664672852, "log_odds_ratio": -5.7122347243421245e-06, "logits/chosen": -0.20841145515441895, "logits/rejected": -0.295016348361969, "logps/chosen": -0.00010687931353459135, "logps/rejected": -3.5534002780914307, "loss": 0.3601, "nll_loss": 0.09003311395645142, "rewards/accuracies": 1.0, "rewards/chosen": -1.0687931535358075e-05, "rewards/margins": 0.35532933473587036, "rewards/rejected": -0.35534003376960754, "step": 13306 }, { "epoch": 9.202627939142461, "grad_norm": 3.3515937328338623, "learning_rate": 4.429844782541878e-06, "log_odds_chosen": 10.834237098693848, "log_odds_ratio": -0.0002241548936581239, "logits/chosen": -0.34491413831710815, "logits/rejected": -0.36780351400375366, "logps/chosen": -0.00019741068535950035, "logps/rejected": -1.9140173196792603, "loss": 0.31, "nll_loss": 0.07748197019100189, "rewards/accuracies": 1.0, "rewards/chosen": -1.9741069991141558e-05, "rewards/margins": 0.19138199090957642, "rewards/rejected": -0.19140173494815826, "step": 13307 }, { "epoch": 9.203319502074688, "grad_norm": 3.089019775390625, "learning_rate": 4.4260027662517294e-06, "log_odds_chosen": 10.662595748901367, "log_odds_ratio": -3.3396681828889996e-05, "logits/chosen": -0.2811889350414276, "logits/rejected": -0.3584701418876648, "logps/chosen": -0.00044508109567686915, "logps/rejected": -2.1020030975341797, "loss": 0.3703, "nll_loss": 0.09256477653980255, "rewards/accuracies": 1.0, "rewards/chosen": -4.4508109567686915e-05, "rewards/margins": 0.21015578508377075, "rewards/rejected": -0.21020029485225677, "step": 13308 }, { "epoch": 9.204011065006915, "grad_norm": 4.094654560089111, "learning_rate": 4.42216074996158e-06, "log_odds_chosen": 11.015249252319336, "log_odds_ratio": -4.134257687837817e-05, "logits/chosen": 0.17103615403175354, "logits/rejected": 0.05913734436035156, "logps/chosen": -0.00017921102698892355, "logps/rejected": -2.214588165283203, "loss": 0.468, "nll_loss": 0.11699028313159943, "rewards/accuracies": 1.0, "rewards/chosen": -1.79211056092754e-05, "rewards/margins": 0.2214408963918686, "rewards/rejected": -0.2214588224887848, "step": 13309 }, { "epoch": 9.204702627939142, "grad_norm": 2.4857873916625977, "learning_rate": 4.418318733671431e-06, "log_odds_chosen": 10.965414047241211, "log_odds_ratio": -0.0001001676864689216, "logits/chosen": -0.0682789534330368, "logits/rejected": -0.13583692908287048, "logps/chosen": -0.000550756580196321, "logps/rejected": -1.8792402744293213, "loss": 0.2672, "nll_loss": 0.06679284572601318, "rewards/accuracies": 1.0, "rewards/chosen": -5.507566311280243e-05, "rewards/margins": 0.18786895275115967, "rewards/rejected": -0.18792402744293213, "step": 13310 }, { "epoch": 9.205394190871369, "grad_norm": 3.659666061401367, "learning_rate": 4.4144767173812825e-06, "log_odds_chosen": 11.955492973327637, "log_odds_ratio": -2.0881549062323757e-05, "logits/chosen": -0.3871539235115051, "logits/rejected": -0.38239070773124695, "logps/chosen": -7.013310823822394e-05, "logps/rejected": -2.347487211227417, "loss": 0.3603, "nll_loss": 0.09007039666175842, "rewards/accuracies": 1.0, "rewards/chosen": -7.013311005721334e-06, "rewards/margins": 0.2347417175769806, "rewards/rejected": -0.2347487509250641, "step": 13311 }, { "epoch": 9.206085753803595, "grad_norm": 5.377493381500244, "learning_rate": 4.4106347010911324e-06, "log_odds_chosen": 12.305624008178711, "log_odds_ratio": -9.869532732409425e-06, "logits/chosen": 0.05969385802745819, "logits/rejected": 0.022483449429273605, "logps/chosen": -9.224964014720172e-05, "logps/rejected": -2.647648811340332, "loss": 0.5416, "nll_loss": 0.13539725542068481, "rewards/accuracies": 1.0, "rewards/chosen": -9.224963832821231e-06, "rewards/margins": 0.2647556662559509, "rewards/rejected": -0.2647649049758911, "step": 13312 }, { "epoch": 9.206777316735822, "grad_norm": 2.8959801197052, "learning_rate": 4.406792684800984e-06, "log_odds_chosen": 10.231405258178711, "log_odds_ratio": -0.00010332741658203304, "logits/chosen": -0.26172447204589844, "logits/rejected": -0.3718671202659607, "logps/chosen": -0.00044131246977485716, "logps/rejected": -1.9781062602996826, "loss": 0.3332, "nll_loss": 0.08329068124294281, "rewards/accuracies": 1.0, "rewards/chosen": -4.413124770508148e-05, "rewards/margins": 0.1977664977312088, "rewards/rejected": -0.19781062006950378, "step": 13313 }, { "epoch": 9.207468879668049, "grad_norm": 4.0241217613220215, "learning_rate": 4.402950668510835e-06, "log_odds_chosen": 11.831079483032227, "log_odds_ratio": -3.080906390096061e-05, "logits/chosen": 0.3046078085899353, "logits/rejected": 0.1909351944923401, "logps/chosen": -0.00017704170022625476, "logps/rejected": -3.0421605110168457, "loss": 0.5471, "nll_loss": 0.13676731288433075, "rewards/accuracies": 1.0, "rewards/chosen": -1.7704169295029715e-05, "rewards/margins": 0.30419835448265076, "rewards/rejected": -0.30421608686447144, "step": 13314 }, { "epoch": 9.208160442600276, "grad_norm": 3.4288268089294434, "learning_rate": 4.3991086522206855e-06, "log_odds_chosen": 11.901636123657227, "log_odds_ratio": -3.4411190426908433e-05, "logits/chosen": -0.17353087663650513, "logits/rejected": -0.27299991250038147, "logps/chosen": -0.00021824287250638008, "logps/rejected": -2.9237382411956787, "loss": 0.4169, "nll_loss": 0.10421036183834076, "rewards/accuracies": 1.0, "rewards/chosen": -2.182428761443589e-05, "rewards/margins": 0.29235199093818665, "rewards/rejected": -0.2923738360404968, "step": 13315 }, { "epoch": 9.208852005532503, "grad_norm": 4.281277656555176, "learning_rate": 4.395266635930536e-06, "log_odds_chosen": 10.813922882080078, "log_odds_ratio": -0.00017469703743699938, "logits/chosen": -0.1653359830379486, "logits/rejected": -0.20505306124687195, "logps/chosen": -0.0006260251393541694, "logps/rejected": -2.2798168659210205, "loss": 0.3885, "nll_loss": 0.09710270166397095, "rewards/accuracies": 1.0, "rewards/chosen": -6.260250665945932e-05, "rewards/margins": 0.2279190868139267, "rewards/rejected": -0.22798168659210205, "step": 13316 }, { "epoch": 9.20954356846473, "grad_norm": 4.29088020324707, "learning_rate": 4.391424619640387e-06, "log_odds_chosen": 12.491415977478027, "log_odds_ratio": -6.058309281797847e-06, "logits/chosen": 0.06783643364906311, "logits/rejected": 0.06438513845205307, "logps/chosen": -0.00016462888743262738, "logps/rejected": -3.220424175262451, "loss": 0.4483, "nll_loss": 0.11207491159439087, "rewards/accuracies": 1.0, "rewards/chosen": -1.646288910706062e-05, "rewards/margins": 0.32202598452568054, "rewards/rejected": -0.32204243540763855, "step": 13317 }, { "epoch": 9.210235131396956, "grad_norm": 2.623915433883667, "learning_rate": 4.387582603350239e-06, "log_odds_chosen": 10.996440887451172, "log_odds_ratio": -3.61188140232116e-05, "logits/chosen": -0.515557050704956, "logits/rejected": -0.6479478478431702, "logps/chosen": -0.00018121147877536714, "logps/rejected": -2.1568994522094727, "loss": 0.2525, "nll_loss": 0.06312118470668793, "rewards/accuracies": 1.0, "rewards/chosen": -1.812115078791976e-05, "rewards/margins": 0.2156718373298645, "rewards/rejected": -0.21568995714187622, "step": 13318 }, { "epoch": 9.210926694329183, "grad_norm": 4.4565510749816895, "learning_rate": 4.383740587060089e-06, "log_odds_chosen": 11.984901428222656, "log_odds_ratio": -1.0817630936799105e-05, "logits/chosen": -0.5716065168380737, "logits/rejected": -0.678408682346344, "logps/chosen": -0.00011433548934292048, "logps/rejected": -2.6132736206054688, "loss": 0.443, "nll_loss": 0.11074677109718323, "rewards/accuracies": 1.0, "rewards/chosen": -1.1433548934292048e-05, "rewards/margins": 0.2613159418106079, "rewards/rejected": -0.2613273859024048, "step": 13319 }, { "epoch": 9.21161825726141, "grad_norm": 2.0655133724212646, "learning_rate": 4.37989857076994e-06, "log_odds_chosen": 10.456897735595703, "log_odds_ratio": -6.552055856445804e-05, "logits/chosen": -0.38059210777282715, "logits/rejected": -0.4759252667427063, "logps/chosen": -0.00036717430339194834, "logps/rejected": -2.2479348182678223, "loss": 0.1991, "nll_loss": 0.049757011234760284, "rewards/accuracies": 1.0, "rewards/chosen": -3.671742888400331e-05, "rewards/margins": 0.22475677728652954, "rewards/rejected": -0.22479349374771118, "step": 13320 }, { "epoch": 9.212309820193637, "grad_norm": 4.373488903045654, "learning_rate": 4.376056554479791e-06, "log_odds_chosen": 10.9266357421875, "log_odds_ratio": -4.441215787664987e-05, "logits/chosen": 0.042728446424007416, "logits/rejected": -0.03302290290594101, "logps/chosen": -0.00030370999593287706, "logps/rejected": -1.8644747734069824, "loss": 0.436, "nll_loss": 0.10898858308792114, "rewards/accuracies": 1.0, "rewards/chosen": -3.0371000320883468e-05, "rewards/margins": 0.1864171326160431, "rewards/rejected": -0.18644748628139496, "step": 13321 }, { "epoch": 9.213001383125864, "grad_norm": 2.570322036743164, "learning_rate": 4.3722145381896425e-06, "log_odds_chosen": 10.678181648254395, "log_odds_ratio": -3.701847163029015e-05, "logits/chosen": -0.30406653881073, "logits/rejected": -0.30958351492881775, "logps/chosen": -0.00010654401557985693, "logps/rejected": -1.6126346588134766, "loss": 0.2565, "nll_loss": 0.06412629783153534, "rewards/accuracies": 1.0, "rewards/chosen": -1.0654401194187813e-05, "rewards/margins": 0.16125282645225525, "rewards/rejected": -0.16126346588134766, "step": 13322 }, { "epoch": 9.21369294605809, "grad_norm": 3.3682193756103516, "learning_rate": 4.368372521899493e-06, "log_odds_chosen": 11.65042495727539, "log_odds_ratio": -7.514456228818744e-05, "logits/chosen": -0.3465287983417511, "logits/rejected": -0.3993972837924957, "logps/chosen": -0.00011759632616303861, "logps/rejected": -2.6790201663970947, "loss": 0.3229, "nll_loss": 0.08072992414236069, "rewards/accuracies": 1.0, "rewards/chosen": -1.17596318887081e-05, "rewards/margins": 0.26789024472236633, "rewards/rejected": -0.2679020166397095, "step": 13323 }, { "epoch": 9.214384508990317, "grad_norm": 4.059265613555908, "learning_rate": 4.364530505609344e-06, "log_odds_chosen": 10.750175476074219, "log_odds_ratio": -0.0003338758833706379, "logits/chosen": -0.21408270299434662, "logits/rejected": -0.2968137860298157, "logps/chosen": -0.0005572262452915311, "logps/rejected": -2.1982357501983643, "loss": 0.5274, "nll_loss": 0.1318206787109375, "rewards/accuracies": 1.0, "rewards/chosen": -5.57226303499192e-05, "rewards/margins": 0.21976785361766815, "rewards/rejected": -0.21982358396053314, "step": 13324 }, { "epoch": 9.215076071922544, "grad_norm": 3.622119665145874, "learning_rate": 4.3606884893191956e-06, "log_odds_chosen": 10.936710357666016, "log_odds_ratio": -3.496746649034321e-05, "logits/chosen": -0.4875425100326538, "logits/rejected": -0.5060606598854065, "logps/chosen": -0.00024507613852620125, "logps/rejected": -2.3551130294799805, "loss": 0.3314, "nll_loss": 0.08283500373363495, "rewards/accuracies": 1.0, "rewards/chosen": -2.4507613488822244e-05, "rewards/margins": 0.23548682034015656, "rewards/rejected": -0.23551130294799805, "step": 13325 }, { "epoch": 9.215767634854771, "grad_norm": 4.0741472244262695, "learning_rate": 4.3568464730290455e-06, "log_odds_chosen": 10.13569450378418, "log_odds_ratio": -0.00016341662558261305, "logits/chosen": -0.26546046137809753, "logits/rejected": -0.41923052072525024, "logps/chosen": -0.0007290366338565946, "logps/rejected": -1.5351698398590088, "loss": 0.4524, "nll_loss": 0.11307627707719803, "rewards/accuracies": 1.0, "rewards/chosen": -7.290366193046793e-05, "rewards/margins": 0.1534440815448761, "rewards/rejected": -0.1535169780254364, "step": 13326 }, { "epoch": 9.216459197786998, "grad_norm": 4.307971477508545, "learning_rate": 4.353004456738897e-06, "log_odds_chosen": 10.221899032592773, "log_odds_ratio": -0.00010868780373129994, "logits/chosen": -0.07241721451282501, "logits/rejected": -0.1888885498046875, "logps/chosen": -0.000540388748049736, "logps/rejected": -2.0440444946289062, "loss": 0.456, "nll_loss": 0.11398278921842575, "rewards/accuracies": 1.0, "rewards/chosen": -5.403887917054817e-05, "rewards/margins": 0.20435041189193726, "rewards/rejected": -0.20440445840358734, "step": 13327 }, { "epoch": 9.217150760719225, "grad_norm": 2.491574287414551, "learning_rate": 4.349162440448748e-06, "log_odds_chosen": 10.440887451171875, "log_odds_ratio": -9.795461664907634e-05, "logits/chosen": -0.1491602510213852, "logits/rejected": -0.3474409282207489, "logps/chosen": -0.00017148329061456025, "logps/rejected": -1.7223557233810425, "loss": 0.3691, "nll_loss": 0.09227250516414642, "rewards/accuracies": 1.0, "rewards/chosen": -1.7148329789051786e-05, "rewards/margins": 0.1722184121608734, "rewards/rejected": -0.17223556339740753, "step": 13328 }, { "epoch": 9.217842323651452, "grad_norm": 3.437854528427124, "learning_rate": 4.3453204241585986e-06, "log_odds_chosen": 11.172818183898926, "log_odds_ratio": -2.2042973796487786e-05, "logits/chosen": 0.32086265087127686, "logits/rejected": 0.015460759401321411, "logps/chosen": -0.0001118913060054183, "logps/rejected": -2.108009099960327, "loss": 0.398, "nll_loss": 0.09949032962322235, "rewards/accuracies": 1.0, "rewards/chosen": -1.118913041864289e-05, "rewards/margins": 0.2107897251844406, "rewards/rejected": -0.2108009159564972, "step": 13329 }, { "epoch": 9.218533886583678, "grad_norm": 5.149376392364502, "learning_rate": 4.341478407868449e-06, "log_odds_chosen": 11.058161735534668, "log_odds_ratio": -0.00018130963144358248, "logits/chosen": -0.3922494053840637, "logits/rejected": -0.32720354199409485, "logps/chosen": -0.0002020241809077561, "logps/rejected": -2.359410285949707, "loss": 0.5601, "nll_loss": 0.14000999927520752, "rewards/accuracies": 1.0, "rewards/chosen": -2.0202420273562893e-05, "rewards/margins": 0.23592083156108856, "rewards/rejected": -0.2359410524368286, "step": 13330 }, { "epoch": 9.219225449515905, "grad_norm": 4.437367916107178, "learning_rate": 4.337636391578301e-06, "log_odds_chosen": 10.904949188232422, "log_odds_ratio": -0.0001882653741631657, "logits/chosen": -0.10703521966934204, "logits/rejected": -0.24360337853431702, "logps/chosen": -0.00041447189869359136, "logps/rejected": -2.1529417037963867, "loss": 0.6033, "nll_loss": 0.15079988539218903, "rewards/accuracies": 1.0, "rewards/chosen": -4.14471905969549e-05, "rewards/margins": 0.21525274217128754, "rewards/rejected": -0.21529419720172882, "step": 13331 }, { "epoch": 9.219917012448132, "grad_norm": 2.943000316619873, "learning_rate": 4.333794375288152e-06, "log_odds_chosen": 10.67567253112793, "log_odds_ratio": -4.084103420609608e-05, "logits/chosen": 0.1298995167016983, "logits/rejected": 0.08547534048557281, "logps/chosen": -0.00013424924691207707, "logps/rejected": -1.811115026473999, "loss": 0.2824, "nll_loss": 0.07060249149799347, "rewards/accuracies": 1.0, "rewards/chosen": -1.3424925782601349e-05, "rewards/margins": 0.1810980886220932, "rewards/rejected": -0.18111151456832886, "step": 13332 }, { "epoch": 9.220608575380359, "grad_norm": 3.3386802673339844, "learning_rate": 4.329952358998002e-06, "log_odds_chosen": 10.83498477935791, "log_odds_ratio": -8.374982280656695e-05, "logits/chosen": -0.31788182258605957, "logits/rejected": -0.46187710762023926, "logps/chosen": -0.00046914478298276663, "logps/rejected": -2.571054458618164, "loss": 0.3678, "nll_loss": 0.09194758534431458, "rewards/accuracies": 1.0, "rewards/chosen": -4.6914479753468186e-05, "rewards/margins": 0.2570585608482361, "rewards/rejected": -0.2571054697036743, "step": 13333 }, { "epoch": 9.221300138312586, "grad_norm": 2.7725768089294434, "learning_rate": 4.326110342707853e-06, "log_odds_chosen": 10.606285095214844, "log_odds_ratio": -0.00010153828770853579, "logits/chosen": -0.6269538402557373, "logits/rejected": -0.6746143698692322, "logps/chosen": -0.0003140957560390234, "logps/rejected": -2.2495834827423096, "loss": 0.3671, "nll_loss": 0.09176291525363922, "rewards/accuracies": 1.0, "rewards/chosen": -3.1409577786689624e-05, "rewards/margins": 0.22492694854736328, "rewards/rejected": -0.22495834529399872, "step": 13334 }, { "epoch": 9.221991701244812, "grad_norm": 3.7023515701293945, "learning_rate": 4.322268326417704e-06, "log_odds_chosen": 10.880495071411133, "log_odds_ratio": -2.8478403692133725e-05, "logits/chosen": -0.34293243288993835, "logits/rejected": -0.37764379382133484, "logps/chosen": -0.0004085856198798865, "logps/rejected": -2.6674582958221436, "loss": 0.3928, "nll_loss": 0.09819929301738739, "rewards/accuracies": 1.0, "rewards/chosen": -4.085856198798865e-05, "rewards/margins": 0.2667049765586853, "rewards/rejected": -0.26674583554267883, "step": 13335 }, { "epoch": 9.22268326417704, "grad_norm": 2.603764772415161, "learning_rate": 4.3184263101275555e-06, "log_odds_chosen": 11.418258666992188, "log_odds_ratio": -1.953284845512826e-05, "logits/chosen": -0.6696931719779968, "logits/rejected": -0.7942060232162476, "logps/chosen": -5.880265234736726e-05, "logps/rejected": -1.7821133136749268, "loss": 0.2921, "nll_loss": 0.07302499562501907, "rewards/accuracies": 1.0, "rewards/chosen": -5.880265234736726e-06, "rewards/margins": 0.1782054454088211, "rewards/rejected": -0.17821133136749268, "step": 13336 }, { "epoch": 9.223374827109266, "grad_norm": 2.756286859512329, "learning_rate": 4.314584293837405e-06, "log_odds_chosen": 12.973590850830078, "log_odds_ratio": -6.149369710328756e-06, "logits/chosen": -0.1303907334804535, "logits/rejected": -0.09264279156923294, "logps/chosen": -7.317406561924145e-05, "logps/rejected": -3.3638851642608643, "loss": 0.2846, "nll_loss": 0.07113946974277496, "rewards/accuracies": 1.0, "rewards/chosen": -7.317406925722025e-06, "rewards/margins": 0.3363812267780304, "rewards/rejected": -0.3363885283470154, "step": 13337 }, { "epoch": 9.224066390041493, "grad_norm": 5.033344268798828, "learning_rate": 4.310742277547257e-06, "log_odds_chosen": 10.399462699890137, "log_odds_ratio": -9.517098806099966e-05, "logits/chosen": -0.491542786359787, "logits/rejected": -0.5521076917648315, "logps/chosen": -0.00017324337386526167, "logps/rejected": -1.5467714071273804, "loss": 0.2613, "nll_loss": 0.06531417369842529, "rewards/accuracies": 1.0, "rewards/chosen": -1.7324337022728287e-05, "rewards/margins": 0.15465980768203735, "rewards/rejected": -0.1546771377325058, "step": 13338 }, { "epoch": 9.22475795297372, "grad_norm": 3.2574455738067627, "learning_rate": 4.306900261257108e-06, "log_odds_chosen": 11.016514778137207, "log_odds_ratio": -3.3485335734440014e-05, "logits/chosen": -0.7905449271202087, "logits/rejected": -0.7238627076148987, "logps/chosen": -0.00017339608166366816, "logps/rejected": -2.120157480239868, "loss": 0.2839, "nll_loss": 0.07096044719219208, "rewards/accuracies": 1.0, "rewards/chosen": -1.7339609257760458e-05, "rewards/margins": 0.21199840307235718, "rewards/rejected": -0.21201574802398682, "step": 13339 }, { "epoch": 9.225449515905947, "grad_norm": 3.7507407665252686, "learning_rate": 4.3030582449669585e-06, "log_odds_chosen": 11.375577926635742, "log_odds_ratio": -3.370269769220613e-05, "logits/chosen": -0.35966721177101135, "logits/rejected": -0.32348862290382385, "logps/chosen": -0.00022426230134442449, "logps/rejected": -2.071716785430908, "loss": 0.3721, "nll_loss": 0.09301963448524475, "rewards/accuracies": 1.0, "rewards/chosen": -2.2426227587857284e-05, "rewards/margins": 0.20714925229549408, "rewards/rejected": -0.20717167854309082, "step": 13340 }, { "epoch": 9.226141078838173, "grad_norm": 3.5205812454223633, "learning_rate": 4.29921622867681e-06, "log_odds_chosen": 12.366856575012207, "log_odds_ratio": -7.017895768512972e-06, "logits/chosen": -0.059585437178611755, "logits/rejected": 0.06428371369838715, "logps/chosen": -0.00019455334404483438, "logps/rejected": -3.157578945159912, "loss": 0.3864, "nll_loss": 0.09659731388092041, "rewards/accuracies": 1.0, "rewards/chosen": -1.9455334040685557e-05, "rewards/margins": 0.3157384395599365, "rewards/rejected": -0.3157579302787781, "step": 13341 }, { "epoch": 9.2268326417704, "grad_norm": 3.987541913986206, "learning_rate": 4.295374212386661e-06, "log_odds_chosen": 10.470842361450195, "log_odds_ratio": -0.00010064824164146557, "logits/chosen": -0.3783642053604126, "logits/rejected": -0.4988144636154175, "logps/chosen": -0.0002472895721439272, "logps/rejected": -1.7773921489715576, "loss": 0.2752, "nll_loss": 0.06877979636192322, "rewards/accuracies": 1.0, "rewards/chosen": -2.472895721439272e-05, "rewards/margins": 0.17771446704864502, "rewards/rejected": -0.1777392029762268, "step": 13342 }, { "epoch": 9.227524204702627, "grad_norm": 6.074353218078613, "learning_rate": 4.291532196096512e-06, "log_odds_chosen": 11.384307861328125, "log_odds_ratio": -0.0003111205587629229, "logits/chosen": -0.09209015965461731, "logits/rejected": -0.08078590035438538, "logps/chosen": -0.0010177076328545809, "logps/rejected": -2.2785487174987793, "loss": 0.383, "nll_loss": 0.09571006894111633, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010177076183026657, "rewards/margins": 0.22775310277938843, "rewards/rejected": -0.2278548777103424, "step": 13343 }, { "epoch": 9.228215767634854, "grad_norm": 3.827584743499756, "learning_rate": 4.287690179806362e-06, "log_odds_chosen": 10.490351676940918, "log_odds_ratio": -4.755572081194259e-05, "logits/chosen": -0.32035011053085327, "logits/rejected": -0.34849417209625244, "logps/chosen": -0.00032143102725967765, "logps/rejected": -2.4039080142974854, "loss": 0.4004, "nll_loss": 0.10010391473770142, "rewards/accuracies": 1.0, "rewards/chosen": -3.214309981558472e-05, "rewards/margins": 0.24035868048667908, "rewards/rejected": -0.240390807390213, "step": 13344 }, { "epoch": 9.22890733056708, "grad_norm": 98.08912658691406, "learning_rate": 4.283848163516214e-06, "log_odds_chosen": 8.916812896728516, "log_odds_ratio": -0.06944891810417175, "logits/chosen": -0.22531327605247498, "logits/rejected": -0.18448351323604584, "logps/chosen": -0.0012186645762994885, "logps/rejected": -1.728334665298462, "loss": 0.6155, "nll_loss": 0.14693495631217957, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012186646927148104, "rewards/margins": 0.17271161079406738, "rewards/rejected": -0.17283347249031067, "step": 13345 }, { "epoch": 9.229598893499308, "grad_norm": 3.4264307022094727, "learning_rate": 4.280006147226065e-06, "log_odds_chosen": 11.563746452331543, "log_odds_ratio": -0.00013787433272227645, "logits/chosen": -0.7487600445747375, "logits/rejected": -0.8325913548469543, "logps/chosen": -0.000709613086655736, "logps/rejected": -3.6102347373962402, "loss": 0.3832, "nll_loss": 0.09579276293516159, "rewards/accuracies": 1.0, "rewards/chosen": -7.096131594153121e-05, "rewards/margins": 0.3609524965286255, "rewards/rejected": -0.361023485660553, "step": 13346 }, { "epoch": 9.230290456431534, "grad_norm": 2.988722801208496, "learning_rate": 4.2761641309359154e-06, "log_odds_chosen": 11.471531867980957, "log_odds_ratio": -1.3612433576781768e-05, "logits/chosen": -0.3343314528465271, "logits/rejected": -0.35128065943717957, "logps/chosen": -0.00019705847080331296, "logps/rejected": -2.150318145751953, "loss": 0.4472, "nll_loss": 0.11179892718791962, "rewards/accuracies": 1.0, "rewards/chosen": -1.9705847080331296e-05, "rewards/margins": 0.21501211822032928, "rewards/rejected": -0.21503181755542755, "step": 13347 }, { "epoch": 9.230982019363761, "grad_norm": 3.773630142211914, "learning_rate": 4.272322114645767e-06, "log_odds_chosen": 11.12164306640625, "log_odds_ratio": -9.118324669543654e-05, "logits/chosen": -0.21132421493530273, "logits/rejected": -0.22750277817249298, "logps/chosen": -0.0004267761541996151, "logps/rejected": -3.0749218463897705, "loss": 0.3305, "nll_loss": 0.08262672275304794, "rewards/accuracies": 1.0, "rewards/chosen": -4.267761323717423e-05, "rewards/margins": 0.3074495196342468, "rewards/rejected": -0.307492196559906, "step": 13348 }, { "epoch": 9.231673582295988, "grad_norm": 2.9947738647460938, "learning_rate": 4.268480098355617e-06, "log_odds_chosen": 11.24973201751709, "log_odds_ratio": -9.979541937354952e-05, "logits/chosen": -0.3087311387062073, "logits/rejected": -0.367544561624527, "logps/chosen": -0.00039041758282110095, "logps/rejected": -2.6293392181396484, "loss": 0.3028, "nll_loss": 0.07570140063762665, "rewards/accuracies": 1.0, "rewards/chosen": -3.9041759009705856e-05, "rewards/margins": 0.2628948986530304, "rewards/rejected": -0.2629339396953583, "step": 13349 }, { "epoch": 9.232365145228215, "grad_norm": 2.789212465286255, "learning_rate": 4.2646380820654685e-06, "log_odds_chosen": 10.9905424118042, "log_odds_ratio": -4.088755667908117e-05, "logits/chosen": -0.40984347462654114, "logits/rejected": -0.5168039798736572, "logps/chosen": -0.00014427973655983806, "logps/rejected": -1.7862883806228638, "loss": 0.3145, "nll_loss": 0.07861229032278061, "rewards/accuracies": 1.0, "rewards/chosen": -1.4427974747377448e-05, "rewards/margins": 0.17861440777778625, "rewards/rejected": -0.1786288321018219, "step": 13350 }, { "epoch": 9.233056708160442, "grad_norm": 4.028530597686768, "learning_rate": 4.2607960657753184e-06, "log_odds_chosen": 11.188385963439941, "log_odds_ratio": -3.841559373540804e-05, "logits/chosen": 0.10398241132497787, "logits/rejected": 0.01779722422361374, "logps/chosen": -0.00019596872152760625, "logps/rejected": -2.461146116256714, "loss": 0.4678, "nll_loss": 0.11693590134382248, "rewards/accuracies": 1.0, "rewards/chosen": -1.9596871425164863e-05, "rewards/margins": 0.24609503149986267, "rewards/rejected": -0.2461146116256714, "step": 13351 }, { "epoch": 9.233748271092669, "grad_norm": 2.9448742866516113, "learning_rate": 4.25695404948517e-06, "log_odds_chosen": 10.838666915893555, "log_odds_ratio": -0.00013965301332063973, "logits/chosen": 0.1105722114443779, "logits/rejected": 0.08422739803791046, "logps/chosen": -0.000485410651890561, "logps/rejected": -2.761599063873291, "loss": 0.3813, "nll_loss": 0.09530912339687347, "rewards/accuracies": 1.0, "rewards/chosen": -4.854106737184338e-05, "rewards/margins": 0.276111364364624, "rewards/rejected": -0.2761599123477936, "step": 13352 }, { "epoch": 9.234439834024895, "grad_norm": 3.085446357727051, "learning_rate": 4.253112033195021e-06, "log_odds_chosen": 11.253827095031738, "log_odds_ratio": -5.087409226689488e-05, "logits/chosen": -0.0010985136032104492, "logits/rejected": -0.08064904808998108, "logps/chosen": -0.00032150166225619614, "logps/rejected": -2.4000725746154785, "loss": 0.3155, "nll_loss": 0.07887643575668335, "rewards/accuracies": 1.0, "rewards/chosen": -3.215016477042809e-05, "rewards/margins": 0.23997509479522705, "rewards/rejected": -0.24000725150108337, "step": 13353 }, { "epoch": 9.235131396957122, "grad_norm": 3.1542537212371826, "learning_rate": 4.2492700169048715e-06, "log_odds_chosen": 10.027928352355957, "log_odds_ratio": -7.808022201061249e-05, "logits/chosen": -0.3447612524032593, "logits/rejected": -0.37404900789260864, "logps/chosen": -0.0002975011302623898, "logps/rejected": -1.6374207735061646, "loss": 0.2792, "nll_loss": 0.06978316605091095, "rewards/accuracies": 1.0, "rewards/chosen": -2.975011375383474e-05, "rewards/margins": 0.16371232271194458, "rewards/rejected": -0.1637420654296875, "step": 13354 }, { "epoch": 9.235822959889349, "grad_norm": 3.949903726577759, "learning_rate": 4.245428000614723e-06, "log_odds_chosen": 12.837517738342285, "log_odds_ratio": -7.425682724715443e-06, "logits/chosen": -0.4023365378379822, "logits/rejected": -0.4887845516204834, "logps/chosen": -6.580314948223531e-05, "logps/rejected": -3.1313557624816895, "loss": 0.3661, "nll_loss": 0.09151305258274078, "rewards/accuracies": 1.0, "rewards/chosen": -6.580315130122472e-06, "rewards/margins": 0.3131290078163147, "rewards/rejected": -0.31313556432724, "step": 13355 }, { "epoch": 9.236514522821576, "grad_norm": 2.7862908840179443, "learning_rate": 4.241585984324574e-06, "log_odds_chosen": 11.264383316040039, "log_odds_ratio": -1.9450360923656262e-05, "logits/chosen": -0.47059521079063416, "logits/rejected": -0.518010139465332, "logps/chosen": -0.00010168216249439865, "logps/rejected": -1.9483394622802734, "loss": 0.2726, "nll_loss": 0.0681493729352951, "rewards/accuracies": 1.0, "rewards/chosen": -1.0168217158934567e-05, "rewards/margins": 0.19482378661632538, "rewards/rejected": -0.19483394920825958, "step": 13356 }, { "epoch": 9.237206085753803, "grad_norm": 3.5049498081207275, "learning_rate": 4.237743968034425e-06, "log_odds_chosen": 10.488624572753906, "log_odds_ratio": -0.00019752327352762222, "logits/chosen": -0.39020171761512756, "logits/rejected": -0.4895351529121399, "logps/chosen": -0.000247740390477702, "logps/rejected": -1.9443116188049316, "loss": 0.3809, "nll_loss": 0.09521554410457611, "rewards/accuracies": 1.0, "rewards/chosen": -2.4774039047770202e-05, "rewards/margins": 0.1944064050912857, "rewards/rejected": -0.19443117082118988, "step": 13357 }, { "epoch": 9.23789764868603, "grad_norm": 4.490814208984375, "learning_rate": 4.233901951744275e-06, "log_odds_chosen": 11.172880172729492, "log_odds_ratio": -8.803656965028495e-05, "logits/chosen": -0.5653146505355835, "logits/rejected": -0.534360408782959, "logps/chosen": -0.0006889343494549394, "logps/rejected": -2.551142692565918, "loss": 0.4827, "nll_loss": 0.1206570714712143, "rewards/accuracies": 1.0, "rewards/chosen": -6.889343057991937e-05, "rewards/margins": 0.2550453543663025, "rewards/rejected": -0.25511425733566284, "step": 13358 }, { "epoch": 9.238589211618256, "grad_norm": 2.8272547721862793, "learning_rate": 4.230059935454127e-06, "log_odds_chosen": 11.310001373291016, "log_odds_ratio": -6.405496969819069e-05, "logits/chosen": -0.3014225363731384, "logits/rejected": -0.46181774139404297, "logps/chosen": -0.00023675702686887234, "logps/rejected": -2.3247127532958984, "loss": 0.3121, "nll_loss": 0.07801727950572968, "rewards/accuracies": 1.0, "rewards/chosen": -2.3675704142078757e-05, "rewards/margins": 0.23244759440422058, "rewards/rejected": -0.2324712574481964, "step": 13359 }, { "epoch": 9.239280774550483, "grad_norm": 3.4311835765838623, "learning_rate": 4.226217919163977e-06, "log_odds_chosen": 11.17218017578125, "log_odds_ratio": -0.00012092379620298743, "logits/chosen": -0.7945163249969482, "logits/rejected": -0.7751541137695312, "logps/chosen": -0.00014527878374792635, "logps/rejected": -1.9728879928588867, "loss": 0.2791, "nll_loss": 0.06976176053285599, "rewards/accuracies": 1.0, "rewards/chosen": -1.4527876373904292e-05, "rewards/margins": 0.19727426767349243, "rewards/rejected": -0.19728878140449524, "step": 13360 }, { "epoch": 9.23997233748271, "grad_norm": 3.174574375152588, "learning_rate": 4.2223759028738285e-06, "log_odds_chosen": 11.811761856079102, "log_odds_ratio": -1.3907579159422312e-05, "logits/chosen": -0.6732967495918274, "logits/rejected": -0.7310502529144287, "logps/chosen": -0.00011698234447976574, "logps/rejected": -2.339672327041626, "loss": 0.3738, "nll_loss": 0.09343670308589935, "rewards/accuracies": 1.0, "rewards/chosen": -1.1698235539370216e-05, "rewards/margins": 0.23395554721355438, "rewards/rejected": -0.23396724462509155, "step": 13361 }, { "epoch": 9.240663900414937, "grad_norm": 7.092174053192139, "learning_rate": 4.218533886583679e-06, "log_odds_chosen": 10.788871765136719, "log_odds_ratio": -5.844702900503762e-05, "logits/chosen": -0.4904933273792267, "logits/rejected": -0.5811038017272949, "logps/chosen": -0.0001661910500843078, "logps/rejected": -2.029539108276367, "loss": 0.7281, "nll_loss": 0.1820286214351654, "rewards/accuracies": 1.0, "rewards/chosen": -1.661910573602654e-05, "rewards/margins": 0.2029373049736023, "rewards/rejected": -0.20295390486717224, "step": 13362 }, { "epoch": 9.241355463347164, "grad_norm": 4.031524658203125, "learning_rate": 4.21469187029353e-06, "log_odds_chosen": 11.320475578308105, "log_odds_ratio": -0.00017843538080342114, "logits/chosen": -0.1563161313533783, "logits/rejected": -0.33736416697502136, "logps/chosen": -0.00020673539256677032, "logps/rejected": -2.886279582977295, "loss": 0.4222, "nll_loss": 0.10553856194019318, "rewards/accuracies": 1.0, "rewards/chosen": -2.0673540348070674e-05, "rewards/margins": 0.28860729932785034, "rewards/rejected": -0.2886279821395874, "step": 13363 }, { "epoch": 9.24204702627939, "grad_norm": 3.389230489730835, "learning_rate": 4.2108498540033816e-06, "log_odds_chosen": 10.792014122009277, "log_odds_ratio": -0.0001391873083775863, "logits/chosen": -0.6205494999885559, "logits/rejected": -0.654026210308075, "logps/chosen": -0.00013583633699454367, "logps/rejected": -1.9432293176651, "loss": 0.3645, "nll_loss": 0.09111414849758148, "rewards/accuracies": 1.0, "rewards/chosen": -1.358363533654483e-05, "rewards/margins": 0.19430936872959137, "rewards/rejected": -0.19432294368743896, "step": 13364 }, { "epoch": 9.242738589211617, "grad_norm": 2.394958257675171, "learning_rate": 4.207007837713232e-06, "log_odds_chosen": 10.103708267211914, "log_odds_ratio": -0.00018401000124868006, "logits/chosen": -0.25698402523994446, "logits/rejected": -0.341422975063324, "logps/chosen": -0.0002892519405577332, "logps/rejected": -1.5689125061035156, "loss": 0.2296, "nll_loss": 0.05737714469432831, "rewards/accuracies": 1.0, "rewards/chosen": -2.892519478336908e-05, "rewards/margins": 0.15686233341693878, "rewards/rejected": -0.15689125657081604, "step": 13365 }, { "epoch": 9.243430152143844, "grad_norm": 3.7048909664154053, "learning_rate": 4.203165821423083e-06, "log_odds_chosen": 10.272027015686035, "log_odds_ratio": -0.00011471907782834023, "logits/chosen": -0.7627463936805725, "logits/rejected": -0.7447300553321838, "logps/chosen": -0.0012812871718779206, "logps/rejected": -2.3495349884033203, "loss": 0.3925, "nll_loss": 0.09810735285282135, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012812871136702597, "rewards/margins": 0.23482537269592285, "rewards/rejected": -0.23495349287986755, "step": 13366 }, { "epoch": 9.244121715076071, "grad_norm": 3.1079623699188232, "learning_rate": 4.199323805132934e-06, "log_odds_chosen": 11.256549835205078, "log_odds_ratio": -8.348859410034493e-05, "logits/chosen": -0.49389970302581787, "logits/rejected": -0.5464988946914673, "logps/chosen": -0.0013827980728819966, "logps/rejected": -2.9009451866149902, "loss": 0.3342, "nll_loss": 0.08353433012962341, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001382798218401149, "rewards/margins": 0.2899562418460846, "rewards/rejected": -0.2900945246219635, "step": 13367 }, { "epoch": 9.244813278008298, "grad_norm": 2.8402726650238037, "learning_rate": 4.1954817888427846e-06, "log_odds_chosen": 10.971306800842285, "log_odds_ratio": -7.381623436231166e-05, "logits/chosen": 0.45187243819236755, "logits/rejected": 0.08216936886310577, "logps/chosen": -0.0003103939234279096, "logps/rejected": -2.2004811763763428, "loss": 0.2843, "nll_loss": 0.071077361702919, "rewards/accuracies": 1.0, "rewards/chosen": -3.10393916151952e-05, "rewards/margins": 0.22001707553863525, "rewards/rejected": -0.22004812955856323, "step": 13368 }, { "epoch": 9.245504840940525, "grad_norm": 3.845113515853882, "learning_rate": 4.191639772552636e-06, "log_odds_chosen": 12.36893081665039, "log_odds_ratio": -8.689417882123962e-06, "logits/chosen": -0.8230301141738892, "logits/rejected": -0.818099856376648, "logps/chosen": -0.00011422030365793034, "logps/rejected": -2.9345250129699707, "loss": 0.4345, "nll_loss": 0.10861419141292572, "rewards/accuracies": 1.0, "rewards/chosen": -1.1422031093388796e-05, "rewards/margins": 0.29344111680984497, "rewards/rejected": -0.29345250129699707, "step": 13369 }, { "epoch": 9.246196403872752, "grad_norm": 3.3559730052948, "learning_rate": 4.187797756262487e-06, "log_odds_chosen": 11.219395637512207, "log_odds_ratio": -6.94270056555979e-05, "logits/chosen": -0.12866298854351044, "logits/rejected": -0.1472199559211731, "logps/chosen": -0.00026041181990876794, "logps/rejected": -2.4518699645996094, "loss": 0.2824, "nll_loss": 0.07060521841049194, "rewards/accuracies": 1.0, "rewards/chosen": -2.6041183446068317e-05, "rewards/margins": 0.24516098201274872, "rewards/rejected": -0.24518701434135437, "step": 13370 }, { "epoch": 9.24688796680498, "grad_norm": 2.970355749130249, "learning_rate": 4.183955739972338e-06, "log_odds_chosen": 11.566314697265625, "log_odds_ratio": -2.120799763360992e-05, "logits/chosen": -0.33343368768692017, "logits/rejected": -0.4095820486545563, "logps/chosen": -0.0002107325999531895, "logps/rejected": -2.442662477493286, "loss": 0.3911, "nll_loss": 0.0977800190448761, "rewards/accuracies": 1.0, "rewards/chosen": -2.107325963152107e-05, "rewards/margins": 0.24424517154693604, "rewards/rejected": -0.24426622688770294, "step": 13371 }, { "epoch": 9.247579529737205, "grad_norm": 4.310222625732422, "learning_rate": 4.180113723682188e-06, "log_odds_chosen": 10.756083488464355, "log_odds_ratio": -8.993431401904672e-05, "logits/chosen": -0.3720986247062683, "logits/rejected": -0.4264030456542969, "logps/chosen": -0.0008469214662909508, "logps/rejected": -2.52754807472229, "loss": 0.4429, "nll_loss": 0.11071190237998962, "rewards/accuracies": 1.0, "rewards/chosen": -8.469213935313746e-05, "rewards/margins": 0.25267013907432556, "rewards/rejected": -0.252754807472229, "step": 13372 }, { "epoch": 9.248271092669434, "grad_norm": 4.2071428298950195, "learning_rate": 4.17627170739204e-06, "log_odds_chosen": 12.32644271850586, "log_odds_ratio": -9.070672604138963e-06, "logits/chosen": -0.4278135895729065, "logits/rejected": -0.4966719150543213, "logps/chosen": -0.00012434230302460492, "logps/rejected": -3.097768783569336, "loss": 0.4411, "nll_loss": 0.11027605831623077, "rewards/accuracies": 1.0, "rewards/chosen": -1.2434231393854134e-05, "rewards/margins": 0.30976441502571106, "rewards/rejected": -0.3097768723964691, "step": 13373 }, { "epoch": 9.248962655601659, "grad_norm": 3.6200366020202637, "learning_rate": 4.17242969110189e-06, "log_odds_chosen": 11.759748458862305, "log_odds_ratio": -1.9820596207864583e-05, "logits/chosen": -0.36331379413604736, "logits/rejected": -0.3966585695743561, "logps/chosen": -0.00013985123950988054, "logps/rejected": -2.560699462890625, "loss": 0.4649, "nll_loss": 0.11621088534593582, "rewards/accuracies": 1.0, "rewards/chosen": -1.3985122677695472e-05, "rewards/margins": 0.25605595111846924, "rewards/rejected": -0.25606992840766907, "step": 13374 }, { "epoch": 9.249654218533887, "grad_norm": 3.5906012058258057, "learning_rate": 4.1685876748117415e-06, "log_odds_chosen": 10.80752944946289, "log_odds_ratio": -0.0004959495854564011, "logits/chosen": -0.7887055277824402, "logits/rejected": -0.6762387156486511, "logps/chosen": -0.00022910605184733868, "logps/rejected": -2.1070122718811035, "loss": 0.3611, "nll_loss": 0.09021367132663727, "rewards/accuracies": 1.0, "rewards/chosen": -2.2910604457138106e-05, "rewards/margins": 0.2106783092021942, "rewards/rejected": -0.21070122718811035, "step": 13375 }, { "epoch": 9.250345781466113, "grad_norm": 2.9550132751464844, "learning_rate": 4.164745658521592e-06, "log_odds_chosen": 11.08054256439209, "log_odds_ratio": -3.7290137697709724e-05, "logits/chosen": -0.44283416867256165, "logits/rejected": -0.4498756229877472, "logps/chosen": -0.00014053418999537826, "logps/rejected": -1.827393889427185, "loss": 0.313, "nll_loss": 0.07825109362602234, "rewards/accuracies": 1.0, "rewards/chosen": -1.405342118232511e-05, "rewards/margins": 0.18272534012794495, "rewards/rejected": -0.18273937702178955, "step": 13376 }, { "epoch": 9.251037344398341, "grad_norm": 3.1746561527252197, "learning_rate": 4.160903642231443e-06, "log_odds_chosen": 11.631880760192871, "log_odds_ratio": -2.470656909281388e-05, "logits/chosen": -0.30462899804115295, "logits/rejected": -0.4730372428894043, "logps/chosen": -0.0001536312629468739, "logps/rejected": -2.7632954120635986, "loss": 0.3668, "nll_loss": 0.09170671552419662, "rewards/accuracies": 1.0, "rewards/chosen": -1.536312629468739e-05, "rewards/margins": 0.2763141691684723, "rewards/rejected": -0.27632951736450195, "step": 13377 }, { "epoch": 9.251728907330566, "grad_norm": 4.26964807510376, "learning_rate": 4.157061625941295e-06, "log_odds_chosen": 10.886106491088867, "log_odds_ratio": -5.6561413657618687e-05, "logits/chosen": -0.2692357897758484, "logits/rejected": -0.25641119480133057, "logps/chosen": -0.001114910002797842, "logps/rejected": -2.8041157722473145, "loss": 0.4504, "nll_loss": 0.11258277297019958, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011149099736940116, "rewards/margins": 0.2803000807762146, "rewards/rejected": -0.28041160106658936, "step": 13378 }, { "epoch": 9.252420470262795, "grad_norm": 2.7620761394500732, "learning_rate": 4.153219609651145e-06, "log_odds_chosen": 11.351924896240234, "log_odds_ratio": -8.459114178549498e-05, "logits/chosen": -0.6529866456985474, "logits/rejected": -0.5455139875411987, "logps/chosen": -0.00045952797518111765, "logps/rejected": -2.823627233505249, "loss": 0.3881, "nll_loss": 0.09702349454164505, "rewards/accuracies": 1.0, "rewards/chosen": -4.595279824570753e-05, "rewards/margins": 0.28231680393218994, "rewards/rejected": -0.2823627293109894, "step": 13379 }, { "epoch": 9.25311203319502, "grad_norm": 6.673590183258057, "learning_rate": 4.149377593360996e-06, "log_odds_chosen": 11.442913055419922, "log_odds_ratio": -6.825349555583671e-05, "logits/chosen": -0.5800237655639648, "logits/rejected": -0.6468181610107422, "logps/chosen": -8.98464786587283e-05, "logps/rejected": -2.273469924926758, "loss": 0.4706, "nll_loss": 0.11765521764755249, "rewards/accuracies": 1.0, "rewards/chosen": -8.98464804777177e-06, "rewards/margins": 0.2273380309343338, "rewards/rejected": -0.2273470014333725, "step": 13380 }, { "epoch": 9.253803596127248, "grad_norm": 3.455115556716919, "learning_rate": 4.145535577070847e-06, "log_odds_chosen": 11.363089561462402, "log_odds_ratio": -1.7025658962666057e-05, "logits/chosen": -0.21496909856796265, "logits/rejected": -0.279478520154953, "logps/chosen": -0.00015081878518685699, "logps/rejected": -2.4104347229003906, "loss": 0.4615, "nll_loss": 0.11536456644535065, "rewards/accuracies": 1.0, "rewards/chosen": -1.508187961007934e-05, "rewards/margins": 0.24102838337421417, "rewards/rejected": -0.24104347825050354, "step": 13381 }, { "epoch": 9.254495159059474, "grad_norm": 11.020970344543457, "learning_rate": 4.1416935607806984e-06, "log_odds_chosen": 12.45634651184082, "log_odds_ratio": -2.0810161004192196e-05, "logits/chosen": -0.5012654662132263, "logits/rejected": -0.5107147097587585, "logps/chosen": -0.0001061395087162964, "logps/rejected": -3.116454601287842, "loss": 0.326, "nll_loss": 0.08149020373821259, "rewards/accuracies": 1.0, "rewards/chosen": -1.0613951417326462e-05, "rewards/margins": 0.3116348385810852, "rewards/rejected": -0.3116454482078552, "step": 13382 }, { "epoch": 9.255186721991702, "grad_norm": 2.0821995735168457, "learning_rate": 4.137851544490548e-06, "log_odds_chosen": 11.173574447631836, "log_odds_ratio": -0.00016994534234981984, "logits/chosen": -0.4637315273284912, "logits/rejected": -0.48671579360961914, "logps/chosen": -0.0007016340969130397, "logps/rejected": -2.4009807109832764, "loss": 0.2577, "nll_loss": 0.06439636647701263, "rewards/accuracies": 1.0, "rewards/chosen": -7.01634053257294e-05, "rewards/margins": 0.24002791941165924, "rewards/rejected": -0.24009808897972107, "step": 13383 }, { "epoch": 9.255878284923927, "grad_norm": 3.860811948776245, "learning_rate": 4.1340095282004e-06, "log_odds_chosen": 11.584202766418457, "log_odds_ratio": -3.2246229238808155e-05, "logits/chosen": -0.4585683345794678, "logits/rejected": -0.49702373147010803, "logps/chosen": -6.798960384912789e-05, "logps/rejected": -1.8565504550933838, "loss": 0.4233, "nll_loss": 0.10581757873296738, "rewards/accuracies": 1.0, "rewards/chosen": -6.798960384912789e-06, "rewards/margins": 0.18564824759960175, "rewards/rejected": -0.18565505743026733, "step": 13384 }, { "epoch": 9.256569847856156, "grad_norm": 3.1565005779266357, "learning_rate": 4.130167511910251e-06, "log_odds_chosen": 10.394317626953125, "log_odds_ratio": -9.309701272286475e-05, "logits/chosen": -0.19676190614700317, "logits/rejected": -0.27044621109962463, "logps/chosen": -0.0005752279539592564, "logps/rejected": -2.072195053100586, "loss": 0.3627, "nll_loss": 0.09065388888120651, "rewards/accuracies": 1.0, "rewards/chosen": -5.7522796851117164e-05, "rewards/margins": 0.20716197788715363, "rewards/rejected": -0.20721949636936188, "step": 13385 }, { "epoch": 9.25726141078838, "grad_norm": 5.39926815032959, "learning_rate": 4.1263254956201014e-06, "log_odds_chosen": 10.37912368774414, "log_odds_ratio": -0.0001625988370506093, "logits/chosen": -0.24881801009178162, "logits/rejected": -0.22858017683029175, "logps/chosen": -0.00021170491527300328, "logps/rejected": -1.6691802740097046, "loss": 0.4196, "nll_loss": 0.10487478971481323, "rewards/accuracies": 1.0, "rewards/chosen": -2.117049189109821e-05, "rewards/margins": 0.16689686477184296, "rewards/rejected": -0.16691802442073822, "step": 13386 }, { "epoch": 9.25795297372061, "grad_norm": 3.983370304107666, "learning_rate": 4.122483479329953e-06, "log_odds_chosen": 10.968613624572754, "log_odds_ratio": -3.0124385375529528e-05, "logits/chosen": -0.5946142673492432, "logits/rejected": -0.35130080580711365, "logps/chosen": -0.00022189400624483824, "logps/rejected": -2.337252616882324, "loss": 0.7163, "nll_loss": 0.179067462682724, "rewards/accuracies": 1.0, "rewards/chosen": -2.2189400624483824e-05, "rewards/margins": 0.23370307683944702, "rewards/rejected": -0.23372526466846466, "step": 13387 }, { "epoch": 9.258644536652836, "grad_norm": 3.8471121788024902, "learning_rate": 4.118641463039803e-06, "log_odds_chosen": 9.967870712280273, "log_odds_ratio": -0.00011659861047519371, "logits/chosen": -0.24749401211738586, "logits/rejected": -0.2510087788105011, "logps/chosen": -0.0004633513162843883, "logps/rejected": -1.5519118309020996, "loss": 0.2452, "nll_loss": 0.061281684786081314, "rewards/accuracies": 1.0, "rewards/chosen": -4.633513162843883e-05, "rewards/margins": 0.1551448553800583, "rewards/rejected": -0.15519118309020996, "step": 13388 }, { "epoch": 9.259336099585063, "grad_norm": 3.9011762142181396, "learning_rate": 4.1147994467496545e-06, "log_odds_chosen": 10.798189163208008, "log_odds_ratio": -5.881582910660654e-05, "logits/chosen": 0.12064599990844727, "logits/rejected": 0.006398455239832401, "logps/chosen": -0.0001932132727233693, "logps/rejected": -1.985304832458496, "loss": 0.4197, "nll_loss": 0.10492676496505737, "rewards/accuracies": 1.0, "rewards/chosen": -1.932132727233693e-05, "rewards/margins": 0.19851118326187134, "rewards/rejected": -0.19853049516677856, "step": 13389 }, { "epoch": 9.26002766251729, "grad_norm": 2.5057811737060547, "learning_rate": 4.110957430459505e-06, "log_odds_chosen": 10.980243682861328, "log_odds_ratio": -2.486979792593047e-05, "logits/chosen": -0.4247814416885376, "logits/rejected": -0.4757453501224518, "logps/chosen": -0.00019985140534117818, "logps/rejected": -2.2650866508483887, "loss": 0.2637, "nll_loss": 0.06593403965234756, "rewards/accuracies": 1.0, "rewards/chosen": -1.9985140170319937e-05, "rewards/margins": 0.22648866474628448, "rewards/rejected": -0.22650866210460663, "step": 13390 }, { "epoch": 9.260719225449517, "grad_norm": 3.2654213905334473, "learning_rate": 4.107115414169356e-06, "log_odds_chosen": 11.39018726348877, "log_odds_ratio": -7.856798765715212e-05, "logits/chosen": -0.42784425616264343, "logits/rejected": -0.4002974033355713, "logps/chosen": -0.00047765413182787597, "logps/rejected": -3.1111772060394287, "loss": 0.3503, "nll_loss": 0.08757692575454712, "rewards/accuracies": 1.0, "rewards/chosen": -4.7765417548362166e-05, "rewards/margins": 0.31106996536254883, "rewards/rejected": -0.3111177086830139, "step": 13391 }, { "epoch": 9.261410788381744, "grad_norm": 4.967043399810791, "learning_rate": 4.103273397879208e-06, "log_odds_chosen": 10.794997215270996, "log_odds_ratio": -3.917513458873145e-05, "logits/chosen": -0.70682692527771, "logits/rejected": -0.7203770875930786, "logps/chosen": -0.00022574425383936614, "logps/rejected": -2.185096263885498, "loss": 0.7308, "nll_loss": 0.18269097805023193, "rewards/accuracies": 1.0, "rewards/chosen": -2.2574426111532375e-05, "rewards/margins": 0.21848703920841217, "rewards/rejected": -0.21850961446762085, "step": 13392 }, { "epoch": 9.26210235131397, "grad_norm": 3.9451189041137695, "learning_rate": 4.099431381589058e-06, "log_odds_chosen": 10.230953216552734, "log_odds_ratio": -8.301199704874307e-05, "logits/chosen": -0.3331199288368225, "logits/rejected": -0.3650832176208496, "logps/chosen": -0.0002603030006866902, "logps/rejected": -1.8632946014404297, "loss": 0.4404, "nll_loss": 0.11009424924850464, "rewards/accuracies": 1.0, "rewards/chosen": -2.6030296794488095e-05, "rewards/margins": 0.18630343675613403, "rewards/rejected": -0.18632946908473969, "step": 13393 }, { "epoch": 9.262793914246197, "grad_norm": 3.347769260406494, "learning_rate": 4.095589365298909e-06, "log_odds_chosen": 10.683270454406738, "log_odds_ratio": -0.0002262951893499121, "logits/chosen": -0.3697054386138916, "logits/rejected": -0.37479496002197266, "logps/chosen": -0.0003731002798303962, "logps/rejected": -2.1226487159729004, "loss": 0.3429, "nll_loss": 0.08570367842912674, "rewards/accuracies": 1.0, "rewards/chosen": -3.73100301658269e-05, "rewards/margins": 0.21222756803035736, "rewards/rejected": -0.21226486563682556, "step": 13394 }, { "epoch": 9.263485477178424, "grad_norm": 3.057373523712158, "learning_rate": 4.09174734900876e-06, "log_odds_chosen": 11.90007495880127, "log_odds_ratio": -9.3462695076596e-06, "logits/chosen": -0.3630257248878479, "logits/rejected": -0.3154976963996887, "logps/chosen": -0.0001326186174992472, "logps/rejected": -2.3629908561706543, "loss": 0.3556, "nll_loss": 0.08889217674732208, "rewards/accuracies": 1.0, "rewards/chosen": -1.3261863387015183e-05, "rewards/margins": 0.23628583550453186, "rewards/rejected": -0.2362990826368332, "step": 13395 }, { "epoch": 9.264177040110651, "grad_norm": 3.1586709022521973, "learning_rate": 4.0879053327186115e-06, "log_odds_chosen": 12.43212890625, "log_odds_ratio": -1.1175688996445388e-05, "logits/chosen": -0.3281235694885254, "logits/rejected": -0.36359724402427673, "logps/chosen": -0.00022547683329321444, "logps/rejected": -3.7944984436035156, "loss": 0.4362, "nll_loss": 0.10905685275793076, "rewards/accuracies": 1.0, "rewards/chosen": -2.2547683329321444e-05, "rewards/margins": 0.3794272840023041, "rewards/rejected": -0.37944984436035156, "step": 13396 }, { "epoch": 9.264868603042878, "grad_norm": 3.0777478218078613, "learning_rate": 4.084063316428461e-06, "log_odds_chosen": 11.970759391784668, "log_odds_ratio": -1.6800740922917612e-05, "logits/chosen": -0.4665202796459198, "logits/rejected": -0.6135318279266357, "logps/chosen": -0.00027859132387675345, "logps/rejected": -3.044900894165039, "loss": 0.2766, "nll_loss": 0.06914292275905609, "rewards/accuracies": 1.0, "rewards/chosen": -2.785913056868594e-05, "rewards/margins": 0.3044622540473938, "rewards/rejected": -0.3044900894165039, "step": 13397 }, { "epoch": 9.265560165975105, "grad_norm": 3.3075602054595947, "learning_rate": 4.080221300138313e-06, "log_odds_chosen": 11.618972778320312, "log_odds_ratio": -1.8447863112669438e-05, "logits/chosen": -0.35420989990234375, "logits/rejected": -0.3924306333065033, "logps/chosen": -8.088632603175938e-05, "logps/rejected": -2.1522703170776367, "loss": 0.3113, "nll_loss": 0.07781346887350082, "rewards/accuracies": 1.0, "rewards/chosen": -8.088632966973819e-06, "rewards/margins": 0.21521896123886108, "rewards/rejected": -0.21522706747055054, "step": 13398 }, { "epoch": 9.266251728907331, "grad_norm": 3.825265407562256, "learning_rate": 4.076379283848164e-06, "log_odds_chosen": 10.867203712463379, "log_odds_ratio": -0.00012594895088113844, "logits/chosen": -0.16059978306293488, "logits/rejected": -0.40944528579711914, "logps/chosen": -0.0003734501078724861, "logps/rejected": -2.837820291519165, "loss": 0.4377, "nll_loss": 0.10940054059028625, "rewards/accuracies": 1.0, "rewards/chosen": -3.734500933205709e-05, "rewards/margins": 0.2837446928024292, "rewards/rejected": -0.283782035112381, "step": 13399 }, { "epoch": 9.266943291839558, "grad_norm": 4.893101692199707, "learning_rate": 4.0725372675580145e-06, "log_odds_chosen": 10.198366165161133, "log_odds_ratio": -0.0001832096022553742, "logits/chosen": 0.3699157238006592, "logits/rejected": 0.3098328113555908, "logps/chosen": -0.000413937697885558, "logps/rejected": -1.8508220911026, "loss": 0.4808, "nll_loss": 0.12018544971942902, "rewards/accuracies": 1.0, "rewards/chosen": -4.139377051615156e-05, "rewards/margins": 0.18504083156585693, "rewards/rejected": -0.18508222699165344, "step": 13400 }, { "epoch": 9.267634854771785, "grad_norm": 4.754893779754639, "learning_rate": 4.068695251267866e-06, "log_odds_chosen": 10.338882446289062, "log_odds_ratio": -0.00017796538304537535, "logits/chosen": -0.13030719757080078, "logits/rejected": -0.13309229910373688, "logps/chosen": -0.0021453266963362694, "logps/rejected": -2.7111756801605225, "loss": 0.5487, "nll_loss": 0.13715097308158875, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021453265799209476, "rewards/margins": 0.2709030508995056, "rewards/rejected": -0.27111756801605225, "step": 13401 }, { "epoch": 9.268326417704012, "grad_norm": 3.9337031841278076, "learning_rate": 4.064853234977716e-06, "log_odds_chosen": 10.759675979614258, "log_odds_ratio": -3.709693919518031e-05, "logits/chosen": -0.3434372842311859, "logits/rejected": -0.2868998944759369, "logps/chosen": -0.0002389464934822172, "logps/rejected": -2.3696932792663574, "loss": 0.251, "nll_loss": 0.06274402141571045, "rewards/accuracies": 1.0, "rewards/chosen": -2.3894648620625958e-05, "rewards/margins": 0.23694540560245514, "rewards/rejected": -0.23696933686733246, "step": 13402 }, { "epoch": 9.269017980636239, "grad_norm": 3.7900757789611816, "learning_rate": 4.0610112186875676e-06, "log_odds_chosen": 10.866965293884277, "log_odds_ratio": -2.9216182156233117e-05, "logits/chosen": -0.07639175653457642, "logits/rejected": -0.0732036828994751, "logps/chosen": -0.00033576946589164436, "logps/rejected": -2.242311477661133, "loss": 0.3549, "nll_loss": 0.08871279656887054, "rewards/accuracies": 1.0, "rewards/chosen": -3.35769473167602e-05, "rewards/margins": 0.22419756650924683, "rewards/rejected": -0.22423113882541656, "step": 13403 }, { "epoch": 9.269709543568466, "grad_norm": 3.4664368629455566, "learning_rate": 4.057169202397418e-06, "log_odds_chosen": 11.006653785705566, "log_odds_ratio": -6.016073530190624e-05, "logits/chosen": -0.18965166807174683, "logits/rejected": -0.1772252321243286, "logps/chosen": -9.795861114980653e-05, "logps/rejected": -1.912332534790039, "loss": 0.3006, "nll_loss": 0.07513882219791412, "rewards/accuracies": 1.0, "rewards/chosen": -9.795860933081713e-06, "rewards/margins": 0.19122344255447388, "rewards/rejected": -0.19123321771621704, "step": 13404 }, { "epoch": 9.270401106500692, "grad_norm": 3.510704278945923, "learning_rate": 4.053327186107269e-06, "log_odds_chosen": 12.442310333251953, "log_odds_ratio": -7.347447535721585e-06, "logits/chosen": -0.5683971643447876, "logits/rejected": -0.56528240442276, "logps/chosen": -0.00013179892266634852, "logps/rejected": -3.196437120437622, "loss": 0.4168, "nll_loss": 0.10418683290481567, "rewards/accuracies": 1.0, "rewards/chosen": -1.3179893358028494e-05, "rewards/margins": 0.31963056325912476, "rewards/rejected": -0.3196437358856201, "step": 13405 }, { "epoch": 9.27109266943292, "grad_norm": 3.9302003383636475, "learning_rate": 4.04948516981712e-06, "log_odds_chosen": 12.506831169128418, "log_odds_ratio": -2.83784611383453e-05, "logits/chosen": 0.01308729313313961, "logits/rejected": -0.02067945897579193, "logps/chosen": -0.00013831471733283252, "logps/rejected": -2.956017017364502, "loss": 0.442, "nll_loss": 0.11050447821617126, "rewards/accuracies": 1.0, "rewards/chosen": -1.3831473552272655e-05, "rewards/margins": 0.2955878973007202, "rewards/rejected": -0.2956017255783081, "step": 13406 }, { "epoch": 9.271784232365146, "grad_norm": 4.038262844085693, "learning_rate": 4.045643153526971e-06, "log_odds_chosen": 10.31809139251709, "log_odds_ratio": -7.249046757351607e-05, "logits/chosen": -0.2748219966888428, "logits/rejected": -0.2382151484489441, "logps/chosen": -0.0004780899325851351, "logps/rejected": -1.9250624179840088, "loss": 0.2966, "nll_loss": 0.07414360344409943, "rewards/accuracies": 1.0, "rewards/chosen": -4.780899689649232e-05, "rewards/margins": 0.19245845079421997, "rewards/rejected": -0.19250623881816864, "step": 13407 }, { "epoch": 9.272475795297373, "grad_norm": 2.4974985122680664, "learning_rate": 4.041801137236822e-06, "log_odds_chosen": 10.449653625488281, "log_odds_ratio": -0.00027347650029696524, "logits/chosen": -0.08837146311998367, "logits/rejected": -0.08992377668619156, "logps/chosen": -0.0009034351096488535, "logps/rejected": -1.8143136501312256, "loss": 0.3107, "nll_loss": 0.07764752954244614, "rewards/accuracies": 1.0, "rewards/chosen": -9.03435138752684e-05, "rewards/margins": 0.1813410371541977, "rewards/rejected": -0.181431382894516, "step": 13408 }, { "epoch": 9.2731673582296, "grad_norm": 2.697535991668701, "learning_rate": 4.037959120946673e-06, "log_odds_chosen": 11.835368156433105, "log_odds_ratio": -2.7249665436102077e-05, "logits/chosen": -0.5644937753677368, "logits/rejected": -0.5307819843292236, "logps/chosen": -0.00026464249822311103, "logps/rejected": -2.5975027084350586, "loss": 0.3318, "nll_loss": 0.08295246958732605, "rewards/accuracies": 1.0, "rewards/chosen": -2.6464249458513223e-05, "rewards/margins": 0.25972384214401245, "rewards/rejected": -0.2597503066062927, "step": 13409 }, { "epoch": 9.273858921161827, "grad_norm": 4.584612846374512, "learning_rate": 4.0341171046565245e-06, "log_odds_chosen": 11.347679138183594, "log_odds_ratio": -1.986040297197178e-05, "logits/chosen": 0.06065264344215393, "logits/rejected": -0.03498959168791771, "logps/chosen": -0.00016907777171581984, "logps/rejected": -2.6138739585876465, "loss": 0.4471, "nll_loss": 0.11177139729261398, "rewards/accuracies": 1.0, "rewards/chosen": -1.6907777535379864e-05, "rewards/margins": 0.2613704800605774, "rewards/rejected": -0.2613874077796936, "step": 13410 }, { "epoch": 9.274550484094053, "grad_norm": 2.4704439640045166, "learning_rate": 4.030275088366374e-06, "log_odds_chosen": 10.868306159973145, "log_odds_ratio": -6.289570592343807e-05, "logits/chosen": -0.2185719907283783, "logits/rejected": -0.2263566255569458, "logps/chosen": -0.0001450084673706442, "logps/rejected": -1.8913393020629883, "loss": 0.2828, "nll_loss": 0.07068517059087753, "rewards/accuracies": 1.0, "rewards/chosen": -1.45008461913676e-05, "rewards/margins": 0.18911944329738617, "rewards/rejected": -0.1891339123249054, "step": 13411 }, { "epoch": 9.27524204702628, "grad_norm": 2.365954637527466, "learning_rate": 4.026433072076226e-06, "log_odds_chosen": 11.505110740661621, "log_odds_ratio": -3.883875615429133e-05, "logits/chosen": -0.2714047133922577, "logits/rejected": -0.2261641025543213, "logps/chosen": -0.00017098369426093996, "logps/rejected": -2.8133089542388916, "loss": 0.2827, "nll_loss": 0.07066500186920166, "rewards/accuracies": 1.0, "rewards/chosen": -1.7098369426093996e-05, "rewards/margins": 0.28131380677223206, "rewards/rejected": -0.2813309133052826, "step": 13412 }, { "epoch": 9.275933609958507, "grad_norm": 3.3742008209228516, "learning_rate": 4.022591055786077e-06, "log_odds_chosen": 12.555534362792969, "log_odds_ratio": -2.3210215658764355e-05, "logits/chosen": -0.03291553258895874, "logits/rejected": -0.1688704490661621, "logps/chosen": -0.00019250065088272095, "logps/rejected": -3.8435990810394287, "loss": 0.392, "nll_loss": 0.09799201786518097, "rewards/accuracies": 1.0, "rewards/chosen": -1.9250066543463618e-05, "rewards/margins": 0.38434064388275146, "rewards/rejected": -0.3843598961830139, "step": 13413 }, { "epoch": 9.276625172890734, "grad_norm": 1.7878432273864746, "learning_rate": 4.0187490394959275e-06, "log_odds_chosen": 11.162397384643555, "log_odds_ratio": -0.0003439478459767997, "logits/chosen": -0.7669256925582886, "logits/rejected": -0.7717230319976807, "logps/chosen": -0.00041456997860223055, "logps/rejected": -2.3134167194366455, "loss": 0.2227, "nll_loss": 0.05564933270215988, "rewards/accuracies": 1.0, "rewards/chosen": -4.1457002225797623e-05, "rewards/margins": 0.2313002347946167, "rewards/rejected": -0.2313416749238968, "step": 13414 }, { "epoch": 9.27731673582296, "grad_norm": 2.39280104637146, "learning_rate": 4.014907023205779e-06, "log_odds_chosen": 10.64444351196289, "log_odds_ratio": -8.962116407928988e-05, "logits/chosen": -0.4091259241104126, "logits/rejected": -0.4069325923919678, "logps/chosen": -0.00017965941515285522, "logps/rejected": -1.992423415184021, "loss": 0.2297, "nll_loss": 0.05740624666213989, "rewards/accuracies": 1.0, "rewards/chosen": -1.7965941879083402e-05, "rewards/margins": 0.19922436773777008, "rewards/rejected": -0.19924233853816986, "step": 13415 }, { "epoch": 9.278008298755188, "grad_norm": 3.6200830936431885, "learning_rate": 4.01106500691563e-06, "log_odds_chosen": 10.999204635620117, "log_odds_ratio": -0.00010778568685054779, "logits/chosen": -0.10634081065654755, "logits/rejected": -0.11936096847057343, "logps/chosen": -0.0006000410066917539, "logps/rejected": -2.684356689453125, "loss": 0.3256, "nll_loss": 0.08139465004205704, "rewards/accuracies": 1.0, "rewards/chosen": -6.000410212436691e-05, "rewards/margins": 0.2683756649494171, "rewards/rejected": -0.26843565702438354, "step": 13416 }, { "epoch": 9.278699861687414, "grad_norm": 2.941218614578247, "learning_rate": 4.007222990625481e-06, "log_odds_chosen": 11.12997055053711, "log_odds_ratio": -0.003518062410876155, "logits/chosen": -0.28821274638175964, "logits/rejected": -0.33975598216056824, "logps/chosen": -0.002335771918296814, "logps/rejected": -2.54252290725708, "loss": 0.2761, "nll_loss": 0.0686764121055603, "rewards/accuracies": 1.0, "rewards/chosen": -0.00023357720056083053, "rewards/margins": 0.25401872396469116, "rewards/rejected": -0.25425228476524353, "step": 13417 }, { "epoch": 9.279391424619641, "grad_norm": 2.9035792350769043, "learning_rate": 4.003380974335331e-06, "log_odds_chosen": 10.532563209533691, "log_odds_ratio": -0.0002298601029906422, "logits/chosen": -0.0022521987557411194, "logits/rejected": 0.034229494631290436, "logps/chosen": -0.0010790006490424275, "logps/rejected": -2.0970377922058105, "loss": 0.3064, "nll_loss": 0.07658690214157104, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010790007945615798, "rewards/margins": 0.20959590375423431, "rewards/rejected": -0.20970380306243896, "step": 13418 }, { "epoch": 9.280082987551868, "grad_norm": 3.781830310821533, "learning_rate": 3.999538958045182e-06, "log_odds_chosen": 11.045536994934082, "log_odds_ratio": -7.346242637140676e-05, "logits/chosen": -0.298697829246521, "logits/rejected": -0.25931185483932495, "logps/chosen": -0.000305239693261683, "logps/rejected": -2.1760785579681396, "loss": 0.2925, "nll_loss": 0.07311448454856873, "rewards/accuracies": 1.0, "rewards/chosen": -3.052396641578525e-05, "rewards/margins": 0.21757732331752777, "rewards/rejected": -0.21760785579681396, "step": 13419 }, { "epoch": 9.280774550484095, "grad_norm": 3.483137607574463, "learning_rate": 3.995696941755033e-06, "log_odds_chosen": 10.634360313415527, "log_odds_ratio": -3.846009713015519e-05, "logits/chosen": 0.11051935702562332, "logits/rejected": 0.08204380422830582, "logps/chosen": -8.226620411733165e-05, "logps/rejected": -1.5227370262145996, "loss": 0.2834, "nll_loss": 0.07085447758436203, "rewards/accuracies": 1.0, "rewards/chosen": -8.226620593632106e-06, "rewards/margins": 0.15226547420024872, "rewards/rejected": -0.15227369964122772, "step": 13420 }, { "epoch": 9.281466113416322, "grad_norm": 1.9649827480316162, "learning_rate": 3.9918549254648844e-06, "log_odds_chosen": 9.993119239807129, "log_odds_ratio": -0.0001120996312238276, "logits/chosen": -0.22370699048042297, "logits/rejected": -0.3054266571998596, "logps/chosen": -0.00021666797692887485, "logps/rejected": -1.4185731410980225, "loss": 0.1858, "nll_loss": 0.046433914452791214, "rewards/accuracies": 1.0, "rewards/chosen": -2.166679951187689e-05, "rewards/margins": 0.14183564484119415, "rewards/rejected": -0.14185731112957, "step": 13421 }, { "epoch": 9.282157676348548, "grad_norm": 4.2012104988098145, "learning_rate": 3.988012909174735e-06, "log_odds_chosen": 10.43075180053711, "log_odds_ratio": -0.0003460758307483047, "logits/chosen": -0.3779338598251343, "logits/rejected": -0.47758790850639343, "logps/chosen": -0.000670717447064817, "logps/rejected": -2.635446548461914, "loss": 0.927, "nll_loss": 0.23170319199562073, "rewards/accuracies": 1.0, "rewards/chosen": -6.707174179609865e-05, "rewards/margins": 0.2634775936603546, "rewards/rejected": -0.26354464888572693, "step": 13422 }, { "epoch": 9.282849239280775, "grad_norm": 3.3624515533447266, "learning_rate": 3.984170892884586e-06, "log_odds_chosen": 11.48715591430664, "log_odds_ratio": -5.0667844334384426e-05, "logits/chosen": -0.24923592805862427, "logits/rejected": -0.28355079889297485, "logps/chosen": -0.0003556256997399032, "logps/rejected": -2.6016101837158203, "loss": 0.4092, "nll_loss": 0.10228350013494492, "rewards/accuracies": 1.0, "rewards/chosen": -3.5562567063607275e-05, "rewards/margins": 0.26012542843818665, "rewards/rejected": -0.26016098260879517, "step": 13423 }, { "epoch": 9.283540802213002, "grad_norm": 3.0172131061553955, "learning_rate": 3.9803288765944375e-06, "log_odds_chosen": 10.21627426147461, "log_odds_ratio": -0.00044269065256230533, "logits/chosen": -0.38946762681007385, "logits/rejected": -0.4330682158470154, "logps/chosen": -0.0002675445284694433, "logps/rejected": -1.7040836811065674, "loss": 0.3104, "nll_loss": 0.07755580544471741, "rewards/accuracies": 1.0, "rewards/chosen": -2.6754454665933736e-05, "rewards/margins": 0.17038163542747498, "rewards/rejected": -0.17040836811065674, "step": 13424 }, { "epoch": 9.284232365145229, "grad_norm": 2.2918384075164795, "learning_rate": 3.976486860304287e-06, "log_odds_chosen": 10.539897918701172, "log_odds_ratio": -0.00033111555967479944, "logits/chosen": -0.2676827013492584, "logits/rejected": -0.28647491335868835, "logps/chosen": -0.0006111696711741388, "logps/rejected": -1.587890625, "loss": 0.2487, "nll_loss": 0.062130894511938095, "rewards/accuracies": 1.0, "rewards/chosen": -6.111696711741388e-05, "rewards/margins": 0.15872792899608612, "rewards/rejected": -0.15878905355930328, "step": 13425 }, { "epoch": 9.284923928077456, "grad_norm": 3.063905715942383, "learning_rate": 3.972644844014139e-06, "log_odds_chosen": 11.524633407592773, "log_odds_ratio": -2.583039713499602e-05, "logits/chosen": -0.5895035266876221, "logits/rejected": -0.6758289337158203, "logps/chosen": -8.647509093862027e-05, "logps/rejected": -2.294323444366455, "loss": 0.3178, "nll_loss": 0.07944169640541077, "rewards/accuracies": 1.0, "rewards/chosen": -8.647508366266266e-06, "rewards/margins": 0.22942368686199188, "rewards/rejected": -0.2294323444366455, "step": 13426 }, { "epoch": 9.285615491009683, "grad_norm": 2.2570669651031494, "learning_rate": 3.96880282772399e-06, "log_odds_chosen": 11.683237075805664, "log_odds_ratio": -2.6745978175313212e-05, "logits/chosen": -0.3830067217350006, "logits/rejected": -0.3945922255516052, "logps/chosen": -0.00012965469795744866, "logps/rejected": -2.250619649887085, "loss": 0.2252, "nll_loss": 0.05630715191364288, "rewards/accuracies": 1.0, "rewards/chosen": -1.2965469068149105e-05, "rewards/margins": 0.2250490039587021, "rewards/rejected": -0.22506198287010193, "step": 13427 }, { "epoch": 9.28630705394191, "grad_norm": 3.58709454536438, "learning_rate": 3.9649608114338405e-06, "log_odds_chosen": 11.472620964050293, "log_odds_ratio": -3.926477438653819e-05, "logits/chosen": 0.07597567141056061, "logits/rejected": -0.1412590742111206, "logps/chosen": -9.301173849962652e-05, "logps/rejected": -1.7634772062301636, "loss": 0.4221, "nll_loss": 0.10552544891834259, "rewards/accuracies": 1.0, "rewards/chosen": -9.301174031861592e-06, "rewards/margins": 0.17633843421936035, "rewards/rejected": -0.17634771764278412, "step": 13428 }, { "epoch": 9.286998616874136, "grad_norm": 3.3155059814453125, "learning_rate": 3.961118795143691e-06, "log_odds_chosen": 11.708847045898438, "log_odds_ratio": -3.0453265935648233e-05, "logits/chosen": -0.400894433259964, "logits/rejected": -0.3575477600097656, "logps/chosen": -0.00024603097699582577, "logps/rejected": -2.225172758102417, "loss": 0.3595, "nll_loss": 0.08986914902925491, "rewards/accuracies": 1.0, "rewards/chosen": -2.4603097699582577e-05, "rewards/margins": 0.22249269485473633, "rewards/rejected": -0.22251728177070618, "step": 13429 }, { "epoch": 9.287690179806363, "grad_norm": 4.786239147186279, "learning_rate": 3.957276778853543e-06, "log_odds_chosen": 12.104966163635254, "log_odds_ratio": -7.745972652628552e-06, "logits/chosen": -0.4614717960357666, "logits/rejected": -0.43710124492645264, "logps/chosen": -0.00013096739712636918, "logps/rejected": -2.6526381969451904, "loss": 0.4942, "nll_loss": 0.12354452908039093, "rewards/accuracies": 1.0, "rewards/chosen": -1.3096740076434799e-05, "rewards/margins": 0.2652507424354553, "rewards/rejected": -0.2652638554573059, "step": 13430 }, { "epoch": 9.28838174273859, "grad_norm": 4.398645401000977, "learning_rate": 3.953434762563394e-06, "log_odds_chosen": 11.697811126708984, "log_odds_ratio": -2.1033920347690582e-05, "logits/chosen": -0.3223825991153717, "logits/rejected": -0.3768712878227234, "logps/chosen": -0.00012502839672379196, "logps/rejected": -2.744093894958496, "loss": 0.7729, "nll_loss": 0.19321200251579285, "rewards/accuracies": 1.0, "rewards/chosen": -1.250284185516648e-05, "rewards/margins": 0.2743968963623047, "rewards/rejected": -0.27440938353538513, "step": 13431 }, { "epoch": 9.289073305670817, "grad_norm": 4.682480812072754, "learning_rate": 3.949592746273244e-06, "log_odds_chosen": 12.311955451965332, "log_odds_ratio": -1.3734586900682189e-05, "logits/chosen": -0.13822859525680542, "logits/rejected": -0.2811431884765625, "logps/chosen": -0.00023069609596859664, "logps/rejected": -3.5613608360290527, "loss": 0.5211, "nll_loss": 0.1302732676267624, "rewards/accuracies": 1.0, "rewards/chosen": -2.3069609596859664e-05, "rewards/margins": 0.3561130464076996, "rewards/rejected": -0.35613611340522766, "step": 13432 }, { "epoch": 9.289764868603044, "grad_norm": 3.0834381580352783, "learning_rate": 3.945750729983096e-06, "log_odds_chosen": 11.342191696166992, "log_odds_ratio": -2.7563957701204345e-05, "logits/chosen": -0.24458184838294983, "logits/rejected": -0.3116031587123871, "logps/chosen": -0.00011202124733245, "logps/rejected": -2.335583209991455, "loss": 0.4059, "nll_loss": 0.10148320347070694, "rewards/accuracies": 1.0, "rewards/chosen": -1.120212436944712e-05, "rewards/margins": 0.2335471212863922, "rewards/rejected": -0.23355832695960999, "step": 13433 }, { "epoch": 9.29045643153527, "grad_norm": 2.614386558532715, "learning_rate": 3.941908713692946e-06, "log_odds_chosen": 10.352871894836426, "log_odds_ratio": -0.00029394158627837896, "logits/chosen": -0.45531344413757324, "logits/rejected": -0.40713274478912354, "logps/chosen": -0.0005957625689916313, "logps/rejected": -1.8678221702575684, "loss": 0.2736, "nll_loss": 0.06837328523397446, "rewards/accuracies": 1.0, "rewards/chosen": -5.957625762675889e-05, "rewards/margins": 0.18672263622283936, "rewards/rejected": -0.18678221106529236, "step": 13434 }, { "epoch": 9.291147994467497, "grad_norm": 3.3317110538482666, "learning_rate": 3.9380666974027975e-06, "log_odds_chosen": 11.031869888305664, "log_odds_ratio": -9.012289956444874e-05, "logits/chosen": -0.5779318809509277, "logits/rejected": -0.7733064889907837, "logps/chosen": -0.0001790263195289299, "logps/rejected": -1.7726452350616455, "loss": 0.3714, "nll_loss": 0.09283643215894699, "rewards/accuracies": 1.0, "rewards/chosen": -1.790263195289299e-05, "rewards/margins": 0.17724663019180298, "rewards/rejected": -0.17726454138755798, "step": 13435 }, { "epoch": 9.291839557399724, "grad_norm": 3.116892099380493, "learning_rate": 3.934224681112647e-06, "log_odds_chosen": 10.351808547973633, "log_odds_ratio": -7.368012302322313e-05, "logits/chosen": -0.32790011167526245, "logits/rejected": -0.3104146718978882, "logps/chosen": -0.000142537901410833, "logps/rejected": -1.4875270128250122, "loss": 0.3242, "nll_loss": 0.08104795962572098, "rewards/accuracies": 1.0, "rewards/chosen": -1.42537901410833e-05, "rewards/margins": 0.14873844385147095, "rewards/rejected": -0.14875270426273346, "step": 13436 }, { "epoch": 9.292531120331951, "grad_norm": 3.672302722930908, "learning_rate": 3.930382664822499e-06, "log_odds_chosen": 12.376562118530273, "log_odds_ratio": -5.7372599258087575e-06, "logits/chosen": -0.2653083801269531, "logits/rejected": -0.30238568782806396, "logps/chosen": -7.231077324831858e-05, "logps/rejected": -2.5421056747436523, "loss": 0.432, "nll_loss": 0.10799235105514526, "rewards/accuracies": 1.0, "rewards/chosen": -7.231077233882388e-06, "rewards/margins": 0.25420331954956055, "rewards/rejected": -0.25421056151390076, "step": 13437 }, { "epoch": 9.293222683264178, "grad_norm": 3.056394100189209, "learning_rate": 3.9265406485323505e-06, "log_odds_chosen": 11.484671592712402, "log_odds_ratio": -3.2231018849415705e-05, "logits/chosen": -0.3656775653362274, "logits/rejected": -0.36800307035446167, "logps/chosen": -0.00019055130542255938, "logps/rejected": -2.2714104652404785, "loss": 0.3314, "nll_loss": 0.08284640312194824, "rewards/accuracies": 1.0, "rewards/chosen": -1.9055129087064415e-05, "rewards/margins": 0.2271220088005066, "rewards/rejected": -0.22714105248451233, "step": 13438 }, { "epoch": 9.293914246196405, "grad_norm": 3.0379509925842285, "learning_rate": 3.9226986322422004e-06, "log_odds_chosen": 10.669838905334473, "log_odds_ratio": -0.00021413953800220042, "logits/chosen": -0.21338048577308655, "logits/rejected": -0.2577047348022461, "logps/chosen": -0.0002600338775664568, "logps/rejected": -2.0176901817321777, "loss": 0.3005, "nll_loss": 0.07510216534137726, "rewards/accuracies": 1.0, "rewards/chosen": -2.600338848424144e-05, "rewards/margins": 0.201743021607399, "rewards/rejected": -0.20176903903484344, "step": 13439 }, { "epoch": 9.294605809128631, "grad_norm": 4.63295841217041, "learning_rate": 3.918856615952052e-06, "log_odds_chosen": 11.448358535766602, "log_odds_ratio": -1.9698167307069525e-05, "logits/chosen": -0.5362762212753296, "logits/rejected": -0.5343048572540283, "logps/chosen": -0.00015766645083203912, "logps/rejected": -2.31644868850708, "loss": 0.4271, "nll_loss": 0.10677941143512726, "rewards/accuracies": 1.0, "rewards/chosen": -1.5766647265991196e-05, "rewards/margins": 0.23162910342216492, "rewards/rejected": -0.231644868850708, "step": 13440 }, { "epoch": 9.295297372060858, "grad_norm": 2.1920297145843506, "learning_rate": 3.915014599661903e-06, "log_odds_chosen": 11.539931297302246, "log_odds_ratio": -9.448492346564308e-05, "logits/chosen": -0.7264431118965149, "logits/rejected": -0.8167514801025391, "logps/chosen": -0.0003868629573844373, "logps/rejected": -3.05387020111084, "loss": 0.2399, "nll_loss": 0.05997336655855179, "rewards/accuracies": 1.0, "rewards/chosen": -3.868629573844373e-05, "rewards/margins": 0.30534833669662476, "rewards/rejected": -0.305387020111084, "step": 13441 }, { "epoch": 9.295988934993085, "grad_norm": 3.0261287689208984, "learning_rate": 3.9111725833717535e-06, "log_odds_chosen": 11.672271728515625, "log_odds_ratio": -3.1384017347591e-05, "logits/chosen": -0.5126137137413025, "logits/rejected": -0.555162787437439, "logps/chosen": -0.00015418983821291476, "logps/rejected": -2.4113316535949707, "loss": 0.397, "nll_loss": 0.09924092143774033, "rewards/accuracies": 1.0, "rewards/chosen": -1.5418983821291476e-05, "rewards/margins": 0.24111774563789368, "rewards/rejected": -0.24113315343856812, "step": 13442 }, { "epoch": 9.296680497925312, "grad_norm": 4.1650567054748535, "learning_rate": 3.907330567081604e-06, "log_odds_chosen": 11.079200744628906, "log_odds_ratio": -0.0002563607122283429, "logits/chosen": -0.021878689527511597, "logits/rejected": -0.13688160479068756, "logps/chosen": -0.0005930270999670029, "logps/rejected": -2.562225341796875, "loss": 0.3913, "nll_loss": 0.09778881818056107, "rewards/accuracies": 1.0, "rewards/chosen": -5.930270708631724e-05, "rewards/margins": 0.25616323947906494, "rewards/rejected": -0.25622254610061646, "step": 13443 }, { "epoch": 9.297372060857539, "grad_norm": 3.347186803817749, "learning_rate": 3.903488550791456e-06, "log_odds_chosen": 11.050576210021973, "log_odds_ratio": -9.413971565663815e-05, "logits/chosen": -0.3477120101451874, "logits/rejected": -0.35860568284988403, "logps/chosen": -9.553891140967607e-05, "logps/rejected": -1.656992793083191, "loss": 0.355, "nll_loss": 0.08875176310539246, "rewards/accuracies": 1.0, "rewards/chosen": -9.553890777169727e-06, "rewards/margins": 0.16568972170352936, "rewards/rejected": -0.16569927334785461, "step": 13444 }, { "epoch": 9.298063623789766, "grad_norm": 2.4853501319885254, "learning_rate": 3.899646534501307e-06, "log_odds_chosen": 10.950776100158691, "log_odds_ratio": -5.783190135844052e-05, "logits/chosen": -0.26249608397483826, "logits/rejected": -0.2371029406785965, "logps/chosen": -0.00018176852609030902, "logps/rejected": -2.1492209434509277, "loss": 0.2225, "nll_loss": 0.05562479421496391, "rewards/accuracies": 1.0, "rewards/chosen": -1.8176853700424545e-05, "rewards/margins": 0.21490392088890076, "rewards/rejected": -0.21492210030555725, "step": 13445 }, { "epoch": 9.298755186721992, "grad_norm": 2.5416414737701416, "learning_rate": 3.895804518211157e-06, "log_odds_chosen": 11.243158340454102, "log_odds_ratio": -5.964957381365821e-05, "logits/chosen": -0.11643625795841217, "logits/rejected": -0.15829455852508545, "logps/chosen": -0.00025903433561325073, "logps/rejected": -2.2749953269958496, "loss": 0.2535, "nll_loss": 0.06337232142686844, "rewards/accuracies": 1.0, "rewards/chosen": -2.5903436835506e-05, "rewards/margins": 0.22747361660003662, "rewards/rejected": -0.22749951481819153, "step": 13446 }, { "epoch": 9.29944674965422, "grad_norm": 4.396009922027588, "learning_rate": 3.891962501921009e-06, "log_odds_chosen": 11.116652488708496, "log_odds_ratio": -3.810801717918366e-05, "logits/chosen": -0.03421090170741081, "logits/rejected": -0.05119268223643303, "logps/chosen": -0.00012173371214885265, "logps/rejected": -2.0002799034118652, "loss": 0.4754, "nll_loss": 0.1188463568687439, "rewards/accuracies": 1.0, "rewards/chosen": -1.2173370123491623e-05, "rewards/margins": 0.2000158131122589, "rewards/rejected": -0.20002800226211548, "step": 13447 }, { "epoch": 9.300138312586446, "grad_norm": 4.451956272125244, "learning_rate": 3.888120485630859e-06, "log_odds_chosen": 11.3140869140625, "log_odds_ratio": -3.882058445014991e-05, "logits/chosen": -0.7956528663635254, "logits/rejected": -0.8160718679428101, "logps/chosen": -0.00028218800434842706, "logps/rejected": -2.6495447158813477, "loss": 0.3688, "nll_loss": 0.09219682216644287, "rewards/accuracies": 1.0, "rewards/chosen": -2.8218801162438467e-05, "rewards/margins": 0.2649262547492981, "rewards/rejected": -0.26495444774627686, "step": 13448 }, { "epoch": 9.300829875518673, "grad_norm": 2.387260913848877, "learning_rate": 3.8842784693407105e-06, "log_odds_chosen": 11.304975509643555, "log_odds_ratio": -2.020270039793104e-05, "logits/chosen": -0.2679097056388855, "logits/rejected": -0.35723209381103516, "logps/chosen": -0.00014827624545432627, "logps/rejected": -2.125303268432617, "loss": 0.234, "nll_loss": 0.05850508436560631, "rewards/accuracies": 1.0, "rewards/chosen": -1.4827624909230508e-05, "rewards/margins": 0.2125154733657837, "rewards/rejected": -0.21253031492233276, "step": 13449 }, { "epoch": 9.3015214384509, "grad_norm": 3.363272190093994, "learning_rate": 3.880436453050561e-06, "log_odds_chosen": 11.56545352935791, "log_odds_ratio": -3.806597669608891e-05, "logits/chosen": -0.5218194723129272, "logits/rejected": -0.7576302289962769, "logps/chosen": -0.0003641648218035698, "logps/rejected": -2.0262107849121094, "loss": 0.3822, "nll_loss": 0.09554532915353775, "rewards/accuracies": 1.0, "rewards/chosen": -3.6416480725165457e-05, "rewards/margins": 0.20258468389511108, "rewards/rejected": -0.20262108743190765, "step": 13450 }, { "epoch": 9.302213001383127, "grad_norm": 4.251006126403809, "learning_rate": 3.876594436760412e-06, "log_odds_chosen": 10.876567840576172, "log_odds_ratio": -0.00013768920325674117, "logits/chosen": -0.537798285484314, "logits/rejected": -0.6344647407531738, "logps/chosen": -0.00023333955323323607, "logps/rejected": -2.351746082305908, "loss": 0.3429, "nll_loss": 0.08570535480976105, "rewards/accuracies": 1.0, "rewards/chosen": -2.333395605091937e-05, "rewards/margins": 0.23515130579471588, "rewards/rejected": -0.23517462611198425, "step": 13451 }, { "epoch": 9.302904564315353, "grad_norm": 4.054286479949951, "learning_rate": 3.872752420470263e-06, "log_odds_chosen": 11.610682487487793, "log_odds_ratio": -2.241161018901039e-05, "logits/chosen": -0.3463277816772461, "logits/rejected": -0.3877999782562256, "logps/chosen": -0.0001846577797550708, "logps/rejected": -2.8598146438598633, "loss": 0.4372, "nll_loss": 0.10929237306118011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8465776520315558e-05, "rewards/margins": 0.2859629988670349, "rewards/rejected": -0.2859814763069153, "step": 13452 }, { "epoch": 9.30359612724758, "grad_norm": 3.626314163208008, "learning_rate": 3.8689104041801135e-06, "log_odds_chosen": 11.272414207458496, "log_odds_ratio": -5.84806184633635e-05, "logits/chosen": -0.5375299453735352, "logits/rejected": -0.5248052477836609, "logps/chosen": -0.00018739163351710886, "logps/rejected": -2.3324246406555176, "loss": 0.4116, "nll_loss": 0.10289761424064636, "rewards/accuracies": 1.0, "rewards/chosen": -1.8739163351710886e-05, "rewards/margins": 0.23322373628616333, "rewards/rejected": -0.2332424819469452, "step": 13453 }, { "epoch": 9.304287690179807, "grad_norm": 3.117380142211914, "learning_rate": 3.865068387889965e-06, "log_odds_chosen": 12.008508682250977, "log_odds_ratio": -2.961501741083339e-05, "logits/chosen": -0.3070840537548065, "logits/rejected": -0.3590050935745239, "logps/chosen": -0.00015943381004035473, "logps/rejected": -2.8896145820617676, "loss": 0.2863, "nll_loss": 0.07156022638082504, "rewards/accuracies": 1.0, "rewards/chosen": -1.594338027643971e-05, "rewards/margins": 0.2889455258846283, "rewards/rejected": -0.2889614403247833, "step": 13454 }, { "epoch": 9.304979253112034, "grad_norm": 5.104377746582031, "learning_rate": 3.861226371599816e-06, "log_odds_chosen": 11.075465202331543, "log_odds_ratio": -0.0001627878227736801, "logits/chosen": -0.1711142212152481, "logits/rejected": -0.28870540857315063, "logps/chosen": -0.00043384850141592324, "logps/rejected": -2.306971549987793, "loss": 0.6368, "nll_loss": 0.1591799110174179, "rewards/accuracies": 1.0, "rewards/chosen": -4.3384850869188085e-05, "rewards/margins": 0.230653777718544, "rewards/rejected": -0.2306971549987793, "step": 13455 }, { "epoch": 9.30567081604426, "grad_norm": 3.257077693939209, "learning_rate": 3.8573843553096666e-06, "log_odds_chosen": 11.359479904174805, "log_odds_ratio": -4.4101354433223605e-05, "logits/chosen": -0.15206146240234375, "logits/rejected": -0.22837527096271515, "logps/chosen": -0.00023172479995992035, "logps/rejected": -2.734315872192383, "loss": 0.2831, "nll_loss": 0.07076961547136307, "rewards/accuracies": 1.0, "rewards/chosen": -2.3172480723587796e-05, "rewards/margins": 0.2734084129333496, "rewards/rejected": -0.27343159914016724, "step": 13456 }, { "epoch": 9.306362378976488, "grad_norm": 3.722757577896118, "learning_rate": 3.853542339019517e-06, "log_odds_chosen": 11.233427047729492, "log_odds_ratio": -0.0003971302940044552, "logits/chosen": -0.14773187041282654, "logits/rejected": -0.14477570354938507, "logps/chosen": -0.0011931579792872071, "logps/rejected": -2.492964029312134, "loss": 0.3946, "nll_loss": 0.0986117273569107, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011931579501833767, "rewards/margins": 0.24917706847190857, "rewards/rejected": -0.2492963820695877, "step": 13457 }, { "epoch": 9.307053941908714, "grad_norm": 2.218470573425293, "learning_rate": 3.849700322729369e-06, "log_odds_chosen": 10.07578182220459, "log_odds_ratio": -0.00019474061264190823, "logits/chosen": -0.2753535807132721, "logits/rejected": -0.3422609567642212, "logps/chosen": -0.0010008374229073524, "logps/rejected": -2.065652847290039, "loss": 0.2113, "nll_loss": 0.052812688052654266, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010008375102188438, "rewards/margins": 0.2064652144908905, "rewards/rejected": -0.20656529068946838, "step": 13458 }, { "epoch": 9.307745504840941, "grad_norm": 4.743387699127197, "learning_rate": 3.845858306439219e-06, "log_odds_chosen": 11.158632278442383, "log_odds_ratio": -2.11762326216558e-05, "logits/chosen": -0.21728329360485077, "logits/rejected": -0.2781408429145813, "logps/chosen": -0.000218193992623128, "logps/rejected": -2.3770692348480225, "loss": 0.4833, "nll_loss": 0.12082695960998535, "rewards/accuracies": 1.0, "rewards/chosen": -2.181939998990856e-05, "rewards/margins": 0.23768511414527893, "rewards/rejected": -0.23770692944526672, "step": 13459 }, { "epoch": 9.308437067773168, "grad_norm": 5.226717948913574, "learning_rate": 3.84201629014907e-06, "log_odds_chosen": 10.973608016967773, "log_odds_ratio": -0.00014043868577573448, "logits/chosen": -0.2824869453907013, "logits/rejected": -0.3398590683937073, "logps/chosen": -0.00011858497600769624, "logps/rejected": -1.9236409664154053, "loss": 0.6101, "nll_loss": 0.1525149792432785, "rewards/accuracies": 1.0, "rewards/chosen": -1.1858497600769624e-05, "rewards/margins": 0.19235223531723022, "rewards/rejected": -0.19236409664154053, "step": 13460 }, { "epoch": 9.309128630705395, "grad_norm": 2.705129384994507, "learning_rate": 3.838174273858921e-06, "log_odds_chosen": 10.589807510375977, "log_odds_ratio": -0.00017924222629517317, "logits/chosen": -0.7581220865249634, "logits/rejected": -0.7678347229957581, "logps/chosen": -0.00028634577756747603, "logps/rejected": -2.0105509757995605, "loss": 0.2845, "nll_loss": 0.07111126184463501, "rewards/accuracies": 1.0, "rewards/chosen": -2.8634578484343365e-05, "rewards/margins": 0.20102645456790924, "rewards/rejected": -0.20105509459972382, "step": 13461 }, { "epoch": 9.309820193637622, "grad_norm": 3.6058995723724365, "learning_rate": 3.834332257568772e-06, "log_odds_chosen": 10.95798397064209, "log_odds_ratio": -5.1791306759696454e-05, "logits/chosen": -0.6187546253204346, "logits/rejected": -0.4810827672481537, "logps/chosen": -0.00013288088666740805, "logps/rejected": -1.8508840799331665, "loss": 0.3569, "nll_loss": 0.08922485262155533, "rewards/accuracies": 1.0, "rewards/chosen": -1.3288088666740805e-05, "rewards/margins": 0.18507513403892517, "rewards/rejected": -0.1850884109735489, "step": 13462 }, { "epoch": 9.310511756569849, "grad_norm": 3.3807432651519775, "learning_rate": 3.8304902412786235e-06, "log_odds_chosen": 10.725987434387207, "log_odds_ratio": -9.697769564809278e-05, "logits/chosen": -0.062388740479946136, "logits/rejected": -0.0435502827167511, "logps/chosen": -0.00022250697657000273, "logps/rejected": -1.8565727472305298, "loss": 0.2896, "nll_loss": 0.07238153368234634, "rewards/accuracies": 1.0, "rewards/chosen": -2.2250696929404512e-05, "rewards/margins": 0.1856350302696228, "rewards/rejected": -0.18565726280212402, "step": 13463 }, { "epoch": 9.311203319502075, "grad_norm": 3.468080520629883, "learning_rate": 3.826648224988474e-06, "log_odds_chosen": 10.823293685913086, "log_odds_ratio": -0.00020776645396836102, "logits/chosen": -0.33366209268569946, "logits/rejected": -0.2669678330421448, "logps/chosen": -0.00029635181999765337, "logps/rejected": -2.1758363246917725, "loss": 0.432, "nll_loss": 0.10798037052154541, "rewards/accuracies": 1.0, "rewards/chosen": -2.9635182727361098e-05, "rewards/margins": 0.2175540030002594, "rewards/rejected": -0.21758362650871277, "step": 13464 }, { "epoch": 9.311894882434302, "grad_norm": 2.700617551803589, "learning_rate": 3.822806208698325e-06, "log_odds_chosen": 13.142247200012207, "log_odds_ratio": -2.230983409390319e-05, "logits/chosen": -0.005779445171356201, "logits/rejected": -0.09649817645549774, "logps/chosen": -0.00014073318743612617, "logps/rejected": -3.8190064430236816, "loss": 0.3025, "nll_loss": 0.07562220096588135, "rewards/accuracies": 1.0, "rewards/chosen": -1.4073319107410498e-05, "rewards/margins": 0.3818865418434143, "rewards/rejected": -0.3819006085395813, "step": 13465 }, { "epoch": 9.312586445366529, "grad_norm": 4.4598870277404785, "learning_rate": 3.818964192408176e-06, "log_odds_chosen": 11.616842269897461, "log_odds_ratio": -2.9827209800714627e-05, "logits/chosen": -0.1599445343017578, "logits/rejected": -0.3907346725463867, "logps/chosen": -0.00012314711057115346, "logps/rejected": -2.275468587875366, "loss": 0.4764, "nll_loss": 0.11909401416778564, "rewards/accuracies": 1.0, "rewards/chosen": -1.2314711966610048e-05, "rewards/margins": 0.22753453254699707, "rewards/rejected": -0.22754687070846558, "step": 13466 }, { "epoch": 9.313278008298756, "grad_norm": 5.46815824508667, "learning_rate": 3.815122176118027e-06, "log_odds_chosen": 12.737980842590332, "log_odds_ratio": -5.220482762524625e-06, "logits/chosen": -0.6087969541549683, "logits/rejected": -0.6135598421096802, "logps/chosen": -0.00015955566777847707, "logps/rejected": -3.849729061126709, "loss": 0.4696, "nll_loss": 0.11739481985569, "rewards/accuracies": 1.0, "rewards/chosen": -1.595556750544347e-05, "rewards/margins": 0.3849569857120514, "rewards/rejected": -0.3849729299545288, "step": 13467 }, { "epoch": 9.313969571230983, "grad_norm": 8.528974533081055, "learning_rate": 3.8112801598278777e-06, "log_odds_chosen": 10.95968246459961, "log_odds_ratio": -4.384573912830092e-05, "logits/chosen": -0.18643268942832947, "logits/rejected": -0.21873146295547485, "logps/chosen": -0.00013041615602560341, "logps/rejected": -1.957237720489502, "loss": 0.3603, "nll_loss": 0.09007404744625092, "rewards/accuracies": 1.0, "rewards/chosen": -1.304161469306564e-05, "rewards/margins": 0.19571073353290558, "rewards/rejected": -0.1957237720489502, "step": 13468 }, { "epoch": 9.31466113416321, "grad_norm": 4.134918212890625, "learning_rate": 3.807438143537729e-06, "log_odds_chosen": 12.330034255981445, "log_odds_ratio": -6.079011654946953e-06, "logits/chosen": -0.008475244045257568, "logits/rejected": 0.025699757039546967, "logps/chosen": -9.574719297233969e-05, "logps/rejected": -2.944679021835327, "loss": 0.3604, "nll_loss": 0.09009113162755966, "rewards/accuracies": 1.0, "rewards/chosen": -9.57472002482973e-06, "rewards/margins": 0.2944583296775818, "rewards/rejected": -0.2944679260253906, "step": 13469 }, { "epoch": 9.315352697095436, "grad_norm": 3.7410848140716553, "learning_rate": 3.803596127247579e-06, "log_odds_chosen": 11.439793586730957, "log_odds_ratio": -1.828746280807536e-05, "logits/chosen": -0.25906312465667725, "logits/rejected": -0.34541481733322144, "logps/chosen": -0.00023132732894737273, "logps/rejected": -2.2295823097229004, "loss": 0.4508, "nll_loss": 0.1126941591501236, "rewards/accuracies": 1.0, "rewards/chosen": -2.3132733986130916e-05, "rewards/margins": 0.22293512523174286, "rewards/rejected": -0.22295823693275452, "step": 13470 }, { "epoch": 9.316044260027663, "grad_norm": 4.06542444229126, "learning_rate": 3.7997541109574304e-06, "log_odds_chosen": 11.705513000488281, "log_odds_ratio": -2.102018697769381e-05, "logits/chosen": -0.30078208446502686, "logits/rejected": -0.12992164492607117, "logps/chosen": -8.429591252934188e-05, "logps/rejected": -2.435026168823242, "loss": 0.3457, "nll_loss": 0.0864274799823761, "rewards/accuracies": 1.0, "rewards/chosen": -8.429591616732068e-06, "rewards/margins": 0.2434941828250885, "rewards/rejected": -0.24350261688232422, "step": 13471 }, { "epoch": 9.31673582295989, "grad_norm": 4.054144382476807, "learning_rate": 3.7959120946672815e-06, "log_odds_chosen": 10.893832206726074, "log_odds_ratio": -8.980531129054725e-05, "logits/chosen": -0.13907435536384583, "logits/rejected": -0.008393503725528717, "logps/chosen": -0.000236863037571311, "logps/rejected": -2.596144199371338, "loss": 0.5103, "nll_loss": 0.1275601089000702, "rewards/accuracies": 1.0, "rewards/chosen": -2.3686305212322623e-05, "rewards/margins": 0.259590744972229, "rewards/rejected": -0.25961440801620483, "step": 13472 }, { "epoch": 9.317427385892117, "grad_norm": 3.4377999305725098, "learning_rate": 3.7920700783771323e-06, "log_odds_chosen": 11.774809837341309, "log_odds_ratio": -1.6027501260396093e-05, "logits/chosen": -0.09235573559999466, "logits/rejected": -0.14144612848758698, "logps/chosen": -0.00010810409730765969, "logps/rejected": -2.3190908432006836, "loss": 0.3623, "nll_loss": 0.09056393057107925, "rewards/accuracies": 1.0, "rewards/chosen": -1.0810409548867028e-05, "rewards/margins": 0.2318982630968094, "rewards/rejected": -0.23190905153751373, "step": 13473 }, { "epoch": 9.318118948824344, "grad_norm": 3.6212639808654785, "learning_rate": 3.7882280620869834e-06, "log_odds_chosen": 10.931177139282227, "log_odds_ratio": -3.4371048968750983e-05, "logits/chosen": -0.64949631690979, "logits/rejected": -0.6834389567375183, "logps/chosen": -0.00039314801688306034, "logps/rejected": -2.1690139770507812, "loss": 0.4247, "nll_loss": 0.10616564005613327, "rewards/accuracies": 1.0, "rewards/chosen": -3.931480387109332e-05, "rewards/margins": 0.21686206758022308, "rewards/rejected": -0.21690139174461365, "step": 13474 }, { "epoch": 9.31881051175657, "grad_norm": 3.314685106277466, "learning_rate": 3.7843860457968346e-06, "log_odds_chosen": 11.922871589660645, "log_odds_ratio": -9.834478987613693e-06, "logits/chosen": -0.37454918026924133, "logits/rejected": -0.3413027226924896, "logps/chosen": -8.132911898428574e-05, "logps/rejected": -2.511720895767212, "loss": 0.2946, "nll_loss": 0.07365487515926361, "rewards/accuracies": 1.0, "rewards/chosen": -8.132911716529634e-06, "rewards/margins": 0.25116395950317383, "rewards/rejected": -0.25117209553718567, "step": 13475 }, { "epoch": 9.319502074688797, "grad_norm": 3.1171579360961914, "learning_rate": 3.780544029506685e-06, "log_odds_chosen": 11.269782066345215, "log_odds_ratio": -7.540702790720388e-05, "logits/chosen": -0.4870750308036804, "logits/rejected": -0.5211501717567444, "logps/chosen": -0.00012674767640419304, "logps/rejected": -2.312211513519287, "loss": 0.311, "nll_loss": 0.0777503028512001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2674767276621424e-05, "rewards/margins": 0.23120847344398499, "rewards/rejected": -0.23122113943099976, "step": 13476 }, { "epoch": 9.320193637621024, "grad_norm": 2.3931541442871094, "learning_rate": 3.776702013216536e-06, "log_odds_chosen": 10.98151969909668, "log_odds_ratio": -3.289541564299725e-05, "logits/chosen": -0.3848639130592346, "logits/rejected": -0.37438511848449707, "logps/chosen": -0.00015288082067854702, "logps/rejected": -1.8499668836593628, "loss": 0.3197, "nll_loss": 0.0799197405576706, "rewards/accuracies": 1.0, "rewards/chosen": -1.5288083886844106e-05, "rewards/margins": 0.18498140573501587, "rewards/rejected": -0.18499669432640076, "step": 13477 }, { "epoch": 9.320885200553251, "grad_norm": 2.7862470149993896, "learning_rate": 3.7728599969263873e-06, "log_odds_chosen": 11.56342887878418, "log_odds_ratio": -1.7754273358150385e-05, "logits/chosen": -0.5026286244392395, "logits/rejected": -0.6119534373283386, "logps/chosen": -8.075307414401323e-05, "logps/rejected": -2.1262624263763428, "loss": 0.3213, "nll_loss": 0.08032898604869843, "rewards/accuracies": 1.0, "rewards/chosen": -8.075307960098144e-06, "rewards/margins": 0.2126181721687317, "rewards/rejected": -0.21262626349925995, "step": 13478 }, { "epoch": 9.321576763485478, "grad_norm": 2.863743305206299, "learning_rate": 3.769017980636238e-06, "log_odds_chosen": 11.144573211669922, "log_odds_ratio": -2.4799961465760134e-05, "logits/chosen": -0.31004148721694946, "logits/rejected": -0.3101674020290375, "logps/chosen": -0.00019185274140909314, "logps/rejected": -2.265904664993286, "loss": 0.2478, "nll_loss": 0.06194061040878296, "rewards/accuracies": 1.0, "rewards/chosen": -1.9185274140909314e-05, "rewards/margins": 0.22657127678394318, "rewards/rejected": -0.22659045457839966, "step": 13479 }, { "epoch": 9.322268326417705, "grad_norm": 3.149684190750122, "learning_rate": 3.7651759643460892e-06, "log_odds_chosen": 11.424112319946289, "log_odds_ratio": -4.159379022894427e-05, "logits/chosen": -0.2655598521232605, "logits/rejected": -0.37703606486320496, "logps/chosen": -0.00010751900117611513, "logps/rejected": -2.443114995956421, "loss": 0.2764, "nll_loss": 0.06908674538135529, "rewards/accuracies": 1.0, "rewards/chosen": -1.0751899935712572e-05, "rewards/margins": 0.2443007528781891, "rewards/rejected": -0.24431152641773224, "step": 13480 }, { "epoch": 9.322959889349931, "grad_norm": 4.372343063354492, "learning_rate": 3.7613339480559404e-06, "log_odds_chosen": 12.416967391967773, "log_odds_ratio": -2.7587606382439844e-05, "logits/chosen": 0.1218680888414383, "logits/rejected": -0.008615031838417053, "logps/chosen": -9.726442658575252e-05, "logps/rejected": -2.9957611560821533, "loss": 0.4528, "nll_loss": 0.1131986677646637, "rewards/accuracies": 1.0, "rewards/chosen": -9.726443749968894e-06, "rewards/margins": 0.299566388130188, "rewards/rejected": -0.29957613348960876, "step": 13481 }, { "epoch": 9.323651452282158, "grad_norm": 4.135457992553711, "learning_rate": 3.7574919317657907e-06, "log_odds_chosen": 11.986932754516602, "log_odds_ratio": -1.0640229447744787e-05, "logits/chosen": -0.5449897050857544, "logits/rejected": -0.5930085778236389, "logps/chosen": -0.00015816489758435637, "logps/rejected": -2.755146026611328, "loss": 0.3185, "nll_loss": 0.07961829006671906, "rewards/accuracies": 1.0, "rewards/chosen": -1.581649121362716e-05, "rewards/margins": 0.27549877762794495, "rewards/rejected": -0.2755146026611328, "step": 13482 }, { "epoch": 9.324343015214385, "grad_norm": 3.3711698055267334, "learning_rate": 3.753649915475642e-06, "log_odds_chosen": 10.710826873779297, "log_odds_ratio": -0.00037505064392462373, "logits/chosen": -0.26216813921928406, "logits/rejected": -0.3119490444660187, "logps/chosen": -0.0003268842410761863, "logps/rejected": -2.353797435760498, "loss": 0.4368, "nll_loss": 0.10915695130825043, "rewards/accuracies": 1.0, "rewards/chosen": -3.268842556281015e-05, "rewards/margins": 0.23534703254699707, "rewards/rejected": -0.23537972569465637, "step": 13483 }, { "epoch": 9.325034578146612, "grad_norm": 3.8483965396881104, "learning_rate": 3.749807899185493e-06, "log_odds_chosen": 11.60401439666748, "log_odds_ratio": -4.468131737667136e-05, "logits/chosen": -0.21380922198295593, "logits/rejected": -0.27483001351356506, "logps/chosen": -0.00020263693295419216, "logps/rejected": -2.752741813659668, "loss": 0.2783, "nll_loss": 0.0695706233382225, "rewards/accuracies": 1.0, "rewards/chosen": -2.026369475061074e-05, "rewards/margins": 0.27525392174720764, "rewards/rejected": -0.2752741873264313, "step": 13484 }, { "epoch": 9.325726141078839, "grad_norm": 4.260168075561523, "learning_rate": 3.7459658828953434e-06, "log_odds_chosen": 12.041501998901367, "log_odds_ratio": -2.186280835303478e-05, "logits/chosen": -0.1144566684961319, "logits/rejected": -0.17487488687038422, "logps/chosen": -0.00019972564768977463, "logps/rejected": -3.208420515060425, "loss": 0.4325, "nll_loss": 0.10811209678649902, "rewards/accuracies": 1.0, "rewards/chosen": -1.9972565496573225e-05, "rewards/margins": 0.3208220899105072, "rewards/rejected": -0.32084208726882935, "step": 13485 }, { "epoch": 9.326417704011066, "grad_norm": 3.800992727279663, "learning_rate": 3.7421238666051946e-06, "log_odds_chosen": 10.691490173339844, "log_odds_ratio": -0.0006114224088378251, "logits/chosen": -0.18019437789916992, "logits/rejected": -0.22118288278579712, "logps/chosen": -0.001764062442816794, "logps/rejected": -2.772888660430908, "loss": 0.3505, "nll_loss": 0.08756895363330841, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017640624719206244, "rewards/margins": 0.2771124839782715, "rewards/rejected": -0.27728885412216187, "step": 13486 }, { "epoch": 9.327109266943292, "grad_norm": 5.743640899658203, "learning_rate": 3.7382818503150453e-06, "log_odds_chosen": 10.67558765411377, "log_odds_ratio": -7.596887007821351e-05, "logits/chosen": -0.13736465573310852, "logits/rejected": -0.2244989573955536, "logps/chosen": -0.0007089751306921244, "logps/rejected": -2.3339829444885254, "loss": 0.4601, "nll_loss": 0.11502114683389664, "rewards/accuracies": 1.0, "rewards/chosen": -7.089751306921244e-05, "rewards/margins": 0.23332740366458893, "rewards/rejected": -0.23339828848838806, "step": 13487 }, { "epoch": 9.32780082987552, "grad_norm": 2.9741744995117188, "learning_rate": 3.7344398340248965e-06, "log_odds_chosen": 11.594825744628906, "log_odds_ratio": -5.341541691450402e-05, "logits/chosen": -0.24379974603652954, "logits/rejected": -0.23881873488426208, "logps/chosen": -0.0002033656492130831, "logps/rejected": -3.0501716136932373, "loss": 0.2306, "nll_loss": 0.05764034017920494, "rewards/accuracies": 1.0, "rewards/chosen": -2.033656528510619e-05, "rewards/margins": 0.3049968183040619, "rewards/rejected": -0.3050171732902527, "step": 13488 }, { "epoch": 9.328492392807746, "grad_norm": 4.511500358581543, "learning_rate": 3.7305978177347476e-06, "log_odds_chosen": 11.463151931762695, "log_odds_ratio": -0.000139111332828179, "logits/chosen": -0.14151470363140106, "logits/rejected": -0.196761816740036, "logps/chosen": -0.00032821958302520216, "logps/rejected": -3.2633323669433594, "loss": 0.3561, "nll_loss": 0.0890030488371849, "rewards/accuracies": 1.0, "rewards/chosen": -3.2821957574924454e-05, "rewards/margins": 0.3263004422187805, "rewards/rejected": -0.326333224773407, "step": 13489 }, { "epoch": 9.329183955739973, "grad_norm": 4.139351844787598, "learning_rate": 3.726755801444598e-06, "log_odds_chosen": 11.706854820251465, "log_odds_ratio": -2.9966075089760125e-05, "logits/chosen": -0.14848864078521729, "logits/rejected": -0.19505658745765686, "logps/chosen": -0.00011898233060492203, "logps/rejected": -2.731010675430298, "loss": 0.4292, "nll_loss": 0.10730849206447601, "rewards/accuracies": 1.0, "rewards/chosen": -1.1898233424290083e-05, "rewards/margins": 0.2730891704559326, "rewards/rejected": -0.2731010615825653, "step": 13490 }, { "epoch": 9.3298755186722, "grad_norm": 2.797802448272705, "learning_rate": 3.722913785154449e-06, "log_odds_chosen": 9.388402938842773, "log_odds_ratio": -0.00041358559974469244, "logits/chosen": -0.6194337606430054, "logits/rejected": -0.7150942087173462, "logps/chosen": -0.0005718135507777333, "logps/rejected": -1.6147042512893677, "loss": 0.3182, "nll_loss": 0.0794985443353653, "rewards/accuracies": 1.0, "rewards/chosen": -5.718135798815638e-05, "rewards/margins": 0.1614132523536682, "rewards/rejected": -0.161470428109169, "step": 13491 }, { "epoch": 9.330567081604427, "grad_norm": 2.23926043510437, "learning_rate": 3.7190717688643003e-06, "log_odds_chosen": 10.670015335083008, "log_odds_ratio": -9.015700197778642e-05, "logits/chosen": -0.3055238127708435, "logits/rejected": -0.3231578767299652, "logps/chosen": -0.00011767053365474567, "logps/rejected": -1.6536319255828857, "loss": 0.2644, "nll_loss": 0.06609741598367691, "rewards/accuracies": 1.0, "rewards/chosen": -1.1767053365474567e-05, "rewards/margins": 0.16535143554210663, "rewards/rejected": -0.16536319255828857, "step": 13492 }, { "epoch": 9.331258644536653, "grad_norm": 2.5508315563201904, "learning_rate": 3.7152297525741506e-06, "log_odds_chosen": 11.282350540161133, "log_odds_ratio": -8.483113197144121e-05, "logits/chosen": -0.23872199654579163, "logits/rejected": -0.19816704094409943, "logps/chosen": -0.00013775471597909927, "logps/rejected": -1.9430524110794067, "loss": 0.321, "nll_loss": 0.08024564385414124, "rewards/accuracies": 1.0, "rewards/chosen": -1.3775472325505689e-05, "rewards/margins": 0.19429144263267517, "rewards/rejected": -0.19430524110794067, "step": 13493 }, { "epoch": 9.33195020746888, "grad_norm": 4.62529182434082, "learning_rate": 3.711387736284002e-06, "log_odds_chosen": 11.182456970214844, "log_odds_ratio": -0.0008582415757700801, "logits/chosen": 0.2925998568534851, "logits/rejected": 0.2288024127483368, "logps/chosen": -0.0004990738234482706, "logps/rejected": -2.758685827255249, "loss": 0.5416, "nll_loss": 0.1353091448545456, "rewards/accuracies": 1.0, "rewards/chosen": -4.990738307242282e-05, "rewards/margins": 0.27581867575645447, "rewards/rejected": -0.27586856484413147, "step": 13494 }, { "epoch": 9.332641770401107, "grad_norm": 4.5020670890808105, "learning_rate": 3.707545719993853e-06, "log_odds_chosen": 12.771818161010742, "log_odds_ratio": -7.950256986077875e-06, "logits/chosen": -0.4980790615081787, "logits/rejected": -0.5255163908004761, "logps/chosen": -0.00014841003576293588, "logps/rejected": -3.522858142852783, "loss": 0.5237, "nll_loss": 0.13092757761478424, "rewards/accuracies": 1.0, "rewards/chosen": -1.4841003576293588e-05, "rewards/margins": 0.3522709906101227, "rewards/rejected": -0.35228586196899414, "step": 13495 }, { "epoch": 9.333333333333334, "grad_norm": 2.8230652809143066, "learning_rate": 3.7037037037037037e-06, "log_odds_chosen": 10.333768844604492, "log_odds_ratio": -9.811281051952392e-05, "logits/chosen": -0.14630591869354248, "logits/rejected": -0.13303673267364502, "logps/chosen": -0.00028036831645295024, "logps/rejected": -1.8686578273773193, "loss": 0.294, "nll_loss": 0.07350200414657593, "rewards/accuracies": 1.0, "rewards/chosen": -2.8036831281497143e-05, "rewards/margins": 0.18683774769306183, "rewards/rejected": -0.18686577677726746, "step": 13496 }, { "epoch": 9.33402489626556, "grad_norm": 3.8429489135742188, "learning_rate": 3.699861687413555e-06, "log_odds_chosen": 11.658157348632812, "log_odds_ratio": -3.7459019949892536e-05, "logits/chosen": 0.11421719938516617, "logits/rejected": 0.05861104279756546, "logps/chosen": -0.00013772404054179788, "logps/rejected": -2.393380641937256, "loss": 0.4082, "nll_loss": 0.10205702483654022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3772404599876609e-05, "rewards/margins": 0.23932427167892456, "rewards/rejected": -0.23933804035186768, "step": 13497 }, { "epoch": 9.334716459197788, "grad_norm": 3.278484582901001, "learning_rate": 3.696019671123406e-06, "log_odds_chosen": 10.718673706054688, "log_odds_ratio": -9.96225280687213e-05, "logits/chosen": -0.35212522745132446, "logits/rejected": -0.4008997976779938, "logps/chosen": -0.0001456753961974755, "logps/rejected": -1.6912918090820312, "loss": 0.2434, "nll_loss": 0.0608292818069458, "rewards/accuracies": 1.0, "rewards/chosen": -1.456754034734331e-05, "rewards/margins": 0.1691146194934845, "rewards/rejected": -0.16912919282913208, "step": 13498 }, { "epoch": 9.335408022130014, "grad_norm": 2.9812114238739014, "learning_rate": 3.6921776548332564e-06, "log_odds_chosen": 13.193338394165039, "log_odds_ratio": -6.2142416936694644e-06, "logits/chosen": -0.36012908816337585, "logits/rejected": -0.4613358974456787, "logps/chosen": -4.835828804061748e-05, "logps/rejected": -3.165411949157715, "loss": 0.3395, "nll_loss": 0.08488430827856064, "rewards/accuracies": 1.0, "rewards/chosen": -4.835828804061748e-06, "rewards/margins": 0.3165363371372223, "rewards/rejected": -0.3165411651134491, "step": 13499 }, { "epoch": 9.336099585062241, "grad_norm": 4.30682897567749, "learning_rate": 3.6883356385431076e-06, "log_odds_chosen": 11.123899459838867, "log_odds_ratio": -2.2451797121902928e-05, "logits/chosen": -0.2035236656665802, "logits/rejected": -0.08595597743988037, "logps/chosen": -0.00021680566715076566, "logps/rejected": -2.085360288619995, "loss": 0.496, "nll_loss": 0.12400057911872864, "rewards/accuracies": 1.0, "rewards/chosen": -2.1680567442672327e-05, "rewards/margins": 0.20851436257362366, "rewards/rejected": -0.2085360288619995, "step": 13500 }, { "epoch": 9.336791147994468, "grad_norm": 3.1607930660247803, "learning_rate": 3.6844936222529588e-06, "log_odds_chosen": 10.761314392089844, "log_odds_ratio": -9.914707334246486e-05, "logits/chosen": -0.08080196380615234, "logits/rejected": -0.11017942428588867, "logps/chosen": -0.00016276085807476193, "logps/rejected": -2.0921876430511475, "loss": 0.2914, "nll_loss": 0.07284814119338989, "rewards/accuracies": 1.0, "rewards/chosen": -1.627608435228467e-05, "rewards/margins": 0.20920248329639435, "rewards/rejected": -0.20921877026557922, "step": 13501 }, { "epoch": 9.337482710926695, "grad_norm": 2.8824057579040527, "learning_rate": 3.6806516059628095e-06, "log_odds_chosen": 10.971266746520996, "log_odds_ratio": -3.601726348279044e-05, "logits/chosen": -0.4004353880882263, "logits/rejected": -0.43088674545288086, "logps/chosen": -0.00027602817863225937, "logps/rejected": -2.114922046661377, "loss": 0.3238, "nll_loss": 0.08093824237585068, "rewards/accuracies": 1.0, "rewards/chosen": -2.76028185908217e-05, "rewards/margins": 0.21146461367607117, "rewards/rejected": -0.21149222552776337, "step": 13502 }, { "epoch": 9.338174273858922, "grad_norm": 7.601047992706299, "learning_rate": 3.6768095896726607e-06, "log_odds_chosen": 10.525729179382324, "log_odds_ratio": -4.52158747066278e-05, "logits/chosen": 0.05261586606502533, "logits/rejected": 0.04327049106359482, "logps/chosen": -0.0005245390348136425, "logps/rejected": -2.4437553882598877, "loss": 0.6654, "nll_loss": 0.16633406281471252, "rewards/accuracies": 1.0, "rewards/chosen": -5.245389911578968e-05, "rewards/margins": 0.24432307481765747, "rewards/rejected": -0.2443755567073822, "step": 13503 }, { "epoch": 9.338865836791149, "grad_norm": 3.025954008102417, "learning_rate": 3.672967573382511e-06, "log_odds_chosen": 10.200634002685547, "log_odds_ratio": -0.00014758994802832603, "logits/chosen": -0.06436628848314285, "logits/rejected": -0.11504589766263962, "logps/chosen": -0.0005985183524899185, "logps/rejected": -1.6737840175628662, "loss": 0.3746, "nll_loss": 0.09363247454166412, "rewards/accuracies": 1.0, "rewards/chosen": -5.9851834521396086e-05, "rewards/margins": 0.16731856763362885, "rewards/rejected": -0.16737841069698334, "step": 13504 }, { "epoch": 9.339557399723375, "grad_norm": 3.8002161979675293, "learning_rate": 3.669125557092362e-06, "log_odds_chosen": 11.298839569091797, "log_odds_ratio": -3.492705582175404e-05, "logits/chosen": -0.3517683744430542, "logits/rejected": -0.37183061242103577, "logps/chosen": -0.00021029937488492578, "logps/rejected": -2.008366584777832, "loss": 0.3739, "nll_loss": 0.09347833693027496, "rewards/accuracies": 1.0, "rewards/chosen": -2.1029936760896817e-05, "rewards/margins": 0.2008156180381775, "rewards/rejected": -0.200836643576622, "step": 13505 }, { "epoch": 9.340248962655602, "grad_norm": 3.177788734436035, "learning_rate": 3.6652835408022133e-06, "log_odds_chosen": 10.39039134979248, "log_odds_ratio": -5.2843250159639865e-05, "logits/chosen": -0.2426442801952362, "logits/rejected": -0.34015002846717834, "logps/chosen": -0.00031000032322481275, "logps/rejected": -1.9575690031051636, "loss": 0.3766, "nll_loss": 0.09413687884807587, "rewards/accuracies": 1.0, "rewards/chosen": -3.1000032322481275e-05, "rewards/margins": 0.19572588801383972, "rewards/rejected": -0.19575689733028412, "step": 13506 }, { "epoch": 9.340940525587829, "grad_norm": 2.606773853302002, "learning_rate": 3.6614415245120637e-06, "log_odds_chosen": 11.13302230834961, "log_odds_ratio": -4.279446875443682e-05, "logits/chosen": -0.6950966715812683, "logits/rejected": -0.5016571879386902, "logps/chosen": -0.00021491489314939827, "logps/rejected": -2.5477190017700195, "loss": 0.2904, "nll_loss": 0.07259637862443924, "rewards/accuracies": 1.0, "rewards/chosen": -2.149149077013135e-05, "rewards/margins": 0.25475040078163147, "rewards/rejected": -0.2547719180583954, "step": 13507 }, { "epoch": 9.341632088520056, "grad_norm": 2.2499380111694336, "learning_rate": 3.657599508221915e-06, "log_odds_chosen": 11.097082138061523, "log_odds_ratio": -0.0005031878827139735, "logits/chosen": -0.5092487335205078, "logits/rejected": -0.5142608284950256, "logps/chosen": -0.00017486378783360124, "logps/rejected": -1.891782283782959, "loss": 0.2769, "nll_loss": 0.06917122006416321, "rewards/accuracies": 1.0, "rewards/chosen": -1.748637623677496e-05, "rewards/margins": 0.18916073441505432, "rewards/rejected": -0.1891782432794571, "step": 13508 }, { "epoch": 9.342323651452283, "grad_norm": 3.7919974327087402, "learning_rate": 3.653757491931766e-06, "log_odds_chosen": 11.685168266296387, "log_odds_ratio": -0.0007330195512622595, "logits/chosen": -0.24235190451145172, "logits/rejected": -0.3833863437175751, "logps/chosen": -0.0025347373448312283, "logps/rejected": -3.4914391040802, "loss": 0.3644, "nll_loss": 0.09102737158536911, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002534736995585263, "rewards/margins": 0.34889042377471924, "rewards/rejected": -0.3491438925266266, "step": 13509 }, { "epoch": 9.34301521438451, "grad_norm": 1.976906657218933, "learning_rate": 3.6499154756416168e-06, "log_odds_chosen": 9.503071784973145, "log_odds_ratio": -0.0006813781219534576, "logits/chosen": 0.028965137898921967, "logits/rejected": -0.10996140539646149, "logps/chosen": -0.0010135057382285595, "logps/rejected": -1.596954584121704, "loss": 0.1912, "nll_loss": 0.047726936638355255, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010135059710592031, "rewards/margins": 0.1595941036939621, "rewards/rejected": -0.15969546139240265, "step": 13510 }, { "epoch": 9.343706777316736, "grad_norm": 2.922912120819092, "learning_rate": 3.646073459351468e-06, "log_odds_chosen": 10.952808380126953, "log_odds_ratio": -0.00014453314361162484, "logits/chosen": -0.34083905816078186, "logits/rejected": -0.3925648629665375, "logps/chosen": -0.00035240783472545445, "logps/rejected": -2.0025248527526855, "loss": 0.2737, "nll_loss": 0.0683998242020607, "rewards/accuracies": 1.0, "rewards/chosen": -3.5240784200141206e-05, "rewards/margins": 0.20021723210811615, "rewards/rejected": -0.2002524733543396, "step": 13511 }, { "epoch": 9.344398340248963, "grad_norm": 3.6394662857055664, "learning_rate": 3.642231443061319e-06, "log_odds_chosen": 9.833059310913086, "log_odds_ratio": -0.0003041258896701038, "logits/chosen": -0.010110607370734215, "logits/rejected": -0.1232147365808487, "logps/chosen": -0.0005445755086839199, "logps/rejected": -1.7158136367797852, "loss": 0.4296, "nll_loss": 0.1073751449584961, "rewards/accuracies": 1.0, "rewards/chosen": -5.4457548685604706e-05, "rewards/margins": 0.17152690887451172, "rewards/rejected": -0.17158135771751404, "step": 13512 }, { "epoch": 9.34508990318119, "grad_norm": 3.334577798843384, "learning_rate": 3.6383894267711694e-06, "log_odds_chosen": 10.367897033691406, "log_odds_ratio": -0.0002346929832128808, "logits/chosen": -0.906934380531311, "logits/rejected": -0.8934662938117981, "logps/chosen": -0.0001801918842829764, "logps/rejected": -1.6105437278747559, "loss": 0.5023, "nll_loss": 0.12554189562797546, "rewards/accuracies": 1.0, "rewards/chosen": -1.80191891558934e-05, "rewards/margins": 0.16103635728359222, "rewards/rejected": -0.16105437278747559, "step": 13513 }, { "epoch": 9.345781466113417, "grad_norm": 5.024738788604736, "learning_rate": 3.6345474104810206e-06, "log_odds_chosen": 12.38737964630127, "log_odds_ratio": -4.9194541134056635e-06, "logits/chosen": -0.4815788269042969, "logits/rejected": -0.5164158344268799, "logps/chosen": -0.0001025611418299377, "logps/rejected": -3.0465731620788574, "loss": 0.694, "nll_loss": 0.17351019382476807, "rewards/accuracies": 1.0, "rewards/chosen": -1.0256115274387412e-05, "rewards/margins": 0.3046470582485199, "rewards/rejected": -0.30465734004974365, "step": 13514 }, { "epoch": 9.346473029045644, "grad_norm": 3.6809616088867188, "learning_rate": 3.6307053941908718e-06, "log_odds_chosen": 11.304495811462402, "log_odds_ratio": -0.00010354755795560777, "logits/chosen": -0.08116520941257477, "logits/rejected": -0.15369859337806702, "logps/chosen": -0.00045098719419911504, "logps/rejected": -2.8794734477996826, "loss": 0.4327, "nll_loss": 0.10816633701324463, "rewards/accuracies": 1.0, "rewards/chosen": -4.509872087510303e-05, "rewards/margins": 0.28790223598480225, "rewards/rejected": -0.2879473567008972, "step": 13515 }, { "epoch": 9.34716459197787, "grad_norm": 3.3048770427703857, "learning_rate": 3.626863377900722e-06, "log_odds_chosen": 10.46618366241455, "log_odds_ratio": -0.00015984366473276168, "logits/chosen": -0.006419524550437927, "logits/rejected": -0.06340011954307556, "logps/chosen": -0.00023882980167400092, "logps/rejected": -1.9816560745239258, "loss": 0.3738, "nll_loss": 0.09342585504055023, "rewards/accuracies": 1.0, "rewards/chosen": -2.388297980360221e-05, "rewards/margins": 0.19814172387123108, "rewards/rejected": -0.198165625333786, "step": 13516 }, { "epoch": 9.347856154910097, "grad_norm": 3.3105244636535645, "learning_rate": 3.6230213616105733e-06, "log_odds_chosen": 10.370229721069336, "log_odds_ratio": -0.0002457791124470532, "logits/chosen": -0.4260295629501343, "logits/rejected": -0.5149073600769043, "logps/chosen": -0.0004372486437205225, "logps/rejected": -1.9320878982543945, "loss": 0.3102, "nll_loss": 0.0775316059589386, "rewards/accuracies": 1.0, "rewards/chosen": -4.372486364445649e-05, "rewards/margins": 0.19316506385803223, "rewards/rejected": -0.19320878386497498, "step": 13517 }, { "epoch": 9.348547717842324, "grad_norm": 3.5203912258148193, "learning_rate": 3.6191793453204245e-06, "log_odds_chosen": 10.76219654083252, "log_odds_ratio": -5.4000083764549345e-05, "logits/chosen": -0.07482422888278961, "logits/rejected": -0.11013496667146683, "logps/chosen": -0.00028690596809610724, "logps/rejected": -2.247020721435547, "loss": 0.6432, "nll_loss": 0.1607947051525116, "rewards/accuracies": 1.0, "rewards/chosen": -2.869059971999377e-05, "rewards/margins": 0.22467339038848877, "rewards/rejected": -0.22470206022262573, "step": 13518 }, { "epoch": 9.349239280774551, "grad_norm": 3.769413471221924, "learning_rate": 3.615337329030275e-06, "log_odds_chosen": 10.713618278503418, "log_odds_ratio": -0.00011112882202723995, "logits/chosen": -0.492983341217041, "logits/rejected": -0.34525761008262634, "logps/chosen": -0.00021097969147376716, "logps/rejected": -1.9031600952148438, "loss": 0.5288, "nll_loss": 0.1321868896484375, "rewards/accuracies": 1.0, "rewards/chosen": -2.109797060256824e-05, "rewards/margins": 0.19029492139816284, "rewards/rejected": -0.19031602144241333, "step": 13519 }, { "epoch": 9.349930843706778, "grad_norm": 3.34236741065979, "learning_rate": 3.6114953127401264e-06, "log_odds_chosen": 12.383539199829102, "log_odds_ratio": -1.4433892829401884e-05, "logits/chosen": 0.05255540832877159, "logits/rejected": -0.03551376610994339, "logps/chosen": -6.500162271549925e-05, "logps/rejected": -2.6960530281066895, "loss": 0.3563, "nll_loss": 0.08907345682382584, "rewards/accuracies": 1.0, "rewards/chosen": -6.500162726297276e-06, "rewards/margins": 0.2695987820625305, "rewards/rejected": -0.26960527896881104, "step": 13520 }, { "epoch": 9.350622406639005, "grad_norm": 2.60010027885437, "learning_rate": 3.6076532964499767e-06, "log_odds_chosen": 9.934788703918457, "log_odds_ratio": -0.0001642591378185898, "logits/chosen": -0.22894614934921265, "logits/rejected": -0.28230470418930054, "logps/chosen": -0.000867595721501857, "logps/rejected": -1.7370736598968506, "loss": 0.2212, "nll_loss": 0.055284351110458374, "rewards/accuracies": 1.0, "rewards/chosen": -8.675957360537723e-05, "rewards/margins": 0.17362061142921448, "rewards/rejected": -0.17370736598968506, "step": 13521 }, { "epoch": 9.351313969571232, "grad_norm": 3.1682677268981934, "learning_rate": 3.603811280159828e-06, "log_odds_chosen": 10.130863189697266, "log_odds_ratio": -0.0001920114445965737, "logits/chosen": -0.4609912633895874, "logits/rejected": -0.5363480448722839, "logps/chosen": -0.0002744471130426973, "logps/rejected": -1.774971604347229, "loss": 0.274, "nll_loss": 0.06847333163022995, "rewards/accuracies": 1.0, "rewards/chosen": -2.7444712031865492e-05, "rewards/margins": 0.17746974527835846, "rewards/rejected": -0.17749716341495514, "step": 13522 }, { "epoch": 9.352005532503458, "grad_norm": 3.280946731567383, "learning_rate": 3.599969263869679e-06, "log_odds_chosen": 12.321030616760254, "log_odds_ratio": -7.711152647971176e-06, "logits/chosen": -0.28302809596061707, "logits/rejected": -0.3848038911819458, "logps/chosen": -0.0001048395351972431, "logps/rejected": -2.958585262298584, "loss": 0.3042, "nll_loss": 0.07606004178524017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0483954611117952e-05, "rewards/margins": 0.29584801197052, "rewards/rejected": -0.2958585023880005, "step": 13523 }, { "epoch": 9.352697095435685, "grad_norm": 7.865887641906738, "learning_rate": 3.59612724757953e-06, "log_odds_chosen": 11.32748031616211, "log_odds_ratio": -1.6802272511995398e-05, "logits/chosen": -0.3086088001728058, "logits/rejected": -0.2242552787065506, "logps/chosen": -0.000512672879267484, "logps/rejected": -2.8091330528259277, "loss": 0.6089, "nll_loss": 0.15222682058811188, "rewards/accuracies": 1.0, "rewards/chosen": -5.1267288654344156e-05, "rewards/margins": 0.280862033367157, "rewards/rejected": -0.2809132933616638, "step": 13524 }, { "epoch": 9.353388658367912, "grad_norm": 3.4927072525024414, "learning_rate": 3.592285231289381e-06, "log_odds_chosen": 10.934383392333984, "log_odds_ratio": -0.0001778034056769684, "logits/chosen": -0.05815959721803665, "logits/rejected": -0.07613163441419601, "logps/chosen": -0.0001998421794269234, "logps/rejected": -2.3929872512817383, "loss": 0.2906, "nll_loss": 0.07262570410966873, "rewards/accuracies": 1.0, "rewards/chosen": -1.998421794269234e-05, "rewards/margins": 0.23927873373031616, "rewards/rejected": -0.2392987310886383, "step": 13525 }, { "epoch": 9.354080221300139, "grad_norm": 2.775463342666626, "learning_rate": 3.588443214999232e-06, "log_odds_chosen": 11.355659484863281, "log_odds_ratio": -0.0002898540406022221, "logits/chosen": -0.28086039423942566, "logits/rejected": -0.30784451961517334, "logps/chosen": -0.00010843494965229183, "logps/rejected": -2.1209423542022705, "loss": 0.3015, "nll_loss": 0.07533597201108932, "rewards/accuracies": 1.0, "rewards/chosen": -1.0843496056622826e-05, "rewards/margins": 0.2120833843946457, "rewards/rejected": -0.21209421753883362, "step": 13526 }, { "epoch": 9.354771784232366, "grad_norm": 3.300349235534668, "learning_rate": 3.5846011987090825e-06, "log_odds_chosen": 10.913179397583008, "log_odds_ratio": -3.2291423849528655e-05, "logits/chosen": -0.29905587434768677, "logits/rejected": -0.41569286584854126, "logps/chosen": -0.00027541533927433193, "logps/rejected": -2.266528367996216, "loss": 0.2815, "nll_loss": 0.07037490606307983, "rewards/accuracies": 1.0, "rewards/chosen": -2.754153683781624e-05, "rewards/margins": 0.22662532329559326, "rewards/rejected": -0.2266528606414795, "step": 13527 }, { "epoch": 9.355463347164592, "grad_norm": 3.9342613220214844, "learning_rate": 3.5807591824189336e-06, "log_odds_chosen": 11.960512161254883, "log_odds_ratio": -1.677579894021619e-05, "logits/chosen": 0.3055367171764374, "logits/rejected": 0.2745853662490845, "logps/chosen": -0.0001570849126437679, "logps/rejected": -2.8881330490112305, "loss": 0.4841, "nll_loss": 0.12102716416120529, "rewards/accuracies": 1.0, "rewards/chosen": -1.570849235577043e-05, "rewards/margins": 0.2887975871562958, "rewards/rejected": -0.2888132929801941, "step": 13528 }, { "epoch": 9.35615491009682, "grad_norm": 2.7532241344451904, "learning_rate": 3.576917166128785e-06, "log_odds_chosen": 10.52255630493164, "log_odds_ratio": -0.00011265225475654006, "logits/chosen": -0.18711841106414795, "logits/rejected": -0.18893063068389893, "logps/chosen": -0.0004456713213585317, "logps/rejected": -1.7828774452209473, "loss": 0.2779, "nll_loss": 0.06947155296802521, "rewards/accuracies": 1.0, "rewards/chosen": -4.4567128497874364e-05, "rewards/margins": 0.17824319005012512, "rewards/rejected": -0.17828774452209473, "step": 13529 }, { "epoch": 9.356846473029046, "grad_norm": 3.0096664428710938, "learning_rate": 3.573075149838635e-06, "log_odds_chosen": 10.857948303222656, "log_odds_ratio": -4.2508509068284184e-05, "logits/chosen": -0.3942891061306, "logits/rejected": -0.47457262873649597, "logps/chosen": -0.00019530697318259627, "logps/rejected": -1.931833028793335, "loss": 0.4284, "nll_loss": 0.10710678994655609, "rewards/accuracies": 1.0, "rewards/chosen": -1.9530696590663865e-05, "rewards/margins": 0.19316376745700836, "rewards/rejected": -0.1931833028793335, "step": 13530 }, { "epoch": 9.357538035961273, "grad_norm": 3.6303272247314453, "learning_rate": 3.5692331335484863e-06, "log_odds_chosen": 12.418657302856445, "log_odds_ratio": -9.459313332627062e-06, "logits/chosen": -0.06569956243038177, "logits/rejected": -0.25190234184265137, "logps/chosen": -5.5378652177751064e-05, "logps/rejected": -2.470158100128174, "loss": 0.5091, "nll_loss": 0.12726396322250366, "rewards/accuracies": 1.0, "rewards/chosen": -5.537865490623517e-06, "rewards/margins": 0.24701027572155, "rewards/rejected": -0.2470158189535141, "step": 13531 }, { "epoch": 9.3582295988935, "grad_norm": 2.818519115447998, "learning_rate": 3.5653911172583375e-06, "log_odds_chosen": 10.561389923095703, "log_odds_ratio": -8.231533138314262e-05, "logits/chosen": -0.40165674686431885, "logits/rejected": -0.47748011350631714, "logps/chosen": -0.00016759091522544622, "logps/rejected": -1.9250514507293701, "loss": 0.3667, "nll_loss": 0.09166901558637619, "rewards/accuracies": 1.0, "rewards/chosen": -1.6759091522544622e-05, "rewards/margins": 0.1924883872270584, "rewards/rejected": -0.1925051361322403, "step": 13532 }, { "epoch": 9.358921161825727, "grad_norm": 2.5160977840423584, "learning_rate": 3.5615491009681882e-06, "log_odds_chosen": 10.444723129272461, "log_odds_ratio": -0.00020006597333122045, "logits/chosen": -0.13105201721191406, "logits/rejected": -0.04617917537689209, "logps/chosen": -0.0007053934969007969, "logps/rejected": -1.9502530097961426, "loss": 0.2094, "nll_loss": 0.05233754962682724, "rewards/accuracies": 1.0, "rewards/chosen": -7.053935405565426e-05, "rewards/margins": 0.1949547529220581, "rewards/rejected": -0.19502530992031097, "step": 13533 }, { "epoch": 9.359612724757953, "grad_norm": 3.0994884967803955, "learning_rate": 3.5577070846780394e-06, "log_odds_chosen": 10.954906463623047, "log_odds_ratio": -4.9613285227678716e-05, "logits/chosen": -0.5060032606124878, "logits/rejected": -0.5835158824920654, "logps/chosen": -0.00023126085579860955, "logps/rejected": -2.014693260192871, "loss": 0.3486, "nll_loss": 0.08713746815919876, "rewards/accuracies": 1.0, "rewards/chosen": -2.3126085579860955e-05, "rewards/margins": 0.20144620537757874, "rewards/rejected": -0.2014693170785904, "step": 13534 }, { "epoch": 9.36030428769018, "grad_norm": 3.6979379653930664, "learning_rate": 3.5538650683878906e-06, "log_odds_chosen": 10.76142406463623, "log_odds_ratio": -8.735790470382199e-05, "logits/chosen": -0.24934163689613342, "logits/rejected": -0.2001289278268814, "logps/chosen": -0.0001878843322629109, "logps/rejected": -2.0889670848846436, "loss": 0.524, "nll_loss": 0.13099606335163116, "rewards/accuracies": 1.0, "rewards/chosen": -1.8788434317684732e-05, "rewards/margins": 0.20887792110443115, "rewards/rejected": -0.2088966965675354, "step": 13535 }, { "epoch": 9.360995850622407, "grad_norm": 3.5815634727478027, "learning_rate": 3.550023052097741e-06, "log_odds_chosen": 11.135117530822754, "log_odds_ratio": -5.961426722933538e-05, "logits/chosen": -0.32283806800842285, "logits/rejected": -0.3588043749332428, "logps/chosen": -0.0002041382249444723, "logps/rejected": -2.0124354362487793, "loss": 0.4955, "nll_loss": 0.12385807931423187, "rewards/accuracies": 1.0, "rewards/chosen": -2.041382140305359e-05, "rewards/margins": 0.20122313499450684, "rewards/rejected": -0.2012435495853424, "step": 13536 }, { "epoch": 9.361687413554634, "grad_norm": 3.3385045528411865, "learning_rate": 3.546181035807592e-06, "log_odds_chosen": 10.644510269165039, "log_odds_ratio": -4.055405588587746e-05, "logits/chosen": 0.35951119661331177, "logits/rejected": 0.1921553611755371, "logps/chosen": -0.00018380882102064788, "logps/rejected": -1.9298346042633057, "loss": 0.2987, "nll_loss": 0.07466232776641846, "rewards/accuracies": 1.0, "rewards/chosen": -1.838088246586267e-05, "rewards/margins": 0.1929650753736496, "rewards/rejected": -0.192983478307724, "step": 13537 }, { "epoch": 9.36237897648686, "grad_norm": 2.018005847930908, "learning_rate": 3.5423390195174424e-06, "log_odds_chosen": 10.15301513671875, "log_odds_ratio": -0.00021057360572740436, "logits/chosen": -0.2207718789577484, "logits/rejected": -0.31473612785339355, "logps/chosen": -0.00041511974995955825, "logps/rejected": -2.0513081550598145, "loss": 0.2247, "nll_loss": 0.056156452745199203, "rewards/accuracies": 1.0, "rewards/chosen": -4.151197936153039e-05, "rewards/margins": 0.2050892859697342, "rewards/rejected": -0.20513081550598145, "step": 13538 }, { "epoch": 9.363070539419088, "grad_norm": 4.0689897537231445, "learning_rate": 3.5384970032272936e-06, "log_odds_chosen": 9.644641876220703, "log_odds_ratio": -0.0006806966848671436, "logits/chosen": -0.6376623511314392, "logits/rejected": -0.7098978161811829, "logps/chosen": -0.0008343122899532318, "logps/rejected": -1.6593842506408691, "loss": 0.3388, "nll_loss": 0.08463947474956512, "rewards/accuracies": 1.0, "rewards/chosen": -8.343123772647232e-05, "rewards/margins": 0.16585499048233032, "rewards/rejected": -0.16593842208385468, "step": 13539 }, { "epoch": 9.363762102351314, "grad_norm": 3.441516876220703, "learning_rate": 3.5346549869371448e-06, "log_odds_chosen": 12.501133918762207, "log_odds_ratio": -1.264120055566309e-05, "logits/chosen": 0.14218124747276306, "logits/rejected": 0.021220367401838303, "logps/chosen": -0.00011060711403843015, "logps/rejected": -3.2021191120147705, "loss": 0.32, "nll_loss": 0.07998687773942947, "rewards/accuracies": 1.0, "rewards/chosen": -1.1060712495236658e-05, "rewards/margins": 0.3202008605003357, "rewards/rejected": -0.32021191716194153, "step": 13540 }, { "epoch": 9.364453665283541, "grad_norm": 3.1656370162963867, "learning_rate": 3.5308129706469955e-06, "log_odds_chosen": 9.825788497924805, "log_odds_ratio": -0.0001689522177912295, "logits/chosen": -0.1688910722732544, "logits/rejected": -0.07189778238534927, "logps/chosen": -0.0009861242724582553, "logps/rejected": -1.7967033386230469, "loss": 0.2544, "nll_loss": 0.06358574330806732, "rewards/accuracies": 1.0, "rewards/chosen": -9.861243597697467e-05, "rewards/margins": 0.1795717179775238, "rewards/rejected": -0.1796703338623047, "step": 13541 }, { "epoch": 9.365145228215768, "grad_norm": 3.3450043201446533, "learning_rate": 3.5269709543568467e-06, "log_odds_chosen": 12.491061210632324, "log_odds_ratio": -6.405562089639716e-06, "logits/chosen": -0.25810593366622925, "logits/rejected": -0.314283549785614, "logps/chosen": -4.827147495234385e-05, "logps/rejected": -2.621676445007324, "loss": 0.4573, "nll_loss": 0.11432111263275146, "rewards/accuracies": 1.0, "rewards/chosen": -4.827148131880676e-06, "rewards/margins": 0.26216280460357666, "rewards/rejected": -0.26216763257980347, "step": 13542 }, { "epoch": 9.365836791147995, "grad_norm": 2.7860682010650635, "learning_rate": 3.523128938066698e-06, "log_odds_chosen": 11.803909301757812, "log_odds_ratio": -1.233218517882051e-05, "logits/chosen": -0.45608076453208923, "logits/rejected": -0.5399580597877502, "logps/chosen": -5.7952653151005507e-05, "logps/rejected": -1.9664535522460938, "loss": 0.3966, "nll_loss": 0.09915214031934738, "rewards/accuracies": 1.0, "rewards/chosen": -5.79526476940373e-06, "rewards/margins": 0.19663956761360168, "rewards/rejected": -0.1966453492641449, "step": 13543 }, { "epoch": 9.366528354080222, "grad_norm": 13.490571975708008, "learning_rate": 3.519286921776548e-06, "log_odds_chosen": 10.731107711791992, "log_odds_ratio": -4.895938764093444e-05, "logits/chosen": -0.1185360997915268, "logits/rejected": -0.2674499750137329, "logps/chosen": -0.00047680726856924593, "logps/rejected": -2.3153059482574463, "loss": 0.4476, "nll_loss": 0.11190719902515411, "rewards/accuracies": 1.0, "rewards/chosen": -4.768072540173307e-05, "rewards/margins": 0.23148292303085327, "rewards/rejected": -0.23153060674667358, "step": 13544 }, { "epoch": 9.367219917012449, "grad_norm": 3.817535638809204, "learning_rate": 3.5154449054863993e-06, "log_odds_chosen": 10.996612548828125, "log_odds_ratio": -0.0001471824652981013, "logits/chosen": 0.03633652627468109, "logits/rejected": -0.11121993511915207, "logps/chosen": -0.00016094991588033736, "logps/rejected": -2.0416789054870605, "loss": 0.5539, "nll_loss": 0.1384541392326355, "rewards/accuracies": 1.0, "rewards/chosen": -1.609499304322526e-05, "rewards/margins": 0.20415179431438446, "rewards/rejected": -0.20416787266731262, "step": 13545 }, { "epoch": 9.367911479944675, "grad_norm": 2.8662376403808594, "learning_rate": 3.5116028891962505e-06, "log_odds_chosen": 10.102569580078125, "log_odds_ratio": -8.055663056438789e-05, "logits/chosen": -0.33110880851745605, "logits/rejected": -0.3435867130756378, "logps/chosen": -0.00019017732120119035, "logps/rejected": -1.3068668842315674, "loss": 0.2805, "nll_loss": 0.07012133300304413, "rewards/accuracies": 1.0, "rewards/chosen": -1.901773430290632e-05, "rewards/margins": 0.13066768646240234, "rewards/rejected": -0.1306867003440857, "step": 13546 }, { "epoch": 9.368603042876902, "grad_norm": 4.5606770515441895, "learning_rate": 3.5077608729061013e-06, "log_odds_chosen": 10.703825950622559, "log_odds_ratio": -0.00022275917581282556, "logits/chosen": -0.19523394107818604, "logits/rejected": -0.2622426152229309, "logps/chosen": -0.0007412299746647477, "logps/rejected": -2.1645567417144775, "loss": 0.5679, "nll_loss": 0.14194399118423462, "rewards/accuracies": 1.0, "rewards/chosen": -7.412301056319848e-05, "rewards/margins": 0.21638154983520508, "rewards/rejected": -0.21645568311214447, "step": 13547 }, { "epoch": 9.369294605809129, "grad_norm": 2.658365249633789, "learning_rate": 3.5039188566159524e-06, "log_odds_chosen": 11.70455551147461, "log_odds_ratio": -2.6133679057238623e-05, "logits/chosen": -0.3863418698310852, "logits/rejected": -0.41382431983947754, "logps/chosen": -0.00024077700800262392, "logps/rejected": -2.854800224304199, "loss": 0.3191, "nll_loss": 0.07977981120347977, "rewards/accuracies": 1.0, "rewards/chosen": -2.407770080026239e-05, "rewards/margins": 0.28545594215393066, "rewards/rejected": -0.2854800224304199, "step": 13548 }, { "epoch": 9.369986168741356, "grad_norm": 3.614183187484741, "learning_rate": 3.5000768403258036e-06, "log_odds_chosen": 10.417539596557617, "log_odds_ratio": -0.0005081373383291066, "logits/chosen": -0.12346908450126648, "logits/rejected": -0.2217111736536026, "logps/chosen": -0.0017699197633191943, "logps/rejected": -2.1381521224975586, "loss": 0.3942, "nll_loss": 0.09849061071872711, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017699197633191943, "rewards/margins": 0.21363824605941772, "rewards/rejected": -0.21381521224975586, "step": 13549 }, { "epoch": 9.370677731673583, "grad_norm": 2.7907590866088867, "learning_rate": 3.496234824035654e-06, "log_odds_chosen": 10.472977638244629, "log_odds_ratio": -0.00026426269323565066, "logits/chosen": -0.19486777484416962, "logits/rejected": -0.1657772958278656, "logps/chosen": -0.0003529631649143994, "logps/rejected": -1.6553034782409668, "loss": 0.3223, "nll_loss": 0.08055949956178665, "rewards/accuracies": 1.0, "rewards/chosen": -3.529631794663146e-05, "rewards/margins": 0.16549506783485413, "rewards/rejected": -0.16553035378456116, "step": 13550 }, { "epoch": 9.37136929460581, "grad_norm": 3.4401087760925293, "learning_rate": 3.492392807745505e-06, "log_odds_chosen": 11.224440574645996, "log_odds_ratio": -9.510615927865729e-05, "logits/chosen": -0.26112285256385803, "logits/rejected": -0.26466330885887146, "logps/chosen": -0.0003577464958652854, "logps/rejected": -2.475301742553711, "loss": 0.3851, "nll_loss": 0.09625741094350815, "rewards/accuracies": 1.0, "rewards/chosen": -3.5774646676145494e-05, "rewards/margins": 0.2474943995475769, "rewards/rejected": -0.24753017723560333, "step": 13551 }, { "epoch": 9.372060857538036, "grad_norm": 3.604793071746826, "learning_rate": 3.4885507914553563e-06, "log_odds_chosen": 11.031092643737793, "log_odds_ratio": -0.0006941432366147637, "logits/chosen": -0.0888419970870018, "logits/rejected": -0.19537392258644104, "logps/chosen": -0.0005400891532190144, "logps/rejected": -2.552731990814209, "loss": 0.8016, "nll_loss": 0.20033778250217438, "rewards/accuracies": 1.0, "rewards/chosen": -5.4008916777092963e-05, "rewards/margins": 0.2552191913127899, "rewards/rejected": -0.2552731931209564, "step": 13552 }, { "epoch": 9.372752420470263, "grad_norm": 2.4608335494995117, "learning_rate": 3.4847087751652066e-06, "log_odds_chosen": 11.443750381469727, "log_odds_ratio": -1.4160999853629619e-05, "logits/chosen": -0.269876092672348, "logits/rejected": -0.3958069086074829, "logps/chosen": -5.696548760170117e-05, "logps/rejected": -1.4379843473434448, "loss": 0.2563, "nll_loss": 0.06408420950174332, "rewards/accuracies": 1.0, "rewards/chosen": -5.696549123967998e-06, "rewards/margins": 0.14379273355007172, "rewards/rejected": -0.14379842579364777, "step": 13553 }, { "epoch": 9.37344398340249, "grad_norm": 4.183465480804443, "learning_rate": 3.4808667588750578e-06, "log_odds_chosen": 10.022726058959961, "log_odds_ratio": -0.0005112159997224808, "logits/chosen": -0.361375629901886, "logits/rejected": -0.33961185812950134, "logps/chosen": -0.0018962868489325047, "logps/rejected": -2.9116106033325195, "loss": 0.3587, "nll_loss": 0.0896359458565712, "rewards/accuracies": 1.0, "rewards/chosen": -0.00018962868489325047, "rewards/margins": 0.29097142815589905, "rewards/rejected": -0.29116106033325195, "step": 13554 }, { "epoch": 9.374135546334717, "grad_norm": 3.236473798751831, "learning_rate": 3.4770247425849085e-06, "log_odds_chosen": 11.414098739624023, "log_odds_ratio": -3.5743047192227095e-05, "logits/chosen": -0.10795624554157257, "logits/rejected": -0.2149205058813095, "logps/chosen": -0.00015265934052877128, "logps/rejected": -2.235002279281616, "loss": 0.4326, "nll_loss": 0.108148954808712, "rewards/accuracies": 1.0, "rewards/chosen": -1.526593587186653e-05, "rewards/margins": 0.22348496317863464, "rewards/rejected": -0.22350022196769714, "step": 13555 }, { "epoch": 9.374827109266944, "grad_norm": 2.6837217807769775, "learning_rate": 3.4731827262947597e-06, "log_odds_chosen": 11.048116683959961, "log_odds_ratio": -7.135642954381183e-05, "logits/chosen": -0.4107449948787689, "logits/rejected": -0.4265679717063904, "logps/chosen": -0.00013635572395287454, "logps/rejected": -1.8748329877853394, "loss": 0.304, "nll_loss": 0.0759982243180275, "rewards/accuracies": 1.0, "rewards/chosen": -1.3635572940984275e-05, "rewards/margins": 0.18746967613697052, "rewards/rejected": -0.1874833106994629, "step": 13556 }, { "epoch": 9.37551867219917, "grad_norm": 3.1047263145446777, "learning_rate": 3.469340710004611e-06, "log_odds_chosen": 12.321937561035156, "log_odds_ratio": -2.3595224774908274e-05, "logits/chosen": -0.13789021968841553, "logits/rejected": -0.3051018714904785, "logps/chosen": -0.00010697266407078132, "logps/rejected": -3.1257381439208984, "loss": 0.3415, "nll_loss": 0.08536285907030106, "rewards/accuracies": 1.0, "rewards/chosen": -1.069726567948237e-05, "rewards/margins": 0.31256312131881714, "rewards/rejected": -0.31257379055023193, "step": 13557 }, { "epoch": 9.376210235131397, "grad_norm": 2.6061549186706543, "learning_rate": 3.465498693714461e-06, "log_odds_chosen": 10.358039855957031, "log_odds_ratio": -0.00012019602581858635, "logits/chosen": -0.07744506001472473, "logits/rejected": -0.11549359560012817, "logps/chosen": -0.0004092513117939234, "logps/rejected": -1.9624541997909546, "loss": 0.2435, "nll_loss": 0.060864921659231186, "rewards/accuracies": 1.0, "rewards/chosen": -4.092513336217962e-05, "rewards/margins": 0.1962045133113861, "rewards/rejected": -0.19624543190002441, "step": 13558 }, { "epoch": 9.376901798063624, "grad_norm": 3.5974655151367188, "learning_rate": 3.4616566774243124e-06, "log_odds_chosen": 10.450204849243164, "log_odds_ratio": -0.00016834316193126142, "logits/chosen": -0.2307269126176834, "logits/rejected": -0.2927913963794708, "logps/chosen": -0.00027439341647550464, "logps/rejected": -1.9489752054214478, "loss": 0.2947, "nll_loss": 0.07364586740732193, "rewards/accuracies": 1.0, "rewards/chosen": -2.7439338737167418e-05, "rewards/margins": 0.19487008452415466, "rewards/rejected": -0.19489750266075134, "step": 13559 }, { "epoch": 9.377593360995851, "grad_norm": 3.8792929649353027, "learning_rate": 3.4578146611341635e-06, "log_odds_chosen": 11.759347915649414, "log_odds_ratio": -1.827812593546696e-05, "logits/chosen": -0.21874378621578217, "logits/rejected": -0.2008637636899948, "logps/chosen": -0.00014112150529399514, "logps/rejected": -2.4289422035217285, "loss": 0.4251, "nll_loss": 0.10627898573875427, "rewards/accuracies": 1.0, "rewards/chosen": -1.4112150893197395e-05, "rewards/margins": 0.24288010597229004, "rewards/rejected": -0.2428942322731018, "step": 13560 }, { "epoch": 9.378284923928078, "grad_norm": 2.9272608757019043, "learning_rate": 3.453972644844014e-06, "log_odds_chosen": 10.84752082824707, "log_odds_ratio": -5.029505700804293e-05, "logits/chosen": -0.13805429637432098, "logits/rejected": -0.06397004425525665, "logps/chosen": -0.00026461438392288983, "logps/rejected": -2.0349204540252686, "loss": 0.4125, "nll_loss": 0.10310757160186768, "rewards/accuracies": 1.0, "rewards/chosen": -2.646143730089534e-05, "rewards/margins": 0.20346559584140778, "rewards/rejected": -0.20349204540252686, "step": 13561 }, { "epoch": 9.378976486860305, "grad_norm": 3.037715435028076, "learning_rate": 3.450130628553865e-06, "log_odds_chosen": 9.759491920471191, "log_odds_ratio": -0.0010099021019414067, "logits/chosen": -0.31357407569885254, "logits/rejected": -0.20637246966362, "logps/chosen": -0.0014631549129262567, "logps/rejected": -1.0898208618164062, "loss": 0.3014, "nll_loss": 0.0752379521727562, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001463155058445409, "rewards/margins": 0.10883576422929764, "rewards/rejected": -0.10898207128047943, "step": 13562 }, { "epoch": 9.379668049792532, "grad_norm": 3.0630035400390625, "learning_rate": 3.4462886122637162e-06, "log_odds_chosen": 10.501922607421875, "log_odds_ratio": -6.71866800985299e-05, "logits/chosen": -0.47390761971473694, "logits/rejected": -0.532874345779419, "logps/chosen": -0.00016098772175610065, "logps/rejected": -1.3961749076843262, "loss": 0.339, "nll_loss": 0.08475200831890106, "rewards/accuracies": 1.0, "rewards/chosen": -1.6098772903205827e-05, "rewards/margins": 0.13960139453411102, "rewards/rejected": -0.13961750268936157, "step": 13563 }, { "epoch": 9.380359612724758, "grad_norm": 2.68406081199646, "learning_rate": 3.442446595973567e-06, "log_odds_chosen": 10.071020126342773, "log_odds_ratio": -6.841091817477718e-05, "logits/chosen": -0.2392948567867279, "logits/rejected": -0.16483944654464722, "logps/chosen": -0.0002607560600154102, "logps/rejected": -1.6807191371917725, "loss": 0.2379, "nll_loss": 0.05947023630142212, "rewards/accuracies": 1.0, "rewards/chosen": -2.6075604182551615e-05, "rewards/margins": 0.16804584860801697, "rewards/rejected": -0.168071910738945, "step": 13564 }, { "epoch": 9.381051175656985, "grad_norm": 3.561326503753662, "learning_rate": 3.438604579683418e-06, "log_odds_chosen": 10.621723175048828, "log_odds_ratio": -6.102824772824533e-05, "logits/chosen": -0.6733594536781311, "logits/rejected": -0.7504304647445679, "logps/chosen": -0.0002749867853708565, "logps/rejected": -1.9878876209259033, "loss": 0.4111, "nll_loss": 0.10276590287685394, "rewards/accuracies": 1.0, "rewards/chosen": -2.749867780948989e-05, "rewards/margins": 0.19876126945018768, "rewards/rejected": -0.19878877699375153, "step": 13565 }, { "epoch": 9.381742738589212, "grad_norm": 3.828990936279297, "learning_rate": 3.4347625633932693e-06, "log_odds_chosen": 10.754388809204102, "log_odds_ratio": -6.629389827139676e-05, "logits/chosen": 0.1312224566936493, "logits/rejected": 0.031041786074638367, "logps/chosen": -0.00022781691222917289, "logps/rejected": -2.1744894981384277, "loss": 0.443, "nll_loss": 0.11074355244636536, "rewards/accuracies": 1.0, "rewards/chosen": -2.2781690859119408e-05, "rewards/margins": 0.21742618083953857, "rewards/rejected": -0.21744894981384277, "step": 13566 }, { "epoch": 9.382434301521439, "grad_norm": 4.1905012130737305, "learning_rate": 3.4309205471031196e-06, "log_odds_chosen": 11.427661895751953, "log_odds_ratio": -2.659208985278383e-05, "logits/chosen": -0.31396639347076416, "logits/rejected": -0.3941642940044403, "logps/chosen": -0.00021044274035375565, "logps/rejected": -2.6817564964294434, "loss": 0.3957, "nll_loss": 0.09892689436674118, "rewards/accuracies": 1.0, "rewards/chosen": -2.1044272216386162e-05, "rewards/margins": 0.2681546211242676, "rewards/rejected": -0.2681756615638733, "step": 13567 }, { "epoch": 9.383125864453666, "grad_norm": 3.7993035316467285, "learning_rate": 3.427078530812971e-06, "log_odds_chosen": 10.846572875976562, "log_odds_ratio": -3.981238114647567e-05, "logits/chosen": -0.326119065284729, "logits/rejected": -0.3857240378856659, "logps/chosen": -0.0003091458638664335, "logps/rejected": -1.9830150604248047, "loss": 0.3483, "nll_loss": 0.08706822991371155, "rewards/accuracies": 1.0, "rewards/chosen": -3.091458711423911e-05, "rewards/margins": 0.19827060401439667, "rewards/rejected": -0.1983015239238739, "step": 13568 }, { "epoch": 9.383817427385893, "grad_norm": 3.793304443359375, "learning_rate": 3.423236514522822e-06, "log_odds_chosen": 11.753762245178223, "log_odds_ratio": -0.00012055758270435035, "logits/chosen": -0.368549644947052, "logits/rejected": -0.4517800211906433, "logps/chosen": -0.00020971563935745507, "logps/rejected": -2.952338457107544, "loss": 0.4279, "nll_loss": 0.10697045177221298, "rewards/accuracies": 1.0, "rewards/chosen": -2.097156539093703e-05, "rewards/margins": 0.29521286487579346, "rewards/rejected": -0.2952338457107544, "step": 13569 }, { "epoch": 9.38450899031812, "grad_norm": 3.0754928588867188, "learning_rate": 3.4193944982326727e-06, "log_odds_chosen": 11.249407768249512, "log_odds_ratio": -6.009052231092937e-05, "logits/chosen": -0.5221173763275146, "logits/rejected": -0.5405316352844238, "logps/chosen": -0.0003820542187895626, "logps/rejected": -2.40143084526062, "loss": 0.3792, "nll_loss": 0.09480046480894089, "rewards/accuracies": 1.0, "rewards/chosen": -3.8205420423764735e-05, "rewards/margins": 0.24010486900806427, "rewards/rejected": -0.2401430904865265, "step": 13570 }, { "epoch": 9.385200553250346, "grad_norm": 3.1722779273986816, "learning_rate": 3.415552481942524e-06, "log_odds_chosen": 11.105411529541016, "log_odds_ratio": -4.717747287941165e-05, "logits/chosen": -0.8160011768341064, "logits/rejected": -0.8017624616622925, "logps/chosen": -0.0002255855652038008, "logps/rejected": -2.233698844909668, "loss": 0.4063, "nll_loss": 0.10156512260437012, "rewards/accuracies": 1.0, "rewards/chosen": -2.255855724797584e-05, "rewards/margins": 0.22334732115268707, "rewards/rejected": -0.22336986660957336, "step": 13571 }, { "epoch": 9.385892116182573, "grad_norm": 3.226372241973877, "learning_rate": 3.4117104656523742e-06, "log_odds_chosen": 10.567256927490234, "log_odds_ratio": -0.00023033078468870372, "logits/chosen": -0.3000712990760803, "logits/rejected": -0.2451561689376831, "logps/chosen": -0.0003794012009166181, "logps/rejected": -2.2354483604431152, "loss": 0.4107, "nll_loss": 0.1026480570435524, "rewards/accuracies": 1.0, "rewards/chosen": -3.794012445723638e-05, "rewards/margins": 0.223506897687912, "rewards/rejected": -0.22354485094547272, "step": 13572 }, { "epoch": 9.3865836791148, "grad_norm": 3.852189064025879, "learning_rate": 3.4078684493622254e-06, "log_odds_chosen": 11.41202163696289, "log_odds_ratio": -1.611007610335946e-05, "logits/chosen": -0.08447438478469849, "logits/rejected": -0.17714518308639526, "logps/chosen": -0.0001326056953985244, "logps/rejected": -2.1824405193328857, "loss": 0.4716, "nll_loss": 0.11790943145751953, "rewards/accuracies": 1.0, "rewards/chosen": -1.3260570085549261e-05, "rewards/margins": 0.21823078393936157, "rewards/rejected": -0.2182440459728241, "step": 13573 }, { "epoch": 9.387275242047027, "grad_norm": 4.368622303009033, "learning_rate": 3.4040264330720766e-06, "log_odds_chosen": 10.459915161132812, "log_odds_ratio": -8.878041990101337e-05, "logits/chosen": -0.2901434898376465, "logits/rejected": -0.3067079782485962, "logps/chosen": -0.00019604206318035722, "logps/rejected": -1.8046751022338867, "loss": 0.3493, "nll_loss": 0.08731767535209656, "rewards/accuracies": 1.0, "rewards/chosen": -1.9604207409429364e-05, "rewards/margins": 0.18044789135456085, "rewards/rejected": -0.18046751618385315, "step": 13574 }, { "epoch": 9.387966804979254, "grad_norm": 2.562063694000244, "learning_rate": 3.400184416781927e-06, "log_odds_chosen": 11.21015453338623, "log_odds_ratio": -6.06346657150425e-05, "logits/chosen": -0.2250322699546814, "logits/rejected": -0.30319520831108093, "logps/chosen": -0.0002839433145709336, "logps/rejected": -2.7567548751831055, "loss": 0.2747, "nll_loss": 0.0686735287308693, "rewards/accuracies": 1.0, "rewards/chosen": -2.8394333639880642e-05, "rewards/margins": 0.27564704418182373, "rewards/rejected": -0.2756754755973816, "step": 13575 }, { "epoch": 9.38865836791148, "grad_norm": 3.084099531173706, "learning_rate": 3.396342400491778e-06, "log_odds_chosen": 10.3840913772583, "log_odds_ratio": -0.00026426592376083136, "logits/chosen": -0.00226283585652709, "logits/rejected": -0.09975391626358032, "logps/chosen": -0.00034463696647435427, "logps/rejected": -2.0437979698181152, "loss": 0.3703, "nll_loss": 0.09254927933216095, "rewards/accuracies": 1.0, "rewards/chosen": -3.446369737503119e-05, "rewards/margins": 0.20434533059597015, "rewards/rejected": -0.20437979698181152, "step": 13576 }, { "epoch": 9.389349930843707, "grad_norm": 4.571847438812256, "learning_rate": 3.3925003842016292e-06, "log_odds_chosen": 10.668195724487305, "log_odds_ratio": -9.47575899772346e-05, "logits/chosen": -0.3503793478012085, "logits/rejected": -0.5184992551803589, "logps/chosen": -0.00021498440764844418, "logps/rejected": -2.1632885932922363, "loss": 0.543, "nll_loss": 0.13575081527233124, "rewards/accuracies": 1.0, "rewards/chosen": -2.1498439309652895e-05, "rewards/margins": 0.2163073569536209, "rewards/rejected": -0.21632885932922363, "step": 13577 }, { "epoch": 9.390041493775934, "grad_norm": 3.6178324222564697, "learning_rate": 3.38865836791148e-06, "log_odds_chosen": 11.287225723266602, "log_odds_ratio": -4.2001665860880166e-05, "logits/chosen": -0.2348712980747223, "logits/rejected": -0.3508765697479248, "logps/chosen": -9.699821384856477e-05, "logps/rejected": -1.6754095554351807, "loss": 0.3395, "nll_loss": 0.08488013595342636, "rewards/accuracies": 1.0, "rewards/chosen": -9.699820111563895e-06, "rewards/margins": 0.16753125190734863, "rewards/rejected": -0.16754095256328583, "step": 13578 }, { "epoch": 9.39073305670816, "grad_norm": 2.393125295639038, "learning_rate": 3.384816351621331e-06, "log_odds_chosen": 10.588972091674805, "log_odds_ratio": -6.092490002629347e-05, "logits/chosen": -0.5662620663642883, "logits/rejected": -0.5864916443824768, "logps/chosen": -0.0003362030256539583, "logps/rejected": -1.9976005554199219, "loss": 0.3772, "nll_loss": 0.09428730607032776, "rewards/accuracies": 1.0, "rewards/chosen": -3.3620304748183116e-05, "rewards/margins": 0.1997264325618744, "rewards/rejected": -0.1997600644826889, "step": 13579 }, { "epoch": 9.391424619640388, "grad_norm": 3.167736053466797, "learning_rate": 3.3809743353311823e-06, "log_odds_chosen": 11.302907943725586, "log_odds_ratio": -2.863583722501062e-05, "logits/chosen": -0.6793009042739868, "logits/rejected": -0.5627678632736206, "logps/chosen": -0.0001633332867641002, "logps/rejected": -2.2593603134155273, "loss": 0.3868, "nll_loss": 0.09669610857963562, "rewards/accuracies": 1.0, "rewards/chosen": -1.6333327948814258e-05, "rewards/margins": 0.2259196937084198, "rewards/rejected": -0.22593602538108826, "step": 13580 }, { "epoch": 9.392116182572614, "grad_norm": 3.3908193111419678, "learning_rate": 3.3771323190410327e-06, "log_odds_chosen": 11.220902442932129, "log_odds_ratio": -0.00010893019498325884, "logits/chosen": 0.024650298058986664, "logits/rejected": -0.0366368368268013, "logps/chosen": -0.0008727542590349913, "logps/rejected": -2.7380731105804443, "loss": 0.4389, "nll_loss": 0.10972367227077484, "rewards/accuracies": 1.0, "rewards/chosen": -8.72754244483076e-05, "rewards/margins": 0.27372005581855774, "rewards/rejected": -0.2738073468208313, "step": 13581 }, { "epoch": 9.392807745504841, "grad_norm": 2.68562650680542, "learning_rate": 3.373290302750884e-06, "log_odds_chosen": 10.752114295959473, "log_odds_ratio": -5.1588660426205024e-05, "logits/chosen": -0.15629924833774567, "logits/rejected": -0.14297765493392944, "logps/chosen": -0.0006921213353052735, "logps/rejected": -2.4885218143463135, "loss": 0.2704, "nll_loss": 0.06759613752365112, "rewards/accuracies": 1.0, "rewards/chosen": -6.921213935129344e-05, "rewards/margins": 0.24878299236297607, "rewards/rejected": -0.2488521933555603, "step": 13582 }, { "epoch": 9.393499308437068, "grad_norm": 4.6912150382995605, "learning_rate": 3.369448286460735e-06, "log_odds_chosen": 11.584748268127441, "log_odds_ratio": -0.00010558907524682581, "logits/chosen": -0.40621644258499146, "logits/rejected": -0.3990243673324585, "logps/chosen": -0.00022080856433603913, "logps/rejected": -2.1555638313293457, "loss": 0.5407, "nll_loss": 0.1351681500673294, "rewards/accuracies": 1.0, "rewards/chosen": -2.2080857888795435e-05, "rewards/margins": 0.2155342996120453, "rewards/rejected": -0.21555638313293457, "step": 13583 }, { "epoch": 9.394190871369295, "grad_norm": 4.056237697601318, "learning_rate": 3.3656062701705853e-06, "log_odds_chosen": 10.888897895812988, "log_odds_ratio": -0.00024027469044085592, "logits/chosen": -0.4823153614997864, "logits/rejected": -0.48899656534194946, "logps/chosen": -0.00022984632232692093, "logps/rejected": -2.356065034866333, "loss": 0.4007, "nll_loss": 0.10016251355409622, "rewards/accuracies": 1.0, "rewards/chosen": -2.298463004990481e-05, "rewards/margins": 0.23558352887630463, "rewards/rejected": -0.23560652136802673, "step": 13584 }, { "epoch": 9.394882434301522, "grad_norm": 3.921373128890991, "learning_rate": 3.3617642538804365e-06, "log_odds_chosen": 12.349092483520508, "log_odds_ratio": -8.765801794652361e-06, "logits/chosen": 0.1655922532081604, "logits/rejected": 0.11520100384950638, "logps/chosen": -0.0002407803403912112, "logps/rejected": -3.638532876968384, "loss": 0.3757, "nll_loss": 0.09393280744552612, "rewards/accuracies": 1.0, "rewards/chosen": -2.4078035494312644e-05, "rewards/margins": 0.36382919549942017, "rewards/rejected": -0.3638532757759094, "step": 13585 }, { "epoch": 9.395573997233749, "grad_norm": 3.0867762565612793, "learning_rate": 3.3579222375902877e-06, "log_odds_chosen": 10.678717613220215, "log_odds_ratio": -0.0002471938787493855, "logits/chosen": -0.2576814889907837, "logits/rejected": -0.2743965685367584, "logps/chosen": -0.00034111557761207223, "logps/rejected": -2.0099055767059326, "loss": 0.3935, "nll_loss": 0.09835696220397949, "rewards/accuracies": 1.0, "rewards/chosen": -3.411155557841994e-05, "rewards/margins": 0.20095646381378174, "rewards/rejected": -0.20099057257175446, "step": 13586 }, { "epoch": 9.396265560165975, "grad_norm": 3.657719612121582, "learning_rate": 3.3540802213001384e-06, "log_odds_chosen": 10.942753791809082, "log_odds_ratio": -0.00029641768196597695, "logits/chosen": -0.25797197222709656, "logits/rejected": -0.23642107844352722, "logps/chosen": -0.00017646155902184546, "logps/rejected": -2.24796462059021, "loss": 0.2469, "nll_loss": 0.0617036335170269, "rewards/accuracies": 1.0, "rewards/chosen": -1.7646154446993023e-05, "rewards/margins": 0.22477883100509644, "rewards/rejected": -0.22479647397994995, "step": 13587 }, { "epoch": 9.396957123098202, "grad_norm": 4.464614391326904, "learning_rate": 3.3502382050099896e-06, "log_odds_chosen": 10.541181564331055, "log_odds_ratio": -0.00013117294292896986, "logits/chosen": -0.20644135773181915, "logits/rejected": -0.10292874276638031, "logps/chosen": -0.0005109063349664211, "logps/rejected": -2.394510269165039, "loss": 0.8101, "nll_loss": 0.20250877737998962, "rewards/accuracies": 1.0, "rewards/chosen": -5.10906356794294e-05, "rewards/margins": 0.23939993977546692, "rewards/rejected": -0.23945102095603943, "step": 13588 }, { "epoch": 9.39764868603043, "grad_norm": 2.5152103900909424, "learning_rate": 3.34639618871984e-06, "log_odds_chosen": 10.59862232208252, "log_odds_ratio": -9.659049101173878e-05, "logits/chosen": -0.029881253838539124, "logits/rejected": -0.1297704428434372, "logps/chosen": -0.00021189470135141164, "logps/rejected": -1.72607421875, "loss": 0.2238, "nll_loss": 0.05594668164849281, "rewards/accuracies": 1.0, "rewards/chosen": -2.1189471226534806e-05, "rewards/margins": 0.17258621752262115, "rewards/rejected": -0.1726074069738388, "step": 13589 }, { "epoch": 9.398340248962656, "grad_norm": 2.9175498485565186, "learning_rate": 3.342554172429691e-06, "log_odds_chosen": 10.26060676574707, "log_odds_ratio": -0.0002207197976531461, "logits/chosen": 0.13548356294631958, "logits/rejected": 0.15024137496948242, "logps/chosen": -0.0009417575201950967, "logps/rejected": -1.7678678035736084, "loss": 0.3158, "nll_loss": 0.07892054319381714, "rewards/accuracies": 1.0, "rewards/chosen": -9.417576075065881e-05, "rewards/margins": 0.17669261991977692, "rewards/rejected": -0.17678678035736084, "step": 13590 }, { "epoch": 9.399031811894883, "grad_norm": 3.9631402492523193, "learning_rate": 3.3387121561395423e-06, "log_odds_chosen": 12.132984161376953, "log_odds_ratio": -1.4265860045270529e-05, "logits/chosen": 0.06901467591524124, "logits/rejected": -0.20833554863929749, "logps/chosen": -0.0004912492004223168, "logps/rejected": -3.343538761138916, "loss": 0.4169, "nll_loss": 0.10422386229038239, "rewards/accuracies": 1.0, "rewards/chosen": -4.9124922952614725e-05, "rewards/margins": 0.33430472016334534, "rewards/rejected": -0.33435386419296265, "step": 13591 }, { "epoch": 9.39972337482711, "grad_norm": 3.6970267295837402, "learning_rate": 3.334870139849393e-06, "log_odds_chosen": 11.283451080322266, "log_odds_ratio": -4.216269371681847e-05, "logits/chosen": -0.38629505038261414, "logits/rejected": -0.41579392552375793, "logps/chosen": -9.976735600503162e-05, "logps/rejected": -2.153491973876953, "loss": 0.4248, "nll_loss": 0.10618871450424194, "rewards/accuracies": 1.0, "rewards/chosen": -9.976735782402102e-06, "rewards/margins": 0.21533921360969543, "rewards/rejected": -0.2153491973876953, "step": 13592 }, { "epoch": 9.400414937759336, "grad_norm": 5.877899646759033, "learning_rate": 3.331028123559244e-06, "log_odds_chosen": 9.553054809570312, "log_odds_ratio": -0.0002016788930632174, "logits/chosen": -0.7149819135665894, "logits/rejected": -0.5551720261573792, "logps/chosen": -0.0003842208825517446, "logps/rejected": -1.6851762533187866, "loss": 0.5968, "nll_loss": 0.14917269349098206, "rewards/accuracies": 1.0, "rewards/chosen": -3.8422091165557504e-05, "rewards/margins": 0.16847920417785645, "rewards/rejected": -0.168517604470253, "step": 13593 }, { "epoch": 9.401106500691563, "grad_norm": 2.704509735107422, "learning_rate": 3.3271861072690954e-06, "log_odds_chosen": 10.636608123779297, "log_odds_ratio": -0.00022637513757217675, "logits/chosen": -0.517031192779541, "logits/rejected": -0.5399819612503052, "logps/chosen": -0.0005620784359052777, "logps/rejected": -2.193096160888672, "loss": 0.3281, "nll_loss": 0.08199042081832886, "rewards/accuracies": 1.0, "rewards/chosen": -5.620783849735744e-05, "rewards/margins": 0.21925342082977295, "rewards/rejected": -0.21930962800979614, "step": 13594 }, { "epoch": 9.40179806362379, "grad_norm": 3.62241530418396, "learning_rate": 3.3233440909789457e-06, "log_odds_chosen": 11.132572174072266, "log_odds_ratio": -8.268863894045353e-05, "logits/chosen": -0.5297135710716248, "logits/rejected": -0.5522671937942505, "logps/chosen": -0.0009948884835466743, "logps/rejected": -2.285141944885254, "loss": 0.3795, "nll_loss": 0.09486062824726105, "rewards/accuracies": 1.0, "rewards/chosen": -9.948885417543352e-05, "rewards/margins": 0.22841474413871765, "rewards/rejected": -0.22851420938968658, "step": 13595 }, { "epoch": 9.402489626556017, "grad_norm": 3.3328349590301514, "learning_rate": 3.319502074688797e-06, "log_odds_chosen": 11.376626014709473, "log_odds_ratio": -5.180388325243257e-05, "logits/chosen": -0.4596288800239563, "logits/rejected": -0.5163432359695435, "logps/chosen": -0.00018895625544246286, "logps/rejected": -2.4481046199798584, "loss": 0.3318, "nll_loss": 0.08293630927801132, "rewards/accuracies": 1.0, "rewards/chosen": -1.8895625544246286e-05, "rewards/margins": 0.24479156732559204, "rewards/rejected": -0.24481046199798584, "step": 13596 }, { "epoch": 9.403181189488244, "grad_norm": 2.345792770385742, "learning_rate": 3.315660058398648e-06, "log_odds_chosen": 10.897357940673828, "log_odds_ratio": -6.9019639340695e-05, "logits/chosen": -0.06716062873601913, "logits/rejected": -0.031017400324344635, "logps/chosen": -0.0003638725320342928, "logps/rejected": -2.4356253147125244, "loss": 0.281, "nll_loss": 0.07024391740560532, "rewards/accuracies": 1.0, "rewards/chosen": -3.638725320342928e-05, "rewards/margins": 0.2435261458158493, "rewards/rejected": -0.2435625195503235, "step": 13597 }, { "epoch": 9.40387275242047, "grad_norm": 3.051527738571167, "learning_rate": 3.3118180421084984e-06, "log_odds_chosen": 10.573110580444336, "log_odds_ratio": -6.965044303797185e-05, "logits/chosen": -0.13037006556987762, "logits/rejected": -0.13248313963413239, "logps/chosen": -0.0001720719737932086, "logps/rejected": -1.8427140712738037, "loss": 0.2718, "nll_loss": 0.06793493032455444, "rewards/accuracies": 1.0, "rewards/chosen": -1.7207195924129337e-05, "rewards/margins": 0.18425419926643372, "rewards/rejected": -0.1842714101076126, "step": 13598 }, { "epoch": 9.404564315352697, "grad_norm": 3.171217918395996, "learning_rate": 3.3079760258183495e-06, "log_odds_chosen": 11.250415802001953, "log_odds_ratio": -1.9207192963222042e-05, "logits/chosen": -0.4102725088596344, "logits/rejected": -0.47221803665161133, "logps/chosen": -0.00015858103870414197, "logps/rejected": -2.2595107555389404, "loss": 0.3371, "nll_loss": 0.0842801183462143, "rewards/accuracies": 1.0, "rewards/chosen": -1.5858104234212078e-05, "rewards/margins": 0.2259352207183838, "rewards/rejected": -0.22595107555389404, "step": 13599 }, { "epoch": 9.405255878284924, "grad_norm": 2.929114580154419, "learning_rate": 3.3041340095282007e-06, "log_odds_chosen": 11.336652755737305, "log_odds_ratio": -3.686985655804165e-05, "logits/chosen": -0.05324409902095795, "logits/rejected": -0.06757915019989014, "logps/chosen": -0.00010137058416148648, "logps/rejected": -1.7882221937179565, "loss": 0.2707, "nll_loss": 0.06766844540834427, "rewards/accuracies": 1.0, "rewards/chosen": -1.0137058779946528e-05, "rewards/margins": 0.17881208658218384, "rewards/rejected": -0.17882221937179565, "step": 13600 }, { "epoch": 9.405947441217151, "grad_norm": 3.7205023765563965, "learning_rate": 3.3002919932380515e-06, "log_odds_chosen": 10.108559608459473, "log_odds_ratio": -0.00035753127303905785, "logits/chosen": -0.2789788246154785, "logits/rejected": -0.31889981031417847, "logps/chosen": -0.0004472219734452665, "logps/rejected": -1.4498944282531738, "loss": 0.2628, "nll_loss": 0.0656713992357254, "rewards/accuracies": 1.0, "rewards/chosen": -4.4722200982505456e-05, "rewards/margins": 0.14494472742080688, "rewards/rejected": -0.14498944580554962, "step": 13601 }, { "epoch": 9.406639004149378, "grad_norm": 3.527688503265381, "learning_rate": 3.2964499769479026e-06, "log_odds_chosen": 10.635601043701172, "log_odds_ratio": -4.5054053771309555e-05, "logits/chosen": -0.5978109240531921, "logits/rejected": -0.6777760982513428, "logps/chosen": -0.0001139695305027999, "logps/rejected": -1.6677496433258057, "loss": 0.3322, "nll_loss": 0.08304670453071594, "rewards/accuracies": 1.0, "rewards/chosen": -1.1396952686482109e-05, "rewards/margins": 0.1667635589838028, "rewards/rejected": -0.1667749583721161, "step": 13602 }, { "epoch": 9.407330567081605, "grad_norm": 3.7321369647979736, "learning_rate": 3.292607960657754e-06, "log_odds_chosen": 11.718954086303711, "log_odds_ratio": -1.348754722130252e-05, "logits/chosen": -0.35870301723480225, "logits/rejected": -0.3781982660293579, "logps/chosen": -0.00016278823022730649, "logps/rejected": -2.7064929008483887, "loss": 0.2841, "nll_loss": 0.07101425528526306, "rewards/accuracies": 1.0, "rewards/chosen": -1.627882375032641e-05, "rewards/margins": 0.2706330120563507, "rewards/rejected": -0.2706492841243744, "step": 13603 }, { "epoch": 9.408022130013832, "grad_norm": 4.767492771148682, "learning_rate": 3.288765944367604e-06, "log_odds_chosen": 12.546222686767578, "log_odds_ratio": -6.613996902160579e-06, "logits/chosen": -0.011980824172496796, "logits/rejected": -0.09855447709560394, "logps/chosen": -0.00015213618462439626, "logps/rejected": -3.185060501098633, "loss": 0.6174, "nll_loss": 0.15435130894184113, "rewards/accuracies": 1.0, "rewards/chosen": -1.5213619008136448e-05, "rewards/margins": 0.3184908330440521, "rewards/rejected": -0.31850606203079224, "step": 13604 }, { "epoch": 9.408713692946058, "grad_norm": 1.7180169820785522, "learning_rate": 3.2849239280774553e-06, "log_odds_chosen": 11.02637004852295, "log_odds_ratio": -3.8207799661904573e-05, "logits/chosen": -0.1217493861913681, "logits/rejected": -0.10812407732009888, "logps/chosen": -4.604986315825954e-05, "logps/rejected": -1.4926437139511108, "loss": 0.2321, "nll_loss": 0.05801209807395935, "rewards/accuracies": 1.0, "rewards/chosen": -4.604986770573305e-06, "rewards/margins": 0.1492597609758377, "rewards/rejected": -0.1492643654346466, "step": 13605 }, { "epoch": 9.409405255878285, "grad_norm": 5.552383899688721, "learning_rate": 3.2810819117873056e-06, "log_odds_chosen": 11.0220308303833, "log_odds_ratio": -5.878361116629094e-05, "logits/chosen": -0.24672137200832367, "logits/rejected": -0.1756354570388794, "logps/chosen": -0.00029366809758357704, "logps/rejected": -2.6202406883239746, "loss": 0.3598, "nll_loss": 0.08994251489639282, "rewards/accuracies": 1.0, "rewards/chosen": -2.9366809030761942e-05, "rewards/margins": 0.26199471950531006, "rewards/rejected": -0.2620241045951843, "step": 13606 }, { "epoch": 9.410096818810512, "grad_norm": 2.8825390338897705, "learning_rate": 3.277239895497157e-06, "log_odds_chosen": 11.942837715148926, "log_odds_ratio": -0.000203387564397417, "logits/chosen": -0.03993723541498184, "logits/rejected": -0.06068715453147888, "logps/chosen": -0.00022810781956650317, "logps/rejected": -3.365323066711426, "loss": 0.4158, "nll_loss": 0.10393252968788147, "rewards/accuracies": 1.0, "rewards/chosen": -2.281078377563972e-05, "rewards/margins": 0.33650949597358704, "rewards/rejected": -0.336532324552536, "step": 13607 }, { "epoch": 9.410788381742739, "grad_norm": 2.55307674407959, "learning_rate": 3.273397879207008e-06, "log_odds_chosen": 10.70759105682373, "log_odds_ratio": -0.0005642443429678679, "logits/chosen": -0.437193900346756, "logits/rejected": -0.45698311924934387, "logps/chosen": -0.0011037236545234919, "logps/rejected": -2.3171181678771973, "loss": 0.2515, "nll_loss": 0.06281343102455139, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011037235526600853, "rewards/margins": 0.23160143196582794, "rewards/rejected": -0.23171180486679077, "step": 13608 }, { "epoch": 9.411479944674966, "grad_norm": 4.183548450469971, "learning_rate": 3.2695558629168587e-06, "log_odds_chosen": 11.013273239135742, "log_odds_ratio": -0.00017840656801126897, "logits/chosen": -0.7780969142913818, "logits/rejected": -0.8504067063331604, "logps/chosen": -0.00031670788303017616, "logps/rejected": -2.138556480407715, "loss": 0.3203, "nll_loss": 0.08006297051906586, "rewards/accuracies": 1.0, "rewards/chosen": -3.1670788303017616e-05, "rewards/margins": 0.21382398903369904, "rewards/rejected": -0.21385566890239716, "step": 13609 }, { "epoch": 9.412171507607193, "grad_norm": 3.3250057697296143, "learning_rate": 3.26571384662671e-06, "log_odds_chosen": 11.108063697814941, "log_odds_ratio": -3.6314133467385545e-05, "logits/chosen": -0.3911839425563812, "logits/rejected": -0.39810705184936523, "logps/chosen": -0.00027260807109996676, "logps/rejected": -2.261627197265625, "loss": 0.3394, "nll_loss": 0.08484381437301636, "rewards/accuracies": 1.0, "rewards/chosen": -2.7260806746198796e-05, "rewards/margins": 0.22613544762134552, "rewards/rejected": -0.22616273164749146, "step": 13610 }, { "epoch": 9.41286307053942, "grad_norm": 5.000128746032715, "learning_rate": 3.261871830336561e-06, "log_odds_chosen": 10.38177490234375, "log_odds_ratio": -6.147993553895503e-05, "logits/chosen": -0.5801539421081543, "logits/rejected": -0.686652660369873, "logps/chosen": -0.000287293252767995, "logps/rejected": -1.8929868936538696, "loss": 0.3666, "nll_loss": 0.09163911640644073, "rewards/accuracies": 1.0, "rewards/chosen": -2.8729327823384665e-05, "rewards/margins": 0.18926995992660522, "rewards/rejected": -0.18929868936538696, "step": 13611 }, { "epoch": 9.413554633471646, "grad_norm": 4.563615798950195, "learning_rate": 3.2580298140464114e-06, "log_odds_chosen": 11.843263626098633, "log_odds_ratio": -0.00014839382492937148, "logits/chosen": -0.28000980615615845, "logits/rejected": -0.3588707447052002, "logps/chosen": -0.00023143761791288853, "logps/rejected": -3.1376800537109375, "loss": 0.4461, "nll_loss": 0.11149781942367554, "rewards/accuracies": 1.0, "rewards/chosen": -2.314376069989521e-05, "rewards/margins": 0.31374484300613403, "rewards/rejected": -0.3137679994106293, "step": 13612 }, { "epoch": 9.414246196403873, "grad_norm": 3.096818447113037, "learning_rate": 3.2541877977562626e-06, "log_odds_chosen": 10.367470741271973, "log_odds_ratio": -0.0001926126569742337, "logits/chosen": -0.3956316411495209, "logits/rejected": -0.5229665040969849, "logps/chosen": -0.0002295288140885532, "logps/rejected": -1.6280049085617065, "loss": 0.4132, "nll_loss": 0.1032683402299881, "rewards/accuracies": 1.0, "rewards/chosen": -2.295288140885532e-05, "rewards/margins": 0.16277754306793213, "rewards/rejected": -0.16280049085617065, "step": 13613 }, { "epoch": 9.4149377593361, "grad_norm": 4.1682939529418945, "learning_rate": 3.2503457814661137e-06, "log_odds_chosen": 11.22652530670166, "log_odds_ratio": -3.537128213793039e-05, "logits/chosen": -0.06997202336788177, "logits/rejected": -0.05267438292503357, "logps/chosen": -0.0002715398441068828, "logps/rejected": -2.250588893890381, "loss": 0.4923, "nll_loss": 0.12307889759540558, "rewards/accuracies": 1.0, "rewards/chosen": -2.7153984774486162e-05, "rewards/margins": 0.22503173351287842, "rewards/rejected": -0.2250588983297348, "step": 13614 }, { "epoch": 9.415629322268327, "grad_norm": 4.823745250701904, "learning_rate": 3.2465037651759645e-06, "log_odds_chosen": 11.2670316696167, "log_odds_ratio": -0.11680711805820465, "logits/chosen": -0.14811766147613525, "logits/rejected": -0.19620651006698608, "logps/chosen": -0.0237438902258873, "logps/rejected": -2.5987982749938965, "loss": 0.4275, "nll_loss": 0.09518326818943024, "rewards/accuracies": 0.875, "rewards/chosen": -0.0023743887431919575, "rewards/margins": 0.2575054466724396, "rewards/rejected": -0.25987982749938965, "step": 13615 }, { "epoch": 9.416320885200554, "grad_norm": 3.1150922775268555, "learning_rate": 3.2426617488858157e-06, "log_odds_chosen": 11.053929328918457, "log_odds_ratio": -9.234283788828179e-05, "logits/chosen": -0.2426833212375641, "logits/rejected": -0.27916908264160156, "logps/chosen": -0.0002188576472690329, "logps/rejected": -2.3410890102386475, "loss": 0.3349, "nll_loss": 0.08371220529079437, "rewards/accuracies": 1.0, "rewards/chosen": -2.1885763999307528e-05, "rewards/margins": 0.2340870201587677, "rewards/rejected": -0.23410890996456146, "step": 13616 }, { "epoch": 9.41701244813278, "grad_norm": 2.70302677154541, "learning_rate": 3.238819732595667e-06, "log_odds_chosen": 11.673267364501953, "log_odds_ratio": -2.1553594706347212e-05, "logits/chosen": -0.324625700712204, "logits/rejected": -0.26671531796455383, "logps/chosen": -6.083353218855336e-05, "logps/rejected": -2.182375431060791, "loss": 0.4002, "nll_loss": 0.1000453308224678, "rewards/accuracies": 1.0, "rewards/chosen": -6.083352673158515e-06, "rewards/margins": 0.2182314693927765, "rewards/rejected": -0.21823754906654358, "step": 13617 }, { "epoch": 9.417704011065007, "grad_norm": 3.1561553478240967, "learning_rate": 3.234977716305517e-06, "log_odds_chosen": 11.751758575439453, "log_odds_ratio": -1.6239227988990024e-05, "logits/chosen": -0.387655109167099, "logits/rejected": -0.47167304158210754, "logps/chosen": -0.000126562881632708, "logps/rejected": -2.393993616104126, "loss": 0.2997, "nll_loss": 0.07492394745349884, "rewards/accuracies": 1.0, "rewards/chosen": -1.2656288163270801e-05, "rewards/margins": 0.23938670754432678, "rewards/rejected": -0.23939937353134155, "step": 13618 }, { "epoch": 9.418395573997234, "grad_norm": 3.0932955741882324, "learning_rate": 3.2311357000153683e-06, "log_odds_chosen": 11.690707206726074, "log_odds_ratio": -4.184385761618614e-05, "logits/chosen": -0.2753382921218872, "logits/rejected": -0.4104476273059845, "logps/chosen": -6.605208182008937e-05, "logps/rejected": -2.1179964542388916, "loss": 0.3442, "nll_loss": 0.08604513108730316, "rewards/accuracies": 1.0, "rewards/chosen": -6.605208000109997e-06, "rewards/margins": 0.21179303526878357, "rewards/rejected": -0.21179965138435364, "step": 13619 }, { "epoch": 9.41908713692946, "grad_norm": 3.613025665283203, "learning_rate": 3.2272936837252195e-06, "log_odds_chosen": 12.161299705505371, "log_odds_ratio": -1.4612392988055944e-05, "logits/chosen": 0.09467865526676178, "logits/rejected": 0.08084909617900848, "logps/chosen": -0.0002366297267144546, "logps/rejected": -3.197540760040283, "loss": 0.5081, "nll_loss": 0.12701593339443207, "rewards/accuracies": 1.0, "rewards/chosen": -2.3662971216253936e-05, "rewards/margins": 0.31973040103912354, "rewards/rejected": -0.31975409388542175, "step": 13620 }, { "epoch": 9.419778699861688, "grad_norm": 4.123516082763672, "learning_rate": 3.22345166743507e-06, "log_odds_chosen": 11.306703567504883, "log_odds_ratio": -2.3380784114124253e-05, "logits/chosen": -0.33806487917900085, "logits/rejected": -0.32109004259109497, "logps/chosen": -0.0001318022550549358, "logps/rejected": -2.3551292419433594, "loss": 0.6867, "nll_loss": 0.17167049646377563, "rewards/accuracies": 1.0, "rewards/chosen": -1.318022441409994e-05, "rewards/margins": 0.23549973964691162, "rewards/rejected": -0.23551294207572937, "step": 13621 }, { "epoch": 9.420470262793915, "grad_norm": 3.9194085597991943, "learning_rate": 3.219609651144921e-06, "log_odds_chosen": 11.74150562286377, "log_odds_ratio": -1.1522912245709449e-05, "logits/chosen": -0.007314398884773254, "logits/rejected": -0.030431300401687622, "logps/chosen": -0.0001474516757298261, "logps/rejected": -2.6600990295410156, "loss": 0.4314, "nll_loss": 0.10784394294023514, "rewards/accuracies": 1.0, "rewards/chosen": -1.4745167391083669e-05, "rewards/margins": 0.26599520444869995, "rewards/rejected": -0.2660099267959595, "step": 13622 }, { "epoch": 9.421161825726141, "grad_norm": 4.476114273071289, "learning_rate": 3.2157676348547718e-06, "log_odds_chosen": 11.353679656982422, "log_odds_ratio": -0.00018491236551199108, "logits/chosen": -0.18393199145793915, "logits/rejected": -0.2646377682685852, "logps/chosen": -0.00029539887327700853, "logps/rejected": -2.67562198638916, "loss": 0.8482, "nll_loss": 0.2120363414287567, "rewards/accuracies": 1.0, "rewards/chosen": -2.9539889510488138e-05, "rewards/margins": 0.2675326466560364, "rewards/rejected": -0.2675621807575226, "step": 13623 }, { "epoch": 9.421853388658368, "grad_norm": 3.667562246322632, "learning_rate": 3.211925618564623e-06, "log_odds_chosen": 10.851970672607422, "log_odds_ratio": -0.00023015293118078262, "logits/chosen": -0.2953568696975708, "logits/rejected": -0.3751392662525177, "logps/chosen": -0.0002698083408176899, "logps/rejected": -2.3600590229034424, "loss": 0.4117, "nll_loss": 0.10289748758077621, "rewards/accuracies": 1.0, "rewards/chosen": -2.6980836992152035e-05, "rewards/margins": 0.23597891628742218, "rewards/rejected": -0.23600590229034424, "step": 13624 }, { "epoch": 9.422544951590595, "grad_norm": 2.6978375911712646, "learning_rate": 3.208083602274474e-06, "log_odds_chosen": 11.028204917907715, "log_odds_ratio": -4.765874473378062e-05, "logits/chosen": -0.21027493476867676, "logits/rejected": -0.31045985221862793, "logps/chosen": -0.00022212699695955962, "logps/rejected": -2.5450971126556396, "loss": 0.2484, "nll_loss": 0.06209544464945793, "rewards/accuracies": 1.0, "rewards/chosen": -2.2212700059753843e-05, "rewards/margins": 0.2544875144958496, "rewards/rejected": -0.25450971722602844, "step": 13625 }, { "epoch": 9.423236514522822, "grad_norm": 3.2922449111938477, "learning_rate": 3.2042415859843244e-06, "log_odds_chosen": 9.94178581237793, "log_odds_ratio": -0.0003998736501671374, "logits/chosen": -0.3220575749874115, "logits/rejected": -0.406940221786499, "logps/chosen": -0.0004172759654466063, "logps/rejected": -1.5480931997299194, "loss": 0.4327, "nll_loss": 0.10814134776592255, "rewards/accuracies": 1.0, "rewards/chosen": -4.172759872744791e-05, "rewards/margins": 0.15476760268211365, "rewards/rejected": -0.15480931103229523, "step": 13626 }, { "epoch": 9.423928077455049, "grad_norm": 3.3185558319091797, "learning_rate": 3.2003995696941756e-06, "log_odds_chosen": 11.532296180725098, "log_odds_ratio": -1.9236800653743558e-05, "logits/chosen": 0.012846432626247406, "logits/rejected": -0.1282588094472885, "logps/chosen": -9.658637281972915e-05, "logps/rejected": -2.0812268257141113, "loss": 0.3085, "nll_loss": 0.07712876051664352, "rewards/accuracies": 1.0, "rewards/chosen": -9.658637281972915e-06, "rewards/margins": 0.20811301469802856, "rewards/rejected": -0.20812267065048218, "step": 13627 }, { "epoch": 9.424619640387276, "grad_norm": 2.815317153930664, "learning_rate": 3.1965575534040268e-06, "log_odds_chosen": 11.8434476852417, "log_odds_ratio": -1.1227140930714086e-05, "logits/chosen": -0.5030456781387329, "logits/rejected": -0.6363354921340942, "logps/chosen": -0.00013441324699670076, "logps/rejected": -2.511301040649414, "loss": 0.3466, "nll_loss": 0.08665873855352402, "rewards/accuracies": 1.0, "rewards/chosen": -1.3441325791063718e-05, "rewards/margins": 0.25111669301986694, "rewards/rejected": -0.2511301040649414, "step": 13628 }, { "epoch": 9.425311203319502, "grad_norm": 3.614622116088867, "learning_rate": 3.192715537113877e-06, "log_odds_chosen": 12.06743049621582, "log_odds_ratio": -1.551922468934208e-05, "logits/chosen": -0.4491950273513794, "logits/rejected": -0.509068489074707, "logps/chosen": -0.0001239084085682407, "logps/rejected": -2.926715612411499, "loss": 0.7057, "nll_loss": 0.17642636597156525, "rewards/accuracies": 1.0, "rewards/chosen": -1.2390841220621951e-05, "rewards/margins": 0.2926591634750366, "rewards/rejected": -0.2926715910434723, "step": 13629 }, { "epoch": 9.42600276625173, "grad_norm": 3.2747886180877686, "learning_rate": 3.1888735208237283e-06, "log_odds_chosen": 10.869483947753906, "log_odds_ratio": -8.397969941142946e-05, "logits/chosen": -0.2558768689632416, "logits/rejected": -0.5263897180557251, "logps/chosen": -0.0003626463876571506, "logps/rejected": -2.1652169227600098, "loss": 0.3101, "nll_loss": 0.07751419395208359, "rewards/accuracies": 1.0, "rewards/chosen": -3.626464240369387e-05, "rewards/margins": 0.2164854258298874, "rewards/rejected": -0.21652168035507202, "step": 13630 }, { "epoch": 9.426694329183956, "grad_norm": 4.786688804626465, "learning_rate": 3.1850315045335794e-06, "log_odds_chosen": 10.476531982421875, "log_odds_ratio": -0.00017410835425835103, "logits/chosen": 0.17468611896038055, "logits/rejected": 0.08674517273902893, "logps/chosen": -0.0007101238006725907, "logps/rejected": -2.145505905151367, "loss": 0.4957, "nll_loss": 0.12391538918018341, "rewards/accuracies": 1.0, "rewards/chosen": -7.101238588802516e-05, "rewards/margins": 0.21447956562042236, "rewards/rejected": -0.21455058455467224, "step": 13631 }, { "epoch": 9.427385892116183, "grad_norm": 5.189729690551758, "learning_rate": 3.18118948824343e-06, "log_odds_chosen": 11.972164154052734, "log_odds_ratio": -1.0619540262268856e-05, "logits/chosen": 0.05772440880537033, "logits/rejected": 0.0032345205545425415, "logps/chosen": -7.343491597566754e-05, "logps/rejected": -2.3607099056243896, "loss": 0.3151, "nll_loss": 0.07877691835165024, "rewards/accuracies": 1.0, "rewards/chosen": -7.343490779021522e-06, "rewards/margins": 0.2360636591911316, "rewards/rejected": -0.23607099056243896, "step": 13632 }, { "epoch": 9.42807745504841, "grad_norm": 3.6475706100463867, "learning_rate": 3.1773474719532814e-06, "log_odds_chosen": 12.083813667297363, "log_odds_ratio": -1.8943410395877436e-05, "logits/chosen": -0.002671957015991211, "logits/rejected": -0.03937486559152603, "logps/chosen": -0.00010588954319246113, "logps/rejected": -2.9970786571502686, "loss": 0.3612, "nll_loss": 0.0903020054101944, "rewards/accuracies": 1.0, "rewards/chosen": -1.0588954864942934e-05, "rewards/margins": 0.29969727993011475, "rewards/rejected": -0.29970788955688477, "step": 13633 }, { "epoch": 9.428769017980636, "grad_norm": 2.840242862701416, "learning_rate": 3.1735054556631325e-06, "log_odds_chosen": 10.816787719726562, "log_odds_ratio": -0.0001226270542247221, "logits/chosen": -0.1480427384376526, "logits/rejected": -0.30245691537857056, "logps/chosen": -0.0001767796347849071, "logps/rejected": -2.009460926055908, "loss": 0.2778, "nll_loss": 0.06943796575069427, "rewards/accuracies": 1.0, "rewards/chosen": -1.7677964933682233e-05, "rewards/margins": 0.20092841982841492, "rewards/rejected": -0.20094609260559082, "step": 13634 }, { "epoch": 9.429460580912863, "grad_norm": 4.10830545425415, "learning_rate": 3.169663439372983e-06, "log_odds_chosen": 11.465187072753906, "log_odds_ratio": -1.9342684026923962e-05, "logits/chosen": -0.08761780709028244, "logits/rejected": -0.23344786465168, "logps/chosen": -0.00020120250701438636, "logps/rejected": -2.2758188247680664, "loss": 0.6442, "nll_loss": 0.16103589534759521, "rewards/accuracies": 1.0, "rewards/chosen": -2.0120251065236516e-05, "rewards/margins": 0.22756177186965942, "rewards/rejected": -0.22758188843727112, "step": 13635 }, { "epoch": 9.43015214384509, "grad_norm": 3.0645132064819336, "learning_rate": 3.165821423082834e-06, "log_odds_chosen": 11.983230590820312, "log_odds_ratio": -1.6645788491587155e-05, "logits/chosen": -0.011146046221256256, "logits/rejected": -0.09030141681432724, "logps/chosen": -0.00019530183635652065, "logps/rejected": -3.040992021560669, "loss": 0.3178, "nll_loss": 0.07945729047060013, "rewards/accuracies": 1.0, "rewards/chosen": -1.9530183635652065e-05, "rewards/margins": 0.30407968163490295, "rewards/rejected": -0.3040992021560669, "step": 13636 }, { "epoch": 9.430843706777317, "grad_norm": 2.893550157546997, "learning_rate": 3.161979406792685e-06, "log_odds_chosen": 10.124031066894531, "log_odds_ratio": -9.303042315877974e-05, "logits/chosen": -0.17404676973819733, "logits/rejected": -0.3016105890274048, "logps/chosen": -0.0003833910741377622, "logps/rejected": -2.239431142807007, "loss": 0.3076, "nll_loss": 0.07688341289758682, "rewards/accuracies": 1.0, "rewards/chosen": -3.833910886896774e-05, "rewards/margins": 0.2239047884941101, "rewards/rejected": -0.22394311428070068, "step": 13637 }, { "epoch": 9.431535269709544, "grad_norm": 2.878291606903076, "learning_rate": 3.158137390502536e-06, "log_odds_chosen": 10.961097717285156, "log_odds_ratio": -9.159816545434296e-05, "logits/chosen": -0.39119625091552734, "logits/rejected": -0.4431838393211365, "logps/chosen": -0.0002546052564866841, "logps/rejected": -2.232316255569458, "loss": 0.337, "nll_loss": 0.0842406302690506, "rewards/accuracies": 1.0, "rewards/chosen": -2.5460527467657812e-05, "rewards/margins": 0.22320617735385895, "rewards/rejected": -0.22323161363601685, "step": 13638 }, { "epoch": 9.43222683264177, "grad_norm": 4.693680763244629, "learning_rate": 3.154295374212387e-06, "log_odds_chosen": 12.475543975830078, "log_odds_ratio": -8.55696271173656e-06, "logits/chosen": 0.04066818952560425, "logits/rejected": -0.0014166105538606644, "logps/chosen": -0.0001762946485541761, "logps/rejected": -3.719608783721924, "loss": 0.554, "nll_loss": 0.13849200308322906, "rewards/accuracies": 1.0, "rewards/chosen": -1.7629463400226086e-05, "rewards/margins": 0.37194323539733887, "rewards/rejected": -0.3719608783721924, "step": 13639 }, { "epoch": 9.432918395573997, "grad_norm": 3.4750702381134033, "learning_rate": 3.1504533579222375e-06, "log_odds_chosen": 10.41798210144043, "log_odds_ratio": -0.0006747535662725568, "logits/chosen": -0.17689725756645203, "logits/rejected": -0.1873033195734024, "logps/chosen": -0.0006824568845331669, "logps/rejected": -1.7990126609802246, "loss": 0.3511, "nll_loss": 0.08769867569208145, "rewards/accuracies": 1.0, "rewards/chosen": -6.82456957292743e-05, "rewards/margins": 0.17983302474021912, "rewards/rejected": -0.17990127205848694, "step": 13640 }, { "epoch": 9.433609958506224, "grad_norm": 3.9087653160095215, "learning_rate": 3.1466113416320886e-06, "log_odds_chosen": 9.801446914672852, "log_odds_ratio": -0.00026155952946282923, "logits/chosen": -0.31932926177978516, "logits/rejected": -0.31377002596855164, "logps/chosen": -0.000133975685457699, "logps/rejected": -1.313755989074707, "loss": 0.5769, "nll_loss": 0.14420348405838013, "rewards/accuracies": 1.0, "rewards/chosen": -1.339756818197202e-05, "rewards/margins": 0.13136222958564758, "rewards/rejected": -0.13137562572956085, "step": 13641 }, { "epoch": 9.434301521438451, "grad_norm": 3.930058240890503, "learning_rate": 3.14276932534194e-06, "log_odds_chosen": 9.756757736206055, "log_odds_ratio": -0.0005803716485388577, "logits/chosen": -0.3408500552177429, "logits/rejected": -0.3672954738140106, "logps/chosen": -0.0016345781041309237, "logps/rejected": -1.7084448337554932, "loss": 0.3289, "nll_loss": 0.08217174559831619, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016345782205462456, "rewards/margins": 0.17068102955818176, "rewards/rejected": -0.17084449529647827, "step": 13642 }, { "epoch": 9.434993084370678, "grad_norm": 3.024298667907715, "learning_rate": 3.13892730905179e-06, "log_odds_chosen": 11.261756896972656, "log_odds_ratio": -4.068774069310166e-05, "logits/chosen": -0.28770825266838074, "logits/rejected": -0.29424336552619934, "logps/chosen": -0.00022447235824074596, "logps/rejected": -2.1742238998413086, "loss": 0.3234, "nll_loss": 0.08083842694759369, "rewards/accuracies": 1.0, "rewards/chosen": -2.2447236915468238e-05, "rewards/margins": 0.2173999398946762, "rewards/rejected": -0.21742239594459534, "step": 13643 }, { "epoch": 9.435684647302905, "grad_norm": 2.7786617279052734, "learning_rate": 3.1350852927616413e-06, "log_odds_chosen": 11.020342826843262, "log_odds_ratio": -2.372608651057817e-05, "logits/chosen": -0.29740408062934875, "logits/rejected": -0.3028227686882019, "logps/chosen": -0.0001481901272200048, "logps/rejected": -2.0162127017974854, "loss": 0.3055, "nll_loss": 0.07637762278318405, "rewards/accuracies": 1.0, "rewards/chosen": -1.4819013813394122e-05, "rewards/margins": 0.20160646736621857, "rewards/rejected": -0.20162126421928406, "step": 13644 }, { "epoch": 9.436376210235132, "grad_norm": 4.810678958892822, "learning_rate": 3.1312432764714925e-06, "log_odds_chosen": 11.225969314575195, "log_odds_ratio": -0.00010552228923188522, "logits/chosen": 0.10643033683300018, "logits/rejected": 0.022820040583610535, "logps/chosen": -0.0006701986421830952, "logps/rejected": -2.911445379257202, "loss": 0.7676, "nll_loss": 0.19187721610069275, "rewards/accuracies": 1.0, "rewards/chosen": -6.701986421830952e-05, "rewards/margins": 0.29107752442359924, "rewards/rejected": -0.29114454984664917, "step": 13645 }, { "epoch": 9.437067773167358, "grad_norm": 2.9477415084838867, "learning_rate": 3.1274012601813432e-06, "log_odds_chosen": 11.44828987121582, "log_odds_ratio": -2.495289118087385e-05, "logits/chosen": -0.32208365201950073, "logits/rejected": -0.2861567437648773, "logps/chosen": -0.00014589079364668578, "logps/rejected": -2.2691187858581543, "loss": 0.407, "nll_loss": 0.10175222903490067, "rewards/accuracies": 1.0, "rewards/chosen": -1.45890808198601e-05, "rewards/margins": 0.22689726948738098, "rewards/rejected": -0.22691187262535095, "step": 13646 }, { "epoch": 9.437759336099585, "grad_norm": 6.134434700012207, "learning_rate": 3.1235592438911944e-06, "log_odds_chosen": 9.971586227416992, "log_odds_ratio": -0.0002822284877765924, "logits/chosen": 0.033015862107276917, "logits/rejected": 0.017898201942443848, "logps/chosen": -0.00030175555730238557, "logps/rejected": -1.692743182182312, "loss": 0.417, "nll_loss": 0.10421252995729446, "rewards/accuracies": 1.0, "rewards/chosen": -3.0175559004419483e-05, "rewards/margins": 0.16924414038658142, "rewards/rejected": -0.16927431523799896, "step": 13647 }, { "epoch": 9.438450899031812, "grad_norm": 2.9997198581695557, "learning_rate": 3.119717227601045e-06, "log_odds_chosen": 10.828164100646973, "log_odds_ratio": -4.2587987991282716e-05, "logits/chosen": -0.5107184052467346, "logits/rejected": -0.566586971282959, "logps/chosen": -0.0003297154908068478, "logps/rejected": -2.3637232780456543, "loss": 0.2313, "nll_loss": 0.05781985819339752, "rewards/accuracies": 1.0, "rewards/chosen": -3.2971551263472065e-05, "rewards/margins": 0.23633936047554016, "rewards/rejected": -0.23637235164642334, "step": 13648 }, { "epoch": 9.439142461964039, "grad_norm": 4.279697895050049, "learning_rate": 3.1158752113108963e-06, "log_odds_chosen": 10.188965797424316, "log_odds_ratio": -0.0001124302507378161, "logits/chosen": -0.3338659703731537, "logits/rejected": -0.43446221947669983, "logps/chosen": -0.0022492276038974524, "logps/rejected": -2.589181661605835, "loss": 0.4241, "nll_loss": 0.10601003468036652, "rewards/accuracies": 1.0, "rewards/chosen": -0.00022492274001706392, "rewards/margins": 0.25869324803352356, "rewards/rejected": -0.2589181661605835, "step": 13649 }, { "epoch": 9.439834024896266, "grad_norm": 4.09467077255249, "learning_rate": 3.112033195020747e-06, "log_odds_chosen": 11.217891693115234, "log_odds_ratio": -3.357167952344753e-05, "logits/chosen": 0.32028713822364807, "logits/rejected": 0.35995882749557495, "logps/chosen": -0.00047433891450054944, "logps/rejected": -2.3951239585876465, "loss": 0.2861, "nll_loss": 0.07152174413204193, "rewards/accuracies": 1.0, "rewards/chosen": -4.7433892177650705e-05, "rewards/margins": 0.23946496844291687, "rewards/rejected": -0.23951241374015808, "step": 13650 }, { "epoch": 9.440525587828493, "grad_norm": 2.9829654693603516, "learning_rate": 3.108191178730598e-06, "log_odds_chosen": 11.757881164550781, "log_odds_ratio": -5.9907364629907534e-05, "logits/chosen": -0.2075476348400116, "logits/rejected": -0.11603280901908875, "logps/chosen": -0.0003911016683559865, "logps/rejected": -3.1237878799438477, "loss": 0.3611, "nll_loss": 0.09027508646249771, "rewards/accuracies": 1.0, "rewards/chosen": -3.9110171201173216e-05, "rewards/margins": 0.3123396635055542, "rewards/rejected": -0.31237876415252686, "step": 13651 }, { "epoch": 9.44121715076072, "grad_norm": 2.7247939109802246, "learning_rate": 3.1043491624404486e-06, "log_odds_chosen": 11.124319076538086, "log_odds_ratio": -4.741606244351715e-05, "logits/chosen": -0.6477873921394348, "logits/rejected": -0.5830503106117249, "logps/chosen": -0.00016284243611153215, "logps/rejected": -1.8327804803848267, "loss": 0.3479, "nll_loss": 0.08697590976953506, "rewards/accuracies": 1.0, "rewards/chosen": -1.6284242519759573e-05, "rewards/margins": 0.18326175212860107, "rewards/rejected": -0.18327805399894714, "step": 13652 }, { "epoch": 9.441908713692946, "grad_norm": 2.818121910095215, "learning_rate": 3.1005071461502997e-06, "log_odds_chosen": 11.644196510314941, "log_odds_ratio": -1.876071110018529e-05, "logits/chosen": -0.6093887686729431, "logits/rejected": -0.6418349742889404, "logps/chosen": -0.00027470127679407597, "logps/rejected": -2.701404094696045, "loss": 0.3011, "nll_loss": 0.07527416199445724, "rewards/accuracies": 1.0, "rewards/chosen": -2.747012877080124e-05, "rewards/margins": 0.27011293172836304, "rewards/rejected": -0.2701404094696045, "step": 13653 }, { "epoch": 9.442600276625173, "grad_norm": 3.2062249183654785, "learning_rate": 3.096665129860151e-06, "log_odds_chosen": 10.758845329284668, "log_odds_ratio": -8.449627057416365e-05, "logits/chosen": -0.06728816777467728, "logits/rejected": -0.12761613726615906, "logps/chosen": -0.0004660533741116524, "logps/rejected": -2.3059773445129395, "loss": 0.3629, "nll_loss": 0.09072056412696838, "rewards/accuracies": 1.0, "rewards/chosen": -4.660533886635676e-05, "rewards/margins": 0.23055113852024078, "rewards/rejected": -0.23059773445129395, "step": 13654 }, { "epoch": 9.4432918395574, "grad_norm": 5.059345722198486, "learning_rate": 3.0928231135700017e-06, "log_odds_chosen": 11.177997589111328, "log_odds_ratio": -9.584966755937785e-05, "logits/chosen": -0.2947097420692444, "logits/rejected": -0.41434311866760254, "logps/chosen": -0.0005000841920264065, "logps/rejected": -2.6178932189941406, "loss": 0.3753, "nll_loss": 0.09381620585918427, "rewards/accuracies": 1.0, "rewards/chosen": -5.0008420657832175e-05, "rewards/margins": 0.2617393136024475, "rewards/rejected": -0.26178932189941406, "step": 13655 }, { "epoch": 9.443983402489627, "grad_norm": 3.8090786933898926, "learning_rate": 3.088981097279853e-06, "log_odds_chosen": 11.203605651855469, "log_odds_ratio": -0.0001385588984703645, "logits/chosen": -0.11259017884731293, "logits/rejected": -0.05078551918268204, "logps/chosen": -0.00046949341776780784, "logps/rejected": -2.737952470779419, "loss": 0.3455, "nll_loss": 0.08635265380144119, "rewards/accuracies": 1.0, "rewards/chosen": -4.694933886639774e-05, "rewards/margins": 0.27374833822250366, "rewards/rejected": -0.2737952768802643, "step": 13656 }, { "epoch": 9.444674965421854, "grad_norm": 2.3689358234405518, "learning_rate": 3.0851390809897036e-06, "log_odds_chosen": 10.809409141540527, "log_odds_ratio": -3.6527962947729975e-05, "logits/chosen": -0.3216843008995056, "logits/rejected": -0.37638893723487854, "logps/chosen": -0.00045874243369325995, "logps/rejected": -1.871604084968567, "loss": 0.2578, "nll_loss": 0.06445614248514175, "rewards/accuracies": 1.0, "rewards/chosen": -4.5874243369325995e-05, "rewards/margins": 0.18711453676223755, "rewards/rejected": -0.1871604323387146, "step": 13657 }, { "epoch": 9.44536652835408, "grad_norm": 3.8250749111175537, "learning_rate": 3.0812970646995543e-06, "log_odds_chosen": 11.257601737976074, "log_odds_ratio": -4.2512434447417036e-05, "logits/chosen": -0.41863155364990234, "logits/rejected": -0.4046270251274109, "logps/chosen": -0.0001472465810365975, "logps/rejected": -2.14986515045166, "loss": 0.3402, "nll_loss": 0.08504797518253326, "rewards/accuracies": 1.0, "rewards/chosen": -1.472465828555869e-05, "rewards/margins": 0.21497179567813873, "rewards/rejected": -0.21498653292655945, "step": 13658 }, { "epoch": 9.446058091286307, "grad_norm": 3.0374255180358887, "learning_rate": 3.0774550484094055e-06, "log_odds_chosen": 11.082832336425781, "log_odds_ratio": -8.868123404681683e-05, "logits/chosen": 0.3002154231071472, "logits/rejected": 0.32750892639160156, "logps/chosen": -0.000218449771637097, "logps/rejected": -2.1682121753692627, "loss": 0.2893, "nll_loss": 0.07232128083705902, "rewards/accuracies": 1.0, "rewards/chosen": -2.1844974980922416e-05, "rewards/margins": 0.21679937839508057, "rewards/rejected": -0.21682122349739075, "step": 13659 }, { "epoch": 9.446749654218534, "grad_norm": 2.465670108795166, "learning_rate": 3.0736130321192562e-06, "log_odds_chosen": 11.993000030517578, "log_odds_ratio": -1.03414704426541e-05, "logits/chosen": -0.018259674310684204, "logits/rejected": -0.09901609271764755, "logps/chosen": -0.00011617916607065126, "logps/rejected": -2.3807053565979004, "loss": 0.2366, "nll_loss": 0.059150226414203644, "rewards/accuracies": 1.0, "rewards/chosen": -1.1617918062256649e-05, "rewards/margins": 0.238058939576149, "rewards/rejected": -0.238070547580719, "step": 13660 }, { "epoch": 9.447441217150761, "grad_norm": 3.112118721008301, "learning_rate": 3.0697710158291074e-06, "log_odds_chosen": 11.837477684020996, "log_odds_ratio": -7.899626507423818e-05, "logits/chosen": -0.566035807132721, "logits/rejected": -0.6351016759872437, "logps/chosen": -0.0003235914628021419, "logps/rejected": -3.252774238586426, "loss": 0.3784, "nll_loss": 0.09459532797336578, "rewards/accuracies": 1.0, "rewards/chosen": -3.2359148463001475e-05, "rewards/margins": 0.32524505257606506, "rewards/rejected": -0.3252774178981781, "step": 13661 }, { "epoch": 9.448132780082988, "grad_norm": 3.8702402114868164, "learning_rate": 3.065928999538958e-06, "log_odds_chosen": 10.895008087158203, "log_odds_ratio": -6.9529349275399e-05, "logits/chosen": -0.025634005665779114, "logits/rejected": -0.1855071783065796, "logps/chosen": -0.0006080082966946065, "logps/rejected": -2.0902671813964844, "loss": 0.4369, "nll_loss": 0.10922367870807648, "rewards/accuracies": 1.0, "rewards/chosen": -6.080083403503522e-05, "rewards/margins": 0.20896592736244202, "rewards/rejected": -0.20902672410011292, "step": 13662 }, { "epoch": 9.448824343015215, "grad_norm": 2.9250028133392334, "learning_rate": 3.0620869832488093e-06, "log_odds_chosen": 12.242441177368164, "log_odds_ratio": -3.040230330952909e-05, "logits/chosen": -0.34707507491111755, "logits/rejected": -0.39765727519989014, "logps/chosen": -0.00011096797970822081, "logps/rejected": -2.8269543647766113, "loss": 0.3002, "nll_loss": 0.07505042105913162, "rewards/accuracies": 1.0, "rewards/chosen": -1.1096798516518902e-05, "rewards/margins": 0.2826843857765198, "rewards/rejected": -0.282695472240448, "step": 13663 }, { "epoch": 9.449515905947441, "grad_norm": 3.9676806926727295, "learning_rate": 3.05824496695866e-06, "log_odds_chosen": 10.78483772277832, "log_odds_ratio": -3.852003283100203e-05, "logits/chosen": -0.45215755701065063, "logits/rejected": -0.4137214422225952, "logps/chosen": -0.00024637990281917155, "logps/rejected": -2.412273406982422, "loss": 0.3628, "nll_loss": 0.0907062217593193, "rewards/accuracies": 1.0, "rewards/chosen": -2.46379931923002e-05, "rewards/margins": 0.241202712059021, "rewards/rejected": -0.24122732877731323, "step": 13664 }, { "epoch": 9.450207468879668, "grad_norm": 3.1666104793548584, "learning_rate": 3.054402950668511e-06, "log_odds_chosen": 9.979389190673828, "log_odds_ratio": -0.00020525579748209566, "logits/chosen": -0.1879369020462036, "logits/rejected": -0.12144997715950012, "logps/chosen": -0.0005892362678423524, "logps/rejected": -1.3971165418624878, "loss": 0.407, "nll_loss": 0.10174022614955902, "rewards/accuracies": 1.0, "rewards/chosen": -5.8923629694618285e-05, "rewards/margins": 0.13965272903442383, "rewards/rejected": -0.1397116482257843, "step": 13665 }, { "epoch": 9.450899031811895, "grad_norm": 2.9881982803344727, "learning_rate": 3.050560934378362e-06, "log_odds_chosen": 10.837393760681152, "log_odds_ratio": -8.207259816117585e-05, "logits/chosen": -0.6228085160255432, "logits/rejected": -0.6411685347557068, "logps/chosen": -0.0004132247995585203, "logps/rejected": -2.03163480758667, "loss": 0.4245, "nll_loss": 0.1061079204082489, "rewards/accuracies": 1.0, "rewards/chosen": -4.13224843214266e-05, "rewards/margins": 0.20312216877937317, "rewards/rejected": -0.20316347479820251, "step": 13666 }, { "epoch": 9.451590594744122, "grad_norm": 3.928393602371216, "learning_rate": 3.0467189180882128e-06, "log_odds_chosen": 11.026008605957031, "log_odds_ratio": -5.968601908534765e-05, "logits/chosen": -0.504901647567749, "logits/rejected": -0.5036959648132324, "logps/chosen": -0.00010204267164226621, "logps/rejected": -1.9367821216583252, "loss": 0.5695, "nll_loss": 0.14236928522586823, "rewards/accuracies": 1.0, "rewards/chosen": -1.020426680042874e-05, "rewards/margins": 0.19366800785064697, "rewards/rejected": -0.19367821514606476, "step": 13667 }, { "epoch": 9.452282157676349, "grad_norm": 3.4829812049865723, "learning_rate": 3.0428769017980635e-06, "log_odds_chosen": 10.596809387207031, "log_odds_ratio": -0.0004383395134937018, "logits/chosen": -0.12057967483997345, "logits/rejected": -0.09670063853263855, "logps/chosen": -0.00024910306092351675, "logps/rejected": -2.325828790664673, "loss": 0.3445, "nll_loss": 0.08608973771333694, "rewards/accuracies": 1.0, "rewards/chosen": -2.4910305000958033e-05, "rewards/margins": 0.2325579822063446, "rewards/rejected": -0.23258288204669952, "step": 13668 }, { "epoch": 9.452973720608576, "grad_norm": 4.093359470367432, "learning_rate": 3.0390348855079147e-06, "log_odds_chosen": 10.298482894897461, "log_odds_ratio": -0.001447304617613554, "logits/chosen": -0.021594732999801636, "logits/rejected": 0.001156628131866455, "logps/chosen": -0.0005646768258884549, "logps/rejected": -1.9710403680801392, "loss": 0.5585, "nll_loss": 0.1394735723733902, "rewards/accuracies": 1.0, "rewards/chosen": -5.646768113365397e-05, "rewards/margins": 0.19704757630825043, "rewards/rejected": -0.19710403680801392, "step": 13669 }, { "epoch": 9.453665283540802, "grad_norm": 4.57317590713501, "learning_rate": 3.035192869217766e-06, "log_odds_chosen": 11.619937896728516, "log_odds_ratio": -0.00013235447113402188, "logits/chosen": -0.5300256609916687, "logits/rejected": -0.45142507553100586, "logps/chosen": -0.00031192644382826984, "logps/rejected": -2.820838212966919, "loss": 0.482, "nll_loss": 0.12048950046300888, "rewards/accuracies": 1.0, "rewards/chosen": -3.1192645110422745e-05, "rewards/margins": 0.2820526361465454, "rewards/rejected": -0.2820838391780853, "step": 13670 }, { "epoch": 9.45435684647303, "grad_norm": 3.9143404960632324, "learning_rate": 3.0313508529276166e-06, "log_odds_chosen": 11.59218692779541, "log_odds_ratio": -7.889211701694876e-05, "logits/chosen": 0.2372472584247589, "logits/rejected": 0.1737135797739029, "logps/chosen": -7.292727968888357e-05, "logps/rejected": -2.157414436340332, "loss": 0.3916, "nll_loss": 0.09789082407951355, "rewards/accuracies": 1.0, "rewards/chosen": -7.292728241736768e-06, "rewards/margins": 0.21573415398597717, "rewards/rejected": -0.21574144065380096, "step": 13671 }, { "epoch": 9.455048409405256, "grad_norm": 3.0498509407043457, "learning_rate": 3.0275088366374674e-06, "log_odds_chosen": 11.467384338378906, "log_odds_ratio": -2.097031392622739e-05, "logits/chosen": -0.2295285165309906, "logits/rejected": -0.21985048055648804, "logps/chosen": -5.9011006669607013e-05, "logps/rejected": -1.7431241273880005, "loss": 0.2288, "nll_loss": 0.05719863995909691, "rewards/accuracies": 1.0, "rewards/chosen": -5.901100848859642e-06, "rewards/margins": 0.17430651187896729, "rewards/rejected": -0.17431241273880005, "step": 13672 }, { "epoch": 9.455739972337483, "grad_norm": 4.1104416847229, "learning_rate": 3.0236668203473185e-06, "log_odds_chosen": 11.44217300415039, "log_odds_ratio": -0.00010912174911936745, "logits/chosen": -0.2721630334854126, "logits/rejected": -0.3509449064731598, "logps/chosen": -0.00014903565170243382, "logps/rejected": -2.6721222400665283, "loss": 0.3536, "nll_loss": 0.08839452266693115, "rewards/accuracies": 1.0, "rewards/chosen": -1.4903566807333846e-05, "rewards/margins": 0.2671973705291748, "rewards/rejected": -0.26721224188804626, "step": 13673 }, { "epoch": 9.45643153526971, "grad_norm": 2.9885518550872803, "learning_rate": 3.0198248040571693e-06, "log_odds_chosen": 10.360175132751465, "log_odds_ratio": -0.00012097896251361817, "logits/chosen": -0.6109101176261902, "logits/rejected": -0.6060300469398499, "logps/chosen": -0.000436235626693815, "logps/rejected": -1.8446308374404907, "loss": 0.3533, "nll_loss": 0.08830248564481735, "rewards/accuracies": 1.0, "rewards/chosen": -4.362356412457302e-05, "rewards/margins": 0.1844194382429123, "rewards/rejected": -0.18446308374404907, "step": 13674 }, { "epoch": 9.457123098201937, "grad_norm": 5.488043785095215, "learning_rate": 3.01598278776702e-06, "log_odds_chosen": 9.340782165527344, "log_odds_ratio": -0.00013256016245577484, "logits/chosen": -0.7102746367454529, "logits/rejected": -0.5713024139404297, "logps/chosen": -0.0008446794236078858, "logps/rejected": -2.0710833072662354, "loss": 0.6072, "nll_loss": 0.1517745852470398, "rewards/accuracies": 1.0, "rewards/chosen": -8.446794527117163e-05, "rewards/margins": 0.20702385902404785, "rewards/rejected": -0.20710833370685577, "step": 13675 }, { "epoch": 9.457814661134163, "grad_norm": 3.401562213897705, "learning_rate": 3.012140771476871e-06, "log_odds_chosen": 11.066591262817383, "log_odds_ratio": -3.361454582773149e-05, "logits/chosen": -0.09300799667835236, "logits/rejected": -0.09423156082630157, "logps/chosen": -0.00017423040117137134, "logps/rejected": -2.2629432678222656, "loss": 0.3336, "nll_loss": 0.08340008556842804, "rewards/accuracies": 1.0, "rewards/chosen": -1.7423040844732895e-05, "rewards/margins": 0.22627690434455872, "rewards/rejected": -0.22629432380199432, "step": 13676 }, { "epoch": 9.45850622406639, "grad_norm": 3.094069242477417, "learning_rate": 3.0082987551867224e-06, "log_odds_chosen": 12.419988632202148, "log_odds_ratio": -6.88181398800225e-06, "logits/chosen": -0.588909924030304, "logits/rejected": -0.5188385248184204, "logps/chosen": -9.001026046462357e-05, "logps/rejected": -3.1227521896362305, "loss": 0.4008, "nll_loss": 0.1001872792840004, "rewards/accuracies": 1.0, "rewards/chosen": -9.001025318866596e-06, "rewards/margins": 0.31226620078086853, "rewards/rejected": -0.3122752010822296, "step": 13677 }, { "epoch": 9.459197786998617, "grad_norm": 2.294238328933716, "learning_rate": 3.004456738896573e-06, "log_odds_chosen": 10.08603286743164, "log_odds_ratio": -0.00013286015018820763, "logits/chosen": -0.19353480637073517, "logits/rejected": -0.13654178380966187, "logps/chosen": -0.0006431926740333438, "logps/rejected": -1.8902684450149536, "loss": 0.2685, "nll_loss": 0.06711249053478241, "rewards/accuracies": 1.0, "rewards/chosen": -6.43192688585259e-05, "rewards/margins": 0.18896251916885376, "rewards/rejected": -0.1890268474817276, "step": 13678 }, { "epoch": 9.459889349930844, "grad_norm": 4.15861701965332, "learning_rate": 3.000614722606424e-06, "log_odds_chosen": 11.671398162841797, "log_odds_ratio": -2.1137140720384195e-05, "logits/chosen": -0.030270785093307495, "logits/rejected": -0.15903228521347046, "logps/chosen": -0.00015941433957777917, "logps/rejected": -2.824725866317749, "loss": 0.4703, "nll_loss": 0.1175684779882431, "rewards/accuracies": 1.0, "rewards/chosen": -1.5941433957777917e-05, "rewards/margins": 0.282456636428833, "rewards/rejected": -0.2824726104736328, "step": 13679 }, { "epoch": 9.46058091286307, "grad_norm": 2.5023744106292725, "learning_rate": 2.996772706316275e-06, "log_odds_chosen": 11.48115348815918, "log_odds_ratio": -2.6544312277110294e-05, "logits/chosen": -0.31745773553848267, "logits/rejected": -0.4156531095504761, "logps/chosen": -6.688917346764356e-05, "logps/rejected": -1.7893203496932983, "loss": 0.2732, "nll_loss": 0.06830289214849472, "rewards/accuracies": 1.0, "rewards/chosen": -6.688916982966475e-06, "rewards/margins": 0.17892535030841827, "rewards/rejected": -0.1789320409297943, "step": 13680 }, { "epoch": 9.461272475795298, "grad_norm": 3.793719530105591, "learning_rate": 2.992930690026126e-06, "log_odds_chosen": 12.253263473510742, "log_odds_ratio": -0.0001680817367741838, "logits/chosen": 0.17600244283676147, "logits/rejected": 0.19702959060668945, "logps/chosen": -0.001068698475137353, "logps/rejected": -3.698801279067993, "loss": 0.307, "nll_loss": 0.07672946155071259, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010686985478969291, "rewards/margins": 0.369773268699646, "rewards/rejected": -0.36988016963005066, "step": 13681 }, { "epoch": 9.461964038727524, "grad_norm": 3.7785212993621826, "learning_rate": 2.9890886737359765e-06, "log_odds_chosen": 11.784876823425293, "log_odds_ratio": -1.636009619687684e-05, "logits/chosen": 0.1131448894739151, "logits/rejected": 0.024499140679836273, "logps/chosen": -0.00020417144696693867, "logps/rejected": -2.9145703315734863, "loss": 0.3781, "nll_loss": 0.0945122167468071, "rewards/accuracies": 1.0, "rewards/chosen": -2.0417144696693867e-05, "rewards/margins": 0.29143664240837097, "rewards/rejected": -0.29145705699920654, "step": 13682 }, { "epoch": 9.462655601659751, "grad_norm": 3.6575255393981934, "learning_rate": 2.9852466574458277e-06, "log_odds_chosen": 11.061687469482422, "log_odds_ratio": -9.567139204591513e-05, "logits/chosen": -0.2089971899986267, "logits/rejected": -0.25104543566703796, "logps/chosen": -0.00028398202266544104, "logps/rejected": -2.3885231018066406, "loss": 0.3151, "nll_loss": 0.07875557988882065, "rewards/accuracies": 1.0, "rewards/chosen": -2.8398204449331388e-05, "rewards/margins": 0.23882392048835754, "rewards/rejected": -0.23885229229927063, "step": 13683 }, { "epoch": 9.463347164591978, "grad_norm": 2.5947320461273193, "learning_rate": 2.981404641155679e-06, "log_odds_chosen": 10.604299545288086, "log_odds_ratio": -6.143797509139404e-05, "logits/chosen": -0.367992639541626, "logits/rejected": -0.2641603946685791, "logps/chosen": -0.0002503315918147564, "logps/rejected": -1.903537392616272, "loss": 0.3265, "nll_loss": 0.08162114024162292, "rewards/accuracies": 1.0, "rewards/chosen": -2.503315954527352e-05, "rewards/margins": 0.19032873213291168, "rewards/rejected": -0.19035375118255615, "step": 13684 }, { "epoch": 9.464038727524205, "grad_norm": 4.084955215454102, "learning_rate": 2.9775626248655296e-06, "log_odds_chosen": 11.059584617614746, "log_odds_ratio": -7.182247645687312e-05, "logits/chosen": -0.09201770275831223, "logits/rejected": -0.1854029893875122, "logps/chosen": -0.0005451062461361289, "logps/rejected": -2.800729274749756, "loss": 0.5028, "nll_loss": 0.12568828463554382, "rewards/accuracies": 1.0, "rewards/chosen": -5.451062315842137e-05, "rewards/margins": 0.2800183892250061, "rewards/rejected": -0.2800729274749756, "step": 13685 }, { "epoch": 9.464730290456432, "grad_norm": 2.895047426223755, "learning_rate": 2.9737206085753804e-06, "log_odds_chosen": 12.01188850402832, "log_odds_ratio": -8.314920705743134e-06, "logits/chosen": -0.38543713092803955, "logits/rejected": -0.5218108296394348, "logps/chosen": -0.00016034372674766928, "logps/rejected": -3.107083320617676, "loss": 0.4189, "nll_loss": 0.10472860932350159, "rewards/accuracies": 1.0, "rewards/chosen": -1.603437340236269e-05, "rewards/margins": 0.31069228053092957, "rewards/rejected": -0.31070831418037415, "step": 13686 }, { "epoch": 9.465421853388658, "grad_norm": 2.4275357723236084, "learning_rate": 2.9698785922852316e-06, "log_odds_chosen": 11.681462287902832, "log_odds_ratio": -9.818573016673326e-05, "logits/chosen": -0.10170315206050873, "logits/rejected": -0.09424428641796112, "logps/chosen": -0.0002597036655060947, "logps/rejected": -2.9518909454345703, "loss": 0.296, "nll_loss": 0.0739840418100357, "rewards/accuracies": 1.0, "rewards/chosen": -2.5970368369598873e-05, "rewards/margins": 0.2951631247997284, "rewards/rejected": -0.2951890826225281, "step": 13687 }, { "epoch": 9.466113416320885, "grad_norm": 3.9739270210266113, "learning_rate": 2.9660365759950823e-06, "log_odds_chosen": 11.264009475708008, "log_odds_ratio": -7.518980419263244e-05, "logits/chosen": -0.34022510051727295, "logits/rejected": -0.4449506402015686, "logps/chosen": -0.0002356268814764917, "logps/rejected": -2.1614341735839844, "loss": 0.398, "nll_loss": 0.0994982123374939, "rewards/accuracies": 1.0, "rewards/chosen": -2.3562686692457646e-05, "rewards/margins": 0.21611987054347992, "rewards/rejected": -0.2161434292793274, "step": 13688 }, { "epoch": 9.466804979253112, "grad_norm": 3.0976598262786865, "learning_rate": 2.962194559704933e-06, "log_odds_chosen": 10.38652229309082, "log_odds_ratio": -7.720041321590543e-05, "logits/chosen": -0.3017534911632538, "logits/rejected": -0.3283177614212036, "logps/chosen": -0.0004530068254098296, "logps/rejected": -1.8843767642974854, "loss": 0.3019, "nll_loss": 0.07546820491552353, "rewards/accuracies": 1.0, "rewards/chosen": -4.530068326857872e-05, "rewards/margins": 0.18839238584041595, "rewards/rejected": -0.18843768537044525, "step": 13689 }, { "epoch": 9.467496542185339, "grad_norm": 4.3513031005859375, "learning_rate": 2.9583525434147842e-06, "log_odds_chosen": 12.240751266479492, "log_odds_ratio": -9.313460395787843e-06, "logits/chosen": -0.1769258677959442, "logits/rejected": -0.19462567567825317, "logps/chosen": -0.00016390776727348566, "logps/rejected": -3.3067760467529297, "loss": 0.2872, "nll_loss": 0.07179521769285202, "rewards/accuracies": 1.0, "rewards/chosen": -1.6390775272157043e-05, "rewards/margins": 0.33066120743751526, "rewards/rejected": -0.3306775987148285, "step": 13690 }, { "epoch": 9.468188105117566, "grad_norm": 3.3704264163970947, "learning_rate": 2.954510527124635e-06, "log_odds_chosen": 11.753616333007812, "log_odds_ratio": -9.331144246971235e-05, "logits/chosen": -0.5359658598899841, "logits/rejected": -0.49007073044776917, "logps/chosen": -0.0002047703310381621, "logps/rejected": -2.6890151500701904, "loss": 0.3271, "nll_loss": 0.0817599892616272, "rewards/accuracies": 1.0, "rewards/chosen": -2.0477034922805615e-05, "rewards/margins": 0.26888102293014526, "rewards/rejected": -0.2689014971256256, "step": 13691 }, { "epoch": 9.468879668049793, "grad_norm": 3.193803548812866, "learning_rate": 2.950668510834486e-06, "log_odds_chosen": 11.477971076965332, "log_odds_ratio": -6.550650869030505e-05, "logits/chosen": -0.4765413701534271, "logits/rejected": -0.5337736010551453, "logps/chosen": -0.00011261247709626332, "logps/rejected": -2.128232717514038, "loss": 0.25, "nll_loss": 0.06248745322227478, "rewards/accuracies": 1.0, "rewards/chosen": -1.1261247891525272e-05, "rewards/margins": 0.21281200647354126, "rewards/rejected": -0.212823286652565, "step": 13692 }, { "epoch": 9.46957123098202, "grad_norm": 3.092468500137329, "learning_rate": 2.9468264945443373e-06, "log_odds_chosen": 11.156269073486328, "log_odds_ratio": -0.0001080305955838412, "logits/chosen": -0.4892697036266327, "logits/rejected": -0.6489784717559814, "logps/chosen": -0.00033197173615917563, "logps/rejected": -2.2593331336975098, "loss": 0.3406, "nll_loss": 0.08514631539583206, "rewards/accuracies": 1.0, "rewards/chosen": -3.3197175071109086e-05, "rewards/margins": 0.22590012848377228, "rewards/rejected": -0.22593331336975098, "step": 13693 }, { "epoch": 9.470262793914246, "grad_norm": 3.5633842945098877, "learning_rate": 2.942984478254188e-06, "log_odds_chosen": 9.765762329101562, "log_odds_ratio": -0.000332854047883302, "logits/chosen": -0.47233837842941284, "logits/rejected": -0.42558562755584717, "logps/chosen": -0.00043346098391339183, "logps/rejected": -2.0893874168395996, "loss": 0.2935, "nll_loss": 0.07334748655557632, "rewards/accuracies": 1.0, "rewards/chosen": -4.334609548095614e-05, "rewards/margins": 0.20889541506767273, "rewards/rejected": -0.20893874764442444, "step": 13694 }, { "epoch": 9.470954356846473, "grad_norm": 4.938905715942383, "learning_rate": 2.939142461964039e-06, "log_odds_chosen": 11.603968620300293, "log_odds_ratio": -3.054105764022097e-05, "logits/chosen": -0.26857519149780273, "logits/rejected": -0.2991647720336914, "logps/chosen": -0.0002254693245049566, "logps/rejected": -2.439152479171753, "loss": 0.5257, "nll_loss": 0.13143321871757507, "rewards/accuracies": 1.0, "rewards/chosen": -2.254693208669778e-05, "rewards/margins": 0.24389272928237915, "rewards/rejected": -0.24391527473926544, "step": 13695 }, { "epoch": 9.4716459197787, "grad_norm": 3.45434832572937, "learning_rate": 2.9353004456738896e-06, "log_odds_chosen": 11.478617668151855, "log_odds_ratio": -1.8379865650786087e-05, "logits/chosen": -0.023225925862789154, "logits/rejected": -0.0734102874994278, "logps/chosen": -0.00010731960355769843, "logps/rejected": -2.3385214805603027, "loss": 0.3996, "nll_loss": 0.09990224242210388, "rewards/accuracies": 1.0, "rewards/chosen": -1.0731960173870903e-05, "rewards/margins": 0.2338414192199707, "rewards/rejected": -0.23385214805603027, "step": 13696 }, { "epoch": 9.472337482710927, "grad_norm": 2.682950496673584, "learning_rate": 2.9314584293837407e-06, "log_odds_chosen": 10.489137649536133, "log_odds_ratio": -6.562238559126854e-05, "logits/chosen": -0.08150936663150787, "logits/rejected": -0.13377483189105988, "logps/chosen": -0.00015974488633219153, "logps/rejected": -1.6511993408203125, "loss": 0.336, "nll_loss": 0.08398184180259705, "rewards/accuracies": 1.0, "rewards/chosen": -1.5974488633219153e-05, "rewards/margins": 0.1651039570569992, "rewards/rejected": -0.16511991620063782, "step": 13697 }, { "epoch": 9.473029045643154, "grad_norm": 2.681863307952881, "learning_rate": 2.9276164130935915e-06, "log_odds_chosen": 10.831472396850586, "log_odds_ratio": -0.00016914252773858607, "logits/chosen": -0.34048521518707275, "logits/rejected": -0.32761192321777344, "logps/chosen": -0.00020769353432115167, "logps/rejected": -1.6908882856369019, "loss": 0.2172, "nll_loss": 0.054294973611831665, "rewards/accuracies": 1.0, "rewards/chosen": -2.0769353795913048e-05, "rewards/margins": 0.16906805336475372, "rewards/rejected": -0.16908882558345795, "step": 13698 }, { "epoch": 9.47372060857538, "grad_norm": 3.5782458782196045, "learning_rate": 2.9237743968034427e-06, "log_odds_chosen": 10.741741180419922, "log_odds_ratio": -0.00011454321793280542, "logits/chosen": -0.36604902148246765, "logits/rejected": -0.3194882571697235, "logps/chosen": -0.00023873275495134294, "logps/rejected": -2.1600375175476074, "loss": 0.2892, "nll_loss": 0.07228127121925354, "rewards/accuracies": 1.0, "rewards/chosen": -2.3873277314123698e-05, "rewards/margins": 0.2159799039363861, "rewards/rejected": -0.21600376069545746, "step": 13699 }, { "epoch": 9.474412171507607, "grad_norm": 3.9181885719299316, "learning_rate": 2.919932380513294e-06, "log_odds_chosen": 10.678892135620117, "log_odds_ratio": -0.0003496372955851257, "logits/chosen": -0.46870332956314087, "logits/rejected": -0.5139002799987793, "logps/chosen": -0.0002608651702757925, "logps/rejected": -1.6953610181808472, "loss": 0.3021, "nll_loss": 0.07547971606254578, "rewards/accuracies": 1.0, "rewards/chosen": -2.6086519937962294e-05, "rewards/margins": 0.16951002180576324, "rewards/rejected": -0.16953611373901367, "step": 13700 }, { "epoch": 9.475103734439834, "grad_norm": 3.2762069702148438, "learning_rate": 2.9160903642231446e-06, "log_odds_chosen": 10.687110900878906, "log_odds_ratio": -0.0008537794346921146, "logits/chosen": -0.38689276576042175, "logits/rejected": -0.4276360273361206, "logps/chosen": -0.0003648015554063022, "logps/rejected": -2.2275314331054688, "loss": 0.3875, "nll_loss": 0.09679935872554779, "rewards/accuracies": 1.0, "rewards/chosen": -3.648015626822598e-05, "rewards/margins": 0.22271665930747986, "rewards/rejected": -0.22275316715240479, "step": 13701 }, { "epoch": 9.475795297372061, "grad_norm": 3.8926045894622803, "learning_rate": 2.9122483479329953e-06, "log_odds_chosen": 11.083564758300781, "log_odds_ratio": -5.3327985369833186e-05, "logits/chosen": -0.26325908303260803, "logits/rejected": -0.2744675576686859, "logps/chosen": -0.00030690288986079395, "logps/rejected": -2.491783857345581, "loss": 0.4966, "nll_loss": 0.12413786351680756, "rewards/accuracies": 1.0, "rewards/chosen": -3.069029116886668e-05, "rewards/margins": 0.2491477131843567, "rewards/rejected": -0.24917840957641602, "step": 13702 }, { "epoch": 9.476486860304288, "grad_norm": 4.4134440422058105, "learning_rate": 2.908406331642846e-06, "log_odds_chosen": 11.066388130187988, "log_odds_ratio": -3.332171399961226e-05, "logits/chosen": -0.23798823356628418, "logits/rejected": -0.18991810083389282, "logps/chosen": -0.00045601866440847516, "logps/rejected": -2.242123603820801, "loss": 0.6391, "nll_loss": 0.15978127717971802, "rewards/accuracies": 1.0, "rewards/chosen": -4.560186789603904e-05, "rewards/margins": 0.22416675090789795, "rewards/rejected": -0.22421236336231232, "step": 13703 }, { "epoch": 9.477178423236515, "grad_norm": 3.9003124237060547, "learning_rate": 2.9045643153526973e-06, "log_odds_chosen": 11.239566802978516, "log_odds_ratio": -0.00013829019735567272, "logits/chosen": -0.3914109170436859, "logits/rejected": -0.41909319162368774, "logps/chosen": -0.0008354556048288941, "logps/rejected": -2.3318867683410645, "loss": 0.3443, "nll_loss": 0.0860588327050209, "rewards/accuracies": 1.0, "rewards/chosen": -8.354555757250637e-05, "rewards/margins": 0.2331051081418991, "rewards/rejected": -0.2331886738538742, "step": 13704 }, { "epoch": 9.477869986168741, "grad_norm": 3.461466073989868, "learning_rate": 2.900722299062548e-06, "log_odds_chosen": 11.788957595825195, "log_odds_ratio": -2.456989932397846e-05, "logits/chosen": -0.1686708927154541, "logits/rejected": -0.2556487023830414, "logps/chosen": -0.00015625954256393015, "logps/rejected": -2.564842700958252, "loss": 0.4262, "nll_loss": 0.10655610263347626, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625955711584538e-05, "rewards/margins": 0.25646865367889404, "rewards/rejected": -0.2564842998981476, "step": 13705 }, { "epoch": 9.478561549100968, "grad_norm": 3.256856918334961, "learning_rate": 2.896880282772399e-06, "log_odds_chosen": 11.125326156616211, "log_odds_ratio": -7.740782893961295e-05, "logits/chosen": -0.21685834228992462, "logits/rejected": -0.22548425197601318, "logps/chosen": -0.0001516193151473999, "logps/rejected": -2.313654899597168, "loss": 0.2757, "nll_loss": 0.0689086988568306, "rewards/accuracies": 1.0, "rewards/chosen": -1.5161932424234692e-05, "rewards/margins": 0.23135033249855042, "rewards/rejected": -0.23136550188064575, "step": 13706 }, { "epoch": 9.479253112033195, "grad_norm": 3.6939895153045654, "learning_rate": 2.8930382664822504e-06, "log_odds_chosen": 12.723363876342773, "log_odds_ratio": -5.737080755352508e-06, "logits/chosen": -0.17502564191818237, "logits/rejected": -0.16039277613162994, "logps/chosen": -6.294964259723201e-05, "logps/rejected": -2.9757256507873535, "loss": 0.4568, "nll_loss": 0.11420326679944992, "rewards/accuracies": 1.0, "rewards/chosen": -6.2949643506726716e-06, "rewards/margins": 0.297566294670105, "rewards/rejected": -0.2975725829601288, "step": 13707 }, { "epoch": 9.479944674965422, "grad_norm": 3.37601637840271, "learning_rate": 2.889196250192101e-06, "log_odds_chosen": 12.085586547851562, "log_odds_ratio": -1.5522884496022016e-05, "logits/chosen": -0.3996522128582001, "logits/rejected": -0.4061252176761627, "logps/chosen": -9.973048872780055e-05, "logps/rejected": -2.785703182220459, "loss": 0.4483, "nll_loss": 0.11207623779773712, "rewards/accuracies": 1.0, "rewards/chosen": -9.973049600375816e-06, "rewards/margins": 0.2785603702068329, "rewards/rejected": -0.27857035398483276, "step": 13708 }, { "epoch": 9.480636237897649, "grad_norm": 3.2007503509521484, "learning_rate": 2.885354233901952e-06, "log_odds_chosen": 12.089580535888672, "log_odds_ratio": -1.4233486581360921e-05, "logits/chosen": -0.06223127245903015, "logits/rejected": -0.054247599095106125, "logps/chosen": -0.00021037404076196253, "logps/rejected": -3.1310501098632812, "loss": 0.3306, "nll_loss": 0.08265635371208191, "rewards/accuracies": 1.0, "rewards/chosen": -2.1037403712398373e-05, "rewards/margins": 0.3130840063095093, "rewards/rejected": -0.313105046749115, "step": 13709 }, { "epoch": 9.481327800829876, "grad_norm": 3.856358051300049, "learning_rate": 2.881512217611803e-06, "log_odds_chosen": 9.892733573913574, "log_odds_ratio": -0.00014180310245137662, "logits/chosen": -0.11307594925165176, "logits/rejected": -0.1626780778169632, "logps/chosen": -0.000550577009562403, "logps/rejected": -2.0717012882232666, "loss": 0.3495, "nll_loss": 0.08736885339021683, "rewards/accuracies": 1.0, "rewards/chosen": -5.505770241143182e-05, "rewards/margins": 0.2071150839328766, "rewards/rejected": -0.20717012882232666, "step": 13710 }, { "epoch": 9.482019363762102, "grad_norm": 3.731860637664795, "learning_rate": 2.8776702013216538e-06, "log_odds_chosen": 11.344371795654297, "log_odds_ratio": -0.00012071852688677609, "logits/chosen": -0.43114885687828064, "logits/rejected": -0.5573596358299255, "logps/chosen": -0.00047632993664592505, "logps/rejected": -3.5855040550231934, "loss": 0.3692, "nll_loss": 0.09229099750518799, "rewards/accuracies": 1.0, "rewards/chosen": -4.763299511978403e-05, "rewards/margins": 0.35850274562835693, "rewards/rejected": -0.35855039954185486, "step": 13711 }, { "epoch": 9.48271092669433, "grad_norm": 3.2189559936523438, "learning_rate": 2.8738281850315045e-06, "log_odds_chosen": 9.976612091064453, "log_odds_ratio": -0.00010828935046447441, "logits/chosen": -0.520516037940979, "logits/rejected": -0.5231585502624512, "logps/chosen": -0.001182026695460081, "logps/rejected": -1.9440940618515015, "loss": 0.3307, "nll_loss": 0.08265582472085953, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011820268264273182, "rewards/margins": 0.19429121911525726, "rewards/rejected": -0.19440940022468567, "step": 13712 }, { "epoch": 9.483402489626556, "grad_norm": 3.685671091079712, "learning_rate": 2.8699861687413553e-06, "log_odds_chosen": 11.894643783569336, "log_odds_ratio": -1.2605565643752925e-05, "logits/chosen": -0.04930278658866882, "logits/rejected": -0.13070863485336304, "logps/chosen": -0.00010384486813563854, "logps/rejected": -2.5991384983062744, "loss": 0.3739, "nll_loss": 0.09347648173570633, "rewards/accuracies": 1.0, "rewards/chosen": -1.0384486813563854e-05, "rewards/margins": 0.2599034905433655, "rewards/rejected": -0.2599138915538788, "step": 13713 }, { "epoch": 9.484094052558783, "grad_norm": 3.278616189956665, "learning_rate": 2.8661441524512064e-06, "log_odds_chosen": 11.319167137145996, "log_odds_ratio": -0.00026694638654589653, "logits/chosen": -0.306878924369812, "logits/rejected": -0.38049980998039246, "logps/chosen": -0.001009898609481752, "logps/rejected": -2.2372772693634033, "loss": 0.3337, "nll_loss": 0.08339577913284302, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010098986967932433, "rewards/margins": 0.2236267328262329, "rewards/rejected": -0.2237277328968048, "step": 13714 }, { "epoch": 9.48478561549101, "grad_norm": 3.5239064693450928, "learning_rate": 2.8623021361610576e-06, "log_odds_chosen": 11.55378532409668, "log_odds_ratio": -4.032679134979844e-05, "logits/chosen": -0.12040985375642776, "logits/rejected": -0.15733376145362854, "logps/chosen": -0.00027053779922425747, "logps/rejected": -2.9848201274871826, "loss": 0.3486, "nll_loss": 0.08713582158088684, "rewards/accuracies": 1.0, "rewards/chosen": -2.705378210521303e-05, "rewards/margins": 0.29845497012138367, "rewards/rejected": -0.2984820008277893, "step": 13715 }, { "epoch": 9.485477178423237, "grad_norm": 4.460223197937012, "learning_rate": 2.8584601198709084e-06, "log_odds_chosen": 12.06035327911377, "log_odds_ratio": -1.8679675122257322e-05, "logits/chosen": -0.47401994466781616, "logits/rejected": -0.42134177684783936, "logps/chosen": -5.195748235564679e-05, "logps/rejected": -2.2302098274230957, "loss": 0.3507, "nll_loss": 0.08767677843570709, "rewards/accuracies": 1.0, "rewards/chosen": -5.19574859936256e-06, "rewards/margins": 0.22301580011844635, "rewards/rejected": -0.2230209857225418, "step": 13716 }, { "epoch": 9.486168741355463, "grad_norm": 2.486361026763916, "learning_rate": 2.8546181035807595e-06, "log_odds_chosen": 11.190108299255371, "log_odds_ratio": -6.145198130980134e-05, "logits/chosen": -0.695064902305603, "logits/rejected": -0.7149726152420044, "logps/chosen": -0.0001509027642896399, "logps/rejected": -2.284609794616699, "loss": 0.236, "nll_loss": 0.05898681655526161, "rewards/accuracies": 1.0, "rewards/chosen": -1.509027697466081e-05, "rewards/margins": 0.22844591736793518, "rewards/rejected": -0.22846101224422455, "step": 13717 }, { "epoch": 9.48686030428769, "grad_norm": 4.049819469451904, "learning_rate": 2.8507760872906103e-06, "log_odds_chosen": 12.161651611328125, "log_odds_ratio": -8.164747669070493e-06, "logits/chosen": -0.5261566638946533, "logits/rejected": -0.5017327070236206, "logps/chosen": -0.00010774727707030252, "logps/rejected": -2.7548489570617676, "loss": 0.4114, "nll_loss": 0.10285009443759918, "rewards/accuracies": 1.0, "rewards/chosen": -1.0774727343232371e-05, "rewards/margins": 0.27547410130500793, "rewards/rejected": -0.2754848599433899, "step": 13718 }, { "epoch": 9.487551867219917, "grad_norm": 4.140195846557617, "learning_rate": 2.846934071000461e-06, "log_odds_chosen": 8.858113288879395, "log_odds_ratio": -0.0964498221874237, "logits/chosen": -0.5958190560340881, "logits/rejected": -0.6393790245056152, "logps/chosen": -0.021119430661201477, "logps/rejected": -1.9574973583221436, "loss": 0.3022, "nll_loss": 0.06590069830417633, "rewards/accuracies": 0.875, "rewards/chosen": -0.002111943205818534, "rewards/margins": 0.19363778829574585, "rewards/rejected": -0.19574972987174988, "step": 13719 }, { "epoch": 9.488243430152144, "grad_norm": 3.137244701385498, "learning_rate": 2.8430920547103118e-06, "log_odds_chosen": 11.436861038208008, "log_odds_ratio": -1.8819559045368806e-05, "logits/chosen": -0.23692496120929718, "logits/rejected": -0.4107188582420349, "logps/chosen": -0.0001498030760558322, "logps/rejected": -2.4895315170288086, "loss": 0.4337, "nll_loss": 0.1084178239107132, "rewards/accuracies": 1.0, "rewards/chosen": -1.4980309060774744e-05, "rewards/margins": 0.2489381730556488, "rewards/rejected": -0.24895316362380981, "step": 13720 }, { "epoch": 9.48893499308437, "grad_norm": 3.764981746673584, "learning_rate": 2.839250038420163e-06, "log_odds_chosen": 11.15414810180664, "log_odds_ratio": -2.7917967599933036e-05, "logits/chosen": -0.05560293793678284, "logits/rejected": -0.19690510630607605, "logps/chosen": -0.00021685943647753447, "logps/rejected": -2.401160955429077, "loss": 0.3767, "nll_loss": 0.09417416900396347, "rewards/accuracies": 1.0, "rewards/chosen": -2.168594437534921e-05, "rewards/margins": 0.24009442329406738, "rewards/rejected": -0.24011610448360443, "step": 13721 }, { "epoch": 9.489626556016598, "grad_norm": 3.811582326889038, "learning_rate": 2.835408022130014e-06, "log_odds_chosen": 11.078816413879395, "log_odds_ratio": -0.00021127502259332687, "logits/chosen": -0.5352858304977417, "logits/rejected": -0.5483341217041016, "logps/chosen": -0.0009039980941452086, "logps/rejected": -2.568899154663086, "loss": 0.28, "nll_loss": 0.06998679786920547, "rewards/accuracies": 1.0, "rewards/chosen": -9.039981523528695e-05, "rewards/margins": 0.25679951906204224, "rewards/rejected": -0.2568899095058441, "step": 13722 }, { "epoch": 9.490318118948824, "grad_norm": 4.578982353210449, "learning_rate": 2.831566005839865e-06, "log_odds_chosen": 10.710317611694336, "log_odds_ratio": -0.00025594281032681465, "logits/chosen": -0.7265045642852783, "logits/rejected": -0.7505810260772705, "logps/chosen": -0.00025931381969712675, "logps/rejected": -1.9837772846221924, "loss": 0.4493, "nll_loss": 0.11230659484863281, "rewards/accuracies": 1.0, "rewards/chosen": -2.5931381969712675e-05, "rewards/margins": 0.19835181534290314, "rewards/rejected": -0.19837772846221924, "step": 13723 }, { "epoch": 9.491009681881051, "grad_norm": 3.612924575805664, "learning_rate": 2.827723989549716e-06, "log_odds_chosen": 12.415328025817871, "log_odds_ratio": -2.9161874408600852e-05, "logits/chosen": -0.05908776819705963, "logits/rejected": -0.1314144879579544, "logps/chosen": -0.00014797823678236455, "logps/rejected": -3.427035331726074, "loss": 0.5031, "nll_loss": 0.12576347589492798, "rewards/accuracies": 1.0, "rewards/chosen": -1.4797822586842813e-05, "rewards/margins": 0.3426887094974518, "rewards/rejected": -0.34270352125167847, "step": 13724 }, { "epoch": 9.491701244813278, "grad_norm": 4.0577778816223145, "learning_rate": 2.823881973259567e-06, "log_odds_chosen": 10.035642623901367, "log_odds_ratio": -0.00023546107695437968, "logits/chosen": -0.19884175062179565, "logits/rejected": -0.31768226623535156, "logps/chosen": -0.0004935381002724171, "logps/rejected": -1.8631964921951294, "loss": 0.3125, "nll_loss": 0.07809849083423615, "rewards/accuracies": 1.0, "rewards/chosen": -4.935381002724171e-05, "rewards/margins": 0.18627029657363892, "rewards/rejected": -0.18631964921951294, "step": 13725 }, { "epoch": 9.492392807745505, "grad_norm": 3.204608201980591, "learning_rate": 2.8200399569694176e-06, "log_odds_chosen": 11.073084831237793, "log_odds_ratio": -0.0004762558382935822, "logits/chosen": -0.4759052097797394, "logits/rejected": -0.5320380926132202, "logps/chosen": -0.000281669752439484, "logps/rejected": -2.417051315307617, "loss": 0.33, "nll_loss": 0.08244785666465759, "rewards/accuracies": 1.0, "rewards/chosen": -2.8166976335342042e-05, "rewards/margins": 0.2416769564151764, "rewards/rejected": -0.24170511960983276, "step": 13726 }, { "epoch": 9.493084370677732, "grad_norm": 4.095341682434082, "learning_rate": 2.8161979406792687e-06, "log_odds_chosen": 11.520950317382812, "log_odds_ratio": -2.6311652618460357e-05, "logits/chosen": -0.07997718453407288, "logits/rejected": -0.11879251897335052, "logps/chosen": -0.00034241483081132174, "logps/rejected": -2.570692539215088, "loss": 0.4557, "nll_loss": 0.11391951143741608, "rewards/accuracies": 1.0, "rewards/chosen": -3.424148599151522e-05, "rewards/margins": 0.2570350170135498, "rewards/rejected": -0.2570692300796509, "step": 13727 }, { "epoch": 9.493775933609959, "grad_norm": 3.238722801208496, "learning_rate": 2.8123559243891195e-06, "log_odds_chosen": 11.319939613342285, "log_odds_ratio": -3.77092364942655e-05, "logits/chosen": -0.34053710103034973, "logits/rejected": -0.3158467710018158, "logps/chosen": -0.00011048711894545704, "logps/rejected": -1.9384033679962158, "loss": 0.3732, "nll_loss": 0.09329046308994293, "rewards/accuracies": 1.0, "rewards/chosen": -1.1048712622141466e-05, "rewards/margins": 0.19382929801940918, "rewards/rejected": -0.19384033977985382, "step": 13728 }, { "epoch": 9.494467496542185, "grad_norm": 2.9551427364349365, "learning_rate": 2.8085139080989706e-06, "log_odds_chosen": 11.750532150268555, "log_odds_ratio": -2.050580042123329e-05, "logits/chosen": -0.5918290615081787, "logits/rejected": -0.6298559904098511, "logps/chosen": -9.227452392224222e-05, "logps/rejected": -2.4829556941986084, "loss": 0.2679, "nll_loss": 0.06697001308202744, "rewards/accuracies": 1.0, "rewards/chosen": -9.227452210325282e-06, "rewards/margins": 0.24828633666038513, "rewards/rejected": -0.24829556047916412, "step": 13729 }, { "epoch": 9.495159059474412, "grad_norm": 4.240682601928711, "learning_rate": 2.8046718918088214e-06, "log_odds_chosen": 11.138177871704102, "log_odds_ratio": -9.627740655560046e-05, "logits/chosen": -0.40648186206817627, "logits/rejected": -0.4481659531593323, "logps/chosen": -0.0002279129985254258, "logps/rejected": -2.282442092895508, "loss": 0.4667, "nll_loss": 0.11667248606681824, "rewards/accuracies": 1.0, "rewards/chosen": -2.279130058013834e-05, "rewards/margins": 0.22822141647338867, "rewards/rejected": -0.22824421525001526, "step": 13730 }, { "epoch": 9.495850622406639, "grad_norm": 3.6743686199188232, "learning_rate": 2.8008298755186726e-06, "log_odds_chosen": 11.219482421875, "log_odds_ratio": -3.4768607292789966e-05, "logits/chosen": -0.47874248027801514, "logits/rejected": -0.5910977125167847, "logps/chosen": -0.00014219920558389276, "logps/rejected": -2.0327465534210205, "loss": 0.3731, "nll_loss": 0.09327490627765656, "rewards/accuracies": 1.0, "rewards/chosen": -1.4219920558389276e-05, "rewards/margins": 0.2032604068517685, "rewards/rejected": -0.2032746523618698, "step": 13731 }, { "epoch": 9.496542185338866, "grad_norm": 2.6880970001220703, "learning_rate": 2.7969878592285233e-06, "log_odds_chosen": 10.376435279846191, "log_odds_ratio": -0.00012236303882673383, "logits/chosen": -0.5876057147979736, "logits/rejected": -0.6403185725212097, "logps/chosen": -0.00017380820645485073, "logps/rejected": -1.4591680765151978, "loss": 0.3207, "nll_loss": 0.08016160130500793, "rewards/accuracies": 1.0, "rewards/chosen": -1.7380822100676596e-05, "rewards/margins": 0.1458994299173355, "rewards/rejected": -0.14591681957244873, "step": 13732 }, { "epoch": 9.497233748271093, "grad_norm": 6.102115631103516, "learning_rate": 2.793145842938374e-06, "log_odds_chosen": 10.565109252929688, "log_odds_ratio": -0.00015378330135717988, "logits/chosen": -0.5142971873283386, "logits/rejected": -0.605043888092041, "logps/chosen": -0.0008334450540132821, "logps/rejected": -1.8944003582000732, "loss": 0.3937, "nll_loss": 0.0984080508351326, "rewards/accuracies": 1.0, "rewards/chosen": -8.334450831171125e-05, "rewards/margins": 0.1893567144870758, "rewards/rejected": -0.1894400417804718, "step": 13733 }, { "epoch": 9.49792531120332, "grad_norm": 4.143298149108887, "learning_rate": 2.7893038266482252e-06, "log_odds_chosen": 12.307402610778809, "log_odds_ratio": -9.185643648379482e-06, "logits/chosen": -0.2419433444738388, "logits/rejected": -0.35360732674598694, "logps/chosen": -0.000124764846987091, "logps/rejected": -3.047055721282959, "loss": 0.4336, "nll_loss": 0.10840722173452377, "rewards/accuracies": 1.0, "rewards/chosen": -1.2476483789214399e-05, "rewards/margins": 0.3046931028366089, "rewards/rejected": -0.30470559000968933, "step": 13734 }, { "epoch": 9.498616874135546, "grad_norm": 3.105912685394287, "learning_rate": 2.785461810358076e-06, "log_odds_chosen": 11.952037811279297, "log_odds_ratio": -3.523328632581979e-05, "logits/chosen": -0.3825800120830536, "logits/rejected": -0.4227985143661499, "logps/chosen": -0.0001465064415242523, "logps/rejected": -2.528470754623413, "loss": 0.4094, "nll_loss": 0.10234944522380829, "rewards/accuracies": 1.0, "rewards/chosen": -1.4650645425717812e-05, "rewards/margins": 0.25283244252204895, "rewards/rejected": -0.2528470754623413, "step": 13735 }, { "epoch": 9.499308437067773, "grad_norm": 3.376985549926758, "learning_rate": 2.7816197940679267e-06, "log_odds_chosen": 11.052566528320312, "log_odds_ratio": -7.253669900819659e-05, "logits/chosen": -0.17732563614845276, "logits/rejected": -0.38987892866134644, "logps/chosen": -0.00033917598193511367, "logps/rejected": -2.2939813137054443, "loss": 0.2221, "nll_loss": 0.055507708340883255, "rewards/accuracies": 1.0, "rewards/chosen": -3.3917596738319844e-05, "rewards/margins": 0.22936421632766724, "rewards/rejected": -0.22939813137054443, "step": 13736 }, { "epoch": 9.5, "grad_norm": 4.159225940704346, "learning_rate": 2.777777777777778e-06, "log_odds_chosen": 11.19643783569336, "log_odds_ratio": -5.351271101972088e-05, "logits/chosen": -0.29468539357185364, "logits/rejected": -0.3837074041366577, "logps/chosen": -0.00014817621558904648, "logps/rejected": -2.212210178375244, "loss": 0.3631, "nll_loss": 0.09078159928321838, "rewards/accuracies": 1.0, "rewards/chosen": -1.481762228650041e-05, "rewards/margins": 0.2212061733007431, "rewards/rejected": -0.22122101485729218, "step": 13737 }, { "epoch": 9.500691562932227, "grad_norm": 4.34866189956665, "learning_rate": 2.773935761487629e-06, "log_odds_chosen": 12.640228271484375, "log_odds_ratio": -1.353957486571744e-05, "logits/chosen": -0.09187261760234833, "logits/rejected": -0.09283635020256042, "logps/chosen": -0.00016267431783489883, "logps/rejected": -3.7860047817230225, "loss": 0.3538, "nll_loss": 0.08845299482345581, "rewards/accuracies": 1.0, "rewards/chosen": -1.6267431419692002e-05, "rewards/margins": 0.37858423590660095, "rewards/rejected": -0.37860050797462463, "step": 13738 }, { "epoch": 9.501383125864454, "grad_norm": 4.006698131561279, "learning_rate": 2.77009374519748e-06, "log_odds_chosen": 11.847143173217773, "log_odds_ratio": -4.2443374695722014e-05, "logits/chosen": -0.14370986819267273, "logits/rejected": -0.0862513855099678, "logps/chosen": -0.0001187943562399596, "logps/rejected": -2.390091896057129, "loss": 0.3347, "nll_loss": 0.08366773277521133, "rewards/accuracies": 1.0, "rewards/chosen": -1.1879436897288542e-05, "rewards/margins": 0.23899732530117035, "rewards/rejected": -0.23900920152664185, "step": 13739 }, { "epoch": 9.50207468879668, "grad_norm": 3.6946158409118652, "learning_rate": 2.7662517289073306e-06, "log_odds_chosen": 10.731229782104492, "log_odds_ratio": -6.909217336215079e-05, "logits/chosen": -0.6922059059143066, "logits/rejected": -0.6929417848587036, "logps/chosen": -0.0004905189271084964, "logps/rejected": -1.811964511871338, "loss": 0.3459, "nll_loss": 0.08647441118955612, "rewards/accuracies": 1.0, "rewards/chosen": -4.9051894166041166e-05, "rewards/margins": 0.18114739656448364, "rewards/rejected": -0.1811964511871338, "step": 13740 }, { "epoch": 9.502766251728907, "grad_norm": 3.3442227840423584, "learning_rate": 2.7624097126171818e-06, "log_odds_chosen": 9.403704643249512, "log_odds_ratio": -0.00022261112462729216, "logits/chosen": -0.5050197839736938, "logits/rejected": -0.6509999632835388, "logps/chosen": -0.00037455116398632526, "logps/rejected": -1.067748785018921, "loss": 0.3388, "nll_loss": 0.08468709886074066, "rewards/accuracies": 1.0, "rewards/chosen": -3.7455116398632526e-05, "rewards/margins": 0.106737419962883, "rewards/rejected": -0.10677488148212433, "step": 13741 }, { "epoch": 9.503457814661134, "grad_norm": 3.956580638885498, "learning_rate": 2.7585676963270325e-06, "log_odds_chosen": 11.188089370727539, "log_odds_ratio": -3.0993913242127746e-05, "logits/chosen": -0.5652532577514648, "logits/rejected": -0.6602020263671875, "logps/chosen": -0.00010985941480612382, "logps/rejected": -2.1618151664733887, "loss": 0.4445, "nll_loss": 0.11113232374191284, "rewards/accuracies": 1.0, "rewards/chosen": -1.0985942026309203e-05, "rewards/margins": 0.216170534491539, "rewards/rejected": -0.21618153154850006, "step": 13742 }, { "epoch": 9.504149377593361, "grad_norm": 2.5939555168151855, "learning_rate": 2.7547256800368833e-06, "log_odds_chosen": 11.381129264831543, "log_odds_ratio": -3.200870196451433e-05, "logits/chosen": -0.01486485730856657, "logits/rejected": -0.04487267881631851, "logps/chosen": -0.00017249659867957234, "logps/rejected": -2.412909746170044, "loss": 0.2677, "nll_loss": 0.06691596657037735, "rewards/accuracies": 1.0, "rewards/chosen": -1.7249660231755115e-05, "rewards/margins": 0.24127374589443207, "rewards/rejected": -0.24129100143909454, "step": 13743 }, { "epoch": 9.504840940525588, "grad_norm": 4.674348831176758, "learning_rate": 2.7508836637467344e-06, "log_odds_chosen": 10.176416397094727, "log_odds_ratio": -9.241919906344265e-05, "logits/chosen": 0.023514077067375183, "logits/rejected": -0.02323298156261444, "logps/chosen": -0.0006039447034709156, "logps/rejected": -1.9654035568237305, "loss": 0.3913, "nll_loss": 0.0978233814239502, "rewards/accuracies": 1.0, "rewards/chosen": -6.039447180228308e-05, "rewards/margins": 0.1964799463748932, "rewards/rejected": -0.19654035568237305, "step": 13744 }, { "epoch": 9.505532503457815, "grad_norm": 2.398162364959717, "learning_rate": 2.7470416474565856e-06, "log_odds_chosen": 10.539690971374512, "log_odds_ratio": -0.00018237254698760808, "logits/chosen": 0.11595743149518967, "logits/rejected": -0.02506435476243496, "logps/chosen": -0.00022768854978494346, "logps/rejected": -1.79641592502594, "loss": 0.325, "nll_loss": 0.08123064786195755, "rewards/accuracies": 1.0, "rewards/chosen": -2.2768854250898585e-05, "rewards/margins": 0.17961883544921875, "rewards/rejected": -0.17964158952236176, "step": 13745 }, { "epoch": 9.506224066390041, "grad_norm": 2.861630916595459, "learning_rate": 2.7431996311664363e-06, "log_odds_chosen": 10.886007308959961, "log_odds_ratio": -0.00011090746556874365, "logits/chosen": -0.06716457009315491, "logits/rejected": -0.08194438368082047, "logps/chosen": -0.00025152985472232103, "logps/rejected": -2.21199369430542, "loss": 0.249, "nll_loss": 0.06222885847091675, "rewards/accuracies": 1.0, "rewards/chosen": -2.5152985472232103e-05, "rewards/margins": 0.2211742103099823, "rewards/rejected": -0.22119936347007751, "step": 13746 }, { "epoch": 9.506915629322268, "grad_norm": 2.145341396331787, "learning_rate": 2.739357614876287e-06, "log_odds_chosen": 12.142789840698242, "log_odds_ratio": -1.9502431314322166e-05, "logits/chosen": -0.4173874855041504, "logits/rejected": -0.544904887676239, "logps/chosen": -0.00026509028975851834, "logps/rejected": -3.7034568786621094, "loss": 0.2385, "nll_loss": 0.059627942740917206, "rewards/accuracies": 1.0, "rewards/chosen": -2.6509029339649715e-05, "rewards/margins": 0.3703191578388214, "rewards/rejected": -0.37034571170806885, "step": 13747 }, { "epoch": 9.507607192254495, "grad_norm": 3.220600128173828, "learning_rate": 2.7355155985861383e-06, "log_odds_chosen": 11.07638931274414, "log_odds_ratio": -3.4151064028264955e-05, "logits/chosen": -0.5607274174690247, "logits/rejected": -0.6396695375442505, "logps/chosen": -4.824280767934397e-05, "logps/rejected": -1.467947006225586, "loss": 0.3997, "nll_loss": 0.09991302341222763, "rewards/accuracies": 1.0, "rewards/chosen": -4.824280495085986e-06, "rewards/margins": 0.14678986370563507, "rewards/rejected": -0.14679470658302307, "step": 13748 }, { "epoch": 9.508298755186722, "grad_norm": 2.3197197914123535, "learning_rate": 2.731673582295989e-06, "log_odds_chosen": 11.177478790283203, "log_odds_ratio": -4.273248487152159e-05, "logits/chosen": -0.18425364792346954, "logits/rejected": -0.3179050087928772, "logps/chosen": -0.00013618064986076206, "logps/rejected": -2.3722050189971924, "loss": 0.3034, "nll_loss": 0.07583820074796677, "rewards/accuracies": 1.0, "rewards/chosen": -1.3618064258480445e-05, "rewards/margins": 0.23720687627792358, "rewards/rejected": -0.23722049593925476, "step": 13749 }, { "epoch": 9.508990318118949, "grad_norm": 2.5504002571105957, "learning_rate": 2.7278315660058398e-06, "log_odds_chosen": 11.458822250366211, "log_odds_ratio": -3.5846809623762965e-05, "logits/chosen": -0.4976326823234558, "logits/rejected": -0.6373475790023804, "logps/chosen": -0.00011164910392835736, "logps/rejected": -1.9494824409484863, "loss": 0.302, "nll_loss": 0.07548534870147705, "rewards/accuracies": 1.0, "rewards/chosen": -1.1164910574734677e-05, "rewards/margins": 0.1949370801448822, "rewards/rejected": -0.1949482411146164, "step": 13750 }, { "epoch": 9.509681881051176, "grad_norm": 4.700429916381836, "learning_rate": 2.723989549715691e-06, "log_odds_chosen": 11.000699043273926, "log_odds_ratio": -0.0001611942716408521, "logits/chosen": -0.09237821400165558, "logits/rejected": -0.04532511904835701, "logps/chosen": -0.0004388167290017009, "logps/rejected": -2.4760220050811768, "loss": 0.6004, "nll_loss": 0.15007996559143066, "rewards/accuracies": 1.0, "rewards/chosen": -4.3881671444978565e-05, "rewards/margins": 0.24755831062793732, "rewards/rejected": -0.2476022094488144, "step": 13751 }, { "epoch": 9.510373443983402, "grad_norm": 3.0092380046844482, "learning_rate": 2.720147533425542e-06, "log_odds_chosen": 12.049249649047852, "log_odds_ratio": -3.0345461709657684e-05, "logits/chosen": -0.13598808646202087, "logits/rejected": -0.16782134771347046, "logps/chosen": -0.0001091573212761432, "logps/rejected": -2.8934497833251953, "loss": 0.3649, "nll_loss": 0.09121409058570862, "rewards/accuracies": 1.0, "rewards/chosen": -1.0915731763816439e-05, "rewards/margins": 0.2893340587615967, "rewards/rejected": -0.2893449664115906, "step": 13752 }, { "epoch": 9.51106500691563, "grad_norm": 4.360344886779785, "learning_rate": 2.716305517135393e-06, "log_odds_chosen": 10.946246147155762, "log_odds_ratio": -0.00011370900028850883, "logits/chosen": -0.034930601716041565, "logits/rejected": -0.21249479055404663, "logps/chosen": -0.00041773245902732015, "logps/rejected": -2.1995179653167725, "loss": 0.5481, "nll_loss": 0.13701015710830688, "rewards/accuracies": 1.0, "rewards/chosen": -4.1773251723498106e-05, "rewards/margins": 0.21991004049777985, "rewards/rejected": -0.2199518233537674, "step": 13753 }, { "epoch": 9.511756569847856, "grad_norm": 5.424455642700195, "learning_rate": 2.7124635008452436e-06, "log_odds_chosen": 11.442152976989746, "log_odds_ratio": -2.2443495254265144e-05, "logits/chosen": -0.3222275376319885, "logits/rejected": -0.42254069447517395, "logps/chosen": -0.00016010711260605603, "logps/rejected": -2.557199001312256, "loss": 0.4625, "nll_loss": 0.11561720073223114, "rewards/accuracies": 1.0, "rewards/chosen": -1.6010711988201365e-05, "rewards/margins": 0.2557039260864258, "rewards/rejected": -0.2557199001312256, "step": 13754 }, { "epoch": 9.512448132780083, "grad_norm": 5.503417491912842, "learning_rate": 2.7086214845550948e-06, "log_odds_chosen": 10.576732635498047, "log_odds_ratio": -0.00010041467612609267, "logits/chosen": -0.3194338083267212, "logits/rejected": -0.34131085872650146, "logps/chosen": -0.0002541765570640564, "logps/rejected": -2.326648235321045, "loss": 0.4973, "nll_loss": 0.12430934607982635, "rewards/accuracies": 1.0, "rewards/chosen": -2.5417659344384447e-05, "rewards/margins": 0.2326394021511078, "rewards/rejected": -0.2326648086309433, "step": 13755 }, { "epoch": 9.51313969571231, "grad_norm": 3.766571044921875, "learning_rate": 2.7047794682649455e-06, "log_odds_chosen": 12.046109199523926, "log_odds_ratio": -1.1743547474907245e-05, "logits/chosen": -0.8128657341003418, "logits/rejected": -0.7834848165512085, "logps/chosen": -0.00014546149759553373, "logps/rejected": -2.5551412105560303, "loss": 0.3761, "nll_loss": 0.09401434659957886, "rewards/accuracies": 1.0, "rewards/chosen": -1.4546150850947015e-05, "rewards/margins": 0.25549960136413574, "rewards/rejected": -0.25551414489746094, "step": 13756 }, { "epoch": 9.513831258644537, "grad_norm": 3.58656907081604, "learning_rate": 2.7009374519747963e-06, "log_odds_chosen": 11.6583833694458, "log_odds_ratio": -3.437104169279337e-05, "logits/chosen": -0.2658565938472748, "logits/rejected": -0.35223588347435, "logps/chosen": -0.00015039359277579933, "logps/rejected": -2.4674582481384277, "loss": 0.5114, "nll_loss": 0.1278354823589325, "rewards/accuracies": 1.0, "rewards/chosen": -1.5039359823276754e-05, "rewards/margins": 0.246730774641037, "rewards/rejected": -0.24674582481384277, "step": 13757 }, { "epoch": 9.514522821576763, "grad_norm": 2.4052798748016357, "learning_rate": 2.6970954356846475e-06, "log_odds_chosen": 11.306896209716797, "log_odds_ratio": -3.7681104004150257e-05, "logits/chosen": -0.21047669649124146, "logits/rejected": -0.1311059594154358, "logps/chosen": -0.00017037391080521047, "logps/rejected": -2.7428433895111084, "loss": 0.3149, "nll_loss": 0.07871001958847046, "rewards/accuracies": 1.0, "rewards/chosen": -1.7037391444318928e-05, "rewards/margins": 0.274267315864563, "rewards/rejected": -0.27428436279296875, "step": 13758 }, { "epoch": 9.51521438450899, "grad_norm": 3.084327220916748, "learning_rate": 2.693253419394498e-06, "log_odds_chosen": 11.337088584899902, "log_odds_ratio": -5.701879126718268e-05, "logits/chosen": -0.43909841775894165, "logits/rejected": -0.5297845602035522, "logps/chosen": -0.00023459625663235784, "logps/rejected": -2.462244749069214, "loss": 0.4307, "nll_loss": 0.10767786204814911, "rewards/accuracies": 1.0, "rewards/chosen": -2.3459626390831545e-05, "rewards/margins": 0.2462010383605957, "rewards/rejected": -0.24622449278831482, "step": 13759 }, { "epoch": 9.515905947441217, "grad_norm": 2.876373291015625, "learning_rate": 2.6894114031043494e-06, "log_odds_chosen": 11.915162086486816, "log_odds_ratio": -1.1002990504493937e-05, "logits/chosen": -0.01239769160747528, "logits/rejected": -0.014579236507415771, "logps/chosen": -0.00014055031351745129, "logps/rejected": -2.851630687713623, "loss": 0.3221, "nll_loss": 0.08052629232406616, "rewards/accuracies": 1.0, "rewards/chosen": -1.4055030987947248e-05, "rewards/margins": 0.2851490378379822, "rewards/rejected": -0.2851630747318268, "step": 13760 }, { "epoch": 9.516597510373444, "grad_norm": 4.359734535217285, "learning_rate": 2.6855693868142005e-06, "log_odds_chosen": 10.538614273071289, "log_odds_ratio": -0.00028232764452695847, "logits/chosen": -0.39528805017471313, "logits/rejected": -0.4545425474643707, "logps/chosen": -0.0003071337123401463, "logps/rejected": -1.9423317909240723, "loss": 0.4818, "nll_loss": 0.12042504549026489, "rewards/accuracies": 1.0, "rewards/chosen": -3.071337050641887e-05, "rewards/margins": 0.1942024528980255, "rewards/rejected": -0.19423317909240723, "step": 13761 }, { "epoch": 9.51728907330567, "grad_norm": 2.967798948287964, "learning_rate": 2.6817273705240513e-06, "log_odds_chosen": 10.072077751159668, "log_odds_ratio": -0.0020614732056856155, "logits/chosen": -0.3225874900817871, "logits/rejected": -0.28027576208114624, "logps/chosen": -0.016036754474043846, "logps/rejected": -1.8646742105484009, "loss": 0.3828, "nll_loss": 0.09549985826015472, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016036754241213202, "rewards/margins": 0.18486374616622925, "rewards/rejected": -0.18646740913391113, "step": 13762 }, { "epoch": 9.517980636237898, "grad_norm": 5.028452396392822, "learning_rate": 2.677885354233902e-06, "log_odds_chosen": 11.795109748840332, "log_odds_ratio": -0.00018023568554781377, "logits/chosen": -0.052124328911304474, "logits/rejected": -0.04959341511130333, "logps/chosen": -0.000244269089307636, "logps/rejected": -2.9171204566955566, "loss": 0.4495, "nll_loss": 0.11235899478197098, "rewards/accuracies": 1.0, "rewards/chosen": -2.4426910385955125e-05, "rewards/margins": 0.29168763756752014, "rewards/rejected": -0.29171207547187805, "step": 13763 }, { "epoch": 9.518672199170124, "grad_norm": 3.2558481693267822, "learning_rate": 2.674043337943753e-06, "log_odds_chosen": 11.236143112182617, "log_odds_ratio": -1.880578747659456e-05, "logits/chosen": 0.16380754113197327, "logits/rejected": 0.18336796760559082, "logps/chosen": -0.0001271188084501773, "logps/rejected": -2.213346242904663, "loss": 0.2965, "nll_loss": 0.07413183152675629, "rewards/accuracies": 1.0, "rewards/chosen": -1.271188011742197e-05, "rewards/margins": 0.22132191061973572, "rewards/rejected": -0.22133463621139526, "step": 13764 }, { "epoch": 9.519363762102351, "grad_norm": 3.0914859771728516, "learning_rate": 2.670201321653604e-06, "log_odds_chosen": 11.55090618133545, "log_odds_ratio": -9.511876123724505e-05, "logits/chosen": -0.6392191052436829, "logits/rejected": -0.7223262190818787, "logps/chosen": -0.00022679113317281008, "logps/rejected": -3.00004243850708, "loss": 0.3672, "nll_loss": 0.09178087115287781, "rewards/accuracies": 1.0, "rewards/chosen": -2.2679112589685246e-05, "rewards/margins": 0.29998156428337097, "rewards/rejected": -0.300004243850708, "step": 13765 }, { "epoch": 9.520055325034578, "grad_norm": 2.6469478607177734, "learning_rate": 2.6663593053634547e-06, "log_odds_chosen": 11.283838272094727, "log_odds_ratio": -3.390854908502661e-05, "logits/chosen": -0.47625666856765747, "logits/rejected": -0.5266400575637817, "logps/chosen": -0.00020409503486007452, "logps/rejected": -1.99119234085083, "loss": 0.3722, "nll_loss": 0.09304707497358322, "rewards/accuracies": 1.0, "rewards/chosen": -2.0409504941198975e-05, "rewards/margins": 0.19909882545471191, "rewards/rejected": -0.19911924004554749, "step": 13766 }, { "epoch": 9.520746887966805, "grad_norm": 3.793987512588501, "learning_rate": 2.662517289073306e-06, "log_odds_chosen": 10.413166046142578, "log_odds_ratio": -0.0005908824969083071, "logits/chosen": -0.392555296421051, "logits/rejected": -0.43396303057670593, "logps/chosen": -0.0013371091336011887, "logps/rejected": -2.43149471282959, "loss": 0.4323, "nll_loss": 0.10801825672388077, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001337109279120341, "rewards/margins": 0.24301576614379883, "rewards/rejected": -0.24314947426319122, "step": 13767 }, { "epoch": 9.521438450899032, "grad_norm": 4.248869895935059, "learning_rate": 2.658675272783157e-06, "log_odds_chosen": 11.54124641418457, "log_odds_ratio": -5.0189213652629405e-05, "logits/chosen": -0.0543501079082489, "logits/rejected": -0.17935310304164886, "logps/chosen": -0.00014576385729014874, "logps/rejected": -2.3155760765075684, "loss": 0.5594, "nll_loss": 0.1398562639951706, "rewards/accuracies": 1.0, "rewards/chosen": -1.4576385183318052e-05, "rewards/margins": 0.23154304921627045, "rewards/rejected": -0.23155760765075684, "step": 13768 }, { "epoch": 9.522130013831259, "grad_norm": 2.860259771347046, "learning_rate": 2.654833256493008e-06, "log_odds_chosen": 12.623004913330078, "log_odds_ratio": -2.0028579456266016e-05, "logits/chosen": -0.19771257042884827, "logits/rejected": -0.24933721125125885, "logps/chosen": -8.964027801994234e-05, "logps/rejected": -3.1500256061553955, "loss": 0.314, "nll_loss": 0.07849594950675964, "rewards/accuracies": 1.0, "rewards/chosen": -8.964027983893175e-06, "rewards/margins": 0.31499359011650085, "rewards/rejected": -0.31500256061553955, "step": 13769 }, { "epoch": 9.522821576763485, "grad_norm": 3.8954451084136963, "learning_rate": 2.6509912402028586e-06, "log_odds_chosen": 10.765434265136719, "log_odds_ratio": -0.0008625364862382412, "logits/chosen": -0.28756552934646606, "logits/rejected": -0.36020129919052124, "logps/chosen": -0.001342854229733348, "logps/rejected": -2.6182684898376465, "loss": 0.6883, "nll_loss": 0.17197641730308533, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001342854229733348, "rewards/margins": 0.2616925835609436, "rewards/rejected": -0.26182684302330017, "step": 13770 }, { "epoch": 9.523513139695712, "grad_norm": 3.4728455543518066, "learning_rate": 2.6471492239127093e-06, "log_odds_chosen": 11.453036308288574, "log_odds_ratio": -3.844147067866288e-05, "logits/chosen": -0.444953978061676, "logits/rejected": -0.4889020621776581, "logps/chosen": -0.00020016248163301498, "logps/rejected": -2.6657907962799072, "loss": 0.7643, "nll_loss": 0.19108229875564575, "rewards/accuracies": 1.0, "rewards/chosen": -2.0016246708109975e-05, "rewards/margins": 0.26655906438827515, "rewards/rejected": -0.2665790915489197, "step": 13771 }, { "epoch": 9.524204702627939, "grad_norm": 4.564413547515869, "learning_rate": 2.6433072076225605e-06, "log_odds_chosen": 12.367158889770508, "log_odds_ratio": -1.2168299690529238e-05, "logits/chosen": 0.1791875958442688, "logits/rejected": 0.1064046323299408, "logps/chosen": -0.00012622703798115253, "logps/rejected": -3.2469067573547363, "loss": 0.6618, "nll_loss": 0.1654365360736847, "rewards/accuracies": 1.0, "rewards/chosen": -1.262270234292373e-05, "rewards/margins": 0.32467809319496155, "rewards/rejected": -0.32469069957733154, "step": 13772 }, { "epoch": 9.524896265560166, "grad_norm": 3.1376585960388184, "learning_rate": 2.6394651913324112e-06, "log_odds_chosen": 10.973043441772461, "log_odds_ratio": -0.00015945962513796985, "logits/chosen": -0.26559609174728394, "logits/rejected": -0.25664129853248596, "logps/chosen": -0.0012450111098587513, "logps/rejected": -2.3380062580108643, "loss": 0.3606, "nll_loss": 0.09012934565544128, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012450110807549208, "rewards/margins": 0.23367613554000854, "rewards/rejected": -0.23380064964294434, "step": 13773 }, { "epoch": 9.525587828492393, "grad_norm": 2.822948932647705, "learning_rate": 2.635623175042262e-06, "log_odds_chosen": 12.173641204833984, "log_odds_ratio": -1.9636803699540906e-05, "logits/chosen": -0.5914163589477539, "logits/rejected": -0.5313130617141724, "logps/chosen": -8.995865209726617e-05, "logps/rejected": -2.55480694770813, "loss": 0.2704, "nll_loss": 0.06760506331920624, "rewards/accuracies": 1.0, "rewards/chosen": -8.995865755423438e-06, "rewards/margins": 0.25547170639038086, "rewards/rejected": -0.25548070669174194, "step": 13774 }, { "epoch": 9.52627939142462, "grad_norm": 3.493384838104248, "learning_rate": 2.6317811587521136e-06, "log_odds_chosen": 11.011516571044922, "log_odds_ratio": -9.456177212996408e-05, "logits/chosen": -0.28284674882888794, "logits/rejected": -0.26557302474975586, "logps/chosen": -0.0042744167149066925, "logps/rejected": -2.7121171951293945, "loss": 0.3721, "nll_loss": 0.09302199631929398, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042744172969833016, "rewards/margins": 0.27078428864479065, "rewards/rejected": -0.27121174335479736, "step": 13775 }, { "epoch": 9.526970954356846, "grad_norm": 3.317343235015869, "learning_rate": 2.6279391424619643e-06, "log_odds_chosen": 12.224468231201172, "log_odds_ratio": -1.7389818822266534e-05, "logits/chosen": -0.43882882595062256, "logits/rejected": -0.4803587794303894, "logps/chosen": -0.0001255149400094524, "logps/rejected": -2.820533514022827, "loss": 0.415, "nll_loss": 0.10373595356941223, "rewards/accuracies": 1.0, "rewards/chosen": -1.255149345524842e-05, "rewards/margins": 0.2820408344268799, "rewards/rejected": -0.2820533812046051, "step": 13776 }, { "epoch": 9.527662517289073, "grad_norm": 4.189540863037109, "learning_rate": 2.624097126171815e-06, "log_odds_chosen": 11.56295108795166, "log_odds_ratio": -4.864947550231591e-05, "logits/chosen": -0.4536324143409729, "logits/rejected": -0.4786219596862793, "logps/chosen": -0.00020644332107622176, "logps/rejected": -2.809377908706665, "loss": 0.6721, "nll_loss": 0.1680191308259964, "rewards/accuracies": 1.0, "rewards/chosen": -2.0644332835217938e-05, "rewards/margins": 0.28091713786125183, "rewards/rejected": -0.2809377610683441, "step": 13777 }, { "epoch": 9.5283540802213, "grad_norm": 3.0645949840545654, "learning_rate": 2.620255109881666e-06, "log_odds_chosen": 10.801593780517578, "log_odds_ratio": -6.805037264712155e-05, "logits/chosen": -0.5784319639205933, "logits/rejected": -0.5846536159515381, "logps/chosen": -0.0001794451236492023, "logps/rejected": -1.995002031326294, "loss": 0.2643, "nll_loss": 0.06606464087963104, "rewards/accuracies": 1.0, "rewards/chosen": -1.794451236492023e-05, "rewards/margins": 0.199482262134552, "rewards/rejected": -0.1995002031326294, "step": 13778 }, { "epoch": 9.529045643153527, "grad_norm": 4.208901882171631, "learning_rate": 2.616413093591517e-06, "log_odds_chosen": 11.72228717803955, "log_odds_ratio": -2.457138180034235e-05, "logits/chosen": -0.4067918360233307, "logits/rejected": -0.502360463142395, "logps/chosen": -0.000477884488645941, "logps/rejected": -3.3087949752807617, "loss": 0.324, "nll_loss": 0.08099675178527832, "rewards/accuracies": 1.0, "rewards/chosen": -4.7788445954211056e-05, "rewards/margins": 0.3308316767215729, "rewards/rejected": -0.33087947964668274, "step": 13779 }, { "epoch": 9.529737206085754, "grad_norm": 2.3598437309265137, "learning_rate": 2.6125710773013677e-06, "log_odds_chosen": 11.954383850097656, "log_odds_ratio": -0.00010405042849015445, "logits/chosen": -0.6290899515151978, "logits/rejected": -0.5953158140182495, "logps/chosen": -0.00013793556718155742, "logps/rejected": -2.5401864051818848, "loss": 0.2037, "nll_loss": 0.05092133581638336, "rewards/accuracies": 1.0, "rewards/chosen": -1.3793557627650443e-05, "rewards/margins": 0.2540048360824585, "rewards/rejected": -0.254018634557724, "step": 13780 }, { "epoch": 9.53042876901798, "grad_norm": 3.0388574600219727, "learning_rate": 2.6087290610112185e-06, "log_odds_chosen": 12.778547286987305, "log_odds_ratio": -1.059939677361399e-05, "logits/chosen": -0.6642726063728333, "logits/rejected": -0.701655387878418, "logps/chosen": -0.00012015497486572713, "logps/rejected": -3.0411171913146973, "loss": 0.4084, "nll_loss": 0.102105513215065, "rewards/accuracies": 1.0, "rewards/chosen": -1.2015498214168474e-05, "rewards/margins": 0.3040997087955475, "rewards/rejected": -0.3041117191314697, "step": 13781 }, { "epoch": 9.531120331950207, "grad_norm": 3.123870849609375, "learning_rate": 2.6048870447210697e-06, "log_odds_chosen": 10.889333724975586, "log_odds_ratio": -8.684580097906291e-05, "logits/chosen": -0.5890014171600342, "logits/rejected": -0.6292410492897034, "logps/chosen": -0.0002101728750858456, "logps/rejected": -1.6682124137878418, "loss": 0.3341, "nll_loss": 0.08351971209049225, "rewards/accuracies": 1.0, "rewards/chosen": -2.101728750858456e-05, "rewards/margins": 0.16680021584033966, "rewards/rejected": -0.16682124137878418, "step": 13782 }, { "epoch": 9.531811894882434, "grad_norm": 2.1983208656311035, "learning_rate": 2.601045028430921e-06, "log_odds_chosen": 11.260841369628906, "log_odds_ratio": -4.079763311892748e-05, "logits/chosen": -0.6290895342826843, "logits/rejected": -0.6792348623275757, "logps/chosen": -7.228052709251642e-05, "logps/rejected": -1.6622785329818726, "loss": 0.2882, "nll_loss": 0.07205353677272797, "rewards/accuracies": 1.0, "rewards/chosen": -7.228052709251642e-06, "rewards/margins": 0.16622062027454376, "rewards/rejected": -0.16622784733772278, "step": 13783 }, { "epoch": 9.532503457814661, "grad_norm": 3.228482484817505, "learning_rate": 2.5972030121407716e-06, "log_odds_chosen": 11.143197059631348, "log_odds_ratio": -2.7463487640488893e-05, "logits/chosen": -0.19031089544296265, "logits/rejected": -0.2810859978199005, "logps/chosen": -0.0001386001822538674, "logps/rejected": -1.8822040557861328, "loss": 0.2618, "nll_loss": 0.06545382738113403, "rewards/accuracies": 1.0, "rewards/chosen": -1.38600189529825e-05, "rewards/margins": 0.1882065385580063, "rewards/rejected": -0.18822041153907776, "step": 13784 }, { "epoch": 9.533195020746888, "grad_norm": 4.018812656402588, "learning_rate": 2.5933609958506228e-06, "log_odds_chosen": 11.427385330200195, "log_odds_ratio": -0.00019970980065409094, "logits/chosen": -0.5215947031974792, "logits/rejected": -0.579155445098877, "logps/chosen": -0.00018272115266881883, "logps/rejected": -2.2558345794677734, "loss": 0.4895, "nll_loss": 0.1223452240228653, "rewards/accuracies": 1.0, "rewards/chosen": -1.8272115994477645e-05, "rewards/margins": 0.22556518018245697, "rewards/rejected": -0.22558343410491943, "step": 13785 }, { "epoch": 9.533886583679115, "grad_norm": 3.184753179550171, "learning_rate": 2.5895189795604735e-06, "log_odds_chosen": 10.723051071166992, "log_odds_ratio": -0.00016598444199189544, "logits/chosen": -0.5750604867935181, "logits/rejected": -0.712570309638977, "logps/chosen": -0.00014530037879012525, "logps/rejected": -2.0900259017944336, "loss": 0.3324, "nll_loss": 0.08307978510856628, "rewards/accuracies": 1.0, "rewards/chosen": -1.4530039152305108e-05, "rewards/margins": 0.20898807048797607, "rewards/rejected": -0.20900261402130127, "step": 13786 }, { "epoch": 9.534578146611342, "grad_norm": 3.268193483352661, "learning_rate": 2.5856769632703243e-06, "log_odds_chosen": 10.619029998779297, "log_odds_ratio": -3.353383726789616e-05, "logits/chosen": -0.2768457531929016, "logits/rejected": -0.2528938949108124, "logps/chosen": -0.00025384750915691257, "logps/rejected": -1.9944524765014648, "loss": 0.3209, "nll_loss": 0.08022896945476532, "rewards/accuracies": 1.0, "rewards/chosen": -2.5384753826074302e-05, "rewards/margins": 0.19941987097263336, "rewards/rejected": -0.19944524765014648, "step": 13787 }, { "epoch": 9.535269709543568, "grad_norm": 3.37003755569458, "learning_rate": 2.581834946980175e-06, "log_odds_chosen": 12.37077808380127, "log_odds_ratio": -1.2251228326931596e-05, "logits/chosen": -0.27846595644950867, "logits/rejected": -0.27231597900390625, "logps/chosen": -0.00015653952141292393, "logps/rejected": -3.245314121246338, "loss": 0.3284, "nll_loss": 0.08210815489292145, "rewards/accuracies": 1.0, "rewards/chosen": -1.5653953596483916e-05, "rewards/margins": 0.32451578974723816, "rewards/rejected": -0.3245314657688141, "step": 13788 }, { "epoch": 9.535961272475795, "grad_norm": 3.0098512172698975, "learning_rate": 2.577992930690026e-06, "log_odds_chosen": 11.0325927734375, "log_odds_ratio": -2.921521445387043e-05, "logits/chosen": -0.5826141238212585, "logits/rejected": -0.6744647026062012, "logps/chosen": -0.0001600942632649094, "logps/rejected": -2.2958791255950928, "loss": 0.291, "nll_loss": 0.07275387644767761, "rewards/accuracies": 1.0, "rewards/chosen": -1.6009425962693058e-05, "rewards/margins": 0.22957190871238708, "rewards/rejected": -0.22958789765834808, "step": 13789 }, { "epoch": 9.536652835408022, "grad_norm": 4.2000885009765625, "learning_rate": 2.5741509143998774e-06, "log_odds_chosen": 11.735769271850586, "log_odds_ratio": -1.6273430446744896e-05, "logits/chosen": -0.32315850257873535, "logits/rejected": -0.39081114530563354, "logps/chosen": -0.00011309140973025933, "logps/rejected": -2.3739523887634277, "loss": 0.5507, "nll_loss": 0.13766588270664215, "rewards/accuracies": 1.0, "rewards/chosen": -1.1309141882520635e-05, "rewards/margins": 0.23738393187522888, "rewards/rejected": -0.23739522695541382, "step": 13790 }, { "epoch": 9.537344398340249, "grad_norm": 3.6808247566223145, "learning_rate": 2.570308898109728e-06, "log_odds_chosen": 10.931219100952148, "log_odds_ratio": -8.441173122264445e-05, "logits/chosen": -0.44997841119766235, "logits/rejected": -0.45926445722579956, "logps/chosen": -0.0002950811176560819, "logps/rejected": -2.4755983352661133, "loss": 0.3674, "nll_loss": 0.09184225648641586, "rewards/accuracies": 1.0, "rewards/chosen": -2.9508113584597595e-05, "rewards/margins": 0.24753034114837646, "rewards/rejected": -0.24755984544754028, "step": 13791 }, { "epoch": 9.538035961272476, "grad_norm": 2.8941354751586914, "learning_rate": 2.5664668818195793e-06, "log_odds_chosen": 11.258584022521973, "log_odds_ratio": -3.4192082239314914e-05, "logits/chosen": -0.1735733300447464, "logits/rejected": -0.1983909159898758, "logps/chosen": -0.00016744263120926917, "logps/rejected": -2.520059585571289, "loss": 0.3083, "nll_loss": 0.0770687460899353, "rewards/accuracies": 1.0, "rewards/chosen": -1.6744263120926917e-05, "rewards/margins": 0.2519892454147339, "rewards/rejected": -0.25200599431991577, "step": 13792 }, { "epoch": 9.538727524204702, "grad_norm": 3.580810785293579, "learning_rate": 2.56262486552943e-06, "log_odds_chosen": 11.275768280029297, "log_odds_ratio": -2.0234165276633576e-05, "logits/chosen": -0.2527013421058655, "logits/rejected": -0.2792891263961792, "logps/chosen": -8.296287705888972e-05, "logps/rejected": -1.984834909439087, "loss": 0.4227, "nll_loss": 0.10567609965801239, "rewards/accuracies": 1.0, "rewards/chosen": -8.296288797282614e-06, "rewards/margins": 0.198475182056427, "rewards/rejected": -0.19848348200321198, "step": 13793 }, { "epoch": 9.53941908713693, "grad_norm": 3.8068368434906006, "learning_rate": 2.5587828492392808e-06, "log_odds_chosen": 11.381519317626953, "log_odds_ratio": -2.498861067579128e-05, "logits/chosen": -0.34718751907348633, "logits/rejected": -0.399546355009079, "logps/chosen": -0.00018592897686176002, "logps/rejected": -2.734633445739746, "loss": 0.5206, "nll_loss": 0.13015493750572205, "rewards/accuracies": 1.0, "rewards/chosen": -1.8592898413771763e-05, "rewards/margins": 0.2734447717666626, "rewards/rejected": -0.2734633684158325, "step": 13794 }, { "epoch": 9.540110650069156, "grad_norm": 2.8936984539031982, "learning_rate": 2.5549408329491315e-06, "log_odds_chosen": 10.034442901611328, "log_odds_ratio": -0.0002595040714368224, "logits/chosen": -0.5466099381446838, "logits/rejected": -0.4480135142803192, "logps/chosen": -0.0003597848699428141, "logps/rejected": -1.7597925662994385, "loss": 0.206, "nll_loss": 0.051484182476997375, "rewards/accuracies": 1.0, "rewards/chosen": -3.597848626668565e-05, "rewards/margins": 0.1759432852268219, "rewards/rejected": -0.17597925662994385, "step": 13795 }, { "epoch": 9.540802213001383, "grad_norm": 3.7214016914367676, "learning_rate": 2.5510988166589827e-06, "log_odds_chosen": 10.998615264892578, "log_odds_ratio": -0.0001421015476807952, "logits/chosen": -0.3131212890148163, "logits/rejected": -0.41555941104888916, "logps/chosen": -0.0002672660048119724, "logps/rejected": -2.176845073699951, "loss": 0.3728, "nll_loss": 0.09317442774772644, "rewards/accuracies": 1.0, "rewards/chosen": -2.6726600481197238e-05, "rewards/margins": 0.21765778958797455, "rewards/rejected": -0.21768450736999512, "step": 13796 }, { "epoch": 9.54149377593361, "grad_norm": 2.209973096847534, "learning_rate": 2.5472568003688334e-06, "log_odds_chosen": 11.791141510009766, "log_odds_ratio": -7.219630060717463e-05, "logits/chosen": -0.2285546213388443, "logits/rejected": -0.38388004899024963, "logps/chosen": -0.0002000272215809673, "logps/rejected": -2.9354567527770996, "loss": 0.2675, "nll_loss": 0.06686940044164658, "rewards/accuracies": 1.0, "rewards/chosen": -2.000272252189461e-05, "rewards/margins": 0.29352566599845886, "rewards/rejected": -0.293545663356781, "step": 13797 }, { "epoch": 9.542185338865837, "grad_norm": 3.551180124282837, "learning_rate": 2.5434147840786846e-06, "log_odds_chosen": 11.445298194885254, "log_odds_ratio": -1.558000258228276e-05, "logits/chosen": -0.2221369743347168, "logits/rejected": -0.2530161440372467, "logps/chosen": -0.00038774916902184486, "logps/rejected": -2.6247973442077637, "loss": 0.4239, "nll_loss": 0.10597167909145355, "rewards/accuracies": 1.0, "rewards/chosen": -3.8774916902184486e-05, "rewards/margins": 0.26244091987609863, "rewards/rejected": -0.262479692697525, "step": 13798 }, { "epoch": 9.542876901798063, "grad_norm": 4.352911472320557, "learning_rate": 2.539572767788536e-06, "log_odds_chosen": 11.785818099975586, "log_odds_ratio": -2.144151039829012e-05, "logits/chosen": -0.3466078042984009, "logits/rejected": -0.3404996395111084, "logps/chosen": -0.00010007733362726867, "logps/rejected": -2.582352876663208, "loss": 0.4948, "nll_loss": 0.12369333952665329, "rewards/accuracies": 1.0, "rewards/chosen": -1.0007733180827927e-05, "rewards/margins": 0.2582252621650696, "rewards/rejected": -0.25823527574539185, "step": 13799 }, { "epoch": 9.54356846473029, "grad_norm": 3.131136894226074, "learning_rate": 2.5357307514983865e-06, "log_odds_chosen": 11.537412643432617, "log_odds_ratio": -4.232880746712908e-05, "logits/chosen": -0.3534148335456848, "logits/rejected": -0.27187490463256836, "logps/chosen": -0.00012684140529017895, "logps/rejected": -2.396341562271118, "loss": 0.3547, "nll_loss": 0.08867879211902618, "rewards/accuracies": 1.0, "rewards/chosen": -1.2684140529017895e-05, "rewards/margins": 0.23962149024009705, "rewards/rejected": -0.23963415622711182, "step": 13800 }, { "epoch": 9.544260027662517, "grad_norm": 3.412196397781372, "learning_rate": 2.5318887352082373e-06, "log_odds_chosen": 11.617633819580078, "log_odds_ratio": -2.581049375294242e-05, "logits/chosen": -0.36974507570266724, "logits/rejected": -0.48222726583480835, "logps/chosen": -0.0004511342558544129, "logps/rejected": -3.2871181964874268, "loss": 0.4721, "nll_loss": 0.11802138388156891, "rewards/accuracies": 1.0, "rewards/chosen": -4.511342558544129e-05, "rewards/margins": 0.32866671681404114, "rewards/rejected": -0.3287118077278137, "step": 13801 }, { "epoch": 9.544951590594744, "grad_norm": 3.6055257320404053, "learning_rate": 2.5280467189180885e-06, "log_odds_chosen": 11.49981689453125, "log_odds_ratio": -7.038934563752264e-05, "logits/chosen": -0.3161396384239197, "logits/rejected": -0.30127403140068054, "logps/chosen": -0.0005343385855667293, "logps/rejected": -3.03310489654541, "loss": 0.378, "nll_loss": 0.09448744356632233, "rewards/accuracies": 1.0, "rewards/chosen": -5.3433861467055976e-05, "rewards/margins": 0.3032570481300354, "rewards/rejected": -0.3033105134963989, "step": 13802 }, { "epoch": 9.54564315352697, "grad_norm": 3.4347176551818848, "learning_rate": 2.5242047026279392e-06, "log_odds_chosen": 10.892858505249023, "log_odds_ratio": -2.7437276003183797e-05, "logits/chosen": -0.08749085664749146, "logits/rejected": -0.015900779515504837, "logps/chosen": -0.00021966373606119305, "logps/rejected": -2.3338217735290527, "loss": 0.2875, "nll_loss": 0.07186231017112732, "rewards/accuracies": 1.0, "rewards/chosen": -2.1966374333715066e-05, "rewards/margins": 0.23336023092269897, "rewards/rejected": -0.2333821952342987, "step": 13803 }, { "epoch": 9.546334716459198, "grad_norm": 2.9281575679779053, "learning_rate": 2.52036268633779e-06, "log_odds_chosen": 10.796745300292969, "log_odds_ratio": -0.00015486503252759576, "logits/chosen": -0.3453628420829773, "logits/rejected": -0.31174102425575256, "logps/chosen": -0.0006333081400953233, "logps/rejected": -2.9530837535858154, "loss": 0.2823, "nll_loss": 0.07056043297052383, "rewards/accuracies": 1.0, "rewards/chosen": -6.333081546472386e-05, "rewards/margins": 0.29524505138397217, "rewards/rejected": -0.2953084111213684, "step": 13804 }, { "epoch": 9.547026279391424, "grad_norm": 3.473527431488037, "learning_rate": 2.516520670047641e-06, "log_odds_chosen": 10.990711212158203, "log_odds_ratio": -4.315027763368562e-05, "logits/chosen": -0.12099762260913849, "logits/rejected": -0.1815110743045807, "logps/chosen": -0.00015486619668081403, "logps/rejected": -2.163987398147583, "loss": 0.3474, "nll_loss": 0.08683499693870544, "rewards/accuracies": 1.0, "rewards/chosen": -1.5486619304283522e-05, "rewards/margins": 0.21638326346874237, "rewards/rejected": -0.21639874577522278, "step": 13805 }, { "epoch": 9.547717842323651, "grad_norm": 4.62323522567749, "learning_rate": 2.5126786537574923e-06, "log_odds_chosen": 10.207351684570312, "log_odds_ratio": -0.0003335881920065731, "logits/chosen": -0.27806001901626587, "logits/rejected": -0.1864968240261078, "logps/chosen": -0.0035468749701976776, "logps/rejected": -2.6968271732330322, "loss": 0.4498, "nll_loss": 0.11242831498384476, "rewards/accuracies": 1.0, "rewards/chosen": -0.00035468745045363903, "rewards/margins": 0.2693280279636383, "rewards/rejected": -0.26968270540237427, "step": 13806 }, { "epoch": 9.548409405255878, "grad_norm": 3.1464242935180664, "learning_rate": 2.508836637467343e-06, "log_odds_chosen": 10.235457420349121, "log_odds_ratio": -0.0002480056427884847, "logits/chosen": -0.6790138483047485, "logits/rejected": -0.6777001023292542, "logps/chosen": -0.0003498023725114763, "logps/rejected": -1.6847186088562012, "loss": 0.333, "nll_loss": 0.08322387933731079, "rewards/accuracies": 1.0, "rewards/chosen": -3.4980235795956105e-05, "rewards/margins": 0.16843688488006592, "rewards/rejected": -0.16847185790538788, "step": 13807 }, { "epoch": 9.549100968188105, "grad_norm": 3.7750470638275146, "learning_rate": 2.504994621177194e-06, "log_odds_chosen": 10.81887435913086, "log_odds_ratio": -8.63458335516043e-05, "logits/chosen": -0.40970128774642944, "logits/rejected": -0.42641782760620117, "logps/chosen": -0.00040343630826100707, "logps/rejected": -2.3015646934509277, "loss": 0.3517, "nll_loss": 0.08792360126972198, "rewards/accuracies": 1.0, "rewards/chosen": -4.034363155369647e-05, "rewards/margins": 0.2301161289215088, "rewards/rejected": -0.23015648126602173, "step": 13808 }, { "epoch": 9.549792531120332, "grad_norm": 4.055153846740723, "learning_rate": 2.501152604887045e-06, "log_odds_chosen": 10.715600967407227, "log_odds_ratio": -5.44217909919098e-05, "logits/chosen": -0.2824767529964447, "logits/rejected": -0.22777681052684784, "logps/chosen": -0.0002521896967664361, "logps/rejected": -2.063735008239746, "loss": 0.4458, "nll_loss": 0.11144724488258362, "rewards/accuracies": 1.0, "rewards/chosen": -2.5218971131835133e-05, "rewards/margins": 0.2063482701778412, "rewards/rejected": -0.20637348294258118, "step": 13809 }, { "epoch": 9.550484094052559, "grad_norm": 4.018936634063721, "learning_rate": 2.4973105885968957e-06, "log_odds_chosen": 10.957100868225098, "log_odds_ratio": -2.922149360529147e-05, "logits/chosen": -0.1165858656167984, "logits/rejected": -0.140200674533844, "logps/chosen": -0.000530008168425411, "logps/rejected": -2.5369997024536133, "loss": 0.5002, "nll_loss": 0.12503990530967712, "rewards/accuracies": 1.0, "rewards/chosen": -5.300081829773262e-05, "rewards/margins": 0.25364699959754944, "rewards/rejected": -0.25369998812675476, "step": 13810 }, { "epoch": 9.551175656984785, "grad_norm": 4.327420711517334, "learning_rate": 2.4934685723067465e-06, "log_odds_chosen": 11.74692153930664, "log_odds_ratio": -1.655887535889633e-05, "logits/chosen": -0.28256115317344666, "logits/rejected": -0.32909637689590454, "logps/chosen": -0.0001346078934147954, "logps/rejected": -2.7365636825561523, "loss": 0.4023, "nll_loss": 0.10056796669960022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3460790796671063e-05, "rewards/margins": 0.273642897605896, "rewards/rejected": -0.27365636825561523, "step": 13811 }, { "epoch": 9.551867219917012, "grad_norm": 3.934828042984009, "learning_rate": 2.4896265560165977e-06, "log_odds_chosen": 10.9265775680542, "log_odds_ratio": -5.796810728497803e-05, "logits/chosen": -0.14236091077327728, "logits/rejected": -0.23647207021713257, "logps/chosen": -0.00025920968619175255, "logps/rejected": -2.556171178817749, "loss": 0.4271, "nll_loss": 0.10676690936088562, "rewards/accuracies": 1.0, "rewards/chosen": -2.592097189335618e-05, "rewards/margins": 0.25559118390083313, "rewards/rejected": -0.2556171119213104, "step": 13812 }, { "epoch": 9.552558782849239, "grad_norm": 3.546806812286377, "learning_rate": 2.485784539726449e-06, "log_odds_chosen": 10.0706787109375, "log_odds_ratio": -9.912428504321724e-05, "logits/chosen": -0.43360263109207153, "logits/rejected": -0.4856250584125519, "logps/chosen": -0.0004174561472609639, "logps/rejected": -2.019066333770752, "loss": 0.3284, "nll_loss": 0.08210201561450958, "rewards/accuracies": 1.0, "rewards/chosen": -4.174561763647944e-05, "rewards/margins": 0.20186486840248108, "rewards/rejected": -0.20190662145614624, "step": 13813 }, { "epoch": 9.553250345781466, "grad_norm": 3.4718127250671387, "learning_rate": 2.4819425234362996e-06, "log_odds_chosen": 8.758975982666016, "log_odds_ratio": -0.0022941383067518473, "logits/chosen": -0.1509694755077362, "logits/rejected": -0.1860388070344925, "logps/chosen": -0.016561385244131088, "logps/rejected": -1.6161742210388184, "loss": 0.3722, "nll_loss": 0.09281736612319946, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016561385709792376, "rewards/margins": 0.1599612832069397, "rewards/rejected": -0.1616174280643463, "step": 13814 }, { "epoch": 9.553941908713693, "grad_norm": 2.690258026123047, "learning_rate": 2.4781005071461503e-06, "log_odds_chosen": 10.737163543701172, "log_odds_ratio": -6.156474410090595e-05, "logits/chosen": -0.4985949695110321, "logits/rejected": -0.4808202385902405, "logps/chosen": -0.0002015067293541506, "logps/rejected": -2.085129737854004, "loss": 0.2381, "nll_loss": 0.05951059237122536, "rewards/accuracies": 1.0, "rewards/chosen": -2.0150675482000224e-05, "rewards/margins": 0.20849281549453735, "rewards/rejected": -0.20851297676563263, "step": 13815 }, { "epoch": 9.55463347164592, "grad_norm": 3.3727006912231445, "learning_rate": 2.4742584908560015e-06, "log_odds_chosen": 9.605585098266602, "log_odds_ratio": -0.0002903227577917278, "logits/chosen": -0.68758225440979, "logits/rejected": -0.6676366925239563, "logps/chosen": -0.0005176997510716319, "logps/rejected": -1.6474573612213135, "loss": 0.4287, "nll_loss": 0.10714066028594971, "rewards/accuracies": 1.0, "rewards/chosen": -5.176998092792928e-05, "rewards/margins": 0.16469396650791168, "rewards/rejected": -0.16474571824073792, "step": 13816 }, { "epoch": 9.555325034578146, "grad_norm": 3.6826722621917725, "learning_rate": 2.4704164745658522e-06, "log_odds_chosen": 9.754058837890625, "log_odds_ratio": -9.896468691295013e-05, "logits/chosen": -0.8700242638587952, "logits/rejected": -0.8466467261314392, "logps/chosen": -0.0003343577263876796, "logps/rejected": -1.5087486505508423, "loss": 0.4219, "nll_loss": 0.10547720640897751, "rewards/accuracies": 1.0, "rewards/chosen": -3.3435775549151e-05, "rewards/margins": 0.15084142982959747, "rewards/rejected": -0.15087486803531647, "step": 13817 }, { "epoch": 9.556016597510373, "grad_norm": 3.3866477012634277, "learning_rate": 2.466574458275703e-06, "log_odds_chosen": 11.469823837280273, "log_odds_ratio": -9.968294034479186e-05, "logits/chosen": -0.18187442421913147, "logits/rejected": -0.2844538688659668, "logps/chosen": -0.00022320245625451207, "logps/rejected": -2.6314992904663086, "loss": 0.2389, "nll_loss": 0.05972164124250412, "rewards/accuracies": 1.0, "rewards/chosen": -2.2320244170259684e-05, "rewards/margins": 0.2631275951862335, "rewards/rejected": -0.2631498873233795, "step": 13818 }, { "epoch": 9.5567081604426, "grad_norm": 4.043667316436768, "learning_rate": 2.462732441985554e-06, "log_odds_chosen": 11.33417797088623, "log_odds_ratio": -2.4894938178476878e-05, "logits/chosen": -0.19683483242988586, "logits/rejected": -0.26682889461517334, "logps/chosen": -0.00023350583796855062, "logps/rejected": -2.2510480880737305, "loss": 0.3719, "nll_loss": 0.09298249334096909, "rewards/accuracies": 1.0, "rewards/chosen": -2.3350585252046585e-05, "rewards/margins": 0.2250814437866211, "rewards/rejected": -0.22510480880737305, "step": 13819 }, { "epoch": 9.557399723374827, "grad_norm": 4.178382396697998, "learning_rate": 2.458890425695405e-06, "log_odds_chosen": 10.40451431274414, "log_odds_ratio": -0.00011425888078520074, "logits/chosen": 0.1248646080493927, "logits/rejected": 0.07189249992370605, "logps/chosen": -0.0009131749393418431, "logps/rejected": -2.2126245498657227, "loss": 0.5634, "nll_loss": 0.14083731174468994, "rewards/accuracies": 1.0, "rewards/chosen": -9.131749538937584e-05, "rewards/margins": 0.22117114067077637, "rewards/rejected": -0.22126245498657227, "step": 13820 }, { "epoch": 9.558091286307054, "grad_norm": 3.49820613861084, "learning_rate": 2.455048409405256e-06, "log_odds_chosen": 10.636669158935547, "log_odds_ratio": -0.00011038091906812042, "logits/chosen": -0.16480788588523865, "logits/rejected": -0.18782483041286469, "logps/chosen": -0.00019006957882083952, "logps/rejected": -2.0420327186584473, "loss": 0.3931, "nll_loss": 0.09826451539993286, "rewards/accuracies": 1.0, "rewards/chosen": -1.900695679069031e-05, "rewards/margins": 0.20418426394462585, "rewards/rejected": -0.2042032778263092, "step": 13821 }, { "epoch": 9.55878284923928, "grad_norm": 3.947436571121216, "learning_rate": 2.451206393115107e-06, "log_odds_chosen": 10.386070251464844, "log_odds_ratio": -5.714597864425741e-05, "logits/chosen": -0.2845008671283722, "logits/rejected": -0.3634602725505829, "logps/chosen": -0.00021994822600390762, "logps/rejected": -1.699218988418579, "loss": 0.3057, "nll_loss": 0.0764106884598732, "rewards/accuracies": 1.0, "rewards/chosen": -2.199482150899712e-05, "rewards/margins": 0.16989991068840027, "rewards/rejected": -0.1699219048023224, "step": 13822 }, { "epoch": 9.559474412171507, "grad_norm": 3.815812110900879, "learning_rate": 2.447364376824958e-06, "log_odds_chosen": 10.201431274414062, "log_odds_ratio": -0.00012514113041106611, "logits/chosen": -0.5025345683097839, "logits/rejected": -0.6607664823532104, "logps/chosen": -0.00035802845377475023, "logps/rejected": -2.074693202972412, "loss": 0.5202, "nll_loss": 0.13002927601337433, "rewards/accuracies": 1.0, "rewards/chosen": -3.580284464987926e-05, "rewards/margins": 0.2074335217475891, "rewards/rejected": -0.20746931433677673, "step": 13823 }, { "epoch": 9.560165975103734, "grad_norm": 3.921386241912842, "learning_rate": 2.4435223605348088e-06, "log_odds_chosen": 11.03492546081543, "log_odds_ratio": -9.24068663152866e-05, "logits/chosen": -0.23196052014827728, "logits/rejected": -0.2074502408504486, "logps/chosen": -0.0001746982743497938, "logps/rejected": -2.3940048217773438, "loss": 0.3778, "nll_loss": 0.09443871676921844, "rewards/accuracies": 1.0, "rewards/chosen": -1.74698270711815e-05, "rewards/margins": 0.2393830120563507, "rewards/rejected": -0.2394004911184311, "step": 13824 }, { "epoch": 9.560857538035961, "grad_norm": 4.664690017700195, "learning_rate": 2.4396803442446595e-06, "log_odds_chosen": 10.925382614135742, "log_odds_ratio": -0.00015570037066936493, "logits/chosen": -0.31630760431289673, "logits/rejected": -0.2322784662246704, "logps/chosen": -0.000406614359235391, "logps/rejected": -2.5762383937835693, "loss": 0.5605, "nll_loss": 0.14011088013648987, "rewards/accuracies": 1.0, "rewards/chosen": -4.0661438106326386e-05, "rewards/margins": 0.2575831413269043, "rewards/rejected": -0.2576238214969635, "step": 13825 }, { "epoch": 9.561549100968188, "grad_norm": 3.627612829208374, "learning_rate": 2.4358383279545107e-06, "log_odds_chosen": 10.90926742553711, "log_odds_ratio": -5.2584851800929755e-05, "logits/chosen": -0.22938194870948792, "logits/rejected": -0.2999710440635681, "logps/chosen": -0.0003280373348388821, "logps/rejected": -2.518974781036377, "loss": 0.3191, "nll_loss": 0.0797610729932785, "rewards/accuracies": 1.0, "rewards/chosen": -3.280373493907973e-05, "rewards/margins": 0.2518646717071533, "rewards/rejected": -0.2518974840641022, "step": 13826 }, { "epoch": 9.562240663900415, "grad_norm": 4.754434585571289, "learning_rate": 2.4319963116643614e-06, "log_odds_chosen": 11.580238342285156, "log_odds_ratio": -3.5747376387007535e-05, "logits/chosen": -0.5771031975746155, "logits/rejected": -0.5557994842529297, "logps/chosen": -0.0005113329389132559, "logps/rejected": -2.7893121242523193, "loss": 0.6148, "nll_loss": 0.15368834137916565, "rewards/accuracies": 1.0, "rewards/chosen": -5.113328734296374e-05, "rewards/margins": 0.2788800895214081, "rewards/rejected": -0.278931200504303, "step": 13827 }, { "epoch": 9.562932226832642, "grad_norm": 3.742189645767212, "learning_rate": 2.4281542953742126e-06, "log_odds_chosen": 11.10738468170166, "log_odds_ratio": -0.0001169455936178565, "logits/chosen": -0.6030516028404236, "logits/rejected": -0.5496112108230591, "logps/chosen": -0.00011338586773490533, "logps/rejected": -2.0595290660858154, "loss": 0.4938, "nll_loss": 0.12342959642410278, "rewards/accuracies": 1.0, "rewards/chosen": -1.1338586773490533e-05, "rewards/margins": 0.2059415727853775, "rewards/rejected": -0.20595292747020721, "step": 13828 }, { "epoch": 9.563623789764868, "grad_norm": 3.6209042072296143, "learning_rate": 2.4243122790840634e-06, "log_odds_chosen": 11.689399719238281, "log_odds_ratio": -0.0004498852649703622, "logits/chosen": -0.5256933569908142, "logits/rejected": -0.429162859916687, "logps/chosen": -0.0005315897287800908, "logps/rejected": -2.733124017715454, "loss": 0.5217, "nll_loss": 0.1303684264421463, "rewards/accuracies": 1.0, "rewards/chosen": -5.315896851243451e-05, "rewards/margins": 0.2732592225074768, "rewards/rejected": -0.27331238985061646, "step": 13829 }, { "epoch": 9.564315352697095, "grad_norm": 2.6820294857025146, "learning_rate": 2.4204702627939145e-06, "log_odds_chosen": 10.56318473815918, "log_odds_ratio": -0.00020509613386821002, "logits/chosen": -0.16411474347114563, "logits/rejected": -0.22590361535549164, "logps/chosen": -0.0004030780401080847, "logps/rejected": -2.0240650177001953, "loss": 0.2231, "nll_loss": 0.05575563758611679, "rewards/accuracies": 1.0, "rewards/chosen": -4.0307808376383036e-05, "rewards/margins": 0.2023661732673645, "rewards/rejected": -0.20240649580955505, "step": 13830 }, { "epoch": 9.565006915629322, "grad_norm": 3.4184882640838623, "learning_rate": 2.4166282465037653e-06, "log_odds_chosen": 10.659126281738281, "log_odds_ratio": -4.517474371823482e-05, "logits/chosen": -0.4295801818370819, "logits/rejected": -0.4659000337123871, "logps/chosen": -0.00013040596968494356, "logps/rejected": -1.8989781141281128, "loss": 0.3486, "nll_loss": 0.08713482320308685, "rewards/accuracies": 1.0, "rewards/chosen": -1.3040596968494356e-05, "rewards/margins": 0.18988478183746338, "rewards/rejected": -0.1898978352546692, "step": 13831 }, { "epoch": 9.565698478561549, "grad_norm": 3.020599842071533, "learning_rate": 2.412786230213616e-06, "log_odds_chosen": 11.195077896118164, "log_odds_ratio": -4.840535984840244e-05, "logits/chosen": -0.45517709851264954, "logits/rejected": -0.4778594970703125, "logps/chosen": -0.00019780338334385306, "logps/rejected": -2.1995885372161865, "loss": 0.3524, "nll_loss": 0.08808855712413788, "rewards/accuracies": 1.0, "rewards/chosen": -1.978034015337471e-05, "rewards/margins": 0.21993906795978546, "rewards/rejected": -0.21995887160301208, "step": 13832 }, { "epoch": 9.566390041493776, "grad_norm": 2.6750314235687256, "learning_rate": 2.408944213923467e-06, "log_odds_chosen": 12.286481857299805, "log_odds_ratio": -8.945756235334557e-06, "logits/chosen": -0.7853397130966187, "logits/rejected": -0.806201696395874, "logps/chosen": -8.892019832273945e-05, "logps/rejected": -2.449873447418213, "loss": 0.3711, "nll_loss": 0.09277424961328506, "rewards/accuracies": 1.0, "rewards/chosen": -8.892019650375005e-06, "rewards/margins": 0.24497844278812408, "rewards/rejected": -0.2449873387813568, "step": 13833 }, { "epoch": 9.567081604426003, "grad_norm": 2.950885772705078, "learning_rate": 2.405102197633318e-06, "log_odds_chosen": 10.587726593017578, "log_odds_ratio": -0.00012081609747838229, "logits/chosen": -0.6059221625328064, "logits/rejected": -0.6290983557701111, "logps/chosen": -0.0004665871092583984, "logps/rejected": -2.1101551055908203, "loss": 0.2502, "nll_loss": 0.06253961473703384, "rewards/accuracies": 1.0, "rewards/chosen": -4.6658708015456796e-05, "rewards/margins": 0.21096885204315186, "rewards/rejected": -0.211015522480011, "step": 13834 }, { "epoch": 9.56777316735823, "grad_norm": 3.7712626457214355, "learning_rate": 2.401260181343169e-06, "log_odds_chosen": 10.279624938964844, "log_odds_ratio": -0.00013441775809042156, "logits/chosen": -0.5828365683555603, "logits/rejected": -0.6356889605522156, "logps/chosen": -0.002119675977155566, "logps/rejected": -2.069326400756836, "loss": 0.4315, "nll_loss": 0.10785016417503357, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021196759189479053, "rewards/margins": 0.20672067999839783, "rewards/rejected": -0.2069326490163803, "step": 13835 }, { "epoch": 9.568464730290456, "grad_norm": 2.1596806049346924, "learning_rate": 2.3974181650530203e-06, "log_odds_chosen": 11.440327644348145, "log_odds_ratio": -7.01888493495062e-05, "logits/chosen": -0.23839282989501953, "logits/rejected": -0.2481321394443512, "logps/chosen": -0.00018663023365661502, "logps/rejected": -2.5868582725524902, "loss": 0.2811, "nll_loss": 0.0702705979347229, "rewards/accuracies": 1.0, "rewards/chosen": -1.8663024093257263e-05, "rewards/margins": 0.25866714119911194, "rewards/rejected": -0.258685827255249, "step": 13836 }, { "epoch": 9.569156293222683, "grad_norm": 3.1561505794525146, "learning_rate": 2.393576148762871e-06, "log_odds_chosen": 13.4840087890625, "log_odds_ratio": -1.5572994016110897e-05, "logits/chosen": -0.8446952700614929, "logits/rejected": -0.8324875235557556, "logps/chosen": -0.00013882649363949895, "logps/rejected": -3.4737942218780518, "loss": 0.3884, "nll_loss": 0.09709928929805756, "rewards/accuracies": 1.0, "rewards/chosen": -1.3882649909646716e-05, "rewards/margins": 0.3473655581474304, "rewards/rejected": -0.3473794162273407, "step": 13837 }, { "epoch": 9.56984785615491, "grad_norm": 3.678325891494751, "learning_rate": 2.3897341324727218e-06, "log_odds_chosen": 10.045623779296875, "log_odds_ratio": -0.00022260332480072975, "logits/chosen": -0.38605496287345886, "logits/rejected": -0.4284819960594177, "logps/chosen": -0.00029918731888756156, "logps/rejected": -1.6647403240203857, "loss": 0.341, "nll_loss": 0.08522848039865494, "rewards/accuracies": 1.0, "rewards/chosen": -2.9918732252554037e-05, "rewards/margins": 0.16644412279129028, "rewards/rejected": -0.16647404432296753, "step": 13838 }, { "epoch": 9.570539419087137, "grad_norm": 3.3484020233154297, "learning_rate": 2.3858921161825725e-06, "log_odds_chosen": 11.903501510620117, "log_odds_ratio": -2.8621461751754396e-05, "logits/chosen": -0.12507346272468567, "logits/rejected": -0.16093148291110992, "logps/chosen": -0.0002613436954561621, "logps/rejected": -3.074439525604248, "loss": 0.2855, "nll_loss": 0.07138177752494812, "rewards/accuracies": 1.0, "rewards/chosen": -2.613437027321197e-05, "rewards/margins": 0.3074178397655487, "rewards/rejected": -0.3074439764022827, "step": 13839 }, { "epoch": 9.571230982019364, "grad_norm": 3.287785530090332, "learning_rate": 2.3820500998924237e-06, "log_odds_chosen": 10.767134666442871, "log_odds_ratio": -4.5068540202919394e-05, "logits/chosen": -0.22858965396881104, "logits/rejected": -0.270693838596344, "logps/chosen": -0.00025440676836296916, "logps/rejected": -2.1813197135925293, "loss": 0.3567, "nll_loss": 0.08916851878166199, "rewards/accuracies": 1.0, "rewards/chosen": -2.5440676836296916e-05, "rewards/margins": 0.21810652315616608, "rewards/rejected": -0.21813197433948517, "step": 13840 }, { "epoch": 9.57192254495159, "grad_norm": 2.4846994876861572, "learning_rate": 2.3782080836022745e-06, "log_odds_chosen": 10.437719345092773, "log_odds_ratio": -0.00014845086843706667, "logits/chosen": -0.6213191151618958, "logits/rejected": -0.6971963047981262, "logps/chosen": -0.00021114877017680556, "logps/rejected": -1.7383289337158203, "loss": 0.2353, "nll_loss": 0.05881960690021515, "rewards/accuracies": 1.0, "rewards/chosen": -2.1114876290084794e-05, "rewards/margins": 0.17381176352500916, "rewards/rejected": -0.17383289337158203, "step": 13841 }, { "epoch": 9.572614107883817, "grad_norm": 3.5097603797912598, "learning_rate": 2.374366067312125e-06, "log_odds_chosen": 11.36322021484375, "log_odds_ratio": -4.9892110837390646e-05, "logits/chosen": -0.5468803644180298, "logits/rejected": -0.5342287421226501, "logps/chosen": -0.00011988454207312316, "logps/rejected": -2.2961316108703613, "loss": 0.4119, "nll_loss": 0.1029641330242157, "rewards/accuracies": 1.0, "rewards/chosen": -1.1988455298705958e-05, "rewards/margins": 0.2296011745929718, "rewards/rejected": -0.22961315512657166, "step": 13842 }, { "epoch": 9.573305670816044, "grad_norm": 3.7719080448150635, "learning_rate": 2.3705240510219764e-06, "log_odds_chosen": 11.050481796264648, "log_odds_ratio": -4.697371332440525e-05, "logits/chosen": -0.004522927105426788, "logits/rejected": -0.10563942790031433, "logps/chosen": -0.0002663819177541882, "logps/rejected": -2.487330198287964, "loss": 0.5121, "nll_loss": 0.1280326545238495, "rewards/accuracies": 1.0, "rewards/chosen": -2.6638190320227295e-05, "rewards/margins": 0.2487063854932785, "rewards/rejected": -0.2487330287694931, "step": 13843 }, { "epoch": 9.57399723374827, "grad_norm": 2.42683482170105, "learning_rate": 2.3666820347318276e-06, "log_odds_chosen": 10.184989929199219, "log_odds_ratio": -8.84734108694829e-05, "logits/chosen": -0.47841382026672363, "logits/rejected": -0.49283891916275024, "logps/chosen": -0.0002696047304198146, "logps/rejected": -1.7976648807525635, "loss": 0.2798, "nll_loss": 0.06993197649717331, "rewards/accuracies": 1.0, "rewards/chosen": -2.6960471586789936e-05, "rewards/margins": 0.17973953485488892, "rewards/rejected": -0.1797664910554886, "step": 13844 }, { "epoch": 9.574688796680498, "grad_norm": 2.9095869064331055, "learning_rate": 2.3628400184416783e-06, "log_odds_chosen": 11.749897003173828, "log_odds_ratio": -1.533768227091059e-05, "logits/chosen": -0.5682963728904724, "logits/rejected": -0.6184790134429932, "logps/chosen": -0.0009056901326403022, "logps/rejected": -2.6393790245056152, "loss": 0.3163, "nll_loss": 0.0790674239397049, "rewards/accuracies": 1.0, "rewards/chosen": -9.056901762960479e-05, "rewards/margins": 0.26384735107421875, "rewards/rejected": -0.26393792033195496, "step": 13845 }, { "epoch": 9.575380359612724, "grad_norm": 4.509950637817383, "learning_rate": 2.358998002151529e-06, "log_odds_chosen": 10.542989730834961, "log_odds_ratio": -9.281733218813315e-05, "logits/chosen": -0.3523010015487671, "logits/rejected": -0.36976903676986694, "logps/chosen": -0.00023095075448509306, "logps/rejected": -2.0211093425750732, "loss": 0.4824, "nll_loss": 0.12059512734413147, "rewards/accuracies": 1.0, "rewards/chosen": -2.309507726749871e-05, "rewards/margins": 0.20208783447742462, "rewards/rejected": -0.20211093127727509, "step": 13846 }, { "epoch": 9.576071922544951, "grad_norm": 3.3705577850341797, "learning_rate": 2.3551559858613802e-06, "log_odds_chosen": 10.59005355834961, "log_odds_ratio": -4.5606426283484325e-05, "logits/chosen": -0.3730345666408539, "logits/rejected": -0.30688732862472534, "logps/chosen": -0.00017936564108822495, "logps/rejected": -1.9504601955413818, "loss": 0.3621, "nll_loss": 0.09051172435283661, "rewards/accuracies": 1.0, "rewards/chosen": -1.7936563381226733e-05, "rewards/margins": 0.19502808153629303, "rewards/rejected": -0.19504602253437042, "step": 13847 }, { "epoch": 9.576763485477178, "grad_norm": 3.9667961597442627, "learning_rate": 2.351313969571231e-06, "log_odds_chosen": 11.09381103515625, "log_odds_ratio": -0.0005342273507267237, "logits/chosen": -0.44047313928604126, "logits/rejected": -0.476299911737442, "logps/chosen": -0.0002753309381660074, "logps/rejected": -2.385202407836914, "loss": 0.4616, "nll_loss": 0.11535707116127014, "rewards/accuracies": 1.0, "rewards/chosen": -2.7533096726983786e-05, "rewards/margins": 0.2384927123785019, "rewards/rejected": -0.23852024972438812, "step": 13848 }, { "epoch": 9.577455048409405, "grad_norm": 3.3474607467651367, "learning_rate": 2.3474719532810817e-06, "log_odds_chosen": 10.757822036743164, "log_odds_ratio": -0.0001743780157994479, "logits/chosen": -0.35189270973205566, "logits/rejected": -0.4230746328830719, "logps/chosen": -0.00025460452889092267, "logps/rejected": -1.9166150093078613, "loss": 0.3665, "nll_loss": 0.09159547090530396, "rewards/accuracies": 1.0, "rewards/chosen": -2.546044925111346e-05, "rewards/margins": 0.19163604080677032, "rewards/rejected": -0.1916615068912506, "step": 13849 }, { "epoch": 9.578146611341632, "grad_norm": 5.058994770050049, "learning_rate": 2.343629936990933e-06, "log_odds_chosen": 9.74898624420166, "log_odds_ratio": -0.04051890969276428, "logits/chosen": 0.23132355511188507, "logits/rejected": 0.2241034060716629, "logps/chosen": -0.015972377732396126, "logps/rejected": -2.487572431564331, "loss": 0.4615, "nll_loss": 0.11132311820983887, "rewards/accuracies": 1.0, "rewards/chosen": -0.0015972377732396126, "rewards/margins": 0.24715998768806458, "rewards/rejected": -0.2487572282552719, "step": 13850 }, { "epoch": 9.578838174273859, "grad_norm": 2.210313558578491, "learning_rate": 2.339787920700784e-06, "log_odds_chosen": 9.881656646728516, "log_odds_ratio": -0.00016877069720067084, "logits/chosen": -0.5011169910430908, "logits/rejected": -0.4511227011680603, "logps/chosen": -0.0003738755185622722, "logps/rejected": -1.4024717807769775, "loss": 0.203, "nll_loss": 0.05072278156876564, "rewards/accuracies": 1.0, "rewards/chosen": -3.738755185622722e-05, "rewards/margins": 0.14020980894565582, "rewards/rejected": -0.1402471959590912, "step": 13851 }, { "epoch": 9.579529737206085, "grad_norm": 3.426076650619507, "learning_rate": 2.335945904410635e-06, "log_odds_chosen": 12.249835968017578, "log_odds_ratio": -3.26341760228388e-05, "logits/chosen": -0.23818932473659515, "logits/rejected": -0.2377479523420334, "logps/chosen": -0.00019521964713931084, "logps/rejected": -3.2837133407592773, "loss": 0.3596, "nll_loss": 0.08990383893251419, "rewards/accuracies": 1.0, "rewards/chosen": -1.9521965441526845e-05, "rewards/margins": 0.32835185527801514, "rewards/rejected": -0.3283713757991791, "step": 13852 }, { "epoch": 9.580221300138312, "grad_norm": 3.3171756267547607, "learning_rate": 2.332103888120486e-06, "log_odds_chosen": 11.941597938537598, "log_odds_ratio": -7.733933671261184e-06, "logits/chosen": -0.16036270558834076, "logits/rejected": -0.14073586463928223, "logps/chosen": -5.014177440898493e-05, "logps/rejected": -2.030078649520874, "loss": 0.3642, "nll_loss": 0.0910460352897644, "rewards/accuracies": 1.0, "rewards/chosen": -5.014177531847963e-06, "rewards/margins": 0.20300287008285522, "rewards/rejected": -0.20300786197185516, "step": 13853 }, { "epoch": 9.58091286307054, "grad_norm": 2.973254442214966, "learning_rate": 2.3282618718303367e-06, "log_odds_chosen": 11.087392807006836, "log_odds_ratio": -2.4515626137144864e-05, "logits/chosen": -0.5213772058486938, "logits/rejected": -0.6343944072723389, "logps/chosen": -8.429721492575482e-05, "logps/rejected": -1.803528070449829, "loss": 0.2552, "nll_loss": 0.06380080431699753, "rewards/accuracies": 1.0, "rewards/chosen": -8.429721674474422e-06, "rewards/margins": 0.1803443729877472, "rewards/rejected": -0.1803528070449829, "step": 13854 }, { "epoch": 9.581604426002766, "grad_norm": 2.7656378746032715, "learning_rate": 2.3244198555401875e-06, "log_odds_chosen": 11.633098602294922, "log_odds_ratio": -3.7952137063257396e-05, "logits/chosen": -0.5965179204940796, "logits/rejected": -0.5844557285308838, "logps/chosen": -0.00021014529920648783, "logps/rejected": -2.6297168731689453, "loss": 0.2653, "nll_loss": 0.06631910800933838, "rewards/accuracies": 1.0, "rewards/chosen": -2.1014529920648783e-05, "rewards/margins": 0.26295068860054016, "rewards/rejected": -0.2629716992378235, "step": 13855 }, { "epoch": 9.582295988934993, "grad_norm": 3.864344358444214, "learning_rate": 2.3205778392500382e-06, "log_odds_chosen": 10.258223533630371, "log_odds_ratio": -0.00022534048184752464, "logits/chosen": 0.31239843368530273, "logits/rejected": 0.2385031133890152, "logps/chosen": -0.00014834718604106456, "logps/rejected": -1.5856529474258423, "loss": 0.534, "nll_loss": 0.13346663117408752, "rewards/accuracies": 1.0, "rewards/chosen": -1.4834718058409635e-05, "rewards/margins": 0.1585504710674286, "rewards/rejected": -0.15856531262397766, "step": 13856 }, { "epoch": 9.58298755186722, "grad_norm": 3.465324640274048, "learning_rate": 2.3167358229598894e-06, "log_odds_chosen": 11.916043281555176, "log_odds_ratio": -1.703957786958199e-05, "logits/chosen": -0.11070622503757477, "logits/rejected": 0.04024065285921097, "logps/chosen": -0.0001028151746140793, "logps/rejected": -2.7856462001800537, "loss": 0.4053, "nll_loss": 0.10132232308387756, "rewards/accuracies": 1.0, "rewards/chosen": -1.028151746140793e-05, "rewards/margins": 0.27855435013771057, "rewards/rejected": -0.2785646319389343, "step": 13857 }, { "epoch": 9.583679114799446, "grad_norm": 3.2913126945495605, "learning_rate": 2.3128938066697406e-06, "log_odds_chosen": 10.219085693359375, "log_odds_ratio": -7.539245416410267e-05, "logits/chosen": -0.7612735629081726, "logits/rejected": -0.6986575126647949, "logps/chosen": -0.00046573198051191866, "logps/rejected": -2.052171468734741, "loss": 0.5792, "nll_loss": 0.14479374885559082, "rewards/accuracies": 1.0, "rewards/chosen": -4.6573197323596105e-05, "rewards/margins": 0.20517057180404663, "rewards/rejected": -0.2052171379327774, "step": 13858 }, { "epoch": 9.584370677731673, "grad_norm": 3.505521774291992, "learning_rate": 2.3090517903795913e-06, "log_odds_chosen": 11.715803146362305, "log_odds_ratio": -5.075123772257939e-05, "logits/chosen": 0.008179709315299988, "logits/rejected": -0.07167384028434753, "logps/chosen": -0.00020146294264122844, "logps/rejected": -2.7927322387695312, "loss": 0.3128, "nll_loss": 0.07819778472185135, "rewards/accuracies": 1.0, "rewards/chosen": -2.0146295355516486e-05, "rewards/margins": 0.2792530655860901, "rewards/rejected": -0.27927321195602417, "step": 13859 }, { "epoch": 9.5850622406639, "grad_norm": 2.9111785888671875, "learning_rate": 2.3052097740894425e-06, "log_odds_chosen": 10.325887680053711, "log_odds_ratio": -9.02124447748065e-05, "logits/chosen": -0.510438084602356, "logits/rejected": -0.5248683094978333, "logps/chosen": -0.00034332674113102257, "logps/rejected": -2.0093259811401367, "loss": 0.2889, "nll_loss": 0.07222461700439453, "rewards/accuracies": 1.0, "rewards/chosen": -3.433267556829378e-05, "rewards/margins": 0.20089825987815857, "rewards/rejected": -0.2009325921535492, "step": 13860 }, { "epoch": 9.585753803596127, "grad_norm": 3.1902220249176025, "learning_rate": 2.3013677577992933e-06, "log_odds_chosen": 11.665227890014648, "log_odds_ratio": -2.9300321330083534e-05, "logits/chosen": -0.01832330971956253, "logits/rejected": 0.08498133718967438, "logps/chosen": -0.00017734138236846775, "logps/rejected": -2.832918643951416, "loss": 0.398, "nll_loss": 0.09950891882181168, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734138964442536e-05, "rewards/margins": 0.2832741439342499, "rewards/rejected": -0.28329187631607056, "step": 13861 }, { "epoch": 9.586445366528354, "grad_norm": 3.167797803878784, "learning_rate": 2.297525741509144e-06, "log_odds_chosen": 9.807540893554688, "log_odds_ratio": -0.00013357223360799253, "logits/chosen": -0.36445263028144836, "logits/rejected": -0.37488484382629395, "logps/chosen": -0.00033911221544258296, "logps/rejected": -1.630523681640625, "loss": 0.3235, "nll_loss": 0.08087349683046341, "rewards/accuracies": 1.0, "rewards/chosen": -3.3911226637428626e-05, "rewards/margins": 0.16301846504211426, "rewards/rejected": -0.16305238008499146, "step": 13862 }, { "epoch": 9.58713692946058, "grad_norm": 10.112004280090332, "learning_rate": 2.2936837252189948e-06, "log_odds_chosen": 10.343138694763184, "log_odds_ratio": -0.0001180757099064067, "logits/chosen": 0.09524346888065338, "logits/rejected": 0.11811558902263641, "logps/chosen": -0.00031170775764621794, "logps/rejected": -2.2311322689056396, "loss": 0.6984, "nll_loss": 0.17459022998809814, "rewards/accuracies": 1.0, "rewards/chosen": -3.117077721981332e-05, "rewards/margins": 0.2230820655822754, "rewards/rejected": -0.22311323881149292, "step": 13863 }, { "epoch": 9.587828492392807, "grad_norm": 2.779890537261963, "learning_rate": 2.289841708928846e-06, "log_odds_chosen": 11.135278701782227, "log_odds_ratio": -2.6663194148568437e-05, "logits/chosen": -0.44592225551605225, "logits/rejected": -0.49121996760368347, "logps/chosen": -0.0001397555461153388, "logps/rejected": -2.0133450031280518, "loss": 0.305, "nll_loss": 0.07624665647745132, "rewards/accuracies": 1.0, "rewards/chosen": -1.3975553883938119e-05, "rewards/margins": 0.20132051408290863, "rewards/rejected": -0.20133450627326965, "step": 13864 }, { "epoch": 9.588520055325034, "grad_norm": 5.535505294799805, "learning_rate": 2.2859996926386967e-06, "log_odds_chosen": 11.428829193115234, "log_odds_ratio": -3.691585880005732e-05, "logits/chosen": -0.14533650875091553, "logits/rejected": -0.18048177659511566, "logps/chosen": -0.00014477927470579743, "logps/rejected": -2.263044834136963, "loss": 0.4133, "nll_loss": 0.10332509130239487, "rewards/accuracies": 1.0, "rewards/chosen": -1.4477927834377624e-05, "rewards/margins": 0.2262900173664093, "rewards/rejected": -0.22630450129508972, "step": 13865 }, { "epoch": 9.589211618257261, "grad_norm": 2.9248528480529785, "learning_rate": 2.282157676348548e-06, "log_odds_chosen": 10.032796859741211, "log_odds_ratio": -8.267858356703073e-05, "logits/chosen": -0.1600610464811325, "logits/rejected": -0.16781851649284363, "logps/chosen": -0.00020983436843380332, "logps/rejected": -1.4508682489395142, "loss": 0.4767, "nll_loss": 0.11916402727365494, "rewards/accuracies": 1.0, "rewards/chosen": -2.098343611578457e-05, "rewards/margins": 0.14506584405899048, "rewards/rejected": -0.14508682489395142, "step": 13866 }, { "epoch": 9.589903181189488, "grad_norm": 3.026639223098755, "learning_rate": 2.278315660058399e-06, "log_odds_chosen": 11.75421142578125, "log_odds_ratio": -0.0001594477507751435, "logits/chosen": 0.08484269678592682, "logits/rejected": 0.09943962097167969, "logps/chosen": -7.933591405162588e-05, "logps/rejected": -2.1240549087524414, "loss": 0.3204, "nll_loss": 0.08007805049419403, "rewards/accuracies": 1.0, "rewards/chosen": -7.933591405162588e-06, "rewards/margins": 0.21239754557609558, "rewards/rejected": -0.2124055027961731, "step": 13867 }, { "epoch": 9.590594744121715, "grad_norm": 2.964529514312744, "learning_rate": 2.2744736437682498e-06, "log_odds_chosen": 11.529214859008789, "log_odds_ratio": -6.775275687687099e-05, "logits/chosen": -0.5893282890319824, "logits/rejected": -0.5058203935623169, "logps/chosen": -0.00045186851639300585, "logps/rejected": -2.9115042686462402, "loss": 0.3896, "nll_loss": 0.09739640355110168, "rewards/accuracies": 1.0, "rewards/chosen": -4.518685818766244e-05, "rewards/margins": 0.2911052405834198, "rewards/rejected": -0.29115042090415955, "step": 13868 }, { "epoch": 9.591286307053942, "grad_norm": 2.6916327476501465, "learning_rate": 2.2706316274781005e-06, "log_odds_chosen": 11.157115936279297, "log_odds_ratio": -0.00023880114895291626, "logits/chosen": -0.18521174788475037, "logits/rejected": -0.14967837929725647, "logps/chosen": -0.00012755044735968113, "logps/rejected": -1.9115530252456665, "loss": 0.3149, "nll_loss": 0.07870766520500183, "rewards/accuracies": 1.0, "rewards/chosen": -1.2755044735968113e-05, "rewards/margins": 0.19114252924919128, "rewards/rejected": -0.1911552995443344, "step": 13869 }, { "epoch": 9.591977869986168, "grad_norm": 4.349775314331055, "learning_rate": 2.2667896111879517e-06, "log_odds_chosen": 11.370203018188477, "log_odds_ratio": -4.0916835132520646e-05, "logits/chosen": -0.10869202762842178, "logits/rejected": -0.1567172408103943, "logps/chosen": -0.00013811516691930592, "logps/rejected": -2.493417739868164, "loss": 0.4697, "nll_loss": 0.11743083596229553, "rewards/accuracies": 1.0, "rewards/chosen": -1.381151560053695e-05, "rewards/margins": 0.24932795763015747, "rewards/rejected": -0.24934178590774536, "step": 13870 }, { "epoch": 9.592669432918395, "grad_norm": 4.363363742828369, "learning_rate": 2.2629475948978024e-06, "log_odds_chosen": 12.449634552001953, "log_odds_ratio": -5.222107574809343e-06, "logits/chosen": -0.07508926093578339, "logits/rejected": -0.06943850219249725, "logps/chosen": -9.200820932164788e-05, "logps/rejected": -2.8852505683898926, "loss": 0.4792, "nll_loss": 0.11979883909225464, "rewards/accuracies": 1.0, "rewards/chosen": -9.200821295962669e-06, "rewards/margins": 0.2885158658027649, "rewards/rejected": -0.2885250449180603, "step": 13871 }, { "epoch": 9.593360995850622, "grad_norm": 5.529043197631836, "learning_rate": 2.259105578607653e-06, "log_odds_chosen": 11.823799133300781, "log_odds_ratio": -2.6034242182504386e-05, "logits/chosen": -0.05254924297332764, "logits/rejected": -0.12436945736408234, "logps/chosen": -0.00015545799396932125, "logps/rejected": -2.569683790206909, "loss": 0.5739, "nll_loss": 0.14347787201404572, "rewards/accuracies": 1.0, "rewards/chosen": -1.5545800124527887e-05, "rewards/margins": 0.25695285201072693, "rewards/rejected": -0.2569683790206909, "step": 13872 }, { "epoch": 9.594052558782849, "grad_norm": 3.2023260593414307, "learning_rate": 2.2552635623175044e-06, "log_odds_chosen": 10.723604202270508, "log_odds_ratio": -5.572741065407172e-05, "logits/chosen": -0.3888397812843323, "logits/rejected": -0.3862932622432709, "logps/chosen": -0.00012830989726353437, "logps/rejected": -1.6381213665008545, "loss": 0.324, "nll_loss": 0.08099858462810516, "rewards/accuracies": 1.0, "rewards/chosen": -1.2830989362555556e-05, "rewards/margins": 0.16379928588867188, "rewards/rejected": -0.16381213068962097, "step": 13873 }, { "epoch": 9.594744121715076, "grad_norm": 3.7831342220306396, "learning_rate": 2.2514215460273555e-06, "log_odds_chosen": 11.306631088256836, "log_odds_ratio": -6.008523632772267e-05, "logits/chosen": -0.34363579750061035, "logits/rejected": -0.3957400321960449, "logps/chosen": -0.00016849691746756434, "logps/rejected": -2.431356191635132, "loss": 0.4075, "nll_loss": 0.10185706615447998, "rewards/accuracies": 1.0, "rewards/chosen": -1.6849691746756434e-05, "rewards/margins": 0.2431187778711319, "rewards/rejected": -0.24313563108444214, "step": 13874 }, { "epoch": 9.595435684647303, "grad_norm": 3.419001340866089, "learning_rate": 2.2475795297372063e-06, "log_odds_chosen": 11.561694145202637, "log_odds_ratio": -0.00010816368012456223, "logits/chosen": -0.4011830687522888, "logits/rejected": -0.34620726108551025, "logps/chosen": -0.00036602304317057133, "logps/rejected": -2.66555118560791, "loss": 0.526, "nll_loss": 0.13147681951522827, "rewards/accuracies": 1.0, "rewards/chosen": -3.66023086826317e-05, "rewards/margins": 0.2665185332298279, "rewards/rejected": -0.26655513048171997, "step": 13875 }, { "epoch": 9.59612724757953, "grad_norm": 4.279079914093018, "learning_rate": 2.243737513447057e-06, "log_odds_chosen": 10.130746841430664, "log_odds_ratio": -0.0002273559512104839, "logits/chosen": -0.3535163402557373, "logits/rejected": -0.47031304240226746, "logps/chosen": -0.0006931457901373506, "logps/rejected": -2.424685478210449, "loss": 0.4603, "nll_loss": 0.11506015807390213, "rewards/accuracies": 1.0, "rewards/chosen": -6.931458483450115e-05, "rewards/margins": 0.24239924550056458, "rewards/rejected": -0.24246855080127716, "step": 13876 }, { "epoch": 9.596818810511756, "grad_norm": 3.7910823822021484, "learning_rate": 2.239895497156908e-06, "log_odds_chosen": 10.984574317932129, "log_odds_ratio": -8.284907380584627e-05, "logits/chosen": -0.06811004877090454, "logits/rejected": -0.14354632794857025, "logps/chosen": -0.00013478229811880738, "logps/rejected": -2.0187888145446777, "loss": 0.3664, "nll_loss": 0.09160007536411285, "rewards/accuracies": 1.0, "rewards/chosen": -1.3478229448082857e-05, "rewards/margins": 0.20186540484428406, "rewards/rejected": -0.2018788754940033, "step": 13877 }, { "epoch": 9.597510373443983, "grad_norm": 3.4868521690368652, "learning_rate": 2.236053480866759e-06, "log_odds_chosen": 11.839643478393555, "log_odds_ratio": -1.3536369806388393e-05, "logits/chosen": 0.13439291715621948, "logits/rejected": 0.010013069957494736, "logps/chosen": -0.0003978666791226715, "logps/rejected": -3.200017213821411, "loss": 0.3056, "nll_loss": 0.07640783488750458, "rewards/accuracies": 1.0, "rewards/chosen": -3.978666791226715e-05, "rewards/margins": 0.31996193528175354, "rewards/rejected": -0.3200017213821411, "step": 13878 }, { "epoch": 9.59820193637621, "grad_norm": 2.500501871109009, "learning_rate": 2.2322114645766097e-06, "log_odds_chosen": 10.714197158813477, "log_odds_ratio": -5.230196984484792e-05, "logits/chosen": -0.3795170783996582, "logits/rejected": -0.44043371081352234, "logps/chosen": -0.00018136526341550052, "logps/rejected": -2.1343531608581543, "loss": 0.3357, "nll_loss": 0.08392222970724106, "rewards/accuracies": 1.0, "rewards/chosen": -1.8136528524337336e-05, "rewards/margins": 0.213417187333107, "rewards/rejected": -0.2134353220462799, "step": 13879 }, { "epoch": 9.598893499308437, "grad_norm": 7.815499782562256, "learning_rate": 2.228369448286461e-06, "log_odds_chosen": 11.23033618927002, "log_odds_ratio": -0.00029832214931957424, "logits/chosen": -0.12109455466270447, "logits/rejected": -0.20619699358940125, "logps/chosen": -0.00010856491280719638, "logps/rejected": -2.1542465686798096, "loss": 0.7267, "nll_loss": 0.18164558708667755, "rewards/accuracies": 1.0, "rewards/chosen": -1.0856490916921757e-05, "rewards/margins": 0.21541380882263184, "rewards/rejected": -0.21542467176914215, "step": 13880 }, { "epoch": 9.599585062240664, "grad_norm": 3.6803181171417236, "learning_rate": 2.224527431996312e-06, "log_odds_chosen": 11.81740665435791, "log_odds_ratio": -4.97671753691975e-05, "logits/chosen": -0.6466069221496582, "logits/rejected": -0.6269413232803345, "logps/chosen": -0.0004883024375885725, "logps/rejected": -3.5680038928985596, "loss": 0.4366, "nll_loss": 0.10914058983325958, "rewards/accuracies": 1.0, "rewards/chosen": -4.883024303126149e-05, "rewards/margins": 0.35675156116485596, "rewards/rejected": -0.356800377368927, "step": 13881 }, { "epoch": 9.60027662517289, "grad_norm": 2.4747352600097656, "learning_rate": 2.220685415706163e-06, "log_odds_chosen": 10.944051742553711, "log_odds_ratio": -2.3117994714993984e-05, "logits/chosen": -0.5824406743049622, "logits/rejected": -0.2831963300704956, "logps/chosen": -0.00017456647765357047, "logps/rejected": -2.2170677185058594, "loss": 0.2905, "nll_loss": 0.07261285185813904, "rewards/accuracies": 1.0, "rewards/chosen": -1.745664849295281e-05, "rewards/margins": 0.22168932855129242, "rewards/rejected": -0.22170677781105042, "step": 13882 }, { "epoch": 9.600968188105117, "grad_norm": 4.029825210571289, "learning_rate": 2.2168433994160135e-06, "log_odds_chosen": 10.969319343566895, "log_odds_ratio": -2.87359634967288e-05, "logits/chosen": -0.15640632808208466, "logits/rejected": -0.18583597242832184, "logps/chosen": -0.00016927471733652055, "logps/rejected": -2.1790919303894043, "loss": 0.368, "nll_loss": 0.09199932962656021, "rewards/accuracies": 1.0, "rewards/chosen": -1.6927471733652055e-05, "rewards/margins": 0.21789227426052094, "rewards/rejected": -0.21790921688079834, "step": 13883 }, { "epoch": 9.601659751037344, "grad_norm": 3.1205484867095947, "learning_rate": 2.2130013831258647e-06, "log_odds_chosen": 12.199348449707031, "log_odds_ratio": -9.655268513597548e-06, "logits/chosen": -0.12709780037403107, "logits/rejected": -0.17708489298820496, "logps/chosen": -0.00011951341002713889, "logps/rejected": -3.046433448791504, "loss": 0.3709, "nll_loss": 0.09273400157690048, "rewards/accuracies": 1.0, "rewards/chosen": -1.1951340638916008e-05, "rewards/margins": 0.30463141202926636, "rewards/rejected": -0.30464333295822144, "step": 13884 }, { "epoch": 9.60235131396957, "grad_norm": 2.562211751937866, "learning_rate": 2.2091593668357155e-06, "log_odds_chosen": 10.068363189697266, "log_odds_ratio": -0.00015095957496669143, "logits/chosen": -0.15639328956604004, "logits/rejected": -0.19314239919185638, "logps/chosen": -0.00022999334032647312, "logps/rejected": -1.699575662612915, "loss": 0.2787, "nll_loss": 0.06966718286275864, "rewards/accuracies": 1.0, "rewards/chosen": -2.2999334760243073e-05, "rewards/margins": 0.16993457078933716, "rewards/rejected": -0.16995757818222046, "step": 13885 }, { "epoch": 9.603042876901798, "grad_norm": 3.483767032623291, "learning_rate": 2.2053173505455662e-06, "log_odds_chosen": 12.795426368713379, "log_odds_ratio": -4.1594525100663304e-05, "logits/chosen": -0.3526758551597595, "logits/rejected": -0.4029574990272522, "logps/chosen": -0.0001118480577133596, "logps/rejected": -3.419715642929077, "loss": 0.3472, "nll_loss": 0.08679346740245819, "rewards/accuracies": 1.0, "rewards/chosen": -1.118480577133596e-05, "rewards/margins": 0.34196043014526367, "rewards/rejected": -0.34197157621383667, "step": 13886 }, { "epoch": 9.603734439834025, "grad_norm": 2.8883213996887207, "learning_rate": 2.2014753342554174e-06, "log_odds_chosen": 10.48097038269043, "log_odds_ratio": -8.504984725732356e-05, "logits/chosen": -0.3213323950767517, "logits/rejected": -0.3076537251472473, "logps/chosen": -0.0004419469041749835, "logps/rejected": -2.042109966278076, "loss": 0.2717, "nll_loss": 0.06790684163570404, "rewards/accuracies": 1.0, "rewards/chosen": -4.419469041749835e-05, "rewards/margins": 0.20416679978370667, "rewards/rejected": -0.20421099662780762, "step": 13887 }, { "epoch": 9.604426002766251, "grad_norm": 3.354330062866211, "learning_rate": 2.197633317965268e-06, "log_odds_chosen": 11.06173324584961, "log_odds_ratio": -2.4618173483759165e-05, "logits/chosen": -0.22894491255283356, "logits/rejected": -0.22772037982940674, "logps/chosen": -0.00034024231717921793, "logps/rejected": -2.3082427978515625, "loss": 0.3857, "nll_loss": 0.09642204642295837, "rewards/accuracies": 1.0, "rewards/chosen": -3.4024233173113316e-05, "rewards/margins": 0.23079025745391846, "rewards/rejected": -0.230824276804924, "step": 13888 }, { "epoch": 9.605117565698478, "grad_norm": 4.197360992431641, "learning_rate": 2.1937913016751193e-06, "log_odds_chosen": 10.922062873840332, "log_odds_ratio": -2.6824922315427102e-05, "logits/chosen": -0.09128378331661224, "logits/rejected": -0.1841985583305359, "logps/chosen": -0.00023976791999302804, "logps/rejected": -2.2560863494873047, "loss": 0.4823, "nll_loss": 0.12056770920753479, "rewards/accuracies": 1.0, "rewards/chosen": -2.397679418209009e-05, "rewards/margins": 0.22558467090129852, "rewards/rejected": -0.22560864686965942, "step": 13889 }, { "epoch": 9.605809128630705, "grad_norm": 3.4729881286621094, "learning_rate": 2.18994928538497e-06, "log_odds_chosen": 11.6234769821167, "log_odds_ratio": -1.936111038958188e-05, "logits/chosen": -0.6802545189857483, "logits/rejected": -0.603384256362915, "logps/chosen": -8.280224574264139e-05, "logps/rejected": -2.281142234802246, "loss": 0.3669, "nll_loss": 0.09171170741319656, "rewards/accuracies": 1.0, "rewards/chosen": -8.2802253018599e-06, "rewards/margins": 0.22810596227645874, "rewards/rejected": -0.22811424732208252, "step": 13890 }, { "epoch": 9.606500691562932, "grad_norm": 2.6800572872161865, "learning_rate": 2.1861072690948212e-06, "log_odds_chosen": 11.265277862548828, "log_odds_ratio": -1.970949597307481e-05, "logits/chosen": -0.1551126390695572, "logits/rejected": -0.20388063788414001, "logps/chosen": -8.644152694614604e-05, "logps/rejected": -1.9881235361099243, "loss": 0.2974, "nll_loss": 0.07434845715761185, "rewards/accuracies": 1.0, "rewards/chosen": -8.644152330816723e-06, "rewards/margins": 0.19880370795726776, "rewards/rejected": -0.198812335729599, "step": 13891 }, { "epoch": 9.607192254495159, "grad_norm": 3.867112636566162, "learning_rate": 2.182265252804672e-06, "log_odds_chosen": 11.430559158325195, "log_odds_ratio": -2.1698100681533106e-05, "logits/chosen": -0.36402902007102966, "logits/rejected": -0.3568218946456909, "logps/chosen": -0.00016789848450571299, "logps/rejected": -1.9694077968597412, "loss": 0.4019, "nll_loss": 0.1004675030708313, "rewards/accuracies": 1.0, "rewards/chosen": -1.678984881436918e-05, "rewards/margins": 0.19692397117614746, "rewards/rejected": -0.19694077968597412, "step": 13892 }, { "epoch": 9.607883817427386, "grad_norm": 2.5731377601623535, "learning_rate": 2.1784232365145227e-06, "log_odds_chosen": 11.45120906829834, "log_odds_ratio": -1.9578732462832704e-05, "logits/chosen": -0.4507691264152527, "logits/rejected": -0.5357175469398499, "logps/chosen": -0.00018559573800303042, "logps/rejected": -2.5820508003234863, "loss": 0.2696, "nll_loss": 0.06739149987697601, "rewards/accuracies": 1.0, "rewards/chosen": -1.8559574527898803e-05, "rewards/margins": 0.2581865191459656, "rewards/rejected": -0.2582050859928131, "step": 13893 }, { "epoch": 9.608575380359612, "grad_norm": 3.959655284881592, "learning_rate": 2.174581220224374e-06, "log_odds_chosen": 11.153068542480469, "log_odds_ratio": -0.00017264412599615753, "logits/chosen": -0.05517587810754776, "logits/rejected": -0.016276437789201736, "logps/chosen": -0.00031824197503738105, "logps/rejected": -2.672032356262207, "loss": 0.4601, "nll_loss": 0.11499620974063873, "rewards/accuracies": 1.0, "rewards/chosen": -3.182419823133387e-05, "rewards/margins": 0.2671714425086975, "rewards/rejected": -0.2672032415866852, "step": 13894 }, { "epoch": 9.60926694329184, "grad_norm": 4.496756553649902, "learning_rate": 2.1707392039342247e-06, "log_odds_chosen": 11.771528244018555, "log_odds_ratio": -1.823817910917569e-05, "logits/chosen": -0.1999531090259552, "logits/rejected": -0.2911415696144104, "logps/chosen": -0.0001995821949094534, "logps/rejected": -3.107776403427124, "loss": 0.6024, "nll_loss": 0.15058927237987518, "rewards/accuracies": 1.0, "rewards/chosen": -1.9958217308158055e-05, "rewards/margins": 0.31075769662857056, "rewards/rejected": -0.3107776641845703, "step": 13895 }, { "epoch": 9.609958506224066, "grad_norm": 3.24926495552063, "learning_rate": 2.166897187644076e-06, "log_odds_chosen": 10.774518966674805, "log_odds_ratio": -3.353306601638906e-05, "logits/chosen": 0.20405946671962738, "logits/rejected": 0.040008507668972015, "logps/chosen": -0.0013074527960270643, "logps/rejected": -2.6293416023254395, "loss": 0.4048, "nll_loss": 0.10120494663715363, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013074529124423862, "rewards/margins": 0.26280343532562256, "rewards/rejected": -0.2629341781139374, "step": 13896 }, { "epoch": 9.610650069156293, "grad_norm": 4.835551738739014, "learning_rate": 2.1630551713539266e-06, "log_odds_chosen": 10.513887405395508, "log_odds_ratio": -0.00015845283633098006, "logits/chosen": -0.0554099939763546, "logits/rejected": -0.13395971059799194, "logps/chosen": -0.00025557586923241615, "logps/rejected": -1.9353229999542236, "loss": 0.3504, "nll_loss": 0.08758310228586197, "rewards/accuracies": 1.0, "rewards/chosen": -2.5557586923241615e-05, "rewards/margins": 0.19350674748420715, "rewards/rejected": -0.1935323029756546, "step": 13897 }, { "epoch": 9.61134163208852, "grad_norm": 4.209211826324463, "learning_rate": 2.1592131550637777e-06, "log_odds_chosen": 10.88747787475586, "log_odds_ratio": -4.2023919377243146e-05, "logits/chosen": -0.1938972771167755, "logits/rejected": -0.1918545663356781, "logps/chosen": -0.0004351716488599777, "logps/rejected": -2.025944471359253, "loss": 0.5448, "nll_loss": 0.13619840145111084, "rewards/accuracies": 1.0, "rewards/chosen": -4.351716779638082e-05, "rewards/margins": 0.2025509476661682, "rewards/rejected": -0.20259445905685425, "step": 13898 }, { "epoch": 9.612033195020746, "grad_norm": 3.5081121921539307, "learning_rate": 2.1553711387736285e-06, "log_odds_chosen": 11.063932418823242, "log_odds_ratio": -3.3935433748411015e-05, "logits/chosen": -0.4234510660171509, "logits/rejected": -0.4184904396533966, "logps/chosen": -0.00022541767975781113, "logps/rejected": -2.182734727859497, "loss": 0.5019, "nll_loss": 0.12546983361244202, "rewards/accuracies": 1.0, "rewards/chosen": -2.2541767975781113e-05, "rewards/margins": 0.21825093030929565, "rewards/rejected": -0.21827347576618195, "step": 13899 }, { "epoch": 9.612724757952973, "grad_norm": 3.9560019969940186, "learning_rate": 2.1515291224834792e-06, "log_odds_chosen": 10.76880931854248, "log_odds_ratio": -5.8079334849026054e-05, "logits/chosen": -0.054325178265571594, "logits/rejected": -0.10271134972572327, "logps/chosen": -0.00017941728583537042, "logps/rejected": -1.9920737743377686, "loss": 0.3548, "nll_loss": 0.08869768679141998, "rewards/accuracies": 1.0, "rewards/chosen": -1.7941729311132804e-05, "rewards/margins": 0.1991894394159317, "rewards/rejected": -0.1992073804140091, "step": 13900 }, { "epoch": 9.6134163208852, "grad_norm": 2.539076328277588, "learning_rate": 2.1476871061933304e-06, "log_odds_chosen": 10.76081657409668, "log_odds_ratio": -0.00012573970889206976, "logits/chosen": -0.16937761008739471, "logits/rejected": -0.2564617395401001, "logps/chosen": -0.00027681823121383786, "logps/rejected": -2.3275856971740723, "loss": 0.2439, "nll_loss": 0.06097453832626343, "rewards/accuracies": 1.0, "rewards/chosen": -2.7681826395564713e-05, "rewards/margins": 0.232730895280838, "rewards/rejected": -0.23275858163833618, "step": 13901 }, { "epoch": 9.614107883817427, "grad_norm": 2.375969648361206, "learning_rate": 2.143845089903181e-06, "log_odds_chosen": 10.718599319458008, "log_odds_ratio": -5.3689509513787925e-05, "logits/chosen": -0.0198553204536438, "logits/rejected": -0.153344064950943, "logps/chosen": -0.0011022513499483466, "logps/rejected": -1.8870506286621094, "loss": 0.2744, "nll_loss": 0.06859680265188217, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011022514081560075, "rewards/margins": 0.18859481811523438, "rewards/rejected": -0.18870505690574646, "step": 13902 }, { "epoch": 9.614799446749654, "grad_norm": 2.5292680263519287, "learning_rate": 2.1400030736130323e-06, "log_odds_chosen": 10.61046314239502, "log_odds_ratio": -5.377128763939254e-05, "logits/chosen": -0.14776532351970673, "logits/rejected": -0.16533081233501434, "logps/chosen": -0.00022588277352042496, "logps/rejected": -1.6798335313796997, "loss": 0.2788, "nll_loss": 0.06968588382005692, "rewards/accuracies": 1.0, "rewards/chosen": -2.2588277715840377e-05, "rewards/margins": 0.1679607778787613, "rewards/rejected": -0.16798336803913116, "step": 13903 }, { "epoch": 9.61549100968188, "grad_norm": 4.080808162689209, "learning_rate": 2.1361610573228835e-06, "log_odds_chosen": 12.258644104003906, "log_odds_ratio": -1.09394122773665e-05, "logits/chosen": -0.3831287920475006, "logits/rejected": -0.47929805517196655, "logps/chosen": -0.00010576730710454285, "logps/rejected": -2.9734280109405518, "loss": 0.4296, "nll_loss": 0.10739340633153915, "rewards/accuracies": 1.0, "rewards/chosen": -1.0576731256151106e-05, "rewards/margins": 0.297332227230072, "rewards/rejected": -0.29734277725219727, "step": 13904 }, { "epoch": 9.616182572614107, "grad_norm": 2.973907470703125, "learning_rate": 2.1323190410327343e-06, "log_odds_chosen": 9.683038711547852, "log_odds_ratio": -0.0007329158834181726, "logits/chosen": 0.13456621766090393, "logits/rejected": 0.10806768387556076, "logps/chosen": -0.0017268441151827574, "logps/rejected": -1.5090343952178955, "loss": 0.2626, "nll_loss": 0.06558310985565186, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017268442024942487, "rewards/margins": 0.15073075890541077, "rewards/rejected": -0.15090343356132507, "step": 13905 }, { "epoch": 9.616874135546334, "grad_norm": 3.142827272415161, "learning_rate": 2.128477024742585e-06, "log_odds_chosen": 11.427976608276367, "log_odds_ratio": -2.8424015908967704e-05, "logits/chosen": -0.47292864322662354, "logits/rejected": -0.47138306498527527, "logps/chosen": -0.00012114901619497687, "logps/rejected": -2.115497589111328, "loss": 0.3664, "nll_loss": 0.0915897786617279, "rewards/accuracies": 1.0, "rewards/chosen": -1.2114901437598746e-05, "rewards/margins": 0.2115376591682434, "rewards/rejected": -0.2115497589111328, "step": 13906 }, { "epoch": 9.617565698478561, "grad_norm": 17.765735626220703, "learning_rate": 2.1246350084524358e-06, "log_odds_chosen": 10.096508026123047, "log_odds_ratio": -0.0651700347661972, "logits/chosen": -0.03107866644859314, "logits/rejected": -0.19261762499809265, "logps/chosen": -0.010400773957371712, "logps/rejected": -3.08225154876709, "loss": 0.7105, "nll_loss": 0.17109745740890503, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010400773026049137, "rewards/margins": 0.3071851134300232, "rewards/rejected": -0.30822518467903137, "step": 13907 }, { "epoch": 9.618257261410788, "grad_norm": 3.9724233150482178, "learning_rate": 2.120792992162287e-06, "log_odds_chosen": 10.521794319152832, "log_odds_ratio": -0.00015680433716624975, "logits/chosen": 0.06511492282152176, "logits/rejected": 0.00012525171041488647, "logps/chosen": -0.00021123632905073464, "logps/rejected": -1.658719778060913, "loss": 0.5107, "nll_loss": 0.12765847146511078, "rewards/accuracies": 1.0, "rewards/chosen": -2.1123632905073464e-05, "rewards/margins": 0.16585084795951843, "rewards/rejected": -0.1658719778060913, "step": 13908 }, { "epoch": 9.618948824343015, "grad_norm": 2.43673038482666, "learning_rate": 2.1169509758721377e-06, "log_odds_chosen": 10.04551887512207, "log_odds_ratio": -0.00024950189981609583, "logits/chosen": -0.14085373282432556, "logits/rejected": -0.1827581524848938, "logps/chosen": -0.0010271634673699737, "logps/rejected": -2.2036585807800293, "loss": 0.2702, "nll_loss": 0.0675291195511818, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010271634528180584, "rewards/margins": 0.22026312351226807, "rewards/rejected": -0.22036585211753845, "step": 13909 }, { "epoch": 9.619640387275242, "grad_norm": 2.886796236038208, "learning_rate": 2.1131089595819884e-06, "log_odds_chosen": 11.632570266723633, "log_odds_ratio": -1.8629045371199027e-05, "logits/chosen": -0.13659584522247314, "logits/rejected": -0.128843292593956, "logps/chosen": -0.00016159014194272459, "logps/rejected": -2.6940741539001465, "loss": 0.2098, "nll_loss": 0.05246026813983917, "rewards/accuracies": 1.0, "rewards/chosen": -1.615901419427246e-05, "rewards/margins": 0.2693912386894226, "rewards/rejected": -0.26940739154815674, "step": 13910 }, { "epoch": 9.620331950207468, "grad_norm": 2.791591167449951, "learning_rate": 2.1092669432918396e-06, "log_odds_chosen": 11.185066223144531, "log_odds_ratio": -2.2798591089667752e-05, "logits/chosen": -0.33810728788375854, "logits/rejected": -0.4625110328197479, "logps/chosen": -0.00020533816132228822, "logps/rejected": -1.8624236583709717, "loss": 0.3463, "nll_loss": 0.08656968921422958, "rewards/accuracies": 1.0, "rewards/chosen": -2.0533816496026702e-05, "rewards/margins": 0.18622180819511414, "rewards/rejected": -0.18624237179756165, "step": 13911 }, { "epoch": 9.621023513139695, "grad_norm": 3.6606931686401367, "learning_rate": 2.1054249270016908e-06, "log_odds_chosen": 11.881719589233398, "log_odds_ratio": -4.465238453121856e-05, "logits/chosen": -0.5093518495559692, "logits/rejected": -0.41257208585739136, "logps/chosen": -0.00023769350082147866, "logps/rejected": -3.3733842372894287, "loss": 0.2947, "nll_loss": 0.07367514818906784, "rewards/accuracies": 1.0, "rewards/chosen": -2.3769349354552105e-05, "rewards/margins": 0.3373146653175354, "rewards/rejected": -0.3373384475708008, "step": 13912 }, { "epoch": 9.621715076071922, "grad_norm": 2.6280322074890137, "learning_rate": 2.1015829107115415e-06, "log_odds_chosen": 9.698561668395996, "log_odds_ratio": -0.0002886706788558513, "logits/chosen": -0.5018261075019836, "logits/rejected": -0.5304703712463379, "logps/chosen": -0.0005297226598486304, "logps/rejected": -1.5569900274276733, "loss": 0.253, "nll_loss": 0.0632205531001091, "rewards/accuracies": 1.0, "rewards/chosen": -5.2972267440054566e-05, "rewards/margins": 0.15564602613449097, "rewards/rejected": -0.1556989997625351, "step": 13913 }, { "epoch": 9.622406639004149, "grad_norm": 3.4627389907836914, "learning_rate": 2.0977408944213923e-06, "log_odds_chosen": 11.172958374023438, "log_odds_ratio": -0.00016042341303545982, "logits/chosen": -0.2492392212152481, "logits/rejected": -0.2748255729675293, "logps/chosen": -0.0007011451525613666, "logps/rejected": -2.1626133918762207, "loss": 0.3737, "nll_loss": 0.09341553598642349, "rewards/accuracies": 1.0, "rewards/chosen": -7.01145181665197e-05, "rewards/margins": 0.21619121730327606, "rewards/rejected": -0.21626132726669312, "step": 13914 }, { "epoch": 9.623098201936376, "grad_norm": 4.134411811828613, "learning_rate": 2.0938988781312434e-06, "log_odds_chosen": 11.793828010559082, "log_odds_ratio": -1.4233235560823232e-05, "logits/chosen": -0.28255489468574524, "logits/rejected": -0.26436835527420044, "logps/chosen": -0.00012407053145579994, "logps/rejected": -2.460082530975342, "loss": 0.4142, "nll_loss": 0.10354601591825485, "rewards/accuracies": 1.0, "rewards/chosen": -1.2407054782670457e-05, "rewards/margins": 0.24599584937095642, "rewards/rejected": -0.2460082769393921, "step": 13915 }, { "epoch": 9.623789764868603, "grad_norm": 3.257009744644165, "learning_rate": 2.090056861841094e-06, "log_odds_chosen": 10.670095443725586, "log_odds_ratio": -0.0003247207496315241, "logits/chosen": -0.47446343302726746, "logits/rejected": -0.46082183718681335, "logps/chosen": -0.0009013573289848864, "logps/rejected": -2.077688217163086, "loss": 0.4598, "nll_loss": 0.11492659151554108, "rewards/accuracies": 1.0, "rewards/chosen": -9.013573435368016e-05, "rewards/margins": 0.20767870545387268, "rewards/rejected": -0.20776882767677307, "step": 13916 }, { "epoch": 9.62448132780083, "grad_norm": 3.5385079383850098, "learning_rate": 2.086214845550945e-06, "log_odds_chosen": 10.039594650268555, "log_odds_ratio": -0.0008481164113618433, "logits/chosen": -0.4221859872341156, "logits/rejected": -0.46361225843429565, "logps/chosen": -0.0007482378277927637, "logps/rejected": -2.10674786567688, "loss": 0.3021, "nll_loss": 0.07544497400522232, "rewards/accuracies": 1.0, "rewards/chosen": -7.482377986889333e-05, "rewards/margins": 0.21059995889663696, "rewards/rejected": -0.21067479252815247, "step": 13917 }, { "epoch": 9.625172890733056, "grad_norm": 3.3730711936950684, "learning_rate": 2.082372829260796e-06, "log_odds_chosen": 11.064151763916016, "log_odds_ratio": -8.68295828695409e-05, "logits/chosen": -0.40897443890571594, "logits/rejected": -0.45414721965789795, "logps/chosen": -0.0001387879892718047, "logps/rejected": -2.230348587036133, "loss": 0.3715, "nll_loss": 0.09286414831876755, "rewards/accuracies": 1.0, "rewards/chosen": -1.3878798199584708e-05, "rewards/margins": 0.223021000623703, "rewards/rejected": -0.22303488850593567, "step": 13918 }, { "epoch": 9.625864453665283, "grad_norm": 2.669193744659424, "learning_rate": 2.0785308129706473e-06, "log_odds_chosen": 11.192930221557617, "log_odds_ratio": -0.00021025211026426405, "logits/chosen": -0.32236677408218384, "logits/rejected": -0.29362133145332336, "logps/chosen": -0.0008011598256416619, "logps/rejected": -2.3582253456115723, "loss": 0.3081, "nll_loss": 0.07700303196907043, "rewards/accuracies": 1.0, "rewards/chosen": -8.011598401935771e-05, "rewards/margins": 0.23574241995811462, "rewards/rejected": -0.23582251369953156, "step": 13919 }, { "epoch": 9.62655601659751, "grad_norm": 5.058048725128174, "learning_rate": 2.074688796680498e-06, "log_odds_chosen": 11.47877311706543, "log_odds_ratio": -2.503236464690417e-05, "logits/chosen": -0.035071179270744324, "logits/rejected": -0.12748485803604126, "logps/chosen": -0.00045203138142824173, "logps/rejected": -3.1081864833831787, "loss": 0.5406, "nll_loss": 0.13513976335525513, "rewards/accuracies": 1.0, "rewards/chosen": -4.520314178080298e-05, "rewards/margins": 0.31077346205711365, "rewards/rejected": -0.3108186721801758, "step": 13920 }, { "epoch": 9.627247579529737, "grad_norm": 3.9089407920837402, "learning_rate": 2.0708467803903492e-06, "log_odds_chosen": 10.992070198059082, "log_odds_ratio": -0.0001330882078036666, "logits/chosen": -0.2942034602165222, "logits/rejected": -0.3216381371021271, "logps/chosen": -0.0002567889168858528, "logps/rejected": -2.4235384464263916, "loss": 0.3938, "nll_loss": 0.09844039380550385, "rewards/accuracies": 1.0, "rewards/chosen": -2.567889168858528e-05, "rewards/margins": 0.24232818186283112, "rewards/rejected": -0.24235385656356812, "step": 13921 }, { "epoch": 9.627939142461964, "grad_norm": 3.0660769939422607, "learning_rate": 2.0670047641002e-06, "log_odds_chosen": 11.104628562927246, "log_odds_ratio": -3.0097862691036426e-05, "logits/chosen": -0.5589832067489624, "logits/rejected": -0.6116610765457153, "logps/chosen": -5.852892354596406e-05, "logps/rejected": -1.5549559593200684, "loss": 0.4344, "nll_loss": 0.10860708355903625, "rewards/accuracies": 1.0, "rewards/chosen": -5.852892172697466e-06, "rewards/margins": 0.1554897427558899, "rewards/rejected": -0.15549558401107788, "step": 13922 }, { "epoch": 9.62863070539419, "grad_norm": 6.13004732131958, "learning_rate": 2.0631627478100507e-06, "log_odds_chosen": 10.39303970336914, "log_odds_ratio": -0.000225274998228997, "logits/chosen": -0.6050529479980469, "logits/rejected": -0.6999838352203369, "logps/chosen": -0.00015310509479604661, "logps/rejected": -1.6478575468063354, "loss": 0.5678, "nll_loss": 0.14191612601280212, "rewards/accuracies": 1.0, "rewards/chosen": -1.5310510207200423e-05, "rewards/margins": 0.1647704392671585, "rewards/rejected": -0.16478575766086578, "step": 13923 }, { "epoch": 9.629322268326417, "grad_norm": 3.988724946975708, "learning_rate": 2.0593207315199015e-06, "log_odds_chosen": 11.240147590637207, "log_odds_ratio": -3.00856918329373e-05, "logits/chosen": -0.26336485147476196, "logits/rejected": -0.32718032598495483, "logps/chosen": -0.00012234252062626183, "logps/rejected": -2.0191023349761963, "loss": 0.4075, "nll_loss": 0.10188324004411697, "rewards/accuracies": 1.0, "rewards/chosen": -1.2234251698828302e-05, "rewards/margins": 0.2018980085849762, "rewards/rejected": -0.20191024243831635, "step": 13924 }, { "epoch": 9.630013831258644, "grad_norm": 4.1808037757873535, "learning_rate": 2.0554787152297526e-06, "log_odds_chosen": 11.633804321289062, "log_odds_ratio": -1.0782297977129929e-05, "logits/chosen": -0.2822992503643036, "logits/rejected": -0.29951217770576477, "logps/chosen": -0.00015155112487263978, "logps/rejected": -2.631167411804199, "loss": 0.3955, "nll_loss": 0.0988800898194313, "rewards/accuracies": 1.0, "rewards/chosen": -1.5155113032960799e-05, "rewards/margins": 0.26310157775878906, "rewards/rejected": -0.263116717338562, "step": 13925 }, { "epoch": 9.630705394190871, "grad_norm": 3.0692636966705322, "learning_rate": 2.051636698939604e-06, "log_odds_chosen": 10.706825256347656, "log_odds_ratio": -5.6105287512764335e-05, "logits/chosen": -0.6887298226356506, "logits/rejected": -0.6536301374435425, "logps/chosen": -0.0005047488957643509, "logps/rejected": -2.0179996490478516, "loss": 0.222, "nll_loss": 0.05550269037485123, "rewards/accuracies": 1.0, "rewards/chosen": -5.0474893214413896e-05, "rewards/margins": 0.20174948871135712, "rewards/rejected": -0.20179995894432068, "step": 13926 }, { "epoch": 9.631396957123098, "grad_norm": 3.3400537967681885, "learning_rate": 2.0477946826494546e-06, "log_odds_chosen": 12.713604927062988, "log_odds_ratio": -1.0083525921800174e-05, "logits/chosen": -0.12084333598613739, "logits/rejected": -0.1845424622297287, "logps/chosen": -0.00010912872676271945, "logps/rejected": -3.221863269805908, "loss": 0.3719, "nll_loss": 0.09298507869243622, "rewards/accuracies": 1.0, "rewards/chosen": -1.0912872312474065e-05, "rewards/margins": 0.32217538356781006, "rewards/rejected": -0.32218629121780396, "step": 13927 }, { "epoch": 9.632088520055325, "grad_norm": 2.9674794673919678, "learning_rate": 2.0439526663593057e-06, "log_odds_chosen": 11.599284172058105, "log_odds_ratio": -2.6393710868433118e-05, "logits/chosen": -0.552137017250061, "logits/rejected": -0.6465173363685608, "logps/chosen": -0.0001403492351528257, "logps/rejected": -2.0705151557922363, "loss": 0.3808, "nll_loss": 0.0952024906873703, "rewards/accuracies": 1.0, "rewards/chosen": -1.4034923879080452e-05, "rewards/margins": 0.20703749358654022, "rewards/rejected": -0.20705154538154602, "step": 13928 }, { "epoch": 9.632780082987551, "grad_norm": 4.203634738922119, "learning_rate": 2.0401106500691565e-06, "log_odds_chosen": 10.64388656616211, "log_odds_ratio": -0.00011743738286895677, "logits/chosen": 0.017155641689896584, "logits/rejected": 0.012800104916095734, "logps/chosen": -0.0005038772942498326, "logps/rejected": -2.2548394203186035, "loss": 0.371, "nll_loss": 0.09274850785732269, "rewards/accuracies": 1.0, "rewards/chosen": -5.038773451815359e-05, "rewards/margins": 0.22543352842330933, "rewards/rejected": -0.22548392415046692, "step": 13929 }, { "epoch": 9.633471645919778, "grad_norm": 2.5074808597564697, "learning_rate": 2.0362686337790072e-06, "log_odds_chosen": 10.935724258422852, "log_odds_ratio": -0.00020738595048896968, "logits/chosen": -0.5184868574142456, "logits/rejected": -0.5800386071205139, "logps/chosen": -0.00023217473062686622, "logps/rejected": -1.9821714162826538, "loss": 0.3235, "nll_loss": 0.08084598928689957, "rewards/accuracies": 1.0, "rewards/chosen": -2.32174716074951e-05, "rewards/margins": 0.19819393754005432, "rewards/rejected": -0.19821715354919434, "step": 13930 }, { "epoch": 9.634163208852005, "grad_norm": 2.7048091888427734, "learning_rate": 2.032426617488858e-06, "log_odds_chosen": 10.152687072753906, "log_odds_ratio": -9.616788884159178e-05, "logits/chosen": -0.4461780786514282, "logits/rejected": -0.5649304389953613, "logps/chosen": -0.0006437112460844219, "logps/rejected": -1.70163094997406, "loss": 0.241, "nll_loss": 0.060244545340538025, "rewards/accuracies": 1.0, "rewards/chosen": -6.437112460844219e-05, "rewards/margins": 0.17009872198104858, "rewards/rejected": -0.170163094997406, "step": 13931 }, { "epoch": 9.634854771784232, "grad_norm": 3.0434532165527344, "learning_rate": 2.028584601198709e-06, "log_odds_chosen": 12.094767570495605, "log_odds_ratio": -1.862308636191301e-05, "logits/chosen": -0.30809926986694336, "logits/rejected": -0.3018673360347748, "logps/chosen": -0.00010675377416191623, "logps/rejected": -2.613502264022827, "loss": 0.3352, "nll_loss": 0.08379031717777252, "rewards/accuracies": 1.0, "rewards/chosen": -1.0675378689484205e-05, "rewards/margins": 0.26133957505226135, "rewards/rejected": -0.26135024428367615, "step": 13932 }, { "epoch": 9.635546334716459, "grad_norm": 4.059986114501953, "learning_rate": 2.02474258490856e-06, "log_odds_chosen": 11.331657409667969, "log_odds_ratio": -7.682020077481866e-05, "logits/chosen": -0.44414669275283813, "logits/rejected": -0.4349921643733978, "logps/chosen": -0.0002738929179031402, "logps/rejected": -2.484193801879883, "loss": 0.3711, "nll_loss": 0.09277647733688354, "rewards/accuracies": 1.0, "rewards/chosen": -2.7389291062718257e-05, "rewards/margins": 0.2483920156955719, "rewards/rejected": -0.248419389128685, "step": 13933 }, { "epoch": 9.636237897648686, "grad_norm": 3.5578558444976807, "learning_rate": 2.020900568618411e-06, "log_odds_chosen": 10.544318199157715, "log_odds_ratio": -6.59368306514807e-05, "logits/chosen": -0.18671739101409912, "logits/rejected": -0.2391272932291031, "logps/chosen": -0.00027044754824601114, "logps/rejected": -2.0963134765625, "loss": 0.2938, "nll_loss": 0.07343493402004242, "rewards/accuracies": 1.0, "rewards/chosen": -2.704475809878204e-05, "rewards/margins": 0.20960432291030884, "rewards/rejected": -0.20963135361671448, "step": 13934 }, { "epoch": 9.636929460580912, "grad_norm": 3.4241902828216553, "learning_rate": 2.0170585523282622e-06, "log_odds_chosen": 10.410786628723145, "log_odds_ratio": -9.618296462576836e-05, "logits/chosen": -0.01987304911017418, "logits/rejected": -0.09989184886217117, "logps/chosen": -0.00017437424685340375, "logps/rejected": -1.4026689529418945, "loss": 0.3551, "nll_loss": 0.08875397592782974, "rewards/accuracies": 1.0, "rewards/chosen": -1.7437425412936136e-05, "rewards/margins": 0.14024946093559265, "rewards/rejected": -0.14026689529418945, "step": 13935 }, { "epoch": 9.63762102351314, "grad_norm": 2.408499240875244, "learning_rate": 2.013216536038113e-06, "log_odds_chosen": 10.555112838745117, "log_odds_ratio": -0.00019298665574751794, "logits/chosen": -0.09522217512130737, "logits/rejected": -0.11984743177890778, "logps/chosen": -0.00025845691561698914, "logps/rejected": -1.6210821866989136, "loss": 0.226, "nll_loss": 0.056473828852176666, "rewards/accuracies": 1.0, "rewards/chosen": -2.5845693016890436e-05, "rewards/margins": 0.16208237409591675, "rewards/rejected": -0.16210821270942688, "step": 13936 }, { "epoch": 9.638312586445366, "grad_norm": 3.2268059253692627, "learning_rate": 2.0093745197479637e-06, "log_odds_chosen": 10.40433120727539, "log_odds_ratio": -6.741328252246603e-05, "logits/chosen": -0.013442234136164188, "logits/rejected": 0.015229137614369392, "logps/chosen": -0.0002590430958662182, "logps/rejected": -1.8909542560577393, "loss": 0.3726, "nll_loss": 0.09313929080963135, "rewards/accuracies": 1.0, "rewards/chosen": -2.5904311769409105e-05, "rewards/margins": 0.18906950950622559, "rewards/rejected": -0.18909543752670288, "step": 13937 }, { "epoch": 9.639004149377593, "grad_norm": 3.6599769592285156, "learning_rate": 2.005532503457815e-06, "log_odds_chosen": 10.732551574707031, "log_odds_ratio": -0.0011148906778544188, "logits/chosen": -0.4779934883117676, "logits/rejected": -0.4871721863746643, "logps/chosen": -0.0004535532498266548, "logps/rejected": -2.415609836578369, "loss": 0.4094, "nll_loss": 0.10223733633756638, "rewards/accuracies": 1.0, "rewards/chosen": -4.535532571026124e-05, "rewards/margins": 0.2415156364440918, "rewards/rejected": -0.24156101047992706, "step": 13938 }, { "epoch": 9.63969571230982, "grad_norm": 2.5770082473754883, "learning_rate": 2.0016904871676657e-06, "log_odds_chosen": 11.127652168273926, "log_odds_ratio": -2.9959042876726016e-05, "logits/chosen": -0.504085123538971, "logits/rejected": -0.5476514101028442, "logps/chosen": -0.0001619804388610646, "logps/rejected": -2.0527985095977783, "loss": 0.3142, "nll_loss": 0.07854273915290833, "rewards/accuracies": 1.0, "rewards/chosen": -1.6198046068893746e-05, "rewards/margins": 0.2052636742591858, "rewards/rejected": -0.2052798867225647, "step": 13939 }, { "epoch": 9.640387275242047, "grad_norm": 2.991600275039673, "learning_rate": 1.9978484708775164e-06, "log_odds_chosen": 11.098523139953613, "log_odds_ratio": -7.594098133267835e-05, "logits/chosen": -0.1997908055782318, "logits/rejected": -0.26286157965660095, "logps/chosen": -0.0006000112625770271, "logps/rejected": -2.43660569190979, "loss": 0.3706, "nll_loss": 0.09263789653778076, "rewards/accuracies": 1.0, "rewards/chosen": -6.000112625770271e-05, "rewards/margins": 0.24360056221485138, "rewards/rejected": -0.243660569190979, "step": 13940 }, { "epoch": 9.641078838174273, "grad_norm": 3.1971094608306885, "learning_rate": 1.9940064545873676e-06, "log_odds_chosen": 10.228924751281738, "log_odds_ratio": -0.00011588910274440423, "logits/chosen": -0.38264912366867065, "logits/rejected": -0.45704877376556396, "logps/chosen": -0.000311953917844221, "logps/rejected": -1.974640965461731, "loss": 0.4756, "nll_loss": 0.11888445168733597, "rewards/accuracies": 1.0, "rewards/chosen": -3.11953917844221e-05, "rewards/margins": 0.19743289053440094, "rewards/rejected": -0.19746409356594086, "step": 13941 }, { "epoch": 9.6417704011065, "grad_norm": 3.8516626358032227, "learning_rate": 1.9901644382972188e-06, "log_odds_chosen": 11.560503005981445, "log_odds_ratio": -1.5859755876590498e-05, "logits/chosen": -0.10931985825300217, "logits/rejected": -0.006788960192352533, "logps/chosen": -0.00013140823284629732, "logps/rejected": -2.5448997020721436, "loss": 0.3484, "nll_loss": 0.08708598464727402, "rewards/accuracies": 1.0, "rewards/chosen": -1.3140824194124434e-05, "rewards/margins": 0.2544768452644348, "rewards/rejected": -0.2544899582862854, "step": 13942 }, { "epoch": 9.642461964038727, "grad_norm": 3.120718240737915, "learning_rate": 1.9863224220070695e-06, "log_odds_chosen": 11.14166259765625, "log_odds_ratio": -5.944729127804749e-05, "logits/chosen": -0.23091326653957367, "logits/rejected": -0.37017300724983215, "logps/chosen": -0.00030458703986369073, "logps/rejected": -2.427988290786743, "loss": 0.3297, "nll_loss": 0.08242782205343246, "rewards/accuracies": 1.0, "rewards/chosen": -3.0458708351943642e-05, "rewards/margins": 0.24276837706565857, "rewards/rejected": -0.2427988052368164, "step": 13943 }, { "epoch": 9.643153526970954, "grad_norm": 3.2770447731018066, "learning_rate": 1.9824804057169203e-06, "log_odds_chosen": 10.39407730102539, "log_odds_ratio": -6.861618021503091e-05, "logits/chosen": -0.5030243992805481, "logits/rejected": -0.4254170358181, "logps/chosen": -0.0003515210119076073, "logps/rejected": -2.0133328437805176, "loss": 0.425, "nll_loss": 0.1062437891960144, "rewards/accuracies": 1.0, "rewards/chosen": -3.515210119076073e-05, "rewards/margins": 0.20129813253879547, "rewards/rejected": -0.20133328437805176, "step": 13944 }, { "epoch": 9.64384508990318, "grad_norm": 3.685124635696411, "learning_rate": 1.9786383894267714e-06, "log_odds_chosen": 11.571239471435547, "log_odds_ratio": -1.943923780345358e-05, "logits/chosen": -0.10035257786512375, "logits/rejected": -0.12497510015964508, "logps/chosen": -0.00018849805928766727, "logps/rejected": -2.77651309967041, "loss": 0.3319, "nll_loss": 0.08296985924243927, "rewards/accuracies": 1.0, "rewards/chosen": -1.8849805201170966e-05, "rewards/margins": 0.2776325047016144, "rewards/rejected": -0.2776513397693634, "step": 13945 }, { "epoch": 9.644536652835408, "grad_norm": 4.463271617889404, "learning_rate": 1.974796373136622e-06, "log_odds_chosen": 11.625158309936523, "log_odds_ratio": -1.385235464113066e-05, "logits/chosen": -0.5605673789978027, "logits/rejected": -0.5917690396308899, "logps/chosen": -0.00016648485325276852, "logps/rejected": -2.598931074142456, "loss": 0.5527, "nll_loss": 0.13818588852882385, "rewards/accuracies": 1.0, "rewards/chosen": -1.6648486052872613e-05, "rewards/margins": 0.25987643003463745, "rewards/rejected": -0.2598930895328522, "step": 13946 }, { "epoch": 9.645228215767634, "grad_norm": 4.580418109893799, "learning_rate": 1.970954356846473e-06, "log_odds_chosen": 10.304842948913574, "log_odds_ratio": -6.0571030189748853e-05, "logits/chosen": 0.23934561014175415, "logits/rejected": 0.1578591763973236, "logps/chosen": -0.0003283784899394959, "logps/rejected": -2.1980645656585693, "loss": 0.5329, "nll_loss": 0.1332293152809143, "rewards/accuracies": 1.0, "rewards/chosen": -3.283785190433264e-05, "rewards/margins": 0.21977362036705017, "rewards/rejected": -0.2198064625263214, "step": 13947 }, { "epoch": 9.645919778699861, "grad_norm": 2.5201478004455566, "learning_rate": 1.9671123405563237e-06, "log_odds_chosen": 10.01999282836914, "log_odds_ratio": -6.546937220264226e-05, "logits/chosen": -0.2158067226409912, "logits/rejected": -0.15728622674942017, "logps/chosen": -0.00015559874009341002, "logps/rejected": -1.4551308155059814, "loss": 0.2542, "nll_loss": 0.06354384869337082, "rewards/accuracies": 1.0, "rewards/chosen": -1.555987364554312e-05, "rewards/margins": 0.14549751579761505, "rewards/rejected": -0.14551308751106262, "step": 13948 }, { "epoch": 9.646611341632088, "grad_norm": 2.6810781955718994, "learning_rate": 1.9632703242661753e-06, "log_odds_chosen": 10.072636604309082, "log_odds_ratio": -0.0002688949170988053, "logits/chosen": -0.31802284717559814, "logits/rejected": -0.34241783618927, "logps/chosen": -0.00027778628282248974, "logps/rejected": -1.521261215209961, "loss": 0.2867, "nll_loss": 0.07165224850177765, "rewards/accuracies": 1.0, "rewards/chosen": -2.7778627554653212e-05, "rewards/margins": 0.15209835767745972, "rewards/rejected": -0.15212613344192505, "step": 13949 }, { "epoch": 9.647302904564315, "grad_norm": 3.1169934272766113, "learning_rate": 1.959428307976026e-06, "log_odds_chosen": 11.934995651245117, "log_odds_ratio": -8.18016087578144e-06, "logits/chosen": -0.14000201225280762, "logits/rejected": -0.21201661229133606, "logps/chosen": -5.927299935137853e-05, "logps/rejected": -2.194277286529541, "loss": 0.356, "nll_loss": 0.08899751305580139, "rewards/accuracies": 1.0, "rewards/chosen": -5.9273002079862636e-06, "rewards/margins": 0.21942180395126343, "rewards/rejected": -0.21942773461341858, "step": 13950 }, { "epoch": 9.647994467496542, "grad_norm": 3.84570574760437, "learning_rate": 1.9555862916858768e-06, "log_odds_chosen": 10.816938400268555, "log_odds_ratio": -0.00033466549939475954, "logits/chosen": 0.020725198090076447, "logits/rejected": -0.060677967965602875, "logps/chosen": -0.00038195756496861577, "logps/rejected": -2.639225721359253, "loss": 0.3431, "nll_loss": 0.0857374295592308, "rewards/accuracies": 1.0, "rewards/chosen": -3.819575431407429e-05, "rewards/margins": 0.26388436555862427, "rewards/rejected": -0.2639225721359253, "step": 13951 }, { "epoch": 9.648686030428768, "grad_norm": 3.859541177749634, "learning_rate": 1.951744275395728e-06, "log_odds_chosen": 12.209653854370117, "log_odds_ratio": -7.651913620065898e-06, "logits/chosen": -0.3404412865638733, "logits/rejected": -0.3990596532821655, "logps/chosen": -9.441153815714642e-05, "logps/rejected": -2.768007516860962, "loss": 0.5145, "nll_loss": 0.128626748919487, "rewards/accuracies": 1.0, "rewards/chosen": -9.441153451916762e-06, "rewards/margins": 0.2767913341522217, "rewards/rejected": -0.2768007516860962, "step": 13952 }, { "epoch": 9.649377593360995, "grad_norm": 5.899332046508789, "learning_rate": 1.9479022591055787e-06, "log_odds_chosen": 12.843746185302734, "log_odds_ratio": -2.0442003005882725e-05, "logits/chosen": -0.026304662227630615, "logits/rejected": -0.09566555917263031, "logps/chosen": -0.00028729261248372495, "logps/rejected": -4.177236557006836, "loss": 0.4666, "nll_loss": 0.11665841937065125, "rewards/accuracies": 1.0, "rewards/chosen": -2.8729262339766137e-05, "rewards/margins": 0.41769489645957947, "rewards/rejected": -0.4177236557006836, "step": 13953 }, { "epoch": 9.650069156293222, "grad_norm": 3.5560855865478516, "learning_rate": 1.9440602428154294e-06, "log_odds_chosen": 10.799585342407227, "log_odds_ratio": -6.642687367275357e-05, "logits/chosen": -0.3646691143512726, "logits/rejected": -0.4457007646560669, "logps/chosen": -0.00032388101681135595, "logps/rejected": -2.1229469776153564, "loss": 0.39, "nll_loss": 0.09749428927898407, "rewards/accuracies": 1.0, "rewards/chosen": -3.238810313632712e-05, "rewards/margins": 0.2122623324394226, "rewards/rejected": -0.21229471266269684, "step": 13954 }, { "epoch": 9.650760719225449, "grad_norm": 3.677841901779175, "learning_rate": 1.9402182265252806e-06, "log_odds_chosen": 11.822620391845703, "log_odds_ratio": -1.5757483197376132e-05, "logits/chosen": -0.7204559445381165, "logits/rejected": -0.6808112263679504, "logps/chosen": -0.00016307370970025659, "logps/rejected": -2.5352365970611572, "loss": 0.7112, "nll_loss": 0.1778022050857544, "rewards/accuracies": 1.0, "rewards/chosen": -1.630737097002566e-05, "rewards/margins": 0.2535073757171631, "rewards/rejected": -0.25352364778518677, "step": 13955 }, { "epoch": 9.651452282157676, "grad_norm": 4.258469104766846, "learning_rate": 1.9363762102351314e-06, "log_odds_chosen": 12.353096008300781, "log_odds_ratio": -6.92297635396244e-06, "logits/chosen": -0.13911697268486023, "logits/rejected": -0.2312823385000229, "logps/chosen": -6.1727077991236e-05, "logps/rejected": -2.645348072052002, "loss": 0.4578, "nll_loss": 0.1144505962729454, "rewards/accuracies": 1.0, "rewards/chosen": -6.172708253870951e-06, "rewards/margins": 0.26452863216400146, "rewards/rejected": -0.2645348310470581, "step": 13956 }, { "epoch": 9.652143845089903, "grad_norm": 2.751460313796997, "learning_rate": 1.9325341939449825e-06, "log_odds_chosen": 12.060667037963867, "log_odds_ratio": -1.2571328625199385e-05, "logits/chosen": -0.4585002660751343, "logits/rejected": -0.45212164521217346, "logps/chosen": -0.00021719810320064425, "logps/rejected": -2.7318403720855713, "loss": 0.3136, "nll_loss": 0.07839718461036682, "rewards/accuracies": 1.0, "rewards/chosen": -2.1719810320064425e-05, "rewards/margins": 0.273162305355072, "rewards/rejected": -0.27318403124809265, "step": 13957 }, { "epoch": 9.65283540802213, "grad_norm": 3.256422996520996, "learning_rate": 1.9286921776548333e-06, "log_odds_chosen": 11.365076065063477, "log_odds_ratio": -2.1767995349364355e-05, "logits/chosen": -0.08825594931840897, "logits/rejected": -0.23204001784324646, "logps/chosen": -0.00021763856057077646, "logps/rejected": -2.7843446731567383, "loss": 0.3723, "nll_loss": 0.09308039397001266, "rewards/accuracies": 1.0, "rewards/chosen": -2.1763855329481885e-05, "rewards/margins": 0.27841272950172424, "rewards/rejected": -0.27843448519706726, "step": 13958 }, { "epoch": 9.653526970954356, "grad_norm": 3.9554879665374756, "learning_rate": 1.9248501613646845e-06, "log_odds_chosen": 9.385603904724121, "log_odds_ratio": -0.000505188072565943, "logits/chosen": -0.32343584299087524, "logits/rejected": -0.19714388251304626, "logps/chosen": -0.0008970825583674014, "logps/rejected": -1.9236663579940796, "loss": 0.6538, "nll_loss": 0.16340425610542297, "rewards/accuracies": 1.0, "rewards/chosen": -8.970825001597404e-05, "rewards/margins": 0.19227692484855652, "rewards/rejected": -0.19236664474010468, "step": 13959 }, { "epoch": 9.654218533886583, "grad_norm": 5.494601249694824, "learning_rate": 1.921008145074535e-06, "log_odds_chosen": 10.197301864624023, "log_odds_ratio": -8.86005800566636e-05, "logits/chosen": -0.22364522516727448, "logits/rejected": -0.3473617732524872, "logps/chosen": -0.0006021163426339626, "logps/rejected": -1.9727119207382202, "loss": 0.5145, "nll_loss": 0.128618061542511, "rewards/accuracies": 1.0, "rewards/chosen": -6.021163790137507e-05, "rewards/margins": 0.19721098244190216, "rewards/rejected": -0.1972711980342865, "step": 13960 }, { "epoch": 9.65491009681881, "grad_norm": 4.0596466064453125, "learning_rate": 1.917166128784386e-06, "log_odds_chosen": 11.051063537597656, "log_odds_ratio": -0.0009821861749514937, "logits/chosen": -0.592627763748169, "logits/rejected": -0.6205758452415466, "logps/chosen": -0.0003839967539533973, "logps/rejected": -2.1661429405212402, "loss": 0.5071, "nll_loss": 0.12668871879577637, "rewards/accuracies": 1.0, "rewards/chosen": -3.8399673940148205e-05, "rewards/margins": 0.21657592058181763, "rewards/rejected": -0.21661432087421417, "step": 13961 }, { "epoch": 9.655601659751037, "grad_norm": 3.5549631118774414, "learning_rate": 1.913324112494237e-06, "log_odds_chosen": 11.564865112304688, "log_odds_ratio": -4.4520056690089405e-05, "logits/chosen": -0.10879924893379211, "logits/rejected": -0.18709474802017212, "logps/chosen": -0.0003732958575710654, "logps/rejected": -2.9977054595947266, "loss": 0.3898, "nll_loss": 0.09745587408542633, "rewards/accuracies": 1.0, "rewards/chosen": -3.732958430191502e-05, "rewards/margins": 0.2997332215309143, "rewards/rejected": -0.2997705638408661, "step": 13962 }, { "epoch": 9.656293222683264, "grad_norm": 2.9617433547973633, "learning_rate": 1.909482096204088e-06, "log_odds_chosen": 10.506338119506836, "log_odds_ratio": -6.779128307243809e-05, "logits/chosen": -0.7193613648414612, "logits/rejected": -0.7034804224967957, "logps/chosen": -0.00013443415809888393, "logps/rejected": -1.504642367362976, "loss": 0.3665, "nll_loss": 0.0916147232055664, "rewards/accuracies": 1.0, "rewards/chosen": -1.3443415809888393e-05, "rewards/margins": 0.15045081079006195, "rewards/rejected": -0.1504642516374588, "step": 13963 }, { "epoch": 9.65698478561549, "grad_norm": 3.313770294189453, "learning_rate": 1.9056400799139388e-06, "log_odds_chosen": 9.815048217773438, "log_odds_ratio": -0.00028208905132487416, "logits/chosen": -0.2507280111312866, "logits/rejected": -0.3469865322113037, "logps/chosen": -0.00044885973329655826, "logps/rejected": -1.6550946235656738, "loss": 0.313, "nll_loss": 0.07822908461093903, "rewards/accuracies": 1.0, "rewards/chosen": -4.488597551244311e-05, "rewards/margins": 0.1654645800590515, "rewards/rejected": -0.16550946235656738, "step": 13964 }, { "epoch": 9.657676348547717, "grad_norm": 2.9807019233703613, "learning_rate": 1.9017980636237896e-06, "log_odds_chosen": 10.997095108032227, "log_odds_ratio": -3.3784810511860996e-05, "logits/chosen": -0.08487127721309662, "logits/rejected": -0.13770827651023865, "logps/chosen": -0.00013816017599310726, "logps/rejected": -1.9608837366104126, "loss": 0.2567, "nll_loss": 0.06416875123977661, "rewards/accuracies": 1.0, "rewards/chosen": -1.3816017599310726e-05, "rewards/margins": 0.19607457518577576, "rewards/rejected": -0.19608840346336365, "step": 13965 }, { "epoch": 9.658367911479944, "grad_norm": 3.188283681869507, "learning_rate": 1.8979560473336408e-06, "log_odds_chosen": 12.547552108764648, "log_odds_ratio": -5.1370989240240306e-06, "logits/chosen": -0.4965924918651581, "logits/rejected": -0.517026960849762, "logps/chosen": -0.00013113873137626797, "logps/rejected": -2.892089366912842, "loss": 0.3937, "nll_loss": 0.09843617677688599, "rewards/accuracies": 1.0, "rewards/chosen": -1.3113873137626797e-05, "rewards/margins": 0.28919583559036255, "rewards/rejected": -0.28920894861221313, "step": 13966 }, { "epoch": 9.659059474412171, "grad_norm": 3.3707354068756104, "learning_rate": 1.8941140310434917e-06, "log_odds_chosen": 11.181507110595703, "log_odds_ratio": -6.762929842807353e-05, "logits/chosen": -0.5147676467895508, "logits/rejected": -0.505560040473938, "logps/chosen": -0.0001584067940711975, "logps/rejected": -2.233450174331665, "loss": 0.3739, "nll_loss": 0.09346611797809601, "rewards/accuracies": 1.0, "rewards/chosen": -1.5840680134715512e-05, "rewards/margins": 0.22332918643951416, "rewards/rejected": -0.22334501147270203, "step": 13967 }, { "epoch": 9.659751037344398, "grad_norm": 3.804871082305908, "learning_rate": 1.8902720147533425e-06, "log_odds_chosen": 10.533234596252441, "log_odds_ratio": -0.0006375403027050197, "logits/chosen": -0.37808045744895935, "logits/rejected": -0.4201991558074951, "logps/chosen": -0.0006506206700578332, "logps/rejected": -1.9480440616607666, "loss": 0.2891, "nll_loss": 0.07221576571464539, "rewards/accuracies": 1.0, "rewards/chosen": -6.50620786473155e-05, "rewards/margins": 0.19473935663700104, "rewards/rejected": -0.19480441510677338, "step": 13968 }, { "epoch": 9.660442600276625, "grad_norm": 4.535920143127441, "learning_rate": 1.8864299984631936e-06, "log_odds_chosen": 10.594648361206055, "log_odds_ratio": -4.910709321848117e-05, "logits/chosen": -0.28199848532676697, "logits/rejected": -0.3965606093406677, "logps/chosen": -0.00027819437673315406, "logps/rejected": -2.096233367919922, "loss": 0.5034, "nll_loss": 0.12584403157234192, "rewards/accuracies": 1.0, "rewards/chosen": -2.781944021990057e-05, "rewards/margins": 0.20959553122520447, "rewards/rejected": -0.2096233367919922, "step": 13969 }, { "epoch": 9.661134163208851, "grad_norm": 2.5646848678588867, "learning_rate": 1.8825879821730446e-06, "log_odds_chosen": 10.840084075927734, "log_odds_ratio": -5.749518459197134e-05, "logits/chosen": -0.622244119644165, "logits/rejected": -0.6063990592956543, "logps/chosen": -0.00022037388407625258, "logps/rejected": -2.235664129257202, "loss": 0.2875, "nll_loss": 0.07186561077833176, "rewards/accuracies": 1.0, "rewards/chosen": -2.20373894990189e-05, "rewards/margins": 0.22354437410831451, "rewards/rejected": -0.22356641292572021, "step": 13970 }, { "epoch": 9.661825726141078, "grad_norm": 5.075047492980957, "learning_rate": 1.8787459658828954e-06, "log_odds_chosen": 12.194637298583984, "log_odds_ratio": -2.2917814931133762e-05, "logits/chosen": -0.004379307851195335, "logits/rejected": -0.012477247044444084, "logps/chosen": -0.00013443827629089355, "logps/rejected": -3.050947427749634, "loss": 0.3824, "nll_loss": 0.09560485184192657, "rewards/accuracies": 1.0, "rewards/chosen": -1.3443827810988296e-05, "rewards/margins": 0.305081307888031, "rewards/rejected": -0.30509474873542786, "step": 13971 }, { "epoch": 9.662517289073305, "grad_norm": 3.017207384109497, "learning_rate": 1.8749039495927465e-06, "log_odds_chosen": 10.548418045043945, "log_odds_ratio": -5.83583750994876e-05, "logits/chosen": -0.512605607509613, "logits/rejected": -0.5638319253921509, "logps/chosen": -0.00021537914290092885, "logps/rejected": -2.041076898574829, "loss": 0.2392, "nll_loss": 0.059799715876579285, "rewards/accuracies": 1.0, "rewards/chosen": -2.1537915017688647e-05, "rewards/margins": 0.20408615469932556, "rewards/rejected": -0.20410770177841187, "step": 13972 }, { "epoch": 9.663208852005532, "grad_norm": 3.117672920227051, "learning_rate": 1.8710619333025973e-06, "log_odds_chosen": 12.20585823059082, "log_odds_ratio": -2.3021353626972996e-05, "logits/chosen": -0.308549165725708, "logits/rejected": -0.14046195149421692, "logps/chosen": -9.588651300873607e-05, "logps/rejected": -2.887348175048828, "loss": 0.2555, "nll_loss": 0.06386178731918335, "rewards/accuracies": 1.0, "rewards/chosen": -9.588651664671488e-06, "rewards/margins": 0.28872519731521606, "rewards/rejected": -0.2887347936630249, "step": 13973 }, { "epoch": 9.663900414937759, "grad_norm": 2.9554359912872314, "learning_rate": 1.8672199170124482e-06, "log_odds_chosen": 11.596789360046387, "log_odds_ratio": -1.0424893844174221e-05, "logits/chosen": -0.39971041679382324, "logits/rejected": -0.46779516339302063, "logps/chosen": -7.597896183142439e-05, "logps/rejected": -2.0296382904052734, "loss": 0.3011, "nll_loss": 0.07528509944677353, "rewards/accuracies": 1.0, "rewards/chosen": -7.597896001243498e-06, "rewards/margins": 0.2029562145471573, "rewards/rejected": -0.20296382904052734, "step": 13974 }, { "epoch": 9.664591977869986, "grad_norm": 3.62790846824646, "learning_rate": 1.863377900722299e-06, "log_odds_chosen": 12.152950286865234, "log_odds_ratio": -7.841685874154791e-05, "logits/chosen": -0.5727795362472534, "logits/rejected": -0.6163936853408813, "logps/chosen": -0.0001449670089641586, "logps/rejected": -3.1027400493621826, "loss": 0.5028, "nll_loss": 0.12568151950836182, "rewards/accuracies": 1.0, "rewards/chosen": -1.4496701624011621e-05, "rewards/margins": 0.31025952100753784, "rewards/rejected": -0.31027403473854065, "step": 13975 }, { "epoch": 9.665283540802212, "grad_norm": 3.7316770553588867, "learning_rate": 1.8595358844321502e-06, "log_odds_chosen": 10.631937980651855, "log_odds_ratio": -0.00010631579061737284, "logits/chosen": 0.005236908793449402, "logits/rejected": -0.029435843229293823, "logps/chosen": -0.00042771699372678995, "logps/rejected": -2.0606510639190674, "loss": 0.4573, "nll_loss": 0.1143130213022232, "rewards/accuracies": 1.0, "rewards/chosen": -4.277170228306204e-05, "rewards/margins": 0.20602233707904816, "rewards/rejected": -0.2060651183128357, "step": 13976 }, { "epoch": 9.66597510373444, "grad_norm": 5.226464748382568, "learning_rate": 1.855693868142001e-06, "log_odds_chosen": 12.1885986328125, "log_odds_ratio": -7.177217412390746e-06, "logits/chosen": -0.0014483742415904999, "logits/rejected": -0.04183807224035263, "logps/chosen": -0.00012206320388941094, "logps/rejected": -2.8245863914489746, "loss": 0.5347, "nll_loss": 0.1336742490530014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2206321116536856e-05, "rewards/margins": 0.28244641423225403, "rewards/rejected": -0.28245866298675537, "step": 13977 }, { "epoch": 9.666666666666666, "grad_norm": 3.0053915977478027, "learning_rate": 1.8518518518518519e-06, "log_odds_chosen": 10.44801139831543, "log_odds_ratio": -9.702295938041061e-05, "logits/chosen": -0.615515410900116, "logits/rejected": -0.634098470211029, "logps/chosen": -0.0006284684641286731, "logps/rejected": -2.4199600219726562, "loss": 0.3143, "nll_loss": 0.07857717573642731, "rewards/accuracies": 1.0, "rewards/chosen": -6.284684059210122e-05, "rewards/margins": 0.2419331669807434, "rewards/rejected": -0.24199600517749786, "step": 13978 }, { "epoch": 9.667358229598893, "grad_norm": 3.3845856189727783, "learning_rate": 1.848009835561703e-06, "log_odds_chosen": 12.12214469909668, "log_odds_ratio": -2.5276207452407107e-05, "logits/chosen": -0.30605417490005493, "logits/rejected": -0.2163485586643219, "logps/chosen": -0.00045134034007787704, "logps/rejected": -3.1922848224639893, "loss": 0.3106, "nll_loss": 0.07765016704797745, "rewards/accuracies": 1.0, "rewards/chosen": -4.513404201134108e-05, "rewards/margins": 0.3191833198070526, "rewards/rejected": -0.31922847032546997, "step": 13979 }, { "epoch": 9.66804979253112, "grad_norm": 2.010406017303467, "learning_rate": 1.8441678192715538e-06, "log_odds_chosen": 10.314872741699219, "log_odds_ratio": -6.38093042653054e-05, "logits/chosen": -0.4727852940559387, "logits/rejected": -0.4534512758255005, "logps/chosen": -0.000576147111132741, "logps/rejected": -1.9994373321533203, "loss": 0.2105, "nll_loss": 0.052621982991695404, "rewards/accuracies": 1.0, "rewards/chosen": -5.761471766163595e-05, "rewards/margins": 0.19988611340522766, "rewards/rejected": -0.19994375109672546, "step": 13980 }, { "epoch": 9.668741355463347, "grad_norm": 3.694669008255005, "learning_rate": 1.8403258029814048e-06, "log_odds_chosen": 10.413873672485352, "log_odds_ratio": -9.337106894236058e-05, "logits/chosen": -0.06772250682115555, "logits/rejected": -0.3364441394805908, "logps/chosen": -0.0002473013009876013, "logps/rejected": -2.0732505321502686, "loss": 0.5063, "nll_loss": 0.12656481564044952, "rewards/accuracies": 1.0, "rewards/chosen": -2.473013046255801e-05, "rewards/margins": 0.2073003202676773, "rewards/rejected": -0.2073250561952591, "step": 13981 }, { "epoch": 9.669432918395573, "grad_norm": 5.21959924697876, "learning_rate": 1.8364837866912555e-06, "log_odds_chosen": 11.933232307434082, "log_odds_ratio": -1.4127767826721538e-05, "logits/chosen": 0.22804135084152222, "logits/rejected": 0.21710431575775146, "logps/chosen": -0.0001345960918115452, "logps/rejected": -2.759626865386963, "loss": 0.5374, "nll_loss": 0.13435040414333344, "rewards/accuracies": 1.0, "rewards/chosen": -1.345960936305346e-05, "rewards/margins": 0.27594923973083496, "rewards/rejected": -0.2759627103805542, "step": 13982 }, { "epoch": 9.6701244813278, "grad_norm": 2.4474329948425293, "learning_rate": 1.8326417704011067e-06, "log_odds_chosen": 10.466503143310547, "log_odds_ratio": -0.00013020877668168396, "logits/chosen": -0.09109814465045929, "logits/rejected": -0.18959535658359528, "logps/chosen": -0.00045913312351331115, "logps/rejected": -2.278885841369629, "loss": 0.2638, "nll_loss": 0.06593704223632812, "rewards/accuracies": 1.0, "rewards/chosen": -4.591331526171416e-05, "rewards/margins": 0.22784265875816345, "rewards/rejected": -0.22788859903812408, "step": 13983 }, { "epoch": 9.670816044260027, "grad_norm": 4.067322731018066, "learning_rate": 1.8287997541109574e-06, "log_odds_chosen": 12.389582633972168, "log_odds_ratio": -3.250035297241993e-05, "logits/chosen": -0.2228613793849945, "logits/rejected": -0.293897807598114, "logps/chosen": -0.00021752913016825914, "logps/rejected": -3.7773804664611816, "loss": 0.2975, "nll_loss": 0.07436374574899673, "rewards/accuracies": 1.0, "rewards/chosen": -2.175291592720896e-05, "rewards/margins": 0.3777163028717041, "rewards/rejected": -0.3777380585670471, "step": 13984 }, { "epoch": 9.671507607192254, "grad_norm": 2.84019136428833, "learning_rate": 1.8249577378208084e-06, "log_odds_chosen": 11.020377159118652, "log_odds_ratio": -0.00011550134513527155, "logits/chosen": -0.524024248123169, "logits/rejected": -0.569028377532959, "logps/chosen": -0.0003480208106338978, "logps/rejected": -2.0050735473632812, "loss": 0.2781, "nll_loss": 0.06951781362295151, "rewards/accuracies": 1.0, "rewards/chosen": -3.4802083973772824e-05, "rewards/margins": 0.20047254860401154, "rewards/rejected": -0.20050735771656036, "step": 13985 }, { "epoch": 9.67219917012448, "grad_norm": 3.9230411052703857, "learning_rate": 1.8211157215306596e-06, "log_odds_chosen": 11.75917911529541, "log_odds_ratio": -1.6298314221785404e-05, "logits/chosen": -0.01063506118953228, "logits/rejected": -0.06654591858386993, "logps/chosen": -0.00010372167162131518, "logps/rejected": -2.515432834625244, "loss": 0.5497, "nll_loss": 0.1374278962612152, "rewards/accuracies": 1.0, "rewards/chosen": -1.037216770782834e-05, "rewards/margins": 0.2515329122543335, "rewards/rejected": -0.2515432834625244, "step": 13986 }, { "epoch": 9.672890733056708, "grad_norm": 3.6679883003234863, "learning_rate": 1.8172737052405103e-06, "log_odds_chosen": 11.404784202575684, "log_odds_ratio": -5.806395711260848e-05, "logits/chosen": -0.5198478698730469, "logits/rejected": -0.5719497799873352, "logps/chosen": -0.0005329885752871633, "logps/rejected": -3.326045036315918, "loss": 0.3997, "nll_loss": 0.09991982579231262, "rewards/accuracies": 1.0, "rewards/chosen": -5.3298852435545996e-05, "rewards/margins": 0.3325512409210205, "rewards/rejected": -0.3326045274734497, "step": 13987 }, { "epoch": 9.673582295988934, "grad_norm": 3.1682469844818115, "learning_rate": 1.813431688950361e-06, "log_odds_chosen": 11.663154602050781, "log_odds_ratio": -0.0005123711889609694, "logits/chosen": -0.014620475471019745, "logits/rejected": -0.15799535810947418, "logps/chosen": -0.0006218062480911613, "logps/rejected": -3.092310667037964, "loss": 0.2362, "nll_loss": 0.059004656970500946, "rewards/accuracies": 1.0, "rewards/chosen": -6.218061753315851e-05, "rewards/margins": 0.30916887521743774, "rewards/rejected": -0.30923107266426086, "step": 13988 }, { "epoch": 9.674273858921161, "grad_norm": 2.75168776512146, "learning_rate": 1.8095896726602122e-06, "log_odds_chosen": 11.537948608398438, "log_odds_ratio": -2.661232247191947e-05, "logits/chosen": -0.8945484161376953, "logits/rejected": -0.7763729095458984, "logps/chosen": -6.327818118734285e-05, "logps/rejected": -1.9054579734802246, "loss": 0.3813, "nll_loss": 0.09531690180301666, "rewards/accuracies": 1.0, "rewards/chosen": -6.327818482532166e-06, "rewards/margins": 0.19053946435451508, "rewards/rejected": -0.19054579734802246, "step": 13989 }, { "epoch": 9.674965421853388, "grad_norm": 3.947296142578125, "learning_rate": 1.8057476563700632e-06, "log_odds_chosen": 11.823331832885742, "log_odds_ratio": -3.6443863791646436e-05, "logits/chosen": -0.24902662634849548, "logits/rejected": -0.2904735505580902, "logps/chosen": -0.00020042044343426824, "logps/rejected": -2.9156477451324463, "loss": 0.4597, "nll_loss": 0.11491231620311737, "rewards/accuracies": 1.0, "rewards/chosen": -2.004204725380987e-05, "rewards/margins": 0.29154473543167114, "rewards/rejected": -0.29156479239463806, "step": 13990 }, { "epoch": 9.675656984785615, "grad_norm": 2.841975450515747, "learning_rate": 1.801905640079914e-06, "log_odds_chosen": 11.474272727966309, "log_odds_ratio": -5.509784023161046e-05, "logits/chosen": -0.17017102241516113, "logits/rejected": -0.17466890811920166, "logps/chosen": -0.00014371874567586929, "logps/rejected": -2.071979522705078, "loss": 0.3136, "nll_loss": 0.07839880883693695, "rewards/accuracies": 1.0, "rewards/chosen": -1.4371873476193286e-05, "rewards/margins": 0.2071835696697235, "rewards/rejected": -0.20719794929027557, "step": 13991 }, { "epoch": 9.676348547717842, "grad_norm": 3.145448684692383, "learning_rate": 1.798063623789765e-06, "log_odds_chosen": 11.523065567016602, "log_odds_ratio": -1.7595630197320133e-05, "logits/chosen": -0.3231140971183777, "logits/rejected": -0.3559204936027527, "logps/chosen": -0.00013811652024742216, "logps/rejected": -2.501310348510742, "loss": 0.2778, "nll_loss": 0.0694592297077179, "rewards/accuracies": 1.0, "rewards/chosen": -1.3811652024742216e-05, "rewards/margins": 0.2501172423362732, "rewards/rejected": -0.2501310408115387, "step": 13992 }, { "epoch": 9.677040110650069, "grad_norm": 3.7536604404449463, "learning_rate": 1.794221607499616e-06, "log_odds_chosen": 11.987735748291016, "log_odds_ratio": -1.530145527794957e-05, "logits/chosen": -0.35948991775512695, "logits/rejected": -0.2973094582557678, "logps/chosen": -0.00017380458302795887, "logps/rejected": -2.8969004154205322, "loss": 0.3402, "nll_loss": 0.08504220843315125, "rewards/accuracies": 1.0, "rewards/chosen": -1.738046012178529e-05, "rewards/margins": 0.2896726429462433, "rewards/rejected": -0.2896900475025177, "step": 13993 }, { "epoch": 9.677731673582295, "grad_norm": 3.7913312911987305, "learning_rate": 1.7903795912094668e-06, "log_odds_chosen": 10.949464797973633, "log_odds_ratio": -6.61562880850397e-05, "logits/chosen": -0.39711904525756836, "logits/rejected": -0.4672422409057617, "logps/chosen": -0.000134202215122059, "logps/rejected": -1.9377895593643188, "loss": 0.3026, "nll_loss": 0.07563167065382004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3420220057014376e-05, "rewards/margins": 0.1937655359506607, "rewards/rejected": -0.19377896189689636, "step": 13994 }, { "epoch": 9.678423236514522, "grad_norm": 2.5605897903442383, "learning_rate": 1.7865375749193176e-06, "log_odds_chosen": 10.683719635009766, "log_odds_ratio": -0.00015510033699683845, "logits/chosen": 0.04777078330516815, "logits/rejected": -0.020088866353034973, "logps/chosen": -0.0005237612058408558, "logps/rejected": -2.3416595458984375, "loss": 0.2956, "nll_loss": 0.0738738402724266, "rewards/accuracies": 1.0, "rewards/chosen": -5.237612276687287e-05, "rewards/margins": 0.23411358892917633, "rewards/rejected": -0.2341659516096115, "step": 13995 }, { "epoch": 9.679114799446749, "grad_norm": 2.956864595413208, "learning_rate": 1.7826955586291687e-06, "log_odds_chosen": 10.05513858795166, "log_odds_ratio": -0.0002707884996198118, "logits/chosen": -0.1300402730703354, "logits/rejected": -0.1567571759223938, "logps/chosen": -0.00043549295514822006, "logps/rejected": -1.9624979496002197, "loss": 0.2858, "nll_loss": 0.0714205801486969, "rewards/accuracies": 1.0, "rewards/chosen": -4.3549294787226245e-05, "rewards/margins": 0.1962062418460846, "rewards/rejected": -0.1962497979402542, "step": 13996 }, { "epoch": 9.679806362378976, "grad_norm": 3.7831461429595947, "learning_rate": 1.7788535423390197e-06, "log_odds_chosen": 10.855228424072266, "log_odds_ratio": -5.6292657973244786e-05, "logits/chosen": -0.22141483426094055, "logits/rejected": -0.22882868349552155, "logps/chosen": -0.0003413744561839849, "logps/rejected": -2.0822219848632812, "loss": 0.4089, "nll_loss": 0.10222725570201874, "rewards/accuracies": 1.0, "rewards/chosen": -3.41374434356112e-05, "rewards/margins": 0.20818807184696198, "rewards/rejected": -0.20822221040725708, "step": 13997 }, { "epoch": 9.680497925311203, "grad_norm": 3.2380263805389404, "learning_rate": 1.7750115260488705e-06, "log_odds_chosen": 11.399898529052734, "log_odds_ratio": -2.5035338694578968e-05, "logits/chosen": -0.20180514454841614, "logits/rejected": -0.18763187527656555, "logps/chosen": -0.00019273949146736413, "logps/rejected": -2.396219253540039, "loss": 0.3219, "nll_loss": 0.08048289269208908, "rewards/accuracies": 1.0, "rewards/chosen": -1.9273949874332175e-05, "rewards/margins": 0.23960265517234802, "rewards/rejected": -0.23962193727493286, "step": 13998 }, { "epoch": 9.68118948824343, "grad_norm": 2.449667453765869, "learning_rate": 1.7711695097587212e-06, "log_odds_chosen": 9.883462905883789, "log_odds_ratio": -0.0003530489047989249, "logits/chosen": -0.47197049856185913, "logits/rejected": -0.38646137714385986, "logps/chosen": -0.0005334184388630092, "logps/rejected": -1.6608302593231201, "loss": 0.303, "nll_loss": 0.07572139799594879, "rewards/accuracies": 1.0, "rewards/chosen": -5.33418424311094e-05, "rewards/margins": 0.1660296767950058, "rewards/rejected": -0.16608302295207977, "step": 13999 }, { "epoch": 9.681881051175656, "grad_norm": 2.9047279357910156, "learning_rate": 1.7673274934685724e-06, "log_odds_chosen": 10.272042274475098, "log_odds_ratio": -0.015053609386086464, "logits/chosen": -0.09649567306041718, "logits/rejected": -0.07403193414211273, "logps/chosen": -0.004163277801126242, "logps/rejected": -1.9767775535583496, "loss": 0.2624, "nll_loss": 0.06408600509166718, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004163277626503259, "rewards/margins": 0.19726142287254333, "rewards/rejected": -0.19767774641513824, "step": 14000 }, { "epoch": 9.682572614107883, "grad_norm": 2.8445565700531006, "learning_rate": 1.7634854771784233e-06, "log_odds_chosen": 11.194666862487793, "log_odds_ratio": -7.283731974894181e-05, "logits/chosen": -0.1809304803609848, "logits/rejected": -0.24531452357769012, "logps/chosen": -0.00035821396159008145, "logps/rejected": -2.685930013656616, "loss": 0.3867, "nll_loss": 0.09666649252176285, "rewards/accuracies": 1.0, "rewards/chosen": -3.582139470381662e-05, "rewards/margins": 0.26855719089508057, "rewards/rejected": -0.2685930132865906, "step": 14001 }, { "epoch": 9.68326417704011, "grad_norm": 3.799746513366699, "learning_rate": 1.759643460888274e-06, "log_odds_chosen": 11.528482437133789, "log_odds_ratio": -3.163529618177563e-05, "logits/chosen": -0.36452963948249817, "logits/rejected": -0.4858834743499756, "logps/chosen": -7.771144737489522e-05, "logps/rejected": -1.7862298488616943, "loss": 0.4304, "nll_loss": 0.10759089887142181, "rewards/accuracies": 1.0, "rewards/chosen": -7.771145646984223e-06, "rewards/margins": 0.1786152422428131, "rewards/rejected": -0.1786229908466339, "step": 14002 }, { "epoch": 9.683955739972337, "grad_norm": 3.057624101638794, "learning_rate": 1.7558014445981253e-06, "log_odds_chosen": 8.92990779876709, "log_odds_ratio": -0.0007533504394814372, "logits/chosen": -0.5203484296798706, "logits/rejected": -0.4924953579902649, "logps/chosen": -0.000799480068963021, "logps/rejected": -1.1398868560791016, "loss": 0.2708, "nll_loss": 0.06761455535888672, "rewards/accuracies": 1.0, "rewards/chosen": -7.994801126187667e-05, "rewards/margins": 0.11390873789787292, "rewards/rejected": -0.11398869752883911, "step": 14003 }, { "epoch": 9.684647302904564, "grad_norm": 3.033080577850342, "learning_rate": 1.7519594283079762e-06, "log_odds_chosen": 10.711329460144043, "log_odds_ratio": -4.0234081097878516e-05, "logits/chosen": 0.03341685235500336, "logits/rejected": 0.015133783221244812, "logps/chosen": -0.00013347969797905535, "logps/rejected": -1.9615447521209717, "loss": 0.3171, "nll_loss": 0.07927147299051285, "rewards/accuracies": 1.0, "rewards/chosen": -1.3347968888410833e-05, "rewards/margins": 0.19614112377166748, "rewards/rejected": -0.19615447521209717, "step": 14004 }, { "epoch": 9.68533886583679, "grad_norm": 2.9966540336608887, "learning_rate": 1.748117412017827e-06, "log_odds_chosen": 11.109415054321289, "log_odds_ratio": -0.00030459227855317295, "logits/chosen": -0.2702929973602295, "logits/rejected": -0.3118366301059723, "logps/chosen": -0.0005579779390245676, "logps/rejected": -2.5292561054229736, "loss": 0.2598, "nll_loss": 0.06492118537425995, "rewards/accuracies": 1.0, "rewards/chosen": -5.579779826803133e-05, "rewards/margins": 0.2528698444366455, "rewards/rejected": -0.2529256343841553, "step": 14005 }, { "epoch": 9.686030428769017, "grad_norm": 4.1271209716796875, "learning_rate": 1.7442753957276781e-06, "log_odds_chosen": 10.542098999023438, "log_odds_ratio": -0.00016738972044549882, "logits/chosen": -0.23534046113491058, "logits/rejected": -0.4081006348133087, "logps/chosen": -0.00025860758614726365, "logps/rejected": -1.8603882789611816, "loss": 0.3008, "nll_loss": 0.07518626004457474, "rewards/accuracies": 1.0, "rewards/chosen": -2.5860757887130603e-05, "rewards/margins": 0.18601296842098236, "rewards/rejected": -0.18603883683681488, "step": 14006 }, { "epoch": 9.686721991701244, "grad_norm": 4.879711627960205, "learning_rate": 1.7404333794375289e-06, "log_odds_chosen": 12.63100814819336, "log_odds_ratio": -2.0368574041640386e-05, "logits/chosen": -0.2950138449668884, "logits/rejected": -0.385187566280365, "logps/chosen": -0.00016288757615257055, "logps/rejected": -3.399386167526245, "loss": 0.4562, "nll_loss": 0.1140429824590683, "rewards/accuracies": 1.0, "rewards/chosen": -1.6288757251459174e-05, "rewards/margins": 0.33992233872413635, "rewards/rejected": -0.3399386405944824, "step": 14007 }, { "epoch": 9.687413554633471, "grad_norm": 3.5236964225769043, "learning_rate": 1.7365913631473799e-06, "log_odds_chosen": 10.040060997009277, "log_odds_ratio": -0.00023548353055957705, "logits/chosen": -0.4871000051498413, "logits/rejected": -0.46208345890045166, "logps/chosen": -0.00017160980496555567, "logps/rejected": -1.4598802328109741, "loss": 0.5502, "nll_loss": 0.13752034306526184, "rewards/accuracies": 1.0, "rewards/chosen": -1.7160980860353447e-05, "rewards/margins": 0.14597088098526, "rewards/rejected": -0.14598803222179413, "step": 14008 }, { "epoch": 9.688105117565698, "grad_norm": 3.6535000801086426, "learning_rate": 1.7327493468572306e-06, "log_odds_chosen": 9.759282112121582, "log_odds_ratio": -0.00044369022361934185, "logits/chosen": -0.16132892668247223, "logits/rejected": -0.22028201818466187, "logps/chosen": -0.0008336616447195411, "logps/rejected": -1.7865080833435059, "loss": 0.4812, "nll_loss": 0.12024475634098053, "rewards/accuracies": 1.0, "rewards/chosen": -8.336616156157106e-05, "rewards/margins": 0.17856745421886444, "rewards/rejected": -0.17865081131458282, "step": 14009 }, { "epoch": 9.688796680497925, "grad_norm": 4.613658905029297, "learning_rate": 1.7289073305670818e-06, "log_odds_chosen": 11.295829772949219, "log_odds_ratio": -0.00011873643234139308, "logits/chosen": 0.01779123581945896, "logits/rejected": -0.0042826710268855095, "logps/chosen": -0.0004925333778373897, "logps/rejected": -2.9457249641418457, "loss": 0.5291, "nll_loss": 0.13225911557674408, "rewards/accuracies": 1.0, "rewards/chosen": -4.9253339966526255e-05, "rewards/margins": 0.2945232689380646, "rewards/rejected": -0.29457250237464905, "step": 14010 }, { "epoch": 9.689488243430151, "grad_norm": 3.4785525798797607, "learning_rate": 1.7250653142769325e-06, "log_odds_chosen": 11.194210052490234, "log_odds_ratio": -0.00019617436919361353, "logits/chosen": -0.5711617469787598, "logits/rejected": -0.6833369731903076, "logps/chosen": -0.0003467965289019048, "logps/rejected": -2.1996796131134033, "loss": 0.4503, "nll_loss": 0.11255437135696411, "rewards/accuracies": 1.0, "rewards/chosen": -3.467965143499896e-05, "rewards/margins": 0.21993330121040344, "rewards/rejected": -0.21996797621250153, "step": 14011 }, { "epoch": 9.690179806362378, "grad_norm": 3.3688061237335205, "learning_rate": 1.7212232979867835e-06, "log_odds_chosen": 10.838143348693848, "log_odds_ratio": -2.946642052847892e-05, "logits/chosen": 0.006400890648365021, "logits/rejected": 0.06212189793586731, "logps/chosen": -0.0001350159727735445, "logps/rejected": -1.9210996627807617, "loss": 0.3849, "nll_loss": 0.09623268991708755, "rewards/accuracies": 1.0, "rewards/chosen": -1.3501597095455509e-05, "rewards/margins": 0.19209645688533783, "rewards/rejected": -0.19210997223854065, "step": 14012 }, { "epoch": 9.690871369294605, "grad_norm": 4.070370674133301, "learning_rate": 1.7173812816966347e-06, "log_odds_chosen": 10.671753883361816, "log_odds_ratio": -0.00023755105212330818, "logits/chosen": 0.0998513251543045, "logits/rejected": 0.1142597496509552, "logps/chosen": -0.0007802036125212908, "logps/rejected": -2.0398645401000977, "loss": 0.3847, "nll_loss": 0.09615591168403625, "rewards/accuracies": 1.0, "rewards/chosen": -7.80203627073206e-05, "rewards/margins": 0.20390844345092773, "rewards/rejected": -0.20398646593093872, "step": 14013 }, { "epoch": 9.691562932226832, "grad_norm": 3.36007022857666, "learning_rate": 1.7135392654064854e-06, "log_odds_chosen": 12.014041900634766, "log_odds_ratio": -9.326961844635662e-06, "logits/chosen": -0.37761032581329346, "logits/rejected": -0.6232584118843079, "logps/chosen": -0.00013342205784283578, "logps/rejected": -2.4795708656311035, "loss": 0.3714, "nll_loss": 0.0928611233830452, "rewards/accuracies": 1.0, "rewards/chosen": -1.33422063299804e-05, "rewards/margins": 0.24794375896453857, "rewards/rejected": -0.24795711040496826, "step": 14014 }, { "epoch": 9.692254495159059, "grad_norm": 3.4856786727905273, "learning_rate": 1.7096972491163364e-06, "log_odds_chosen": 12.255451202392578, "log_odds_ratio": -7.380766874121036e-06, "logits/chosen": -0.35279661417007446, "logits/rejected": -0.3281911015510559, "logps/chosen": -0.00017538013344164938, "logps/rejected": -3.205848217010498, "loss": 0.4235, "nll_loss": 0.10587650537490845, "rewards/accuracies": 1.0, "rewards/chosen": -1.753801370796282e-05, "rewards/margins": 0.3205672800540924, "rewards/rejected": -0.320584774017334, "step": 14015 }, { "epoch": 9.692946058091286, "grad_norm": 2.7672152519226074, "learning_rate": 1.7058552328261871e-06, "log_odds_chosen": 12.378790855407715, "log_odds_ratio": -9.387677710037678e-05, "logits/chosen": -0.11450943350791931, "logits/rejected": -0.2026877999305725, "logps/chosen": -0.00015812195488251746, "logps/rejected": -3.1105387210845947, "loss": 0.2959, "nll_loss": 0.07396487146615982, "rewards/accuracies": 1.0, "rewards/chosen": -1.5812194760655984e-05, "rewards/margins": 0.311038076877594, "rewards/rejected": -0.3110538721084595, "step": 14016 }, { "epoch": 9.693637621023512, "grad_norm": 2.6869497299194336, "learning_rate": 1.7020132165360383e-06, "log_odds_chosen": 10.067146301269531, "log_odds_ratio": -0.00024698564084246755, "logits/chosen": -0.6013608574867249, "logits/rejected": -0.6596066951751709, "logps/chosen": -0.00031246000435203314, "logps/rejected": -1.901986002922058, "loss": 0.2878, "nll_loss": 0.07193221896886826, "rewards/accuracies": 1.0, "rewards/chosen": -3.124599970760755e-05, "rewards/margins": 0.1901673525571823, "rewards/rejected": -0.190198615193367, "step": 14017 }, { "epoch": 9.69432918395574, "grad_norm": 4.2703094482421875, "learning_rate": 1.698171200245889e-06, "log_odds_chosen": 9.801267623901367, "log_odds_ratio": -0.0002336654142709449, "logits/chosen": -0.11382442712783813, "logits/rejected": -0.18640612065792084, "logps/chosen": -0.0006799734546802938, "logps/rejected": -1.7594125270843506, "loss": 0.444, "nll_loss": 0.11097525805234909, "rewards/accuracies": 1.0, "rewards/chosen": -6.7997352743987e-05, "rewards/margins": 0.1758732795715332, "rewards/rejected": -0.17594125866889954, "step": 14018 }, { "epoch": 9.695020746887966, "grad_norm": 3.3787882328033447, "learning_rate": 1.69432918395574e-06, "log_odds_chosen": 11.508310317993164, "log_odds_ratio": -7.473472214769572e-05, "logits/chosen": -0.5702272653579712, "logits/rejected": -0.5165907144546509, "logps/chosen": -0.0003059214213863015, "logps/rejected": -2.46089243888855, "loss": 0.3218, "nll_loss": 0.08045011758804321, "rewards/accuracies": 1.0, "rewards/chosen": -3.059214213863015e-05, "rewards/margins": 0.2460586428642273, "rewards/rejected": -0.24608924984931946, "step": 14019 }, { "epoch": 9.695712309820193, "grad_norm": 3.9944427013397217, "learning_rate": 1.6904871676655912e-06, "log_odds_chosen": 10.361598014831543, "log_odds_ratio": -0.0032728288788348436, "logits/chosen": 0.045859482139348984, "logits/rejected": 0.14110252261161804, "logps/chosen": -0.023929793387651443, "logps/rejected": -2.801180839538574, "loss": 0.3426, "nll_loss": 0.0853225439786911, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023929793387651443, "rewards/margins": 0.27772510051727295, "rewards/rejected": -0.28011807799339294, "step": 14020 }, { "epoch": 9.69640387275242, "grad_norm": 3.809525728225708, "learning_rate": 1.686645151375442e-06, "log_odds_chosen": 13.034082412719727, "log_odds_ratio": -7.79420315666357e-06, "logits/chosen": 0.15093648433685303, "logits/rejected": 0.14870363473892212, "logps/chosen": -0.00018803677812684327, "logps/rejected": -3.9701337814331055, "loss": 0.427, "nll_loss": 0.1067470982670784, "rewards/accuracies": 1.0, "rewards/chosen": -1.8803677448886447e-05, "rewards/margins": 0.39699459075927734, "rewards/rejected": -0.3970133662223816, "step": 14021 }, { "epoch": 9.697095435684647, "grad_norm": 3.7040340900421143, "learning_rate": 1.6828031350852927e-06, "log_odds_chosen": 10.49951171875, "log_odds_ratio": -0.00019893118587788194, "logits/chosen": -0.5705597400665283, "logits/rejected": -0.540901780128479, "logps/chosen": -0.00047733585233800113, "logps/rejected": -1.9048418998718262, "loss": 0.3755, "nll_loss": 0.09386002272367477, "rewards/accuracies": 1.0, "rewards/chosen": -4.773358887177892e-05, "rewards/margins": 0.190436452627182, "rewards/rejected": -0.1904841959476471, "step": 14022 }, { "epoch": 9.697786998616873, "grad_norm": 3.799828290939331, "learning_rate": 1.6789611187951438e-06, "log_odds_chosen": 11.155202865600586, "log_odds_ratio": -7.58213282097131e-05, "logits/chosen": -0.449858158826828, "logits/rejected": -0.48318931460380554, "logps/chosen": -0.00021174876019358635, "logps/rejected": -1.8886882066726685, "loss": 0.3028, "nll_loss": 0.07569512724876404, "rewards/accuracies": 1.0, "rewards/chosen": -2.1174875655560754e-05, "rewards/margins": 0.1888476461172104, "rewards/rejected": -0.18886882066726685, "step": 14023 }, { "epoch": 9.6984785615491, "grad_norm": 3.440284490585327, "learning_rate": 1.6751191025049948e-06, "log_odds_chosen": 10.947023391723633, "log_odds_ratio": -7.497541810153052e-05, "logits/chosen": -0.039105042815208435, "logits/rejected": -0.09824025630950928, "logps/chosen": -0.0003042893949896097, "logps/rejected": -2.4186761379241943, "loss": 0.2919, "nll_loss": 0.0729638859629631, "rewards/accuracies": 1.0, "rewards/chosen": -3.0428940590354614e-05, "rewards/margins": 0.24183718860149384, "rewards/rejected": -0.24186763167381287, "step": 14024 }, { "epoch": 9.699170124481327, "grad_norm": 4.413259983062744, "learning_rate": 1.6712770862148456e-06, "log_odds_chosen": 10.910194396972656, "log_odds_ratio": -0.0002598028222564608, "logits/chosen": 0.010938696563243866, "logits/rejected": 0.030111849308013916, "logps/chosen": -0.00035252771340310574, "logps/rejected": -2.6153974533081055, "loss": 0.3088, "nll_loss": 0.07716973125934601, "rewards/accuracies": 1.0, "rewards/chosen": -3.5252771340310574e-05, "rewards/margins": 0.26150450110435486, "rewards/rejected": -0.2615397572517395, "step": 14025 }, { "epoch": 9.699861687413554, "grad_norm": 2.3560967445373535, "learning_rate": 1.6674350699246965e-06, "log_odds_chosen": 11.574258804321289, "log_odds_ratio": -5.458891610032879e-05, "logits/chosen": -0.3051443099975586, "logits/rejected": -0.31580889225006104, "logps/chosen": -0.00020766808302141726, "logps/rejected": -2.629638910293579, "loss": 0.2865, "nll_loss": 0.07160836458206177, "rewards/accuracies": 1.0, "rewards/chosen": -2.0766809029737487e-05, "rewards/margins": 0.2629431188106537, "rewards/rejected": -0.2629638910293579, "step": 14026 }, { "epoch": 9.70055325034578, "grad_norm": 2.802356481552124, "learning_rate": 1.6635930536345477e-06, "log_odds_chosen": 10.46877384185791, "log_odds_ratio": -6.196806498337537e-05, "logits/chosen": -0.28871241211891174, "logits/rejected": -0.23788052797317505, "logps/chosen": -0.00011537078535184264, "logps/rejected": -1.5588353872299194, "loss": 0.2662, "nll_loss": 0.06653650850057602, "rewards/accuracies": 1.0, "rewards/chosen": -1.1537079444678966e-05, "rewards/margins": 0.15587201714515686, "rewards/rejected": -0.1558835655450821, "step": 14027 }, { "epoch": 9.701244813278008, "grad_norm": 2.6260311603546143, "learning_rate": 1.6597510373443984e-06, "log_odds_chosen": 9.96330451965332, "log_odds_ratio": -0.00013285898603498936, "logits/chosen": -0.005995616316795349, "logits/rejected": 0.18605396151542664, "logps/chosen": -0.0004144633130636066, "logps/rejected": -1.845277190208435, "loss": 0.2063, "nll_loss": 0.05155202001333237, "rewards/accuracies": 1.0, "rewards/chosen": -4.1446332033956423e-05, "rewards/margins": 0.1844862699508667, "rewards/rejected": -0.18452772498130798, "step": 14028 }, { "epoch": 9.701936376210234, "grad_norm": 2.945845365524292, "learning_rate": 1.6559090210542492e-06, "log_odds_chosen": 11.385857582092285, "log_odds_ratio": -1.8085802366840653e-05, "logits/chosen": -0.001389726996421814, "logits/rejected": -0.02717539668083191, "logps/chosen": -8.305132359964773e-05, "logps/rejected": -2.020082712173462, "loss": 0.2967, "nll_loss": 0.07418528199195862, "rewards/accuracies": 1.0, "rewards/chosen": -8.305132723762654e-06, "rewards/margins": 0.2019999772310257, "rewards/rejected": -0.20200827717781067, "step": 14029 }, { "epoch": 9.702627939142461, "grad_norm": 2.373103141784668, "learning_rate": 1.6520670047641004e-06, "log_odds_chosen": 10.701430320739746, "log_odds_ratio": -8.253100531874225e-05, "logits/chosen": -0.5777424573898315, "logits/rejected": -0.654435396194458, "logps/chosen": -0.0001618588576093316, "logps/rejected": -2.025099515914917, "loss": 0.2145, "nll_loss": 0.053607311099767685, "rewards/accuracies": 1.0, "rewards/chosen": -1.618588612473104e-05, "rewards/margins": 0.20249375700950623, "rewards/rejected": -0.20250995457172394, "step": 14030 }, { "epoch": 9.703319502074688, "grad_norm": 3.8033952713012695, "learning_rate": 1.6482249884739513e-06, "log_odds_chosen": 10.59766960144043, "log_odds_ratio": -0.0005519393598660827, "logits/chosen": -0.07405143231153488, "logits/rejected": -0.07039715349674225, "logps/chosen": -0.0005530752241611481, "logps/rejected": -2.344395637512207, "loss": 0.3574, "nll_loss": 0.08929930627346039, "rewards/accuracies": 1.0, "rewards/chosen": -5.530752241611481e-05, "rewards/margins": 0.23438426852226257, "rewards/rejected": -0.23443956673145294, "step": 14031 }, { "epoch": 9.704011065006915, "grad_norm": 4.2601141929626465, "learning_rate": 1.644382972183802e-06, "log_odds_chosen": 12.247598648071289, "log_odds_ratio": -0.00012447117478586733, "logits/chosen": 0.069765105843544, "logits/rejected": 0.03893115371465683, "logps/chosen": -0.00020795888849534094, "logps/rejected": -3.519665241241455, "loss": 0.4654, "nll_loss": 0.11634733527898788, "rewards/accuracies": 1.0, "rewards/chosen": -2.079589103232138e-05, "rewards/margins": 0.351945698261261, "rewards/rejected": -0.3519665002822876, "step": 14032 }, { "epoch": 9.704702627939142, "grad_norm": 3.4472925662994385, "learning_rate": 1.6405409558936528e-06, "log_odds_chosen": 10.819717407226562, "log_odds_ratio": -5.671809412888251e-05, "logits/chosen": -0.0314161479473114, "logits/rejected": -0.18641072511672974, "logps/chosen": -0.00015227627591229975, "logps/rejected": -1.8129750490188599, "loss": 0.4014, "nll_loss": 0.10035184770822525, "rewards/accuracies": 1.0, "rewards/chosen": -1.5227628864522558e-05, "rewards/margins": 0.18128228187561035, "rewards/rejected": -0.18129751086235046, "step": 14033 }, { "epoch": 9.705394190871369, "grad_norm": 2.233940839767456, "learning_rate": 1.636698939603504e-06, "log_odds_chosen": 10.519308090209961, "log_odds_ratio": -6.494284025393426e-05, "logits/chosen": 0.16154597699642181, "logits/rejected": 0.16004878282546997, "logps/chosen": -0.00028291059425100684, "logps/rejected": -1.9881476163864136, "loss": 0.2677, "nll_loss": 0.06692343205213547, "rewards/accuracies": 1.0, "rewards/chosen": -2.829106233548373e-05, "rewards/margins": 0.19878648221492767, "rewards/rejected": -0.1988147497177124, "step": 14034 }, { "epoch": 9.706085753803595, "grad_norm": 3.530731439590454, "learning_rate": 1.632856923313355e-06, "log_odds_chosen": 10.872820854187012, "log_odds_ratio": -0.0001081073860405013, "logits/chosen": -0.13833673298358917, "logits/rejected": -0.13667529821395874, "logps/chosen": -0.00039223130443133414, "logps/rejected": -2.3692827224731445, "loss": 0.3193, "nll_loss": 0.07980798184871674, "rewards/accuracies": 1.0, "rewards/chosen": -3.922313408111222e-05, "rewards/margins": 0.23688904941082, "rewards/rejected": -0.2369282841682434, "step": 14035 }, { "epoch": 9.706777316735822, "grad_norm": 3.414837598800659, "learning_rate": 1.6290149070232057e-06, "log_odds_chosen": 10.04426383972168, "log_odds_ratio": -0.00011767448449973017, "logits/chosen": -0.22231408953666687, "logits/rejected": -0.1594626009464264, "logps/chosen": -0.0003671125741675496, "logps/rejected": -1.8401079177856445, "loss": 0.4563, "nll_loss": 0.11405658721923828, "rewards/accuracies": 1.0, "rewards/chosen": -3.671126250992529e-05, "rewards/margins": 0.18397407233715057, "rewards/rejected": -0.1840108036994934, "step": 14036 }, { "epoch": 9.707468879668049, "grad_norm": 3.143347978591919, "learning_rate": 1.6251728907330569e-06, "log_odds_chosen": 10.427767753601074, "log_odds_ratio": -0.00018148086383007467, "logits/chosen": -0.10258910059928894, "logits/rejected": -0.12172472476959229, "logps/chosen": -0.0007102714153006673, "logps/rejected": -2.601733684539795, "loss": 0.3016, "nll_loss": 0.0753839984536171, "rewards/accuracies": 1.0, "rewards/chosen": -7.102714153006673e-05, "rewards/margins": 0.26010236144065857, "rewards/rejected": -0.26017338037490845, "step": 14037 }, { "epoch": 9.708160442600276, "grad_norm": 2.39312744140625, "learning_rate": 1.6213308744429078e-06, "log_odds_chosen": 11.521331787109375, "log_odds_ratio": -6.457349081756547e-05, "logits/chosen": -0.6590924859046936, "logits/rejected": -0.7450777888298035, "logps/chosen": -0.000132291839690879, "logps/rejected": -2.1752631664276123, "loss": 0.2123, "nll_loss": 0.05306620895862579, "rewards/accuracies": 1.0, "rewards/chosen": -1.322918433288578e-05, "rewards/margins": 0.2175130695104599, "rewards/rejected": -0.21752631664276123, "step": 14038 }, { "epoch": 9.708852005532503, "grad_norm": 3.785318613052368, "learning_rate": 1.6174888581527586e-06, "log_odds_chosen": 11.339120864868164, "log_odds_ratio": -0.0005568203050643206, "logits/chosen": -0.38010960817337036, "logits/rejected": -0.3457328975200653, "logps/chosen": -0.0005193545948714018, "logps/rejected": -3.0819265842437744, "loss": 0.4132, "nll_loss": 0.10325435549020767, "rewards/accuracies": 1.0, "rewards/chosen": -5.1935458031948656e-05, "rewards/margins": 0.30814072489738464, "rewards/rejected": -0.3081927001476288, "step": 14039 }, { "epoch": 9.70954356846473, "grad_norm": 2.951171636581421, "learning_rate": 1.6136468418626098e-06, "log_odds_chosen": 12.310433387756348, "log_odds_ratio": -1.6809477529022843e-05, "logits/chosen": -0.5569875836372375, "logits/rejected": -0.508375346660614, "logps/chosen": -0.00024335035413969308, "logps/rejected": -3.2429840564727783, "loss": 0.3514, "nll_loss": 0.08783870935440063, "rewards/accuracies": 1.0, "rewards/chosen": -2.433503686916083e-05, "rewards/margins": 0.32427406311035156, "rewards/rejected": -0.3242984116077423, "step": 14040 }, { "epoch": 9.710235131396956, "grad_norm": 3.1393754482269287, "learning_rate": 1.6098048255724605e-06, "log_odds_chosen": 10.602744102478027, "log_odds_ratio": -6.755004142178223e-05, "logits/chosen": -0.24596892297267914, "logits/rejected": -0.28606274724006653, "logps/chosen": -0.00040816832915879786, "logps/rejected": -2.2621121406555176, "loss": 0.3513, "nll_loss": 0.08782707154750824, "rewards/accuracies": 1.0, "rewards/chosen": -4.081683437107131e-05, "rewards/margins": 0.2261703908443451, "rewards/rejected": -0.22621119022369385, "step": 14041 }, { "epoch": 9.710926694329183, "grad_norm": 2.1659014225006104, "learning_rate": 1.6059628092823115e-06, "log_odds_chosen": 11.307205200195312, "log_odds_ratio": -0.0003245777916163206, "logits/chosen": -0.3967916965484619, "logits/rejected": -0.4056175947189331, "logps/chosen": -0.0033080969005823135, "logps/rejected": -2.641173839569092, "loss": 0.2131, "nll_loss": 0.05323443189263344, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003308096493128687, "rewards/margins": 0.26378658413887024, "rewards/rejected": -0.26411738991737366, "step": 14042 }, { "epoch": 9.71161825726141, "grad_norm": 2.597486734390259, "learning_rate": 1.6021207929921622e-06, "log_odds_chosen": 10.689266204833984, "log_odds_ratio": -0.0007740746950730681, "logits/chosen": 0.0743560940027237, "logits/rejected": 0.11470898985862732, "logps/chosen": -0.004252062179148197, "logps/rejected": -3.105828046798706, "loss": 0.2565, "nll_loss": 0.06405926495790482, "rewards/accuracies": 1.0, "rewards/chosen": -0.00042520626448094845, "rewards/margins": 0.3101575970649719, "rewards/rejected": -0.31058281660079956, "step": 14043 }, { "epoch": 9.712309820193637, "grad_norm": 2.9477479457855225, "learning_rate": 1.5982787767020134e-06, "log_odds_chosen": 10.683327674865723, "log_odds_ratio": -0.0007875992450863123, "logits/chosen": 0.42970871925354004, "logits/rejected": 0.38266023993492126, "logps/chosen": -0.000497455068398267, "logps/rejected": -2.429025411605835, "loss": 0.2053, "nll_loss": 0.05125453323125839, "rewards/accuracies": 1.0, "rewards/chosen": -4.9745507567422464e-05, "rewards/margins": 0.24285279214382172, "rewards/rejected": -0.24290254712104797, "step": 14044 }, { "epoch": 9.713001383125864, "grad_norm": 3.5443201065063477, "learning_rate": 1.5944367604118641e-06, "log_odds_chosen": 11.064702987670898, "log_odds_ratio": -0.0003790265473071486, "logits/chosen": -0.3422020673751831, "logits/rejected": -0.36000049114227295, "logps/chosen": -0.0004840421606786549, "logps/rejected": -2.4914891719818115, "loss": 0.4072, "nll_loss": 0.10174968838691711, "rewards/accuracies": 1.0, "rewards/chosen": -4.840421388507821e-05, "rewards/margins": 0.24910053610801697, "rewards/rejected": -0.24914893507957458, "step": 14045 }, { "epoch": 9.71369294605809, "grad_norm": 2.883375883102417, "learning_rate": 1.590594744121715e-06, "log_odds_chosen": 9.93283748626709, "log_odds_ratio": -0.00039232539711520076, "logits/chosen": -0.042561545968055725, "logits/rejected": -0.27848780155181885, "logps/chosen": -0.0007758078863844275, "logps/rejected": -1.8716483116149902, "loss": 0.2954, "nll_loss": 0.0738009661436081, "rewards/accuracies": 1.0, "rewards/chosen": -7.758078572805971e-05, "rewards/margins": 0.1870872676372528, "rewards/rejected": -0.18716485798358917, "step": 14046 }, { "epoch": 9.714384508990317, "grad_norm": 2.6166839599609375, "learning_rate": 1.5867527278315663e-06, "log_odds_chosen": 11.069677352905273, "log_odds_ratio": -2.7290288926451467e-05, "logits/chosen": -0.5398693680763245, "logits/rejected": -0.5795652270317078, "logps/chosen": -0.00012801631237380207, "logps/rejected": -2.0744822025299072, "loss": 0.2607, "nll_loss": 0.06516466289758682, "rewards/accuracies": 1.0, "rewards/chosen": -1.280163269257173e-05, "rewards/margins": 0.20743544399738312, "rewards/rejected": -0.20744824409484863, "step": 14047 }, { "epoch": 9.715076071922544, "grad_norm": 3.791430711746216, "learning_rate": 1.582910711541417e-06, "log_odds_chosen": 10.99785041809082, "log_odds_ratio": -4.2141480662394315e-05, "logits/chosen": 0.2726088762283325, "logits/rejected": 0.2046135663986206, "logps/chosen": -0.0003061083552893251, "logps/rejected": -2.4845235347747803, "loss": 0.3876, "nll_loss": 0.09688502550125122, "rewards/accuracies": 1.0, "rewards/chosen": -3.061083407374099e-05, "rewards/margins": 0.24842172861099243, "rewards/rejected": -0.2484523504972458, "step": 14048 }, { "epoch": 9.715767634854771, "grad_norm": 4.07421350479126, "learning_rate": 1.579068695251268e-06, "log_odds_chosen": 10.782313346862793, "log_odds_ratio": -3.886540434905328e-05, "logits/chosen": 0.03747441619634628, "logits/rejected": -0.0050468891859054565, "logps/chosen": -0.00014611854567192495, "logps/rejected": -1.8385167121887207, "loss": 0.3533, "nll_loss": 0.08832766115665436, "rewards/accuracies": 1.0, "rewards/chosen": -1.4611853657697793e-05, "rewards/margins": 0.18383705615997314, "rewards/rejected": -0.18385165929794312, "step": 14049 }, { "epoch": 9.716459197786998, "grad_norm": 3.4308087825775146, "learning_rate": 1.5752266789611187e-06, "log_odds_chosen": 12.339305877685547, "log_odds_ratio": -7.251634542626562e-06, "logits/chosen": -0.43659594655036926, "logits/rejected": -0.44429975748062134, "logps/chosen": -6.530222890432924e-05, "logps/rejected": -2.59944486618042, "loss": 0.3676, "nll_loss": 0.09190993756055832, "rewards/accuracies": 1.0, "rewards/chosen": -6.530222890432924e-06, "rewards/margins": 0.25993794202804565, "rewards/rejected": -0.25994449853897095, "step": 14050 }, { "epoch": 9.717150760719225, "grad_norm": 2.8448033332824707, "learning_rate": 1.57138466267097e-06, "log_odds_chosen": 8.927064895629883, "log_odds_ratio": -0.0007420360925607383, "logits/chosen": -0.3754725754261017, "logits/rejected": -0.4090180993080139, "logps/chosen": -0.0015167912933975458, "logps/rejected": -1.7306694984436035, "loss": 0.3376, "nll_loss": 0.08433172106742859, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015167915262281895, "rewards/margins": 0.1729152649641037, "rewards/rejected": -0.17306694388389587, "step": 14051 }, { "epoch": 9.717842323651452, "grad_norm": 4.093862533569336, "learning_rate": 1.5675426463808206e-06, "log_odds_chosen": 12.276299476623535, "log_odds_ratio": -9.109542588703334e-06, "logits/chosen": -0.23019427061080933, "logits/rejected": -0.30176666378974915, "logps/chosen": -9.692131425254047e-05, "logps/rejected": -2.8041582107543945, "loss": 0.5366, "nll_loss": 0.1341451108455658, "rewards/accuracies": 1.0, "rewards/chosen": -9.692131243355107e-06, "rewards/margins": 0.28040611743927, "rewards/rejected": -0.280415803194046, "step": 14052 }, { "epoch": 9.718533886583678, "grad_norm": 3.794600486755371, "learning_rate": 1.5637006300906716e-06, "log_odds_chosen": 10.613341331481934, "log_odds_ratio": -0.00021652613941114396, "logits/chosen": 0.2754127085208893, "logits/rejected": 0.1355481892824173, "logps/chosen": -0.0013486103853210807, "logps/rejected": -2.005495548248291, "loss": 0.4825, "nll_loss": 0.12059873342514038, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001348610530840233, "rewards/margins": 0.2004147171974182, "rewards/rejected": -0.20054957270622253, "step": 14053 }, { "epoch": 9.719225449515905, "grad_norm": 2.87605619430542, "learning_rate": 1.5598586138005226e-06, "log_odds_chosen": 11.48611831665039, "log_odds_ratio": -0.0001008669423754327, "logits/chosen": -0.7142854332923889, "logits/rejected": -0.7215863466262817, "logps/chosen": -0.0003769434697460383, "logps/rejected": -2.316336154937744, "loss": 0.3937, "nll_loss": 0.09841687232255936, "rewards/accuracies": 1.0, "rewards/chosen": -3.769434624700807e-05, "rewards/margins": 0.23159591853618622, "rewards/rejected": -0.23163361847400665, "step": 14054 }, { "epoch": 9.719917012448132, "grad_norm": 4.028690338134766, "learning_rate": 1.5560165975103735e-06, "log_odds_chosen": 11.993674278259277, "log_odds_ratio": -1.3392660548561253e-05, "logits/chosen": 0.05926985293626785, "logits/rejected": -0.0457332506775856, "logps/chosen": -0.0001267297484446317, "logps/rejected": -2.551068067550659, "loss": 0.3361, "nll_loss": 0.08402892202138901, "rewards/accuracies": 1.0, "rewards/chosen": -1.267297466256423e-05, "rewards/margins": 0.25509414076805115, "rewards/rejected": -0.2551068067550659, "step": 14055 }, { "epoch": 9.720608575380359, "grad_norm": 4.912097930908203, "learning_rate": 1.5521745812202243e-06, "log_odds_chosen": 11.190658569335938, "log_odds_ratio": -4.0823182644089684e-05, "logits/chosen": -0.05123640596866608, "logits/rejected": -0.08516818284988403, "logps/chosen": -0.00025188413565047085, "logps/rejected": -2.3816003799438477, "loss": 0.4144, "nll_loss": 0.10360151529312134, "rewards/accuracies": 1.0, "rewards/chosen": -2.5188412109855562e-05, "rewards/margins": 0.23813486099243164, "rewards/rejected": -0.23816005885601044, "step": 14056 }, { "epoch": 9.721300138312586, "grad_norm": 4.297684192657471, "learning_rate": 1.5483325649300755e-06, "log_odds_chosen": 11.821123123168945, "log_odds_ratio": -2.1368965462897904e-05, "logits/chosen": -0.24659223854541779, "logits/rejected": -0.33540570735931396, "logps/chosen": -0.000154897992615588, "logps/rejected": -2.5837361812591553, "loss": 0.3602, "nll_loss": 0.09005637466907501, "rewards/accuracies": 1.0, "rewards/chosen": -1.5489800716750324e-05, "rewards/margins": 0.2583581209182739, "rewards/rejected": -0.2583736181259155, "step": 14057 }, { "epoch": 9.721991701244812, "grad_norm": 2.7587263584136963, "learning_rate": 1.5444905486399264e-06, "log_odds_chosen": 10.231828689575195, "log_odds_ratio": -0.00010190495231654495, "logits/chosen": -0.07780411839485168, "logits/rejected": -0.18501782417297363, "logps/chosen": -0.0002624362532515079, "logps/rejected": -1.7737529277801514, "loss": 0.2391, "nll_loss": 0.059752993285655975, "rewards/accuracies": 1.0, "rewards/chosen": -2.624362605274655e-05, "rewards/margins": 0.1773490607738495, "rewards/rejected": -0.17737528681755066, "step": 14058 }, { "epoch": 9.72268326417704, "grad_norm": 3.6375765800476074, "learning_rate": 1.5406485323497772e-06, "log_odds_chosen": 10.606718063354492, "log_odds_ratio": -4.8315603635273874e-05, "logits/chosen": -0.718386173248291, "logits/rejected": -0.7688785195350647, "logps/chosen": -0.0005027923616580665, "logps/rejected": -2.3363733291625977, "loss": 0.3991, "nll_loss": 0.0997781902551651, "rewards/accuracies": 1.0, "rewards/chosen": -5.0279242714168504e-05, "rewards/margins": 0.23358707129955292, "rewards/rejected": -0.23363733291625977, "step": 14059 }, { "epoch": 9.723374827109266, "grad_norm": 4.166598796844482, "learning_rate": 1.5368065160596281e-06, "log_odds_chosen": 11.076985359191895, "log_odds_ratio": -3.0693649023305625e-05, "logits/chosen": -0.4552866220474243, "logits/rejected": -0.4488312005996704, "logps/chosen": -0.0007647222955711186, "logps/rejected": -2.5392777919769287, "loss": 0.5003, "nll_loss": 0.1250789910554886, "rewards/accuracies": 1.0, "rewards/chosen": -7.647223537787795e-05, "rewards/margins": 0.2538512945175171, "rewards/rejected": -0.2539277672767639, "step": 14060 }, { "epoch": 9.724066390041493, "grad_norm": 3.4372000694274902, "learning_rate": 1.532964499769479e-06, "log_odds_chosen": 10.523962020874023, "log_odds_ratio": -0.0003658041823655367, "logits/chosen": -0.061703141778707504, "logits/rejected": -0.15716056525707245, "logps/chosen": -0.0014027312863618135, "logps/rejected": -2.742238759994507, "loss": 0.2906, "nll_loss": 0.07262028753757477, "rewards/accuracies": 1.0, "rewards/chosen": -0.00014027312863618135, "rewards/margins": 0.27408361434936523, "rewards/rejected": -0.27422386407852173, "step": 14061 }, { "epoch": 9.72475795297372, "grad_norm": 3.768315553665161, "learning_rate": 1.52912248347933e-06, "log_odds_chosen": 10.620694160461426, "log_odds_ratio": -8.276679000118747e-05, "logits/chosen": 0.28818756341934204, "logits/rejected": 0.23599982261657715, "logps/chosen": -0.00031609414145350456, "logps/rejected": -1.734850525856018, "loss": 0.4504, "nll_loss": 0.11258061230182648, "rewards/accuracies": 1.0, "rewards/chosen": -3.160941560054198e-05, "rewards/margins": 0.1734534502029419, "rewards/rejected": -0.17348507046699524, "step": 14062 }, { "epoch": 9.725449515905947, "grad_norm": 3.50580096244812, "learning_rate": 1.525280467189181e-06, "log_odds_chosen": 10.935100555419922, "log_odds_ratio": -5.6851513363653794e-05, "logits/chosen": 0.04261190444231033, "logits/rejected": 0.08111607283353806, "logps/chosen": -0.0002661603211890906, "logps/rejected": -2.5173263549804688, "loss": 0.3063, "nll_loss": 0.07656723260879517, "rewards/accuracies": 1.0, "rewards/chosen": -2.6616033210302703e-05, "rewards/margins": 0.25170600414276123, "rewards/rejected": -0.25173258781433105, "step": 14063 }, { "epoch": 9.726141078838173, "grad_norm": 3.284005880355835, "learning_rate": 1.5214384508990318e-06, "log_odds_chosen": 11.105777740478516, "log_odds_ratio": -2.1458101400639862e-05, "logits/chosen": -0.54180508852005, "logits/rejected": -0.5738696455955505, "logps/chosen": -0.0002820981899276376, "logps/rejected": -2.3394103050231934, "loss": 0.3549, "nll_loss": 0.08873511850833893, "rewards/accuracies": 1.0, "rewards/chosen": -2.8209818992763758e-05, "rewards/margins": 0.23391284048557281, "rewards/rejected": -0.23394104838371277, "step": 14064 }, { "epoch": 9.7268326417704, "grad_norm": 2.8504526615142822, "learning_rate": 1.517596434608883e-06, "log_odds_chosen": 11.10925579071045, "log_odds_ratio": -8.551261998945847e-05, "logits/chosen": 0.4728356897830963, "logits/rejected": 0.6037408113479614, "logps/chosen": -0.00030483852606266737, "logps/rejected": -2.6879568099975586, "loss": 0.2293, "nll_loss": 0.05732450261712074, "rewards/accuracies": 1.0, "rewards/chosen": -3.0483850423479453e-05, "rewards/margins": 0.2687651813030243, "rewards/rejected": -0.2687956690788269, "step": 14065 }, { "epoch": 9.727524204702627, "grad_norm": 3.3343968391418457, "learning_rate": 1.5137544183187337e-06, "log_odds_chosen": 10.605195999145508, "log_odds_ratio": -0.00013343783211894333, "logits/chosen": -0.3856803774833679, "logits/rejected": -0.45289888978004456, "logps/chosen": -0.00033911195350810885, "logps/rejected": -1.8947904109954834, "loss": 0.3649, "nll_loss": 0.09120230376720428, "rewards/accuracies": 1.0, "rewards/chosen": -3.391119389561936e-05, "rewards/margins": 0.1894451379776001, "rewards/rejected": -0.1894790530204773, "step": 14066 }, { "epoch": 9.728215767634854, "grad_norm": 3.192932367324829, "learning_rate": 1.5099124020285846e-06, "log_odds_chosen": 11.377513885498047, "log_odds_ratio": -3.7108355172676966e-05, "logits/chosen": -0.3142525851726532, "logits/rejected": -0.3797409236431122, "logps/chosen": -9.22659964999184e-05, "logps/rejected": -2.035445213317871, "loss": 0.3276, "nll_loss": 0.08189854025840759, "rewards/accuracies": 1.0, "rewards/chosen": -9.22660001378972e-06, "rewards/margins": 0.2035352885723114, "rewards/rejected": -0.2035445272922516, "step": 14067 }, { "epoch": 9.72890733056708, "grad_norm": 2.9361958503723145, "learning_rate": 1.5060703857384356e-06, "log_odds_chosen": 11.60842514038086, "log_odds_ratio": -2.894986027968116e-05, "logits/chosen": -0.3391823470592499, "logits/rejected": -0.36785149574279785, "logps/chosen": -0.00030659729964099824, "logps/rejected": -3.0317115783691406, "loss": 0.3211, "nll_loss": 0.08026459068059921, "rewards/accuracies": 1.0, "rewards/chosen": -3.06597285089083e-05, "rewards/margins": 0.3031404912471771, "rewards/rejected": -0.30317115783691406, "step": 14068 }, { "epoch": 9.729598893499308, "grad_norm": 2.9858357906341553, "learning_rate": 1.5022283694482866e-06, "log_odds_chosen": 12.086821556091309, "log_odds_ratio": -3.983519127359614e-05, "logits/chosen": -0.3994476795196533, "logits/rejected": -0.456809937953949, "logps/chosen": -0.00023153756046667695, "logps/rejected": -3.2445569038391113, "loss": 0.3513, "nll_loss": 0.08781701326370239, "rewards/accuracies": 1.0, "rewards/chosen": -2.3153756046667695e-05, "rewards/margins": 0.32443252205848694, "rewards/rejected": -0.3244556784629822, "step": 14069 }, { "epoch": 9.730290456431534, "grad_norm": 3.037654161453247, "learning_rate": 1.4983863531581375e-06, "log_odds_chosen": 11.219244003295898, "log_odds_ratio": -1.625965887797065e-05, "logits/chosen": -0.147754967212677, "logits/rejected": -0.2532964050769806, "logps/chosen": -0.00011137408000649884, "logps/rejected": -2.091094493865967, "loss": 0.3681, "nll_loss": 0.0920318067073822, "rewards/accuracies": 1.0, "rewards/chosen": -1.1137407454953063e-05, "rewards/margins": 0.20909832417964935, "rewards/rejected": -0.20910947024822235, "step": 14070 }, { "epoch": 9.730982019363761, "grad_norm": 4.7024455070495605, "learning_rate": 1.4945443368679883e-06, "log_odds_chosen": 10.624773025512695, "log_odds_ratio": -7.10480599082075e-05, "logits/chosen": -0.6009021997451782, "logits/rejected": -0.5314351320266724, "logps/chosen": -0.0009751567849889398, "logps/rejected": -2.0029373168945312, "loss": 0.3716, "nll_loss": 0.09289713203907013, "rewards/accuracies": 1.0, "rewards/chosen": -9.75156799540855e-05, "rewards/margins": 0.20019623637199402, "rewards/rejected": -0.20029374957084656, "step": 14071 }, { "epoch": 9.731673582295988, "grad_norm": 3.3876678943634033, "learning_rate": 1.4907023205778394e-06, "log_odds_chosen": 11.144800186157227, "log_odds_ratio": -0.00040390901267528534, "logits/chosen": -0.40883949398994446, "logits/rejected": -0.3468576669692993, "logps/chosen": -0.0008895749342627823, "logps/rejected": -2.9034881591796875, "loss": 0.2573, "nll_loss": 0.06428197026252747, "rewards/accuracies": 1.0, "rewards/chosen": -8.895749488146976e-05, "rewards/margins": 0.2902598977088928, "rewards/rejected": -0.2903488278388977, "step": 14072 }, { "epoch": 9.732365145228215, "grad_norm": 3.847625732421875, "learning_rate": 1.4868603042876902e-06, "log_odds_chosen": 11.44306468963623, "log_odds_ratio": -7.029860717011616e-05, "logits/chosen": -0.3494161069393158, "logits/rejected": -0.381717711687088, "logps/chosen": -0.00027682489599101245, "logps/rejected": -2.269761085510254, "loss": 0.3172, "nll_loss": 0.07928379625082016, "rewards/accuracies": 1.0, "rewards/chosen": -2.7682490326697007e-05, "rewards/margins": 0.22694844007492065, "rewards/rejected": -0.22697609663009644, "step": 14073 }, { "epoch": 9.733056708160442, "grad_norm": 3.9256770610809326, "learning_rate": 1.4830182879975412e-06, "log_odds_chosen": 11.631043434143066, "log_odds_ratio": -9.395475353812799e-05, "logits/chosen": -0.08485838770866394, "logits/rejected": -0.20589959621429443, "logps/chosen": -0.00025275791995227337, "logps/rejected": -2.6631438732147217, "loss": 0.3937, "nll_loss": 0.09841006249189377, "rewards/accuracies": 1.0, "rewards/chosen": -2.5275794541812502e-05, "rewards/margins": 0.2662891149520874, "rewards/rejected": -0.26631438732147217, "step": 14074 }, { "epoch": 9.733748271092669, "grad_norm": 3.187096118927002, "learning_rate": 1.4791762717073921e-06, "log_odds_chosen": 10.009942054748535, "log_odds_ratio": -0.0011052628979086876, "logits/chosen": -0.5161486268043518, "logits/rejected": -0.5463173389434814, "logps/chosen": -0.0007038781768642366, "logps/rejected": -1.7462892532348633, "loss": 0.695, "nll_loss": 0.17363958060741425, "rewards/accuracies": 1.0, "rewards/chosen": -7.038781768642366e-05, "rewards/margins": 0.17455855011940002, "rewards/rejected": -0.17462894320487976, "step": 14075 }, { "epoch": 9.734439834024897, "grad_norm": 3.8338570594787598, "learning_rate": 1.475334255417243e-06, "log_odds_chosen": 10.597758293151855, "log_odds_ratio": -7.467473187716678e-05, "logits/chosen": -0.2802301347255707, "logits/rejected": -0.3332068622112274, "logps/chosen": -0.00017298806051257998, "logps/rejected": -1.433288335800171, "loss": 0.3614, "nll_loss": 0.09034281224012375, "rewards/accuracies": 1.0, "rewards/chosen": -1.7298805687460117e-05, "rewards/margins": 0.1433115303516388, "rewards/rejected": -0.14332884550094604, "step": 14076 }, { "epoch": 9.735131396957122, "grad_norm": 5.281632423400879, "learning_rate": 1.471492239127094e-06, "log_odds_chosen": 10.167672157287598, "log_odds_ratio": -0.0002352109004277736, "logits/chosen": 0.035262420773506165, "logits/rejected": 0.0025625228881835938, "logps/chosen": -0.0005891511682420969, "logps/rejected": -1.637753963470459, "loss": 0.7929, "nll_loss": 0.1982031613588333, "rewards/accuracies": 1.0, "rewards/chosen": -5.891511682420969e-05, "rewards/margins": 0.16371646523475647, "rewards/rejected": -0.16377539932727814, "step": 14077 }, { "epoch": 9.73582295988935, "grad_norm": 4.715962886810303, "learning_rate": 1.4676502228369448e-06, "log_odds_chosen": 10.590758323669434, "log_odds_ratio": -9.746826253831387e-05, "logits/chosen": -0.19473972916603088, "logits/rejected": -0.21724197268486023, "logps/chosen": -0.00022266368614509702, "logps/rejected": -1.9685781002044678, "loss": 0.425, "nll_loss": 0.1062331572175026, "rewards/accuracies": 1.0, "rewards/chosen": -2.2266369342105463e-05, "rewards/margins": 0.19683553278446198, "rewards/rejected": -0.19685781002044678, "step": 14078 }, { "epoch": 9.736514522821576, "grad_norm": 3.3531792163848877, "learning_rate": 1.4638082065467957e-06, "log_odds_chosen": 9.810626029968262, "log_odds_ratio": -0.0006292449543252587, "logits/chosen": -0.11490876972675323, "logits/rejected": -0.23583844304084778, "logps/chosen": -0.0012257093330845237, "logps/rejected": -1.5976797342300415, "loss": 0.5769, "nll_loss": 0.1441582441329956, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012257092748768628, "rewards/margins": 0.15964540839195251, "rewards/rejected": -0.1597679853439331, "step": 14079 }, { "epoch": 9.737206085753805, "grad_norm": 3.5185587406158447, "learning_rate": 1.459966190256647e-06, "log_odds_chosen": 10.632923126220703, "log_odds_ratio": -0.00013407410006038845, "logits/chosen": -0.056553907692432404, "logits/rejected": -0.1727689951658249, "logps/chosen": -0.0005626532947644591, "logps/rejected": -1.8834426403045654, "loss": 0.2605, "nll_loss": 0.06510130316019058, "rewards/accuracies": 1.0, "rewards/chosen": -5.6265325838467106e-05, "rewards/margins": 0.18828797340393066, "rewards/rejected": -0.18834425508975983, "step": 14080 }, { "epoch": 9.73789764868603, "grad_norm": 3.742751359939575, "learning_rate": 1.4561241739664977e-06, "log_odds_chosen": 10.546586990356445, "log_odds_ratio": -0.0005286220693960786, "logits/chosen": -0.020138412714004517, "logits/rejected": -0.12044990062713623, "logps/chosen": -0.0017503878334537148, "logps/rejected": -2.42435359954834, "loss": 0.4216, "nll_loss": 0.10534273833036423, "rewards/accuracies": 1.0, "rewards/chosen": -0.00017503878916613758, "rewards/margins": 0.24226033687591553, "rewards/rejected": -0.24243536591529846, "step": 14081 }, { "epoch": 9.738589211618258, "grad_norm": 3.0572142601013184, "learning_rate": 1.4522821576763486e-06, "log_odds_chosen": 8.781017303466797, "log_odds_ratio": -0.0013535015750676394, "logits/chosen": 0.25028395652770996, "logits/rejected": 0.33531999588012695, "logps/chosen": -0.001614319160580635, "logps/rejected": -1.6745295524597168, "loss": 0.4157, "nll_loss": 0.10378843545913696, "rewards/accuracies": 1.0, "rewards/chosen": -0.00016143193352036178, "rewards/margins": 0.167291522026062, "rewards/rejected": -0.16745296120643616, "step": 14082 }, { "epoch": 9.739280774550483, "grad_norm": 3.526397943496704, "learning_rate": 1.4484401413861996e-06, "log_odds_chosen": 11.265777587890625, "log_odds_ratio": -5.912484266445972e-05, "logits/chosen": -0.6020369529724121, "logits/rejected": -0.6356661915779114, "logps/chosen": -0.00014268027734942734, "logps/rejected": -1.9638330936431885, "loss": 0.3327, "nll_loss": 0.08316794037818909, "rewards/accuracies": 1.0, "rewards/chosen": -1.4268028280639555e-05, "rewards/margins": 0.19636905193328857, "rewards/rejected": -0.19638332724571228, "step": 14083 }, { "epoch": 9.739972337482712, "grad_norm": 3.143381357192993, "learning_rate": 1.4445981250960506e-06, "log_odds_chosen": 12.47369384765625, "log_odds_ratio": -2.576132283138577e-05, "logits/chosen": -0.23411519825458527, "logits/rejected": -0.39735114574432373, "logps/chosen": -0.0001566018327139318, "logps/rejected": -3.5539331436157227, "loss": 0.3253, "nll_loss": 0.0813322439789772, "rewards/accuracies": 1.0, "rewards/chosen": -1.566018363519106e-05, "rewards/margins": 0.3553776741027832, "rewards/rejected": -0.35539335012435913, "step": 14084 }, { "epoch": 9.740663900414937, "grad_norm": 3.986825942993164, "learning_rate": 1.4407561088059015e-06, "log_odds_chosen": 10.651996612548828, "log_odds_ratio": -6.728620792273432e-05, "logits/chosen": 0.058619819581508636, "logits/rejected": 0.007906101644039154, "logps/chosen": -0.0003832450311165303, "logps/rejected": -2.0348761081695557, "loss": 0.5029, "nll_loss": 0.12572330236434937, "rewards/accuracies": 1.0, "rewards/chosen": -3.8324506022036076e-05, "rewards/margins": 0.2034492790699005, "rewards/rejected": -0.2034876048564911, "step": 14085 }, { "epoch": 9.741355463347166, "grad_norm": 3.143564224243164, "learning_rate": 1.4369140925157523e-06, "log_odds_chosen": 10.742366790771484, "log_odds_ratio": -0.00018134075799025595, "logits/chosen": -0.2008328139781952, "logits/rejected": -0.19422832131385803, "logps/chosen": -0.00031903735361993313, "logps/rejected": -2.401066780090332, "loss": 0.6903, "nll_loss": 0.17255018651485443, "rewards/accuracies": 1.0, "rewards/chosen": -3.190373536199331e-05, "rewards/margins": 0.2400747835636139, "rewards/rejected": -0.2401067018508911, "step": 14086 }, { "epoch": 9.74204702627939, "grad_norm": 3.724709987640381, "learning_rate": 1.4330720762256032e-06, "log_odds_chosen": 11.134361267089844, "log_odds_ratio": -0.00011482177069410682, "logits/chosen": -0.2082558125257492, "logits/rejected": -0.3872357904911041, "logps/chosen": -0.00015495551633648574, "logps/rejected": -1.985574722290039, "loss": 0.4225, "nll_loss": 0.10561499744653702, "rewards/accuracies": 1.0, "rewards/chosen": -1.5495550542254932e-05, "rewards/margins": 0.19854196906089783, "rewards/rejected": -0.19855748116970062, "step": 14087 }, { "epoch": 9.74273858921162, "grad_norm": 3.1600730419158936, "learning_rate": 1.4292300599354542e-06, "log_odds_chosen": 11.408288955688477, "log_odds_ratio": -1.736992817313876e-05, "logits/chosen": -0.055584512650966644, "logits/rejected": -0.17049917578697205, "logps/chosen": -0.000266480928985402, "logps/rejected": -2.9400196075439453, "loss": 0.3232, "nll_loss": 0.08080445230007172, "rewards/accuracies": 1.0, "rewards/chosen": -2.6648091079550795e-05, "rewards/margins": 0.293975293636322, "rewards/rejected": -0.2940019369125366, "step": 14088 }, { "epoch": 9.743430152143844, "grad_norm": 2.75327467918396, "learning_rate": 1.4253880436453051e-06, "log_odds_chosen": 9.775684356689453, "log_odds_ratio": -0.00013458194734994322, "logits/chosen": -0.45873522758483887, "logits/rejected": -0.43231844902038574, "logps/chosen": -0.0002898464153986424, "logps/rejected": -1.6174246072769165, "loss": 0.3435, "nll_loss": 0.08585477620363235, "rewards/accuracies": 1.0, "rewards/chosen": -2.898464117606636e-05, "rewards/margins": 0.16171349585056305, "rewards/rejected": -0.16174247860908508, "step": 14089 }, { "epoch": 9.744121715076073, "grad_norm": 4.789213180541992, "learning_rate": 1.4215460273551559e-06, "log_odds_chosen": 11.066625595092773, "log_odds_ratio": -0.00024053329252637923, "logits/chosen": 0.1454014629125595, "logits/rejected": 0.29094868898391724, "logps/chosen": -0.00014530331827700138, "logps/rejected": -2.399749279022217, "loss": 0.7863, "nll_loss": 0.1965598315000534, "rewards/accuracies": 1.0, "rewards/chosen": -1.453033291909378e-05, "rewards/margins": 0.23996040225028992, "rewards/rejected": -0.23997493088245392, "step": 14090 }, { "epoch": 9.744813278008298, "grad_norm": 3.5538330078125, "learning_rate": 1.417704011065007e-06, "log_odds_chosen": 10.93421745300293, "log_odds_ratio": -0.0001779910089680925, "logits/chosen": -0.421045184135437, "logits/rejected": -0.3911956548690796, "logps/chosen": -0.00013790714729111642, "logps/rejected": -2.344021797180176, "loss": 0.4124, "nll_loss": 0.10307969152927399, "rewards/accuracies": 1.0, "rewards/chosen": -1.3790715456707403e-05, "rewards/margins": 0.23438839614391327, "rewards/rejected": -0.23440217971801758, "step": 14091 }, { "epoch": 9.745504840940526, "grad_norm": 3.080880641937256, "learning_rate": 1.413861994774858e-06, "log_odds_chosen": 13.354349136352539, "log_odds_ratio": -5.88564853387652e-06, "logits/chosen": -0.5883753895759583, "logits/rejected": -0.5448213219642639, "logps/chosen": -7.660967821720988e-05, "logps/rejected": -3.779228687286377, "loss": 0.2712, "nll_loss": 0.06779833137989044, "rewards/accuracies": 1.0, "rewards/chosen": -7.660968549316749e-06, "rewards/margins": 0.37791526317596436, "rewards/rejected": -0.3779228925704956, "step": 14092 }, { "epoch": 9.746196403872752, "grad_norm": 4.574988842010498, "learning_rate": 1.4100199784847088e-06, "log_odds_chosen": 11.685251235961914, "log_odds_ratio": -0.0002756851026788354, "logits/chosen": 0.12553559243679047, "logits/rejected": 0.0865488052368164, "logps/chosen": -0.0003497231809888035, "logps/rejected": -2.762530565261841, "loss": 0.4701, "nll_loss": 0.11749804019927979, "rewards/accuracies": 1.0, "rewards/chosen": -3.4972315916093066e-05, "rewards/margins": 0.27621808648109436, "rewards/rejected": -0.2762530446052551, "step": 14093 }, { "epoch": 9.74688796680498, "grad_norm": 2.5818259716033936, "learning_rate": 1.4061779621945597e-06, "log_odds_chosen": 12.163631439208984, "log_odds_ratio": -4.104728577658534e-05, "logits/chosen": -0.267192542552948, "logits/rejected": -0.366630882024765, "logps/chosen": -0.00015620008343830705, "logps/rejected": -2.9856929779052734, "loss": 0.2523, "nll_loss": 0.06308251619338989, "rewards/accuracies": 1.0, "rewards/chosen": -1.5620007616234943e-05, "rewards/margins": 0.2985536754131317, "rewards/rejected": -0.29856929183006287, "step": 14094 }, { "epoch": 9.747579529737205, "grad_norm": 2.803849697113037, "learning_rate": 1.4023359459044107e-06, "log_odds_chosen": 10.526567459106445, "log_odds_ratio": -5.086886085337028e-05, "logits/chosen": -0.4517253637313843, "logits/rejected": -0.5013437271118164, "logps/chosen": -0.00023212407540995628, "logps/rejected": -1.738023042678833, "loss": 0.2217, "nll_loss": 0.05542437359690666, "rewards/accuracies": 1.0, "rewards/chosen": -2.3212407540995628e-05, "rewards/margins": 0.17377911508083344, "rewards/rejected": -0.17380231618881226, "step": 14095 }, { "epoch": 9.748271092669434, "grad_norm": 3.0401809215545654, "learning_rate": 1.3984939296142617e-06, "log_odds_chosen": 10.050030708312988, "log_odds_ratio": -0.0003560371696949005, "logits/chosen": 0.07473733276128769, "logits/rejected": 0.1357569545507431, "logps/chosen": -0.0013845351058989763, "logps/rejected": -1.765272617340088, "loss": 0.3357, "nll_loss": 0.0838976800441742, "rewards/accuracies": 1.0, "rewards/chosen": -0.0001384535280521959, "rewards/margins": 0.17638880014419556, "rewards/rejected": -0.1765272468328476, "step": 14096 }, { "epoch": 9.748962655601659, "grad_norm": 2.90578556060791, "learning_rate": 1.3946519133241126e-06, "log_odds_chosen": 11.149063110351562, "log_odds_ratio": -3.0750688893022016e-05, "logits/chosen": 0.1374821811914444, "logits/rejected": 0.033144012093544006, "logps/chosen": -0.00010943791130557656, "logps/rejected": -2.0966885089874268, "loss": 0.3125, "nll_loss": 0.07813267409801483, "rewards/accuracies": 1.0, "rewards/chosen": -1.0943791494355537e-05, "rewards/margins": 0.20965790748596191, "rewards/rejected": -0.2096688449382782, "step": 14097 }, { "epoch": 9.749654218533887, "grad_norm": 2.961550712585449, "learning_rate": 1.3908098970339634e-06, "log_odds_chosen": 10.718459129333496, "log_odds_ratio": -0.00010540042421780527, "logits/chosen": -0.46274954080581665, "logits/rejected": -0.42374998331069946, "logps/chosen": -0.0003471905365586281, "logps/rejected": -2.034414291381836, "loss": 0.3213, "nll_loss": 0.08032146096229553, "rewards/accuracies": 1.0, "rewards/chosen": -3.471905438345857e-05, "rewards/margins": 0.2034067064523697, "rewards/rejected": -0.20344141125679016, "step": 14098 }, { "epoch": 9.750345781466113, "grad_norm": 3.322176218032837, "learning_rate": 1.3869678807438145e-06, "log_odds_chosen": 11.455936431884766, "log_odds_ratio": -4.330392403062433e-05, "logits/chosen": -0.07885990291833878, "logits/rejected": -0.1312030702829361, "logps/chosen": -9.869838686427101e-05, "logps/rejected": -2.3764195442199707, "loss": 0.3969, "nll_loss": 0.09921538084745407, "rewards/accuracies": 1.0, "rewards/chosen": -9.86983832262922e-06, "rewards/margins": 0.23763209581375122, "rewards/rejected": -0.23764196038246155, "step": 14099 }, { "epoch": 9.751037344398341, "grad_norm": 2.997025966644287, "learning_rate": 1.3831258644536653e-06, "log_odds_chosen": 11.968637466430664, "log_odds_ratio": -1.6224121281993575e-05, "logits/chosen": -0.6310082077980042, "logits/rejected": -0.5447397828102112, "logps/chosen": -4.5612454414367676e-05, "logps/rejected": -1.7740814685821533, "loss": 0.3439, "nll_loss": 0.08597853779792786, "rewards/accuracies": 1.0, "rewards/chosen": -4.561245532386238e-06, "rewards/margins": 0.17740359902381897, "rewards/rejected": -0.1774081587791443, "step": 14100 }, { "epoch": 9.751728907330566, "grad_norm": 3.8272266387939453, "learning_rate": 1.3792838481635163e-06, "log_odds_chosen": 11.665167808532715, "log_odds_ratio": -0.00018376082880422473, "logits/chosen": -0.4063437581062317, "logits/rejected": -0.3796899914741516, "logps/chosen": -0.0002080218109767884, "logps/rejected": -2.5441761016845703, "loss": 0.3762, "nll_loss": 0.09402955323457718, "rewards/accuracies": 1.0, "rewards/chosen": -2.080218109767884e-05, "rewards/margins": 0.25439679622650146, "rewards/rejected": -0.2544175982475281, "step": 14101 }, { "epoch": 9.752420470262795, "grad_norm": 3.6981446743011475, "learning_rate": 1.3754418318733672e-06, "log_odds_chosen": 10.13759994506836, "log_odds_ratio": -0.00014721702609676868, "logits/chosen": 0.0600414052605629, "logits/rejected": -0.016490664333105087, "logps/chosen": -0.0003654182655736804, "logps/rejected": -1.8190926313400269, "loss": 0.3529, "nll_loss": 0.08821893483400345, "rewards/accuracies": 1.0, "rewards/chosen": -3.65418272849638e-05, "rewards/margins": 0.18187272548675537, "rewards/rejected": -0.18190926313400269, "step": 14102 }, { "epoch": 9.75311203319502, "grad_norm": 3.9282379150390625, "learning_rate": 1.3715998155832182e-06, "log_odds_chosen": 11.93588924407959, "log_odds_ratio": -1.2728256479022093e-05, "logits/chosen": -0.49063944816589355, "logits/rejected": -0.5796186923980713, "logps/chosen": -0.0003584186197258532, "logps/rejected": -2.8449249267578125, "loss": 0.5295, "nll_loss": 0.1323769986629486, "rewards/accuracies": 1.0, "rewards/chosen": -3.584186197258532e-05, "rewards/margins": 0.28445667028427124, "rewards/rejected": -0.28449252247810364, "step": 14103 }, { "epoch": 9.753803596127248, "grad_norm": 3.6572389602661133, "learning_rate": 1.3677577992930691e-06, "log_odds_chosen": 11.625504493713379, "log_odds_ratio": -2.5438281227252446e-05, "logits/chosen": -0.12430409342050552, "logits/rejected": -0.2552037835121155, "logps/chosen": -9.886729822028428e-05, "logps/rejected": -2.3856372833251953, "loss": 0.4087, "nll_loss": 0.10217204689979553, "rewards/accuracies": 1.0, "rewards/chosen": -9.886729458230548e-06, "rewards/margins": 0.23855382204055786, "rewards/rejected": -0.23856371641159058, "step": 14104 }, { "epoch": 9.754495159059474, "grad_norm": 3.4196536540985107, "learning_rate": 1.3639157830029199e-06, "log_odds_chosen": 11.092937469482422, "log_odds_ratio": -0.00010019134788308293, "logits/chosen": -0.506554901599884, "logits/rejected": -0.5191491842269897, "logps/chosen": -0.000254131096880883, "logps/rejected": -2.1024882793426514, "loss": 0.41, "nll_loss": 0.10248668491840363, "rewards/accuracies": 1.0, "rewards/chosen": -2.5413108232896775e-05, "rewards/margins": 0.21022343635559082, "rewards/rejected": -0.21024884283542633, "step": 14105 }, { "epoch": 9.755186721991702, "grad_norm": 3.407672166824341, "learning_rate": 1.360073766712771e-06, "log_odds_chosen": 10.763035774230957, "log_odds_ratio": -6.49708672426641e-05, "logits/chosen": -0.11657628417015076, "logits/rejected": -0.12170778959989548, "logps/chosen": -0.0002784933312796056, "logps/rejected": -2.243659019470215, "loss": 0.3516, "nll_loss": 0.08789139986038208, "rewards/accuracies": 1.0, "rewards/chosen": -2.7849335310747847e-05, "rewards/margins": 0.22433805465698242, "rewards/rejected": -0.22436591982841492, "step": 14106 }, { "epoch": 9.755878284923927, "grad_norm": 2.614415168762207, "learning_rate": 1.3562317504226218e-06, "log_odds_chosen": 10.69253158569336, "log_odds_ratio": -3.6323992389952764e-05, "logits/chosen": -0.21887744963169098, "logits/rejected": -0.16397805511951447, "logps/chosen": -8.981427527032793e-05, "logps/rejected": -1.5139155387878418, "loss": 0.3785, "nll_loss": 0.09461327642202377, "rewards/accuracies": 1.0, "rewards/chosen": -8.981427527032793e-06, "rewards/margins": 0.15138258039951324, "rewards/rejected": -0.15139156579971313, "step": 14107 }, { "epoch": 9.756569847856156, "grad_norm": 3.89406681060791, "learning_rate": 1.3523897341324728e-06, "log_odds_chosen": 10.90273380279541, "log_odds_ratio": -0.0001827479136409238, "logits/chosen": -0.27213039994239807, "logits/rejected": -0.33823931217193604, "logps/chosen": -0.0006309926393441856, "logps/rejected": -2.185246467590332, "loss": 0.4562, "nll_loss": 0.11403346806764603, "rewards/accuracies": 1.0, "rewards/chosen": -6.309926538961008e-05, "rewards/margins": 0.21846157312393188, "rewards/rejected": -0.21852466464042664, "step": 14108 }, { "epoch": 9.75726141078838, "grad_norm": 3.9044744968414307, "learning_rate": 1.3485477178423237e-06, "log_odds_chosen": 10.96763801574707, "log_odds_ratio": -2.465880061208736e-05, "logits/chosen": -0.09640151262283325, "logits/rejected": -0.16491639614105225, "logps/chosen": -0.0002413954061921686, "logps/rejected": -2.499875545501709, "loss": 0.4665, "nll_loss": 0.11662641167640686, "rewards/accuracies": 1.0, "rewards/chosen": -2.413954098301474e-05, "rewards/margins": 0.2499634176492691, "rewards/rejected": -0.24998754262924194, "step": 14109 }, { "epoch": 9.75795297372061, "grad_norm": 2.787626028060913, "learning_rate": 1.3447057015521747e-06, "log_odds_chosen": 10.05321216583252, "log_odds_ratio": -6.740433309460059e-05, "logits/chosen": -0.26694992184638977, "logits/rejected": -0.28863584995269775, "logps/chosen": -0.00020133465295657516, "logps/rejected": -1.5156182050704956, "loss": 0.3212, "nll_loss": 0.08028507977724075, "rewards/accuracies": 1.0, "rewards/chosen": -2.0133464204263873e-05, "rewards/margins": 0.15154169499874115, "rewards/rejected": -0.15156181156635284, "step": 14110 }, { "epoch": 9.758644536652834, "grad_norm": 2.6926894187927246, "learning_rate": 1.3408636852620256e-06, "log_odds_chosen": 11.286189079284668, "log_odds_ratio": -2.019215389736928e-05, "logits/chosen": -0.7049732208251953, "logits/rejected": -0.7106414437294006, "logps/chosen": -8.282619091914967e-05, "logps/rejected": -1.9253034591674805, "loss": 0.3249, "nll_loss": 0.08121532201766968, "rewards/accuracies": 1.0, "rewards/chosen": -8.282619091914967e-06, "rewards/margins": 0.1925220787525177, "rewards/rejected": -0.19253036379814148, "step": 14111 }, { "epoch": 9.759336099585063, "grad_norm": 3.2615058422088623, "learning_rate": 1.3370216689718764e-06, "log_odds_chosen": 11.126974105834961, "log_odds_ratio": -2.0196584955556318e-05, "logits/chosen": -0.35881438851356506, "logits/rejected": -0.39509907364845276, "logps/chosen": -0.00017172233492601663, "logps/rejected": -2.2962188720703125, "loss": 0.3438, "nll_loss": 0.08595031499862671, "rewards/accuracies": 1.0, "rewards/chosen": -1.7172233128803782e-05, "rewards/margins": 0.22960472106933594, "rewards/rejected": -0.22962188720703125, "step": 14112 }, { "epoch": 9.760027662517288, "grad_norm": 2.589125871658325, "learning_rate": 1.3331796526817274e-06, "log_odds_chosen": 9.937051773071289, "log_odds_ratio": -0.0012398697435855865, "logits/chosen": -0.1563034951686859, "logits/rejected": -0.029356352984905243, "logps/chosen": -0.009704858995974064, "logps/rejected": -1.7757169008255005, "loss": 0.261, "nll_loss": 0.06513293832540512, "rewards/accuracies": 1.0, "rewards/chosen": -0.0009704858530312777, "rewards/margins": 0.17660120129585266, "rewards/rejected": -0.17757169902324677, "step": 14113 }, { "epoch": 9.760719225449517, "grad_norm": 3.666938066482544, "learning_rate": 1.3293376363915785e-06, "log_odds_chosen": 11.526802062988281, "log_odds_ratio": -4.0625600377097726e-05, "logits/chosen": 0.01854725182056427, "logits/rejected": -0.040144093334674835, "logps/chosen": -0.00013797509018331766, "logps/rejected": -2.309964895248413, "loss": 0.3983, "nll_loss": 0.09956299513578415, "rewards/accuracies": 1.0, "rewards/chosen": -1.3797509382129647e-05, "rewards/margins": 0.2309826910495758, "rewards/rejected": -0.2309964895248413, "step": 14114 }, { "epoch": 9.761410788381742, "grad_norm": 2.951451063156128, "learning_rate": 1.3254956201014293e-06, "log_odds_chosen": 11.107068061828613, "log_odds_ratio": -6.79503646097146e-05, "logits/chosen": -0.41734617948532104, "logits/rejected": -0.5176043510437012, "logps/chosen": -0.0003165987436659634, "logps/rejected": -2.4256505966186523, "loss": 0.3921, "nll_loss": 0.0980195701122284, "rewards/accuracies": 1.0, "rewards/chosen": -3.165987436659634e-05, "rewards/margins": 0.24253341555595398, "rewards/rejected": -0.2425650656223297, "step": 14115 }, { "epoch": 9.76210235131397, "grad_norm": 2.88873553276062, "learning_rate": 1.3216536038112802e-06, "log_odds_chosen": 10.593223571777344, "log_odds_ratio": -6.995137664489448e-05, "logits/chosen": -0.2237984538078308, "logits/rejected": -0.32712095975875854, "logps/chosen": -0.002185999881476164, "logps/rejected": -2.6842517852783203, "loss": 0.2768, "nll_loss": 0.06920219212770462, "rewards/accuracies": 1.0, "rewards/chosen": -0.00021859999105799943, "rewards/margins": 0.26820656657218933, "rewards/rejected": -0.2684251666069031, "step": 14116 }, { "epoch": 9.762793914246195, "grad_norm": 4.580874919891357, "learning_rate": 1.317811587521131e-06, "log_odds_chosen": 11.170866012573242, "log_odds_ratio": -2.7959044018643908e-05, "logits/chosen": -0.21377623081207275, "logits/rejected": -0.22888220846652985, "logps/chosen": -0.00028878834564238787, "logps/rejected": -2.9187369346618652, "loss": 0.3573, "nll_loss": 0.08931108564138412, "rewards/accuracies": 1.0, "rewards/chosen": -2.8878834200440906e-05, "rewards/margins": 0.29184481501579285, "rewards/rejected": -0.2918736934661865, "step": 14117 }, { "epoch": 9.763485477178424, "grad_norm": 2.4250340461730957, "learning_rate": 1.3139695712309822e-06, "log_odds_chosen": 10.966585159301758, "log_odds_ratio": -3.959906462114304e-05, "logits/chosen": -0.29696375131607056, "logits/rejected": -0.23120468854904175, "logps/chosen": -0.0001043882584781386, "logps/rejected": -1.4906551837921143, "loss": 0.3578, "nll_loss": 0.08944513648748398, "rewards/accuracies": 1.0, "rewards/chosen": -1.0438825484015979e-05, "rewards/margins": 0.14905507862567902, "rewards/rejected": -0.1490655243396759, "step": 14118 }, { "epoch": 9.76417704011065, "grad_norm": 4.123467445373535, "learning_rate": 1.310127554940833e-06, "log_odds_chosen": 10.684322357177734, "log_odds_ratio": -3.5656423278851435e-05, "logits/chosen": -0.05151619762182236, "logits/rejected": -0.14460456371307373, "logps/chosen": -0.000281482411082834, "logps/rejected": -2.3089118003845215, "loss": 0.3749, "nll_loss": 0.09372323751449585, "rewards/accuracies": 1.0, "rewards/chosen": -2.8148242563474923e-05, "rewards/margins": 0.2308630496263504, "rewards/rejected": -0.23089119791984558, "step": 14119 }, { "epoch": 9.764868603042878, "grad_norm": 4.185392379760742, "learning_rate": 1.3062855386506839e-06, "log_odds_chosen": 11.940038681030273, "log_odds_ratio": -7.481678494514199e-06, "logits/chosen": -0.09267014265060425, "logits/rejected": -0.2242162525653839, "logps/chosen": -0.00013214690261520445, "logps/rejected": -2.61649227142334, "loss": 0.4487, "nll_loss": 0.11216644197702408, "rewards/accuracies": 1.0, "rewards/chosen": -1.3214690625318326e-05, "rewards/margins": 0.26163601875305176, "rewards/rejected": -0.2616492211818695, "step": 14120 }, { "epoch": 9.765560165975103, "grad_norm": 3.458895683288574, "learning_rate": 1.3024435223605348e-06, "log_odds_chosen": 10.002806663513184, "log_odds_ratio": -0.00016798570868559182, "logits/chosen": 0.08603809773921967, "logits/rejected": 0.07021744549274445, "logps/chosen": -0.00020438554929569364, "logps/rejected": -1.3508172035217285, "loss": 0.5933, "nll_loss": 0.14831441640853882, "rewards/accuracies": 1.0, "rewards/chosen": -2.0438554201973602e-05, "rewards/margins": 0.13506127893924713, "rewards/rejected": -0.1350817084312439, "step": 14121 }, { "epoch": 9.766251728907331, "grad_norm": 2.6571707725524902, "learning_rate": 1.2986015060703858e-06, "log_odds_chosen": 11.517476081848145, "log_odds_ratio": -3.799406476900913e-05, "logits/chosen": -0.2700203061103821, "logits/rejected": -0.21803030371665955, "logps/chosen": -0.0003085459757130593, "logps/rejected": -3.0179555416107178, "loss": 0.2957, "nll_loss": 0.0739310160279274, "rewards/accuracies": 1.0, "rewards/chosen": -3.0854600481688976e-05, "rewards/margins": 0.30176469683647156, "rewards/rejected": -0.3017955422401428, "step": 14122 }, { "epoch": 9.766943291839558, "grad_norm": 4.420147895812988, "learning_rate": 1.2947594897802368e-06, "log_odds_chosen": 11.551900863647461, "log_odds_ratio": -0.00017292052507400513, "logits/chosen": -0.1685771644115448, "logits/rejected": -0.20248129963874817, "logps/chosen": -0.00016953478916548193, "logps/rejected": -2.5348751544952393, "loss": 0.483, "nll_loss": 0.12072944641113281, "rewards/accuracies": 1.0, "rewards/chosen": -1.695347782515455e-05, "rewards/margins": 0.2534705698490143, "rewards/rejected": -0.2534875273704529, "step": 14123 }, { "epoch": 9.767634854771785, "grad_norm": 3.5160269737243652, "learning_rate": 1.2909174734900875e-06, "log_odds_chosen": 11.154373168945312, "log_odds_ratio": -4.2867439333349466e-05, "logits/chosen": -0.006218772381544113, "logits/rejected": -0.006272992119193077, "logps/chosen": -0.0006437020492739975, "logps/rejected": -3.0088915824890137, "loss": 0.4935, "nll_loss": 0.12338031828403473, "rewards/accuracies": 1.0, "rewards/chosen": -6.43702078377828e-05, "rewards/margins": 0.3008247911930084, "rewards/rejected": -0.30088916420936584, "step": 14124 }, { "epoch": 9.768326417704012, "grad_norm": 2.4684836864471436, "learning_rate": 1.2870754571999387e-06, "log_odds_chosen": 10.261589050292969, "log_odds_ratio": -0.00013145655975677073, "logits/chosen": -0.29296594858169556, "logits/rejected": -0.2342432290315628, "logps/chosen": -0.00031795038376003504, "logps/rejected": -1.6441881656646729, "loss": 0.3265, "nll_loss": 0.08161911368370056, "rewards/accuracies": 1.0, "rewards/chosen": -3.179503983119503e-05, "rewards/margins": 0.16438701748847961, "rewards/rejected": -0.1644188016653061, "step": 14125 }, { "epoch": 9.769017980636239, "grad_norm": 2.7318315505981445, "learning_rate": 1.2832334409097896e-06, "log_odds_chosen": 11.669432640075684, "log_odds_ratio": -2.1845677110832185e-05, "logits/chosen": -0.42241886258125305, "logits/rejected": -0.42704257369041443, "logps/chosen": -0.0001758452272042632, "logps/rejected": -2.379481315612793, "loss": 0.2661, "nll_loss": 0.06652377545833588, "rewards/accuracies": 1.0, "rewards/chosen": -1.7584523448022082e-05, "rewards/margins": 0.2379305362701416, "rewards/rejected": -0.23794810473918915, "step": 14126 }, { "epoch": 9.769709543568466, "grad_norm": 4.480816841125488, "learning_rate": 1.2793914246196404e-06, "log_odds_chosen": 12.366174697875977, "log_odds_ratio": -6.5960643951257225e-06, "logits/chosen": -0.36253371834754944, "logits/rejected": -0.18785274028778076, "logps/chosen": -5.130483987159096e-05, "logps/rejected": -2.4808356761932373, "loss": 0.3719, "nll_loss": 0.09298262000083923, "rewards/accuracies": 1.0, "rewards/chosen": -5.130484169058036e-06, "rewards/margins": 0.24807843565940857, "rewards/rejected": -0.24808356165885925, "step": 14127 }, { "epoch": 9.770401106500692, "grad_norm": 4.40105676651001, "learning_rate": 1.2755494083294913e-06, "log_odds_chosen": 11.451199531555176, "log_odds_ratio": -2.284867878188379e-05, "logits/chosen": 0.037913352251052856, "logits/rejected": -0.0362299308180809, "logps/chosen": -0.0001210991686093621, "logps/rejected": -2.365833282470703, "loss": 0.4228, "nll_loss": 0.10570620000362396, "rewards/accuracies": 1.0, "rewards/chosen": -1.2109916497138329e-05, "rewards/margins": 0.23657123744487762, "rewards/rejected": -0.23658335208892822, "step": 14128 }, { "epoch": 9.77109266943292, "grad_norm": 3.923491954803467, "learning_rate": 1.2717073920393423e-06, "log_odds_chosen": 11.70336627960205, "log_odds_ratio": -3.302631012047641e-05, "logits/chosen": -0.0079636350274086, "logits/rejected": -0.04412021487951279, "logps/chosen": -0.00017817021580412984, "logps/rejected": -2.770371437072754, "loss": 0.3479, "nll_loss": 0.08698327839374542, "rewards/accuracies": 1.0, "rewards/chosen": -1.7817021216615103e-05, "rewards/margins": 0.27701929211616516, "rewards/rejected": -0.2770371437072754, "step": 14129 }, { "epoch": 9.771784232365146, "grad_norm": 2.299215078353882, "learning_rate": 1.2678653757491933e-06, "log_odds_chosen": 12.437353134155273, "log_odds_ratio": -7.882959835114889e-06, "logits/chosen": -0.9748541116714478, "logits/rejected": -1.050576090812683, "logps/chosen": -8.047391020227224e-05, "logps/rejected": -2.585092067718506, "loss": 0.3165, "nll_loss": 0.07913664728403091, "rewards/accuracies": 1.0, "rewards/chosen": -8.047391020227224e-06, "rewards/margins": 0.2585011422634125, "rewards/rejected": -0.25850921869277954, "step": 14130 }, { "epoch": 9.772475795297373, "grad_norm": 3.3570504188537598, "learning_rate": 1.2640233594590442e-06, "log_odds_chosen": 10.04572582244873, "log_odds_ratio": -0.00029344053473323584, "logits/chosen": -0.30483633279800415, "logits/rejected": -0.3217681646347046, "logps/chosen": -0.0002906577428802848, "logps/rejected": -1.6494433879852295, "loss": 0.3324, "nll_loss": 0.08306828141212463, "rewards/accuracies": 1.0, "rewards/chosen": -2.9065777198411524e-05, "rewards/margins": 0.1649152934551239, "rewards/rejected": -0.1649443656206131, "step": 14131 }, { "epoch": 9.7731673582296, "grad_norm": 3.0047953128814697, "learning_rate": 1.260181343168895e-06, "log_odds_chosen": 10.727274894714355, "log_odds_ratio": -6.0682545154122636e-05, "logits/chosen": -0.1944286823272705, "logits/rejected": -0.11104271560907364, "logps/chosen": -9.876063995761797e-05, "logps/rejected": -1.8163423538208008, "loss": 0.4689, "nll_loss": 0.11722764372825623, "rewards/accuracies": 1.0, "rewards/chosen": -9.876064723357558e-06, "rewards/margins": 0.18162435293197632, "rewards/rejected": -0.18163424730300903, "step": 14132 }, { "epoch": 9.773858921161827, "grad_norm": 3.318375825881958, "learning_rate": 1.2563393268787462e-06, "log_odds_chosen": 10.844013214111328, "log_odds_ratio": -4.2838320950977504e-05, "logits/chosen": -0.18170973658561707, "logits/rejected": -0.3086509704589844, "logps/chosen": -0.00013860626495443285, "logps/rejected": -1.9294129610061646, "loss": 0.438, "nll_loss": 0.10948415100574493, "rewards/accuracies": 1.0, "rewards/chosen": -1.3860626495443285e-05, "rewards/margins": 0.19292744994163513, "rewards/rejected": -0.1929413080215454, "step": 14133 }, { "epoch": 9.774550484094053, "grad_norm": 4.006042957305908, "learning_rate": 1.252497310588597e-06, "log_odds_chosen": 11.966079711914062, "log_odds_ratio": -3.672724051284604e-05, "logits/chosen": 0.12438063323497772, "logits/rejected": 0.23474116623401642, "logps/chosen": -0.0010203744750469923, "logps/rejected": -3.957035779953003, "loss": 0.4223, "nll_loss": 0.1055690348148346, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010203745478065684, "rewards/margins": 0.3956015408039093, "rewards/rejected": -0.39570358395576477, "step": 14134 }, { "epoch": 9.77524204702628, "grad_norm": 3.421947717666626, "learning_rate": 1.2486552942984479e-06, "log_odds_chosen": 10.71963119506836, "log_odds_ratio": -9.687233250588179e-05, "logits/chosen": -0.14968188107013702, "logits/rejected": -0.2112322747707367, "logps/chosen": -0.00012315776257310063, "logps/rejected": -1.5105361938476562, "loss": 0.3276, "nll_loss": 0.08189380168914795, "rewards/accuracies": 1.0, "rewards/chosen": -1.231577516591642e-05, "rewards/margins": 0.15104131400585175, "rewards/rejected": -0.15105360746383667, "step": 14135 }, { "epoch": 9.775933609958507, "grad_norm": 3.2914109230041504, "learning_rate": 1.2448132780082988e-06, "log_odds_chosen": 11.381830215454102, "log_odds_ratio": -2.1068624846520834e-05, "logits/chosen": -0.35839736461639404, "logits/rejected": -0.292026162147522, "logps/chosen": -0.00011267801892245188, "logps/rejected": -2.0828683376312256, "loss": 0.3396, "nll_loss": 0.08490855246782303, "rewards/accuracies": 1.0, "rewards/chosen": -1.126780261984095e-05, "rewards/margins": 0.20827557146549225, "rewards/rejected": -0.2082868218421936, "step": 14136 }, { "epoch": 9.776625172890734, "grad_norm": 2.6957051753997803, "learning_rate": 1.2409712617181498e-06, "log_odds_chosen": 11.469804763793945, "log_odds_ratio": -1.659486224525608e-05, "logits/chosen": -0.4436866044998169, "logits/rejected": -0.41482651233673096, "logps/chosen": -7.341312448261306e-05, "logps/rejected": -1.9058144092559814, "loss": 0.3009, "nll_loss": 0.0752357617020607, "rewards/accuracies": 1.0, "rewards/chosen": -7.341312084463425e-06, "rewards/margins": 0.19057410955429077, "rewards/rejected": -0.19058147072792053, "step": 14137 }, { "epoch": 9.77731673582296, "grad_norm": 3.438163995742798, "learning_rate": 1.2371292454280007e-06, "log_odds_chosen": 10.702985763549805, "log_odds_ratio": -0.00013088583364151418, "logits/chosen": -0.2541540265083313, "logits/rejected": -0.26329177618026733, "logps/chosen": -0.0002986131585203111, "logps/rejected": -2.1693620681762695, "loss": 0.4543, "nll_loss": 0.1135587990283966, "rewards/accuracies": 1.0, "rewards/chosen": -2.9861312214052305e-05, "rewards/margins": 0.2169063538312912, "rewards/rejected": -0.21693623065948486, "step": 14138 }, { "epoch": 9.778008298755188, "grad_norm": 3.345736265182495, "learning_rate": 1.2332872291378515e-06, "log_odds_chosen": 12.125631332397461, "log_odds_ratio": -3.3365773560944945e-05, "logits/chosen": -0.47693562507629395, "logits/rejected": -0.5634940266609192, "logps/chosen": -0.0001726750488160178, "logps/rejected": -3.035978317260742, "loss": 0.3919, "nll_loss": 0.09797894209623337, "rewards/accuracies": 1.0, "rewards/chosen": -1.7267502698814496e-05, "rewards/margins": 0.3035805821418762, "rewards/rejected": -0.3035978376865387, "step": 14139 }, { "epoch": 9.778699861687414, "grad_norm": 4.212039947509766, "learning_rate": 1.2294452128477025e-06, "log_odds_chosen": 10.795297622680664, "log_odds_ratio": -0.00023799219343345612, "logits/chosen": -0.22696000337600708, "logits/rejected": -0.37522876262664795, "logps/chosen": -0.00017699907766655087, "logps/rejected": -1.976933479309082, "loss": 0.5068, "nll_loss": 0.12667426466941833, "rewards/accuracies": 1.0, "rewards/chosen": -1.7699907402857207e-05, "rewards/margins": 0.1976756453514099, "rewards/rejected": -0.1976933479309082, "step": 14140 }, { "epoch": 9.779391424619641, "grad_norm": 3.0429298877716064, "learning_rate": 1.2256031965575534e-06, "log_odds_chosen": 9.816993713378906, "log_odds_ratio": -0.00025687177549116313, "logits/chosen": -0.4343181848526001, "logits/rejected": -0.4792379140853882, "logps/chosen": -0.004632133059203625, "logps/rejected": -2.4986414909362793, "loss": 0.2904, "nll_loss": 0.07257814705371857, "rewards/accuracies": 1.0, "rewards/chosen": -0.000463213276816532, "rewards/margins": 0.24940095841884613, "rewards/rejected": -0.24986416101455688, "step": 14141 }, { "epoch": 9.780082987551868, "grad_norm": 3.5660789012908936, "learning_rate": 1.2217611802674044e-06, "log_odds_chosen": 11.582379341125488, "log_odds_ratio": -1.1961707059526816e-05, "logits/chosen": -0.11018684506416321, "logits/rejected": -0.1884918063879013, "logps/chosen": -8.266264921985567e-05, "logps/rejected": -2.0520644187927246, "loss": 0.4035, "nll_loss": 0.10088561475276947, "rewards/accuracies": 1.0, "rewards/chosen": -8.266264558187686e-06, "rewards/margins": 0.20519816875457764, "rewards/rejected": -0.20520645380020142, "step": 14142 }, { "epoch": 9.780774550484095, "grad_norm": 3.748652219772339, "learning_rate": 1.2179191639772553e-06, "log_odds_chosen": 10.329841613769531, "log_odds_ratio": -0.000146003148984164, "logits/chosen": 0.018064171075820923, "logits/rejected": -0.08140605688095093, "logps/chosen": -0.0004367720102891326, "logps/rejected": -2.023275375366211, "loss": 0.477, "nll_loss": 0.1192474290728569, "rewards/accuracies": 1.0, "rewards/chosen": -4.3677198846125975e-05, "rewards/margins": 0.2022838294506073, "rewards/rejected": -0.20232751965522766, "step": 14143 }, { "epoch": 9.781466113416322, "grad_norm": 2.5951290130615234, "learning_rate": 1.2140771476871063e-06, "log_odds_chosen": 10.93802547454834, "log_odds_ratio": -3.0638646421721205e-05, "logits/chosen": -0.2951814532279968, "logits/rejected": -0.36579304933547974, "logps/chosen": -0.00030578882433474064, "logps/rejected": -2.4859442710876465, "loss": 0.3166, "nll_loss": 0.07915446907281876, "rewards/accuracies": 1.0, "rewards/chosen": -3.05788817058783e-05, "rewards/margins": 0.24856385588645935, "rewards/rejected": -0.24859443306922913, "step": 14144 }, { "epoch": 9.782157676348548, "grad_norm": 3.2371087074279785, "learning_rate": 1.2102351313969573e-06, "log_odds_chosen": 10.779736518859863, "log_odds_ratio": -0.0002104683080688119, "logits/chosen": -0.3973849415779114, "logits/rejected": -0.3972107172012329, "logps/chosen": -0.00013605685671791434, "logps/rejected": -1.7934966087341309, "loss": 0.3629, "nll_loss": 0.09071143716573715, "rewards/accuracies": 1.0, "rewards/chosen": -1.3605684216599911e-05, "rewards/margins": 0.1793360561132431, "rewards/rejected": -0.17934966087341309, "step": 14145 }, { "epoch": 9.782849239280775, "grad_norm": 2.9736454486846924, "learning_rate": 1.206393115106808e-06, "log_odds_chosen": 11.041149139404297, "log_odds_ratio": -3.184582237736322e-05, "logits/chosen": -0.7083456516265869, "logits/rejected": -0.7434857487678528, "logps/chosen": -0.0002840912784449756, "logps/rejected": -2.2934787273406982, "loss": 0.3777, "nll_loss": 0.09442107379436493, "rewards/accuracies": 1.0, "rewards/chosen": -2.8409125661710277e-05, "rewards/margins": 0.2293194681406021, "rewards/rejected": -0.22934786975383759, "step": 14146 }, { "epoch": 9.783540802213002, "grad_norm": 4.897305965423584, "learning_rate": 1.202551098816659e-06, "log_odds_chosen": 11.59496784210205, "log_odds_ratio": -2.3090786271495745e-05, "logits/chosen": -0.11693152785301208, "logits/rejected": -0.05875653773546219, "logps/chosen": -0.00021892010408919305, "logps/rejected": -2.939328908920288, "loss": 0.3021, "nll_loss": 0.07552653551101685, "rewards/accuracies": 1.0, "rewards/chosen": -2.1892010408919305e-05, "rewards/margins": 0.2939109802246094, "rewards/rejected": -0.2939329147338867, "step": 14147 }, { "epoch": 9.784232365145229, "grad_norm": 4.216211318969727, "learning_rate": 1.1987090825265101e-06, "log_odds_chosen": 11.53899097442627, "log_odds_ratio": -1.641822382225655e-05, "logits/chosen": -0.5299835801124573, "logits/rejected": -0.5817488431930542, "logps/chosen": -0.00015015192911960185, "logps/rejected": -2.3932838439941406, "loss": 0.3286, "nll_loss": 0.0821424350142479, "rewards/accuracies": 1.0, "rewards/chosen": -1.5015193639555946e-05, "rewards/margins": 0.23931337893009186, "rewards/rejected": -0.23932838439941406, "step": 14148 }, { "epoch": 9.784923928077456, "grad_norm": 6.387366771697998, "learning_rate": 1.1948670662363609e-06, "log_odds_chosen": 10.867435455322266, "log_odds_ratio": -0.00010485449456609786, "logits/chosen": -0.01297275722026825, "logits/rejected": -0.08890549838542938, "logps/chosen": -0.00029303666087798774, "logps/rejected": -2.1026883125305176, "loss": 0.8159, "nll_loss": 0.20396147668361664, "rewards/accuracies": 1.0, "rewards/chosen": -2.9303666451596655e-05, "rewards/margins": 0.21023951470851898, "rewards/rejected": -0.21026882529258728, "step": 14149 }, { "epoch": 9.785615491009683, "grad_norm": 3.6339125633239746, "learning_rate": 1.1910250499462119e-06, "log_odds_chosen": 12.274009704589844, "log_odds_ratio": -3.182486034347676e-05, "logits/chosen": -0.05865704268217087, "logits/rejected": 0.04080052673816681, "logps/chosen": -0.0001506828557467088, "logps/rejected": -3.2266464233398438, "loss": 0.3411, "nll_loss": 0.08526080846786499, "rewards/accuracies": 1.0, "rewards/chosen": -1.5068284483277239e-05, "rewards/margins": 0.3226495683193207, "rewards/rejected": -0.32266464829444885, "step": 14150 }, { "epoch": 9.78630705394191, "grad_norm": 3.0847744941711426, "learning_rate": 1.1871830336560626e-06, "log_odds_chosen": 10.046998977661133, "log_odds_ratio": -0.0005083397263661027, "logits/chosen": -0.4494885206222534, "logits/rejected": -0.44672513008117676, "logps/chosen": -0.0005137314437888563, "logps/rejected": -1.7720887660980225, "loss": 0.313, "nll_loss": 0.07818809151649475, "rewards/accuracies": 1.0, "rewards/chosen": -5.1373142923694104e-05, "rewards/margins": 0.17715752124786377, "rewards/rejected": -0.17720890045166016, "step": 14151 }, { "epoch": 9.786998616874136, "grad_norm": 2.7055211067199707, "learning_rate": 1.1833410173659138e-06, "log_odds_chosen": 12.727792739868164, "log_odds_ratio": -6.9703046392533e-06, "logits/chosen": -0.5745997428894043, "logits/rejected": -0.5524734258651733, "logps/chosen": -8.036150393309072e-05, "logps/rejected": -2.9690189361572266, "loss": 0.2696, "nll_loss": 0.06739067286252975, "rewards/accuracies": 1.0, "rewards/chosen": -8.036150575208012e-06, "rewards/margins": 0.2968938946723938, "rewards/rejected": -0.2969019114971161, "step": 14152 }, { "epoch": 9.787690179806363, "grad_norm": 4.3523850440979, "learning_rate": 1.1794990010757645e-06, "log_odds_chosen": 11.877840042114258, "log_odds_ratio": -1.3758017303189263e-05, "logits/chosen": 0.049047283828258514, "logits/rejected": -0.08200374990701675, "logps/chosen": -0.0001703656162135303, "logps/rejected": -2.9096250534057617, "loss": 0.483, "nll_loss": 0.12073956429958344, "rewards/accuracies": 1.0, "rewards/chosen": -1.7036563804140314e-05, "rewards/margins": 0.29094547033309937, "rewards/rejected": -0.2909625172615051, "step": 14153 }, { "epoch": 9.78838174273859, "grad_norm": 2.886366605758667, "learning_rate": 1.1756569847856155e-06, "log_odds_chosen": 11.11473274230957, "log_odds_ratio": -5.9133606555406004e-05, "logits/chosen": 0.02898319810628891, "logits/rejected": -0.15263637900352478, "logps/chosen": -0.0001661498099565506, "logps/rejected": -2.322352409362793, "loss": 0.2655, "nll_loss": 0.06635863333940506, "rewards/accuracies": 1.0, "rewards/chosen": -1.6614982087048702e-05, "rewards/margins": 0.23221862316131592, "rewards/rejected": -0.23223522305488586, "step": 14154 }, { "epoch": 9.789073305670817, "grad_norm": 4.71960973739624, "learning_rate": 1.1718149684954664e-06, "log_odds_chosen": 10.76363468170166, "log_odds_ratio": -0.00012997673184145242, "logits/chosen": -0.12062954157590866, "logits/rejected": -0.13581763207912445, "logps/chosen": -0.0002602715394459665, "logps/rejected": -2.5428872108459473, "loss": 0.3783, "nll_loss": 0.09456643462181091, "rewards/accuracies": 1.0, "rewards/chosen": -2.6027153580798768e-05, "rewards/margins": 0.25426268577575684, "rewards/rejected": -0.2542887032032013, "step": 14155 }, { "epoch": 9.789764868603044, "grad_norm": 3.4646763801574707, "learning_rate": 1.1679729522053174e-06, "log_odds_chosen": 10.0863037109375, "log_odds_ratio": -0.00011547702160896733, "logits/chosen": -0.5279322266578674, "logits/rejected": -0.5816812515258789, "logps/chosen": -0.0002697250456549227, "logps/rejected": -1.5220682621002197, "loss": 0.327, "nll_loss": 0.08173598349094391, "rewards/accuracies": 1.0, "rewards/chosen": -2.69725078396732e-05, "rewards/margins": 0.15217985212802887, "rewards/rejected": -0.15220682322978973, "step": 14156 }, { "epoch": 9.79045643153527, "grad_norm": 3.5090792179107666, "learning_rate": 1.1641309359151684e-06, "log_odds_chosen": 10.63144588470459, "log_odds_ratio": -0.0001910420978674665, "logits/chosen": -0.49909472465515137, "logits/rejected": -0.4897599518299103, "logps/chosen": -0.0013054630253463984, "logps/rejected": -1.859399676322937, "loss": 0.3322, "nll_loss": 0.08304300904273987, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013054630835540593, "rewards/margins": 0.1858094334602356, "rewards/rejected": -0.1859399676322937, "step": 14157 }, { "epoch": 9.791147994467497, "grad_norm": 3.027678966522217, "learning_rate": 1.1602889196250191e-06, "log_odds_chosen": 11.527877807617188, "log_odds_ratio": -1.5956939023453742e-05, "logits/chosen": -0.35433340072631836, "logits/rejected": -0.3891632556915283, "logps/chosen": -0.00012688693823292851, "logps/rejected": -2.255835771560669, "loss": 0.3571, "nll_loss": 0.08926960825920105, "rewards/accuracies": 1.0, "rewards/chosen": -1.268869345949497e-05, "rewards/margins": 0.2255708873271942, "rewards/rejected": -0.22558358311653137, "step": 14158 }, { "epoch": 9.791839557399724, "grad_norm": 4.357656478881836, "learning_rate": 1.1564469033348703e-06, "log_odds_chosen": 10.750991821289062, "log_odds_ratio": -0.00014336322783492506, "logits/chosen": -0.34541675448417664, "logits/rejected": -0.3815138638019562, "logps/chosen": -0.00023925396089907736, "logps/rejected": -2.113788604736328, "loss": 0.451, "nll_loss": 0.11273743957281113, "rewards/accuracies": 1.0, "rewards/chosen": -2.3925394998514093e-05, "rewards/margins": 0.21135492622852325, "rewards/rejected": -0.21137885749340057, "step": 14159 }, { "epoch": 9.792531120331951, "grad_norm": 2.9158616065979004, "learning_rate": 1.1526048870447213e-06, "log_odds_chosen": 10.863975524902344, "log_odds_ratio": -9.102724288823083e-05, "logits/chosen": 0.1784595102071762, "logits/rejected": 0.03506646305322647, "logps/chosen": -0.00021359164384193718, "logps/rejected": -2.2265512943267822, "loss": 0.291, "nll_loss": 0.07275011390447617, "rewards/accuracies": 1.0, "rewards/chosen": -2.13591647479916e-05, "rewards/margins": 0.22263376414775848, "rewards/rejected": -0.22265511751174927, "step": 14160 }, { "epoch": 9.793222683264178, "grad_norm": 3.547389268875122, "learning_rate": 1.148762870754572e-06, "log_odds_chosen": 9.15870475769043, "log_odds_ratio": -0.000654935953207314, "logits/chosen": -0.08512475341558456, "logits/rejected": 0.048164308071136475, "logps/chosen": -0.0006864020833745599, "logps/rejected": -1.2858558893203735, "loss": 0.5277, "nll_loss": 0.13187168538570404, "rewards/accuracies": 1.0, "rewards/chosen": -6.864021270303056e-05, "rewards/margins": 0.12851695716381073, "rewards/rejected": -0.1285855919122696, "step": 14161 }, { "epoch": 9.793914246196405, "grad_norm": 4.508517265319824, "learning_rate": 1.144920854464423e-06, "log_odds_chosen": 10.289685249328613, "log_odds_ratio": -0.06541527807712555, "logits/chosen": -0.5938727855682373, "logits/rejected": -0.6513862609863281, "logps/chosen": -0.01044410653412342, "logps/rejected": -2.716381311416626, "loss": 0.3712, "nll_loss": 0.08626425266265869, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010444107465445995, "rewards/margins": 0.2705937325954437, "rewards/rejected": -0.2716381549835205, "step": 14162 }, { "epoch": 9.794605809128631, "grad_norm": 3.4274098873138428, "learning_rate": 1.141078838174274e-06, "log_odds_chosen": 11.125181198120117, "log_odds_ratio": -4.395114228827879e-05, "logits/chosen": -0.3660215437412262, "logits/rejected": -0.4066369831562042, "logps/chosen": -0.00020414634491316974, "logps/rejected": -2.4690892696380615, "loss": 0.3781, "nll_loss": 0.09451229870319366, "rewards/accuracies": 1.0, "rewards/chosen": -2.0414634491316974e-05, "rewards/margins": 0.24688851833343506, "rewards/rejected": -0.24690893292427063, "step": 14163 }, { "epoch": 9.795297372060858, "grad_norm": 2.3276455402374268, "learning_rate": 1.1372368218841249e-06, "log_odds_chosen": 10.200767517089844, "log_odds_ratio": -0.00020588882034644485, "logits/chosen": -0.2072562575340271, "logits/rejected": -0.2718925178050995, "logps/chosen": -0.0003796897944994271, "logps/rejected": -1.9515058994293213, "loss": 0.2726, "nll_loss": 0.06811818480491638, "rewards/accuracies": 1.0, "rewards/chosen": -3.796897362917662e-05, "rewards/margins": 0.19511263072490692, "rewards/rejected": -0.19515059888362885, "step": 14164 }, { "epoch": 9.795988934993085, "grad_norm": 3.7078750133514404, "learning_rate": 1.1333948055939758e-06, "log_odds_chosen": 10.788431167602539, "log_odds_ratio": -7.90195626905188e-05, "logits/chosen": 0.46831220388412476, "logits/rejected": 0.37939536571502686, "logps/chosen": -0.00032136685331352055, "logps/rejected": -1.9700994491577148, "loss": 0.3841, "nll_loss": 0.09601810574531555, "rewards/accuracies": 1.0, "rewards/chosen": -3.213668242096901e-05, "rewards/margins": 0.19697780907154083, "rewards/rejected": -0.19700995087623596, "step": 14165 }, { "epoch": 9.796680497925312, "grad_norm": 5.544235706329346, "learning_rate": 1.1295527893038266e-06, "log_odds_chosen": 11.110427856445312, "log_odds_ratio": -5.360724389902316e-05, "logits/chosen": -0.31555014848709106, "logits/rejected": -0.44030508399009705, "logps/chosen": -0.0003340440453030169, "logps/rejected": -2.5095624923706055, "loss": 0.4651, "nll_loss": 0.11626134067773819, "rewards/accuracies": 1.0, "rewards/chosen": -3.340440889587626e-05, "rewards/margins": 0.2509228587150574, "rewards/rejected": -0.2509562373161316, "step": 14166 }, { "epoch": 9.797372060857539, "grad_norm": 2.5396666526794434, "learning_rate": 1.1257107730136778e-06, "log_odds_chosen": 9.936513900756836, "log_odds_ratio": -0.00018028414342552423, "logits/chosen": -0.5710381269454956, "logits/rejected": -0.5857884287834167, "logps/chosen": -0.00018568903033155948, "logps/rejected": -1.4176135063171387, "loss": 0.2561, "nll_loss": 0.06399574130773544, "rewards/accuracies": 1.0, "rewards/chosen": -1.8568902305560187e-05, "rewards/margins": 0.1417427808046341, "rewards/rejected": -0.14176134765148163, "step": 14167 }, { "epoch": 9.798063623789766, "grad_norm": 4.6397809982299805, "learning_rate": 1.1218687567235285e-06, "log_odds_chosen": 11.111590385437012, "log_odds_ratio": -8.788368722889572e-05, "logits/chosen": -0.14068683981895447, "logits/rejected": -0.2126733958721161, "logps/chosen": -0.00029392243595793843, "logps/rejected": -2.6638545989990234, "loss": 0.5023, "nll_loss": 0.12556704878807068, "rewards/accuracies": 1.0, "rewards/chosen": -2.9392243959591724e-05, "rewards/margins": 0.26635608077049255, "rewards/rejected": -0.2663854658603668, "step": 14168 }, { "epoch": 9.798755186721992, "grad_norm": 2.9909815788269043, "learning_rate": 1.1180267404333795e-06, "log_odds_chosen": 11.690807342529297, "log_odds_ratio": -1.2376316590234637e-05, "logits/chosen": 0.055607229471206665, "logits/rejected": -0.10118089616298676, "logps/chosen": -0.0001790410024113953, "logps/rejected": -2.558742046356201, "loss": 0.331, "nll_loss": 0.08274025470018387, "rewards/accuracies": 1.0, "rewards/chosen": -1.790409987734165e-05, "rewards/margins": 0.25585630536079407, "rewards/rejected": -0.2558741867542267, "step": 14169 }, { "epoch": 9.79944674965422, "grad_norm": 3.224093198776245, "learning_rate": 1.1141847241432304e-06, "log_odds_chosen": 9.783575057983398, "log_odds_ratio": -0.00012982710904907435, "logits/chosen": -0.068440280854702, "logits/rejected": -0.05813627690076828, "logps/chosen": -0.0009000495774671435, "logps/rejected": -1.8843883275985718, "loss": 0.3821, "nll_loss": 0.09550738334655762, "rewards/accuracies": 1.0, "rewards/chosen": -9.000496356748044e-05, "rewards/margins": 0.18834882974624634, "rewards/rejected": -0.18843884766101837, "step": 14170 }, { "epoch": 9.800138312586446, "grad_norm": 3.4567954540252686, "learning_rate": 1.1103427078530814e-06, "log_odds_chosen": 11.08468246459961, "log_odds_ratio": -0.00014502073463518173, "logits/chosen": -0.5457359552383423, "logits/rejected": -0.5854384899139404, "logps/chosen": -0.00023909546143840998, "logps/rejected": -1.9390183687210083, "loss": 0.4471, "nll_loss": 0.11176192760467529, "rewards/accuracies": 1.0, "rewards/chosen": -2.3909547962830402e-05, "rewards/margins": 0.1938779354095459, "rewards/rejected": -0.19390185177326202, "step": 14171 }, { "epoch": 9.800829875518673, "grad_norm": 3.547675609588623, "learning_rate": 1.1065006915629324e-06, "log_odds_chosen": 11.780421257019043, "log_odds_ratio": -1.080137190001551e-05, "logits/chosen": -0.40957897901535034, "logits/rejected": -0.3727770745754242, "logps/chosen": -6.516015127999708e-05, "logps/rejected": -2.1233139038085938, "loss": 0.3879, "nll_loss": 0.09698373079299927, "rewards/accuracies": 1.0, "rewards/chosen": -6.516015218949178e-06, "rewards/margins": 0.21232487261295319, "rewards/rejected": -0.2123313844203949, "step": 14172 }, { "epoch": 9.8015214384509, "grad_norm": 3.0268282890319824, "learning_rate": 1.1026586752727831e-06, "log_odds_chosen": 10.072689056396484, "log_odds_ratio": -0.00012022092414554209, "logits/chosen": 0.08040404319763184, "logits/rejected": 0.019931059330701828, "logps/chosen": -0.00023253823746927083, "logps/rejected": -1.420956015586853, "loss": 0.5183, "nll_loss": 0.1295713484287262, "rewards/accuracies": 1.0, "rewards/chosen": -2.3253825929714367e-05, "rewards/margins": 0.14207234978675842, "rewards/rejected": -0.14209561049938202, "step": 14173 }, { "epoch": 9.802213001383127, "grad_norm": 2.7813079357147217, "learning_rate": 1.098816658982634e-06, "log_odds_chosen": 11.032231330871582, "log_odds_ratio": -0.0002524368173908442, "logits/chosen": -0.7510668635368347, "logits/rejected": -0.7767488360404968, "logps/chosen": -0.0005029549356549978, "logps/rejected": -2.3083441257476807, "loss": 0.3432, "nll_loss": 0.08577017486095428, "rewards/accuracies": 1.0, "rewards/chosen": -5.029549356549978e-05, "rewards/margins": 0.2307841032743454, "rewards/rejected": -0.23083440959453583, "step": 14174 }, { "epoch": 9.802904564315353, "grad_norm": 3.758674144744873, "learning_rate": 1.094974642692485e-06, "log_odds_chosen": 11.322364807128906, "log_odds_ratio": -4.581090615829453e-05, "logits/chosen": -0.10903534293174744, "logits/rejected": -0.1322764754295349, "logps/chosen": -0.00021902378648519516, "logps/rejected": -2.3018741607666016, "loss": 0.5231, "nll_loss": 0.13077309727668762, "rewards/accuracies": 1.0, "rewards/chosen": -2.190238046750892e-05, "rewards/margins": 0.2301655113697052, "rewards/rejected": -0.23018741607666016, "step": 14175 }, { "epoch": 9.80359612724758, "grad_norm": 4.369509220123291, "learning_rate": 1.091132626402336e-06, "log_odds_chosen": 11.108938217163086, "log_odds_ratio": -0.000450789782917127, "logits/chosen": -0.150197371840477, "logits/rejected": -0.30005329847335815, "logps/chosen": -0.0004888575640507042, "logps/rejected": -2.422581434249878, "loss": 0.6459, "nll_loss": 0.16144119203090668, "rewards/accuracies": 1.0, "rewards/chosen": -4.88857549498789e-05, "rewards/margins": 0.2422092705965042, "rewards/rejected": -0.24225814640522003, "step": 14176 }, { "epoch": 9.804287690179807, "grad_norm": 3.787135601043701, "learning_rate": 1.087290610112187e-06, "log_odds_chosen": 10.38783073425293, "log_odds_ratio": -0.00010526390542509034, "logits/chosen": -0.14729399979114532, "logits/rejected": -0.11241314560174942, "logps/chosen": -0.0001371078978991136, "logps/rejected": -1.7678358554840088, "loss": 0.3392, "nll_loss": 0.08480089157819748, "rewards/accuracies": 1.0, "rewards/chosen": -1.3710789062315598e-05, "rewards/margins": 0.17676988244056702, "rewards/rejected": -0.17678357660770416, "step": 14177 }, { "epoch": 9.804979253112034, "grad_norm": 3.134108304977417, "learning_rate": 1.083448593822038e-06, "log_odds_chosen": 11.161537170410156, "log_odds_ratio": -2.4501694497303106e-05, "logits/chosen": -0.44808104634284973, "logits/rejected": -0.3272436261177063, "logps/chosen": -0.00018843442376237363, "logps/rejected": -2.60626220703125, "loss": 0.483, "nll_loss": 0.12075001746416092, "rewards/accuracies": 1.0, "rewards/chosen": -1.8843442376237363e-05, "rewards/margins": 0.26060739159584045, "rewards/rejected": -0.2606262266635895, "step": 14178 }, { "epoch": 9.80567081604426, "grad_norm": 3.9424610137939453, "learning_rate": 1.0796065775318889e-06, "log_odds_chosen": 11.937067031860352, "log_odds_ratio": -2.6453697500983253e-05, "logits/chosen": 0.09776285290718079, "logits/rejected": -0.04009261727333069, "logps/chosen": -0.00026516837533563375, "logps/rejected": -3.3824143409729004, "loss": 0.4357, "nll_loss": 0.10891351103782654, "rewards/accuracies": 1.0, "rewards/chosen": -2.6516838261159137e-05, "rewards/margins": 0.3382148742675781, "rewards/rejected": -0.33824142813682556, "step": 14179 }, { "epoch": 9.806362378976488, "grad_norm": 3.355971097946167, "learning_rate": 1.0757645612417396e-06, "log_odds_chosen": 11.36531925201416, "log_odds_ratio": -2.120831049978733e-05, "logits/chosen": 0.018348708748817444, "logits/rejected": -0.00679410994052887, "logps/chosen": -0.00022142543457448483, "logps/rejected": -2.782313346862793, "loss": 0.2885, "nll_loss": 0.07211624085903168, "rewards/accuracies": 1.0, "rewards/chosen": -2.2142543457448483e-05, "rewards/margins": 0.2782091796398163, "rewards/rejected": -0.27823132276535034, "step": 14180 }, { "epoch": 9.807053941908714, "grad_norm": 3.0975801944732666, "learning_rate": 1.0719225449515906e-06, "log_odds_chosen": 11.767681121826172, "log_odds_ratio": -1.4651730452897027e-05, "logits/chosen": -0.28424689173698425, "logits/rejected": -0.37487995624542236, "logps/chosen": -0.00011077235831180587, "logps/rejected": -2.540308713912964, "loss": 0.3767, "nll_loss": 0.0941721498966217, "rewards/accuracies": 1.0, "rewards/chosen": -1.1077236194978468e-05, "rewards/margins": 0.2540197968482971, "rewards/rejected": -0.25403088331222534, "step": 14181 }, { "epoch": 9.807745504840941, "grad_norm": 2.4416236877441406, "learning_rate": 1.0680805286614418e-06, "log_odds_chosen": 10.600057601928711, "log_odds_ratio": -6.607848627027124e-05, "logits/chosen": -0.12181997299194336, "logits/rejected": -0.22408144176006317, "logps/chosen": -0.00020478527585510164, "logps/rejected": -1.9726923704147339, "loss": 0.2484, "nll_loss": 0.06209355592727661, "rewards/accuracies": 1.0, "rewards/chosen": -2.0478526494116522e-05, "rewards/margins": 0.19724875688552856, "rewards/rejected": -0.1972692459821701, "step": 14182 }, { "epoch": 9.808437067773168, "grad_norm": 2.202310562133789, "learning_rate": 1.0642385123712925e-06, "log_odds_chosen": 10.932657241821289, "log_odds_ratio": -0.00032081958488561213, "logits/chosen": -0.07163320481777191, "logits/rejected": -0.02166604995727539, "logps/chosen": -0.0005710614495910704, "logps/rejected": -2.6383872032165527, "loss": 0.2048, "nll_loss": 0.05117820203304291, "rewards/accuracies": 1.0, "rewards/chosen": -5.7106146414298564e-05, "rewards/margins": 0.2637816071510315, "rewards/rejected": -0.2638387084007263, "step": 14183 }, { "epoch": 9.809128630705395, "grad_norm": 2.7330756187438965, "learning_rate": 1.0603964960811435e-06, "log_odds_chosen": 11.486774444580078, "log_odds_ratio": -3.485183697193861e-05, "logits/chosen": 0.036774277687072754, "logits/rejected": 0.06198891997337341, "logps/chosen": -0.00046196073526516557, "logps/rejected": -3.1498825550079346, "loss": 0.3413, "nll_loss": 0.08531951159238815, "rewards/accuracies": 1.0, "rewards/chosen": -4.619607352651656e-05, "rewards/margins": 0.31494206190109253, "rewards/rejected": -0.31498825550079346, "step": 14184 }, { "epoch": 9.809820193637622, "grad_norm": 3.961125135421753, "learning_rate": 1.0565544797909942e-06, "log_odds_chosen": 11.124430656433105, "log_odds_ratio": -4.4068317947676405e-05, "logits/chosen": -0.37122130393981934, "logits/rejected": -0.46189895272254944, "logps/chosen": -0.00010646507143974304, "logps/rejected": -2.1137115955352783, "loss": 0.3482, "nll_loss": 0.08704251050949097, "rewards/accuracies": 1.0, "rewards/chosen": -1.0646506780176423e-05, "rewards/margins": 0.21136051416397095, "rewards/rejected": -0.21137115359306335, "step": 14185 }, { "epoch": 9.810511756569849, "grad_norm": 3.631211519241333, "learning_rate": 1.0527124635008454e-06, "log_odds_chosen": 10.711901664733887, "log_odds_ratio": -5.7884266425389796e-05, "logits/chosen": -0.3625379204750061, "logits/rejected": -0.2845163345336914, "logps/chosen": -0.0003204490931238979, "logps/rejected": -2.4358041286468506, "loss": 0.4626, "nll_loss": 0.11565632373094559, "rewards/accuracies": 1.0, "rewards/chosen": -3.2044910767581314e-05, "rewards/margins": 0.24354836344718933, "rewards/rejected": -0.2435804307460785, "step": 14186 }, { "epoch": 9.811203319502075, "grad_norm": 3.038205623626709, "learning_rate": 1.0488704472106961e-06, "log_odds_chosen": 10.539594650268555, "log_odds_ratio": -0.0004320595180615783, "logits/chosen": -0.3175621032714844, "logits/rejected": -0.3884882926940918, "logps/chosen": -0.000516570289619267, "logps/rejected": -2.3882803916931152, "loss": 0.3221, "nll_loss": 0.0804857388138771, "rewards/accuracies": 1.0, "rewards/chosen": -5.16570289619267e-05, "rewards/margins": 0.23877638578414917, "rewards/rejected": -0.23882803320884705, "step": 14187 }, { "epoch": 9.811894882434302, "grad_norm": 4.937509059906006, "learning_rate": 1.045028430920547e-06, "log_odds_chosen": 11.531312942504883, "log_odds_ratio": -1.8641001588548534e-05, "logits/chosen": -0.37507709860801697, "logits/rejected": -0.44480010867118835, "logps/chosen": -0.000322213425533846, "logps/rejected": -2.9958252906799316, "loss": 0.8522, "nll_loss": 0.21304495632648468, "rewards/accuracies": 1.0, "rewards/chosen": -3.222134546376765e-05, "rewards/margins": 0.299550324678421, "rewards/rejected": -0.2995825409889221, "step": 14188 }, { "epoch": 9.812586445366529, "grad_norm": 3.532475709915161, "learning_rate": 1.041186414630398e-06, "log_odds_chosen": 9.091978073120117, "log_odds_ratio": -0.0005241170874796808, "logits/chosen": -0.5417889952659607, "logits/rejected": -0.5371156334877014, "logps/chosen": -0.0008866861462593079, "logps/rejected": -1.4519448280334473, "loss": 0.3514, "nll_loss": 0.08780322968959808, "rewards/accuracies": 1.0, "rewards/chosen": -8.866861753631383e-05, "rewards/margins": 0.14510582387447357, "rewards/rejected": -0.14519450068473816, "step": 14189 }, { "epoch": 9.813278008298756, "grad_norm": 3.668975591659546, "learning_rate": 1.037344398340249e-06, "log_odds_chosen": 10.498470306396484, "log_odds_ratio": -0.00035588949685916305, "logits/chosen": -0.26290363073349, "logits/rejected": -0.25256627798080444, "logps/chosen": -0.0015520071610808372, "logps/rejected": -2.535429000854492, "loss": 0.3628, "nll_loss": 0.09067076444625854, "rewards/accuracies": 1.0, "rewards/chosen": -0.00015520071610808372, "rewards/margins": 0.2533876895904541, "rewards/rejected": -0.2535429000854492, "step": 14190 }, { "epoch": 9.813969571230983, "grad_norm": 3.6465094089508057, "learning_rate": 1.0335023820501e-06, "log_odds_chosen": 11.360379219055176, "log_odds_ratio": -0.00039515478420071304, "logits/chosen": -0.20723837614059448, "logits/rejected": -0.36116114258766174, "logps/chosen": -0.0006092398543842137, "logps/rejected": -2.817495584487915, "loss": 0.4128, "nll_loss": 0.10316009819507599, "rewards/accuracies": 1.0, "rewards/chosen": -6.0923983255634084e-05, "rewards/margins": 0.2816886305809021, "rewards/rejected": -0.28174954652786255, "step": 14191 }, { "epoch": 9.81466113416321, "grad_norm": 2.419128656387329, "learning_rate": 1.0296603657599507e-06, "log_odds_chosen": 10.698506355285645, "log_odds_ratio": -7.404476491501555e-05, "logits/chosen": -0.17025601863861084, "logits/rejected": -0.19649499654769897, "logps/chosen": -0.0002077743411064148, "logps/rejected": -2.078397750854492, "loss": 0.23, "nll_loss": 0.057494472712278366, "rewards/accuracies": 1.0, "rewards/chosen": -2.0777435565833002e-05, "rewards/margins": 0.20781899988651276, "rewards/rejected": -0.20783977210521698, "step": 14192 }, { "epoch": 9.815352697095436, "grad_norm": 3.329080581665039, "learning_rate": 1.025818349469802e-06, "log_odds_chosen": 11.043357849121094, "log_odds_ratio": -3.2028674468165264e-05, "logits/chosen": -0.07440190017223358, "logits/rejected": -0.053846318274736404, "logps/chosen": -0.0005089318146929145, "logps/rejected": -2.821354627609253, "loss": 0.4336, "nll_loss": 0.10839445888996124, "rewards/accuracies": 1.0, "rewards/chosen": -5.0893184379674494e-05, "rewards/margins": 0.282084584236145, "rewards/rejected": -0.2821354866027832, "step": 14193 }, { "epoch": 9.816044260027663, "grad_norm": 3.651942491531372, "learning_rate": 1.0219763331796529e-06, "log_odds_chosen": 10.818357467651367, "log_odds_ratio": -0.0002754127490334213, "logits/chosen": -0.3202933669090271, "logits/rejected": -0.3279436528682709, "logps/chosen": -0.0005293386057019234, "logps/rejected": -2.5842478275299072, "loss": 0.3804, "nll_loss": 0.09506859630346298, "rewards/accuracies": 1.0, "rewards/chosen": -5.2933864935766906e-05, "rewards/margins": 0.25837188959121704, "rewards/rejected": -0.2584247887134552, "step": 14194 }, { "epoch": 9.81673582295989, "grad_norm": 2.4412009716033936, "learning_rate": 1.0181343168895036e-06, "log_odds_chosen": 11.217656135559082, "log_odds_ratio": -6.202785152709112e-05, "logits/chosen": -0.5744768977165222, "logits/rejected": -0.6186306476593018, "logps/chosen": -0.00016856074216775596, "logps/rejected": -2.4487104415893555, "loss": 0.2595, "nll_loss": 0.06486619263887405, "rewards/accuracies": 1.0, "rewards/chosen": -1.6856072761584073e-05, "rewards/margins": 0.24485419690608978, "rewards/rejected": -0.24487105011940002, "step": 14195 }, { "epoch": 9.817427385892117, "grad_norm": 2.520139455795288, "learning_rate": 1.0142923005993546e-06, "log_odds_chosen": 10.983305931091309, "log_odds_ratio": -0.00019241197151131928, "logits/chosen": -0.5942064523696899, "logits/rejected": -0.6731418371200562, "logps/chosen": -0.0005326925893314183, "logps/rejected": -3.039681911468506, "loss": 0.2778, "nll_loss": 0.06942148506641388, "rewards/accuracies": 1.0, "rewards/chosen": -5.326926111592911e-05, "rewards/margins": 0.3039149343967438, "rewards/rejected": -0.3039681911468506, "step": 14196 }, { "epoch": 9.818118948824344, "grad_norm": 4.07489013671875, "learning_rate": 1.0104502843092055e-06, "log_odds_chosen": 10.513197898864746, "log_odds_ratio": -5.2534029236994684e-05, "logits/chosen": -0.5547770261764526, "logits/rejected": -0.7435232996940613, "logps/chosen": -0.0002200988819822669, "logps/rejected": -1.8414217233657837, "loss": 0.3489, "nll_loss": 0.08720815181732178, "rewards/accuracies": 1.0, "rewards/chosen": -2.200988819822669e-05, "rewards/margins": 0.18412016332149506, "rewards/rejected": -0.18414217233657837, "step": 14197 }, { "epoch": 9.81881051175657, "grad_norm": 2.957580804824829, "learning_rate": 1.0066082680190565e-06, "log_odds_chosen": 10.992133140563965, "log_odds_ratio": -4.3501433538040146e-05, "logits/chosen": -0.5260941386222839, "logits/rejected": -0.648362934589386, "logps/chosen": -0.00034256701474078, "logps/rejected": -2.743771553039551, "loss": 0.3164, "nll_loss": 0.07908672839403152, "rewards/accuracies": 1.0, "rewards/chosen": -3.425670001888648e-05, "rewards/margins": 0.2743428945541382, "rewards/rejected": -0.27437716722488403, "step": 14198 }, { "epoch": 9.819502074688797, "grad_norm": 3.9296913146972656, "learning_rate": 1.0027662517289075e-06, "log_odds_chosen": 11.788595199584961, "log_odds_ratio": -1.3948605555924587e-05, "logits/chosen": -0.25325828790664673, "logits/rejected": -0.32087087631225586, "logps/chosen": -8.637905557407066e-05, "logps/rejected": -2.1206068992614746, "loss": 0.4042, "nll_loss": 0.10105835646390915, "rewards/accuracies": 1.0, "rewards/chosen": -8.637905921204947e-06, "rewards/margins": 0.21205206215381622, "rewards/rejected": -0.21206068992614746, "step": 14199 }, { "epoch": 9.820193637621024, "grad_norm": 3.424304723739624, "learning_rate": 9.989242354387582e-07, "log_odds_chosen": 11.120806694030762, "log_odds_ratio": -5.018915544496849e-05, "logits/chosen": -0.07096761465072632, "logits/rejected": -0.2540345788002014, "logps/chosen": -0.0002843011461663991, "logps/rejected": -2.281825065612793, "loss": 0.4318, "nll_loss": 0.10793668031692505, "rewards/accuracies": 1.0, "rewards/chosen": -2.8430116799427196e-05, "rewards/margins": 0.22815406322479248, "rewards/rejected": -0.22818250954151154, "step": 14200 }, { "epoch": 9.820885200553251, "grad_norm": 4.0108819007873535, "learning_rate": 9.950822191486094e-07, "log_odds_chosen": 12.207027435302734, "log_odds_ratio": -5.891701675864169e-06, "logits/chosen": -0.18299826979637146, "logits/rejected": -0.17729896306991577, "logps/chosen": -0.0001339554728474468, "logps/rejected": -2.995695114135742, "loss": 0.521, "nll_loss": 0.13025128841400146, "rewards/accuracies": 1.0, "rewards/chosen": -1.3395549103734083e-05, "rewards/margins": 0.2995561361312866, "rewards/rejected": -0.2995695471763611, "step": 14201 }, { "epoch": 9.821576763485478, "grad_norm": 3.378380060195923, "learning_rate": 9.912402028584601e-07, "log_odds_chosen": 10.768424034118652, "log_odds_ratio": -6.376580859068781e-05, "logits/chosen": -0.41383251547813416, "logits/rejected": -0.4662688076496124, "logps/chosen": -0.00010562510578893125, "logps/rejected": -1.8373764753341675, "loss": 0.4542, "nll_loss": 0.1135462075471878, "rewards/accuracies": 1.0, "rewards/chosen": -1.0562511306488886e-05, "rewards/margins": 0.18372708559036255, "rewards/rejected": -0.18373766541481018, "step": 14202 }, { "epoch": 9.822268326417705, "grad_norm": 3.4288904666900635, "learning_rate": 9.87398186568311e-07, "log_odds_chosen": 11.275004386901855, "log_odds_ratio": -8.114479715004563e-05, "logits/chosen": -0.4122001528739929, "logits/rejected": -0.5066218972206116, "logps/chosen": -0.00015522413013968617, "logps/rejected": -2.3961973190307617, "loss": 0.347, "nll_loss": 0.08673498779535294, "rewards/accuracies": 1.0, "rewards/chosen": -1.5522411558777094e-05, "rewards/margins": 0.23960421979427338, "rewards/rejected": -0.23961973190307617, "step": 14203 }, { "epoch": 9.822959889349931, "grad_norm": 3.5420782566070557, "learning_rate": 9.835561702781618e-07, "log_odds_chosen": 12.537866592407227, "log_odds_ratio": -6.367015885189176e-05, "logits/chosen": 0.0796017125248909, "logits/rejected": 0.002951107919216156, "logps/chosen": -0.00025438476586714387, "logps/rejected": -4.141080856323242, "loss": 0.3596, "nll_loss": 0.08990590274333954, "rewards/accuracies": 1.0, "rewards/chosen": -2.543847767810803e-05, "rewards/margins": 0.4140826165676117, "rewards/rejected": -0.4141080677509308, "step": 14204 }, { "epoch": 9.823651452282158, "grad_norm": 4.7772088050842285, "learning_rate": 9.79714153988013e-07, "log_odds_chosen": 11.233759880065918, "log_odds_ratio": -4.31876651418861e-05, "logits/chosen": -0.37256962060928345, "logits/rejected": -0.3746577501296997, "logps/chosen": -0.0002783732197713107, "logps/rejected": -2.4954299926757812, "loss": 0.5566, "nll_loss": 0.13915464282035828, "rewards/accuracies": 1.0, "rewards/chosen": -2.783732270472683e-05, "rewards/margins": 0.24951516091823578, "rewards/rejected": -0.2495429962873459, "step": 14205 }, { "epoch": 9.824343015214385, "grad_norm": 2.7360641956329346, "learning_rate": 9.75872137697864e-07, "log_odds_chosen": 11.81240463256836, "log_odds_ratio": -2.6226813133689575e-05, "logits/chosen": -0.2089729905128479, "logits/rejected": -0.3485751748085022, "logps/chosen": -0.00027663970831781626, "logps/rejected": -2.7306594848632812, "loss": 0.3143, "nll_loss": 0.07858486473560333, "rewards/accuracies": 1.0, "rewards/chosen": -2.766397301456891e-05, "rewards/margins": 0.27303826808929443, "rewards/rejected": -0.2730659246444702, "step": 14206 }, { "epoch": 9.825034578146612, "grad_norm": 4.1154866218566895, "learning_rate": 9.720301214077147e-07, "log_odds_chosen": 12.137218475341797, "log_odds_ratio": -1.3980280527903233e-05, "logits/chosen": -0.16195784509181976, "logits/rejected": -0.3471846878528595, "logps/chosen": -0.00012617021275218576, "logps/rejected": -3.104794979095459, "loss": 0.3952, "nll_loss": 0.09880155324935913, "rewards/accuracies": 1.0, "rewards/chosen": -1.2617021639016457e-05, "rewards/margins": 0.3104668855667114, "rewards/rejected": -0.3104795217514038, "step": 14207 }, { "epoch": 9.825726141078839, "grad_norm": 3.9353859424591064, "learning_rate": 9.681881051175657e-07, "log_odds_chosen": 10.912803649902344, "log_odds_ratio": -0.00021392188500612974, "logits/chosen": 0.3478098213672638, "logits/rejected": 0.3224194049835205, "logps/chosen": -0.0004368519294075668, "logps/rejected": -2.136322021484375, "loss": 0.5987, "nll_loss": 0.1496632695198059, "rewards/accuracies": 1.0, "rewards/chosen": -4.3685191485565156e-05, "rewards/margins": 0.21358852088451385, "rewards/rejected": -0.21363219618797302, "step": 14208 }, { "epoch": 9.826417704011066, "grad_norm": 3.2916836738586426, "learning_rate": 9.643460888274166e-07, "log_odds_chosen": 10.82497787475586, "log_odds_ratio": -9.243319072993472e-05, "logits/chosen": -0.21201613545417786, "logits/rejected": -0.23995202779769897, "logps/chosen": -0.0002386215201113373, "logps/rejected": -2.293454170227051, "loss": 0.337, "nll_loss": 0.08425004780292511, "rewards/accuracies": 1.0, "rewards/chosen": -2.386215237493161e-05, "rewards/margins": 0.22932155430316925, "rewards/rejected": -0.2293454110622406, "step": 14209 }, { "epoch": 9.827109266943292, "grad_norm": 2.628417730331421, "learning_rate": 9.605040725372676e-07, "log_odds_chosen": 9.22122573852539, "log_odds_ratio": -0.00034783576847985387, "logits/chosen": -0.1604822874069214, "logits/rejected": -0.29971182346343994, "logps/chosen": -0.000420909549575299, "logps/rejected": -1.3741449117660522, "loss": 0.259, "nll_loss": 0.06471599638462067, "rewards/accuracies": 1.0, "rewards/chosen": -4.209095277474262e-05, "rewards/margins": 0.13737240433692932, "rewards/rejected": -0.13741448521614075, "step": 14210 }, { "epoch": 9.82780082987552, "grad_norm": 3.577942371368408, "learning_rate": 9.566620562471186e-07, "log_odds_chosen": 10.938081741333008, "log_odds_ratio": -8.411614544456825e-05, "logits/chosen": 0.007461972534656525, "logits/rejected": -0.1085021048784256, "logps/chosen": -0.0009497880819253623, "logps/rejected": -2.315056562423706, "loss": 0.4293, "nll_loss": 0.10732435435056686, "rewards/accuracies": 1.0, "rewards/chosen": -9.497880819253623e-05, "rewards/margins": 0.2314106822013855, "rewards/rejected": -0.2315056473016739, "step": 14211 }, { "epoch": 9.828492392807746, "grad_norm": 3.1475579738616943, "learning_rate": 9.528200399569694e-07, "log_odds_chosen": 11.785881042480469, "log_odds_ratio": -0.00021281295630615205, "logits/chosen": -0.15384456515312195, "logits/rejected": -0.16469359397888184, "logps/chosen": -9.269881411455572e-05, "logps/rejected": -2.4875898361206055, "loss": 0.327, "nll_loss": 0.08173118531703949, "rewards/accuracies": 1.0, "rewards/chosen": -9.269881047657691e-06, "rewards/margins": 0.24874970316886902, "rewards/rejected": -0.24875898659229279, "step": 14212 }, { "epoch": 9.829183955739973, "grad_norm": 2.7176268100738525, "learning_rate": 9.489780236668204e-07, "log_odds_chosen": 12.190725326538086, "log_odds_ratio": -2.826091986207757e-05, "logits/chosen": -0.07545314729213715, "logits/rejected": -0.1277007907629013, "logps/chosen": -0.0003076986758969724, "logps/rejected": -3.1703543663024902, "loss": 0.416, "nll_loss": 0.10398771613836288, "rewards/accuracies": 1.0, "rewards/chosen": -3.0769868317293e-05, "rewards/margins": 0.3170046806335449, "rewards/rejected": -0.3170354664325714, "step": 14213 }, { "epoch": 9.8298755186722, "grad_norm": 3.433530330657959, "learning_rate": 9.451360073766712e-07, "log_odds_chosen": 10.720812797546387, "log_odds_ratio": -8.576504478696734e-05, "logits/chosen": -0.3028118908405304, "logits/rejected": -0.3917955160140991, "logps/chosen": -0.0011436090571805835, "logps/rejected": -2.5318808555603027, "loss": 0.2916, "nll_loss": 0.07288795709609985, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011436091153882444, "rewards/margins": 0.25307372212409973, "rewards/rejected": -0.2531880736351013, "step": 14214 }, { "epoch": 9.830567081604427, "grad_norm": 3.285998821258545, "learning_rate": 9.412939910865223e-07, "log_odds_chosen": 10.454912185668945, "log_odds_ratio": -0.00021035922691226006, "logits/chosen": -0.2515740990638733, "logits/rejected": -0.34814438223838806, "logps/chosen": -0.00033893357613123953, "logps/rejected": -1.6399266719818115, "loss": 0.4488, "nll_loss": 0.11218146979808807, "rewards/accuracies": 1.0, "rewards/chosen": -3.389335688552819e-05, "rewards/margins": 0.16395878791809082, "rewards/rejected": -0.16399268805980682, "step": 14215 }, { "epoch": 9.831258644536653, "grad_norm": 3.471129894256592, "learning_rate": 9.374519747963733e-07, "log_odds_chosen": 11.554468154907227, "log_odds_ratio": -0.00011103285942226648, "logits/chosen": -0.4782823324203491, "logits/rejected": -0.5865257382392883, "logps/chosen": -0.0003853056114166975, "logps/rejected": -3.031780242919922, "loss": 0.3246, "nll_loss": 0.08112723380327225, "rewards/accuracies": 1.0, "rewards/chosen": -3.853056114166975e-05, "rewards/margins": 0.30313950777053833, "rewards/rejected": -0.30317801237106323, "step": 14216 }, { "epoch": 9.83195020746888, "grad_norm": 2.7864818572998047, "learning_rate": 9.336099585062241e-07, "log_odds_chosen": 10.391328811645508, "log_odds_ratio": -0.00019343834719620645, "logits/chosen": -0.4431155025959015, "logits/rejected": -0.5533031821250916, "logps/chosen": -0.00038062711246311665, "logps/rejected": -1.70412278175354, "loss": 0.2584, "nll_loss": 0.06458286941051483, "rewards/accuracies": 1.0, "rewards/chosen": -3.8062717067077756e-05, "rewards/margins": 0.17037422955036163, "rewards/rejected": -0.17041230201721191, "step": 14217 }, { "epoch": 9.832641770401107, "grad_norm": 3.4246859550476074, "learning_rate": 9.297679422160751e-07, "log_odds_chosen": 11.085762023925781, "log_odds_ratio": -3.4930937545141205e-05, "logits/chosen": -0.5329622030258179, "logits/rejected": -0.5232717394828796, "logps/chosen": -0.00013040719204582274, "logps/rejected": -2.0230469703674316, "loss": 0.3623, "nll_loss": 0.09056548774242401, "rewards/accuracies": 1.0, "rewards/chosen": -1.3040720659773797e-05, "rewards/margins": 0.20229166746139526, "rewards/rejected": -0.2023046910762787, "step": 14218 }, { "epoch": 9.833333333333334, "grad_norm": 4.81691837310791, "learning_rate": 9.259259259259259e-07, "log_odds_chosen": 11.22233772277832, "log_odds_ratio": -0.00020713395497296005, "logits/chosen": 0.19287648797035217, "logits/rejected": -0.06851596385240555, "logps/chosen": -0.00026251928647980094, "logps/rejected": -2.5511860847473145, "loss": 0.322, "nll_loss": 0.08047536760568619, "rewards/accuracies": 1.0, "rewards/chosen": -2.6251927920384333e-05, "rewards/margins": 0.2550923526287079, "rewards/rejected": -0.25511860847473145, "step": 14219 }, { "epoch": 9.83402489626556, "grad_norm": 2.9679412841796875, "learning_rate": 9.220839096357769e-07, "log_odds_chosen": 10.381444931030273, "log_odds_ratio": -0.00014931659097783267, "logits/chosen": -0.21991831064224243, "logits/rejected": -0.24379611015319824, "logps/chosen": -0.0001824432547437027, "logps/rejected": -1.773210883140564, "loss": 0.327, "nll_loss": 0.08174421638250351, "rewards/accuracies": 1.0, "rewards/chosen": -1.824432547437027e-05, "rewards/margins": 0.17730283737182617, "rewards/rejected": -0.17732109129428864, "step": 14220 }, { "epoch": 9.834716459197788, "grad_norm": 2.674473762512207, "learning_rate": 9.182418933456278e-07, "log_odds_chosen": 10.52938461303711, "log_odds_ratio": -0.0003815424279309809, "logits/chosen": -0.16022010147571564, "logits/rejected": -0.19140568375587463, "logps/chosen": -0.0003706804709509015, "logps/rejected": -2.0917491912841797, "loss": 0.2643, "nll_loss": 0.06602489948272705, "rewards/accuracies": 1.0, "rewards/chosen": -3.706804636749439e-05, "rewards/margins": 0.20913787186145782, "rewards/rejected": -0.20917494595050812, "step": 14221 }, { "epoch": 9.835408022130014, "grad_norm": 5.377455711364746, "learning_rate": 9.143998770554787e-07, "log_odds_chosen": 12.041780471801758, "log_odds_ratio": -2.566107468737755e-05, "logits/chosen": -0.07172747701406479, "logits/rejected": -0.10550010949373245, "logps/chosen": -0.00014010498125571758, "logps/rejected": -3.049236297607422, "loss": 0.5647, "nll_loss": 0.14116749167442322, "rewards/accuracies": 1.0, "rewards/chosen": -1.4010498489369638e-05, "rewards/margins": 0.3049096465110779, "rewards/rejected": -0.3049236536026001, "step": 14222 }, { "epoch": 9.836099585062241, "grad_norm": 3.151463270187378, "learning_rate": 9.105578607653298e-07, "log_odds_chosen": 12.666389465332031, "log_odds_ratio": -1.4467575965682045e-05, "logits/chosen": -0.38619983196258545, "logits/rejected": -0.42439472675323486, "logps/chosen": -0.0003368236939422786, "logps/rejected": -3.4165196418762207, "loss": 0.3543, "nll_loss": 0.08858034014701843, "rewards/accuracies": 1.0, "rewards/chosen": -3.36823686666321e-05, "rewards/margins": 0.34161829948425293, "rewards/rejected": -0.341651976108551, "step": 14223 }, { "epoch": 9.836791147994468, "grad_norm": 3.383626699447632, "learning_rate": 9.067158444751805e-07, "log_odds_chosen": 11.358930587768555, "log_odds_ratio": -8.952614007284865e-05, "logits/chosen": -0.5634051561355591, "logits/rejected": -0.5803734064102173, "logps/chosen": -0.0001293225068366155, "logps/rejected": -1.9455944299697876, "loss": 0.3954, "nll_loss": 0.09883418679237366, "rewards/accuracies": 1.0, "rewards/chosen": -1.2932249774166849e-05, "rewards/margins": 0.19454652070999146, "rewards/rejected": -0.19455945491790771, "step": 14224 }, { "epoch": 9.837482710926695, "grad_norm": 3.4224886894226074, "learning_rate": 9.028738281850316e-07, "log_odds_chosen": 11.326183319091797, "log_odds_ratio": -2.355646574869752e-05, "logits/chosen": -0.05391174927353859, "logits/rejected": -0.19636398553848267, "logps/chosen": -0.00014157703844830394, "logps/rejected": -2.18880295753479, "loss": 0.2927, "nll_loss": 0.07316230237483978, "rewards/accuracies": 1.0, "rewards/chosen": -1.4157703844830394e-05, "rewards/margins": 0.21886613965034485, "rewards/rejected": -0.218880295753479, "step": 14225 }, { "epoch": 9.838174273858922, "grad_norm": 3.9258923530578613, "learning_rate": 8.990318118948824e-07, "log_odds_chosen": 12.06605339050293, "log_odds_ratio": -1.743772554618772e-05, "logits/chosen": -0.12857088446617126, "logits/rejected": -0.2564074397087097, "logps/chosen": -9.755059727467597e-05, "logps/rejected": -2.5448834896087646, "loss": 0.3866, "nll_loss": 0.09664377570152283, "rewards/accuracies": 1.0, "rewards/chosen": -9.755060091265477e-06, "rewards/margins": 0.2544785737991333, "rewards/rejected": -0.25448834896087646, "step": 14226 }, { "epoch": 9.838865836791149, "grad_norm": 3.148038864135742, "learning_rate": 8.951897956047334e-07, "log_odds_chosen": 10.782970428466797, "log_odds_ratio": -9.98452742351219e-05, "logits/chosen": -0.19792483747005463, "logits/rejected": -0.1644812673330307, "logps/chosen": -0.00033488357439637184, "logps/rejected": -2.11006236076355, "loss": 0.2978, "nll_loss": 0.07445169985294342, "rewards/accuracies": 1.0, "rewards/chosen": -3.348835889482871e-05, "rewards/margins": 0.21097274124622345, "rewards/rejected": -0.21100623905658722, "step": 14227 }, { "epoch": 9.839557399723375, "grad_norm": 2.8470401763916016, "learning_rate": 8.913477793145844e-07, "log_odds_chosen": 11.369443893432617, "log_odds_ratio": -4.5292501454241574e-05, "logits/chosen": -0.07345202565193176, "logits/rejected": -0.16434796154499054, "logps/chosen": -0.0002378990175202489, "logps/rejected": -2.261381149291992, "loss": 0.4839, "nll_loss": 0.12097690999507904, "rewards/accuracies": 1.0, "rewards/chosen": -2.3789905753801577e-05, "rewards/margins": 0.22611434757709503, "rewards/rejected": -0.2261381298303604, "step": 14228 }, { "epoch": 9.840248962655602, "grad_norm": 2.4687888622283936, "learning_rate": 8.875057630244352e-07, "log_odds_chosen": 11.013246536254883, "log_odds_ratio": -0.00010485532402526587, "logits/chosen": -0.04445496201515198, "logits/rejected": 0.013508342206478119, "logps/chosen": -0.00022190046729519963, "logps/rejected": -2.511152505874634, "loss": 0.2488, "nll_loss": 0.062182098627090454, "rewards/accuracies": 1.0, "rewards/chosen": -2.2190048184711486e-05, "rewards/margins": 0.2510930597782135, "rewards/rejected": -0.2511152923107147, "step": 14229 }, { "epoch": 9.840940525587829, "grad_norm": 2.7241017818450928, "learning_rate": 8.836637467342862e-07, "log_odds_chosen": 10.521742820739746, "log_odds_ratio": -5.390686419559643e-05, "logits/chosen": -0.00449778139591217, "logits/rejected": -0.0959646999835968, "logps/chosen": -0.00020149025658611208, "logps/rejected": -1.9966009855270386, "loss": 0.3227, "nll_loss": 0.08067253232002258, "rewards/accuracies": 1.0, "rewards/chosen": -2.0149025658611208e-05, "rewards/margins": 0.1996399611234665, "rewards/rejected": -0.19966010749340057, "step": 14230 }, { "epoch": 9.841632088520056, "grad_norm": 4.300022125244141, "learning_rate": 8.79821730444137e-07, "log_odds_chosen": 10.618481636047363, "log_odds_ratio": -0.00010173715418204665, "logits/chosen": -0.24378979206085205, "logits/rejected": -0.3016761839389801, "logps/chosen": -0.00034635435440577567, "logps/rejected": -1.9258100986480713, "loss": 0.4667, "nll_loss": 0.11666956543922424, "rewards/accuracies": 1.0, "rewards/chosen": -3.463543544057757e-05, "rewards/margins": 0.19254638254642487, "rewards/rejected": -0.19258102774620056, "step": 14231 }, { "epoch": 9.842323651452283, "grad_norm": 4.205569267272949, "learning_rate": 8.759797141539881e-07, "log_odds_chosen": 11.185318946838379, "log_odds_ratio": -2.8937647584825754e-05, "logits/chosen": -0.1755310446023941, "logits/rejected": -0.21790622174739838, "logps/chosen": -0.0003062895266339183, "logps/rejected": -2.576779365539551, "loss": 0.6641, "nll_loss": 0.1660199612379074, "rewards/accuracies": 1.0, "rewards/chosen": -3.062895484617911e-05, "rewards/margins": 0.257647305727005, "rewards/rejected": -0.25767794251441956, "step": 14232 }, { "epoch": 9.84301521438451, "grad_norm": 3.5787975788116455, "learning_rate": 8.721376978638391e-07, "log_odds_chosen": 11.742012023925781, "log_odds_ratio": -0.00013934174785390496, "logits/chosen": -0.05586977303028107, "logits/rejected": -0.16371804475784302, "logps/chosen": -0.00023717660224065185, "logps/rejected": -2.860356092453003, "loss": 0.3523, "nll_loss": 0.08806522935628891, "rewards/accuracies": 1.0, "rewards/chosen": -2.3717660951660946e-05, "rewards/margins": 0.28601187467575073, "rewards/rejected": -0.28603559732437134, "step": 14233 }, { "epoch": 9.843706777316736, "grad_norm": 3.3110666275024414, "learning_rate": 8.682956815736899e-07, "log_odds_chosen": 10.989479064941406, "log_odds_ratio": -8.622042514616624e-05, "logits/chosen": -0.6725688576698303, "logits/rejected": -0.6157367825508118, "logps/chosen": -0.00035073357867076993, "logps/rejected": -2.6909520626068115, "loss": 0.382, "nll_loss": 0.09548350423574448, "rewards/accuracies": 1.0, "rewards/chosen": -3.507336077746004e-05, "rewards/margins": 0.2690601348876953, "rewards/rejected": -0.26909518241882324, "step": 14234 }, { "epoch": 9.844398340248963, "grad_norm": 3.361088991165161, "learning_rate": 8.644536652835409e-07, "log_odds_chosen": 11.052905082702637, "log_odds_ratio": -0.0004103026003576815, "logits/chosen": -0.20317518711090088, "logits/rejected": -0.28955215215682983, "logps/chosen": -0.0008011145400814712, "logps/rejected": -2.3988914489746094, "loss": 0.4576, "nll_loss": 0.1143680214881897, "rewards/accuracies": 1.0, "rewards/chosen": -8.011145837372169e-05, "rewards/margins": 0.2398090362548828, "rewards/rejected": -0.23988914489746094, "step": 14235 }, { "epoch": 9.84508990318119, "grad_norm": 2.968040704727173, "learning_rate": 8.606116489933917e-07, "log_odds_chosen": 10.519956588745117, "log_odds_ratio": -6.622553337365389e-05, "logits/chosen": -0.32500743865966797, "logits/rejected": -0.26284950971603394, "logps/chosen": -0.00019432193948887289, "logps/rejected": -1.7805821895599365, "loss": 0.3513, "nll_loss": 0.08781195431947708, "rewards/accuracies": 1.0, "rewards/chosen": -1.9432191038504243e-05, "rewards/margins": 0.17803877592086792, "rewards/rejected": -0.1780582070350647, "step": 14236 }, { "epoch": 9.845781466113417, "grad_norm": 3.02480149269104, "learning_rate": 8.567696327032427e-07, "log_odds_chosen": 11.432319641113281, "log_odds_ratio": -6.157150346552953e-05, "logits/chosen": -0.03514774143695831, "logits/rejected": -0.14978571236133575, "logps/chosen": -0.00016551982844248414, "logps/rejected": -2.245384693145752, "loss": 0.3142, "nll_loss": 0.07853604853153229, "rewards/accuracies": 1.0, "rewards/chosen": -1.6551983208046295e-05, "rewards/margins": 0.2245219349861145, "rewards/rejected": -0.22453849017620087, "step": 14237 }, { "epoch": 9.846473029045644, "grad_norm": 4.3203253746032715, "learning_rate": 8.529276164130936e-07, "log_odds_chosen": 11.395495414733887, "log_odds_ratio": -1.157735641754698e-05, "logits/chosen": -0.4227936565876007, "logits/rejected": -0.3641412854194641, "logps/chosen": -0.00022150250151753426, "logps/rejected": -2.7379443645477295, "loss": 0.4385, "nll_loss": 0.10962338745594025, "rewards/accuracies": 1.0, "rewards/chosen": -2.215025233454071e-05, "rewards/margins": 0.27377229928970337, "rewards/rejected": -0.2737944424152374, "step": 14238 }, { "epoch": 9.84716459197787, "grad_norm": 3.5373475551605225, "learning_rate": 8.490856001229445e-07, "log_odds_chosen": 11.616133689880371, "log_odds_ratio": -3.7899713788647205e-05, "logits/chosen": 0.08863887190818787, "logits/rejected": 0.13405123353004456, "logps/chosen": -0.0002468510647304356, "logps/rejected": -2.6139259338378906, "loss": 0.5372, "nll_loss": 0.1342896819114685, "rewards/accuracies": 1.0, "rewards/chosen": -2.4685105017852038e-05, "rewards/margins": 0.26136791706085205, "rewards/rejected": -0.26139259338378906, "step": 14239 }, { "epoch": 9.847856154910097, "grad_norm": 3.6691622734069824, "learning_rate": 8.452435838327956e-07, "log_odds_chosen": 10.332157135009766, "log_odds_ratio": -8.147610060404986e-05, "logits/chosen": -0.35312798619270325, "logits/rejected": -0.38212132453918457, "logps/chosen": -0.0007140946108847857, "logps/rejected": -2.3038883209228516, "loss": 0.4133, "nll_loss": 0.10331538319587708, "rewards/accuracies": 1.0, "rewards/chosen": -7.140946399886161e-05, "rewards/margins": 0.23031742870807648, "rewards/rejected": -0.2303888499736786, "step": 14240 }, { "epoch": 9.848547717842324, "grad_norm": 2.72764253616333, "learning_rate": 8.414015675426463e-07, "log_odds_chosen": 9.962644577026367, "log_odds_ratio": -6.466775084845722e-05, "logits/chosen": -0.2865466773509979, "logits/rejected": -0.30098193883895874, "logps/chosen": -0.0002605429326649755, "logps/rejected": -1.6753520965576172, "loss": 0.3607, "nll_loss": 0.09016528725624084, "rewards/accuracies": 1.0, "rewards/chosen": -2.6054294721689075e-05, "rewards/margins": 0.16750916838645935, "rewards/rejected": -0.1675352305173874, "step": 14241 }, { "epoch": 9.849239280774551, "grad_norm": 4.229187488555908, "learning_rate": 8.375595512524974e-07, "log_odds_chosen": 11.752243041992188, "log_odds_ratio": -2.328364280401729e-05, "logits/chosen": -0.46780213713645935, "logits/rejected": -0.4287949204444885, "logps/chosen": -0.0002484232245478779, "logps/rejected": -3.091888904571533, "loss": 0.4013, "nll_loss": 0.10031718015670776, "rewards/accuracies": 1.0, "rewards/chosen": -2.484232209098991e-05, "rewards/margins": 0.3091640770435333, "rewards/rejected": -0.3091889023780823, "step": 14242 }, { "epoch": 9.849930843706778, "grad_norm": 3.541060447692871, "learning_rate": 8.337175349623483e-07, "log_odds_chosen": 11.745586395263672, "log_odds_ratio": -1.5079102013260126e-05, "logits/chosen": -0.46697476506233215, "logits/rejected": -0.46221745014190674, "logps/chosen": -0.0005979898851364851, "logps/rejected": -2.6756162643432617, "loss": 0.4069, "nll_loss": 0.10171540081501007, "rewards/accuracies": 1.0, "rewards/chosen": -5.979898924124427e-05, "rewards/margins": 0.2675018310546875, "rewards/rejected": -0.2675616145133972, "step": 14243 }, { "epoch": 9.850622406639005, "grad_norm": 3.7117698192596436, "learning_rate": 8.298755186721992e-07, "log_odds_chosen": 10.491707801818848, "log_odds_ratio": -0.0003969599201809615, "logits/chosen": -0.2933369576931, "logits/rejected": -0.44635993242263794, "logps/chosen": -0.00018704970716498792, "logps/rejected": -1.8389921188354492, "loss": 0.4396, "nll_loss": 0.10986747592687607, "rewards/accuracies": 1.0, "rewards/chosen": -1.870496998890303e-05, "rewards/margins": 0.1838805079460144, "rewards/rejected": -0.18389922380447388, "step": 14244 }, { "epoch": 9.851313969571232, "grad_norm": 4.036135196685791, "learning_rate": 8.260335023820502e-07, "log_odds_chosen": 11.864362716674805, "log_odds_ratio": -9.471379598835483e-06, "logits/chosen": -0.03377307951450348, "logits/rejected": -0.16422481834888458, "logps/chosen": -7.505207759095356e-05, "logps/rejected": -2.2845170497894287, "loss": 0.6574, "nll_loss": 0.16434094309806824, "rewards/accuracies": 1.0, "rewards/chosen": -7.505208486691117e-06, "rewards/margins": 0.2284441888332367, "rewards/rejected": -0.2284516990184784, "step": 14245 }, { "epoch": 9.852005532503458, "grad_norm": 2.320402145385742, "learning_rate": 8.22191486091901e-07, "log_odds_chosen": 11.187734603881836, "log_odds_ratio": -2.727216633502394e-05, "logits/chosen": -0.40191370248794556, "logits/rejected": -0.43743768334388733, "logps/chosen": -0.0002643515763338655, "logps/rejected": -2.5183866024017334, "loss": 0.256, "nll_loss": 0.06400003284215927, "rewards/accuracies": 1.0, "rewards/chosen": -2.6435158360982314e-05, "rewards/margins": 0.251812219619751, "rewards/rejected": -0.25183865427970886, "step": 14246 }, { "epoch": 9.852697095435685, "grad_norm": 2.3957173824310303, "learning_rate": 8.18349469801752e-07, "log_odds_chosen": 10.161579132080078, "log_odds_ratio": -0.0006505093188025057, "logits/chosen": -0.6251837015151978, "logits/rejected": -0.6660484075546265, "logps/chosen": -0.00022856263967696577, "logps/rejected": -1.7181360721588135, "loss": 0.2409, "nll_loss": 0.060165874660015106, "rewards/accuracies": 1.0, "rewards/chosen": -2.2856263967696577e-05, "rewards/margins": 0.17179076373577118, "rewards/rejected": -0.17181360721588135, "step": 14247 }, { "epoch": 9.853388658367912, "grad_norm": 2.6903257369995117, "learning_rate": 8.145074535116028e-07, "log_odds_chosen": 11.195083618164062, "log_odds_ratio": -8.327865361934528e-05, "logits/chosen": -0.16145284473896027, "logits/rejected": -0.25972306728363037, "logps/chosen": -0.00040092156268656254, "logps/rejected": -3.030973434448242, "loss": 0.2734, "nll_loss": 0.06833411753177643, "rewards/accuracies": 1.0, "rewards/chosen": -4.0092156268656254e-05, "rewards/margins": 0.3030572533607483, "rewards/rejected": -0.30309736728668213, "step": 14248 }, { "epoch": 9.854080221300139, "grad_norm": 5.38814640045166, "learning_rate": 8.106654372214539e-07, "log_odds_chosen": 11.149696350097656, "log_odds_ratio": -2.1174480934860185e-05, "logits/chosen": 0.13153231143951416, "logits/rejected": -0.09815803915262222, "logps/chosen": -0.00029714504489675164, "logps/rejected": -2.774975538253784, "loss": 0.5003, "nll_loss": 0.12506981194019318, "rewards/accuracies": 1.0, "rewards/chosen": -2.9714505217270926e-05, "rewards/margins": 0.27746787667274475, "rewards/rejected": -0.2774975597858429, "step": 14249 }, { "epoch": 9.854771784232366, "grad_norm": 3.2264857292175293, "learning_rate": 8.068234209313049e-07, "log_odds_chosen": 11.523049354553223, "log_odds_ratio": -6.451080844271928e-05, "logits/chosen": -0.28169524669647217, "logits/rejected": -0.28970491886138916, "logps/chosen": -0.0004202952259220183, "logps/rejected": -2.4245822429656982, "loss": 0.4129, "nll_loss": 0.10322895646095276, "rewards/accuracies": 1.0, "rewards/chosen": -4.202951822662726e-05, "rewards/margins": 0.24241620302200317, "rewards/rejected": -0.2424582540988922, "step": 14250 }, { "epoch": 9.855463347164592, "grad_norm": 3.9890167713165283, "learning_rate": 8.029814046411557e-07, "log_odds_chosen": 10.884989738464355, "log_odds_ratio": -4.29791361966636e-05, "logits/chosen": -0.07643628865480423, "logits/rejected": -0.24363578855991364, "logps/chosen": -0.00019140413496643305, "logps/rejected": -2.163062334060669, "loss": 0.5284, "nll_loss": 0.13209296762943268, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140414224239066e-05, "rewards/margins": 0.21628707647323608, "rewards/rejected": -0.21630622446537018, "step": 14251 }, { "epoch": 9.85615491009682, "grad_norm": 3.9284536838531494, "learning_rate": 7.991393883510067e-07, "log_odds_chosen": 11.530202865600586, "log_odds_ratio": -3.462812674115412e-05, "logits/chosen": -0.32096803188323975, "logits/rejected": -0.4255814552307129, "logps/chosen": -0.00027573731495067477, "logps/rejected": -2.7849910259246826, "loss": 0.3758, "nll_loss": 0.09394600987434387, "rewards/accuracies": 1.0, "rewards/chosen": -2.7573729312280193e-05, "rewards/margins": 0.27847155928611755, "rewards/rejected": -0.27849912643432617, "step": 14252 }, { "epoch": 9.856846473029046, "grad_norm": 3.034029483795166, "learning_rate": 7.952973720608575e-07, "log_odds_chosen": 11.338737487792969, "log_odds_ratio": -3.79706580133643e-05, "logits/chosen": -0.006503105163574219, "logits/rejected": -0.08917421102523804, "logps/chosen": -0.0002537562686484307, "logps/rejected": -2.6884939670562744, "loss": 0.3293, "nll_loss": 0.08232768625020981, "rewards/accuracies": 1.0, "rewards/chosen": -2.5375627956236713e-05, "rewards/margins": 0.26882404088974, "rewards/rejected": -0.2688494026660919, "step": 14253 }, { "epoch": 9.857538035961273, "grad_norm": 4.269557476043701, "learning_rate": 7.914553557707085e-07, "log_odds_chosen": 10.948267936706543, "log_odds_ratio": -8.185338811017573e-05, "logits/chosen": -0.30610525608062744, "logits/rejected": -0.3892078399658203, "logps/chosen": -0.0004585545975714922, "logps/rejected": -2.365933418273926, "loss": 0.335, "nll_loss": 0.08375367522239685, "rewards/accuracies": 1.0, "rewards/chosen": -4.585546048474498e-05, "rewards/margins": 0.23654749989509583, "rewards/rejected": -0.2365933507680893, "step": 14254 }, { "epoch": 9.8582295988935, "grad_norm": 3.503242015838623, "learning_rate": 7.876133394805594e-07, "log_odds_chosen": 11.935919761657715, "log_odds_ratio": -1.5749861631775275e-05, "logits/chosen": -0.49925288558006287, "logits/rejected": -0.5630971789360046, "logps/chosen": -7.692461076658219e-05, "logps/rejected": -2.2525434494018555, "loss": 0.412, "nll_loss": 0.1030036062002182, "rewards/accuracies": 1.0, "rewards/chosen": -7.692462531849742e-06, "rewards/margins": 0.22524665296077728, "rewards/rejected": -0.2252543419599533, "step": 14255 }, { "epoch": 9.858921161825727, "grad_norm": 3.271296501159668, "learning_rate": 7.837713231904103e-07, "log_odds_chosen": 10.795654296875, "log_odds_ratio": -0.0001503465318819508, "logits/chosen": 0.001009523868560791, "logits/rejected": -0.06132131814956665, "logps/chosen": -0.0003437385312281549, "logps/rejected": -2.3663485050201416, "loss": 0.3405, "nll_loss": 0.085112065076828, "rewards/accuracies": 1.0, "rewards/chosen": -3.437385385041125e-05, "rewards/margins": 0.23660047352313995, "rewards/rejected": -0.23663485050201416, "step": 14256 }, { "epoch": 9.859612724757953, "grad_norm": 2.5387489795684814, "learning_rate": 7.799293069002613e-07, "log_odds_chosen": 10.57435131072998, "log_odds_ratio": -6.567249511135742e-05, "logits/chosen": -0.35600507259368896, "logits/rejected": -0.3159923851490021, "logps/chosen": -0.00017069655586965382, "logps/rejected": -1.6613383293151855, "loss": 0.2971, "nll_loss": 0.07425770163536072, "rewards/accuracies": 1.0, "rewards/chosen": -1.706965485936962e-05, "rewards/margins": 0.16611677408218384, "rewards/rejected": -0.1661338359117508, "step": 14257 }, { "epoch": 9.86030428769018, "grad_norm": 2.987234354019165, "learning_rate": 7.760872906101121e-07, "log_odds_chosen": 10.598260879516602, "log_odds_ratio": -9.142129420069978e-05, "logits/chosen": -0.21032246947288513, "logits/rejected": -0.27841508388519287, "logps/chosen": -0.00016935166786424816, "logps/rejected": -1.6317389011383057, "loss": 0.2541, "nll_loss": 0.06352090835571289, "rewards/accuracies": 1.0, "rewards/chosen": -1.6935167877818458e-05, "rewards/margins": 0.16315695643424988, "rewards/rejected": -0.1631738841533661, "step": 14258 }, { "epoch": 9.860995850622407, "grad_norm": 3.952584743499756, "learning_rate": 7.722452743199632e-07, "log_odds_chosen": 10.416267395019531, "log_odds_ratio": -5.154755126568489e-05, "logits/chosen": -0.133428692817688, "logits/rejected": -0.1537066251039505, "logps/chosen": -0.0003270559827797115, "logps/rejected": -2.1095032691955566, "loss": 0.4201, "nll_loss": 0.10501537472009659, "rewards/accuracies": 1.0, "rewards/chosen": -3.270560046075843e-05, "rewards/margins": 0.2109176069498062, "rewards/rejected": -0.2109503298997879, "step": 14259 }, { "epoch": 9.861687413554634, "grad_norm": 3.255803346633911, "learning_rate": 7.684032580298141e-07, "log_odds_chosen": 11.39590835571289, "log_odds_ratio": -7.392850966425613e-05, "logits/chosen": -0.34807223081588745, "logits/rejected": -0.4470960199832916, "logps/chosen": -0.00012161783524788916, "logps/rejected": -2.456490993499756, "loss": 0.4438, "nll_loss": 0.11094728112220764, "rewards/accuracies": 1.0, "rewards/chosen": -1.2161783160991035e-05, "rewards/margins": 0.2456369251012802, "rewards/rejected": -0.24564911425113678, "step": 14260 }, { "epoch": 9.86237897648686, "grad_norm": 2.4818081855773926, "learning_rate": 7.64561241739665e-07, "log_odds_chosen": 11.260456085205078, "log_odds_ratio": -2.046274494205136e-05, "logits/chosen": -0.101976178586483, "logits/rejected": -0.2396450936794281, "logps/chosen": -6.300826498772949e-05, "logps/rejected": -1.7347753047943115, "loss": 0.248, "nll_loss": 0.061990223824977875, "rewards/accuracies": 1.0, "rewards/chosen": -6.300826498772949e-06, "rewards/margins": 0.17347121238708496, "rewards/rejected": -0.17347753047943115, "step": 14261 }, { "epoch": 9.863070539419088, "grad_norm": 2.9214489459991455, "learning_rate": 7.607192254495159e-07, "log_odds_chosen": 11.829577445983887, "log_odds_ratio": -2.8550321076181717e-05, "logits/chosen": -0.16604715585708618, "logits/rejected": -0.19383998215198517, "logps/chosen": -0.0003231268492527306, "logps/rejected": -2.782902717590332, "loss": 0.3157, "nll_loss": 0.07892406731843948, "rewards/accuracies": 1.0, "rewards/chosen": -3.2312687835656106e-05, "rewards/margins": 0.27825796604156494, "rewards/rejected": -0.2782903015613556, "step": 14262 }, { "epoch": 9.863762102351314, "grad_norm": 3.5143449306488037, "learning_rate": 7.568772091593668e-07, "log_odds_chosen": 11.174112319946289, "log_odds_ratio": -6.027439667377621e-05, "logits/chosen": -0.12853315472602844, "logits/rejected": -0.09809570014476776, "logps/chosen": -0.00025325047317892313, "logps/rejected": -2.588475227355957, "loss": 0.2653, "nll_loss": 0.06632782518863678, "rewards/accuracies": 1.0, "rewards/chosen": -2.5325047317892313e-05, "rewards/margins": 0.2588222026824951, "rewards/rejected": -0.25884753465652466, "step": 14263 }, { "epoch": 9.864453665283541, "grad_norm": 2.2648425102233887, "learning_rate": 7.530351928692178e-07, "log_odds_chosen": 10.080967903137207, "log_odds_ratio": -0.00010812430264195427, "logits/chosen": -0.5540417432785034, "logits/rejected": -0.5870547294616699, "logps/chosen": -0.0004334770201239735, "logps/rejected": -1.931509017944336, "loss": 0.2214, "nll_loss": 0.05533323436975479, "rewards/accuracies": 1.0, "rewards/chosen": -4.334770346758887e-05, "rewards/margins": 0.19310756027698517, "rewards/rejected": -0.19315090775489807, "step": 14264 }, { "epoch": 9.865145228215768, "grad_norm": 3.299065351486206, "learning_rate": 7.491931765790688e-07, "log_odds_chosen": 10.757427215576172, "log_odds_ratio": -0.00043512610136531293, "logits/chosen": -0.007109713740646839, "logits/rejected": -0.14529041945934296, "logps/chosen": -0.0008351364522241056, "logps/rejected": -2.858060359954834, "loss": 0.3195, "nll_loss": 0.07981950789690018, "rewards/accuracies": 1.0, "rewards/chosen": -8.351364522241056e-05, "rewards/margins": 0.285722553730011, "rewards/rejected": -0.2858060598373413, "step": 14265 }, { "epoch": 9.865836791147995, "grad_norm": 3.2676212787628174, "learning_rate": 7.453511602889197e-07, "log_odds_chosen": 11.02478313446045, "log_odds_ratio": -6.122445483924821e-05, "logits/chosen": -0.12963497638702393, "logits/rejected": -0.22275590896606445, "logps/chosen": -0.00026319074095226824, "logps/rejected": -2.292569637298584, "loss": 0.4047, "nll_loss": 0.10117470473051071, "rewards/accuracies": 1.0, "rewards/chosen": -2.6319075914216228e-05, "rewards/margins": 0.22923067212104797, "rewards/rejected": -0.22925697267055511, "step": 14266 }, { "epoch": 9.866528354080222, "grad_norm": 3.312446355819702, "learning_rate": 7.415091439987706e-07, "log_odds_chosen": 11.558563232421875, "log_odds_ratio": -3.94942908314988e-05, "logits/chosen": -0.3568577170372009, "logits/rejected": -0.5602548122406006, "logps/chosen": -8.418185461778194e-05, "logps/rejected": -2.275566577911377, "loss": 0.3074, "nll_loss": 0.07684757560491562, "rewards/accuracies": 1.0, "rewards/chosen": -8.418184734182432e-06, "rewards/margins": 0.22754821181297302, "rewards/rejected": -0.22755663096904755, "step": 14267 }, { "epoch": 9.867219917012449, "grad_norm": 3.094496011734009, "learning_rate": 7.376671277086215e-07, "log_odds_chosen": 11.407001495361328, "log_odds_ratio": -0.00010264647426083684, "logits/chosen": -0.46122875809669495, "logits/rejected": -0.34138795733451843, "logps/chosen": -0.0001607881422387436, "logps/rejected": -2.7173984050750732, "loss": 0.2876, "nll_loss": 0.07190103828907013, "rewards/accuracies": 1.0, "rewards/chosen": -1.6078816770459525e-05, "rewards/margins": 0.27172374725341797, "rewards/rejected": -0.2717398405075073, "step": 14268 }, { "epoch": 9.867911479944675, "grad_norm": 4.279833793640137, "learning_rate": 7.338251114184724e-07, "log_odds_chosen": 12.136764526367188, "log_odds_ratio": -2.284317451994866e-05, "logits/chosen": -0.23695890605449677, "logits/rejected": -0.2460167109966278, "logps/chosen": -0.00017700533499009907, "logps/rejected": -3.117013931274414, "loss": 0.3232, "nll_loss": 0.08079381287097931, "rewards/accuracies": 1.0, "rewards/chosen": -1.770053495420143e-05, "rewards/margins": 0.311683714389801, "rewards/rejected": -0.3117014169692993, "step": 14269 }, { "epoch": 9.868603042876902, "grad_norm": 4.246036052703857, "learning_rate": 7.299830951283235e-07, "log_odds_chosen": 11.855365753173828, "log_odds_ratio": -4.492932930588722e-05, "logits/chosen": -0.1479170173406601, "logits/rejected": -0.1711617112159729, "logps/chosen": -0.0001270500069949776, "logps/rejected": -3.0261948108673096, "loss": 0.4588, "nll_loss": 0.11470459401607513, "rewards/accuracies": 1.0, "rewards/chosen": -1.2704999790003058e-05, "rewards/margins": 0.3026067912578583, "rewards/rejected": -0.30261948704719543, "step": 14270 }, { "epoch": 9.869294605809129, "grad_norm": 4.595867156982422, "learning_rate": 7.261410788381743e-07, "log_odds_chosen": 12.388121604919434, "log_odds_ratio": -9.911271263263188e-06, "logits/chosen": -0.35695046186447144, "logits/rejected": -0.4349959194660187, "logps/chosen": -0.00011795929458457977, "logps/rejected": -3.168016195297241, "loss": 0.4223, "nll_loss": 0.10557819902896881, "rewards/accuracies": 1.0, "rewards/chosen": -1.1795929822255857e-05, "rewards/margins": 0.316789835691452, "rewards/rejected": -0.31680163741111755, "step": 14271 }, { "epoch": 9.869986168741356, "grad_norm": 3.5730643272399902, "learning_rate": 7.222990625480253e-07, "log_odds_chosen": 10.779461860656738, "log_odds_ratio": -7.836138684069738e-05, "logits/chosen": -0.30118629336357117, "logits/rejected": -0.34218257665634155, "logps/chosen": -0.0006731522735208273, "logps/rejected": -2.5577590465545654, "loss": 0.3795, "nll_loss": 0.09486451745033264, "rewards/accuracies": 1.0, "rewards/chosen": -6.731522444169968e-05, "rewards/margins": 0.25570860505104065, "rewards/rejected": -0.25577589869499207, "step": 14272 }, { "epoch": 9.870677731673583, "grad_norm": 4.24649715423584, "learning_rate": 7.184570462578761e-07, "log_odds_chosen": 12.680334091186523, "log_odds_ratio": -1.203614374389872e-05, "logits/chosen": -0.012453213334083557, "logits/rejected": -0.04921615123748779, "logps/chosen": -7.618685049237683e-05, "logps/rejected": -3.263230323791504, "loss": 0.4144, "nll_loss": 0.10360294580459595, "rewards/accuracies": 1.0, "rewards/chosen": -7.618685231136624e-06, "rewards/margins": 0.32631543278694153, "rewards/rejected": -0.3263230621814728, "step": 14273 }, { "epoch": 9.87136929460581, "grad_norm": 3.6585779190063477, "learning_rate": 7.146150299677271e-07, "log_odds_chosen": 10.362115859985352, "log_odds_ratio": -4.786492354469374e-05, "logits/chosen": 0.03239941596984863, "logits/rejected": -0.016241123899817467, "logps/chosen": -0.0001966757990885526, "logps/rejected": -1.8835418224334717, "loss": 0.4046, "nll_loss": 0.10113903880119324, "rewards/accuracies": 1.0, "rewards/chosen": -1.96675810002489e-05, "rewards/margins": 0.18833452463150024, "rewards/rejected": -0.18835417926311493, "step": 14274 }, { "epoch": 9.872060857538036, "grad_norm": 3.5085196495056152, "learning_rate": 7.107730136775779e-07, "log_odds_chosen": 10.342247009277344, "log_odds_ratio": -0.00015586627705488354, "logits/chosen": -0.18690171837806702, "logits/rejected": -0.18983352184295654, "logps/chosen": -0.00023725997016299516, "logps/rejected": -1.7420761585235596, "loss": 0.3701, "nll_loss": 0.09251439571380615, "rewards/accuracies": 1.0, "rewards/chosen": -2.3725997380097397e-05, "rewards/margins": 0.1741839051246643, "rewards/rejected": -0.1742076277732849, "step": 14275 }, { "epoch": 9.872752420470263, "grad_norm": 2.949233055114746, "learning_rate": 7.06930997387429e-07, "log_odds_chosen": 11.369131088256836, "log_odds_ratio": -3.7902838812442496e-05, "logits/chosen": -0.4845868647098541, "logits/rejected": -0.5072664022445679, "logps/chosen": -0.00026072681066580117, "logps/rejected": -2.3432700634002686, "loss": 0.3909, "nll_loss": 0.09773064404726028, "rewards/accuracies": 1.0, "rewards/chosen": -2.6072681066580117e-05, "rewards/margins": 0.23430094122886658, "rewards/rejected": -0.2343270182609558, "step": 14276 }, { "epoch": 9.87344398340249, "grad_norm": 4.4616522789001465, "learning_rate": 7.030889810972799e-07, "log_odds_chosen": 11.328014373779297, "log_odds_ratio": -3.821388236247003e-05, "logits/chosen": -0.10368916392326355, "logits/rejected": -0.19956597685813904, "logps/chosen": -0.0010826927609741688, "logps/rejected": -2.758289337158203, "loss": 0.5929, "nll_loss": 0.1482183188199997, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010826926882145926, "rewards/margins": 0.2757207155227661, "rewards/rejected": -0.2758289575576782, "step": 14277 }, { "epoch": 9.874135546334717, "grad_norm": 2.6527774333953857, "learning_rate": 6.992469648071308e-07, "log_odds_chosen": 11.004871368408203, "log_odds_ratio": -0.00011215964332222939, "logits/chosen": -0.6274237036705017, "logits/rejected": -0.6948424577713013, "logps/chosen": -0.0003716836217790842, "logps/rejected": -2.4248266220092773, "loss": 0.2473, "nll_loss": 0.061821553856134415, "rewards/accuracies": 1.0, "rewards/chosen": -3.716836727107875e-05, "rewards/margins": 0.24244548380374908, "rewards/rejected": -0.24248266220092773, "step": 14278 }, { "epoch": 9.874827109266944, "grad_norm": 2.395679473876953, "learning_rate": 6.954049485169817e-07, "log_odds_chosen": 12.594627380371094, "log_odds_ratio": -1.518072713224683e-05, "logits/chosen": -0.3285084366798401, "logits/rejected": -0.3908994793891907, "logps/chosen": -0.00010526237019803375, "logps/rejected": -3.351914405822754, "loss": 0.2661, "nll_loss": 0.06652521342039108, "rewards/accuracies": 1.0, "rewards/chosen": -1.0526237019803375e-05, "rewards/margins": 0.33518093824386597, "rewards/rejected": -0.3351914584636688, "step": 14279 }, { "epoch": 9.87551867219917, "grad_norm": 4.559573650360107, "learning_rate": 6.915629322268326e-07, "log_odds_chosen": 12.000591278076172, "log_odds_ratio": -7.7763048466295e-05, "logits/chosen": -0.10347125679254532, "logits/rejected": -0.0704481452703476, "logps/chosen": -0.00014326543896459043, "logps/rejected": -2.82658314704895, "loss": 0.4159, "nll_loss": 0.1039554625749588, "rewards/accuracies": 1.0, "rewards/chosen": -1.4326544260256924e-05, "rewards/margins": 0.28264403343200684, "rewards/rejected": -0.28265830874443054, "step": 14280 }, { "epoch": 9.876210235131397, "grad_norm": 3.346869707107544, "learning_rate": 6.877209159366836e-07, "log_odds_chosen": 11.646493911743164, "log_odds_ratio": -3.573457070160657e-05, "logits/chosen": -0.6695969700813293, "logits/rejected": -0.6724919676780701, "logps/chosen": -0.0005850328598171473, "logps/rejected": -2.9864587783813477, "loss": 0.3558, "nll_loss": 0.08893509954214096, "rewards/accuracies": 1.0, "rewards/chosen": -5.850328670931049e-05, "rewards/margins": 0.2985873818397522, "rewards/rejected": -0.29864588379859924, "step": 14281 }, { "epoch": 9.876901798063624, "grad_norm": 4.491700172424316, "learning_rate": 6.838788996465346e-07, "log_odds_chosen": 11.605969429016113, "log_odds_ratio": -3.0743733077542856e-05, "logits/chosen": 0.2914671003818512, "logits/rejected": 0.125943124294281, "logps/chosen": -0.0001813894195947796, "logps/rejected": -2.674973726272583, "loss": 0.5663, "nll_loss": 0.14156340062618256, "rewards/accuracies": 1.0, "rewards/chosen": -1.8138942323275842e-05, "rewards/margins": 0.2674792408943176, "rewards/rejected": -0.26749736070632935, "step": 14282 }, { "epoch": 9.877593360995851, "grad_norm": 3.6197402477264404, "learning_rate": 6.800368833563855e-07, "log_odds_chosen": 11.6904296875, "log_odds_ratio": -3.449645373621024e-05, "logits/chosen": -0.25123652815818787, "logits/rejected": -0.260647714138031, "logps/chosen": -0.0001352946856059134, "logps/rejected": -2.788188934326172, "loss": 0.2937, "nll_loss": 0.07341036945581436, "rewards/accuracies": 1.0, "rewards/chosen": -1.3529470379580744e-05, "rewards/margins": 0.27880537509918213, "rewards/rejected": -0.27881887555122375, "step": 14283 }, { "epoch": 9.878284923928078, "grad_norm": 4.248296737670898, "learning_rate": 6.761948670662364e-07, "log_odds_chosen": 12.074800491333008, "log_odds_ratio": -2.439486706862226e-05, "logits/chosen": -0.20641836524009705, "logits/rejected": -0.26033246517181396, "logps/chosen": -0.0001606778532732278, "logps/rejected": -2.9791653156280518, "loss": 0.4337, "nll_loss": 0.10842004418373108, "rewards/accuracies": 1.0, "rewards/chosen": -1.6067786418716423e-05, "rewards/margins": 0.2979004681110382, "rewards/rejected": -0.2979165315628052, "step": 14284 }, { "epoch": 9.878976486860305, "grad_norm": 2.8815829753875732, "learning_rate": 6.723528507760873e-07, "log_odds_chosen": 12.093220710754395, "log_odds_ratio": -1.1571889444894623e-05, "logits/chosen": -0.5799591541290283, "logits/rejected": -0.5917587280273438, "logps/chosen": -5.5582597269676626e-05, "logps/rejected": -2.379427909851074, "loss": 0.2728, "nll_loss": 0.06819252669811249, "rewards/accuracies": 1.0, "rewards/chosen": -5.558259545068722e-06, "rewards/margins": 0.23793722689151764, "rewards/rejected": -0.23794278502464294, "step": 14285 }, { "epoch": 9.879668049792532, "grad_norm": 2.925414800643921, "learning_rate": 6.685108344859382e-07, "log_odds_chosen": 11.433218955993652, "log_odds_ratio": -7.641038246219978e-05, "logits/chosen": 0.006106570363044739, "logits/rejected": -0.09651975333690643, "logps/chosen": -0.0002864729904104024, "logps/rejected": -2.8932034969329834, "loss": 0.8084, "nll_loss": 0.20209553837776184, "rewards/accuracies": 1.0, "rewards/chosen": -2.8647300496231765e-05, "rewards/margins": 0.28929170966148376, "rewards/rejected": -0.28932034969329834, "step": 14286 }, { "epoch": 9.880359612724758, "grad_norm": 4.226961612701416, "learning_rate": 6.646688181957893e-07, "log_odds_chosen": 10.29315185546875, "log_odds_ratio": -0.00028602348174899817, "logits/chosen": -0.4428179860115051, "logits/rejected": -0.55384761095047, "logps/chosen": -0.0006280643865466118, "logps/rejected": -2.452889919281006, "loss": 0.454, "nll_loss": 0.11347101628780365, "rewards/accuracies": 1.0, "rewards/chosen": -6.280643719946966e-05, "rewards/margins": 0.2452262043952942, "rewards/rejected": -0.24528899788856506, "step": 14287 }, { "epoch": 9.881051175656985, "grad_norm": 3.1403894424438477, "learning_rate": 6.608268019056401e-07, "log_odds_chosen": 11.154643058776855, "log_odds_ratio": -3.0978779250290245e-05, "logits/chosen": -0.10836577415466309, "logits/rejected": -0.1866854578256607, "logps/chosen": -0.00019304529996588826, "logps/rejected": -2.0414133071899414, "loss": 0.3154, "nll_loss": 0.07883745431900024, "rewards/accuracies": 1.0, "rewards/chosen": -1.930452708620578e-05, "rewards/margins": 0.20412203669548035, "rewards/rejected": -0.20414134860038757, "step": 14288 }, { "epoch": 9.881742738589212, "grad_norm": 2.4305622577667236, "learning_rate": 6.569847856154911e-07, "log_odds_chosen": 11.386059761047363, "log_odds_ratio": -6.501846655737609e-05, "logits/chosen": -0.32185888290405273, "logits/rejected": -0.4893759787082672, "logps/chosen": -8.296041050925851e-05, "logps/rejected": -1.9257011413574219, "loss": 0.2682, "nll_loss": 0.06703435629606247, "rewards/accuracies": 1.0, "rewards/chosen": -8.296042324218433e-06, "rewards/margins": 0.1925618201494217, "rewards/rejected": -0.19257010519504547, "step": 14289 }, { "epoch": 9.882434301521439, "grad_norm": 4.274883270263672, "learning_rate": 6.531427693253419e-07, "log_odds_chosen": 10.623971939086914, "log_odds_ratio": -5.016976501792669e-05, "logits/chosen": 0.021292902529239655, "logits/rejected": -0.05526788532733917, "logps/chosen": -0.0001984376722248271, "logps/rejected": -1.9755374193191528, "loss": 0.4367, "nll_loss": 0.1091596782207489, "rewards/accuracies": 1.0, "rewards/chosen": -1.984376649488695e-05, "rewards/margins": 0.19753390550613403, "rewards/rejected": -0.19755373895168304, "step": 14290 }, { "epoch": 9.883125864453666, "grad_norm": 3.6415889263153076, "learning_rate": 6.493007530351929e-07, "log_odds_chosen": 10.185497283935547, "log_odds_ratio": -0.0002156754635507241, "logits/chosen": -0.5327581167221069, "logits/rejected": -0.5787546038627625, "logps/chosen": -0.0012383005814626813, "logps/rejected": -1.8759710788726807, "loss": 0.393, "nll_loss": 0.09823187440633774, "rewards/accuracies": 1.0, "rewards/chosen": -0.00012383004650473595, "rewards/margins": 0.18747326731681824, "rewards/rejected": -0.1875970959663391, "step": 14291 }, { "epoch": 9.883817427385893, "grad_norm": 2.7681779861450195, "learning_rate": 6.454587367450438e-07, "log_odds_chosen": 10.560806274414062, "log_odds_ratio": -0.00027869484620168805, "logits/chosen": -0.5162404179573059, "logits/rejected": -0.6527389883995056, "logps/chosen": -0.0005228912341408432, "logps/rejected": -1.8825963735580444, "loss": 0.3032, "nll_loss": 0.0757603719830513, "rewards/accuracies": 1.0, "rewards/chosen": -5.228912050370127e-05, "rewards/margins": 0.18820734322071075, "rewards/rejected": -0.18825963139533997, "step": 14292 }, { "epoch": 9.88450899031812, "grad_norm": 4.2536211013793945, "learning_rate": 6.416167204548948e-07, "log_odds_chosen": 10.498558044433594, "log_odds_ratio": -9.042402962222695e-05, "logits/chosen": -0.6295912265777588, "logits/rejected": -0.5925350785255432, "logps/chosen": -0.0004221365961711854, "logps/rejected": -2.2527432441711426, "loss": 0.5501, "nll_loss": 0.13752688467502594, "rewards/accuracies": 1.0, "rewards/chosen": -4.221366179990582e-05, "rewards/margins": 0.2252321094274521, "rewards/rejected": -0.22527432441711426, "step": 14293 }, { "epoch": 9.885200553250346, "grad_norm": 2.9185569286346436, "learning_rate": 6.377747041647457e-07, "log_odds_chosen": 11.82805061340332, "log_odds_ratio": -1.629330108698923e-05, "logits/chosen": -0.11088446527719498, "logits/rejected": -0.16331283748149872, "logps/chosen": -0.0006101735634729266, "logps/rejected": -3.084566593170166, "loss": 0.2856, "nll_loss": 0.0713898241519928, "rewards/accuracies": 1.0, "rewards/chosen": -6.101735198171809e-05, "rewards/margins": 0.308395653963089, "rewards/rejected": -0.3084566593170166, "step": 14294 }, { "epoch": 9.885892116182573, "grad_norm": 2.9352762699127197, "learning_rate": 6.339326878745966e-07, "log_odds_chosen": 10.382514953613281, "log_odds_ratio": -0.00020576248061843216, "logits/chosen": -0.45164620876312256, "logits/rejected": -0.571010172367096, "logps/chosen": -0.0007919521303847432, "logps/rejected": -2.304452896118164, "loss": 0.3793, "nll_loss": 0.09479214251041412, "rewards/accuracies": 1.0, "rewards/chosen": -7.91952115832828e-05, "rewards/margins": 0.2303660809993744, "rewards/rejected": -0.2304452806711197, "step": 14295 }, { "epoch": 9.8865836791148, "grad_norm": 2.45688796043396, "learning_rate": 6.300906715844475e-07, "log_odds_chosen": 11.476516723632812, "log_odds_ratio": -8.425705891568214e-05, "logits/chosen": -0.10602383315563202, "logits/rejected": -0.13981786370277405, "logps/chosen": -0.00018015142995864153, "logps/rejected": -1.9917879104614258, "loss": 0.2963, "nll_loss": 0.07405611127614975, "rewards/accuracies": 1.0, "rewards/chosen": -1.8015143723459914e-05, "rewards/margins": 0.19916076958179474, "rewards/rejected": -0.1991787850856781, "step": 14296 }, { "epoch": 9.887275242047027, "grad_norm": 4.807474136352539, "learning_rate": 6.262486552942985e-07, "log_odds_chosen": 11.048713684082031, "log_odds_ratio": -3.208504858775996e-05, "logits/chosen": 0.037749409675598145, "logits/rejected": 0.01808573305606842, "logps/chosen": -0.00015323254046961665, "logps/rejected": -2.082486391067505, "loss": 0.4806, "nll_loss": 0.12015396356582642, "rewards/accuracies": 1.0, "rewards/chosen": -1.5323254046961665e-05, "rewards/margins": 0.2082333266735077, "rewards/rejected": -0.20824863016605377, "step": 14297 }, { "epoch": 9.887966804979254, "grad_norm": 3.227276563644409, "learning_rate": 6.224066390041494e-07, "log_odds_chosen": 9.382593154907227, "log_odds_ratio": -0.0006197717739269137, "logits/chosen": 0.034344106912612915, "logits/rejected": -0.012226209044456482, "logps/chosen": -0.0031241520773619413, "logps/rejected": -2.3328871726989746, "loss": 0.3996, "nll_loss": 0.09983502328395844, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003124152426607907, "rewards/margins": 0.23297631740570068, "rewards/rejected": -0.2332887351512909, "step": 14298 }, { "epoch": 9.88865836791148, "grad_norm": 5.094573020935059, "learning_rate": 6.185646227140004e-07, "log_odds_chosen": 11.684231758117676, "log_odds_ratio": -4.3627602281048894e-05, "logits/chosen": -0.16069498658180237, "logits/rejected": -0.2361924946308136, "logps/chosen": -0.00020580008276738226, "logps/rejected": -2.831536293029785, "loss": 0.571, "nll_loss": 0.1427527368068695, "rewards/accuracies": 1.0, "rewards/chosen": -2.058000973192975e-05, "rewards/margins": 0.28313305974006653, "rewards/rejected": -0.2831536531448364, "step": 14299 }, { "epoch": 9.889349930843707, "grad_norm": 2.746864080429077, "learning_rate": 6.147226064238512e-07, "log_odds_chosen": 12.112071990966797, "log_odds_ratio": -2.832992686307989e-05, "logits/chosen": -0.23692499101161957, "logits/rejected": -0.21913860738277435, "logps/chosen": -0.00011797455226769671, "logps/rejected": -2.8220291137695312, "loss": 0.3201, "nll_loss": 0.08001617342233658, "rewards/accuracies": 1.0, "rewards/chosen": -1.1797455954365432e-05, "rewards/margins": 0.28219112753868103, "rewards/rejected": -0.28220292925834656, "step": 14300 }, { "epoch": 9.890041493775934, "grad_norm": 2.84747314453125, "learning_rate": 6.108805901337022e-07, "log_odds_chosen": 10.872987747192383, "log_odds_ratio": -3.52791539626196e-05, "logits/chosen": -0.15608876943588257, "logits/rejected": -0.1359274983406067, "logps/chosen": -0.00016623694682493806, "logps/rejected": -2.1368963718414307, "loss": 0.3956, "nll_loss": 0.09889744222164154, "rewards/accuracies": 1.0, "rewards/chosen": -1.6623695046291687e-05, "rewards/margins": 0.21367302536964417, "rewards/rejected": -0.2136896550655365, "step": 14301 }, { "epoch": 9.89073305670816, "grad_norm": 3.633258819580078, "learning_rate": 6.070385738435532e-07, "log_odds_chosen": 10.81205940246582, "log_odds_ratio": -5.5275151680689305e-05, "logits/chosen": -0.2276846170425415, "logits/rejected": -0.21954035758972168, "logps/chosen": -0.00034371885703876615, "logps/rejected": -2.4575982093811035, "loss": 0.7281, "nll_loss": 0.1820145845413208, "rewards/accuracies": 1.0, "rewards/chosen": -3.4371885703876615e-05, "rewards/margins": 0.24572545289993286, "rewards/rejected": -0.24575982987880707, "step": 14302 }, { "epoch": 9.891424619640388, "grad_norm": 4.360518455505371, "learning_rate": 6.03196557553404e-07, "log_odds_chosen": 10.949689865112305, "log_odds_ratio": -0.0003759180544875562, "logits/chosen": -0.17199602723121643, "logits/rejected": -0.18793082237243652, "logps/chosen": -0.000553618127014488, "logps/rejected": -2.2656283378601074, "loss": 0.3991, "nll_loss": 0.09972595423460007, "rewards/accuracies": 1.0, "rewards/chosen": -5.5361815611831844e-05, "rewards/margins": 0.22650748491287231, "rewards/rejected": -0.22656285762786865, "step": 14303 }, { "epoch": 9.892116182572614, "grad_norm": 3.440701961517334, "learning_rate": 5.993545412632551e-07, "log_odds_chosen": 11.22562313079834, "log_odds_ratio": -3.425776958465576e-05, "logits/chosen": -0.30449116230010986, "logits/rejected": -0.35918286442756653, "logps/chosen": -0.00014760816702619195, "logps/rejected": -2.243481159210205, "loss": 0.4307, "nll_loss": 0.10767467319965363, "rewards/accuracies": 1.0, "rewards/chosen": -1.4760817066417076e-05, "rewards/margins": 0.22433334589004517, "rewards/rejected": -0.22434811294078827, "step": 14304 }, { "epoch": 9.892807745504841, "grad_norm": 3.14928936958313, "learning_rate": 5.955125249731059e-07, "log_odds_chosen": 11.337374687194824, "log_odds_ratio": -3.201282015652396e-05, "logits/chosen": -0.1693303883075714, "logits/rejected": -0.17716658115386963, "logps/chosen": -0.00022396880376618356, "logps/rejected": -2.830944538116455, "loss": 0.3083, "nll_loss": 0.07706800103187561, "rewards/accuracies": 1.0, "rewards/chosen": -2.239688183180988e-05, "rewards/margins": 0.2830720543861389, "rewards/rejected": -0.2830944359302521, "step": 14305 }, { "epoch": 9.893499308437068, "grad_norm": 3.699977159500122, "learning_rate": 5.916705086829569e-07, "log_odds_chosen": 10.842156410217285, "log_odds_ratio": -0.0004522549279499799, "logits/chosen": -0.06826915591955185, "logits/rejected": -0.2108014076948166, "logps/chosen": -0.0010594420600682497, "logps/rejected": -2.3867008686065674, "loss": 0.3698, "nll_loss": 0.09239698201417923, "rewards/accuracies": 1.0, "rewards/chosen": -0.00010594420018605888, "rewards/margins": 0.238564133644104, "rewards/rejected": -0.23867008090019226, "step": 14306 }, { "epoch": 9.894190871369295, "grad_norm": 2.9475674629211426, "learning_rate": 5.878284923928077e-07, "log_odds_chosen": 11.217729568481445, "log_odds_ratio": -9.593695722287521e-05, "logits/chosen": -0.11455540359020233, "logits/rejected": -0.13295046985149384, "logps/chosen": -0.0002060092519968748, "logps/rejected": -2.5001935958862305, "loss": 0.3923, "nll_loss": 0.09807466715574265, "rewards/accuracies": 1.0, "rewards/chosen": -2.0600926291081123e-05, "rewards/margins": 0.24999874830245972, "rewards/rejected": -0.2500193417072296, "step": 14307 }, { "epoch": 9.894882434301522, "grad_norm": 3.6130259037017822, "learning_rate": 5.839864761026587e-07, "log_odds_chosen": 10.944160461425781, "log_odds_ratio": -5.0609152822289616e-05, "logits/chosen": -0.17907792329788208, "logits/rejected": -0.35081222653388977, "logps/chosen": -0.0004822844348382205, "logps/rejected": -3.0487985610961914, "loss": 0.7491, "nll_loss": 0.1872725784778595, "rewards/accuracies": 1.0, "rewards/chosen": -4.822844493901357e-05, "rewards/margins": 0.3048316538333893, "rewards/rejected": -0.3048798739910126, "step": 14308 }, { "epoch": 9.895573997233749, "grad_norm": 3.746443748474121, "learning_rate": 5.801444598125096e-07, "log_odds_chosen": 12.997480392456055, "log_odds_ratio": -1.1382105185475666e-05, "logits/chosen": -0.10519464313983917, "logits/rejected": -0.11598517000675201, "logps/chosen": -0.00033332454040646553, "logps/rejected": -4.251444339752197, "loss": 0.2877, "nll_loss": 0.07191663980484009, "rewards/accuracies": 1.0, "rewards/chosen": -3.333245331305079e-05, "rewards/margins": 0.42511114478111267, "rewards/rejected": -0.4251444637775421, "step": 14309 }, { "epoch": 9.896265560165975, "grad_norm": 3.0396363735198975, "learning_rate": 5.763024435223606e-07, "log_odds_chosen": 11.321372985839844, "log_odds_ratio": -5.7202312746085227e-05, "logits/chosen": 0.09867669641971588, "logits/rejected": -0.03381189703941345, "logps/chosen": -0.00016327225603163242, "logps/rejected": -2.585301637649536, "loss": 0.356, "nll_loss": 0.08898219466209412, "rewards/accuracies": 1.0, "rewards/chosen": -1.632722523936536e-05, "rewards/margins": 0.25851383805274963, "rewards/rejected": -0.2585301697254181, "step": 14310 }, { "epoch": 9.896957123098202, "grad_norm": 3.3631041049957275, "learning_rate": 5.724604272322115e-07, "log_odds_chosen": 11.977582931518555, "log_odds_ratio": -0.00012897477427031845, "logits/chosen": -0.14217886328697205, "logits/rejected": -0.2045181840658188, "logps/chosen": -0.0004048075061291456, "logps/rejected": -3.6177620887756348, "loss": 0.4012, "nll_loss": 0.10028345882892609, "rewards/accuracies": 1.0, "rewards/chosen": -4.048075061291456e-05, "rewards/margins": 0.3617357313632965, "rewards/rejected": -0.361776202917099, "step": 14311 }, { "epoch": 9.89764868603043, "grad_norm": 3.0941312313079834, "learning_rate": 5.686184109420624e-07, "log_odds_chosen": 12.279654502868652, "log_odds_ratio": -5.036395668867044e-05, "logits/chosen": -0.18718120455741882, "logits/rejected": -0.17902915179729462, "logps/chosen": -0.00018711428856477141, "logps/rejected": -3.5618364810943604, "loss": 0.3191, "nll_loss": 0.07976827025413513, "rewards/accuracies": 1.0, "rewards/chosen": -1.8711431039264426e-05, "rewards/margins": 0.35616499185562134, "rewards/rejected": -0.3561837077140808, "step": 14312 }, { "epoch": 9.898340248962656, "grad_norm": 2.8663947582244873, "learning_rate": 5.647763946519133e-07, "log_odds_chosen": 11.31735897064209, "log_odds_ratio": -4.939009886584245e-05, "logits/chosen": -0.44607430696487427, "logits/rejected": -0.4172176718711853, "logps/chosen": -0.00024118662986438721, "logps/rejected": -2.5889973640441895, "loss": 0.2682, "nll_loss": 0.06704474240541458, "rewards/accuracies": 1.0, "rewards/chosen": -2.411866262264084e-05, "rewards/margins": 0.25887560844421387, "rewards/rejected": -0.2588997483253479, "step": 14313 }, { "epoch": 9.899031811894883, "grad_norm": 3.3275301456451416, "learning_rate": 5.609343783617643e-07, "log_odds_chosen": 11.67763900756836, "log_odds_ratio": -2.294234036526177e-05, "logits/chosen": -0.5921186208724976, "logits/rejected": -0.5702080130577087, "logps/chosen": -0.0003165464149788022, "logps/rejected": -3.1312780380249023, "loss": 0.4145, "nll_loss": 0.10362927615642548, "rewards/accuracies": 1.0, "rewards/chosen": -3.165464295307174e-05, "rewards/margins": 0.31309616565704346, "rewards/rejected": -0.3131278157234192, "step": 14314 }, { "epoch": 9.89972337482711, "grad_norm": 2.9337375164031982, "learning_rate": 5.570923620716152e-07, "log_odds_chosen": 12.45496940612793, "log_odds_ratio": -1.6639531168038957e-05, "logits/chosen": -0.36686575412750244, "logits/rejected": -0.34807348251342773, "logps/chosen": -0.00012428374611772597, "logps/rejected": -3.021192789077759, "loss": 0.3055, "nll_loss": 0.07638576626777649, "rewards/accuracies": 1.0, "rewards/chosen": -1.2428373338480014e-05, "rewards/margins": 0.3021068871021271, "rewards/rejected": -0.30211928486824036, "step": 14315 }, { "epoch": 9.900414937759336, "grad_norm": 2.6304478645324707, "learning_rate": 5.532503457814662e-07, "log_odds_chosen": 10.704986572265625, "log_odds_ratio": -7.087891572155058e-05, "logits/chosen": -0.03459363430738449, "logits/rejected": -0.02272067219018936, "logps/chosen": -0.0003902169701177627, "logps/rejected": -2.3840231895446777, "loss": 0.452, "nll_loss": 0.113001748919487, "rewards/accuracies": 1.0, "rewards/chosen": -3.9021695556584746e-05, "rewards/margins": 0.23836329579353333, "rewards/rejected": -0.2384023219347, "step": 14316 }, { "epoch": 9.901106500691563, "grad_norm": 3.5170085430145264, "learning_rate": 5.49408329491317e-07, "log_odds_chosen": 12.182526588439941, "log_odds_ratio": -0.00010460546764079481, "logits/chosen": -0.009993776679039001, "logits/rejected": -0.03831420838832855, "logps/chosen": -0.0008481538970954716, "logps/rejected": -3.2034897804260254, "loss": 0.3519, "nll_loss": 0.08796828985214233, "rewards/accuracies": 1.0, "rewards/chosen": -8.481539407512173e-05, "rewards/margins": 0.32026416063308716, "rewards/rejected": -0.32034897804260254, "step": 14317 }, { "epoch": 9.90179806362379, "grad_norm": 2.901359796524048, "learning_rate": 5.45566313201168e-07, "log_odds_chosen": 10.570993423461914, "log_odds_ratio": -6.009342541801743e-05, "logits/chosen": -0.05773099139332771, "logits/rejected": -0.17567658424377441, "logps/chosen": -0.00017942303384188563, "logps/rejected": -1.751425862312317, "loss": 0.2901, "nll_loss": 0.07250886410474777, "rewards/accuracies": 1.0, "rewards/chosen": -1.7942304111784324e-05, "rewards/margins": 0.1751246452331543, "rewards/rejected": -0.1751425862312317, "step": 14318 }, { "epoch": 9.902489626556017, "grad_norm": 3.4182724952697754, "learning_rate": 5.41724296911019e-07, "log_odds_chosen": 10.695033073425293, "log_odds_ratio": -4.265415191184729e-05, "logits/chosen": -0.0365283340215683, "logits/rejected": -0.08216466009616852, "logps/chosen": -0.00021733081666752696, "logps/rejected": -2.2307868003845215, "loss": 0.34, "nll_loss": 0.08500487357378006, "rewards/accuracies": 1.0, "rewards/chosen": -2.17330834857421e-05, "rewards/margins": 0.22305697202682495, "rewards/rejected": -0.22307869791984558, "step": 14319 }, { "epoch": 9.903181189488244, "grad_norm": 4.249139785766602, "learning_rate": 5.378822806208698e-07, "log_odds_chosen": 11.20050048828125, "log_odds_ratio": -2.2386917407857254e-05, "logits/chosen": -0.02520899474620819, "logits/rejected": -0.03752049803733826, "logps/chosen": -0.0003164797672070563, "logps/rejected": -2.747769832611084, "loss": 0.3104, "nll_loss": 0.07758967578411102, "rewards/accuracies": 1.0, "rewards/chosen": -3.164797817589715e-05, "rewards/margins": 0.2747453451156616, "rewards/rejected": -0.27477699518203735, "step": 14320 }, { "epoch": 9.90387275242047, "grad_norm": 4.353235721588135, "learning_rate": 5.340402643307209e-07, "log_odds_chosen": 11.509032249450684, "log_odds_ratio": -6.0765403759432957e-05, "logits/chosen": -0.13491703569889069, "logits/rejected": -0.15714114904403687, "logps/chosen": -0.00016875413712114096, "logps/rejected": -2.7386293411254883, "loss": 0.4921, "nll_loss": 0.12302062660455704, "rewards/accuracies": 1.0, "rewards/chosen": -1.687541589490138e-05, "rewards/margins": 0.2738460600376129, "rewards/rejected": -0.27386295795440674, "step": 14321 }, { "epoch": 9.904564315352697, "grad_norm": 5.303290367126465, "learning_rate": 5.301982480405717e-07, "log_odds_chosen": 11.984261512756348, "log_odds_ratio": -3.151076452923007e-05, "logits/chosen": -0.14473620057106018, "logits/rejected": -0.23054178059101105, "logps/chosen": -0.00011740185436792672, "logps/rejected": -2.885850191116333, "loss": 0.6141, "nll_loss": 0.15351364016532898, "rewards/accuracies": 1.0, "rewards/chosen": -1.174018416350009e-05, "rewards/margins": 0.288573294878006, "rewards/rejected": -0.28858503699302673, "step": 14322 }, { "epoch": 9.905255878284924, "grad_norm": 4.3790740966796875, "learning_rate": 5.263562317504227e-07, "log_odds_chosen": 12.246095657348633, "log_odds_ratio": -1.5059587894938886e-05, "logits/chosen": -0.03567267954349518, "logits/rejected": -0.15627005696296692, "logps/chosen": -7.786623609717935e-05, "logps/rejected": -2.4729762077331543, "loss": 0.3954, "nll_loss": 0.09885958582162857, "rewards/accuracies": 1.0, "rewards/chosen": -7.786624337313697e-06, "rewards/margins": 0.24728982150554657, "rewards/rejected": -0.24729761481285095, "step": 14323 }, { "epoch": 9.905947441217151, "grad_norm": 2.7396340370178223, "learning_rate": 5.225142154602735e-07, "log_odds_chosen": 11.351285934448242, "log_odds_ratio": -1.8908889614976943e-05, "logits/chosen": -0.6820241212844849, "logits/rejected": -0.6081110835075378, "logps/chosen": -0.00010962403030134737, "logps/rejected": -2.1085121631622314, "loss": 0.3534, "nll_loss": 0.08834048360586166, "rewards/accuracies": 1.0, "rewards/chosen": -1.0962403393932618e-05, "rewards/margins": 0.21084025502204895, "rewards/rejected": -0.21085122227668762, "step": 14324 }, { "epoch": 9.906639004149378, "grad_norm": 3.673875093460083, "learning_rate": 5.186721991701245e-07, "log_odds_chosen": 11.471330642700195, "log_odds_ratio": -9.9008837423753e-05, "logits/chosen": -0.03928861767053604, "logits/rejected": -0.2071848213672638, "logps/chosen": -0.00031385914189741015, "logps/rejected": -3.0085816383361816, "loss": 0.5239, "nll_loss": 0.1309712678194046, "rewards/accuracies": 1.0, "rewards/chosen": -3.13859163725283e-05, "rewards/margins": 0.3008267879486084, "rewards/rejected": -0.30085819959640503, "step": 14325 }, { "epoch": 9.907330567081605, "grad_norm": 3.081761598587036, "learning_rate": 5.148301828799754e-07, "log_odds_chosen": 9.377728462219238, "log_odds_ratio": -0.0008418531506322324, "logits/chosen": -0.3373812437057495, "logits/rejected": -0.3121347427368164, "logps/chosen": -0.00022931865532882512, "logps/rejected": -1.6012835502624512, "loss": 0.2925, "nll_loss": 0.07303423434495926, "rewards/accuracies": 1.0, "rewards/chosen": -2.2931868443265557e-05, "rewards/margins": 0.16010543704032898, "rewards/rejected": -0.16012835502624512, "step": 14326 }, { "epoch": 9.908022130013832, "grad_norm": 3.6323537826538086, "learning_rate": 5.109881665898264e-07, "log_odds_chosen": 10.823930740356445, "log_odds_ratio": -0.0002582712040748447, "logits/chosen": -0.5242524147033691, "logits/rejected": -0.5194587707519531, "logps/chosen": -0.00021303680841811, "logps/rejected": -2.167452096939087, "loss": 0.2817, "nll_loss": 0.07040915638208389, "rewards/accuracies": 1.0, "rewards/chosen": -2.130368011421524e-05, "rewards/margins": 0.21672390401363373, "rewards/rejected": -0.21674521267414093, "step": 14327 }, { "epoch": 9.908713692946058, "grad_norm": 3.7610743045806885, "learning_rate": 5.071461502996773e-07, "log_odds_chosen": 11.104131698608398, "log_odds_ratio": -7.880770863266662e-05, "logits/chosen": -0.23604810237884521, "logits/rejected": -0.311259388923645, "logps/chosen": -0.0001373798295389861, "logps/rejected": -2.1140708923339844, "loss": 0.4371, "nll_loss": 0.10926076024770737, "rewards/accuracies": 1.0, "rewards/chosen": -1.3737982044403907e-05, "rewards/margins": 0.2113933563232422, "rewards/rejected": -0.21140709519386292, "step": 14328 }, { "epoch": 9.909405255878285, "grad_norm": 3.5242393016815186, "learning_rate": 5.033041340095282e-07, "log_odds_chosen": 9.724529266357422, "log_odds_ratio": -0.0002068439789582044, "logits/chosen": -0.14855721592903137, "logits/rejected": -0.25745317339897156, "logps/chosen": -0.00022229237947613, "logps/rejected": -1.5355801582336426, "loss": 0.3654, "nll_loss": 0.09133844077587128, "rewards/accuracies": 1.0, "rewards/chosen": -2.2229240130400285e-05, "rewards/margins": 0.15353579819202423, "rewards/rejected": -0.15355801582336426, "step": 14329 }, { "epoch": 9.910096818810512, "grad_norm": 3.3911397457122803, "learning_rate": 4.994621177193791e-07, "log_odds_chosen": 10.772411346435547, "log_odds_ratio": -0.00016185276035685092, "logits/chosen": -0.0470561645925045, "logits/rejected": -0.04483966901898384, "logps/chosen": -0.0006423345184884965, "logps/rejected": -2.230781316757202, "loss": 0.4028, "nll_loss": 0.10068729519844055, "rewards/accuracies": 1.0, "rewards/chosen": -6.423345621442422e-05, "rewards/margins": 0.22301389276981354, "rewards/rejected": -0.22307811677455902, "step": 14330 }, { "epoch": 9.910788381742739, "grad_norm": 2.422025680541992, "learning_rate": 4.956201014292301e-07, "log_odds_chosen": 9.661433219909668, "log_odds_ratio": -0.000198134541278705, "logits/chosen": -0.24157515168190002, "logits/rejected": -0.36891329288482666, "logps/chosen": -0.00032799746259115636, "logps/rejected": -1.5236164331436157, "loss": 0.2083, "nll_loss": 0.05205736309289932, "rewards/accuracies": 1.0, "rewards/chosen": -3.279974771430716e-05, "rewards/margins": 0.15232884883880615, "rewards/rejected": -0.1523616462945938, "step": 14331 }, { "epoch": 9.911479944674966, "grad_norm": 3.357443332672119, "learning_rate": 4.917780851390809e-07, "log_odds_chosen": 11.180157661437988, "log_odds_ratio": -3.0052715374040417e-05, "logits/chosen": -0.16521982848644257, "logits/rejected": -0.12277151644229889, "logps/chosen": -6.565694638993591e-05, "logps/rejected": -1.5582237243652344, "loss": 0.267, "nll_loss": 0.06674351543188095, "rewards/accuracies": 1.0, "rewards/chosen": -6.565694548044121e-06, "rewards/margins": 0.15581580996513367, "rewards/rejected": -0.15582238137722015, "step": 14332 }, { "epoch": 9.912171507607193, "grad_norm": 3.570207357406616, "learning_rate": 4.87936068848932e-07, "log_odds_chosen": 11.282625198364258, "log_odds_ratio": -4.635823279386386e-05, "logits/chosen": -0.20219162106513977, "logits/rejected": -0.32793179154396057, "logps/chosen": -0.00024112232495099306, "logps/rejected": -2.8642830848693848, "loss": 0.3108, "nll_loss": 0.07770770788192749, "rewards/accuracies": 1.0, "rewards/chosen": -2.4112232495099306e-05, "rewards/margins": 0.28640419244766235, "rewards/rejected": -0.2864283323287964, "step": 14333 }, { "epoch": 9.91286307053942, "grad_norm": 3.489907741546631, "learning_rate": 4.840940525587828e-07, "log_odds_chosen": 11.93216323852539, "log_odds_ratio": -2.781835246423725e-05, "logits/chosen": -0.2622598111629486, "logits/rejected": -0.3802410066127777, "logps/chosen": -0.00022485022782348096, "logps/rejected": -2.870073080062866, "loss": 0.4957, "nll_loss": 0.12392807006835938, "rewards/accuracies": 1.0, "rewards/chosen": -2.2485022782348096e-05, "rewards/margins": 0.2869848608970642, "rewards/rejected": -0.28700733184814453, "step": 14334 }, { "epoch": 9.913554633471646, "grad_norm": 3.2331087589263916, "learning_rate": 4.802520362686338e-07, "log_odds_chosen": 11.829715728759766, "log_odds_ratio": -2.7710057111107744e-05, "logits/chosen": -0.19258259236812592, "logits/rejected": -0.24621924757957458, "logps/chosen": -0.00016499309276696295, "logps/rejected": -2.9383063316345215, "loss": 0.3404, "nll_loss": 0.0851057916879654, "rewards/accuracies": 1.0, "rewards/chosen": -1.6499310731887817e-05, "rewards/margins": 0.29381415247917175, "rewards/rejected": -0.29383063316345215, "step": 14335 }, { "epoch": 9.914246196403873, "grad_norm": 4.226347923278809, "learning_rate": 4.764100199784847e-07, "log_odds_chosen": 11.72370719909668, "log_odds_ratio": -2.9156955861253664e-05, "logits/chosen": -0.29745885729789734, "logits/rejected": -0.3365001678466797, "logps/chosen": -0.0002458348171785474, "logps/rejected": -2.796137809753418, "loss": 0.4286, "nll_loss": 0.10715927183628082, "rewards/accuracies": 1.0, "rewards/chosen": -2.4583481717854738e-05, "rewards/margins": 0.2795892059803009, "rewards/rejected": -0.27961376309394836, "step": 14336 }, { "epoch": 9.9149377593361, "grad_norm": 3.6933679580688477, "learning_rate": 4.725680036883356e-07, "log_odds_chosen": 12.086358070373535, "log_odds_ratio": -8.461339348286856e-06, "logits/chosen": -0.27729731798171997, "logits/rejected": -0.46082037687301636, "logps/chosen": -0.00010570999438641593, "logps/rejected": -2.9097843170166016, "loss": 0.5001, "nll_loss": 0.1250213086605072, "rewards/accuracies": 1.0, "rewards/chosen": -1.0570998711045831e-05, "rewards/margins": 0.2909678816795349, "rewards/rejected": -0.29097843170166016, "step": 14337 }, { "epoch": 9.915629322268327, "grad_norm": 3.8641040325164795, "learning_rate": 4.6872598739818663e-07, "log_odds_chosen": 11.767168045043945, "log_odds_ratio": -9.326574399892706e-06, "logits/chosen": -0.46089547872543335, "logits/rejected": -0.692821204662323, "logps/chosen": -7.577265205327421e-05, "logps/rejected": -2.1478333473205566, "loss": 0.3482, "nll_loss": 0.08706062287092209, "rewards/accuracies": 1.0, "rewards/chosen": -7.577265478175832e-06, "rewards/margins": 0.21477577090263367, "rewards/rejected": -0.21478332579135895, "step": 14338 }, { "epoch": 9.916320885200554, "grad_norm": 3.4284377098083496, "learning_rate": 4.6488397110803754e-07, "log_odds_chosen": 8.728742599487305, "log_odds_ratio": -0.0006372688221745193, "logits/chosen": -0.48407578468322754, "logits/rejected": -0.44575226306915283, "logps/chosen": -0.0006253526080399752, "logps/rejected": -1.2256580591201782, "loss": 0.31, "nll_loss": 0.07742569595575333, "rewards/accuracies": 1.0, "rewards/chosen": -6.253526225918904e-05, "rewards/margins": 0.12250328063964844, "rewards/rejected": -0.12256580591201782, "step": 14339 }, { "epoch": 9.91701244813278, "grad_norm": 2.6981019973754883, "learning_rate": 4.6104195481788845e-07, "log_odds_chosen": 9.52665901184082, "log_odds_ratio": -0.00023078435333445668, "logits/chosen": -0.554850161075592, "logits/rejected": -0.513104259967804, "logps/chosen": -0.0002982397563755512, "logps/rejected": -1.2079306840896606, "loss": 0.1931, "nll_loss": 0.0482604056596756, "rewards/accuracies": 1.0, "rewards/chosen": -2.9823975637555122e-05, "rewards/margins": 0.12076324224472046, "rewards/rejected": -0.12079307436943054, "step": 14340 }, { "epoch": 9.917704011065007, "grad_norm": 2.396088123321533, "learning_rate": 4.5719993852773936e-07, "log_odds_chosen": 11.458711624145508, "log_odds_ratio": -5.126070755068213e-05, "logits/chosen": -0.5153904557228088, "logits/rejected": -0.5878928899765015, "logps/chosen": -0.00014841034135315567, "logps/rejected": -2.667332410812378, "loss": 0.2098, "nll_loss": 0.05243738740682602, "rewards/accuracies": 1.0, "rewards/chosen": -1.484103540860815e-05, "rewards/margins": 0.26671838760375977, "rewards/rejected": -0.26673322916030884, "step": 14341 }, { "epoch": 9.918395573997234, "grad_norm": 2.856847047805786, "learning_rate": 4.5335792223759026e-07, "log_odds_chosen": 10.811798095703125, "log_odds_ratio": -5.029054227634333e-05, "logits/chosen": 0.1117456704378128, "logits/rejected": 0.1272241324186325, "logps/chosen": -0.0001538070064270869, "logps/rejected": -1.848840355873108, "loss": 0.2877, "nll_loss": 0.07192511856555939, "rewards/accuracies": 1.0, "rewards/chosen": -1.538070137030445e-05, "rewards/margins": 0.18486866354942322, "rewards/rejected": -0.18488404154777527, "step": 14342 }, { "epoch": 9.91908713692946, "grad_norm": 3.0931427478790283, "learning_rate": 4.495159059474412e-07, "log_odds_chosen": 9.95556640625, "log_odds_ratio": -0.00023802714713383466, "logits/chosen": -0.2813582420349121, "logits/rejected": -0.35545971989631653, "logps/chosen": -0.00039546735933981836, "logps/rejected": -1.5701475143432617, "loss": 0.3322, "nll_loss": 0.08301551640033722, "rewards/accuracies": 1.0, "rewards/chosen": -3.9546735933981836e-05, "rewards/margins": 0.15697520971298218, "rewards/rejected": -0.15701475739479065, "step": 14343 }, { "epoch": 9.919778699861688, "grad_norm": 2.814404249191284, "learning_rate": 4.456738896572922e-07, "log_odds_chosen": 10.750186920166016, "log_odds_ratio": -0.00018831017951015383, "logits/chosen": -0.25252482295036316, "logits/rejected": -0.23979459702968597, "logps/chosen": -0.0008598081185482442, "logps/rejected": -2.1662676334381104, "loss": 0.3028, "nll_loss": 0.07567182928323746, "rewards/accuracies": 1.0, "rewards/chosen": -8.598080603405833e-05, "rewards/margins": 0.21654078364372253, "rewards/rejected": -0.21662676334381104, "step": 14344 }, { "epoch": 9.920470262793915, "grad_norm": 3.1870203018188477, "learning_rate": 4.418318733671431e-07, "log_odds_chosen": 11.540773391723633, "log_odds_ratio": -2.0432918972801417e-05, "logits/chosen": -0.2521243095397949, "logits/rejected": -0.26861798763275146, "logps/chosen": -0.0001973059552256018, "logps/rejected": -2.646104097366333, "loss": 0.3702, "nll_loss": 0.09253590553998947, "rewards/accuracies": 1.0, "rewards/chosen": -1.973059625015594e-05, "rewards/margins": 0.26459068059921265, "rewards/rejected": -0.2646104097366333, "step": 14345 }, { "epoch": 9.921161825726141, "grad_norm": 5.119821071624756, "learning_rate": 4.3798985707699405e-07, "log_odds_chosen": 11.276300430297852, "log_odds_ratio": -0.00011375157191650942, "logits/chosen": -0.19327619671821594, "logits/rejected": -0.32824161648750305, "logps/chosen": -0.0003952342667616904, "logps/rejected": -2.8914079666137695, "loss": 0.4067, "nll_loss": 0.10165176540613174, "rewards/accuracies": 1.0, "rewards/chosen": -3.95234274037648e-05, "rewards/margins": 0.2891013026237488, "rewards/rejected": -0.2891407907009125, "step": 14346 }, { "epoch": 9.921853388658368, "grad_norm": 3.130152940750122, "learning_rate": 4.3414784078684496e-07, "log_odds_chosen": 11.177153587341309, "log_odds_ratio": -2.2892505512572825e-05, "logits/chosen": -0.23346176743507385, "logits/rejected": -0.3023627996444702, "logps/chosen": -8.725168299861252e-05, "logps/rejected": -1.9590811729431152, "loss": 0.329, "nll_loss": 0.08225492388010025, "rewards/accuracies": 1.0, "rewards/chosen": -8.725168299861252e-06, "rewards/margins": 0.19589939713478088, "rewards/rejected": -0.1959080994129181, "step": 14347 }, { "epoch": 9.922544951590595, "grad_norm": 3.5128986835479736, "learning_rate": 4.3030582449669587e-07, "log_odds_chosen": 9.824265480041504, "log_odds_ratio": -0.00019305248861201108, "logits/chosen": -0.335892915725708, "logits/rejected": -0.4563968777656555, "logps/chosen": -0.00021023111185058951, "logps/rejected": -1.5053505897521973, "loss": 0.2965, "nll_loss": 0.07410359382629395, "rewards/accuracies": 1.0, "rewards/chosen": -2.102311009366531e-05, "rewards/margins": 0.15051403641700745, "rewards/rejected": -0.15053506195545197, "step": 14348 }, { "epoch": 9.923236514522822, "grad_norm": 3.2810566425323486, "learning_rate": 4.264638082065468e-07, "log_odds_chosen": 11.708654403686523, "log_odds_ratio": -4.994563278160058e-05, "logits/chosen": -0.03914834186434746, "logits/rejected": -0.14430376887321472, "logps/chosen": -0.00014019644004292786, "logps/rejected": -2.522564172744751, "loss": 0.3258, "nll_loss": 0.08145511150360107, "rewards/accuracies": 1.0, "rewards/chosen": -1.4019644368090667e-05, "rewards/margins": 0.25224241614341736, "rewards/rejected": -0.25225645303726196, "step": 14349 }, { "epoch": 9.923928077455049, "grad_norm": 3.1786246299743652, "learning_rate": 4.226217919163978e-07, "log_odds_chosen": 9.759382247924805, "log_odds_ratio": -0.00024910017964430153, "logits/chosen": -0.1662512570619583, "logits/rejected": -0.286848783493042, "logps/chosen": -0.000488889985717833, "logps/rejected": -1.896082878112793, "loss": 0.3639, "nll_loss": 0.09095057845115662, "rewards/accuracies": 1.0, "rewards/chosen": -4.8889003664953634e-05, "rewards/margins": 0.18955940008163452, "rewards/rejected": -0.18960830569267273, "step": 14350 }, { "epoch": 9.924619640387276, "grad_norm": 3.9964568614959717, "learning_rate": 4.187797756262487e-07, "log_odds_chosen": 11.948275566101074, "log_odds_ratio": -3.0497600164380856e-05, "logits/chosen": -0.1427125781774521, "logits/rejected": -0.2857535481452942, "logps/chosen": -0.00014905542775522918, "logps/rejected": -2.870856285095215, "loss": 0.349, "nll_loss": 0.08725368976593018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4905541320331395e-05, "rewards/margins": 0.28707072138786316, "rewards/rejected": -0.287085622549057, "step": 14351 }, { "epoch": 9.925311203319502, "grad_norm": 3.8953027725219727, "learning_rate": 4.149377593360996e-07, "log_odds_chosen": 11.3707275390625, "log_odds_ratio": -7.19654854037799e-05, "logits/chosen": -0.15170668065547943, "logits/rejected": -0.09434667229652405, "logps/chosen": -0.00013707912876270711, "logps/rejected": -2.4071221351623535, "loss": 0.4143, "nll_loss": 0.10356798022985458, "rewards/accuracies": 1.0, "rewards/chosen": -1.3707913240068592e-05, "rewards/margins": 0.24069853127002716, "rewards/rejected": -0.2407122403383255, "step": 14352 }, { "epoch": 9.92600276625173, "grad_norm": 3.3584933280944824, "learning_rate": 4.110957430459505e-07, "log_odds_chosen": 11.33407974243164, "log_odds_ratio": -0.0006362693966366351, "logits/chosen": -0.029478400945663452, "logits/rejected": 0.0747109204530716, "logps/chosen": -0.0005637967842631042, "logps/rejected": -2.222421884536743, "loss": 0.2766, "nll_loss": 0.06908503919839859, "rewards/accuracies": 1.0, "rewards/chosen": -5.637968206428923e-05, "rewards/margins": 0.22218582034111023, "rewards/rejected": -0.22224220633506775, "step": 14353 }, { "epoch": 9.926694329183956, "grad_norm": 2.657400369644165, "learning_rate": 4.072537267558014e-07, "log_odds_chosen": 11.29609489440918, "log_odds_ratio": -5.056093505118042e-05, "logits/chosen": -0.36670732498168945, "logits/rejected": -0.37637168169021606, "logps/chosen": -0.00039156334241852164, "logps/rejected": -2.510091781616211, "loss": 0.2492, "nll_loss": 0.06230463087558746, "rewards/accuracies": 1.0, "rewards/chosen": -3.915633715223521e-05, "rewards/margins": 0.2509700059890747, "rewards/rejected": -0.25100916624069214, "step": 14354 }, { "epoch": 9.927385892116183, "grad_norm": 2.7479970455169678, "learning_rate": 4.0341171046565244e-07, "log_odds_chosen": 11.697278022766113, "log_odds_ratio": -0.00018010164785664529, "logits/chosen": -0.36440762877464294, "logits/rejected": -0.36176443099975586, "logps/chosen": -0.0001285705075133592, "logps/rejected": -2.5904908180236816, "loss": 0.2776, "nll_loss": 0.06938271224498749, "rewards/accuracies": 1.0, "rewards/chosen": -1.2857051842729561e-05, "rewards/margins": 0.25903621315956116, "rewards/rejected": -0.25904908776283264, "step": 14355 }, { "epoch": 9.92807745504841, "grad_norm": 3.3048360347747803, "learning_rate": 3.9956969417550335e-07, "log_odds_chosen": 10.689544677734375, "log_odds_ratio": -4.125951454625465e-05, "logits/chosen": -0.082699716091156, "logits/rejected": -0.11700746417045593, "logps/chosen": -0.0004147653526160866, "logps/rejected": -2.2804675102233887, "loss": 0.3935, "nll_loss": 0.09838270395994186, "rewards/accuracies": 1.0, "rewards/chosen": -4.147654180997051e-05, "rewards/margins": 0.2280052900314331, "rewards/rejected": -0.22804677486419678, "step": 14356 }, { "epoch": 9.928769017980636, "grad_norm": 3.060988187789917, "learning_rate": 3.9572767788535425e-07, "log_odds_chosen": 10.875423431396484, "log_odds_ratio": -0.0001337130379397422, "logits/chosen": -0.5028753280639648, "logits/rejected": -0.475473552942276, "logps/chosen": -0.00013390640378929675, "logps/rejected": -1.8304393291473389, "loss": 0.2411, "nll_loss": 0.06026024371385574, "rewards/accuracies": 1.0, "rewards/chosen": -1.3390640560828615e-05, "rewards/margins": 0.18303054571151733, "rewards/rejected": -0.1830439269542694, "step": 14357 }, { "epoch": 9.929460580912863, "grad_norm": 3.6875088214874268, "learning_rate": 3.9188566159520516e-07, "log_odds_chosen": 11.18298625946045, "log_odds_ratio": -3.897779970429838e-05, "logits/chosen": -0.215055450797081, "logits/rejected": -0.2545468211174011, "logps/chosen": -0.0002388485736446455, "logps/rejected": -2.675607442855835, "loss": 0.415, "nll_loss": 0.10374531149864197, "rewards/accuracies": 1.0, "rewards/chosen": -2.3884858819656074e-05, "rewards/margins": 0.26753684878349304, "rewards/rejected": -0.267560750246048, "step": 14358 }, { "epoch": 9.93015214384509, "grad_norm": 3.312593460083008, "learning_rate": 3.8804364530505607e-07, "log_odds_chosen": 10.246174812316895, "log_odds_ratio": -4.975176489097066e-05, "logits/chosen": -0.3240154981613159, "logits/rejected": -0.3295287489891052, "logps/chosen": -0.00044594379141926765, "logps/rejected": -1.9773529767990112, "loss": 0.2867, "nll_loss": 0.0716669112443924, "rewards/accuracies": 1.0, "rewards/chosen": -4.459438059711829e-05, "rewards/margins": 0.1976906955242157, "rewards/rejected": -0.19773529469966888, "step": 14359 }, { "epoch": 9.930843706777317, "grad_norm": 2.8286046981811523, "learning_rate": 3.8420162901490703e-07, "log_odds_chosen": 13.840531349182129, "log_odds_ratio": -4.998999429517426e-06, "logits/chosen": -0.17049254477024078, "logits/rejected": -0.17025065422058105, "logps/chosen": -6.283095717662945e-05, "logps/rejected": -4.116872787475586, "loss": 0.3128, "nll_loss": 0.0782066360116005, "rewards/accuracies": 1.0, "rewards/chosen": -6.283095899561886e-06, "rewards/margins": 0.41168099641799927, "rewards/rejected": -0.4116872549057007, "step": 14360 }, { "epoch": 9.931535269709544, "grad_norm": 3.902099132537842, "learning_rate": 3.8035961272475794e-07, "log_odds_chosen": 11.704643249511719, "log_odds_ratio": -1.2796294868167024e-05, "logits/chosen": -0.47484564781188965, "logits/rejected": -0.4412471055984497, "logps/chosen": -0.00021034966630395502, "logps/rejected": -2.4094064235687256, "loss": 0.4744, "nll_loss": 0.11860089749097824, "rewards/accuracies": 1.0, "rewards/chosen": -2.103496626659762e-05, "rewards/margins": 0.24091961979866028, "rewards/rejected": -0.2409406453371048, "step": 14361 }, { "epoch": 9.93222683264177, "grad_norm": 2.629956007003784, "learning_rate": 3.765175964346089e-07, "log_odds_chosen": 10.24508285522461, "log_odds_ratio": -0.00032763052149675786, "logits/chosen": -0.47297993302345276, "logits/rejected": -0.3988914489746094, "logps/chosen": -0.001132260193116963, "logps/rejected": -1.8652898073196411, "loss": 0.3144, "nll_loss": 0.07857255637645721, "rewards/accuracies": 1.0, "rewards/chosen": -0.00011322601494612172, "rewards/margins": 0.18641576170921326, "rewards/rejected": -0.1865289807319641, "step": 14362 }, { "epoch": 9.932918395573997, "grad_norm": 3.8364431858062744, "learning_rate": 3.7267558014445986e-07, "log_odds_chosen": 10.027873992919922, "log_odds_ratio": -0.0002950678754132241, "logits/chosen": -0.2743924558162689, "logits/rejected": -0.29987969994544983, "logps/chosen": -0.000396082759834826, "logps/rejected": -1.961113691329956, "loss": 0.3966, "nll_loss": 0.09912268817424774, "rewards/accuracies": 1.0, "rewards/chosen": -3.96082759834826e-05, "rewards/margins": 0.19607174396514893, "rewards/rejected": -0.19611136615276337, "step": 14363 }, { "epoch": 9.933609958506224, "grad_norm": 3.0874459743499756, "learning_rate": 3.6883356385431077e-07, "log_odds_chosen": 10.096710205078125, "log_odds_ratio": -0.00023568206233903766, "logits/chosen": -0.04615066200494766, "logits/rejected": -0.14368771016597748, "logps/chosen": -0.00041334485285915434, "logps/rejected": -1.6168841123580933, "loss": 0.2994, "nll_loss": 0.07481654733419418, "rewards/accuracies": 1.0, "rewards/chosen": -4.133448237553239e-05, "rewards/margins": 0.16164706647396088, "rewards/rejected": -0.1616884171962738, "step": 14364 }, { "epoch": 9.934301521438451, "grad_norm": 3.1047251224517822, "learning_rate": 3.6499154756416173e-07, "log_odds_chosen": 10.717859268188477, "log_odds_ratio": -0.00011625559272943065, "logits/chosen": -0.3506580591201782, "logits/rejected": -0.4321751892566681, "logps/chosen": -0.00015862606232985854, "logps/rejected": -2.001960039138794, "loss": 0.303, "nll_loss": 0.0757325291633606, "rewards/accuracies": 1.0, "rewards/chosen": -1.5862606232985854e-05, "rewards/margins": 0.20018012821674347, "rewards/rejected": -0.20019599795341492, "step": 14365 }, { "epoch": 9.934993084370678, "grad_norm": 2.378512382507324, "learning_rate": 3.6114953127401264e-07, "log_odds_chosen": 10.845108985900879, "log_odds_ratio": -8.855470514390618e-05, "logits/chosen": -0.43037089705467224, "logits/rejected": -0.4957068860530853, "logps/chosen": -0.00022176709899213165, "logps/rejected": -2.273205041885376, "loss": 0.2282, "nll_loss": 0.05704750120639801, "rewards/accuracies": 1.0, "rewards/chosen": -2.2176711354404688e-05, "rewards/margins": 0.2272983193397522, "rewards/rejected": -0.22732050716876984, "step": 14366 }, { "epoch": 9.935684647302905, "grad_norm": 4.0295281410217285, "learning_rate": 3.5730751498386355e-07, "log_odds_chosen": 11.803579330444336, "log_odds_ratio": -0.00016219766985159367, "logits/chosen": -0.608587384223938, "logits/rejected": -0.47692131996154785, "logps/chosen": -0.0001110218872781843, "logps/rejected": -2.783219814300537, "loss": 0.4327, "nll_loss": 0.10816709697246552, "rewards/accuracies": 1.0, "rewards/chosen": -1.1102188182121608e-05, "rewards/margins": 0.2783108949661255, "rewards/rejected": -0.2783219814300537, "step": 14367 }, { "epoch": 9.936376210235132, "grad_norm": 4.525236129760742, "learning_rate": 3.534654986937145e-07, "log_odds_chosen": 10.090860366821289, "log_odds_ratio": -0.00014848806313239038, "logits/chosen": -0.38946732878685, "logits/rejected": -0.33470335602760315, "logps/chosen": -0.0006121266633272171, "logps/rejected": -1.7874327898025513, "loss": 0.467, "nll_loss": 0.11672414094209671, "rewards/accuracies": 1.0, "rewards/chosen": -6.121266778791323e-05, "rewards/margins": 0.17868207395076752, "rewards/rejected": -0.17874330282211304, "step": 14368 }, { "epoch": 9.937067773167358, "grad_norm": 2.479583263397217, "learning_rate": 3.496234824035654e-07, "log_odds_chosen": 11.412271499633789, "log_odds_ratio": -1.3052333088126034e-05, "logits/chosen": -0.3329862356185913, "logits/rejected": -0.31148213148117065, "logps/chosen": -0.00014164020831231028, "logps/rejected": -2.179635524749756, "loss": 0.3027, "nll_loss": 0.07568074017763138, "rewards/accuracies": 1.0, "rewards/chosen": -1.4164021195028909e-05, "rewards/margins": 0.21794936060905457, "rewards/rejected": -0.21796351671218872, "step": 14369 }, { "epoch": 9.937759336099585, "grad_norm": 3.3530638217926025, "learning_rate": 3.457814661134163e-07, "log_odds_chosen": 11.313865661621094, "log_odds_ratio": -1.7188747733598575e-05, "logits/chosen": -0.4860305190086365, "logits/rejected": -0.5446900129318237, "logps/chosen": -0.00016923845396377146, "logps/rejected": -2.2236130237579346, "loss": 0.3453, "nll_loss": 0.08631302416324615, "rewards/accuracies": 1.0, "rewards/chosen": -1.6923844668781385e-05, "rewards/margins": 0.22234439849853516, "rewards/rejected": -0.22236132621765137, "step": 14370 }, { "epoch": 9.938450899031812, "grad_norm": 4.311180114746094, "learning_rate": 3.419394498232673e-07, "log_odds_chosen": 12.050071716308594, "log_odds_ratio": -2.2124790120869875e-05, "logits/chosen": 0.15515440702438354, "logits/rejected": 0.11022113263607025, "logps/chosen": -0.00015703440294601023, "logps/rejected": -3.03157639503479, "loss": 0.4012, "nll_loss": 0.10030660778284073, "rewards/accuracies": 1.0, "rewards/chosen": -1.5703441022196785e-05, "rewards/margins": 0.30314192175865173, "rewards/rejected": -0.30315762758255005, "step": 14371 }, { "epoch": 9.939142461964039, "grad_norm": 3.2761802673339844, "learning_rate": 3.380974335331182e-07, "log_odds_chosen": 11.795350074768066, "log_odds_ratio": -2.7355852580512874e-05, "logits/chosen": -0.21269749104976654, "logits/rejected": -0.24452780187129974, "logps/chosen": -0.00038405804662033916, "logps/rejected": -2.8474626541137695, "loss": 0.2882, "nll_loss": 0.07203909754753113, "rewards/accuracies": 1.0, "rewards/chosen": -3.840580757241696e-05, "rewards/margins": 0.2847078740596771, "rewards/rejected": -0.28474628925323486, "step": 14372 }, { "epoch": 9.939834024896266, "grad_norm": 4.4887309074401855, "learning_rate": 3.342554172429691e-07, "log_odds_chosen": 10.536214828491211, "log_odds_ratio": -5.4568004998145625e-05, "logits/chosen": -0.05506886541843414, "logits/rejected": -0.09337669610977173, "logps/chosen": -0.00011480246030259877, "logps/rejected": -1.565328598022461, "loss": 0.5551, "nll_loss": 0.13875915110111237, "rewards/accuracies": 1.0, "rewards/chosen": -1.1480246030259877e-05, "rewards/margins": 0.15652137994766235, "rewards/rejected": -0.1565328687429428, "step": 14373 }, { "epoch": 9.940525587828493, "grad_norm": 3.0688681602478027, "learning_rate": 3.3041340095282006e-07, "log_odds_chosen": 12.513391494750977, "log_odds_ratio": -1.2958620573044755e-05, "logits/chosen": -0.49493831396102905, "logits/rejected": -0.532113790512085, "logps/chosen": -0.00010285632743034512, "logps/rejected": -3.2977731227874756, "loss": 0.3057, "nll_loss": 0.07642143964767456, "rewards/accuracies": 1.0, "rewards/chosen": -1.0285633834428154e-05, "rewards/margins": 0.32976701855659485, "rewards/rejected": -0.3297773003578186, "step": 14374 }, { "epoch": 9.94121715076072, "grad_norm": 3.959526300430298, "learning_rate": 3.2657138466267097e-07, "log_odds_chosen": 11.419652938842773, "log_odds_ratio": -0.0004229422484058887, "logits/chosen": 0.07121730595827103, "logits/rejected": -0.13295045495033264, "logps/chosen": -0.0003118419263046235, "logps/rejected": -2.7346346378326416, "loss": 0.3539, "nll_loss": 0.0884229764342308, "rewards/accuracies": 1.0, "rewards/chosen": -3.118419408565387e-05, "rewards/margins": 0.27343228459358215, "rewards/rejected": -0.2734634578227997, "step": 14375 }, { "epoch": 9.941908713692946, "grad_norm": 3.3876402378082275, "learning_rate": 3.227293683725219e-07, "log_odds_chosen": 11.020099639892578, "log_odds_ratio": -8.45960748847574e-05, "logits/chosen": -0.23261260986328125, "logits/rejected": -0.3242484927177429, "logps/chosen": -0.00022357783745974302, "logps/rejected": -2.103334903717041, "loss": 0.3831, "nll_loss": 0.09575633704662323, "rewards/accuracies": 1.0, "rewards/chosen": -2.2357784473570064e-05, "rewards/margins": 0.2103111296892166, "rewards/rejected": -0.21033348143100739, "step": 14376 }, { "epoch": 9.942600276625173, "grad_norm": 4.13154411315918, "learning_rate": 3.1888735208237284e-07, "log_odds_chosen": 10.373748779296875, "log_odds_ratio": -8.665035420563072e-05, "logits/chosen": -0.2737635374069214, "logits/rejected": -0.3789776563644409, "logps/chosen": -0.00020896026398986578, "logps/rejected": -1.7434680461883545, "loss": 0.5489, "nll_loss": 0.1372080147266388, "rewards/accuracies": 1.0, "rewards/chosen": -2.0896026398986578e-05, "rewards/margins": 0.17432589828968048, "rewards/rejected": -0.17434681951999664, "step": 14377 }, { "epoch": 9.9432918395574, "grad_norm": 3.0957772731781006, "learning_rate": 3.1504533579222375e-07, "log_odds_chosen": 10.974006652832031, "log_odds_ratio": -4.768053986481391e-05, "logits/chosen": -0.23501087725162506, "logits/rejected": -0.29930877685546875, "logps/chosen": -0.0003055653069168329, "logps/rejected": -2.1016600131988525, "loss": 0.2729, "nll_loss": 0.06820801645517349, "rewards/accuracies": 1.0, "rewards/chosen": -3.055652996408753e-05, "rewards/margins": 0.21013543009757996, "rewards/rejected": -0.21016600728034973, "step": 14378 }, { "epoch": 9.943983402489627, "grad_norm": 3.494986057281494, "learning_rate": 3.112033195020747e-07, "log_odds_chosen": 11.643218040466309, "log_odds_ratio": -3.819255289272405e-05, "logits/chosen": 0.0929754376411438, "logits/rejected": -0.049822524189949036, "logps/chosen": -0.00013487892283592373, "logps/rejected": -2.6793737411499023, "loss": 0.3038, "nll_loss": 0.07595498859882355, "rewards/accuracies": 1.0, "rewards/chosen": -1.3487893738783896e-05, "rewards/margins": 0.26792389154434204, "rewards/rejected": -0.26793739199638367, "step": 14379 }, { "epoch": 9.944674965421854, "grad_norm": 3.9710512161254883, "learning_rate": 3.073613032119256e-07, "log_odds_chosen": 11.226348876953125, "log_odds_ratio": -5.1108327170368284e-05, "logits/chosen": -0.392134428024292, "logits/rejected": -0.4214463531970978, "logps/chosen": -0.00013843349006492645, "logps/rejected": -2.1756904125213623, "loss": 0.3873, "nll_loss": 0.09682579338550568, "rewards/accuracies": 1.0, "rewards/chosen": -1.3843347915099002e-05, "rewards/margins": 0.2175552099943161, "rewards/rejected": -0.21756905317306519, "step": 14380 }, { "epoch": 9.94536652835408, "grad_norm": 4.002619743347168, "learning_rate": 3.035192869217766e-07, "log_odds_chosen": 12.835500717163086, "log_odds_ratio": -1.0827752703335136e-05, "logits/chosen": -0.45156607031822205, "logits/rejected": -0.5535336136817932, "logps/chosen": -7.671228377148509e-05, "logps/rejected": -3.319952964782715, "loss": 0.4184, "nll_loss": 0.10460923612117767, "rewards/accuracies": 1.0, "rewards/chosen": -7.67122855904745e-06, "rewards/margins": 0.3319876194000244, "rewards/rejected": -0.33199530839920044, "step": 14381 }, { "epoch": 9.946058091286307, "grad_norm": 3.6625680923461914, "learning_rate": 2.9967727063162754e-07, "log_odds_chosen": 11.213696479797363, "log_odds_ratio": -4.3532319978112355e-05, "logits/chosen": 0.1309901475906372, "logits/rejected": -0.013646259903907776, "logps/chosen": -0.00030758522916585207, "logps/rejected": -2.5861880779266357, "loss": 0.6305, "nll_loss": 0.15761451423168182, "rewards/accuracies": 1.0, "rewards/chosen": -3.075852509937249e-05, "rewards/margins": 0.258588045835495, "rewards/rejected": -0.2586188018321991, "step": 14382 }, { "epoch": 9.946749654218534, "grad_norm": 3.9107625484466553, "learning_rate": 2.9583525434147844e-07, "log_odds_chosen": 11.015143394470215, "log_odds_ratio": -0.00011202752648387104, "logits/chosen": -0.4123598635196686, "logits/rejected": -0.41012248396873474, "logps/chosen": -0.00012248425628058612, "logps/rejected": -1.9108706712722778, "loss": 0.3176, "nll_loss": 0.0793963372707367, "rewards/accuracies": 1.0, "rewards/chosen": -1.2248425264260732e-05, "rewards/margins": 0.19107483327388763, "rewards/rejected": -0.19108708202838898, "step": 14383 }, { "epoch": 9.947441217150761, "grad_norm": 4.414669990539551, "learning_rate": 2.9199323805132935e-07, "log_odds_chosen": 11.332856178283691, "log_odds_ratio": -1.5987745427992195e-05, "logits/chosen": -0.03796715661883354, "logits/rejected": -0.1524902582168579, "logps/chosen": -0.0013471555430442095, "logps/rejected": -2.820084571838379, "loss": 0.729, "nll_loss": 0.1822463423013687, "rewards/accuracies": 1.0, "rewards/chosen": -0.00013471556303557009, "rewards/margins": 0.28187376260757446, "rewards/rejected": -0.28200846910476685, "step": 14384 }, { "epoch": 9.948132780082988, "grad_norm": 3.4981753826141357, "learning_rate": 2.881512217611803e-07, "log_odds_chosen": 11.192922592163086, "log_odds_ratio": -5.592240268015303e-05, "logits/chosen": -0.16741448640823364, "logits/rejected": -0.2337617129087448, "logps/chosen": -0.00023815588792786002, "logps/rejected": -2.6667916774749756, "loss": 0.3633, "nll_loss": 0.09081831574440002, "rewards/accuracies": 1.0, "rewards/chosen": -2.381558806519024e-05, "rewards/margins": 0.2666553258895874, "rewards/rejected": -0.26667916774749756, "step": 14385 }, { "epoch": 9.948824343015215, "grad_norm": 4.5940260887146, "learning_rate": 2.843092054710312e-07, "log_odds_chosen": 10.830406188964844, "log_odds_ratio": -0.000504222116433084, "logits/chosen": -0.00815871637314558, "logits/rejected": -0.0809616893529892, "logps/chosen": -0.00018051249207928777, "logps/rejected": -2.215250015258789, "loss": 0.4995, "nll_loss": 0.12483423948287964, "rewards/accuracies": 1.0, "rewards/chosen": -1.80512506631203e-05, "rewards/margins": 0.22150695323944092, "rewards/rejected": -0.22152499854564667, "step": 14386 }, { "epoch": 9.949515905947441, "grad_norm": 3.0476293563842773, "learning_rate": 2.8046718918088213e-07, "log_odds_chosen": 11.848611831665039, "log_odds_ratio": -3.5934321203967556e-05, "logits/chosen": 0.06482765823602676, "logits/rejected": 0.010766156017780304, "logps/chosen": -0.00028337494586594403, "logps/rejected": -3.363632917404175, "loss": 0.2712, "nll_loss": 0.06780073046684265, "rewards/accuracies": 1.0, "rewards/chosen": -2.833749749697745e-05, "rewards/margins": 0.3363349735736847, "rewards/rejected": -0.3363633155822754, "step": 14387 }, { "epoch": 9.950207468879668, "grad_norm": 5.000711917877197, "learning_rate": 2.766251728907331e-07, "log_odds_chosen": 11.645959854125977, "log_odds_ratio": -1.2625767340068705e-05, "logits/chosen": -0.46102869510650635, "logits/rejected": -0.5022953748703003, "logps/chosen": -9.16571807465516e-05, "logps/rejected": -2.1655919551849365, "loss": 0.2875, "nll_loss": 0.07186735421419144, "rewards/accuracies": 1.0, "rewards/chosen": -9.165718438453041e-06, "rewards/margins": 0.21655002236366272, "rewards/rejected": -0.21655918657779694, "step": 14388 }, { "epoch": 9.950899031811895, "grad_norm": 3.3046367168426514, "learning_rate": 2.72783156600584e-07, "log_odds_chosen": 11.407812118530273, "log_odds_ratio": -2.3163374862633646e-05, "logits/chosen": 0.016744054853916168, "logits/rejected": -0.06968782097101212, "logps/chosen": -0.00015086446364875883, "logps/rejected": -2.4506261348724365, "loss": 0.489, "nll_loss": 0.12225218862295151, "rewards/accuracies": 1.0, "rewards/chosen": -1.5086447092471644e-05, "rewards/margins": 0.24504750967025757, "rewards/rejected": -0.24506260454654694, "step": 14389 }, { "epoch": 9.951590594744122, "grad_norm": 4.441773891448975, "learning_rate": 2.689411403104349e-07, "log_odds_chosen": 11.36505126953125, "log_odds_ratio": -3.684817784233019e-05, "logits/chosen": -0.2129819393157959, "logits/rejected": -0.16193649172782898, "logps/chosen": -0.0001590220199432224, "logps/rejected": -2.540721893310547, "loss": 0.4842, "nll_loss": 0.12104177474975586, "rewards/accuracies": 1.0, "rewards/chosen": -1.590220199432224e-05, "rewards/margins": 0.25405630469322205, "rewards/rejected": -0.2540721893310547, "step": 14390 }, { "epoch": 9.952282157676349, "grad_norm": 3.1770615577697754, "learning_rate": 2.6509912402028587e-07, "log_odds_chosen": 12.067096710205078, "log_odds_ratio": -1.1090942280134186e-05, "logits/chosen": -0.5020029544830322, "logits/rejected": -0.47750067710876465, "logps/chosen": -6.147479871287942e-05, "logps/rejected": -2.369724750518799, "loss": 0.3183, "nll_loss": 0.07957428693771362, "rewards/accuracies": 1.0, "rewards/chosen": -6.147480235085823e-06, "rewards/margins": 0.2369663417339325, "rewards/rejected": -0.23697249591350555, "step": 14391 }, { "epoch": 9.952973720608576, "grad_norm": 3.6418378353118896, "learning_rate": 2.612571077301368e-07, "log_odds_chosen": 11.277714729309082, "log_odds_ratio": -2.0814361050724983e-05, "logits/chosen": -0.0488487109541893, "logits/rejected": -0.10118186473846436, "logps/chosen": -0.00013960029173176736, "logps/rejected": -2.152407169342041, "loss": 0.4439, "nll_loss": 0.11096364259719849, "rewards/accuracies": 1.0, "rewards/chosen": -1.3960028809378855e-05, "rewards/margins": 0.21522676944732666, "rewards/rejected": -0.2152407169342041, "step": 14392 }, { "epoch": 9.953665283540802, "grad_norm": 2.9789321422576904, "learning_rate": 2.574150914399877e-07, "log_odds_chosen": 11.723701477050781, "log_odds_ratio": -6.824049341958016e-05, "logits/chosen": -0.2967926561832428, "logits/rejected": -0.3067656457424164, "logps/chosen": -0.00016774365212768316, "logps/rejected": -2.2390599250793457, "loss": 0.4298, "nll_loss": 0.10745257139205933, "rewards/accuracies": 1.0, "rewards/chosen": -1.6774365576566197e-05, "rewards/margins": 0.22388924658298492, "rewards/rejected": -0.223906010389328, "step": 14393 }, { "epoch": 9.95435684647303, "grad_norm": 3.5029759407043457, "learning_rate": 2.5357307514983864e-07, "log_odds_chosen": 10.046087265014648, "log_odds_ratio": -0.00013034732546657324, "logits/chosen": -0.19458447396755219, "logits/rejected": -0.22558461129665375, "logps/chosen": -0.00021201715571805835, "logps/rejected": -1.4606382846832275, "loss": 0.3774, "nll_loss": 0.09434570372104645, "rewards/accuracies": 1.0, "rewards/chosen": -2.1201714844210073e-05, "rewards/margins": 0.1460426300764084, "rewards/rejected": -0.14606383442878723, "step": 14394 }, { "epoch": 9.955048409405256, "grad_norm": 4.0619635581970215, "learning_rate": 2.4973105885968955e-07, "log_odds_chosen": 12.413434982299805, "log_odds_ratio": -8.567772601963952e-05, "logits/chosen": 0.010890178382396698, "logits/rejected": -0.04578394815325737, "logps/chosen": -0.0005771232536062598, "logps/rejected": -3.222485065460205, "loss": 0.4735, "nll_loss": 0.11836060881614685, "rewards/accuracies": 1.0, "rewards/chosen": -5.771232827100903e-05, "rewards/margins": 0.3221907615661621, "rewards/rejected": -0.32224851846694946, "step": 14395 }, { "epoch": 9.955739972337483, "grad_norm": 2.319875955581665, "learning_rate": 2.4588904256954046e-07, "log_odds_chosen": 10.2796049118042, "log_odds_ratio": -0.0003484278277028352, "logits/chosen": -0.867690920829773, "logits/rejected": -0.8488110303878784, "logps/chosen": -0.0004555814084596932, "logps/rejected": -1.552926778793335, "loss": 0.3101, "nll_loss": 0.07749606668949127, "rewards/accuracies": 1.0, "rewards/chosen": -4.5558139390777797e-05, "rewards/margins": 0.15524712204933167, "rewards/rejected": -0.15529268980026245, "step": 14396 }, { "epoch": 9.95643153526971, "grad_norm": 3.616581916809082, "learning_rate": 2.420470262793914e-07, "log_odds_chosen": 11.643377304077148, "log_odds_ratio": -3.979198299930431e-05, "logits/chosen": -0.4350857734680176, "logits/rejected": -0.5141991972923279, "logps/chosen": -0.0005210883100517094, "logps/rejected": -3.020252227783203, "loss": 0.3239, "nll_loss": 0.08097027242183685, "rewards/accuracies": 1.0, "rewards/chosen": -5.210883682593703e-05, "rewards/margins": 0.3019731044769287, "rewards/rejected": -0.3020251989364624, "step": 14397 }, { "epoch": 9.957123098201937, "grad_norm": 3.0220160484313965, "learning_rate": 2.3820500998924236e-07, "log_odds_chosen": 10.704514503479004, "log_odds_ratio": -0.000209915975574404, "logits/chosen": -0.10353732109069824, "logits/rejected": -0.1952836513519287, "logps/chosen": -0.000387487409170717, "logps/rejected": -2.056887149810791, "loss": 0.3214, "nll_loss": 0.08033111691474915, "rewards/accuracies": 1.0, "rewards/chosen": -3.8748738006688654e-05, "rewards/margins": 0.2056499719619751, "rewards/rejected": -0.2056887298822403, "step": 14398 }, { "epoch": 9.957814661134163, "grad_norm": 3.386103630065918, "learning_rate": 2.3436299369909332e-07, "log_odds_chosen": 10.593547821044922, "log_odds_ratio": -0.00010035329614765942, "logits/chosen": -0.3432830572128296, "logits/rejected": -0.4551471769809723, "logps/chosen": -0.00039374135667458177, "logps/rejected": -1.9529142379760742, "loss": 0.375, "nll_loss": 0.0937325730919838, "rewards/accuracies": 1.0, "rewards/chosen": -3.9374139305436984e-05, "rewards/margins": 0.19525204598903656, "rewards/rejected": -0.1952914148569107, "step": 14399 }, { "epoch": 9.95850622406639, "grad_norm": 3.4891092777252197, "learning_rate": 2.3052097740894422e-07, "log_odds_chosen": 11.950043678283691, "log_odds_ratio": -7.900898708612658e-06, "logits/chosen": -0.31247663497924805, "logits/rejected": -0.3481077551841736, "logps/chosen": -0.00016259380208794028, "logps/rejected": -2.870516300201416, "loss": 0.3762, "nll_loss": 0.09405548125505447, "rewards/accuracies": 1.0, "rewards/chosen": -1.6259382391581312e-05, "rewards/margins": 0.28703537583351135, "rewards/rejected": -0.28705161809921265, "step": 14400 }, { "epoch": 9.959197786998617, "grad_norm": 4.95497989654541, "learning_rate": 2.2667896111879513e-07, "log_odds_chosen": 11.518646240234375, "log_odds_ratio": -2.0016508642584085e-05, "logits/chosen": -0.09684181213378906, "logits/rejected": -0.07883979380130768, "logps/chosen": -0.00022995812469162047, "logps/rejected": -2.7280092239379883, "loss": 0.4319, "nll_loss": 0.1079646497964859, "rewards/accuracies": 1.0, "rewards/chosen": -2.299581319675781e-05, "rewards/margins": 0.2727779150009155, "rewards/rejected": -0.27280092239379883, "step": 14401 }, { "epoch": 9.959889349930844, "grad_norm": 2.1735763549804688, "learning_rate": 2.228369448286461e-07, "log_odds_chosen": 11.362403869628906, "log_odds_ratio": -0.00016163713007699698, "logits/chosen": -0.5279222726821899, "logits/rejected": -0.39347508549690247, "logps/chosen": -0.0002045204018941149, "logps/rejected": -2.1512932777404785, "loss": 0.2093, "nll_loss": 0.05231678858399391, "rewards/accuracies": 1.0, "rewards/chosen": -2.0452042008400895e-05, "rewards/margins": 0.21510887145996094, "rewards/rejected": -0.21512934565544128, "step": 14402 }, { "epoch": 9.96058091286307, "grad_norm": 3.0434203147888184, "learning_rate": 2.1899492853849703e-07, "log_odds_chosen": 11.224910736083984, "log_odds_ratio": -3.419286076677963e-05, "logits/chosen": -0.719974160194397, "logits/rejected": -0.7234589457511902, "logps/chosen": -0.00011317985627101734, "logps/rejected": -1.707182765007019, "loss": 0.3209, "nll_loss": 0.08021647483110428, "rewards/accuracies": 1.0, "rewards/chosen": -1.1317985809000675e-05, "rewards/margins": 0.17070695757865906, "rewards/rejected": -0.17071828246116638, "step": 14403 }, { "epoch": 9.961272475795298, "grad_norm": 3.3985514640808105, "learning_rate": 2.1515291224834794e-07, "log_odds_chosen": 11.151140213012695, "log_odds_ratio": -2.503224641259294e-05, "logits/chosen": -0.7156893610954285, "logits/rejected": -0.7925446629524231, "logps/chosen": -0.00011096397793153301, "logps/rejected": -1.8769021034240723, "loss": 0.3496, "nll_loss": 0.08739632368087769, "rewards/accuracies": 1.0, "rewards/chosen": -1.1096398338850122e-05, "rewards/margins": 0.18767911195755005, "rewards/rejected": -0.18769021332263947, "step": 14404 }, { "epoch": 9.961964038727524, "grad_norm": 2.7494256496429443, "learning_rate": 2.113108959581989e-07, "log_odds_chosen": 11.359611511230469, "log_odds_ratio": -2.5823699616012163e-05, "logits/chosen": -0.7222087979316711, "logits/rejected": -0.7345014810562134, "logps/chosen": -0.00012434877862688154, "logps/rejected": -2.3068008422851562, "loss": 0.4386, "nll_loss": 0.10965168476104736, "rewards/accuracies": 1.0, "rewards/chosen": -1.2434877135092393e-05, "rewards/margins": 0.2306676208972931, "rewards/rejected": -0.23068007826805115, "step": 14405 }, { "epoch": 9.962655601659751, "grad_norm": 3.3446850776672363, "learning_rate": 2.074688796680498e-07, "log_odds_chosen": 11.147770881652832, "log_odds_ratio": -2.3483353288611397e-05, "logits/chosen": -0.2715800106525421, "logits/rejected": -0.38775211572647095, "logps/chosen": -0.00018793967319652438, "logps/rejected": -2.3913774490356445, "loss": 0.3135, "nll_loss": 0.07836674153804779, "rewards/accuracies": 1.0, "rewards/chosen": -1.8793965864460915e-05, "rewards/margins": 0.23911894857883453, "rewards/rejected": -0.23913775384426117, "step": 14406 }, { "epoch": 9.963347164591978, "grad_norm": 3.412565231323242, "learning_rate": 2.036268633779007e-07, "log_odds_chosen": 11.36739730834961, "log_odds_ratio": -0.00020149120246060193, "logits/chosen": -0.3841862082481384, "logits/rejected": -0.37498146295547485, "logps/chosen": -0.0002477857342455536, "logps/rejected": -2.7530517578125, "loss": 0.3472, "nll_loss": 0.08676967024803162, "rewards/accuracies": 1.0, "rewards/chosen": -2.4778573788353242e-05, "rewards/margins": 0.2752804458141327, "rewards/rejected": -0.27530521154403687, "step": 14407 }, { "epoch": 9.964038727524205, "grad_norm": 3.8611674308776855, "learning_rate": 1.9978484708775167e-07, "log_odds_chosen": 11.597522735595703, "log_odds_ratio": -2.0960025722160935e-05, "logits/chosen": -0.5457533001899719, "logits/rejected": -0.5970411896705627, "logps/chosen": -0.00017053255578503013, "logps/rejected": -2.5950927734375, "loss": 0.4092, "nll_loss": 0.10229329019784927, "rewards/accuracies": 1.0, "rewards/chosen": -1.7053256669896655e-05, "rewards/margins": 0.25949224829673767, "rewards/rejected": -0.25950929522514343, "step": 14408 }, { "epoch": 9.964730290456432, "grad_norm": 5.490063667297363, "learning_rate": 1.9594283079760258e-07, "log_odds_chosen": 11.967035293579102, "log_odds_ratio": -8.80186416907236e-06, "logits/chosen": -0.34099629521369934, "logits/rejected": -0.3364622890949249, "logps/chosen": -6.214046879904345e-05, "logps/rejected": -2.297642230987549, "loss": 0.731, "nll_loss": 0.18274804949760437, "rewards/accuracies": 1.0, "rewards/chosen": -6.214047061803285e-06, "rewards/margins": 0.22975800931453705, "rewards/rejected": -0.22976422309875488, "step": 14409 }, { "epoch": 9.965421853388658, "grad_norm": 3.1003382205963135, "learning_rate": 1.9210081450745352e-07, "log_odds_chosen": 11.369587898254395, "log_odds_ratio": -1.9927889297832735e-05, "logits/chosen": -0.5328479409217834, "logits/rejected": -0.5892972946166992, "logps/chosen": -0.00016084310482256114, "logps/rejected": -2.1601061820983887, "loss": 0.3009, "nll_loss": 0.07521713525056839, "rewards/accuracies": 1.0, "rewards/chosen": -1.6084311937447637e-05, "rewards/margins": 0.21599456667900085, "rewards/rejected": -0.21601064503192902, "step": 14410 }, { "epoch": 9.966113416320885, "grad_norm": 3.565310001373291, "learning_rate": 1.8825879821730445e-07, "log_odds_chosen": 10.874410629272461, "log_odds_ratio": -0.0001317542337346822, "logits/chosen": -0.225718691945076, "logits/rejected": -0.2878406047821045, "logps/chosen": -0.00030120619339868426, "logps/rejected": -2.397109270095825, "loss": 0.4632, "nll_loss": 0.11577460914850235, "rewards/accuracies": 1.0, "rewards/chosen": -3.012062188645359e-05, "rewards/margins": 0.23968079686164856, "rewards/rejected": -0.23971092700958252, "step": 14411 }, { "epoch": 9.966804979253112, "grad_norm": 2.7302663326263428, "learning_rate": 1.8441678192715538e-07, "log_odds_chosen": 10.82535457611084, "log_odds_ratio": -7.440607441822067e-05, "logits/chosen": -0.397928923368454, "logits/rejected": -0.3490590453147888, "logps/chosen": -0.00016319297719746828, "logps/rejected": -2.080110549926758, "loss": 0.2296, "nll_loss": 0.05740056559443474, "rewards/accuracies": 1.0, "rewards/chosen": -1.631929808354471e-05, "rewards/margins": 0.2079947590827942, "rewards/rejected": -0.20801107585430145, "step": 14412 }, { "epoch": 9.967496542185339, "grad_norm": 3.359323501586914, "learning_rate": 1.8057476563700632e-07, "log_odds_chosen": 11.5607271194458, "log_odds_ratio": -2.955112540803384e-05, "logits/chosen": -0.5813517570495605, "logits/rejected": -0.6620681285858154, "logps/chosen": -0.0001213313516927883, "logps/rejected": -2.29302978515625, "loss": 0.4426, "nll_loss": 0.11064665019512177, "rewards/accuracies": 1.0, "rewards/chosen": -1.213313498737989e-05, "rewards/margins": 0.22929087281227112, "rewards/rejected": -0.22930298745632172, "step": 14413 }, { "epoch": 9.968188105117566, "grad_norm": 5.813520908355713, "learning_rate": 1.7673274934685725e-07, "log_odds_chosen": 9.556093215942383, "log_odds_ratio": -0.1622939556837082, "logits/chosen": 0.14559586346149445, "logits/rejected": 0.039752352982759476, "logps/chosen": -0.033193714916706085, "logps/rejected": -2.0233352184295654, "loss": 0.4383, "nll_loss": 0.09335532784461975, "rewards/accuracies": 0.875, "rewards/chosen": -0.003319371724501252, "rewards/margins": 0.19901415705680847, "rewards/rejected": -0.20233353972434998, "step": 14414 }, { "epoch": 9.968879668049793, "grad_norm": 2.3858280181884766, "learning_rate": 1.7289073305670816e-07, "log_odds_chosen": 9.702201843261719, "log_odds_ratio": -0.0002256479929201305, "logits/chosen": -0.3610384166240692, "logits/rejected": -0.35918182134628296, "logps/chosen": -0.0002743570366874337, "logps/rejected": -1.2810546159744263, "loss": 0.2601, "nll_loss": 0.0649990364909172, "rewards/accuracies": 1.0, "rewards/chosen": -2.7435704396339133e-05, "rewards/margins": 0.12807804346084595, "rewards/rejected": -0.12810547649860382, "step": 14415 }, { "epoch": 9.96957123098202, "grad_norm": 3.2431888580322266, "learning_rate": 1.690487167665591e-07, "log_odds_chosen": 10.424958229064941, "log_odds_ratio": -8.65029142005369e-05, "logits/chosen": -0.08212536573410034, "logits/rejected": 0.022407136857509613, "logps/chosen": -0.00048625317867845297, "logps/rejected": -2.2663631439208984, "loss": 0.4031, "nll_loss": 0.10076534748077393, "rewards/accuracies": 1.0, "rewards/chosen": -4.862531568505801e-05, "rewards/margins": 0.22658771276474, "rewards/rejected": -0.22663632035255432, "step": 14416 }, { "epoch": 9.970262793914246, "grad_norm": 2.993499994277954, "learning_rate": 1.6520670047641003e-07, "log_odds_chosen": 12.383956909179688, "log_odds_ratio": -1.850670378189534e-05, "logits/chosen": -0.055138956755399704, "logits/rejected": -0.17104335129261017, "logps/chosen": -0.00026264588814228773, "logps/rejected": -3.508105516433716, "loss": 0.3427, "nll_loss": 0.08568139374256134, "rewards/accuracies": 1.0, "rewards/chosen": -2.6264588086633012e-05, "rewards/margins": 0.3507842719554901, "rewards/rejected": -0.35081055760383606, "step": 14417 }, { "epoch": 9.970954356846473, "grad_norm": 2.7699737548828125, "learning_rate": 1.6136468418626094e-07, "log_odds_chosen": 10.448410034179688, "log_odds_ratio": -0.0002700125623960048, "logits/chosen": -0.24522686004638672, "logits/rejected": -0.2643146812915802, "logps/chosen": -0.0003112297272309661, "logps/rejected": -1.7951382398605347, "loss": 0.2979, "nll_loss": 0.0744408369064331, "rewards/accuracies": 1.0, "rewards/chosen": -3.1122970540309325e-05, "rewards/margins": 0.17948269844055176, "rewards/rejected": -0.1795138269662857, "step": 14418 }, { "epoch": 9.9716459197787, "grad_norm": 3.2262375354766846, "learning_rate": 1.5752266789611187e-07, "log_odds_chosen": 11.268388748168945, "log_odds_ratio": -6.620370550081134e-05, "logits/chosen": -0.6279283761978149, "logits/rejected": -0.6332384943962097, "logps/chosen": -0.000160843541380018, "logps/rejected": -2.012922763824463, "loss": 0.329, "nll_loss": 0.08223171532154083, "rewards/accuracies": 1.0, "rewards/chosen": -1.6084355593193322e-05, "rewards/margins": 0.20127618312835693, "rewards/rejected": -0.2012922763824463, "step": 14419 }, { "epoch": 9.972337482710927, "grad_norm": 3.0317883491516113, "learning_rate": 1.536806516059628e-07, "log_odds_chosen": 11.24959659576416, "log_odds_ratio": -2.1257339540170506e-05, "logits/chosen": -0.3580717444419861, "logits/rejected": -0.34256482124328613, "logps/chosen": -0.00010173609189223498, "logps/rejected": -1.8382275104522705, "loss": 0.2935, "nll_loss": 0.07337658852338791, "rewards/accuracies": 1.0, "rewards/chosen": -1.0173609553021379e-05, "rewards/margins": 0.18381257355213165, "rewards/rejected": -0.18382275104522705, "step": 14420 }, { "epoch": 9.973029045643154, "grad_norm": 3.33316969871521, "learning_rate": 1.4983863531581377e-07, "log_odds_chosen": 11.102269172668457, "log_odds_ratio": -6.263488467084244e-05, "logits/chosen": -0.5760700106620789, "logits/rejected": -0.757520318031311, "logps/chosen": -0.00015352622722275555, "logps/rejected": -2.201449394226074, "loss": 0.3569, "nll_loss": 0.08921611309051514, "rewards/accuracies": 1.0, "rewards/chosen": -1.5352623449871317e-05, "rewards/margins": 0.22012959420681, "rewards/rejected": -0.22014492750167847, "step": 14421 }, { "epoch": 9.97372060857538, "grad_norm": 3.053219795227051, "learning_rate": 1.4599661902566468e-07, "log_odds_chosen": 12.511886596679688, "log_odds_ratio": -1.9515664462232962e-05, "logits/chosen": -0.4584193825721741, "logits/rejected": -0.47777336835861206, "logps/chosen": -0.00012212220462970436, "logps/rejected": -3.465627670288086, "loss": 0.3561, "nll_loss": 0.08903485536575317, "rewards/accuracies": 1.0, "rewards/chosen": -1.2212220099172555e-05, "rewards/margins": 0.3465505838394165, "rewards/rejected": -0.34656277298927307, "step": 14422 }, { "epoch": 9.974412171507607, "grad_norm": 3.221226215362549, "learning_rate": 1.421546027355156e-07, "log_odds_chosen": 10.802603721618652, "log_odds_ratio": -6.027729614288546e-05, "logits/chosen": 0.020891718566417694, "logits/rejected": 0.010172158479690552, "logps/chosen": -0.00024104956537485123, "logps/rejected": -1.9035903215408325, "loss": 0.3388, "nll_loss": 0.08469771593809128, "rewards/accuracies": 1.0, "rewards/chosen": -2.410495471849572e-05, "rewards/margins": 0.19033494591712952, "rewards/rejected": -0.19035905599594116, "step": 14423 }, { "epoch": 9.975103734439834, "grad_norm": 3.4250340461730957, "learning_rate": 1.3831258644536654e-07, "log_odds_chosen": 10.85274887084961, "log_odds_ratio": -7.843859930289909e-05, "logits/chosen": -0.16926470398902893, "logits/rejected": -0.16885489225387573, "logps/chosen": -0.00040010723751038313, "logps/rejected": -2.4334042072296143, "loss": 0.3691, "nll_loss": 0.09226728975772858, "rewards/accuracies": 1.0, "rewards/chosen": -4.001072375103831e-05, "rewards/margins": 0.2433004081249237, "rewards/rejected": -0.2433404177427292, "step": 14424 }, { "epoch": 9.975795297372061, "grad_norm": 3.7283456325531006, "learning_rate": 1.3447057015521745e-07, "log_odds_chosen": 10.44284439086914, "log_odds_ratio": -0.00015514253755100071, "logits/chosen": 0.31527790427207947, "logits/rejected": 0.3427479863166809, "logps/chosen": -0.00020504721032921225, "logps/rejected": -1.915432095527649, "loss": 0.3656, "nll_loss": 0.09138018637895584, "rewards/accuracies": 1.0, "rewards/chosen": -2.0504719941527583e-05, "rewards/margins": 0.1915227174758911, "rewards/rejected": -0.19154320657253265, "step": 14425 }, { "epoch": 9.976486860304288, "grad_norm": 4.250152111053467, "learning_rate": 1.306285538650684e-07, "log_odds_chosen": 11.869510650634766, "log_odds_ratio": -9.183246220345609e-06, "logits/chosen": -0.340200275182724, "logits/rejected": -0.3542943000793457, "logps/chosen": -0.00025191018357872963, "logps/rejected": -2.6849536895751953, "loss": 0.51, "nll_loss": 0.12750062346458435, "rewards/accuracies": 1.0, "rewards/chosen": -2.519101690268144e-05, "rewards/margins": 0.2684701979160309, "rewards/rejected": -0.2684953808784485, "step": 14426 }, { "epoch": 9.977178423236515, "grad_norm": 3.831174850463867, "learning_rate": 1.2678653757491932e-07, "log_odds_chosen": 11.098943710327148, "log_odds_ratio": -3.759023820748553e-05, "logits/chosen": -0.3505597412586212, "logits/rejected": -0.391792356967926, "logps/chosen": -0.00024021898570936173, "logps/rejected": -2.4518837928771973, "loss": 0.2844, "nll_loss": 0.07108572125434875, "rewards/accuracies": 1.0, "rewards/chosen": -2.402190148131922e-05, "rewards/margins": 0.24516433477401733, "rewards/rejected": -0.2451883852481842, "step": 14427 }, { "epoch": 9.977869986168741, "grad_norm": 5.215520858764648, "learning_rate": 1.2294452128477023e-07, "log_odds_chosen": 11.317865371704102, "log_odds_ratio": -2.5791119696805254e-05, "logits/chosen": -0.09801247715950012, "logits/rejected": -0.0976279005408287, "logps/chosen": -0.00020278405281715095, "logps/rejected": -2.345594882965088, "loss": 0.5098, "nll_loss": 0.12745140492916107, "rewards/accuracies": 1.0, "rewards/chosen": -2.0278404917917214e-05, "rewards/margins": 0.2345392107963562, "rewards/rejected": -0.23455950617790222, "step": 14428 }, { "epoch": 9.978561549100968, "grad_norm": 3.929835081100464, "learning_rate": 1.1910250499462118e-07, "log_odds_chosen": 12.225106239318848, "log_odds_ratio": -0.00011829940194729716, "logits/chosen": -0.17743200063705444, "logits/rejected": -0.2416902482509613, "logps/chosen": -0.0002096697426168248, "logps/rejected": -3.256639242172241, "loss": 0.3582, "nll_loss": 0.08954370021820068, "rewards/accuracies": 1.0, "rewards/chosen": -2.0966976080671884e-05, "rewards/margins": 0.32564300298690796, "rewards/rejected": -0.3256639540195465, "step": 14429 }, { "epoch": 9.979253112033195, "grad_norm": 2.8752622604370117, "learning_rate": 1.1526048870447211e-07, "log_odds_chosen": 11.139347076416016, "log_odds_ratio": -9.415207023266703e-05, "logits/chosen": -0.59767746925354, "logits/rejected": -0.6264057159423828, "logps/chosen": -0.00012206398241687566, "logps/rejected": -1.9091721773147583, "loss": 0.3657, "nll_loss": 0.09141746163368225, "rewards/accuracies": 1.0, "rewards/chosen": -1.2206397514091805e-05, "rewards/margins": 0.19090501964092255, "rewards/rejected": -0.1909172236919403, "step": 14430 }, { "epoch": 9.979944674965422, "grad_norm": 2.5739731788635254, "learning_rate": 1.1141847241432305e-07, "log_odds_chosen": 10.908953666687012, "log_odds_ratio": -3.49894653481897e-05, "logits/chosen": -0.10505393147468567, "logits/rejected": -0.11533316969871521, "logps/chosen": -0.0002562586741987616, "logps/rejected": -1.855210304260254, "loss": 0.2667, "nll_loss": 0.06666852533817291, "rewards/accuracies": 1.0, "rewards/chosen": -2.562586814747192e-05, "rewards/margins": 0.18549540638923645, "rewards/rejected": -0.18552103638648987, "step": 14431 }, { "epoch": 9.980636237897649, "grad_norm": 3.690086603164673, "learning_rate": 1.0757645612417397e-07, "log_odds_chosen": 12.060749053955078, "log_odds_ratio": -1.1755011655623093e-05, "logits/chosen": -0.6370924711227417, "logits/rejected": -0.655462384223938, "logps/chosen": -0.00011227516370126978, "logps/rejected": -2.708317995071411, "loss": 0.3352, "nll_loss": 0.08378816395998001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1227516552025918e-05, "rewards/margins": 0.2708205580711365, "rewards/rejected": -0.270831823348999, "step": 14432 }, { "epoch": 9.981327800829876, "grad_norm": 3.6096031665802, "learning_rate": 1.037344398340249e-07, "log_odds_chosen": 10.562856674194336, "log_odds_ratio": -0.00021144159836694598, "logits/chosen": -0.02058953046798706, "logits/rejected": -0.1223272830247879, "logps/chosen": -0.0002965645689982921, "logps/rejected": -2.3732357025146484, "loss": 0.3817, "nll_loss": 0.09541618824005127, "rewards/accuracies": 1.0, "rewards/chosen": -2.965645762742497e-05, "rewards/margins": 0.23729392886161804, "rewards/rejected": -0.2373235821723938, "step": 14433 }, { "epoch": 9.982019363762102, "grad_norm": 3.1935596466064453, "learning_rate": 9.989242354387584e-08, "log_odds_chosen": 10.641152381896973, "log_odds_ratio": -0.0001305602490901947, "logits/chosen": -0.2632865607738495, "logits/rejected": -0.11204138398170471, "logps/chosen": -0.00028467128868214786, "logps/rejected": -2.1090238094329834, "loss": 0.6777, "nll_loss": 0.16941051185131073, "rewards/accuracies": 1.0, "rewards/chosen": -2.8467127776821144e-05, "rewards/margins": 0.21087393164634705, "rewards/rejected": -0.2109023928642273, "step": 14434 }, { "epoch": 9.98271092669433, "grad_norm": 2.954907178878784, "learning_rate": 9.605040725372676e-08, "log_odds_chosen": 11.83912181854248, "log_odds_ratio": -2.0430359654710628e-05, "logits/chosen": -0.29502013325691223, "logits/rejected": -0.32154542207717896, "logps/chosen": -6.92913745297119e-05, "logps/rejected": -2.161543846130371, "loss": 0.3088, "nll_loss": 0.0771905928850174, "rewards/accuracies": 1.0, "rewards/chosen": -6.92913727107225e-06, "rewards/margins": 0.21614745259284973, "rewards/rejected": -0.21615436673164368, "step": 14435 }, { "epoch": 9.983402489626556, "grad_norm": 3.4380013942718506, "learning_rate": 9.220839096357769e-08, "log_odds_chosen": 11.68551254272461, "log_odds_ratio": -1.8098944565281272e-05, "logits/chosen": -0.4443289339542389, "logits/rejected": -0.5476394891738892, "logps/chosen": -9.755351493367925e-05, "logps/rejected": -2.384434223175049, "loss": 0.4065, "nll_loss": 0.10162050276994705, "rewards/accuracies": 1.0, "rewards/chosen": -9.755351129570045e-06, "rewards/margins": 0.23843365907669067, "rewards/rejected": -0.23844340443611145, "step": 14436 }, { "epoch": 9.984094052558783, "grad_norm": 4.005263328552246, "learning_rate": 8.836637467342863e-08, "log_odds_chosen": 11.748692512512207, "log_odds_ratio": -1.6541533113922924e-05, "logits/chosen": -0.19354397058486938, "logits/rejected": -0.20091736316680908, "logps/chosen": -0.00013413293345365673, "logps/rejected": -2.7356362342834473, "loss": 0.423, "nll_loss": 0.10575605928897858, "rewards/accuracies": 1.0, "rewards/chosen": -1.3413294254860375e-05, "rewards/margins": 0.27355021238327026, "rewards/rejected": -0.2735636234283447, "step": 14437 }, { "epoch": 9.98478561549101, "grad_norm": 3.510989189147949, "learning_rate": 8.452435838327955e-08, "log_odds_chosen": 11.33831787109375, "log_odds_ratio": -3.435353210079484e-05, "logits/chosen": -0.33414196968078613, "logits/rejected": -0.3761923015117645, "logps/chosen": -0.0003766388981603086, "logps/rejected": -2.5318145751953125, "loss": 0.3936, "nll_loss": 0.09838636219501495, "rewards/accuracies": 1.0, "rewards/chosen": -3.7663892726413906e-05, "rewards/margins": 0.2531437873840332, "rewards/rejected": -0.25318145751953125, "step": 14438 }, { "epoch": 9.985477178423237, "grad_norm": 2.6459414958953857, "learning_rate": 8.068234209313047e-08, "log_odds_chosen": 11.280380249023438, "log_odds_ratio": -2.8480833861976862e-05, "logits/chosen": -0.7788600921630859, "logits/rejected": -0.7748812437057495, "logps/chosen": -6.958026642678306e-05, "logps/rejected": -1.900242567062378, "loss": 0.305, "nll_loss": 0.07623657584190369, "rewards/accuracies": 1.0, "rewards/chosen": -6.958027370274067e-06, "rewards/margins": 0.19001729786396027, "rewards/rejected": -0.1900242567062378, "step": 14439 }, { "epoch": 9.986168741355463, "grad_norm": 3.695897340774536, "learning_rate": 7.68403258029814e-08, "log_odds_chosen": 11.930622100830078, "log_odds_ratio": -1.7742391719366424e-05, "logits/chosen": -0.3199276030063629, "logits/rejected": -0.32262054085731506, "logps/chosen": -0.00020379522175062448, "logps/rejected": -2.6338376998901367, "loss": 0.4075, "nll_loss": 0.101873017847538, "rewards/accuracies": 1.0, "rewards/chosen": -2.0379520719870925e-05, "rewards/margins": 0.2633633613586426, "rewards/rejected": -0.26338374614715576, "step": 14440 }, { "epoch": 9.98686030428769, "grad_norm": 3.20121169090271, "learning_rate": 7.299830951283234e-08, "log_odds_chosen": 11.908764839172363, "log_odds_ratio": -9.826524546951987e-06, "logits/chosen": 0.030937325209379196, "logits/rejected": 0.012108508497476578, "logps/chosen": -0.0002181961026508361, "logps/rejected": -2.7337958812713623, "loss": 0.439, "nll_loss": 0.10974864661693573, "rewards/accuracies": 1.0, "rewards/chosen": -2.1819610992679372e-05, "rewards/margins": 0.2733577787876129, "rewards/rejected": -0.2733795940876007, "step": 14441 }, { "epoch": 9.987551867219917, "grad_norm": 2.5336647033691406, "learning_rate": 6.915629322268327e-08, "log_odds_chosen": 10.848555564880371, "log_odds_ratio": -0.00010116137855220586, "logits/chosen": -0.2958114445209503, "logits/rejected": -0.3266344666481018, "logps/chosen": -0.0001003117358777672, "logps/rejected": -1.5897066593170166, "loss": 0.2334, "nll_loss": 0.05832758545875549, "rewards/accuracies": 1.0, "rewards/chosen": -1.003117358777672e-05, "rewards/margins": 0.15896062552928925, "rewards/rejected": -0.1589706540107727, "step": 14442 }, { "epoch": 9.988243430152144, "grad_norm": 3.3334195613861084, "learning_rate": 6.53142769325342e-08, "log_odds_chosen": 10.656942367553711, "log_odds_ratio": -5.3099036449566483e-05, "logits/chosen": -0.02133992314338684, "logits/rejected": -0.08297806978225708, "logps/chosen": -0.0001351845421595499, "logps/rejected": -1.7620038986206055, "loss": 0.4026, "nll_loss": 0.10065347701311111, "rewards/accuracies": 1.0, "rewards/chosen": -1.351845457975287e-05, "rewards/margins": 0.17618687450885773, "rewards/rejected": -0.17620038986206055, "step": 14443 }, { "epoch": 9.98893499308437, "grad_norm": 3.3461174964904785, "learning_rate": 6.147226064238511e-08, "log_odds_chosen": 9.42183780670166, "log_odds_ratio": -0.00031376894912682474, "logits/chosen": -0.0702032744884491, "logits/rejected": -0.15273018181324005, "logps/chosen": -0.0004545687697827816, "logps/rejected": -1.6560826301574707, "loss": 0.3266, "nll_loss": 0.08162922412157059, "rewards/accuracies": 1.0, "rewards/chosen": -4.5456879888661206e-05, "rewards/margins": 0.16556280851364136, "rewards/rejected": -0.1656082570552826, "step": 14444 }, { "epoch": 9.989626556016598, "grad_norm": 3.5137791633605957, "learning_rate": 5.7630244352236056e-08, "log_odds_chosen": 10.902608871459961, "log_odds_ratio": -0.00014107978495303541, "logits/chosen": -0.15415823459625244, "logits/rejected": -0.16600894927978516, "logps/chosen": -0.00030136233544908464, "logps/rejected": -2.6479830741882324, "loss": 0.2951, "nll_loss": 0.0737719014286995, "rewards/accuracies": 1.0, "rewards/chosen": -3.013623609149363e-05, "rewards/margins": 0.26476815342903137, "rewards/rejected": -0.26479828357696533, "step": 14445 }, { "epoch": 9.990318118948824, "grad_norm": 2.9514148235321045, "learning_rate": 5.3788228062086984e-08, "log_odds_chosen": 11.803411483764648, "log_odds_ratio": -1.0912965990428347e-05, "logits/chosen": -0.056035421788692474, "logits/rejected": -0.1221289113163948, "logps/chosen": -0.00013051855785306543, "logps/rejected": -2.565654993057251, "loss": 0.3331, "nll_loss": 0.08327168971300125, "rewards/accuracies": 1.0, "rewards/chosen": -1.3051856512902305e-05, "rewards/margins": 0.25655242800712585, "rewards/rejected": -0.25656551122665405, "step": 14446 }, { "epoch": 9.991009681881051, "grad_norm": 4.212923049926758, "learning_rate": 4.994621177193792e-08, "log_odds_chosen": 9.935128211975098, "log_odds_ratio": -0.00034432156826369464, "logits/chosen": -0.46043860912323, "logits/rejected": -0.47046327590942383, "logps/chosen": -0.0035317661240696907, "logps/rejected": -2.0355629920959473, "loss": 0.3522, "nll_loss": 0.08801926672458649, "rewards/accuracies": 1.0, "rewards/chosen": -0.0003531765833031386, "rewards/margins": 0.20320314168930054, "rewards/rejected": -0.20355631411075592, "step": 14447 }, { "epoch": 9.991701244813278, "grad_norm": 4.088373184204102, "learning_rate": 4.6104195481788846e-08, "log_odds_chosen": 12.469947814941406, "log_odds_ratio": -1.4285373254097067e-05, "logits/chosen": -0.07809141278266907, "logits/rejected": -0.1154666543006897, "logps/chosen": -0.00018460096907801926, "logps/rejected": -3.5400447845458984, "loss": 0.3694, "nll_loss": 0.09235501289367676, "rewards/accuracies": 1.0, "rewards/chosen": -1.8460097635397688e-05, "rewards/margins": 0.3539860248565674, "rewards/rejected": -0.35400447249412537, "step": 14448 }, { "epoch": 9.992392807745505, "grad_norm": 2.9412035942077637, "learning_rate": 4.2262179191639774e-08, "log_odds_chosen": 11.648869514465332, "log_odds_ratio": -2.198399670305662e-05, "logits/chosen": -0.9148153066635132, "logits/rejected": -0.9539530277252197, "logps/chosen": -0.00016754664829932153, "logps/rejected": -2.500732421875, "loss": 0.4408, "nll_loss": 0.1101963147521019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6754665921325795e-05, "rewards/margins": 0.25005650520324707, "rewards/rejected": -0.25007325410842896, "step": 14449 }, { "epoch": 9.993084370677732, "grad_norm": 4.903641700744629, "learning_rate": 3.84201629014907e-08, "log_odds_chosen": 10.70071029663086, "log_odds_ratio": -0.00033215072471648455, "logits/chosen": -0.1328817903995514, "logits/rejected": -0.20120401680469513, "logps/chosen": -0.00018549786182120442, "logps/rejected": -2.3652138710021973, "loss": 0.4958, "nll_loss": 0.12390975654125214, "rewards/accuracies": 1.0, "rewards/chosen": -1.8549788364907727e-05, "rewards/margins": 0.23650284111499786, "rewards/rejected": -0.236521378159523, "step": 14450 }, { "epoch": 9.993775933609959, "grad_norm": 4.2302985191345215, "learning_rate": 3.4578146611341636e-08, "log_odds_chosen": 10.842260360717773, "log_odds_ratio": -4.8358084313804284e-05, "logits/chosen": -0.30342334508895874, "logits/rejected": -0.3700042963027954, "logps/chosen": -0.00016310744103975594, "logps/rejected": -1.9715077877044678, "loss": 0.5477, "nll_loss": 0.13690924644470215, "rewards/accuracies": 1.0, "rewards/chosen": -1.6310745195369236e-05, "rewards/margins": 0.19713447988033295, "rewards/rejected": -0.1971507966518402, "step": 14451 }, { "epoch": 9.994467496542185, "grad_norm": 2.7045373916625977, "learning_rate": 3.073613032119256e-08, "log_odds_chosen": 11.430469512939453, "log_odds_ratio": -2.6685371267376468e-05, "logits/chosen": -0.035802312195301056, "logits/rejected": -0.12288626283407211, "logps/chosen": -0.00012464431347325444, "logps/rejected": -2.233880043029785, "loss": 0.3057, "nll_loss": 0.07642503082752228, "rewards/accuracies": 1.0, "rewards/chosen": -1.2464431165426504e-05, "rewards/margins": 0.22337555885314941, "rewards/rejected": -0.22338801622390747, "step": 14452 }, { "epoch": 9.995159059474412, "grad_norm": 3.083169460296631, "learning_rate": 2.6894114031043492e-08, "log_odds_chosen": 10.873865127563477, "log_odds_ratio": -0.00016370532102882862, "logits/chosen": -0.3911985158920288, "logits/rejected": -0.4098545014858246, "logps/chosen": -0.00013889935507904738, "logps/rejected": -1.9613475799560547, "loss": 0.466, "nll_loss": 0.11647483706474304, "rewards/accuracies": 1.0, "rewards/chosen": -1.3889934962207917e-05, "rewards/margins": 0.19612087309360504, "rewards/rejected": -0.1961347460746765, "step": 14453 }, { "epoch": 9.995850622406639, "grad_norm": 2.367278814315796, "learning_rate": 2.3052097740894423e-08, "log_odds_chosen": 11.183568000793457, "log_odds_ratio": -3.5808894608635455e-05, "logits/chosen": -0.21817639470100403, "logits/rejected": -0.1968151032924652, "logps/chosen": -0.0003093630075454712, "logps/rejected": -2.3852624893188477, "loss": 0.3001, "nll_loss": 0.07503216713666916, "rewards/accuracies": 1.0, "rewards/chosen": -3.093630220973864e-05, "rewards/margins": 0.23849530518054962, "rewards/rejected": -0.23852625489234924, "step": 14454 }, { "epoch": 9.996542185338866, "grad_norm": 3.72048020362854, "learning_rate": 1.921008145074535e-08, "log_odds_chosen": 12.282805442810059, "log_odds_ratio": -1.186850386147853e-05, "logits/chosen": -0.5011136531829834, "logits/rejected": -0.5989029407501221, "logps/chosen": -0.00011271586117800325, "logps/rejected": -2.921992778778076, "loss": 0.3838, "nll_loss": 0.09595651924610138, "rewards/accuracies": 1.0, "rewards/chosen": -1.1271586117800325e-05, "rewards/margins": 0.29218801856040955, "rewards/rejected": -0.2921992838382721, "step": 14455 }, { "epoch": 9.997233748271093, "grad_norm": 3.6513216495513916, "learning_rate": 1.536806516059628e-08, "log_odds_chosen": 12.021650314331055, "log_odds_ratio": -1.8507635104469955e-05, "logits/chosen": -0.26748454570770264, "logits/rejected": -0.48302775621414185, "logps/chosen": -0.00019425532082095742, "logps/rejected": -2.976384162902832, "loss": 0.4706, "nll_loss": 0.11765141785144806, "rewards/accuracies": 1.0, "rewards/chosen": -1.9425533537287265e-05, "rewards/margins": 0.2976189851760864, "rewards/rejected": -0.2976384162902832, "step": 14456 }, { "epoch": 9.99792531120332, "grad_norm": 3.2580089569091797, "learning_rate": 1.1526048870447212e-08, "log_odds_chosen": 11.5446195602417, "log_odds_ratio": -3.38861791533418e-05, "logits/chosen": -0.2588220536708832, "logits/rejected": -0.3084482252597809, "logps/chosen": -0.00026432055165059865, "logps/rejected": -2.583191394805908, "loss": 0.3673, "nll_loss": 0.09181944280862808, "rewards/accuracies": 1.0, "rewards/chosen": -2.6432056984049268e-05, "rewards/margins": 0.25829267501831055, "rewards/rejected": -0.2583191394805908, "step": 14457 }, { "epoch": 9.998616874135546, "grad_norm": 2.574554204940796, "learning_rate": 7.68403258029814e-09, "log_odds_chosen": 10.828826904296875, "log_odds_ratio": -6.853970262454823e-05, "logits/chosen": -0.13539712131023407, "logits/rejected": -0.27097612619400024, "logps/chosen": -0.0007511146832257509, "logps/rejected": -2.2712390422821045, "loss": 0.2194, "nll_loss": 0.05484360456466675, "rewards/accuracies": 1.0, "rewards/chosen": -7.511146395700052e-05, "rewards/margins": 0.22704878449440002, "rewards/rejected": -0.2271239161491394, "step": 14458 }, { "epoch": 9.999308437067773, "grad_norm": 2.449514865875244, "learning_rate": 3.84201629014907e-09, "log_odds_chosen": 10.208294868469238, "log_odds_ratio": -0.0011579160345718265, "logits/chosen": -0.21203842759132385, "logits/rejected": -0.27797165513038635, "logps/chosen": -0.002736873459070921, "logps/rejected": -2.4325757026672363, "loss": 0.2492, "nll_loss": 0.062181442975997925, "rewards/accuracies": 1.0, "rewards/chosen": -0.0002736873284447938, "rewards/margins": 0.2429838627576828, "rewards/rejected": -0.2432575672864914, "step": 14459 }, { "epoch": 10.0, "grad_norm": 2.7562191486358643, "learning_rate": 0.0, "log_odds_chosen": 11.591655731201172, "log_odds_ratio": -3.828236003755592e-05, "logits/chosen": -0.5259432196617126, "logits/rejected": -0.5628337264060974, "logps/chosen": -0.00029507066938094795, "logps/rejected": -2.732250452041626, "loss": 0.337, "nll_loss": 0.08423374593257904, "rewards/accuracies": 1.0, "rewards/chosen": -2.9507067665690556e-05, "rewards/margins": 0.2731955647468567, "rewards/rejected": -0.2732250690460205, "step": 14460 } ], "logging_steps": 1, "max_steps": 14460, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }